source: mainline/uspace/lib/posix/source/stdio/scanf.c@ 33b8d024

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 33b8d024 was 33b8d024, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 7 years ago

Remove const qualifier from the argument of free() and realloc(),
as well as in numerous other variables that hold ownership of memory.

By convention, a pointer that holds ownership is _never_ qualified by const.
This is reflected in the standard type signature of free() and realloc().
Allowing const pointers to hold ownership may seem superficially convenient,
but is actually quite confusing to experienced C programmers.

  • Property mode set to 100644
File size: 33.0 KB
RevLine 
[08053f7]1/*
2 * Copyright (c) 2011 Petr Koupy
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup libposix
30 * @{
31 */
32/** @file Implementation of the scanf backend.
33 */
34
35#define LIBPOSIX_INTERNAL
[fdf97f6]36#define __POSIX_DEF__(x) posix_##x
[08053f7]37
[e8d3c6f5]38#include <assert.h>
[0d0b319]39
40#include <errno.h>
[08053f7]41
[a3da2b2]42#include "posix/stdio.h"
43#include "posix/stdlib.h"
44#include "posix/stddef.h"
45#include "posix/string.h"
46#include "posix/ctype.h"
47#include "posix/sys/types.h"
[08053f7]48
49#include "../internal/common.h"
[a3da2b2]50#include "libc/malloc.h"
[3e6a98c5]51#include "libc/stdbool.h"
[08053f7]52
/**
 * Unified data type for the possible data sources of scanf.
 *
 * Exactly one member is meaningful for a given provider; which one is
 * determined by the set of callbacks installed in the provider.
 */
typedef union __data_source {
	FILE *stream;       /**< Input file stream. */
	const char *string; /**< Input string. */
} _data_source;
58
/** Internal state of the input provider. */
enum {
	/** Partly constructed but not yet functional. */
	_PROV_CONSTRUCTED,
	/** Ready to serve any request. */
	_PROV_READY,
	/**
	 * Cursor is temporarily lent to an external entity. No action is
	 * possible until the cursor is returned.
	 */
	_PROV_CURSOR_LENT,
};
69
70/** Universal abstraction over data input for scanf. */
71typedef struct __input_provider {
72 /** Source of data elements. */
73 _data_source source;
74 /** How many elements was already processed. */
75 int consumed;
76 /** How many elements was already fetched from the source. */
77 int fetched;
78 /** Elements are fetched from the source in batches (e.g. by getline())
79 * to allow using strtol/strtod family even on streams. */
80 char *window;
81 /** Size of the current window. */
82 size_t window_size;
83 /** Points to the next element to be processed inside the current window. */
84 const char *cursor;
85 /** Internal state of the provider. */
86 int state;
87
88 /** Take control over data source. Finish initialization of the internal
89 * structures (e.g. allocation of window). */
90 void (*capture)(struct __input_provider *);
91 /** Get a single element from the source and update the internal structures
92 * accordingly (e.g. greedy update of the window). Return -1 if the
93 * element cannot be obtained. */
94 int (*pop)(struct __input_provider *);
95 /** Undo the most recent not-undone pop operation. Might be necesarry to
96 * flush current window and seek data source backwards. Return 0 if the
97 * pop history is exhausted, non-zero on success. */
98 int (*undo)(struct __input_provider *);
99 /** Lend the cursor to the caller. */
100 const char * (*borrow_cursor)(struct __input_provider *);
101 /** Take control over possibly incremented cursor and update the internal
102 * structures if necessary. */
103 void (*return_cursor)(struct __input_provider *, const char *);
104 /** Release the control over the source. That is, synchronize any
105 * fetched but non-consumed elements (e.g. by seeking) and destruct
106 * internal structures (e.g. window deallocation). */
107 void (*release)(struct __input_provider *);
108} _input_provider;
109
110/** @see __input_provider */
111static void _capture_stream(_input_provider *self)
112{
113 assert(self->source.stream);
114 assert(self->state == _PROV_CONSTRUCTED);
115 /* Caller could already pre-allocated the window. */
116 assert((self->window == NULL && self->window_size == 0) ||
[12fb8498]117 (self->window && self->window_size > 0));
[08053f7]118
119 /* Initialize internal structures. */
120 self->consumed = 0;
121 ssize_t fetched = posix_getline(
[12fb8498]122 &self->window, &self->window_size, self->source.stream);
[08053f7]123 if (fetched != -1) {
124 self->fetched = fetched;
125 self->cursor = self->window;
126 } else {
127 /* EOF encountered. */
128 self->fetched = 0;
129 self->cursor = NULL;
130 }
131 self->state = _PROV_READY;
132}
133
134/** @see __input_provider */
135static void _capture_string(_input_provider *self)
136{
137 assert(self->source.string);
138 assert(self->state == _PROV_CONSTRUCTED);
139
140 /* Initialize internal structures. */
141 self->consumed = 0;
142 self->fetched = posix_strlen(self->source.string);
143 self->window = (char *) self->source.string;
144 self->window_size = self->fetched + 1;
145 self->cursor = self->window;
146 self->state = _PROV_READY;
147}
148
149/** @see __input_provider */
150static int _pop_stream(_input_provider *self)
151{
152 assert(self->state == _PROV_READY);
153
154 if (self->cursor) {
155 int c = *self->cursor;
156 ++self->consumed;
157 ++self->cursor;
158 /* Do we need to fetch a new line from the source? */
159 if (*self->cursor == '\0') {
160 ssize_t fetched = posix_getline(&self->window,
[12fb8498]161 &self->window_size, self->source.stream);
[08053f7]162 if (fetched != -1) {
163 self->fetched += fetched;
164 self->cursor = self->window;
165 } else {
166 /* EOF encountered. */
167 self->cursor = NULL;
168 }
169 }
170 return c;
171 } else {
172 /* Already at EOF. */
173 return -1;
174 }
175}
176
177/** @see __input_provider */
178static int _pop_string(_input_provider *self)
179{
180 assert(self->state == _PROV_READY);
181
182 if (*self->cursor != '\0') {
183 int c = *self->cursor;
184 ++self->consumed;
185 ++self->cursor;
186 return c;
187 } else {
188 /* String depleted. */
189 return -1;
190 }
191}
192
193/** @see __input_provider */
194static int _undo_stream(_input_provider *self)
195{
196 assert(self->state == _PROV_READY);
197
198 if (self->consumed == 0) {
199 /* Undo history exhausted. */
200 return 0;
201 }
202
203 if (!self->cursor || self->window == self->cursor) {
204 /* Complex case. Either at EOF (cursor == NULL) or there is no more
205 * place to retreat to inside the window. Seek the source backwards
206 * and flush the window. Regarding the scanf, this could happend only
207 * when matching unbounded string (%s) or unbounded scanset (%[) not
[9c1984f]208 * containing newline, while at the same time newline is the character
[08053f7]209 * that breaks the matching process. */
210 int rc = posix_fseek(
211 self->source.stream, -1, SEEK_CUR);
212 if (rc == -1) {
213 /* Seek failed. */
214 return 0;
215 }
216 ssize_t fetched = posix_getline(&self->window,
[12fb8498]217 &self->window_size, self->source.stream);
[08053f7]218 if (fetched != -1) {
219 assert(fetched == 1);
220 self->fetched = self->consumed + 1;
221 self->cursor = self->window;
222 } else {
223 /* Stream is broken. */
224 return 0;
225 }
226 } else {
227 /* Simple case. Still inside window. */
228 --self->cursor;
229 }
230 --self->consumed;
231 return 1; /* Success. */
232}
233
234/** @see __input_provider */
235static int _undo_string(_input_provider *self)
236{
237 assert(self->state == _PROV_READY);
238
239 if (self->consumed > 0) {
240 --self->consumed;
241 --self->cursor;
242 } else {
243 /* Undo history exhausted. */
244 return 0;
245 }
246 return 1; /* Success. */
247}
248
249/** @see __input_provider */
250static const char *_borrow_cursor_universal(_input_provider *self)
251{
252 assert(self->state == _PROV_READY);
253
254 self->state = _PROV_CURSOR_LENT;
255 return self->cursor;
256}
257
258/** @see __input_provider */
259static void _return_cursor_stream(_input_provider *self, const char *cursor)
260{
261 assert(self->state == _PROV_CURSOR_LENT);
262
263 /* Check how much of the window did external entity consumed. */
264 self->consumed += cursor - self->cursor;
265 self->cursor = cursor;
266 if (*self->cursor == '\0') {
267 /* Window was completely consumed, fetch new data. */
268 ssize_t fetched = posix_getline(&self->window,
[12fb8498]269 &self->window_size, self->source.stream);
[08053f7]270 if (fetched != -1) {
271 self->fetched += fetched;
272 self->cursor = self->window;
273 } else {
274 /* EOF encountered. */
275 self->cursor = NULL;
276 }
277 }
278 self->state = _PROV_READY;
279}
280
281/** @see __input_provider */
282static void _return_cursor_string(_input_provider *self, const char *cursor)
283{
284 assert(self->state == _PROV_CURSOR_LENT);
285
286 /* Check how much of the window did external entity consumed. */
287 self->consumed += cursor - self->cursor;
288 self->cursor = cursor;
289 self->state = _PROV_READY;
290}
291
292/** @see __input_provider */
293static void _release_stream(_input_provider *self)
294{
295 assert(self->state == _PROV_READY);
296 assert(self->consumed >= self->fetched);
297
298 /* Try to correct the difference between the stream position and what was
299 * actually consumed. If it is not possible, continue anyway. */
300 posix_fseek(self->source.stream, self->consumed - self->fetched, SEEK_CUR);
301
302 /* Destruct internal structures. */
303 self->fetched = 0;
304 self->cursor = NULL;
[a12f7f1]305 if (self->window) {
306 free(self->window);
307 self->window = NULL;
308 }
[08053f7]309 self->window_size = 0;
310 self->state = _PROV_CONSTRUCTED;
311}
312
313/** @see __input_provider */
314static void _release_string(_input_provider *self)
315{
316 assert(self->state == _PROV_READY);
317
318 /* Destruct internal structures. */
319 self->fetched = 0;
320 self->cursor = NULL;
321 self->window = NULL;
322 self->window_size = 0;
323 self->state = _PROV_CONSTRUCTED;
324}
325
/** Length modifier values. */
enum {
	LMOD_NONE, /**< No length modifier recognized (or it was reset). */
	LMOD_hh,
	LMOD_h,
	LMOD_l,
	LMOD_ll,
	LMOD_j,
	LMOD_z,
	LMOD_t,
	LMOD_L,
	LMOD_p, /**< Reserved for the %p conversion. */
};
339
340/**
341 * Decides whether provided characters specify length modifier. If so, the
342 * recognized modifier is stored through provider pointer.
343 *
344 * @param c Candidate on the length modifier.
345 * @param _c Next character (might be NUL).
346 * @param modifier Pointer to the modifier value.
347 * @return Whether the modifier was recognized or not.
348 */
349static inline int is_length_mod(int c, int _c, int *modifier)
350{
351 assert(modifier);
352
353 switch (c) {
354 case 'h':
[9c1984f]355 /* Check whether the modifier was not already recognized. */
356 if (*modifier == LMOD_NONE) {
357 *modifier = _c == 'h' ? LMOD_hh : LMOD_h;
358 } else {
359 /* Format string is invalid. Notify the caller. */
360 *modifier = LMOD_NONE;
361 }
[08053f7]362 return 1;
363 case 'l':
[9c1984f]364 if (*modifier == LMOD_NONE) {
365 *modifier = _c == 'l' ? LMOD_ll : LMOD_l;
366 } else {
367 *modifier = LMOD_NONE;
368 }
[08053f7]369 return 1;
370 case 'j':
[9c1984f]371 *modifier = *modifier == LMOD_NONE ? LMOD_j : LMOD_NONE;
[08053f7]372 return 1;
373 case 'z':
[9c1984f]374 *modifier = *modifier == LMOD_NONE ? LMOD_z : LMOD_NONE;
[08053f7]375 return 1;
376 case 't':
[9c1984f]377 *modifier = *modifier == LMOD_NONE ? LMOD_t : LMOD_NONE;
[08053f7]378 return 1;
379 case 'L':
[9c1984f]380 *modifier = *modifier == LMOD_NONE ? LMOD_L : LMOD_NONE;
[08053f7]381 return 1;
382 default:
383 return 0;
384 }
385}
386
/**
 * Decides whether the provided character specifies an integer conversion.
 * If so, the semantics of the conversion are stored through the provided
 * pointers.
 *
 * @param c Candidate for the integer conversion.
 * @param is_unsigned Pointer to store whether the conversion is unsigned.
 * @param base Pointer to store the base of the integer conversion
 *             (0 for %i).
 * @return 1 if the conversion was recognized, 0 otherwise (the outputs are
 *         then left untouched).
 */
static inline int is_int_conv(int c, bool *is_unsigned, int *base)
{
	assert(is_unsigned && base);

	switch (c) {
	case 'd':
		*is_unsigned = false;
		*base = 10;
		return 1;
	case 'i':
		*is_unsigned = false;
		*base = 0;
		return 1;
	case 'o':
		*is_unsigned = true;
		*base = 8;
		return 1;
	case 'u':
		*is_unsigned = true;
		*base = 10;
		return 1;
	case 'p':
		/* According to POSIX, the %p conversion is implementation defined
		 * but must correspond to its printf counterpart. Here it is
		 * treated like a hexadecimal number. */
		/* Fallthrough */
	case 'x':
	case 'X':
		*is_unsigned = true;
		*base = 16;
		return 1;
	default:
		return 0;
	}
}
429
/**
 * Decides whether the provided character specifies a conversion of a
 * floating point number.
 *
 * @param c Candidate for the floating point conversion.
 * @return 1 for any of a, A, e, E, f, F, g, G; 0 otherwise.
 */
static inline int is_float_conv(int c)
{
	switch (c) {
	case 'a':
	case 'A':
	case 'e':
	case 'E':
	case 'f':
	case 'F':
	case 'g':
	case 'G':
		return 1;
	default:
		return 0;
	}
}
453
454/**
455 * Decides whether provided character specifies conversion of the character
456 * sequence.
457 *
458 * @param c Candidate on the character sequence conversion.
459 * @param modifier Pointer to store length modifier for wide chars.
460 * @return Whether the conversion was recognized or not.
461 */
462static inline int is_seq_conv(int c, int *modifier)
463{
464 assert(modifier);
465
466 switch (c) {
467 case 'S':
468 *modifier = LMOD_l;
[dc12262]469 /* Fallthrough */
[08053f7]470 case 's':
471 return 1;
472 case 'C':
473 *modifier = LMOD_l;
[dc12262]474 /* Fallthrough */
[08053f7]475 case 'c':
476 return 1;
477 case '[':
478 return 1;
479 default:
480 return 0;
481 }
482}
483
484/**
485 * Backend for the whole family of scanf functions. Uses input provider
486 * to abstract over differences between strings and streams. Should be
487 * POSIX compliant (apart from the not supported stuff).
488 *
489 * NOT SUPPORTED: locale (see strtold), wide chars, numbered output arguments
490 *
491 * @param in Input provider.
492 * @param fmt Format description.
493 * @param arg Output arguments.
494 * @return The number of converted output items or EOF on failure.
495 */
496static inline int _internal_scanf(
497 _input_provider *in, const char *restrict fmt, va_list arg)
498{
499 int c = -1;
500 int converted_cnt = 0;
501 bool converting = false;
502 bool matching_failure = false;
503
504 bool assign_supress = false;
505 bool assign_alloc = false;
506 long width = -1;
507 int length_mod = LMOD_NONE;
508 bool int_conv_unsigned = false;
509 int int_conv_base = 0;
510
511 /* Buffers allocated by scanf for optional 'm' specifier must be remembered
512 * to deallocaate them in case of an error. Because each of those buffers
513 * corresponds to one of the argument from va_list, there is an upper bound
514 * on the number of those arguments. In case of C99, this uppper bound is
515 * 127 arguments. */
516 char *buffers[127];
517 for (int i = 0; i < 127; ++i) {
518 buffers[i] = NULL;
519 }
520 int next_unused_buffer_idx = 0;
521
522 in->capture(in);
523
524 /* Interpret format string. Control shall prematurely jump from the cycle
525 * on input failure, matching failure or illegal format string. In order
526 * to keep error reporting simple enough and to keep input consistent,
527 * error condition shall be always manifested as jump from the cycle,
528 * not function return. Format string pointer shall be updated specifically
529 * for each sub-case (i.e. there shall be no loop-wide increment).*/
530 while (*fmt) {
531
532 if (converting) {
533
534 /* Processing inside conversion specifier. Either collect optional
535 * parameters or execute the conversion. When the conversion
536 * is successfully completed, increment conversion count and switch
537 * back to normal mode. */
538 if (*fmt == '*') {
539 /* Assignment-supression (optional). */
540 if (assign_supress) {
541 /* Already set. Illegal format string. */
542 break;
543 }
544 assign_supress = true;
545 ++fmt;
546 } else if (*fmt == 'm') {
547 /* Assignment-allocation (optional). */
548 if (assign_alloc) {
549 /* Already set. Illegal format string. */
550 break;
551 }
552 assign_alloc = true;
553 ++fmt;
554 } else if (*fmt == '$') {
555 /* Reference to numbered output argument. */
556 // TODO
557 not_implemented();
558 } else if (isdigit(*fmt)) {
559 /* Maximum field length (optional). */
560 if (width != -1) {
561 /* Already set. Illegal format string. */
562 break;
563 }
564 char *fmt_new = NULL;
[d39c46e0]565 width = strtol(fmt, &fmt_new, 10);
[08053f7]566 if (width != 0) {
567 fmt = fmt_new;
568 } else {
569 /* Since POSIX requires width to be non-zero, it is
570 * sufficient to interpret zero width as error without
571 * referring to errno. */
572 break;
573 }
574 } else if (is_length_mod(*fmt, *(fmt + 1), &length_mod)) {
575 /* Length modifier (optional). */
576 if (length_mod == LMOD_NONE) {
577 /* Already set. Illegal format string. The actual detection
578 * is carried out in the is_length_mod(). */
579 break;
580 }
581 if (length_mod == LMOD_hh || length_mod == LMOD_ll) {
582 /* Modifier was two characters long. */
583 ++fmt;
584 }
585 ++fmt;
586 } else if (is_int_conv(*fmt, &int_conv_unsigned, &int_conv_base)) {
587 /* Integer conversion. */
588
589 /* Check sanity of optional parts of conversion specifier. */
590 if (assign_alloc || length_mod == LMOD_L) {
591 /* Illegal format string. */
592 break;
593 }
594
595 /* Conversion of the integer with %p specifier needs special
596 * handling, because it is not allowed to have arbitrary
597 * length modifier. */
598 if (*fmt == 'p') {
599 if (length_mod == LMOD_NONE) {
600 length_mod = LMOD_p;
601 } else {
602 /* Already set. Illegal format string. */
603 break;
604 }
605 }
606
607 /* First consume any white spaces, so we can borrow cursor
608 * from the input provider. This way, the cursor will either
609 * point to the non-white space while the input will be
610 * prefetched up to the newline (which is suitable for strtol),
611 * or the input will be at EOF. */
612 do {
613 c = in->pop(in);
614 } while (isspace(c));
615
616 /* After skipping the white spaces, can we actually continue? */
617 if (c == -1) {
618 /* Input failure. */
619 break;
620 } else {
621 /* Everything is OK, just undo the last pop, so the cursor
622 * can be borrowed. */
623 in->undo(in);
624 }
[9c1984f]625
[08053f7]626 const char *cur_borrowed = NULL;
[33b8d024]627 char *cur_duplicated = NULL;
[08053f7]628 const char *cur_limited = NULL;
[33b8d024]629 const char *cur_updated = NULL;
[08053f7]630
631 /* Borrow the cursor. Until it is returned to the provider
632 * we cannot jump from the cycle, because it would leave
633 * the input inconsistent. */
634 cur_borrowed = in->borrow_cursor(in);
635
636 /* If the width is limited, the cursor horizont must be
637 * decreased accordingly. Otherwise the strtol could read more
638 * than allowed by width. */
639 if (width != -1) {
[33b8d024]640 cur_duplicated = posix_strndup(cur_borrowed, width);
641 cur_limited = cur_duplicated;
[08053f7]642 } else {
643 cur_limited = cur_borrowed;
644 }
[33b8d024]645 cur_updated = cur_limited;
[08053f7]646
647 long long sres = 0;
648 unsigned long long ures = 0;
649 errno = 0; /* Reset errno to recognize error later. */
650 /* Try to convert the integer. */
651 if (int_conv_unsigned) {
[33b8d024]652 ures = strtoull(cur_limited, (char **) &cur_updated, int_conv_base);
[08053f7]653 } else {
[33b8d024]654 sres = strtoll(cur_limited, (char **) &cur_updated, int_conv_base);
[08053f7]655 }
656
657 /* Update the cursor so it can be returned to the provider. */
658 cur_borrowed += cur_updated - cur_limited;
[33b8d024]659 if (cur_duplicated != NULL) {
[08053f7]660 /* Deallocate duplicated part of the cursor view. */
[33b8d024]661 free(cur_duplicated);
[08053f7]662 }
663 cur_limited = NULL;
664 cur_updated = NULL;
[33b8d024]665 cur_duplicated = NULL;
[08053f7]666 /* Return the cursor to the provider. Input consistency is again
667 * the job of the provider, so we can report errors from
668 * now on. */
669 in->return_cursor(in, cur_borrowed);
670 cur_borrowed = NULL;
671
672 /* Check whether the conversion was successful. */
673 if (errno != EOK) {
674 matching_failure = true;
675 break;
676 }
677
[9c1984f]678 /* If not supressed, assign the converted integer into
[08053f7]679 * the next output argument. */
680 if (!assign_supress) {
681 if (int_conv_unsigned) {
682 switch (length_mod) {
683 case LMOD_hh: ; /* Label cannot be part of declaration. */
684 unsigned char *phh = va_arg(arg, unsigned char *);
685 *phh = (unsigned char) ures;
686 break;
687 case LMOD_h: ;
688 unsigned short *ph = va_arg(arg, unsigned short *);
689 *ph = (unsigned short) ures;
690 break;
691 case LMOD_NONE: ;
692 unsigned *pdef = va_arg(arg, unsigned *);
693 *pdef = (unsigned) ures;
694 break;
695 case LMOD_l: ;
696 unsigned long *pl = va_arg(arg, unsigned long *);
697 *pl = (unsigned long) ures;
698 break;
699 case LMOD_ll: ;
700 unsigned long long *pll = va_arg(arg, unsigned long long *);
701 *pll = (unsigned long long) ures;
702 break;
703 case LMOD_j: ;
[d6c98451]704 uintmax_t *pj = va_arg(arg, uintmax_t *);
705 *pj = (uintmax_t) ures;
[08053f7]706 break;
707 case LMOD_z: ;
708 size_t *pz = va_arg(arg, size_t *);
709 *pz = (size_t) ures;
710 break;
711 case LMOD_t: ;
[6921178]712 // XXX: What is unsigned counterpart of the ptrdiff_t?
[08053f7]713 size_t *pt = va_arg(arg, size_t *);
714 *pt = (size_t) ures;
715 break;
716 case LMOD_p: ;
717 void **pp = va_arg(arg, void **);
718 *pp = (void *) (uintptr_t) ures;
719 break;
720 default:
721 assert(false);
722 }
723 } else {
724 switch (length_mod) {
725 case LMOD_hh: ; /* Label cannot be part of declaration. */
726 signed char *phh = va_arg(arg, signed char *);
727 *phh = (signed char) sres;
728 break;
729 case LMOD_h: ;
730 short *ph = va_arg(arg, short *);
731 *ph = (short) sres;
732 break;
733 case LMOD_NONE: ;
734 int *pdef = va_arg(arg, int *);
735 *pdef = (int) sres;
736 break;
737 case LMOD_l: ;
738 long *pl = va_arg(arg, long *);
739 *pl = (long) sres;
740 break;
741 case LMOD_ll: ;
742 long long *pll = va_arg(arg, long long *);
743 *pll = (long long) sres;
744 break;
745 case LMOD_j: ;
[d6c98451]746 intmax_t *pj = va_arg(arg, intmax_t *);
747 *pj = (intmax_t) sres;
[08053f7]748 break;
749 case LMOD_z: ;
750 ssize_t *pz = va_arg(arg, ssize_t *);
751 *pz = (ssize_t) sres;
752 break;
753 case LMOD_t: ;
[fbf4dc1]754 ptrdiff_t *pt = va_arg(arg, ptrdiff_t *);
755 *pt = (ptrdiff_t) sres;
[08053f7]756 break;
757 default:
758 assert(false);
759 }
760 }
761 ++converted_cnt;
762 }
763
764 converting = false;
765 ++fmt;
766 } else if (is_float_conv(*fmt)) {
767 /* Floating point number conversion. */
768
769 /* Check sanity of optional parts of conversion specifier. */
770 if (assign_alloc) {
771 /* Illegal format string. */
772 break;
773 }
774 if (length_mod != LMOD_NONE &&
775 length_mod != LMOD_l &&
776 length_mod != LMOD_L) {
777 /* Illegal format string. */
778 break;
779 }
780
781 /* First consume any white spaces, so we can borrow cursor
782 * from the input provider. This way, the cursor will either
783 * point to the non-white space while the input will be
784 * prefetched up to the newline (which is suitable for strtof),
785 * or the input will be at EOF. */
786 do {
787 c = in->pop(in);
788 } while (isspace(c));
789
790 /* After skipping the white spaces, can we actually continue? */
791 if (c == -1) {
792 /* Input failure. */
793 break;
794 } else {
795 /* Everything is OK, just undo the last pop, so the cursor
796 * can be borrowed. */
797 in->undo(in);
798 }
799
800 const char *cur_borrowed = NULL;
801 const char *cur_limited = NULL;
[33b8d024]802 char *cur_duplicated = NULL;
803 const char *cur_updated = NULL;
[08053f7]804
805 /* Borrow the cursor. Until it is returned to the provider
806 * we cannot jump from the cycle, because it would leave
807 * the input inconsistent. */
808 cur_borrowed = in->borrow_cursor(in);
809
810 /* If the width is limited, the cursor horizont must be
811 * decreased accordingly. Otherwise the strtof could read more
812 * than allowed by width. */
813 if (width != -1) {
[33b8d024]814 cur_duplicated = posix_strndup(cur_borrowed, width);
815 cur_limited = cur_duplicated;
[08053f7]816 } else {
817 cur_limited = cur_borrowed;
818 }
[33b8d024]819 cur_updated = cur_limited;
[08053f7]820
821 float fres = 0.0;
822 double dres = 0.0;
823 long double ldres = 0.0;
824 errno = 0; /* Reset errno to recognize error later. */
825 /* Try to convert the floating point nubmer. */
826 switch (length_mod) {
827 case LMOD_NONE:
[33b8d024]828 fres = posix_strtof(cur_limited, (char **) &cur_updated);
[08053f7]829 break;
830 case LMOD_l:
[33b8d024]831 dres = posix_strtod(cur_limited, (char **) &cur_updated);
[08053f7]832 break;
833 case LMOD_L:
[33b8d024]834 ldres = posix_strtold(cur_limited, (char **) &cur_updated);
[08053f7]835 break;
836 default:
837 assert(false);
838 }
839
840 /* Update the cursor so it can be returned to the provider. */
841 cur_borrowed += cur_updated - cur_limited;
[33b8d024]842 if (cur_duplicated != NULL) {
[08053f7]843 /* Deallocate duplicated part of the cursor view. */
[33b8d024]844 free(cur_duplicated);
[08053f7]845 }
846 cur_limited = NULL;
847 cur_updated = NULL;
848 /* Return the cursor to the provider. Input consistency is again
849 * the job of the provider, so we can report errors from
850 * now on. */
851 in->return_cursor(in, cur_borrowed);
852 cur_borrowed = NULL;
853
854 /* Check whether the conversion was successful. */
855 if (errno != EOK) {
856 matching_failure = true;
857 break;
858 }
859
860 /* If nto supressed, assign the converted floating point number
861 * into the next output argument. */
862 if (!assign_supress) {
863 switch (length_mod) {
864 case LMOD_NONE: ; /* Label cannot be part of declaration. */
865 float *pf = va_arg(arg, float *);
866 *pf = fres;
867 break;
868 case LMOD_l: ;
869 double *pd = va_arg(arg, double *);
870 *pd = dres;
871 break;
872 case LMOD_L: ;
873 long double *pld = va_arg(arg, long double *);
874 *pld = ldres;
875 break;
876 default:
877 assert(false);
878 }
879 ++converted_cnt;
880 }
881
882 converting = false;
883 ++fmt;
884 } else if (is_seq_conv(*fmt, &length_mod)) {
885 /* Character sequence conversion. */
886
887 /* Check sanity of optional parts of conversion specifier. */
888 if (length_mod != LMOD_NONE &&
889 length_mod != LMOD_l) {
890 /* Illegal format string. */
891 break;
892 }
893
894 if (length_mod == LMOD_l) {
895 /* Wide chars not supported. */
896 // TODO
897 not_implemented();
898 }
899
900 int term_size = 1; /* Size of the terminator (0 or 1)). */
901 if (*fmt == 'c') {
902 term_size = 0;
903 width = width == -1 ? 1 : width;
904 }
905
906 if (*fmt == 's') {
907 /* Skip white spaces. */
908 do {
909 c = in->pop(in);
910 } while (isspace(c));
911 } else {
912 /* Fetch a single character. */
913 c = in->pop(in);
914 }
915
916 /* Check whether there is still input to read. */
917 if (c == -1) {
918 /* Input failure. */
919 break;
920 }
921
922 /* Prepare scanset. */
923 char terminate_on[256];
924 for (int i = 0; i < 256; ++i) {
925 terminate_on[i] = 0;
926 }
927 if (*fmt == 'c') {
928 ++fmt;
929 } else if (*fmt == 's') {
930 terminate_on[' '] = 1;
931 terminate_on['\n'] = 1;
932 terminate_on['\t'] = 1;
933 terminate_on['\f'] = 1;
934 terminate_on['\r'] = 1;
935 terminate_on['\v'] = 1;
936 ++fmt;
937 } else {
938 assert(*fmt == '[');
939 bool not = false;
940 bool dash = false;
941 ++fmt;
942 /* Check for negation. */
943 if (*fmt == '^') {
944 not = true;
945 ++fmt;
946 }
947 /* Check for escape sequences. */
948 if (*fmt == '-' || *fmt == ']') {
949 terminate_on[(int) *fmt] = 1;
950 ++fmt;
951 }
952 /* Check for ordinary characters and ranges. */
953 while (*fmt != '\0' && *fmt != ']') {
954 if (dash) {
955 for (char chr = *(fmt - 2); chr <= *fmt; ++chr) {
956 terminate_on[(int) chr] = 1;
957 }
958 dash = false;
959 } else if (*fmt == '-') {
960 dash = true;
961 } else {
962 terminate_on[(int) *fmt] = 1;
963 }
964 ++fmt;
965 }
966 /* Check for escape sequence. */
967 if (dash == true) {
968 terminate_on['-'] = 1;
969 }
970 /* Check whether the specifier was correctly terminated.*/
971 if (*fmt == '\0') {
972 /* Illegal format string. */
973 break;
974 } else {
975 ++fmt;
976 }
977 /* Inverse the scanset if necessary. */
978 if (not == false) {
979 for (int i = 0; i < 256; ++i) {
980 terminate_on[i] = terminate_on[i] ? 0 : 1;
981 }
982 }
983 }
984
985 char * buf = NULL;
986 size_t buf_size = 0;
987 char * cur = NULL;
988 size_t alloc_step = 80; /* Buffer size gain during reallocation. */
989 int my_buffer_idx = 0;
990
991 /* Retrieve the buffer into which popped characters
992 * will be stored. */
993 if (!assign_supress) {
994 if (assign_alloc) {
995 /* We must allocate our own buffer. */
996 buf_size =
997 width == -1 ? alloc_step : (size_t) width + term_size;
998 buf = malloc(buf_size);
999 if (!buf) {
1000 /* No memory. */
1001 break;
1002 }
1003 my_buffer_idx = next_unused_buffer_idx;
1004 ++next_unused_buffer_idx;
1005 buffers[my_buffer_idx] = buf;
1006 cur = buf;
1007 } else {
1008 /* Caller provided its buffer. */
1009 buf = va_arg(arg, char *);
1010 cur = buf;
1011 buf_size =
1012 width == -1 ? SIZE_MAX : (size_t) width + term_size;
1013 }
1014 }
1015
1016 /* Match the string. The next character is already popped. */
1017 while ((width == -1 || width > 0) && c != -1 && !terminate_on[c]) {
1018
1019 /* Check whether the buffer is still sufficiently large. */
1020 if (!assign_supress) {
1021 /* Always reserve space for the null terminator. */
1022 if (cur == buf + buf_size - term_size) {
1023 /* Buffer size must be increased. */
1024 buf = realloc(buf, buf_size + alloc_step);
1025 if (buf) {
1026 buffers[my_buffer_idx] = buf;
1027 cur = buf + buf_size - term_size;
1028 buf_size += alloc_step;
1029 } else {
1030 /* Break just from this tight loop. Errno will
1031 * be checked after it. */
1032 break;
1033 }
1034 }
1035 /* Store the input character. */
1036 *cur = c;
1037 }
1038
1039 width = width == -1 ? -1 : width - 1;
1040 ++cur;
1041 c = in->pop(in);
1042 }
1043 if (errno == ENOMEM) {
1044 /* No memory. */
1045 break;
1046 }
1047 if (c != -1) {
1048 /* There is still more input, so undo the last pop. */
1049 in->undo(in);
1050 }
1051
1052 /* Check for failures. */
1053 if (cur == buf) {
1054 /* Matching failure. Input failure was already checked
1055 * earlier. */
1056 matching_failure = true;
1057 if (!assign_supress && assign_alloc) {
1058 /* Roll back. */
1059 free(buf);
1060 buffers[my_buffer_idx] = NULL;
1061 --next_unused_buffer_idx;
1062 }
1063 break;
1064 }
1065
1066 /* Store the terminator. */
1067 if (!assign_supress && term_size > 0) {
1068 /* Space for the terminator was reserved. */
1069 *cur = '\0';
1070 }
1071
1072 /* Store the result if not already stored. */
1073 if (!assign_supress) {
1074 if (assign_alloc) {
1075 char **pbuf = va_arg(arg, char **);
1076 *pbuf = buf;
1077 }
1078 ++converted_cnt;
1079 }
1080
1081 converting = false;
1082 /* Format string pointer already incremented. */
1083 } else if (*fmt == 'n') {
1084 /* Report the number of consumed bytes so far. */
1085
1086 /* Sanity check. */
1087 bool sane =
1088 width == -1 &&
[12fb8498]1089 length_mod == LMOD_NONE &&
1090 assign_alloc == false &&
1091 assign_supress == false;
[08053f7]1092
1093 if (sane) {
1094 int *pi = va_arg(arg, int *);
1095 *pi = in->consumed;
1096 } else {
1097 /* Illegal format string. */
1098 break;
1099 }
1100
1101 /* This shall not be counted as conversion. */
1102 converting = false;
1103 ++fmt;
1104 } else {
1105 /* Illegal format string. */
1106 break;
1107 }
1108
1109 } else {
1110
1111 /* Processing outside conversion specifier. Either skip white
1112 * spaces or match characters one by one. If conversion specifier
1113 * is detected, switch to conversion mode. */
1114 if (isspace(*fmt)) {
1115 /* Skip white spaces in the format string. */
1116 while (isspace(*fmt)) {
1117 ++fmt;
1118 }
1119 /* Skip white spaces in the input. */
1120 do {
1121 c = in->pop(in);
1122 } while (isspace(c));
1123 if (c != -1) {
1124 /* Input is not at EOF, so undo the last pop operation. */
1125 in->undo(in);
1126 }
1127 } else if (*fmt == '%' && *(fmt + 1) != '%') {
1128 /* Conversion specifier detected. Switch modes. */
1129 converting = true;
1130 /* Reset the conversion context. */
1131 assign_supress = false;
1132 assign_alloc = false;
1133 width = -1;
1134 length_mod = LMOD_NONE;
1135 int_conv_unsigned = false;
1136 int_conv_base = 0;
1137 ++fmt;
1138 } else {
1139 /* One by one matching. */
1140 if (*fmt == '%') {
1141 /* Escape sequence detected. */
1142 ++fmt;
1143 assert(*fmt == '%');
1144 }
1145 c = in->pop(in);
1146 if (c == -1) {
1147 /* Input failure. */
1148 break;
1149 } else if (c != *fmt) {
1150 /* Matching failure. */
1151 in->undo(in);
1152 matching_failure = true;
1153 break;
1154 } else {
1155 ++fmt;
1156 }
1157 }
1158
1159 }
1160
1161 }
1162
1163 in->release(in);
1164
1165 /* This somewhat complicated return value decision is required by POSIX. */
1166 int rc;
1167 if (matching_failure) {
1168 rc = converted_cnt;
1169 } else {
1170 if (errno == EOK) {
1171 rc = converted_cnt > 0 ? converted_cnt : EOF;
1172 } else {
1173 rc = EOF;
1174 }
1175 }
1176 if (rc == EOF) {
1177 /* Caller will not know how many arguments were successfully converted,
1178 * so the deallocation of buffers is our responsibility. */
1179 for (int i = 0; i < next_unused_buffer_idx; ++i) {
1180 free(buffers[i]);
1181 buffers[i] = NULL;
1182 }
1183 next_unused_buffer_idx = 0;
1184 }
1185 return rc;
1186}
1187
1188/**
1189 * Convert formatted input from the stream.
1190 *
1191 * @param stream Input stream.
1192 * @param format Format description.
1193 * @param arg Output items.
1194 * @return The number of converted output items or EOF on failure.
1195 */
1196int posix_vfscanf(
1197 FILE *restrict stream, const char *restrict format, va_list arg)
1198{
1199 _input_provider provider = {
1200 { 0 }, 0, 0, NULL, 0, NULL, _PROV_CONSTRUCTED,
1201 _capture_stream, _pop_stream, _undo_stream,
1202 _borrow_cursor_universal, _return_cursor_stream, _release_stream
1203 };
1204 provider.source.stream = stream;
1205 return _internal_scanf(&provider, format, arg);
1206}
1207
1208/**
1209 * Convert formatted input from the string.
1210 *
1211 * @param s Input string.
1212 * @param format Format description.
1213 * @param arg Output items.
1214 * @return The number of converted output items or EOF on failure.
1215 */
1216int posix_vsscanf(
1217 const char *restrict s, const char *restrict format, va_list arg)
1218{
1219 _input_provider provider = {
1220 { 0 }, 0, 0, NULL, 0, NULL, _PROV_CONSTRUCTED,
1221 _capture_string, _pop_string, _undo_string,
1222 _borrow_cursor_universal, _return_cursor_string, _release_string
1223 };
1224 provider.source.string = s;
1225 return _internal_scanf(&provider, format, arg);
1226}
1227
1228/** @}
1229 */
Note: See TracBrowser for help on using the repository browser.