source: mainline/uspace/lib/posix/src/stdio/scanf.c@ 8565a42

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 8565a42 was a35b458, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 7 years ago

style: Remove trailing whitespace on _all_ lines, including empty ones, for particular file types.

Command used: tools/srepl '\s\+$' '' -- *.c *.h *.py *.sh *.s *.S *.ag

Currently, whitespace on empty lines is very inconsistent.
There are two basic choices: Either remove the whitespace, or keep empty lines
indented to the level of surrounding code. The former is AFAICT more common,
and also much easier to do automatically.

Alternatively, we could write a script for automatic indentation, and use that
instead. However, if such a script exists, it's possible to use the indented
style locally, by having the editor apply relevant conversions on load/save,
without affecting remote repository. IMO, it makes more sense to adopt
the simpler rule.

  • Property mode set to 100644
File size: 32.8 KB
Line 
1/*
2 * Copyright (c) 2011 Petr Koupy
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup libposix
30 * @{
31 */
32/** @file Implementation of the scanf backend.
33 */
34
35#include <assert.h>
36
37#include <errno.h>
38
39#include "posix/stdio.h"
40#include "posix/stdlib.h"
41#include "posix/stddef.h"
42#include "posix/string.h"
43#include "posix/ctype.h"
44#include "posix/sys/types.h"
45
46#include "../internal/common.h"
47#include "libc/malloc.h"
48#include "libc/stdbool.h"
49
/** Data source a scanf call reads from: either a stream or a string. */
typedef union __data_source {
	/** Input file stream. */
	FILE *stream;
	/** Input string. */
	const char *string;
} _data_source;
55
/** Life-cycle states of the input provider. */
enum {
	/** Partly constructed; not functional until the source is captured. */
	_PROV_CONSTRUCTED = 0,
	/** Captured and able to serve any request. */
	_PROV_READY = 1,
	/** The cursor is on loan to an external entity. Nothing can be done
	 * with the provider until the cursor is handed back. */
	_PROV_CURSOR_LENT = 2,
};
66
67/** Universal abstraction over data input for scanf. */
68typedef struct __input_provider {
69 /** Source of data elements. */
70 _data_source source;
71 /** How many elements was already processed. */
72 int consumed;
73 /** How many elements was already fetched from the source. */
74 int fetched;
75 /** Elements are fetched from the source in batches (e.g. by getline())
76 * to allow using strtol/strtod family even on streams. */
77 char *window;
78 /** Size of the current window. */
79 size_t window_size;
80 /** Points to the next element to be processed inside the current window. */
81 const char *cursor;
82 /** Internal state of the provider. */
83 int state;
84
85 /** Take control over data source. Finish initialization of the internal
86 * structures (e.g. allocation of window). */
87 void (*capture)(struct __input_provider *);
88 /** Get a single element from the source and update the internal structures
89 * accordingly (e.g. greedy update of the window). Return -1 if the
90 * element cannot be obtained. */
91 int (*pop)(struct __input_provider *);
92 /** Undo the most recent not-undone pop operation. Might be necesarry to
93 * flush current window and seek data source backwards. Return 0 if the
94 * pop history is exhausted, non-zero on success. */
95 int (*undo)(struct __input_provider *);
96 /** Lend the cursor to the caller. */
97 const char * (*borrow_cursor)(struct __input_provider *);
98 /** Take control over possibly incremented cursor and update the internal
99 * structures if necessary. */
100 void (*return_cursor)(struct __input_provider *, const char *);
101 /** Release the control over the source. That is, synchronize any
102 * fetched but non-consumed elements (e.g. by seeking) and destruct
103 * internal structures (e.g. window deallocation). */
104 void (*release)(struct __input_provider *);
105} _input_provider;
106
107/** @see __input_provider */
108static void _capture_stream(_input_provider *self)
109{
110 assert(self->source.stream);
111 assert(self->state == _PROV_CONSTRUCTED);
112 /* Caller could already pre-allocated the window. */
113 assert((self->window == NULL && self->window_size == 0) ||
114 (self->window && self->window_size > 0));
115
116 /* Initialize internal structures. */
117 self->consumed = 0;
118 ssize_t fetched = getline(
119 &self->window, &self->window_size, self->source.stream);
120 if (fetched != -1) {
121 self->fetched = fetched;
122 self->cursor = self->window;
123 } else {
124 /* EOF encountered. */
125 self->fetched = 0;
126 self->cursor = NULL;
127 }
128 self->state = _PROV_READY;
129}
130
131/** @see __input_provider */
132static void _capture_string(_input_provider *self)
133{
134 assert(self->source.string);
135 assert(self->state == _PROV_CONSTRUCTED);
136
137 /* Initialize internal structures. */
138 self->consumed = 0;
139 self->fetched = strlen(self->source.string);
140 self->window = (char *) self->source.string;
141 self->window_size = self->fetched + 1;
142 self->cursor = self->window;
143 self->state = _PROV_READY;
144}
145
146/** @see __input_provider */
147static int _pop_stream(_input_provider *self)
148{
149 assert(self->state == _PROV_READY);
150
151 if (self->cursor) {
152 int c = *self->cursor;
153 ++self->consumed;
154 ++self->cursor;
155 /* Do we need to fetch a new line from the source? */
156 if (*self->cursor == '\0') {
157 ssize_t fetched = getline(&self->window,
158 &self->window_size, self->source.stream);
159 if (fetched != -1) {
160 self->fetched += fetched;
161 self->cursor = self->window;
162 } else {
163 /* EOF encountered. */
164 self->cursor = NULL;
165 }
166 }
167 return c;
168 } else {
169 /* Already at EOF. */
170 return -1;
171 }
172}
173
174/** @see __input_provider */
175static int _pop_string(_input_provider *self)
176{
177 assert(self->state == _PROV_READY);
178
179 if (*self->cursor != '\0') {
180 int c = *self->cursor;
181 ++self->consumed;
182 ++self->cursor;
183 return c;
184 } else {
185 /* String depleted. */
186 return -1;
187 }
188}
189
190/** @see __input_provider */
191static int _undo_stream(_input_provider *self)
192{
193 assert(self->state == _PROV_READY);
194
195 if (self->consumed == 0) {
196 /* Undo history exhausted. */
197 return 0;
198 }
199
200 if (!self->cursor || self->window == self->cursor) {
201 /* Complex case. Either at EOF (cursor == NULL) or there is no more
202 * place to retreat to inside the window. Seek the source backwards
203 * and flush the window. Regarding the scanf, this could happend only
204 * when matching unbounded string (%s) or unbounded scanset (%[) not
205 * containing newline, while at the same time newline is the character
206 * that breaks the matching process. */
207 int rc = fseek(self->source.stream, -1, SEEK_CUR);
208 if (rc == -1) {
209 /* Seek failed. */
210 return 0;
211 }
212 ssize_t fetched = getline(&self->window,
213 &self->window_size, self->source.stream);
214 if (fetched != -1) {
215 assert(fetched == 1);
216 self->fetched = self->consumed + 1;
217 self->cursor = self->window;
218 } else {
219 /* Stream is broken. */
220 return 0;
221 }
222 } else {
223 /* Simple case. Still inside window. */
224 --self->cursor;
225 }
226 --self->consumed;
227 return 1; /* Success. */
228}
229
230/** @see __input_provider */
231static int _undo_string(_input_provider *self)
232{
233 assert(self->state == _PROV_READY);
234
235 if (self->consumed > 0) {
236 --self->consumed;
237 --self->cursor;
238 } else {
239 /* Undo history exhausted. */
240 return 0;
241 }
242 return 1; /* Success. */
243}
244
245/** @see __input_provider */
246static const char *_borrow_cursor_universal(_input_provider *self)
247{
248 assert(self->state == _PROV_READY);
249
250 self->state = _PROV_CURSOR_LENT;
251 return self->cursor;
252}
253
254/** @see __input_provider */
255static void _return_cursor_stream(_input_provider *self, const char *cursor)
256{
257 assert(self->state == _PROV_CURSOR_LENT);
258
259 /* Check how much of the window did external entity consumed. */
260 self->consumed += cursor - self->cursor;
261 self->cursor = cursor;
262 if (*self->cursor == '\0') {
263 /* Window was completely consumed, fetch new data. */
264 ssize_t fetched = getline(&self->window,
265 &self->window_size, self->source.stream);
266 if (fetched != -1) {
267 self->fetched += fetched;
268 self->cursor = self->window;
269 } else {
270 /* EOF encountered. */
271 self->cursor = NULL;
272 }
273 }
274 self->state = _PROV_READY;
275}
276
277/** @see __input_provider */
278static void _return_cursor_string(_input_provider *self, const char *cursor)
279{
280 assert(self->state == _PROV_CURSOR_LENT);
281
282 /* Check how much of the window did external entity consumed. */
283 self->consumed += cursor - self->cursor;
284 self->cursor = cursor;
285 self->state = _PROV_READY;
286}
287
288/** @see __input_provider */
289static void _release_stream(_input_provider *self)
290{
291 assert(self->state == _PROV_READY);
292 assert(self->consumed >= self->fetched);
293
294 /* Try to correct the difference between the stream position and what was
295 * actually consumed. If it is not possible, continue anyway. */
296 fseek(self->source.stream, self->consumed - self->fetched, SEEK_CUR);
297
298 /* Destruct internal structures. */
299 self->fetched = 0;
300 self->cursor = NULL;
301 if (self->window) {
302 free(self->window);
303 self->window = NULL;
304 }
305 self->window_size = 0;
306 self->state = _PROV_CONSTRUCTED;
307}
308
309/** @see __input_provider */
310static void _release_string(_input_provider *self)
311{
312 assert(self->state == _PROV_READY);
313
314 /* Destruct internal structures. */
315 self->fetched = 0;
316 self->cursor = NULL;
317 self->window = NULL;
318 self->window_size = 0;
319 self->state = _PROV_CONSTRUCTED;
320}
321
/** Length modifiers of a conversion specification. */
enum {
	LMOD_NONE = 0,
	LMOD_hh = 1,
	LMOD_h = 2,
	LMOD_l = 3,
	LMOD_ll = 4,
	LMOD_j = 5,
	LMOD_z = 6,
	LMOD_t = 7,
	LMOD_L = 8,
	/** Not a real modifier; reserved for the %p conversion. */
	LMOD_p = 9,
};
335
336/**
337 * Decides whether provided characters specify length modifier. If so, the
338 * recognized modifier is stored through provider pointer.
339 *
340 * @param c Candidate on the length modifier.
341 * @param _c Next character (might be NUL).
342 * @param modifier Pointer to the modifier value.
343 * @return Whether the modifier was recognized or not.
344 */
345static inline int is_length_mod(int c, int _c, int *modifier)
346{
347 assert(modifier);
348
349 switch (c) {
350 case 'h':
351 /* Check whether the modifier was not already recognized. */
352 if (*modifier == LMOD_NONE) {
353 *modifier = _c == 'h' ? LMOD_hh : LMOD_h;
354 } else {
355 /* Format string is invalid. Notify the caller. */
356 *modifier = LMOD_NONE;
357 }
358 return 1;
359 case 'l':
360 if (*modifier == LMOD_NONE) {
361 *modifier = _c == 'l' ? LMOD_ll : LMOD_l;
362 } else {
363 *modifier = LMOD_NONE;
364 }
365 return 1;
366 case 'j':
367 *modifier = *modifier == LMOD_NONE ? LMOD_j : LMOD_NONE;
368 return 1;
369 case 'z':
370 *modifier = *modifier == LMOD_NONE ? LMOD_z : LMOD_NONE;
371 return 1;
372 case 't':
373 *modifier = *modifier == LMOD_NONE ? LMOD_t : LMOD_NONE;
374 return 1;
375 case 'L':
376 *modifier = *modifier == LMOD_NONE ? LMOD_L : LMOD_NONE;
377 return 1;
378 default:
379 return 0;
380 }
381}
382
/**
 * Decides whether the provided character specifies an integer conversion.
 * If so, the semantics of the conversion are stored through the provided
 * pointers; otherwise the pointed-to values are left untouched.
 *
 * @param c Candidate for the integer conversion.
 * @param is_unsigned Pointer to store whether the conversion is unsigned.
 * @param base Pointer to store the base of the integer conversion
 *             (0 lets the conversion routine detect the base).
 * @return 1 if the conversion was recognized, 0 otherwise.
 */
static inline int is_int_conv(int c, bool *is_unsigned, int *base)
{
	assert(is_unsigned && base);

	switch (c) {
	case 'd':
		*is_unsigned = false;
		*base = 10;
		return 1;
	case 'i':
		*is_unsigned = false;
		*base = 0;
		return 1;
	case 'o':
		*is_unsigned = true;
		*base = 8;
		return 1;
	case 'u':
		*is_unsigned = true;
		*base = 10;
		return 1;
	case 'p':
		/* According to POSIX, the %p conversion is implementation defined
		 * but must correspond to its printf counterpart. It is parsed as
		 * a hexadecimal number here. */
	case 'x':
	case 'X':
		*is_unsigned = true;
		*base = 16;
		return 1;
	default:
		return 0;
	}
}
425
/**
 * Recognize a floating point conversion specifier.
 *
 * @param c Candidate for the floating point conversion.
 * @return 1 for any of a, A, e, E, f, F, g, G; 0 for everything else.
 */
static inline int is_float_conv(int c)
{
	if (c == 'a' || c == 'e' || c == 'f' || c == 'g') {
		return 1;
	}
	if (c == 'A' || c == 'E' || c == 'F' || c == 'G') {
		return 1;
	}
	return 0;
}
449
450/**
451 * Decides whether provided character specifies conversion of the character
452 * sequence.
453 *
454 * @param c Candidate on the character sequence conversion.
455 * @param modifier Pointer to store length modifier for wide chars.
456 * @return Whether the conversion was recognized or not.
457 */
458static inline int is_seq_conv(int c, int *modifier)
459{
460 assert(modifier);
461
462 switch (c) {
463 case 'S':
464 *modifier = LMOD_l;
465 /* Fallthrough */
466 case 's':
467 return 1;
468 case 'C':
469 *modifier = LMOD_l;
470 /* Fallthrough */
471 case 'c':
472 return 1;
473 case '[':
474 return 1;
475 default:
476 return 0;
477 }
478}
479
480/**
481 * Backend for the whole family of scanf functions. Uses input provider
482 * to abstract over differences between strings and streams. Should be
483 * POSIX compliant (apart from the not supported stuff).
484 *
485 * NOT SUPPORTED: locale (see strtold), wide chars, numbered output arguments
486 *
487 * @param in Input provider.
488 * @param fmt Format description.
489 * @param arg Output arguments.
490 * @return The number of converted output items or EOF on failure.
491 */
492static inline int _internal_scanf(
493 _input_provider *in, const char *restrict fmt, va_list arg)
494{
495 int c = -1;
496 int converted_cnt = 0;
497 bool converting = false;
498 bool matching_failure = false;
499
500 bool assign_supress = false;
501 bool assign_alloc = false;
502 long width = -1;
503 int length_mod = LMOD_NONE;
504 bool int_conv_unsigned = false;
505 int int_conv_base = 0;
506
507 /* Buffers allocated by scanf for optional 'm' specifier must be remembered
508 * to deallocaate them in case of an error. Because each of those buffers
509 * corresponds to one of the argument from va_list, there is an upper bound
510 * on the number of those arguments. In case of C99, this uppper bound is
511 * 127 arguments. */
512 char *buffers[127];
513 for (int i = 0; i < 127; ++i) {
514 buffers[i] = NULL;
515 }
516 int next_unused_buffer_idx = 0;
517
518 in->capture(in);
519
520 /* Interpret format string. Control shall prematurely jump from the cycle
521 * on input failure, matching failure or illegal format string. In order
522 * to keep error reporting simple enough and to keep input consistent,
523 * error condition shall be always manifested as jump from the cycle,
524 * not function return. Format string pointer shall be updated specifically
525 * for each sub-case (i.e. there shall be no loop-wide increment).*/
526 while (*fmt) {
527
528 if (converting) {
529
530 /* Processing inside conversion specifier. Either collect optional
531 * parameters or execute the conversion. When the conversion
532 * is successfully completed, increment conversion count and switch
533 * back to normal mode. */
534 if (*fmt == '*') {
535 /* Assignment-supression (optional). */
536 if (assign_supress) {
537 /* Already set. Illegal format string. */
538 break;
539 }
540 assign_supress = true;
541 ++fmt;
542 } else if (*fmt == 'm') {
543 /* Assignment-allocation (optional). */
544 if (assign_alloc) {
545 /* Already set. Illegal format string. */
546 break;
547 }
548 assign_alloc = true;
549 ++fmt;
550 } else if (*fmt == '$') {
551 /* Reference to numbered output argument. */
552 // TODO
553 not_implemented();
554 } else if (isdigit(*fmt)) {
555 /* Maximum field length (optional). */
556 if (width != -1) {
557 /* Already set. Illegal format string. */
558 break;
559 }
560 char *fmt_new = NULL;
561 width = strtol(fmt, &fmt_new, 10);
562 if (width != 0) {
563 fmt = fmt_new;
564 } else {
565 /* Since POSIX requires width to be non-zero, it is
566 * sufficient to interpret zero width as error without
567 * referring to errno. */
568 break;
569 }
570 } else if (is_length_mod(*fmt, *(fmt + 1), &length_mod)) {
571 /* Length modifier (optional). */
572 if (length_mod == LMOD_NONE) {
573 /* Already set. Illegal format string. The actual detection
574 * is carried out in the is_length_mod(). */
575 break;
576 }
577 if (length_mod == LMOD_hh || length_mod == LMOD_ll) {
578 /* Modifier was two characters long. */
579 ++fmt;
580 }
581 ++fmt;
582 } else if (is_int_conv(*fmt, &int_conv_unsigned, &int_conv_base)) {
583 /* Integer conversion. */
584
585 /* Check sanity of optional parts of conversion specifier. */
586 if (assign_alloc || length_mod == LMOD_L) {
587 /* Illegal format string. */
588 break;
589 }
590
591 /* Conversion of the integer with %p specifier needs special
592 * handling, because it is not allowed to have arbitrary
593 * length modifier. */
594 if (*fmt == 'p') {
595 if (length_mod == LMOD_NONE) {
596 length_mod = LMOD_p;
597 } else {
598 /* Already set. Illegal format string. */
599 break;
600 }
601 }
602
603 /* First consume any white spaces, so we can borrow cursor
604 * from the input provider. This way, the cursor will either
605 * point to the non-white space while the input will be
606 * prefetched up to the newline (which is suitable for strtol),
607 * or the input will be at EOF. */
608 do {
609 c = in->pop(in);
610 } while (isspace(c));
611
612 /* After skipping the white spaces, can we actually continue? */
613 if (c == -1) {
614 /* Input failure. */
615 break;
616 } else {
617 /* Everything is OK, just undo the last pop, so the cursor
618 * can be borrowed. */
619 in->undo(in);
620 }
621
622 const char *cur_borrowed = NULL;
623 char *cur_duplicated = NULL;
624 const char *cur_limited = NULL;
625 const char *cur_updated = NULL;
626
627 /* Borrow the cursor. Until it is returned to the provider
628 * we cannot jump from the cycle, because it would leave
629 * the input inconsistent. */
630 cur_borrowed = in->borrow_cursor(in);
631
632 /* If the width is limited, the cursor horizont must be
633 * decreased accordingly. Otherwise the strtol could read more
634 * than allowed by width. */
635 if (width != -1) {
636 cur_duplicated = strndup(cur_borrowed, width);
637 cur_limited = cur_duplicated;
638 } else {
639 cur_limited = cur_borrowed;
640 }
641 cur_updated = cur_limited;
642
643 long long sres = 0;
644 unsigned long long ures = 0;
645 errno = 0; /* Reset errno to recognize error later. */
646 /* Try to convert the integer. */
647 if (int_conv_unsigned) {
648 ures = strtoull(cur_limited, (char **) &cur_updated, int_conv_base);
649 } else {
650 sres = strtoll(cur_limited, (char **) &cur_updated, int_conv_base);
651 }
652
653 /* Update the cursor so it can be returned to the provider. */
654 cur_borrowed += cur_updated - cur_limited;
655 if (cur_duplicated != NULL) {
656 /* Deallocate duplicated part of the cursor view. */
657 free(cur_duplicated);
658 }
659 cur_limited = NULL;
660 cur_updated = NULL;
661 cur_duplicated = NULL;
662 /* Return the cursor to the provider. Input consistency is again
663 * the job of the provider, so we can report errors from
664 * now on. */
665 in->return_cursor(in, cur_borrowed);
666 cur_borrowed = NULL;
667
668 /* Check whether the conversion was successful. */
669 if (errno != EOK) {
670 matching_failure = true;
671 break;
672 }
673
674 /* If not supressed, assign the converted integer into
675 * the next output argument. */
676 if (!assign_supress) {
677 if (int_conv_unsigned) {
678 switch (length_mod) {
679 case LMOD_hh: ; /* Label cannot be part of declaration. */
680 unsigned char *phh = va_arg(arg, unsigned char *);
681 *phh = (unsigned char) ures;
682 break;
683 case LMOD_h: ;
684 unsigned short *ph = va_arg(arg, unsigned short *);
685 *ph = (unsigned short) ures;
686 break;
687 case LMOD_NONE: ;
688 unsigned *pdef = va_arg(arg, unsigned *);
689 *pdef = (unsigned) ures;
690 break;
691 case LMOD_l: ;
692 unsigned long *pl = va_arg(arg, unsigned long *);
693 *pl = (unsigned long) ures;
694 break;
695 case LMOD_ll: ;
696 unsigned long long *pll = va_arg(arg, unsigned long long *);
697 *pll = (unsigned long long) ures;
698 break;
699 case LMOD_j: ;
700 uintmax_t *pj = va_arg(arg, uintmax_t *);
701 *pj = (uintmax_t) ures;
702 break;
703 case LMOD_z: ;
704 size_t *pz = va_arg(arg, size_t *);
705 *pz = (size_t) ures;
706 break;
707 case LMOD_t: ;
708 // XXX: What is unsigned counterpart of the ptrdiff_t?
709 size_t *pt = va_arg(arg, size_t *);
710 *pt = (size_t) ures;
711 break;
712 case LMOD_p: ;
713 void **pp = va_arg(arg, void **);
714 *pp = (void *) (uintptr_t) ures;
715 break;
716 default:
717 assert(false);
718 }
719 } else {
720 switch (length_mod) {
721 case LMOD_hh: ; /* Label cannot be part of declaration. */
722 signed char *phh = va_arg(arg, signed char *);
723 *phh = (signed char) sres;
724 break;
725 case LMOD_h: ;
726 short *ph = va_arg(arg, short *);
727 *ph = (short) sres;
728 break;
729 case LMOD_NONE: ;
730 int *pdef = va_arg(arg, int *);
731 *pdef = (int) sres;
732 break;
733 case LMOD_l: ;
734 long *pl = va_arg(arg, long *);
735 *pl = (long) sres;
736 break;
737 case LMOD_ll: ;
738 long long *pll = va_arg(arg, long long *);
739 *pll = (long long) sres;
740 break;
741 case LMOD_j: ;
742 intmax_t *pj = va_arg(arg, intmax_t *);
743 *pj = (intmax_t) sres;
744 break;
745 case LMOD_z: ;
746 ssize_t *pz = va_arg(arg, ssize_t *);
747 *pz = (ssize_t) sres;
748 break;
749 case LMOD_t: ;
750 ptrdiff_t *pt = va_arg(arg, ptrdiff_t *);
751 *pt = (ptrdiff_t) sres;
752 break;
753 default:
754 assert(false);
755 }
756 }
757 ++converted_cnt;
758 }
759
760 converting = false;
761 ++fmt;
762 } else if (is_float_conv(*fmt)) {
763 /* Floating point number conversion. */
764
765 /* Check sanity of optional parts of conversion specifier. */
766 if (assign_alloc) {
767 /* Illegal format string. */
768 break;
769 }
770 if (length_mod != LMOD_NONE &&
771 length_mod != LMOD_l &&
772 length_mod != LMOD_L) {
773 /* Illegal format string. */
774 break;
775 }
776
777 /* First consume any white spaces, so we can borrow cursor
778 * from the input provider. This way, the cursor will either
779 * point to the non-white space while the input will be
780 * prefetched up to the newline (which is suitable for strtof),
781 * or the input will be at EOF. */
782 do {
783 c = in->pop(in);
784 } while (isspace(c));
785
786 /* After skipping the white spaces, can we actually continue? */
787 if (c == -1) {
788 /* Input failure. */
789 break;
790 } else {
791 /* Everything is OK, just undo the last pop, so the cursor
792 * can be borrowed. */
793 in->undo(in);
794 }
795
796 const char *cur_borrowed = NULL;
797 const char *cur_limited = NULL;
798 char *cur_duplicated = NULL;
799 const char *cur_updated = NULL;
800
801 /* Borrow the cursor. Until it is returned to the provider
802 * we cannot jump from the cycle, because it would leave
803 * the input inconsistent. */
804 cur_borrowed = in->borrow_cursor(in);
805
806 /* If the width is limited, the cursor horizont must be
807 * decreased accordingly. Otherwise the strtof could read more
808 * than allowed by width. */
809 if (width != -1) {
810 cur_duplicated = strndup(cur_borrowed, width);
811 cur_limited = cur_duplicated;
812 } else {
813 cur_limited = cur_borrowed;
814 }
815 cur_updated = cur_limited;
816
817 float fres = 0.0;
818 double dres = 0.0;
819 long double ldres = 0.0;
820 errno = 0; /* Reset errno to recognize error later. */
821 /* Try to convert the floating point nubmer. */
822 switch (length_mod) {
823 case LMOD_NONE:
824 fres = strtof(cur_limited, (char **) &cur_updated);
825 break;
826 case LMOD_l:
827 dres = strtod(cur_limited, (char **) &cur_updated);
828 break;
829 case LMOD_L:
830 ldres = strtold(cur_limited, (char **) &cur_updated);
831 break;
832 default:
833 assert(false);
834 }
835
836 /* Update the cursor so it can be returned to the provider. */
837 cur_borrowed += cur_updated - cur_limited;
838 if (cur_duplicated != NULL) {
839 /* Deallocate duplicated part of the cursor view. */
840 free(cur_duplicated);
841 }
842 cur_limited = NULL;
843 cur_updated = NULL;
844 /* Return the cursor to the provider. Input consistency is again
845 * the job of the provider, so we can report errors from
846 * now on. */
847 in->return_cursor(in, cur_borrowed);
848 cur_borrowed = NULL;
849
850 /* Check whether the conversion was successful. */
851 if (errno != EOK) {
852 matching_failure = true;
853 break;
854 }
855
856 /* If nto supressed, assign the converted floating point number
857 * into the next output argument. */
858 if (!assign_supress) {
859 switch (length_mod) {
860 case LMOD_NONE: ; /* Label cannot be part of declaration. */
861 float *pf = va_arg(arg, float *);
862 *pf = fres;
863 break;
864 case LMOD_l: ;
865 double *pd = va_arg(arg, double *);
866 *pd = dres;
867 break;
868 case LMOD_L: ;
869 long double *pld = va_arg(arg, long double *);
870 *pld = ldres;
871 break;
872 default:
873 assert(false);
874 }
875 ++converted_cnt;
876 }
877
878 converting = false;
879 ++fmt;
880 } else if (is_seq_conv(*fmt, &length_mod)) {
881 /* Character sequence conversion. */
882
883 /* Check sanity of optional parts of conversion specifier. */
884 if (length_mod != LMOD_NONE &&
885 length_mod != LMOD_l) {
886 /* Illegal format string. */
887 break;
888 }
889
890 if (length_mod == LMOD_l) {
891 /* Wide chars not supported. */
892 // TODO
893 not_implemented();
894 }
895
896 int term_size = 1; /* Size of the terminator (0 or 1)). */
897 if (*fmt == 'c') {
898 term_size = 0;
899 width = width == -1 ? 1 : width;
900 }
901
902 if (*fmt == 's') {
903 /* Skip white spaces. */
904 do {
905 c = in->pop(in);
906 } while (isspace(c));
907 } else {
908 /* Fetch a single character. */
909 c = in->pop(in);
910 }
911
912 /* Check whether there is still input to read. */
913 if (c == -1) {
914 /* Input failure. */
915 break;
916 }
917
918 /* Prepare scanset. */
919 char terminate_on[256];
920 for (int i = 0; i < 256; ++i) {
921 terminate_on[i] = 0;
922 }
923 if (*fmt == 'c') {
924 ++fmt;
925 } else if (*fmt == 's') {
926 terminate_on[' '] = 1;
927 terminate_on['\n'] = 1;
928 terminate_on['\t'] = 1;
929 terminate_on['\f'] = 1;
930 terminate_on['\r'] = 1;
931 terminate_on['\v'] = 1;
932 ++fmt;
933 } else {
934 assert(*fmt == '[');
935 bool not = false;
936 bool dash = false;
937 ++fmt;
938 /* Check for negation. */
939 if (*fmt == '^') {
940 not = true;
941 ++fmt;
942 }
943 /* Check for escape sequences. */
944 if (*fmt == '-' || *fmt == ']') {
945 terminate_on[(int) *fmt] = 1;
946 ++fmt;
947 }
948 /* Check for ordinary characters and ranges. */
949 while (*fmt != '\0' && *fmt != ']') {
950 if (dash) {
951 for (char chr = *(fmt - 2); chr <= *fmt; ++chr) {
952 terminate_on[(int) chr] = 1;
953 }
954 dash = false;
955 } else if (*fmt == '-') {
956 dash = true;
957 } else {
958 terminate_on[(int) *fmt] = 1;
959 }
960 ++fmt;
961 }
962 /* Check for escape sequence. */
963 if (dash == true) {
964 terminate_on['-'] = 1;
965 }
966 /* Check whether the specifier was correctly terminated.*/
967 if (*fmt == '\0') {
968 /* Illegal format string. */
969 break;
970 } else {
971 ++fmt;
972 }
973 /* Inverse the scanset if necessary. */
974 if (not == false) {
975 for (int i = 0; i < 256; ++i) {
976 terminate_on[i] = terminate_on[i] ? 0 : 1;
977 }
978 }
979 }
980
981 char * buf = NULL;
982 size_t buf_size = 0;
983 char * cur = NULL;
984 size_t alloc_step = 80; /* Buffer size gain during reallocation. */
985 int my_buffer_idx = 0;
986
987 /* Retrieve the buffer into which popped characters
988 * will be stored. */
989 if (!assign_supress) {
990 if (assign_alloc) {
991 /* We must allocate our own buffer. */
992 buf_size =
993 width == -1 ? alloc_step : (size_t) width + term_size;
994 buf = malloc(buf_size);
995 if (!buf) {
996 /* No memory. */
997 break;
998 }
999 my_buffer_idx = next_unused_buffer_idx;
1000 ++next_unused_buffer_idx;
1001 buffers[my_buffer_idx] = buf;
1002 cur = buf;
1003 } else {
1004 /* Caller provided its buffer. */
1005 buf = va_arg(arg, char *);
1006 cur = buf;
1007 buf_size =
1008 width == -1 ? SIZE_MAX : (size_t) width + term_size;
1009 }
1010 }
1011
1012 /* Match the string. The next character is already popped. */
1013 while ((width == -1 || width > 0) && c != -1 && !terminate_on[c]) {
1014
1015 /* Check whether the buffer is still sufficiently large. */
1016 if (!assign_supress) {
1017 /* Always reserve space for the null terminator. */
1018 if (cur == buf + buf_size - term_size) {
1019 /* Buffer size must be increased. */
1020 buf = realloc(buf, buf_size + alloc_step);
1021 if (buf) {
1022 buffers[my_buffer_idx] = buf;
1023 cur = buf + buf_size - term_size;
1024 buf_size += alloc_step;
1025 } else {
1026 /* Break just from this tight loop. Errno will
1027 * be checked after it. */
1028 break;
1029 }
1030 }
1031 /* Store the input character. */
1032 *cur = c;
1033 }
1034
1035 width = width == -1 ? -1 : width - 1;
1036 ++cur;
1037 c = in->pop(in);
1038 }
1039 if (errno == ENOMEM) {
1040 /* No memory. */
1041 break;
1042 }
1043 if (c != -1) {
1044 /* There is still more input, so undo the last pop. */
1045 in->undo(in);
1046 }
1047
1048 /* Check for failures. */
1049 if (cur == buf) {
1050 /* Matching failure. Input failure was already checked
1051 * earlier. */
1052 matching_failure = true;
1053 if (!assign_supress && assign_alloc) {
1054 /* Roll back. */
1055 free(buf);
1056 buffers[my_buffer_idx] = NULL;
1057 --next_unused_buffer_idx;
1058 }
1059 break;
1060 }
1061
1062 /* Store the terminator. */
1063 if (!assign_supress && term_size > 0) {
1064 /* Space for the terminator was reserved. */
1065 *cur = '\0';
1066 }
1067
1068 /* Store the result if not already stored. */
1069 if (!assign_supress) {
1070 if (assign_alloc) {
1071 char **pbuf = va_arg(arg, char **);
1072 *pbuf = buf;
1073 }
1074 ++converted_cnt;
1075 }
1076
1077 converting = false;
1078 /* Format string pointer already incremented. */
1079 } else if (*fmt == 'n') {
1080 /* Report the number of consumed bytes so far. */
1081
1082 /* Sanity check. */
1083 bool sane =
1084 width == -1 &&
1085 length_mod == LMOD_NONE &&
1086 assign_alloc == false &&
1087 assign_supress == false;
1088
1089 if (sane) {
1090 int *pi = va_arg(arg, int *);
1091 *pi = in->consumed;
1092 } else {
1093 /* Illegal format string. */
1094 break;
1095 }
1096
1097 /* This shall not be counted as conversion. */
1098 converting = false;
1099 ++fmt;
1100 } else {
1101 /* Illegal format string. */
1102 break;
1103 }
1104
1105 } else {
1106
1107 /* Processing outside conversion specifier. Either skip white
1108 * spaces or match characters one by one. If conversion specifier
1109 * is detected, switch to coversion mode. */
1110 if (isspace(*fmt)) {
1111 /* Skip white spaces in the format string. */
1112 while (isspace(*fmt)) {
1113 ++fmt;
1114 }
1115 /* Skip white spaces in the input. */
1116 do {
1117 c = in->pop(in);
1118 } while (isspace(c));
1119 if (c != -1) {
1120 /* Input is not at EOF, so undo the last pop operation. */
1121 in->undo(in);
1122 }
1123 } else if (*fmt == '%' && *(fmt + 1) != '%') {
1124 /* Conversion specifier detected. Switch modes. */
1125 converting = true;
1126 /* Reset the conversion context. */
1127 assign_supress = false;
1128 assign_alloc = false;
1129 width = -1;
1130 length_mod = LMOD_NONE;
1131 int_conv_unsigned = false;
1132 int_conv_base = 0;
1133 ++fmt;
1134 } else {
1135 /* One by one matching. */
1136 if (*fmt == '%') {
1137 /* Escape sequence detected. */
1138 ++fmt;
1139 assert(*fmt == '%');
1140 }
1141 c = in->pop(in);
1142 if (c == -1) {
1143 /* Input failure. */
1144 break;
1145 } else if (c != *fmt) {
1146 /* Matching failure. */
1147 in->undo(in);
1148 matching_failure = true;
1149 break;
1150 } else {
1151 ++fmt;
1152 }
1153 }
1154
1155 }
1156
1157 }
1158
1159 in->release(in);
1160
1161 /* This somewhat complicated return value decision is required by POSIX. */
1162 int rc;
1163 if (matching_failure) {
1164 rc = converted_cnt;
1165 } else {
1166 if (errno == EOK) {
1167 rc = converted_cnt > 0 ? converted_cnt : EOF;
1168 } else {
1169 rc = EOF;
1170 }
1171 }
1172 if (rc == EOF) {
1173 /* Caller will not know how many arguments were successfully converted,
1174 * so the deallocation of buffers is our responsibility. */
1175 for (int i = 0; i < next_unused_buffer_idx; ++i) {
1176 free(buffers[i]);
1177 buffers[i] = NULL;
1178 }
1179 next_unused_buffer_idx = 0;
1180 }
1181 return rc;
1182}
1183
1184/**
1185 * Convert formatted input from the stream.
1186 *
1187 * @param stream Input stream.
1188 * @param format Format description.
1189 * @param arg Output items.
1190 * @return The number of converted output items or EOF on failure.
1191 */
1192int vfscanf(
1193 FILE *restrict stream, const char *restrict format, va_list arg)
1194{
1195 _input_provider provider = {
1196 { 0 }, 0, 0, NULL, 0, NULL, _PROV_CONSTRUCTED,
1197 _capture_stream, _pop_stream, _undo_stream,
1198 _borrow_cursor_universal, _return_cursor_stream, _release_stream
1199 };
1200 provider.source.stream = stream;
1201 return _internal_scanf(&provider, format, arg);
1202}
1203
1204/**
1205 * Convert formatted input from the string.
1206 *
1207 * @param s Input string.
1208 * @param format Format description.
1209 * @param arg Output items.
1210 * @return The number of converted output items or EOF on failure.
1211 */
1212int vsscanf(
1213 const char *restrict s, const char *restrict format, va_list arg)
1214{
1215 _input_provider provider = {
1216 { 0 }, 0, 0, NULL, 0, NULL, _PROV_CONSTRUCTED,
1217 _capture_string, _pop_string, _undo_string,
1218 _borrow_cursor_universal, _return_cursor_string, _release_string
1219 };
1220 provider.source.string = s;
1221 return _internal_scanf(&provider, format, arg);
1222}
1223
1224/** @}
1225 */
Note: See TracBrowser for help on using the repository browser.