source: mainline/uspace/lib/posix/source/stdio/scanf.c@ bf45993

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since bf45993 was fdf97f6, checked in by Vojtech Horky <vojtechhorky@…>, 12 years ago

Libposix functions are without posix_ prefix

Prior to this commit, libposix headers declared all functions as posix_*
and used macros to rename e.g. strncpy to posix_strncpy in all (ported)
sources.

After this change, libposix headers look like normal POSIX-compliant headers
(well, almost) and no renaming is done in the source code (of the ported
applications). Instead, the renaming is done at the object file level to
bypass weird problems that are bound to happen if you use macros.

The scheme is as follows. libposix headers use a special macro to declare
the names. When included from outside, the functions have their normal
(standard) names. When included from the libposix sources, the posix_ prefix
is added. Thus, when libposix is compiled and linked, it contains the
posix_* naming, while compilation of ported software uses the normal
non-prefixed versions. This way the posix_* functions can use HelenOS libc
without any problem. Before linking, the posix_ prefix is removed from all
symbols and the special prefix helenos_libc_ is added to all functions
that exist in our (HelenOS) libc and whose names clash with the POSIX
ones.

The following happens, for example, to the open() function that exists in
both libposix and in libc.

  • Headers and sources of libc are left intact.
  • A copy of libc.a is made and the helenos_libc_ prefix is added to all clashing functions. This library is called libc4posix.a.
  • POSIX_DEF(open)(const char *) is used in libposix headers. This macro expands to plain open when included from the "outside world". But it expands to posix_open when included from libposix sources.
  • Libposix is compiled and linked, containing posix_open() that internally calls open() [the original one from libc].
  • Libposix is transformed - all open() are replaced with prefix variant: helenos_libc_open() and all posix_open() are replaced with open(). The transformed library is stored as libposixaslibc.a

Binutils and PCC are then linked with libc4posix and libposixaslibc
libraries instead of libc and libposix as was done previously.

WARNING: it looks like binutils, PCC and MSIM still work, but not all
architectures were tested.

  • Property mode set to 100644
File size: 35.4 KB
Line 
1/*
2 * Copyright (c) 2011 Petr Koupy
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup libposix
30 * @{
31 */
32/** @file Implementation of the scanf backend.
33 */
34
35#define LIBPOSIX_INTERNAL
36#define __POSIX_DEF__(x) posix_##x
37
38#include "posix/assert.h"
39#include "posix/errno.h"
40
41#include "posix/stdio.h"
42#include "posix/stdlib.h"
43#include "posix/stddef.h"
44#include "posix/string.h"
45#include "posix/ctype.h"
46#include "posix/sys/types.h"
47
48#include "../internal/common.h"
49#include "libc/malloc.h"
50#include "libc/stdbool.h"
51
/**
 * Unified data type for the possible data sources of scanf.
 *
 * The stream-backed provider functions read the @c stream member, the
 * string-backed ones read the @c string member.
 */
typedef union __data_source {
	/** Input file stream. */
	FILE *stream;
	/** Input string. */
	const char *string;
} _data_source;
57
/** Internal state of the input provider. */
enum {
	/** Partly constructed but not yet functional. */
	_PROV_CONSTRUCTED = 0,
	/** Ready to serve any request. */
	_PROV_READY = 1,
	/**
	 * Cursor is temporarily lent to an external entity. No action is
	 * possible until the cursor is returned.
	 */
	_PROV_CURSOR_LENT = 2,
};
68
69/** Universal abstraction over data input for scanf. */
70typedef struct __input_provider {
71 /** Source of data elements. */
72 _data_source source;
73 /** How many elements was already processed. */
74 int consumed;
75 /** How many elements was already fetched from the source. */
76 int fetched;
77 /** Elements are fetched from the source in batches (e.g. by getline())
78 * to allow using strtol/strtod family even on streams. */
79 char *window;
80 /** Size of the current window. */
81 size_t window_size;
82 /** Points to the next element to be processed inside the current window. */
83 const char *cursor;
84 /** Internal state of the provider. */
85 int state;
86
87 /** Take control over data source. Finish initialization of the internal
88 * structures (e.g. allocation of window). */
89 void (*capture)(struct __input_provider *);
90 /** Get a single element from the source and update the internal structures
91 * accordingly (e.g. greedy update of the window). Return -1 if the
92 * element cannot be obtained. */
93 int (*pop)(struct __input_provider *);
94 /** Undo the most recent not-undone pop operation. Might be necesarry to
95 * flush current window and seek data source backwards. Return 0 if the
96 * pop history is exhausted, non-zero on success. */
97 int (*undo)(struct __input_provider *);
98 /** Lend the cursor to the caller. */
99 const char * (*borrow_cursor)(struct __input_provider *);
100 /** Take control over possibly incremented cursor and update the internal
101 * structures if necessary. */
102 void (*return_cursor)(struct __input_provider *, const char *);
103 /** Release the control over the source. That is, synchronize any
104 * fetched but non-consumed elements (e.g. by seeking) and destruct
105 * internal structures (e.g. window deallocation). */
106 void (*release)(struct __input_provider *);
107} _input_provider;
108
109/** @see __input_provider */
110static void _capture_stream(_input_provider *self)
111{
112 assert(self->source.stream);
113 assert(self->state == _PROV_CONSTRUCTED);
114 /* Caller could already pre-allocated the window. */
115 assert((self->window == NULL && self->window_size == 0) ||
116 (self->window && self->window_size > 0));
117
118 /* Initialize internal structures. */
119 self->consumed = 0;
120 ssize_t fetched = posix_getline(
121 &self->window, &self->window_size, self->source.stream);
122 if (fetched != -1) {
123 self->fetched = fetched;
124 self->cursor = self->window;
125 } else {
126 /* EOF encountered. */
127 self->fetched = 0;
128 self->cursor = NULL;
129 }
130 self->state = _PROV_READY;
131}
132
133/** @see __input_provider */
134static void _capture_string(_input_provider *self)
135{
136 assert(self->source.string);
137 assert(self->state == _PROV_CONSTRUCTED);
138
139 /* Initialize internal structures. */
140 self->consumed = 0;
141 self->fetched = posix_strlen(self->source.string);
142 self->window = (char *) self->source.string;
143 self->window_size = self->fetched + 1;
144 self->cursor = self->window;
145 self->state = _PROV_READY;
146}
147
148/** @see __input_provider */
149static int _pop_stream(_input_provider *self)
150{
151 assert(self->state == _PROV_READY);
152
153 if (self->cursor) {
154 int c = *self->cursor;
155 ++self->consumed;
156 ++self->cursor;
157 /* Do we need to fetch a new line from the source? */
158 if (*self->cursor == '\0') {
159 ssize_t fetched = posix_getline(&self->window,
160 &self->window_size, self->source.stream);
161 if (fetched != -1) {
162 self->fetched += fetched;
163 self->cursor = self->window;
164 } else {
165 /* EOF encountered. */
166 self->cursor = NULL;
167 }
168 }
169 return c;
170 } else {
171 /* Already at EOF. */
172 return -1;
173 }
174}
175
176/** @see __input_provider */
177static int _pop_string(_input_provider *self)
178{
179 assert(self->state == _PROV_READY);
180
181 if (*self->cursor != '\0') {
182 int c = *self->cursor;
183 ++self->consumed;
184 ++self->cursor;
185 return c;
186 } else {
187 /* String depleted. */
188 return -1;
189 }
190}
191
192/** @see __input_provider */
193static int _undo_stream(_input_provider *self)
194{
195 assert(self->state == _PROV_READY);
196
197 if (self->consumed == 0) {
198 /* Undo history exhausted. */
199 return 0;
200 }
201
202 if (!self->cursor || self->window == self->cursor) {
203 /* Complex case. Either at EOF (cursor == NULL) or there is no more
204 * place to retreat to inside the window. Seek the source backwards
205 * and flush the window. Regarding the scanf, this could happend only
206 * when matching unbounded string (%s) or unbounded scanset (%[) not
207 * containing newline, while at the same time newline is the character
208 * that breaks the matching process. */
209 int rc = posix_fseek(
210 self->source.stream, -1, SEEK_CUR);
211 if (rc == -1) {
212 /* Seek failed. */
213 return 0;
214 }
215 ssize_t fetched = posix_getline(&self->window,
216 &self->window_size, self->source.stream);
217 if (fetched != -1) {
218 assert(fetched == 1);
219 self->fetched = self->consumed + 1;
220 self->cursor = self->window;
221 } else {
222 /* Stream is broken. */
223 return 0;
224 }
225 } else {
226 /* Simple case. Still inside window. */
227 --self->cursor;
228 }
229 --self->consumed;
230 return 1; /* Success. */
231}
232
233/** @see __input_provider */
234static int _undo_string(_input_provider *self)
235{
236 assert(self->state == _PROV_READY);
237
238 if (self->consumed > 0) {
239 --self->consumed;
240 --self->cursor;
241 } else {
242 /* Undo history exhausted. */
243 return 0;
244 }
245 return 1; /* Success. */
246}
247
248/** @see __input_provider */
249static const char *_borrow_cursor_universal(_input_provider *self)
250{
251 assert(self->state == _PROV_READY);
252
253 self->state = _PROV_CURSOR_LENT;
254 return self->cursor;
255}
256
257/** @see __input_provider */
258static void _return_cursor_stream(_input_provider *self, const char *cursor)
259{
260 assert(self->state == _PROV_CURSOR_LENT);
261
262 /* Check how much of the window did external entity consumed. */
263 self->consumed += cursor - self->cursor;
264 self->cursor = cursor;
265 if (*self->cursor == '\0') {
266 /* Window was completely consumed, fetch new data. */
267 ssize_t fetched = posix_getline(&self->window,
268 &self->window_size, self->source.stream);
269 if (fetched != -1) {
270 self->fetched += fetched;
271 self->cursor = self->window;
272 } else {
273 /* EOF encountered. */
274 self->cursor = NULL;
275 }
276 }
277 self->state = _PROV_READY;
278}
279
280/** @see __input_provider */
281static void _return_cursor_string(_input_provider *self, const char *cursor)
282{
283 assert(self->state == _PROV_CURSOR_LENT);
284
285 /* Check how much of the window did external entity consumed. */
286 self->consumed += cursor - self->cursor;
287 self->cursor = cursor;
288 self->state = _PROV_READY;
289}
290
291/** @see __input_provider */
292static void _release_stream(_input_provider *self)
293{
294 assert(self->state == _PROV_READY);
295 assert(self->consumed >= self->fetched);
296
297 /* Try to correct the difference between the stream position and what was
298 * actually consumed. If it is not possible, continue anyway. */
299 posix_fseek(self->source.stream, self->consumed - self->fetched, SEEK_CUR);
300
301 /* Destruct internal structures. */
302 self->fetched = 0;
303 self->cursor = NULL;
304 if (self->window) {
305 free(self->window);
306 self->window = NULL;
307 }
308 self->window_size = 0;
309 self->state = _PROV_CONSTRUCTED;
310}
311
312/** @see __input_provider */
313static void _release_string(_input_provider *self)
314{
315 assert(self->state == _PROV_READY);
316
317 /* Destruct internal structures. */
318 self->fetched = 0;
319 self->cursor = NULL;
320 self->window = NULL;
321 self->window_size = 0;
322 self->state = _PROV_CONSTRUCTED;
323}
324
/** Length modifier values. */
enum {
	LMOD_NONE = 0, /**< No length modifier. */
	LMOD_hh = 1,
	LMOD_h = 2,
	LMOD_l = 3,
	LMOD_ll = 4,
	LMOD_j = 5,
	LMOD_z = 6,
	LMOD_t = 7,
	LMOD_L = 8,
	LMOD_p = 9, /**< Reserved for %p conversion. */
};
338
339/**
340 * Decides whether provided characters specify length modifier. If so, the
341 * recognized modifier is stored through provider pointer.
342 *
343 * @param c Candidate on the length modifier.
344 * @param _c Next character (might be NUL).
345 * @param modifier Pointer to the modifier value.
346 * @return Whether the modifier was recognized or not.
347 */
348static inline int is_length_mod(int c, int _c, int *modifier)
349{
350 assert(modifier);
351
352 switch (c) {
353 case 'h':
354 /* Check whether the modifier was not already recognized. */
355 if (*modifier == LMOD_NONE) {
356 *modifier = _c == 'h' ? LMOD_hh : LMOD_h;
357 } else {
358 /* Format string is invalid. Notify the caller. */
359 *modifier = LMOD_NONE;
360 }
361 return 1;
362 case 'l':
363 if (*modifier == LMOD_NONE) {
364 *modifier = _c == 'l' ? LMOD_ll : LMOD_l;
365 } else {
366 *modifier = LMOD_NONE;
367 }
368 return 1;
369 case 'j':
370 *modifier = *modifier == LMOD_NONE ? LMOD_j : LMOD_NONE;
371 return 1;
372 case 'z':
373 *modifier = *modifier == LMOD_NONE ? LMOD_z : LMOD_NONE;
374 return 1;
375 case 't':
376 *modifier = *modifier == LMOD_NONE ? LMOD_t : LMOD_NONE;
377 return 1;
378 case 'L':
379 *modifier = *modifier == LMOD_NONE ? LMOD_L : LMOD_NONE;
380 return 1;
381 default:
382 return 0;
383 }
384}
385
/**
 * Decides whether the provided character specifies an integer conversion.
 * If so, the semantics of the conversion are stored through the provided
 * pointers; otherwise the pointed-to values are left untouched.
 *
 * @param c Candidate for the integer conversion.
 * @param is_unsigned Pointer to store whether the conversion is unsigned.
 * @param base Pointer to store the base of the integer conversion
 *             (0 for %i).
 * @return Whether the conversion was recognized or not.
 */
static inline int is_int_conv(int c, bool *is_unsigned, int *base)
{
	assert(is_unsigned && base);

	switch (c) {
	case 'd':
		*is_unsigned = false;
		*base = 10;
		return 1;
	case 'i':
		*is_unsigned = false;
		*base = 0;
		return 1;
	case 'o':
		*is_unsigned = true;
		*base = 8;
		return 1;
	case 'u':
		*is_unsigned = true;
		*base = 10;
		return 1;
	case 'p':
		/*
		 * According to POSIX, the %p conversion is implementation defined
		 * but must correspond to its printf counterpart.
		 */
		/* fallthrough */
	case 'x':
	case 'X':
		*is_unsigned = true;
		*base = 16;
		return 1;
	default:
		return 0;
	}
}
428
/**
 * Decides whether the provided character specifies a conversion of a
 * floating point number (one of a, e, f, g in either case).
 *
 * @param c Candidate for the floating point conversion.
 * @return Whether the conversion was recognized or not.
 */
static inline int is_float_conv(int c)
{
	if (c == 'a' || c == 'A') {
		return 1;
	}
	if (c == 'e' || c == 'E') {
		return 1;
	}
	if (c == 'f' || c == 'F') {
		return 1;
	}
	if (c == 'g' || c == 'G') {
		return 1;
	}
	return 0;
}
452
453/**
454 * Decides whether provided character specifies conversion of the character
455 * sequence.
456 *
457 * @param c Candidate on the character sequence conversion.
458 * @param modifier Pointer to store length modifier for wide chars.
459 * @return Whether the conversion was recognized or not.
460 */
461static inline int is_seq_conv(int c, int *modifier)
462{
463 assert(modifier);
464
465 switch (c) {
466 case 'S':
467 *modifier = LMOD_l;
468 /* fallthrough */
469 case 's':
470 return 1;
471 case 'C':
472 *modifier = LMOD_l;
473 /* fallthrough */
474 case 'c':
475 return 1;
476 case '[':
477 return 1;
478 default:
479 return 0;
480 }
481}
482
483/**
484 * Backend for the whole family of scanf functions. Uses input provider
485 * to abstract over differences between strings and streams. Should be
486 * POSIX compliant (apart from the not supported stuff).
487 *
488 * NOT SUPPORTED: locale (see strtold), wide chars, numbered output arguments
489 *
490 * @param in Input provider.
491 * @param fmt Format description.
492 * @param arg Output arguments.
493 * @return The number of converted output items or EOF on failure.
494 */
495static inline int _internal_scanf(
496 _input_provider *in, const char *restrict fmt, va_list arg)
497{
498 int c = -1;
499 int converted_cnt = 0;
500 bool converting = false;
501 bool matching_failure = false;
502
503 bool assign_supress = false;
504 bool assign_alloc = false;
505 long width = -1;
506 int length_mod = LMOD_NONE;
507 bool int_conv_unsigned = false;
508 int int_conv_base = 0;
509
510 /* Buffers allocated by scanf for optional 'm' specifier must be remembered
511 * to deallocaate them in case of an error. Because each of those buffers
512 * corresponds to one of the argument from va_list, there is an upper bound
513 * on the number of those arguments. In case of C99, this uppper bound is
514 * 127 arguments. */
515 char *buffers[127];
516 for (int i = 0; i < 127; ++i) {
517 buffers[i] = NULL;
518 }
519 int next_unused_buffer_idx = 0;
520
521 in->capture(in);
522
523 /* Interpret format string. Control shall prematurely jump from the cycle
524 * on input failure, matching failure or illegal format string. In order
525 * to keep error reporting simple enough and to keep input consistent,
526 * error condition shall be always manifested as jump from the cycle,
527 * not function return. Format string pointer shall be updated specifically
528 * for each sub-case (i.e. there shall be no loop-wide increment).*/
529 while (*fmt) {
530
531 if (converting) {
532
533 /* Processing inside conversion specifier. Either collect optional
534 * parameters or execute the conversion. When the conversion
535 * is successfully completed, increment conversion count and switch
536 * back to normal mode. */
537 if (*fmt == '*') {
538 /* Assignment-supression (optional). */
539 if (assign_supress) {
540 /* Already set. Illegal format string. */
541 break;
542 }
543 assign_supress = true;
544 ++fmt;
545 } else if (*fmt == 'm') {
546 /* Assignment-allocation (optional). */
547 if (assign_alloc) {
548 /* Already set. Illegal format string. */
549 break;
550 }
551 assign_alloc = true;
552 ++fmt;
553 } else if (*fmt == '$') {
554 /* Reference to numbered output argument. */
555 // TODO
556 not_implemented();
557 } else if (isdigit(*fmt)) {
558 /* Maximum field length (optional). */
559 if (width != -1) {
560 /* Already set. Illegal format string. */
561 break;
562 }
563 char *fmt_new = NULL;
564 width = posix_strtol(fmt, &fmt_new, 10);
565 if (width != 0) {
566 fmt = fmt_new;
567 } else {
568 /* Since POSIX requires width to be non-zero, it is
569 * sufficient to interpret zero width as error without
570 * referring to errno. */
571 break;
572 }
573 } else if (is_length_mod(*fmt, *(fmt + 1), &length_mod)) {
574 /* Length modifier (optional). */
575 if (length_mod == LMOD_NONE) {
576 /* Already set. Illegal format string. The actual detection
577 * is carried out in the is_length_mod(). */
578 break;
579 }
580 if (length_mod == LMOD_hh || length_mod == LMOD_ll) {
581 /* Modifier was two characters long. */
582 ++fmt;
583 }
584 ++fmt;
585 } else if (is_int_conv(*fmt, &int_conv_unsigned, &int_conv_base)) {
586 /* Integer conversion. */
587
588 /* Check sanity of optional parts of conversion specifier. */
589 if (assign_alloc || length_mod == LMOD_L) {
590 /* Illegal format string. */
591 break;
592 }
593
594 /* Conversion of the integer with %p specifier needs special
595 * handling, because it is not allowed to have arbitrary
596 * length modifier. */
597 if (*fmt == 'p') {
598 if (length_mod == LMOD_NONE) {
599 length_mod = LMOD_p;
600 } else {
601 /* Already set. Illegal format string. */
602 break;
603 }
604 }
605
606 /* First consume any white spaces, so we can borrow cursor
607 * from the input provider. This way, the cursor will either
608 * point to the non-white space while the input will be
609 * prefetched up to the newline (which is suitable for strtol),
610 * or the input will be at EOF. */
611 do {
612 c = in->pop(in);
613 } while (isspace(c));
614
615 /* After skipping the white spaces, can we actually continue? */
616 if (c == -1) {
617 /* Input failure. */
618 break;
619 } else {
620 /* Everything is OK, just undo the last pop, so the cursor
621 * can be borrowed. */
622 in->undo(in);
623 }
624
625 const char *cur_borrowed = NULL;
626 const char *cur_limited = NULL;
627 char *cur_updated = NULL;
628
629 /* Borrow the cursor. Until it is returned to the provider
630 * we cannot jump from the cycle, because it would leave
631 * the input inconsistent. */
632 cur_borrowed = in->borrow_cursor(in);
633
634 /* If the width is limited, the cursor horizont must be
635 * decreased accordingly. Otherwise the strtol could read more
636 * than allowed by width. */
637 if (width != -1) {
638 cur_limited = posix_strndup(cur_borrowed, width);
639 } else {
640 cur_limited = cur_borrowed;
641 }
642 cur_updated = (char *) cur_limited;
643
644 long long sres = 0;
645 unsigned long long ures = 0;
646 errno = 0; /* Reset errno to recognize error later. */
647 /* Try to convert the integer. */
648 if (int_conv_unsigned) {
649 ures = posix_strtoull(cur_limited, &cur_updated, int_conv_base);
650 } else {
651 sres = posix_strtoll(cur_limited, &cur_updated, int_conv_base);
652 }
653
654 /* Update the cursor so it can be returned to the provider. */
655 cur_borrowed += cur_updated - cur_limited;
656 if (width != -1 && cur_limited != NULL) {
657 /* Deallocate duplicated part of the cursor view. */
658 free(cur_limited);
659 }
660 cur_limited = NULL;
661 cur_updated = NULL;
662 /* Return the cursor to the provider. Input consistency is again
663 * the job of the provider, so we can report errors from
664 * now on. */
665 in->return_cursor(in, cur_borrowed);
666 cur_borrowed = NULL;
667
668 /* Check whether the conversion was successful. */
669 if (errno != EOK) {
670 matching_failure = true;
671 break;
672 }
673
674 /* If not supressed, assign the converted integer into
675 * the next output argument. */
676 if (!assign_supress) {
677 if (int_conv_unsigned) {
678 switch (length_mod) {
679 case LMOD_hh: ; /* Label cannot be part of declaration. */
680 unsigned char *phh = va_arg(arg, unsigned char *);
681 *phh = (unsigned char) ures;
682 break;
683 case LMOD_h: ;
684 unsigned short *ph = va_arg(arg, unsigned short *);
685 *ph = (unsigned short) ures;
686 break;
687 case LMOD_NONE: ;
688 unsigned *pdef = va_arg(arg, unsigned *);
689 *pdef = (unsigned) ures;
690 break;
691 case LMOD_l: ;
692 unsigned long *pl = va_arg(arg, unsigned long *);
693 *pl = (unsigned long) ures;
694 break;
695 case LMOD_ll: ;
696 unsigned long long *pll = va_arg(arg, unsigned long long *);
697 *pll = (unsigned long long) ures;
698 break;
699 case LMOD_j: ;
700 posix_uintmax_t *pj = va_arg(arg, posix_uintmax_t *);
701 *pj = (posix_uintmax_t) ures;
702 break;
703 case LMOD_z: ;
704 size_t *pz = va_arg(arg, size_t *);
705 *pz = (size_t) ures;
706 break;
707 case LMOD_t: ;
708 // XXX: What is unsigned counterpart of the ptrdiff_t?
709 size_t *pt = va_arg(arg, size_t *);
710 *pt = (size_t) ures;
711 break;
712 case LMOD_p: ;
713 void **pp = va_arg(arg, void **);
714 *pp = (void *) (uintptr_t) ures;
715 break;
716 default:
717 assert(false);
718 }
719 } else {
720 switch (length_mod) {
721 case LMOD_hh: ; /* Label cannot be part of declaration. */
722 signed char *phh = va_arg(arg, signed char *);
723 *phh = (signed char) sres;
724 break;
725 case LMOD_h: ;
726 short *ph = va_arg(arg, short *);
727 *ph = (short) sres;
728 break;
729 case LMOD_NONE: ;
730 int *pdef = va_arg(arg, int *);
731 *pdef = (int) sres;
732 break;
733 case LMOD_l: ;
734 long *pl = va_arg(arg, long *);
735 *pl = (long) sres;
736 break;
737 case LMOD_ll: ;
738 long long *pll = va_arg(arg, long long *);
739 *pll = (long long) sres;
740 break;
741 case LMOD_j: ;
742 posix_intmax_t *pj = va_arg(arg, posix_intmax_t *);
743 *pj = (posix_intmax_t) sres;
744 break;
745 case LMOD_z: ;
746 ssize_t *pz = va_arg(arg, ssize_t *);
747 *pz = (ssize_t) sres;
748 break;
749 case LMOD_t: ;
750 posix_ptrdiff_t *pt = va_arg(arg, posix_ptrdiff_t *);
751 *pt = (posix_ptrdiff_t) sres;
752 break;
753 default:
754 assert(false);
755 }
756 }
757 ++converted_cnt;
758 }
759
760 converting = false;
761 ++fmt;
762 } else if (is_float_conv(*fmt)) {
763 /* Floating point number conversion. */
764
765 /* Check sanity of optional parts of conversion specifier. */
766 if (assign_alloc) {
767 /* Illegal format string. */
768 break;
769 }
770 if (length_mod != LMOD_NONE &&
771 length_mod != LMOD_l &&
772 length_mod != LMOD_L) {
773 /* Illegal format string. */
774 break;
775 }
776
777 /* First consume any white spaces, so we can borrow cursor
778 * from the input provider. This way, the cursor will either
779 * point to the non-white space while the input will be
780 * prefetched up to the newline (which is suitable for strtof),
781 * or the input will be at EOF. */
782 do {
783 c = in->pop(in);
784 } while (isspace(c));
785
786 /* After skipping the white spaces, can we actually continue? */
787 if (c == -1) {
788 /* Input failure. */
789 break;
790 } else {
791 /* Everything is OK, just undo the last pop, so the cursor
792 * can be borrowed. */
793 in->undo(in);
794 }
795
796 const char *cur_borrowed = NULL;
797 const char *cur_limited = NULL;
798 char *cur_updated = NULL;
799
800 /* Borrow the cursor. Until it is returned to the provider
801 * we cannot jump from the cycle, because it would leave
802 * the input inconsistent. */
803 cur_borrowed = in->borrow_cursor(in);
804
805 /* If the width is limited, the cursor horizont must be
806 * decreased accordingly. Otherwise the strtof could read more
807 * than allowed by width. */
808 if (width != -1) {
809 cur_limited = posix_strndup(cur_borrowed, width);
810 } else {
811 cur_limited = cur_borrowed;
812 }
813 cur_updated = (char *) cur_limited;
814
815 float fres = 0.0;
816 double dres = 0.0;
817 long double ldres = 0.0;
818 errno = 0; /* Reset errno to recognize error later. */
819 /* Try to convert the floating point nubmer. */
820 switch (length_mod) {
821 case LMOD_NONE:
822 fres = posix_strtof(cur_limited, &cur_updated);
823 break;
824 case LMOD_l:
825 dres = posix_strtod(cur_limited, &cur_updated);
826 break;
827 case LMOD_L:
828 ldres = posix_strtold(cur_limited, &cur_updated);
829 break;
830 default:
831 assert(false);
832 }
833
834 /* Update the cursor so it can be returned to the provider. */
835 cur_borrowed += cur_updated - cur_limited;
836 if (width != -1 && cur_limited != NULL) {
837 /* Deallocate duplicated part of the cursor view. */
838 free(cur_limited);
839 }
840 cur_limited = NULL;
841 cur_updated = NULL;
842 /* Return the cursor to the provider. Input consistency is again
843 * the job of the provider, so we can report errors from
844 * now on. */
845 in->return_cursor(in, cur_borrowed);
846 cur_borrowed = NULL;
847
848 /* Check whether the conversion was successful. */
849 if (errno != EOK) {
850 matching_failure = true;
851 break;
852 }
853
854 /* If nto supressed, assign the converted floating point number
855 * into the next output argument. */
856 if (!assign_supress) {
857 switch (length_mod) {
858 case LMOD_NONE: ; /* Label cannot be part of declaration. */
859 float *pf = va_arg(arg, float *);
860 *pf = fres;
861 break;
862 case LMOD_l: ;
863 double *pd = va_arg(arg, double *);
864 *pd = dres;
865 break;
866 case LMOD_L: ;
867 long double *pld = va_arg(arg, long double *);
868 *pld = ldres;
869 break;
870 default:
871 assert(false);
872 }
873 ++converted_cnt;
874 }
875
876 converting = false;
877 ++fmt;
878 } else if (is_seq_conv(*fmt, &length_mod)) {
879 /* Character sequence conversion. */
880
881 /* Check sanity of optional parts of conversion specifier. */
882 if (length_mod != LMOD_NONE &&
883 length_mod != LMOD_l) {
884 /* Illegal format string. */
885 break;
886 }
887
888 if (length_mod == LMOD_l) {
889 /* Wide chars not supported. */
890 // TODO
891 not_implemented();
892 }
893
894 int term_size = 1; /* Size of the terminator (0 or 1)). */
895 if (*fmt == 'c') {
896 term_size = 0;
897 width = width == -1 ? 1 : width;
898 }
899
900 if (*fmt == 's') {
901 /* Skip white spaces. */
902 do {
903 c = in->pop(in);
904 } while (isspace(c));
905 } else {
906 /* Fetch a single character. */
907 c = in->pop(in);
908 }
909
910 /* Check whether there is still input to read. */
911 if (c == -1) {
912 /* Input failure. */
913 break;
914 }
915
916 /* Prepare scanset. */
917 char terminate_on[256];
918 for (int i = 0; i < 256; ++i) {
919 terminate_on[i] = 0;
920 }
921 if (*fmt == 'c') {
922 ++fmt;
923 } else if (*fmt == 's') {
924 terminate_on[' '] = 1;
925 terminate_on['\n'] = 1;
926 terminate_on['\t'] = 1;
927 terminate_on['\f'] = 1;
928 terminate_on['\r'] = 1;
929 terminate_on['\v'] = 1;
930 ++fmt;
931 } else {
932 assert(*fmt == '[');
933 bool not = false;
934 bool dash = false;
935 ++fmt;
936 /* Check for negation. */
937 if (*fmt == '^') {
938 not = true;
939 ++fmt;
940 }
941 /* Check for escape sequences. */
942 if (*fmt == '-' || *fmt == ']') {
943 terminate_on[(int) *fmt] = 1;
944 ++fmt;
945 }
946 /* Check for ordinary characters and ranges. */
947 while (*fmt != '\0' && *fmt != ']') {
948 if (dash) {
949 for (char chr = *(fmt - 2); chr <= *fmt; ++chr) {
950 terminate_on[(int) chr] = 1;
951 }
952 dash = false;
953 } else if (*fmt == '-') {
954 dash = true;
955 } else {
956 terminate_on[(int) *fmt] = 1;
957 }
958 ++fmt;
959 }
960 /* Check for escape sequence. */
961 if (dash == true) {
962 terminate_on['-'] = 1;
963 }
964 /* Check whether the specifier was correctly terminated.*/
965 if (*fmt == '\0') {
966 /* Illegal format string. */
967 break;
968 } else {
969 ++fmt;
970 }
971 /* Inverse the scanset if necessary. */
972 if (not == false) {
973 for (int i = 0; i < 256; ++i) {
974 terminate_on[i] = terminate_on[i] ? 0 : 1;
975 }
976 }
977 }
978
979 char * buf = NULL;
980 size_t buf_size = 0;
981 char * cur = NULL;
982 size_t alloc_step = 80; /* Buffer size gain during reallocation. */
983 int my_buffer_idx = 0;
984
985 /* Retrieve the buffer into which popped characters
986 * will be stored. */
987 if (!assign_supress) {
988 if (assign_alloc) {
989 /* We must allocate our own buffer. */
990 buf_size =
991 width == -1 ? alloc_step : (size_t) width + term_size;
992 buf = malloc(buf_size);
993 if (!buf) {
994 /* No memory. */
995 break;
996 }
997 my_buffer_idx = next_unused_buffer_idx;
998 ++next_unused_buffer_idx;
999 buffers[my_buffer_idx] = buf;
1000 cur = buf;
1001 } else {
1002 /* Caller provided its buffer. */
1003 buf = va_arg(arg, char *);
1004 cur = buf;
1005 buf_size =
1006 width == -1 ? SIZE_MAX : (size_t) width + term_size;
1007 }
1008 }
1009
1010 /* Match the string. The next character is already popped. */
1011 while ((width == -1 || width > 0) && c != -1 && !terminate_on[c]) {
1012
1013 /* Check whether the buffer is still sufficiently large. */
1014 if (!assign_supress) {
1015 /* Always reserve space for the null terminator. */
1016 if (cur == buf + buf_size - term_size) {
1017 /* Buffer size must be increased. */
1018 buf = realloc(buf, buf_size + alloc_step);
1019 if (buf) {
1020 buffers[my_buffer_idx] = buf;
1021 cur = buf + buf_size - term_size;
1022 buf_size += alloc_step;
1023 } else {
1024 /* Break just from this tight loop. Errno will
1025 * be checked after it. */
1026 break;
1027 }
1028 }
1029 /* Store the input character. */
1030 *cur = c;
1031 }
1032
1033 width = width == -1 ? -1 : width - 1;
1034 ++cur;
1035 c = in->pop(in);
1036 }
1037 if (errno == ENOMEM) {
1038 /* No memory. */
1039 break;
1040 }
1041 if (c != -1) {
1042 /* There is still more input, so undo the last pop. */
1043 in->undo(in);
1044 }
1045
1046 /* Check for failures. */
1047 if (cur == buf) {
1048 /* Matching failure. Input failure was already checked
1049 * earlier. */
1050 matching_failure = true;
1051 if (!assign_supress && assign_alloc) {
1052 /* Roll back. */
1053 free(buf);
1054 buffers[my_buffer_idx] = NULL;
1055 --next_unused_buffer_idx;
1056 }
1057 break;
1058 }
1059
1060 /* Store the terminator. */
1061 if (!assign_supress && term_size > 0) {
1062 /* Space for the terminator was reserved. */
1063 *cur = '\0';
1064 }
1065
1066 /* Store the result if not already stored. */
1067 if (!assign_supress) {
1068 if (assign_alloc) {
1069 char **pbuf = va_arg(arg, char **);
1070 *pbuf = buf;
1071 }
1072 ++converted_cnt;
1073 }
1074
1075 converting = false;
1076 /* Format string pointer already incremented. */
1077 } else if (*fmt == 'n') {
1078 /* Report the number of consumed bytes so far. */
1079
1080 /* Sanity check. */
1081 bool sane =
1082 width == -1 &&
1083 length_mod == LMOD_NONE &&
1084 assign_alloc == false &&
1085 assign_supress == false;
1086
1087 if (sane) {
1088 int *pi = va_arg(arg, int *);
1089 *pi = in->consumed;
1090 } else {
1091 /* Illegal format string. */
1092 break;
1093 }
1094
1095 /* This shall not be counted as conversion. */
1096 converting = false;
1097 ++fmt;
1098 } else {
1099 /* Illegal format string. */
1100 break;
1101 }
1102
1103 } else {
1104
1105 /* Processing outside conversion specifier. Either skip white
1106 * spaces or match characters one by one. If conversion specifier
1107 * is detected, switch to conversion mode. */
1108 if (isspace(*fmt)) {
1109 /* Skip white spaces in the format string. */
1110 while (isspace(*fmt)) {
1111 ++fmt;
1112 }
1113 /* Skip white spaces in the input. */
1114 do {
1115 c = in->pop(in);
1116 } while (isspace(c));
1117 if (c != -1) {
1118 /* Input is not at EOF, so undo the last pop operation. */
1119 in->undo(in);
1120 }
1121 } else if (*fmt == '%' && *(fmt + 1) != '%') {
1122 /* Conversion specifier detected. Switch modes. */
1123 converting = true;
1124 /* Reset the conversion context. */
1125 assign_supress = false;
1126 assign_alloc = false;
1127 width = -1;
1128 length_mod = LMOD_NONE;
1129 int_conv_unsigned = false;
1130 int_conv_base = 0;
1131 ++fmt;
1132 } else {
1133 /* One by one matching. */
1134 if (*fmt == '%') {
1135 /* Escape sequence detected. */
1136 ++fmt;
1137 assert(*fmt == '%');
1138 }
1139 c = in->pop(in);
1140 if (c == -1) {
1141 /* Input failure. */
1142 break;
1143 } else if (c != *fmt) {
1144 /* Matching failure. */
1145 in->undo(in);
1146 matching_failure = true;
1147 break;
1148 } else {
1149 ++fmt;
1150 }
1151 }
1152
1153 }
1154
1155 }
1156
1157 in->release(in);
1158
1159 /* This somewhat complicated return value decision is required by POSIX. */
1160 int rc;
1161 if (matching_failure) {
1162 rc = converted_cnt;
1163 } else {
1164 if (errno == EOK) {
1165 rc = converted_cnt > 0 ? converted_cnt : EOF;
1166 } else {
1167 rc = EOF;
1168 }
1169 }
1170 if (rc == EOF) {
1171 /* Caller will not know how many arguments were successfully converted,
1172 * so the deallocation of buffers is our responsibility. */
1173 for (int i = 0; i < next_unused_buffer_idx; ++i) {
1174 free(buffers[i]);
1175 buffers[i] = NULL;
1176 }
1177 next_unused_buffer_idx = 0;
1178 }
1179 return rc;
1180}
1181
1182/**
1183 * Convert formatted input from the stream.
1184 *
1185 * @param stream Input stream.
1186 * @param format Format description.
1187 * @param arg Output items.
1188 * @return The number of converted output items or EOF on failure.
1189 */
1190int posix_vfscanf(
1191 FILE *restrict stream, const char *restrict format, va_list arg)
1192{
1193 _input_provider provider = {
1194 { 0 }, 0, 0, NULL, 0, NULL, _PROV_CONSTRUCTED,
1195 _capture_stream, _pop_stream, _undo_stream,
1196 _borrow_cursor_universal, _return_cursor_stream, _release_stream
1197 };
1198 provider.source.stream = stream;
1199 return _internal_scanf(&provider, format, arg);
1200}
1201
1202/**
1203 * Convert formatted input from the string.
1204 *
1205 * @param s Input string.
1206 * @param format Format description.
1207 * @param arg Output items.
1208 * @return The number of converted output items or EOF on failure.
1209 */
1210int posix_vsscanf(
1211 const char *restrict s, const char *restrict format, va_list arg)
1212{
1213 _input_provider provider = {
1214 { 0 }, 0, 0, NULL, 0, NULL, _PROV_CONSTRUCTED,
1215 _capture_string, _pop_string, _undo_string,
1216 _borrow_cursor_universal, _return_cursor_string, _release_string
1217 };
1218 provider.source.string = s;
1219 return _internal_scanf(&provider, format, arg);
1220}
1221
1222// FIXME: move these test cases into app/tester once scanf is included in libc
1223
1224#if 0
1225
1226//#include <stdio.h>
1227//#include <malloc.h>
1228//#include <string.h>
1229
/*
 * Compare an expected and an actual scalar value and print the outcome.
 *
 * fmt     printf conversion (string literal) used to print both values.
 * exp_val expected value.
 * act_val value actually obtained.
 *
 * On mismatch the variable `fail`, which must be in scope at the point of
 * use, is incremented. Both value arguments are evaluated twice (once for
 * the comparison, once for printing), so they must be free of side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement, e.g. as the unbraced body of an if/else, and the
 * arguments are parenthesized so that operators inside them cannot bind
 * to the comparison.
 */
#define test_val(fmt, exp_val, act_val) \
	do { \
		if ((exp_val) == (act_val)) { \
			printf("succ, expected " fmt ", actual " fmt "\n", \
			    (exp_val), (act_val)); \
		} else { \
			printf("fail, expected " fmt ", actual " fmt "\n", \
			    (exp_val), (act_val)); \
			++fail; \
		} \
	} while (0)
1237
/*
 * Compare an expected and an actual string with posix_strcmp() and print
 * the outcome.
 *
 * fmt     printf conversion (string literal) used to print both strings.
 * exp_str expected string.
 * act_str string actually obtained.
 *
 * On mismatch the variable `fail`, which must be in scope at the point of
 * use, is incremented. Both string arguments are evaluated twice (once for
 * the comparison, once for printing), so they must be free of side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement, e.g. as the unbraced body of an if/else, and the
 * arguments are parenthesized so that arbitrary expressions can be passed.
 */
#define test_str(fmt, exp_str, act_str) \
	do { \
		if (posix_strcmp((exp_str), (act_str)) == 0) { \
			printf("succ, expected " fmt ", actual " fmt "\n", \
			    (exp_str), (act_str)); \
		} else { \
			printf("fail, expected " fmt ", actual " fmt "\n", \
			    (exp_str), (act_str)); \
			++fail; \
		} \
	} while (0)
1245
/*
 * Self-test for the scanf family, driven through posix_sscanf().
 *
 * Each group scans a fixed input string and reports every comparison via
 * test_val()/test_str(), which print one line per check and increment the
 * local `fail` counter on mismatch. The total is printed at the end.
 *
 * All destinations start out zeroed/NULL so that a conversion which did not
 * take place shows up as a reported mismatch instead of a read of an
 * indeterminate value. Buffers requested with the 'm' modifier are only
 * inspected and freed after a full match: when the scanner returns EOF it
 * releases its own allocations, so the caller must not touch them then.
 */
void __posix_scanf_test(void);
void __posix_scanf_test(void)
{
	int fail = 0;

	int ret;

	unsigned char uhh = 0;
	signed char shh = 0;
	unsigned short uh = 0;
	short sh = 0;
	unsigned udef = 0;
	int sdef = 0;
	unsigned long ul = 0;
	long sl = 0;
	unsigned long long ull = 0;
	long long sll = 0;
	void *p = NULL;

	float f = 0;
	double d = 0;
	long double ld = 0;

	char str[20] = { 0 };
	char seq[20] = { 0 };
	char scanset[20] = { 0 };

	char *pstr = NULL;
	char *pseq = NULL;
	char *pscanset = NULL;

	/* Integer conversions, literal matching, %% escape and %n. */
	ret = posix_sscanf(
	    "\n j tt % \t -121314 98765 aqw 0765 0x77 0xABCDEF88 -99 884",
	    " j tt %%%3hhd%1hhu%3hd %3hu%u aqw%n %lo%llx %p %li %lld",
	    &shh, &uhh, &sh, &uh, &udef, &sdef, &ul, &ull, &p, &sl, &sll);
	test_val("%d", -12, shh);
	test_val("%u", 1, uhh);
	test_val("%d", 314, sh);
	test_val("%u", 987, uh);
	test_val("%u", 65, udef);
	test_val("%d", 28, sdef);
	test_val("%lo", (unsigned long) 0765, ul);
	test_val("%llx", (unsigned long long) 0x77, ull);
	test_val("%p", (void *) 0xABCDEF88, p);
	test_val("%ld", (long) -99, sl);
	test_val("%lld", (long long) 884, sll);
	test_val("%d", 10, ret);

	/* Floating point conversions. */
	ret = posix_sscanf(
	    "\n \t\t1.0 -0x555.AP10 1234.5678e12",
	    "%f %lf %Lf",
	    &f, &d, &ld);
	test_val("%f", 1.0, f);
	test_val("%lf", (double) -0x555.AP10, d);
	test_val("%Lf", (long double) 1234.5678e12, ld);
	test_val("%d", 3, ret);

	/* %s into a caller buffer and into an allocated buffer. */
	ret = posix_sscanf(
	    "\n\n\thello world \n",
	    "%5s %ms",
	    str, &pstr);
	test_str("%s", "hello", str);
	if (ret == 2 && pstr != NULL) {
		test_str("%s", "world", pstr);
		free(pstr);
	} else {
		printf("fail, expected %s, actual (not allocated)\n", "world");
		++fail;
	}
	test_val("%d", 2, ret);

	/* %c into a caller buffer and into an allocated buffer. */
	ret = posix_sscanf(
	    "\n\n\thello world \n",
	    " %5c %mc",
	    seq, &pseq);
	seq[5] = '\0';
	test_str("%s", "hello", seq);
	if (ret == 2 && pseq != NULL) {
		/*
		 * %c stores raw characters without a terminator, so the
		 * allocated buffer need not have room for one. Compare via
		 * a terminated local copy instead of writing to pseq[1].
		 */
		char first[2] = { pseq[0], '\0' };
		test_str("%s", "w", first);
		free(pseq);
	} else {
		printf("fail, expected %s, actual (not allocated)\n", "w");
		++fail;
	}
	test_val("%d", 2, ret);

	/* Scansets, including a negated one, with '-' and ']' as members. */
	ret = posix_sscanf(
	    "\n\n\th-e-l-l-o world-] \n",
	    " %9[-eh-o] %m[^]-]",
	    scanset, &pscanset);
	test_str("%s", "h-e-l-l-o", scanset);
	if (ret == 2 && pscanset != NULL) {
		test_str("%s", "world", pscanset);
		free(pscanset);
	} else {
		printf("fail, expected %s, actual (not allocated)\n", "world");
		++fail;
	}
	test_val("%d", 2, ret);

	printf("Failed: %d\n", fail);
}
1334
1335#endif
1336
1337/** @}
1338 */
Note: See TracBrowser for help on using the repository browser.