source: mainline/uspace/lib/posix/src/fnmatch.c@ 02fe42e

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 02fe42e was d1582b50, checked in by Jiri Svoboda <jiri@…>, 5 years ago

Fix spacing in single-line comments using latest ccheck

This found incorrectly formatted section comments (with blocks of
asterisks or dashes). I strongly believe against using section comments
but I am not simply removing them since that would probably be
controversial.

  • Property mode set to 100644
File size: 18.9 KB
Line 
1/*
2 * Copyright (c) 2011 Jiri Zarevucky
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup libposix
30 * @{
31 */
32/** @file Filename-matching.
33 */
34
35/*
36 * This file contains an implementation of the fnmatch() pattern matching
37 * function. There is more code than necessary to account for the possibility
38 * of adding POSIX-like locale support to the system in the future. Functions
39 * that are only necessary for locale support currently simply use single
40 * characters for "collation elements".
41 * When (or if) locales are properly implemented, extending this implementation
42 * will be fairly straightforward.
43 */
44
45#include <stdbool.h>
46#include <ctype.h>
47#include <string.h>
48#include <stdlib.h>
49#include <assert.h>
50
51#include "internal/common.h"
52#include <fnmatch.h>
53
/* Result code used by the _match... helpers to report a malformed pattern. */
#define INVALID_PATTERN (-1)

/*
 * Collating element. For now this is just the character value itself;
 * it is a distinct type so that real locale support can be added later.
 */
typedef int coll_elm_t;

/** Value denoting a collating element that is not valid in the current
 * locale. (Only meaningful once locales are supported.)
 */
#define COLL_ELM_INVALID (-1)
67
68/**
69 * Get collating element matching a string.
70 *
71 * @param str String representation of the element.
72 * @return Matching collating element or COLL_ELM_INVALID.
73 */
74static coll_elm_t _coll_elm_get(const char *str)
75{
76 if (str[0] == '\0' || str[1] != '\0') {
77 return COLL_ELM_INVALID;
78 }
79 return str[0];
80}
81
82/**
83 * Get collating element matching a single character.
84 *
85 * @param c Character representation of the element.
86 * @return Matching collating element.
87 */
88static coll_elm_t _coll_elm_char(int c)
89{
90 return c;
91}
92
93/**
94 * Match collating element with a beginning of a string.
95 *
96 * @param elm Collating element to match.
97 * @param str String which beginning should match the element.
98 * @return 0 if the element doesn't match, or the number of characters matched.
99 */
100static int _coll_elm_match(coll_elm_t elm, const char *str)
101{
102 return elm == *str;
103}
104
105/**
106 * Checks whether a string begins with a collating element in the given range.
107 * Ordering depends on the locale (if locales are supported).
108 *
109 * @param first First element of the range.
110 * @param second Last element of the range.
111 * @param str String to match.
112 * @return 0 if there is no match, or the number of characters matched.
113 */
114static int _coll_elm_between(coll_elm_t first, coll_elm_t second,
115 const char *str)
116{
117 return *str >= first && *str <= second;
118}
119
/**
 * Extract the text enclosed between "[<seq>" and "<seq>]".
 *
 * Unless FNM_NOESCAPE is set, a backslash makes the following character
 * literal (the backslash itself is not copied). With FNM_PATHNAME, a slash
 * inside the delimiters makes the pattern invalid.
 *
 * @param pattern Pointer to the text to read from; must point at "[<seq>".
 *                On success it is advanced past the closing ']'. It is left
 *                untouched on failure.
 * @param seq The delimiter character following '[' and preceding ']'.
 * @param buf Output buffer, always nul-terminated on success.
 * @param buf_sz Size of the output buffer. Text that does not fit is
 *               silently dropped.
 * @param flags Flags modifying the behavior (see fnmatch()).
 * @return True on success, false if the pattern is invalid.
 */
static bool _get_delimited(
    const char **pattern, int seq,
    char *buf, size_t buf_sz, int flags)
{
	const bool escapes = (flags & FNM_NOESCAPE) == 0;
	const bool no_slash = (flags & FNM_PATHNAME) != 0;

	const char *cur = *pattern;
	char *out = buf;
	size_t room = buf_sz;

	/* Caller should ensure this. */
	assert(cur[0] == '[' && cur[1] == seq);

	/* Walk the content until the closing "<seq>]" pair shows up. */
	for (cur += 2; !(cur[0] == seq && cur[1] == ']'); cur++) {
		if (escapes && *cur == '\\') {
			/* Take the escaped character literally. */
			cur++;
		}
		if (*cur == '\0') {
			/* Ran off the end without a terminator. */
			return false;
		}
		if (no_slash && *cur == '/') {
			/* Slash in a pathname pattern is invalid. */
			return false;
		}
		if (room > 1) {
			/* Keep one byte in reserve for the terminator. */
			*out++ = *cur;
			room--;
		}
	}

	*pattern = cur + 2;
	*out = '\0';
	return true;
}
170
/*
 * CHARACTER CLASSES
 */

/*
 * Number of name characters kept when reading a class or collating element
 * name from a pattern (buffers are sized with one more byte for the
 * terminating nul).
 */
#define MAX_CLASS_OR_COLL_LEN 6

/* Pairs a character class name with the predicate that implements it. */
struct _char_class {
	const char *name;
	int (*func)(int);
};

/*
 * Supported character classes. The entries are looked up with bsearch(),
 * so they must remain sorted by name.
 */
static const struct _char_class _char_classes[] = {
	{ .name = "alnum", .func = isalnum },
	{ .name = "alpha", .func = isalpha },
	{ .name = "blank", .func = isblank },
	{ .name = "cntrl", .func = iscntrl },
	{ .name = "digit", .func = isdigit },
	{ .name = "graph", .func = isgraph },
	{ .name = "lower", .func = islower },
	{ .name = "print", .func = isprint },
	{ .name = "punct", .func = ispunct },
	{ .name = "space", .func = isspace },
	{ .name = "upper", .func = isupper },
	{ .name = "xdigit", .func = isxdigit }
};
197
198/**
199 * Compare function for binary search in the _char_classes array.
200 *
201 * @param key Key of the searched element.
202 * @param elem Element of _char_classes array.
203 * @return Ordering indicator (-1 less than, 0 equal, 1 greater than).
204 */
205static int _class_compare(const void *key, const void *elem)
206{
207 const struct _char_class *class = elem;
208 return strcmp((const char *) key, class->name);
209}
210
211/**
212 * Returns whether the given character belongs to the specified character class.
213 *
214 * @param cname Name of the character class.
215 * @param c Character.
216 * @return True if the character belongs to the class, false otherwise.
217 */
218static bool _is_in_class (const char *cname, int c)
219{
220 /* Search for class in the array of supported character classes. */
221 const struct _char_class *class = bsearch(cname, _char_classes,
222 sizeof(_char_classes) / sizeof(struct _char_class),
223 sizeof(struct _char_class), _class_compare);
224
225 if (class == NULL) {
226 /* No such class supported - treat as an empty class. */
227 return false;
228 } else {
229 /* Class matched. */
230 return class->func(c);
231 }
232}
233
234/**
235 * Tries to parse an initial part of the pattern as a character class pattern,
236 * and if successful, matches the beginning of the given string against the class.
237 *
238 * @param pattern Pointer to the pattern to match. Must begin with a class
239 * specifier and is repositioned to the first character after the specifier
240 * if successful.
241 * @param str String to match.
242 * @param flags Flags modifying the behavior (see fnmatch()).
243 * @return INVALID_PATTERN if the pattern doesn't start with a valid class
244 * specifier, 0 if the beginning of the matched string doesn't belong
245 * to the class, or positive number of characters matched.
246 */
247static int _match_char_class(const char **pattern, const char *str, int flags)
248{
249 char class[MAX_CLASS_OR_COLL_LEN + 1];
250
251 if (!_get_delimited(pattern, ':', class, sizeof(class), flags)) {
252 return INVALID_PATTERN;
253 }
254
255 return _is_in_class(class, *str);
256}
257
258/*
259 * END CHARACTER CLASSES
260 */
261
262/**
263 * Reads the next collating element in the pattern, taking into account
264 * locale (if supported) and flags (see fnmatch()).
265 *
266 * @param pattern Pattern.
267 * @param flags Flags given to fnmatch().
268 * @return Collating element on success,
269 * or COLL_ELM_INVALID if the pattern is invalid.
270 */
271static coll_elm_t _next_coll_elm(const char **pattern, int flags)
272{
273 assert(pattern != NULL);
274 assert(*pattern != NULL);
275 assert(**pattern != '\0');
276
277 const char *p = *pattern;
278 const bool noescape = (flags & FNM_NOESCAPE) != 0;
279 const bool pathname = (flags & FNM_PATHNAME) != 0;
280
281 if (*p == '[') {
282 if (*(p + 1) == '.') {
283 char buf[MAX_CLASS_OR_COLL_LEN + 1];
284 if (!_get_delimited(pattern, '.', buf, sizeof(buf), flags)) {
285 return COLL_ELM_INVALID;
286 }
287 return _coll_elm_get(buf);
288 }
289
290 if (*(p + 1) == '=') {
291 char buf[MAX_CLASS_OR_COLL_LEN + 1];
292 if (!_get_delimited(pattern, '=', buf, sizeof(buf), flags)) {
293 return COLL_ELM_INVALID;
294 }
295 return _coll_elm_get(buf);
296 }
297 }
298
299 if (!noescape && *p == '\\') {
300 p++;
301 if (*p == '\0') {
302 *pattern = p;
303 return COLL_ELM_INVALID;
304 }
305 }
306 if (pathname && *p == '/') {
307 return COLL_ELM_INVALID;
308 }
309
310 *pattern = p + 1;
311 return _coll_elm_char(*p);
312}
313
/*
 * Record the outcome of one sub-match inside a bracket expression.
 *
 * Expects an 'int matched' variable in the enclosing function. A negative
 * result (invalid pattern) is returned from the enclosing function right
 * away; the first positive result is stored in 'matched'; everything else
 * is ignored. Wrapped in do/while (0) so that it behaves as a single
 * statement (e.g. in an unbraced if/else).
 */
#define _matched(match) \
	do { \
		int _match = (match); \
		if (_match < 0) { \
			/* Invalid pattern */ \
			return _match; \
		} \
		if (matched == 0 && _match > 0) { \
			/* First match */ \
			matched = _match; \
		} \
	} while (0)
324
325/**
326 * Matches the beginning of the given string against a bracket expression
327 * the pattern begins with.
328 *
329 * @param pattern Pointer to the beginning of a bracket expression in a pattern.
330 * On success, the pointer is moved to the first character after the
331 * bracket expression.
332 * @param str Unmatched part of the string.
333 * @param flags Flags given to fnmatch().
334 * @return INVALID_PATTERN if the pattern is invalid, 0 if there is no match
335 * or the number of matched characters on success.
336 */
337static int _match_bracket_expr(const char **pattern, const char *str, int flags)
338{
339 const bool pathname = (flags & FNM_PATHNAME) != 0;
340 const bool special_period = (flags & FNM_PERIOD) != 0;
341 const char *p = *pattern;
342 bool negative = false;
343 int matched = 0;
344
345 assert(*p == '['); /* calling code should ensure this */
346 p++;
347
348 if (*str == '\0' || (pathname && *str == '/') ||
349 (pathname && special_period && *str == '.' && *(str - 1) == '/')) {
350 /*
351 * No bracket expression matches end of string,
352 * slash in pathname match or initial period with FNM_PERIOD
353 * option.
354 */
355 return 0;
356 }
357
358 if (*p == '^' || *p == '!') {
359 negative = true;
360 p++;
361 }
362
363 if (*p == ']') {
364 /* When ']' is first, treat it as a normal character. */
365 _matched(*str == ']');
366 p++;
367 }
368
369 coll_elm_t current_elm = COLL_ELM_INVALID;
370
371 while (*p != ']') {
372 if (*p == '-' && *(p + 1) != ']' &&
373 current_elm != COLL_ELM_INVALID) {
374 /* Range expression. */
375 p++;
376 coll_elm_t end_elm = _next_coll_elm(&p, flags);
377 if (end_elm == COLL_ELM_INVALID) {
378 return INVALID_PATTERN;
379 }
380 _matched(_coll_elm_between(current_elm, end_elm, str));
381 continue;
382 }
383
384 if (*p == '[' && *(p + 1) == ':') {
385 current_elm = COLL_ELM_INVALID;
386 _matched(_match_char_class(&p, str, flags));
387 continue;
388 }
389
390 current_elm = _next_coll_elm(&p, flags);
391 if (current_elm == COLL_ELM_INVALID) {
392 return INVALID_PATTERN;
393 }
394 _matched(_coll_elm_match(current_elm, str));
395 }
396
397 /* No error occured - update pattern pointer. */
398 *pattern = p + 1;
399
400 if (matched == 0) {
401 /* No match found */
402 return negative;
403 } else {
404 /* Matched 'match' characters. */
405 return negative ? 0 : matched;
406 }
407}
408
409/**
410 * Matches a portion of the pattern containing no asterisks (*) against
411 * the given string.
412 *
413 * @param pattern Pointer to the unmatched portion of the pattern.
414 * On success, the pointer is moved to the first asterisk, or to the
415 * terminating nul character, whichever occurs first.
416 * @param string Pointer to the input string. On success, the pointer is moved
417 * to the first character that wasn't explicitly matched.
418 * @param flags Flags given to fnmatch().
419 * @return True if the entire subpattern matched. False otherwise.
420 */
421static bool _partial_match(const char **pattern, const char **string, int flags)
422{
423 /*
424 * Only a single *-delimited subpattern is matched here.
425 * So in this function, '*' is understood as the end of pattern.
426 */
427
428 const bool pathname = (flags & FNM_PATHNAME) != 0;
429 const bool special_period = (flags & FNM_PERIOD) != 0;
430 const bool noescape = (flags & FNM_NOESCAPE) != 0;
431 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;
432
433 const char *s = *string;
434 const char *p = *pattern;
435
436 while (*p != '*') {
437 /* Bracket expression. */
438 if (*p == '[') {
439 int matched = _match_bracket_expr(&p, s, flags);
440 if (matched == 0) {
441 /* Doesn't match. */
442 return false;
443 }
444 if (matched != INVALID_PATTERN) {
445 s += matched;
446 continue;
447 }
448
449 assert(matched == INVALID_PATTERN);
450 /* Fall through to match [ as an ordinary character. */
451 }
452
453 /* Wildcard match. */
454 if (*p == '?') {
455 if (*s == '\0') {
456 /* No character to match. */
457 return false;
458 }
459 if (pathname && *s == '/') {
460 /* Slash must be matched explicitly. */
461 return false;
462 }
463 if (special_period && pathname &&
464 *s == '.' && *(s - 1) == '/') {
465 /* Initial period must be matched explicitly. */
466 return false;
467 }
468
469 /* None of the above, match anything else. */
470 p++;
471 s++;
472 continue;
473 }
474
475 if (!noescape && *p == '\\') {
476 /* Escaped character. */
477 p++;
478 }
479
480 if (*p == '\0') {
481 /*
482 * End of pattern, must match end of string or
483 * an end of subdirectory name (optional).
484 */
485
486 if (*s == '\0' || (leading_dir && *s == '/')) {
487 break;
488 }
489
490 return false;
491 }
492
493 if (*p == *s) {
494 /* Exact match. */
495 p++;
496 s++;
497 continue;
498 }
499
500 /* Nothing matched. */
501 return false;
502 }
503
504 /* Entire sub-pattern matched. */
505
506 /* postconditions */
507 assert(*p == '\0' || *p == '*');
508 assert(*p != '\0' || *s == '\0' || (leading_dir && *s == '/'));
509
510 *pattern = p;
511 *string = s;
512 return true;
513}
514
/**
 * Match a whole string against a whole pattern.
 *
 * The pattern is processed as a sequence of asterisk-free sub-patterns.
 * The first one must match at the start of the string; every following one
 * is tried at each offset the preceding asterisk is allowed to cover.
 *
 * @param pattern Pattern.
 * @param string String to match.
 * @param flags Flags given to fnmatch().
 * @return True if the string matched the pattern, false otherwise.
 */
static bool _full_match(const char *pattern, const char *string, int flags)
{
	const bool slash_special = (flags & FNM_PATHNAME) != 0;
	const bool period_special = (flags & FNM_PERIOD) != 0;
	const bool dir_prefix = (flags & FNM_LEADING_DIR) != 0;

	if (period_special && string[0] == '.') {
		/* Initial dot must be matched by an explicit dot in pattern. */
		if (pattern[0] != '.') {
			return false;
		}
		pattern++;
		string++;
	}

	/* The initial match (if the pattern starts with one) must succeed. */
	if (*pattern != '*' && !_partial_match(&pattern, &string, flags)) {
		return false;
	}

	for (; *pattern != '\0'; ) {
		assert(*pattern == '*');
		pattern++;

		/*
		 * Determine the last offset the asterisk may skip to: nothing
		 * at all if it would have to swallow a period following
		 * a slash, otherwise up to the next slash (pathname mode) or
		 * the end of the string.
		 */
		const char *last = string;
		const bool hidden = slash_special && period_special &&
		    *string == '.' && *(string - 1) == '/';
		if (!hidden) {
			last = gnu_strchrnul(string, slash_special ? '/' : '\0');
		}

		/* Try to match every possible offset. */
		bool found = false;
		for (; string <= last; string++) {
			if (_partial_match(&pattern, &string, flags)) {
				found = true;
				break;
			}
		}

		if (!found) {
			return false;
		}
	}

	return *string == '\0' || (dir_prefix && *string == '/');
}
577
/**
 * Transform the entire string to lowercase.
 *
 * @param s Input string (must not be NULL).
 * @return Newly allocated copy of the input string with all uppercase
 *         characters folded to their lowercase variants, or NULL if the
 *         copy could not be allocated. The caller owns the result and
 *         releases it with free().
 */
static char *_casefold(const char *s)
{
	assert(s != NULL);

	char *result = strdup(s);
	if (result == NULL) {
		/* Out of memory - let the caller decide what to do. */
		return NULL;
	}

	for (char *i = result; *i != '\0'; ++i) {
		/*
		 * tolower() is only defined for EOF and unsigned char values;
		 * a plain char may be negative, so convert it first.
		 */
		*i = (char) tolower((unsigned char) *i);
	}

	return result;
}
594
/**
 * Filename pattern matching.
 *
 * @param pattern Pattern to match the string against.
 * @param string Matched string.
 * @param flags Flags altering the matching of special characters
 *              (mainly for dot and slash).
 * @return Zero if the string matches the pattern, FNM_NOMATCH otherwise.
 *         FNM_NOMATCH is also returned when FNM_CASEFOLD is requested and
 *         the temporary case-folded copies cannot be allocated.
 */
int fnmatch(const char *pattern, const char *string, int flags)
{
	assert(pattern != NULL);
	assert(string != NULL);

	// TODO: don't fold everything in advance, but only when needed

	/* Case-folded copies; both stay NULL unless FNM_CASEFOLD is given. */
	char *folded_pattern = NULL;
	char *folded_string = NULL;

	if ((flags & FNM_CASEFOLD) != 0) {
		/* Just fold the entire pattern and string. */
		folded_pattern = _casefold(pattern);
		folded_string = _casefold(string);

		if (folded_pattern == NULL || folded_string == NULL) {
			/*
			 * Allocation failed. Matching cannot be carried out,
			 * so report a non-match instead of dereferencing
			 * a null pointer. free(NULL) is a no-op.
			 */
			free(folded_pattern);
			free(folded_string);
			return FNM_NOMATCH;
		}

		pattern = folded_pattern;
		string = folded_string;
	}

	bool result = _full_match(pattern, string, flags);

	free(folded_pattern);
	free(folded_string);

	return result ? 0 : FNM_NOMATCH;
}
630
// FIXME: put the testcases to the app/tester after fnmatch is included into libc

#if 0

#include <stdio.h>

/*
 * Evaluate a test expression, print its source text together with the
 * outcome and count failures in the enclosing function's 'fail' variable.
 * The expression text is passed as a printf() argument rather than being
 * part of the format string, so patterns containing '%' print correctly.
 */
#define fnmatch_test(x) \
	do { \
		if (x) { \
			printf("SUCCESS: %s\n", #x); \
		} else { \
			printf("FAILED: %s\n", #x); \
			fail++; \
		} \
	} while (0)

/* Expect the string s2 to match the pattern s1 under the given flags. */
#define match(s1, s2, flags) fnmatch_test(fnmatch(s1, s2, flags) == 0)
/* Expect the string s2 not to match the pattern s1 under the given flags. */
#define nomatch(s1, s2, flags) fnmatch_test(fnmatch(s1, s2, flags) == FNM_NOMATCH)

/** Run the fnmatch() self-test and print a summary of the failures. */
void __posix_fnmatch_test(void)
{
	int fail = 0;

	static_assert(FNM_PATHNAME == FNM_FILE_NAME);

	/* Basic literals, wildcards and bracket expressions. */
	match("", "", 0);
	match("*", "hello", 0);
	match("hello", "hello", 0);
	match("hello*", "hello", 0);
	nomatch("hello?", "hello", 0);
	match("*hello", "prdel hello", 0);
	match("he[sl]lo", "hello", 0);
	match("he[sl]lo", "heslo", 0);
	nomatch("he[sl]lo", "heblo", 0);
	nomatch("he[^sl]lo", "hello", 0);
	nomatch("he[^sl]lo", "heslo", 0);
	match("he[^sl]lo", "heblo", 0);
	nomatch("he[!sl]lo", "hello", 0);
	nomatch("he[!sl]lo", "heslo", 0);
	match("he[!sl]lo", "heblo", 0);
	match("al*[c-t]a*vis*ta", "alheimer talir jehovista", 0);
	match("al*[c-t]a*vis*ta", "alfons had jehovista", 0);
	match("[a-ce-z]", "a", 0);
	match("[a-ce-z]", "c", 0);
	nomatch("[a-ce-z]", "d", 0);
	match("[a-ce-z]", "e", 0);
	match("[a-ce-z]", "z", 0);
	nomatch("[^a-ce-z]", "a", 0);
	nomatch("[^a-ce-z]", "c", 0);
	match("[^a-ce-z]", "d", 0);
	nomatch("[^a-ce-z]", "e", 0);
	nomatch("[^a-ce-z]", "z", 0);
	match("helen??", "helenos", 0);
	match("****booo****", "booo", 0);

	/* Character classes. */
	match("hello[[:space:]]world", "hello world", 0);
	nomatch("hello[[:alpha:]]world", "hello world", 0);

	/* FNM_PATHNAME and FNM_LEADING_DIR. */
	match("/hoooo*", "/hooooooo/hooo", 0);
	nomatch("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME);
	nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME);
	match("/hoooo*/*", "/hooooooo/hooo", FNM_PATHNAME);
	match("/hoooo*/hooo", "/hooooooo/hooo", FNM_PATHNAME);
	match("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
	nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
	nomatch("/hoooo", "/hooooooo/hooo", FNM_LEADING_DIR);
	match("/hooooooo", "/hooooooo/hooo", FNM_LEADING_DIR);

	/* Combinations of '*' and '?'. */
	match("*", "hell", 0);
	match("*?", "hell", 0);
	match("?*?", "hell", 0);
	match("?*??", "hell", 0);
	match("??*??", "hell", 0);
	nomatch("???*??", "hell", 0);

	nomatch("", "hell", 0);
	nomatch("?", "hell", 0);
	nomatch("??", "hell", 0);
	nomatch("???", "hell", 0);
	match("????", "hell", 0);

	/* FNM_PERIOD. */
	match("*", "h.ello", FNM_PERIOD);
	match("*", "h.ello", FNM_PATHNAME | FNM_PERIOD);
	nomatch("*", ".hello", FNM_PERIOD);
	match("h?ello", "h.ello", FNM_PERIOD);
	nomatch("?hello", ".hello", FNM_PERIOD);
	match("/home/user/.*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);
	match("/home/user/*", "/home/user/.hello", FNM_PERIOD);
	nomatch("/home/user/*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);

	/* FNM_CASEFOLD. */
	nomatch("HeLlO", "hello", 0);
	match("HeLlO", "hello", FNM_CASEFOLD);

	printf("Failed: %d\n", fail);
}

#endif
718
719/** @}
720 */
Note: See TracBrowser for help on using the repository browser.