source: mainline/uspace/lib/posix/src/fnmatch.c@ 3061bc1

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 3061bc1 was 1b20da0, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 7 years ago

style: Remove trailing whitespace on non-empty lines, in certain file types.

Command used: tools/srepl '\([^[:space:]]\)\s\+$' '\1' -- *.c *.h *.py *.sh *.s *.S *.ag

  • Property mode set to 100644
File size: 19.0 KB
Line 
1/*
2 * Copyright (c) 2011 Jiri Zarevucky
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup libposix
30 * @{
31 */
32/** @file Filename-matching.
33 */
34
35/* This file contains an implementation of the fnmatch() pattern matching
36 * function. There is more code than necessary to account for the possibility
37 * of adding POSIX-like locale support to the system in the future. Functions
38 * that are only necessary for locale support currently simply use single
39 * characters for "collation elements".
40 * When (or if) locales are properly implemented, extending this implementation
41 * will be fairly straightforward.
42 */
43
44#include "libc/stdbool.h"
45#include "posix/ctype.h"
46#include "posix/string.h"
47#include "posix/stdlib.h"
48#include <assert.h>
49
50#include "internal/common.h"
51#include "posix/fnmatch.h"
52
53/* Returned by _match... functions. */
54#define INVALID_PATTERN -1
55
56/* Type for collating element, simple identity with characters now,
57 * but may be extended for better locale support.
58 */
59typedef int coll_elm_t;
60
61/** Return value indicating that the element in question
62 * is not valid in the current locale. (That is, if locales are supported.)
63 */
64#define COLL_ELM_INVALID -1
65
66/**
67 * Get collating element matching a string.
68 *
69 * @param str String representation of the element.
70 * @return Matching collating element or COLL_ELM_INVALID.
71 */
72static coll_elm_t _coll_elm_get(const char* str)
73{
74 if (str[0] == '\0' || str[1] != '\0') {
75 return COLL_ELM_INVALID;
76 }
77 return str[0];
78}
79
80/**
81 * Get collating element matching a single character.
82 *
83 * @param c Character representation of the element.
84 * @return Matching collating element.
85 */
86static coll_elm_t _coll_elm_char(int c)
87{
88 return c;
89}
90
91/**
92 * Match collating element with a beginning of a string.
93 *
94 * @param elm Collating element to match.
95 * @param str String which beginning should match the element.
96 * @return 0 if the element doesn't match, or the number of characters matched.
97 */
98static int _coll_elm_match(coll_elm_t elm, const char *str)
99{
100 return elm == *str;
101}
102
103/**
104 * Checks whether a string begins with a collating element in the given range.
105 * Ordering depends on the locale (if locales are supported).
106 *
107 * @param first First element of the range.
108 * @param second Last element of the range.
109 * @param str String to match.
110 * @return 0 if there is no match, or the number of characters matched.
111 */
112static int _coll_elm_between(coll_elm_t first, coll_elm_t second,
113 const char *str)
114{
115 return *str >= first && *str <= second;
116}
117
118/**
119 * Read a string delimited by [? and ?].
120 *
121 * @param pattern Pointer to the string to read from. Its position is moved
122 * to the first character after the closing ].
123 * @param seq The character on the inside of brackets.
124 * @param buf Read buffer.
125 * @param buf_sz Read buffer's size. If the buffer is not large enough for
126 * the entire string, the string is cut with no error indication.
127 * @param flags Flags modifying the behavior.
128 * @return True on success, false if the pattern is invalid.
129 */
130static bool _get_delimited(
131 const char **pattern, int seq,
132 char *buf, size_t buf_sz, int flags)
133{
134 const bool noescape = (flags & FNM_NOESCAPE) != 0;
135 const bool pathname = (flags & FNM_PATHNAME) != 0;
136
137 const char *p = *pattern;
138 assert(p[0] == '[' && p[1] == seq /* Caller should ensure this. */);
139 p += 2;
140
141 while (true) {
142 if (*p == seq && *(p + 1) == ']') {
143 /* String properly ended, return. */
144 *pattern = p + 2;
145 *buf = '\0';
146 return true;
147 }
148 if (!noescape && *p == '\\') {
149 p++;
150 }
151 if (*p == '\0') {
152 /* String not ended properly, invalid pattern. */
153 return false;
154 }
155 if (pathname && *p == '/') {
156 /* Slash in a pathname pattern is invalid. */
157 return false;
158 }
159 if (buf_sz > 1) {
160 /* Only add to the buffer if there is space. */
161 *buf = *p;
162 buf++;
163 buf_sz--;
164 }
165 p++;
166 }
167}
168
169/************** CHARACTER CLASSES ****************/
170
171#define MAX_CLASS_OR_COLL_LEN 6
172
173struct _char_class {
174 const char *name;
175 int (*func) (int);
176};
177
178/* List of supported character classes. */
179static const struct _char_class _char_classes[] = {
180 { "alnum", isalnum },
181 { "alpha", isalpha },
182 { "blank", isblank },
183 { "cntrl", iscntrl },
184 { "digit", isdigit },
185 { "graph", isgraph },
186 { "lower", islower },
187 { "print", isprint },
188 { "punct", ispunct },
189 { "space", isspace },
190 { "upper", isupper },
191 { "xdigit", isxdigit }
192};
193
194/**
195 * Compare function for binary search in the _char_classes array.
196 *
197 * @param key Key of the searched element.
198 * @param elem Element of _char_classes array.
199 * @return Ordering indicator (-1 less than, 0 equal, 1 greater than).
200 */
201static int _class_compare(const void *key, const void *elem)
202{
203 const struct _char_class *class = elem;
204 return strcmp((const char *) key, class->name);
205}
206
207/**
208 * Returns whether the given character belongs to the specified character class.
209 *
210 * @param cname Name of the character class.
211 * @param c Character.
212 * @return True if the character belongs to the class, false otherwise.
213 */
214static bool _is_in_class (const char *cname, int c)
215{
216 /* Search for class in the array of supported character classes. */
217 const struct _char_class *class = bsearch(cname, _char_classes,
218 sizeof(_char_classes) / sizeof(struct _char_class),
219 sizeof(struct _char_class), _class_compare);
220
221 if (class == NULL) {
222 /* No such class supported - treat as an empty class. */
223 return false;
224 } else {
225 /* Class matched. */
226 return class->func(c);
227 }
228}
229
230/**
231 * Tries to parse an initial part of the pattern as a character class pattern,
232 * and if successful, matches the beginning of the given string against the class.
233 *
234 * @param pattern Pointer to the pattern to match. Must begin with a class
235 * specifier and is repositioned to the first character after the specifier
236 * if successful.
237 * @param str String to match.
238 * @param flags Flags modifying the behavior (see fnmatch()).
239 * @return INVALID_PATTERN if the pattern doesn't start with a valid class
240 * specifier, 0 if the beginning of the matched string doesn't belong
241 * to the class, or positive number of characters matched.
242 */
243static int _match_char_class(const char **pattern, const char *str, int flags)
244{
245 char class[MAX_CLASS_OR_COLL_LEN + 1];
246
247 if (!_get_delimited(pattern, ':', class, sizeof(class), flags)) {
248 return INVALID_PATTERN;
249 }
250
251 return _is_in_class(class, *str);
252}
253
254/************** END CHARACTER CLASSES ****************/
255
256/**
257 * Reads the next collating element in the pattern, taking into account
258 * locale (if supported) and flags (see fnmatch()).
259 *
260 * @param pattern Pattern.
261 * @param flags Flags given to fnmatch().
262 * @return Collating element on success,
263 * or COLL_ELM_INVALID if the pattern is invalid.
264 */
265static coll_elm_t _next_coll_elm(const char **pattern, int flags)
266{
267 assert(pattern != NULL);
268 assert(*pattern != NULL);
269 assert(**pattern != '\0');
270
271 const char *p = *pattern;
272 const bool noescape = (flags & FNM_NOESCAPE) != 0;
273 const bool pathname = (flags & FNM_PATHNAME) != 0;
274
275 if (*p == '[') {
276 if (*(p + 1) == '.') {
277 char buf[MAX_CLASS_OR_COLL_LEN + 1];
278 if (!_get_delimited(pattern, '.', buf, sizeof(buf), flags)) {
279 return COLL_ELM_INVALID;
280 }
281 return _coll_elm_get(buf);
282 }
283
284 if (*(p + 1) == '=') {
285 char buf[MAX_CLASS_OR_COLL_LEN + 1];
286 if (!_get_delimited(pattern, '=', buf, sizeof(buf), flags)) {
287 return COLL_ELM_INVALID;
288 }
289 return _coll_elm_get(buf);
290 }
291 }
292
293 if (!noescape && *p == '\\') {
294 p++;
295 if (*p == '\0') {
296 *pattern = p;
297 return COLL_ELM_INVALID;
298 }
299 }
300 if (pathname && *p == '/') {
301 return COLL_ELM_INVALID;
302 }
303
304 *pattern = p + 1;
305 return _coll_elm_char(*p);
306}
307
308/**
309 * Matches the beginning of the given string against a bracket expression
310 * the pattern begins with.
311 *
312 * @param pattern Pointer to the beginning of a bracket expression in a pattern.
313 * On success, the pointer is moved to the first character after the
314 * bracket expression.
315 * @param str Unmatched part of the string.
316 * @param flags Flags given to fnmatch().
317 * @return INVALID_PATTERN if the pattern is invalid, 0 if there is no match
318 * or the number of matched characters on success.
319 */
320static int _match_bracket_expr(const char **pattern, const char *str, int flags)
321{
322 const bool pathname = (flags & FNM_PATHNAME) != 0;
323 const bool special_period = (flags & FNM_PERIOD) != 0;
324 const char *p = *pattern;
325 bool negative = false;
326 int matched = 0;
327
328 #define _matched(match) { \
329 int _match = match; \
330 if (_match < 0) { \
331 /* Invalid pattern */ \
332 return _match; \
333 } else if (matched == 0 && _match > 0) { \
334 /* First match */ \
335 matched = _match; \
336 } \
337 }
338
339 assert(*p == '['); /* calling code should ensure this */
340 p++;
341
342 if (*str == '\0' || (pathname && *str == '/') ||
343 (pathname && special_period && *str == '.' && *(str - 1) == '/')) {
344 /* No bracket expression matches end of string,
345 * slash in pathname match or initial period with FNM_PERIOD
346 * option.
347 */
348 return 0;
349 }
350
351 if (*p == '^' || *p == '!') {
352 negative = true;
353 p++;
354 }
355
356 if (*p == ']') {
357 /* When ']' is first, treat it as a normal character. */
358 _matched(*str == ']');
359 p++;
360 }
361
362 coll_elm_t current_elm = COLL_ELM_INVALID;
363
364 while (*p != ']') {
365 if (*p == '-' && *(p + 1) != ']' &&
366 current_elm != COLL_ELM_INVALID) {
367 /* Range expression. */
368 p++;
369 coll_elm_t end_elm = _next_coll_elm(&p, flags);
370 if (end_elm == COLL_ELM_INVALID) {
371 return INVALID_PATTERN;
372 }
373 _matched(_coll_elm_between(current_elm, end_elm, str));
374 continue;
375 }
376
377 if (*p == '[' && *(p + 1) == ':') {
378 current_elm = COLL_ELM_INVALID;
379 _matched(_match_char_class(&p, str, flags));
380 continue;
381 }
382
383 current_elm = _next_coll_elm(&p, flags);
384 if (current_elm == COLL_ELM_INVALID) {
385 return INVALID_PATTERN;
386 }
387 _matched(_coll_elm_match(current_elm, str));
388 }
389
390 /* No error occured - update pattern pointer. */
391 *pattern = p + 1;
392
393 if (matched == 0) {
394 /* No match found */
395 return negative;
396 } else {
397 /* Matched 'match' characters. */
398 return negative ? 0 : matched;
399 }
400
401 #undef _matched
402}
403
404/**
405 * Matches a portion of the pattern containing no asterisks (*) against
406 * the given string.
407 *
408 * @param pattern Pointer to the unmatched portion of the pattern.
409 * On success, the pointer is moved to the first asterisk, or to the
410 * terminating nul character, whichever occurs first.
411 * @param string Pointer to the input string. On success, the pointer is moved
412 * to the first character that wasn't explicitly matched.
413 * @param flags Flags given to fnmatch().
414 * @return True if the entire subpattern matched. False otherwise.
415 */
416static bool _partial_match(const char **pattern, const char **string, int flags)
417{
418 /* Only a single *-delimited subpattern is matched here.
419 * So in this function, '*' is understood as the end of pattern.
420 */
421
422 const bool pathname = (flags & FNM_PATHNAME) != 0;
423 const bool special_period = (flags & FNM_PERIOD) != 0;
424 const bool noescape = (flags & FNM_NOESCAPE) != 0;
425 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;
426
427 const char *s = *string;
428 const char *p = *pattern;
429
430 while (*p != '*') {
431 /* Bracket expression. */
432 if (*p == '[') {
433 int matched = _match_bracket_expr(&p, s, flags);
434 if (matched == 0) {
435 /* Doesn't match. */
436 return false;
437 }
438 if (matched != INVALID_PATTERN) {
439 s += matched;
440 continue;
441 }
442
443 assert(matched == INVALID_PATTERN);
444 /* Fall through to match [ as an ordinary character. */
445 }
446
447 /* Wildcard match. */
448 if (*p == '?') {
449 if (*s == '\0') {
450 /* No character to match. */
451 return false;
452 }
453 if (pathname && *s == '/') {
454 /* Slash must be matched explicitly. */
455 return false;
456 }
457 if (special_period && pathname &&
458 *s == '.' && *(s - 1) == '/') {
459 /* Initial period must be matched explicitly. */
460 return false;
461 }
462
463 /* None of the above, match anything else. */
464 p++;
465 s++;
466 continue;
467 }
468
469 if (!noescape && *p == '\\') {
470 /* Escaped character. */
471 p++;
472 }
473
474 if (*p == '\0') {
475 /* End of pattern, must match end of string or
476 * an end of subdirectory name (optional).
477 */
478
479 if (*s == '\0' || (leading_dir && *s == '/')) {
480 break;
481 }
482
483 return false;
484 }
485
486 if (*p == *s) {
487 /* Exact match. */
488 p++;
489 s++;
490 continue;
491 }
492
493 /* Nothing matched. */
494 return false;
495 }
496
497 /* Entire sub-pattern matched. */
498
499 /* postconditions */
500 assert(*p == '\0' || *p == '*');
501 assert(*p != '\0' || *s == '\0' || (leading_dir && *s == '/'));
502
503 *pattern = p;
504 *string = s;
505 return true;
506}
507
508/**
509 * Match string against a pattern.
510 *
511 * @param pattern Pattern.
512 * @param string String to match.
513 * @param flags Flags given to fnmatch().
514 * @return True if the string matched the pattern, false otherwise.
515 */
516static bool _full_match(const char *pattern, const char *string, int flags)
517{
518 const bool pathname = (flags & FNM_PATHNAME) != 0;
519 const bool special_period = (flags & FNM_PERIOD) != 0;
520 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;
521
522 if (special_period && *string == '.') {
523 /* Initial dot must be matched by an explicit dot in pattern. */
524 if (*pattern != '.') {
525 return false;
526 }
527 pattern++;
528 string++;
529 }
530
531 if (*pattern != '*') {
532 if (!_partial_match(&pattern, &string, flags)) {
533 /* The initial match must succeed. */
534 return false;
535 }
536 }
537
538 while (*pattern != '\0') {
539 assert(*pattern == '*');
540 pattern++;
541
542 bool matched = false;
543
544 const char *end;
545 if (pathname && special_period &&
546 *string == '.' && *(string - 1) == '/') {
547 end = string;
548 } else {
549 end = gnu_strchrnul(string, pathname ? '/' : '\0');
550 }
551
552 /* Try to match every possible offset. */
553 while (string <= end) {
554 if (_partial_match(&pattern, &string, flags)) {
555 matched = true;
556 break;
557 }
558 string++;
559 }
560
561 if (matched) {
562 continue;
563 }
564
565 return false;
566 }
567
568 return *string == '\0' || (leading_dir && *string == '/');
569}
570
571/**
572 * Transform the entire string to lowercase.
573 *
574 * @param s Input string.
575 * @return Newly allocated copy of the input string with all uppercase
576 * characters folded to their lowercase variants.
577 */
578static char *_casefold(const char *s)
579{
580 assert(s != NULL);
581 char *result = strdup(s);
582 for (char *i = result; *i != '\0'; ++i) {
583 *i = tolower(*i);
584 }
585 return result;
586}
587
588/**
589 * Filename pattern matching.
590 *
591 * @param pattern Pattern to match the string against.
592 * @param string Matched string.
593 * @param flags Flags altering the matching of special characters
594 * (mainly for dot and slash).
595 * @return Zero if the string matches the pattern, FNM_NOMATCH otherwise.
596 */
597int fnmatch(const char *pattern, const char *string, int flags)
598{
599 assert(pattern != NULL);
600 assert(string != NULL);
601
602 // TODO: don't fold everything in advance, but only when needed
603
604 if ((flags & FNM_CASEFOLD) != 0) {
605 /* Just fold the entire pattern and string. */
606 pattern = _casefold(pattern);
607 string = _casefold(string);
608 }
609
610 bool result = _full_match(pattern, string, flags);
611
612 if ((flags & FNM_CASEFOLD) != 0) {
613 if (pattern) {
614 free((char *) pattern);
615 }
616 if (string) {
617 free((char *) string);
618 }
619 }
620
621 return result ? 0 : FNM_NOMATCH;
622}
623
624// FIXME: put the testcases to the app/tester after fnmatch is included into libc
625
626#if 0
627
628#include <stdio.h>
629
630void __posix_fnmatch_test()
631{
632 int fail = 0;
633
634 #undef assert
635 #define assert(x) { if (x) printf("SUCCESS: "#x"\n"); else { printf("FAILED: "#x"\n"); fail++; } }
636 #define match(s1, s2, flags) assert(fnmatch(s1, s2, flags) == 0)
637 #define nomatch(s1, s2, flags) assert(fnmatch(s1, s2, flags) == FNM_NOMATCH)
638
639 static_assert(FNM_PATHNAME == FNM_FILE_NAME);
640 match("", "", 0);
641 match("*", "hello", 0);
642 match("hello", "hello", 0);
643 match("hello*", "hello", 0);
644 nomatch("hello?", "hello", 0);
645 match("*hello", "prdel hello", 0);
646 match("he[sl]lo", "hello", 0);
647 match("he[sl]lo", "heslo", 0);
648 nomatch("he[sl]lo", "heblo", 0);
649 nomatch("he[^sl]lo", "hello", 0);
650 nomatch("he[^sl]lo", "heslo", 0);
651 match("he[^sl]lo", "heblo", 0);
652 nomatch("he[!sl]lo", "hello", 0);
653 nomatch("he[!sl]lo", "heslo", 0);
654 match("he[!sl]lo", "heblo", 0);
655 match("al*[c-t]a*vis*ta", "alheimer talir jehovista", 0);
656 match("al*[c-t]a*vis*ta", "alfons had jehovista", 0);
657 match("[a-ce-z]", "a", 0);
658 match("[a-ce-z]", "c", 0);
659 nomatch("[a-ce-z]", "d", 0);
660 match("[a-ce-z]", "e", 0);
661 match("[a-ce-z]", "z", 0);
662 nomatch("[^a-ce-z]", "a", 0);
663 nomatch("[^a-ce-z]", "c", 0);
664 match("[^a-ce-z]", "d", 0);
665 nomatch("[^a-ce-z]", "e", 0);
666 nomatch("[^a-ce-z]", "z", 0);
667 match("helen??", "helenos", 0);
668 match("****booo****", "booo", 0);
669
670 match("hello[[:space:]]world", "hello world", 0);
671 nomatch("hello[[:alpha:]]world", "hello world", 0);
672
673 match("/hoooo*", "/hooooooo/hooo", 0);
674 nomatch("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME);
675 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME);
676 match("/hoooo*/*", "/hooooooo/hooo", FNM_PATHNAME);
677 match("/hoooo*/hooo", "/hooooooo/hooo", FNM_PATHNAME);
678 match("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
679 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
680 nomatch("/hoooo", "/hooooooo/hooo", FNM_LEADING_DIR);
681 match("/hooooooo", "/hooooooo/hooo", FNM_LEADING_DIR);
682
683 match("*", "hell", 0);
684 match("*?", "hell", 0);
685 match("?*?", "hell", 0);
686 match("?*??", "hell", 0);
687 match("??*??", "hell", 0);
688 nomatch("???*??", "hell", 0);
689
690 nomatch("", "hell", 0);
691 nomatch("?", "hell", 0);
692 nomatch("??", "hell", 0);
693 nomatch("???", "hell", 0);
694 match("????", "hell", 0);
695
696 match("*", "h.ello", FNM_PERIOD);
697 match("*", "h.ello", FNM_PATHNAME | FNM_PERIOD);
698 nomatch("*", ".hello", FNM_PERIOD);
699 match("h?ello", "h.ello", FNM_PERIOD);
700 nomatch("?hello", ".hello", FNM_PERIOD);
701 match("/home/user/.*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);
702 match("/home/user/*", "/home/user/.hello", FNM_PERIOD);
703 nomatch("/home/user/*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);
704
705 nomatch("HeLlO", "hello", 0);
706 match("HeLlO", "hello", FNM_CASEFOLD);
707
708 printf("Failed: %d\n", fail);
709}
710
711#endif
712
713/** @}
714 */
Note: See TracBrowser for help on using the repository browser.