source: mainline/uspace/lib/posix/src/fnmatch.c@ a35b458

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since a35b458 was a35b458, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 7 years ago

style: Remove trailing whitespace on _all_ lines, including empty ones, for particular file types.

Command used: tools/srepl '\s\+$' '' -- *.c *.h *.py *.sh *.s *.S *.ag

Currently, whitespace on empty lines is very inconsistent.
There are two basic choices: Either remove the whitespace, or keep empty lines
indented to the level of surrounding code. The former is AFAICT more common,
and also much easier to do automatically.

Alternatively, we could write script for automatic indentation, and use that
instead. However, if such a script exists, it's possible to use the indented
style locally, by having the editor apply relevant conversions on load/save,
without affecting remote repository. IMO, it makes more sense to adopt
the simpler rule.

  • Property mode set to 100644
File size: 18.9 KB
Line 
1/*
2 * Copyright (c) 2011 Jiri Zarevucky
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup libposix
30 * @{
31 */
32/** @file Filename-matching.
33 */
34
35/* This file contains an implementation of the fnmatch() pattern matching
36 * function. There is more code than necessary to account for the possibility
37 * of adding POSIX-like locale support to the system in the future. Functions
38 * that are only necessary for locale support currently simply use single
39 * characters for "collation elements".
40 * When (or if) locales are properly implemented, extending this implementation
41 * will be fairly straightforward.
42 */
43
44#include "libc/stdbool.h"
45#include "posix/ctype.h"
46#include "posix/string.h"
47#include "posix/stdlib.h"
48#include <assert.h>
49
50#include "internal/common.h"
51#include "posix/fnmatch.h"
52
53/* Returned by _match... functions. */
54#define INVALID_PATTERN -1
55
56/* Type for collating element, simple identity with characters now,
57 * but may be extended for better locale support.
58 */
59typedef int coll_elm_t;
60
61/** Return value indicating that the element in question
62 * is not valid in the current locale. (That is, if locales are supported.)
63 */
64#define COLL_ELM_INVALID -1
65
66/**
67 * Get collating element matching a string.
68 *
69 * @param str String representation of the element.
70 * @return Matching collating element or COLL_ELM_INVALID.
71 */
72static coll_elm_t _coll_elm_get(const char* str)
73{
74 if (str[0] == '\0' || str[1] != '\0') {
75 return COLL_ELM_INVALID;
76 }
77 return str[0];
78}
79
80/**
81 * Get collating element matching a single character.
82 *
83 * @param c Character representation of the element.
84 * @return Matching collating element.
85 */
86static coll_elm_t _coll_elm_char(int c)
87{
88 return c;
89}
90
91/**
92 * Match collating element with a beginning of a string.
93 *
94 * @param elm Collating element to match.
95 * @param str String which beginning should match the element.
96 * @return 0 if the element doesn't match, or the number of characters matched.
97 */
98static int _coll_elm_match(coll_elm_t elm, const char *str)
99{
100 return elm == *str;
101}
102
103/**
104 * Checks whether a string begins with a collating element in the given range.
105 * Ordering depends on the locale (if locales are supported).
106 *
107 * @param first First element of the range.
108 * @param second Last element of the range.
109 * @param str String to match.
110 * @return 0 if there is no match, or the number of characters matched.
111 */
112static int _coll_elm_between(coll_elm_t first, coll_elm_t second,
113 const char *str)
114{
115 return *str >= first && *str <= second;
116}
117
118/**
119 * Read a string delimited by [? and ?].
120 *
121 * @param pattern Pointer to the string to read from. Its position is moved
122 * to the first character after the closing ].
123 * @param seq The character on the inside of brackets.
124 * @param buf Read buffer.
125 * @param buf_sz Read buffer's size. If the buffer is not large enough for
126 * the entire string, the string is cut with no error indication.
127 * @param flags Flags modifying the behavior.
128 * @return True on success, false if the pattern is invalid.
129 */
130static bool _get_delimited(
131 const char **pattern, int seq,
132 char *buf, size_t buf_sz, int flags)
133{
134 const bool noescape = (flags & FNM_NOESCAPE) != 0;
135 const bool pathname = (flags & FNM_PATHNAME) != 0;
136
137 const char *p = *pattern;
138 assert(p[0] == '[' && p[1] == seq /* Caller should ensure this. */);
139 p += 2;
140
141 while (true) {
142 if (*p == seq && *(p + 1) == ']') {
143 /* String properly ended, return. */
144 *pattern = p + 2;
145 *buf = '\0';
146 return true;
147 }
148 if (!noescape && *p == '\\') {
149 p++;
150 }
151 if (*p == '\0') {
152 /* String not ended properly, invalid pattern. */
153 return false;
154 }
155 if (pathname && *p == '/') {
156 /* Slash in a pathname pattern is invalid. */
157 return false;
158 }
159 if (buf_sz > 1) {
160 /* Only add to the buffer if there is space. */
161 *buf = *p;
162 buf++;
163 buf_sz--;
164 }
165 p++;
166 }
167}
168
169/************** CHARACTER CLASSES ****************/
170
171#define MAX_CLASS_OR_COLL_LEN 6
172
173struct _char_class {
174 const char *name;
175 int (*func) (int);
176};
177
178/* List of supported character classes. */
179static const struct _char_class _char_classes[] = {
180 { "alnum", isalnum },
181 { "alpha", isalpha },
182 { "blank", isblank },
183 { "cntrl", iscntrl },
184 { "digit", isdigit },
185 { "graph", isgraph },
186 { "lower", islower },
187 { "print", isprint },
188 { "punct", ispunct },
189 { "space", isspace },
190 { "upper", isupper },
191 { "xdigit", isxdigit }
192};
193
194/**
195 * Compare function for binary search in the _char_classes array.
196 *
197 * @param key Key of the searched element.
198 * @param elem Element of _char_classes array.
199 * @return Ordering indicator (-1 less than, 0 equal, 1 greater than).
200 */
201static int _class_compare(const void *key, const void *elem)
202{
203 const struct _char_class *class = elem;
204 return strcmp((const char *) key, class->name);
205}
206
207/**
208 * Returns whether the given character belongs to the specified character class.
209 *
210 * @param cname Name of the character class.
211 * @param c Character.
212 * @return True if the character belongs to the class, false otherwise.
213 */
214static bool _is_in_class (const char *cname, int c)
215{
216 /* Search for class in the array of supported character classes. */
217 const struct _char_class *class = bsearch(cname, _char_classes,
218 sizeof(_char_classes) / sizeof(struct _char_class),
219 sizeof(struct _char_class), _class_compare);
220
221 if (class == NULL) {
222 /* No such class supported - treat as an empty class. */
223 return false;
224 } else {
225 /* Class matched. */
226 return class->func(c);
227 }
228}
229
230/**
231 * Tries to parse an initial part of the pattern as a character class pattern,
232 * and if successful, matches the beginning of the given string against the class.
233 *
234 * @param pattern Pointer to the pattern to match. Must begin with a class
235 * specifier and is repositioned to the first character after the specifier
236 * if successful.
237 * @param str String to match.
238 * @param flags Flags modifying the behavior (see fnmatch()).
239 * @return INVALID_PATTERN if the pattern doesn't start with a valid class
240 * specifier, 0 if the beginning of the matched string doesn't belong
241 * to the class, or positive number of characters matched.
242 */
243static int _match_char_class(const char **pattern, const char *str, int flags)
244{
245 char class[MAX_CLASS_OR_COLL_LEN + 1];
246
247 if (!_get_delimited(pattern, ':', class, sizeof(class), flags)) {
248 return INVALID_PATTERN;
249 }
250
251 return _is_in_class(class, *str);
252}
253
254/************** END CHARACTER CLASSES ****************/
255
256/**
257 * Reads the next collating element in the pattern, taking into account
258 * locale (if supported) and flags (see fnmatch()).
259 *
260 * @param pattern Pattern.
261 * @param flags Flags given to fnmatch().
262 * @return Collating element on success,
263 * or COLL_ELM_INVALID if the pattern is invalid.
264 */
265static coll_elm_t _next_coll_elm(const char **pattern, int flags)
266{
267 assert(pattern != NULL);
268 assert(*pattern != NULL);
269 assert(**pattern != '\0');
270
271 const char *p = *pattern;
272 const bool noescape = (flags & FNM_NOESCAPE) != 0;
273 const bool pathname = (flags & FNM_PATHNAME) != 0;
274
275 if (*p == '[') {
276 if (*(p + 1) == '.') {
277 char buf[MAX_CLASS_OR_COLL_LEN + 1];
278 if (!_get_delimited(pattern, '.', buf, sizeof(buf), flags)) {
279 return COLL_ELM_INVALID;
280 }
281 return _coll_elm_get(buf);
282 }
283
284 if (*(p + 1) == '=') {
285 char buf[MAX_CLASS_OR_COLL_LEN + 1];
286 if (!_get_delimited(pattern, '=', buf, sizeof(buf), flags)) {
287 return COLL_ELM_INVALID;
288 }
289 return _coll_elm_get(buf);
290 }
291 }
292
293 if (!noescape && *p == '\\') {
294 p++;
295 if (*p == '\0') {
296 *pattern = p;
297 return COLL_ELM_INVALID;
298 }
299 }
300 if (pathname && *p == '/') {
301 return COLL_ELM_INVALID;
302 }
303
304 *pattern = p + 1;
305 return _coll_elm_char(*p);
306}
307
308/**
309 * Matches the beginning of the given string against a bracket expression
310 * the pattern begins with.
311 *
312 * @param pattern Pointer to the beginning of a bracket expression in a pattern.
313 * On success, the pointer is moved to the first character after the
314 * bracket expression.
315 * @param str Unmatched part of the string.
316 * @param flags Flags given to fnmatch().
317 * @return INVALID_PATTERN if the pattern is invalid, 0 if there is no match
318 * or the number of matched characters on success.
319 */
320static int _match_bracket_expr(const char **pattern, const char *str, int flags)
321{
322 const bool pathname = (flags & FNM_PATHNAME) != 0;
323 const bool special_period = (flags & FNM_PERIOD) != 0;
324 const char *p = *pattern;
325 bool negative = false;
326 int matched = 0;
327
328 #define _matched(match) { \
329 int _match = match; \
330 if (_match < 0) { \
331 /* Invalid pattern */ \
332 return _match; \
333 } else if (matched == 0 && _match > 0) { \
334 /* First match */ \
335 matched = _match; \
336 } \
337 }
338
339 assert(*p == '['); /* calling code should ensure this */
340 p++;
341
342 if (*str == '\0' || (pathname && *str == '/') ||
343 (pathname && special_period && *str == '.' && *(str - 1) == '/')) {
344 /* No bracket expression matches end of string,
345 * slash in pathname match or initial period with FNM_PERIOD
346 * option.
347 */
348 return 0;
349 }
350
351 if (*p == '^' || *p == '!') {
352 negative = true;
353 p++;
354 }
355
356 if (*p == ']') {
357 /* When ']' is first, treat it as a normal character. */
358 _matched(*str == ']');
359 p++;
360 }
361
362 coll_elm_t current_elm = COLL_ELM_INVALID;
363
364 while (*p != ']') {
365 if (*p == '-' && *(p + 1) != ']' &&
366 current_elm != COLL_ELM_INVALID) {
367 /* Range expression. */
368 p++;
369 coll_elm_t end_elm = _next_coll_elm(&p, flags);
370 if (end_elm == COLL_ELM_INVALID) {
371 return INVALID_PATTERN;
372 }
373 _matched(_coll_elm_between(current_elm, end_elm, str));
374 continue;
375 }
376
377 if (*p == '[' && *(p + 1) == ':') {
378 current_elm = COLL_ELM_INVALID;
379 _matched(_match_char_class(&p, str, flags));
380 continue;
381 }
382
383 current_elm = _next_coll_elm(&p, flags);
384 if (current_elm == COLL_ELM_INVALID) {
385 return INVALID_PATTERN;
386 }
387 _matched(_coll_elm_match(current_elm, str));
388 }
389
390 /* No error occured - update pattern pointer. */
391 *pattern = p + 1;
392
393 if (matched == 0) {
394 /* No match found */
395 return negative;
396 } else {
397 /* Matched 'match' characters. */
398 return negative ? 0 : matched;
399 }
400
401 #undef _matched
402}
403
404/**
405 * Matches a portion of the pattern containing no asterisks (*) against
406 * the given string.
407 *
408 * @param pattern Pointer to the unmatched portion of the pattern.
409 * On success, the pointer is moved to the first asterisk, or to the
410 * terminating nul character, whichever occurs first.
411 * @param string Pointer to the input string. On success, the pointer is moved
412 * to the first character that wasn't explicitly matched.
413 * @param flags Flags given to fnmatch().
414 * @return True if the entire subpattern matched. False otherwise.
415 */
416static bool _partial_match(const char **pattern, const char **string, int flags)
417{
418 /* Only a single *-delimited subpattern is matched here.
419 * So in this function, '*' is understood as the end of pattern.
420 */
421
422 const bool pathname = (flags & FNM_PATHNAME) != 0;
423 const bool special_period = (flags & FNM_PERIOD) != 0;
424 const bool noescape = (flags & FNM_NOESCAPE) != 0;
425 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;
426
427 const char *s = *string;
428 const char *p = *pattern;
429
430 while (*p != '*') {
431 /* Bracket expression. */
432 if (*p == '[') {
433 int matched = _match_bracket_expr(&p, s, flags);
434 if (matched == 0) {
435 /* Doesn't match. */
436 return false;
437 }
438 if (matched != INVALID_PATTERN) {
439 s += matched;
440 continue;
441 }
442
443 assert(matched == INVALID_PATTERN);
444 /* Fall through to match [ as an ordinary character. */
445 }
446
447 /* Wildcard match. */
448 if (*p == '?') {
449 if (*s == '\0') {
450 /* No character to match. */
451 return false;
452 }
453 if (pathname && *s == '/') {
454 /* Slash must be matched explicitly. */
455 return false;
456 }
457 if (special_period && pathname &&
458 *s == '.' && *(s - 1) == '/') {
459 /* Initial period must be matched explicitly. */
460 return false;
461 }
462
463 /* None of the above, match anything else. */
464 p++;
465 s++;
466 continue;
467 }
468
469 if (!noescape && *p == '\\') {
470 /* Escaped character. */
471 p++;
472 }
473
474 if (*p == '\0') {
475 /* End of pattern, must match end of string or
476 * an end of subdirectory name (optional).
477 */
478
479 if (*s == '\0' || (leading_dir && *s == '/')) {
480 break;
481 }
482
483 return false;
484 }
485
486 if (*p == *s) {
487 /* Exact match. */
488 p++;
489 s++;
490 continue;
491 }
492
493 /* Nothing matched. */
494 return false;
495 }
496
497 /* Entire sub-pattern matched. */
498
499 /* postconditions */
500 assert(*p == '\0' || *p == '*');
501 assert(*p != '\0' || *s == '\0' || (leading_dir && *s == '/'));
502
503 *pattern = p;
504 *string = s;
505 return true;
506}
507
508/**
509 * Match string against a pattern.
510 *
511 * @param pattern Pattern.
512 * @param string String to match.
513 * @param flags Flags given to fnmatch().
514 * @return True if the string matched the pattern, false otherwise.
515 */
516static bool _full_match(const char *pattern, const char *string, int flags)
517{
518 const bool pathname = (flags & FNM_PATHNAME) != 0;
519 const bool special_period = (flags & FNM_PERIOD) != 0;
520 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;
521
522 if (special_period && *string == '.') {
523 /* Initial dot must be matched by an explicit dot in pattern. */
524 if (*pattern != '.') {
525 return false;
526 }
527 pattern++;
528 string++;
529 }
530
531 if (*pattern != '*') {
532 if (!_partial_match(&pattern, &string, flags)) {
533 /* The initial match must succeed. */
534 return false;
535 }
536 }
537
538 while (*pattern != '\0') {
539 assert(*pattern == '*');
540 pattern++;
541
542 bool matched = false;
543
544 const char *end;
545 if (pathname && special_period &&
546 *string == '.' && *(string - 1) == '/') {
547 end = string;
548 } else {
549 end = gnu_strchrnul(string, pathname ? '/' : '\0');
550 }
551
552 /* Try to match every possible offset. */
553 while (string <= end) {
554 if (_partial_match(&pattern, &string, flags)) {
555 matched = true;
556 break;
557 }
558 string++;
559 }
560
561 if (matched) {
562 continue;
563 }
564
565 return false;
566 }
567
568 return *string == '\0' || (leading_dir && *string == '/');
569}
570
571/**
572 * Transform the entire string to lowercase.
573 *
574 * @param s Input string.
575 * @return Newly allocated copy of the input string with all uppercase
576 * characters folded to their lowercase variants.
577 */
578static char *_casefold(const char *s)
579{
580 assert(s != NULL);
581 char *result = strdup(s);
582 for (char *i = result; *i != '\0'; ++i) {
583 *i = tolower(*i);
584 }
585 return result;
586}
587
588/**
589 * Filename pattern matching.
590 *
591 * @param pattern Pattern to match the string against.
592 * @param string Matched string.
593 * @param flags Flags altering the matching of special characters
594 * (mainly for dot and slash).
595 * @return Zero if the string matches the pattern, FNM_NOMATCH otherwise.
596 */
597int fnmatch(const char *pattern, const char *string, int flags)
598{
599 assert(pattern != NULL);
600 assert(string != NULL);
601
602 // TODO: don't fold everything in advance, but only when needed
603
604 if ((flags & FNM_CASEFOLD) != 0) {
605 /* Just fold the entire pattern and string. */
606 pattern = _casefold(pattern);
607 string = _casefold(string);
608 }
609
610 bool result = _full_match(pattern, string, flags);
611
612 if ((flags & FNM_CASEFOLD) != 0) {
613 if (pattern) {
614 free((char *) pattern);
615 }
616 if (string) {
617 free((char *) string);
618 }
619 }
620
621 return result ? 0 : FNM_NOMATCH;
622}
623
624// FIXME: put the testcases to the app/tester after fnmatch is included into libc
625
626#if 0
627
628#include <stdio.h>
629
630void __posix_fnmatch_test()
631{
632 int fail = 0;
633
634 #undef assert
635 #define assert(x) { if (x) printf("SUCCESS: "#x"\n"); else { printf("FAILED: "#x"\n"); fail++; } }
636 #define match(s1, s2, flags) assert(fnmatch(s1, s2, flags) == 0)
637 #define nomatch(s1, s2, flags) assert(fnmatch(s1, s2, flags) == FNM_NOMATCH)
638
639 static_assert(FNM_PATHNAME == FNM_FILE_NAME);
640 match("", "", 0);
641 match("*", "hello", 0);
642 match("hello", "hello", 0);
643 match("hello*", "hello", 0);
644 nomatch("hello?", "hello", 0);
645 match("*hello", "prdel hello", 0);
646 match("he[sl]lo", "hello", 0);
647 match("he[sl]lo", "heslo", 0);
648 nomatch("he[sl]lo", "heblo", 0);
649 nomatch("he[^sl]lo", "hello", 0);
650 nomatch("he[^sl]lo", "heslo", 0);
651 match("he[^sl]lo", "heblo", 0);
652 nomatch("he[!sl]lo", "hello", 0);
653 nomatch("he[!sl]lo", "heslo", 0);
654 match("he[!sl]lo", "heblo", 0);
655 match("al*[c-t]a*vis*ta", "alheimer talir jehovista", 0);
656 match("al*[c-t]a*vis*ta", "alfons had jehovista", 0);
657 match("[a-ce-z]", "a", 0);
658 match("[a-ce-z]", "c", 0);
659 nomatch("[a-ce-z]", "d", 0);
660 match("[a-ce-z]", "e", 0);
661 match("[a-ce-z]", "z", 0);
662 nomatch("[^a-ce-z]", "a", 0);
663 nomatch("[^a-ce-z]", "c", 0);
664 match("[^a-ce-z]", "d", 0);
665 nomatch("[^a-ce-z]", "e", 0);
666 nomatch("[^a-ce-z]", "z", 0);
667 match("helen??", "helenos", 0);
668 match("****booo****", "booo", 0);
669
670 match("hello[[:space:]]world", "hello world", 0);
671 nomatch("hello[[:alpha:]]world", "hello world", 0);
672
673 match("/hoooo*", "/hooooooo/hooo", 0);
674 nomatch("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME);
675 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME);
676 match("/hoooo*/*", "/hooooooo/hooo", FNM_PATHNAME);
677 match("/hoooo*/hooo", "/hooooooo/hooo", FNM_PATHNAME);
678 match("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
679 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
680 nomatch("/hoooo", "/hooooooo/hooo", FNM_LEADING_DIR);
681 match("/hooooooo", "/hooooooo/hooo", FNM_LEADING_DIR);
682
683 match("*", "hell", 0);
684 match("*?", "hell", 0);
685 match("?*?", "hell", 0);
686 match("?*??", "hell", 0);
687 match("??*??", "hell", 0);
688 nomatch("???*??", "hell", 0);
689
690 nomatch("", "hell", 0);
691 nomatch("?", "hell", 0);
692 nomatch("??", "hell", 0);
693 nomatch("???", "hell", 0);
694 match("????", "hell", 0);
695
696 match("*", "h.ello", FNM_PERIOD);
697 match("*", "h.ello", FNM_PATHNAME | FNM_PERIOD);
698 nomatch("*", ".hello", FNM_PERIOD);
699 match("h?ello", "h.ello", FNM_PERIOD);
700 nomatch("?hello", ".hello", FNM_PERIOD);
701 match("/home/user/.*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);
702 match("/home/user/*", "/home/user/.hello", FNM_PERIOD);
703 nomatch("/home/user/*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);
704
705 nomatch("HeLlO", "hello", 0);
706 match("HeLlO", "hello", FNM_CASEFOLD);
707
708 printf("Failed: %d\n", fail);
709}
710
711#endif
712
713/** @}
714 */
Note: See TracBrowser for help on using the repository browser.