source: mainline/uspace/lib/posix/source/fnmatch.c@ 1cf26ab

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 1cf26ab was 1cf26ab, checked in by Jakub Jermar <jakub@…>, 9 years ago

Use static assert instead of regular ones

  • Property mode set to 100644
File size: 19.1 KB
Line 
1/*
2 * Copyright (c) 2011 Jiri Zarevucky
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup libposix
30 * @{
31 */
32/** @file Filename-matching.
33 */
34
35/* This file contains an implementation of the fnmatch() pattern matching
36 * function. There is more code than necessary to account for the possibility
37 * of adding POSIX-like locale support to the system in the future. Functions
38 * that are only necessary for locale support currently simply use single
39 * characters for "collation elements".
40 * When (or if) locales are properly implemented, extending this implementation
41 * will be fairly straightforward.
42 */
43
44#define LIBPOSIX_INTERNAL
45#define __POSIX_DEF__(x) posix_##x
46
47#include "libc/stdbool.h"
48#include "posix/ctype.h"
49#include "posix/string.h"
50#include "posix/stdlib.h"
51#include "posix/assert.h"
52
53#include "internal/common.h"
54#include "posix/fnmatch.h"
55
56/* Returned by _match... functions. */
57#define INVALID_PATTERN -1
58
59/* Type for collating element, simple identity with characters now,
60 * but may be extended for better locale support.
61 */
62typedef int coll_elm_t;
63
64/** Return value indicating that the element in question
65 * is not valid in the current locale. (That is, if locales are supported.)
66 */
67#define COLL_ELM_INVALID -1
68
69/**
70 * Get collating element matching a string.
71 *
72 * @param str String representation of the element.
73 * @return Matching collating element or COLL_ELM_INVALID.
74 */
75static coll_elm_t _coll_elm_get(const char* str)
76{
77 if (str[0] == '\0' || str[1] != '\0') {
78 return COLL_ELM_INVALID;
79 }
80 return str[0];
81}
82
83/**
84 * Get collating element matching a single character.
85 *
86 * @param c Character representation of the element.
87 * @return Matching collating element.
88 */
89static coll_elm_t _coll_elm_char(int c)
90{
91 return c;
92}
93
94/**
95 * Match collating element with a beginning of a string.
96 *
97 * @param elm Collating element to match.
98 * @param str String which beginning should match the element.
99 * @return 0 if the element doesn't match, or the number of characters matched.
100 */
101static int _coll_elm_match(coll_elm_t elm, const char *str)
102{
103 return elm == *str;
104}
105
106/**
107 * Checks whether a string begins with a collating element in the given range.
108 * Ordering depends on the locale (if locales are supported).
109 *
110 * @param first First element of the range.
111 * @param second Last element of the range.
112 * @param str String to match.
113 * @return 0 if there is no match, or the number of characters matched.
114 */
115static int _coll_elm_between(coll_elm_t first, coll_elm_t second,
116 const char *str)
117{
118 return *str >= first && *str <= second;
119}
120
121/**
122 * Read a string delimited by [? and ?].
123 *
124 * @param pattern Pointer to the string to read from. Its position is moved
125 * to the first character after the closing ].
126 * @param seq The character on the inside of brackets.
127 * @param buf Read buffer.
128 * @param buf_sz Read buffer's size. If the buffer is not large enough for
129 * the entire string, the string is cut with no error indication.
130 * @param flags Flags modifying the behavior.
131 * @return True on success, false if the pattern is invalid.
132 */
133static bool _get_delimited(
134 const char **pattern, int seq,
135 char *buf, size_t buf_sz, int flags)
136{
137 const bool noescape = (flags & FNM_NOESCAPE) != 0;
138 const bool pathname = (flags & FNM_PATHNAME) != 0;
139
140 const char *p = *pattern;
141 assert(p[0] == '[' && p[1] == seq /* Caller should ensure this. */);
142 p += 2;
143
144 while (true) {
145 if (*p == seq && *(p + 1) == ']') {
146 /* String properly ended, return. */
147 *pattern = p + 2;
148 *buf = '\0';
149 return true;
150 }
151 if (!noescape && *p == '\\') {
152 p++;
153 }
154 if (*p == '\0') {
155 /* String not ended properly, invalid pattern. */
156 return false;
157 }
158 if (pathname && *p == '/') {
159 /* Slash in a pathname pattern is invalid. */
160 return false;
161 }
162 if (buf_sz > 1) {
163 /* Only add to the buffer if there is space. */
164 *buf = *p;
165 buf++;
166 buf_sz--;
167 }
168 p++;
169 }
170}
171
172/************** CHARACTER CLASSES ****************/
173
174#define MAX_CLASS_OR_COLL_LEN 6
175
176struct _char_class {
177 const char *name;
178 int (*func) (int);
179};
180
181/* List of supported character classes. */
182static const struct _char_class _char_classes[] = {
183 { "alnum", isalnum },
184 { "alpha", isalpha },
185 { "blank", posix_isblank },
186 { "cntrl", posix_iscntrl },
187 { "digit", isdigit },
188 { "graph", posix_isgraph },
189 { "lower", islower },
190 { "print", posix_isprint },
191 { "punct", posix_ispunct },
192 { "space", isspace },
193 { "upper", isupper },
194 { "xdigit", posix_isxdigit }
195};
196
197/**
198 * Compare function for binary search in the _char_classes array.
199 *
200 * @param key Key of the searched element.
201 * @param elem Element of _char_classes array.
202 * @return Ordering indicator (-1 less than, 0 equal, 1 greater than).
203 */
204static int _class_compare(const void *key, const void *elem)
205{
206 const struct _char_class *class = elem;
207 return posix_strcmp((const char *) key, class->name);
208}
209
210/**
211 * Returns whether the given character belongs to the specified character class.
212 *
213 * @param cname Name of the character class.
214 * @param c Character.
215 * @return True if the character belongs to the class, false otherwise.
216 */
217static bool _is_in_class (const char *cname, int c)
218{
219 /* Search for class in the array of supported character classes. */
220 const struct _char_class *class = posix_bsearch(cname, _char_classes,
221 sizeof(_char_classes) / sizeof(struct _char_class),
222 sizeof(struct _char_class), _class_compare);
223
224 if (class == NULL) {
225 /* No such class supported - treat as an empty class. */
226 return false;
227 } else {
228 /* Class matched. */
229 return class->func(c);
230 }
231}
232
233/**
234 * Tries to parse an initial part of the pattern as a character class pattern,
235 * and if successful, matches the beginning of the given string against the class.
236 *
237 * @param pattern Pointer to the pattern to match. Must begin with a class
238 * specifier and is repositioned to the first character after the specifier
239 * if successful.
240 * @param str String to match.
241 * @param flags Flags modifying the behavior (see fnmatch()).
242 * @return INVALID_PATTERN if the pattern doesn't start with a valid class
243 * specifier, 0 if the beginning of the matched string doesn't belong
244 * to the class, or positive number of characters matched.
245 */
246static int _match_char_class(const char **pattern, const char *str, int flags)
247{
248 char class[MAX_CLASS_OR_COLL_LEN + 1];
249
250 if (!_get_delimited(pattern, ':', class, sizeof(class), flags)) {
251 return INVALID_PATTERN;
252 }
253
254 return _is_in_class(class, *str);
255}
256
257/************** END CHARACTER CLASSES ****************/
258
259/**
260 * Reads the next collating element in the pattern, taking into account
261 * locale (if supported) and flags (see fnmatch()).
262 *
263 * @param pattern Pattern.
264 * @param flags Flags given to fnmatch().
265 * @return Collating element on success,
266 * or COLL_ELM_INVALID if the pattern is invalid.
267 */
268static coll_elm_t _next_coll_elm(const char **pattern, int flags)
269{
270 assert(pattern != NULL);
271 assert(*pattern != NULL);
272 assert(**pattern != '\0');
273
274 const char *p = *pattern;
275 const bool noescape = (flags & FNM_NOESCAPE) != 0;
276 const bool pathname = (flags & FNM_PATHNAME) != 0;
277
278 if (*p == '[') {
279 if (*(p + 1) == '.') {
280 char buf[MAX_CLASS_OR_COLL_LEN + 1];
281 if (!_get_delimited(pattern, '.', buf, sizeof(buf), flags)) {
282 return COLL_ELM_INVALID;
283 }
284 return _coll_elm_get(buf);
285 }
286
287 if (*(p + 1) == '=') {
288 char buf[MAX_CLASS_OR_COLL_LEN + 1];
289 if (!_get_delimited(pattern, '=', buf, sizeof(buf), flags)) {
290 return COLL_ELM_INVALID;
291 }
292 return _coll_elm_get(buf);
293 }
294 }
295
296 if (!noescape && *p == '\\') {
297 p++;
298 if (*p == '\0') {
299 *pattern = p;
300 return COLL_ELM_INVALID;
301 }
302 }
303 if (pathname && *p == '/') {
304 return COLL_ELM_INVALID;
305 }
306
307 *pattern = p + 1;
308 return _coll_elm_char(*p);
309}
310
311/**
312 * Matches the beginning of the given string against a bracket expression
313 * the pattern begins with.
314 *
315 * @param pattern Pointer to the beginning of a bracket expression in a pattern.
316 * On success, the pointer is moved to the first character after the
317 * bracket expression.
318 * @param str Unmatched part of the string.
319 * @param flags Flags given to fnmatch().
320 * @return INVALID_PATTERN if the pattern is invalid, 0 if there is no match
321 * or the number of matched characters on success.
322 */
323static int _match_bracket_expr(const char **pattern, const char *str, int flags)
324{
325 const bool pathname = (flags & FNM_PATHNAME) != 0;
326 const bool special_period = (flags & FNM_PERIOD) != 0;
327 const char *p = *pattern;
328 bool negative = false;
329 int matched = 0;
330
331 #define _matched(match) { \
332 int _match = match; \
333 if (_match < 0) { \
334 /* Invalid pattern */ \
335 return _match; \
336 } else if (matched == 0 && _match > 0) { \
337 /* First match */ \
338 matched = _match; \
339 } \
340 }
341
342 assert(*p == '['); /* calling code should ensure this */
343 p++;
344
345 if (*str == '\0' || (pathname && *str == '/') ||
346 (pathname && special_period && *str == '.' && *(str - 1) == '/')) {
347 /* No bracket expression matches end of string,
348 * slash in pathname match or initial period with FNM_PERIOD
349 * option.
350 */
351 return 0;
352 }
353
354 if (*p == '^' || *p == '!') {
355 negative = true;
356 p++;
357 }
358
359 if (*p == ']') {
360 /* When ']' is first, treat it as a normal character. */
361 _matched(*str == ']');
362 p++;
363 }
364
365 coll_elm_t current_elm = COLL_ELM_INVALID;
366
367 while (*p != ']') {
368 if (*p == '-' && *(p + 1) != ']' &&
369 current_elm != COLL_ELM_INVALID) {
370 /* Range expression. */
371 p++;
372 coll_elm_t end_elm = _next_coll_elm(&p, flags);
373 if (end_elm == COLL_ELM_INVALID) {
374 return INVALID_PATTERN;
375 }
376 _matched(_coll_elm_between(current_elm, end_elm, str));
377 continue;
378 }
379
380 if (*p == '[' && *(p + 1) == ':') {
381 current_elm = COLL_ELM_INVALID;
382 _matched(_match_char_class(&p, str, flags));
383 continue;
384 }
385
386 current_elm = _next_coll_elm(&p, flags);
387 if (current_elm == COLL_ELM_INVALID) {
388 return INVALID_PATTERN;
389 }
390 _matched(_coll_elm_match(current_elm, str));
391 }
392
393 /* No error occured - update pattern pointer. */
394 *pattern = p + 1;
395
396 if (matched == 0) {
397 /* No match found */
398 return negative;
399 } else {
400 /* Matched 'match' characters. */
401 return negative ? 0 : matched;
402 }
403
404 #undef _matched
405}
406
407/**
408 * Matches a portion of the pattern containing no asterisks (*) against
409 * the given string.
410 *
411 * @param pattern Pointer to the unmatched portion of the pattern.
412 * On success, the pointer is moved to the first asterisk, or to the
413 * terminating nul character, whichever occurs first.
414 * @param string Pointer to the input string. On success, the pointer is moved
415 * to the first character that wasn't explicitly matched.
416 * @param flags Flags given to fnmatch().
417 * @return True if the entire subpattern matched. False otherwise.
418 */
419static bool _partial_match(const char **pattern, const char **string, int flags)
420{
421 /* Only a single *-delimited subpattern is matched here.
422 * So in this function, '*' is understood as the end of pattern.
423 */
424
425 const bool pathname = (flags & FNM_PATHNAME) != 0;
426 const bool special_period = (flags & FNM_PERIOD) != 0;
427 const bool noescape = (flags & FNM_NOESCAPE) != 0;
428 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;
429
430 const char *s = *string;
431 const char *p = *pattern;
432
433 while (*p != '*') {
434 /* Bracket expression. */
435 if (*p == '[') {
436 int matched = _match_bracket_expr(&p, s, flags);
437 if (matched == 0) {
438 /* Doesn't match. */
439 return false;
440 }
441 if (matched != INVALID_PATTERN) {
442 s += matched;
443 continue;
444 }
445
446 assert(matched == INVALID_PATTERN);
447 /* Fall through to match [ as an ordinary character. */
448 }
449
450 /* Wildcard match. */
451 if (*p == '?') {
452 if (*s == '\0') {
453 /* No character to match. */
454 return false;
455 }
456 if (pathname && *s == '/') {
457 /* Slash must be matched explicitly. */
458 return false;
459 }
460 if (special_period && pathname &&
461 *s == '.' && *(s - 1) == '/') {
462 /* Initial period must be matched explicitly. */
463 return false;
464 }
465
466 /* None of the above, match anything else. */
467 p++;
468 s++;
469 continue;
470 }
471
472 if (!noescape && *p == '\\') {
473 /* Escaped character. */
474 p++;
475 }
476
477 if (*p == '\0') {
478 /* End of pattern, must match end of string or
479 * an end of subdirectory name (optional).
480 */
481
482 if (*s == '\0' || (leading_dir && *s == '/')) {
483 break;
484 }
485
486 return false;
487 }
488
489 if (*p == *s) {
490 /* Exact match. */
491 p++;
492 s++;
493 continue;
494 }
495
496 /* Nothing matched. */
497 return false;
498 }
499
500 /* Entire sub-pattern matched. */
501
502 /* postconditions */
503 assert(*p == '\0' || *p == '*');
504 assert(*p != '\0' || *s == '\0' || (leading_dir && *s == '/'));
505
506 *pattern = p;
507 *string = s;
508 return true;
509}
510
511/**
512 * Match string against a pattern.
513 *
514 * @param pattern Pattern.
515 * @param string String to match.
516 * @param flags Flags given to fnmatch().
517 * @return True if the string matched the pattern, false otherwise.
518 */
519static bool _full_match(const char *pattern, const char *string, int flags)
520{
521 const bool pathname = (flags & FNM_PATHNAME) != 0;
522 const bool special_period = (flags & FNM_PERIOD) != 0;
523 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;
524
525 if (special_period && *string == '.') {
526 /* Initial dot must be matched by an explicit dot in pattern. */
527 if (*pattern != '.') {
528 return false;
529 }
530 pattern++;
531 string++;
532 }
533
534 if (*pattern != '*') {
535 if (!_partial_match(&pattern, &string, flags)) {
536 /* The initial match must succeed. */
537 return false;
538 }
539 }
540
541 while (*pattern != '\0') {
542 assert(*pattern == '*');
543 pattern++;
544
545 bool matched = false;
546
547 const char *end;
548 if (pathname && special_period &&
549 *string == '.' && *(string - 1) == '/') {
550 end = string;
551 } else {
552 end = gnu_strchrnul(string, pathname ? '/' : '\0');
553 }
554
555 /* Try to match every possible offset. */
556 while (string <= end) {
557 if (_partial_match(&pattern, &string, flags)) {
558 matched = true;
559 break;
560 }
561 string++;
562 }
563
564 if (matched) {
565 continue;
566 }
567
568 return false;
569 }
570
571 return *string == '\0' || (leading_dir && *string == '/');
572}
573
574/**
575 * Transform the entire string to lowercase.
576 *
577 * @param s Input string.
578 * @return Newly allocated copy of the input string with all uppercase
579 * characters folded to their lowercase variants.
580 */
581static char *_casefold(const char *s)
582{
583 assert(s != NULL);
584 char *result = posix_strdup(s);
585 for (char *i = result; *i != '\0'; ++i) {
586 *i = tolower(*i);
587 }
588 return result;
589}
590
591/**
592 * Filename pattern matching.
593 *
594 * @param pattern Pattern to match the string against.
595 * @param string Matched string.
596 * @param flags Flags altering the matching of special characters
597 * (mainly for dot and slash).
598 * @return Zero if the string matches the pattern, FNM_NOMATCH otherwise.
599 */
600int posix_fnmatch(const char *pattern, const char *string, int flags)
601{
602 assert(pattern != NULL);
603 assert(string != NULL);
604
605 // TODO: don't fold everything in advance, but only when needed
606
607 if ((flags & FNM_CASEFOLD) != 0) {
608 /* Just fold the entire pattern and string. */
609 pattern = _casefold(pattern);
610 string = _casefold(string);
611 }
612
613 bool result = _full_match(pattern, string, flags);
614
615 if ((flags & FNM_CASEFOLD) != 0) {
616 if (pattern) {
617 free((char *) pattern);
618 }
619 if (string) {
620 free((char *) string);
621 }
622 }
623
624 return result ? 0 : FNM_NOMATCH;
625}
626
627// FIXME: put the testcases to the app/tester after fnmatch is included into libc
628
629#if 0
630
631#include <stdio.h>
632
633void __posix_fnmatch_test()
634{
635 int fail = 0;
636
637 #undef assert
638 #define assert(x) { if (x) printf("SUCCESS: "#x"\n"); else { printf("FAILED: "#x"\n"); fail++; } }
639 #define match(s1, s2, flags) assert(posix_fnmatch(s1, s2, flags) == 0)
640 #define nomatch(s1, s2, flags) assert(posix_fnmatch(s1, s2, flags) == FNM_NOMATCH)
641
642 static_assert(FNM_PATHNAME == FNM_FILE_NAME);
643 match("", "", 0);
644 match("*", "hello", 0);
645 match("hello", "hello", 0);
646 match("hello*", "hello", 0);
647 nomatch("hello?", "hello", 0);
648 match("*hello", "prdel hello", 0);
649 match("he[sl]lo", "hello", 0);
650 match("he[sl]lo", "heslo", 0);
651 nomatch("he[sl]lo", "heblo", 0);
652 nomatch("he[^sl]lo", "hello", 0);
653 nomatch("he[^sl]lo", "heslo", 0);
654 match("he[^sl]lo", "heblo", 0);
655 nomatch("he[!sl]lo", "hello", 0);
656 nomatch("he[!sl]lo", "heslo", 0);
657 match("he[!sl]lo", "heblo", 0);
658 match("al*[c-t]a*vis*ta", "alheimer talir jehovista", 0);
659 match("al*[c-t]a*vis*ta", "alfons had jehovista", 0);
660 match("[a-ce-z]", "a", 0);
661 match("[a-ce-z]", "c", 0);
662 nomatch("[a-ce-z]", "d", 0);
663 match("[a-ce-z]", "e", 0);
664 match("[a-ce-z]", "z", 0);
665 nomatch("[^a-ce-z]", "a", 0);
666 nomatch("[^a-ce-z]", "c", 0);
667 match("[^a-ce-z]", "d", 0);
668 nomatch("[^a-ce-z]", "e", 0);
669 nomatch("[^a-ce-z]", "z", 0);
670 match("helen??", "helenos", 0);
671 match("****booo****", "booo", 0);
672
673 match("hello[[:space:]]world", "hello world", 0);
674 nomatch("hello[[:alpha:]]world", "hello world", 0);
675
676 match("/hoooo*", "/hooooooo/hooo", 0);
677 nomatch("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME);
678 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME);
679 match("/hoooo*/*", "/hooooooo/hooo", FNM_PATHNAME);
680 match("/hoooo*/hooo", "/hooooooo/hooo", FNM_PATHNAME);
681 match("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
682 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
683 nomatch("/hoooo", "/hooooooo/hooo", FNM_LEADING_DIR);
684 match("/hooooooo", "/hooooooo/hooo", FNM_LEADING_DIR);
685
686 match("*", "hell", 0);
687 match("*?", "hell", 0);
688 match("?*?", "hell", 0);
689 match("?*??", "hell", 0);
690 match("??*??", "hell", 0);
691 nomatch("???*??", "hell", 0);
692
693 nomatch("", "hell", 0);
694 nomatch("?", "hell", 0);
695 nomatch("??", "hell", 0);
696 nomatch("???", "hell", 0);
697 match("????", "hell", 0);
698
699 match("*", "h.ello", FNM_PERIOD);
700 match("*", "h.ello", FNM_PATHNAME | FNM_PERIOD);
701 nomatch("*", ".hello", FNM_PERIOD);
702 match("h?ello", "h.ello", FNM_PERIOD);
703 nomatch("?hello", ".hello", FNM_PERIOD);
704 match("/home/user/.*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);
705 match("/home/user/*", "/home/user/.hello", FNM_PERIOD);
706 nomatch("/home/user/*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);
707
708 nomatch("HeLlO", "hello", 0);
709 match("HeLlO", "hello", FNM_CASEFOLD);
710
711 printf("Failed: %d\n", fail);
712}
713
714#endif
715
716/** @}
717 */
Note: See TracBrowser for help on using the repository browser.