source: mainline/uspace/lib/posix/src/fnmatch.c@ 7570a95f

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 7570a95f was 09ab0a9a, checked in by Jiri Svoboda <jiri@…>, 7 years ago

Fix vertical spacing with new Ccheck revision.

  • Property mode set to 100644
File size: 19.0 KB
Line 
1/*
2 * Copyright (c) 2011 Jiri Zarevucky
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup libposix
30 * @{
31 */
32/** @file Filename-matching.
33 */
34
35/*
36 * This file contains an implementation of the fnmatch() pattern matching
37 * function. There is more code than necessary to account for the possibility
38 * of adding POSIX-like locale support to the system in the future. Functions
39 * that are only necessary for locale support currently simply use single
40 * characters for "collation elements".
41 * When (or if) locales are properly implemented, extending this implementation
42 * will be fairly straightforward.
43 */
44
45#include "libc/stdbool.h"
46#include "posix/ctype.h"
47#include "posix/string.h"
48#include "posix/stdlib.h"
49#include <assert.h>
50
51#include "internal/common.h"
52#include "posix/fnmatch.h"
53
54/* Returned by _match... functions. */
55#define INVALID_PATTERN -1
56
57/*
58 * Type for collating element, simple identity with characters now,
59 * but may be extended for better locale support.
60 */
61typedef int coll_elm_t;
62
63/** Return value indicating that the element in question
64 * is not valid in the current locale. (That is, if locales are supported.)
65 */
66#define COLL_ELM_INVALID -1
67
68/**
69 * Get collating element matching a string.
70 *
71 * @param str String representation of the element.
72 * @return Matching collating element or COLL_ELM_INVALID.
73 */
74static coll_elm_t _coll_elm_get(const char *str)
75{
76 if (str[0] == '\0' || str[1] != '\0') {
77 return COLL_ELM_INVALID;
78 }
79 return str[0];
80}
81
82/**
83 * Get collating element matching a single character.
84 *
85 * @param c Character representation of the element.
86 * @return Matching collating element.
87 */
88static coll_elm_t _coll_elm_char(int c)
89{
90 return c;
91}
92
93/**
94 * Match collating element with a beginning of a string.
95 *
96 * @param elm Collating element to match.
97 * @param str String which beginning should match the element.
98 * @return 0 if the element doesn't match, or the number of characters matched.
99 */
100static int _coll_elm_match(coll_elm_t elm, const char *str)
101{
102 return elm == *str;
103}
104
105/**
106 * Checks whether a string begins with a collating element in the given range.
107 * Ordering depends on the locale (if locales are supported).
108 *
109 * @param first First element of the range.
110 * @param second Last element of the range.
111 * @param str String to match.
112 * @return 0 if there is no match, or the number of characters matched.
113 */
114static int _coll_elm_between(coll_elm_t first, coll_elm_t second,
115 const char *str)
116{
117 return *str >= first && *str <= second;
118}
119
120/**
121 * Read a string delimited by [? and ?].
122 *
123 * @param pattern Pointer to the string to read from. Its position is moved
124 * to the first character after the closing ].
125 * @param seq The character on the inside of brackets.
126 * @param buf Read buffer.
127 * @param buf_sz Read buffer's size. If the buffer is not large enough for
128 * the entire string, the string is cut with no error indication.
129 * @param flags Flags modifying the behavior.
130 * @return True on success, false if the pattern is invalid.
131 */
132static bool _get_delimited(
133 const char **pattern, int seq,
134 char *buf, size_t buf_sz, int flags)
135{
136 const bool noescape = (flags & FNM_NOESCAPE) != 0;
137 const bool pathname = (flags & FNM_PATHNAME) != 0;
138
139 const char *p = *pattern;
140 assert(p[0] == '[' && p[1] == seq /* Caller should ensure this. */);
141 p += 2;
142
143 while (true) {
144 if (*p == seq && *(p + 1) == ']') {
145 /* String properly ended, return. */
146 *pattern = p + 2;
147 *buf = '\0';
148 return true;
149 }
150 if (!noescape && *p == '\\') {
151 p++;
152 }
153 if (*p == '\0') {
154 /* String not ended properly, invalid pattern. */
155 return false;
156 }
157 if (pathname && *p == '/') {
158 /* Slash in a pathname pattern is invalid. */
159 return false;
160 }
161 if (buf_sz > 1) {
162 /* Only add to the buffer if there is space. */
163 *buf = *p;
164 buf++;
165 buf_sz--;
166 }
167 p++;
168 }
169}
170
171/************** CHARACTER CLASSES ****************/
172
173#define MAX_CLASS_OR_COLL_LEN 6
174
175struct _char_class {
176 const char *name;
177 int (*func) (int);
178};
179
180/* List of supported character classes. */
181static const struct _char_class _char_classes[] = {
182 { "alnum", isalnum },
183 { "alpha", isalpha },
184 { "blank", isblank },
185 { "cntrl", iscntrl },
186 { "digit", isdigit },
187 { "graph", isgraph },
188 { "lower", islower },
189 { "print", isprint },
190 { "punct", ispunct },
191 { "space", isspace },
192 { "upper", isupper },
193 { "xdigit", isxdigit }
194};
195
196/**
197 * Compare function for binary search in the _char_classes array.
198 *
199 * @param key Key of the searched element.
200 * @param elem Element of _char_classes array.
201 * @return Ordering indicator (-1 less than, 0 equal, 1 greater than).
202 */
203static int _class_compare(const void *key, const void *elem)
204{
205 const struct _char_class *class = elem;
206 return strcmp((const char *) key, class->name);
207}
208
209/**
210 * Returns whether the given character belongs to the specified character class.
211 *
212 * @param cname Name of the character class.
213 * @param c Character.
214 * @return True if the character belongs to the class, false otherwise.
215 */
216static bool _is_in_class (const char *cname, int c)
217{
218 /* Search for class in the array of supported character classes. */
219 const struct _char_class *class = bsearch(cname, _char_classes,
220 sizeof(_char_classes) / sizeof(struct _char_class),
221 sizeof(struct _char_class), _class_compare);
222
223 if (class == NULL) {
224 /* No such class supported - treat as an empty class. */
225 return false;
226 } else {
227 /* Class matched. */
228 return class->func(c);
229 }
230}
231
232/**
233 * Tries to parse an initial part of the pattern as a character class pattern,
234 * and if successful, matches the beginning of the given string against the class.
235 *
236 * @param pattern Pointer to the pattern to match. Must begin with a class
237 * specifier and is repositioned to the first character after the specifier
238 * if successful.
239 * @param str String to match.
240 * @param flags Flags modifying the behavior (see fnmatch()).
241 * @return INVALID_PATTERN if the pattern doesn't start with a valid class
242 * specifier, 0 if the beginning of the matched string doesn't belong
243 * to the class, or positive number of characters matched.
244 */
245static int _match_char_class(const char **pattern, const char *str, int flags)
246{
247 char class[MAX_CLASS_OR_COLL_LEN + 1];
248
249 if (!_get_delimited(pattern, ':', class, sizeof(class), flags)) {
250 return INVALID_PATTERN;
251 }
252
253 return _is_in_class(class, *str);
254}
255
256/************** END CHARACTER CLASSES ****************/
257
258/**
259 * Reads the next collating element in the pattern, taking into account
260 * locale (if supported) and flags (see fnmatch()).
261 *
262 * @param pattern Pattern.
263 * @param flags Flags given to fnmatch().
264 * @return Collating element on success,
265 * or COLL_ELM_INVALID if the pattern is invalid.
266 */
267static coll_elm_t _next_coll_elm(const char **pattern, int flags)
268{
269 assert(pattern != NULL);
270 assert(*pattern != NULL);
271 assert(**pattern != '\0');
272
273 const char *p = *pattern;
274 const bool noescape = (flags & FNM_NOESCAPE) != 0;
275 const bool pathname = (flags & FNM_PATHNAME) != 0;
276
277 if (*p == '[') {
278 if (*(p + 1) == '.') {
279 char buf[MAX_CLASS_OR_COLL_LEN + 1];
280 if (!_get_delimited(pattern, '.', buf, sizeof(buf), flags)) {
281 return COLL_ELM_INVALID;
282 }
283 return _coll_elm_get(buf);
284 }
285
286 if (*(p + 1) == '=') {
287 char buf[MAX_CLASS_OR_COLL_LEN + 1];
288 if (!_get_delimited(pattern, '=', buf, sizeof(buf), flags)) {
289 return COLL_ELM_INVALID;
290 }
291 return _coll_elm_get(buf);
292 }
293 }
294
295 if (!noescape && *p == '\\') {
296 p++;
297 if (*p == '\0') {
298 *pattern = p;
299 return COLL_ELM_INVALID;
300 }
301 }
302 if (pathname && *p == '/') {
303 return COLL_ELM_INVALID;
304 }
305
306 *pattern = p + 1;
307 return _coll_elm_char(*p);
308}
309
310#define _matched(match) { \
311 int _match = match; \
312 if (_match < 0) { \
313 /* Invalid pattern */ \
314 return _match; \
315 } else if (matched == 0 && _match > 0) { \
316 /* First match */ \
317 matched = _match; \
318 } \
319 }
320
321/**
322 * Matches the beginning of the given string against a bracket expression
323 * the pattern begins with.
324 *
325 * @param pattern Pointer to the beginning of a bracket expression in a pattern.
326 * On success, the pointer is moved to the first character after the
327 * bracket expression.
328 * @param str Unmatched part of the string.
329 * @param flags Flags given to fnmatch().
330 * @return INVALID_PATTERN if the pattern is invalid, 0 if there is no match
331 * or the number of matched characters on success.
332 */
333static int _match_bracket_expr(const char **pattern, const char *str, int flags)
334{
335 const bool pathname = (flags & FNM_PATHNAME) != 0;
336 const bool special_period = (flags & FNM_PERIOD) != 0;
337 const char *p = *pattern;
338 bool negative = false;
339 int matched = 0;
340
341 assert(*p == '['); /* calling code should ensure this */
342 p++;
343
344 if (*str == '\0' || (pathname && *str == '/') ||
345 (pathname && special_period && *str == '.' && *(str - 1) == '/')) {
346 /*
347 * No bracket expression matches end of string,
348 * slash in pathname match or initial period with FNM_PERIOD
349 * option.
350 */
351 return 0;
352 }
353
354 if (*p == '^' || *p == '!') {
355 negative = true;
356 p++;
357 }
358
359 if (*p == ']') {
360 /* When ']' is first, treat it as a normal character. */
361 _matched(*str == ']');
362 p++;
363 }
364
365 coll_elm_t current_elm = COLL_ELM_INVALID;
366
367 while (*p != ']') {
368 if (*p == '-' && *(p + 1) != ']' &&
369 current_elm != COLL_ELM_INVALID) {
370 /* Range expression. */
371 p++;
372 coll_elm_t end_elm = _next_coll_elm(&p, flags);
373 if (end_elm == COLL_ELM_INVALID) {
374 return INVALID_PATTERN;
375 }
376 _matched(_coll_elm_between(current_elm, end_elm, str));
377 continue;
378 }
379
380 if (*p == '[' && *(p + 1) == ':') {
381 current_elm = COLL_ELM_INVALID;
382 _matched(_match_char_class(&p, str, flags));
383 continue;
384 }
385
386 current_elm = _next_coll_elm(&p, flags);
387 if (current_elm == COLL_ELM_INVALID) {
388 return INVALID_PATTERN;
389 }
390 _matched(_coll_elm_match(current_elm, str));
391 }
392
393 /* No error occured - update pattern pointer. */
394 *pattern = p + 1;
395
396 if (matched == 0) {
397 /* No match found */
398 return negative;
399 } else {
400 /* Matched 'match' characters. */
401 return negative ? 0 : matched;
402 }
403}
404
405/**
406 * Matches a portion of the pattern containing no asterisks (*) against
407 * the given string.
408 *
409 * @param pattern Pointer to the unmatched portion of the pattern.
410 * On success, the pointer is moved to the first asterisk, or to the
411 * terminating nul character, whichever occurs first.
412 * @param string Pointer to the input string. On success, the pointer is moved
413 * to the first character that wasn't explicitly matched.
414 * @param flags Flags given to fnmatch().
415 * @return True if the entire subpattern matched. False otherwise.
416 */
417static bool _partial_match(const char **pattern, const char **string, int flags)
418{
419 /*
420 * Only a single *-delimited subpattern is matched here.
421 * So in this function, '*' is understood as the end of pattern.
422 */
423
424 const bool pathname = (flags & FNM_PATHNAME) != 0;
425 const bool special_period = (flags & FNM_PERIOD) != 0;
426 const bool noescape = (flags & FNM_NOESCAPE) != 0;
427 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;
428
429 const char *s = *string;
430 const char *p = *pattern;
431
432 while (*p != '*') {
433 /* Bracket expression. */
434 if (*p == '[') {
435 int matched = _match_bracket_expr(&p, s, flags);
436 if (matched == 0) {
437 /* Doesn't match. */
438 return false;
439 }
440 if (matched != INVALID_PATTERN) {
441 s += matched;
442 continue;
443 }
444
445 assert(matched == INVALID_PATTERN);
446 /* Fall through to match [ as an ordinary character. */
447 }
448
449 /* Wildcard match. */
450 if (*p == '?') {
451 if (*s == '\0') {
452 /* No character to match. */
453 return false;
454 }
455 if (pathname && *s == '/') {
456 /* Slash must be matched explicitly. */
457 return false;
458 }
459 if (special_period && pathname &&
460 *s == '.' && *(s - 1) == '/') {
461 /* Initial period must be matched explicitly. */
462 return false;
463 }
464
465 /* None of the above, match anything else. */
466 p++;
467 s++;
468 continue;
469 }
470
471 if (!noescape && *p == '\\') {
472 /* Escaped character. */
473 p++;
474 }
475
476 if (*p == '\0') {
477 /*
478 * End of pattern, must match end of string or
479 * an end of subdirectory name (optional).
480 */
481
482 if (*s == '\0' || (leading_dir && *s == '/')) {
483 break;
484 }
485
486 return false;
487 }
488
489 if (*p == *s) {
490 /* Exact match. */
491 p++;
492 s++;
493 continue;
494 }
495
496 /* Nothing matched. */
497 return false;
498 }
499
500 /* Entire sub-pattern matched. */
501
502 /* postconditions */
503 assert(*p == '\0' || *p == '*');
504 assert(*p != '\0' || *s == '\0' || (leading_dir && *s == '/'));
505
506 *pattern = p;
507 *string = s;
508 return true;
509}
510
511/**
512 * Match string against a pattern.
513 *
514 * @param pattern Pattern.
515 * @param string String to match.
516 * @param flags Flags given to fnmatch().
517 * @return True if the string matched the pattern, false otherwise.
518 */
519static bool _full_match(const char *pattern, const char *string, int flags)
520{
521 const bool pathname = (flags & FNM_PATHNAME) != 0;
522 const bool special_period = (flags & FNM_PERIOD) != 0;
523 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;
524
525 if (special_period && *string == '.') {
526 /* Initial dot must be matched by an explicit dot in pattern. */
527 if (*pattern != '.') {
528 return false;
529 }
530 pattern++;
531 string++;
532 }
533
534 if (*pattern != '*') {
535 if (!_partial_match(&pattern, &string, flags)) {
536 /* The initial match must succeed. */
537 return false;
538 }
539 }
540
541 while (*pattern != '\0') {
542 assert(*pattern == '*');
543 pattern++;
544
545 bool matched = false;
546
547 const char *end;
548 if (pathname && special_period &&
549 *string == '.' && *(string - 1) == '/') {
550 end = string;
551 } else {
552 end = gnu_strchrnul(string, pathname ? '/' : '\0');
553 }
554
555 /* Try to match every possible offset. */
556 while (string <= end) {
557 if (_partial_match(&pattern, &string, flags)) {
558 matched = true;
559 break;
560 }
561 string++;
562 }
563
564 if (matched) {
565 continue;
566 }
567
568 return false;
569 }
570
571 return *string == '\0' || (leading_dir && *string == '/');
572}
573
574/**
575 * Transform the entire string to lowercase.
576 *
577 * @param s Input string.
578 * @return Newly allocated copy of the input string with all uppercase
579 * characters folded to their lowercase variants.
580 */
581static char *_casefold(const char *s)
582{
583 assert(s != NULL);
584 char *result = strdup(s);
585 for (char *i = result; *i != '\0'; ++i) {
586 *i = tolower(*i);
587 }
588 return result;
589}
590
591/**
592 * Filename pattern matching.
593 *
594 * @param pattern Pattern to match the string against.
595 * @param string Matched string.
596 * @param flags Flags altering the matching of special characters
597 * (mainly for dot and slash).
598 * @return Zero if the string matches the pattern, FNM_NOMATCH otherwise.
599 */
600int fnmatch(const char *pattern, const char *string, int flags)
601{
602 assert(pattern != NULL);
603 assert(string != NULL);
604
605 // TODO: don't fold everything in advance, but only when needed
606
607 if ((flags & FNM_CASEFOLD) != 0) {
608 /* Just fold the entire pattern and string. */
609 pattern = _casefold(pattern);
610 string = _casefold(string);
611 }
612
613 bool result = _full_match(pattern, string, flags);
614
615 if ((flags & FNM_CASEFOLD) != 0) {
616 if (pattern) {
617 free((char *) pattern);
618 }
619 if (string) {
620 free((char *) string);
621 }
622 }
623
624 return result ? 0 : FNM_NOMATCH;
625}
626
627// FIXME: put the testcases to the app/tester after fnmatch is included into libc
628
629#if 0
630
631#include <stdio.h>
632
633#define fnmatch_test(x) { if (x) printf("SUCCESS: "#x"\n"); else { printf("FAILED: "#x"\n"); fail++; } }
634#define match(s1, s2, flags) fnmatch_test(fnmatch(s1, s2, flags) == 0)
635#define nomatch(s1, s2, flags) fnmatch_test(fnmatch(s1, s2, flags) == FNM_NOMATCH)
636
637void __posix_fnmatch_test()
638{
639 int fail = 0;
640
641 static_assert(FNM_PATHNAME == FNM_FILE_NAME);
642 match("", "", 0);
643 match("*", "hello", 0);
644 match("hello", "hello", 0);
645 match("hello*", "hello", 0);
646 nomatch("hello?", "hello", 0);
647 match("*hello", "prdel hello", 0);
648 match("he[sl]lo", "hello", 0);
649 match("he[sl]lo", "heslo", 0);
650 nomatch("he[sl]lo", "heblo", 0);
651 nomatch("he[^sl]lo", "hello", 0);
652 nomatch("he[^sl]lo", "heslo", 0);
653 match("he[^sl]lo", "heblo", 0);
654 nomatch("he[!sl]lo", "hello", 0);
655 nomatch("he[!sl]lo", "heslo", 0);
656 match("he[!sl]lo", "heblo", 0);
657 match("al*[c-t]a*vis*ta", "alheimer talir jehovista", 0);
658 match("al*[c-t]a*vis*ta", "alfons had jehovista", 0);
659 match("[a-ce-z]", "a", 0);
660 match("[a-ce-z]", "c", 0);
661 nomatch("[a-ce-z]", "d", 0);
662 match("[a-ce-z]", "e", 0);
663 match("[a-ce-z]", "z", 0);
664 nomatch("[^a-ce-z]", "a", 0);
665 nomatch("[^a-ce-z]", "c", 0);
666 match("[^a-ce-z]", "d", 0);
667 nomatch("[^a-ce-z]", "e", 0);
668 nomatch("[^a-ce-z]", "z", 0);
669 match("helen??", "helenos", 0);
670 match("****booo****", "booo", 0);
671
672 match("hello[[:space:]]world", "hello world", 0);
673 nomatch("hello[[:alpha:]]world", "hello world", 0);
674
675 match("/hoooo*", "/hooooooo/hooo", 0);
676 nomatch("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME);
677 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME);
678 match("/hoooo*/*", "/hooooooo/hooo", FNM_PATHNAME);
679 match("/hoooo*/hooo", "/hooooooo/hooo", FNM_PATHNAME);
680 match("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
681 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
682 nomatch("/hoooo", "/hooooooo/hooo", FNM_LEADING_DIR);
683 match("/hooooooo", "/hooooooo/hooo", FNM_LEADING_DIR);
684
685 match("*", "hell", 0);
686 match("*?", "hell", 0);
687 match("?*?", "hell", 0);
688 match("?*??", "hell", 0);
689 match("??*??", "hell", 0);
690 nomatch("???*??", "hell", 0);
691
692 nomatch("", "hell", 0);
693 nomatch("?", "hell", 0);
694 nomatch("??", "hell", 0);
695 nomatch("???", "hell", 0);
696 match("????", "hell", 0);
697
698 match("*", "h.ello", FNM_PERIOD);
699 match("*", "h.ello", FNM_PATHNAME | FNM_PERIOD);
700 nomatch("*", ".hello", FNM_PERIOD);
701 match("h?ello", "h.ello", FNM_PERIOD);
702 nomatch("?hello", ".hello", FNM_PERIOD);
703 match("/home/user/.*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);
704 match("/home/user/*", "/home/user/.hello", FNM_PERIOD);
705 nomatch("/home/user/*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);
706
707 nomatch("HeLlO", "hello", 0);
708 match("HeLlO", "hello", FNM_CASEFOLD);
709
710 printf("Failed: %d\n", fail);
711}
712
713#endif
714
715/** @}
716 */
Note: See TracBrowser for help on using the repository browser.