source: mainline/uspace/lib/posix/src/fnmatch.c@ 9b8be79

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 9b8be79 was 9b8be79, checked in by Jiří Zárevúcky <jiri.zarevucky@…>, 7 years ago

libposix: Change header organization and remove passthrough headers

Posix headers now function like an overlay. The system include directories
are searched after posix directories. The headers don't need to be patched
for export now. libposix files now include headers using bracket notation
instead of quoted notation.

  • Property mode set to 100644
File size: 18.9 KB
Line 
1/*
2 * Copyright (c) 2011 Jiri Zarevucky
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup libposix
30 * @{
31 */
32/** @file Filename-matching.
33 */
34
35/*
36 * This file contains an implementation of the fnmatch() pattern matching
37 * function. There is more code than necessary to account for the possibility
38 * of adding POSIX-like locale support to the system in the future. Functions
39 * that are only necessary for locale support currently simply use single
40 * characters for "collation elements".
41 * When (or if) locales are properly implemented, extending this implementation
42 * will be fairly straightforward.
43 */
44
45#include <stdbool.h>
46#include <ctype.h>
47#include <string.h>
48#include <stdlib.h>
49#include <assert.h>
50
51#include "internal/common.h"
52#include <fnmatch.h>
53
54/* Returned by _match... functions. */
55#define INVALID_PATTERN -1
56
57/*
58 * Type for collating element, simple identity with characters now,
59 * but may be extended for better locale support.
60 */
61typedef int coll_elm_t;
62
63/** Return value indicating that the element in question
64 * is not valid in the current locale. (That is, if locales are supported.)
65 */
66#define COLL_ELM_INVALID -1
67
68/**
69 * Get collating element matching a string.
70 *
71 * @param str String representation of the element.
72 * @return Matching collating element or COLL_ELM_INVALID.
73 */
74static coll_elm_t _coll_elm_get(const char *str)
75{
76 if (str[0] == '\0' || str[1] != '\0') {
77 return COLL_ELM_INVALID;
78 }
79 return str[0];
80}
81
82/**
83 * Get collating element matching a single character.
84 *
85 * @param c Character representation of the element.
86 * @return Matching collating element.
87 */
88static coll_elm_t _coll_elm_char(int c)
89{
90 return c;
91}
92
93/**
94 * Match collating element with a beginning of a string.
95 *
96 * @param elm Collating element to match.
97 * @param str String which beginning should match the element.
98 * @return 0 if the element doesn't match, or the number of characters matched.
99 */
100static int _coll_elm_match(coll_elm_t elm, const char *str)
101{
102 return elm == *str;
103}
104
105/**
106 * Checks whether a string begins with a collating element in the given range.
107 * Ordering depends on the locale (if locales are supported).
108 *
109 * @param first First element of the range.
110 * @param second Last element of the range.
111 * @param str String to match.
112 * @return 0 if there is no match, or the number of characters matched.
113 */
114static int _coll_elm_between(coll_elm_t first, coll_elm_t second,
115 const char *str)
116{
117 return *str >= first && *str <= second;
118}
119
120/**
121 * Read a string delimited by [? and ?].
122 *
123 * @param pattern Pointer to the string to read from. Its position is moved
124 * to the first character after the closing ].
125 * @param seq The character on the inside of brackets.
126 * @param buf Read buffer.
127 * @param buf_sz Read buffer's size. If the buffer is not large enough for
128 * the entire string, the string is cut with no error indication.
129 * @param flags Flags modifying the behavior.
130 * @return True on success, false if the pattern is invalid.
131 */
132static bool _get_delimited(
133 const char **pattern, int seq,
134 char *buf, size_t buf_sz, int flags)
135{
136 const bool noescape = (flags & FNM_NOESCAPE) != 0;
137 const bool pathname = (flags & FNM_PATHNAME) != 0;
138
139 const char *p = *pattern;
140 assert(p[0] == '[' && p[1] == seq /* Caller should ensure this. */);
141 p += 2;
142
143 while (true) {
144 if (*p == seq && *(p + 1) == ']') {
145 /* String properly ended, return. */
146 *pattern = p + 2;
147 *buf = '\0';
148 return true;
149 }
150 if (!noescape && *p == '\\') {
151 p++;
152 }
153 if (*p == '\0') {
154 /* String not ended properly, invalid pattern. */
155 return false;
156 }
157 if (pathname && *p == '/') {
158 /* Slash in a pathname pattern is invalid. */
159 return false;
160 }
161 if (buf_sz > 1) {
162 /* Only add to the buffer if there is space. */
163 *buf = *p;
164 buf++;
165 buf_sz--;
166 }
167 p++;
168 }
169}
170
171/************** CHARACTER CLASSES ****************/
172
173#define MAX_CLASS_OR_COLL_LEN 6
174
175struct _char_class {
176 const char *name;
177 int (*func) (int);
178};
179
180/* List of supported character classes. */
181static const struct _char_class _char_classes[] = {
182 { "alnum", isalnum },
183 { "alpha", isalpha },
184 { "blank", isblank },
185 { "cntrl", iscntrl },
186 { "digit", isdigit },
187 { "graph", isgraph },
188 { "lower", islower },
189 { "print", isprint },
190 { "punct", ispunct },
191 { "space", isspace },
192 { "upper", isupper },
193 { "xdigit", isxdigit }
194};
195
196/**
197 * Compare function for binary search in the _char_classes array.
198 *
199 * @param key Key of the searched element.
200 * @param elem Element of _char_classes array.
201 * @return Ordering indicator (-1 less than, 0 equal, 1 greater than).
202 */
203static int _class_compare(const void *key, const void *elem)
204{
205 const struct _char_class *class = elem;
206 return strcmp((const char *) key, class->name);
207}
208
209/**
210 * Returns whether the given character belongs to the specified character class.
211 *
212 * @param cname Name of the character class.
213 * @param c Character.
214 * @return True if the character belongs to the class, false otherwise.
215 */
216static bool _is_in_class (const char *cname, int c)
217{
218 /* Search for class in the array of supported character classes. */
219 const struct _char_class *class = bsearch(cname, _char_classes,
220 sizeof(_char_classes) / sizeof(struct _char_class),
221 sizeof(struct _char_class), _class_compare);
222
223 if (class == NULL) {
224 /* No such class supported - treat as an empty class. */
225 return false;
226 } else {
227 /* Class matched. */
228 return class->func(c);
229 }
230}
231
232/**
233 * Tries to parse an initial part of the pattern as a character class pattern,
234 * and if successful, matches the beginning of the given string against the class.
235 *
236 * @param pattern Pointer to the pattern to match. Must begin with a class
237 * specifier and is repositioned to the first character after the specifier
238 * if successful.
239 * @param str String to match.
240 * @param flags Flags modifying the behavior (see fnmatch()).
241 * @return INVALID_PATTERN if the pattern doesn't start with a valid class
242 * specifier, 0 if the beginning of the matched string doesn't belong
243 * to the class, or positive number of characters matched.
244 */
245static int _match_char_class(const char **pattern, const char *str, int flags)
246{
247 char class[MAX_CLASS_OR_COLL_LEN + 1];
248
249 if (!_get_delimited(pattern, ':', class, sizeof(class), flags)) {
250 return INVALID_PATTERN;
251 }
252
253 return _is_in_class(class, *str);
254}
255
256/************** END CHARACTER CLASSES ****************/
257
258/**
259 * Reads the next collating element in the pattern, taking into account
260 * locale (if supported) and flags (see fnmatch()).
261 *
262 * @param pattern Pattern.
263 * @param flags Flags given to fnmatch().
264 * @return Collating element on success,
265 * or COLL_ELM_INVALID if the pattern is invalid.
266 */
267static coll_elm_t _next_coll_elm(const char **pattern, int flags)
268{
269 assert(pattern != NULL);
270 assert(*pattern != NULL);
271 assert(**pattern != '\0');
272
273 const char *p = *pattern;
274 const bool noescape = (flags & FNM_NOESCAPE) != 0;
275 const bool pathname = (flags & FNM_PATHNAME) != 0;
276
277 if (*p == '[') {
278 if (*(p + 1) == '.') {
279 char buf[MAX_CLASS_OR_COLL_LEN + 1];
280 if (!_get_delimited(pattern, '.', buf, sizeof(buf), flags)) {
281 return COLL_ELM_INVALID;
282 }
283 return _coll_elm_get(buf);
284 }
285
286 if (*(p + 1) == '=') {
287 char buf[MAX_CLASS_OR_COLL_LEN + 1];
288 if (!_get_delimited(pattern, '=', buf, sizeof(buf), flags)) {
289 return COLL_ELM_INVALID;
290 }
291 return _coll_elm_get(buf);
292 }
293 }
294
295 if (!noescape && *p == '\\') {
296 p++;
297 if (*p == '\0') {
298 *pattern = p;
299 return COLL_ELM_INVALID;
300 }
301 }
302 if (pathname && *p == '/') {
303 return COLL_ELM_INVALID;
304 }
305
306 *pattern = p + 1;
307 return _coll_elm_char(*p);
308}
309
310#define _matched(match) { \
311 int _match = match; \
312 if (_match < 0) { \
313 /* Invalid pattern */ \
314 return _match; \
315 } else if (matched == 0 && _match > 0) { \
316 /* First match */ \
317 matched = _match; \
318 } \
319 }
320
321/**
322 * Matches the beginning of the given string against a bracket expression
323 * the pattern begins with.
324 *
325 * @param pattern Pointer to the beginning of a bracket expression in a pattern.
326 * On success, the pointer is moved to the first character after the
327 * bracket expression.
328 * @param str Unmatched part of the string.
329 * @param flags Flags given to fnmatch().
330 * @return INVALID_PATTERN if the pattern is invalid, 0 if there is no match
331 * or the number of matched characters on success.
332 */
333static int _match_bracket_expr(const char **pattern, const char *str, int flags)
334{
335 const bool pathname = (flags & FNM_PATHNAME) != 0;
336 const bool special_period = (flags & FNM_PERIOD) != 0;
337 const char *p = *pattern;
338 bool negative = false;
339 int matched = 0;
340
341 assert(*p == '['); /* calling code should ensure this */
342 p++;
343
344 if (*str == '\0' || (pathname && *str == '/') ||
345 (pathname && special_period && *str == '.' && *(str - 1) == '/')) {
346 /*
347 * No bracket expression matches end of string,
348 * slash in pathname match or initial period with FNM_PERIOD
349 * option.
350 */
351 return 0;
352 }
353
354 if (*p == '^' || *p == '!') {
355 negative = true;
356 p++;
357 }
358
359 if (*p == ']') {
360 /* When ']' is first, treat it as a normal character. */
361 _matched(*str == ']');
362 p++;
363 }
364
365 coll_elm_t current_elm = COLL_ELM_INVALID;
366
367 while (*p != ']') {
368 if (*p == '-' && *(p + 1) != ']' &&
369 current_elm != COLL_ELM_INVALID) {
370 /* Range expression. */
371 p++;
372 coll_elm_t end_elm = _next_coll_elm(&p, flags);
373 if (end_elm == COLL_ELM_INVALID) {
374 return INVALID_PATTERN;
375 }
376 _matched(_coll_elm_between(current_elm, end_elm, str));
377 continue;
378 }
379
380 if (*p == '[' && *(p + 1) == ':') {
381 current_elm = COLL_ELM_INVALID;
382 _matched(_match_char_class(&p, str, flags));
383 continue;
384 }
385
386 current_elm = _next_coll_elm(&p, flags);
387 if (current_elm == COLL_ELM_INVALID) {
388 return INVALID_PATTERN;
389 }
390 _matched(_coll_elm_match(current_elm, str));
391 }
392
393 /* No error occured - update pattern pointer. */
394 *pattern = p + 1;
395
396 if (matched == 0) {
397 /* No match found */
398 return negative;
399 } else {
400 /* Matched 'match' characters. */
401 return negative ? 0 : matched;
402 }
403}
404
405/**
406 * Matches a portion of the pattern containing no asterisks (*) against
407 * the given string.
408 *
409 * @param pattern Pointer to the unmatched portion of the pattern.
410 * On success, the pointer is moved to the first asterisk, or to the
411 * terminating nul character, whichever occurs first.
412 * @param string Pointer to the input string. On success, the pointer is moved
413 * to the first character that wasn't explicitly matched.
414 * @param flags Flags given to fnmatch().
415 * @return True if the entire subpattern matched. False otherwise.
416 */
417static bool _partial_match(const char **pattern, const char **string, int flags)
418{
419 /*
420 * Only a single *-delimited subpattern is matched here.
421 * So in this function, '*' is understood as the end of pattern.
422 */
423
424 const bool pathname = (flags & FNM_PATHNAME) != 0;
425 const bool special_period = (flags & FNM_PERIOD) != 0;
426 const bool noescape = (flags & FNM_NOESCAPE) != 0;
427 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;
428
429 const char *s = *string;
430 const char *p = *pattern;
431
432 while (*p != '*') {
433 /* Bracket expression. */
434 if (*p == '[') {
435 int matched = _match_bracket_expr(&p, s, flags);
436 if (matched == 0) {
437 /* Doesn't match. */
438 return false;
439 }
440 if (matched != INVALID_PATTERN) {
441 s += matched;
442 continue;
443 }
444
445 assert(matched == INVALID_PATTERN);
446 /* Fall through to match [ as an ordinary character. */
447 }
448
449 /* Wildcard match. */
450 if (*p == '?') {
451 if (*s == '\0') {
452 /* No character to match. */
453 return false;
454 }
455 if (pathname && *s == '/') {
456 /* Slash must be matched explicitly. */
457 return false;
458 }
459 if (special_period && pathname &&
460 *s == '.' && *(s - 1) == '/') {
461 /* Initial period must be matched explicitly. */
462 return false;
463 }
464
465 /* None of the above, match anything else. */
466 p++;
467 s++;
468 continue;
469 }
470
471 if (!noescape && *p == '\\') {
472 /* Escaped character. */
473 p++;
474 }
475
476 if (*p == '\0') {
477 /*
478 * End of pattern, must match end of string or
479 * an end of subdirectory name (optional).
480 */
481
482 if (*s == '\0' || (leading_dir && *s == '/')) {
483 break;
484 }
485
486 return false;
487 }
488
489 if (*p == *s) {
490 /* Exact match. */
491 p++;
492 s++;
493 continue;
494 }
495
496 /* Nothing matched. */
497 return false;
498 }
499
500 /* Entire sub-pattern matched. */
501
502 /* postconditions */
503 assert(*p == '\0' || *p == '*');
504 assert(*p != '\0' || *s == '\0' || (leading_dir && *s == '/'));
505
506 *pattern = p;
507 *string = s;
508 return true;
509}
510
511/**
512 * Match string against a pattern.
513 *
514 * @param pattern Pattern.
515 * @param string String to match.
516 * @param flags Flags given to fnmatch().
517 * @return True if the string matched the pattern, false otherwise.
518 */
519static bool _full_match(const char *pattern, const char *string, int flags)
520{
521 const bool pathname = (flags & FNM_PATHNAME) != 0;
522 const bool special_period = (flags & FNM_PERIOD) != 0;
523 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;
524
525 if (special_period && *string == '.') {
526 /* Initial dot must be matched by an explicit dot in pattern. */
527 if (*pattern != '.') {
528 return false;
529 }
530 pattern++;
531 string++;
532 }
533
534 if (*pattern != '*') {
535 if (!_partial_match(&pattern, &string, flags)) {
536 /* The initial match must succeed. */
537 return false;
538 }
539 }
540
541 while (*pattern != '\0') {
542 assert(*pattern == '*');
543 pattern++;
544
545 bool matched = false;
546
547 const char *end;
548 if (pathname && special_period &&
549 *string == '.' && *(string - 1) == '/') {
550 end = string;
551 } else {
552 end = gnu_strchrnul(string, pathname ? '/' : '\0');
553 }
554
555 /* Try to match every possible offset. */
556 while (string <= end) {
557 if (_partial_match(&pattern, &string, flags)) {
558 matched = true;
559 break;
560 }
561 string++;
562 }
563
564 if (matched) {
565 continue;
566 }
567
568 return false;
569 }
570
571 return *string == '\0' || (leading_dir && *string == '/');
572}
573
574/**
575 * Transform the entire string to lowercase.
576 *
577 * @param s Input string.
578 * @return Newly allocated copy of the input string with all uppercase
579 * characters folded to their lowercase variants.
580 */
581static char *_casefold(const char *s)
582{
583 assert(s != NULL);
584 char *result = strdup(s);
585 for (char *i = result; *i != '\0'; ++i) {
586 *i = tolower(*i);
587 }
588 return result;
589}
590
591/**
592 * Filename pattern matching.
593 *
594 * @param pattern Pattern to match the string against.
595 * @param string Matched string.
596 * @param flags Flags altering the matching of special characters
597 * (mainly for dot and slash).
598 * @return Zero if the string matches the pattern, FNM_NOMATCH otherwise.
599 */
600int fnmatch(const char *pattern, const char *string, int flags)
601{
602 assert(pattern != NULL);
603 assert(string != NULL);
604
605 // TODO: don't fold everything in advance, but only when needed
606
607 if ((flags & FNM_CASEFOLD) != 0) {
608 /* Just fold the entire pattern and string. */
609 pattern = _casefold(pattern);
610 string = _casefold(string);
611 }
612
613 bool result = _full_match(pattern, string, flags);
614
615 if ((flags & FNM_CASEFOLD) != 0) {
616 if (pattern) {
617 free((char *) pattern);
618 }
619 if (string) {
620 free((char *) string);
621 }
622 }
623
624 return result ? 0 : FNM_NOMATCH;
625}
626
627// FIXME: put the testcases to the app/tester after fnmatch is included into libc
628
629#if 0
630
631#include <stdio.h>
632
633#define fnmatch_test(x) { if (x) printf("SUCCESS: "#x"\n"); else { printf("FAILED: "#x"\n"); fail++; } }
634#define match(s1, s2, flags) fnmatch_test(fnmatch(s1, s2, flags) == 0)
635#define nomatch(s1, s2, flags) fnmatch_test(fnmatch(s1, s2, flags) == FNM_NOMATCH)
636
637void __posix_fnmatch_test()
638{
639 int fail = 0;
640
641 static_assert(FNM_PATHNAME == FNM_FILE_NAME);
642 match("", "", 0);
643 match("*", "hello", 0);
644 match("hello", "hello", 0);
645 match("hello*", "hello", 0);
646 nomatch("hello?", "hello", 0);
647 match("*hello", "prdel hello", 0);
648 match("he[sl]lo", "hello", 0);
649 match("he[sl]lo", "heslo", 0);
650 nomatch("he[sl]lo", "heblo", 0);
651 nomatch("he[^sl]lo", "hello", 0);
652 nomatch("he[^sl]lo", "heslo", 0);
653 match("he[^sl]lo", "heblo", 0);
654 nomatch("he[!sl]lo", "hello", 0);
655 nomatch("he[!sl]lo", "heslo", 0);
656 match("he[!sl]lo", "heblo", 0);
657 match("al*[c-t]a*vis*ta", "alheimer talir jehovista", 0);
658 match("al*[c-t]a*vis*ta", "alfons had jehovista", 0);
659 match("[a-ce-z]", "a", 0);
660 match("[a-ce-z]", "c", 0);
661 nomatch("[a-ce-z]", "d", 0);
662 match("[a-ce-z]", "e", 0);
663 match("[a-ce-z]", "z", 0);
664 nomatch("[^a-ce-z]", "a", 0);
665 nomatch("[^a-ce-z]", "c", 0);
666 match("[^a-ce-z]", "d", 0);
667 nomatch("[^a-ce-z]", "e", 0);
668 nomatch("[^a-ce-z]", "z", 0);
669 match("helen??", "helenos", 0);
670 match("****booo****", "booo", 0);
671
672 match("hello[[:space:]]world", "hello world", 0);
673 nomatch("hello[[:alpha:]]world", "hello world", 0);
674
675 match("/hoooo*", "/hooooooo/hooo", 0);
676 nomatch("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME);
677 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME);
678 match("/hoooo*/*", "/hooooooo/hooo", FNM_PATHNAME);
679 match("/hoooo*/hooo", "/hooooooo/hooo", FNM_PATHNAME);
680 match("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
681 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
682 nomatch("/hoooo", "/hooooooo/hooo", FNM_LEADING_DIR);
683 match("/hooooooo", "/hooooooo/hooo", FNM_LEADING_DIR);
684
685 match("*", "hell", 0);
686 match("*?", "hell", 0);
687 match("?*?", "hell", 0);
688 match("?*??", "hell", 0);
689 match("??*??", "hell", 0);
690 nomatch("???*??", "hell", 0);
691
692 nomatch("", "hell", 0);
693 nomatch("?", "hell", 0);
694 nomatch("??", "hell", 0);
695 nomatch("???", "hell", 0);
696 match("????", "hell", 0);
697
698 match("*", "h.ello", FNM_PERIOD);
699 match("*", "h.ello", FNM_PATHNAME | FNM_PERIOD);
700 nomatch("*", ".hello", FNM_PERIOD);
701 match("h?ello", "h.ello", FNM_PERIOD);
702 nomatch("?hello", ".hello", FNM_PERIOD);
703 match("/home/user/.*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);
704 match("/home/user/*", "/home/user/.hello", FNM_PERIOD);
705 nomatch("/home/user/*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);
706
707 nomatch("HeLlO", "hello", 0);
708 match("HeLlO", "hello", FNM_CASEFOLD);
709
710 printf("Failed: %d\n", fail);
711}
712
713#endif
714
715/** @}
716 */
Note: See TracBrowser for help on using the repository browser.