source: mainline/uspace/lib/posix/fnmatch.c@ f9c3ef44

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since f9c3ef44 was 6921178, checked in by Petr Koupy <petr.koupy@…>, 14 years ago

Edited or removed some TODO/FIXME comments.

  • Property mode set to 100644
File size: 19.0 KB
Line 
1/*
2 * Copyright (c) 2011 Jiri Zarevucky
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup libposix
30 * @{
31 */
32/** @file Filename-matching.
33 */
34
35/* This file contains an implementation of the fnmatch() pattern matching
36 * function. There is more code than necessary to account for the possibility
37 * of adding POSIX-like locale support to the system in the future. Functions
38 * that are only necessary for locale support currently simply use single
39 * characters for "collation elements".
40 * When (or if) locales are properly implemented, extending this implementation
41 * will be fairly straightforward.
42 */
43
44#include "stdbool.h"
45#include "ctype.h"
46#include "string.h"
47#include "stdlib.h"
48#include "assert.h"
49
50#define LIBPOSIX_INTERNAL
51
52#include "internal/common.h"
53#include "fnmatch.h"
54
55/* Returned by _match... functions. */
56#define INVALID_PATTERN -1
57
58/* Type for collating element, simple identity with characters now,
59 * but may be extended for better locale support.
60 */
61typedef int coll_elm_t;
62
63/** Return value indicating that the element in question
64 * is not valid in the current locale. (That is, if locales are supported.)
65 */
66#define COLL_ELM_INVALID -1
67
68/**
69 * Get collating element matching a string.
70 *
71 * @param str String representation of the element.
72 * @return Matching collating element or COLL_ELM_INVALID.
73 */
74static coll_elm_t _coll_elm_get(const char* str)
75{
76 if (str[0] == '\0' || str[1] != '\0') {
77 return COLL_ELM_INVALID;
78 }
79 return str[0];
80}
81
82/**
83 * Get collating element matching a single character.
84 *
85 * @param c Character representation of the element.
86 * @return Matching collating element.
87 */
88static coll_elm_t _coll_elm_char(int c)
89{
90 return c;
91}
92
93/**
94 * Match collating element with a beginning of a string.
95 *
96 * @param elm Collating element to match.
97 * @param str String which beginning should match the element.
98 * @return 0 if the element doesn't match, or the number of characters matched.
99 */
100static int _coll_elm_match(coll_elm_t elm, const char *str)
101{
102 return elm == *str;
103}
104
105/**
106 * Checks whether a string begins with a collating element in the given range.
107 * Ordering depends on the locale (if locales are supported).
108 *
109 * @param first First element of the range.
110 * @param second Last element of the range.
111 * @param str String to match.
112 * @return 0 if there is no match, or the number of characters matched.
113 */
114static int _coll_elm_between(coll_elm_t first, coll_elm_t second,
115 const char *str)
116{
117 return *str >= first && *str <= second;
118}
119
120/**
121 * Read a string delimited by [? and ?].
122 *
123 * @param pattern Pointer to the string to read from. Its position is moved
124 * to the first character after the closing ].
125 * @param seq The character on the inside of brackets.
126 * @param buf Read buffer.
127 * @param buf_sz Read buffer's size. If the buffer is not large enough for
128 * the entire string, the string is cut with no error indication.
129 * @param flags Flags modifying the behavior.
130 * @return True on success, false if the pattern is invalid.
131 */
132static bool _get_delimited(
133 const char **pattern, int seq,
134 char *buf, size_t buf_sz, int flags)
135{
136 const bool noescape = (flags & FNM_NOESCAPE) != 0;
137 const bool pathname = (flags & FNM_PATHNAME) != 0;
138
139 const char *p = *pattern;
140 assert(p[0] == '[' && p[1] == seq /* Caller should ensure this. */);
141 p += 2;
142
143 while (true) {
144 if (*p == seq && *(p + 1) == ']') {
145 /* String properly ended, return. */
146 *pattern = p + 2;
147 *buf = '\0';
148 return true;
149 }
150 if (!noescape && *p == '\\') {
151 p++;
152 }
153 if (*p == '\0') {
154 /* String not ended properly, invalid pattern. */
155 return false;
156 }
157 if (pathname && *p == '/') {
158 /* Slash in a pathname pattern is invalid. */
159 return false;
160 }
161 if (buf_sz > 1) {
162 /* Only add to the buffer if there is space. */
163 *buf = *p;
164 buf++;
165 buf_sz--;
166 }
167 p++;
168 }
169}
170
171/************** CHARACTER CLASSES ****************/
172
173#define MAX_CLASS_OR_COLL_LEN 6
174
175struct _char_class {
176 const char *name;
177 int (*func) (int);
178};
179
180/* List of supported character classes. */
181static const struct _char_class _char_classes[] = {
182 { "alnum", isalnum },
183 { "alpha", isalpha },
184 { "blank", isblank },
185 { "cntrl", iscntrl },
186 { "digit", isdigit },
187 { "graph", isgraph },
188 { "lower", islower },
189 { "print", isprint },
190 { "punct", ispunct },
191 { "space", isspace },
192 { "upper", isupper },
193 { "xdigit", isxdigit }
194};
195
196/**
197 * Compare function for binary search in the _char_classes array.
198 *
199 * @param key Key of the searched element.
200 * @param elem Element of _char_classes array.
201 * @return Ordering indicator (-1 less than, 0 equal, 1 greater than).
202 */
203static int _class_compare(const void *key, const void *elem)
204{
205 const struct _char_class *class = elem;
206 return strcmp((const char *) key, class->name);
207}
208
209/**
210 * Returns whether the given character belongs to the specified character class.
211 *
212 * @param cname Name of the character class.
213 * @param c Character.
214 * @return True if the character belongs to the class, false otherwise.
215 */
216static bool _is_in_class (const char *cname, int c)
217{
218 /* Search for class in the array of supported character classes. */
219 const struct _char_class *class = bsearch(cname, _char_classes,
220 sizeof(_char_classes) / sizeof(struct _char_class),
221 sizeof(struct _char_class), _class_compare);
222
223 if (class == NULL) {
224 /* No such class supported - treat as an empty class. */
225 return false;
226 } else {
227 /* Class matched. */
228 return class->func(c);
229 }
230}
231
232/**
233 * Tries to parse an initial part of the pattern as a character class pattern,
234 * and if successful, matches the beginning of the given string against the class.
235 *
236 * @param pattern Pointer to the pattern to match. Must begin with a class
237 * specifier and is repositioned to the first character after the specifier
238 * if successful.
239 * @param str String to match.
240 * @param flags Flags modifying the behavior (see fnmatch()).
241 * @return INVALID_PATTERN if the pattern doesn't start with a valid class
242 * specifier, 0 if the beginning of the matched string doesn't belong
243 * to the class, or positive number of characters matched.
244 */
245static int _match_char_class(const char **pattern, const char *str, int flags)
246{
247 char class[MAX_CLASS_OR_COLL_LEN + 1];
248
249 if (!_get_delimited(pattern, ':', class, sizeof(class), flags)) {
250 return INVALID_PATTERN;
251 }
252
253 return _is_in_class(class, *str);
254}
255
256/************** END CHARACTER CLASSES ****************/
257
258/**
259 * Reads the next collating element in the pattern, taking into account
260 * locale (if supported) and flags (see fnmatch()).
261 *
262 * @param pattern Pattern.
263 * @param flags Flags given to fnmatch().
264 * @return Collating element on success,
265 * or COLL_ELM_INVALID if the pattern is invalid.
266 */
267static coll_elm_t _next_coll_elm(const char **pattern, int flags)
268{
269 assert(pattern != NULL);
270 assert(*pattern != NULL);
271 assert(**pattern != '\0');
272
273 const char *p = *pattern;
274 const bool noescape = (flags & FNM_NOESCAPE) != 0;
275 const bool pathname = (flags & FNM_PATHNAME) != 0;
276
277 if (*p == '[') {
278 if (*(p + 1) == '.') {
279 char buf[MAX_CLASS_OR_COLL_LEN + 1];
280 if (!_get_delimited(pattern, '.', buf, sizeof(buf), flags)) {
281 return COLL_ELM_INVALID;
282 }
283 return _coll_elm_get(buf);
284 }
285
286 if (*(p + 1) == '=') {
287 char buf[MAX_CLASS_OR_COLL_LEN + 1];
288 if (!_get_delimited(pattern, '=', buf, sizeof(buf), flags)) {
289 return COLL_ELM_INVALID;
290 }
291 return _coll_elm_get(buf);
292 }
293 }
294
295 if (!noescape && *p == '\\') {
296 p++;
297 if (*p == '\0') {
298 *pattern = p;
299 return COLL_ELM_INVALID;
300 }
301 }
302 if (pathname && *p == '/') {
303 return COLL_ELM_INVALID;
304 }
305
306 *pattern = p + 1;
307 return _coll_elm_char(*p);
308}
309
310/**
311 * Matches the beginning of the given string against a bracket expression
312 * the pattern begins with.
313 *
314 * @param pattern Pointer to the beginning of a bracket expression in a pattern.
315 * On success, the pointer is moved to the first character after the
316 * bracket expression.
317 * @param str Unmatched part of the string.
318 * @param flags Flags given to fnmatch().
319 * @return INVALID_PATTERN if the pattern is invalid, 0 if there is no match
320 * or the number of matched characters on success.
321 */
322static int _match_bracket_expr(const char **pattern, const char *str, int flags)
323{
324 const bool pathname = (flags & FNM_PATHNAME) != 0;
325 const bool special_period = (flags & FNM_PERIOD) != 0;
326 const char *p = *pattern;
327 bool negative = false;
328 int matched = 0;
329
330 #define _matched(match) { \
331 int _match = match; \
332 if (_match < 0) { \
333 /* Invalid pattern */ \
334 return _match; \
335 } else if (matched == 0 && _match > 0) { \
336 /* First match */ \
337 matched = _match; \
338 } \
339 }
340
341 assert(*p == '['); /* calling code should ensure this */
342 p++;
343
344 if (*str == '\0' || (pathname && *str == '/') ||
345 (pathname && special_period && *str == '.' && *(str - 1) == '/')) {
346 /* No bracket expression matches end of string,
347 * slash in pathname match or initial period with FNM_PERIOD
348 * option.
349 */
350 return 0;
351 }
352
353 if (*p == '^' || *p == '!') {
354 negative = true;
355 p++;
356 }
357
358 if (*p == ']') {
359 /* When ']' is first, treat it as a normal character. */
360 _matched(*str == ']');
361 p++;
362 }
363
364 coll_elm_t current_elm = COLL_ELM_INVALID;
365
366 while (*p != ']') {
367 if (*p == '-' && *(p + 1) != ']' &&
368 current_elm != COLL_ELM_INVALID) {
369 /* Range expression. */
370 p++;
371 coll_elm_t end_elm = _next_coll_elm(&p, flags);
372 if (end_elm == COLL_ELM_INVALID) {
373 return INVALID_PATTERN;
374 }
375 _matched(_coll_elm_between(current_elm, end_elm, str));
376 continue;
377 }
378
379 if (*p == '[' && *(p + 1) == ':') {
380 current_elm = COLL_ELM_INVALID;
381 _matched(_match_char_class(&p, str, flags));
382 continue;
383 }
384
385 current_elm = _next_coll_elm(&p, flags);
386 if (current_elm == COLL_ELM_INVALID) {
387 return INVALID_PATTERN;
388 }
389 _matched(_coll_elm_match(current_elm, str));
390 }
391
392 /* No error occured - update pattern pointer. */
393 *pattern = p + 1;
394
395 if (matched == 0) {
396 /* No match found */
397 return negative;
398 } else {
399 /* Matched 'match' characters. */
400 return negative ? 0 : matched;
401 }
402
403 #undef _matched
404}
405
406/**
407 * Matches a portion of the pattern containing no asterisks (*) against
408 * the given string.
409 *
410 * @param pattern Pointer to the unmatched portion of the pattern.
411 * On success, the pointer is moved to the first asterisk, or to the
412 * terminating nul character, whichever occurs first.
413 * @param string Pointer to the input string. On success, the pointer is moved
414 * to the first character that wasn't explicitly matched.
415 * @param flags Flags given to fnmatch().
416 * @return True if the entire subpattern matched. False otherwise.
417 */
418static bool _partial_match(const char **pattern, const char **string, int flags)
419{
420 /* Only a single *-delimited subpattern is matched here.
421 * So in this function, '*' is understood as the end of pattern.
422 */
423
424 const bool pathname = (flags & FNM_PATHNAME) != 0;
425 const bool special_period = (flags & FNM_PERIOD) != 0;
426 const bool noescape = (flags & FNM_NOESCAPE) != 0;
427 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;
428
429 const char *s = *string;
430 const char *p = *pattern;
431
432 while (*p != '*') {
433 /* Bracket expression. */
434 if (*p == '[') {
435 int matched = _match_bracket_expr(&p, s, flags);
436 if (matched == 0) {
437 /* Doesn't match. */
438 return false;
439 }
440 if (matched != INVALID_PATTERN) {
441 s += matched;
442 continue;
443 }
444
445 assert(matched == INVALID_PATTERN);
446 /* Fall through to match [ as an ordinary character. */
447 }
448
449 /* Wildcard match. */
450 if (*p == '?') {
451 if (*s == '\0') {
452 /* No character to match. */
453 return false;
454 }
455 if (pathname && *s == '/') {
456 /* Slash must be matched explicitly. */
457 return false;
458 }
459 if (special_period && pathname &&
460 *s == '.' && *(s - 1) == '/') {
461 /* Initial period must be matched explicitly. */
462 return false;
463 }
464
465 /* None of the above, match anything else. */
466 p++;
467 s++;
468 continue;
469 }
470
471 if (!noescape && *p == '\\') {
472 /* Escaped character. */
473 p++;
474 }
475
476 if (*p == '\0') {
477 /* End of pattern, must match end of string or
478 * an end of subdirectory name (optional).
479 */
480
481 if (*s == '\0' || (leading_dir && *s == '/')) {
482 break;
483 }
484
485 return false;
486 }
487
488 if (*p == *s) {
489 /* Exact match. */
490 p++;
491 s++;
492 continue;
493 }
494
495 /* Nothing matched. */
496 return false;
497 }
498
499 /* Entire sub-pattern matched. */
500
501 /* postconditions */
502 assert(*p == '\0' || *p == '*');
503 assert(*p != '\0' || *s == '\0' || (leading_dir && *s == '/'));
504
505 *pattern = p;
506 *string = s;
507 return true;
508}
509
510/**
511 * Match string against a pattern.
512 *
513 * @param pattern Pattern.
514 * @param string String to match.
515 * @param flags Flags given to fnmatch().
516 * @return True if the string matched the pattern, false otherwise.
517 */
518static bool _full_match(const char *pattern, const char *string, int flags)
519{
520 const bool pathname = (flags & FNM_PATHNAME) != 0;
521 const bool special_period = (flags & FNM_PERIOD) != 0;
522 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;
523
524 if (special_period && *string == '.') {
525 /* Initial dot must be matched by an explicit dot in pattern. */
526 if (*pattern != '.') {
527 return false;
528 }
529 pattern++;
530 string++;
531 }
532
533 if (*pattern != '*') {
534 if (!_partial_match(&pattern, &string, flags)) {
535 /* The initial match must succeed. */
536 return false;
537 }
538 }
539
540 while (*pattern != '\0') {
541 assert(*pattern == '*');
542 pattern++;
543
544 bool matched = false;
545
546 const char *end;
547 if (pathname && special_period &&
548 *string == '.' && *(string - 1) == '/') {
549 end = string;
550 } else {
551 end = strchrnul(string, pathname ? '/' : '\0');
552 }
553
554 /* Try to match every possible offset. */
555 while (string <= end) {
556 if (_partial_match(&pattern, &string, flags)) {
557 matched = true;
558 break;
559 }
560 string++;
561 }
562
563 if (matched) {
564 continue;
565 }
566
567 return false;
568 }
569
570 return *string == '\0' || (leading_dir && *string == '/');
571}
572
573/**
574 * Transform the entire string to lowercase.
575 *
576 * @param s Input string.
577 * @return Newly allocated copy of the input string with all uppercase
578 * characters folded to their lowercase variants.
579 */
580static char *_casefold(const char *s)
581{
582 assert(s != NULL);
583 char *result = strdup(s);
584 for (char *i = result; *i != '\0'; ++i) {
585 *i = tolower(*i);
586 }
587 return result;
588}
589
590/**
591 * Filename pattern matching.
592 *
593 * @param pattern Pattern to match the string against.
594 * @param string Matched string.
595 * @param flags Flags altering the matching of special characters
596 * (mainly for dot and slash).
597 * @return Zero if the string matches the pattern, FNM_NOMATCH otherwise.
598 */
599int posix_fnmatch(const char *pattern, const char *string, int flags)
600{
601 assert(pattern != NULL);
602 assert(string != NULL);
603
604 // TODO: don't fold everything in advance, but only when needed
605
606 if ((flags & FNM_CASEFOLD) != 0) {
607 /* Just fold the entire pattern and string. */
608 pattern = _casefold(pattern);
609 string = _casefold(string);
610 }
611
612 bool result = _full_match(pattern, string, flags);
613
614 if ((flags & FNM_CASEFOLD) != 0) {
615 if (pattern) {
616 free((char *) pattern);
617 }
618 if (string) {
619 free((char *) string);
620 }
621 }
622
623 return result ? 0 : FNM_NOMATCH;
624}
625
626// FIXME: put the testcases to the app/tester after fnmatch is included into libc
627
628#if 0
629
630#include <stdio.h>
631
632void __posix_fnmatch_test()
633{
634 int fail = 0;
635
636 #undef assert
637 #define assert(x) { if (x) printf("SUCCESS: "#x"\n"); else { printf("FAILED: "#x"\n"); fail++; } }
638 #define match(s1, s2, flags) assert(posix_fnmatch(s1, s2, flags) == 0)
639 #define nomatch(s1, s2, flags) assert(posix_fnmatch(s1, s2, flags) == FNM_NOMATCH)
640
641 assert(FNM_PATHNAME == FNM_FILE_NAME);
642 match("", "", 0);
643 match("*", "hello", 0);
644 match("hello", "hello", 0);
645 match("hello*", "hello", 0);
646 nomatch("hello?", "hello", 0);
647 match("*hello", "prdel hello", 0);
648 match("he[sl]lo", "hello", 0);
649 match("he[sl]lo", "heslo", 0);
650 nomatch("he[sl]lo", "heblo", 0);
651 nomatch("he[^sl]lo", "hello", 0);
652 nomatch("he[^sl]lo", "heslo", 0);
653 match("he[^sl]lo", "heblo", 0);
654 nomatch("he[!sl]lo", "hello", 0);
655 nomatch("he[!sl]lo", "heslo", 0);
656 match("he[!sl]lo", "heblo", 0);
657 match("al*[c-t]a*vis*ta", "alheimer talir jehovista", 0);
658 match("al*[c-t]a*vis*ta", "alfons had jehovista", 0);
659 match("[a-ce-z]", "a", 0);
660 match("[a-ce-z]", "c", 0);
661 nomatch("[a-ce-z]", "d", 0);
662 match("[a-ce-z]", "e", 0);
663 match("[a-ce-z]", "z", 0);
664 nomatch("[^a-ce-z]", "a", 0);
665 nomatch("[^a-ce-z]", "c", 0);
666 match("[^a-ce-z]", "d", 0);
667 nomatch("[^a-ce-z]", "e", 0);
668 nomatch("[^a-ce-z]", "z", 0);
669 match("helen??", "helenos", 0);
670 match("****booo****", "booo", 0);
671
672 match("hello[[:space:]]world", "hello world", 0);
673 nomatch("hello[[:alpha:]]world", "hello world", 0);
674
675 match("/hoooo*", "/hooooooo/hooo", 0);
676 nomatch("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME);
677 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME);
678 match("/hoooo*/*", "/hooooooo/hooo", FNM_PATHNAME);
679 match("/hoooo*/hooo", "/hooooooo/hooo", FNM_PATHNAME);
680 match("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
681 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
682 nomatch("/hoooo", "/hooooooo/hooo", FNM_LEADING_DIR);
683 match("/hooooooo", "/hooooooo/hooo", FNM_LEADING_DIR);
684
685 match("*", "hell", 0);
686 match("*?", "hell", 0);
687 match("?*?", "hell", 0);
688 match("?*??", "hell", 0);
689 match("??*??", "hell", 0);
690 nomatch("???*??", "hell", 0);
691
692 nomatch("", "hell", 0);
693 nomatch("?", "hell", 0);
694 nomatch("??", "hell", 0);
695 nomatch("???", "hell", 0);
696 match("????", "hell", 0);
697
698 match("*", "h.ello", FNM_PERIOD);
699 match("*", "h.ello", FNM_PATHNAME | FNM_PERIOD);
700 nomatch("*", ".hello", FNM_PERIOD);
701 match("h?ello", "h.ello", FNM_PERIOD);
702 nomatch("?hello", ".hello", FNM_PERIOD);
703 match("/home/user/.*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);
704 match("/home/user/*", "/home/user/.hello", FNM_PERIOD);
705 nomatch("/home/user/*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);
706
707 nomatch("HeLlO", "hello", 0);
708 match("HeLlO", "hello", FNM_CASEFOLD);
709
710 printf("Failed: %d\n", fail);
711}
712
713#endif
714
715/** @}
716 */
Note: See TracBrowser for help on using the repository browser.