source: mainline/uspace/lib/posix/fnmatch.c@ 5273eb6

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 5273eb6 was a43fbc95, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 14 years ago

fnmatch.h: Finish and fix bugs. Also add some basic tests (at the bottom, commented out until a proper place is found).

  • Property mode set to 100644
File size: 14.4 KB
RevLine 
[be64a84]1/*
2 * Copyright (c) 2011 Jiri Zarevucky
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup libposix
30 * @{
31 */
32/** @file
33 */
34
[a43fbc95]35// TODO: clean this up a bit
36
37#include "stdbool.h"
38#include "ctype.h"
39#include "string.h"
40#include "stdlib.h"
41#include "assert.h"
42
[be64a84]43#define LIBPOSIX_INTERNAL
44
45#include "internal/common.h"
46#include "fnmatch.h"
47
[65ed8f4]48#define INVALID_PATTERN -1
49
50/* Type for collating element, simple identity with characters now,
51 * but may be extended for better locale support.
52 */
53typedef int _coll_elm_t;
54
55#define COLL_ELM_INVALID -1
56
[a43fbc95]57/** Get collating element matching a string.
58 *
59 * @param str
60 * @return
61 */
[65ed8f4]62static _coll_elm_t _coll_elm_get(const char* str)
63{
64 if (str[0] == '\0' || str[1] != '\0') {
65 return COLL_ELM_INVALID;
66 }
67 return str[0];
68}
69
[a43fbc95]70/** Get collating element matching a single character.
71 *
72 * @param c
73 * @return
74 */
[65ed8f4]75static _coll_elm_t _coll_elm_char(int c)
76{
77 return c;
78}
79
[a43fbc95]80/** Match collating element with a beginning of a string.
[65ed8f4]81 *
82 * @param elm
[a43fbc95]83 * @param str
[65ed8f4]84 * @return 0 if the element doesn't match, or the number of characters matched.
85 */
86static int _coll_elm_match(_coll_elm_t elm, const char *str)
87{
88 return elm == *str;
89}
90
91static int _coll_elm_between(_coll_elm_t first, _coll_elm_t second,
92 const char *str)
93{
94 return *str >= first && *str <= second;
95}
96
97/** Read a string delimited by [? and ?].
98 *
99 * @param pattern Pointer to the string to read from. Its position is moved
100 * to the first character after the closing ].
101 * @param seq The character on the inside of brackets.
102 * @param buf Read buffer.
103 * @param buf_sz Read buffer's size. If the buffer is not large enough for
104 * the entire string, the string is cut with no error indication.
105 * @return
106 */
107static bool _get_delimited(
108 const char **pattern, int seq,
109 char *buf, size_t buf_sz, int flags)
110{
111 const bool noescape = (flags & FNM_NOESCAPE) != 0;
112 const bool pathname = (flags & FNM_PATHNAME) != 0;
113
114 const char *p = *pattern;
115 assert(p[0] == '[' && p[1] == seq /* Caller should ensure this. */);
116 p += 2;
117
118 while (true) {
119 if (*p == seq && *(p + 1) == ']') {
120 /* String properly ended, return. */
121 *pattern = p + 2;
122 *buf = '\0';
123 return true;
124 }
125 if (!noescape && *p == '\\') {
126 p++;
127 }
128 if (*p == '\0') {
129 /* String not ended properly, invalid pattern. */
130 return false;
131 }
132 if (pathname && *p == '/') {
133 /* Slash in a pathname pattern is invalid. */
134 return false;
135 }
136 if (buf_sz > 1) {
137 /* Only add to the buffer if there is space. */
138 *buf = *p;
139 buf++;
140 buf_sz--;
141 }
142 p++;
143 }
144}
145
146/************** CHARACTER CLASSES ****************/
147
148#define MAX_CLASS_OR_COLL_LEN 6
149
150struct _char_class {
151 const char *name;
152 int (*func) (int);
153};
154
155/* List of supported character classes. */
156static const struct _char_class _char_classes[] = {
157 { "alnum", isalnum },
158 { "alpha", isalpha },
[a43fbc95]159 { "blank", isblank },
160 { "cntrl", iscntrl },
[65ed8f4]161 { "digit", isdigit },
[a43fbc95]162 { "graph", isgraph },
[65ed8f4]163 { "lower", islower },
[a43fbc95]164 { "print", isprint },
165 { "punct", ispunct },
[65ed8f4]166 { "space", isspace },
167 { "upper", isupper },
[a43fbc95]168 { "xdigit", isxdigit }
[65ed8f4]169};
170
171static int _class_compare(const void *key, const void *elem)
172{
173 const struct _char_class *class = elem;
[a43fbc95]174 return strcmp((const char *) key, class->name);
[65ed8f4]175}
176
177static bool _is_in_class (const char *cname, int c)
178{
179 /* Search for class in the array of supported character classes. */
[a43fbc95]180 const struct _char_class *class = bsearch(cname, _char_classes,
[65ed8f4]181 sizeof(_char_classes) / sizeof(struct _char_class),
182 sizeof(struct _char_class), _class_compare);
183
184 if (class == NULL) {
185 /* No such class supported - treat as an empty class. */
186 return false;
187 } else {
188 /* Class matched. */
189 return class->func(c);
190 }
191}
192
193static int _match_char_class(const char **pattern, const char *str, int flags)
194{
195 char class[MAX_CLASS_OR_COLL_LEN + 1];
196
197 if (!_get_delimited(pattern, ':', class, sizeof(class), flags)) {
198 return INVALID_PATTERN;
199 }
200
201 return _is_in_class(class, *str);
202}
203
204/************** END CHARACTER CLASSES ****************/
205
206static _coll_elm_t _next_coll_elm(const char **pattern, int flags)
207{
208 const char *p = *pattern;
209 const bool noescape = (flags & FNM_NOESCAPE) != 0;
210 const bool pathname = (flags & FNM_PATHNAME) != 0;
211
212 if (*p == '[') {
213 if (*(p + 1) == '.') {
214 char buf[MAX_CLASS_OR_COLL_LEN + 1];
215 if (!_get_delimited(pattern, '.', buf, sizeof(buf), flags)) {
216 return COLL_ELM_INVALID;
217 }
218 return _coll_elm_get(buf);
219 }
220
221 if (*(p + 1) == '=') {
222 char buf[MAX_CLASS_OR_COLL_LEN + 1];
223 if (!_get_delimited(pattern, '=', buf, sizeof(buf), flags)) {
224 return COLL_ELM_INVALID;
225 }
226 return _coll_elm_get(buf);
227 }
228 }
229
230 if (!noescape && *p == '\\') {
231 p++;
232 }
233 if (pathname && *p == '/') {
234 return COLL_ELM_INVALID;
235 }
236
237 *pattern = p + 1;
238 return _coll_elm_char(*p);
239}
240
241/**
242 *
243 */
244static int _match_bracket_expr(const char **pattern, const char *str, int flags)
245{
246 const bool pathname = (flags & FNM_PATHNAME) != 0;
247 const bool special_period = (flags & FNM_PERIOD) != 0;
248 const char *p = *pattern;
249 bool negative = false;
250 int matched = 0;
251
252 #define _matched(match) { \
253 int _match = match; \
254 if (_match < 0) { \
255 /* Invalid pattern */ \
256 return _match; \
257 } else if (matched == 0 && _match > 0) { \
258 /* First match */ \
259 matched = _match; \
260 } \
261 }
262
263 assert(*p == '['); /* calling code should ensure this */
264 p++;
265
266 if (*str == '\0' || (pathname && *str == '/') ||
267 (pathname && special_period && *str == '.' && *(str - 1) == '/')) {
268 /* No bracket expression matches end of string,
269 * slash in pathname match or initial period with FNM_PERIOD
270 * option.
271 */
272 return 0;
273 }
274
275 if (*p == '^' || *p == '!') {
276 negative = true;
277 p++;
278 }
279
280 if (*p == ']') {
281 /* When ']' is first, treat it as a normal character. */
282 _matched(*str == ']');
283 p++;
284 }
285
286 _coll_elm_t current_elm = COLL_ELM_INVALID;
287
288 while (*p != ']') {
289 if (*p == '-' && *(p + 1) != ']' &&
290 current_elm != COLL_ELM_INVALID) {
291 /* Range expression. */
292 p++;
293 _coll_elm_t end_elm = _next_coll_elm(&p, flags);
294 if (end_elm == COLL_ELM_INVALID) {
295 return INVALID_PATTERN;
296 }
297 _matched(_coll_elm_between(current_elm, end_elm, str));
298 continue;
299 }
300
301 if (*p == '[' && *(p + 1) == ':') {
302 current_elm = COLL_ELM_INVALID;
303 _matched(_match_char_class(&p, str, flags));
304 continue;
305 }
306
307 current_elm = _next_coll_elm(&p, flags);
308 if (current_elm == COLL_ELM_INVALID) {
309 return INVALID_PATTERN;
310 }
311 _matched(_coll_elm_match(current_elm, str));
312 }
313
314 /* No error occured - update pattern pointer. */
315 *pattern = p + 1;
316
317 if (matched == 0) {
318 /* No match found */
319 return negative;
320 } else {
321 /* Matched 'match' characters. */
322 return negative ? 0 : matched;
323 }
324
325 #undef _matched
326}
327
328/**
329 *
330 */
331static bool _partial_match(const char **pattern, const char **string, int flags)
332{
333 /* Only a single *-delimited subpattern is matched here.
334 * So in this function, '*' is understood as the end of pattern.
335 */
336
337 const bool pathname = (flags & FNM_PATHNAME) != 0;
338 const bool special_period = (flags & FNM_PERIOD) != 0;
339 const bool noescape = (flags & FNM_NOESCAPE) != 0;
[a43fbc95]340 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;
341
[65ed8f4]342 const char *s = *string;
343 const char *p = *pattern;
344
[a43fbc95]345 while (*p != '*') {
346 /* Bracket expression. */
[65ed8f4]347 if (*p == '[') {
348 int matched = _match_bracket_expr(&p, s, flags);
349 if (matched == 0) {
350 /* Doesn't match. */
351 return false;
352 }
[a43fbc95]353 if (matched != INVALID_PATTERN) {
[65ed8f4]354 s += matched;
355 continue;
356 }
[a43fbc95]357
358 assert(matched == INVALID_PATTERN);
359 /* Fall through to match [ as an ordinary character. */
[65ed8f4]360 }
361
[a43fbc95]362 /* Wildcard match. */
[65ed8f4]363 if (*p == '?') {
[a43fbc95]364 if (*s == '\0') {
365 /* No character to match. */
[65ed8f4]366 return false;
367 }
[a43fbc95]368 if (pathname && *s == '/') {
369 /* Slash must be matched explicitly. */
370 return false;
371 }
372 if (special_period && pathname &&
373 *s == '.' && *(s - 1) == '/') {
374 /* Initial period must be matched explicitly. */
375 return false;
376 }
377
378 /* None of the above, match anything else. */
[65ed8f4]379 p++;
380 s++;
381 continue;
382 }
383
384 if (!noescape && *p == '\\') {
385 /* Escaped character. */
386 p++;
387 }
388
[a43fbc95]389 if (*p == '\0') {
390 /* End of pattern, must match end of string or
391 * an end of subdirectory name (optional).
392 */
393
394 if (*s == '\0' || (leading_dir && *s == '/')) {
[65ed8f4]395 break;
396 }
[a43fbc95]397
398 return false;
399 }
400
401 if (*p == *s) {
402 /* Exact match. */
403 p++;
404 s++;
[65ed8f4]405 continue;
406 }
407
408 /* Nothing matched. */
409 return false;
410 }
411
[a43fbc95]412 /* Entire sub-pattern matched. */
413
414 /* postconditions */
415 assert(*p == '\0' || *p == '*');
416 assert(*p != '\0' || *s == '\0' || (leading_dir && *s == '/'));
417
[65ed8f4]418 *pattern = p;
419 *string = s;
420 return true;
421}
422
423static bool _full_match(const char *pattern, const char *string, int flags)
424{
[a43fbc95]425 const bool pathname = (flags & FNM_PATHNAME) != 0;
[65ed8f4]426 const bool special_period = (flags & FNM_PERIOD) != 0;
[a43fbc95]427 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;
[65ed8f4]428
429 if (special_period && *string == '.') {
430 /* Initial dot must be matched by an explicit dot in pattern. */
431 if (*pattern != '.') {
432 return false;
433 }
434 pattern++;
435 string++;
436 }
437
438 if (*pattern != '*') {
439 if (!_partial_match(&pattern, &string, flags)) {
440 /* The initial match must succeed. */
441 return false;
442 }
443 }
444
445 while (*pattern != '\0') {
446 assert(*pattern == '*');
[a43fbc95]447 pattern++;
448
449 bool matched = false;
[65ed8f4]450
[a43fbc95]451 const char *end;
452 if (pathname && special_period &&
453 *string == '.' && *(string - 1) == '/') {
454 end = string;
455 } else {
456 end= strchrnul(string, pathname ? '/' : '\0');
[65ed8f4]457 }
458
459 /* Try to match every possible offset. */
[a43fbc95]460 while (string <= end) {
[65ed8f4]461 if (_partial_match(&pattern, &string, flags)) {
[a43fbc95]462 matched = true;
[65ed8f4]463 break;
464 }
465 string++;
466 }
[a43fbc95]467
468 if (matched) {
469 continue;
470 }
471
472 return false;
[65ed8f4]473 }
474
[a43fbc95]475 return *string == '\0' || (leading_dir && *string == '/');
476}
477
478static char *_casefold(const char *s)
479{
480 char *result = strdup(s);
481 for (char *i = result; *i != '\0'; ++i) {
482 *i = tolower(*i);
483 }
484 return result;
[65ed8f4]485}
486
[be64a84]487/**
488 * Filename pattern matching.
489 *
490 * @param pattern
491 * @param string
492 * @param flags
493 * @return
494 */
495int posix_fnmatch(const char *pattern, const char *string, int flags)
496{
[a43fbc95]497 // TODO: don't fold everything in advance, but only when needed
498
499 if ((flags & FNM_CASEFOLD) != 0) {
500 /* Just fold the entire pattern and string. */
501 pattern = _casefold(pattern);
502 string = _casefold(string);
503 }
504
[65ed8f4]505 bool result = _full_match(pattern, string, flags);
[a43fbc95]506
507 if ((flags & FNM_CASEFOLD) != 0) {
508 free((char *) pattern);
509 free((char *) string);
510 }
511
[65ed8f4]512 return result ? 0 : FNM_NOMATCH;
[be64a84]513}
514
[a43fbc95]515// FIXME: put the testcases somewhere else
516
517#if 0
518
519#include <stdio.h>
520
521void __posix_fnmatch_test()
522{
523 int fail = 0;
524
525 #undef assert
526 #define assert(x) { if (x) printf("SUCCESS: "#x"\n"); else { printf("FAILED: "#x"\n"); fail++; } }
527 #define match(s1, s2, flags) assert(posix_fnmatch(s1, s2, flags) == 0)
528 #define nomatch(s1, s2, flags) assert(posix_fnmatch(s1, s2, flags) == FNM_NOMATCH)
529
530 assert(FNM_PATHNAME == FNM_FILE_NAME);
531 match("", "", 0);
532 match("*", "hello", 0);
533 match("hello", "hello", 0);
534 match("hello*", "hello", 0);
535 nomatch("hello?", "hello", 0);
536 match("*hello", "prdel hello", 0);
537 match("he[sl]lo", "hello", 0);
538 match("he[sl]lo", "heslo", 0);
539 nomatch("he[sl]lo", "heblo", 0);
540 nomatch("he[^sl]lo", "hello", 0);
541 nomatch("he[^sl]lo", "heslo", 0);
542 match("he[^sl]lo", "heblo", 0);
543 nomatch("he[!sl]lo", "hello", 0);
544 nomatch("he[!sl]lo", "heslo", 0);
545 match("he[!sl]lo", "heblo", 0);
546 match("al*[c-t]a*vis*ta", "alheimer talir jehovista", 0);
547 match("al*[c-t]a*vis*ta", "alfons had jehovista", 0);
548 match("[a-ce-z]", "a", 0);
549 match("[a-ce-z]", "c", 0);
550 nomatch("[a-ce-z]", "d", 0);
551 match("[a-ce-z]", "e", 0);
552 match("[a-ce-z]", "z", 0);
553 nomatch("[^a-ce-z]", "a", 0);
554 nomatch("[^a-ce-z]", "c", 0);
555 match("[^a-ce-z]", "d", 0);
556 nomatch("[^a-ce-z]", "e", 0);
557 nomatch("[^a-ce-z]", "z", 0);
558 match("helen??", "helenos", 0);
559 match("****booo****", "booo", 0);
560
561 match("hello[[:space:]]world", "hello world", 0);
562 nomatch("hello[[:alpha:]]world", "hello world", 0);
563
564 match("/hoooo*", "/hooooooo/hooo", 0);
565 nomatch("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME);
566 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME);
567 match("/hoooo*/*", "/hooooooo/hooo", FNM_PATHNAME);
568 match("/hoooo*/hooo", "/hooooooo/hooo", FNM_PATHNAME);
569 match("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
570 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
571 nomatch("/hoooo", "/hooooooo/hooo", FNM_LEADING_DIR);
572 match("/hooooooo", "/hooooooo/hooo", FNM_LEADING_DIR);
573
574 match("*", "hell", 0);
575 match("*?", "hell", 0);
576 match("?*?", "hell", 0);
577 match("?*??", "hell", 0);
578 match("??*??", "hell", 0);
579 nomatch("???*??", "hell", 0);
580
581 nomatch("", "hell", 0);
582 nomatch("?", "hell", 0);
583 nomatch("??", "hell", 0);
584 nomatch("???", "hell", 0);
585 match("????", "hell", 0);
586
587 match("*", "h.ello", FNM_PERIOD);
588 match("*", "h.ello", FNM_PATHNAME | FNM_PERIOD);
589 nomatch("*", ".hello", FNM_PERIOD);
590 match("h?ello", "h.ello", FNM_PERIOD);
591 nomatch("?hello", ".hello", FNM_PERIOD);
592 match("/home/user/.*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);
593 match("/home/user/*", "/home/user/.hello", FNM_PERIOD);
594 nomatch("/home/user/*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);
595
596 nomatch("HeLlO", "hello", 0);
597 match("HeLlO", "hello", FNM_CASEFOLD);
598
599 printf("Failed: %d\n", fail);
600}
601
602#endif
603
[be64a84]604/** @}
605 */
606
Note: See TracBrowser for help on using the repository browser.