source: mainline/uspace/lib/posix/fnmatch.c@ cfbb5d18

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since cfbb5d18 was a12f7f1, checked in by Petr Koupy <petr.koupy@…>, 14 years ago

All occurences of call to native free() secured from passing NULL pointer.

  • Property mode set to 100644
File size: 14.9 KB
Line 
1/*
2 * Copyright (c) 2011 Jiri Zarevucky
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup libposix
30 * @{
31 */
32/** @file Filename-matching.
33 */
34
35// TODO: clean this up a bit
36
37#include "stdbool.h"
38#include "ctype.h"
39#include "string.h"
40#include "stdlib.h"
41#include "assert.h"
42
43#define LIBPOSIX_INTERNAL
44
45#include "internal/common.h"
46#include "fnmatch.h"
47
48// TODO: documentation
49
50#define INVALID_PATTERN -1
51
52/* Type for collating element, simple identity with characters now,
53 * but may be extended for better locale support.
54 */
55typedef int _coll_elm_t;
56
57#define COLL_ELM_INVALID -1
58
59/** Get collating element matching a string.
60 *
61 * @param str
62 * @return
63 */
64static _coll_elm_t _coll_elm_get(const char* str)
65{
66 if (str[0] == '\0' || str[1] != '\0') {
67 return COLL_ELM_INVALID;
68 }
69 return str[0];
70}
71
72/** Get collating element matching a single character.
73 *
74 * @param c
75 * @return
76 */
77static _coll_elm_t _coll_elm_char(int c)
78{
79 return c;
80}
81
82/** Match collating element with a beginning of a string.
83 *
84 * @param elm
85 * @param str
86 * @return 0 if the element doesn't match, or the number of characters matched.
87 */
88static int _coll_elm_match(_coll_elm_t elm, const char *str)
89{
90 return elm == *str;
91}
92
93static int _coll_elm_between(_coll_elm_t first, _coll_elm_t second,
94 const char *str)
95{
96 return *str >= first && *str <= second;
97}
98
99/** Read a string delimited by [? and ?].
100 *
101 * @param pattern Pointer to the string to read from. Its position is moved
102 * to the first character after the closing ].
103 * @param seq The character on the inside of brackets.
104 * @param buf Read buffer.
105 * @param buf_sz Read buffer's size. If the buffer is not large enough for
106 * the entire string, the string is cut with no error indication.
107 * @return
108 */
109static bool _get_delimited(
110 const char **pattern, int seq,
111 char *buf, size_t buf_sz, int flags)
112{
113 const bool noescape = (flags & FNM_NOESCAPE) != 0;
114 const bool pathname = (flags & FNM_PATHNAME) != 0;
115
116 const char *p = *pattern;
117 assert(p[0] == '[' && p[1] == seq /* Caller should ensure this. */);
118 p += 2;
119
120 while (true) {
121 if (*p == seq && *(p + 1) == ']') {
122 /* String properly ended, return. */
123 *pattern = p + 2;
124 *buf = '\0';
125 return true;
126 }
127 if (!noescape && *p == '\\') {
128 p++;
129 }
130 if (*p == '\0') {
131 /* String not ended properly, invalid pattern. */
132 return false;
133 }
134 if (pathname && *p == '/') {
135 /* Slash in a pathname pattern is invalid. */
136 return false;
137 }
138 if (buf_sz > 1) {
139 /* Only add to the buffer if there is space. */
140 *buf = *p;
141 buf++;
142 buf_sz--;
143 }
144 p++;
145 }
146}
147
148/************** CHARACTER CLASSES ****************/
149
150#define MAX_CLASS_OR_COLL_LEN 6
151
152struct _char_class {
153 const char *name;
154 int (*func) (int);
155};
156
157/* List of supported character classes. */
158static const struct _char_class _char_classes[] = {
159 { "alnum", isalnum },
160 { "alpha", isalpha },
161 { "blank", isblank },
162 { "cntrl", iscntrl },
163 { "digit", isdigit },
164 { "graph", isgraph },
165 { "lower", islower },
166 { "print", isprint },
167 { "punct", ispunct },
168 { "space", isspace },
169 { "upper", isupper },
170 { "xdigit", isxdigit }
171};
172
173/**
174 *
175 * @param key
176 * @param elem
177 * @return
178 */
179static int _class_compare(const void *key, const void *elem)
180{
181 const struct _char_class *class = elem;
182 return strcmp((const char *) key, class->name);
183}
184
185/**
186 *
187 * @param cname
188 * @param c
189 * @return
190 */
191static bool _is_in_class (const char *cname, int c)
192{
193 /* Search for class in the array of supported character classes. */
194 const struct _char_class *class = bsearch(cname, _char_classes,
195 sizeof(_char_classes) / sizeof(struct _char_class),
196 sizeof(struct _char_class), _class_compare);
197
198 if (class == NULL) {
199 /* No such class supported - treat as an empty class. */
200 return false;
201 } else {
202 /* Class matched. */
203 return class->func(c);
204 }
205}
206
207/**
208 *
209 * @param pattern
210 * @param str
211 * @param flags
212 * @return
213 */
214static int _match_char_class(const char **pattern, const char *str, int flags)
215{
216 char class[MAX_CLASS_OR_COLL_LEN + 1];
217
218 if (!_get_delimited(pattern, ':', class, sizeof(class), flags)) {
219 return INVALID_PATTERN;
220 }
221
222 return _is_in_class(class, *str);
223}
224
225/************** END CHARACTER CLASSES ****************/
226
227/**
228 *
229 * @param pattern
230 * @param flags
231 * @return
232 */
233static _coll_elm_t _next_coll_elm(const char **pattern, int flags)
234{
235 const char *p = *pattern;
236 const bool noescape = (flags & FNM_NOESCAPE) != 0;
237 const bool pathname = (flags & FNM_PATHNAME) != 0;
238
239 if (*p == '[') {
240 if (*(p + 1) == '.') {
241 char buf[MAX_CLASS_OR_COLL_LEN + 1];
242 if (!_get_delimited(pattern, '.', buf, sizeof(buf), flags)) {
243 return COLL_ELM_INVALID;
244 }
245 return _coll_elm_get(buf);
246 }
247
248 if (*(p + 1) == '=') {
249 char buf[MAX_CLASS_OR_COLL_LEN + 1];
250 if (!_get_delimited(pattern, '=', buf, sizeof(buf), flags)) {
251 return COLL_ELM_INVALID;
252 }
253 return _coll_elm_get(buf);
254 }
255 }
256
257 if (!noescape && *p == '\\') {
258 p++;
259 }
260 if (pathname && *p == '/') {
261 return COLL_ELM_INVALID;
262 }
263
264 *pattern = p + 1;
265 return _coll_elm_char(*p);
266}
267
268/**
269 *
270 * @param pattern
271 * @param str
272 * @param flags
273 * @return
274 */
275static int _match_bracket_expr(const char **pattern, const char *str, int flags)
276{
277 const bool pathname = (flags & FNM_PATHNAME) != 0;
278 const bool special_period = (flags & FNM_PERIOD) != 0;
279 const char *p = *pattern;
280 bool negative = false;
281 int matched = 0;
282
283 #define _matched(match) { \
284 int _match = match; \
285 if (_match < 0) { \
286 /* Invalid pattern */ \
287 return _match; \
288 } else if (matched == 0 && _match > 0) { \
289 /* First match */ \
290 matched = _match; \
291 } \
292 }
293
294 assert(*p == '['); /* calling code should ensure this */
295 p++;
296
297 if (*str == '\0' || (pathname && *str == '/') ||
298 (pathname && special_period && *str == '.' && *(str - 1) == '/')) {
299 /* No bracket expression matches end of string,
300 * slash in pathname match or initial period with FNM_PERIOD
301 * option.
302 */
303 return 0;
304 }
305
306 if (*p == '^' || *p == '!') {
307 negative = true;
308 p++;
309 }
310
311 if (*p == ']') {
312 /* When ']' is first, treat it as a normal character. */
313 _matched(*str == ']');
314 p++;
315 }
316
317 _coll_elm_t current_elm = COLL_ELM_INVALID;
318
319 while (*p != ']') {
320 if (*p == '-' && *(p + 1) != ']' &&
321 current_elm != COLL_ELM_INVALID) {
322 /* Range expression. */
323 p++;
324 _coll_elm_t end_elm = _next_coll_elm(&p, flags);
325 if (end_elm == COLL_ELM_INVALID) {
326 return INVALID_PATTERN;
327 }
328 _matched(_coll_elm_between(current_elm, end_elm, str));
329 continue;
330 }
331
332 if (*p == '[' && *(p + 1) == ':') {
333 current_elm = COLL_ELM_INVALID;
334 _matched(_match_char_class(&p, str, flags));
335 continue;
336 }
337
338 current_elm = _next_coll_elm(&p, flags);
339 if (current_elm == COLL_ELM_INVALID) {
340 return INVALID_PATTERN;
341 }
342 _matched(_coll_elm_match(current_elm, str));
343 }
344
345 /* No error occured - update pattern pointer. */
346 *pattern = p + 1;
347
348 if (matched == 0) {
349 /* No match found */
350 return negative;
351 } else {
352 /* Matched 'match' characters. */
353 return negative ? 0 : matched;
354 }
355
356 #undef _matched
357}
358
359/**
360 *
361 * @param pattern
362 * @param string
363 * @param flags
364 * @return
365 */
366static bool _partial_match(const char **pattern, const char **string, int flags)
367{
368 /* Only a single *-delimited subpattern is matched here.
369 * So in this function, '*' is understood as the end of pattern.
370 */
371
372 const bool pathname = (flags & FNM_PATHNAME) != 0;
373 const bool special_period = (flags & FNM_PERIOD) != 0;
374 const bool noescape = (flags & FNM_NOESCAPE) != 0;
375 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;
376
377 const char *s = *string;
378 const char *p = *pattern;
379
380 while (*p != '*') {
381 /* Bracket expression. */
382 if (*p == '[') {
383 int matched = _match_bracket_expr(&p, s, flags);
384 if (matched == 0) {
385 /* Doesn't match. */
386 return false;
387 }
388 if (matched != INVALID_PATTERN) {
389 s += matched;
390 continue;
391 }
392
393 assert(matched == INVALID_PATTERN);
394 /* Fall through to match [ as an ordinary character. */
395 }
396
397 /* Wildcard match. */
398 if (*p == '?') {
399 if (*s == '\0') {
400 /* No character to match. */
401 return false;
402 }
403 if (pathname && *s == '/') {
404 /* Slash must be matched explicitly. */
405 return false;
406 }
407 if (special_period && pathname &&
408 *s == '.' && *(s - 1) == '/') {
409 /* Initial period must be matched explicitly. */
410 return false;
411 }
412
413 /* None of the above, match anything else. */
414 p++;
415 s++;
416 continue;
417 }
418
419 if (!noescape && *p == '\\') {
420 /* Escaped character. */
421 p++;
422 }
423
424 if (*p == '\0') {
425 /* End of pattern, must match end of string or
426 * an end of subdirectory name (optional).
427 */
428
429 if (*s == '\0' || (leading_dir && *s == '/')) {
430 break;
431 }
432
433 return false;
434 }
435
436 if (*p == *s) {
437 /* Exact match. */
438 p++;
439 s++;
440 continue;
441 }
442
443 /* Nothing matched. */
444 return false;
445 }
446
447 /* Entire sub-pattern matched. */
448
449 /* postconditions */
450 assert(*p == '\0' || *p == '*');
451 assert(*p != '\0' || *s == '\0' || (leading_dir && *s == '/'));
452
453 *pattern = p;
454 *string = s;
455 return true;
456}
457
458/**
459 *
460 * @param pattern
461 * @param string
462 * @param flags
463 * @return
464 */
465static bool _full_match(const char *pattern, const char *string, int flags)
466{
467 const bool pathname = (flags & FNM_PATHNAME) != 0;
468 const bool special_period = (flags & FNM_PERIOD) != 0;
469 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;
470
471 if (special_period && *string == '.') {
472 /* Initial dot must be matched by an explicit dot in pattern. */
473 if (*pattern != '.') {
474 return false;
475 }
476 pattern++;
477 string++;
478 }
479
480 if (*pattern != '*') {
481 if (!_partial_match(&pattern, &string, flags)) {
482 /* The initial match must succeed. */
483 return false;
484 }
485 }
486
487 while (*pattern != '\0') {
488 assert(*pattern == '*');
489 pattern++;
490
491 bool matched = false;
492
493 const char *end;
494 if (pathname && special_period &&
495 *string == '.' && *(string - 1) == '/') {
496 end = string;
497 } else {
498 end= strchrnul(string, pathname ? '/' : '\0');
499 }
500
501 /* Try to match every possible offset. */
502 while (string <= end) {
503 if (_partial_match(&pattern, &string, flags)) {
504 matched = true;
505 break;
506 }
507 string++;
508 }
509
510 if (matched) {
511 continue;
512 }
513
514 return false;
515 }
516
517 return *string == '\0' || (leading_dir && *string == '/');
518}
519
520/**
521 *
522 * @param s
523 * @return
524 */
525static char *_casefold(const char *s)
526{
527 char *result = strdup(s);
528 for (char *i = result; *i != '\0'; ++i) {
529 *i = tolower(*i);
530 }
531 return result;
532}
533
534/**
535 * Filename pattern matching.
536 *
537 * @param pattern
538 * @param string
539 * @param flags
540 * @return
541 */
542int posix_fnmatch(const char *pattern, const char *string, int flags)
543{
544 // TODO: don't fold everything in advance, but only when needed
545
546 if ((flags & FNM_CASEFOLD) != 0) {
547 /* Just fold the entire pattern and string. */
548 pattern = _casefold(pattern);
549 string = _casefold(string);
550 }
551
552 bool result = _full_match(pattern, string, flags);
553
554 if ((flags & FNM_CASEFOLD) != 0) {
555 if (pattern) {
556 free((char *) pattern);
557 }
558 if (string) {
559 free((char *) string);
560 }
561 }
562
563 return result ? 0 : FNM_NOMATCH;
564}
565
566// FIXME: put the testcases somewhere else
567
568#if 0
569
570#include <stdio.h>
571
572void __posix_fnmatch_test()
573{
574 int fail = 0;
575
576 #undef assert
577 #define assert(x) { if (x) printf("SUCCESS: "#x"\n"); else { printf("FAILED: "#x"\n"); fail++; } }
578 #define match(s1, s2, flags) assert(posix_fnmatch(s1, s2, flags) == 0)
579 #define nomatch(s1, s2, flags) assert(posix_fnmatch(s1, s2, flags) == FNM_NOMATCH)
580
581 assert(FNM_PATHNAME == FNM_FILE_NAME);
582 match("", "", 0);
583 match("*", "hello", 0);
584 match("hello", "hello", 0);
585 match("hello*", "hello", 0);
586 nomatch("hello?", "hello", 0);
587 match("*hello", "prdel hello", 0);
588 match("he[sl]lo", "hello", 0);
589 match("he[sl]lo", "heslo", 0);
590 nomatch("he[sl]lo", "heblo", 0);
591 nomatch("he[^sl]lo", "hello", 0);
592 nomatch("he[^sl]lo", "heslo", 0);
593 match("he[^sl]lo", "heblo", 0);
594 nomatch("he[!sl]lo", "hello", 0);
595 nomatch("he[!sl]lo", "heslo", 0);
596 match("he[!sl]lo", "heblo", 0);
597 match("al*[c-t]a*vis*ta", "alheimer talir jehovista", 0);
598 match("al*[c-t]a*vis*ta", "alfons had jehovista", 0);
599 match("[a-ce-z]", "a", 0);
600 match("[a-ce-z]", "c", 0);
601 nomatch("[a-ce-z]", "d", 0);
602 match("[a-ce-z]", "e", 0);
603 match("[a-ce-z]", "z", 0);
604 nomatch("[^a-ce-z]", "a", 0);
605 nomatch("[^a-ce-z]", "c", 0);
606 match("[^a-ce-z]", "d", 0);
607 nomatch("[^a-ce-z]", "e", 0);
608 nomatch("[^a-ce-z]", "z", 0);
609 match("helen??", "helenos", 0);
610 match("****booo****", "booo", 0);
611
612 match("hello[[:space:]]world", "hello world", 0);
613 nomatch("hello[[:alpha:]]world", "hello world", 0);
614
615 match("/hoooo*", "/hooooooo/hooo", 0);
616 nomatch("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME);
617 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME);
618 match("/hoooo*/*", "/hooooooo/hooo", FNM_PATHNAME);
619 match("/hoooo*/hooo", "/hooooooo/hooo", FNM_PATHNAME);
620 match("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
621 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
622 nomatch("/hoooo", "/hooooooo/hooo", FNM_LEADING_DIR);
623 match("/hooooooo", "/hooooooo/hooo", FNM_LEADING_DIR);
624
625 match("*", "hell", 0);
626 match("*?", "hell", 0);
627 match("?*?", "hell", 0);
628 match("?*??", "hell", 0);
629 match("??*??", "hell", 0);
630 nomatch("???*??", "hell", 0);
631
632 nomatch("", "hell", 0);
633 nomatch("?", "hell", 0);
634 nomatch("??", "hell", 0);
635 nomatch("???", "hell", 0);
636 match("????", "hell", 0);
637
638 match("*", "h.ello", FNM_PERIOD);
639 match("*", "h.ello", FNM_PATHNAME | FNM_PERIOD);
640 nomatch("*", ".hello", FNM_PERIOD);
641 match("h?ello", "h.ello", FNM_PERIOD);
642 nomatch("?hello", ".hello", FNM_PERIOD);
643 match("/home/user/.*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);
644 match("/home/user/*", "/home/user/.hello", FNM_PERIOD);
645 nomatch("/home/user/*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);
646
647 nomatch("HeLlO", "hello", 0);
648 match("HeLlO", "hello", FNM_CASEFOLD);
649
650 printf("Failed: %d\n", fail);
651}
652
653#endif
654
655/** @}
656 */
Note: See TracBrowser for help on using the repository browser.