Changes in uspace/lib/posix/fnmatch.c [a43fbc95:be64a84] in mainline
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/lib/posix/fnmatch.c
ra43fbc95 rbe64a84 33 33 */ 34 34 35 // TODO: clean this up a bit36 37 #include "stdbool.h"38 #include "ctype.h"39 #include "string.h"40 #include "stdlib.h"41 #include "assert.h"42 43 35 #define LIBPOSIX_INTERNAL 44 36 45 37 #include "internal/common.h" 46 38 #include "fnmatch.h" 47 48 #define INVALID_PATTERN -149 50 /* Type for collating element, simple identity with characters now,51 * but may be extended for better locale support.52 */53 typedef int _coll_elm_t;54 55 #define COLL_ELM_INVALID -156 57 /** Get collating element matching a string.58 *59 * @param str60 * @return61 */62 static _coll_elm_t _coll_elm_get(const char* str)63 {64 if (str[0] == '\0' || str[1] != '\0') {65 return COLL_ELM_INVALID;66 }67 return str[0];68 }69 70 /** Get collating element matching a single character.71 *72 * @param c73 * @return74 */75 static _coll_elm_t _coll_elm_char(int c)76 {77 return c;78 }79 80 /** Match collating element with a beginning of a string.81 *82 * @param elm83 * @param str84 * @return 0 if the element doesn't match, or the number of characters matched.85 */86 static int _coll_elm_match(_coll_elm_t elm, const char *str)87 {88 return elm == *str;89 }90 91 static int _coll_elm_between(_coll_elm_t first, _coll_elm_t second,92 const char *str)93 {94 return *str >= first && *str <= second;95 }96 97 /** Read a string delimited by [? and ?].98 *99 * @param pattern Pointer to the string to read from. Its position is moved100 * to the first character after the closing ].101 * @param seq The character on the inside of brackets.102 * @param buf Read buffer.103 * @param buf_sz Read buffer's size. If the buffer is not large enough for104 * the entire string, the string is cut with no error indication.105 * @return106 */107 static bool _get_delimited(108 const char **pattern, int seq,109 char *buf, size_t buf_sz, int flags)110 {111 const bool noescape = (flags & FNM_NOESCAPE) != 0;112 const bool pathname = (flags & FNM_PATHNAME) != 0;113 114 const char *p = *pattern;115 assert(p[0] == '[' && p[1] == seq /* Caller should ensure this. */);116 p += 2;117 118 while (true) {119 if (*p == seq && *(p + 1) == ']') {120 /* String properly ended, return. */121 *pattern = p + 2;122 *buf = '\0';123 return true;124 }125 if (!noescape && *p == '\\') {126 p++;127 }128 if (*p == '\0') {129 /* String not ended properly, invalid pattern. */130 return false;131 }132 if (pathname && *p == '/') {133 /* Slash in a pathname pattern is invalid. */134 return false;135 }136 if (buf_sz > 1) {137 /* Only add to the buffer if there is space. */138 *buf = *p;139 buf++;140 buf_sz--;141 }142 p++;143 }144 }145 146 /************** CHARACTER CLASSES ****************/147 148 #define MAX_CLASS_OR_COLL_LEN 6149 150 struct _char_class {151 const char *name;152 int (*func) (int);153 };154 155 /* List of supported character classes. */156 static const struct _char_class _char_classes[] = {157 { "alnum", isalnum },158 { "alpha", isalpha },159 { "blank", isblank },160 { "cntrl", iscntrl },161 { "digit", isdigit },162 { "graph", isgraph },163 { "lower", islower },164 { "print", isprint },165 { "punct", ispunct },166 { "space", isspace },167 { "upper", isupper },168 { "xdigit", isxdigit }169 };170 171 static int _class_compare(const void *key, const void *elem)172 {173 const struct _char_class *class = elem;174 return strcmp((const char *) key, class->name);175 }176 177 static bool _is_in_class (const char *cname, int c)178 {179 /* Search for class in the array of supported character classes. */180 const struct _char_class *class = bsearch(cname, _char_classes,181 sizeof(_char_classes) / sizeof(struct _char_class),182 sizeof(struct _char_class), _class_compare);183 184 if (class == NULL) {185 /* No such class supported - treat as an empty class. */186 return false;187 } else {188 /* Class matched. */189 return class->func(c);190 }191 }192 193 static int _match_char_class(const char **pattern, const char *str, int flags)194 {195 char class[MAX_CLASS_OR_COLL_LEN + 1];196 197 if (!_get_delimited(pattern, ':', class, sizeof(class), flags)) {198 return INVALID_PATTERN;199 }200 201 return _is_in_class(class, *str);202 }203 204 /************** END CHARACTER CLASSES ****************/205 206 static _coll_elm_t _next_coll_elm(const char **pattern, int flags)207 {208 const char *p = *pattern;209 const bool noescape = (flags & FNM_NOESCAPE) != 0;210 const bool pathname = (flags & FNM_PATHNAME) != 0;211 212 if (*p == '[') {213 if (*(p + 1) == '.') {214 char buf[MAX_CLASS_OR_COLL_LEN + 1];215 if (!_get_delimited(pattern, '.', buf, sizeof(buf), flags)) {216 return COLL_ELM_INVALID;217 }218 return _coll_elm_get(buf);219 }220 221 if (*(p + 1) == '=') {222 char buf[MAX_CLASS_OR_COLL_LEN + 1];223 if (!_get_delimited(pattern, '=', buf, sizeof(buf), flags)) {224 return COLL_ELM_INVALID;225 }226 return _coll_elm_get(buf);227 }228 }229 230 if (!noescape && *p == '\\') {231 p++;232 }233 if (pathname && *p == '/') {234 return COLL_ELM_INVALID;235 }236 237 *pattern = p + 1;238 return _coll_elm_char(*p);239 }240 241 /**242 *243 */244 static int _match_bracket_expr(const char **pattern, const char *str, int flags)245 {246 const bool pathname = (flags & FNM_PATHNAME) != 0;247 const bool special_period = (flags & FNM_PERIOD) != 0;248 const char *p = *pattern;249 bool negative = false;250 int matched = 0;251 252 #define _matched(match) { \253 int _match = match; \254 if (_match < 0) { \255 /* Invalid pattern */ \256 return _match; \257 } else if (matched == 0 && _match > 0) { \258 /* First match */ \259 matched = _match; \260 } \261 }262 263 assert(*p == '['); /* calling code should ensure this */264 p++;265 266 if (*str == '\0' || (pathname && *str == '/') ||267 (pathname && special_period && *str == '.' && *(str - 1) == '/')) {268 /* No bracket expression matches end of string,269 * slash in pathname match or initial period with FNM_PERIOD270 * option.271 */272 return 0;273 }274 275 if (*p == '^' || *p == '!') {276 negative = true;277 p++;278 }279 280 if (*p == ']') {281 /* When ']' is first, treat it as a normal character. */282 _matched(*str == ']');283 p++;284 }285 286 _coll_elm_t current_elm = COLL_ELM_INVALID;287 288 while (*p != ']') {289 if (*p == '-' && *(p + 1) != ']' &&290 current_elm != COLL_ELM_INVALID) {291 /* Range expression. */292 p++;293 _coll_elm_t end_elm = _next_coll_elm(&p, flags);294 if (end_elm == COLL_ELM_INVALID) {295 return INVALID_PATTERN;296 }297 _matched(_coll_elm_between(current_elm, end_elm, str));298 continue;299 }300 301 if (*p == '[' && *(p + 1) == ':') {302 current_elm = COLL_ELM_INVALID;303 _matched(_match_char_class(&p, str, flags));304 continue;305 }306 307 current_elm = _next_coll_elm(&p, flags);308 if (current_elm == COLL_ELM_INVALID) {309 return INVALID_PATTERN;310 }311 _matched(_coll_elm_match(current_elm, str));312 }313 314 /* No error occured - update pattern pointer. */315 *pattern = p + 1;316 317 if (matched == 0) {318 /* No match found */319 return negative;320 } else {321 /* Matched 'match' characters. */322 return negative ? 0 : matched;323 }324 325 #undef _matched326 }327 328 /**329 *330 */331 static bool _partial_match(const char **pattern, const char **string, int flags)332 {333 /* Only a single *-delimited subpattern is matched here.334 * So in this function, '*' is understood as the end of pattern.335 */336 337 const bool pathname = (flags & FNM_PATHNAME) != 0;338 const bool special_period = (flags & FNM_PERIOD) != 0;339 const bool noescape = (flags & FNM_NOESCAPE) != 0;340 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;341 342 const char *s = *string;343 const char *p = *pattern;344 345 while (*p != '*') {346 /* Bracket expression. */347 if (*p == '[') {348 int matched = _match_bracket_expr(&p, s, flags);349 if (matched == 0) {350 /* Doesn't match. */351 return false;352 }353 if (matched != INVALID_PATTERN) {354 s += matched;355 continue;356 }357 358 assert(matched == INVALID_PATTERN);359 /* Fall through to match [ as an ordinary character. */360 }361 362 /* Wildcard match. */363 if (*p == '?') {364 if (*s == '\0') {365 /* No character to match. */366 return false;367 }368 if (pathname && *s == '/') {369 /* Slash must be matched explicitly. */370 return false;371 }372 if (special_period && pathname &&373 *s == '.' && *(s - 1) == '/') {374 /* Initial period must be matched explicitly. */375 return false;376 }377 378 /* None of the above, match anything else. */379 p++;380 s++;381 continue;382 }383 384 if (!noescape && *p == '\\') {385 /* Escaped character. */386 p++;387 }388 389 if (*p == '\0') {390 /* End of pattern, must match end of string or391 * an end of subdirectory name (optional).392 */393 394 if (*s == '\0' || (leading_dir && *s == '/')) {395 break;396 }397 398 return false;399 }400 401 if (*p == *s) {402 /* Exact match. */403 p++;404 s++;405 continue;406 }407 408 /* Nothing matched. */409 return false;410 }411 412 /* Entire sub-pattern matched. */413 414 /* postconditions */415 assert(*p == '\0' || *p == '*');416 assert(*p != '\0' || *s == '\0' || (leading_dir && *s == '/'));417 418 *pattern = p;419 *string = s;420 return true;421 }422 423 static bool _full_match(const char *pattern, const char *string, int flags)424 {425 const bool pathname = (flags & FNM_PATHNAME) != 0;426 const bool special_period = (flags & FNM_PERIOD) != 0;427 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;428 429 if (special_period && *string == '.') {430 /* Initial dot must be matched by an explicit dot in pattern. */431 if (*pattern != '.') {432 return false;433 }434 pattern++;435 string++;436 }437 438 if (*pattern != '*') {439 if (!_partial_match(&pattern, &string, flags)) {440 /* The initial match must succeed. */441 return false;442 }443 }444 445 while (*pattern != '\0') {446 assert(*pattern == '*');447 pattern++;448 449 bool matched = false;450 451 const char *end;452 if (pathname && special_period &&453 *string == '.' && *(string - 1) == '/') {454 end = string;455 } else {456 end= strchrnul(string, pathname ? '/' : '\0');457 }458 459 /* Try to match every possible offset. */460 while (string <= end) {461 if (_partial_match(&pattern, &string, flags)) {462 matched = true;463 break;464 }465 string++;466 }467 468 if (matched) {469 continue;470 }471 472 return false;473 }474 475 return *string == '\0' || (leading_dir && *string == '/');476 }477 478 static char *_casefold(const char *s)479 {480 char *result = strdup(s);481 for (char *i = result; *i != '\0'; ++i) {482 *i = tolower(*i);483 }484 return result;485 }486 39 487 40 /** … … 495 48 int posix_fnmatch(const char *pattern, const char *string, int flags) 496 49 { 497 // TODO: don't fold everything in advance, but only when needed 498 499 if ((flags & FNM_CASEFOLD) != 0) { 500 /* Just fold the entire pattern and string. */ 501 pattern = _casefold(pattern); 502 string = _casefold(string); 503 } 504 505 bool result = _full_match(pattern, string, flags); 506 507 if ((flags & FNM_CASEFOLD) != 0) { 508 free((char *) pattern); 509 free((char *) string); 510 } 511 512 return result ? 0 : FNM_NOMATCH; 50 // TODO 51 not_implemented(); 513 52 } 514 515 // FIXME: put the testcases somewhere else516 517 #if 0518 519 #include <stdio.h>520 521 void __posix_fnmatch_test()522 {523 int fail = 0;524 525 #undef assert526 #define assert(x) { if (x) printf("SUCCESS: "#x"\n"); else { printf("FAILED: "#x"\n"); fail++; } }527 #define match(s1, s2, flags) assert(posix_fnmatch(s1, s2, flags) == 0)528 #define nomatch(s1, s2, flags) assert(posix_fnmatch(s1, s2, flags) == FNM_NOMATCH)529 530 assert(FNM_PATHNAME == FNM_FILE_NAME);531 match("", "", 0);532 match("*", "hello", 0);533 match("hello", "hello", 0);534 match("hello*", "hello", 0);535 nomatch("hello?", "hello", 0);536 match("*hello", "prdel hello", 0);537 match("he[sl]lo", "hello", 0);538 match("he[sl]lo", "heslo", 0);539 nomatch("he[sl]lo", "heblo", 0);540 nomatch("he[^sl]lo", "hello", 0);541 nomatch("he[^sl]lo", "heslo", 0);542 match("he[^sl]lo", "heblo", 0);543 nomatch("he[!sl]lo", "hello", 0);544 nomatch("he[!sl]lo", "heslo", 0);545 match("he[!sl]lo", "heblo", 0);546 match("al*[c-t]a*vis*ta", "alheimer talir jehovista", 0);547 match("al*[c-t]a*vis*ta", "alfons had jehovista", 0);548 match("[a-ce-z]", "a", 0);549 match("[a-ce-z]", "c", 0);550 nomatch("[a-ce-z]", "d", 0);551 match("[a-ce-z]", "e", 0);552 match("[a-ce-z]", "z", 0);553 nomatch("[^a-ce-z]", "a", 0);554 nomatch("[^a-ce-z]", "c", 0);555 match("[^a-ce-z]", "d", 0);556 nomatch("[^a-ce-z]", "e", 0);557 nomatch("[^a-ce-z]", "z", 0);558 match("helen??", "helenos", 0);559 match("****booo****", "booo", 0);560 561 match("hello[[:space:]]world", "hello world", 0);562 nomatch("hello[[:alpha:]]world", "hello world", 0);563 564 match("/hoooo*", "/hooooooo/hooo", 0);565 nomatch("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME);566 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME);567 match("/hoooo*/*", "/hooooooo/hooo", FNM_PATHNAME);568 match("/hoooo*/hooo", "/hooooooo/hooo", FNM_PATHNAME);569 match("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);570 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);571 nomatch("/hoooo", "/hooooooo/hooo", FNM_LEADING_DIR);572 match("/hooooooo", "/hooooooo/hooo", FNM_LEADING_DIR);573 574 match("*", "hell", 0);575 match("*?", "hell", 0);576 match("?*?", "hell", 0);577 match("?*??", "hell", 0);578 match("??*??", "hell", 0);579 nomatch("???*??", "hell", 0);580 581 nomatch("", "hell", 0);582 nomatch("?", "hell", 0);583 nomatch("??", "hell", 0);584 nomatch("???", "hell", 0);585 match("????", "hell", 0);586 587 match("*", "h.ello", FNM_PERIOD);588 match("*", "h.ello", FNM_PATHNAME | FNM_PERIOD);589 nomatch("*", ".hello", FNM_PERIOD);590 match("h?ello", "h.ello", FNM_PERIOD);591 nomatch("?hello", ".hello", FNM_PERIOD);592 match("/home/user/.*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);593 match("/home/user/*", "/home/user/.hello", FNM_PERIOD);594 nomatch("/home/user/*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);595 596 nomatch("HeLlO", "hello", 0);597 match("HeLlO", "hello", FNM_CASEFOLD);598 599 printf("Failed: %d\n", fail);600 }601 602 #endif603 53 604 54 /** @}
Note:
See TracChangeset
for help on using the changeset viewer.