source: mainline/uspace/lib/posix/fnmatch.c@ 6b4c64a

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 6b4c64a was 65ed8f4, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 14 years ago

Add unfinished and untested implementation of fnmatch() (work in progress)

  • Property mode set to 100644
File size: 10.0 KB
RevLine 
[be64a84]1/*
2 * Copyright (c) 2011 Jiri Zarevucky
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup libposix
30 * @{
31 */
32/** @file
33 */
34
35#define LIBPOSIX_INTERNAL
36
37#include "internal/common.h"
38#include "fnmatch.h"
39
[65ed8f4]40#include "stdlib.h"
41#include "string.h"
42#include "ctype.h"
43
44#define INVALID_PATTERN -1
45
46/* Type for collating element, simple identity with characters now,
47 * but may be extended for better locale support.
48 */
49typedef int _coll_elm_t;
50
51#define COLL_ELM_INVALID -1
52
53static _coll_elm_t _coll_elm_get(const char* str)
54{
55 if (str[0] == '\0' || str[1] != '\0') {
56 return COLL_ELM_INVALID;
57 }
58 return str[0];
59}
60
61static _coll_elm_t _coll_elm_char(int c)
62{
63 return c;
64}
65
66/**
67 *
68 * @param elm
69 * @param pattern
70 * @return 0 if the element doesn't match, or the number of characters matched.
71 */
72static int _coll_elm_match(_coll_elm_t elm, const char *str)
73{
74 return elm == *str;
75}
76
77static int _coll_elm_between(_coll_elm_t first, _coll_elm_t second,
78 const char *str)
79{
80 return *str >= first && *str <= second;
81}
82
83/** Read a string delimited by [? and ?].
84 *
85 * @param pattern Pointer to the string to read from. Its position is moved
86 * to the first character after the closing ].
87 * @param seq The character on the inside of brackets.
88 * @param buf Read buffer.
89 * @param buf_sz Read buffer's size. If the buffer is not large enough for
90 * the entire string, the string is cut with no error indication.
91 * @return
92 */
93static bool _get_delimited(
94 const char **pattern, int seq,
95 char *buf, size_t buf_sz, int flags)
96{
97 const bool noescape = (flags & FNM_NOESCAPE) != 0;
98 const bool pathname = (flags & FNM_PATHNAME) != 0;
99
100 const char *p = *pattern;
101 assert(p[0] == '[' && p[1] == seq /* Caller should ensure this. */);
102 p += 2;
103
104 while (true) {
105 if (*p == seq && *(p + 1) == ']') {
106 /* String properly ended, return. */
107 *pattern = p + 2;
108 *buf = '\0';
109 return true;
110 }
111 if (!noescape && *p == '\\') {
112 p++;
113 }
114 if (*p == '\0') {
115 /* String not ended properly, invalid pattern. */
116 return false;
117 }
118 if (pathname && *p == '/') {
119 /* Slash in a pathname pattern is invalid. */
120 return false;
121 }
122 if (buf_sz > 1) {
123 /* Only add to the buffer if there is space. */
124 *buf = *p;
125 buf++;
126 buf_sz--;
127 }
128 p++;
129 }
130}
131
132/************** CHARACTER CLASSES ****************/
133
134#define MAX_CLASS_OR_COLL_LEN 6
135
136struct _char_class {
137 const char *name;
138 int (*func) (int);
139};
140
141/* List of supported character classes. */
142static const struct _char_class _char_classes[] = {
143 { "alnum", isalnum },
144 { "alpha", isalpha },
145 { "blank", posix_isblank },
146 { "cntrl", posix_iscntrl },
147 { "digit", isdigit },
148 { "graph", posix_isgraph },
149 { "lower", islower },
150 { "print", posix_isprint },
151 { "punct", posix_ispunct },
152 { "space", isspace },
153 { "upper", isupper },
154 { "xdigit", posix_isxdigit }
155};
156
157static int _class_compare(const void *key, const void *elem)
158{
159 const struct _char_class *class = elem;
160 return posix_strcmp((const char *) key, class->name);
161}
162
163static bool _is_in_class (const char *cname, int c)
164{
165 /* Search for class in the array of supported character classes. */
166 const struct _char_class *class = posix_bsearch(cname, _char_classes,
167 sizeof(_char_classes) / sizeof(struct _char_class),
168 sizeof(struct _char_class), _class_compare);
169
170 if (class == NULL) {
171 /* No such class supported - treat as an empty class. */
172 return false;
173 } else {
174 /* Class matched. */
175 return class->func(c);
176 }
177}
178
179static int _match_char_class(const char **pattern, const char *str, int flags)
180{
181 char class[MAX_CLASS_OR_COLL_LEN + 1];
182
183 if (!_get_delimited(pattern, ':', class, sizeof(class), flags)) {
184 return INVALID_PATTERN;
185 }
186
187 return _is_in_class(class, *str);
188}
189
190/************** END CHARACTER CLASSES ****************/
191
192static _coll_elm_t _next_coll_elm(const char **pattern, int flags)
193{
194 const char *p = *pattern;
195 const bool noescape = (flags & FNM_NOESCAPE) != 0;
196 const bool pathname = (flags & FNM_PATHNAME) != 0;
197
198 if (*p == '[') {
199 if (*(p + 1) == '.') {
200 char buf[MAX_CLASS_OR_COLL_LEN + 1];
201 if (!_get_delimited(pattern, '.', buf, sizeof(buf), flags)) {
202 return COLL_ELM_INVALID;
203 }
204 return _coll_elm_get(buf);
205 }
206
207 if (*(p + 1) == '=') {
208 char buf[MAX_CLASS_OR_COLL_LEN + 1];
209 if (!_get_delimited(pattern, '=', buf, sizeof(buf), flags)) {
210 return COLL_ELM_INVALID;
211 }
212 return _coll_elm_get(buf);
213 }
214 }
215
216 if (!noescape && *p == '\\') {
217 p++;
218 }
219 if (pathname && *p == '/') {
220 return COLL_ELM_INVALID;
221 }
222
223 *pattern = p + 1;
224 return _coll_elm_char(*p);
225}
226
227/**
228 *
229 */
230static int _match_bracket_expr(const char **pattern, const char *str, int flags)
231{
232 const bool pathname = (flags & FNM_PATHNAME) != 0;
233 const bool special_period = (flags & FNM_PERIOD) != 0;
234 const char *p = *pattern;
235 bool negative = false;
236 int matched = 0;
237
238 #define _matched(match) { \
239 int _match = match; \
240 if (_match < 0) { \
241 /* Invalid pattern */ \
242 return _match; \
243 } else if (matched == 0 && _match > 0) { \
244 /* First match */ \
245 matched = _match; \
246 } \
247 }
248
249 assert(*p == '['); /* calling code should ensure this */
250 p++;
251
252 if (*str == '\0' || (pathname && *str == '/') ||
253 (pathname && special_period && *str == '.' && *(str - 1) == '/')) {
254 /* No bracket expression matches end of string,
255 * slash in pathname match or initial period with FNM_PERIOD
256 * option.
257 */
258 return 0;
259 }
260
261 if (*p == '^' || *p == '!') {
262 negative = true;
263 p++;
264 }
265
266 if (*p == ']') {
267 /* When ']' is first, treat it as a normal character. */
268 _matched(*str == ']');
269 p++;
270 }
271
272 _coll_elm_t current_elm = COLL_ELM_INVALID;
273
274 while (*p != ']') {
275 if (*p == '-' && *(p + 1) != ']' &&
276 current_elm != COLL_ELM_INVALID) {
277 /* Range expression. */
278 p++;
279 _coll_elm_t end_elm = _next_coll_elm(&p, flags);
280 if (end_elm == COLL_ELM_INVALID) {
281 return INVALID_PATTERN;
282 }
283 _matched(_coll_elm_between(current_elm, end_elm, str));
284 continue;
285 }
286
287 if (*p == '[' && *(p + 1) == ':') {
288 current_elm = COLL_ELM_INVALID;
289 _matched(_match_char_class(&p, str, flags));
290 continue;
291 }
292
293 current_elm = _next_coll_elm(&p, flags);
294 if (current_elm == COLL_ELM_INVALID) {
295 return INVALID_PATTERN;
296 }
297 _matched(_coll_elm_match(current_elm, str));
298 }
299
300 /* No error occured - update pattern pointer. */
301 *pattern = p + 1;
302
303 if (matched == 0) {
304 /* No match found */
305 return negative;
306 } else {
307 /* Matched 'match' characters. */
308 return negative ? 0 : matched;
309 }
310
311 #undef _matched
312}
313
314/**
315 *
316 */
317static bool _partial_match(const char **pattern, const char **string, int flags)
318{
319 /* Only a single *-delimited subpattern is matched here.
320 * So in this function, '*' is understood as the end of pattern.
321 */
322
323 const bool pathname = (flags & FNM_PATHNAME) != 0;
324 const bool special_period = (flags & FNM_PERIOD) != 0;
325 const bool noescape = (flags & FNM_NOESCAPE) != 0;
326 const char *s = *string;
327 const char *p = *pattern;
328
329 for (; *p != '*'; p++) {
330 if (*p == '[') {
331 /* Bracket expression. */
332 int matched = _match_bracket_expr(&p, s, flags);
333 if (matched == 0) {
334 /* Doesn't match. */
335 return false;
336 }
337 if (matched == INVALID_PATTERN) {
338 /* Fall through to match [ as an ordinary
339 * character.
340 */
341 } else {
342 s += matched;
343 continue;
344 }
345 }
346
347 if (*p == '?') {
348 /* Wildcard match. */
349 if (*s == '\0' || (pathname && *s == '/') ||
350 (special_period && pathname && *s == '.' &&
351 *(s - 1) == '/')) {
352 return false;
353 }
354 p++;
355 s++;
356 continue;
357 }
358
359 if (!noescape && *p == '\\') {
360 /* Escaped character. */
361 p++;
362 }
363
364 if (*p == *s) {
365 /* Exact match. */
366 if (*s == '\0') {
367 break;
368 }
369 continue;
370 }
371
372 /* Nothing matched. */
373 return false;
374 }
375
376 /* Entire pattern matched. */
377 *pattern = p;
378 *string = s;
379 return true;
380}
381
382static bool _full_match(const char *pattern, const char *string, int flags)
383{
384 const bool special_period = (flags & FNM_PERIOD) != 0;
385
386 if (special_period && *string == '.') {
387 /* Initial dot must be matched by an explicit dot in pattern. */
388 if (*pattern != '.') {
389 return false;
390 }
391 pattern++;
392 string++;
393 }
394
395 if (*pattern != '*') {
396 if (!_partial_match(&pattern, &string, flags)) {
397 /* The initial match must succeed. */
398 return false;
399 }
400 }
401
402 while (*pattern != '\0') {
403 assert(*pattern == '*');
404
405 while (*pattern == '*') {
406 pattern++;
407 }
408
409 /* Try to match every possible offset. */
410 while (*string != '\0') {
411 if (_partial_match(&pattern, &string, flags)) {
412 break;
413 }
414 string++;
415 }
416 }
417
418 return *string == '\0';
419}
420
[be64a84]421/**
422 * Filename pattern matching.
423 *
424 * @param pattern
425 * @param string
426 * @param flags
427 * @return
428 */
429int posix_fnmatch(const char *pattern, const char *string, int flags)
430{
[65ed8f4]431 bool result = _full_match(pattern, string, flags);
432 return result ? 0 : FNM_NOMATCH;
[be64a84]433}
434
435/** @}
436 */
437
Note: See TracBrowser for help on using the repository browser.