source: mainline/uspace/app/edit/search.c@ 0564ee47

Last change on this file since 0564ee47 was 28a5ebd, checked in by Martin Decky <martin@…>, 5 years ago

Use char32_t instead of wchar_t to represent UTF-32 strings

The intention of the native HelenOS string API has been always to
support Unicode in the UTF-8 and UTF-32 encodings as the sole character
representations and ignore the obsolete mess of older single-byte and
multibyte character encodings. Before C11, the wchar_t type has been
slightly misused for the purpose of the UTF-32 strings. The newer
char32_t type is obviously a much more suitable option. The standard
defines char32_t as uint_least32_t, thus we can take the liberty to fix
it to uint32_t.

To maintain compatibility with the C Standard, the putwchar(wchar_t)
functions have been replaced by our custom putuchar(char32_t) functions
where appropriate.

  • Property mode set to 100644
File size: 3.9 KB
Line 
1/*
2 * Copyright (c) 2012 Martin Sucha
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup edit
30 * @{
31 */
32/**
33 * @file
34 * @brief Simple searching facility.
35 */
36
37#include <errno.h>
38#include <stdlib.h>
39#include <stddef.h>
40#include <types/common.h>
41
42#include "search.h"
43#include "search_impl.h"
44
45search_t *search_init(const char *pattern, void *client_data, search_ops_t ops,
46 bool reverse)
47{
48 search_t *search = calloc(1, sizeof(search_t));
49 if (search == NULL)
50 return NULL;
51
52 char32_t *p = str_to_awstr(pattern);
53 if (p == NULL) {
54 free(search);
55 return NULL;
56 }
57
58 search->pattern_length = wstr_length(p);
59
60 if (reverse) {
61 /* Reverse the pattern */
62 size_t pos, half;
63 half = search->pattern_length / 2;
64 for (pos = 0; pos < half; pos++) {
65 char32_t tmp = p[pos];
66 p[pos] = p[search->pattern_length - pos - 1];
67 p[search->pattern_length - pos - 1] = tmp;
68 }
69 }
70
71 search->pattern = p;
72
73 search->client_data = client_data;
74 search->ops = ops;
75 search->back_table = calloc(search->pattern_length, sizeof(ssize_t));
76 if (search->back_table == NULL) {
77 free(search->pattern);
78 free(search);
79 return NULL;
80 }
81
82 search->pattern_pos = 0;
83
84 search->back_table[0] = -1;
85 search->back_table[1] = 0;
86 size_t table_idx = 2;
87 size_t pattern_idx = 0;
88 while (table_idx < search->pattern_length) {
89 if (ops.equals(search->pattern[table_idx - 1],
90 search->pattern[pattern_idx])) {
91 pattern_idx++;
92 search->back_table[table_idx] = pattern_idx;
93 table_idx++;
94 } else if (pattern_idx > 0) {
95 pattern_idx = search->back_table[pattern_idx];
96 } else {
97 pattern_idx = 0;
98 table_idx++;
99 }
100 }
101
102 return search;
103}
104
105errno_t search_next_match(search_t *s, match_t *match)
106{
107 search_equals_fn eq = s->ops.equals;
108
109 char32_t cur_char;
110 errno_t rc = EOK;
111 while ((rc = s->ops.producer(s->client_data, &cur_char)) == EOK && cur_char > 0) {
112 /* Deal with mismatches */
113 while (s->pattern_pos > 0 && !eq(cur_char, s->pattern[s->pattern_pos])) {
114 s->pattern_pos = s->back_table[s->pattern_pos];
115 }
116 /* Check if the character matched */
117 if (eq(cur_char, s->pattern[s->pattern_pos])) {
118 s->pattern_pos++;
119 if (s->pattern_pos == s->pattern_length) {
120 s->pattern_pos = s->back_table[s->pattern_pos];
121 rc = s->ops.mark(s->client_data, &match->end);
122 if (rc != EOK)
123 return rc;
124 match->length = s->pattern_length;
125 return EOK;
126 }
127 }
128 }
129
130 match->end = NULL;
131 match->length = 0;
132
133 return rc;
134}
135
136void search_fini(search_t *search)
137{
138 free(search->pattern);
139 free(search->back_table);
140
141}
142
/** Exact character comparison.
 *
 * @param a First character.
 * @param b Second character.
 *
 * @return true if both arguments hold the same value, false otherwise.
 */
bool char_exact_equals(const char32_t a, const char32_t b)
{
	if (a != b)
		return false;

	return true;
}
147
148/** @}
149 */
Note: See TracBrowser for help on using the repository browser.