Context Navigation

source: mainline/uspace/app/bdsh/tok.c@ 28ee877e

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 28ee877e was 6939edb, checked in by Martin Sucha <sucha14@…>, 14 years ago
Add more powerful tokenizer to bdsh
Property mode set to `100644`
File size: 6.4 KB

Line
1	/* Copyright (c) 2011, Martin Sucha <sucha14@st.fmph.uniba.sk>
2	* All rights reserved.
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions are met:
6	*
7	* Redistributions of source code must retain the above copyright notice, this
8	* list of conditions and the following disclaimer.
9	*
10	* Redistributions in binary form must reproduce the above copyright notice,
11	* this list of conditions and the following disclaimer in the documentation
12	* and/or other materials provided with the distribution.
13	*
14	* Neither the name of the original program's authors nor the names of its
15	* contributors may be used to endorse or promote products derived from this
16	* software without specific prior written permission.
17	*
18	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28	* POSSIBILITY OF SUCH DAMAGE.
29	*/
30
31	#include <str.h>
32	#include <assert.h>
33	#include <malloc.h>
34	#include <stdlib.h>
35	#include <errno.h>
36
37	#include "tok.h"
38
39	/* Forward declarations of static functions */
40	static wchar_t tok_get_char(tokenizer_t *);
41	static wchar_t tok_look_char(tokenizer_t *);
42	static int tok_push_char(tokenizer_t *, wchar_t);
43	static int tok_push_token(tokenizer_t *);
44	static bool tok_pending_chars(tokenizer_t *);
45	static int tok_finish_string(tokenizer_t *);
46
47	/** Initialize the token parser
48	*
49	* @param tok the tokenizer structure to initialize
50	* @param input the input string to tokenize
51	* @param out_tokens array of strings where to store the result
52	* @param max_tokens number of elements of the out_tokens array
53	*/
54	int tok_init(tokenizer_t tok, char input, char **out_tokens,
55	size_t max_tokens)
56	{
57	tok->in = input;
58	tok->in_offset = 0;
59
60	tok->outtok = out_tokens;
61	tok->outtok_offset = 0;
62	/* Leave one slot for a null terminator */
63	assert(max_tokens > 0);
64	tok->outtok_size = max_tokens - 1;
65
66	/* Prepare a buffer where all the token strings will be stored */
67	size_t len = str_size(input) + max_tokens + 1;
68	char *tmp = malloc(len);
69
70	if (tmp == NULL) {
71	return ENOMEM;
72	}
73
74	tok->outbuf = tmp;
75	tok->outbuf_offset = 0;
76	tok->outbuf_size = len;
77	tok->outbuf_last_start = 0;
78
79	return EOK;
80	}
81
82	/** Finalize the token parser */
83	void tok_fini(tokenizer_t *tok)
84	{
85	if (tok->outbuf != NULL) {
86	free(tok->outbuf);
87	}
88	}
89
90	/** Tokenize the input string into the tokens */
91	int tok_tokenize(tokenizer_t *tok)
92	{
93	int rc;
94	wchar_t cur_char;
95
96	/* Read the input line char by char and append tokens */
97	while ((cur_char = tok_get_char(tok)) != 0) {
98	if (cur_char == ' ') {
99	/* Spaces delimit tokens, but are not processed in any way
100	* Push the token if there is any.
101	* There may not be any pending char for a token in case
102	* there are several spaces in the input.
103	*/
104	if (tok_pending_chars(tok)) {
105	rc = tok_push_token(tok);
106	if (rc != EOK) {
107	return rc;
108	}
109	}
110	}
111	else if (cur_char == '\|') {
112	/* Pipes are tokens that are delimiters and should be output
113	* as a separate token
114	*/
115	if (tok_pending_chars(tok)) {
116	rc = tok_push_token(tok);
117	if (rc != EOK) {
118	return rc;
119	}
120	}
121
122	rc = tok_push_char(tok, '\|');
123	if (rc != EOK) {
124	return rc;
125	}
126
127	rc = tok_push_token(tok);
128	if (rc != EOK) {
129	return rc;
130	}
131	}
132	else if (cur_char == '\'') {
133	/* A string starts with a quote (') and ends again with a quote.
134	* A literal quote is written as ''
135	*/
136	rc = tok_finish_string(tok);
137	if (rc != EOK) {
138	return rc;
139	}
140	}
141	else {
142	/* If we are handling any other character, just append it to
143	* the current token.
144	*/
145	rc = tok_push_char(tok, cur_char);
146	if (rc != EOK) {
147	return rc;
148	}
149	}
150	}
151
152	/* Push the last token */
153	if (tok_pending_chars(tok)) {
154	rc = tok_push_token(tok);
155	if (rc != EOK) {
156	return rc;
157	}
158	}
159
160	/* We always have a space for the terminator, as we
161	* reserved it in tok_init */
162	tok->outtok[tok->outtok_offset] = 0;
163
164	return EOK;
165	}
166
167	/** Finish tokenizing an opened string */
168	int tok_finish_string(tokenizer_t *tok)
169	{
170	int rc;
171	wchar_t cur_char;
172
173	while ((cur_char = tok_get_char(tok)) != 0) {
174	if (cur_char == '\'') {
175	if (tok_look_char(tok) == '\'') {
176	/* Encode a single literal quote */
177	rc = tok_push_char(tok, '\'');
178	if (rc != EOK) {
179	return rc;
180	}
181
182	/* Swallow the additional one in the input */
183	tok_get_char(tok);
184	}
185	else {
186	/* The string end */
187	return tok_push_token(tok);
188	}
189	}
190	else {
191	rc = tok_push_char(tok, cur_char);
192	if (rc != EOK) {
193	return rc;
194	}
195	}
196	}
197
198	/* If we are here, the string run to the end without being closed */
199	return EINVAL;
200	}
201
202	/** Get a char from input, advancing the input position */
203	wchar_t tok_get_char(tokenizer_t *tok)
204	{
205	return str_decode(tok->in, &tok->in_offset, STR_NO_LIMIT);
206	}
207
208	/** Get a char from input, while staying on the same input position */
209	wchar_t tok_look_char(tokenizer_t *tok)
210	{
211	size_t old_offset = tok->in_offset;
212	wchar_t ret = tok_get_char(tok);
213	tok->in_offset = old_offset;
214	return ret;
215	}
216
217	/** Append a char to the end of the current token */
218	int tok_push_char(tokenizer_t *tok, wchar_t ch)
219	{
220	return chr_encode(ch, tok->outbuf, &tok->outbuf_offset, tok->outbuf_size);
221	}
222
223	/** Push the current token to the output array */
224	int tok_push_token(tokenizer_t *tok)
225	{
226	if (tok->outtok_offset >= tok->outtok_size) {
227	return EOVERFLOW;
228	}
229
230	if (tok->outbuf_offset >= tok->outbuf_size) {
231	return EOVERFLOW;
232	}
233
234	tok->outbuf[tok->outbuf_offset++] = 0;
235	tok->outtok[tok->outtok_offset++] = tok->outbuf + tok->outbuf_last_start;
236	tok->outbuf_last_start = tok->outbuf_offset;
237
238	return EOK;
239	}
240
241	/** Return true, if the current token is not empty */
242	bool tok_pending_chars(tokenizer_t *tok)
243	{
244	assert(tok->outbuf_offset >= tok->outbuf_last_start);
245	return (tok->outbuf_offset != tok->outbuf_last_start);
246	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: