Context Navigation

source: mainline/uspace/app/bdsh/tok.c@ eff10e03

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since eff10e03 was 36ab7c7, checked in by Jiri Svoboda <jiri@…>, 14 years ago
Standardize formatting of copyright headers in Bdsh and add some that were missing.
Property mode set to `100644`
File size: 6.3 KB

Line
1	/*
2	* Copyright (c) 2011 Martin Sucha
3	* All rights reserved.
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	*
9	* - Redistributions of source code must retain the above copyright
10	* notice, this list of conditions and the following disclaimer.
11	* - Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	* - The name of the author may not be used to endorse or promote products
15	* derived from this software without specific prior written permission.
16	*
17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	#include <str.h>
30	#include <assert.h>
31	#include <malloc.h>
32	#include <stdlib.h>
33	#include <errno.h>
34
35	#include "tok.h"
36
37	/* Forward declarations of static functions */
38	static wchar_t tok_get_char(tokenizer_t *);
39	static wchar_t tok_look_char(tokenizer_t *);
40	static int tok_push_char(tokenizer_t *, wchar_t);
41	static int tok_push_token(tokenizer_t *);
42	static bool tok_pending_chars(tokenizer_t *);
43	static int tok_finish_string(tokenizer_t *);
44
45	/** Initialize the token parser
46	*
47	* @param tok the tokenizer structure to initialize
48	* @param input the input string to tokenize
49	* @param out_tokens array of strings where to store the result
50	* @param max_tokens number of elements of the out_tokens array
51	*/
52	int tok_init(tokenizer_t tok, char input, char **out_tokens,
53	size_t max_tokens)
54	{
55	tok->in = input;
56	tok->in_offset = 0;
57
58	tok->outtok = out_tokens;
59	tok->outtok_offset = 0;
60	/* Leave one slot for a null terminator */
61	assert(max_tokens > 0);
62	tok->outtok_size = max_tokens - 1;
63
64	/* Prepare a buffer where all the token strings will be stored */
65	size_t len = str_size(input) + max_tokens + 1;
66	char *tmp = malloc(len);
67
68	if (tmp == NULL) {
69	return ENOMEM;
70	}
71
72	tok->outbuf = tmp;
73	tok->outbuf_offset = 0;
74	tok->outbuf_size = len;
75	tok->outbuf_last_start = 0;
76
77	return EOK;
78	}
79
80	/** Finalize the token parser */
81	void tok_fini(tokenizer_t *tok)
82	{
83	if (tok->outbuf != NULL) {
84	free(tok->outbuf);
85	}
86	}
87
88	/** Tokenize the input string into the tokens */
89	int tok_tokenize(tokenizer_t *tok)
90	{
91	int rc;
92	wchar_t cur_char;
93
94	/* Read the input line char by char and append tokens */
95	while ((cur_char = tok_get_char(tok)) != 0) {
96	if (cur_char == ' ') {
97	/* Spaces delimit tokens, but are not processed in any way
98	* Push the token if there is any.
99	* There may not be any pending char for a token in case
100	* there are several spaces in the input.
101	*/
102	if (tok_pending_chars(tok)) {
103	rc = tok_push_token(tok);
104	if (rc != EOK) {
105	return rc;
106	}
107	}
108	}
109	else if (cur_char == '\|') {
110	/* Pipes are tokens that are delimiters and should be output
111	* as a separate token
112	*/
113	if (tok_pending_chars(tok)) {
114	rc = tok_push_token(tok);
115	if (rc != EOK) {
116	return rc;
117	}
118	}
119
120	rc = tok_push_char(tok, '\|');
121	if (rc != EOK) {
122	return rc;
123	}
124
125	rc = tok_push_token(tok);
126	if (rc != EOK) {
127	return rc;
128	}
129	}
130	else if (cur_char == '\'') {
131	/* A string starts with a quote (') and ends again with a quote.
132	* A literal quote is written as ''
133	*/
134	rc = tok_finish_string(tok);
135	if (rc != EOK) {
136	return rc;
137	}
138	}
139	else {
140	/* If we are handling any other character, just append it to
141	* the current token.
142	*/
143	rc = tok_push_char(tok, cur_char);
144	if (rc != EOK) {
145	return rc;
146	}
147	}
148	}
149
150	/* Push the last token */
151	if (tok_pending_chars(tok)) {
152	rc = tok_push_token(tok);
153	if (rc != EOK) {
154	return rc;
155	}
156	}
157
158	/* We always have a space for the terminator, as we
159	* reserved it in tok_init */
160	tok->outtok[tok->outtok_offset] = 0;
161
162	return EOK;
163	}
164
165	/** Finish tokenizing an opened string */
166	int tok_finish_string(tokenizer_t *tok)
167	{
168	int rc;
169	wchar_t cur_char;
170
171	while ((cur_char = tok_get_char(tok)) != 0) {
172	if (cur_char == '\'') {
173	if (tok_look_char(tok) == '\'') {
174	/* Encode a single literal quote */
175	rc = tok_push_char(tok, '\'');
176	if (rc != EOK) {
177	return rc;
178	}
179
180	/* Swallow the additional one in the input */
181	tok_get_char(tok);
182	}
183	else {
184	/* The string end */
185	return tok_push_token(tok);
186	}
187	}
188	else {
189	rc = tok_push_char(tok, cur_char);
190	if (rc != EOK) {
191	return rc;
192	}
193	}
194	}
195
196	/* If we are here, the string run to the end without being closed */
197	return EINVAL;
198	}
199
200	/** Get a char from input, advancing the input position */
201	wchar_t tok_get_char(tokenizer_t *tok)
202	{
203	return str_decode(tok->in, &tok->in_offset, STR_NO_LIMIT);
204	}
205
206	/** Get a char from input, while staying on the same input position */
207	wchar_t tok_look_char(tokenizer_t *tok)
208	{
209	size_t old_offset = tok->in_offset;
210	wchar_t ret = tok_get_char(tok);
211	tok->in_offset = old_offset;
212	return ret;
213	}
214
215	/** Append a char to the end of the current token */
216	int tok_push_char(tokenizer_t *tok, wchar_t ch)
217	{
218	return chr_encode(ch, tok->outbuf, &tok->outbuf_offset, tok->outbuf_size);
219	}
220
221	/** Push the current token to the output array */
222	int tok_push_token(tokenizer_t *tok)
223	{
224	if (tok->outtok_offset >= tok->outtok_size) {
225	return EOVERFLOW;
226	}
227
228	if (tok->outbuf_offset >= tok->outbuf_size) {
229	return EOVERFLOW;
230	}
231
232	tok->outbuf[tok->outbuf_offset++] = 0;
233	tok->outtok[tok->outtok_offset++] = tok->outbuf + tok->outbuf_last_start;
234	tok->outbuf_last_start = tok->outbuf_offset;
235
236	return EOK;
237	}
238
239	/** Return true, if the current token is not empty */
240	bool tok_pending_chars(tokenizer_t *tok)
241	{
242	assert(tok->outbuf_offset >= tok->outbuf_last_start);
243	return (tok->outbuf_offset != tok->outbuf_last_start);
244	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: