Context Navigation

← Previous Change
Next Change →

tok.c

Timestamp:

2011-08-20T09:05:14Z (13 years ago)

Author:

Petr Koupy <petr.koupy@…>

Branches:

lfn, master, serial, ticket/834-toolchain-update, topic/msim-upgrade, topic/simplify-dev-export

Children:

c916dfc

Parents:

921b84f (diff), a0fc4be (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.

Message:

Merge mainline changes.

File:

1 edited

uspace/app/bdsh/tok.c (modified) (14 diffs)

Legend:

Unmodified
Added
Removed

uspace/app/bdsh/tok.c

-              r921b84f
+              r5fb32c5
 static bool tok_pending_chars(tokenizer_t *);
 static int tok_finish_string(tokenizer_t *);
+static void tok_start_token(tokenizer_t *, token_type_t);
 /** Initialize the token parser
 …
  * @param max_tokens number of elements of the out_tokens array
  */
 int tok_init(tokenizer_t *tok, char *input, char **out_tokens,
+int tok_init(tokenizer_t *tok, char *input, token_t *out_tokens,
     size_t max_tokens)
+{
         tok->in = input;
         tok->in_offset = 0;
+        tok->last_in_offset = 0;
+        tok->in_char_offset = 0;
+        tok->last_in_char_offset = 0;
         tok->outtok = out_tokens;
         tok->outtok_offset = 0;
+        /* Leave one slot for a null terminator */
+        assert(max_tokens > 0);
+        tok->outtok_size = max_tokens - 1;
+        tok->outtok_size = max_tokens;
         /* Prepare a buffer where all the token strings will be stored */
 …
 /** Tokenize the input string into the tokens */
 int tok_tokenize(tokenizer_t *tok)
+int tok_tokenize(tokenizer_t *tok, size_t *tokens_length)
+{
         int rc;
         wchar_t cur_char;
+        wchar_t next_char;
         /* Read the input line char by char and append tokens */
+        while ((cur_char = tok_get_char(tok)) != 0) {
+                if (cur_char == ' ') {
+                        /* Spaces delimit tokens, but are not processed in any way
+                         * Push the token if there is any.
+        while ((next_char = tok_look_char(tok)) != 0) {
+                if (next_char == ' ') {
+                        /* Push the token if there is any.
                          * There may not be any pending char for a token in case
                          * there are several spaces in the input.
 …
+                                }
+                        }
+                }
+                else if (cur_char == '|') {
+                        /* Pipes are tokens that are delimiters and should be output
+                         * as a separate token
+                        tok_start_token(tok, TOKTYPE_SPACE);
+                        /* Eat all the spaces */
+                        while (tok_look_char(tok) == ' ') {
+                                tok_push_char(tok, tok_get_char(tok));
+                        }
+                        tok_push_token(tok);
+                }
+                else if (next_char == '|') {
+                        /* Pipes are tokens that are delimiters and should be
+                         * output as a separate token
                          */
                         if (tok_pending_chars(tok)) {
 …
+                        }
+                        rc = tok_push_char(tok, '|');
+                        tok_start_token(tok, TOKTYPE_PIPE);
+                        rc = tok_push_char(tok, tok_get_char(tok));
                         if (rc != EOK) {
                                 return rc;
 …
+                        }
+                }
                 else if (cur_char == '\'') {
+                else if (next_char == '\'') {
                         /* A string starts with a quote (') and ends again with a quote.
                          * A literal quote is written as ''
                          */
+                        tok_start_token(tok, TOKTYPE_TEXT);
+                        /* Eat the quote */
+                        tok_get_char(tok);
                         rc = tok_finish_string(tok);
                         if (rc != EOK) {
 …
+                }
                 else {
+                        if (!tok_pending_chars(tok)) {
+                                tok_start_token(tok, TOKTYPE_TEXT);
+                        }
                         /* If we are handling any other character, just append it to
                          * the current token.
                          */
                         rc = tok_push_char(tok, cur_char);
+                        rc = tok_push_char(tok, tok_get_char(tok));
                         if (rc != EOK) {
                                 return rc;
 …
+        }
+        /* We always have a space for the terminator, as we
+         * reserved it in tok_init */
+        tok->outtok[tok->outtok_offset] = 0;
+        *tokens_length = tok->outtok_offset;
         return EOK;
 …
+{
         int rc;
+        wchar_t cur_char;
+        while ((cur_char = tok_get_char(tok)) != 0) {
+                if (cur_char == '\'') {
+        wchar_t next_char;
+        while ((next_char = tok_look_char(tok)) != 0) {
+                if (next_char == '\'') {
+                        /* Eat the quote */
+                        tok_get_char(tok);
                         if (tok_look_char(tok) == '\'') {
                                 /* Encode a single literal quote */
 …
+                }
                 else {
                         rc = tok_push_char(tok, cur_char);
+                        rc = tok_push_char(tok, tok_get_char(tok));
                         if (rc != EOK) {
                                 return rc;
 …
 wchar_t tok_get_char(tokenizer_t *tok)
+{
+        tok->in_char_offset++;
         return str_decode(tok->in, &tok->in_offset, STR_NO_LIMIT);
+}
 …
 wchar_t tok_look_char(tokenizer_t *tok)
+{
+        size_t old_offset = tok->in_offset;
+        unsigned int old_offset = tok->in_offset;
+        unsigned int old_char_offset = tok->in_char_offset;
         wchar_t ret = tok_get_char(tok);
         tok->in_offset = old_offset;
+        tok->in_char_offset = old_char_offset;
         return ret;
+}
 …
+}
+void tok_start_token(tokenizer_t *tok, token_type_t type)
+{
+        tok->current_type = type;
+}
 /** Push the current token to the output array */
 int tok_push_token(tokenizer_t *tok)
 …
         tok->outbuf[tok->outbuf_offset++] = 0;
+        tok->outtok[tok->outtok_offset++] = tok->outbuf + tok->outbuf_last_start;
+        token_t *tokinfo = &tok->outtok[tok->outtok_offset++];
+        tokinfo->type = tok->current_type;
+        tokinfo->text = tok->outbuf + tok->outbuf_last_start;
+        tokinfo->byte_start = tok->last_in_offset;
+        tokinfo->byte_length = tok->in_offset - tok->last_in_offset;
+        tokinfo->char_start = tok->last_in_char_offset;
+        tokinfo->char_length = tok->in_char_offset - tok->last_in_char_offset;
         tok->outbuf_last_start = tok->outbuf_offset;
+        /* We have consumed the first char of the next token already */
+        tok->last_in_offset = tok->in_offset;
+        tok->last_in_char_offset = tok->in_char_offset;
         return EOK;

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 5fb32c5 in mainline for uspace/app/bdsh/tok.c

Legend:

uspace/app/bdsh/tok.c

Download in other formats: