Changeset 0662451 in mainline for uspace/app/bdsh/tok.c


Ignore:
Timestamp:
2011-08-19T14:44:49Z (13 years ago)
Author:
Martin Sucha <sucha14@…>
Branches:
lfn, master, serial, ticket/834-toolchain-update, topic/msim-upgrade, topic/simplify-dev-export
Children:
5992e0e
Parents:
89660f2
Message:

Extend bdsh tokenizer to include more information

File:
1 edited

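Legend:

Unmodified (old and new line numbers both shown, run together, e.g. "4445" = old 44 / new 45)
Added (new line number only)
Removed (old line number only)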
  • uspace/app/bdsh/tok.c

    r89660f2 r0662451  
    4242static bool tok_pending_chars(tokenizer_t *);
    4343static int tok_finish_string(tokenizer_t *);
     44static void tok_start_token(tokenizer_t *, token_type_t);
    4445
    4546/** Initialize the token parser
     
    6162        tok->outtok = out_tokens;
    6263        tok->outtok_offset = 0;
    63         /* Leave one slot for a null terminator */
    64         assert(max_tokens > 0);
    65         tok->outtok_size = max_tokens - 1;
     64        tok->outtok_size = max_tokens;
    6665       
    6766        /* Prepare a buffer where all the token strings will be stored */
     
    9089
    9190/** Tokenize the input string into the tokens */
    92 int tok_tokenize(tokenizer_t *tok)
     91int tok_tokenize(tokenizer_t *tok, size_t *tokens_length)
    9392{
    9493        int rc;
     
    9897        while ((cur_char = tok_get_char(tok)) != 0) {
    9998                if (cur_char == ' ') {
    100                         /* Spaces delimit tokens, but are not processed in any way
    101                          * Push the token if there is any.
     99                        /* Push the token if there is any.
    102100                         * There may not be any pending char for a token in case
    103101                         * there are several spaces in the input.
     
    109107                                }
    110108                        }
     109                        tok_start_token(tok, TOKTYPE_SPACE);
     110                        /* Eat all spaces */
     111                        while (tok_look_char(tok) == ' ') {
     112                                tok_push_char(tok, tok_get_char(tok));
     113                        }
     114                        tok_push_token(tok);
     115                       
    111116                }
    112117                else if (cur_char == '|') {
     
    121126                        }
    122127                       
     128                        tok_start_token(tok, TOKTYPE_PIPE);
     129                       
    123130                        rc = tok_push_char(tok, '|');
    124131                        if (rc != EOK) {
     
    135142                         * A literal quote is written as ''
    136143                         */
     144                        tok_start_token(tok, TOKTYPE_TEXT);
    137145                        rc = tok_finish_string(tok);
    138146                        if (rc != EOK) {
     
    141149                }
    142150                else {
     151                        if (!tok_pending_chars(tok)) {
     152                                tok_start_token(tok, TOKTYPE_TEXT);
     153                        }
    143154                        /* If we are handling any other character, just append it to
    144155                         * the current token.
     
    159170        }
    160171       
    161         /* We always have a space for the terminator, as we
    162          * reserved it in tok_init */
    163         tok->outtok[tok->outtok_offset] = 0;
     172        *tokens_length = tok->outtok_offset;
    164173       
    165174        return EOK;
     
    211220wchar_t tok_look_char(tokenizer_t *tok)
    212221{
    213         off_t old_offset = tok->in_offset;
    214         off_t old_char_offset = tok->in_char_offset;
     222        unsigned int old_offset = tok->in_offset;
     223        unsigned int old_char_offset = tok->in_char_offset;
    215224        wchar_t ret = tok_get_char(tok);
    216225        tok->in_offset = old_offset;
     
    225234}
    226235
     236void tok_start_token(tokenizer_t *tok, token_type_t type)
     237{
     238        tok->current_type = type;
     239}
     240
    227241/** Push the current token to the output array */
    228242int tok_push_token(tokenizer_t *tok)
     
    237251       
    238252        tok->outbuf[tok->outbuf_offset++] = 0;
    239         token_t *tokinfo = &tok->outtok[tok->outtok_offset++]
    240         tokinfo.text = tok->outbuf + tok->outbuf_last_start;
    241         tokinfo.byte_start = tok->last_in_offset;
    242         tokinfo.byte_length = tok->in_offset - tok->last_in_offset - 1;
    243         tokinfo.char_start = tok->last_in_char_offset;
    244         tokinfo.char_length = tok->in_char_offset - tok->last_in_char_offset
     253        token_t *tokinfo = &tok->outtok[tok->outtok_offset++];
     254        tokinfo->type = tok->current_type;
     255        tokinfo->text = tok->outbuf + tok->outbuf_last_start;
     256        tokinfo->byte_start = tok->last_in_offset;
     257        tokinfo->byte_length = tok->in_offset - tok->last_in_offset - 1;
     258        tokinfo->char_start = tok->last_in_char_offset;
     259        tokinfo->char_length = tok->in_char_offset - tok->last_in_char_offset
    245260            - 1;
    246         tok->outtok[tok->outtok_offset]
    247261        tok->outbuf_last_start = tok->outbuf_offset;
    248262       
Note: See TracChangeset for help on using the changeset viewer.