/* * token lex for finsh shell. * * COPYRIGHT (C) 2006 - 2013, RT-Thread Development Team * * This file is part of RT-Thread (http://www.rt-thread.org) * Maintainer: bernard.xiong * * All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * * Change Logs: * Date Author Notes * 2010-03-22 Bernard first version * 2013-04-03 Bernard strip more characters. */ #include #include #include "finsh_token.h" #include "finsh_error.h" #define is_alpha(ch) ((ch | 0x20) - 'a') < 26u #define is_digit(ch) ((ch) >= '0' && (ch) <= '9') #define is_separator(ch) !(((ch) >= 'a' && (ch) <= 'z') \ || ((ch) >= 'A' && (ch) <= 'Z') || ((ch) >= '0' && (ch) <= '9') || ((ch) == '_')) #define is_eof(self) (self)->eof struct name_table { char* name; enum finsh_token_type type; }; /* keyword */ static const struct name_table finsh_name_table[] = { {"void", finsh_token_type_void}, {"char", finsh_token_type_char}, {"short", finsh_token_type_short}, {"int", finsh_token_type_int}, {"long", finsh_token_type_long}, {"unsigned", finsh_token_type_unsigned}, {"NULL", finsh_token_type_value_null}, {"null", finsh_token_type_value_null} }; static char token_next_char(struct finsh_token* self); static void token_prev_char(struct finsh_token* self); static long token_spec_number(char* string, int length, int b); static void token_run(struct finsh_token* self); static int token_match_name(struct finsh_token* self, const char* str); static void token_proc_number(struct finsh_token* self); static u_char* token_proc_string(struct finsh_token* self); static void token_trim_space(struct finsh_token* self); static char token_proc_char(struct finsh_token* self); static int token_proc_escape(struct finsh_token* self); void finsh_token_init(struct finsh_token* self, u_char* line) { memset(self, 0, sizeof(struct finsh_token)); self->line = line; } enum finsh_token_type finsh_token_token(struct finsh_token* self) { if ( self->replay ) self->replay = 0; else token_run(self); return (enum finsh_token_type)self->current_token; } void finsh_token_get_token(struct finsh_token* self, u_char* token) { strncpy((char*)token, (char*)self->string, FINSH_NAME_MAX); } int token_get_string(struct finsh_token* self, u_char* str) { unsigned char *p=str; char ch; ch = token_next_char(self); if (is_eof(self)) return -1; str[0] = '\0'; if ( is_digit(ch) )/*the first character of identifier is not a digit.*/ { token_prev_char(self); return -1; } while (!is_separator(ch) && !is_eof(self)) { *p++ = ch; ch = token_next_char(self); } self->eof = 0; token_prev_char(self); *p = '\0'; return 0; } /* get next character. */ static char token_next_char(struct finsh_token* self) { if (self->eof) return '\0'; if (self->position == (int)strlen((char*)self->line) || self->line[self->position] =='\n') { self->eof = 1; self->position = 0; return '\0'; } return self->line[self->position++]; } static void token_prev_char(struct finsh_token* self) { if ( self->eof ) return; if ( self->position == 0 ) return; else self->position--; } static void token_run(struct finsh_token* self) { char ch; token_trim_space(self); /* first trim space and tab. */ token_get_string(self, &(self->string[0])); if ( is_eof(self) ) /*if it is eof, break;*/ { self->current_token = finsh_token_type_eof; return ; } if (self->string[0] != '\0') /*It is a key word or a identifier.*/ { if ( !token_match_name(self, (char*)self->string) ) { self->current_token = finsh_token_type_identifier; } } else/*It is a operator character.*/ { ch = token_next_char(self); switch ( ch ) { case '(': self->current_token = finsh_token_type_left_paren; break; case ')': self->current_token = finsh_token_type_right_paren; break; case ',': self->current_token = finsh_token_type_comma; break; case ';': self->current_token = finsh_token_type_semicolon; break; case '&': self->current_token = finsh_token_type_and; break; case '*': self->current_token = finsh_token_type_mul; break; case '+': ch = token_next_char(self); if ( ch == '+' ) { self->current_token = finsh_token_type_inc; } else { token_prev_char(self); self->current_token = finsh_token_type_add; } break; case '-': ch = token_next_char(self); if ( ch == '-' ) { self->current_token = finsh_token_type_dec; } else { token_prev_char(self); self->current_token = finsh_token_type_sub; } break; case '/': ch = token_next_char(self); if (ch == '/') { /* line comments, set to end of file */ self->current_token = finsh_token_type_eof; } else { token_prev_char(self); self->current_token = finsh_token_type_div; } break; case '<': ch = token_next_char(self); if ( ch == '<' ) { self->current_token = finsh_token_type_shl; } else { token_prev_char(self); self->current_token = finsh_token_type_bad; } break; case '>': ch = token_next_char(self); if ( ch == '>' ) { self->current_token = finsh_token_type_shr; } else { token_prev_char(self); self->current_token = finsh_token_type_bad; } break; case '|': self->current_token = finsh_token_type_or; break; case '%': self->current_token = finsh_token_type_mod; break; case '~': self->current_token = finsh_token_type_bitwise; break; case '^': self->current_token = finsh_token_type_xor; break; case '=': self->current_token = finsh_token_type_assign; break; case '\'': self->value.char_value = token_proc_char(self); self->current_token = finsh_token_type_value_char; break; case '"': token_proc_string(self); self->current_token = finsh_token_type_value_string; break; default: if ( is_digit(ch) ) { token_prev_char(self); token_proc_number(self); break; } finsh_error_set(FINSH_ERROR_UNKNOWN_TOKEN); self->current_token = finsh_token_type_bad; break; } } } static int token_match_name(struct finsh_token* self, const char* str) { int i; for (i = 0; i < sizeof(finsh_name_table)/sizeof(struct name_table); i++) { if ( strcmp(finsh_name_table[i].name, str)==0 ) { self->current_token = finsh_name_table[i].type; return 1; } } return 0; } static void token_trim_space(struct finsh_token* self) { char ch; while ( (ch = token_next_char(self)) ==' ' || ch == '\t' || ch == '\r'); token_prev_char(self); } static char token_proc_char(struct finsh_token* self) { char ch; char buf[4], *p; p = buf; ch = token_next_char(self); if ( ch == '\\' ) { ch = token_next_char(self); switch ( ch ) { case 'n': ch = '\n'; break; case 't': ch = '\t'; break; case 'v': ch = '\v'; break; case 'b': ch = '\b'; break; case 'r': ch = '\r'; break; case '\\': ch = '\\'; break; case '\'': ch = '\''; break; default : while ( is_digit(ch) )/*for '\113' char*/ { ch = token_next_char(self); *p++ = ch; } token_prev_char(self); *p = '\0'; ch = atoi(p); break; } } if ( token_next_char(self) != '\'' ) { token_prev_char(self); finsh_error_set(FINSH_ERROR_EXPECT_CHAR); return ch; } return ch; } static u_char* token_proc_string(struct finsh_token* self) { u_char* p; for ( p = &self->string[0]; p - &(self->string[0]) < FINSH_STRING_MAX; ) { char ch = token_next_char(self); if ( is_eof(self) ) { finsh_error_set(FINSH_ERROR_UNEXPECT_END); return NULL;; } if ( ch == '\\' ) { ch = token_proc_escape(self); } else if ( ch == '"' )/*end of string.*/ { *p = '\0'; return self->string; } *p++ = ch; } return NULL; } static int token_proc_escape(struct finsh_token* self) { char ch; int result=0; ch = token_next_char(self); switch (ch) { case 'n': result = '\n'; break; case 't': result = '\t'; break; case 'v': result = '\v'; break; case 'b': result = '\b'; break; case 'r': result = '\r'; break; case 'f': result = '\f'; break; case 'a': result = '\007'; break; case 'x': result = 0; ch = token_next_char(self); while ( (ch - '0')<16u ) { result = result*16 + ch - '0'; ch = token_next_char(self); } token_prev_char(self); break; default: if ( (ch - '0') < 8u) { result = 0; while ( (ch - '0') < 8u ) { result = result*8 + ch - '0'; ch = token_next_char(self); } token_prev_char(self); } break; } return result; } /* (0|0x|0X|0b|0B)number+(l|L) */ static void token_proc_number(struct finsh_token* self) { char ch; char *p, buf[128]; long value; value = 0; p = buf; ch = token_next_char(self); if ( ch == '0' ) { int b; ch = token_next_char(self); if ( ch == 'x' || ch == 'X' )/*it's a hex number*/ { b = 16; ch = token_next_char(self); while ( is_digit(ch) || is_alpha(ch) ) { *p++ = ch; ch = token_next_char(self); } *p = '\0'; } else if ( ch == 'b' || ch == 'B' ) { b = 2; ch = token_next_char(self); while ( (ch=='0')||(ch=='1') ) { *p++ = ch; ch = token_next_char(self); } *p = '\0'; } else if ( '0' <= ch && ch <= '7' ) { b = 8; while ( '0' <= ch && ch <= '7' ) { *p++ = ch; ch = token_next_char(self); } *p = '\0'; } else { /* Not a valid number */ token_prev_char(self); return; } self->value.int_value = token_spec_number(buf, strlen(buf), b); self->current_token = finsh_token_type_value_int; } else { while ( is_digit(ch) ) { value = value*10 + ( ch - '0' ); ch = token_next_char(self); } self->value.int_value = value; self->current_token = finsh_token_type_value_int; } switch ( ch ) { case 'l': case 'L': self->current_token = finsh_token_type_value_long; break; default: token_prev_char(self); break; } } /*use 64 bit number*/ #define BN_SIZE 2 static long token_spec_number(char* string, int length, int b) { char* p; int t; int i, j, shift=1; unsigned int bn[BN_SIZE], v; long d; p = string; i = 0; switch ( b ) { case 16: shift = 4; break; case 8: shift = 3; break; case 2: shift = 1; break; default: break; } for ( j=0; j='a' && t <='f' ) { t = t - 'a' +10; } else if ( t >='A' && t <='F' ) { t = t - 'A' +10; } else t = t - '0'; for ( j=0; j> (32 - shift); } i++; } d = (long)bn[0]; return d; }