db06460208
The full log is at https://github.com/RTGUI/RTGUI/commits/merge_1 and it's difficult to merge the new tree commit by commit. I also converted all the file into unix eol so there are many fake diff. Big changes are noted in rtgui/doc/road_map.txt and rtgui/doc/attention.txt. Keep an eye on them if you want to migrate your old code. Note that the work is still in progress and the bsp is not prepared in trunk so far. git-svn-id: https://rt-thread.googlecode.com/svn/trunk@2092 bbd45198-f89e-11dd-88c7-29a3b14d5316
292 lines
10 KiB
C
292 lines
10 KiB
C
#include <rtgui/rtgui_xml.h>
|
|
#include <rtgui/rtgui_system.h>
|
|
|
|
/* Internal states that the parser can be in at any given time.
 * The numeric values are stored in the `state` / `next_state` columns of the
 * transition table, so the order of the enumerators must not change. */
enum {
    STAT_START = 0,         /* starting base state, default state */
    STAT_TEXT,              /* text state */
    STAT_START_TAG,         /* start tag state */
    STAT_START_TAGNAME,     /* start tagname state */
    STAT_START_TAGNAME_END, /* start tagname ending state */
    STAT_END_TAG,           /* end tag state */
    STAT_END_TAGNAME,       /* end tag tagname state */
    STAT_END_TAGNAME_END,   /* end tag tagname ending */
    STAT_EMPTY_TAG,         /* empty tag state */
    STAT_SPACE,             /* linear whitespace state */
    STAT_ATTR_NAME,         /* attribute name state */
    STAT_ATTR_NAME_END,     /* attribute name ending state */
    STAT_ATTR_VAL,          /* attribute value starting state */
    STAT_ATTR_VAL2,         /* attribute value state */
    STAT_ERROR              /* error state; also marks the end of the table */
};
|
|
|
|
/* Character classes that we will match against. This could be expanded if
   need be, however, we are aiming for simple. */
enum {
    CLASS_TYPE_NONE = 0,    /* matches nothing, a base state */
    CLASS_TYPE_LEFT_ANGLE,  /* matches start tag '<' */
    CLASS_TYPE_SLASH,       /* matches forward slash */
    CLASS_TYPE_RIGHT_ANGLE, /* matches end tag '>' */
    CLASS_TYPE_EQUALS,      /* matches equals sign */
    CLASS_TYPE_QUOTE,       /* matches double-quotes */
    CLASS_TYPE_LETTERS,     /* matches a-zA-Z letters, digits 0-9 and '.' */
    CLASS_TYPE_SPACE,       /* matches whitespace */
    CLASS_TYPE_ANY          /* matches any character; will match all
                               above classes */
};
|
|
|
|
/* xml state transition table */
|
|
struct rtgui_xml_state
|
|
{
|
|
rt_uint8_t state;
|
|
rt_uint8_t class_type;
|
|
rt_uint8_t next_state;
|
|
rt_uint8_t event;
|
|
};
|
|
|
|
/* Note: States must be grouped in match order AND grouped together! */
|
|
static const struct rtgui_xml_state RTGUI_XML_STATES [] = {
|
|
/* [0-2] starting state, which also serves as the default state in case
|
|
of error */
|
|
{ STAT_START, CLASS_TYPE_SPACE, STAT_SPACE, EVENT_NONE },
|
|
{ STAT_START, CLASS_TYPE_LEFT_ANGLE, STAT_START_TAG, EVENT_NONE },
|
|
{ STAT_START, CLASS_TYPE_ANY, STAT_TEXT, EVENT_COPY },
|
|
|
|
/* [3-5] space state handles linear white space */
|
|
{ STAT_SPACE, CLASS_TYPE_SPACE, STAT_SPACE, EVENT_NONE },
|
|
{ STAT_SPACE, CLASS_TYPE_LEFT_ANGLE, STAT_START_TAG, EVENT_TEXT },
|
|
{ STAT_SPACE, CLASS_TYPE_ANY, STAT_TEXT, EVENT_COPY },
|
|
|
|
/* [6-8] handle start tag */
|
|
{ STAT_START_TAG, CLASS_TYPE_LETTERS, STAT_START_TAGNAME, EVENT_COPY },
|
|
{ STAT_START_TAG, CLASS_TYPE_SLASH, STAT_END_TAG, EVENT_COPY },
|
|
/* below added since some individuals get a little carried away with
|
|
spacing around tag names, e.g. < tag > */
|
|
{ STAT_START_TAG, CLASS_TYPE_SPACE, STAT_START_TAG, EVENT_NONE },
|
|
|
|
/* [9-12] handle start tag name */
|
|
{ STAT_START_TAGNAME, CLASS_TYPE_LETTERS, STAT_START_TAGNAME, EVENT_NONE },
|
|
{ STAT_START_TAGNAME, CLASS_TYPE_SPACE, STAT_START_TAGNAME_END, EVENT_START },
|
|
/* below added for tags without any space between tag and ending
|
|
slash, e.g., <br/> */
|
|
{ STAT_START_TAGNAME, CLASS_TYPE_SLASH, STAT_EMPTY_TAG, EVENT_END },
|
|
{ STAT_START_TAGNAME, CLASS_TYPE_RIGHT_ANGLE, STAT_START, EVENT_START },
|
|
|
|
/* [13-16] handle start tag name end */
|
|
{ STAT_START_TAGNAME_END, CLASS_TYPE_LETTERS, STAT_ATTR_NAME, EVENT_COPY },
|
|
/* below added to handle additional space in between attribute value
|
|
pairs in start tags, e.g., <tag attr="2" attr2="test" > */
|
|
{ STAT_START_TAGNAME_END, CLASS_TYPE_SPACE, STAT_START_TAGNAME_END, EVENT_NONE },
|
|
{ STAT_START_TAGNAME_END, CLASS_TYPE_RIGHT_ANGLE, STAT_START, EVENT_START },
|
|
/* below supports tags that are self-closing, e.g., <br /> */
|
|
{ STAT_START_TAGNAME_END, CLASS_TYPE_SLASH, STAT_EMPTY_TAG, EVENT_COPY },
|
|
|
|
/* [17] handle empty tags, e.g., <br /> */
|
|
{ STAT_EMPTY_TAG, CLASS_TYPE_RIGHT_ANGLE, STAT_START, EVENT_END },
|
|
|
|
/* [18] handle end tag, e.g., <tag /> */
|
|
{ STAT_END_TAG, CLASS_TYPE_LETTERS, STAT_END_TAGNAME, EVENT_NONE },
|
|
|
|
/* [19-21] handle end tag name */
|
|
{ STAT_END_TAGNAME, CLASS_TYPE_LETTERS, STAT_END_TAGNAME, EVENT_NONE },
|
|
{ STAT_END_TAGNAME, CLASS_TYPE_RIGHT_ANGLE, STAT_START, EVENT_END },
|
|
/* below adds support for spaces at the end of an end tag (before
|
|
closing bracket) */
|
|
{ STAT_END_TAGNAME, CLASS_TYPE_SPACE, STAT_END_TAGNAME_END, EVENT_END },
|
|
|
|
/* [22] handle ending of end tag name */
|
|
{ STAT_END_TAGNAME_END, CLASS_TYPE_SPACE, STAT_END_TAGNAME_END, EVENT_NONE },
|
|
{ STAT_END_TAGNAME_END, CLASS_TYPE_RIGHT_ANGLE,STAT_START, EVENT_NONE },
|
|
|
|
/* [23-25] handle text */
|
|
{ STAT_TEXT, CLASS_TYPE_SPACE, STAT_SPACE, EVENT_NONE },
|
|
{ STAT_TEXT, CLASS_TYPE_LEFT_ANGLE, STAT_START_TAG, EVENT_TEXT },
|
|
{ STAT_TEXT, CLASS_TYPE_ANY, STAT_TEXT, EVENT_NONE },
|
|
|
|
/* [26-30] handle attribute names */
|
|
{ STAT_ATTR_NAME, CLASS_TYPE_LETTERS, STAT_ATTR_NAME, EVENT_COPY },
|
|
/* below add support for space before the equals sign, e.g, <tag
|
|
attr ="2"> */
|
|
{ STAT_ATTR_NAME, CLASS_TYPE_SPACE, STAT_ATTR_NAME_END, EVENT_NAME },
|
|
{ STAT_ATTR_NAME, CLASS_TYPE_EQUALS, STAT_ATTR_VAL, EVENT_NAME },
|
|
|
|
/* [31-33] attribute name end */
|
|
{ STAT_ATTR_NAME_END, CLASS_TYPE_SPACE, STAT_ATTR_NAME_END, EVENT_NONE },
|
|
{ STAT_ATTR_NAME_END, CLASS_TYPE_LETTERS, STAT_ATTR_NAME, EVENT_COPY },
|
|
{ STAT_ATTR_NAME_END, CLASS_TYPE_EQUALS, STAT_ATTR_VAL, EVENT_NONE },
|
|
|
|
/* [34-35] handle attribute values, initial quote and spaces */
|
|
{ STAT_ATTR_VAL, CLASS_TYPE_QUOTE, STAT_ATTR_VAL2, EVENT_NONE },
|
|
/* below handles initial spaces before quoted attribute value */
|
|
{ STAT_ATTR_VAL, CLASS_TYPE_SPACE, STAT_ATTR_VAL, EVENT_NONE },
|
|
|
|
/* [36-37] handle actual attribute values */
|
|
{ STAT_ATTR_VAL2, CLASS_TYPE_QUOTE, STAT_START_TAGNAME_END, EVENT_VAL },
|
|
{ STAT_ATTR_VAL2, CLASS_TYPE_LETTERS, STAT_ATTR_VAL2, EVENT_COPY },
|
|
{ STAT_ATTR_VAL2, CLASS_TYPE_SLASH, STAT_ATTR_VAL2, EVENT_NONE },
|
|
|
|
/* End of table marker */
|
|
{ STAT_ERROR, CLASS_TYPE_NONE, STAT_ERROR, EVENT_NONE }
|
|
};
|
|
|
|
struct rtgui_xml
|
|
{
|
|
/* event handler */
|
|
rtgui_xml_event_handler_t event_handler;
|
|
void* user;
|
|
|
|
char* buffer; /* xml buffer */
|
|
rt_size_t buffer_size; /* buffer size */
|
|
rt_size_t position; /* current position in buffer */
|
|
rt_uint16_t state, event; /* current state and event */
|
|
|
|
rt_bool_t copy; /* copy text into tmp buffer */
|
|
rt_bool_t halt; /* halt parsing of document */
|
|
};
|
|
|
|
/*
 * Create a parser instance.
 *
 * buffer_size: size in bytes of the internal text buffer; one byte of it is
 *              used for the terminating NUL of each reported string.
 * handler:     callback invoked by rtgui_xml_parse() for every event.
 * user:        opaque pointer passed back to the handler unchanged.
 *
 * Returns the new parser, or a null pointer when an allocation fails.
 * Release the parser with rtgui_xml_destroy().
 */
rtgui_xml_t* rtgui_xml_create(rt_size_t buffer_size, rtgui_xml_event_handler_t handler,
                              void* user)
{
    rtgui_xml_t* xml;

    /* The parser always writes a terminating NUL at buffer[position], so the
       buffer needs at least one byte; a size of 0 would also make the
       "buffer_size - 1" room check wrap around. */
    if (buffer_size == 0)
        buffer_size = 1;

    xml = (rtgui_xml_t*) rtgui_malloc(sizeof(struct rtgui_xml));
    if (!xml)
        return xml;     /* out of memory: hand the null pointer back */
    rt_memset(xml, 0, sizeof(struct rtgui_xml));

    xml->event_handler = handler;
    xml->user = user;

    /* create buffer */
    xml->buffer_size = buffer_size;
    xml->buffer = (char*) rtgui_malloc(xml->buffer_size);
    if (!xml->buffer)
    {
        /* do not return a half-built parser */
        rtgui_free(xml);
        return (rtgui_xml_t*) 0;
    }

    return xml;
}
|
|
|
|
/*
 * Release a parser: frees the text buffer and then the parser object itself.
 * Passing a null pointer is allowed and does nothing.
 */
void rtgui_xml_destroy(rtgui_xml_t* xml)
{
    if (xml)
    {
        rtgui_free(xml->buffer);
        rtgui_free(xml);
    }
}
|
|
|
|
/*
 * Map an event code to a short human-readable name.
 * Any code without a name of its own yields "err".
 * The returned string is a literal and must not be freed or modified.
 */
const char* rtgui_xml_event_str(rt_uint8_t event)
{
    switch (event)
    {
    case EVENT_START:
        return "start tag";
    case EVENT_END:
        return "end tag";
    case EVENT_TEXT:
        return "text";
    case EVENT_NAME:
        return "attr name";
    case EVENT_VAL:
        return "attr val";
    case EVENT_END_DOC:
        return "end document";
    default:
        break;
    }

    return "err";
}
|
|
|
|
/*
 * Feed `len` bytes from `buf` to the parser.
 *
 * Each byte is matched against the rows of RTGUI_XML_STATES that belong to
 * the current state; the first matching row decides the event and the next
 * state. A byte that matches no row is ignored. Parser progress is kept in
 * `xml`, so a document may be supplied over several calls.
 *
 * The event handler is called with the event code, the collected text as a
 * NUL-terminated string, its length and the user pointer. Text that does not
 * fit into the parser buffer is silently dropped. When the handler returns 0
 * the parser is halted; the halt flag is never cleared here, so later calls
 * return 0 without consuming input.
 *
 * Returns non-zero while the parser is not halted, 0 once it is halted or
 * when `xml`, `buf` or the parser buffer is missing.
 */
int rtgui_xml_parse(rtgui_xml_t* xml, const char* buf, rt_size_t len)
{
    rt_size_t i;    /* same type as len: no signed/unsigned comparison */
    int j, c, match;

#define is_space(ch) \
    ((rt_uint32_t)((ch) - 9) < 5u || (ch) == ' ')
#define is_alpha(ch) \
    ((rt_uint32_t)(((ch) | 0x20) - 'a') < 26u)
#define is_digit(ch) \
    ((rt_uint32_t)((ch) - '0') < 10u)
#define is_letters(ch) \
    (is_alpha(ch) || is_digit(ch) || ((ch) == '.'))

    /* nothing sensible can be done without a parser, input or text buffer */
    if (!xml || !buf)
        return 0;
    if (!xml->buffer || xml->buffer_size == 0)
        return 0;

    for (i = 0; i < len; i++)
    {
        if (xml->halt)
            break;

        c = buf[i] & 0xff;

        /* search in state table */
        for (j = 0; RTGUI_XML_STATES[j].state != STAT_ERROR; j++)
        {
            if (RTGUI_XML_STATES[j].state != xml->state)
                continue;

            match = 0;
            switch (RTGUI_XML_STATES[j].class_type)
            {
            case CLASS_TYPE_LETTERS:
                match = is_letters(c);
                break;
            case CLASS_TYPE_LEFT_ANGLE:
                match = (c == '<');
                break;
            case CLASS_TYPE_SLASH:
                match = (c == '/');
                break;
            case CLASS_TYPE_RIGHT_ANGLE:
                match = (c == '>');
                break;
            case CLASS_TYPE_EQUALS:
                match = (c == '=');
                break;
            case CLASS_TYPE_QUOTE:
                match = (c == '"');
                break;
            case CLASS_TYPE_SPACE:
                match = is_space(c);
                break;
            case CLASS_TYPE_ANY:
                match = 1;
                break;
            default:
                break;
            }

            if (!match)
                continue;

            /* we matched a character class */
            if (RTGUI_XML_STATES[j].event == EVENT_COPY)
            {
                xml->copy = RT_TRUE;
            }
            else if (RTGUI_XML_STATES[j].event != EVENT_NONE)
            {
                /* an event is only reported when text has been collected */
                if (xml->copy == RT_TRUE)
                {
                    /* basically we are guaranteed never to have an event of
                       type EVENT_COPY or EVENT_NONE here. */
                    xml->event = RTGUI_XML_STATES[j].event;
                    xml->buffer[xml->position] = 0; /* make a string */

                    if (!xml->event_handler(RTGUI_XML_STATES[j].event,
                                            xml->buffer, xml->position,
                                            xml->user))
                    {
                        xml->halt = RT_TRUE; /* stop parsing from here out */
                    }
                    xml->position = 0;
                    xml->copy = RT_FALSE;
                }
            }

            if (xml->copy == RT_TRUE)
            {
                /* check to see if we have room; one less for trailing
                   nul (written as an addition so it cannot wrap around) */
                if (xml->position + 1 < xml->buffer_size)
                {
                    xml->buffer[xml->position] = buf[i];
                    xml->position++;
                }
            }

            xml->state = RTGUI_XML_STATES[j].next_state; /* change state */
            break; /* break out of loop though state search */
        }
    }

    return !xml->halt;
}
|