expat 用起来比较困难。当你只对文档结构感兴趣、而不关心元素的内容时，expat 会更合适。为什么不使用 libxml 呢？你选择基于事件的解析器（如 expat）而不是基于树的解析器，原因是什么？
无论如何,这样做的方法是设置一个字符数据处理程序.这是一个基于您的代码的示例:
#include#include #include #define BUFFER_SIZE 100000 /* track the current level in the xml tree */ static int depth = 0; static char *last_content; /* first when start element is encountered */ void start_element(void *data, const char *element, const char **attribute) { int i; for (i = 0; i < depth; i++) { printf(" "); } printf("%s", element); for (i = 0; attribute[i]; i += 2) { printf(" %s= '%s'", attribute[i], attribute[i + 1]); } printf("\n"); depth++; } /* decrement the current level of the tree */ void end_element(void *data, const char *el) { int i; for (i = 0; i < depth; i++) { printf(" "); } printf("Content of element %s was \"%s\"\n", el, last_content); depth--; } void handle_data(void *data, const char *content, int length) { char *tmp = malloc(length); strncpy(tmp, content, length); tmp[length] = '\0'; data = (void *) tmp; last_content = tmp; /* TODO: concatenate the text nodes? */ } int parse_xml(char *buff, size_t buff_size) { FILE *fp; fp = fopen("start_indication.xml", "r"); if (fp == NULL) { printf("Failed to open file\n"); return 1; } XML_Parser parser = XML_ParserCreate(NULL); XML_SetElementHandler(parser, start_element, end_element); XML_SetCharacterDataHandler(parser, handle_data); memset(buff, 0, buff_size); printf("strlen(buff) before parsing: %d\n", strlen(buff)); size_t file_size = 0; file_size = fread(buff, sizeof(char), buff_size, fp); /* parse the xml */ if (XML_Parse(parser, buff, strlen(buff), XML_TRUE) == XML_STATUS_ERROR) { printf("Error: %s\n", XML_ErrorString(XML_GetErrorCode(parser))); } fclose(fp); XML_ParserFree(parser); return 0; } int main(int argc, char **argv) { int result; char buffer[BUFFER_SIZE]; result = parse_xml(buffer, BUFFER_SIZE); printf("Result is %i\n", result); return 0; }
值"20"是标签名为"item"、name 属性为"frame"的元素中的字符数据。
要接收字符数据事件,请使用 XML_SetCharacterDataHandler 函数注册回调.
此回调将接收字符数据.解析器可能会把字符数据拆分成多段 - 通常是因为到达了缓冲区的末尾,或者遇到了实体(因此对于 foo&amp;bar,你的处理程序将被调用三次 - "foo","&"和"bar"),所以如果你需要完整的数据,就必须把这些字符串重新拼接在一起.
当您收到下一个元素的开始或结束回调时,就说明已经拿到了该节点内的全部字符数据.
拥有所有字符数据后,即可对其进行处理.
从您的代码中简化的独立示例:
#include#include #include #include static const char* xml = "\n"\ " \n"\ " \n"\ "\n"; void reset_char_data_buffer (); void process_char_data_buffer (); static bool grab_next_value; void start_element(void *data, const char *element, const char **attribute) { process_char_data_buffer(); reset_char_data_buffer(); if ( strcmp("item", element) == 0 ) { size_t matched = 0; for (size_t i = 0; attribute[i]; i += 2) { if ( ( strcmp("name", attribute[i]) == 0 ) && ( strcmp("frame", attribute[i+1]) == 0 ) ) ++matched; if ( ( strcmp("type", attribute[i]) == 0 ) && ( strcmp("int16", attribute[i+1]) == 0 ) ) ++matched; } if (matched == 2) { printf("this is the element you are looking for\n"); grab_next_value = true; } } } void end_element(void *data, const char *el) { process_char_data_buffer(); reset_char_data_buffer(); } static char char_data_buffer[1024]; static size_t offs; static bool overflow; void reset_char_data_buffer (void) { offs = 0; overflow = false; grab_next_value = false; } // pastes parts of the node together void char_data (void *userData, const XML_Char *s, int len) { if (!overflow) { if (len + offs >= sizeof(char_data_buffer) ) { overflow = true; } else { memcpy(char_data_buffer + offs, s, len); offs += len; } } } // if the element is the one we're after, convert the character data to // an integer value void process_char_data_buffer (void) { if (offs > 0) { char_data_buffer[ offs ] = '\0'; printf("character data: %s\n", char_data_buffer); if ( grab_next_value ) { int value = atoi( char_data_buffer ); printf("the value is %d\n", value); } } } int main (void ) { XML_Parser parser = XML_ParserCreate(NULL); XML_SetElementHandler(parser, start_element, end_element); XML_SetCharacterDataHandler(parser, char_data); reset_char_data_buffer(); if (XML_Parse(parser, xml, strlen(xml), XML_TRUE) == XML_STATUS_ERROR) printf("Error: %s\n", XML_ErrorString(XML_GetErrorCode(parser))); XML_ParserFree(parser); return 0; }- 16
\n"\ "- 3843747
\n"\ "- 0
\n"\ "- 20
\n"\ "