/* * Claws Mail -- a GTK based, lightweight, and fast e-mail client * Copyright (C) 1999-2012 Hiroyuki Yamamoto and the Claws Mail team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * */ #ifdef HAVE_CONFIG_H # include "config.h" #include "claws-features.h" #endif #include #include #include #include #include "xml.h" #include "utils.h" #include "codeconv.h" #include "file-utils.h" #define SPARSE_MEMORY /* if this is defined all attr.names and tag.names are stored * in a hash table */ #if defined(SPARSE_MEMORY) #include "stringtable.h" static StringTable *xml_string_table; static XMLTag *xml_copy_tag (XMLTag *tag); static XMLAttr *xml_copy_attr (XMLAttr *attr); static void xml_free_node (XMLNode *node); static void xml_free_tag (XMLTag *tag); static void xml_pop_tag (XMLFile *file); static void xml_push_tag (XMLFile *file, XMLTag *tag); static gint xml_read_line (XMLFile *file); static void xml_truncate_buf (XMLFile *file); static gint xml_unescape_str (gchar *str); static void xml_string_table_create(void) { if (xml_string_table == NULL) xml_string_table = string_table_new(); } #define XML_STRING_ADD(str) \ string_table_insert_string(xml_string_table, (str)) #define XML_STRING_FREE(str) \ string_table_free_string(xml_string_table, (str)) #define XML_STRING_TABLE_CREATE() \ xml_string_table_create() #else /* !SPARSE_MEMORY */ #define XML_STRING_ADD(str) \ g_strdup(str) #define XML_STRING_FREE(str) \ g_free(str) #define XML_STRING_TABLE_CREATE() #endif /* SPARSE_MEMORY */ static gint xml_get_parenthesis (XMLFile *file, gchar *buf, gint len); XMLFile *xml_open_file(const gchar *path) { XMLFile *newfile; cm_return_val_if_fail(path != NULL, NULL); newfile = g_new(XMLFile, 1); newfile->fp = claws_fopen(path, "rb"); if (!newfile->fp) { FILE_OP_ERROR(path, "fopen"); g_free(newfile); return NULL; } XML_STRING_TABLE_CREATE(); newfile->buf = g_string_new(NULL); newfile->bufp = newfile->buf->str; newfile->dtd = NULL; newfile->encoding = NULL; newfile->tag_stack = NULL; newfile->level = 0; newfile->is_empty_element = FALSE; newfile->path = g_strdup(path); return newfile; } void xml_close_file(XMLFile *file) { cm_return_if_fail(file != NULL); if (file->fp) claws_fclose(file->fp); g_string_free(file->buf, TRUE); g_free(file->dtd); g_free(file->encoding); g_free(file->path); while (file->tag_stack != NULL) xml_pop_tag(file); g_free(file); } static GNode *xml_build_tree(XMLFile *file, GNode *parent, guint level) { GNode *node = NULL; XMLNode *xmlnode; XMLTag *tag; while (xml_parse_next_tag(file) == 0) { if (file->level < level) break; if (file->level == level) { g_warning("xml_build_tree(): parse error in %s", file->path); break; } tag = xml_get_current_tag(file); if (!tag) break; xmlnode = xml_node_new(xml_copy_tag(tag), NULL); xmlnode->element = xml_get_element(file); if (!parent) node = g_node_new(xmlnode); else node = g_node_append_data(parent, xmlnode); xml_build_tree(file, node, file->level); if (file->level == 0) break; } return node; } GNode *xml_parse_file(const gchar *path) { XMLFile *file; GNode *node; file = xml_open_file(path); if (file == NULL) return NULL; xml_get_dtd(file); node = xml_build_tree(file, NULL, file->level); xml_close_file(file); #if defined(SPARSE_MEMORY) if (debug_get_mode()) string_table_get_stats(xml_string_table); #endif return node; } gint xml_get_dtd(XMLFile *file) { gchar buf[XMLBUFSIZE]; gchar *bufp = buf; if (xml_get_parenthesis(file, buf, sizeof(buf)) < 0) return -1; if ((*bufp++ == '?') && (bufp = strcasestr(bufp, "xml")) && (bufp = strcasestr(bufp + 3, "version")) && (bufp = strchr(bufp + 7, '?'))) { file->dtd = g_strdup(buf); if ((bufp = strcasestr(buf, "encoding=\""))) { bufp += 9; extract_quote(bufp, '"'); file->encoding = g_strdup(bufp); file->need_codeconv = g_strcmp0(bufp, CS_INTERNAL); } else { file->encoding = g_strdup(CS_INTERNAL); file->need_codeconv = FALSE; } } else { g_warning("can't get XML DTD in %s", file->path); return -1; } return 0; } gint xml_parse_next_tag(XMLFile *file) { gchar buf[XMLBUFSIZE]; gchar *bufp = buf; gchar *tag_str; XMLTag *tag; gint len; next: if (file->is_empty_element == TRUE) { file->is_empty_element = FALSE; xml_pop_tag(file); return 0; } if (xml_get_parenthesis(file, buf, sizeof(buf)) < 0) { g_warning("xml_parse_next_tag(): can't parse next tag in %s", file->path); return -1; } len = strlen(buf); /* end-tag */ if (buf[0] == '/') { if (strcmp(xml_get_current_tag(file)->tag, buf + 1) != 0) { g_warning("xml_parse_next_tag(): tag name mismatch in %s : %s (%s)", file->path, buf, xml_get_current_tag(file)->tag); return -1; } xml_pop_tag(file); return 0; } if (len >= 7 && !strncmp(buf, "!-- ", 4) && !strncmp(buf+len-3, " --", 3)) { /* skip comment */ goto next; } tag = xml_tag_new(NULL); xml_push_tag(file, tag); if (len > 0 && buf[len - 1] == '/') { file->is_empty_element = TRUE; buf[len - 1] = '\0'; g_strchomp(buf); } if (strlen(buf) == 0) { g_warning("xml_parse_next_tag(): tag name is empty in %s", file->path); return -1; } while (*bufp != '\0' && !g_ascii_isspace(*bufp)) bufp++; if (*bufp == '\0') { if (file->need_codeconv) { tag_str = conv_codeset_strdup(buf, file->encoding, CS_INTERNAL); if (tag_str) { tag->tag = XML_STRING_ADD(tag_str); g_free(tag_str); } else tag->tag = XML_STRING_ADD(buf); } else tag->tag = XML_STRING_ADD(buf); return 0; } else { *bufp++ = '\0'; if (file->need_codeconv) { tag_str = conv_codeset_strdup(buf, file->encoding, CS_INTERNAL); if (tag_str) { tag->tag = XML_STRING_ADD(tag_str); g_free(tag_str); } else tag->tag = XML_STRING_ADD(buf); } else tag->tag = XML_STRING_ADD(buf); } /* parse attributes ( name=value ) */ while (*bufp) { XMLAttr *attr; gchar *attr_name; gchar *attr_value; gchar *utf8_attr_name; gchar *utf8_attr_value; gchar *p; gchar quote; while (g_ascii_isspace(*bufp)) bufp++; attr_name = bufp; if ((p = strchr(attr_name, '=')) == NULL) { g_warning("xml_parse_next_tag(): syntax error in %s, tag (a) %s", file->path, attr_name); return -1; } bufp = p; *bufp++ = '\0'; while (g_ascii_isspace(*bufp)) bufp++; if (*bufp != '"' && *bufp != '\'') { g_warning("xml_parse_next_tag(): syntax error in %s, tag (b) %s", file->path, bufp); return -1; } quote = *bufp; bufp++; attr_value = bufp; if ((p = strchr(attr_value, quote)) == NULL) { g_warning("xml_parse_next_tag(): syntax error in %s, tag (c) %s", file->path, attr_value); return -1; } bufp = p; *bufp++ = '\0'; g_strchomp(attr_name); xml_unescape_str(attr_value); if (file->need_codeconv) { utf8_attr_name = conv_codeset_strdup (attr_name, file->encoding, CS_INTERNAL); utf8_attr_value = conv_codeset_strdup (attr_value, file->encoding, CS_INTERNAL); if (!utf8_attr_name) utf8_attr_name = g_strdup(attr_name); if (!utf8_attr_value) utf8_attr_value = g_strdup(attr_value); attr = xml_attr_new(utf8_attr_name, utf8_attr_value); g_free(utf8_attr_value); g_free(utf8_attr_name); } else { attr = xml_attr_new(attr_name, attr_value); } xml_tag_add_attr(tag, attr); } tag->attr = g_list_reverse(tag->attr); return 0; } static void xml_push_tag(XMLFile *file, XMLTag *tag) { cm_return_if_fail(tag != NULL); file->tag_stack = g_list_prepend(file->tag_stack, tag); file->level++; } static void xml_pop_tag(XMLFile *file) { XMLTag *tag; if (!file->tag_stack) return; tag = (XMLTag *)file->tag_stack->data; file->tag_stack = g_list_remove(file->tag_stack, tag); xml_free_tag(tag); file->level--; } XMLTag *xml_get_current_tag(XMLFile *file) { if (file->tag_stack) return (XMLTag *)file->tag_stack->data; else return NULL; } GList *xml_get_current_tag_attr(XMLFile *file) { XMLTag *tag; tag = xml_get_current_tag(file); if (!tag) return NULL; return tag->attr; } gchar *xml_get_element(XMLFile *file) { gchar *str; gchar *new_str; gchar *end; while ((end = strchr(file->bufp, '<')) == NULL) if (xml_read_line(file) < 0) return NULL; if (end == file->bufp) return NULL; str = g_strndup(file->bufp, end - file->bufp); /* this is not XML1.0 strict */ g_strstrip(str); xml_unescape_str(str); file->bufp = end; xml_truncate_buf(file); if (str[0] == '\0') { g_free(str); return NULL; } if (!file->need_codeconv) return str; new_str = conv_codeset_strdup(str, file->encoding, CS_INTERNAL); if (!new_str) new_str = g_strdup(str); g_free(str); return new_str; } static gint xml_read_line(XMLFile *file) { gchar buf[XMLBUFSIZE]; gint index; if (claws_fgets(buf, sizeof(buf), file->fp) == NULL) return -1; index = file->bufp - file->buf->str; g_string_append(file->buf, buf); file->bufp = file->buf->str + index; return 0; } static void xml_truncate_buf(XMLFile *file) { gint len; len = file->bufp - file->buf->str; if (len > 0) { g_string_erase(file->buf, 0, len); file->bufp = file->buf->str; } } gboolean xml_compare_tag(XMLFile *file, const gchar *name) { XMLTag *tag; tag = xml_get_current_tag(file); if (tag && strcmp(tag->tag, name) == 0) return TRUE; else return FALSE; } XMLNode *xml_node_new(XMLTag *tag, const gchar *text) { XMLNode *node; node = g_new(XMLNode, 1); node->tag = tag; node->element = g_strdup(text); return node; } XMLTag *xml_tag_new(const gchar *tag) { XMLTag *new_tag; new_tag = g_new(XMLTag, 1); if (tag) new_tag->tag = XML_STRING_ADD(tag); else new_tag->tag = NULL; new_tag->attr = NULL; return new_tag; } XMLAttr *xml_attr_new(const gchar *name, const gchar *value) { XMLAttr *new_attr; new_attr = g_new(XMLAttr, 1); new_attr->name = XML_STRING_ADD(name); new_attr->value = g_strdup(value); return new_attr; } XMLAttr *xml_attr_new_int(const gchar *name, const gint value) { XMLAttr *new_attr; gchar *valuestr; valuestr = g_strdup_printf("%d", value); new_attr = g_new(XMLAttr, 1); new_attr->name = XML_STRING_ADD(name); new_attr->value = valuestr; return new_attr; } void xml_tag_add_attr(XMLTag *tag, XMLAttr *attr) { tag->attr = g_list_prepend(tag->attr, attr); } static XMLTag *xml_copy_tag(XMLTag *tag) { XMLTag *new_tag; XMLAttr *attr; GList *list; new_tag = xml_tag_new(tag->tag); for (list = tag->attr; list != NULL; list = list->next) { attr = xml_copy_attr((XMLAttr *)list->data); xml_tag_add_attr(new_tag, attr); } tag->attr = g_list_reverse(tag->attr); return new_tag; } static XMLAttr *xml_copy_attr(XMLAttr *attr) { return xml_attr_new(attr->name, attr->value); } static gint xml_unescape_str(gchar *str) { gchar *start; gchar *end; gchar *p = str; gchar *esc_str; gchar ch; gint len; while ((start = strchr(p, '&')) != NULL) { if ((end = strchr(start + 1, ';')) == NULL) { g_warning("unescaped '&' appeared"); p = start + 1; continue; } len = end - start + 1; if (len < 3) { p = end + 1; continue; } Xstrndup_a(esc_str, start, len, return -1); if (!strcmp(esc_str, "<")) ch = '<'; else if (!strcmp(esc_str, ">")) ch = '>'; else if (!strcmp(esc_str, "&")) ch = '&'; else if (!strcmp(esc_str, "'")) ch = '\''; else if (!strcmp(esc_str, """)) ch = '\"'; else { p = end + 1; continue; } *start = ch; memmove(start + 1, end + 1, strlen(end + 1) + 1); p = start + 1; } return 0; } gint xml_file_put_escape_str(FILE *fp, const gchar *str) { const gchar *p; int result = 0; cm_return_val_if_fail(fp != NULL, -1); if (!str) return 0; for (p = str; *p != '\0'; p++) { switch (*p) { case '<': result = claws_fputs("<", fp); break; case '>': result = claws_fputs(">", fp); break; case '&': result = claws_fputs("&", fp); break; case '\'': result = claws_fputs("'", fp); break; case '\"': result = claws_fputs(""", fp); break; default: result = claws_fputc(*p, fp); } } return (result == EOF ? -1 : 0); } gint xml_file_put_xml_decl(FILE *fp) { cm_return_val_if_fail(fp != NULL, -1); XML_STRING_TABLE_CREATE(); return fprintf(fp, "\n", CS_INTERNAL); } static void xml_free_node(XMLNode *node) { if (!node) return; xml_free_tag(node->tag); g_free(node->element); g_free(node); } static gboolean xml_free_func(GNode *node, gpointer data) { XMLNode *xmlnode = node->data; xml_free_node(xmlnode); return FALSE; } void xml_free_tree(GNode *node) { cm_return_if_fail(node != NULL); g_node_traverse(node, G_PRE_ORDER, G_TRAVERSE_ALL, -1, xml_free_func, NULL); g_node_destroy(node); } static void xml_free_tag(XMLTag *tag) { if (!tag) return; XML_STRING_FREE(tag->tag); while (tag->attr != NULL) { XMLAttr *attr = (XMLAttr *)tag->attr->data; tag->attr = g_list_remove(tag->attr, tag->attr->data); XML_STRING_FREE(attr->name); g_free(attr->value); /* __not__ XML_STRING_FREE */ g_free(attr); } g_free(tag); } static gint xml_get_parenthesis(XMLFile *file, gchar *buf, gint len) { gchar *start; gchar *end; buf[0] = '\0'; while ((start = strchr(file->bufp, '<')) == NULL) if (xml_read_line(file) < 0) return -1; start++; file->bufp = start; while ((end = strchr(file->bufp, '>')) == NULL) if (xml_read_line(file) < 0) return -1; strncpy2(buf, file->bufp, MIN(end - file->bufp + 1, len)); g_strstrip(buf); file->bufp = end + 1; xml_truncate_buf(file); return 0; } #define TRY(func) \ if (!(func)) \ { \ g_warning("failed to write part of XML tree"); \ return -1; \ } \ static int xml_write_tree_recursive(GNode *node, FILE *fp) { gint i, depth; XMLTag *tag; GList *cur; cm_return_val_if_fail(node != NULL, -1); cm_return_val_if_fail(fp != NULL, -1); depth = g_node_depth(node) - 1; for (i = 0; i < depth; i++) TRY(claws_fputs(" ", fp) != EOF); tag = ((XMLNode *) node->data)->tag; TRY(fprintf(fp, "<%s", tag->tag) > 0); for (cur = tag->attr; cur != NULL; cur = g_list_next(cur)) { XMLAttr *attr = (XMLAttr *) cur->data; TRY(fprintf(fp, " %s=\"", attr->name) > 0); TRY(xml_file_put_escape_str(fp, attr->value) == 0); TRY(claws_fputs("\"", fp) != EOF); } if (node->children) { GNode *child; TRY(claws_fputs(">\n", fp) != EOF); child = node->children; while (child) { GNode *cur; cur = child; child = cur->next; TRY(xml_write_tree_recursive(cur, fp) == 0); } for (i = 0; i < depth; i++) TRY(claws_fputs(" ", fp) != EOF); TRY(fprintf(fp, "\n", tag->tag) > 0); } else TRY(claws_fputs(" />\n", fp) != EOF); return 0; } #undef TRY int xml_write_tree(GNode *node, FILE *fp) { return xml_write_tree_recursive(node, fp); } static gpointer copy_node_func(gpointer nodedata, gpointer data) { XMLNode *xmlnode = (XMLNode *) nodedata; XMLNode *newxmlnode; newxmlnode = g_new0(XMLNode, 1); newxmlnode->tag = xml_copy_tag(xmlnode->tag); newxmlnode->element = g_strdup(xmlnode->element); return newxmlnode; } GNode *xml_copy_tree(GNode *node) { return g_node_map(node, copy_node_func, NULL); }