diff options
author | wawaka <wawaka@yandex-team.ru> | 2022-02-10 16:47:48 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:48 +0300 |
commit | ed524783c88c81047033c5d6e5543db3a2251ad5 (patch) | |
tree | 5c595c0ac1b14fbb70e7e71df44b52e47f850387 /contrib/libs/libxml/HTMLtree.c | |
parent | 11ec0273ab97c87692cd0004865c7f24d14f9902 (diff) | |
download | ydb-ed524783c88c81047033c5d6e5543db3a2251ad5.tar.gz |
Restoring authorship annotation for <wawaka@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/libxml/HTMLtree.c')
-rw-r--r-- | contrib/libs/libxml/HTMLtree.c | 2400 |
1 files changed, 1200 insertions, 1200 deletions
diff --git a/contrib/libs/libxml/HTMLtree.c b/contrib/libs/libxml/HTMLtree.c index db63b371f0..d457c42cbd 100644 --- a/contrib/libs/libxml/HTMLtree.c +++ b/contrib/libs/libxml/HTMLtree.c @@ -1,507 +1,507 @@ -/* - * HTMLtree.c : implementation of access function for an HTML tree. - * - * See Copyright for the status of this software. - * - * daniel@veillard.com - */ - - -#define IN_LIBXML -#include "libxml.h" -#ifdef LIBXML_HTML_ENABLED - -#include <string.h> /* for memset() only ! */ - -#ifdef HAVE_CTYPE_H -#include <ctype.h> -#endif -#ifdef HAVE_STDLIB_H -#include <stdlib.h> -#endif - -#include <libxml/xmlmemory.h> -#include <libxml/HTMLparser.h> -#include <libxml/HTMLtree.h> -#include <libxml/entities.h> -#include <libxml/valid.h> -#include <libxml/xmlerror.h> -#include <libxml/parserInternals.h> -#include <libxml/globals.h> -#include <libxml/uri.h> - -#include "buf.h" - -/************************************************************************ - * * - * Getting/Setting encoding meta tags * - * * - ************************************************************************/ - -/** - * htmlGetMetaEncoding: - * @doc: the document - * - * Encoding definition lookup in the Meta tags - * - * Returns the current encoding as flagged in the HTML source - */ -const xmlChar * -htmlGetMetaEncoding(htmlDocPtr doc) { - htmlNodePtr cur; - const xmlChar *content; - const xmlChar *encoding; - - if (doc == NULL) - return(NULL); - cur = doc->children; - - /* - * Search the html - */ - while (cur != NULL) { - if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { - if (xmlStrEqual(cur->name, BAD_CAST"html")) - break; - if (xmlStrEqual(cur->name, BAD_CAST"head")) - goto found_head; - if (xmlStrEqual(cur->name, BAD_CAST"meta")) - goto found_meta; - } - cur = cur->next; - } - if (cur == NULL) - return(NULL); - cur = cur->children; - - /* - * Search the head - */ - while (cur != NULL) { - if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { - if (xmlStrEqual(cur->name, BAD_CAST"head")) - break; - if (xmlStrEqual(cur->name, BAD_CAST"meta")) - goto found_meta; - } - cur = cur->next; - } - if (cur == NULL) - return(NULL); -found_head: - cur = cur->children; - - /* - * Search the meta elements - */ -found_meta: - while (cur != NULL) { - if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { - if (xmlStrEqual(cur->name, BAD_CAST"meta")) { - xmlAttrPtr attr = cur->properties; - int http; - const xmlChar *value; - - content = NULL; - http = 0; - while (attr != NULL) { - if ((attr->children != NULL) && - (attr->children->type == XML_TEXT_NODE) && - (attr->children->next == NULL)) { - value = attr->children->content; - if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv")) - && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) - http = 1; - else if ((value != NULL) - && (!xmlStrcasecmp(attr->name, BAD_CAST"content"))) - content = value; - if ((http != 0) && (content != NULL)) - goto found_content; - } - attr = attr->next; - } - } - } - cur = cur->next; - } - return(NULL); - -found_content: - encoding = xmlStrstr(content, BAD_CAST"charset="); - if (encoding == NULL) - encoding = xmlStrstr(content, BAD_CAST"Charset="); - if (encoding == NULL) - encoding = xmlStrstr(content, BAD_CAST"CHARSET="); - if (encoding != NULL) { - encoding += 8; - } else { - encoding = xmlStrstr(content, BAD_CAST"charset ="); - if (encoding == NULL) - encoding = xmlStrstr(content, BAD_CAST"Charset ="); - if (encoding == NULL) - encoding = xmlStrstr(content, BAD_CAST"CHARSET ="); - if (encoding != NULL) - encoding += 9; - } - if (encoding != NULL) { - while ((*encoding == ' ') || (*encoding == '\t')) encoding++; - } - return(encoding); -} - -/** - * htmlSetMetaEncoding: - * @doc: the document - * @encoding: the encoding string - * - * Sets the current encoding in the Meta tags - * NOTE: this will not change the document content encoding, just - * the META flag associated. - * - * Returns 0 in case of success and -1 in case of error - */ -int -htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) { - htmlNodePtr cur, meta = NULL, head = NULL; - const xmlChar *content = NULL; - char newcontent[100]; - - newcontent[0] = 0; - - if (doc == NULL) - return(-1); - - /* html isn't a real encoding it's just libxml2 way to get entities */ - if (!xmlStrcasecmp(encoding, BAD_CAST "html")) - return(-1); - - if (encoding != NULL) { - snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s", - (char *)encoding); - newcontent[sizeof(newcontent) - 1] = 0; - } - - cur = doc->children; - - /* - * Search the html - */ - while (cur != NULL) { - if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { - if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0) - break; - if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0) - goto found_head; - if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) - goto found_meta; - } - cur = cur->next; - } - if (cur == NULL) - return(-1); - cur = cur->children; - - /* - * Search the head - */ - while (cur != NULL) { - if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { - if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0) - break; - if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) { - head = cur->parent; - goto found_meta; - } - } - cur = cur->next; - } - if (cur == NULL) - return(-1); -found_head: - head = cur; - if (cur->children == NULL) - goto create; - cur = cur->children; - -found_meta: - /* - * Search and update all the remaining the meta elements carrying - * encoding informations - */ - while (cur != NULL) { - if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { - if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) { - xmlAttrPtr attr = cur->properties; - int http; - const xmlChar *value; - - content = NULL; - http = 0; - while (attr != NULL) { - if ((attr->children != NULL) && - (attr->children->type == XML_TEXT_NODE) && - (attr->children->next == NULL)) { - value = attr->children->content; - if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv")) - && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) - http = 1; - else - { - if ((value != NULL) && - (!xmlStrcasecmp(attr->name, BAD_CAST"content"))) - content = value; - } - if ((http != 0) && (content != NULL)) - break; - } - attr = attr->next; - } - if ((http != 0) && (content != NULL)) { - meta = cur; - break; - } - - } - } - cur = cur->next; - } -create: - if (meta == NULL) { - if ((encoding != NULL) && (head != NULL)) { - /* - * Create a new Meta element with the right attributes - */ - - meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL); - if (head->children == NULL) - xmlAddChild(head, meta); - else - xmlAddPrevSibling(head->children, meta); - xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type"); - xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent); - } - } else { - /* remove the meta tag if NULL is passed */ - if (encoding == NULL) { - xmlUnlinkNode(meta); - xmlFreeNode(meta); - } - /* change the document only if there is a real encoding change */ - else if (xmlStrcasestr(content, encoding) == NULL) { - xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent); - } - } - - - return(0); -} - -/** - * booleanHTMLAttrs: - * - * These are the HTML attributes which will be output - * in minimized form, i.e. <option selected="selected"> will be - * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method" - * - */ -static const char* htmlBooleanAttrs[] = { - "checked", "compact", "declare", "defer", "disabled", "ismap", - "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly", - "selected", NULL -}; - - -/** - * htmlIsBooleanAttr: - * @name: the name of the attribute to check - * - * Determine if a given attribute is a boolean attribute. - * - * returns: false if the attribute is not boolean, true otherwise. - */ -int -htmlIsBooleanAttr(const xmlChar *name) -{ - int i = 0; - - while (htmlBooleanAttrs[i] != NULL) { - if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0) - return 1; - i++; - } - return 0; -} - -#ifdef LIBXML_OUTPUT_ENABLED -/* - * private routine exported from xmlIO.c - */ -xmlOutputBufferPtr -xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder); -/************************************************************************ - * * - * Output error handlers * - * * - ************************************************************************/ -/** - * htmlSaveErrMemory: - * @extra: extra informations - * - * Handle an out of memory condition - */ -static void -htmlSaveErrMemory(const char *extra) -{ - __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra); -} - -/** - * htmlSaveErr: - * @code: the error number - * @node: the location of the error. - * @extra: extra informations - * - * Handle an out of memory condition - */ -static void -htmlSaveErr(int code, xmlNodePtr node, const char *extra) -{ - const char *msg = NULL; - - switch(code) { - case XML_SAVE_NOT_UTF8: - msg = "string is not in UTF-8\n"; - break; - case XML_SAVE_CHAR_INVALID: - msg = "invalid character value\n"; - break; - case XML_SAVE_UNKNOWN_ENCODING: - msg = "unknown encoding %s\n"; - break; - case XML_SAVE_NO_DOCTYPE: - msg = "HTML has no DOCTYPE\n"; - break; - default: - msg = "unexpected error number\n"; - } - __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra); -} - -/************************************************************************ - * * - * Dumping HTML tree content to a simple buffer * - * * - ************************************************************************/ - -/** - * htmlBufNodeDumpFormat: - * @buf: the xmlBufPtr output - * @doc: the document - * @cur: the current node - * @format: should formatting spaces been added - * - * Dump an HTML node, recursive behaviour,children are printed too. - * - * Returns the number of byte written or -1 in case of error - */ -static size_t -htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur, - int format) { - size_t use; - int ret; - xmlOutputBufferPtr outbuf; - - if (cur == NULL) { - return (-1); - } - if (buf == NULL) { - return (-1); - } - outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer)); - if (outbuf == NULL) { - htmlSaveErrMemory("allocating HTML output buffer"); - return (-1); - } - memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer)); - outbuf->buffer = buf; - outbuf->encoder = NULL; - outbuf->writecallback = NULL; - outbuf->closecallback = NULL; - outbuf->context = NULL; - outbuf->written = 0; - - use = xmlBufUse(buf); - htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format); - xmlFree(outbuf); - ret = xmlBufUse(buf) - use; - return (ret); -} - -/** - * htmlNodeDump: - * @buf: the HTML buffer output - * @doc: the document - * @cur: the current node - * - * Dump an HTML node, recursive behaviour,children are printed too, - * and formatting returns are added. - * - * Returns the number of byte written or -1 in case of error - */ -int -htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { - xmlBufPtr buffer; - size_t ret; - - if ((buf == NULL) || (cur == NULL)) - return(-1); - - xmlInitParser(); - buffer = xmlBufFromBuffer(buf); - if (buffer == NULL) - return(-1); - - ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1); - - xmlBufBackToBuffer(buffer); - - if (ret > INT_MAX) - return(-1); - return((int) ret); -} - -/** - * htmlNodeDumpFileFormat: - * @out: the FILE pointer - * @doc: the document - * @cur: the current node - * @encoding: the document encoding - * @format: should formatting spaces been added - * - * Dump an HTML node, recursive behaviour,children are printed too. - * - * TODO: if encoding == NULL try to save in the doc encoding - * - * returns: the number of byte written or -1 in case of failure. - */ -int -htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, - xmlNodePtr cur, const char *encoding, int format) { - xmlOutputBufferPtr buf; - xmlCharEncodingHandlerPtr handler = NULL; - int ret; - - xmlInitParser(); - - if (encoding != NULL) { - xmlCharEncoding enc; - - enc = xmlParseCharEncoding(encoding); - if (enc != XML_CHAR_ENCODING_UTF8) { - handler = xmlFindCharEncodingHandler(encoding); - if (handler == NULL) - htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); - } +/* + * HTMLtree.c : implementation of access function for an HTML tree. + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + */ + + +#define IN_LIBXML +#include "libxml.h" +#ifdef LIBXML_HTML_ENABLED + +#include <string.h> /* for memset() only ! */ + +#ifdef HAVE_CTYPE_H +#include <ctype.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#include <libxml/xmlmemory.h> +#include <libxml/HTMLparser.h> +#include <libxml/HTMLtree.h> +#include <libxml/entities.h> +#include <libxml/valid.h> +#include <libxml/xmlerror.h> +#include <libxml/parserInternals.h> +#include <libxml/globals.h> +#include <libxml/uri.h> + +#include "buf.h" + +/************************************************************************ + * * + * Getting/Setting encoding meta tags * + * * + ************************************************************************/ + +/** + * htmlGetMetaEncoding: + * @doc: the document + * + * Encoding definition lookup in the Meta tags + * + * Returns the current encoding as flagged in the HTML source + */ +const xmlChar * +htmlGetMetaEncoding(htmlDocPtr doc) { + htmlNodePtr cur; + const xmlChar *content; + const xmlChar *encoding; + + if (doc == NULL) + return(NULL); + cur = doc->children; + + /* + * Search the html + */ + while (cur != NULL) { + if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { + if (xmlStrEqual(cur->name, BAD_CAST"html")) + break; + if (xmlStrEqual(cur->name, BAD_CAST"head")) + goto found_head; + if (xmlStrEqual(cur->name, BAD_CAST"meta")) + goto found_meta; + } + cur = cur->next; + } + if (cur == NULL) + return(NULL); + cur = cur->children; + + /* + * Search the head + */ + while (cur != NULL) { + if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { + if (xmlStrEqual(cur->name, BAD_CAST"head")) + break; + if (xmlStrEqual(cur->name, BAD_CAST"meta")) + goto found_meta; + } + cur = cur->next; + } + if (cur == NULL) + return(NULL); +found_head: + cur = cur->children; + + /* + * Search the meta elements + */ +found_meta: + while (cur != NULL) { + if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { + if (xmlStrEqual(cur->name, BAD_CAST"meta")) { + xmlAttrPtr attr = cur->properties; + int http; + const xmlChar *value; + + content = NULL; + http = 0; + while (attr != NULL) { + if ((attr->children != NULL) && + (attr->children->type == XML_TEXT_NODE) && + (attr->children->next == NULL)) { + value = attr->children->content; + if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv")) + && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) + http = 1; + else if ((value != NULL) + && (!xmlStrcasecmp(attr->name, BAD_CAST"content"))) + content = value; + if ((http != 0) && (content != NULL)) + goto found_content; + } + attr = attr->next; + } + } + } + cur = cur->next; + } + return(NULL); + +found_content: + encoding = xmlStrstr(content, BAD_CAST"charset="); + if (encoding == NULL) + encoding = xmlStrstr(content, BAD_CAST"Charset="); + if (encoding == NULL) + encoding = xmlStrstr(content, BAD_CAST"CHARSET="); + if (encoding != NULL) { + encoding += 8; + } else { + encoding = xmlStrstr(content, BAD_CAST"charset ="); + if (encoding == NULL) + encoding = xmlStrstr(content, BAD_CAST"Charset ="); + if (encoding == NULL) + encoding = xmlStrstr(content, BAD_CAST"CHARSET ="); + if (encoding != NULL) + encoding += 9; + } + if (encoding != NULL) { + while ((*encoding == ' ') || (*encoding == '\t')) encoding++; + } + return(encoding); +} + +/** + * htmlSetMetaEncoding: + * @doc: the document + * @encoding: the encoding string + * + * Sets the current encoding in the Meta tags + * NOTE: this will not change the document content encoding, just + * the META flag associated. + * + * Returns 0 in case of success and -1 in case of error + */ +int +htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) { + htmlNodePtr cur, meta = NULL, head = NULL; + const xmlChar *content = NULL; + char newcontent[100]; + + newcontent[0] = 0; + + if (doc == NULL) + return(-1); + + /* html isn't a real encoding it's just libxml2 way to get entities */ + if (!xmlStrcasecmp(encoding, BAD_CAST "html")) + return(-1); + + if (encoding != NULL) { + snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s", + (char *)encoding); + newcontent[sizeof(newcontent) - 1] = 0; + } + + cur = doc->children; + + /* + * Search the html + */ + while (cur != NULL) { + if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { + if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0) + break; + if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0) + goto found_head; + if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) + goto found_meta; + } + cur = cur->next; + } + if (cur == NULL) + return(-1); + cur = cur->children; + + /* + * Search the head + */ + while (cur != NULL) { + if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { + if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0) + break; + if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) { + head = cur->parent; + goto found_meta; + } + } + cur = cur->next; + } + if (cur == NULL) + return(-1); +found_head: + head = cur; + if (cur->children == NULL) + goto create; + cur = cur->children; + +found_meta: + /* + * Search and update all the remaining the meta elements carrying + * encoding informations + */ + while (cur != NULL) { + if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { + if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) { + xmlAttrPtr attr = cur->properties; + int http; + const xmlChar *value; + + content = NULL; + http = 0; + while (attr != NULL) { + if ((attr->children != NULL) && + (attr->children->type == XML_TEXT_NODE) && + (attr->children->next == NULL)) { + value = attr->children->content; + if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv")) + && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) + http = 1; + else + { + if ((value != NULL) && + (!xmlStrcasecmp(attr->name, BAD_CAST"content"))) + content = value; + } + if ((http != 0) && (content != NULL)) + break; + } + attr = attr->next; + } + if ((http != 0) && (content != NULL)) { + meta = cur; + break; + } + + } + } + cur = cur->next; + } +create: + if (meta == NULL) { + if ((encoding != NULL) && (head != NULL)) { + /* + * Create a new Meta element with the right attributes + */ + + meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL); + if (head->children == NULL) + xmlAddChild(head, meta); + else + xmlAddPrevSibling(head->children, meta); + xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type"); + xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent); + } + } else { + /* remove the meta tag if NULL is passed */ + if (encoding == NULL) { + xmlUnlinkNode(meta); + xmlFreeNode(meta); + } + /* change the document only if there is a real encoding change */ + else if (xmlStrcasestr(content, encoding) == NULL) { + xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent); + } + } + + + return(0); +} + +/** + * booleanHTMLAttrs: + * + * These are the HTML attributes which will be output + * in minimized form, i.e. <option selected="selected"> will be + * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method" + * + */ +static const char* htmlBooleanAttrs[] = { + "checked", "compact", "declare", "defer", "disabled", "ismap", + "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly", + "selected", NULL +}; + + +/** + * htmlIsBooleanAttr: + * @name: the name of the attribute to check + * + * Determine if a given attribute is a boolean attribute. + * + * returns: false if the attribute is not boolean, true otherwise. + */ +int +htmlIsBooleanAttr(const xmlChar *name) +{ + int i = 0; + + while (htmlBooleanAttrs[i] != NULL) { + if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0) + return 1; + i++; + } + return 0; +} + +#ifdef LIBXML_OUTPUT_ENABLED +/* + * private routine exported from xmlIO.c + */ +xmlOutputBufferPtr +xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder); +/************************************************************************ + * * + * Output error handlers * + * * + ************************************************************************/ +/** + * htmlSaveErrMemory: + * @extra: extra informations + * + * Handle an out of memory condition + */ +static void +htmlSaveErrMemory(const char *extra) +{ + __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra); +} + +/** + * htmlSaveErr: + * @code: the error number + * @node: the location of the error. + * @extra: extra informations + * + * Handle an out of memory condition + */ +static void +htmlSaveErr(int code, xmlNodePtr node, const char *extra) +{ + const char *msg = NULL; + + switch(code) { + case XML_SAVE_NOT_UTF8: + msg = "string is not in UTF-8\n"; + break; + case XML_SAVE_CHAR_INVALID: + msg = "invalid character value\n"; + break; + case XML_SAVE_UNKNOWN_ENCODING: + msg = "unknown encoding %s\n"; + break; + case XML_SAVE_NO_DOCTYPE: + msg = "HTML has no DOCTYPE\n"; + break; + default: + msg = "unexpected error number\n"; + } + __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra); +} + +/************************************************************************ + * * + * Dumping HTML tree content to a simple buffer * + * * + ************************************************************************/ + +/** + * htmlBufNodeDumpFormat: + * @buf: the xmlBufPtr output + * @doc: the document + * @cur: the current node + * @format: should formatting spaces been added + * + * Dump an HTML node, recursive behaviour,children are printed too. + * + * Returns the number of byte written or -1 in case of error + */ +static size_t +htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur, + int format) { + size_t use; + int ret; + xmlOutputBufferPtr outbuf; + + if (cur == NULL) { + return (-1); + } + if (buf == NULL) { + return (-1); + } + outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer)); + if (outbuf == NULL) { + htmlSaveErrMemory("allocating HTML output buffer"); + return (-1); + } + memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer)); + outbuf->buffer = buf; + outbuf->encoder = NULL; + outbuf->writecallback = NULL; + outbuf->closecallback = NULL; + outbuf->context = NULL; + outbuf->written = 0; + + use = xmlBufUse(buf); + htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format); + xmlFree(outbuf); + ret = xmlBufUse(buf) - use; + return (ret); +} + +/** + * htmlNodeDump: + * @buf: the HTML buffer output + * @doc: the document + * @cur: the current node + * + * Dump an HTML node, recursive behaviour,children are printed too, + * and formatting returns are added. + * + * Returns the number of byte written or -1 in case of error + */ +int +htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { + xmlBufPtr buffer; + size_t ret; + + if ((buf == NULL) || (cur == NULL)) + return(-1); + + xmlInitParser(); + buffer = xmlBufFromBuffer(buf); + if (buffer == NULL) + return(-1); + + ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1); + + xmlBufBackToBuffer(buffer); + + if (ret > INT_MAX) + return(-1); + return((int) ret); +} + +/** + * htmlNodeDumpFileFormat: + * @out: the FILE pointer + * @doc: the document + * @cur: the current node + * @encoding: the document encoding + * @format: should formatting spaces been added + * + * Dump an HTML node, recursive behaviour,children are printed too. + * + * TODO: if encoding == NULL try to save in the doc encoding + * + * returns: the number of byte written or -1 in case of failure. + */ +int +htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, + xmlNodePtr cur, const char *encoding, int format) { + xmlOutputBufferPtr buf; + xmlCharEncodingHandlerPtr handler = NULL; + int ret; + + xmlInitParser(); + + if (encoding != NULL) { + xmlCharEncoding enc; + + enc = xmlParseCharEncoding(encoding); + if (enc != XML_CHAR_ENCODING_UTF8) { + handler = xmlFindCharEncodingHandler(encoding); + if (handler == NULL) + htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); + } } else { /* * Fallback to HTML or ASCII when the encoding is unspecified @@ -510,72 +510,72 @@ htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, handler = xmlFindCharEncodingHandler("HTML"); if (handler == NULL) handler = xmlFindCharEncodingHandler("ascii"); - } - - /* - * save the content to a temp buffer. - */ - buf = xmlOutputBufferCreateFile(out, handler); - if (buf == NULL) return(0); - - htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); - - ret = xmlOutputBufferClose(buf); - return(ret); -} - -/** - * htmlNodeDumpFile: - * @out: the FILE pointer - * @doc: the document - * @cur: the current node - * - * Dump an HTML node, recursive behaviour,children are printed too, - * and formatting returns are added. - */ -void -htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) { - htmlNodeDumpFileFormat(out, doc, cur, NULL, 1); -} - -/** - * htmlDocDumpMemoryFormat: - * @cur: the document - * @mem: OUT: the memory pointer - * @size: OUT: the memory length - * @format: should formatting spaces been added - * - * Dump an HTML document in memory and return the xmlChar * and it's size. - * It's up to the caller to free the memory. - */ -void -htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) { - xmlOutputBufferPtr buf; - xmlCharEncodingHandlerPtr handler = NULL; - const char *encoding; - - xmlInitParser(); - - if ((mem == NULL) || (size == NULL)) - return; - if (cur == NULL) { - *mem = NULL; - *size = 0; - return; - } - - encoding = (const char *) htmlGetMetaEncoding(cur); - - if (encoding != NULL) { - xmlCharEncoding enc; - - enc = xmlParseCharEncoding(encoding); + } + + /* + * save the content to a temp buffer. + */ + buf = xmlOutputBufferCreateFile(out, handler); + if (buf == NULL) return(0); + + htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); + + ret = xmlOutputBufferClose(buf); + return(ret); +} + +/** + * htmlNodeDumpFile: + * @out: the FILE pointer + * @doc: the document + * @cur: the current node + * + * Dump an HTML node, recursive behaviour,children are printed too, + * and formatting returns are added. + */ +void +htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) { + htmlNodeDumpFileFormat(out, doc, cur, NULL, 1); +} + +/** + * htmlDocDumpMemoryFormat: + * @cur: the document + * @mem: OUT: the memory pointer + * @size: OUT: the memory length + * @format: should formatting spaces been added + * + * Dump an HTML document in memory and return the xmlChar * and it's size. + * It's up to the caller to free the memory. + */ +void +htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) { + xmlOutputBufferPtr buf; + xmlCharEncodingHandlerPtr handler = NULL; + const char *encoding; + + xmlInitParser(); + + if ((mem == NULL) || (size == NULL)) + return; + if (cur == NULL) { + *mem = NULL; + *size = 0; + return; + } + + encoding = (const char *) htmlGetMetaEncoding(cur); + + if (encoding != NULL) { + xmlCharEncoding enc; + + enc = xmlParseCharEncoding(encoding); if (enc != XML_CHAR_ENCODING_UTF8) { - handler = xmlFindCharEncodingHandler(encoding); - if (handler == NULL) - htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); - - } + handler = xmlFindCharEncodingHandler(encoding); + if (handler == NULL) + htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); + + } } else { /* * Fallback to HTML or ASCII when the encoding is unspecified @@ -584,517 +584,517 @@ htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) { handler = xmlFindCharEncodingHandler("HTML"); if (handler == NULL) handler = xmlFindCharEncodingHandler("ascii"); - } - - buf = xmlAllocOutputBufferInternal(handler); - if (buf == NULL) { - *mem = NULL; - *size = 0; - return; - } - - htmlDocContentDumpFormatOutput(buf, cur, NULL, format); - - xmlOutputBufferFlush(buf); - if (buf->conv != NULL) { - *size = xmlBufUse(buf->conv); - *mem = xmlStrndup(xmlBufContent(buf->conv), *size); - } else { - *size = xmlBufUse(buf->buffer); - *mem = xmlStrndup(xmlBufContent(buf->buffer), *size); - } - (void)xmlOutputBufferClose(buf); -} - -/** - * htmlDocDumpMemory: - * @cur: the document - * @mem: OUT: the memory pointer - * @size: OUT: the memory length - * - * Dump an HTML document in memory and return the xmlChar * and it's size. - * It's up to the caller to free the memory. - */ -void -htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) { - htmlDocDumpMemoryFormat(cur, mem, size, 1); -} - - -/************************************************************************ - * * - * Dumping HTML tree content to an I/O output buffer * - * * - ************************************************************************/ - -void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur); - -/** - * htmlDtdDumpOutput: - * @buf: the HTML buffer output - * @doc: the document - * @encoding: the encoding string - * - * TODO: check whether encoding is needed - * - * Dump the HTML document DTD, if any. - */ -static void -htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, - const char *encoding ATTRIBUTE_UNUSED) { - xmlDtdPtr cur = doc->intSubset; - - if (cur == NULL) { - htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL); - return; - } - xmlOutputBufferWriteString(buf, "<!DOCTYPE "); - xmlOutputBufferWriteString(buf, (const char *)cur->name); - if (cur->ExternalID != NULL) { - xmlOutputBufferWriteString(buf, " PUBLIC "); - xmlBufWriteQuotedString(buf->buffer, cur->ExternalID); - if (cur->SystemID != NULL) { - xmlOutputBufferWriteString(buf, " "); - xmlBufWriteQuotedString(buf->buffer, cur->SystemID); - } + } + + buf = xmlAllocOutputBufferInternal(handler); + if (buf == NULL) { + *mem = NULL; + *size = 0; + return; + } + + htmlDocContentDumpFormatOutput(buf, cur, NULL, format); + + xmlOutputBufferFlush(buf); + if (buf->conv != NULL) { + *size = xmlBufUse(buf->conv); + *mem = xmlStrndup(xmlBufContent(buf->conv), *size); + } else { + *size = xmlBufUse(buf->buffer); + *mem = xmlStrndup(xmlBufContent(buf->buffer), *size); + } + (void)xmlOutputBufferClose(buf); +} + +/** + * htmlDocDumpMemory: + * @cur: the document + * @mem: OUT: the memory pointer + * @size: OUT: the memory length + * + * Dump an HTML document in memory and return the xmlChar * and it's size. + * It's up to the caller to free the memory. + */ +void +htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) { + htmlDocDumpMemoryFormat(cur, mem, size, 1); +} + + +/************************************************************************ + * * + * Dumping HTML tree content to an I/O output buffer * + * * + ************************************************************************/ + +void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur); + +/** + * htmlDtdDumpOutput: + * @buf: the HTML buffer output + * @doc: the document + * @encoding: the encoding string + * + * TODO: check whether encoding is needed + * + * Dump the HTML document DTD, if any. + */ +static void +htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + const char *encoding ATTRIBUTE_UNUSED) { + xmlDtdPtr cur = doc->intSubset; + + if (cur == NULL) { + htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL); + return; + } + xmlOutputBufferWriteString(buf, "<!DOCTYPE "); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if (cur->ExternalID != NULL) { + xmlOutputBufferWriteString(buf, " PUBLIC "); + xmlBufWriteQuotedString(buf->buffer, cur->ExternalID); + if (cur->SystemID != NULL) { + xmlOutputBufferWriteString(buf, " "); + xmlBufWriteQuotedString(buf->buffer, cur->SystemID); + } } else if (cur->SystemID != NULL && xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) { - xmlOutputBufferWriteString(buf, " SYSTEM "); - xmlBufWriteQuotedString(buf->buffer, cur->SystemID); - } - xmlOutputBufferWriteString(buf, ">\n"); -} - -/** - * htmlAttrDumpOutput: - * @buf: the HTML buffer output - * @doc: the document - * @cur: the attribute pointer - * @encoding: the encoding string - * - * Dump an HTML attribute - */ -static void -htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, - const char *encoding ATTRIBUTE_UNUSED) { - xmlChar *value; - - /* - * The html output method should not escape a & character - * occurring in an attribute value immediately followed by - * a { character (see Section B.7.1 of the HTML 4.0 Recommendation). - * This is implemented in xmlEncodeEntitiesReentrant - */ - - if (cur == NULL) { - return; - } - xmlOutputBufferWriteString(buf, " "); - if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { - xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); - xmlOutputBufferWriteString(buf, ":"); - } - xmlOutputBufferWriteString(buf, (const char *)cur->name); - if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) { - value = xmlNodeListGetString(doc, cur->children, 0); - if (value) { - xmlOutputBufferWriteString(buf, "="); - if ((cur->ns == NULL) && (cur->parent != NULL) && - (cur->parent->ns == NULL) && - ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) || - (!xmlStrcasecmp(cur->name, BAD_CAST "action")) || - (!xmlStrcasecmp(cur->name, BAD_CAST "src")) || - ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) && - (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) { - xmlChar *tmp = value; - /* xmlURIEscapeStr() escapes '"' so it can be safely used. */ - xmlBufCCat(buf->buffer, "\""); - - while (IS_BLANK_CH(*tmp)) tmp++; - - /* URI Escape everything, except server side includes. */ - for ( ; ; ) { - xmlChar *escaped; - xmlChar endChar; - xmlChar *end = NULL; - xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--"); - if (start != NULL) { - end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->"); - if (end != NULL) { - *start = '\0'; - } - } - - /* Escape the whole string, or until start (set to '\0'). */ - escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+"); - if (escaped != NULL) { - xmlBufCat(buf->buffer, escaped); - xmlFree(escaped); - } else { - xmlBufCat(buf->buffer, tmp); - } - - if (end == NULL) { /* Everything has been written. */ - break; - } - - /* Do not escape anything within server side includes. */ - *start = '<'; /* Restore the first character of "<!--". */ - end += 3; /* strlen("-->") */ - endChar = *end; - *end = '\0'; - xmlBufCat(buf->buffer, start); - *end = endChar; - tmp = end; - } - - xmlBufCCat(buf->buffer, "\""); - } else { - xmlBufWriteQuotedString(buf->buffer, value); - } - xmlFree(value); - } else { - xmlOutputBufferWriteString(buf, "=\"\""); - } - } -} - -/** - * htmlAttrListDumpOutput: - * @buf: the HTML buffer output - * @doc: the document - * @cur: the first attribute pointer - * @encoding: the encoding string - * - * Dump a list of HTML attributes - */ -static void -htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) { - if (cur == NULL) { - return; - } - while (cur != NULL) { - htmlAttrDumpOutput(buf, doc, cur, encoding); - cur = cur->next; - } -} - - - -/** - * htmlNodeListDumpOutput: - * @buf: the HTML buffer output - * @doc: the document - * @cur: the first node - * @encoding: the encoding string - * @format: should formatting spaces been added - * - * Dump an HTML node list, recursive behaviour,children are printed too. - */ -static void -htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, - xmlNodePtr cur, const char *encoding, int format) { - if (cur == NULL) { - return; - } - while (cur != NULL) { - htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); - cur = cur->next; - } -} - -/** - * htmlNodeDumpFormatOutput: - * @buf: the HTML buffer output - * @doc: the document - * @cur: the current node - * @encoding: the encoding string - * @format: should formatting spaces been added - * - * Dump an HTML node, recursive behaviour,children are printed too. - */ -void -htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, - xmlNodePtr cur, const char *encoding, int format) { - const htmlElemDesc * info; - - xmlInitParser(); - - if ((cur == NULL) || (buf == NULL)) { - return; - } - /* - * Special cases. - */ - if (cur->type == XML_DTD_NODE) - return; - if ((cur->type == XML_HTML_DOCUMENT_NODE) || - (cur->type == XML_DOCUMENT_NODE)){ - htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding); - return; - } - if (cur->type == XML_ATTRIBUTE_NODE) { - htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding); - return; - } - if (cur->type == HTML_TEXT_NODE) { - if (cur->content != NULL) { - if (((cur->name == (const xmlChar *)xmlStringText) || - (cur->name != (const xmlChar *)xmlStringTextNoenc)) && - ((cur->parent == NULL) || - ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) && - (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) { - xmlChar *buffer; - - buffer = xmlEncodeEntitiesReentrant(doc, cur->content); - if (buffer != NULL) { - xmlOutputBufferWriteString(buf, (const char *)buffer); - xmlFree(buffer); - } - } else { - xmlOutputBufferWriteString(buf, (const char *)cur->content); - } - } - return; - } - if (cur->type == HTML_COMMENT_NODE) { - if (cur->content != NULL) { - xmlOutputBufferWriteString(buf, "<!--"); - xmlOutputBufferWriteString(buf, (const char *)cur->content); - xmlOutputBufferWriteString(buf, "-->"); - } - return; - } - if (cur->type == HTML_PI_NODE) { - if (cur->name == NULL) - return; - xmlOutputBufferWriteString(buf, "<?"); - xmlOutputBufferWriteString(buf, (const char *)cur->name); - if (cur->content != NULL) { - xmlOutputBufferWriteString(buf, " "); - xmlOutputBufferWriteString(buf, (const char *)cur->content); - } - xmlOutputBufferWriteString(buf, ">"); - return; - } - if (cur->type == HTML_ENTITY_REF_NODE) { - xmlOutputBufferWriteString(buf, "&"); - xmlOutputBufferWriteString(buf, (const char *)cur->name); - xmlOutputBufferWriteString(buf, ";"); - return; - } - if (cur->type == HTML_PRESERVE_NODE) { - if (cur->content != NULL) { - xmlOutputBufferWriteString(buf, (const char *)cur->content); - } - return; - } - - /* - * Get specific HTML info for that node. - */ - if (cur->ns == NULL) - info = htmlTagLookup(cur->name); - else - info = NULL; - - xmlOutputBufferWriteString(buf, "<"); - if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { - xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); - xmlOutputBufferWriteString(buf, ":"); - } - xmlOutputBufferWriteString(buf, (const char *)cur->name); - if (cur->nsDef) - xmlNsListDumpOutput(buf, cur->nsDef); - if (cur->properties != NULL) - htmlAttrListDumpOutput(buf, doc, cur->properties, encoding); - - if ((info != NULL) && (info->empty)) { - xmlOutputBufferWriteString(buf, ">"); - if ((format) && (!info->isinline) && (cur->next != NULL)) { - if ((cur->next->type != HTML_TEXT_NODE) && - (cur->next->type != HTML_ENTITY_REF_NODE) && - (cur->parent != NULL) && - (cur->parent->name != NULL) && - (cur->parent->name[0] != 'p')) /* p, pre, param */ - xmlOutputBufferWriteString(buf, "\n"); - } - return; - } - if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) && - (cur->children == NULL)) { - if ((info != NULL) && (info->saveEndTag != 0) && - (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) && - (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) { - xmlOutputBufferWriteString(buf, ">"); - } else { - xmlOutputBufferWriteString(buf, "></"); - if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { - xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); - xmlOutputBufferWriteString(buf, ":"); - } - xmlOutputBufferWriteString(buf, (const char *)cur->name); - xmlOutputBufferWriteString(buf, ">"); - } - if ((format) && (cur->next != NULL) && - (info != NULL) && (!info->isinline)) { - if ((cur->next->type != HTML_TEXT_NODE) && - (cur->next->type != HTML_ENTITY_REF_NODE) && - (cur->parent != NULL) && - (cur->parent->name != NULL) && - (cur->parent->name[0] != 'p')) /* p, pre, param */ - xmlOutputBufferWriteString(buf, "\n"); - } - return; - } - xmlOutputBufferWriteString(buf, ">"); - if ((cur->type != XML_ELEMENT_NODE) && - (cur->content != NULL)) { - /* - * Uses the OutputBuffer property to automatically convert - * invalids to charrefs - */ - - xmlOutputBufferWriteString(buf, (const char *) cur->content); - } - if (cur->children != NULL) { - if ((format) && (info != NULL) && (!info->isinline) && - (cur->children->type != HTML_TEXT_NODE) && - (cur->children->type != HTML_ENTITY_REF_NODE) && - (cur->children != cur->last) && - (cur->name != NULL) && - (cur->name[0] != 'p')) /* p, pre, param */ - xmlOutputBufferWriteString(buf, "\n"); - htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format); - if ((format) && (info != NULL) && (!info->isinline) && - (cur->last->type != HTML_TEXT_NODE) && - (cur->last->type != HTML_ENTITY_REF_NODE) && - (cur->children != cur->last) && - (cur->name != NULL) && - (cur->name[0] != 'p')) /* p, pre, param */ - xmlOutputBufferWriteString(buf, "\n"); - } - xmlOutputBufferWriteString(buf, "</"); - if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { - xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); - xmlOutputBufferWriteString(buf, ":"); - } - xmlOutputBufferWriteString(buf, (const char *)cur->name); - xmlOutputBufferWriteString(buf, ">"); - if ((format) && (info != NULL) && (!info->isinline) && - (cur->next != NULL)) { - if ((cur->next->type != HTML_TEXT_NODE) && - (cur->next->type != HTML_ENTITY_REF_NODE) && - (cur->parent != NULL) && - (cur->parent->name != NULL) && - (cur->parent->name[0] != 'p')) /* p, pre, param */ - xmlOutputBufferWriteString(buf, "\n"); - } -} - -/** - * htmlNodeDumpOutput: - * @buf: the HTML buffer output - * @doc: the document - * @cur: the current node - * @encoding: the encoding string - * - * Dump an HTML node, recursive behaviour,children are printed too, - * and formatting returns/spaces are added. - */ -void -htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, - xmlNodePtr cur, const char *encoding) { - htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1); -} - -/** - * htmlDocContentDumpFormatOutput: - * @buf: the HTML buffer output - * @cur: the document - * @encoding: the encoding string - * @format: should formatting spaces been added - * - * Dump an HTML document. - */ -void -htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, - const char *encoding, int format) { - int type; - - xmlInitParser(); - - if ((buf == NULL) || (cur == NULL)) - return; - - /* - * force to output the stuff as HTML, especially for entities - */ - type = cur->type; - cur->type = XML_HTML_DOCUMENT_NODE; - if (cur->intSubset != NULL) { - htmlDtdDumpOutput(buf, cur, NULL); - } - if (cur->children != NULL) { - htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format); - } - xmlOutputBufferWriteString(buf, "\n"); - cur->type = (xmlElementType) type; -} - -/** - * htmlDocContentDumpOutput: - * @buf: the HTML buffer output - * @cur: the document - * @encoding: the encoding string - * + xmlOutputBufferWriteString(buf, " SYSTEM "); + xmlBufWriteQuotedString(buf->buffer, cur->SystemID); + } + xmlOutputBufferWriteString(buf, ">\n"); +} + +/** + * htmlAttrDumpOutput: + * @buf: the HTML buffer output + * @doc: the document + * @cur: the attribute pointer + * @encoding: the encoding string + * + * Dump an HTML attribute + */ +static void +htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, + const char *encoding ATTRIBUTE_UNUSED) { + xmlChar *value; + + /* + * The html output method should not escape a & character + * occurring in an attribute value immediately followed by + * a { character (see Section B.7.1 of the HTML 4.0 Recommendation). + * This is implemented in xmlEncodeEntitiesReentrant + */ + + if (cur == NULL) { + return; + } + xmlOutputBufferWriteString(buf, " "); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); + xmlOutputBufferWriteString(buf, ":"); + } + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) { + value = xmlNodeListGetString(doc, cur->children, 0); + if (value) { + xmlOutputBufferWriteString(buf, "="); + if ((cur->ns == NULL) && (cur->parent != NULL) && + (cur->parent->ns == NULL) && + ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) || + (!xmlStrcasecmp(cur->name, BAD_CAST "action")) || + (!xmlStrcasecmp(cur->name, BAD_CAST "src")) || + ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) && + (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) { + xmlChar *tmp = value; + /* xmlURIEscapeStr() escapes '"' so it can be safely used. */ + xmlBufCCat(buf->buffer, "\""); + + while (IS_BLANK_CH(*tmp)) tmp++; + + /* URI Escape everything, except server side includes. */ + for ( ; ; ) { + xmlChar *escaped; + xmlChar endChar; + xmlChar *end = NULL; + xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--"); + if (start != NULL) { + end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->"); + if (end != NULL) { + *start = '\0'; + } + } + + /* Escape the whole string, or until start (set to '\0'). */ + escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+"); + if (escaped != NULL) { + xmlBufCat(buf->buffer, escaped); + xmlFree(escaped); + } else { + xmlBufCat(buf->buffer, tmp); + } + + if (end == NULL) { /* Everything has been written. */ + break; + } + + /* Do not escape anything within server side includes. */ + *start = '<'; /* Restore the first character of "<!--". */ + end += 3; /* strlen("-->") */ + endChar = *end; + *end = '\0'; + xmlBufCat(buf->buffer, start); + *end = endChar; + tmp = end; + } + + xmlBufCCat(buf->buffer, "\""); + } else { + xmlBufWriteQuotedString(buf->buffer, value); + } + xmlFree(value); + } else { + xmlOutputBufferWriteString(buf, "=\"\""); + } + } +} + +/** + * htmlAttrListDumpOutput: + * @buf: the HTML buffer output + * @doc: the document + * @cur: the first attribute pointer + * @encoding: the encoding string + * + * Dump a list of HTML attributes + */ +static void +htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) { + if (cur == NULL) { + return; + } + while (cur != NULL) { + htmlAttrDumpOutput(buf, doc, cur, encoding); + cur = cur->next; + } +} + + + +/** + * htmlNodeListDumpOutput: + * @buf: the HTML buffer output + * @doc: the document + * @cur: the first node + * @encoding: the encoding string + * @format: should formatting spaces been added + * + * Dump an HTML node list, recursive behaviour,children are printed too. + */ +static void +htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlNodePtr cur, const char *encoding, int format) { + if (cur == NULL) { + return; + } + while (cur != NULL) { + htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); + cur = cur->next; + } +} + +/** + * htmlNodeDumpFormatOutput: + * @buf: the HTML buffer output + * @doc: the document + * @cur: the current node + * @encoding: the encoding string + * @format: should formatting spaces been added + * + * Dump an HTML node, recursive behaviour,children are printed too. + */ +void +htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlNodePtr cur, const char *encoding, int format) { + const htmlElemDesc * info; + + xmlInitParser(); + + if ((cur == NULL) || (buf == NULL)) { + return; + } + /* + * Special cases. + */ + if (cur->type == XML_DTD_NODE) + return; + if ((cur->type == XML_HTML_DOCUMENT_NODE) || + (cur->type == XML_DOCUMENT_NODE)){ + htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding); + return; + } + if (cur->type == XML_ATTRIBUTE_NODE) { + htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding); + return; + } + if (cur->type == HTML_TEXT_NODE) { + if (cur->content != NULL) { + if (((cur->name == (const xmlChar *)xmlStringText) || + (cur->name != (const xmlChar *)xmlStringTextNoenc)) && + ((cur->parent == NULL) || + ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) && + (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) { + xmlChar *buffer; + + buffer = xmlEncodeEntitiesReentrant(doc, cur->content); + if (buffer != NULL) { + xmlOutputBufferWriteString(buf, (const char *)buffer); + xmlFree(buffer); + } + } else { + xmlOutputBufferWriteString(buf, (const char *)cur->content); + } + } + return; + } + if (cur->type == HTML_COMMENT_NODE) { + if (cur->content != NULL) { + xmlOutputBufferWriteString(buf, "<!--"); + xmlOutputBufferWriteString(buf, (const char *)cur->content); + xmlOutputBufferWriteString(buf, "-->"); + } + return; + } + if (cur->type == HTML_PI_NODE) { + if (cur->name == NULL) + return; + xmlOutputBufferWriteString(buf, "<?"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if (cur->content != NULL) { + xmlOutputBufferWriteString(buf, " "); + xmlOutputBufferWriteString(buf, (const char *)cur->content); + } + xmlOutputBufferWriteString(buf, ">"); + return; + } + if (cur->type == HTML_ENTITY_REF_NODE) { + xmlOutputBufferWriteString(buf, "&"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWriteString(buf, ";"); + return; + } + if (cur->type == HTML_PRESERVE_NODE) { + if (cur->content != NULL) { + xmlOutputBufferWriteString(buf, (const char *)cur->content); + } + return; + } + + /* + * Get specific HTML info for that node. + */ + if (cur->ns == NULL) + info = htmlTagLookup(cur->name); + else + info = NULL; + + xmlOutputBufferWriteString(buf, "<"); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); + xmlOutputBufferWriteString(buf, ":"); + } + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if (cur->nsDef) + xmlNsListDumpOutput(buf, cur->nsDef); + if (cur->properties != NULL) + htmlAttrListDumpOutput(buf, doc, cur->properties, encoding); + + if ((info != NULL) && (info->empty)) { + xmlOutputBufferWriteString(buf, ">"); + if ((format) && (!info->isinline) && (cur->next != NULL)) { + if ((cur->next->type != HTML_TEXT_NODE) && + (cur->next->type != HTML_ENTITY_REF_NODE) && + (cur->parent != NULL) && + (cur->parent->name != NULL) && + (cur->parent->name[0] != 'p')) /* p, pre, param */ + xmlOutputBufferWriteString(buf, "\n"); + } + return; + } + if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) && + (cur->children == NULL)) { + if ((info != NULL) && (info->saveEndTag != 0) && + (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) && + (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) { + xmlOutputBufferWriteString(buf, ">"); + } else { + xmlOutputBufferWriteString(buf, "></"); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); + xmlOutputBufferWriteString(buf, ":"); + } + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWriteString(buf, ">"); + } + if ((format) && (cur->next != NULL) && + (info != NULL) && (!info->isinline)) { + if ((cur->next->type != HTML_TEXT_NODE) && + (cur->next->type != HTML_ENTITY_REF_NODE) && + (cur->parent != NULL) && + (cur->parent->name != NULL) && + (cur->parent->name[0] != 'p')) /* p, pre, param */ + xmlOutputBufferWriteString(buf, "\n"); + } + return; + } + xmlOutputBufferWriteString(buf, ">"); + if ((cur->type != XML_ELEMENT_NODE) && + (cur->content != NULL)) { + /* + * Uses the OutputBuffer property to automatically convert + * invalids to charrefs + */ + + xmlOutputBufferWriteString(buf, (const char *) cur->content); + } + if (cur->children != NULL) { + if ((format) && (info != NULL) && (!info->isinline) && + (cur->children->type != HTML_TEXT_NODE) && + (cur->children->type != HTML_ENTITY_REF_NODE) && + (cur->children != cur->last) && + (cur->name != NULL) && + (cur->name[0] != 'p')) /* p, pre, param */ + xmlOutputBufferWriteString(buf, "\n"); + htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format); + if ((format) && (info != NULL) && (!info->isinline) && + (cur->last->type != HTML_TEXT_NODE) && + (cur->last->type != HTML_ENTITY_REF_NODE) && + (cur->children != cur->last) && + (cur->name != NULL) && + (cur->name[0] != 'p')) /* p, pre, param */ + xmlOutputBufferWriteString(buf, "\n"); + } + xmlOutputBufferWriteString(buf, "</"); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); + xmlOutputBufferWriteString(buf, ":"); + } + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWriteString(buf, ">"); + if ((format) && (info != NULL) && (!info->isinline) && + (cur->next != NULL)) { + if ((cur->next->type != HTML_TEXT_NODE) && + (cur->next->type != HTML_ENTITY_REF_NODE) && + (cur->parent != NULL) && + (cur->parent->name != NULL) && + (cur->parent->name[0] != 'p')) /* p, pre, param */ + xmlOutputBufferWriteString(buf, "\n"); + } +} + +/** + * htmlNodeDumpOutput: + * @buf: the HTML buffer output + * @doc: the document + * @cur: the current node + * @encoding: the encoding string + * + * Dump an HTML node, recursive behaviour,children are printed too, + * and formatting returns/spaces are added. + */ +void +htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlNodePtr cur, const char *encoding) { + htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1); +} + +/** + * htmlDocContentDumpFormatOutput: + * @buf: the HTML buffer output + * @cur: the document + * @encoding: the encoding string + * @format: should formatting spaces been added + * + * Dump an HTML document. + */ +void +htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, + const char *encoding, int format) { + int type; + + xmlInitParser(); + + if ((buf == NULL) || (cur == NULL)) + return; + + /* + * force to output the stuff as HTML, especially for entities + */ + type = cur->type; + cur->type = XML_HTML_DOCUMENT_NODE; + if (cur->intSubset != NULL) { + htmlDtdDumpOutput(buf, cur, NULL); + } + if (cur->children != NULL) { + htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format); + } + xmlOutputBufferWriteString(buf, "\n"); + cur->type = (xmlElementType) type; +} + +/** + * htmlDocContentDumpOutput: + * @buf: the HTML buffer output + * @cur: the document + * @encoding: the encoding string + * * Dump an HTML document. Formatting return/spaces are added. - */ -void -htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, - const char *encoding) { - htmlDocContentDumpFormatOutput(buf, cur, encoding, 1); -} - -/************************************************************************ - * * - * Saving functions front-ends * - * * - ************************************************************************/ - -/** - * htmlDocDump: - * @f: the FILE* - * @cur: the document - * - * Dump an HTML document to an open FILE. - * - * returns: the number of byte written or -1 in case of failure. - */ -int -htmlDocDump(FILE *f, xmlDocPtr cur) { - xmlOutputBufferPtr buf; - xmlCharEncodingHandlerPtr handler = NULL; - const char *encoding; - int ret; - - xmlInitParser(); - - if ((cur == NULL) || (f == NULL)) { - return(-1); - } - - encoding = (const char *) htmlGetMetaEncoding(cur); - - if (encoding != NULL) { - xmlCharEncoding enc; - - enc = xmlParseCharEncoding(encoding); + */ +void +htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, + const char *encoding) { + htmlDocContentDumpFormatOutput(buf, cur, encoding, 1); +} + +/************************************************************************ + * * + * Saving functions front-ends * + * * + ************************************************************************/ + +/** + * htmlDocDump: + * @f: the FILE* + * @cur: the document + * + * Dump an HTML document to an open FILE. + * + * returns: the number of byte written or -1 in case of failure. + */ +int +htmlDocDump(FILE *f, xmlDocPtr cur) { + xmlOutputBufferPtr buf; + xmlCharEncodingHandlerPtr handler = NULL; + const char *encoding; + int ret; + + xmlInitParser(); + + if ((cur == NULL) || (f == NULL)) { + return(-1); + } + + encoding = (const char *) htmlGetMetaEncoding(cur); + + if (encoding != NULL) { + xmlCharEncoding enc; + + enc = xmlParseCharEncoding(encoding); if (enc != XML_CHAR_ENCODING_UTF8) { - handler = xmlFindCharEncodingHandler(encoding); - if (handler == NULL) - htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); - } + handler = xmlFindCharEncodingHandler(encoding); + if (handler == NULL) + htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); + } } else { /* * Fallback to HTML or ASCII when the encoding is unspecified @@ -1103,48 +1103,48 @@ htmlDocDump(FILE *f, xmlDocPtr cur) { handler = xmlFindCharEncodingHandler("HTML"); if (handler == NULL) handler = xmlFindCharEncodingHandler("ascii"); - } - - buf = xmlOutputBufferCreateFile(f, handler); - if (buf == NULL) return(-1); - htmlDocContentDumpOutput(buf, cur, NULL); - - ret = xmlOutputBufferClose(buf); - return(ret); -} - -/** - * htmlSaveFile: - * @filename: the filename (or URL) - * @cur: the document - * - * Dump an HTML document to a file. If @filename is "-" the stdout file is - * used. - * returns: the number of byte written or -1 in case of failure. - */ -int -htmlSaveFile(const char *filename, xmlDocPtr cur) { - xmlOutputBufferPtr buf; - xmlCharEncodingHandlerPtr handler = NULL; - const char *encoding; - int ret; - - if ((cur == NULL) || (filename == NULL)) - return(-1); - - xmlInitParser(); - - encoding = (const char *) htmlGetMetaEncoding(cur); - - if (encoding != NULL) { - xmlCharEncoding enc; - - enc = xmlParseCharEncoding(encoding); + } + + buf = xmlOutputBufferCreateFile(f, handler); + if (buf == NULL) return(-1); + htmlDocContentDumpOutput(buf, cur, NULL); + + ret = xmlOutputBufferClose(buf); + return(ret); +} + +/** + * htmlSaveFile: + * @filename: the filename (or URL) + * @cur: the document + * + * Dump an HTML document to a file. If @filename is "-" the stdout file is + * used. + * returns: the number of byte written or -1 in case of failure. + */ +int +htmlSaveFile(const char *filename, xmlDocPtr cur) { + xmlOutputBufferPtr buf; + xmlCharEncodingHandlerPtr handler = NULL; + const char *encoding; + int ret; + + if ((cur == NULL) || (filename == NULL)) + return(-1); + + xmlInitParser(); + + encoding = (const char *) htmlGetMetaEncoding(cur); + + if (encoding != NULL) { + xmlCharEncoding enc; + + enc = xmlParseCharEncoding(encoding); if (enc != XML_CHAR_ENCODING_UTF8) { - handler = xmlFindCharEncodingHandler(encoding); - if (handler == NULL) - htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); - } + handler = xmlFindCharEncodingHandler(encoding); + if (handler == NULL) + htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); + } } else { /* * Fallback to HTML or ASCII when the encoding is unspecified @@ -1153,55 +1153,55 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) { handler = xmlFindCharEncodingHandler("HTML"); if (handler == NULL) handler = xmlFindCharEncodingHandler("ascii"); - } - - /* - * save the content to a temp buffer. - */ - buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression); - if (buf == NULL) return(0); - - htmlDocContentDumpOutput(buf, cur, NULL); - - ret = xmlOutputBufferClose(buf); - return(ret); -} - -/** - * htmlSaveFileFormat: - * @filename: the filename - * @cur: the document - * @format: should formatting spaces been added - * @encoding: the document encoding - * - * Dump an HTML document to a file using a given encoding. - * - * returns: the number of byte written or -1 in case of failure. - */ -int -htmlSaveFileFormat(const char *filename, xmlDocPtr cur, - const char *encoding, int format) { - xmlOutputBufferPtr buf; - xmlCharEncodingHandlerPtr handler = NULL; - int ret; - - if ((cur == NULL) || (filename == NULL)) - return(-1); - - xmlInitParser(); - - if (encoding != NULL) { - xmlCharEncoding enc; - - enc = xmlParseCharEncoding(encoding); + } + + /* + * save the content to a temp buffer. + */ + buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression); + if (buf == NULL) return(0); + + htmlDocContentDumpOutput(buf, cur, NULL); + + ret = xmlOutputBufferClose(buf); + return(ret); +} + +/** + * htmlSaveFileFormat: + * @filename: the filename + * @cur: the document + * @format: should formatting spaces been added + * @encoding: the document encoding + * + * Dump an HTML document to a file using a given encoding. + * + * returns: the number of byte written or -1 in case of failure. + */ +int +htmlSaveFileFormat(const char *filename, xmlDocPtr cur, + const char *encoding, int format) { + xmlOutputBufferPtr buf; + xmlCharEncodingHandlerPtr handler = NULL; + int ret; + + if ((cur == NULL) || (filename == NULL)) + return(-1); + + xmlInitParser(); + + if (encoding != NULL) { + xmlCharEncoding enc; + + enc = xmlParseCharEncoding(encoding); if (enc != XML_CHAR_ENCODING_UTF8) { - handler = xmlFindCharEncodingHandler(encoding); - if (handler == NULL) - htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); - } - htmlSetMetaEncoding(cur, (const xmlChar *) encoding); - } else { - htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8"); + handler = xmlFindCharEncodingHandler(encoding); + if (handler == NULL) + htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); + } + htmlSetMetaEncoding(cur, (const xmlChar *) encoding); + } else { + htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8"); /* * Fallback to HTML or ASCII when the encoding is unspecified @@ -1210,38 +1210,38 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur, handler = xmlFindCharEncodingHandler("HTML"); if (handler == NULL) handler = xmlFindCharEncodingHandler("ascii"); - } - - /* - * save the content to a temp buffer. - */ - buf = xmlOutputBufferCreateFilename(filename, handler, 0); - if (buf == NULL) return(0); - - htmlDocContentDumpFormatOutput(buf, cur, encoding, format); - - ret = xmlOutputBufferClose(buf); - return(ret); -} - -/** - * htmlSaveFileEnc: - * @filename: the filename - * @cur: the document - * @encoding: the document encoding - * - * Dump an HTML document to a file using a given encoding - * and formatting returns/spaces are added. - * - * returns: the number of byte written or -1 in case of failure. - */ -int -htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) { - return(htmlSaveFileFormat(filename, cur, encoding, 1)); -} - -#endif /* LIBXML_OUTPUT_ENABLED */ - -#define bottom_HTMLtree -#include "elfgcchack.h" -#endif /* LIBXML_HTML_ENABLED */ + } + + /* + * save the content to a temp buffer. + */ + buf = xmlOutputBufferCreateFilename(filename, handler, 0); + if (buf == NULL) return(0); + + htmlDocContentDumpFormatOutput(buf, cur, encoding, format); + + ret = xmlOutputBufferClose(buf); + return(ret); +} + +/** + * htmlSaveFileEnc: + * @filename: the filename + * @cur: the document + * @encoding: the document encoding + * + * Dump an HTML document to a file using a given encoding + * and formatting returns/spaces are added. + * + * returns: the number of byte written or -1 in case of failure. + */ +int +htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) { + return(htmlSaveFileFormat(filename, cur, encoding, 1)); +} + +#endif /* LIBXML_OUTPUT_ENABLED */ + +#define bottom_HTMLtree +#include "elfgcchack.h" +#endif /* LIBXML_HTML_ENABLED */ |