ÿØÿàJFIFÿþ ÿÛC       ÿÛC ÿÀÿÄÿÄ"#QrÿÄÿÄ&1!A"2qQaáÿÚ ?Øy,æ/3JæÝ¹È߲؋5êXw²±ÉyˆR”¾I0ó2—PI¾IÌÚiMö¯–þrìN&"KgX:Šíµ•nTJnLK„…@!‰-ý ùúmë;ºgµŒ&ó±hw’¯Õ@”Ü— 9ñ-ë.²1<yà‚¹ïQÐU„ہ?.’¦èûbß±©Ö«Âw*VŒ) `$‰bØÔŸ’ëXÖ-ËTÜíGÚ3ð«g Ÿ§¯—Jx„–’U/ÂÅv_s(Hÿ@TñJÑãõçn­‚!ÈgfbÓc­:él[ðQe 9ÀPLbÃãCµm[5¿ç'ªjglå‡Ûí_§Úõl-;"PkÞÞÁQâ¼_Ñ^¢SŸx?"¸¦ùY騐ÒOÈ q’`~~ÚtËU¹CڒêV  I1Áß_ÿÙ/** * @file * * @brief HTML parser, doesn't support HTML5 * * This module orginally implemented an HTML parser based on the * (underspecified) HTML 4.0 spec. As of 2.14, the tokenizer * conforms to HTML5. Tree construction still follows a custom, * unspecified algorithm with many differences to HTML5. * * The parser defaults to ISO-8859-1, the default encoding of * HTTP/1.0. * * @copyright See Copyright for the status of this software. * * @author Daniel Veillard */ #ifndef __HTML_PARSER_H__ #define __HTML_PARSER_H__ #include #include #ifdef LIBXML_HTML_ENABLED #ifdef __cplusplus extern "C" { #endif /* * Backward compatibility */ #define UTF8ToHtml htmlUTF8ToHtml #define htmlDefaultSubelement(elt) elt->defaultsubelt #define htmlElementAllowedHereDesc(parent,elt) \ htmlElementAllowedHere((parent), (elt)->name) #define htmlRequiredAttrs(elt) (elt)->attrs_req /* * Most of the back-end structures from XML and HTML are shared. */ /** Same as xmlParserCtxt */ typedef xmlParserCtxt htmlParserCtxt; typedef xmlParserCtxtPtr htmlParserCtxtPtr; typedef xmlParserNodeInfo htmlParserNodeInfo; /** Same as xmlSAXHandler */ typedef xmlSAXHandler htmlSAXHandler; typedef xmlSAXHandlerPtr htmlSAXHandlerPtr; /** Same as xmlParserInput */ typedef xmlParserInput htmlParserInput; typedef xmlParserInputPtr htmlParserInputPtr; typedef xmlDocPtr htmlDocPtr; typedef xmlNodePtr htmlNodePtr; /** @cond ignore */ /* * Internal description of an HTML element, representing HTML 4.01 * and XHTML 1.0 (which share the same structure). 
*/
/* Struct alias and pointer alias for the element descriptor. */
typedef struct _htmlElemDesc htmlElemDesc;
typedef htmlElemDesc *htmlElemDescPtr;
/*
 * Element descriptor record.
 *
 * Several members are marked "unused" and the pointer members near the end
 * carry XML_DEPRECATED_MEMBER, yet all of them are still declared.
 * NOTE(review): presumably they are retained so the struct layout seen by
 * existing callers does not change - confirm before removing or reordering.
 */
struct _htmlElemDesc {
    const char *name;       /* The tag name */
    char startTag;          /* unused */
    char endTag;            /* Whether the end tag can be implied */
    char saveEndTag;        /* unused */
    char empty;             /* Is this an empty element ? */
    char depr;              /* unused */
    char dtd;               /* unused */
    char isinline;          /* is this a block 0 or inline 1 element */
    const char *desc;       /* the description */

    /* Deprecated members (each tagged XML_DEPRECATED_MEMBER). */
    const char** subelts XML_DEPRECATED_MEMBER;
    const char* defaultsubelt XML_DEPRECATED_MEMBER;
    const char** attrs_opt XML_DEPRECATED_MEMBER;
    const char** attrs_depr XML_DEPRECATED_MEMBER;
    const char** attrs_req XML_DEPRECATED_MEMBER;

    /* NOTE(review): not documented in this header - confirm the meaning
     * and valid values against the parser implementation. */
    int dataMode;
};

/*
 * Internal description of an HTML entity.
 */
/* Struct alias and pointer alias for the entity descriptor. */
typedef struct _htmlEntityDesc htmlEntityDesc;
typedef htmlEntityDesc *htmlEntityDescPtr;
/* Entity record: a numeric character value plus its name and description. */
struct _htmlEntityDesc {
    unsigned int value;     /* the UNICODE value for the character */
    const char *name;       /* The entity name */
    const char *desc;       /* the description */
};

/* The variable below is only declared when LIBXML_SAX1_ENABLED is defined. */
#ifdef LIBXML_SAX1_ENABLED
/**
 * @deprecated Use #xmlSAX2InitHtmlDefaultSAXHandler
 */
XML_DEPRECATED
XMLPUBVAR const xmlSAXHandlerV1 htmlDefaultSAXHandler;
#endif /* LIBXML_SAX1_ENABLED */

/** @endcond */

/*
 * There is only few public functions.
 */
/*
 * Deprecated lookup and low-level parsing entry points.
 * Every prototype in this group is tagged XML_DEPRECATED.
 * NOTE(review): XML_DEPRECATED is defined outside this header; presumably
 * it attaches a compiler deprecation attribute - confirm.
 */
XML_DEPRECATED
XMLPUBFUN void
			htmlInitAutoClose (void);
XML_DEPRECATED
XMLPUBFUN const htmlElemDesc *
			htmlTagLookup (const xmlChar *tag);
XML_DEPRECATED
XMLPUBFUN const htmlEntityDesc *
			htmlEntityLookup(const xmlChar *name);
XML_DEPRECATED
XMLPUBFUN const htmlEntityDesc *
			htmlEntityValueLookup(unsigned int value);

XML_DEPRECATED
XMLPUBFUN int
			htmlIsAutoClosed(xmlDoc *doc,
					 xmlNode *elem);
XML_DEPRECATED
XMLPUBFUN int
			htmlAutoCloseTag(xmlDoc *doc,
					 const xmlChar *name,
					 xmlNode *elem);
XML_DEPRECATED
XMLPUBFUN const htmlEntityDesc *
			htmlParseEntityRef(htmlParserCtxt *ctxt,
					 const xmlChar **str);
XML_DEPRECATED
XMLPUBFUN int
			htmlParseCharRef(htmlParserCtxt *ctxt);
XML_DEPRECATED
XMLPUBFUN void
			htmlParseElement(htmlParserCtxt *ctxt);

/*
 * Parser context constructors and the document-level parse call.
 * The constructors return an htmlParserCtxt pointer.
 * NOTE(review): the failure return value and who releases the context are
 * not stated in this header - confirm against the implementation.
 */
XMLPUBFUN htmlParserCtxt *
			htmlNewParserCtxt(void);
XMLPUBFUN htmlParserCtxt *
			htmlNewSAXParserCtxt(const htmlSAXHandler *sax,
					     void *userData);

XMLPUBFUN htmlParserCtxt *
			htmlCreateMemoryParserCtxt(const char *buffer,
						   int size);

XMLPUBFUN int
			htmlParseDocument(htmlParserCtxt *ctxt);

/*
 * One-shot entry points taking a string (xmlChar *cur) or a filename plus
 * an encoding name and returning an xmlDoc pointer. The SAX-taking
 * variants are tagged XML_DEPRECATED; the plain variants are not.
 */
XML_DEPRECATED
XMLPUBFUN xmlDoc *
			htmlSAXParseDoc	(const xmlChar *cur,
					 const char *encoding,
					 htmlSAXHandler *sax,
					 void *userData);
XMLPUBFUN xmlDoc *
			htmlParseDoc	(const xmlChar *cur,
					 const char *encoding);
XMLPUBFUN htmlParserCtxt *
			htmlCreateFileParserCtxt(const char *filename,
						 const char *encoding);
XML_DEPRECATED
XMLPUBFUN xmlDoc *
			htmlSAXParseFile(const char *filename,
					 const char *encoding,
					 htmlSAXHandler *sax,
					 void *userData);
XMLPUBFUN xmlDoc *
			htmlParseFile	(const char *filename,
					 const char *encoding);

/*
 * Deprecated conversion helpers and miscellaneous switches.
 * The two converters take an output buffer with an in/out length pointer
 * and an input buffer with an in/out length pointer.
 * NOTE(review): what the lengths hold on return is not stated here - confirm.
 */
XML_DEPRECATED
XMLPUBFUN int
			htmlUTF8ToHtml	(unsigned char *out,
					 int *outlen,
					 const unsigned char *in,
					 int *inlen);
XML_DEPRECATED
XMLPUBFUN int
			htmlEncodeEntities(unsigned char *out,
					 int *outlen,
					 const unsigned char *in,
					 int *inlen, int quoteChar);
XML_DEPRECATED
XMLPUBFUN int
			htmlIsScriptAttribute(const xmlChar *name);
XML_DEPRECATED
XMLPUBFUN int
			htmlHandleOmittedElem(int val);

#ifdef LIBXML_PUSH_ENABLED
/*
 * Interfaces for the Push mode.
 */
/*
 * Push-mode entry points: a context constructor that accepts an initial
 * chunk, and a call that feeds further chunks. Both are declared only
 * when LIBXML_PUSH_ENABLED is defined.
 * NOTE(review): the exact meaning of 'terminate' is not stated in this
 * header; presumably non-zero marks the last chunk - confirm.
 */
XMLPUBFUN htmlParserCtxt *
			htmlCreatePushParserCtxt(htmlSAXHandler *sax,
						 void *user_data,
						 const char *chunk,
						 int size,
						 const char *filename,
						 xmlCharEncoding enc);
XMLPUBFUN int
			htmlParseChunk		(htmlParserCtxt *ctxt,
						 const char *chunk,
						 int size,
						 int terminate);
#endif /* LIBXML_PUSH_ENABLED */

XMLPUBFUN void
			htmlFreeParserCtxt	(htmlParserCtxt *ctxt);

/*
 * New set of simpler/more flexible APIs
 */

/**
 * This is the set of HTML parser options that can be passed to
 * #htmlReadDoc, #htmlCtxtSetOptions and other functions.
 *
 * Every enumerator is a distinct single bit (1<<k). The bit positions
 * are sparse: only 0, 2, 5, 6, 7, 8, 11, 13, 16, 19, 21, 22 and 26 are
 * assigned here.
 */
typedef enum {
    /**
     * No effect as of 2.14.0.
     */
    HTML_PARSE_RECOVER = 1<<0,
    /**
     * Do not default to a doctype if none was found.
     */
    HTML_PARSE_NODEFDTD = 1<<2,
    /**
     * Disable error and warning reports to the error handlers.
     * Errors are still accessible with xmlCtxtGetLastError().
     */
    HTML_PARSE_NOERROR = 1<<5,
    /**
     * Disable warning reports.
     */
    HTML_PARSE_NOWARNING = 1<<6,
    /**
     * No effect.
     */
    HTML_PARSE_PEDANTIC = 1<<7,
    /**
     * Remove some text nodes containing only whitespace from the
     * result document. Which nodes are removed depends on a conservative
     * heuristic. The reindenting feature of the serialization code relies
     * on this option to be set when parsing. Use of this option is
     * DISCOURAGED.
     */
    HTML_PARSE_NOBLANKS = 1<<8,
    /**
     * No effect.
     */
    HTML_PARSE_NONET = 1<<11,
    /**
     * Do not add implied html, head or body elements.
     */
    HTML_PARSE_NOIMPLIED = 1<<13,
    /**
     * Store small strings directly in the node struct to save
     * memory.
     */
    HTML_PARSE_COMPACT = 1<<16,
    /**
     * Relax some internal limits. See XML_PARSE_HUGE in xmlParserOption.
     *
     * @since 2.14.0
     *
     * Use XML_PARSE_HUGE with older versions.
     */
    HTML_PARSE_HUGE = 1<<19,
    /**
     * Ignore the encoding in the HTML declaration. This option is
     * mostly unneeded these days. The only effect is to enforce
     * ISO-8859-1 decoding of ASCII-like data.
     */
    HTML_PARSE_IGNORE_ENC =1<<21,
    /**
     * Enable reporting of line numbers larger than 65535.
     *
     * @since 2.14.0
     *
     * Use XML_PARSE_BIG_LINES with older versions.
     */
    HTML_PARSE_BIG_LINES = 1<<22,
    /**
     * Make the tokenizer emit a SAX callback for each token. This results
     * in unbalanced invocations of startElement and endElement.
     *
     * For now, this is only usable to tokenize HTML5 with custom SAX
     * callbacks. A tree builder isn't implemented yet.
     *
     * @since 2.14.0
     */
    HTML_PARSE_HTML5 = 1<<26
} htmlParserOption;

/*
 * Context maintenance. Two option-setting calls with the same signature
 * are declared.
 * NOTE(review): how htmlCtxtSetOptions and htmlCtxtUseOptions differ, and
 * what their int result means, is not visible in this header - confirm.
 */
XMLPUBFUN void
		htmlCtxtReset		(htmlParserCtxt *ctxt);
XMLPUBFUN int
		htmlCtxtSetOptions	(htmlParserCtxt *ctxt,
					 int options);
XMLPUBFUN int
		htmlCtxtUseOptions	(htmlParserCtxt *ctxt,
					 int options);

/*
 * Context-free readers: one per input kind (string, file/URL, memory
 * buffer, file descriptor, user I/O callbacks). Each takes an encoding
 * name and an int 'options' argument and returns an xmlDoc pointer.
 * NOTE(review): 'options' is presumably a bitwise OR of htmlParserOption
 * values - confirm.
 */
XMLPUBFUN xmlDoc *
		htmlReadDoc		(const xmlChar *cur,
					 const char *URL,
					 const char *encoding,
					 int options);
XMLPUBFUN xmlDoc *
		htmlReadFile		(const char *URL,
					 const char *encoding,
					 int options);
XMLPUBFUN xmlDoc *
		htmlReadMemory		(const char *buffer,
					 int size,
					 const char *URL,
					 const char *encoding,
					 int options);
XMLPUBFUN xmlDoc *
		htmlReadFd		(int fd,
					 const char *URL,
					 const char *encoding,
					 int options);
XMLPUBFUN xmlDoc *
		htmlReadIO		(xmlInputReadCallback ioread,
					 xmlInputCloseCallback ioclose,
					 void *ioctx,
					 const char *URL,
					 const char *encoding,
					 int options);

/*
 * Context-based readers: the same input kinds as the htmlRead* family,
 * with a caller-supplied parser context as the first argument, plus
 * htmlCtxtParseDocument which takes an xmlParserInput directly.
 */
XMLPUBFUN xmlDoc *
		htmlCtxtParseDocument	(htmlParserCtxt *ctxt,
					 xmlParserInput *input);
XMLPUBFUN xmlDoc *
		htmlCtxtReadDoc		(xmlParserCtxt *ctxt,
					 const xmlChar *cur,
					 const char *URL,
					 const char *encoding,
					 int options);
XMLPUBFUN xmlDoc *
		htmlCtxtReadFile	(xmlParserCtxt *ctxt,
					 const char *filename,
					 const char *encoding,
					 int options);
XMLPUBFUN xmlDoc *
		htmlCtxtReadMemory	(xmlParserCtxt *ctxt,
					 const char *buffer,
					 int size,
					 const char *URL,
					 const char *encoding,
					 int options);
XMLPUBFUN xmlDoc *
		htmlCtxtReadFd		(xmlParserCtxt *ctxt,
					 int fd,
					 const char *URL,
					 const char *encoding,
					 int options);
XMLPUBFUN xmlDoc *
		htmlCtxtReadIO		(xmlParserCtxt *ctxt,
					 xmlInputReadCallback ioread,
					 xmlInputCloseCallback ioclose,
					 void *ioctx,
					 const char *URL,
					 const char *encoding,
					 int options);

/**
 * deprecated content model
 *
 * Status codes are bit patterns: HTML_REQUIRED (0xc) equals
 * 0x8 | HTML_VALID (0x4), so testing a status against HTML_VALID is
 * also true for required items.
 */
typedef enum {
  HTML_NA = 0 ,		/* something we don't check at all */
  HTML_INVALID = 0x1 ,
  HTML_DEPRECATED = 0x2 ,
  HTML_VALID = 0x4 ,
  HTML_REQUIRED = 0xc /* VALID bit set so ( & HTML_VALID ) is TRUE */
} htmlStatus ;

/* Using htmlElemDesc rather than name here, to emphasise the fact
   that otherwise there's a lookup overhead
*/

/*
 * Deprecated content-model queries. Parameter names are omitted in these
 * prototypes.
 * NOTE(review): the meaning of the trailing int argument of
 * htmlAttrAllowed and htmlNodeStatus is not visible here - confirm
 * against the implementation.
 */
XML_DEPRECATED
XMLPUBFUN htmlStatus htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ;
XML_DEPRECATED
XMLPUBFUN int htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ;
XML_DEPRECATED
XMLPUBFUN htmlStatus htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ;
XML_DEPRECATED
XMLPUBFUN htmlStatus htmlNodeStatus(xmlNode *, int) ;

#ifdef __cplusplus
} /* closes the extern "C" block opened when compiling as C++ */
#endif

#endif /* LIBXML_HTML_ENABLED */
#endif /* __HTML_PARSER_H__ */