From cef1ba2d58cb8a6928055b322c88a79b5b5c0ad4 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 22 Feb 2012 16:55:40 +0100 Subject: Disable problematic reading of external entities in raptor --- redland/raptor/makefile.mk | 3 +- redland/raptor/raptor-1.4.18.entities.patch | 383 ++++++++++++++++++++++++++++ 2 files changed, 385 insertions(+), 1 deletion(-) create mode 100644 redland/raptor/raptor-1.4.18.entities.patch (limited to 'redland') diff --git a/redland/raptor/makefile.mk b/redland/raptor/makefile.mk index 214ee2d78b4f..292a34024dc2 100644 --- a/redland/raptor/makefile.mk +++ b/redland/raptor/makefile.mk @@ -58,7 +58,8 @@ OOO_PATCH_FILES= \ $(TARFILE_NAME).patch.dmake \ $(TARFILE_NAME).patch.win32 \ $(TARFILE_NAME).patch.rindex \ - raptor-aix.patch + raptor-aix.patch \ + $(TARFILE_NAME).entities.patch .IF "$(CROSS_COMPILING)"=="YES" OOO_PATCH_FILES += \ diff --git a/redland/raptor/raptor-1.4.18.entities.patch b/redland/raptor/raptor-1.4.18.entities.patch new file mode 100644 index 000000000000..4964dae3c36b --- /dev/null +++ b/redland/raptor/raptor-1.4.18.entities.patch @@ -0,0 +1,383 @@ +--- misc/raptor-1.4.18/src/raptor.h.old 2008-06-20 07:47:38.000000000 +0200 ++++ misc/build/raptor-1.4.18/src/raptor.h 2012-02-15 16:54:21.000000000 +0100 +@@ -376,6 +376,7 @@ typedef struct { + * @RAPTOR_FEATURE_JSON_EXTRA_DATA: JSON serializer extra top-level data + * @RAPTOR_FEATURE_RSS_TRIPLES: Atom/RSS serializer writes extra RDF triples it finds (none, rdf-xml, atom-triples) + * @RAPTOR_FEATURE_ATOM_ENTRY_URI: Atom entry URI. If given, generate an Atom Entry Document with the item having the given URI, otherwise generate an Atom Feed Document with any items found. ++ * @RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: When reading XML, load external entities. + * @RAPTOR_FEATURE_LAST: Internal + * + * Raptor parser, serializer or XML writer features. +@@ -416,7 +417,8 @@ typedef enum { + RAPTOR_FEATURE_JSON_EXTRA_DATA, + RAPTOR_FEATURE_RSS_TRIPLES, + RAPTOR_FEATURE_ATOM_ENTRY_URI, +- RAPTOR_FEATURE_LAST=RAPTOR_FEATURE_ATOM_ENTRY_URI ++ RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES, ++ RAPTOR_FEATURE_LAST=RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES + } raptor_feature; + + +--- misc/raptor-1.4.18/src/raptor_feature.c.old 2008-06-05 08:54:16.000000000 +0200 ++++ misc/build/raptor-1.4.18/src/raptor_feature.c 2012-02-15 16:55:09.000000000 +0100 +@@ -89,7 +89,8 @@ static const struct + { RAPTOR_FEATURE_JSON_CALLBACK , 6, "jsonCallback", "JSON serializer callback" }, + { RAPTOR_FEATURE_JSON_EXTRA_DATA , 6, "jsonExtraData", "JSON serializer extra data" }, + { RAPTOR_FEATURE_RSS_TRIPLES , 6, "rssTriples", "Atom/RSS serializer writes extra RDF triples" }, +- { RAPTOR_FEATURE_ATOM_ENTRY_URI , 6, "atomEntryUri", "Atom serializer Entry URI" } ++ { RAPTOR_FEATURE_ATOM_ENTRY_URI , 6, "atomEntryUri", "Atom serializer Entry URI" }, ++ { RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES, 1, "loadExternalEntities", "Load external XML entities." } + }; + + +--- misc/raptor-1.4.18/src/raptor_internal.h.old 2008-06-03 07:04:09.000000000 +0200 ++++ misc/build/raptor-1.4.18/src/raptor_internal.h 2012-02-15 16:52:08.000000000 +0100 +@@ -983,6 +983,14 @@ struct raptor_sax2_s { + + /* base URI for resolving relative URIs or xml:base URIs */ + raptor_uri* base_uri; ++ ++ /* call SAX2 handlers if non-0 */ ++ int enabled; ++ ++ /* FEATURE: ++ * non 0 if XML entities should be loaded ++ */ ++ int feature_load_external_entities; + }; + + int raptor_sax2_init(void); +--- misc/raptor-1.4.18/src/raptor_libxml.c.old 2008-06-14 05:35:27.000000000 +0200 ++++ misc/build/raptor-1.4.18/src/raptor_libxml.c 2012-02-15 16:52:08.000000000 +0100 +@@ -142,18 +142,115 @@ raptor_libxml_hasExternalSubset (void* u + + static xmlParserInputPtr + raptor_libxml_resolveEntity(void* user_data, +- const xmlChar *publicId, const xmlChar *systemId) { +- raptor_sax2* sax2=(raptor_sax2*)user_data; +- return libxml2_resolveEntity(sax2->xc, publicId, systemId); ++ const xmlChar *publicId, const xmlChar *systemId) ++{ ++ raptor_sax2* sax2 = (raptor_sax2*)user_data; ++ xmlParserCtxtPtr ctxt = sax2->xc; ++ const unsigned char *uri_string = NULL; ++ xmlParserInputPtr entity_input; ++ int load_entity = 0; ++ ++ if(ctxt->input) ++ uri_string = (const unsigned char *)ctxt->input->filename; ++ ++ if(!uri_string) ++ uri_string = (const unsigned char *)ctxt->directory; ++ ++ load_entity = sax2->feature_load_external_entities; ++ ++ if(load_entity) { ++ entity_input = xmlLoadExternalEntity((const char*)uri_string, ++ (const char*)publicId, ++ ctxt); ++ } else { ++ RAPTOR_DEBUG4("Not loading entity URI %s by policy for publicId '%s' systemId '%s'\n", uri_string, publicId, systemId); ++ } ++ ++ return entity_input; + } + + + static xmlEntityPtr +-raptor_libxml_getEntity(void* user_data, const xmlChar *name) { +- raptor_sax2* sax2=(raptor_sax2*)user_data; +- return libxml2_getEntity(sax2->xc, name); +-} ++raptor_libxml_getEntity(void* user_data, const xmlChar *name) ++{ ++ raptor_sax2* sax2 = (raptor_sax2*)user_data; ++ xmlParserCtxtPtr xc = sax2->xc; ++ xmlEntityPtr ret = NULL; ++ ++ if(!xc) ++ return NULL; ++ ++ if(!xc->inSubset) { ++ /* looks for hardcoded set of entity names - lt, gt etc. */ ++ ret = xmlGetPredefinedEntity(name); ++ if(ret) { ++ RAPTOR_DEBUG2("Entity '%s' found in predefined set\n", name); ++ return ret; ++ } ++ } + ++ /* This section uses xmlGetDocEntity which looks for entities in ++ * memory only, never from a file or URI ++ */ ++ if(xc->myDoc && (xc->myDoc->standalone == 1)) { ++ RAPTOR_DEBUG2("Entity '%s' document is standalone\n", name); ++ /* Document is standalone: no entities are required to interpret doc */ ++ if(xc->inSubset == 2) { ++ xc->myDoc->standalone = 0; ++ ret = xmlGetDocEntity(xc->myDoc, name); ++ xc->myDoc->standalone = 1; ++ } else { ++ ret = xmlGetDocEntity(xc->myDoc, name); ++ if(!ret) { ++ xc->myDoc->standalone = 0; ++ ret = xmlGetDocEntity(xc->myDoc, name); ++ xc->myDoc->standalone = 1; ++ } ++ } ++ } else { ++ ret = xmlGetDocEntity(xc->myDoc, name); ++ } ++ ++ if(ret && !ret->children && ++ (ret->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { ++ /* Entity is an external general parsed entity. It may be in a ++ * catalog file, user file or user URI ++ */ ++ int val = 0; ++ xmlNodePtr children; ++ int load_entity = 0; ++ ++ load_entity = sax2->feature_load_external_entities; ++ ++ if(!load_entity) { ++ RAPTOR_DEBUG2("Not getting entity URI %s by policy\n", ret->URI); ++ children = xmlNewText((const xmlChar*)""); ++ } else { ++ /* Disable SAX2 handlers so that the SAX2 events do not all get ++ * sent to callbacks during dealing with the entity parsing. ++ */ ++ sax2->enabled = 0; ++ val = xmlParseCtxtExternalEntity(xc, ret->URI, ret->ExternalID, &children); ++ sax2->enabled = 1; ++ } ++ ++ if(!val) { ++ xmlAddChildList((xmlNodePtr)ret, children); ++ } else { ++ xc->validate = 0; ++ return NULL; ++ } ++ ++ ret->owner = 1; ++ ++ /* Mark this entity as having been checked - never do this again */ ++ if(!ret->checked) ++ ret->checked = 1; ++ } ++ ++ return ret; ++} ++ + + static xmlEntityPtr + raptor_libxml_getParameterEntity(void* user_data, const xmlChar *name) { +--- misc/raptor-1.4.18/src/raptor_parse.c.old 2008-06-15 09:18:50.000000000 +0200 ++++ misc/build/raptor-1.4.18/src/raptor_parse.c 2012-02-15 16:52:08.000000000 +0100 +@@ -1294,6 +1294,7 @@ raptor_set_feature(raptor_parser *parser + case RAPTOR_FEATURE_MICROFORMATS: + case RAPTOR_FEATURE_HTML_LINK: + case RAPTOR_FEATURE_WWW_TIMEOUT: ++ case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: + parser->features[(int)feature]=value; + break; + +@@ -1414,6 +1415,7 @@ raptor_get_feature(raptor_parser *parser + case RAPTOR_FEATURE_MICROFORMATS: + case RAPTOR_FEATURE_HTML_LINK: + case RAPTOR_FEATURE_WWW_TIMEOUT: ++ case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: + result=(parser->features[(int)feature] != 0); + break; + +--- misc/raptor-1.4.18/src/raptor_rdfxml.c.old 2008-06-15 10:12:06.000000000 +0200 ++++ misc/build/raptor-1.4.18/src/raptor_rdfxml.c 2012-02-15 16:52:08.000000000 +0100 +@@ -1124,6 +1124,9 @@ raptor_rdfxml_parse_start(raptor_parser* + raptor_sax2_set_feature(rdf_xml_parser->sax2, + RAPTOR_FEATURE_NO_NET, + rdf_parser->features[RAPTOR_FEATURE_NO_NET]); ++ raptor_sax2_set_feature(rdf_xml_parser->sax2, ++ RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES, ++ rdf_parser->features[RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES]); + + raptor_sax2_parse_start(rdf_xml_parser->sax2, uri); + +--- misc/raptor-1.4.18/src/raptor_rss.c.old 2008-05-21 22:25:57.000000000 +0200 ++++ misc/build/raptor-1.4.18/src/raptor_rss.c 2012-02-15 16:52:08.000000000 +0100 +@@ -251,6 +251,9 @@ raptor_rss_parse_start(raptor_parser *rd + raptor_sax2_set_feature(rss_parser->sax2, + RAPTOR_FEATURE_NO_NET, + rdf_parser->features[RAPTOR_FEATURE_NO_NET]); ++ raptor_sax2_set_feature(rss_parser->sax2, ++ RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES, ++ rdf_parser->features[RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES]); + + raptor_sax2_parse_start(rss_parser->sax2, uri); + +--- misc/raptor-1.4.18/src/raptor_sax2.c.old 2008-06-15 10:12:20.000000000 +0200 ++++ misc/build/raptor-1.4.18/src/raptor_sax2.c 2012-02-15 16:52:08.000000000 +0100 +@@ -96,6 +96,8 @@ raptor_new_sax2(void* user_data, raptor_ + + sax2->user_data=user_data; + ++ sax2->enabled = 1; ++ + sax2->locator=error_handlers->locator; + + sax2->error_handlers=error_handlers; +@@ -687,6 +689,10 @@ raptor_sax2_set_feature(raptor_sax2 *sax + sax2->feature_no_net=value; + break; + ++ case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: ++ sax2->feature_load_external_entities=value; ++ break; ++ + case RAPTOR_FEATURE_SCANNING: + case RAPTOR_FEATURE_ASSUME_IS_RDF: + case RAPTOR_FEATURE_ALLOW_NON_NS_ATTRIBUTES: +@@ -767,6 +773,9 @@ raptor_sax2_start_element(void* user_dat + unsigned char *xml_language=NULL; + raptor_uri *xml_base=NULL; + ++ if(!sax2->enabled) ++ return; ++ + #ifdef RAPTOR_XML_EXPAT + #ifdef EXPAT_UTF8_BOM_CRASH + sax2->tokens_count++; +@@ -990,6 +999,9 @@ raptor_sax2_end_element(void* user_data, + raptor_sax2* sax2=(raptor_sax2*)user_data; + raptor_xml_element* xml_element; + ++ if(!sax2->enabled) ++ return; ++ + #ifdef RAPTOR_XML_EXPAT + #ifdef EXPAT_UTF8_BOM_CRASH + sax2->tokens_count++; +@@ -1025,6 +1037,10 @@ void + raptor_sax2_characters(void* user_data, const unsigned char *s, int len) + { + raptor_sax2* sax2=(raptor_sax2*)user_data; ++ ++ if(!sax2->enabled) ++ return; ++ + if(sax2->characters_handler) + sax2->characters_handler(sax2->user_data, sax2->current_element, s, len); + } +@@ -1035,6 +1051,10 @@ void + raptor_sax2_cdata(void* user_data, const unsigned char *s, int len) + { + raptor_sax2* sax2=(raptor_sax2*)user_data; ++ ++ if(!sax2->enabled) ++ return; ++ + #ifdef RAPTOR_XML_EXPAT + #ifdef EXPAT_UTF8_BOM_CRASH + sax2->tokens_count++; +@@ -1051,6 +1071,10 @@ void + raptor_sax2_comment(void* user_data, const unsigned char *s) + { + raptor_sax2* sax2=(raptor_sax2*)user_data; ++ ++ if(!sax2->enabled) ++ return; ++ + if(sax2->comment_handler) + sax2->comment_handler(sax2->user_data, sax2->current_element, s); + } +@@ -1066,6 +1090,10 @@ raptor_sax2_unparsed_entity_decl(void* u + const unsigned char* notationName) + { + raptor_sax2* sax2=(raptor_sax2*)user_data; ++ ++ if(!sax2->enabled) ++ return; ++ + if(sax2->unparsed_entity_decl_handler) + sax2->unparsed_entity_decl_handler(sax2->user_data, + entityName, base, systemId, +@@ -1082,6 +1110,10 @@ raptor_sax2_external_entity_ref(void* us + const unsigned char* publicId) + { + raptor_sax2* sax2=(raptor_sax2*)user_data; ++ ++ if(!sax2->enabled) ++ return 0; ++ + if(sax2->external_entity_ref_handler) + return sax2->external_entity_ref_handler(sax2->user_data, + context, base, systemId, publicId); +--- misc/raptor-1.4.18/src/raptor_serialize.c.old 2008-06-20 02:55:31.000000000 +0200 ++++ misc/build/raptor-1.4.18/src/raptor_serialize.c 2012-02-15 16:52:08.000000000 +0100 +@@ -859,6 +859,7 @@ raptor_serializer_set_feature(raptor_ser + + /* Shared */ + case RAPTOR_FEATURE_NO_NET: ++ case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: + + /* XML writer features */ + case RAPTOR_FEATURE_WRITER_AUTO_INDENT: +@@ -965,6 +966,7 @@ raptor_serializer_set_feature_string(rap + + /* Shared */ + case RAPTOR_FEATURE_NO_NET: ++ case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: + + /* XML writer features */ + case RAPTOR_FEATURE_WRITER_AUTO_INDENT: +@@ -1102,6 +1104,7 @@ raptor_serializer_get_feature(raptor_ser + + /* Shared */ + case RAPTOR_FEATURE_NO_NET: ++ case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: + + /* XML writer features */ + case RAPTOR_FEATURE_WRITER_AUTO_INDENT: +@@ -1201,6 +1204,7 @@ raptor_serializer_get_feature_string(rap + + /* Shared */ + case RAPTOR_FEATURE_NO_NET: ++ case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: + + /* XML writer features */ + case RAPTOR_FEATURE_WRITER_AUTO_INDENT: +--- misc/raptor-1.4.18/src/raptor_turtle_writer.c.old 2008-06-20 07:47:48.000000000 +0200 ++++ misc/build/raptor-1.4.18/src/raptor_turtle_writer.c 2012-02-15 16:52:08.000000000 +0100 +@@ -723,6 +723,7 @@ raptor_turtle_writer_set_feature(raptor_ + + /* Shared */ + case RAPTOR_FEATURE_NO_NET: ++ case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: + + /* XML writer features */ + case RAPTOR_FEATURE_RELATIVE_URIS: +@@ -836,6 +837,7 @@ raptor_turtle_writer_get_feature(raptor_ + + /* Shared */ + case RAPTOR_FEATURE_NO_NET: ++ case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: + + /* XML writer features */ + case RAPTOR_FEATURE_RELATIVE_URIS: +--- misc/raptor-1.4.18/src/raptor_xml_writer.c.old 2008-06-03 07:05:56.000000000 +0200 ++++ misc/build/raptor-1.4.18/src/raptor_xml_writer.c 2012-02-15 16:52:08.000000000 +0100 +@@ -906,6 +906,7 @@ raptor_xml_writer_set_feature(raptor_xml + + /* Shared */ + case RAPTOR_FEATURE_NO_NET: ++ case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: + + /* XML writer features */ + case RAPTOR_FEATURE_RELATIVE_URIS: +@@ -1026,6 +1027,7 @@ raptor_xml_writer_get_feature(raptor_xml + + /* Shared */ + case RAPTOR_FEATURE_NO_NET: ++ case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: + + /* XML writer features */ + case RAPTOR_FEATURE_RELATIVE_URIS: -- cgit