From 417a4c9c0c21e5ffcb9ffa9cdbdf0d6f467c9c9d Mon Sep 17 00:00:00 2001 From: Joe Orton Date: Wed, 16 Dec 2020 16:23:23 +0000 Subject: [PATCH] Merge r1884505, r1915625 from trunk: The Microsoft OOXML format uses xml packaged into a zip file, and has mimetypes like: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet This mimetype contains 'xml', but is unfortunately not an xml file. xml2enc processes these files (in particular, when mod_proxy_html is used), typically resulting in them being corrupted as it seems to attempt to perform an ISO-8859-1 to UTF-8 conversion on them. * modules/filters/mod_xml2enc.c (xml2enc_ffunc): Restrict test for XML types to matching "+xml". Submitted by: Joseph Heenan , jorton PR: 64339 Github: closes #150 git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1884505 13f79535-47bb-0310-9956-ffa450edef68 (cherry picked from commit caeb9081648a92d2e545af5c97fd6fe5120eb4cf) --- changes-entries/pr64339.txt | 4 ++++ modules/filters/mod_xml2enc.c | 20 ++++++++++++-------- 2 files changed, 16 insertions(+), 8 deletions(-) create mode 100644 changes-entries/pr64339.txt diff --git a/changes-entries/pr64339.txt b/changes-entries/pr64339.txt new file mode 100644 index 00000000000..9d88bc38941 --- /dev/null +++ b/changes-entries/pr64339.txt @@ -0,0 +1,4 @@ + *) mod_xml2enc: Update check to accept any text/ media type + or any XML media type per RFC 7303, avoiding + corruption of Microsoft OOXML formats. PR 64339. 
+ [Joseph Heenan , Joe Orton] diff --git a/modules/filters/mod_xml2enc.c b/modules/filters/mod_xml2enc.c index 9e3bc314f4e..eb05c183a01 100644 --- a/modules/filters/mod_xml2enc.c +++ b/modules/filters/mod_xml2enc.c @@ -323,7 +323,7 @@ static apr_status_t xml2enc_ffunc(ap_filter_t* f, apr_bucket_brigade* bb) apr_bucket* bstart; apr_size_t insz = 0; int pending_meta = 0; - char *ctype; + char *mtype; char *p; if (!ctx || !f->r->content_type) { @@ -332,13 +332,17 @@ static apr_status_t xml2enc_ffunc(ap_filter_t* f, apr_bucket_brigade* bb) return ap_pass_brigade(f->next, bb) ; } - ctype = apr_pstrdup(f->r->pool, f->r->content_type); - for (p = ctype; *p; ++p) - if (isupper(*p)) - *p = tolower(*p); - - /* only act if starts-with "text/" or contains "xml" */ - if (strncmp(ctype, "text/", 5) && !strstr(ctype, "xml")) { + /* Extract the media type, ignoring parameters in content-type. */ + mtype = apr_pstrdup(f->r->pool, f->r->content_type); + if ((p = ap_strchr(mtype, ';')) != NULL) *p = '\0'; + ap_str_tolower(mtype); + + /* Accept text/ types, plus any XML media type per RFC 7303. */ + if (!(strncmp(mtype, "text/", 5) == 0 + || strcmp(mtype, "application/xml") == 0 + || (strlen(mtype) > 7 /* minimum 'a/b+xml' length */ + && (p = strstr(mtype, "+xml")) != NULL + && strlen(p) == 4 /* ensures +xml is a suffix */))) { ap_remove_output_filter(f); return ap_pass_brigade(f->next, bb) ; }