diff --git a/changes-entries/rewrite-bctls-bneg b/changes-entries/rewrite-bctls-bneg new file mode 100644 index 00000000000..7ff614d1d71 --- /dev/null +++ b/changes-entries/rewrite-bctls-bneg @@ -0,0 +1,3 @@ + *) mod_rewrite: Add "BCTLS" and "BNE" RewriteRule flags. Re-allow encoded + characters on redirections without the "NE" flag. + [Yann Ylavic, Eric Covener] diff --git a/docs/manual/mod/mod_rewrite.xml b/docs/manual/mod/mod_rewrite.xml index 795d546c212..b92cb69d42f 100644 --- a/docs/manual/mod/mod_rewrite.xml +++ b/docs/manual/mod/mod_rewrite.xml @@ -1329,6 +1329,16 @@ cannot use $N in the substitution string! applying the transformation. details ... + + BCTLS + Like [B], but only escape control characters and spaces. + details ... + + + BNE + Characters of [B] or [BCTLS] which should not be escaped. + details ... + backrefnoplus|BNP If backreferences are being escaped, spaces should be escaped to @@ -1336,7 +1346,6 @@ cannot use $N in the substitution string! path component rather than the query string.details ... - chain|C Rule is chained to the following rule. If the rule fails, diff --git a/docs/manual/rewrite/flags.xml b/docs/manual/rewrite/flags.xml index d6855db0b98..f3a7413b372 100644 --- a/docs/manual/rewrite/flags.xml +++ b/docs/manual/rewrite/flags.xml @@ -113,11 +113,13 @@ the entire third argument of RewriteRule -# Escape spaces and question marks. +# Escape spaces and question marks. The quotes around the final argument +# are required when a space is included. RewriteRule "^search/(.*)$" "/search.php?term=$1" "[B= ?]" - +

To limit the characters escaped this way, see flag_bne +and flag_bctls

BNP|backrefnoplus (don't escape space to +) @@ -126,8 +128,43 @@ module="mod_rewrite">RewriteRule to escape the space character in a backreference to %20 rather than '+'. Useful when the backreference will be used in the path component rather than the query string.

+ +# Escape spaces to %20 in the path instead of + as used in form submission via +# the query string +RewriteRule "^search/(.*)$" "/search.php/$1" "[B,BNP]" + + +

This flag is available in version 2.4.26 and later.

+
+ +
BCTLS +

The [BCTLS] flag is similar to the [B] flag, but only escapes +control characters and the space character. This is the same set of +characters rejected when they are copied into the query string unencoded. +

+ + +# Escape control characters and spaces +RewriteRule "^search/(.*)$" "/search.php/$1" "[BCTLS]" + + +

This flag is available in version 2.4.57 and later.

+ +
+ +
BNE +

The list of characters in [BNE=...] are treated as exclusions to the +characters of the [B] or [BCTLS] flags. The listed characters will not be +escaped. +

+ + +# Escape the default characters, but leave / +RewriteRule "^search/(.*)$" "/search.php?term=$1" "[B,BNE=/]" + +

This flag is available in version 2.4.57 and later.

C|chain @@ -211,7 +248,7 @@ attribute is set to the specified value. Typical values are None, RewriteEngine On -RewriteRule "^/index\.html" "-" [CO=frontdoor:yes:.example.com:1440:/] +RewriteRule "^/index\.html" "-" [CO=frontdoor:yes:.example.com:1440:/]

In the example give, the rule doesn't rewrite the request. @@ -298,8 +335,8 @@ environment variable is used to exclude those requests from the access log.

-RewriteRule "\.(png|gif|jpg)$" "-" [E=image:1] -CustomLog "logs/access_log" combined env=!image +RewriteRule "\.(png|gif|jpg)$" "-" [E=image:1] +CustomLog "logs/access_log" combined env=!image

Note that this same effect can be obtained using downloaded from your server.

-RewriteRule "\.exe" "-" [F] +RewriteRule "\.exe" "-" [F]

This example uses the "-" syntax for the rewrite target, which means @@ -347,7 +384,7 @@ longer available.

rewrite target when using the [G] flag:

-RewriteRule "oldproduct" "-" [G,NC] +RewriteRule "oldproduct" "-" [G,NC]

When using [G], an [L] is implied - that is, the response is returned @@ -361,7 +398,7 @@ handler. For example, one might use this to force all files without a file extension to be parsed by the php handler:

-RewriteRule "!\." "-" [H=application/x-httpd-php] +RewriteRule "!\." "-" [H=application/x-httpd-php]

@@ -429,8 +466,8 @@ module="mod_rewrite">RewriteRule will be skipped.

RewriteBase "/" -RewriteCond "%{REQUEST_URI}" "!=/index.php" -RewriteRule "^(.*)" "/index.php?req=$1" [L,PT] +RewriteCond "%{REQUEST_URI}" !=/index.php +RewriteRule "^(.*)" "/index.php?req=$1" [L,PT]
@@ -454,12 +491,12 @@ pattern still matches (i.e., while the URI still contains an A), perform this substitution (i.e., replace the A with a B).

-

In 2.4.8 and later, this module returns an error after 32,000 iterations to +

In 2.4.8 and later, this module returns an error after 10,000 iterations to protect against unintended looping. An alternative maximum number of iterations can be specified by adding to the N flag.

# Be willing to replace 1 character in each pass of the loop -RewriteRule "(.+)[><;]$" "$1" [N=64000] +RewriteRule "(.+)[><;]$" "$1" [N=32000] # ... or, give up if after 10 loops RewriteRule "(.+)[><;]$" "$1" [N=10] @@ -717,20 +754,22 @@ URI in request' warnings.

The [S] flag is used to skip rules that you don't want to run. The syntax of the skip flag is [S=N], where N signifies the number of rules to skip (provided the -RewriteRule matches). This can be thought of as a goto -statement in your rewrite ruleset. In the following example, we only want -to run the RewriteRule if the -requested URI doesn't correspond with an actual file.

+RewriteRule
and any preceding +RewriteCond directives match). This can be thought of as a +goto statement in your rewrite ruleset. In the following +example, we only want to run the +RewriteRule if the requested URI doesn't correspond with an +actual file.

# Is the request for a non-existent file? -RewriteCond "%{REQUEST_FILENAME}" "!-f" -RewriteCond "%{REQUEST_FILENAME}" "!-d" +RewriteCond "%{REQUEST_FILENAME}" !-f +RewriteCond "%{REQUEST_FILENAME}" !-d # If so, skip these two RewriteRules -RewriteRule ".?" "-" [S=2] +RewriteRule ".?" "-" [S=2] -RewriteRule "(.*\.gif)" "images.php?$1" -RewriteRule "(.*\.html)" "docs.php?$1" +RewriteRule "(.*\.gif)" "images.php?$1" +RewriteRule "(.*\.html)" "docs.php?$1"

This technique is useful because a skip=N, where N is the number of rules in the else-clause:

# Does the file exist? -RewriteCond "%{REQUEST_FILENAME}" "!-f" -RewriteCond "%{REQUEST_FILENAME}" "!-d" +RewriteCond "%{REQUEST_FILENAME}" !-f +RewriteCond "%{REQUEST_FILENAME}" !-d # Create an if-then-else construct by skipping 3 lines if we meant to go to the "else" stanza. -RewriteRule ".?" "-" [S=3] +RewriteRule ".?" "-" [S=3] # IF the file exists, then: - RewriteRule "(.*\.gif)" "images.php?$1" + RewriteRule "(.*\.gif)" "images.php?$1" RewriteRule "(.*\.html)" "docs.php?$1" # Skip past the "else" stanza. - RewriteRule ".?" "-" [S=1] + RewriteRule ".?" "-" [S=1] # ELSE... - RewriteRule "(.*)" "404.php?file=$1" + RewriteRule "(.*)" "404.php?file=$1" # END @@ -776,7 +815,7 @@ source code as plain text, if requested in a particular way:

# Serve .pl files as plain text -RewriteRule "\.pl$" "-" [T=text/plain] +RewriteRule "\.pl$" "-" [T=text/plain]

Or, perhaps, if you have a camera that produces jpeg images without @@ -785,7 +824,7 @@ correct MIME type by virtue of their file names:

# Files with 'IMG' in the name are jpg images. -RewriteRule "IMG" "-" [T=image/jpg] +RewriteRule "IMG" "-" [T=image/jpg]

Please note that this is a trivial example, and could be better done diff --git a/modules/mappers/mod_rewrite.c b/modules/mappers/mod_rewrite.c index 5195ceee3d4..709ffc5289c 100644 --- a/modules/mappers/mod_rewrite.c +++ b/modules/mappers/mod_rewrite.c @@ -106,6 +106,8 @@ #include "mod_rewrite.h" #include "ap_expr.h" +#include "test_char.h" + static ap_dbd_t *(*dbd_acquire)(request_rec*) = NULL; static void (*dbd_prepare)(server_rec*, const char*, const char*) = NULL; static const char* really_last_key = "rewrite_really_last"; @@ -174,6 +176,7 @@ static const char* really_last_key = "rewrite_really_last"; #define RULEFLAG_ESCAPENOPLUS (1<<18) #define RULEFLAG_QSLAST (1<<19) #define RULEFLAG_QSNONE (1<<20) /* programattic only */ +#define RULEFLAG_ESCAPECTLS (1<<21) /* return code of the rewrite rule * the result may be escaped - or not @@ -327,7 +330,8 @@ typedef struct { data_item *cookie; /* added cookies */ int skip; /* number of next rules to skip */ int maxrounds; /* limit on number of loops with N flag */ - char *escapes; /* specific backref escapes */ + const char *escapes; /* specific backref escapes */ + const char *noescapes; /* specific backref chars not to escape */ } rewriterule_entry; typedef struct { @@ -427,7 +431,9 @@ static apr_global_mutex_t *rewrite_mapr_lock_acquire = NULL; static const char *rewritemap_mutex_type = "rewrite-map"; /* Optional functions imported from mod_ssl when loaded: */ -static char *escape_backref(apr_pool_t *p, const char *path, const char *escapeme, int noplus); +static char *escape_backref(apr_pool_t *p, const char *path, + const char *escapeme, const char *noescapeme, + int flags); /* * +-------------------------------------------------------+ @@ -654,14 +660,21 @@ static APR_INLINE unsigned char *c2x(unsigned what, unsigned char prefix, * Escapes a backreference in a similar way as php's urlencode does. * Based on ap_os_escape_path in server/util.c */ -static char *escape_backref(apr_pool_t *p, const char *path, const char *escapeme, int noplus) { - char *copy = apr_palloc(p, 3 * strlen(path) + 3); +static char *escape_backref(apr_pool_t *p, const char *path, + const char *escapeme, const char *noescapeme, + int flags) +{ + char *copy = apr_palloc(p, 3 * strlen(path) + 1); const unsigned char *s = (const unsigned char *)path; unsigned char *d = (unsigned char *)copy; - unsigned c; + int noplus = (flags & RULEFLAG_ESCAPENOPLUS) != 0; + int ctls = (flags & RULEFLAG_ESCAPECTLS) != 0; + unsigned char c; while ((c = *s)) { - if (!escapeme) { + if (((ctls ? !TEST_CHAR(c, T_VCHAR_OBSTEXT) : !escapeme) + || (escapeme && ap_strchr_c(escapeme, c))) + && (!noescapeme || !ap_strchr_c(noescapeme, c))) { if (apr_isalnum(c) || c == '_') { *d++ = c; } @@ -672,23 +685,8 @@ static char *escape_backref(apr_pool_t *p, const char *path, const char *escapem d = c2x(c, '%', d); } } - else { - const char *esc = escapeme; - while (*esc) { - if (c == *esc) { - if (c == ' ' && !noplus) { - *d++ = '+'; - } - else { - d = c2x(c, '%', d); - } - break; - } - ++esc; - } - if (!*esc) { - *d++ = c; - } + else { + *d++ = c; } ++s; } @@ -2469,7 +2467,8 @@ static char *do_expand(char *input, rewrite_ctx *ctx, rewriterule_entry *entry) /* escape the backreference */ char *tmp2, *tmp; tmp = apr_pstrmemdup(pool, bri->source + bri->regmatch[n].rm_so, span); - tmp2 = escape_backref(pool, tmp, entry->escapes, entry->flags & RULEFLAG_ESCAPENOPLUS); + tmp2 = escape_backref(pool, tmp, entry->escapes, entry->noescapes, + entry->flags); rewritelog((ctx->r, 5, ctx->perdir, "escaping backreference '%s' to '%s'", tmp, tmp2)); @@ -3541,13 +3540,24 @@ static const char *cmd_rewriterule_setflag(apr_pool_t *p, void *_cfg, case 'B': if (!*key || !strcasecmp(key, "ackrefescaping")) { cfg->flags |= RULEFLAG_ESCAPEBACKREF; - if (val && *val) { + if (val && *val) { cfg->escapes = val; } } + else if (!strcasecmp(key, "NE")) { + if (val && *val) { + cfg->noescapes = val; + } + else { + return "flag 'BNE' wants a list of characters (i.e. [BNE=...])"; + } + } else if (!strcasecmp(key, "NP") || !strcasecmp(key, "ackrefernoplus")) { cfg->flags |= RULEFLAG_ESCAPENOPLUS; } + else if (!strcasecmp(key, "CTLS")) { + cfg->flags |= RULEFLAG_ESCAPECTLS|RULEFLAG_ESCAPEBACKREF; + } else { ++error; } @@ -3809,7 +3819,6 @@ static const char *cmd_rewriterule(cmd_parms *cmd, void *in_dconf, "'", NULL); } - /* arg3: optional flags field */ newrule->forced_mimetype = NULL; newrule->forced_handler = NULL; newrule->forced_responsecode = HTTP_MOVED_TEMPORARILY; @@ -3818,6 +3827,9 @@ static const char *cmd_rewriterule(cmd_parms *cmd, void *in_dconf, newrule->cookie = NULL; newrule->skip = 0; newrule->maxrounds = REWRITE_MAX_ROUNDS; + newrule->escapes = newrule->noescapes = NULL; + + /* arg3: optional flags field */ if (a3 != NULL) { if ((err = cmd_parseflagfield(cmd->pool, newrule, a3, cmd_rewriterule_setflag)) != NULL) { @@ -4745,13 +4757,19 @@ static int hook_uri2file(request_rec *r) } if (rulestatus) { - unsigned skip; - apr_size_t flen; - - if (r->args && *(ap_scan_vchar_obstext(r->args))) { + unsigned skip_absolute = is_absolute_uri(r->filename, NULL); + apr_size_t flen = r->filename ? strlen(r->filename) : 0; + int to_proxyreq = (flen > 6 && strncmp(r->filename, "proxy:", 6) == 0); + int will_escape = skip_absolute && (rulestatus != ACTION_NOESCAPE); + + if (r->args + && !will_escape + && *(ap_scan_vchar_obstext(r->args))) { /* * We have a raw control character or a ' ' in r->args. * Correct encoding was missed. + * Correct encoding was missed and we're not going to escape + * it before returning. */ ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(10410) "Rewritten query string contains control " @@ -4766,8 +4784,7 @@ static int hook_uri2file(request_rec *r) return n; } - flen = r->filename ? strlen(r->filename) : 0; - if (flen > 6 && strncmp(r->filename, "proxy:", 6) == 0) { + if (to_proxyreq) { /* it should be go on as an internal proxy request */ /* check if the proxy module is enabled, so @@ -4809,7 +4826,7 @@ static int hook_uri2file(request_rec *r) r->filename)); return OK; } - else if ((skip = is_absolute_uri(r->filename, NULL)) > 0) { + else if (skip_absolute > 0) { int n; /* it was finally rewritten to a remote URL */ @@ -4817,7 +4834,7 @@ static int hook_uri2file(request_rec *r) if (rulestatus != ACTION_NOESCAPE) { rewritelog((r, 1, NULL, "escaping %s for redirect", r->filename)); - r->filename = escape_absolute_uri(r->pool, r->filename, skip); + r->filename = escape_absolute_uri(r->pool, r->filename, skip_absolute); } /* append the QUERY_STRING part */ @@ -5041,9 +5058,17 @@ static int hook_fixup(request_rec *r) */ rulestatus = apply_rewrite_list(r, dconf->rewriterules, dconf->directory); if (rulestatus) { - unsigned skip; + unsigned skip_absolute = is_absolute_uri(r->filename, NULL); + int to_proxyreq = 0; + int will_escape = 0; - if (r->args && *(ap_scan_vchar_obstext(r->args))) { + l = strlen(r->filename); + to_proxyreq = l > 6 && strncmp(r->filename, "proxy:", 6) == 0; + will_escape = skip_absolute && (rulestatus != ACTION_NOESCAPE); + + if (r->args + && !will_escape + && *(ap_scan_vchar_obstext(r->args))) { /* * We have a raw control character or a ' ' in r->args. * Correct encoding was missed. @@ -5061,8 +5086,7 @@ static int hook_fixup(request_rec *r) return n; } - l = strlen(r->filename); - if (l > 6 && strncmp(r->filename, "proxy:", 6) == 0) { + if (to_proxyreq) { /* it should go on as an internal proxy request */ /* make sure the QUERY_STRING and @@ -5086,7 +5110,7 @@ static int hook_fixup(request_rec *r) "%s [OK]", r->filename)); return OK; } - else if ((skip = is_absolute_uri(r->filename, NULL)) > 0) { + else if (skip_absolute > 0) { /* it was finally rewritten to a remote URL */ /* because we are in a per-dir context @@ -5095,7 +5119,7 @@ static int hook_fixup(request_rec *r) */ if (dconf->baseurl != NULL) { /* skip 'scheme://' */ - cp = r->filename + skip; + cp = r->filename + skip_absolute; if ((cp = ap_strchr(cp, '/')) != NULL && *(++cp)) { rewritelog((r, 2, dconf->directory, @@ -5140,7 +5164,7 @@ static int hook_fixup(request_rec *r) if (rulestatus != ACTION_NOESCAPE) { rewritelog((r, 1, dconf->directory, "escaping %s for redirect", r->filename)); - r->filename = escape_absolute_uri(r->pool, r->filename, skip); + r->filename = escape_absolute_uri(r->pool, r->filename, skip_absolute); } /* append the QUERY_STRING part */