From c91943a58094ebb73f7dc24c85cdb5a1fa527ed3 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 4 Dec 2015 19:24:23 +0100 Subject: [PATCH 1/2] Make it possible to define new encode sets in other crates. --- Cargo.toml | 2 +- make_encode_sets.py | 42 ------ src/encode_sets.rs | 298 ---------------------------------------- src/percent_encoding.rs | 136 +++++++++++++----- 4 files changed, 101 insertions(+), 377 deletions(-) delete mode 100644 make_encode_sets.py delete mode 100644 src/encode_sets.rs diff --git a/Cargo.toml b/Cargo.toml index a7d64ac5..d30d592b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "url" -version = "0.5.1" +version = "0.5.2" authors = [ "Simon Sapin " ] description = "URL library for Rust, based on the WHATWG URL Standard" diff --git a/make_encode_sets.py b/make_encode_sets.py deleted file mode 100644 index eb859050..00000000 --- a/make_encode_sets.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2013-2014 Simon Sapin. -# -# Licensed under the Apache License, Version 2.0 or the MIT license -# , at your -# option. This file may not be copied, modified, or distributed -# except according to those terms. - - -# Run as: python make_encode_sets.py > src/encode_sets.rs - - -print('''\ -// Copyright 2013-2014 Simon Sapin. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. 
- -// Generated by make_encode_sets.py -''') -for name, encoded in [ - ('SIMPLE', ''), - ('QUERY', r''' "#<>'''), - ('DEFAULT', r''' "#<>`?{}'''), - ('USERINFO', r''' "#<>`?{}@'''), - ('PASSWORD', r''' "#<>`?{}@\/'''), - ('USERNAME', r''' "#<>`?{}@\/:'''), - ('FORM_URLENCODED', r''' !"#$%&\'()+,/:;<=>?@[\]^`{|}~'''), - ('HTTP_VALUE', r''' "%'()*,/:;<->?[\]{}'''), -]: - print( - "pub static %s: [&'static str; 256] = [\n%s\n];\n\n" - % (name, '\n'.join( - ' ' + ' '.join( - '"%s%s",' % ("\\" if chr(b) in '\\"' else "", chr(b)) - if 0x20 <= b <= 0x7E and chr(b) not in encoded - else '"%%%02X",' % b - for b in range(s, s + 8) - ) for s in range(0, 256, 8)))) diff --git a/src/encode_sets.rs b/src/encode_sets.rs deleted file mode 100644 index d7b5fb9d..00000000 --- a/src/encode_sets.rs +++ /dev/null @@ -1,298 +0,0 @@ -// Copyright 2013-2014 Simon Sapin. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. 
- -// Generated by make_encode_sets.py - -pub static SIMPLE: [&'static str; 256] = [ - "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", - "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", - "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", - "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", - " ", "!", "\"", "#", "$", "%", "&", "'", - "(", ")", "*", "+", ",", "-", ".", "/", - "0", "1", "2", "3", "4", "5", "6", "7", - "8", "9", ":", ";", "<", "=", ">", "?", - "@", "A", "B", "C", "D", "E", "F", "G", - "H", "I", "J", "K", "L", "M", "N", "O", - "P", "Q", "R", "S", "T", "U", "V", "W", - "X", "Y", "Z", "[", "\\", "]", "^", "_", - "`", "a", "b", "c", "d", "e", "f", "g", - "h", "i", "j", "k", "l", "m", "n", "o", - "p", "q", "r", "s", "t", "u", "v", "w", - "x", "y", "z", "{", "|", "}", "~", "%7F", - "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", - "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", - "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", - "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", - "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", - "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", - "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", - "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", - "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", - "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", - "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", - "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", - "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", - "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", - "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", - "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF", -]; - - -pub static QUERY: [&'static str; 256] = [ - "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", - "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", - "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", - "%18", "%19", 
"%1A", "%1B", "%1C", "%1D", "%1E", "%1F", - "%20", "!", "%22", "%23", "$", "%", "&", "'", - "(", ")", "*", "+", ",", "-", ".", "/", - "0", "1", "2", "3", "4", "5", "6", "7", - "8", "9", ":", ";", "%3C", "=", "%3E", "?", - "@", "A", "B", "C", "D", "E", "F", "G", - "H", "I", "J", "K", "L", "M", "N", "O", - "P", "Q", "R", "S", "T", "U", "V", "W", - "X", "Y", "Z", "[", "\\", "]", "^", "_", - "`", "a", "b", "c", "d", "e", "f", "g", - "h", "i", "j", "k", "l", "m", "n", "o", - "p", "q", "r", "s", "t", "u", "v", "w", - "x", "y", "z", "{", "|", "}", "~", "%7F", - "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", - "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", - "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", - "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", - "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", - "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", - "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", - "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", - "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", - "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", - "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", - "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", - "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", - "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", - "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", - "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF", -]; - - -pub static DEFAULT: [&'static str; 256] = [ - "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", - "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", - "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", - "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", - "%20", "!", "%22", "%23", "$", "%", "&", "'", - "(", ")", "*", "+", ",", "-", ".", "/", - "0", "1", "2", "3", "4", "5", "6", "7", - "8", "9", ":", ";", "%3C", "=", "%3E", "%3F", - "@", "A", "B", "C", "D", "E", "F", "G", - 
"H", "I", "J", "K", "L", "M", "N", "O", - "P", "Q", "R", "S", "T", "U", "V", "W", - "X", "Y", "Z", "[", "\\", "]", "^", "_", - "%60", "a", "b", "c", "d", "e", "f", "g", - "h", "i", "j", "k", "l", "m", "n", "o", - "p", "q", "r", "s", "t", "u", "v", "w", - "x", "y", "z", "%7B", "|", "%7D", "~", "%7F", - "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", - "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", - "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", - "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", - "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", - "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", - "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", - "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", - "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", - "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", - "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", - "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", - "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", - "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", - "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", - "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF", -]; - - -pub static USERINFO: [&'static str; 256] = [ - "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", - "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", - "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", - "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", - "%20", "!", "%22", "%23", "$", "%", "&", "'", - "(", ")", "*", "+", ",", "-", ".", "/", - "0", "1", "2", "3", "4", "5", "6", "7", - "8", "9", ":", ";", "%3C", "=", "%3E", "%3F", - "%40", "A", "B", "C", "D", "E", "F", "G", - "H", "I", "J", "K", "L", "M", "N", "O", - "P", "Q", "R", "S", "T", "U", "V", "W", - "X", "Y", "Z", "[", "\\", "]", "^", "_", - "%60", "a", "b", "c", "d", "e", "f", "g", - "h", "i", "j", "k", "l", "m", "n", "o", - "p", "q", "r", "s", "t", "u", "v", "w", - 
"x", "y", "z", "%7B", "|", "%7D", "~", "%7F", - "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", - "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", - "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", - "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", - "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", - "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", - "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", - "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", - "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", - "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", - "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", - "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", - "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", - "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", - "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", - "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF", -]; - - -pub static PASSWORD: [&'static str; 256] = [ - "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", - "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", - "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", - "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", - "%20", "!", "%22", "%23", "$", "%", "&", "'", - "(", ")", "*", "+", ",", "-", ".", "%2F", - "0", "1", "2", "3", "4", "5", "6", "7", - "8", "9", ":", ";", "%3C", "=", "%3E", "%3F", - "%40", "A", "B", "C", "D", "E", "F", "G", - "H", "I", "J", "K", "L", "M", "N", "O", - "P", "Q", "R", "S", "T", "U", "V", "W", - "X", "Y", "Z", "[", "%5C", "]", "^", "_", - "%60", "a", "b", "c", "d", "e", "f", "g", - "h", "i", "j", "k", "l", "m", "n", "o", - "p", "q", "r", "s", "t", "u", "v", "w", - "x", "y", "z", "%7B", "|", "%7D", "~", "%7F", - "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", - "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", - "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", - "%98", "%99", "%9A", "%9B", 
"%9C", "%9D", "%9E", "%9F", - "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", - "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", - "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", - "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", - "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", - "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", - "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", - "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", - "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", - "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", - "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", - "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF", -]; - - -pub static USERNAME: [&'static str; 256] = [ - "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", - "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", - "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", - "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", - "%20", "!", "%22", "%23", "$", "%", "&", "'", - "(", ")", "*", "+", ",", "-", ".", "%2F", - "0", "1", "2", "3", "4", "5", "6", "7", - "8", "9", "%3A", ";", "%3C", "=", "%3E", "%3F", - "%40", "A", "B", "C", "D", "E", "F", "G", - "H", "I", "J", "K", "L", "M", "N", "O", - "P", "Q", "R", "S", "T", "U", "V", "W", - "X", "Y", "Z", "[", "%5C", "]", "^", "_", - "%60", "a", "b", "c", "d", "e", "f", "g", - "h", "i", "j", "k", "l", "m", "n", "o", - "p", "q", "r", "s", "t", "u", "v", "w", - "x", "y", "z", "%7B", "|", "%7D", "~", "%7F", - "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", - "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", - "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", - "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", - "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", - "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", - "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", - "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", 
"%BF", - "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", - "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", - "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", - "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", - "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", - "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", - "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", - "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF", -]; - - -pub static FORM_URLENCODED: [&'static str; 256] = [ - "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", - "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", - "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", - "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", - "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27", - "%28", "%29", "*", "%2B", "%2C", "-", ".", "%2F", - "0", "1", "2", "3", "4", "5", "6", "7", - "8", "9", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F", - "%40", "A", "B", "C", "D", "E", "F", "G", - "H", "I", "J", "K", "L", "M", "N", "O", - "P", "Q", "R", "S", "T", "U", "V", "W", - "X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_", - "%60", "a", "b", "c", "d", "e", "f", "g", - "h", "i", "j", "k", "l", "m", "n", "o", - "p", "q", "r", "s", "t", "u", "v", "w", - "x", "y", "z", "%7B", "%7C", "%7D", "%7E", "%7F", - "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", - "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", - "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", - "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", - "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", - "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", - "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", - "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", - "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", - "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", - "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", - "%D8", "%D9", "%DA", "%DB", 
"%DC", "%DD", "%DE", "%DF", - "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", - "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", - "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", - "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF", -]; - - -pub static HTTP_VALUE: [&'static str; 256] = [ - "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", - "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", - "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", - "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", - "%20", "!", "%22", "#", "$", "%25", "&", "%27", - "%28", "%29", "%2A", "+", "%2C", "%2D", ".", "%2F", - "0", "1", "2", "3", "4", "5", "6", "7", - "8", "9", "%3A", "%3B", "%3C", "=", "%3E", "%3F", - "@", "A", "B", "C", "D", "E", "F", "G", - "H", "I", "J", "K", "L", "M", "N", "O", - "P", "Q", "R", "S", "T", "U", "V", "W", - "X", "Y", "Z", "%5B", "%5C", "%5D", "^", "_", - "`", "a", "b", "c", "d", "e", "f", "g", - "h", "i", "j", "k", "l", "m", "n", "o", - "p", "q", "r", "s", "t", "u", "v", "w", - "x", "y", "z", "%7B", "|", "%7D", "~", "%7F", - "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", - "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", - "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", - "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", - "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", - "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", - "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", - "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", - "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", - "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", - "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", - "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", - "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", - "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", - "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", - "%F8", "%F9", "%FA", "%FB", 
"%FC", "%FD", "%FE", "%FF", -]; - - diff --git a/src/percent_encoding.rs b/src/percent_encoding.rs index 146bede2..2bbfe825 100644 --- a/src/percent_encoding.rs +++ b/src/percent_encoding.rs @@ -6,9 +6,8 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. - -#[path = "encode_sets.rs"] -mod encode_sets; +use std::ascii::AsciiExt; +use std::fmt::Write; /// Represents a set of characters / bytes that should be percent-encoded. /// @@ -21,51 +20,116 @@ mod encode_sets; /// In the query string however, a question mark does not have any special meaning /// and does not need to be percent-encoded. /// -/// Since the implementation details of `EncodeSet` are private, -/// the set of available encode sets is not extensible beyond the ones -/// provided here. -/// If you need a different encode set, -/// please [file a bug](https://github.com/servo/rust-url/issues) -/// explaining the use case. -#[derive(Copy, Clone)] -pub struct EncodeSet { - map: &'static [&'static str; 256], +/// A few sets are defined in this module. +/// Use the [`define_encode_set!`](../macro.define_encode_set!.html) macro to define different ones. +pub trait EncodeSet { + fn contains(&self, byte: u8) -> bool; } -/// This encode set is used for fragment identifier and non-relative scheme data. -pub static SIMPLE_ENCODE_SET: EncodeSet = EncodeSet { map: &encode_sets::SIMPLE }; +/// Define a new struct +/// that implements the [`EncodeSet`](percent_encoding/trait.EncodeSet.html) trait, +/// for use in [`percent_decode()`](percent_encoding/fn.percent_encode.html) +/// and related functions. +/// +/// Parameters are ASCII printable characters to include in the set +/// in addition to U+0000 to U+001F and above U+007F. +/// See [encode sets specification](http://url.spec.whatwg.org/#simple-encode-set). +/// +/// Example +/// ======= +/// +/// ```rust +/// #[macro_use] extern crate url; +/// define_encode_set! 
{ +/// /// This encode set is used in the URL parser for query strings. +/// pub QUERY_ENCODE_SET = {' ', '"', '#', '<', '>'} +/// } +/// # fn main() { +/// assert_eq!(url::percent_encoding::percent_encode(b"foo bar", QUERY_ENCODE_SET), "foo%20bar"); +/// # } +/// ``` +#[macro_export] +macro_rules! define_encode_set { + ($(#[$attr: meta])* pub $name: ident = {$($ch: pat),*}) => { + $(#[$attr])* + #[derive(Copy, Clone)] + #[allow(non_camel_case_types)] + pub struct $name; + + impl $crate::percent_encoding::EncodeSet for $name { + fn contains(&self, byte: u8) -> bool { + match byte as char { + $( + $ch => true, + )* + _ => byte < 0x20 || byte > 0x7E + } + } + } + } +} + +define_encode_set! { + /// This encode set is used for fragment identifier and non-relative scheme data. + pub SIMPLE_ENCODE_SET = {} +} -/// This encode set is used in the URL parser for query strings. -pub static QUERY_ENCODE_SET: EncodeSet = EncodeSet { map: &encode_sets::QUERY }; +define_encode_set! { + /// This encode set is used in the URL parser for query strings. + pub QUERY_ENCODE_SET = {' ', '"', '#', '<', '>'} +} -/// This encode set is used for path components. -pub static DEFAULT_ENCODE_SET: EncodeSet = EncodeSet { map: &encode_sets::DEFAULT }; +define_encode_set! { + /// This encode set is used for path components. + pub DEFAULT_ENCODE_SET = {' ', '"', '#', '<', '>', '`', '?', '{', '}'} +} -/// This encode set is used in the URL parser for usernames and passwords. -pub static USERINFO_ENCODE_SET: EncodeSet = EncodeSet { map: &encode_sets::USERINFO }; +define_encode_set! { + /// This encode set is used in the URL parser for usernames and passwords. + pub USERINFO_ENCODE_SET = {' ', '"', '#', '<', '>', '`', '?', '{', '}', '@'} +} -/// This encode set should be used when setting the password field of a parsed URL. -pub static PASSWORD_ENCODE_SET: EncodeSet = EncodeSet { map: &encode_sets::PASSWORD }; +define_encode_set! 
{ + /// This encode set should be used when setting the password field of a parsed URL. + pub PASSWORD_ENCODE_SET = {' ', '"', '#', '<', '>', '`', '?', '{', '}', '@', '\\', '/'} +} -/// This encode set should be used when setting the username field of a parsed URL. -pub static USERNAME_ENCODE_SET: EncodeSet = EncodeSet { map: &encode_sets::USERNAME }; +define_encode_set! { + /// This encode set should be used when setting the username field of a parsed URL. + pub USERNAME_ENCODE_SET = {' ', '"', '#', '<', '>', '`', '?', '{', '}', '@', '\\', '/', ':'} +} -/// This encode set is used in `application/x-www-form-urlencoded` serialization. -pub static FORM_URLENCODED_ENCODE_SET: EncodeSet = EncodeSet { - map: &encode_sets::FORM_URLENCODED, -}; +define_encode_set! { + /// This encode set is used in `application/x-www-form-urlencoded` serialization. + pub FORM_URLENCODED_ENCODE_SET = { + ' ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '+', ',', '/', ':', ';', + '<', '=', '>', '?', '@', '[', '\\', ']', '^', '`', '{', '|', '}', '~' + } +} -/// This encode set is used for HTTP header values and is defined at -/// https://tools.ietf.org/html/rfc5987#section-3.2 -pub static HTTP_VALUE_ENCODE_SET: EncodeSet = EncodeSet { map: &encode_sets::HTTP_VALUE }; +define_encode_set! { + /// This encode set is used for HTTP header values and is defined at + /// https://tools.ietf.org/html/rfc5987#section-3.2 + pub HTTP_VALUE = { + ' ', '"', '%', '\'', '(', ')', '*', ',', '/', ':', ';', '<', '-', '>', '?', + '[', '\\', ']', '{', '}' + } +} /// Percent-encode the given bytes, and push the result to `output`. /// /// The pushed strings are within the ASCII range. 
#[inline] -pub fn percent_encode_to(input: &[u8], encode_set: EncodeSet, output: &mut String) { +pub fn percent_encode_to<E: EncodeSet>(input: &[u8], encode_set: E, output: &mut String) { for &byte in input { - output.push_str(encode_set.map[byte as usize]) + if encode_set.contains(byte) { + write!(output, "%{:02X}", byte).unwrap(); + } else { + assert!(byte.is_ascii()); + unsafe { + output.as_mut_vec().push(byte) + } + } } } @@ -74,7 +138,7 @@ pub fn percent_encode_to(input: &[u8], encode_set: EncodeSet, output: &mut Strin /// /// The returned string is within the ASCII range. #[inline] -pub fn percent_encode(input: &[u8], encode_set: EncodeSet) -> String { +pub fn percent_encode<E: EncodeSet>(input: &[u8], encode_set: E) -> String { let mut output = String::new(); percent_encode_to(input, encode_set, &mut output); output @@ -85,7 +149,7 @@ pub fn percent_encode(input: &[u8], encode_set: EncodeSet) -> String { /// /// The pushed strings are within the ASCII range. #[inline] -pub fn utf8_percent_encode_to(input: &str, encode_set: EncodeSet, output: &mut String) { +pub fn utf8_percent_encode_to<E: EncodeSet>(input: &str, encode_set: E, output: &mut String) { percent_encode_to(input.as_bytes(), encode_set, output) } @@ -94,7 +158,7 @@ pub fn utf8_percent_encode_to(input: &str, encode_set: EncodeSet, output: &mut S /// /// The returned string is within the ASCII range. #[inline] -pub fn utf8_percent_encode(input: &str, encode_set: EncodeSet) -> String { +pub fn utf8_percent_encode<E: EncodeSet>(input: &str, encode_set: E) -> String { let mut output = String::new(); utf8_percent_encode_to(input, encode_set, &mut output); output From f0b8f072c002757b71f00794d6f4955329d84735 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 4 Dec 2015 22:26:51 +0100 Subject: [PATCH 2/2] Define encode sets based on another set. 
--- src/percent_encoding.rs | 42 +++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/src/percent_encoding.rs b/src/percent_encoding.rs index 2bbfe825..e80865ba 100644 --- a/src/percent_encoding.rs +++ b/src/percent_encoding.rs @@ -23,6 +23,8 @@ use std::fmt::Write; /// A few sets are defined in this module. /// Use the [`define_encode_set!`](../macro.define_encode_set!.html) macro to define different ones. pub trait EncodeSet { + /// Called with UTF-8 bytes rather than code points. + /// Should return true for all non-ASCII bytes. fn contains(&self, byte: u8) -> bool; } @@ -31,8 +33,7 @@ pub trait EncodeSet { /// for use in [`percent_decode()`](percent_encoding/fn.percent_encode.html) /// and related functions. /// -/// Parameters are ASCII printable characters to include in the set -/// in addition to U+0000 to U+001F and above U+007F. +/// Parameters are characters to include in the set in addition to those of the base set. /// See [encode sets specification](http://url.spec.whatwg.org/#simple-encode-set). /// /// Example @@ -40,68 +41,77 @@ pub trait EncodeSet { /// /// ```rust /// #[macro_use] extern crate url; +/// use url::percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET}; /// define_encode_set! { /// /// This encode set is used in the URL parser for query strings. -/// pub QUERY_ENCODE_SET = {' ', '"', '#', '<', '>'} +/// pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'} /// } /// # fn main() { -/// assert_eq!(url::percent_encoding::percent_encode(b"foo bar", QUERY_ENCODE_SET), "foo%20bar"); +/// assert_eq!(utf8_percent_encode("foo bar", QUERY_ENCODE_SET), "foo%20bar"); /// # } /// ``` #[macro_export] macro_rules! 
define_encode_set { - ($(#[$attr: meta])* pub $name: ident = {$($ch: pat),*}) => { + ($(#[$attr: meta])* pub $name: ident = [$base_set: expr] | {$($ch: pat),*}) => { $(#[$attr])* #[derive(Copy, Clone)] #[allow(non_camel_case_types)] pub struct $name; impl $crate::percent_encoding::EncodeSet for $name { + #[inline] fn contains(&self, byte: u8) -> bool { match byte as char { $( $ch => true, )* - _ => byte < 0x20 || byte > 0x7E + _ => $base_set.contains(byte) } } } } } -define_encode_set! { - /// This encode set is used for fragment identifier and non-relative scheme data. - pub SIMPLE_ENCODE_SET = {} +/// This encode set is used for fragment identifier and non-relative scheme data. +#[derive(Copy, Clone)] +#[allow(non_camel_case_types)] +pub struct SIMPLE_ENCODE_SET; + +impl EncodeSet for SIMPLE_ENCODE_SET { + #[inline] + fn contains(&self, byte: u8) -> bool { + byte < 0x20 || byte > 0x7E + } } define_encode_set! { /// This encode set is used in the URL parser for query strings. - pub QUERY_ENCODE_SET = {' ', '"', '#', '<', '>'} + pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'} } define_encode_set! { /// This encode set is used for path components. - pub DEFAULT_ENCODE_SET = {' ', '"', '#', '<', '>', '`', '?', '{', '}'} + pub DEFAULT_ENCODE_SET = [QUERY_ENCODE_SET] | {'`', '?', '{', '}'} } define_encode_set! { /// This encode set is used in the URL parser for usernames and passwords. - pub USERINFO_ENCODE_SET = {' ', '"', '#', '<', '>', '`', '?', '{', '}', '@'} + pub USERINFO_ENCODE_SET = [DEFAULT_ENCODE_SET] | {'@'} } define_encode_set! { /// This encode set should be used when setting the password field of a parsed URL. - pub PASSWORD_ENCODE_SET = {' ', '"', '#', '<', '>', '`', '?', '{', '}', '@', '\\', '/'} + pub PASSWORD_ENCODE_SET = [USERINFO_ENCODE_SET] | {'\\', '/'} } define_encode_set! { /// This encode set should be used when setting the username field of a parsed URL. 
- pub USERNAME_ENCODE_SET = {' ', '"', '#', '<', '>', '`', '?', '{', '}', '@', '\\', '/', ':'} + pub USERNAME_ENCODE_SET = [PASSWORD_ENCODE_SET] | {':'} } define_encode_set! { /// This encode set is used in `application/x-www-form-urlencoded` serialization. - pub FORM_URLENCODED_ENCODE_SET = { + pub FORM_URLENCODED_ENCODE_SET = [SIMPLE_ENCODE_SET] | { ' ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '+', ',', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '`', '{', '|', '}', '~' } @@ -110,7 +120,7 @@ define_encode_set! { define_encode_set! { /// This encode set is used for HTTP header values and is defined at /// https://tools.ietf.org/html/rfc5987#section-3.2 - pub HTTP_VALUE = { + pub HTTP_VALUE = [SIMPLE_ENCODE_SET] | { ' ', '"', '%', '\'', '(', ')', '*', ',', '/', ':', ';', '<', '-', '>', '?', '[', '\\', ']', '{', '}' }