diff --git a/README.md b/README.md index 6eaaf01..e2ca270 100644 --- a/README.md +++ b/README.md @@ -39,8 +39,8 @@ To decode a byte sequence with invalid sequences: ~~~~ {.rust} all::ISO_8859_6.decode([65,99,109,101,169], DecodeStrict); // => Err(...) -all::ISO_8859_6.decode([65,99,109,101,169], DecodeReplace); // => Ok(~"Acme\ufffd") -all::ISO_8859_6.decode([65,99,109,101,169], DecodeIgnore); // => Ok(~"Acme") +all::ISO_8859_6.decode([65,99,109,101,169], DecodeReplace); // => Ok(StrBuf::from_str("Acme\ufffd")) +all::ISO_8859_6.decode([65,99,109,101,169], DecodeIgnore); // => Ok(StrBuf::from_str("Acme")) ~~~~ A practical example of custom encoder traps: @@ -56,9 +56,9 @@ fn hex_ncr_escape(_encoder: &Encoder, input: &str, output: &mut ByteWriter) -> b } static HexNcrEscape: EncoderTrap = EncoderTrap(hex_ncr_escape); -let orig = ~"Hello, 世界!"; +let orig = "Hello, 世界!".to_owned(); let encoded = all::ASCII.encode(orig, HexNcrEscape).unwrap(); -all::ASCII.decode(encoded.as_slice(), DecodeStrict); // => Ok(~"Hello, 世界!") +all::ASCII.decode(encoded.as_slice(), DecodeStrict); // => Ok(StrBuf::from_str("Hello, 世界!")) ~~~~ Getting the encoding from the string label, @@ -69,10 +69,10 @@ let euckr = label::encoding_from_whatwg_label("euc-kr").unwrap(); euckr.name(); // => "windows-949" euckr.whatwg_name(); // => Some("euc-kr"), for the sake of compatibility let broken = &[0xbf, 0xec, 0xbf, 0xcd, 0xff, 0xbe, 0xd3]; -euckr.decode(broken, DecodeReplace); // => Ok(~"\uc6b0\uc640\ufffd\uc559") +euckr.decode(broken, DecodeReplace); // => Ok(StrBuf::from_str("\uc6b0\uc640\ufffd\uc559")) // corresponding rust-encoding native API: -all::WINDOWS_949.decode(broken, DecodeReplace); // => Ok(~"\uc6b0\uc640\ufffd\uc559") +all::WINDOWS_949.decode(broken, DecodeReplace); // => Ok(StrBuf::from_str("\uc6b0\uc640\ufffd\uc559")) ~~~~ Supported Encodings diff --git a/src/encoding/codec/ascii.rs b/src/encoding/codec/ascii.rs index 0b66597..8935265 100644 --- a/src/encoding/codec/ascii.rs +++ 
b/src/encoding/codec/ascii.rs @@ -18,8 +18,8 @@ pub struct ASCIIEncoding; impl Encoding for ASCIIEncoding { fn name(&self) -> &'static str { "ascii" } - fn encoder(&self) -> ~Encoder { ASCIIEncoder::new() } - fn decoder(&self) -> ~Decoder { ASCIIDecoder::new() } + fn encoder(&self) -> Box { ASCIIEncoder::new() } + fn decoder(&self) -> Box { ASCIIDecoder::new() } } /// An encoder for ASCII. @@ -27,11 +27,11 @@ impl Encoding for ASCIIEncoding { pub struct ASCIIEncoder; impl ASCIIEncoder { - pub fn new() -> ~Encoder { ~ASCIIEncoder as ~Encoder } + pub fn new() -> Box { box ASCIIEncoder as Box } } impl Encoder for ASCIIEncoder { - fn from_self(&self) -> ~Encoder { ASCIIEncoder::new() } + fn from_self(&self) -> Box { ASCIIEncoder::new() } fn is_ascii_compatible(&self) -> bool { true } fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (uint, Option) { @@ -59,11 +59,11 @@ impl Encoder for ASCIIEncoder { pub struct ASCIIDecoder; impl ASCIIDecoder { - pub fn new() -> ~Decoder { ~ASCIIDecoder as ~Decoder } + pub fn new() -> Box { box ASCIIDecoder as Box } } impl Decoder for ASCIIDecoder { - fn from_self(&self) -> ~Decoder { ASCIIDecoder::new() } + fn from_self(&self) -> Box { ASCIIDecoder::new() } fn is_ascii_compatible(&self) -> bool { true } fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (uint, Option) { diff --git a/src/encoding/codec/error.rs b/src/encoding/codec/error.rs index 3d6f1dd..3901bcf 100644 --- a/src/encoding/codec/error.rs +++ b/src/encoding/codec/error.rs @@ -13,8 +13,8 @@ pub struct ErrorEncoding; impl Encoding for ErrorEncoding { fn name(&self) -> &'static str { "error" } - fn encoder(&self) -> ~Encoder { ErrorEncoder::new() } - fn decoder(&self) -> ~Decoder { ErrorDecoder::new() } + fn encoder(&self) -> Box { ErrorEncoder::new() } + fn decoder(&self) -> Box { ErrorDecoder::new() } } /// An encoder that always returns error. 
@@ -22,16 +22,17 @@ impl Encoding for ErrorEncoding { pub struct ErrorEncoder; impl ErrorEncoder { - pub fn new() -> ~Encoder { ~ErrorEncoder as ~Encoder } + pub fn new() -> Box { box ErrorEncoder as Box } } impl Encoder for ErrorEncoder { - fn from_self(&self) -> ~Encoder { ErrorEncoder::new() } + fn from_self(&self) -> Box { ErrorEncoder::new() } fn raw_feed(&mut self, input: &str, _output: &mut ByteWriter) -> (uint, Option) { if input.len() > 0 { let str::CharRange {ch: _, next} = input.char_range_at(0); - (0, Some(CodecError { upto: next, cause: "unrepresentable character".into_maybe_owned() })) + (0, Some(CodecError { upto: next, + cause: "unrepresentable character".into_maybe_owned() })) } else { (0, None) } @@ -47,11 +48,11 @@ impl Encoder for ErrorEncoder { pub struct ErrorDecoder; impl ErrorDecoder { - pub fn new() -> ~Decoder { ~ErrorDecoder as ~Decoder } + pub fn new() -> Box { box ErrorDecoder as Box } } impl Decoder for ErrorDecoder { - fn from_self(&self) -> ~Decoder { ErrorDecoder::new() } + fn from_self(&self) -> Box { ErrorDecoder::new() } fn raw_feed(&mut self, input: &[u8], _output: &mut StringWriter) -> (uint, Option) { if input.len() > 0 { diff --git a/src/encoding/codec/japanese.rs b/src/encoding/codec/japanese.rs index 8b0ae4e..6fbba2c 100644 --- a/src/encoding/codec/japanese.rs +++ b/src/encoding/codec/japanese.rs @@ -30,8 +30,8 @@ pub struct EUCJPEncoding; impl Encoding for EUCJPEncoding { fn name(&self) -> &'static str { "euc-jp" } fn whatwg_name(&self) -> Option<&'static str> { Some("euc-jp") } - fn encoder(&self) -> ~Encoder { EUCJPEncoder::new() } - fn decoder(&self) -> ~Decoder { EUCJP0212Decoder::new() } + fn encoder(&self) -> Box { EUCJPEncoder::new() } + fn decoder(&self) -> Box { EUCJP0212Decoder::new() } } /// An encoder for EUC-JP with unused G3 character set. 
@@ -39,11 +39,11 @@ impl Encoding for EUCJPEncoding { pub struct EUCJPEncoder; impl EUCJPEncoder { - pub fn new() -> ~Encoder { ~EUCJPEncoder as ~Encoder } + pub fn new() -> Box { box EUCJPEncoder as Box } } impl Encoder for EUCJPEncoder { - fn from_self(&self) -> ~Encoder { EUCJPEncoder::new() } + fn from_self(&self) -> Box { EUCJPEncoder::new() } fn is_ascii_compatible(&self) -> bool { true } fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (uint, Option) { @@ -89,11 +89,11 @@ pub struct EUCJP0212Decoder { } impl EUCJP0212Decoder { - pub fn new() -> ~Decoder { ~EUCJP0212Decoder { first: 0, second: 0 } as ~Decoder } + pub fn new() -> Box { box EUCJP0212Decoder { first: 0, second: 0 } as Box } } impl Decoder for EUCJP0212Decoder { - fn from_self(&self) -> ~Decoder { EUCJP0212Decoder::new() } + fn from_self(&self) -> Box { EUCJP0212Decoder::new() } fn is_ascii_compatible(&self) -> bool { true } fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (uint, Option) { @@ -319,8 +319,8 @@ pub struct Windows31JEncoding; impl Encoding for Windows31JEncoding { fn name(&self) -> &'static str { "windows-31j" } fn whatwg_name(&self) -> Option<&'static str> { Some("shift_jis") } // WHATWG compatibility - fn encoder(&self) -> ~Encoder { Windows31JEncoder::new() } - fn decoder(&self) -> ~Decoder { Windows31JDecoder::new() } + fn encoder(&self) -> Box { Windows31JEncoder::new() } + fn decoder(&self) -> Box { Windows31JDecoder::new() } } /// An encoder for Shift_JIS with IBM/NEC extensions. 
@@ -328,11 +328,11 @@ impl Encoding for Windows31JEncoding { pub struct Windows31JEncoder; impl Windows31JEncoder { - pub fn new() -> ~Encoder { ~Windows31JEncoder as ~Encoder } + pub fn new() -> Box { box Windows31JEncoder as Box } } impl Encoder for Windows31JEncoder { - fn from_self(&self) -> ~Encoder { Windows31JEncoder::new() } + fn from_self(&self) -> Box { Windows31JEncoder::new() } fn is_ascii_compatible(&self) -> bool { true } fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (uint, Option) { @@ -376,11 +376,11 @@ pub struct Windows31JDecoder { } impl Windows31JDecoder { - pub fn new() -> ~Decoder { ~Windows31JDecoder { lead: 0 } as ~Decoder } + pub fn new() -> Box { box Windows31JDecoder { lead: 0 } as Box } } impl Decoder for Windows31JDecoder { - fn from_self(&self) -> ~Decoder { Windows31JDecoder::new() } + fn from_self(&self) -> Box { Windows31JDecoder::new() } fn is_ascii_compatible(&self) -> bool { true } fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (uint, Option) { diff --git a/src/encoding/codec/korean.rs b/src/encoding/codec/korean.rs index 831af44..199234c 100644 --- a/src/encoding/codec/korean.rs +++ b/src/encoding/codec/korean.rs @@ -25,8 +25,8 @@ pub struct Windows949Encoding; impl Encoding for Windows949Encoding { fn name(&self) -> &'static str { "windows-949" } fn whatwg_name(&self) -> Option<&'static str> { Some("euc-kr") } // WHATWG compatibility - fn encoder(&self) -> ~Encoder { Windows949Encoder::new() } - fn decoder(&self) -> ~Decoder { Windows949Decoder::new() } + fn encoder(&self) -> Box { Windows949Encoder::new() } + fn decoder(&self) -> Box { Windows949Decoder::new() } } /// An encoder for Windows code page 949. 
@@ -34,11 +34,11 @@ impl Encoding for Windows949Encoding { pub struct Windows949Encoder; impl Windows949Encoder { - pub fn new() -> ~Encoder { ~Windows949Encoder as ~Encoder } + pub fn new() -> Box { box Windows949Encoder as Box } } impl Encoder for Windows949Encoder { - fn from_self(&self) -> ~Encoder { Windows949Encoder::new() } + fn from_self(&self) -> Box { Windows949Encoder::new() } fn is_ascii_compatible(&self) -> bool { true } fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (uint, Option) { @@ -83,11 +83,11 @@ pub struct Windows949Decoder { } impl Windows949Decoder { - pub fn new() -> ~Decoder { ~Windows949Decoder { lead: 0 } as ~Decoder } + pub fn new() -> Box { box Windows949Decoder { lead: 0 } as Box } } impl Decoder for Windows949Decoder { - fn from_self(&self) -> ~Decoder { Windows949Decoder::new() } + fn from_self(&self) -> Box { Windows949Decoder::new() } fn is_ascii_compatible(&self) -> bool { true } fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (uint, Option) { diff --git a/src/encoding/codec/simpchinese.rs b/src/encoding/codec/simpchinese.rs index 3c8242e..aaa5ad4 100644 --- a/src/encoding/codec/simpchinese.rs +++ b/src/encoding/codec/simpchinese.rs @@ -35,8 +35,8 @@ pub struct GBK18030Encoding; impl Encoding for GBK18030Encoding { fn name(&self) -> &'static str { "gbk18030" } fn whatwg_name(&self) -> Option<&'static str> { Some("gbk") } // WHATWG compatibility - fn encoder(&self) -> ~Encoder { GBK18030Encoder::new() } - fn decoder(&self) -> ~Decoder { GBK18030Decoder::new() } + fn encoder(&self) -> Box { GBK18030Encoder::new() } + fn decoder(&self) -> Box { GBK18030Decoder::new() } } /// An encoder for an one- and two-byte subset of GB 18030. 
@@ -44,11 +44,11 @@ impl Encoding for GBK18030Encoding { pub struct GBK18030Encoder; impl GBK18030Encoder { - pub fn new() -> ~Encoder { ~GBK18030Encoder as ~Encoder } + pub fn new() -> Box { box GBK18030Encoder as Box } } impl Encoder for GBK18030Encoder { - fn from_self(&self) -> ~Encoder { GBK18030Encoder::new() } + fn from_self(&self) -> Box { GBK18030Encoder::new() } fn is_ascii_compatible(&self) -> bool { true } fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (uint, Option) { @@ -86,11 +86,11 @@ pub struct GBK18030Decoder { } impl GBK18030Decoder { - pub fn new() -> ~Decoder { ~GBK18030Decoder { first: 0 } as ~Decoder } + pub fn new() -> Box { box GBK18030Decoder { first: 0 } as Box } } impl Decoder for GBK18030Decoder { - fn from_self(&self) -> ~Decoder { GBK18030Decoder::new() } + fn from_self(&self) -> Box { GBK18030Decoder::new() } fn is_ascii_compatible(&self) -> bool { true } fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (uint, Option) { @@ -240,8 +240,8 @@ pub struct GB18030Encoding; impl Encoding for GB18030Encoding { fn name(&self) -> &'static str { "gb18030" } fn whatwg_name(&self) -> Option<&'static str> { Some("gb18030") } - fn encoder(&self) -> ~Encoder { GB18030Encoder::new() } - fn decoder(&self) -> ~Decoder { GB18030Decoder::new() } + fn encoder(&self) -> Box { GB18030Encoder::new() } + fn decoder(&self) -> Box { GB18030Decoder::new() } } /// An encoder for GB 18030. 
@@ -249,11 +249,11 @@ impl Encoding for GB18030Encoding { pub struct GB18030Encoder; impl GB18030Encoder { - pub fn new() -> ~Encoder { ~GB18030Encoder as ~Encoder } + pub fn new() -> Box { box GB18030Encoder as Box } } impl Encoder for GB18030Encoder { - fn from_self(&self) -> ~Encoder { GB18030Encoder::new() } + fn from_self(&self) -> Box { GB18030Encoder::new() } fn is_ascii_compatible(&self) -> bool { true } fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (uint, Option) { @@ -300,11 +300,13 @@ pub struct GB18030Decoder { } impl GB18030Decoder { - pub fn new() -> ~Decoder { ~GB18030Decoder { first: 0, second: 0, third: 0 } as ~Decoder } + pub fn new() -> Box { + box GB18030Decoder { first: 0, second: 0, third: 0 } as Box + } } impl Decoder for GB18030Decoder { - fn from_self(&self) -> ~Decoder { GB18030Decoder::new() } + fn from_self(&self) -> Box { GB18030Decoder::new() } fn is_ascii_compatible(&self) -> bool { true } fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (uint, Option) { diff --git a/src/encoding/codec/singlebyte.rs b/src/encoding/codec/singlebyte.rs index c386f05..3761a60 100644 --- a/src/encoding/codec/singlebyte.rs +++ b/src/encoding/codec/singlebyte.rs @@ -18,8 +18,8 @@ pub struct SingleByteEncoding { impl Encoding for SingleByteEncoding { fn name(&self) -> &'static str { self.name } fn whatwg_name(&self) -> Option<&'static str> { self.whatwg_name } - fn encoder(&'static self) -> ~Encoder { SingleByteEncoder::new(self.index_backward) } - fn decoder(&'static self) -> ~Decoder { SingleByteDecoder::new(self.index_forward) } + fn encoder(&'static self) -> Box { SingleByteEncoder::new(self.index_backward) } + fn decoder(&'static self) -> Box { SingleByteDecoder::new(self.index_forward) } } /// An encoder for single-byte encodings based on ASCII. 
@@ -29,13 +29,13 @@ pub struct SingleByteEncoder { } impl SingleByteEncoder { - pub fn new(index_backward: extern "Rust" fn(u16) -> u8) -> ~Encoder { - ~SingleByteEncoder { index_backward: index_backward } as ~Encoder + pub fn new(index_backward: extern "Rust" fn(u16) -> u8) -> Box { + box SingleByteEncoder { index_backward: index_backward } as Box } } impl Encoder for SingleByteEncoder { - fn from_self(&self) -> ~Encoder { SingleByteEncoder::new(self.index_backward) } + fn from_self(&self) -> Box { SingleByteEncoder::new(self.index_backward) } fn is_ascii_compatible(&self) -> bool { true } fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (uint, Option) { @@ -72,13 +72,13 @@ pub struct SingleByteDecoder { } impl SingleByteDecoder { - pub fn new(index_forward: extern "Rust" fn(u8) -> u16) -> ~Decoder { - ~SingleByteDecoder { index_forward: index_forward } as ~Decoder + pub fn new(index_forward: extern "Rust" fn(u8) -> u16) -> Box { + box SingleByteDecoder { index_forward: index_forward } as Box } } impl Decoder for SingleByteDecoder { - fn from_self(&self) -> ~Decoder { SingleByteDecoder::new(self.index_forward) } + fn from_self(&self) -> Box { SingleByteDecoder::new(self.index_forward) } fn is_ascii_compatible(&self) -> bool { true } fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (uint, Option) { diff --git a/src/encoding/codec/tradchinese.rs b/src/encoding/codec/tradchinese.rs index 1113b60..68d4759 100644 --- a/src/encoding/codec/tradchinese.rs +++ b/src/encoding/codec/tradchinese.rs @@ -28,8 +28,8 @@ pub struct BigFive2003Encoding; impl Encoding for BigFive2003Encoding { fn name(&self) -> &'static str { "big5-2003" } fn whatwg_name(&self) -> Option<&'static str> { Some("big5") } // WHATWG compatibility - fn encoder(&self) -> ~Encoder { BigFive2003Encoder::new() } - fn decoder(&self) -> ~Decoder { BigFive2003HKSCS2008Decoder::new() } + fn encoder(&self) -> Box { BigFive2003Encoder::new() } + fn decoder(&self) -> Box { 
BigFive2003HKSCS2008Decoder::new() } } /// An encoder for Big5-2003. @@ -37,11 +37,11 @@ impl Encoding for BigFive2003Encoding { pub struct BigFive2003Encoder; impl BigFive2003Encoder { - pub fn new() -> ~Encoder { ~BigFive2003Encoder as ~Encoder } + pub fn new() -> Box { box BigFive2003Encoder as Box } } impl Encoder for BigFive2003Encoder { - fn from_self(&self) -> ~Encoder { BigFive2003Encoder::new() } + fn from_self(&self) -> Box { BigFive2003Encoder::new() } fn is_ascii_compatible(&self) -> bool { true } fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (uint, Option) { @@ -80,11 +80,11 @@ pub struct BigFive2003HKSCS2008Decoder { } impl BigFive2003HKSCS2008Decoder { - pub fn new() -> ~Decoder { ~BigFive2003HKSCS2008Decoder { lead: 0 } as ~Decoder } + pub fn new() -> Box { box BigFive2003HKSCS2008Decoder { lead: 0 } as Box } } impl Decoder for BigFive2003HKSCS2008Decoder { - fn from_self(&self) -> ~Decoder { BigFive2003HKSCS2008Decoder::new() } + fn from_self(&self) -> Box { BigFive2003HKSCS2008Decoder::new() } fn is_ascii_compatible(&self) -> bool { true } fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (uint, Option) { diff --git a/src/encoding/codec/utf_16.rs b/src/encoding/codec/utf_16.rs index 7788159..f8a5d52 100644 --- a/src/encoding/codec/utf_16.rs +++ b/src/encoding/codec/utf_16.rs @@ -23,8 +23,8 @@ pub struct UTF16LEEncoding; impl Encoding for UTF16LEEncoding { fn name(&self) -> &'static str { "utf-16le" } fn whatwg_name(&self) -> Option<&'static str> { Some("utf-16") } // WHATWG compatibility - fn encoder(&self) -> ~Encoder { UTF16LEEncoder::new() } - fn decoder(&self) -> ~Decoder { UTF16LEDecoder::new() } + fn encoder(&self) -> Box { UTF16LEEncoder::new() } + fn decoder(&self) -> Box { UTF16LEDecoder::new() } } /** @@ -36,8 +36,8 @@ pub struct UTF16BEEncoding; impl Encoding for UTF16BEEncoding { fn name(&self) -> &'static str { "utf-16be" } fn whatwg_name(&self) -> Option<&'static str> { Some("utf-16be") } - fn 
encoder(&self) -> ~Encoder { UTF16BEEncoder::new() } - fn decoder(&self) -> ~Decoder { UTF16BEDecoder::new() } + fn encoder(&self) -> Box { UTF16BEEncoder::new() } + fn decoder(&self) -> Box { UTF16BEDecoder::new() } } /// An encoder for UTF-16 in little endian. @@ -49,18 +49,18 @@ pub struct UTF16LEEncoder; pub struct UTF16BEEncoder; impl UTF16LEEncoder { - pub fn new() -> ~Encoder { ~UTF16LEEncoder as ~Encoder } + pub fn new() -> Box { box UTF16LEEncoder as Box } } impl UTF16BEEncoder { - pub fn new() -> ~Encoder { ~UTF16BEEncoder as ~Encoder } + pub fn new() -> Box { box UTF16BEEncoder as Box } } macro_rules! impl_UTF16Encoder( ($encoder:ident: fn write_two_bytes($output:ident: &mut ByteWriter, $msb:ident: u8, $lsb:ident: u8) $body:block) => (impl Encoder for $encoder { - fn from_self(&self) -> ~Encoder { $encoder::new() } + fn from_self(&self) -> Box { $encoder::new() } fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (uint, Option) { output.writer_hint(input.len() * 2); @@ -123,21 +123,21 @@ pub struct UTF16BEDecoder { } impl UTF16LEDecoder { - pub fn new() -> ~Decoder { - ~UTF16LEDecoder { leadbyte: 0xffff, leadsurrogate: 0xffff } as ~Decoder + pub fn new() -> Box { + box UTF16LEDecoder { leadbyte: 0xffff, leadsurrogate: 0xffff } as Box } } impl UTF16BEDecoder { - pub fn new() -> ~Decoder { - ~UTF16BEDecoder { leadbyte: 0xffff, leadsurrogate: 0xffff } as ~Decoder + pub fn new() -> Box { + box UTF16BEDecoder { leadbyte: 0xffff, leadsurrogate: 0xffff } as Box } } macro_rules! 
impl_UTF16Decoder( ($decoder:ident: fn concat_two_bytes($lead:ident: u16, $trail:ident: u8) -> u16 $body:block) => (impl Decoder for $decoder { - fn from_self(&self) -> ~Decoder { $decoder::new() } + fn from_self(&self) -> Box { $decoder::new() } fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (uint, Option) { diff --git a/src/encoding/codec/utf_8.rs b/src/encoding/codec/utf_8.rs index a6d316d..33db969 100644 --- a/src/encoding/codec/utf_8.rs +++ b/src/encoding/codec/utf_8.rs @@ -48,8 +48,8 @@ pub struct UTF8Encoding; impl Encoding for UTF8Encoding { fn name(&self) -> &'static str { "utf-8" } fn whatwg_name(&self) -> Option<&'static str> { Some("utf-8") } - fn encoder(&self) -> ~Encoder { UTF8Encoder::new() } - fn decoder(&self) -> ~Decoder { UTF8Decoder::new() } + fn encoder(&self) -> Box { UTF8Encoder::new() } + fn decoder(&self) -> Box { UTF8Decoder::new() } } /// An encoder for UTF-8. @@ -57,11 +57,11 @@ impl Encoding for UTF8Encoding { pub struct UTF8Encoder; impl UTF8Encoder { - pub fn new() -> ~Encoder { ~UTF8Encoder as ~Encoder } + pub fn new() -> Box { box UTF8Encoder as Box } } impl Encoder for UTF8Encoder { - fn from_self(&self) -> ~Encoder { UTF8Encoder::new() } + fn from_self(&self) -> Box { UTF8Encoder::new() } fn is_ascii_compatible(&self) -> bool { true } fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (uint, Option) { @@ -86,8 +86,8 @@ pub struct UTF8Decoder { } impl UTF8Decoder { - pub fn new() -> ~Decoder { - ~UTF8Decoder { queuelen: 0, queue: [0, ..4], state: INITIAL_STATE } as ~Decoder + pub fn new() -> Box { + box UTF8Decoder { queuelen: 0, queue: [0, ..4], state: INITIAL_STATE } as Box } } @@ -139,7 +139,7 @@ static REJECT_STATE: u8 = 12; static REJECT_STATE_WITH_BACKUP: u8 = REJECT_STATE | 1; impl Decoder for UTF8Decoder { - fn from_self(&self) -> ~Decoder { UTF8Decoder::new() } + fn from_self(&self) -> Box { UTF8Decoder::new() } fn is_ascii_compatible(&self) -> bool { true } fn raw_feed(&mut self, input: 
&[u8], output: &mut StringWriter) -> (uint, Option) { diff --git a/src/encoding/codec/whatwg.rs b/src/encoding/codec/whatwg.rs index 5277c19..4ac2f2c 100644 --- a/src/encoding/codec/whatwg.rs +++ b/src/encoding/codec/whatwg.rs @@ -15,8 +15,8 @@ pub struct EncoderOnlyUTF8Encoding; impl Encoding for EncoderOnlyUTF8Encoding { fn name(&self) -> &'static str { "encoder-only-utf-8" } fn whatwg_name(&self) -> Option<&'static str> { Some("replacement") } // WHATWG compatibility - fn encoder(&self) -> ~Encoder { codec::utf_8::UTF8Encoding.encoder() } - fn decoder(&self) -> ~Decoder { codec::error::ErrorEncoding.decoder() } + fn encoder(&self) -> Box { codec::utf_8::UTF8Encoding.encoder() } + fn decoder(&self) -> Box { codec::error::ErrorEncoding.decoder() } } /// Algorithmic mapping for `x-user-defined` encoding. diff --git a/src/encoding/label.rs b/src/encoding/label.rs index 0b09086..d560cb2 100644 --- a/src/encoding/label.rs +++ b/src/encoding/label.rs @@ -12,7 +12,7 @@ use types::EncodingRef; /// Implements "get an encoding" algorithm: http://encoding.spec.whatwg.org/#decode pub fn encoding_from_whatwg_label(label: &str) -> Option { // FIXME(rust#10683): temp needed as workaround - let trimmed = label.trim_chars(& &[' ', '\n', '\r', '\t', '\x0C']).to_ascii_lower(); + let trimmed = label.trim_chars(&[' ', '\n', '\r', '\t', '\x0C']).to_ascii_lower(); match trimmed.as_slice() { "unicode-1-1-utf-8" | "utf-8" | @@ -347,8 +347,8 @@ mod tests { } #[bench] - fn bench_encoding_from_whatwg_label(harness: &mut test::BenchHarness) { - harness.iter(|| { + fn bench_encoding_from_whatwg_label(bencher: &mut test::Bencher) { + bencher.iter(|| { encoding_from_whatwg_label("iso-8859-bazinga"); }) } diff --git a/src/encoding/lib.rs b/src/encoding/lib.rs index 4806425..d65cacb 100644 --- a/src/encoding/lib.rs +++ b/src/encoding/lib.rs @@ -17,7 +17,12 @@ #![feature(globs, macro_rules)] -pub use self::types::*; // reexport +pub use self::types::{CodecError, ByteWriter, StringWriter, + 
Encoder, Decoder, EncodingRef, Encoding, + EncoderTrapFunc, DecoderTrapFunc, DecoderTrap, + DecodeStrict, DecodeReplace, DecodeIgnore, + EncoderTrap, EncodeStrict, EncodeReplace, + EncodeIgnore, EncodeNcrEscape, decode}; // reexport mod util; #[cfg(test)] mod testutils; @@ -92,11 +97,14 @@ mod tests { assert_eq!(all::ISO_8859_2.encode("Acme\xa9", EncodeNcrEscape), Ok(vec!(65,99,109,101,38,35,49,54,57,59))); // Acme© - assert_eq!(all::ISO_8859_1.decode([99,97,102,233], DecodeStrict), Ok(~"caf\xe9")); + assert_eq!(all::ISO_8859_1.decode([99,97,102,233], DecodeStrict), + Ok(StrBuf::from_str("caf\xe9"))); assert!(all::ISO_8859_6.decode([65,99,109,101,169], DecodeStrict).is_err()); - assert_eq!(all::ISO_8859_6.decode([65,99,109,101,169], DecodeReplace), Ok(~"Acme\ufffd")); - assert_eq!(all::ISO_8859_6.decode([65,99,109,101,169], DecodeIgnore), Ok(~"Acme")); + assert_eq!(all::ISO_8859_6.decode([65,99,109,101,169], DecodeReplace), + Ok(StrBuf::from_str("Acme\ufffd"))); + assert_eq!(all::ISO_8859_6.decode([65,99,109,101,169], DecodeIgnore), + Ok(StrBuf::from_str("Acme"))); } #[test] @@ -110,10 +118,10 @@ mod tests { true } static HexNcrEscape: EncoderTrap = EncoderTrap(hex_ncr_escape); - let orig = ~"Hello, 世界!"; + let orig = "Hello, 世界!".to_owned(); let encoded = all::ASCII.encode(orig, HexNcrEscape).unwrap(); let decoded = all::ASCII.decode(encoded.as_slice(), DecodeStrict).unwrap(); - assert_eq!(decoded, ~"Hello, 世界!"); + assert_eq!(decoded, StrBuf::from_str("Hello, 世界!")); } #[test] @@ -122,10 +130,12 @@ mod tests { assert_eq!(euckr.name(), "windows-949"); assert_eq!(euckr.whatwg_name(), Some("euc-kr")); // for the sake of compatibility let broken = &[0xbf, 0xec, 0xbf, 0xcd, 0xff, 0xbe, 0xd3]; - assert_eq!(euckr.decode(broken, DecodeReplace), Ok(~"\uc6b0\uc640\ufffd\uc559")); + assert_eq!(euckr.decode(broken, DecodeReplace), + Ok(StrBuf::from_str("\uc6b0\uc640\ufffd\uc559"))); // corresponding rust-encoding native API: - assert_eq!(all::WINDOWS_949.decode(broken, 
DecodeReplace), Ok(~"\uc6b0\uc640\ufffd\uc559")); + assert_eq!(all::WINDOWS_949.decode(broken, DecodeReplace), + Ok(StrBuf::from_str("\uc6b0\uc640\ufffd\uc559"))); } diff --git a/src/encoding/types.rs b/src/encoding/types.rs index 83a4d3c..d4e98ab 100644 --- a/src/encoding/types.rs +++ b/src/encoding/types.rs @@ -82,20 +82,6 @@ pub trait ByteWriter { fn write_bytes(&mut self, v: &[u8]); } -impl ByteWriter for ~[u8] { - fn writer_hint(&mut self, expectedlen: uint) { - self.reserve_additional(expectedlen); - } - - fn write_byte(&mut self, b: u8) { - self.push(b); - } - - fn write_bytes(&mut self, v: &[u8]) { - self.push_all(v); - } -} - impl ByteWriter for Vec { fn writer_hint(&mut self, expectedlen: uint) { self.reserve_additional(expectedlen); @@ -127,7 +113,7 @@ pub trait StringWriter { fn write_str(&mut self, s: &str); } -impl StringWriter for T { +impl StringWriter for StrBuf { fn writer_hint(&mut self, expectedlen: uint) { let newlen = self.len() + expectedlen; self.reserve(newlen); @@ -146,7 +132,7 @@ impl StringWriter for T { /// This is a lower level interface, and normally `Encoding::encode` should be used instead. pub trait Encoder { /// Creates a fresh `Encoder` instance which parameters are same as `self`. - fn from_self(&self) -> ~Encoder; + fn from_self(&self) -> Box; /// Returns true if this encoding is compatible to ASCII, /// i.e. U+0000 through U+007F always map to bytes 00 through 7F and nothing else. @@ -198,7 +184,7 @@ pub trait Encoder { /// This is a lower level interface, and normally `Encoding::decode` should be used instead. pub trait Decoder { /// Creates a fresh `Decoder` instance which parameters are same as `self`. - fn from_self(&self) -> ~Decoder; + fn from_self(&self) -> Box; /// Returns true if this encoding is compatible to ASCII, /// i.e. bytes 00 through 7F always map to U+0000 through U+007F and nothing else. @@ -227,16 +213,16 @@ pub trait Decoder { /// A test-friendly interface to `raw_feed`. Internal use only. 
#[cfg(test)] - fn test_feed(&mut self, input: &[u8]) -> (uint, Option, ~str) { - let mut buf = ~""; + fn test_feed(&mut self, input: &[u8]) -> (uint, Option, StrBuf) { + let mut buf = StrBuf::new(); let (nprocessed, err) = self.raw_feed(input, &mut buf); (nprocessed, err, buf) } /// A test-friendly interface to `raw_finish`. Internal use only. #[cfg(test)] - fn test_finish(&mut self) -> (Option, ~str) { - let mut buf = ~""; + fn test_finish(&mut self) -> (Option, StrBuf) { + let mut buf = StrBuf::new(); let err = self.raw_finish(&mut buf); (err, buf) } @@ -267,10 +253,10 @@ pub trait Encoding { fn whatwg_name(&self) -> Option<&'static str> { None } /// Creates a new encoder. - fn encoder(&'static self) -> ~Encoder; + fn encoder(&'static self) -> Box; /// Creates a new decoder. - fn decoder(&'static self) -> ~Decoder; + fn decoder(&'static self) -> Box; /// An easy-to-use interface to `Encoder`. /// On the encoder error `trap` is called, @@ -279,19 +265,19 @@ pub trait Encoding { fn encode(&'static self, input: &str, trap: EncoderTrap) -> Result,SendStr> { let mut encoder = self.encoder(); let mut remaining = input; - let mut unprocessed = ~""; + let mut unprocessed = StrBuf::new(); let mut ret = Vec::new(); loop { let (offset, err) = encoder.raw_feed(remaining, &mut ret); - if offset > 0 { unprocessed.clear(); } + if offset > 0 { unprocessed.truncate(0); } match err { Some(err) => { unprocessed.push_str(remaining.slice(offset, err.upto)); - if !trap.trap(encoder, unprocessed, &mut ret) { + if !trap.trap(encoder, unprocessed.as_slice(), &mut ret) { return Err(err.cause); } - unprocessed.clear(); + unprocessed.truncate(0); remaining = remaining.slice(err.upto, remaining.len()); } None => { @@ -303,7 +289,7 @@ pub trait Encoding { match encoder.raw_finish(&mut ret) { Some(err) => { - if !trap.trap(encoder, unprocessed, &mut ret) { + if !trap.trap(encoder, unprocessed.as_slice(), &mut ret) { return Err(err.cause); } } @@ -316,11 +302,11 @@ pub trait Encoding { /// On 
the decoder error `trap` is called, /// which may return a replacement string to continue processing, /// or a failure to return the error. - fn decode(&'static self, input: &[u8], trap: DecoderTrap) -> Result<~str,SendStr> { + fn decode(&'static self, input: &[u8], trap: DecoderTrap) -> Result { let mut decoder = self.decoder(); let mut remaining = input; let mut unprocessed = Vec::new(); - let mut ret = ~""; + let mut ret = StrBuf::new(); loop { let (offset, err) = decoder.raw_feed(remaining, &mut ret); @@ -433,9 +419,9 @@ impl EncoderTrap { EncodeReplace => reencode(encoder, "?", output, "Replace"), EncodeIgnore => true, EncodeNcrEscape => { - let mut escapes = ~""; + let mut escapes = StrBuf::new(); for ch in input.chars() { escapes.push_str(format!("&\\#{:d};", ch as int)); } - reencode(encoder, escapes, output, "NcrEscape") + reencode(encoder, escapes.as_slice(), output, "NcrEscape") }, EncoderTrap(func) => func(encoder, input, output), } @@ -447,7 +433,7 @@ impl EncoderTrap { /// and decoded a single string in memory. /// Return the result and the used encoding. pub fn decode(input: &[u8], trap: DecoderTrap, fallback_encoding: EncodingRef) - -> (Result<~str,SendStr>, EncodingRef) { + -> (Result, EncodingRef) { use all::{UTF_8, UTF_16LE, UTF_16BE}; if input.starts_with([0xEF, 0xBB, 0xBF]) { (UTF_8.decode(input.slice_from(3), trap), UTF_8 as EncodingRef)