diff --git a/README.md b/README.md index 6eaaf01..ce2d515 100644 --- a/README.md +++ b/README.md @@ -39,8 +39,8 @@ To decode a byte sequence with invalid sequences: ~~~~ {.rust} all::ISO_8859_6.decode([65,99,109,101,169], DecodeStrict); // => Err(...) -all::ISO_8859_6.decode([65,99,109,101,169], DecodeReplace); // => Ok(~"Acme\ufffd") -all::ISO_8859_6.decode([65,99,109,101,169], DecodeIgnore); // => Ok(~"Acme") +all::ISO_8859_6.decode([65,99,109,101,169], DecodeReplace); // => Ok(StrBuf::from_str("Acme\ufffd")) +all::ISO_8859_6.decode([65,99,109,101,169], DecodeIgnore); // => Ok(StrBuf::from_str("Acme")) ~~~~ A practical example of custom encoder traps: @@ -58,7 +58,7 @@ static HexNcrEscape: EncoderTrap = EncoderTrap(hex_ncr_escape); let orig = ~"Hello, 世界!"; let encoded = all::ASCII.encode(orig, HexNcrEscape).unwrap(); -all::ASCII.decode(encoded.as_slice(), DecodeStrict); // => Ok(~"Hello, 世界!") +all::ASCII.decode(encoded.as_slice(), DecodeStrict); // => Ok(StrBuf::from_str("Hello, 世界!")) ~~~~ Getting the encoding from the string label, @@ -69,10 +69,10 @@ let euckr = label::encoding_from_whatwg_label("euc-kr").unwrap(); euckr.name(); // => "windows-949" euckr.whatwg_name(); // => Some("euc-kr"), for the sake of compatibility let broken = &[0xbf, 0xec, 0xbf, 0xcd, 0xff, 0xbe, 0xd3]; -euckr.decode(broken, DecodeReplace); // => Ok(~"\uc6b0\uc640\ufffd\uc559") +euckr.decode(broken, DecodeReplace); // => Ok(StrBuf::from_str("\uc6b0\uc640\ufffd\uc559")) // corresponding rust-encoding native API: -all::WINDOWS_949.decode(broken, DecodeReplace); // => Ok(~"\uc6b0\uc640\ufffd\uc559") +all::WINDOWS_949.decode(broken, DecodeReplace); // => Ok(StrBuf::from_str("\uc6b0\uc640\ufffd\uc559")) ~~~~ Supported Encodings diff --git a/src/encoding/label.rs b/src/encoding/label.rs index 0b09086..0e191d8 100644 --- a/src/encoding/label.rs +++ b/src/encoding/label.rs @@ -347,8 +347,8 @@ mod tests { } #[bench] - fn bench_encoding_from_whatwg_label(harness: &mut test::BenchHarness) { - 
harness.iter(|| { + fn bench_encoding_from_whatwg_label(bencher: &mut test::Bencher) { + bencher.iter(|| { encoding_from_whatwg_label("iso-8859-bazinga"); }) } diff --git a/src/encoding/lib.rs b/src/encoding/lib.rs index 7e4376a..094c887 100644 --- a/src/encoding/lib.rs +++ b/src/encoding/lib.rs @@ -15,7 +15,10 @@ #![feature(globs, macro_rules)] -pub use self::types::*; // reexport +pub use self::types::{CodecError, ByteWriter, StringWriter, + Encoder, Decoder, EncodingRef, Encoding, + EncoderTrapFunc, DecoderTrapFunc, DecoderTrap, + EncoderTrap, decode}; // reexport mod util; #[cfg(test)] mod testutils; @@ -90,11 +93,14 @@ mod tests { assert_eq!(all::ISO_8859_2.encode("Acme\xa9", EncodeNcrEscape), Ok(vec!(65,99,109,101,38,35,49,54,57,59))); // Acme© - assert_eq!(all::ISO_8859_1.decode([99,97,102,233], DecodeStrict), Ok(~"caf\xe9")); + assert_eq!(all::ISO_8859_1.decode([99,97,102,233], DecodeStrict), + Ok(StrBuf::from_str("caf\xe9"))); assert!(all::ISO_8859_6.decode([65,99,109,101,169], DecodeStrict).is_err()); - assert_eq!(all::ISO_8859_6.decode([65,99,109,101,169], DecodeReplace), Ok(~"Acme\ufffd")); - assert_eq!(all::ISO_8859_6.decode([65,99,109,101,169], DecodeIgnore), Ok(~"Acme")); + assert_eq!(all::ISO_8859_6.decode([65,99,109,101,169], DecodeReplace), + Ok(StrBuf::from_str("Acme\ufffd"))); + assert_eq!(all::ISO_8859_6.decode([65,99,109,101,169], DecodeIgnore), + Ok(StrBuf::from_str("Acme"))); } #[test] @@ -111,7 +117,7 @@ mod tests { let orig = ~"Hello, 世界!"; let encoded = all::ASCII.encode(orig, HexNcrEscape).unwrap(); let decoded = all::ASCII.decode(encoded.as_slice(), DecodeStrict).unwrap(); - assert_eq!(decoded, ~"Hello, 世界!"); + assert_eq!(decoded, StrBuf::from_str("Hello, 世界!")); } #[test] @@ -120,10 +126,12 @@ mod tests { assert_eq!(euckr.name(), "windows-949"); assert_eq!(euckr.whatwg_name(), Some("euc-kr")); // for the sake of compatibility let broken = &[0xbf, 0xec, 0xbf, 0xcd, 0xff, 0xbe, 0xd3]; - assert_eq!(euckr.decode(broken, DecodeReplace), 
Ok(~"\uc6b0\uc640\ufffd\uc559")); + assert_eq!(euckr.decode(broken, DecodeReplace), + Ok(StrBuf::from_str("\uc6b0\uc640\ufffd\uc559"))); // corresponding rust-encoding native API: - assert_eq!(all::WINDOWS_949.decode(broken, DecodeReplace), Ok(~"\uc6b0\uc640\ufffd\uc559")); + assert_eq!(all::WINDOWS_949.decode(broken, DecodeReplace), + Ok(StrBuf::from_str("\uc6b0\uc640\ufffd\uc559"))); } diff --git a/src/encoding/types.rs b/src/encoding/types.rs index 83a4d3c..5824739 100644 --- a/src/encoding/types.rs +++ b/src/encoding/types.rs @@ -127,7 +127,7 @@ pub trait StringWriter { fn write_str(&mut self, s: &str); } -impl StringWriter for T { +impl StringWriter for StrBuf { fn writer_hint(&mut self, expectedlen: uint) { let newlen = self.len() + expectedlen; self.reserve(newlen); @@ -227,16 +227,16 @@ pub trait Decoder { /// A test-friendly interface to `raw_feed`. Internal use only. #[cfg(test)] - fn test_feed(&mut self, input: &[u8]) -> (uint, Option<CodecError>, ~str) { - let mut buf = ~""; + fn test_feed(&mut self, input: &[u8]) -> (uint, Option<CodecError>, StrBuf) { + let mut buf = StrBuf::new(); let (nprocessed, err) = self.raw_feed(input, &mut buf); (nprocessed, err, buf) } /// A test-friendly interface to `raw_finish`. Internal use only. 
#[cfg(test)] - fn test_finish(&mut self) -> (Option<CodecError>, ~str) { - let mut buf = ~""; + fn test_finish(&mut self) -> (Option<CodecError>, StrBuf) { + let mut buf = StrBuf::new(); let err = self.raw_finish(&mut buf); (err, buf) } @@ -279,19 +279,19 @@ pub trait Encoding { fn encode(&'static self, input: &str, trap: EncoderTrap) -> Result<Vec<u8>,SendStr> { let mut encoder = self.encoder(); let mut remaining = input; - let mut unprocessed = ~""; + let mut unprocessed = StrBuf::new(); let mut ret = Vec::new(); loop { let (offset, err) = encoder.raw_feed(remaining, &mut ret); - if offset > 0 { unprocessed.clear(); } + if offset > 0 { unprocessed.truncate(0); } match err { Some(err) => { unprocessed.push_str(remaining.slice(offset, err.upto)); - if !trap.trap(encoder, unprocessed, &mut ret) { + if !trap.trap(encoder, unprocessed.as_slice(), &mut ret) { return Err(err.cause); } - unprocessed.clear(); + unprocessed.truncate(0); remaining = remaining.slice(err.upto, remaining.len()); } None => { @@ -303,7 +303,7 @@ pub trait Encoding { match encoder.raw_finish(&mut ret) { Some(err) => { - if !trap.trap(encoder, unprocessed, &mut ret) { + if !trap.trap(encoder, unprocessed.as_slice(), &mut ret) { return Err(err.cause); } } @@ -316,11 +316,11 @@ pub trait Encoding { /// On the decoder error `trap` is called, /// which may return a replacement string to continue processing, /// or a failure to return the error. 
- fn decode(&'static self, input: &[u8], trap: DecoderTrap) -> Result<~str,SendStr> { + fn decode(&'static self, input: &[u8], trap: DecoderTrap) -> Result<StrBuf,SendStr> { let mut decoder = self.decoder(); let mut remaining = input; let mut unprocessed = Vec::new(); - let mut ret = ~""; + let mut ret = StrBuf::new(); loop { let (offset, err) = decoder.raw_feed(remaining, &mut ret); @@ -433,9 +433,9 @@ impl EncoderTrap { EncodeReplace => reencode(encoder, "?", output, "Replace"), EncodeIgnore => true, EncodeNcrEscape => { - let mut escapes = ~""; + let mut escapes = StrBuf::new(); for ch in input.chars() { escapes.push_str(format!("&\\#{:d};", ch as int)); } - reencode(encoder, escapes, output, "NcrEscape") + reencode(encoder, escapes.as_slice(), output, "NcrEscape") }, EncoderTrap(func) => func(encoder, input, output), } @@ -447,7 +447,7 @@ impl EncoderTrap { /// and decoded a single string in memory. /// Return the result and the used encoding. pub fn decode(input: &[u8], trap: DecoderTrap, fallback_encoding: EncodingRef) - -> (Result<~str,SendStr>, EncodingRef) { + -> (Result<StrBuf,SendStr>, EncodingRef) { use all::{UTF_8, UTF_16LE, UTF_16BE}; if input.starts_with([0xEF, 0xBB, 0xBF]) { (UTF_8.decode(input.slice_from(3), trap), UTF_8 as EncodingRef)