diff --git a/src/blocker.rs b/src/blocker.rs index 09533ed..df8bade 100644 --- a/src/blocker.rs +++ b/src/blocker.rs @@ -98,6 +98,9 @@ pub struct Blocker { #[cfg(feature = "object-pooling")] #[serde(skip_serializing, skip_deserializing)] pool: TokenPool, + + #[serde(default)] + generic_hide: NetworkFilterList, } impl Blocker { @@ -109,6 +112,21 @@ impl Blocker { self.check_parameterised(request, false, false) } + pub fn check_generic_hide(&self, hostname_request: &Request) -> bool { + let mut request_tokens; + #[cfg(feature = "object-pooling")] + { + request_tokens = self.pool.pool.new(); + } + #[cfg(not(feature = "object-pooling"))] + { + request_tokens = Vec::with_capacity(utils::TOKENS_BUFFER_SIZE); + } + hostname_request.get_tokens(&mut request_tokens); + + self.generic_hide.check(hostname_request, &request_tokens, &HashSet::new()).is_some() + } + pub fn check_parameterised(&self, request: &Request, matched_rule: bool, force_check_exceptions: bool) -> BlockerResult { if !request.is_supported { return BlockerResult::default(); @@ -247,6 +265,8 @@ impl Blocker { let mut tagged_filters_all = Vec::with_capacity(200); // $badfilter let mut badfilters = Vec::with_capacity(100); + // $generichide + let mut generic_hide = Vec::with_capacity(4000); // All other filters let mut filters = Vec::with_capacity(network_filters.len()); @@ -268,6 +288,8 @@ impl Blocker { } if filter.is_csp() { csp.push(filter); + } else if filter.is_generic_hide() { + generic_hide.push(filter); } else if filter.is_exception() { exceptions.push(filter); } else if filter.is_important() { @@ -283,6 +305,7 @@ impl Blocker { } csp.shrink_to_fit(); + generic_hide.shrink_to_fit(); exceptions.shrink_to_fit(); importants.shrink_to_fit(); redirects.shrink_to_fit(); @@ -296,6 +319,7 @@ impl Blocker { redirects: NetworkFilterList::new(redirects, options.enable_optimizations), filters_tagged: NetworkFilterList::new(Vec::new(), options.enable_optimizations), filters: NetworkFilterList::new(filters, options.enable_optimizations), + generic_hide: NetworkFilterList::new(generic_hide, options.enable_optimizations), // Tags special case for enabling/disabling them dynamically tags_enabled: HashSet::new(), tagged_filters_all, @@ -315,6 +339,8 @@ impl Blocker { pub fn filter_exists(&self, filter: &NetworkFilter) -> bool { if filter.is_csp() { self.csp.filter_exists(filter) + } else if filter.is_generic_hide() { + self.generic_hide.filter_exists(filter) } else if filter.is_exception() { self.exceptions.filter_exists(filter) } else if filter.is_important() { @@ -336,6 +362,9 @@ impl Blocker { } else if filter.is_csp() { self.csp.filter_add(filter); Ok(self) + } else if filter.is_generic_hide() { + self.generic_hide.filter_add(filter); + Ok(self) } else if filter.is_exception() { self.exceptions.filter_add(filter); Ok(self) @@ -1327,6 +1356,20 @@ mod blocker_tests { assert!(!matched_rule.matched); assert!(matched_rule.exception.is_some()); } + + #[test] + fn generichide() { + let blocker_options: BlockerOptions = BlockerOptions { + debug: true, + enable_optimizations: true, + }; + + let mut blocker = Blocker::new(Vec::new(), &blocker_options); + + blocker.filter_add(NetworkFilter::parse("@@||example.com$generichide", true).unwrap()).unwrap(); + + assert!(blocker.check_generic_hide(&Request::from_url("https://example.com").unwrap())); + } } #[cfg(test)] @@ -1346,14 +1389,12 @@ mod legacy_rule_parsing_tests { // easyList = { 24478, 31144, 0, 5589 }; // not handling (and not including) filters with the following options: // - $popup - // - $generichide - // - $subdocument // - $document // - $elemhide // difference from original counts caused by not handling document/subdocument options and possibly miscounting on the blocker side. // Printing all non-cosmetic, non-html, non-comment/-empty rules and ones with no unsupported options yields 29142 items // This engine also handles 3 rules that old one does not - const EASY_LIST: ListCounts = ListCounts { filters: 24062+3, cosmetic_filters: 31163, exceptions: 5080 }; + const EASY_LIST: ListCounts = ListCounts { filters: 24062+3, cosmetic_filters: 31163, exceptions: 5800 }; // easyPrivacy = { 11817, 0, 0, 1020 }; // differences in counts explained by hashset size underreporting as detailed in the next two cases const EASY_PRIVACY: ListCounts = ListCounts { filters: 11889, cosmetic_filters: 0, exceptions: 1021 }; @@ -1391,7 +1432,8 @@ mod legacy_rule_parsing_tests { let blocker = Blocker::new(network_filters, &blocker_options); // Some filters in the filter_map are pointed at by multiple tokens, increasing the total number of items - assert!(vec_hashmap_len(&blocker.exceptions.filter_map) >= expectation.exceptions, "Number of collected exceptions does not match expectation"); + assert!(vec_hashmap_len(&blocker.exceptions.filter_map) + vec_hashmap_len(&blocker.generic_hide.filter_map) + >= expectation.exceptions, "Number of collected exceptions does not match expectation"); assert!(vec_hashmap_len(&blocker.filters.filter_map) + vec_hashmap_len(&blocker.importants.filter_map) + diff --git a/src/cosmetic_filter_cache.rs b/src/cosmetic_filter_cache.rs index 2fda4a6..041f17e 100644 --- a/src/cosmetic_filter_cache.rs +++ b/src/cosmetic_filter_cache.rs @@ -12,7 +12,7 @@ lazy_static! { static ref PUBLIC_SUFFIXES: psl::List = psl::List::new(); } -/// Contains cosmetic filter information intended to be injected into a particular hostname. +/// Contains cosmetic filter information intended to be used on a particular URL. /// /// `hide_selectors` is a set of any CSS selector on the page that should be hidden, i.e. styled as /// `{ display: none !important; }`. @@ -25,21 +25,27 @@ lazy_static! { /// /// `injected_script` is the Javascript code for any scriptlets that should be injected into the /// page. +/// +/// `generichide` is set to true if there is a corresponding `$generichide` exception network +/// filter. If so, the page should not query for additional generic rules using +/// `hidden_class_id_selectors`. #[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] -pub struct HostnameSpecificResources { +pub struct UrlSpecificResources { pub hide_selectors: HashSet, pub style_selectors: HashMap>, pub exceptions: HashSet, pub injected_script: String, + pub generichide: bool, } -impl HostnameSpecificResources { +impl UrlSpecificResources { pub fn empty() -> Self { Self { hide_selectors: HashSet::new(), style_selectors: HashMap::new(), exceptions: HashSet::new(), injected_script: String::new(), + generichide: false, } } } @@ -197,10 +203,10 @@ impl CosmeticFilterCache { .collect::>() } - pub fn hostname_cosmetic_resources(&self, hostname: &str) -> HostnameSpecificResources { + pub fn hostname_cosmetic_resources(&self, hostname: &str, generichide: bool) -> UrlSpecificResources { let domain = match PUBLIC_SUFFIXES.domain(hostname) { Some(domain) => domain, - None => return HostnameSpecificResources::empty(), + None => return UrlSpecificResources::empty(), }; let domain_str = domain.to_str(); @@ -225,8 +231,13 @@ impl CosmeticFilterCache { let (hostname_hide_selectors, style_selectors, script_injections) = hostname_specific_rules(&rules_that_apply[..]); - let mut hide_selectors = self.misc_generic_selectors.difference(&exceptions.hide_exceptions).cloned().collect::>(); - hostname_hide_selectors.into_iter().for_each(|sel| { hide_selectors.insert(sel); }); + let hide_selectors = if generichide { + hostname_hide_selectors + } else { + let mut hide_selectors = self.misc_generic_selectors.difference(&exceptions.hide_exceptions).cloned().collect::>(); + hostname_hide_selectors.into_iter().for_each(|sel| { hide_selectors.insert(sel); }); + hide_selectors + }; let mut injected_script = String::new(); script_injections.iter().for_each(|s| { @@ -236,11 +247,12 @@ impl CosmeticFilterCache { } }); - HostnameSpecificResources { + UrlSpecificResources { hide_selectors, style_selectors, exceptions: exceptions.hide_exceptions, injected_script, + generichide, } } @@ -446,15 +458,15 @@ mod cosmetic_cache_tests { "sub.example.com#@#.item2", ]); - let out = cfcache.hostname_cosmetic_resources("test.com"); - let mut expected = HostnameSpecificResources::empty(); + let out = cfcache.hostname_cosmetic_resources("test.com", false); + let mut expected = UrlSpecificResources::empty(); assert_eq!(out, expected); - let out = cfcache.hostname_cosmetic_resources("example.com"); + let out = cfcache.hostname_cosmetic_resources("example.com", false); expected.exceptions.insert(".item".into()); assert_eq!(out, expected); - let out = cfcache.hostname_cosmetic_resources("sub.example.com"); + let out = cfcache.hostname_cosmetic_resources("sub.example.com", false); expected.exceptions.insert(".item2".into()); assert_eq!(out, expected); } @@ -465,16 +477,16 @@ mod cosmetic_cache_tests { "example.com,~sub.example.com##.item", ]); - let out = cfcache.hostname_cosmetic_resources("test.com"); - let mut expected = HostnameSpecificResources::empty(); + let out = cfcache.hostname_cosmetic_resources("test.com", false); + let mut expected = UrlSpecificResources::empty(); assert_eq!(out, expected); - let out = cfcache.hostname_cosmetic_resources("example.com"); + let out = cfcache.hostname_cosmetic_resources("example.com", false); expected.hide_selectors.insert(".item".to_owned()); assert_eq!(out, expected); - let out = cfcache.hostname_cosmetic_resources("sub.example.com"); - let mut expected = HostnameSpecificResources::empty(); + let out = cfcache.hostname_cosmetic_resources("sub.example.com", false); + let mut expected = UrlSpecificResources::empty(); expected.exceptions.insert(".item".into()); assert_eq!(out, expected); } @@ -488,23 +500,23 @@ mod cosmetic_cache_tests { "a2.sub.example.com##.element:style(background: #000)", ]); - let out = cfcache.hostname_cosmetic_resources("sub.example.com"); - let mut expected = HostnameSpecificResources::empty(); + let out = cfcache.hostname_cosmetic_resources("sub.example.com", false); + let mut expected = UrlSpecificResources::empty(); assert_eq!(out, expected); - let out = cfcache.hostname_cosmetic_resources("sub.test.example.com"); + let out = cfcache.hostname_cosmetic_resources("sub.test.example.com", false); assert_eq!(out, expected); - let out = cfcache.hostname_cosmetic_resources("a1.sub.example.com"); + let out = cfcache.hostname_cosmetic_resources("a1.sub.example.com", false); expected.hide_selectors.insert(".element".to_owned()); assert_eq!(out, expected); - let out = cfcache.hostname_cosmetic_resources("test.example.com"); + let out = cfcache.hostname_cosmetic_resources("test.example.com", false); expected.hide_selectors.clear(); expected.style_selectors.insert(".element".to_owned(), vec!["background: #fff".to_owned()]); assert_eq!(out, expected); - let out = cfcache.hostname_cosmetic_resources("a2.sub.example.com"); + let out = cfcache.hostname_cosmetic_resources("a2.sub.example.com", false); expected.style_selectors.clear(); expected.style_selectors.insert(".element".to_owned(), vec!["background: #000".to_owned()]); assert_eq!(out, expected); @@ -547,26 +559,26 @@ mod cosmetic_cache_tests { }, ]); - let out = cfcache.hostname_cosmetic_resources("sub.example.com"); - let mut expected = HostnameSpecificResources::empty(); + let out = cfcache.hostname_cosmetic_resources("sub.example.com", false); + let mut expected = UrlSpecificResources::empty(); assert_eq!(out, expected); - let out = cfcache.hostname_cosmetic_resources("sub.test.example.com"); + let out = cfcache.hostname_cosmetic_resources("sub.test.example.com", false); assert_eq!(out, expected); - let out = cfcache.hostname_cosmetic_resources("test.example.com"); + let out = cfcache.hostname_cosmetic_resources("test.example.com", false); expected.injected_script = "set-constant.js, atob, trueFunc\n".to_owned(); assert_eq!(out, expected); - let out = cfcache.hostname_cosmetic_resources("cosmetic.net"); + let out = cfcache.hostname_cosmetic_resources("cosmetic.net", false); expected.injected_script = "nowebrtc.js\n".to_owned(); assert_eq!(out, expected); - let out = cfcache.hostname_cosmetic_resources("g.cosmetic.net"); + let out = cfcache.hostname_cosmetic_resources("g.cosmetic.net", false); expected.injected_script = "nowebrtc.js\nwindow.open-defuser.js\n".to_owned(); assert_eq!(out, expected); - let out = cfcache.hostname_cosmetic_resources("c.g.cosmetic.net"); + let out = cfcache.hostname_cosmetic_resources("c.g.cosmetic.net", false); expected.injected_script = "window.open-defuser.js\n".to_owned(); assert_eq!(out, expected); } @@ -619,7 +631,7 @@ mod cosmetic_cache_tests { "~test.com###test-element", ]; let cfcache = CosmeticFilterCache::new(rules.iter().map(|r| CosmeticFilter::parse(r, false).unwrap()).collect::>()); - let exceptions = cfcache.hostname_cosmetic_resources("example.co.uk").exceptions; + let exceptions = cfcache.hostname_cosmetic_resources("example.co.uk", false).exceptions; let out = cfcache.hidden_class_id_selectors(&["a-class".into()], &[], &exceptions); assert_eq!(out, [".a-class .with .children"]); @@ -630,7 +642,7 @@ mod cosmetic_cache_tests { let out = cfcache.hidden_class_id_selectors(&[], &["test-element".into()], &exceptions); assert_eq!(out, ["#test-element"]); - let exceptions = cfcache.hostname_cosmetic_resources("a1.test.com").exceptions; + let exceptions = cfcache.hostname_cosmetic_resources("a1.test.com", false).exceptions; let out = cfcache.hidden_class_id_selectors(&["a-class".into()], &[], &exceptions); assert_eq!(out, [".a-class", ".a-class .with .children"]); @@ -653,14 +665,14 @@ mod cosmetic_cache_tests { ]; let cfcache = CosmeticFilterCache::new(rules.iter().map(|r| CosmeticFilter::parse(r, false).unwrap()).collect::>()); - let hide_selectors = cfcache.hostname_cosmetic_resources("test.com").hide_selectors; + let hide_selectors = cfcache.hostname_cosmetic_resources("test.com", false).hide_selectors; let mut expected_hides = HashSet::new(); expected_hides.insert("a[href=\"bad.com\"]".to_owned()); expected_hides.insert("div > p".to_owned()); expected_hides.insert("a[href=\"notbad.com\"]".to_owned()); assert_eq!(hide_selectors, expected_hides); - let hide_selectors = cfcache.hostname_cosmetic_resources("example.com").hide_selectors; + let hide_selectors = cfcache.hostname_cosmetic_resources("example.com", false).hide_selectors; let mut expected_hides = HashSet::new(); expected_hides.insert("a[href=\"bad.com\"]".to_owned()); assert_eq!(hide_selectors, expected_hides); diff --git a/src/engine.rs b/src/engine.rs index 5836c4b..ecb2837 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -1,6 +1,6 @@ use crate::blocker::{Blocker, BlockerError, BlockerOptions, BlockerResult}; -use crate::cosmetic_filter_cache::{CosmeticFilterCache, HostnameSpecificResources}; +use crate::cosmetic_filter_cache::{CosmeticFilterCache, UrlSpecificResources}; use crate::lists::{parse_filters, parse_filter, ParsedFilter, FilterParseError}; use crate::request::Request; use crate::filters::network::NetworkFilter; @@ -227,11 +227,19 @@ impl Engine { self.cosmetic_cache.hidden_class_id_selectors(classes, ids, exceptions) } - /// Returns a set of cosmetic filter resources required for a particular hostname. Once this - /// has been called, all CSS ids and classes on a page should be passed to - /// `hidden_class_id_selectors` to obtain any stylesheets consisting of generic rules. - pub fn hostname_cosmetic_resources(&self, hostname: &str) -> HostnameSpecificResources { - self.cosmetic_cache.hostname_cosmetic_resources(hostname) + /// Returns a set of cosmetic filter resources required for a particular url. Once this has + /// been called, all CSS ids and classes on a page should be passed to + /// `hidden_class_id_selectors` to obtain any stylesheets consisting of generic rules (if the + /// returned `generichide` value is false). + pub fn url_cosmetic_resources(&self, url: &str) -> UrlSpecificResources { + let request = Request::from_url(url); + if request.is_err() { + return UrlSpecificResources::empty(); + } + let request = request.unwrap(); + + let generichide = self.blocker.check_generic_hide(&request); + self.cosmetic_cache.hostname_cosmetic_resources(&request.hostname, generichide) } } @@ -530,4 +538,32 @@ mod tests { assert_eq!(resource.content_type, "application/javascript"); assert_eq!(&resource.data, script); } + + #[test] + fn generichide() { + let filters = vec![ + String::from("##.donotblock"), + String::from("##a[href=\"generic.com\"]"), + + String::from("@@||example.com$generichide"), + String::from("example.com##.block"), + + String::from("@@||example2.com/test.html$generichide"), + String::from("example2.com##.block"), + ]; + let url_results = vec![ + ("https://example.com", vec![".block"], true), + ("https://example.com/test.html", vec![".block"], true), + ("https://example2.com", vec![".block", "a[href=\"generic.com\"]"], false), + ("https://example2.com/test.html", vec![".block"], true), + ]; + + let engine = Engine::from_rules_parametrised(&filters, true, true, true, false); + + url_results.into_iter().for_each(|(url, expected_result, expected_generichide)| { + let result = engine.url_cosmetic_resources(url); + assert_eq!(result.hide_selectors, expected_result.iter().map(|s| s.to_string()).collect::>()); + assert_eq!(result.generichide, expected_generichide); + }); + } } diff --git a/src/filters/network.rs b/src/filters/network.rs index c4e6880..7ef4eb3 100644 --- a/src/filters/network.rs +++ b/src/filters/network.rs @@ -22,6 +22,8 @@ pub enum NetworkFilterError { NegatedExplicitCancel, NegatedRedirection, NegatedTag, + NegatedGenericHide, + GenericHideWithoutException, EmptyRedirection, UnrecognisedOption, NoRegex, @@ -53,6 +55,7 @@ bitflags! { const FIRST_PARTY = 1 << 17; const EXPLICIT_CANCEL = 1 << 26; const BAD_FILTER = 1 << 27; + const GENERIC_HIDE = 1 << 30; // full document rules tend to be handled differently const FROM_DOCUMENT = 1 << 29; @@ -341,6 +344,10 @@ impl NetworkFilter { csp = Some(String::from(value)); } } + ("generichide", true) => return Err(NetworkFilterError::NegatedGenericHide), + ("generichide", false) => mask.set(NetworkFilterMask::GENERIC_HIDE, true), + ("ghide", true) => return Err(NetworkFilterError::NegatedGenericHide), + ("ghide", false) => mask.set(NetworkFilterMask::GENERIC_HIDE, true), (_, negation) => { // Handle content type options separatly let mut option_mask = NetworkFilterMask::NONE; @@ -558,6 +565,12 @@ impl NetworkFilter { None }; + if mask.contains(NetworkFilterMask::GENERIC_HIDE) { + if !mask.contains(NetworkFilterMask::IS_EXCEPTION) { + return Err(NetworkFilterError::GenericHideWithoutException); + } + } + Ok(NetworkFilter { bug, csp, @@ -739,6 +752,10 @@ impl NetworkFilter { pub fn is_badfilter(&self) -> bool { self.mask.contains(NetworkFilterMask::BAD_FILTER) } + + pub fn is_generic_hide(&self) -> bool { + self.mask.contains(NetworkFilterMask::GENERIC_HIDE) + } pub fn is_regex(&self) -> bool { self.mask.contains(NetworkFilterMask::IS_REGEX) @@ -2250,11 +2267,38 @@ mod parse_tests { } } + #[test] + fn parses_generic_hide() { + { + let filter = NetworkFilter::parse("||foo.com$generichide", true); + assert!(filter.is_err()); + } + { + let filter = NetworkFilter::parse("@@||foo.com$generichide", true).unwrap(); + assert_eq!(filter.is_exception(), true); + assert_eq!(filter.is_generic_hide(), true); + } + { + let filter = NetworkFilter::parse("@@||foo.com|$generichide", true).unwrap(); + assert_eq!(filter.is_exception(), true); + assert_eq!(filter.is_generic_hide(), true); + } + { + let filter = NetworkFilter::parse("@@$generichide,domain=example.com", true).unwrap(); + assert_eq!(filter.is_generic_hide(), true); + let breakdown = NetworkFilterBreakdown::from(&filter); + assert_eq!(breakdown.opt_domains, Some(vec![utils::fast_hash("example.com")])); + } + { + let filter = NetworkFilter::parse("||foo.com", true).unwrap(); + assert_eq!(filter.is_generic_hide(), false); + } + } + #[test] fn handles_unsupported_options() { let options = vec![ "genericblock", - "generichide", "inline-script", "popunder", "popup",