diff --git a/.gitignore b/.gitignore index 71d702c..f000422 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,9 @@ tags # output from sample which can be used for importing *.dat +# but do include the public_suffix_list.dat file, which is used in tests +!public_suffix_list.dat + # Python compiled files *.pyc @@ -48,3 +51,5 @@ out # VIM temp files *.swp + +.vscode diff --git a/Makefile b/Makefile index 5c48d36..2b3c720 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ .PHONY: clean build: - ./node_modules/.bin/node-gyp configure && ./node_modules/.bin/node-gyp build + ./node_modules/.bin/node-gyp configure && ./node_modules/.bin/node-gyp build test: ./node_modules/node-gyp/gyp/gyp_main.py --generator-output=./build --depth=. -f ninja test/binding.gyp @@ -26,4 +26,4 @@ perf: ./build/out/Default/perf clean: - rm -Rf build + rm -Rf build \ No newline at end of file diff --git a/ad_block_client.cc b/ad_block_client.cc index 2c9fa8b..24aeeb0 100644 --- a/ad_block_client.cc +++ b/ad_block_client.cc @@ -12,6 +12,8 @@ #include "./cosmetic_filter.h" #include "./hashFn.h" #include "./no_fingerprint_domain.h" +#include "etld/matcher.h" +#include "etld/serialization.h" #include "BloomFilter.h" @@ -21,6 +23,10 @@ using std::cout; using std::endl; #endif +using brave_etld::SerializedBuffer; +using brave_etld::SerializationResult; +using brave_etld::matcher_from_serialization; + std::set unknownOptions; // Fast hash function applicable to 2 byte char checks @@ -587,6 +593,10 @@ void AdBlockClient::clear() { delete badFingerprintsHashSet; badFingerprintsHashSet = nullptr; } + if (etldMatcher) { + delete etldMatcher; + etldMatcher = nullptr; + } numFilters = 0; numCosmeticFilters = 0; @@ -685,7 +695,8 @@ bool isNoFingerprintDomainHashSetMiss(HashSet *hashSet, static_cast(host + hostLen - start))); } -bool isHostAnchoredHashSetMiss(const char *input, int inputLen, +bool isHostAnchoredHashSetMiss(const AdBlockClient * client, + const char *input, int inputLen, HashSet *hashSet, 
const char *inputHost, int inputHostLen, @@ -709,7 +720,9 @@ bool isHostAnchoredHashSetMiss(const char *input, int inputLen, if (*(start - 1) == '.') { Filter *filter = hashSet->Find(Filter(start, static_cast(inputHost + inputHostLen - start), - nullptr, start, inputHostLen - (start - inputHost))); + nullptr, + start, + inputHostLen - (start - inputHost))); if (filter && filter->matches(input, inputLen, contextOption, contextDomain)) { if (foundFilter) { @@ -722,8 +735,10 @@ bool isHostAnchoredHashSetMiss(const char *input, int inputLen, } Filter *filter = hashSet->Find(Filter(start, - static_cast(inputHost + inputHostLen - start), nullptr, - start, inputHostLen)); + static_cast(inputHost + inputHostLen - start), + nullptr, + start, + inputHostLen)); if (!filter) { return true; } @@ -756,7 +771,7 @@ bool AdBlockClient::matches(const char* input, FilterOption contextOption, if (contextDomain) { contextDomainLen = static_cast(strlen(contextDomain)); if (isThirdPartyHost(contextDomain, contextDomainLen, - inputHost, static_cast(inputHostLen))) { + inputHost, static_cast(inputHostLen), etldMatcher)) { contextOption = static_cast(contextOption | FOThirdParty); } else { @@ -809,7 +824,7 @@ bool AdBlockClient::matches(const char* input, FilterOption contextOption, if (!hasMatch) { bloomFilterMiss = bloomFilter && !bloomFilter->substringExists(input, AdBlockClient::kFingerprintSize); - hostAnchoredHashSetMiss = isHostAnchoredHashSetMiss(input, inputLen, + hostAnchoredHashSetMiss = isHostAnchoredHashSetMiss(this, input, inputLen, hostAnchoredHashSet, inputHost, inputHostLen, contextOption, contextDomain, matchedFilter); if (bloomFilterMiss && hostAnchoredHashSetMiss) { @@ -859,7 +874,7 @@ bool AdBlockClient::matches(const char* input, FilterOption contextOption, if (isNoFingerprintDomainHashSetMiss( noFingerprintAntiDomainExceptionHashSet, contextDomain, - contextDomainLen )) { + contextDomainLen)) { hasExceptionMatch = hasExceptionMatch || 
hasMatchingFilters(noFingerprintAntiDomainOnlyExceptionFilters, numNoFingerprintAntiDomainOnlyExceptionFilters, input, inputLen, @@ -883,9 +898,9 @@ bool AdBlockClient::matches(const char* input, FilterOption contextOption, && !exceptionBloomFilter->substringExists(input, AdBlockClient::kFingerprintSize); bool hostAnchoredExceptionHashSetMiss = - isHostAnchoredHashSetMiss(input, inputLen, hostAnchoredExceptionHashSet, - inputHost, inputHostLen, contextOption, contextDomain, - matchedExceptionFilter); + isHostAnchoredHashSetMiss(this, input, inputLen, + hostAnchoredExceptionHashSet, inputHost, inputHostLen, + contextOption, contextDomain); // Now that we have a matching rule, we should check if no exception rule // hits, if none hits, we should block @@ -946,7 +961,7 @@ bool AdBlockClient::findMatchingFilters(const char *input, if (contextDomain) { contextDomainLen = static_cast(strlen(contextDomain)); if (isThirdPartyHost(contextDomain, contextDomainLen, - inputHost, static_cast(inputHostLen))) { + inputHost, static_cast(inputHostLen), etldMatcher)) { contextOption = static_cast(contextOption | FOThirdParty); } else { @@ -981,7 +996,7 @@ bool AdBlockClient::findMatchingFilters(const char *input, } if (!*matchingFilter) { - isHostAnchoredHashSetMiss(input, inputLen, + isHostAnchoredHashSetMiss(this, input, inputLen, hostAnchoredHashSet, inputHost, inputHostLen, contextOption, contextDomain, matchingFilter); } @@ -1010,9 +1025,9 @@ bool AdBlockClient::findMatchingFilters(const char *input, } if (!*matchingExceptionFilter) { - isHostAnchoredHashSetMiss(input, inputLen, hostAnchoredExceptionHashSet, - inputHost, inputHostLen, contextOption, contextDomain, - matchingExceptionFilter); + isHostAnchoredHashSetMiss(this, input, inputLen, + hostAnchoredExceptionHashSet, inputHost, inputHostLen, contextOption, + contextDomain, matchingExceptionFilter); } if (!*matchingExceptionFilter) { @@ -1055,7 +1070,7 @@ void setFilterBorrowedMemory(Filter *filters, int numFilters) { } // 
Parses the filter data into a few collections of filters and enables -// efficent querying. +// efficient querying. bool AdBlockClient::parse(const char *input, bool preserveRules) { // If the user is parsing and we have regex support, // then we can determine the fingerprints for the bloom filter. @@ -1365,6 +1380,7 @@ bool AdBlockClient::parse(const char *input, bool preserveRules) { &simpleCosmeticFilters, preserveRules); if (!f.hasUnsupportedOptions()) { + f.setEtldMatcher(etldMatcher); switch (f.filterType & FTListTypesMask) { case FTException: if (f.filterType & FTHostOnly) { @@ -1441,6 +1457,14 @@ bool AdBlockClient::parse(const char *input, bool preserveRules) { return true; } +void AdBlockClient::parsePublicSuffixRules(const char *input) { + if (etldMatcher != nullptr) { + delete etldMatcher; + } + + etldMatcher = new Matcher(std::string(input)); +} + // Fills the specified buffer if specified, returns the number of characters // written or needed int serializeFilters(char * buffer, size_t bufferSizeAvail, @@ -1542,10 +1566,19 @@ char * AdBlockClient::serialize(int *totalSize, &noFingerprintAntiDomainExceptionHashSetSize); } + SerializedBuffer serializedMatcherBuffer; + int serializedMatcherBufSize; + if (etldMatcher == nullptr) { + etldMatcher = new Matcher(); + } + SerializationResult matcherSerializationResult = etldMatcher->Serialize(); + serializedMatcherBuffer = matcherSerializationResult.buffer; + serializedMatcherBufSize = static_cast(serializedMatcherBuffer.size()); + // Get the number of bytes that we'll need char sz[512]; *totalSize += 1 + snprintf(sz, sizeof(sz), - "%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x", + "%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x", numFilters, numExceptionFilters, adjustedNumCosmeticFilters, adjustedNumHtmlFilters, numNoFingerprintFilters, numNoFingerprintExceptionFilters, @@ -1554,13 +1587,16 @@ char * AdBlockClient::serialize(int *totalSize, 
numNoFingerprintDomainOnlyExceptionFilters, numNoFingerprintAntiDomainOnlyExceptionFilters, numHostAnchoredFilters, numHostAnchoredExceptionFilters, - bloomFilter ? bloomFilter->getByteBufferSize() : 0, exceptionBloomFilter - ? exceptionBloomFilter->getByteBufferSize() : 0, - hostAnchoredHashSetSize, hostAnchoredExceptionHashSetSize, - noFingerprintDomainHashSetSize, - noFingerprintAntiDomainHashSetSize, - noFingerprintDomainExceptionHashSetSize, - noFingerprintAntiDomainExceptionHashSetSize); + bloomFilter ? bloomFilter->getByteBufferSize() : 0, + exceptionBloomFilter ? exceptionBloomFilter->getByteBufferSize() : 0, + hostAnchoredHashSetSize, + hostAnchoredExceptionHashSetSize, + noFingerprintDomainHashSetSize, + noFingerprintAntiDomainHashSetSize, + noFingerprintDomainExceptionHashSetSize, + noFingerprintAntiDomainExceptionHashSetSize, + serializedMatcherBufSize); + *totalSize += serializeFilters(nullptr, 0, filters, numFilters) + serializeFilters(nullptr, 0, exceptionFilters, numExceptionFilters) + serializeFilters(nullptr, 0, cosmeticFilters, adjustedNumCosmeticFilters) + @@ -1588,6 +1624,7 @@ char * AdBlockClient::serialize(int *totalSize, *totalSize += noFingerprintAntiDomainHashSetSize; *totalSize += noFingerprintDomainExceptionHashSetSize; *totalSize += noFingerprintAntiDomainExceptionHashSetSize; + *totalSize += serializedMatcherBufSize; // Allocate it int pos = 0; @@ -1667,6 +1704,12 @@ char * AdBlockClient::serialize(int *totalSize, delete[] noFingerprintAntiDomainExceptionHashSetBuffer; } + if (serializedMatcherBufSize > 0) { + memcpy(buffer + pos, serializedMatcherBuffer.c_str(), + serializedMatcherBufSize); + pos += serializedMatcherBufSize; + } + return buffer; } @@ -1717,24 +1760,32 @@ bool AdBlockClient::deserialize(char *buffer) { noFingerprintDomainHashSetSize = 0, noFingerprintAntiDomainHashSetSize = 0, noFingerprintDomainExceptionHashSetSize = 0, - noFingerprintAntiDomainExceptionHashSetSize = 0; + noFingerprintAntiDomainExceptionHashSetSize = 
0, + etldMatcherBufSize = 0; int pos = 0; sscanf(buffer + pos, - "%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x", + "%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x", &numFilters, - &numExceptionFilters, &numCosmeticFilters, &numHtmlFilters, - &numNoFingerprintFilters, &numNoFingerprintExceptionFilters, + &numExceptionFilters, + &numCosmeticFilters, + &numHtmlFilters, + &numNoFingerprintFilters, + &numNoFingerprintExceptionFilters, &numNoFingerprintDomainOnlyFilters, &numNoFingerprintAntiDomainOnlyFilters, &numNoFingerprintDomainOnlyExceptionFilters, &numNoFingerprintAntiDomainOnlyExceptionFilters, - &numHostAnchoredFilters, &numHostAnchoredExceptionFilters, - &bloomFilterSize, &exceptionBloomFilterSize, - &hostAnchoredHashSetSize, &hostAnchoredExceptionHashSetSize, + &numHostAnchoredFilters, + &numHostAnchoredExceptionFilters, + &bloomFilterSize, + &exceptionBloomFilterSize, + &hostAnchoredHashSetSize, + &hostAnchoredExceptionHashSetSize, &noFingerprintDomainHashSetSize, &noFingerprintAntiDomainHashSetSize, &noFingerprintDomainExceptionHashSetSize, - &noFingerprintAntiDomainExceptionHashSetSize); + &noFingerprintAntiDomainExceptionHashSetSize, + &etldMatcherBufSize); pos += static_cast(strlen(buffer + pos)) + 1; filters = new Filter[numFilters]; @@ -1783,40 +1834,53 @@ bool AdBlockClient::deserialize(char *buffer) { pos += exceptionBloomFilterSize; if (!initHashSet(&hostAnchoredHashSet, buffer + pos, hostAnchoredHashSetSize)) { - return false; + return false; } pos += hostAnchoredHashSetSize; if (!initHashSet(&hostAnchoredExceptionHashSet, buffer + pos, hostAnchoredExceptionHashSetSize)) { - return false; + return false; } pos += hostAnchoredExceptionHashSetSize; if (!initHashSet(&noFingerprintDomainHashSet, buffer + pos, noFingerprintDomainHashSetSize)) { - return false; + return false; } pos += noFingerprintDomainHashSetSize; if (!initHashSet(&noFingerprintAntiDomainHashSet, buffer + pos, noFingerprintAntiDomainHashSetSize)) { - 
return false; + return false; } pos += noFingerprintAntiDomainHashSetSize; if (!initHashSet(&noFingerprintDomainExceptionHashSet, buffer + pos, noFingerprintDomainExceptionHashSetSize)) { - return false; + return false; } pos += noFingerprintDomainExceptionHashSetSize; if (!initHashSet(&noFingerprintAntiDomainExceptionHashSet, buffer + pos, noFingerprintAntiDomainExceptionHashSetSize)) { - return false; + return false; } pos += noFingerprintAntiDomainExceptionHashSetSize; + if (etldMatcher != nullptr) { + delete etldMatcher; + } + + if (etldMatcherBufSize == 0) { + etldMatcher = new Matcher(); + } else { + SerializedBuffer serializedmatcherBuffer = std::string( + buffer + pos, etldMatcherBufSize); + Matcher newMatcher = matcher_from_serialization(serializedmatcherBuffer); + etldMatcher = new Matcher(newMatcher); + } + return true; } diff --git a/ad_block_client.h b/ad_block_client.h index 5122780..ee11a5f 100644 --- a/ad_block_client.h +++ b/ad_block_client.h @@ -9,6 +9,7 @@ #include #include #include "./filter.h" +#include "etld/matcher.h" class CosmeticFilter; class BloomFilter; @@ -24,9 +25,9 @@ class AdBlockClient { ~AdBlockClient(); void clear(); -// bool parse(const char *input); bool parse(const char *input, bool preserveRules = false); - bool matches(const char* input, + void parsePublicSuffixRules(const char *input); + bool matches(const char *input, FilterOption contextOption = FONoFilterOption, const char* contextDomain = nullptr, Filter** matchedFilter = nullptr, @@ -110,6 +111,7 @@ class AdBlockClient { template bool initHashSet(HashSet**, char *buffer, int len); char *deserializedBuffer; + brave_etld::Matcher* etldMatcher = nullptr; }; extern std::set unknownOptions; diff --git a/ad_block_client_wrap.cc b/ad_block_client_wrap.cc index fe24fa0..93b5721 100644 --- a/ad_block_client_wrap.cc +++ b/ad_block_client_wrap.cc @@ -104,6 +104,8 @@ void AdBlockClientWrap::Init(Local exports) { // Prototype NODE_SET_PROTOTYPE_METHOD(tpl, "clear", 
AdBlockClientWrap::Clear); NODE_SET_PROTOTYPE_METHOD(tpl, "parse", AdBlockClientWrap::Parse); + NODE_SET_PROTOTYPE_METHOD(tpl, "parsePublicSuffixRules", + AdBlockClientWrap::ParsePublicSuffixRules); NODE_SET_PROTOTYPE_METHOD(tpl, "matches", AdBlockClientWrap::Matches); NODE_SET_PROTOTYPE_METHOD(tpl, "findMatchingFilters", AdBlockClientWrap::FindMatchingFilters); @@ -232,6 +234,17 @@ void AdBlockClientWrap::Parse(const FunctionCallbackInfo& args) { obj->parse(buffer, preserveRules); } +void AdBlockClientWrap::ParsePublicSuffixRules( + const FunctionCallbackInfo& args) { + Isolate* isolate = args.GetIsolate(); + String::Utf8Value str(isolate, args[0]->ToString()); + const char * buffer = *str; + + AdBlockClientWrap* obj = + ObjectWrap::Unwrap(args.Holder()); + obj->parsePublicSuffixRules(buffer); +} + void AdBlockClientWrap::Matches(const FunctionCallbackInfo& args) { Isolate* isolate = args.GetIsolate(); String::Utf8Value str(isolate, args[0]->ToString()); diff --git a/ad_block_client_wrap.h b/ad_block_client_wrap.h index 3978c5b..53529d0 100644 --- a/ad_block_client_wrap.h +++ b/ad_block_client_wrap.h @@ -28,6 +28,8 @@ class AdBlockClientWrap : public AdBlockClient, public node::ObjectWrap { static void Clear(const v8::FunctionCallbackInfo& args); static void Parse(const v8::FunctionCallbackInfo& args); + static void ParsePublicSuffixRules( + const v8::FunctionCallbackInfo& args); static void Matches(const v8::FunctionCallbackInfo& args); static void Serialize(const v8::FunctionCallbackInfo& args); static void Deserialize(const v8::FunctionCallbackInfo& args); diff --git a/binding.gyp b/binding.gyp index fe26fc8..283b48c 100644 --- a/binding.gyp +++ b/binding.gyp @@ -19,6 +19,19 @@ "no_fingerprint_domain.h", "protocol.cc", "protocol.h", + "etld/domain.cc", + "etld/domain.h", + "etld/matcher.cc", + "etld/matcher.h", + "etld/internal/parser.cc", + "etld/internal/parser.h", + "etld/internal/public_suffix_rule_set.cc", + "etld/internal/public_suffix_rule_set.h", + 
"etld/internal/public_suffix_rule.cc", + "etld/internal/public_suffix_rule.h", + "etld/serialization.cc", + "etld/serialization.h", + "etld/types.h", "./node_modules/bloom-filter-cpp/BloomFilter.cpp", "./node_modules/bloom-filter-cpp/BloomFilter.h", "./node_modules/bloom-filter-cpp/hashFn.cpp", diff --git a/brave/BUILD.gn b/brave/BUILD.gn index 37a4ba3..9c95ffd 100644 --- a/brave/BUILD.gn +++ b/brave/BUILD.gn @@ -26,6 +26,19 @@ source_set("ad-block") { "../no_fingerprint_domain.h", "../protocol.cc", "../protocol.h", + "../etld/domain.cc", + "../etld/domain.h", + "../etld/matcher.cc", + "../etld/matcher.h", + "../etld/internal/parser.cc", + "../etld/internal/parser.h", + "../etld/internal/public_suffix_rule.cc", + "../etld/internal/public_suffix_rule.h", + "../etld/internal/public_suffix_rule_set.cc", + "../etld/internal/public_suffix_rule_set.h", + "../etld/serialization.cc", + "../etld/serialization.h", + "../etld/types.h", ] deps = [ diff --git a/etld/domain.cc b/etld/domain.cc new file mode 100644 index 0000000..916a7dc --- /dev/null +++ b/etld/domain.cc @@ -0,0 +1,67 @@ +/* Copyright (c) 2019 The Brave Software Team. Distributed under the MPL2 + * license. This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "etld/domain.h" +#include +#include + +using ::std::string; +using ::std::stringstream; +using ::std::vector; + +namespace brave_etld { + +Domain::Domain(const vector