diff --git a/libraries/src/Environment/Browser.php b/libraries/src/Environment/Browser.php
index ade1f2e9ea3d9..440dd18f0c40a 100644
--- a/libraries/src/Environment/Browser.php
+++ b/libraries/src/Environment/Browser.php
@@ -76,54 +76,372 @@ class Browser
 	 * @since 12.1
 	 */
 	protected $robots = array(
-		// The most common ones.
-		'Googlebot',
-		'msnbot',
+		// Each entry is a PCRE fragment that isRobot() wraps in "/" delimiters: escape literal ".", "/", "(" and ")".
+		'Googlebot\/',
+		'Googlebot-Mobile',
+		'Googlebot-Image',
+		'Googlebot-News',
+		'Googlebot-Video',
+		'AdsBot-Google([^-]|$)',
+		'AdsBot-Google-Mobile',
+		'Feedfetcher-Google',
+		'Mediapartners-Google',
+		'Mediapartners \(Googlebot\)',
+		'APIs-Google',
+		'bingbot',
 		'Slurp',
-		'Yahoo',
-		// The rest alphabetically.
-		'Arachnoidea',
-		'ArchitextSpider',
-		'Ask Jeeves',
-		'B-l-i-t-z-Bot',
-		'Baiduspider',
-		'BecomeBot',
-		'cfetch',
-		'ConveraCrawler',
-		'ExtractorPro',
+		'[wW]get',
+		'curl',
+		'LinkedInBot',
+		'Python-urllib',
+		'python-requests',
+		'libwww',
+		'httpunit',
+		'nutch',
+		'Go-http-client',
+		'phpcrawl',
+		'msnbot',
+		'jyxobot',
 		'FAST-WebCrawler',
-		'FDSE robot',
-		'fido',
-		'geckobot',
+		'FAST Enterprise Crawler',
+		'BIGLOTRON',
+		'Teoma',
+		'convera',
+		'seekbot',
 		'Gigabot',
-		'Girafabot',
-		'grub-client',
-		'Gulliver',
-		'HTTrack',
+		'Gigablast',
+		'exabot',
 		'ia_archiver',
-		'InfoSeek',
-		'kinjabot',
-		'KIT-Fireball',
-		'larbin',
-		'LEIA',
-		'lmspider',
-		'Lycos_Spider',
-		'Mediapartners-Google',
-		'MuscatFerret',
-		'NaverBot',
-		'OmniExplorer_Bot',
-		'polybot',
-		'Pompos',
-		'Scooter',
-		'Teoma',
-		'TheSuBot',
-		'TurnitinBot',
-		'Ultraseek',
-		'ViolaBot',
-		'webbandit',
-		'www.almaden.ibm.com/cs/crawler',
-		'yandex.com/bots',
-		'ZyBorg',
+		'GingerCrawler',
+		'webmon ',
+		'HTTrack',
+		'grub\.org',
+		'UsineNouvelleCrawler',
+		'antibot',
+		'netresearchserver',
+		'speedy',
+		'fluffy',
+		'bibnum\.bnf',
+		'findlink',
+		'msrbot',
+		'panscient',
+		'yacybot',
+		'AISearchBot',
+		'ips-agent',
+		'tagoobot',
+		'MJ12bot',
+		'woriobot',
+		'yanga',
+		'buzzbot',
+		'mlbot',
+		'YandexBot',
+		'yandex\.com\/bots',
+		'purebot',
+		'Linguee Bot',
+		'CyberPatrol',
+		'voilabot',
+		'Baiduspider',
+		'citeseerxbot',
+		'spbot',
+		'twengabot',
+		'postrank',
+		'turnitinbot',
+		'scribdbot',
+		'page2rss',
+		'sitebot',
+		'linkdex',
+		'Adidxbot',
+		'blekkobot',
+		'ezooms',
+		'dotbot',
+		'Mail\.RU_Bot',
+		'discobot',
+		'heritrix',
+		'findthatfile',
+		'europarchive\.org',
+		'NerdByNature\.Bot',
+		'sistrix crawler',
+		'Ahrefs(Bot|SiteAudit)',
+		'fuelbot',
+		'CrunchBot',
+		'centurybot9',
+		'IndeedBot',
+		'mappydata',
+		'woobot',
+		'ZoominfoBot',
+		'PrivacyAwareBot',
+		'Multiviewbot',
+		'SWIMGBot',
+		'Grobbot',
+		'eright',
+		'Apercite',
+		'semanticbot',
+		'Aboundex',
+		'domaincrawler',
+		'wbsearchbot',
+		'summify',
+		'CCBot',
+		'edisterbot',
+		'seznambot',
+		'ec2linkfinder',
+		'gslfbot',
+		'aiHitBot',
+		'intelium_bot',
+		'facebookexternalhit',
+		'Yeti',
+		'RetrevoPageAnalyzer',
+		'lb-spider',
+		'Sogou',
+		'lssbot',
+		'careerbot',
+		'wotbox',
+		'wocbot',
+		'ichiro',
+		'DuckDuckBot',
+		'lssrocketcrawler',
+		'drupact',
+		'webcompanycrawler',
+		'acoonbot',
+		'openindexspider',
+		'gnam gnam spider',
+		'web-archive-net\.com\.bot',
+		'backlinkcrawler',
+		'coccoc',
+		'integromedb',
+		'content crawler spider',
+		'toplistbot',
+		'it2media-domain-crawler',
+		'ip-web-crawler\.com',
+		'siteexplorer\.info',
+		'elisabot',
+		'proximic',
+		'changedetection',
+		'arabot',
+		'WeSEE:Search',
+		'niki-bot',
+		'CrystalSemanticsBot',
+		'rogerbot',
+		'360Spider',
+		'psbot',
+		'InterfaxScanBot',
+		'CC Metadata Scaper',
+		'g00g1e\.net',
+		'GrapeshotCrawler',
+		'urlappendbot',
+		'brainobot',
+		'fr-crawler',
+		'binlar',
+		'SimpleCrawler',
+		'Twitterbot',
+		'cXensebot',
+		'smtbot',
+		'bnf\.fr_bot',
+		'A6-Indexer',
+		'ADmantX',
+		'Facebot',
+		'OrangeBot\/',
+		'memorybot',
+		'AdvBot',
+		'MegaIndex',
+		'SemanticScholarBot',
+		'ltx71',
+		'nerdybot',
+		'xovibot',
+		'BUbiNG',
+		'Qwantify',
+		'archive\.org_bot',
+		'Applebot',
+		'TweetmemeBot',
+		'crawler4j',
+		'findxbot',
+		'S[eE][mM]rushBot',
+		'yoozBot',
+		'lipperhey',
+		'Y!J',
+		'Domain Re-Animator Bot',
+		'AddThis',
+		'Screaming Frog SEO Spider',
+		'MetaURI',
+		'Scrapy',
+		'Livelap[bB]ot',
+		'OpenHoseBot',
+		'CapsuleChecker',
+		'collection@infegy\.com',
+		'IstellaBot',
+		'DeuSu\/',
+		'betaBot',
+		'Cliqzbot\/',
+		'MojeekBot\/',
+		'netEstate NE Crawler',
+		'SafeSearch microdata crawler',
+		'Gluten Free Crawler\/',
+		'Sonic',
+		'Sysomos',
+		'Trove',
+		'deadlinkchecker',
+		'Slack-ImgProxy',
+		'Embedly',
+		'RankActiveLinkBot',
+		'iskanie',
+		'SafeDNSBot',
+		'SkypeUriPreview',
+		'Veoozbot',
+		'Slackbot',
+		'redditbot',
+		'datagnionbot',
+		'Google-Adwords-Instant',
+		'adbeat_bot',
+		'WhatsApp',
+		'contxbot',
+		'pinterest',
+		'electricmonk',
+		'GarlikCrawler',
+		'BingPreview\/',
+		'vebidoobot',
+		'FemtosearchBot',
+		'Yahoo Link Preview',
+		'MetaJobBot',
+		'DomainStatsBot',
+		'mindUpBot',
+		'Daum\/',
+		'Jugendschutzprogramm-Crawler',
+		'Xenu Link Sleuth',
+		'Pcore-HTTP',
+		'moatbot',
+		'KosmioBot',
+		'pingdom',
+		'PhantomJS',
+		'Gowikibot',
+		'PiplBot',
+		'Discordbot',
+		'TelegramBot',
+		'Jetslide',
+		'newsharecounts',
+		'James BOT',
+		'Barkrowler',
+		'TinEye',
+		'SocialRankIOBot',
+		'trendictionbot',
+		'Ocarinabot',
+		'epicbot',
+		'Primalbot',
+		'DuckDuckGo-Favicons-Bot',
+		'GnowitNewsbot',
+		'Leikibot',
+		'LinkArchiver',
+		'YaK\/',
+		'PaperLiBot',
+		'Digg Deeper',
+		'dcrawl',
+		'Snacktory',
+		'AndersPinkBot',
+		'Fyrebot',
+		'EveryoneSocialBot',
+		'Mediatoolkitbot',
+		'Luminator-robots',
+		'ExtLinksBot',
+		'SurveyBot',
+		'NING\/',
+		'okhttp',
+		'Nuzzel',
+		'omgili',
+		'PocketParser',
+		'YisouSpider',
+		'um-LN',
+		'ToutiaoSpider',
+		'MuckRack',
+		'Jamie\'s Spider',
+		'AHC\/',
+		'NetcraftSurveyAgent',
+		'Laserlikebot',
+		'Apache-HttpClient',
+		'AppEngine-Google',
+		'Jetty',
+		'Upflow',
+		'Thinklab',
+		'Traackr\.com',
+		'Twurly',
+		'Mastodon',
+		'http_get',
+		'DnyzBot',
+		'botify',
+		'007ac9 Crawler',
+		'BehloolBot',
+		'BrandVerity',
+		'check_http',
+		'BDCbot',
+		'ZumBot',
+		'EZID',
+		'ICC-Crawler',
+		'ArchiveBot',
+		'^LCC ',
+		'filterdb\.iss\.net\/crawler',
+		'BLP_bbot',
+		'BomboraBot',
+		'Buck\/',
+		'Companybook-Crawler',
+		'Genieo',
+		'magpie-crawler',
+		'MeltwaterNews',
+		'Moreover',
+		'newspaper\/',
+		'ScoutJet',
+		'(^| )sentry\/',
+		'StorygizeBot',
+		'UptimeRobot',
+		'OutclicksBot',
+		'seoscanners',
+		'Hatena',
+		'Google Web Preview',
+		'MauiBot',
+		'AlphaBot',
+		'SBL-BOT',
+		'IAS crawler',
+		'adscanner',
+		'Netvibes',
+		'acapbot',
+		'Baidu-YunGuanCe',
+		'bitlybot',
+		'blogmuraBot',
+		'Bot\.AraTurka\.com',
+		'bot-pge\.chlooe\.com',
+		'BoxcarBot',
+		'BTWebClient',
+		'ContextAd Bot',
+		'Digincore bot',
+		'Disqus',
+		'Feedly',
+		'Fetch\/',
+		'Fever',
+		'Flamingo_SearchEngine',
+		'FlipboardProxy',
+		'g2reader-bot',
+		'imrbot',
+		'K7MLWCBot',
+		'Kemvibot',
+		'Landau-Media-Spider',
+		'linkapediabot',
+		'vkShare',
+		'Siteimprove\.com',
+		'BLEXBot\/',
+		'DareBoost',
+		'ZuperlistBot\/',
+		'Miniflux\/',
+		'Feedspotbot\/',
+		'Diffbot\/',
+		'SEOkicks',
+		'tracemyfile',
+		'Nimbostratus-Bot',
+		'zgrab',
+		'PR-CY\.RU',
+		'AdsTxtCrawler',
+		'Datafeedwatch',
+		'Zabbix',
+		'TangibleeBot',
+		'google-xrawler',
+		'axios',
+		'Amazon CloudFront',
+		'Pulsepoint',
 	);
 
 	/**
@@ -661,7 +979,7 @@ public function isRobot()
 	{
 		foreach ($this->robots as $robot)
 		{
-			if (strpos($this->agent, $robot) !== false)
+			if (preg_match('/' . $robot . '/', $this->agent))
 			{
 				return true;
 			}