/**
 * Determine whether the current request was made by a search-engine crawler.
 *
 * The request's User-Agent header is lower-cased and tested for a
 * case-insensitive substring match against a fixed list of crawler signatures.
 *
 * @author Eddy
 * @return bool True when the User-Agent contains one of the signatures;
 *              false when the header is missing, empty, or nothing matches.
 */
function isCrawler()
{
    // The header is optional, so read it defensively instead of triggering an
    // undefined-index warning and passing null to strtolower().
    $agent = isset($_SERVER['HTTP_USER_AGENT'])
        ? strtolower((string) $_SERVER['HTTP_USER_AGENT'])
        : '';

    if ($agent === '') {
        return false;
    }

    $spiderSite = array(
        "TencentTraveler",
        "Baiduspider+",
        "BaiduGame",
        "Googlebot",
        "msnbot",
        "Sosospider+",
        "Sogou web spider",
        "ia_archiver",
        "Yahoo! Slurp",
        "YoudaoBot",
        "Yahoo Slurp",
        "MSNBot",
        "Java (Often spam bot)",
        "BaiDuSpider",
        "Voila",
        "Yandex bot",
        "BSpider",
        "twiceler",
        "Sogou Spider",
        "Speedy Spider",
        "Google AdSense",
        "Heritrix",
        "Python-urllib",
        "Alexa (IA Archiver)",
        "Ask",
        "Exabot",
        "Custo",
        "OutfoxBot/YodaoBot",
        "yacy",
        "SurveyBot",
        "legs",
        "lwp-trivial",
        "Nutch",
        "StackRambler",
        "The web archive (IA Archiver)",
        "Perl tool",
        "MJ12bot",
        "Netcraft",
        "MSIECrawler",
        "WGet tools",
        "larbin",
        "Fish search",
    );

    foreach ($spiderSite as $signature) {
        // Compare against the lower-cased agent; "!== false" is required
        // because a match at offset 0 is a valid hit.
        if (strpos($agent, strtolower($signature)) !== false) {
            return true;
        }
    }

    // No signature matched: return an explicit false rather than falling off
    // the end of the function (which would yield null).
    return false;
}
判断蜘蛛的代码网上倒是能搜到一大把，不过都是千篇一律、复制来复制去的，也不够全面。我这里整理了一份比较全面的代码：
/**
 * Determine whether the current request's User-Agent looks like a crawler.
 *
 * The User-Agent header is lower-cased and searched for a small set of
 * generic crawler keywords ("bot", "spider", "slurp", ...).
 *
 * @return bool True when any keyword occurs anywhere in the User-Agent;
 *              false when the header is missing, empty, or nothing matches.
 *              Callers should branch on this value to run crawler-specific
 *              handling.
 */
function is_spider()
{
    // The header is optional, so read it defensively.
    $userAgent = isset($_SERVER['HTTP_USER_AGENT'])
        ? strtolower((string) $_SERVER['HTTP_USER_AGENT'])
        : '';

    if ($userAgent === '') {
        return false;
    }

    $keywords = array(
        "bot",
        "spider",
        "slurp",
        "mediapartners-google",
        "fast-webcrawler",
        "altavista",
        "ia_archiver",
    );

    foreach ($keywords as $keyword) {
        // strpos() returns 0 when the keyword is at the very start of the
        // string, so the result must be compared with false explicitly;
        // a plain truthiness check would miss such agents.
        if (strpos($userAgent, $keyword) !== false) {
            return true;
        }
    }

    return false;
}
|