PHP判断是否为百度蜘蛛进行DEDE相关词采集

DEDE 2019-10-13 21:37:01 评论
<p>更多相关:{dede:tag row='30' sort='new' getall='0'}<a href='/ss-[field:tag /]/' target="_blank">[field:tag /]</a>&nbsp;&nbsp;{/dede:tag}
{dede:php}
// Fetch Baidu's "related searches" for the article title and print them as
// internal /ss-<keyword>/ links. The outbound request is made only when the
// visitor's User-Agent contains "baidu"; everyone else gets a placeholder.
$userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
if (preg_match("#(baidu)#si", $userAgent)) {
    // The title is only ever placed in a URL query string, so urlencode() is
    // the correct (and sufficient) escaping; HTML-escaping it first would send
    // entities such as "&amp;" to the search engine.
    $keyword = $refObj->Fields['title'];

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, "http://www.baidu.com/s?wd=".urlencode($keyword));
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_HEADER, 1);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10); // give up after 10 seconds
    $contents = curl_exec($ch);            // response text, or false on failure
    curl_close($ch);

    // Start from an empty string so a failed fetch or zero matches prints nothing
    // instead of raising an undefined-variable notice.
    $name = '';
    $baidugjc = array();

    // '#' is used as the delimiter because the pattern contains a literal "</a>";
    // with '/' as delimiter the pattern would terminate early and fail to compile.
    // \S+ consumes the rest of the link's query string up to the closing quote.
    // NOTE(review): assumes related-search anchors carry "rs_src=0" in their href
    // — verify against the current result-page markup.
    if (is_string($contents)
        && preg_match_all('#rs_src=0\S+">([^"]+)</a>#', $contents, $baidugjc)) {
        foreach ($baidugjc[1] as $v) {
            // The text comes from a remote page, so escape it before embedding in HTML.
            $label = htmlspecialchars($v, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            $name .= '<a href="/ss-'.urlencode($v).'/">'.$label.'</a> ';
        }
    }
    echo $name;
} else {
    echo '载入中...';
}
{/dede:php}
</p>

DEDE搜索页相关词采集

<p style="padding:5px 0px"><b>相关搜索:</b></p>
{dede:global name='keyword' runphp="yes"}
// Fetch Baidu's "related searches" for the current search keyword and store
// them, as <li> links to /ss-<keyword>/, back into @me. The outbound request
// is made only when the visitor's User-Agent contains "baidu"; otherwise @me
// becomes a placeholder string.
// NOTE(review): @me is presumably the DedeCMS runphp placeholder for this
// tag's value — confirm against the template engine.
$userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
if (preg_match("#(baidu)#si", $userAgent)) {
    // The keyword is only ever placed in a URL query string, so urlencode() is
    // the correct (and sufficient) escaping; HTML-escaping it first would send
    // entities such as "&amp;" to the search engine.
    $keyword = @me;

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, "http://www.baidu.com/s?wd=".urlencode($keyword));
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_HEADER, 1);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10); // give up after 10 seconds
    $contents = curl_exec($ch);            // response text, or false on failure
    curl_close($ch);

    // Start from an empty string so a failed fetch or zero matches yields an
    // empty list instead of an undefined-variable notice.
    $name = '';
    $baidugjc = array();

    // '#' is used as the delimiter because the pattern contains a literal "</a>";
    // with '/' as delimiter the pattern would terminate early and fail to compile.
    // \S+ consumes the rest of the link's query string up to the closing quote.
    // NOTE(review): assumes related-search anchors carry "rs_src=0" in their href
    // — verify against the current result-page markup.
    if (is_string($contents)
        && preg_match_all('#rs_src=0\S+">([^"]+)</a>#', $contents, $baidugjc)) {
        foreach ($baidugjc[1] as $v) {
            // The text comes from a remote page, so escape it before embedding in HTML.
            $label = htmlspecialchars($v, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            $name .= '<li><a href="/ss-'.urlencode($v).'/">'.$label.'</a></li>';
        }
    }
    @me = $name;
} else {
    @me = '载入中...';
}
{/dede:global}
<p style="clear:both"></p>

关键词调用

<div class="sidebar">
<h3><span>关键词列表</span></h3>
{dede:php}
// Print the hand-maintained keyword pool in random order as tag links to
// /ss-<keyword>/, each with a random CSS class tag1..tag9.
$a=array(
'养生按摩','养生'
);
shuffle($a); // shuffle() also reindexes the array from 0
foreach ($a as $i=>$k) {
    // Cap the output: stop once the index passes 30. The `if` must be closed
    // before the echo, otherwise the echo is unreachable and the braces do
    // not balance.
    // NOTE(review): `> 30` lets 31 items through; use `>= 30` if the intended
    // cap is exactly 30.
    if ($i > 30) {
        break;
    }
    echo '<a href="/ss-'.urlencode($k).'/" title="'.$k.'" class="tag'.rand(1,9).'">'.$k.'</a> ';
}
{/dede:php}
</div>

百度当天热门词

<div class="sidebar plus">
<h3><span>当日热门搜索</span></h3>
<ul>
{dede:php}
// Download Baidu's hot-search board and print every entry as an <li> link to
// the site's own /ss-<keyword>/ page.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://top.baidu.com/buzz?b=1&fr=topindex");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HEADER, 0);
// Present the request as coming from Baidu's own crawler (User-Agent and IP headers).
curl_setopt($ch, CURLOPT_USERAGENT, 'Baiduspider+(+http://www.baidu.com/search/spider.htm)');
curl_setopt($ch, CURLOPT_HTTPHEADER, array('X-FORWARDED-FOR: 202.108.11.8', 'CLIENT-IP: 202.108.11.8'));
curl_setopt($ch, CURLOPT_TIMEOUT, 10); // give up after 10 seconds
$baiduci = curl_exec($ch);             // response body, or false on failure
curl_close($ch);

// Skip parsing entirely when the request failed, so nothing is printed
// rather than feeding `false` into the string functions below.
if (is_string($baiduci) && $baiduci !== '') {
    // Normalise the page to UTF-8 before matching.
    $baiduci = mb_convert_encoding($baiduci, 'utf-8', 'GBK,UTF-8,ASCII');

    $ci = array();
    if (preg_match_all('/<a class="list-title"([^\>]+)>([^\"]+)<\/a>/', $baiduci, $ci)) {
        foreach ($ci[2] as $value) {
            // The text comes from a remote page, so escape it before embedding in HTML.
            $label = htmlspecialchars($value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            // Root-relative href, matching the other /ss-<keyword>/ links on the site;
            // a bare "ss-..." would resolve against the current page's directory.
            echo "<li><a href='/ss-".urlencode($value)."/'>".$label."</a></li>";
        }
    }
}
{/dede:php}
</ul>
</div>

评论

说点什么吧
  • 全部评论(0)
    还没有评论,快来抢沙发吧!