【www.gdgbn.com--word】

PHP 教程:自动提取关键字的代码

$mincipin=5;// minimum word frequency: a candidate must occur at least this many times to be kept

$minlen=4;// minimum keyword length (used by tiqukeyword() as the starting substr() length)

// Run the extraction; tiqukeyword() prints its intermediate and final arrays.
// NOTE(review): $tiqustr is not assigned anywhere in the visible snippet —
// presumably the caller must set it to the text to analyse before this line; TODO confirm.
tiqukeyword($tiqustr,$minlen,$mincipin);

/**
 * Extract frequently repeated substrings ("keywords") from a text.
 *
 * Phase 1: for every start offset the shortest candidate ($minlen bytes) is
 * counted (non-overlapping occurrences from that offset onward). If it occurs
 * at least $mincipin times it is recorded, then grown one byte at a time for
 * as long as its count does not drop below the best count seen for that
 * offset; the longest such candidate replaces the recorded one.
 *
 * Phase 2: candidates are flagged as discarded when they repeat the text of a
 * reference candidate with a smaller count, or when their byte range lies
 * entirely inside the reference candidate's range.
 *
 * Phase 3: the unflagged candidates form the result.
 *
 * All offsets and lengths are BYTE based (strlen/substr/strpos), so multibyte
 * text may be cut in the middle of a character.
 *
 * The candidate list, the flagged list and the final list are printed with
 * print_r(); the final list is also returned.
 *
 * @param string $tiqustr  Text to analyse (null is treated as an empty string).
 * @param int    $minlen   Minimum keyword length in bytes, at least 1.
 * @param int    $mincipin Minimum number of occurrences, at least 1.
 *
 * @return array List of array(startOffset, length, keyword, count, 0).
 *
 * @throws InvalidArgumentException When $minlen or $mincipin is smaller than 1.
 */
function tiqukeyword($tiqustr,$minlen,$mincipin)
{
    $minlen = (int) $minlen;
    $mincipin = (int) $mincipin;

    // A zero length would make strpos() match everywhere without advancing,
    // and a zero frequency would divide by zero below.
    if ($minlen < 1 || $mincipin < 1) {
        throw new InvalidArgumentException(
            'tiqukeyword(): $minlen and $mincipin must both be at least 1'
        );
    }

    if ($tiqustr === null) {
        $tiqustr = '';
    }

    $strlong = strlen($tiqustr);
    $arr = array();
    $k = -1;
    $maxnum = 0;

    // ---- Phase 1: collect one candidate per start offset --------------------
    // "<=": the last offset that still leaves room for $mincipin keywords of
    // $minlen bytes is a valid start.
    for ($i = 0; $i <= ($strlong - $mincipin * $minlen); $i++) {
        // Longest keyword that can still fit $mincipin times from offset $i.
        $maxlen = (int) floor(($strlong - $i) / $mincipin);

        for ($j = $minlen; $j <= $maxlen; $j++) {
            $guanjianzi = substr($tiqustr, $i, $j);

            // The occurrence at $i itself counts as the first one; later ones
            // are searched after the end of the previous match.
            $num = 1;
            $wz = $i + $j;
            while ($wz < $strlong) {
                $found = strpos($tiqustr, $guanjianzi, $wz);
                if ($found === false) {
                    break;
                }
                $num++;
                $wz = $found + $j;
            }

            if ($j == $minlen) {
                // Shortest candidate: must reach the minimum frequency,
                // otherwise this start offset is abandoned.
                if ($num < $mincipin) {
                    break;
                }
                $maxnum = $num;
                $k++;
                $arr[$k] = array($i, $j, $guanjianzi, $num, 0);
            } else {
                // Longer candidate: keep extending only while the count holds.
                if ($num < $maxnum) {
                    break;
                }
                $maxnum = $num;
                $arr[$k] = array($i, $j, $guanjianzi, $num, 0);
            }
        }
    }

    echo "初步得到的数组:";
    print_r($arr);

    // ---- Phase 2: flag duplicates and contained candidates (index 4 = 1) ----
    $arrlong = count($arr);
    for ($i = 0; $i < $arrlong; $i++) {
        $bjarr = $arr[$i];   // current reference candidate
        $nowid = $i;         // index of the reference candidate in $arr
        if ($bjarr[4] == 1) {
            continue;
        }

        for ($j = $i + 1; $j < $arrlong; $j++) {
            if ($arr[$j][4] == 1) {
                continue;
            }

            // Inclusive byte ranges of the reference and the compared entry.
            $qujianks = $bjarr[0];
            $qujianjs = $bjarr[1] + $bjarr[0] - 1;
            $a = $arr[$j][0];
            $b = $arr[$j][1] + $arr[$j][0] - 1;

            // Same keyword text with a smaller count: drop the later entry.
            // Strict comparison so numeric-looking keywords ("1e3" vs "1000")
            // are not treated as equal.
            if (($bjarr[2] === $arr[$j][2]) && ($bjarr[3] > $arr[$j][3])) {
                $arr[$j][4] = 1;
            }

            if ($a <= $qujianks && $qujianks <= $b && $a <= $qujianjs && $qujianjs <= $b) {
                // Reference lies inside entry $j: if $j is at least as
                // frequent, drop the reference and continue with $j.
                if ($bjarr[3] <= $arr[$j][3]) {
                    $arr[$nowid][4] = 1;
                    $nowid = $j;
                    $bjarr = $arr[$j];
                }
            } elseif ($qujianks <= $a && $a <= $qujianjs && $qujianks <= $b && $b <= $qujianjs) {
                // Entry $j lies inside the reference: drop $j.
                $arr[$j][4] = 1;
            }
        }
    }

    echo "重叠加标记后的数组:";
    print_r($arr);

    // ---- Phase 3: keep only the unflagged candidates ------------------------
    $jieguoarr = array();
    for ($i = 0; $i < $arrlong; $i++) {
        if ($arr[$i][4] == 0) {
            $jieguoarr[] = $arr[$i];
        }
    }

    echo "最后得到的数组:";
    print_r($jieguoarr);

    return $jieguoarr;
}

 

本文来源:http://www.gdgbn.com/bangongshuma/29742/