【www.gdgbn.com--中文酷站】
结构:首字散列表、trie索引树结点
优点:分词中,不需预知待查询词的长度,沿树链逐字匹配。
缺点:构造和维护比较复杂,单词树枝多,浪费了一定的空间
* @version 0.1
* @todo 构造通用的字典算法,并写了一个简易的分词
* @author shjuto@gmail.com
* trie字典树
*
*/
class trie
{
private $trie;
function __construct()
{
$trie = array("children" => array(),"isword"=>false);
}
/**
* 把词加入词典
*
* @param string $key
*/
function &setword($word="")
{
$trienode = &$this->trie;
for($i = 0;$i < strlen($word);$i++)
{
$character = $word[$i];
if(!isset($trienode["children"][$character]))
{
$trienode["children"][$character] = array("isword"=>false);
}
if($i == strlen($word)-1)
{
$trienode["children"][$character] = array("isword"=>true);
}
$trienode = &$trienode["children"][$character];
}
}
/**
* 判断是否为词典词
*
* @param string $word
* @return bool true/false
*/
function & isword($word)
{
$trienode = &$this->trie;
for($i = 0;$i < strlen($word);$i++)
{
$character = $word[$i];
if(!isset($trienode["children"][$character]))
&