123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159 |
- <?php
/**
 * Class Trie
 * Keyword filter backed by a trie (prefix tree).
 *
 * Usage:
 * $trie = new Qii\Library\Trie();
 * $arr = file('test.txt');        // one keyword per line
 * $trie->setWords($arr);
 * $trie->search('过滤词aha');
 */
- namespace Qii\Library;
class Trie
{
    /**
     * Root node of the trie.
     *
     * Every node is an array with two keys: 'children', a map from a single
     * byte of a keyword to the next node, and 'isWord', which is true when
     * the path from the root to this node spells a complete keyword.
     *
     * @var array
     */
    private $trie;

    /**
     * Create an empty trie containing only the root node.
     */
    public function __construct()
    {
        $this->trie = array('children' => array(), 'isWord' => false);
    }

    /**
     * Add a single keyword to the filter.
     *
     * The keyword is trimmed first, so lines read with file() (which keep
     * their trailing newline) can be passed directly. A keyword that is empty
     * after trimming is ignored. Keywords are stored byte by byte.
     *
     * @param string $word keyword to add
     */
    public function setWord($word)
    {
        $word = trim($word);
        $length = strlen($word);
        if ($length === 0) {
            return;
        }

        $node = &$this->trie;
        for ($i = 0; $i < $length; $i++) {
            $character = $word[$i];
            if (!isset($node['children'][$character])) {
                $node['children'][$character] = array('children' => array(), 'isWord' => false);
            }
            $node = &$node['children'][$character];
        }
        // Only flip the flag on the final node. Replacing the node itself
        // would throw away longer keywords that share this prefix.
        $node['isWord'] = true;
        unset($node);
    }

    /**
     * Add several keywords to the filter.
     *
     * Anything other than an array is silently ignored.
     *
     * @param array $words list of keywords
     */
    public function setWords($words)
    {
        if (!is_array($words)) {
            return;
        }

        foreach ($words as $word) {
            $this->setWord($word);
        }
    }

    /**
     * Tell whether the given string is exactly one of the stored keywords.
     *
     * The argument is not trimmed. An empty string is never a keyword.
     *
     * @param string $word
     * @return bool
     */
    public function isWord($word)
    {
        $length = strlen($word);
        if ($length === 0) {
            return false;
        }

        $node = $this->trie;
        for ($i = 0; $i < $length; $i++) {
            $character = $word[$i];
            if (!isset($node['children'][$character])) {
                return false;
            }
            $node = $node['children'][$character];
        }

        return $node['isWord'] === true;
    }

    /**
     * Find every occurrence of every stored keyword inside a text.
     *
     * The trie is walked from each byte offset of the text in turn, so
     * occurrences that overlap, or that start inside a failed partial match,
     * are all reported. Results are ordered by start offset, then by length.
     *
     * @param string $text text to scan
     * @return array list of array('position' => int, 'word' => string), where
     *               'position' is the byte offset of the match in $text
     */
    public function search($text = "")
    {
        $textLen = strlen($text);
        $find = array();

        for ($start = 0; $start < $textLen; $start++) {
            $node = $this->trie;
            for ($i = $start; $i < $textLen; $i++) {
                $character = $text[$i];
                if (!isset($node['children'][$character])) {
                    break;
                }
                $node = $node['children'][$character];
                if ($node['isWord']) {
                    $find[] = array(
                        'position' => $start,
                        'word' => substr($text, $start, $i - $start + 1),
                    );
                }
            }
        }

        return $find;
    }

    /**
     * Find the longest keyword occurring in a text.
     *
     * Length is measured in bytes. When several matches share the greatest
     * length, the one that occurs last in the text is returned.
     *
     * @param string $text text to scan
     * @return mixed array('position' => int, 'word' => string) for the longest
     *               match, or null when no keyword occurs in the text
     */
    public function searchMax($text)
    {
        $longest = null;
        $longestLen = 0;

        foreach ($this->search($text) as $match) {
            $matchLen = strlen($match['word']);
            // ">=" lets a later match of equal length win the tie.
            if ($matchLen >= $longestLen) {
                $longest = $match;
                $longestLen = $matchLen;
            }
        }

        return $longest;
    }
}
|