DFA.php 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. <?php
  2. /**
  3. * DFA 关键词过滤
  4. * @link https://blog.csdn.net/u013303402/article/details/79218554
  5. *
  6. * @usage:
  7. *
  8. * $filter = new \Qii\Library\SensitiveWords\DFA();
  9. * $filter->setDefaultKeywords();
  10. * $result = $filter->searchKey('关键词');
  11. *
  12. */
  13. namespace Qii\Library\SensitiveWords;
  /**
   * Keyword (sensitive-word) filter backed by a character trie.
   *
   * Keywords are inserted one UTF-8 character at a time into a nested array;
   * searchKey() then reports whether any stored keyword occurs as a substring
   * of the text it is given.
   */
  class DFA
  {
      /**
       * Root of the keyword trie.
       *
       * Each key is a single UTF-8 character mapping to a child node. Every
       * child node carries an 'end' entry (1 when a keyword ends on that
       * character, 0 otherwise) next to its own character keys.
       *
       * @var array
       */
      private $arrHashMap = [];

      /**
       * Return the raw trie built so far.
       *
       * @return array
       */
      public function getHashMap()
      {
          return $this->arrHashMap;
      }

      /**
       * Load the default word list: the file Words.txt in this class's directory.
       *
       * @throws \Exception when the file is missing or cannot be opened
       */
      public function setDefaultKeywords()
      {
          // DIRECTORY_SEPARATOR is built in, so this no longer depends on a
          // project-level DS constant being defined before the call.
          $this->setTreeByFile(__DIR__ . DIRECTORY_SEPARATOR . 'Words.txt');
      }

      /**
       * Load keywords from a text file, one keyword per line.
       *
       * Each line is trimmed before insertion; blank lines add nothing.
       *
       * @param string $file path of the word-list file
       *
       * @throws \Exception when the file is missing or cannot be opened
       */
      public function setTreeByFile($file)
      {
          if (!is_file($file)) {
              throw new \Exception('敏感词文件不存在', __LINE__);
          }

          // Plain read mode; the original 'ra' is not a documented fopen mode.
          $fp = fopen($file, 'r');
          if (!$fp) {
              throw new \Exception('读取文件失败', __LINE__);
          }

          // Loop on the fgets() result so the false returned at EOF is never
          // handed to trim().
          while (($line = fgets($fp)) !== false) {
              $this->addKeyWord(trim($line));
          }

          // Release the handle; the original left it open.
          fclose($fp);
      }

      /**
       * Insert one keyword into the trie.
       *
       * An empty string is a no-op.
       *
       * @param string $strWord keyword to add (UTF-8)
       */
      public function addKeyWord($strWord)
      {
          $len = mb_strlen($strWord, 'UTF-8');
          $last = $len - 1;

          // Walk by reference so new nodes are written into $this->arrHashMap.
          $node = &$this->arrHashMap;
          for ($i = 0; $i < $len; $i++) {
              $char = mb_substr($strWord, $i, 1, 'UTF-8');

              if (!isset($node[$char])) {
                  $node[$char] = ['end' => 0];
              }
              if ($i === $last) {
                  // A keyword terminates here; existing children are kept.
                  $node[$char]['end'] = 1;
              }

              // Descend into the child node.
              $node = &$node[$char];
          }
      }

      /**
       * Tell whether the text contains any stored keyword as a substring.
       *
       * @param string $strWord text to scan (UTF-8)
       *
       * @return bool true as soon as one keyword is found, false otherwise
       */
      public function searchKey($strWord)
      {
          $len = mb_strlen($strWord, 'UTF-8');

          // Split once so the scan below does not re-run mb_substr per probe.
          $chars = [];
          for ($i = 0; $i < $len; $i++) {
              $chars[] = mb_substr($strWord, $i, 1, 'UTF-8');
          }

          // Try every start position. Resetting to the root and skipping the
          // mismatching character (as the original did) misses keywords that
          // begin inside a failed partial match, e.g. keyword "ab" in "aab".
          for ($start = 0; $start < $len; $start++) {
              $node = $this->arrHashMap;
              for ($i = $start; $i < $len; $i++) {
                  $char = $chars[$i];
                  if (!isset($node[$char])) {
                      break;
                  }
                  if ($node[$char]['end']) {
                      return true;
                  }
                  $node = $node[$char];
              }
          }

          return false;
      }
  }