aboutsummaryrefslogtreecommitdiffstats
path: root/library/spam/b8/degenerator
diff options
context:
space:
mode:
Diffstat (limited to 'library/spam/b8/degenerator')
-rw-r--r--library/spam/b8/degenerator/degenerator_default.php127
1 files changed, 0 insertions, 127 deletions
diff --git a/library/spam/b8/degenerator/degenerator_default.php b/library/spam/b8/degenerator/degenerator_default.php
deleted file mode 100644
index 4ff6d882b..000000000
--- a/library/spam/b8/degenerator/degenerator_default.php
+++ /dev/null
@@ -1,127 +0,0 @@
-<?php
-
-# Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de>
-#
-# This file is part of the b8 package
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation in version 2.1 of the License.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program; if not, write to the Free Software Foundation,
-# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-
-/**
- * Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de>
- *
- * @license LGPL
- * @access public
- * @package b8
- * @author Tobias Leupold
- */
-
class b8_degenerator_default
{

    /**
     * Per-instance cache mapping a token to the list of degenerated
     * variants already computed for it by _degenerate_word().
     *
     * @var array
     */
    public $degenerates = array();

    /**
     * Generates a list of "degenerated" words for a list of words.
     *
     * @access public
     * @param array $words The tokens to degenerate
     * @return array An array containing an array of degenerated tokens for each token
     */
    public function degenerate(array $words)
    {
        $degenerates = array();

        foreach ($words as $word) {
            $degenerates[$word] = $this->_degenerate_word($word);
        }

        return $degenerates;
    }

    /**
     * Builds the "degenerated" versions of a single word: case variants,
     * variants with a trailing run of "!"/"?" collapsed or removed, and
     * variants with trailing dots stripped one at a time.
     *
     * The result never contains the word itself and never contains the
     * same variant twice. Results are cached in $this->degenerates.
     *
     * @access protected
     * @param string $word
     * @return array An array of degenerated words
     */
    protected function _degenerate_word($word)
    {
        # Tokens may arrive as integers (PHP casts numeric array keys), so
        # normalize first; the strict comparison below relies on this
        $word = (string) $word;

        # Check for any stored words so the process doesn't have to repeat
        if (isset($this->degenerates[$word])) {
            return $this->degenerates[$word];
        }

        # Different versions of upper and lower case and ucfirst
        $case_variants = array(
            strtolower($word),
            strtoupper($word),
            ucfirst($word),
        );

        $candidates = $case_variants;

        # Only the three case variants are degenerated further; the variants
        # derived from them are collected but not processed again
        foreach ($case_variants as $variant) {

            # Look for stuff like !!! and ???
            if (preg_match('/[!?]$/', $variant) > 0) {

                # A run of two or more marks is also tried collapsed to one
                if (preg_match('/[!?]{2,}$/', $variant) > 0) {
                    $candidates[] = preg_replace('/([!?])+$/', '$1', $variant);
                }

                # ... and with the marks removed entirely
                $candidates[] = preg_replace('/([!?])+$/', '', $variant);
            }

            # Look for ... at the end of the word, stripping one dot at a time
            $stripped = $variant;

            while (preg_match('/[\.]$/', $stripped) > 0) {
                $stripped = substr($stripped, 0, strlen($stripped) - 1);
                $candidates[] = $stripped;
            }
        }

        # Some degenerates are the same as the original word. These don't have
        # to be fetched, so we keep only new tokens. The comparison is strict:
        # a loose one treats numeric-looking strings such as "1e3" and "1E3"
        # as equal and would drop a genuinely different token.
        $real_degenerate = array();

        foreach ($candidates as $candidate) {
            if ($candidate !== $word) {
                $real_degenerate[] = $candidate;
            }
        }

        # Words without letters yield identical case variants; drop the
        # repeats (first occurrence wins) and reindex to a plain list
        $real_degenerate = array_values(array_unique($real_degenerate));

        # Store the list of degenerates for the token
        $this->degenerates[$word] = $real_degenerate;

        return $real_degenerate;
    }

}
-
-?> \ No newline at end of file