diff options
author | Andrew Manning <tamanning@zoho.com> | 2016-06-02 22:32:50 -0400 |
---|---|---|
committer | Andrew Manning <tamanning@zoho.com> | 2016-06-02 22:32:50 -0400 |
commit | b93e398674b375a3b14718fc6dd2a815aad9b387 (patch) | |
tree | 7c2a8097e1c90a87cc8207b5fe08a064f4fa3ae8 /library/spam/b8 | |
parent | b70c6809648bb3c78e5e26f9293727b3a7aa4025 (diff) | |
parent | f9075e2a2feca0f37fdf568be6e6e53460aa9034 (diff) | |
download | volse-hubzilla-b93e398674b375a3b14718fc6dd2a815aad9b387.tar.gz volse-hubzilla-b93e398674b375a3b14718fc6dd2a815aad9b387.tar.bz2 volse-hubzilla-b93e398674b375a3b14718fc6dd2a815aad9b387.zip |
Merge remote-tracking branch 'upstream/dev' into wiki
Diffstat (limited to 'library/spam/b8')
-rw-r--r-- | library/spam/b8/b8.php | 503 | ||||
-rw-r--r-- | library/spam/b8/b8.php.ORIG | 503 | ||||
-rw-r--r-- | library/spam/b8/degenerator/degenerator_default.php | 127 | ||||
-rw-r--r-- | library/spam/b8/lexer/lexer_default.php | 205 | ||||
-rw-r--r-- | library/spam/b8/storage/storage_base.php | 396 | ||||
-rw-r--r-- | library/spam/b8/storage/storage_base.php.ORIG | 395 | ||||
-rw-r--r-- | library/spam/b8/storage/storage_dba.php | 198 | ||||
-rw-r--r-- | library/spam/b8/storage/storage_frndc.php | 318 | ||||
-rw-r--r-- | library/spam/b8/storage/storage_mysql.php | 351 |
9 files changed, 0 insertions, 2996 deletions
diff --git a/library/spam/b8/b8.php b/library/spam/b8/b8.php deleted file mode 100644 index 28a3dd29f..000000000 --- a/library/spam/b8/b8.php +++ /dev/null @@ -1,503 +0,0 @@ -<?php - -# Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de> -# -# b8 - A Bayesian spam filter written in PHP 5 -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation in version 2.1 of the License. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. - -/** - * Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de> - * - * @license LGPL - * @access public - * @package b8 - * @author Tobias Leupold - * @author Oliver Lillie (aka buggedcom) (original PHP 5 port) - */ - -class b8 -{ - - public $config = array( - 'min_size' => 3, - 'max_size' => 30, - 'allow_numbers' => FALSE, - 'lexer' => 'default', - 'degenerator' => 'default', - 'storage' => 'dba', - 'use_relevant' => 15, - 'min_dev' => 0.2, - 'rob_s' => 0.3, - 'rob_x' => 0.5 - ); - - private $_lexer = NULL; - private $_database = NULL; - private $_token_data = NULL; - - const SPAM = 'spam'; - const HAM = 'ham'; - const LEARN = 'learn'; - const UNLEARN = 'unlearn'; - - const STARTUP_FAIL_DATABASE = 'STARTUP_FAIL_DATABASE'; - const STARTUP_FAIL_LEXER = 'STARTUP_FAIL_LEXER'; - const TRAINER_CATEGORY_FAIL = 'TRAINER_CATEGORY_FAIL'; - - /** - * Constructs b8 - * - * @access public - * @return void - */ - - function __construct($config = array(), $database_config) - { - - # Validate config data - - if(count($config) > 0) { - - foreach ($config as $name=>$value) { - - switch($name) { - - case 'min_dev': - case 'rob_s': - case 'rob_x': - $this->config[$name] = (float) $value; - break; - - case 'min_size': - case 'max_size': - case 'use_relevant': - $this->config[$name] = (int) $value; - break; - - case 'allow_numbers': - $this->config[$name] = (bool) $value; - break; - - case 'lexer': - $value = (string) strtolower($value); - $this->config[$name] = is_file(dirname(__FILE__) . DIRECTORY_SEPARATOR . 'lexer' . DIRECTORY_SEPARATOR . "lexer_" . $value . '.php') === TRUE ? $value : 'default'; - break; - - case 'storage': - $this->config[$name] = (string) $value; - break; - - } - - } - - } - - # Setup the database backend - - # Get the basic storage class used by all backends - if($this->load_class('b8_storage_base', dirname(__FILE__) . DIRECTORY_SEPARATOR . 'storage' . DIRECTORY_SEPARATOR . 'storage_base.php') === FALSE) - return; - - # Get the degenerator we need - if($this->load_class('b8_degenerator_' . $this->config['degenerator'], dirname(__FILE__) . DIRECTORY_SEPARATOR . 'degenerator' . DIRECTORY_SEPARATOR . 'degenerator_' . $this->config['degenerator'] . '.php') === FALSE) - return; - - # Get the actual storage backend we need - if($this->load_class('b8_storage_' . $this->config['storage'], dirname(__FILE__) . DIRECTORY_SEPARATOR . 'storage' . DIRECTORY_SEPARATOR . 'storage_' . $this->config['storage'] . '.php') === FALSE) - return; - - # Setup the backend - $class = 'b8_storage_' . $this->config['storage']; - $this->_database = new $class( - $database_config, - $this->config['degenerator'], date('ymd') - ); - - # Setup the lexer class - - if($this->load_class('b8_lexer_' . $this->config['lexer'], dirname(__FILE__) . DIRECTORY_SEPARATOR . 'lexer' . DIRECTORY_SEPARATOR . 'lexer_' . $this->config['lexer'] . '.php') === FALSE) - return; - - $class = 'b8_lexer_' . $this->config['lexer']; - $this->_lexer = new $class( - array( - 'min_size' => $this->config['min_size'], - 'max_size' => $this->config['max_size'], - 'allow_numbers' => $this->config['allow_numbers'] - ) - ); - - } - - /** - * Load a class file if a class has not been defined yet. - * - * @access public - * @return boolean Returns TRUE if everything is okay, otherwise FALSE. - */ - - public function load_class($class_name, $class_file) - { - - if(class_exists($class_name, FALSE) === FALSE) { - - $included = require_once $class_file; - - if($included === FALSE or class_exists($class_name, FALSE) === FALSE) - return FALSE; - - } - - return TRUE; - - } - - /** - * Validates the class has all it needs to work. - * - * @access public - * @return mixed Returns TRUE if everything is okay, otherwise an error code. - */ - - public function validate() - { - - if($this->_database === NULL) - return self::STARTUP_FAIL_DATABASE; - - # Connect the database backend if we aren't connected yet - - elseif($this->_database->connected === FALSE) { - - $connection = $this->_database->connect(); - - if($connection !== TRUE) - return $connection; - - } - - if($this->_lexer === NULL) - return self::STARTUP_FAIL_LEXER; - - return TRUE; - - } - - /** - * Classifies a text - * - * @access public - * @package default - * @param string $text - * @return float The rating between 0 (ham) and 1 (spam) - */ - - public function classify($uid,$text) - { - - # Validate the startup - - $started_up = $this->validate(); - - if($started_up !== TRUE) - return $started_up; - - # Get the internal database variables, containing the number of ham and - # spam texts so the spam probability can be calculated in relation to them - $internals = $this->_database->get_internals($uid); - - # Calculate the spamminess of all tokens - - # Get all tokens we want to rate - - $tokens = $this->_lexer->get_tokens($text); - - # Check if the lexer failed - # (if so, $tokens will be a lexer error code, if not, $tokens will be an array) - if(!is_array($tokens)) - return $tokens; - - # Fetch all availible data for the token set from the database - $this->_token_data = $this->_database->get(array_keys($tokens),$uid); - - # Calculate the spamminess and importance for each token (or a degenerated form of it) - - $word_count = array(); - $rating = array(); - $importance = array(); - - foreach($tokens as $word => $count) { - - $word_count[$word] = $count; - - # Although we only call this function only here ... let's do the - # calculation stuff in a function to make this a bit less confusing ;-) - $rating[$word] = $this->_get_probability($word, $internals['texts_ham'], $internals['texts_spam']); - - $importance[$word] = abs(0.5 - $rating[$word]); - - } - - # Order by importance - arsort($importance); - reset($importance); - - # Get the most interesting tokens (use all if we have less than the given number) - - $relevant = array(); - - for($i = 0; $i < $this->config['use_relevant']; $i++) { - - if($tmp = each($importance)) { - - # Important tokens remain - - # If the token's rating is relevant enough, use it - - if(abs(0.5 - $rating[$tmp['key']]) > $this->config['min_dev']) { - - # Tokens that appear more than once also count more than once - - for($x = 0, $l = $word_count[$tmp['key']]; $x < $l; $x++) - array_push($relevant, $rating[$tmp['key']]); - - } - - } - - else { - # We have less than words to use, so we already - # use what we have and can break here - break; - } - - } - - # Calculate the spamminess of the text (thanks to Mr. Robinson ;-) - # We set both hamminess and Spamminess to 1 for the first multiplying - $hamminess = 1; - $spamminess = 1; - - # Consider all relevant ratings - foreach($relevant as $value) { - $hamminess *= (1.0 - $value); - $spamminess *= $value; - } - - # If no token was good for calculation, we really don't know how - # to rate this text; so we assume a spam and ham probability of 0.5 - - if($hamminess === 1 and $spamminess === 1) { - $hamminess = 0.5; - $spamminess = 0.5; - $n = 1; - } - else { - # Get the number of relevant ratings - $n = count($relevant); - } - - # Calculate the combined rating - - # The actual hamminess and spamminess - $hamminess = 1 - pow($hamminess, (1 / $n)); - $spamminess = 1 - pow($spamminess, (1 / $n)); - - # Calculate the combined indicator - $probability = ($hamminess - $spamminess) / ($hamminess + $spamminess); - - # We want a value between 0 and 1, not between -1 and +1, so ... - $probability = (1 + $probability) / 2; - - # Alea iacta est - return $probability; - - } - - /** - * Calculate the spamminess of a single token also considering "degenerated" versions - * - * @access private - * @param string $word - * @param string $texts_ham - * @param string $texts_spam - * @return void - */ - - private function _get_probability($word, $texts_ham, $texts_spam) - { - - # Let's see what we have! - - if(isset($this->_token_data['tokens'][$word]) === TRUE) { - # The token was in the database, so we can use it's data as-is - # and calculate the spamminess of this token directly - return $this->_calc_probability($this->_token_data['tokens'][$word], $texts_ham, $texts_spam); - } - - # Damn. The token was not found, so do we have at least similar words? - - if(isset($this->_token_data['degenerates'][$word]) === TRUE) { - - # We found similar words, so calculate the spamminess for each one - # and choose the most important one for the further calculation - - # The default rating is 0.5 simply saying nothing - $rating = 0.5; - - foreach($this->_token_data['degenerates'][$word] as $degenerate => $count) { - - # Calculate the rating of the current degenerated token - $rating_tmp = $this->_calc_probability($count, $texts_ham, $texts_spam); - - # Is it more important than the rating of another degenerated version? - if(abs(0.5 - $rating_tmp) > abs(0.5 - $rating)) - $rating = $rating_tmp; - - } - - return $rating; - - } - - else { - # The token is really unknown, so choose the default rating - # for completely unknown tokens. This strips down to the - # robX parameter so we can cheap out the freaky math ;-) - return $this->config['rob_x']; - } - - } - - /** - * Do the actual spamminess calculation of a single token - * - * @access private - * @param array $data - * @param string $texts_ham - * @param string $texts_spam - * @return void - */ - - private function _calc_probability($data, $texts_ham, $texts_spam) - { - - # Calculate the basic probability by Mr. Graham - - # But: consider the number of ham and spam texts saved instead of the - # number of entries where the token appeared to calculate a relative - # spamminess because we count tokens appearing multiple times not just - # once but as often as they appear in the learned texts - - $rel_ham = $data['count_ham']; - $rel_spam = $data['count_spam']; - - if($texts_ham > 0) - $rel_ham = $data['count_ham'] / $texts_ham; - - if($texts_spam > 0) - $rel_spam = $data['count_spam'] / $texts_spam; - - $rating = $rel_spam / ($rel_ham + $rel_spam); - - # Calculate the better probability proposed by Mr. Robinson - $all = $data['count_ham'] + $data['count_spam']; - return (($this->config['rob_s'] * $this->config['rob_x']) + ($all * $rating)) / ($this->config['rob_s'] + $all); - - } - - /** - * Check the validity of the category of a request - * - * @access private - * @param string $category - * @return void - */ - - private function _check_category($category) - { - return $category === self::HAM or $category === self::SPAM; - } - - /** - * Learn a reference text - * - * @access public - * @param string $text - * @param const $category Either b8::SPAM or b8::HAM - * @return void - */ - - public function learn($text, $category, $uid) - { - return $this->_process_text($text, $category, self::LEARN, $uid); - } - - /** - * Unlearn a reference text - * - * @access public - * @param string $text - * @param const $category Either b8::SPAM or b8::HAM - * @return void - */ - - public function unlearn($text, $category, $uid) - { - return $this->_process_text($text, $category, self::UNLEARN, $uid); - } - - /** - * Does the actual interaction with the storage backend for learning or unlearning texts - * - * @access private - * @param string $text - * @param const $category Either b8::SPAM or b8::HAM - * @param const $action Either b8::LEARN or b8::UNLEARN - * @return void - */ - - private function _process_text($text, $category, $action, $uid = 0) - { - - # Validate the startup - - $started_up = $this->validate(); - - if($started_up !== TRUE) - return $started_up; - - # Look if the request is okay - if($this->_check_category($category) === FALSE) - return self::TRAINER_CATEGORY_FAIL; - - # Get all tokens from $text - - $tokens = $this->_lexer->get_tokens($text); - - # Check if the lexer failed - # (if so, $tokens will be a lexer error code, if not, $tokens will be an array) - if(!is_array($tokens)) - return $tokens; - - # Pass the tokens and what to do with it to the storage backend - return $this->_database->process_text($tokens, $category, $action, $uid); - - } - -} - -?>
\ No newline at end of file diff --git a/library/spam/b8/b8.php.ORIG b/library/spam/b8/b8.php.ORIG deleted file mode 100644 index ea1e15ffa..000000000 --- a/library/spam/b8/b8.php.ORIG +++ /dev/null @@ -1,503 +0,0 @@ -<?php - -# Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de> -# -# b8 - A Bayesian spam filter written in PHP 5 -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation in version 2.1 of the License. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. - -/** - * Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de> - * - * @license LGPL - * @access public - * @package b8 - * @author Tobias Leupold - * @author Oliver Lillie (aka buggedcom) (original PHP 5 port) - */ - -class b8 -{ - - public $config = array( - 'min_size' => 3, - 'max_size' => 30, - 'allow_numbers' => FALSE, - 'lexer' => 'default', - 'degenerator' => 'default', - 'storage' => 'dba', - 'use_relevant' => 15, - 'min_dev' => 0.2, - 'rob_s' => 0.3, - 'rob_x' => 0.5 - ); - - private $_lexer = NULL; - private $_database = NULL; - private $_token_data = NULL; - - const SPAM = 'spam'; - const HAM = 'ham'; - const LEARN = 'learn'; - const UNLEARN = 'unlearn'; - - const STARTUP_FAIL_DATABASE = 'STARTUP_FAIL_DATABASE'; - const STARTUP_FAIL_LEXER = 'STARTUP_FAIL_LEXER'; - const TRAINER_CATEGORY_FAIL = 'TRAINER_CATEGORY_FAIL'; - - /** - * Constructs b8 - * - * @access public - * @return void - */ - - function __construct($config = array(), $database_config) - { - - # Validate config data - - if(count($config) > 0) { - - foreach ($config as $name=>$value) { - - switch($name) { - - case 'min_dev': - case 'rob_s': - case 'rob_x': - $this->config[$name] = (float) $value; - break; - - case 'min_size': - case 'max_size': - case 'use_relevant': - $this->config[$name] = (int) $value; - break; - - case 'allow_numbers': - $this->config[$name] = (bool) $value; - break; - - case 'lexer': - $value = (string) strtolower($value); - $this->config[$name] = is_file(dirname(__FILE__) . DIRECTORY_SEPARATOR . 'lexer' . DIRECTORY_SEPARATOR . "lexer_" . $value . '.php') === TRUE ? $value : 'default'; - break; - - case 'storage': - $this->config[$name] = (string) $value; - break; - - } - - } - - } - - # Setup the database backend - - # Get the basic storage class used by all backends - if($this->load_class('b8_storage_base', dirname(__FILE__) . DIRECTORY_SEPARATOR . 'storage' . DIRECTORY_SEPARATOR . 'storage_base.php') === FALSE) - return; - - # Get the degenerator we need - if($this->load_class('b8_degenerator_' . $this->config['degenerator'], dirname(__FILE__) . DIRECTORY_SEPARATOR . 'degenerator' . DIRECTORY_SEPARATOR . 'degenerator_' . $this->config['degenerator'] . '.php') === FALSE) - return; - - # Get the actual storage backend we need - if($this->load_class('b8_storage_' . $this->config['storage'], dirname(__FILE__) . DIRECTORY_SEPARATOR . 'storage' . DIRECTORY_SEPARATOR . 'storage_' . $this->config['storage'] . '.php') === FALSE) - return; - - # Setup the backend - $class = 'b8_storage_' . $this->config['storage']; - $this->_database = new $class( - $database_config, - $this->config['degenerator'], date('ymd') - ); - - # Setup the lexer class - - if($this->load_class('b8_lexer_' . $this->config['lexer'], dirname(__FILE__) . DIRECTORY_SEPARATOR . 'lexer' . DIRECTORY_SEPARATOR . 'lexer_' . $this->config['lexer'] . '.php') === FALSE) - return; - - $class = 'b8_lexer_' . $this->config['lexer']; - $this->_lexer = new $class( - array( - 'min_size' => $this->config['min_size'], - 'max_size' => $this->config['max_size'], - 'allow_numbers' => $this->config['allow_numbers'] - ) - ); - - } - - /** - * Load a class file if a class has not been defined yet. - * - * @access public - * @return boolean Returns TRUE if everything is okay, otherwise FALSE. - */ - - public function load_class($class_name, $class_file) - { - - if(class_exists($class_name, FALSE) === FALSE) { - - $included = require_once $class_file; - - if($included === FALSE or class_exists($class_name, FALSE) === FALSE) - return FALSE; - - } - - return TRUE; - - } - - /** - * Validates the class has all it needs to work. - * - * @access public - * @return mixed Returns TRUE if everything is okay, otherwise an error code. - */ - - public function validate() - { - - if($this->_database === NULL) - return self::STARTUP_FAIL_DATABASE; - - # Connect the database backend if we aren't connected yet - - elseif($this->_database->connected === FALSE) { - - $connection = $this->_database->connect(); - - if($connection !== TRUE) - return $connection; - - } - - if($this->_lexer === NULL) - return self::STARTUP_FAIL_LEXER; - - return TRUE; - - } - - /** - * Classifies a text - * - * @access public - * @package default - * @param string $text - * @return float The rating between 0 (ham) and 1 (spam) - */ - - public function classify($text) - { - - # Validate the startup - - $started_up = $this->validate(); - - if($started_up !== TRUE) - return $started_up; - - # Get the internal database variables, containing the number of ham and - # spam texts so the spam probability can be calculated in relation to them - $internals = $this->_database->get_internals(); - - # Calculate the spamminess of all tokens - - # Get all tokens we want to rate - - $tokens = $this->_lexer->get_tokens($text); - - # Check if the lexer failed - # (if so, $tokens will be a lexer error code, if not, $tokens will be an array) - if(!is_array($tokens)) - return $tokens; - - # Fetch all availible data for the token set from the database - $this->_token_data = $this->_database->get(array_keys($tokens)); - - # Calculate the spamminess and importance for each token (or a degenerated form of it) - - $word_count = array(); - $rating = array(); - $importance = array(); - - foreach($tokens as $word => $count) { - - $word_count[$word] = $count; - - # Although we only call this function only here ... let's do the - # calculation stuff in a function to make this a bit less confusing ;-) - $rating[$word] = $this->_get_probability($word, $internals['texts_ham'], $internals['texts_spam']); - - $importance[$word] = abs(0.5 - $rating[$word]); - - } - - # Order by importance - arsort($importance); - reset($importance); - - # Get the most interesting tokens (use all if we have less than the given number) - - $relevant = array(); - - for($i = 0; $i < $this->config['use_relevant']; $i++) { - - if($tmp = each($importance)) { - - # Important tokens remain - - # If the token's rating is relevant enough, use it - - if(abs(0.5 - $rating[$tmp['key']]) > $this->config['min_dev']) { - - # Tokens that appear more than once also count more than once - - for($x = 0, $l = $word_count[$tmp['key']]; $x < $l; $x++) - array_push($relevant, $rating[$tmp['key']]); - - } - - } - - else { - # We have less than words to use, so we already - # use what we have and can break here - break; - } - - } - - # Calculate the spamminess of the text (thanks to Mr. Robinson ;-) - # We set both hamminess and Spamminess to 1 for the first multiplying - $hamminess = 1; - $spamminess = 1; - - # Consider all relevant ratings - foreach($relevant as $value) { - $hamminess *= (1.0 - $value); - $spamminess *= $value; - } - - # If no token was good for calculation, we really don't know how - # to rate this text; so we assume a spam and ham probability of 0.5 - - if($hamminess === 1 and $spamminess === 1) { - $hamminess = 0.5; - $spamminess = 0.5; - $n = 1; - } - else { - # Get the number of relevant ratings - $n = count($relevant); - } - - # Calculate the combined rating - - # The actual hamminess and spamminess - $hamminess = 1 - pow($hamminess, (1 / $n)); - $spamminess = 1 - pow($spamminess, (1 / $n)); - - # Calculate the combined indicator - $probability = ($hamminess - $spamminess) / ($hamminess + $spamminess); - - # We want a value between 0 and 1, not between -1 and +1, so ... - $probability = (1 + $probability) / 2; - - # Alea iacta est - return $probability; - - } - - /** - * Calculate the spamminess of a single token also considering "degenerated" versions - * - * @access private - * @param string $word - * @param string $texts_ham - * @param string $texts_spam - * @return void - */ - - private function _get_probability($word, $texts_ham, $texts_spam) - { - - # Let's see what we have! - - if(isset($this->_token_data['tokens'][$word]) === TRUE) { - # The token was in the database, so we can use it's data as-is - # and calculate the spamminess of this token directly - return $this->_calc_probability($this->_token_data['tokens'][$word], $texts_ham, $texts_spam); - } - - # Damn. The token was not found, so do we have at least similar words? - - if(isset($this->_token_data['degenerates'][$word]) === TRUE) { - - # We found similar words, so calculate the spamminess for each one - # and choose the most important one for the further calculation - - # The default rating is 0.5 simply saying nothing - $rating = 0.5; - - foreach($this->_token_data['degenerates'][$word] as $degenerate => $count) { - - # Calculate the rating of the current degenerated token - $rating_tmp = $this->_calc_probability($count, $texts_ham, $texts_spam); - - # Is it more important than the rating of another degenerated version? - if(abs(0.5 - $rating_tmp) > abs(0.5 - $rating)) - $rating = $rating_tmp; - - } - - return $rating; - - } - - else { - # The token is really unknown, so choose the default rating - # for completely unknown tokens. This strips down to the - # robX parameter so we can cheap out the freaky math ;-) - return $this->config['rob_x']; - } - - } - - /** - * Do the actual spamminess calculation of a single token - * - * @access private - * @param array $data - * @param string $texts_ham - * @param string $texts_spam - * @return void - */ - - private function _calc_probability($data, $texts_ham, $texts_spam) - { - - # Calculate the basic probability by Mr. Graham - - # But: consider the number of ham and spam texts saved instead of the - # number of entries where the token appeared to calculate a relative - # spamminess because we count tokens appearing multiple times not just - # once but as often as they appear in the learned texts - - $rel_ham = $data['count_ham']; - $rel_spam = $data['count_spam']; - - if($texts_ham > 0) - $rel_ham = $data['count_ham'] / $texts_ham; - - if($texts_spam > 0) - $rel_spam = $data['count_spam'] / $texts_spam; - - $rating = $rel_spam / ($rel_ham + $rel_spam); - - # Calculate the better probability proposed by Mr. Robinson - $all = $data['count_ham'] + $data['count_spam']; - return (($this->config['rob_s'] * $this->config['rob_x']) + ($all * $rating)) / ($this->config['rob_s'] + $all); - - } - - /** - * Check the validity of the category of a request - * - * @access private - * @param string $category - * @return void - */ - - private function _check_category($category) - { - return $category === self::HAM or $category === self::SPAM; - } - - /** - * Learn a reference text - * - * @access public - * @param string $text - * @param const $category Either b8::SPAM or b8::HAM - * @return void - */ - - public function learn($text, $category) - { - return $this->_process_text($text, $category, self::LEARN); - } - - /** - * Unlearn a reference text - * - * @access public - * @param string $text - * @param const $category Either b8::SPAM or b8::HAM - * @return void - */ - - public function unlearn($text, $category) - { - return $this->_process_text($text, $category, self::UNLEARN); - } - - /** - * Does the actual interaction with the storage backend for learning or unlearning texts - * - * @access private - * @param string $text - * @param const $category Either b8::SPAM or b8::HAM - * @param const $action Either b8::LEARN or b8::UNLEARN - * @return void - */ - - private function _process_text($text, $category, $action) - { - - # Validate the startup - - $started_up = $this->validate(); - - if($started_up !== TRUE) - return $started_up; - - # Look if the request is okay - if($this->_check_category($category) === FALSE) - return self::TRAINER_CATEGORY_FAIL; - - # Get all tokens from $text - - $tokens = $this->_lexer->get_tokens($text); - - # Check if the lexer failed - # (if so, $tokens will be a lexer error code, if not, $tokens will be an array) - if(!is_array($tokens)) - return $tokens; - - # Pass the tokens and what to do with it to the storage backend - return $this->_database->process_text($tokens, $category, $action); - - } - -} - -?>
\ No newline at end of file diff --git a/library/spam/b8/degenerator/degenerator_default.php b/library/spam/b8/degenerator/degenerator_default.php deleted file mode 100644 index 4ff6d882b..000000000 --- a/library/spam/b8/degenerator/degenerator_default.php +++ /dev/null @@ -1,127 +0,0 @@ -<?php - -# Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de> -# -# This file is part of the b8 package -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation in version 2.1 of the License. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. - -/** - * Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de> - * - * @license LGPL - * @access public - * @package b8 - * @author Tobias Leupold - */ - -class b8_degenerator_default -{ - - public $degenerates = array(); - - /** - * Generates a list of "degenerated" words for a list of words. - * - * @access public - * @param array $tokens - * @return array An array containing an array of degenerated tokens for each token - */ - - public function degenerate(array $words) - { - - $degenerates = array(); - - foreach($words as $word) - $degenerates[$word] = $this->_degenerate_word($word); - - return $degenerates; - - } - - /** - * If the original word is not found in the database then - * we build "degenerated" versions of the word to lookup. - * - * @access private - * @param string $word - * @return array An array of degenerated words - */ - - protected function _degenerate_word($word) - { - - # Check for any stored words so the process doesn't have to repeat - if(isset($this->degenerates[$word]) === TRUE) - return $this->degenerates[$word]; - - $degenerate = array(); - - # Add different version of upper and lower case and ucfirst - array_push($degenerate, strtolower($word)); - array_push($degenerate, strtoupper($word)); - array_push($degenerate, ucfirst($word)); - - # Degenerate all versions - - foreach($degenerate as $alt_word) { - - # Look for stuff like !!! and ??? - - if(preg_match('/[!?]$/', $alt_word) > 0) { - - # Add versions with different !s and ?s - - if(preg_match('/[!?]{2,}$/', $alt_word) > 0) { - $tmp = preg_replace('/([!?])+$/', '$1', $alt_word); - array_push($degenerate, $tmp); - } - - $tmp = preg_replace('/([!?])+$/', '', $alt_word); - array_push($degenerate, $tmp); - - } - - # Look for ... at the end of the word - - $alt_word_int = $alt_word; - - while(preg_match('/[\.]$/', $alt_word_int) > 0) { - $alt_word_int = substr($alt_word_int, 0, strlen($alt_word_int) - 1); - array_push($degenerate, $alt_word_int); - } - - } - - # Some degenerates are the same as the original word. These don't have - # to be fetched, so we create a new array with only new tokens - - $real_degenerate = array(); - - foreach($degenerate as $deg_word) { - if($word != $deg_word) - array_push($real_degenerate, $deg_word); - } - - # Store the list of degenerates for the token - $this->degenerates[$word] = $real_degenerate; - - return $real_degenerate; - - } - -} - -?>
\ No newline at end of file diff --git a/library/spam/b8/lexer/lexer_default.php b/library/spam/b8/lexer/lexer_default.php deleted file mode 100644 index 7b5ca22bf..000000000 --- a/library/spam/b8/lexer/lexer_default.php +++ /dev/null @@ -1,205 +0,0 @@ -<?php - -# Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de> -# -# This file is part of the b8 package -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation in version 2.1 of the License. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. - -/** - * Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de> - * - * @license LGPL - * @access public - * @package b8 - * @author Tobias Leupold - * @author Oliver Lillie (aka buggedcom) (original PHP 5 port) - */ - -class b8_lexer_default -{ - - const LEXER_TEXT_NOT_STRING = 'LEXER_TEXT_NOT_STRING'; - const LEXER_TEXT_EMPTY = 'LEXER_TEXT_EMPTY'; - - public $config = NULL; - - # The regular expressions we use to split the text to tokens - - public $regexp = array( - 'ip' => '/([A-Za-z0-9\_\-\.]+)/', - 'raw_split' => '/[\s,\.\/"\:;\|<>\-_\[\]{}\+=\)\(\*\&\^%]+/', - 'html' => '/(<.+?>)/', - 'tagname' => '/(.+?)\s/', - 'numbers' => '/^[0-9]+$/' - ); - - /** - * Constructs the lexer. - * - * @access public - * @return void - */ - - function __construct($config) - { - $this->config = $config; - } - - /** - * Generates the tokens required for the bayesian filter. - * - * @access public - * @param string $text - * @return array Returns the list of tokens - */ - - public function get_tokens($text) - { - - # Check that we actually have a string ... - if(is_string($text) === FALSE) - return self::LEXER_TEXT_NOT_STRING; - - # ... and that it's not empty - if(empty($text) === TRUE) - return self::LEXER_TEXT_EMPTY; - - # Re-convert the text to the original characters coded in UTF-8, as - # they have been coded in html entities during the post process - $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8'); - - $tokens = array(); - - # Find URLs and IP addresses - - preg_match_all($this->regexp['ip'], $text, $raw_tokens); - - foreach($raw_tokens[1] as $word) { - - # Check for a dot - if(strpos($word, '.') === FALSE) - continue; - - # Check that the word is valid, min and max sizes, etc. - if($this->_is_valid($word) === FALSE) - continue; - - if(isset($tokens[$word]) === FALSE) - $tokens[$word] = 1; - else - $tokens[$word] += 1; - - # Delete the word from the text so it doesn't get re-added. - $text = str_replace($word, '', $text); - - # Also process the parts of the URLs - $url_parts = preg_split($this->regexp['raw_split'], $word); - - foreach($url_parts as $word) { - - # Again validate the part - - if($this->_is_valid($word) === FALSE) - continue; - - if(isset($tokens[$word]) === FALSE) - $tokens[$word] = 1; - else - $tokens[$word] += 1; - - } - - } - - # Split the remaining text - - $raw_tokens = preg_split($this->regexp['raw_split'], $text); - - foreach($raw_tokens as $word) { - - # Again validate the part - - if($this->_is_valid($word) === FALSE) - continue; - - if(isset($tokens[$word]) === FALSE) - $tokens[$word] = 1; - else - $tokens[$word] += 1; - - } - - # Process the HTML - - preg_match_all($this->regexp['html'], $text, $raw_tokens); - - foreach($raw_tokens[1] as $word) { - - # Again validate the part - - if($this->_is_valid($word) === FALSE) - continue; - - # If the tag has parameters, just use the tag itself - - if(strpos($word, ' ') !== FALSE) { - preg_match($this->regexp['tagname'], $word, $tmp); - $word = "{$tmp[1]}...>"; - } - - if(isset($tokens[$word]) === FALSE) - $tokens[$word] = 1; - else - $tokens[$word] += 1; - - } - - # Return a list of all found tokens - return $tokens; - - } - - /** - * Validates a token. - * - * @access private - * @param string $token The token string. - * @return boolean Returns TRUE if the token is valid, otherwise returns FALSE - */ - - private function _is_valid($token) - { - - # Validate the size of the token - - $len = strlen($token); - - if($len < $this->config['min_size'] or $len > $this->config['max_size']) - return FALSE; - - # We may want to exclude pure numbers - if($this->config['allow_numbers'] === FALSE) { - if(preg_match($this->regexp['numbers'], $token) > 0) - return FALSE; - } - - # Token is okay - return TRUE; - - } - -} - -?>
\ No newline at end of file diff --git a/library/spam/b8/storage/storage_base.php b/library/spam/b8/storage/storage_base.php deleted file mode 100644 index 6b181ee96..000000000 --- a/library/spam/b8/storage/storage_base.php +++ /dev/null @@ -1,396 +0,0 @@ -<?php - -# Copyright (C) 2010 Tobias Leupold <tobias.leupold@web.de> -# -# This file is part of the b8 package -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation in version 2.1 of the License. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. - -/** - * Functions used by all storage backends - * Copyright (C) 2010 Tobias Leupold <tobias.leupold@web.de> - * - * @license LGPL - * @access public - * @package b8 - * @author Tobias Leupold - */ - -abstract class b8_storage_base -{ - - public $connected = FALSE; - - protected $_degenerator = NULL; - - const INTERNALS_TEXTS_HAM = 'bayes*texts.ham'; - const INTERNALS_TEXTS_SPAM = 'bayes*texts.spam'; - const INTERNALS_DBVERSION = 'bayes*dbversion'; - - const BACKEND_NOT_CONNECTED = 'BACKEND_NOT_CONNECTED'; - const DATABASE_WRONG_VERSION = 'DATABASE_WRONG_VERSION'; - const DATABASE_NOT_B8 = 'DATABASE_NOT_B8'; - - /** - * Validates the class has all it needs to work. - * - * @access protected - * @return mixed Returns TRUE if everything is okay, otherwise an error code. - */ - - protected function validate() - { - - # We set up the degenerator here, as we would have to duplicate code if it - # was done in the constructor of the respective storage backend. - $class = 'b8_degenerator_' . $this->b8_config['degenerator']; - $this->_degenerator = new $class(); - - if($this->connected !== TRUE) - return self::BACKEND_NOT_CONNECTED; - - return TRUE; - - } - - /** - * Checks if a b8 database is used and if it's version is okay - * - * @access protected - * @return mixed Returns TRUE if everything is okay, otherwise an error code. - */ - - protected function check_database($uid) - { - - $internals = $this->get_internals($uid); - - if(isset($internals['dbversion'])) { - if($internals['dbversion'] == "2") { - return TRUE; - } - else { - $this->connected = FALSE; - return self::DATABASE_WRONG_VERSION; - } - } - else { - $this->connected = FALSE; - return self::DATABASE_NOT_B8; - } - - } - - /** - * Parses the "count" data of a token. - * - * @access private - * @param string $data - * @return array Returns an array of the parsed data: array(count_ham, count_spam, lastseen). - */ - - private function _parse_count($data) - { - - list($count_ham, $count_spam, $lastseen) = explode(' ', $data); - - $count_ham = (int) $count_ham; - $count_spam = (int) $count_spam; - - return array( - 'count_ham' => $count_ham, - 'count_spam' => $count_spam - ); - - } - - /** - * Get the database's internal variables. - * - * @access public - * @return array Returns an array of all internals. - */ - - public function get_internals($uid) - { - - $internals = $this->_get_query( - array( - self::INTERNALS_TEXTS_HAM, - self::INTERNALS_TEXTS_SPAM, - self::INTERNALS_DBVERSION - ), - $uid - ); - - return array( - 'texts_ham' => (int) $internals[self::INTERNALS_TEXTS_HAM], - 'texts_spam' => (int) $internals[self::INTERNALS_TEXTS_SPAM], - 'dbversion' => (int) $internals[self::INTERNALS_DBVERSION] - ); - - } - - /** - * Get all data about a list of tags from the database. - * - * @access public - * @param array $tokens - * @return mixed Returns FALSE on failure, otherwise returns array of returned data in the format array('tokens' => array(token => count), 'degenerates' => array(token => array(degenerate => count))). - */ - - public function get($tokens, $uid) - { - - # Validate the startup - - $started_up = $this->validate(); - - if($started_up !== TRUE) - return $started_up; - - # First we see what we have in the database. - $token_data = $this->_get_query($tokens, $uid); - - # Check if we have to degenerate some tokens - - $missing_tokens = array(); - - foreach($tokens as $token) { - if(!isset($token_data[$token])) - $missing_tokens[] = $token; - } - - if(count($missing_tokens) > 0) { - - # We have to degenerate some tokens - $degenerates_list = array(); - - # Generate a list of degenerated tokens for the missing tokens ... - $degenerates = $this->_degenerator->degenerate($missing_tokens); - - # ... and look them up - - foreach($degenerates as $token => $token_degenerates) - $degenerates_list = array_merge($degenerates_list, $token_degenerates); - - $token_data = array_merge($token_data, $this->_get_query($degenerates_list)); - - } - - # Here, we have all availible data in $token_data. - - $return_data_tokens = array(); - $return_data_degenerates = array(); - - foreach($tokens as $token) { - - if(isset($token_data[$token]) === TRUE) { - - # The token was found in the database - - # Add the data ... - $return_data_tokens[$token] = $this->_parse_count($token_data[$token]); - - # ... and update it's lastseen parameter - $this->_update($token, "{$return_data_tokens[$token]['count_ham']} {$return_data_tokens[$token]['count_spam']} " . $this->b8_config['today'], $uid ); - - } - - else { - - # The token was not found, so we look if we - # can return data for degenerated tokens - - # Check all degenerated forms of the token - - foreach($this->_degenerator->degenerates[$token] as $degenerate) { - - if(isset($token_data[$degenerate]) === TRUE) { - - # A degeneration of the token way found in the database - - # Add the data ... - $return_data_degenerates[$token][$degenerate] = $this->_parse_count($token_data[$degenerate]); - - # ... and update it's lastseen parameter - $this->_update($degenerate, "{$return_data_degenerates[$token][$degenerate]['count_ham']} {$return_data_degenerates[$token][$degenerate]['count_spam']} " . $this->b8_config['today'], $uid); - - } - - } - - } - - } - - # Now, all token data directly found in the database is in $return_data_tokens - # and all data for degenerated versions is in $return_data_degenerates - - # First, we commit the changes to the lastseen parameters - $this->_commit(); - - # Then, we return what we have - return array( - 'tokens' => $return_data_tokens, - 'degenerates' => $return_data_degenerates - ); - - } - - /** - * Stores or deletes a list of tokens from the given category. - * - * @access public - * @param array $tokens - * @param const $category Either b8::HAM or b8::SPAM - * @param const $action Either b8::LEARN or b8::UNLEARN - * @return void - */ - - public function process_text($tokens, $category, $action, $uid) - { - - # Validate the startup - - $started_up = $this->validate(); - - if($started_up !== TRUE) - return $started_up; - - # No matter what we do, we first have to check what data we have. - - # First get the internals, including the ham texts and spam texts counter - $internals = $this->get_internals($uid); - - # Then, fetch all data for all tokens we have (and update their lastseen parameters) - $token_data = $this->_get_query(array_keys($tokens), $uid); - - # Process all tokens to learn/unlearn - - foreach($tokens as $token => $count) { - - if(isset($token_data[$token])) { - - # We already have this token, so update it's data - - # Get the existing data - list($count_ham, $count_spam, $lastseen) = explode(' ', $token_data[$token]); - $count_ham = (int) $count_ham; - $count_spam = (int) $count_spam; - - # Increase or decrease the right counter - - if($action === b8::LEARN) { - if($category === b8::HAM) - $count_ham += $count; - elseif($category === b8::SPAM) - $count_spam += $count; - } - - elseif($action == b8::UNLEARN) { - if($category === b8::HAM) - $count_ham -= $count; - elseif($category === b8::SPAM) - $count_spam -= $count; - } - - # We don't want to have negative values - - if($count_ham < 0) - $count_ham = 0; - - if($count_spam < 0) - $count_spam = 0; - - # Now let's see if we have to update or delete the token - if($count_ham !== 0 or $count_spam !== 0) - $this->_update($token, "$count_ham $count_spam " . $this->b8_config['today'], $uid); - else - $this->_del($token, $uid); - - } - - else { - - # We don't have the token. If we unlearn a text, we can't delete it - # as we don't have it anyway, so just do something if we learn a text - - if($action === b8::LEARN) { - - if($category === b8::HAM) - $data = '1 0 '; - elseif($category === b8::SPAM) - $data = '0 1 '; - - $data .= $this->b8_config['today']; - - $this->_put($token, $data, $uid); - - } - - } - - } - - # Now, all token have been processed, so let's update the right text - - if($action === b8::LEARN) { - - if($category === b8::HAM) { - $internals['texts_ham']++; - $this->_update(self::INTERNALS_TEXTS_HAM, $internals['texts_ham'], $uid); - } - - elseif($category === b8::SPAM) { - $internals['texts_spam']++; - $this->_update(self::INTERNALS_TEXTS_SPAM, $internals['texts_spam'], $uid); - } - - } - - elseif($action == b8::UNLEARN) { - - if($category === b8::HAM) { - - $internals['texts_ham']--; - - if($internals['texts_ham'] < 0) - $internals['texts_ham'] = 0; - - $this->_update(self::INTERNALS_TEXTS_HAM, $internals['texts_ham'], $uid); - - } - - elseif($category === b8::SPAM) { - - $internals['texts_spam']--; - - if($internals['texts_spam'] < 0) - $internals['texts_spam'] = 0; - - $this->_update(self::INTERNALS_TEXTS_SPAM, $internals['texts_spam'], $uid); - - } - - } - - # We're done and can commit all changes to the database now - $this->_commit($uid); - - } - -} - -?>
\ No newline at end of file diff --git a/library/spam/b8/storage/storage_base.php.ORIG b/library/spam/b8/storage/storage_base.php.ORIG deleted file mode 100644 index 01f5a69d7..000000000 --- a/library/spam/b8/storage/storage_base.php.ORIG +++ /dev/null @@ -1,395 +0,0 @@ -<?php - -# Copyright (C) 2010 Tobias Leupold <tobias.leupold@web.de> -# -# This file is part of the b8 package -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation in version 2.1 of the License. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. - -/** - * Functions used by all storage backends - * Copyright (C) 2010 Tobias Leupold <tobias.leupold@web.de> - * - * @license LGPL - * @access public - * @package b8 - * @author Tobias Leupold - */ - -abstract class b8_storage_base -{ - - public $connected = FALSE; - - protected $_degenerator = NULL; - - const INTERNALS_TEXTS_HAM = 'bayes*texts.ham'; - const INTERNALS_TEXTS_SPAM = 'bayes*texts.spam'; - const INTERNALS_DBVERSION = 'bayes*dbversion'; - - const BACKEND_NOT_CONNECTED = 'BACKEND_NOT_CONNECTED'; - const DATABASE_WRONG_VERSION = 'DATABASE_WRONG_VERSION'; - const DATABASE_NOT_B8 = 'DATABASE_NOT_B8'; - - /** - * Validates the class has all it needs to work. - * - * @access protected - * @return mixed Returns TRUE if everything is okay, otherwise an error code. - */ - - protected function validate() - { - - # We set up the degenerator here, as we would have to duplicate code if it - # was done in the constructor of the respective storage backend. - $class = 'b8_degenerator_' . $this->b8_config['degenerator']; - $this->_degenerator = new $class(); - - if($this->connected !== TRUE) - return self::BACKEND_NOT_CONNECTED; - - return TRUE; - - } - - /** - * Checks if a b8 database is used and if it's version is okay - * - * @access protected - * @return mixed Returns TRUE if everything is okay, otherwise an error code. - */ - - protected function check_database() - { - - $internals = $this->get_internals(); - - if(isset($internals['dbversion'])) { - if($internals['dbversion'] == "2") { - return TRUE; - } - else { - $this->connected = FALSE; - return self::DATABASE_WRONG_VERSION; - } - } - else { - $this->connected = FALSE; - return self::DATABASE_NOT_B8; - } - - } - - /** - * Parses the "count" data of a token. - * - * @access private - * @param string $data - * @return array Returns an array of the parsed data: array(count_ham, count_spam, lastseen). - */ - - private function _parse_count($data) - { - - list($count_ham, $count_spam, $lastseen) = explode(' ', $data); - - $count_ham = (int) $count_ham; - $count_spam = (int) $count_spam; - - return array( - 'count_ham' => $count_ham, - 'count_spam' => $count_spam - ); - - } - - /** - * Get the database's internal variables. - * - * @access public - * @return array Returns an array of all internals. - */ - - public function get_internals() - { - - $internals = $this->_get_query( - array( - self::INTERNALS_TEXTS_HAM, - self::INTERNALS_TEXTS_SPAM, - self::INTERNALS_DBVERSION - ) - ); - - return array( - 'texts_ham' => (int) $internals[self::INTERNALS_TEXTS_HAM], - 'texts_spam' => (int) $internals[self::INTERNALS_TEXTS_SPAM], - 'dbversion' => (int) $internals[self::INTERNALS_DBVERSION] - ); - - } - - /** - * Get all data about a list of tags from the database. - * - * @access public - * @param array $tokens - * @return mixed Returns FALSE on failure, otherwise returns array of returned data in the format array('tokens' => array(token => count), 'degenerates' => array(token => array(degenerate => count))). - */ - - public function get($tokens) - { - - # Validate the startup - - $started_up = $this->validate(); - - if($started_up !== TRUE) - return $started_up; - - # First we see what we have in the database. - $token_data = $this->_get_query($tokens); - - # Check if we have to degenerate some tokens - - $missing_tokens = array(); - - foreach($tokens as $token) { - if(!isset($token_data[$token])) - $missing_tokens[] = $token; - } - - if(count($missing_tokens) > 0) { - - # We have to degenerate some tokens - $degenerates_list = array(); - - # Generate a list of degenerated tokens for the missing tokens ... - $degenerates = $this->_degenerator->degenerate($missing_tokens); - - # ... and look them up - - foreach($degenerates as $token => $token_degenerates) - $degenerates_list = array_merge($degenerates_list, $token_degenerates); - - $token_data = array_merge($token_data, $this->_get_query($degenerates_list)); - - } - - # Here, we have all availible data in $token_data. - - $return_data_tokens = array(); - $return_data_degenerates = array(); - - foreach($tokens as $token) { - - if(isset($token_data[$token]) === TRUE) { - - # The token was found in the database - - # Add the data ... - $return_data_tokens[$token] = $this->_parse_count($token_data[$token]); - - # ... and update it's lastseen parameter - $this->_update($token, "{$return_data_tokens[$token]['count_ham']} {$return_data_tokens[$token]['count_spam']} " . $this->b8_config['today']); - - } - - else { - - # The token was not found, so we look if we - # can return data for degenerated tokens - - # Check all degenerated forms of the token - - foreach($this->_degenerator->degenerates[$token] as $degenerate) { - - if(isset($token_data[$degenerate]) === TRUE) { - - # A degeneration of the token way found in the database - - # Add the data ... - $return_data_degenerates[$token][$degenerate] = $this->_parse_count($token_data[$degenerate]); - - # ... and update it's lastseen parameter - $this->_update($degenerate, "{$return_data_degenerates[$token][$degenerate]['count_ham']} {$return_data_degenerates[$token][$degenerate]['count_spam']} " . $this->b8_config['today']); - - } - - } - - } - - } - - # Now, all token data directly found in the database is in $return_data_tokens - # and all data for degenerated versions is in $return_data_degenerates - - # First, we commit the changes to the lastseen parameters - $this->_commit(); - - # Then, we return what we have - return array( - 'tokens' => $return_data_tokens, - 'degenerates' => $return_data_degenerates - ); - - } - - /** - * Stores or deletes a list of tokens from the given category. - * - * @access public - * @param array $tokens - * @param const $category Either b8::HAM or b8::SPAM - * @param const $action Either b8::LEARN or b8::UNLEARN - * @return void - */ - - public function process_text($tokens, $category, $action) - { - - # Validate the startup - - $started_up = $this->validate(); - - if($started_up !== TRUE) - return $started_up; - - # No matter what we do, we first have to check what data we have. - - # First get the internals, including the ham texts and spam texts counter - $internals = $this->get_internals(); - - # Then, fetch all data for all tokens we have (and update their lastseen parameters) - $token_data = $this->_get_query(array_keys($tokens)); - - # Process all tokens to learn/unlearn - - foreach($tokens as $token => $count) { - - if(isset($token_data[$token])) { - - # We already have this token, so update it's data - - # Get the existing data - list($count_ham, $count_spam, $lastseen) = explode(' ', $token_data[$token]); - $count_ham = (int) $count_ham; - $count_spam = (int) $count_spam; - - # Increase or decrease the right counter - - if($action === b8::LEARN) { - if($category === b8::HAM) - $count_ham += $count; - elseif($category === b8::SPAM) - $count_spam += $count; - } - - elseif($action == b8::UNLEARN) { - if($category === b8::HAM) - $count_ham -= $count; - elseif($category === b8::SPAM) - $count_spam -= $count; - } - - # We don't want to have negative values - - if($count_ham < 0) - $count_ham = 0; - - if($count_spam < 0) - $count_spam = 0; - - # Now let's see if we have to update or delete the token - if($count_ham !== 0 or $count_spam !== 0) - $this->_update($token, "$count_ham $count_spam " . $this->b8_config['today']); - else - $this->_del($token); - - } - - else { - - # We don't have the token. If we unlearn a text, we can't delete it - # as we don't have it anyway, so just do something if we learn a text - - if($action === b8::LEARN) { - - if($category === b8::HAM) - $data = '1 0 '; - elseif($category === b8::SPAM) - $data = '0 1 '; - - $data .= $this->b8_config['today']; - - $this->_put($token, $data); - - } - - } - - } - - # Now, all token have been processed, so let's update the right text - - if($action === b8::LEARN) { - - if($category === b8::HAM) { - $internals['texts_ham']++; - $this->_update(self::INTERNALS_TEXTS_HAM, $internals['texts_ham']); - } - - elseif($category === b8::SPAM) { - $internals['texts_spam']++; - $this->_update(self::INTERNALS_TEXTS_SPAM, $internals['texts_spam']); - } - - } - - elseif($action == b8::UNLEARN) { - - if($category === b8::HAM) { - - $internals['texts_ham']--; - - if($internals['texts_ham'] < 0) - $internals['texts_ham'] = 0; - - $this->_update(self::INTERNALS_TEXTS_HAM, $internals['texts_ham']); - - } - - elseif($category === b8::SPAM) { - - $internals['texts_spam']--; - - if($internals['texts_spam'] < 0) - $internals['texts_spam'] = 0; - - $this->_update(self::INTERNALS_TEXTS_SPAM, $internals['texts_spam']); - - } - - } - - # We're done and can commit all changes to the database now - $this->_commit(); - - } - -} - -?>
\ No newline at end of file diff --git a/library/spam/b8/storage/storage_dba.php b/library/spam/b8/storage/storage_dba.php deleted file mode 100644 index 04618b23e..000000000 --- a/library/spam/b8/storage/storage_dba.php +++ /dev/null @@ -1,198 +0,0 @@ -<?php - -# Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de> -# -# This file is part of the b8 package -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation in version 2.1 of the License. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. - -/** - * The DBA (Berkeley DB) abstraction layer for communicating with the database. - * Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de> - * - * @license LGPL - * @access public - * @package b8 - * @author Tobias Leupold - */ - -class b8_storage_dba extends b8_storage_base -{ - - public $config = array( - 'database' => 'wordlist.db', - 'handler' => 'db4', - ); - - public $b8_config = array( - 'degenerator' => NULL, - 'today' => NULL - ); - - private $_db = NULL; - - const DATABASE_CONNECTION_FAIL = 'DATABASE_CONNECTION_FAIL'; - - /** - * Constructs the database layer. - * - * @access public - * @param string $config - */ - - function __construct($config, $degenerator, $today) - { - - # Pass some variables of the main b8 config to this class - $this->b8_config['degenerator'] = $degenerator; - $this->b8_config['today'] = $today; - - # Validate the config items - if(count($config) > 0) { - foreach ($config as $name => $value) { - $this->config[$name] = (string) $value; - } - } - - } - - /** - * Closes the database connection. - * - * @access public - * @return void - */ - - function __destruct() - { - if($this->_db !== NULL) { - dba_close($this->_db); - $this->connected = FALSE; - } - } - - /** - * Connect to the database and do some checks. - * - * @access public - * @return mixed Returns TRUE on a successful database connection, otherwise returns a constant from b8. - */ - - public function connect() - { - - # Have we already connected? - if($this->_db !== NULL) - return TRUE; - - # Open the database connection - $this->_db = dba_open(dirname(__FILE__) . DIRECTORY_SEPARATOR . ".." . DIRECTORY_SEPARATOR . $this->config['database'], "w", $this->config['handler']); - - if($this->_db === FALSE) { - $this->connected = FALSE; - $this->_db = NULL; - return self::DATABASE_CONNECTION_FAIL; - } - - # Everything is okay and connected - - $this->connected = TRUE; - - # Let's see if this is a b8 database and the version is okay - return $this->check_database(); - - } - - /** - * Does the actual interaction with the database when fetching data. - * - * @access protected - * @param array $tokens - * @return mixed Returns an array of the returned data in the format array(token => data) or an empty array if there was no data. - */ - - protected function _get_query($tokens) - { - - $data = array(); - - foreach ($tokens as $token) { - - $count = dba_fetch($token, $this->_db); - - if($count !== FALSE) - $data[$token] = $count; - - } - - return $data; - - } - - /** - * Store a token to the database. - * - * @access protected - * @param string $token - * @param string $count - * @return bool TRUE on success or FALSE on failure - */ - - protected function _put($token, $count) { - return dba_insert($token, $count, $this->_db); - } - - /** - * Update an existing token. - * - * @access protected - * @param string $token - * @param string $count - * @return bool TRUE on success or FALSE on failure - */ - - protected function _update($token, $count) - { - return dba_replace($token, $count, $this->_db); - } - - /** - * Remove a token from the database. - * - * @access protected - * @param string $token - * @return bool TRUE on success or FALSE on failure - */ - - protected function _del($token) - { - return dba_delete($token, $this->_db); - } - - /** - * Does nothing :-D - * - * @access protected - * @return void - */ - - protected function _commit() - { - # We just need this function because the (My)SQL backend(s) need it. - return; - } - -} - -?>
\ No newline at end of file diff --git a/library/spam/b8/storage/storage_frndc.php b/library/spam/b8/storage/storage_frndc.php deleted file mode 100644 index f211d4431..000000000 --- a/library/spam/b8/storage/storage_frndc.php +++ /dev/null @@ -1,318 +0,0 @@ -<?php - -# Copyright (C) 2006-2011 Tobias Leupold <tobias.leupold@web.de> -# -# This file is part of the b8 package -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation in version 2.1 of the License. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. - -/** - * The MySQL abstraction layer for communicating with the database. - * Copyright (C) 2009 Oliver Lillie (aka buggedcom) - * Copyright (C) 2010-2011 Tobias Leupold <tobias.leupold@web.de> - * - * @license LGPL - * @access public - * @package b8 - * @author Oliver Lillie (aka buggedcom) (original PHP 5 port and optimizations) - * @author Tobias Leupold - */ - -class b8_storage_frndc extends b8_storage_base -{ - - public $config = array( - 'database' => 'b8_wordlist', - 'table_name' => 'b8_wordlist', - 'host' => 'localhost', - 'user' => FALSE, - 'pass' => FALSE, - 'connection' => NULL - ); - - public $b8_config = array( - 'degenerator' => NULL, - 'today' => NULL - ); - - private $_connection = NULL; - private $_deletes = array(); - private $_puts = array(); - private $_updates = array(); - private $uid = 0; - - const DATABASE_CONNECTION_FAIL = 'DATABASE_CONNECTION_FAIL'; - const DATABASE_CONNECTION_ERROR = 'DATABASE_CONNECTION_ERROR'; - const DATABASE_CONNECTION_BAD_RESOURCE = 'DATABASE_CONNECTION_BAD_RESOURCE'; - const DATABASE_SELECT_ERROR = 'DATABASE_SELECT_ERROR'; - const DATABASE_TABLE_ACCESS_FAIL = 'DATABASE_TABLE_ACCESS_FAIL'; - const DATABASE_WRONG_VERSION = 'DATABASE_WRONG_VERSION'; - - /** - * Constructs the database layer. - * - * @access public - * @param string $config - */ - - function __construct($config, $degenerator, $today) - { - - # Pass some variables of the main b8 config to this class - $this->b8_config['degenerator'] = $degenerator; - $this->b8_config['today'] = $today; - - # Validate the config items - - if(count($config) > 0) { - - foreach ($config as $name => $value) { - - switch($name) { - - case 'table_name': - case 'host': - case 'user': - case 'pass': - case 'database': - $this->config[$name] = (string) $value; - break; - - case 'connection': - - if($value !== NULL) { - - if(is_resource($value) === TRUE) { - $resource_type = get_resource_type($value); - $this->config['connection'] = $resource_type !== 'mysql link' && $resource_type !== 'mysql link persistent' ? FALSE : $value; - } - - else - $this->config['connection'] = FALSE; - - } - - break; - - } - - } - - } - - } - - /** - * Closes the database connection. - * - * @access public - * @return void - */ - - function __destruct() - { - - if($this->_connection === NULL) - return; - - # Commit any changes before closing - $this->_commit(); - - # Just close the connection if no link-resource was passed and b8 created it's own connection - if($this->config['connection'] === NULL) - mysql_close($this->_connection); - - $this->connected = FALSE; - - } - - /** - * Connect to the database and do some checks. - * - * @access public - * @return mixed Returns TRUE on a successful database connection, otherwise returns a constant from b8. - */ - - public function connect() - { - - $this->connected = TRUE; - return TRUE; - - } - - /** - * Does the actual interaction with the database when fetching data. - * - * @access protected - * @param array $tokens - * @return mixed Returns an array of the returned data in the format array(token => data) or an empty array if there was no data. - */ - - protected function _get_query($tokens, $uid) - { - - # Construct the query ... - - if(count($tokens) > 0) { - - $where = array(); - - foreach ($tokens as $token) { - $token = dbesc($token); - array_push($where, $token); - } - - $where = 'term IN ("' . implode('", "', $where) . '")'; - } - - else { - $token = dbesc($token); - $where = 'term = "' . $token . '"'; - } - - # ... and fetch the data - - $result = q(' - SELECT * FROM spam WHERE ' . $where . ' AND uid = ' . $uid ); - - - $returned_tokens = array(); - if(count($result)) { - foreach($result as $rr) - $returned_tokens[] = $rr['term']; - } - $to_create = array(); - - if(count($tokens) > 0) { - foreach($tokens as $token) - if(! in_array($token,$returned_tokens)) - $to_create[] = str_tolower($token); - } - if(count($to_create)) { - $sql = ''; - foreach($to_create as $term) { - if(strlen($sql)) - $sql .= ','; - $sql .= sprintf("(term,date,uid) values('%s','%s',%d)", - dbesc(str_tolower($term)) - dbesc(datetime_convert()), - intval($uid) - ); - q("insert into spam " . $sql); - } - - return $result; - - } - - /** - * Store a token to the database. - * - * @access protected - * @param string $token - * @param string $count - * @return void - */ - - protected function _put($token, $count, $uid) { - $token = dbesc($token); - $count = dbesc($count); - $uid = dbesc($uid); - array_push($this->_puts, '("' . $token . '", "' . $count . '", "' . $uid .'")'); - } - - /** - * Update an existing token. - * - * @access protected - * @param string $token - * @param string $count - * @return void - */ - - protected function _update($token, $count, $uid) - { - $token = dbesc($token); - $count = dbesc($count); - $uid = dbesc($uid); - array_push($this->_puts, '("' . $token . '", "' . $count . '", "' . $uid .'")'); - } - - /** - * Remove a token from the database. - * - * @access protected - * @param string $token - * @return void - */ - - protected function _del($token, $uid) - { - $token = dbesc($token); - $uid = dbesc($uid); - $this->uid = $uid; - array_push($this->_deletes, $token); - } - - /** - * Commits any modification queries. - * - * @access protected - * @return void - */ - - protected function _commit($uid) - { - - if(count($this->_deletes) > 0) { - - $result = q(' - DELETE FROM ' . $this->config['table_name'] . ' - WHERE term IN ("' . implode('", "', $this->_deletes) . '") AND uid = ' . $this->uid); - - $this->_deletes = array(); - - } - - if(count($this->_puts) > 0) { -//fixme - $result = q(' - INSERT INTO ' . $this->config['table_name'] . '(term, count, uid) - VALUES ' . implode(', ', $this->_puts)); - - $this->_puts = array(); - - } - - if(count($this->_updates) > 0) { - - // this still needs work - $result = q("select * from " . $this->config['table_name'] . ' where token = '); - - - $result = q(' - INSERT INTO ' . $this->config['table_name'] . '(token, count, uid) - VALUES ' . implode(', ', $this->_updates) . ', ' . $uid . ' - ON DUPLICATE KEY UPDATE ' . $this->config['table_name'] . '.count = VALUES(count);', $this->_connection); - - $this->_updates = array(); - - } - - } - -} - -?>
\ No newline at end of file diff --git a/library/spam/b8/storage/storage_mysql.php b/library/spam/b8/storage/storage_mysql.php deleted file mode 100644 index 022536350..000000000 --- a/library/spam/b8/storage/storage_mysql.php +++ /dev/null @@ -1,351 +0,0 @@ -<?php - -# Copyright (C) 2006-2011 Tobias Leupold <tobias.leupold@web.de> -# -# This file is part of the b8 package -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation in version 2.1 of the License. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. - -/** - * The MySQL abstraction layer for communicating with the database. - * Copyright (C) 2009 Oliver Lillie (aka buggedcom) - * Copyright (C) 2010-2011 Tobias Leupold <tobias.leupold@web.de> - * - * @license LGPL - * @access public - * @package b8 - * @author Oliver Lillie (aka buggedcom) (original PHP 5 port and optimizations) - * @author Tobias Leupold - */ - -class b8_storage_mysql extends b8_storage_base -{ - - public $config = array( - 'database' => 'b8_wordlist', - 'table_name' => 'b8_wordlist', - 'host' => 'localhost', - 'user' => FALSE, - 'pass' => FALSE, - 'connection' => NULL - ); - - public $b8_config = array( - 'degenerator' => NULL, - 'today' => NULL - ); - - private $_connection = NULL; - private $_deletes = array(); - private $_puts = array(); - private $_updates = array(); - - const DATABASE_CONNECTION_FAIL = 'DATABASE_CONNECTION_FAIL'; - const DATABASE_CONNECTION_ERROR = 'DATABASE_CONNECTION_ERROR'; - const DATABASE_CONNECTION_BAD_RESOURCE = 'DATABASE_CONNECTION_BAD_RESOURCE'; - const DATABASE_SELECT_ERROR = 'DATABASE_SELECT_ERROR'; - const DATABASE_TABLE_ACCESS_FAIL = 'DATABASE_TABLE_ACCESS_FAIL'; - const DATABASE_WRONG_VERSION = 'DATABASE_WRONG_VERSION'; - - /** - * Constructs the database layer. - * - * @access public - * @param string $config - */ - - function __construct($config, $degenerator, $today) - { - - # Pass some variables of the main b8 config to this class - $this->b8_config['degenerator'] = $degenerator; - $this->b8_config['today'] = $today; - - # Validate the config items - - if(count($config) > 0) { - - foreach ($config as $name => $value) { - - switch($name) { - - case 'table_name': - case 'host': - case 'user': - case 'pass': - case 'database': - $this->config[$name] = (string) $value; - break; - - case 'connection': - - if($value !== NULL) { - - if(is_resource($value) === TRUE) { - $resource_type = get_resource_type($value); - $this->config['connection'] = $resource_type !== 'mysql link' && $resource_type !== 'mysql link persistent' ? FALSE : $value; - } - - else - $this->config['connection'] = FALSE; - - } - - break; - - } - - } - - } - - } - - /** - * Closes the database connection. - * - * @access public - * @return void - */ - - function __destruct() - { - - if($this->_connection === NULL) - return; - - # Commit any changes before closing - $this->_commit(); - - # Just close the connection if no link-resource was passed and b8 created it's own connection - if($this->config['connection'] === NULL) - mysql_close($this->_connection); - - $this->connected = FALSE; - - } - - /** - * Connect to the database and do some checks. - * - * @access public - * @return mixed Returns TRUE on a successful database connection, otherwise returns a constant from b8. - */ - - public function connect() - { - - # Are we already connected? - if($this->connected === TRUE) - return TRUE; - - # Are we using an existing passed resource? - if($this->config['connection'] === FALSE) { - # ... yes we are, but the connection is not a resource, so return an error - $this->connected = FALSE; - return self::DATABASE_CONNECTION_BAD_RESOURCE; - } - - elseif($this->config['connection'] === NULL) { - - # ... no we aren't so we have to connect. - - if($this->_connection = mysql_connect($this->config['host'], $this->config['user'], $this->config['pass'])) { - if(mysql_select_db($this->config['database'], $this->_connection) === FALSE) { - $this->connected = FALSE; - return self::DATABASE_SELECT_ERROR . ": " . mysql_error(); - } - } - else { - $this->connected = FALSE; - return self::DATABASE_CONNECTION_ERROR; - } - - } - - else { - # ... yes we are - $this->_connection = $this->config['connection']; - } - - # Just in case ... - if($this->_connection === NULL) { - $this->connected = FALSE; - return self::DATABASE_CONNECTION_FAIL; - } - - # Check to see if the wordlist table exists - if(mysql_query('DESCRIBE ' . $this->config['table_name'], $this->_connection) === FALSE) { - $this->connected = FALSE; - return self::DATABASE_TABLE_ACCESS_FAIL . ": " . mysql_error(); - } - - # Everything is okay and connected - $this->connected = TRUE; - - # Let's see if this is a b8 database and the version is okay - return $this->check_database(); - - } - - /** - * Does the actual interaction with the database when fetching data. - * - * @access protected - * @param array $tokens - * @return mixed Returns an array of the returned data in the format array(token => data) or an empty array if there was no data. - */ - - protected function _get_query($tokens) - { - - # Construct the query ... - - if(count($tokens) > 0) { - - $where = array(); - - foreach ($tokens as $token) { - $token = mysql_real_escape_string($token, $this->_connection); - array_push($where, $token); - } - - $where = 'token IN ("' . implode('", "', $where) . '")'; - } - - else { - $token = mysql_real_escape_string($token, $this->_connection); - $where = 'token = "' . $token . '"'; - } - - # ... and fetch the data - - $result = mysql_query(' - SELECT token, count - FROM ' . $this->config['table_name'] . ' - WHERE ' . $where . '; - ', $this->_connection); - - $data = array(); - - while ($row = mysql_fetch_array($result, MYSQL_ASSOC)) - $data[$row['token']] = $row['count']; - - mysql_free_result($result); - - return $data; - - } - - /** - * Store a token to the database. - * - * @access protected - * @param string $token - * @param string $count - * @return void - */ - - protected function _put($token, $count) { - $token = mysql_real_escape_string($token, $this->_connection); - $count = mysql_real_escape_string($count, $this->_connection);; - array_push($this->_puts, '("' . $token . '", "' . $count . '")'); - } - - /** - * Update an existing token. - * - * @access protected - * @param string $token - * @param string $count - * @return void - */ - - protected function _update($token, $count) - { - $token = mysql_real_escape_string($token, $this->_connection); - $count = mysql_real_escape_string($count, $this->_connection); - array_push($this->_updates, '("' . $token . '", "' . $count . '")'); - } - - /** - * Remove a token from the database. - * - * @access protected - * @param string $token - * @return void - */ - - protected function _del($token) - { - $token = mysql_real_escape_string($token, $this->_connection); - array_push($this->_deletes, $token); - } - - /** - * Commits any modification queries. - * - * @access protected - * @return void - */ - - protected function _commit() - { - - if(count($this->_deletes) > 0) { - - $result = mysql_query(' - DELETE FROM ' . $this->config['table_name'] . ' - WHERE token IN ("' . implode('", "', $this->_deletes) . '"); - ', $this->_connection); - - if(is_resource($result) === TRUE) - mysql_free_result($result); - - $this->_deletes = array(); - - } - - if(count($this->_puts) > 0) { - - $result = mysql_query(' - INSERT INTO ' . $this->config['table_name'] . '(token, count) - VALUES ' . implode(', ', $this->_puts) . ';', $this->_connection); - - if(is_resource($result) === TRUE) - mysql_free_result($result); - - $this->_puts = array(); - - } - - if(count($this->_updates) > 0) { - - $result = mysql_query(' - INSERT INTO ' . $this->config['table_name'] . '(token, count) - VALUES ' . implode(', ', $this->_updates) . ' - ON DUPLICATE KEY UPDATE ' . $this->config['table_name'] . '.count = VALUES(count);', $this->_connection); - - if(is_resource($result) === TRUE) - mysql_free_result($result); - - $this->_updates = array(); - - } - - } - -} - -?>
\ No newline at end of file |