about summary refs log tree commit diff stats
path: root/library/spam/b8
diff options
context:
space:
mode:
author Andrew Manning <tamanning@zoho.com> 2016-06-02 22:32:50 -0400
committer Andrew Manning <tamanning@zoho.com> 2016-06-02 22:32:50 -0400
commit b93e398674b375a3b14718fc6dd2a815aad9b387 (patch)
tree 7c2a8097e1c90a87cc8207b5fe08a064f4fa3ae8 /library/spam/b8
parent b70c6809648bb3c78e5e26f9293727b3a7aa4025 (diff)
parent f9075e2a2feca0f37fdf568be6e6e53460aa9034 (diff)
download volse-hubzilla-b93e398674b375a3b14718fc6dd2a815aad9b387.tar.gz
volse-hubzilla-b93e398674b375a3b14718fc6dd2a815aad9b387.tar.bz2
volse-hubzilla-b93e398674b375a3b14718fc6dd2a815aad9b387.zip
Merge remote-tracking branch 'upstream/dev' into wiki
Diffstat (limited to 'library/spam/b8')
-rw-r--r-- library/spam/b8/b8.php 503
-rw-r--r-- library/spam/b8/b8.php.ORIG 503
-rw-r--r-- library/spam/b8/degenerator/degenerator_default.php 127
-rw-r--r-- library/spam/b8/lexer/lexer_default.php 205
-rw-r--r-- library/spam/b8/storage/storage_base.php 396
-rw-r--r-- library/spam/b8/storage/storage_base.php.ORIG 395
-rw-r--r-- library/spam/b8/storage/storage_dba.php 198
-rw-r--r-- library/spam/b8/storage/storage_frndc.php 318
-rw-r--r-- library/spam/b8/storage/storage_mysql.php 351
9 files changed, 0 insertions, 2996 deletions
diff --git a/library/spam/b8/b8.php b/library/spam/b8/b8.php
deleted file mode 100644
index 28a3dd29f..000000000
--- a/library/spam/b8/b8.php
+++ /dev/null
@@ -1,503 +0,0 @@
-<?php
-
-# Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de>
-#
-# b8 - A Bayesian spam filter written in PHP 5
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation in version 2.1 of the License.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program; if not, write to the Free Software Foundation,
-# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-
-/**
- * Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de>
- *
- * @license LGPL
- * @access public
- * @package b8
- * @author Tobias Leupold
- * @author Oliver Lillie (aka buggedcom) (original PHP 5 port)
- */
-
-class b8
-{
-
- public $config = array(
- 'min_size' => 3,
- 'max_size' => 30,
- 'allow_numbers' => FALSE,
- 'lexer' => 'default',
- 'degenerator' => 'default',
- 'storage' => 'dba',
- 'use_relevant' => 15,
- 'min_dev' => 0.2,
- 'rob_s' => 0.3,
- 'rob_x' => 0.5
- );
-
- private $_lexer = NULL;
- private $_database = NULL;
- private $_token_data = NULL;
-
- const SPAM = 'spam';
- const HAM = 'ham';
- const LEARN = 'learn';
- const UNLEARN = 'unlearn';
-
- const STARTUP_FAIL_DATABASE = 'STARTUP_FAIL_DATABASE';
- const STARTUP_FAIL_LEXER = 'STARTUP_FAIL_LEXER';
- const TRAINER_CATEGORY_FAIL = 'TRAINER_CATEGORY_FAIL';
-
- /**
- * Constructs b8
- *
- * @access public
- * @return void
- */
-
- function __construct($config = array(), $database_config)
- {
-
- # Validate config data
-
- if(count($config) > 0) {
-
- foreach ($config as $name=>$value) {
-
- switch($name) {
-
- case 'min_dev':
- case 'rob_s':
- case 'rob_x':
- $this->config[$name] = (float) $value;
- break;
-
- case 'min_size':
- case 'max_size':
- case 'use_relevant':
- $this->config[$name] = (int) $value;
- break;
-
- case 'allow_numbers':
- $this->config[$name] = (bool) $value;
- break;
-
- case 'lexer':
- $value = (string) strtolower($value);
- $this->config[$name] = is_file(dirname(__FILE__) . DIRECTORY_SEPARATOR . 'lexer' . DIRECTORY_SEPARATOR . "lexer_" . $value . '.php') === TRUE ? $value : 'default';
- break;
-
- case 'storage':
- $this->config[$name] = (string) $value;
- break;
-
- }
-
- }
-
- }
-
- # Setup the database backend
-
- # Get the basic storage class used by all backends
- if($this->load_class('b8_storage_base', dirname(__FILE__) . DIRECTORY_SEPARATOR . 'storage' . DIRECTORY_SEPARATOR . 'storage_base.php') === FALSE)
- return;
-
- # Get the degenerator we need
- if($this->load_class('b8_degenerator_' . $this->config['degenerator'], dirname(__FILE__) . DIRECTORY_SEPARATOR . 'degenerator' . DIRECTORY_SEPARATOR . 'degenerator_' . $this->config['degenerator'] . '.php') === FALSE)
- return;
-
- # Get the actual storage backend we need
- if($this->load_class('b8_storage_' . $this->config['storage'], dirname(__FILE__) . DIRECTORY_SEPARATOR . 'storage' . DIRECTORY_SEPARATOR . 'storage_' . $this->config['storage'] . '.php') === FALSE)
- return;
-
- # Setup the backend
- $class = 'b8_storage_' . $this->config['storage'];
- $this->_database = new $class(
- $database_config,
- $this->config['degenerator'], date('ymd')
- );
-
- # Setup the lexer class
-
- if($this->load_class('b8_lexer_' . $this->config['lexer'], dirname(__FILE__) . DIRECTORY_SEPARATOR . 'lexer' . DIRECTORY_SEPARATOR . 'lexer_' . $this->config['lexer'] . '.php') === FALSE)
- return;
-
- $class = 'b8_lexer_' . $this->config['lexer'];
- $this->_lexer = new $class(
- array(
- 'min_size' => $this->config['min_size'],
- 'max_size' => $this->config['max_size'],
- 'allow_numbers' => $this->config['allow_numbers']
- )
- );
-
- }
-
- /**
- * Load a class file if a class has not been defined yet.
- *
- * @access public
- * @return boolean Returns TRUE if everything is okay, otherwise FALSE.
- */
-
- public function load_class($class_name, $class_file)
- {
-
- if(class_exists($class_name, FALSE) === FALSE) {
-
- $included = require_once $class_file;
-
- if($included === FALSE or class_exists($class_name, FALSE) === FALSE)
- return FALSE;
-
- }
-
- return TRUE;
-
- }
-
- /**
- * Validates the class has all it needs to work.
- *
- * @access public
- * @return mixed Returns TRUE if everything is okay, otherwise an error code.
- */
-
- public function validate()
- {
-
- if($this->_database === NULL)
- return self::STARTUP_FAIL_DATABASE;
-
- # Connect the database backend if we aren't connected yet
-
- elseif($this->_database->connected === FALSE) {
-
- $connection = $this->_database->connect();
-
- if($connection !== TRUE)
- return $connection;
-
- }
-
- if($this->_lexer === NULL)
- return self::STARTUP_FAIL_LEXER;
-
- return TRUE;
-
- }
-
- /**
- * Classifies a text
- *
- * @access public
- * @package default
- * @param string $text
- * @return float The rating between 0 (ham) and 1 (spam)
- */
-
- public function classify($uid,$text)
- {
-
- # Validate the startup
-
- $started_up = $this->validate();
-
- if($started_up !== TRUE)
- return $started_up;
-
- # Get the internal database variables, containing the number of ham and
- # spam texts so the spam probability can be calculated in relation to them
- $internals = $this->_database->get_internals($uid);
-
- # Calculate the spamminess of all tokens
-
- # Get all tokens we want to rate
-
- $tokens = $this->_lexer->get_tokens($text);
-
- # Check if the lexer failed
- # (if so, $tokens will be a lexer error code, if not, $tokens will be an array)
- if(!is_array($tokens))
- return $tokens;
-
- # Fetch all availible data for the token set from the database
- $this->_token_data = $this->_database->get(array_keys($tokens),$uid);
-
- # Calculate the spamminess and importance for each token (or a degenerated form of it)
-
- $word_count = array();
- $rating = array();
- $importance = array();
-
- foreach($tokens as $word => $count) {
-
- $word_count[$word] = $count;
-
- # Although we only call this function only here ... let's do the
- # calculation stuff in a function to make this a bit less confusing ;-)
- $rating[$word] = $this->_get_probability($word, $internals['texts_ham'], $internals['texts_spam']);
-
- $importance[$word] = abs(0.5 - $rating[$word]);
-
- }
-
- # Order by importance
- arsort($importance);
- reset($importance);
-
- # Get the most interesting tokens (use all if we have less than the given number)
-
- $relevant = array();
-
- for($i = 0; $i < $this->config['use_relevant']; $i++) {
-
- if($tmp = each($importance)) {
-
- # Important tokens remain
-
- # If the token's rating is relevant enough, use it
-
- if(abs(0.5 - $rating[$tmp['key']]) > $this->config['min_dev']) {
-
- # Tokens that appear more than once also count more than once
-
- for($x = 0, $l = $word_count[$tmp['key']]; $x < $l; $x++)
- array_push($relevant, $rating[$tmp['key']]);
-
- }
-
- }
-
- else {
- # We have less than words to use, so we already
- # use what we have and can break here
- break;
- }
-
- }
-
- # Calculate the spamminess of the text (thanks to Mr. Robinson ;-)
- # We set both hamminess and Spamminess to 1 for the first multiplying
- $hamminess = 1;
- $spamminess = 1;
-
- # Consider all relevant ratings
- foreach($relevant as $value) {
- $hamminess *= (1.0 - $value);
- $spamminess *= $value;
- }
-
- # If no token was good for calculation, we really don't know how
- # to rate this text; so we assume a spam and ham probability of 0.5
-
- if($hamminess === 1 and $spamminess === 1) {
- $hamminess = 0.5;
- $spamminess = 0.5;
- $n = 1;
- }
- else {
- # Get the number of relevant ratings
- $n = count($relevant);
- }
-
- # Calculate the combined rating
-
- # The actual hamminess and spamminess
- $hamminess = 1 - pow($hamminess, (1 / $n));
- $spamminess = 1 - pow($spamminess, (1 / $n));
-
- # Calculate the combined indicator
- $probability = ($hamminess - $spamminess) / ($hamminess + $spamminess);
-
- # We want a value between 0 and 1, not between -1 and +1, so ...
- $probability = (1 + $probability) / 2;
-
- # Alea iacta est
- return $probability;
-
- }
-
- /**
- * Calculate the spamminess of a single token also considering "degenerated" versions
- *
- * @access private
- * @param string $word
- * @param string $texts_ham
- * @param string $texts_spam
- * @return void
- */
-
- private function _get_probability($word, $texts_ham, $texts_spam)
- {
-
- # Let's see what we have!
-
- if(isset($this->_token_data['tokens'][$word]) === TRUE) {
- # The token was in the database, so we can use it's data as-is
- # and calculate the spamminess of this token directly
- return $this->_calc_probability($this->_token_data['tokens'][$word], $texts_ham, $texts_spam);
- }
-
- # Damn. The token was not found, so do we have at least similar words?
-
- if(isset($this->_token_data['degenerates'][$word]) === TRUE) {
-
- # We found similar words, so calculate the spamminess for each one
- # and choose the most important one for the further calculation
-
- # The default rating is 0.5 simply saying nothing
- $rating = 0.5;
-
- foreach($this->_token_data['degenerates'][$word] as $degenerate => $count) {
-
- # Calculate the rating of the current degenerated token
- $rating_tmp = $this->_calc_probability($count, $texts_ham, $texts_spam);
-
- # Is it more important than the rating of another degenerated version?
- if(abs(0.5 - $rating_tmp) > abs(0.5 - $rating))
- $rating = $rating_tmp;
-
- }
-
- return $rating;
-
- }
-
- else {
- # The token is really unknown, so choose the default rating
- # for completely unknown tokens. This strips down to the
- # robX parameter so we can cheap out the freaky math ;-)
- return $this->config['rob_x'];
- }
-
- }
-
- /**
- * Do the actual spamminess calculation of a single token
- *
- * @access private
- * @param array $data
- * @param string $texts_ham
- * @param string $texts_spam
- * @return void
- */
-
- private function _calc_probability($data, $texts_ham, $texts_spam)
- {
-
- # Calculate the basic probability by Mr. Graham
-
- # But: consider the number of ham and spam texts saved instead of the
- # number of entries where the token appeared to calculate a relative
- # spamminess because we count tokens appearing multiple times not just
- # once but as often as they appear in the learned texts
-
- $rel_ham = $data['count_ham'];
- $rel_spam = $data['count_spam'];
-
- if($texts_ham > 0)
- $rel_ham = $data['count_ham'] / $texts_ham;
-
- if($texts_spam > 0)
- $rel_spam = $data['count_spam'] / $texts_spam;
-
- $rating = $rel_spam / ($rel_ham + $rel_spam);
-
- # Calculate the better probability proposed by Mr. Robinson
- $all = $data['count_ham'] + $data['count_spam'];
- return (($this->config['rob_s'] * $this->config['rob_x']) + ($all * $rating)) / ($this->config['rob_s'] + $all);
-
- }
-
- /**
- * Check the validity of the category of a request
- *
- * @access private
- * @param string $category
- * @return void
- */
-
- private function _check_category($category)
- {
- return $category === self::HAM or $category === self::SPAM;
- }
-
- /**
- * Learn a reference text
- *
- * @access public
- * @param string $text
- * @param const $category Either b8::SPAM or b8::HAM
- * @return void
- */
-
- public function learn($text, $category, $uid)
- {
- return $this->_process_text($text, $category, self::LEARN, $uid);
- }
-
- /**
- * Unlearn a reference text
- *
- * @access public
- * @param string $text
- * @param const $category Either b8::SPAM or b8::HAM
- * @return void
- */
-
- public function unlearn($text, $category, $uid)
- {
- return $this->_process_text($text, $category, self::UNLEARN, $uid);
- }
-
- /**
- * Does the actual interaction with the storage backend for learning or unlearning texts
- *
- * @access private
- * @param string $text
- * @param const $category Either b8::SPAM or b8::HAM
- * @param const $action Either b8::LEARN or b8::UNLEARN
- * @return void
- */
-
- private function _process_text($text, $category, $action, $uid = 0)
- {
-
- # Validate the startup
-
- $started_up = $this->validate();
-
- if($started_up !== TRUE)
- return $started_up;
-
- # Look if the request is okay
- if($this->_check_category($category) === FALSE)
- return self::TRAINER_CATEGORY_FAIL;
-
- # Get all tokens from $text
-
- $tokens = $this->_lexer->get_tokens($text);
-
- # Check if the lexer failed
- # (if so, $tokens will be a lexer error code, if not, $tokens will be an array)
- if(!is_array($tokens))
- return $tokens;
-
- # Pass the tokens and what to do with it to the storage backend
- return $this->_database->process_text($tokens, $category, $action, $uid);
-
- }
-
-}
-
-?>
\ No newline at end of file
diff --git a/library/spam/b8/b8.php.ORIG b/library/spam/b8/b8.php.ORIG
deleted file mode 100644
index ea1e15ffa..000000000
--- a/library/spam/b8/b8.php.ORIG
+++ /dev/null
@@ -1,503 +0,0 @@
-<?php
-
-# Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de>
-#
-# b8 - A Bayesian spam filter written in PHP 5
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation in version 2.1 of the License.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program; if not, write to the Free Software Foundation,
-# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-
-/**
- * Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de>
- *
- * @license LGPL
- * @access public
- * @package b8
- * @author Tobias Leupold
- * @author Oliver Lillie (aka buggedcom) (original PHP 5 port)
- */
-
-class b8
-{
-
- public $config = array(
- 'min_size' => 3,
- 'max_size' => 30,
- 'allow_numbers' => FALSE,
- 'lexer' => 'default',
- 'degenerator' => 'default',
- 'storage' => 'dba',
- 'use_relevant' => 15,
- 'min_dev' => 0.2,
- 'rob_s' => 0.3,
- 'rob_x' => 0.5
- );
-
- private $_lexer = NULL;
- private $_database = NULL;
- private $_token_data = NULL;
-
- const SPAM = 'spam';
- const HAM = 'ham';
- const LEARN = 'learn';
- const UNLEARN = 'unlearn';
-
- const STARTUP_FAIL_DATABASE = 'STARTUP_FAIL_DATABASE';
- const STARTUP_FAIL_LEXER = 'STARTUP_FAIL_LEXER';
- const TRAINER_CATEGORY_FAIL = 'TRAINER_CATEGORY_FAIL';
-
- /**
- * Constructs b8
- *
- * @access public
- * @return void
- */
-
- function __construct($config = array(), $database_config)
- {
-
- # Validate config data
-
- if(count($config) > 0) {
-
- foreach ($config as $name=>$value) {
-
- switch($name) {
-
- case 'min_dev':
- case 'rob_s':
- case 'rob_x':
- $this->config[$name] = (float) $value;
- break;
-
- case 'min_size':
- case 'max_size':
- case 'use_relevant':
- $this->config[$name] = (int) $value;
- break;
-
- case 'allow_numbers':
- $this->config[$name] = (bool) $value;
- break;
-
- case 'lexer':
- $value = (string) strtolower($value);
- $this->config[$name] = is_file(dirname(__FILE__) . DIRECTORY_SEPARATOR . 'lexer' . DIRECTORY_SEPARATOR . "lexer_" . $value . '.php') === TRUE ? $value : 'default';
- break;
-
- case 'storage':
- $this->config[$name] = (string) $value;
- break;
-
- }
-
- }
-
- }
-
- # Setup the database backend
-
- # Get the basic storage class used by all backends
- if($this->load_class('b8_storage_base', dirname(__FILE__) . DIRECTORY_SEPARATOR . 'storage' . DIRECTORY_SEPARATOR . 'storage_base.php') === FALSE)
- return;
-
- # Get the degenerator we need
- if($this->load_class('b8_degenerator_' . $this->config['degenerator'], dirname(__FILE__) . DIRECTORY_SEPARATOR . 'degenerator' . DIRECTORY_SEPARATOR . 'degenerator_' . $this->config['degenerator'] . '.php') === FALSE)
- return;
-
- # Get the actual storage backend we need
- if($this->load_class('b8_storage_' . $this->config['storage'], dirname(__FILE__) . DIRECTORY_SEPARATOR . 'storage' . DIRECTORY_SEPARATOR . 'storage_' . $this->config['storage'] . '.php') === FALSE)
- return;
-
- # Setup the backend
- $class = 'b8_storage_' . $this->config['storage'];
- $this->_database = new $class(
- $database_config,
- $this->config['degenerator'], date('ymd')
- );
-
- # Setup the lexer class
-
- if($this->load_class('b8_lexer_' . $this->config['lexer'], dirname(__FILE__) . DIRECTORY_SEPARATOR . 'lexer' . DIRECTORY_SEPARATOR . 'lexer_' . $this->config['lexer'] . '.php') === FALSE)
- return;
-
- $class = 'b8_lexer_' . $this->config['lexer'];
- $this->_lexer = new $class(
- array(
- 'min_size' => $this->config['min_size'],
- 'max_size' => $this->config['max_size'],
- 'allow_numbers' => $this->config['allow_numbers']
- )
- );
-
- }
-
- /**
- * Load a class file if a class has not been defined yet.
- *
- * @access public
- * @return boolean Returns TRUE if everything is okay, otherwise FALSE.
- */
-
- public function load_class($class_name, $class_file)
- {
-
- if(class_exists($class_name, FALSE) === FALSE) {
-
- $included = require_once $class_file;
-
- if($included === FALSE or class_exists($class_name, FALSE) === FALSE)
- return FALSE;
-
- }
-
- return TRUE;
-
- }
-
- /**
- * Validates the class has all it needs to work.
- *
- * @access public
- * @return mixed Returns TRUE if everything is okay, otherwise an error code.
- */
-
- public function validate()
- {
-
- if($this->_database === NULL)
- return self::STARTUP_FAIL_DATABASE;
-
- # Connect the database backend if we aren't connected yet
-
- elseif($this->_database->connected === FALSE) {
-
- $connection = $this->_database->connect();
-
- if($connection !== TRUE)
- return $connection;
-
- }
-
- if($this->_lexer === NULL)
- return self::STARTUP_FAIL_LEXER;
-
- return TRUE;
-
- }
-
- /**
- * Classifies a text
- *
- * @access public
- * @package default
- * @param string $text
- * @return float The rating between 0 (ham) and 1 (spam)
- */
-
- public function classify($text)
- {
-
- # Validate the startup
-
- $started_up = $this->validate();
-
- if($started_up !== TRUE)
- return $started_up;
-
- # Get the internal database variables, containing the number of ham and
- # spam texts so the spam probability can be calculated in relation to them
- $internals = $this->_database->get_internals();
-
- # Calculate the spamminess of all tokens
-
- # Get all tokens we want to rate
-
- $tokens = $this->_lexer->get_tokens($text);
-
- # Check if the lexer failed
- # (if so, $tokens will be a lexer error code, if not, $tokens will be an array)
- if(!is_array($tokens))
- return $tokens;
-
- # Fetch all availible data for the token set from the database
- $this->_token_data = $this->_database->get(array_keys($tokens));
-
- # Calculate the spamminess and importance for each token (or a degenerated form of it)
-
- $word_count = array();
- $rating = array();
- $importance = array();
-
- foreach($tokens as $word => $count) {
-
- $word_count[$word] = $count;
-
- # Although we only call this function only here ... let's do the
- # calculation stuff in a function to make this a bit less confusing ;-)
- $rating[$word] = $this->_get_probability($word, $internals['texts_ham'], $internals['texts_spam']);
-
- $importance[$word] = abs(0.5 - $rating[$word]);
-
- }
-
- # Order by importance
- arsort($importance);
- reset($importance);
-
- # Get the most interesting tokens (use all if we have less than the given number)
-
- $relevant = array();
-
- for($i = 0; $i < $this->config['use_relevant']; $i++) {
-
- if($tmp = each($importance)) {
-
- # Important tokens remain
-
- # If the token's rating is relevant enough, use it
-
- if(abs(0.5 - $rating[$tmp['key']]) > $this->config['min_dev']) {
-
- # Tokens that appear more than once also count more than once
-
- for($x = 0, $l = $word_count[$tmp['key']]; $x < $l; $x++)
- array_push($relevant, $rating[$tmp['key']]);
-
- }
-
- }
-
- else {
- # We have less than words to use, so we already
- # use what we have and can break here
- break;
- }
-
- }
-
- # Calculate the spamminess of the text (thanks to Mr. Robinson ;-)
- # We set both hamminess and Spamminess to 1 for the first multiplying
- $hamminess = 1;
- $spamminess = 1;
-
- # Consider all relevant ratings
- foreach($relevant as $value) {
- $hamminess *= (1.0 - $value);
- $spamminess *= $value;
- }
-
- # If no token was good for calculation, we really don't know how
- # to rate this text; so we assume a spam and ham probability of 0.5
-
- if($hamminess === 1 and $spamminess === 1) {
- $hamminess = 0.5;
- $spamminess = 0.5;
- $n = 1;
- }
- else {
- # Get the number of relevant ratings
- $n = count($relevant);
- }
-
- # Calculate the combined rating
-
- # The actual hamminess and spamminess
- $hamminess = 1 - pow($hamminess, (1 / $n));
- $spamminess = 1 - pow($spamminess, (1 / $n));
-
- # Calculate the combined indicator
- $probability = ($hamminess - $spamminess) / ($hamminess + $spamminess);
-
- # We want a value between 0 and 1, not between -1 and +1, so ...
- $probability = (1 + $probability) / 2;
-
- # Alea iacta est
- return $probability;
-
- }
-
- /**
- * Calculate the spamminess of a single token also considering "degenerated" versions
- *
- * @access private
- * @param string $word
- * @param string $texts_ham
- * @param string $texts_spam
- * @return void
- */
-
- private function _get_probability($word, $texts_ham, $texts_spam)
- {
-
- # Let's see what we have!
-
- if(isset($this->_token_data['tokens'][$word]) === TRUE) {
- # The token was in the database, so we can use it's data as-is
- # and calculate the spamminess of this token directly
- return $this->_calc_probability($this->_token_data['tokens'][$word], $texts_ham, $texts_spam);
- }
-
- # Damn. The token was not found, so do we have at least similar words?
-
- if(isset($this->_token_data['degenerates'][$word]) === TRUE) {
-
- # We found similar words, so calculate the spamminess for each one
- # and choose the most important one for the further calculation
-
- # The default rating is 0.5 simply saying nothing
- $rating = 0.5;
-
- foreach($this->_token_data['degenerates'][$word] as $degenerate => $count) {
-
- # Calculate the rating of the current degenerated token
- $rating_tmp = $this->_calc_probability($count, $texts_ham, $texts_spam);
-
- # Is it more important than the rating of another degenerated version?
- if(abs(0.5 - $rating_tmp) > abs(0.5 - $rating))
- $rating = $rating_tmp;
-
- }
-
- return $rating;
-
- }
-
- else {
- # The token is really unknown, so choose the default rating
- # for completely unknown tokens. This strips down to the
- # robX parameter so we can cheap out the freaky math ;-)
- return $this->config['rob_x'];
- }
-
- }
-
- /**
- * Do the actual spamminess calculation of a single token
- *
- * @access private
- * @param array $data
- * @param string $texts_ham
- * @param string $texts_spam
- * @return void
- */
-
- private function _calc_probability($data, $texts_ham, $texts_spam)
- {
-
- # Calculate the basic probability by Mr. Graham
-
- # But: consider the number of ham and spam texts saved instead of the
- # number of entries where the token appeared to calculate a relative
- # spamminess because we count tokens appearing multiple times not just
- # once but as often as they appear in the learned texts
-
- $rel_ham = $data['count_ham'];
- $rel_spam = $data['count_spam'];
-
- if($texts_ham > 0)
- $rel_ham = $data['count_ham'] / $texts_ham;
-
- if($texts_spam > 0)
- $rel_spam = $data['count_spam'] / $texts_spam;
-
- $rating = $rel_spam / ($rel_ham + $rel_spam);
-
- # Calculate the better probability proposed by Mr. Robinson
- $all = $data['count_ham'] + $data['count_spam'];
- return (($this->config['rob_s'] * $this->config['rob_x']) + ($all * $rating)) / ($this->config['rob_s'] + $all);
-
- }
-
- /**
- * Check the validity of the category of a request
- *
- * @access private
- * @param string $category
- * @return void
- */
-
- private function _check_category($category)
- {
- return $category === self::HAM or $category === self::SPAM;
- }
-
- /**
- * Learn a reference text
- *
- * @access public
- * @param string $text
- * @param const $category Either b8::SPAM or b8::HAM
- * @return void
- */
-
- public function learn($text, $category)
- {
- return $this->_process_text($text, $category, self::LEARN);
- }
-
- /**
- * Unlearn a reference text
- *
- * @access public
- * @param string $text
- * @param const $category Either b8::SPAM or b8::HAM
- * @return void
- */
-
- public function unlearn($text, $category)
- {
- return $this->_process_text($text, $category, self::UNLEARN);
- }
-
- /**
- * Does the actual interaction with the storage backend for learning or unlearning texts
- *
- * @access private
- * @param string $text
- * @param const $category Either b8::SPAM or b8::HAM
- * @param const $action Either b8::LEARN or b8::UNLEARN
- * @return void
- */
-
- private function _process_text($text, $category, $action)
- {
-
- # Validate the startup
-
- $started_up = $this->validate();
-
- if($started_up !== TRUE)
- return $started_up;
-
- # Look if the request is okay
- if($this->_check_category($category) === FALSE)
- return self::TRAINER_CATEGORY_FAIL;
-
- # Get all tokens from $text
-
- $tokens = $this->_lexer->get_tokens($text);
-
- # Check if the lexer failed
- # (if so, $tokens will be a lexer error code, if not, $tokens will be an array)
- if(!is_array($tokens))
- return $tokens;
-
- # Pass the tokens and what to do with it to the storage backend
- return $this->_database->process_text($tokens, $category, $action);
-
- }
-
-}
-
-?>
\ No newline at end of file
diff --git a/library/spam/b8/degenerator/degenerator_default.php b/library/spam/b8/degenerator/degenerator_default.php
deleted file mode 100644
index 4ff6d882b..000000000
--- a/library/spam/b8/degenerator/degenerator_default.php
+++ /dev/null
@@ -1,127 +0,0 @@
-<?php
-
-# Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de>
-#
-# This file is part of the b8 package
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation in version 2.1 of the License.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program; if not, write to the Free Software Foundation,
-# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-
-/**
- * Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de>
- *
- * @license LGPL
- * @access public
- * @package b8
- * @author Tobias Leupold
- */
-
-class b8_degenerator_default
-{
-
- public $degenerates = array();
-
- /**
- * Generates a list of "degenerated" words for a list of words.
- *
- * @access public
- * @param array $tokens
- * @return array An array containing an array of degenerated tokens for each token
- */
-
- public function degenerate(array $words)
- {
-
- $degenerates = array();
-
- foreach($words as $word)
- $degenerates[$word] = $this->_degenerate_word($word);
-
- return $degenerates;
-
- }
-
- /**
- * If the original word is not found in the database then
- * we build "degenerated" versions of the word to lookup.
- *
- * @access private
- * @param string $word
- * @return array An array of degenerated words
- */
-
- protected function _degenerate_word($word)
- {
-
- # Check for any stored words so the process doesn't have to repeat
- if(isset($this->degenerates[$word]) === TRUE)
- return $this->degenerates[$word];
-
- $degenerate = array();
-
- # Add different version of upper and lower case and ucfirst
- array_push($degenerate, strtolower($word));
- array_push($degenerate, strtoupper($word));
- array_push($degenerate, ucfirst($word));
-
- # Degenerate all versions
-
- foreach($degenerate as $alt_word) {
-
- # Look for stuff like !!! and ???
-
- if(preg_match('/[!?]$/', $alt_word) > 0) {
-
- # Add versions with different !s and ?s
-
- if(preg_match('/[!?]{2,}$/', $alt_word) > 0) {
- $tmp = preg_replace('/([!?])+$/', '$1', $alt_word);
- array_push($degenerate, $tmp);
- }
-
- $tmp = preg_replace('/([!?])+$/', '', $alt_word);
- array_push($degenerate, $tmp);
-
- }
-
- # Look for ... at the end of the word
-
- $alt_word_int = $alt_word;
-
- while(preg_match('/[\.]$/', $alt_word_int) > 0) {
- $alt_word_int = substr($alt_word_int, 0, strlen($alt_word_int) - 1);
- array_push($degenerate, $alt_word_int);
- }
-
- }
-
- # Some degenerates are the same as the original word. These don't have
- # to be fetched, so we create a new array with only new tokens
-
- $real_degenerate = array();
-
- foreach($degenerate as $deg_word) {
- if($word != $deg_word)
- array_push($real_degenerate, $deg_word);
- }
-
- # Store the list of degenerates for the token
- $this->degenerates[$word] = $real_degenerate;
-
- return $real_degenerate;
-
- }
-
-}
-
-?>
\ No newline at end of file
diff --git a/library/spam/b8/lexer/lexer_default.php b/library/spam/b8/lexer/lexer_default.php
deleted file mode 100644
index 7b5ca22bf..000000000
--- a/library/spam/b8/lexer/lexer_default.php
+++ /dev/null
@@ -1,205 +0,0 @@
-<?php
-
-# Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de>
-#
-# This file is part of the b8 package
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation in version 2.1 of the License.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program; if not, write to the Free Software Foundation,
-# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-
-/**
- * Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de>
- *
- * @license LGPL
- * @access public
- * @package b8
- * @author Tobias Leupold
- * @author Oliver Lillie (aka buggedcom) (original PHP 5 port)
- */
-
-class b8_lexer_default
-{
-
- const LEXER_TEXT_NOT_STRING = 'LEXER_TEXT_NOT_STRING';
- const LEXER_TEXT_EMPTY = 'LEXER_TEXT_EMPTY';
-
- public $config = NULL;
-
- # The regular expressions we use to split the text to tokens
-
- public $regexp = array(
- 'ip' => '/([A-Za-z0-9\_\-\.]+)/',
- 'raw_split' => '/[\s,\.\/"\:;\|<>\-_\[\]{}\+=\)\(\*\&\^%]+/',
- 'html' => '/(<.+?>)/',
- 'tagname' => '/(.+?)\s/',
- 'numbers' => '/^[0-9]+$/'
- );
-
- /**
- * Constructs the lexer.
- *
- * @access public
- * @return void
- */
-
- function __construct($config)
- {
- $this->config = $config;
- }
-
- /**
- * Generates the tokens required for the bayesian filter.
- *
- * @access public
- * @param string $text
- * @return array Returns the list of tokens
- */
-
- public function get_tokens($text)
- {
-
- # Check that we actually have a string ...
- if(is_string($text) === FALSE)
- return self::LEXER_TEXT_NOT_STRING;
-
- # ... and that it's not empty
- if(empty($text) === TRUE)
- return self::LEXER_TEXT_EMPTY;
-
- # Re-convert the text to the original characters coded in UTF-8, as
- # they have been coded in html entities during the post process
- $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
-
- $tokens = array();
-
- # Find URLs and IP addresses
-
- preg_match_all($this->regexp['ip'], $text, $raw_tokens);
-
- foreach($raw_tokens[1] as $word) {
-
- # Check for a dot
- if(strpos($word, '.') === FALSE)
- continue;
-
- # Check that the word is valid, min and max sizes, etc.
- if($this->_is_valid($word) === FALSE)
- continue;
-
- if(isset($tokens[$word]) === FALSE)
- $tokens[$word] = 1;
- else
- $tokens[$word] += 1;
-
- # Delete the word from the text so it doesn't get re-added.
- $text = str_replace($word, '', $text);
-
- # Also process the parts of the URLs
- $url_parts = preg_split($this->regexp['raw_split'], $word);
-
- foreach($url_parts as $word) {
-
- # Again validate the part
-
- if($this->_is_valid($word) === FALSE)
- continue;
-
- if(isset($tokens[$word]) === FALSE)
- $tokens[$word] = 1;
- else
- $tokens[$word] += 1;
-
- }
-
- }
-
- # Split the remaining text
-
- $raw_tokens = preg_split($this->regexp['raw_split'], $text);
-
- foreach($raw_tokens as $word) {
-
- # Again validate the part
-
- if($this->_is_valid($word) === FALSE)
- continue;
-
- if(isset($tokens[$word]) === FALSE)
- $tokens[$word] = 1;
- else
- $tokens[$word] += 1;
-
- }
-
- # Process the HTML
-
- preg_match_all($this->regexp['html'], $text, $raw_tokens);
-
- foreach($raw_tokens[1] as $word) {
-
- # Again validate the part
-
- if($this->_is_valid($word) === FALSE)
- continue;
-
- # If the tag has parameters, just use the tag itself
-
- if(strpos($word, ' ') !== FALSE) {
- preg_match($this->regexp['tagname'], $word, $tmp);
- $word = "{$tmp[1]}...>";
- }
-
- if(isset($tokens[$word]) === FALSE)
- $tokens[$word] = 1;
- else
- $tokens[$word] += 1;
-
- }
-
- # Return a list of all found tokens
- return $tokens;
-
- }
-
- /**
- * Validates a token.
- *
- * @access private
- * @param string $token The token string.
- * @return boolean Returns TRUE if the token is valid, otherwise returns FALSE
- */
-
- private function _is_valid($token)
- {
-
- # Validate the size of the token
-
- $len = strlen($token);
-
- if($len < $this->config['min_size'] or $len > $this->config['max_size'])
- return FALSE;
-
- # We may want to exclude pure numbers
- if($this->config['allow_numbers'] === FALSE) {
- if(preg_match($this->regexp['numbers'], $token) > 0)
- return FALSE;
- }
-
- # Token is okay
- return TRUE;
-
- }
-
-}
-
-?> \ No newline at end of file
diff --git a/library/spam/b8/storage/storage_base.php b/library/spam/b8/storage/storage_base.php
deleted file mode 100644
index 6b181ee96..000000000
--- a/library/spam/b8/storage/storage_base.php
+++ /dev/null
@@ -1,396 +0,0 @@
-<?php
-
-# Copyright (C) 2010 Tobias Leupold <tobias.leupold@web.de>
-#
-# This file is part of the b8 package
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation in version 2.1 of the License.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program; if not, write to the Free Software Foundation,
-# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-
-/**
- * Functions used by all storage backends
- * Copyright (C) 2010 Tobias Leupold <tobias.leupold@web.de>
- *
- * @license LGPL
- * @access public
- * @package b8
- * @author Tobias Leupold
- */
-
-abstract class b8_storage_base
-{
-
- public $connected = FALSE;
-
- protected $_degenerator = NULL;
-
- const INTERNALS_TEXTS_HAM = 'bayes*texts.ham';
- const INTERNALS_TEXTS_SPAM = 'bayes*texts.spam';
- const INTERNALS_DBVERSION = 'bayes*dbversion';
-
- const BACKEND_NOT_CONNECTED = 'BACKEND_NOT_CONNECTED';
- const DATABASE_WRONG_VERSION = 'DATABASE_WRONG_VERSION';
- const DATABASE_NOT_B8 = 'DATABASE_NOT_B8';
-
- /**
- * Validates the class has all it needs to work.
- *
- * @access protected
- * @return mixed Returns TRUE if everything is okay, otherwise an error code.
- */
-
- protected function validate()
- {
-
- # We set up the degenerator here, as we would have to duplicate code if it
- # was done in the constructor of the respective storage backend.
- $class = 'b8_degenerator_' . $this->b8_config['degenerator'];
- $this->_degenerator = new $class();
-
- if($this->connected !== TRUE)
- return self::BACKEND_NOT_CONNECTED;
-
- return TRUE;
-
- }
-
- /**
- * Checks if a b8 database is used and if it's version is okay
- *
- * @access protected
- * @return mixed Returns TRUE if everything is okay, otherwise an error code.
- */
-
- protected function check_database($uid)
- {
-
- $internals = $this->get_internals($uid);
-
- if(isset($internals['dbversion'])) {
- if($internals['dbversion'] == "2") {
- return TRUE;
- }
- else {
- $this->connected = FALSE;
- return self::DATABASE_WRONG_VERSION;
- }
- }
- else {
- $this->connected = FALSE;
- return self::DATABASE_NOT_B8;
- }
-
- }
-
- /**
- * Parses the "count" data of a token.
- *
- * @access private
- * @param string $data
- * @return array Returns an array of the parsed data: array(count_ham, count_spam, lastseen).
- */
-
- private function _parse_count($data)
- {
-
- list($count_ham, $count_spam, $lastseen) = explode(' ', $data);
-
- $count_ham = (int) $count_ham;
- $count_spam = (int) $count_spam;
-
- return array(
- 'count_ham' => $count_ham,
- 'count_spam' => $count_spam
- );
-
- }
-
- /**
- * Get the database's internal variables.
- *
- * @access public
- * @return array Returns an array of all internals.
- */
-
- public function get_internals($uid)
- {
-
- $internals = $this->_get_query(
- array(
- self::INTERNALS_TEXTS_HAM,
- self::INTERNALS_TEXTS_SPAM,
- self::INTERNALS_DBVERSION
- ),
- $uid
- );
-
- return array(
- 'texts_ham' => (int) $internals[self::INTERNALS_TEXTS_HAM],
- 'texts_spam' => (int) $internals[self::INTERNALS_TEXTS_SPAM],
- 'dbversion' => (int) $internals[self::INTERNALS_DBVERSION]
- );
-
- }
-
- /**
- * Get all data about a list of tags from the database.
- *
- * @access public
- * @param array $tokens
- * @return mixed Returns FALSE on failure, otherwise returns array of returned data in the format array('tokens' => array(token => count), 'degenerates' => array(token => array(degenerate => count))).
- */
-
- public function get($tokens, $uid)
- {
-
- # Validate the startup
-
- $started_up = $this->validate();
-
- if($started_up !== TRUE)
- return $started_up;
-
- # First we see what we have in the database.
- $token_data = $this->_get_query($tokens, $uid);
-
- # Check if we have to degenerate some tokens
-
- $missing_tokens = array();
-
- foreach($tokens as $token) {
- if(!isset($token_data[$token]))
- $missing_tokens[] = $token;
- }
-
- if(count($missing_tokens) > 0) {
-
- # We have to degenerate some tokens
- $degenerates_list = array();
-
- # Generate a list of degenerated tokens for the missing tokens ...
- $degenerates = $this->_degenerator->degenerate($missing_tokens);
-
- # ... and look them up
-
- foreach($degenerates as $token => $token_degenerates)
- $degenerates_list = array_merge($degenerates_list, $token_degenerates);
-
- $token_data = array_merge($token_data, $this->_get_query($degenerates_list));
-
- }
-
- # Here, we have all availible data in $token_data.
-
- $return_data_tokens = array();
- $return_data_degenerates = array();
-
- foreach($tokens as $token) {
-
- if(isset($token_data[$token]) === TRUE) {
-
- # The token was found in the database
-
- # Add the data ...
- $return_data_tokens[$token] = $this->_parse_count($token_data[$token]);
-
- # ... and update it's lastseen parameter
- $this->_update($token, "{$return_data_tokens[$token]['count_ham']} {$return_data_tokens[$token]['count_spam']} " . $this->b8_config['today'], $uid );
-
- }
-
- else {
-
- # The token was not found, so we look if we
- # can return data for degenerated tokens
-
- # Check all degenerated forms of the token
-
- foreach($this->_degenerator->degenerates[$token] as $degenerate) {
-
- if(isset($token_data[$degenerate]) === TRUE) {
-
- # A degeneration of the token way found in the database
-
- # Add the data ...
- $return_data_degenerates[$token][$degenerate] = $this->_parse_count($token_data[$degenerate]);
-
- # ... and update it's lastseen parameter
- $this->_update($degenerate, "{$return_data_degenerates[$token][$degenerate]['count_ham']} {$return_data_degenerates[$token][$degenerate]['count_spam']} " . $this->b8_config['today'], $uid);
-
- }
-
- }
-
- }
-
- }
-
- # Now, all token data directly found in the database is in $return_data_tokens
- # and all data for degenerated versions is in $return_data_degenerates
-
- # First, we commit the changes to the lastseen parameters
- $this->_commit();
-
- # Then, we return what we have
- return array(
- 'tokens' => $return_data_tokens,
- 'degenerates' => $return_data_degenerates
- );
-
- }
-
- /**
- * Stores or deletes a list of tokens from the given category.
- *
- * @access public
- * @param array $tokens
- * @param const $category Either b8::HAM or b8::SPAM
- * @param const $action Either b8::LEARN or b8::UNLEARN
- * @return void
- */
-
- public function process_text($tokens, $category, $action, $uid)
- {
-
- # Validate the startup
-
- $started_up = $this->validate();
-
- if($started_up !== TRUE)
- return $started_up;
-
- # No matter what we do, we first have to check what data we have.
-
- # First get the internals, including the ham texts and spam texts counter
- $internals = $this->get_internals($uid);
-
- # Then, fetch all data for all tokens we have (and update their lastseen parameters)
- $token_data = $this->_get_query(array_keys($tokens), $uid);
-
- # Process all tokens to learn/unlearn
-
- foreach($tokens as $token => $count) {
-
- if(isset($token_data[$token])) {
-
- # We already have this token, so update it's data
-
- # Get the existing data
- list($count_ham, $count_spam, $lastseen) = explode(' ', $token_data[$token]);
- $count_ham = (int) $count_ham;
- $count_spam = (int) $count_spam;
-
- # Increase or decrease the right counter
-
- if($action === b8::LEARN) {
- if($category === b8::HAM)
- $count_ham += $count;
- elseif($category === b8::SPAM)
- $count_spam += $count;
- }
-
- elseif($action == b8::UNLEARN) {
- if($category === b8::HAM)
- $count_ham -= $count;
- elseif($category === b8::SPAM)
- $count_spam -= $count;
- }
-
- # We don't want to have negative values
-
- if($count_ham < 0)
- $count_ham = 0;
-
- if($count_spam < 0)
- $count_spam = 0;
-
- # Now let's see if we have to update or delete the token
- if($count_ham !== 0 or $count_spam !== 0)
- $this->_update($token, "$count_ham $count_spam " . $this->b8_config['today'], $uid);
- else
- $this->_del($token, $uid);
-
- }
-
- else {
-
- # We don't have the token. If we unlearn a text, we can't delete it
- # as we don't have it anyway, so just do something if we learn a text
-
- if($action === b8::LEARN) {
-
- if($category === b8::HAM)
- $data = '1 0 ';
- elseif($category === b8::SPAM)
- $data = '0 1 ';
-
- $data .= $this->b8_config['today'];
-
- $this->_put($token, $data, $uid);
-
- }
-
- }
-
- }
-
- # Now, all token have been processed, so let's update the right text
-
- if($action === b8::LEARN) {
-
- if($category === b8::HAM) {
- $internals['texts_ham']++;
- $this->_update(self::INTERNALS_TEXTS_HAM, $internals['texts_ham'], $uid);
- }
-
- elseif($category === b8::SPAM) {
- $internals['texts_spam']++;
- $this->_update(self::INTERNALS_TEXTS_SPAM, $internals['texts_spam'], $uid);
- }
-
- }
-
- elseif($action == b8::UNLEARN) {
-
- if($category === b8::HAM) {
-
- $internals['texts_ham']--;
-
- if($internals['texts_ham'] < 0)
- $internals['texts_ham'] = 0;
-
- $this->_update(self::INTERNALS_TEXTS_HAM, $internals['texts_ham'], $uid);
-
- }
-
- elseif($category === b8::SPAM) {
-
- $internals['texts_spam']--;
-
- if($internals['texts_spam'] < 0)
- $internals['texts_spam'] = 0;
-
- $this->_update(self::INTERNALS_TEXTS_SPAM, $internals['texts_spam'], $uid);
-
- }
-
- }
-
- # We're done and can commit all changes to the database now
- $this->_commit($uid);
-
- }
-
-}
-
-?> \ No newline at end of file
diff --git a/library/spam/b8/storage/storage_base.php.ORIG b/library/spam/b8/storage/storage_base.php.ORIG
deleted file mode 100644
index 01f5a69d7..000000000
--- a/library/spam/b8/storage/storage_base.php.ORIG
+++ /dev/null
@@ -1,395 +0,0 @@
-<?php
-
-# Copyright (C) 2010 Tobias Leupold <tobias.leupold@web.de>
-#
-# This file is part of the b8 package
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation in version 2.1 of the License.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program; if not, write to the Free Software Foundation,
-# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-
-/**
- * Functions used by all storage backends
- * Copyright (C) 2010 Tobias Leupold <tobias.leupold@web.de>
- *
- * @license LGPL
- * @access public
- * @package b8
- * @author Tobias Leupold
- */
-
-abstract class b8_storage_base
-{
-
- public $connected = FALSE;
-
- protected $_degenerator = NULL;
-
- const INTERNALS_TEXTS_HAM = 'bayes*texts.ham';
- const INTERNALS_TEXTS_SPAM = 'bayes*texts.spam';
- const INTERNALS_DBVERSION = 'bayes*dbversion';
-
- const BACKEND_NOT_CONNECTED = 'BACKEND_NOT_CONNECTED';
- const DATABASE_WRONG_VERSION = 'DATABASE_WRONG_VERSION';
- const DATABASE_NOT_B8 = 'DATABASE_NOT_B8';
-
- /**
- * Validates the class has all it needs to work.
- *
- * @access protected
- * @return mixed Returns TRUE if everything is okay, otherwise an error code.
- */
-
- protected function validate()
- {
-
- # We set up the degenerator here, as we would have to duplicate code if it
- # was done in the constructor of the respective storage backend.
- $class = 'b8_degenerator_' . $this->b8_config['degenerator'];
- $this->_degenerator = new $class();
-
- if($this->connected !== TRUE)
- return self::BACKEND_NOT_CONNECTED;
-
- return TRUE;
-
- }
-
- /**
- * Checks if a b8 database is used and if it's version is okay
- *
- * @access protected
- * @return mixed Returns TRUE if everything is okay, otherwise an error code.
- */
-
- protected function check_database()
- {
-
- $internals = $this->get_internals();
-
- if(isset($internals['dbversion'])) {
- if($internals['dbversion'] == "2") {
- return TRUE;
- }
- else {
- $this->connected = FALSE;
- return self::DATABASE_WRONG_VERSION;
- }
- }
- else {
- $this->connected = FALSE;
- return self::DATABASE_NOT_B8;
- }
-
- }
-
- /**
- * Parses the "count" data of a token.
- *
- * @access private
- * @param string $data
- * @return array Returns an array of the parsed data: array(count_ham, count_spam, lastseen).
- */
-
- private function _parse_count($data)
- {
-
- list($count_ham, $count_spam, $lastseen) = explode(' ', $data);
-
- $count_ham = (int) $count_ham;
- $count_spam = (int) $count_spam;
-
- return array(
- 'count_ham' => $count_ham,
- 'count_spam' => $count_spam
- );
-
- }
-
- /**
- * Get the database's internal variables.
- *
- * @access public
- * @return array Returns an array of all internals.
- */
-
- public function get_internals()
- {
-
- $internals = $this->_get_query(
- array(
- self::INTERNALS_TEXTS_HAM,
- self::INTERNALS_TEXTS_SPAM,
- self::INTERNALS_DBVERSION
- )
- );
-
- return array(
- 'texts_ham' => (int) $internals[self::INTERNALS_TEXTS_HAM],
- 'texts_spam' => (int) $internals[self::INTERNALS_TEXTS_SPAM],
- 'dbversion' => (int) $internals[self::INTERNALS_DBVERSION]
- );
-
- }
-
- /**
- * Get all data about a list of tags from the database.
- *
- * @access public
- * @param array $tokens
- * @return mixed Returns FALSE on failure, otherwise returns array of returned data in the format array('tokens' => array(token => count), 'degenerates' => array(token => array(degenerate => count))).
- */
-
- public function get($tokens)
- {
-
- # Validate the startup
-
- $started_up = $this->validate();
-
- if($started_up !== TRUE)
- return $started_up;
-
- # First we see what we have in the database.
- $token_data = $this->_get_query($tokens);
-
- # Check if we have to degenerate some tokens
-
- $missing_tokens = array();
-
- foreach($tokens as $token) {
- if(!isset($token_data[$token]))
- $missing_tokens[] = $token;
- }
-
- if(count($missing_tokens) > 0) {
-
- # We have to degenerate some tokens
- $degenerates_list = array();
-
- # Generate a list of degenerated tokens for the missing tokens ...
- $degenerates = $this->_degenerator->degenerate($missing_tokens);
-
- # ... and look them up
-
- foreach($degenerates as $token => $token_degenerates)
- $degenerates_list = array_merge($degenerates_list, $token_degenerates);
-
- $token_data = array_merge($token_data, $this->_get_query($degenerates_list));
-
- }
-
- # Here, we have all availible data in $token_data.
-
- $return_data_tokens = array();
- $return_data_degenerates = array();
-
- foreach($tokens as $token) {
-
- if(isset($token_data[$token]) === TRUE) {
-
- # The token was found in the database
-
- # Add the data ...
- $return_data_tokens[$token] = $this->_parse_count($token_data[$token]);
-
- # ... and update it's lastseen parameter
- $this->_update($token, "{$return_data_tokens[$token]['count_ham']} {$return_data_tokens[$token]['count_spam']} " . $this->b8_config['today']);
-
- }
-
- else {
-
- # The token was not found, so we look if we
- # can return data for degenerated tokens
-
- # Check all degenerated forms of the token
-
- foreach($this->_degenerator->degenerates[$token] as $degenerate) {
-
- if(isset($token_data[$degenerate]) === TRUE) {
-
- # A degeneration of the token way found in the database
-
- # Add the data ...
- $return_data_degenerates[$token][$degenerate] = $this->_parse_count($token_data[$degenerate]);
-
- # ... and update it's lastseen parameter
- $this->_update($degenerate, "{$return_data_degenerates[$token][$degenerate]['count_ham']} {$return_data_degenerates[$token][$degenerate]['count_spam']} " . $this->b8_config['today']);
-
- }
-
- }
-
- }
-
- }
-
- # Now, all token data directly found in the database is in $return_data_tokens
- # and all data for degenerated versions is in $return_data_degenerates
-
- # First, we commit the changes to the lastseen parameters
- $this->_commit();
-
- # Then, we return what we have
- return array(
- 'tokens' => $return_data_tokens,
- 'degenerates' => $return_data_degenerates
- );
-
- }
-
- /**
- * Stores or deletes a list of tokens from the given category.
- *
- * @access public
- * @param array $tokens
- * @param const $category Either b8::HAM or b8::SPAM
- * @param const $action Either b8::LEARN or b8::UNLEARN
- * @return void
- */
-
- public function process_text($tokens, $category, $action)
- {
-
- # Validate the startup
-
- $started_up = $this->validate();
-
- if($started_up !== TRUE)
- return $started_up;
-
- # No matter what we do, we first have to check what data we have.
-
- # First get the internals, including the ham texts and spam texts counter
- $internals = $this->get_internals();
-
- # Then, fetch all data for all tokens we have (and update their lastseen parameters)
- $token_data = $this->_get_query(array_keys($tokens));
-
- # Process all tokens to learn/unlearn
-
- foreach($tokens as $token => $count) {
-
- if(isset($token_data[$token])) {
-
- # We already have this token, so update it's data
-
- # Get the existing data
- list($count_ham, $count_spam, $lastseen) = explode(' ', $token_data[$token]);
- $count_ham = (int) $count_ham;
- $count_spam = (int) $count_spam;
-
- # Increase or decrease the right counter
-
- if($action === b8::LEARN) {
- if($category === b8::HAM)
- $count_ham += $count;
- elseif($category === b8::SPAM)
- $count_spam += $count;
- }
-
- elseif($action == b8::UNLEARN) {
- if($category === b8::HAM)
- $count_ham -= $count;
- elseif($category === b8::SPAM)
- $count_spam -= $count;
- }
-
- # We don't want to have negative values
-
- if($count_ham < 0)
- $count_ham = 0;
-
- if($count_spam < 0)
- $count_spam = 0;
-
- # Now let's see if we have to update or delete the token
- if($count_ham !== 0 or $count_spam !== 0)
- $this->_update($token, "$count_ham $count_spam " . $this->b8_config['today']);
- else
- $this->_del($token);
-
- }
-
- else {
-
- # We don't have the token. If we unlearn a text, we can't delete it
- # as we don't have it anyway, so just do something if we learn a text
-
- if($action === b8::LEARN) {
-
- if($category === b8::HAM)
- $data = '1 0 ';
- elseif($category === b8::SPAM)
- $data = '0 1 ';
-
- $data .= $this->b8_config['today'];
-
- $this->_put($token, $data);
-
- }
-
- }
-
- }
-
- # Now, all token have been processed, so let's update the right text
-
- if($action === b8::LEARN) {
-
- if($category === b8::HAM) {
- $internals['texts_ham']++;
- $this->_update(self::INTERNALS_TEXTS_HAM, $internals['texts_ham']);
- }
-
- elseif($category === b8::SPAM) {
- $internals['texts_spam']++;
- $this->_update(self::INTERNALS_TEXTS_SPAM, $internals['texts_spam']);
- }
-
- }
-
- elseif($action == b8::UNLEARN) {
-
- if($category === b8::HAM) {
-
- $internals['texts_ham']--;
-
- if($internals['texts_ham'] < 0)
- $internals['texts_ham'] = 0;
-
- $this->_update(self::INTERNALS_TEXTS_HAM, $internals['texts_ham']);
-
- }
-
- elseif($category === b8::SPAM) {
-
- $internals['texts_spam']--;
-
- if($internals['texts_spam'] < 0)
- $internals['texts_spam'] = 0;
-
- $this->_update(self::INTERNALS_TEXTS_SPAM, $internals['texts_spam']);
-
- }
-
- }
-
- # We're done and can commit all changes to the database now
- $this->_commit();
-
- }
-
-}
-
-?> \ No newline at end of file
diff --git a/library/spam/b8/storage/storage_dba.php b/library/spam/b8/storage/storage_dba.php
deleted file mode 100644
index 04618b23e..000000000
--- a/library/spam/b8/storage/storage_dba.php
+++ /dev/null
@@ -1,198 +0,0 @@
-<?php
-
-# Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de>
-#
-# This file is part of the b8 package
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation in version 2.1 of the License.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program; if not, write to the Free Software Foundation,
-# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-
-/**
- * The DBA (Berkeley DB) abstraction layer for communicating with the database.
- * Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de>
- *
- * @license LGPL
- * @access public
- * @package b8
- * @author Tobias Leupold
- */
-
-class b8_storage_dba extends b8_storage_base
-{
-
- public $config = array(
- 'database' => 'wordlist.db',
- 'handler' => 'db4',
- );
-
- public $b8_config = array(
- 'degenerator' => NULL,
- 'today' => NULL
- );
-
- private $_db = NULL;
-
- const DATABASE_CONNECTION_FAIL = 'DATABASE_CONNECTION_FAIL';
-
- /**
- * Constructs the database layer.
- *
- * @access public
- * @param string $config
- */
-
- function __construct($config, $degenerator, $today)
- {
-
- # Pass some variables of the main b8 config to this class
- $this->b8_config['degenerator'] = $degenerator;
- $this->b8_config['today'] = $today;
-
- # Validate the config items
- if(count($config) > 0) {
- foreach ($config as $name => $value) {
- $this->config[$name] = (string) $value;
- }
- }
-
- }
-
- /**
- * Closes the database connection.
- *
- * @access public
- * @return void
- */
-
- function __destruct()
- {
- if($this->_db !== NULL) {
- dba_close($this->_db);
- $this->connected = FALSE;
- }
- }
-
- /**
- * Connect to the database and do some checks.
- *
- * @access public
- * @return mixed Returns TRUE on a successful database connection, otherwise returns a constant from b8.
- */
-
- public function connect()
- {
-
- # Have we already connected?
- if($this->_db !== NULL)
- return TRUE;
-
- # Open the database connection
- $this->_db = dba_open(dirname(__FILE__) . DIRECTORY_SEPARATOR . ".." . DIRECTORY_SEPARATOR . $this->config['database'], "w", $this->config['handler']);
-
- if($this->_db === FALSE) {
- $this->connected = FALSE;
- $this->_db = NULL;
- return self::DATABASE_CONNECTION_FAIL;
- }
-
- # Everything is okay and connected
-
- $this->connected = TRUE;
-
- # Let's see if this is a b8 database and the version is okay
- return $this->check_database();
-
- }
-
- /**
- * Does the actual interaction with the database when fetching data.
- *
- * @access protected
- * @param array $tokens
- * @return mixed Returns an array of the returned data in the format array(token => data) or an empty array if there was no data.
- */
-
- protected function _get_query($tokens)
- {
-
- $data = array();
-
- foreach ($tokens as $token) {
-
- $count = dba_fetch($token, $this->_db);
-
- if($count !== FALSE)
- $data[$token] = $count;
-
- }
-
- return $data;
-
- }
-
- /**
- * Store a token to the database.
- *
- * @access protected
- * @param string $token
- * @param string $count
- * @return bool TRUE on success or FALSE on failure
- */
-
- protected function _put($token, $count) {
- return dba_insert($token, $count, $this->_db);
- }
-
- /**
- * Update an existing token.
- *
- * @access protected
- * @param string $token
- * @param string $count
- * @return bool TRUE on success or FALSE on failure
- */
-
- protected function _update($token, $count)
- {
- return dba_replace($token, $count, $this->_db);
- }
-
- /**
- * Remove a token from the database.
- *
- * @access protected
- * @param string $token
- * @return bool TRUE on success or FALSE on failure
- */
-
- protected function _del($token)
- {
- return dba_delete($token, $this->_db);
- }
-
- /**
- * Does nothing :-D
- *
- * @access protected
- * @return void
- */
-
- protected function _commit()
- {
- # We just need this function because the (My)SQL backend(s) need it.
- return;
- }
-
-}
-
-?> \ No newline at end of file
diff --git a/library/spam/b8/storage/storage_frndc.php b/library/spam/b8/storage/storage_frndc.php
deleted file mode 100644
index f211d4431..000000000
--- a/library/spam/b8/storage/storage_frndc.php
+++ /dev/null
@@ -1,318 +0,0 @@
-<?php
-
-# Copyright (C) 2006-2011 Tobias Leupold <tobias.leupold@web.de>
-#
-# This file is part of the b8 package
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation in version 2.1 of the License.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program; if not, write to the Free Software Foundation,
-# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-
-/**
- * b8 storage backend that reads and writes through the q() and dbesc()
- * helpers rather than opening a MySQL link of its own.
- * Copyright (C) 2009 Oliver Lillie (aka buggedcom)
- * Copyright (C) 2010-2011 Tobias Leupold <tobias.leupold@web.de>
- *
- * Every stored term is scoped by a uid. Inserts, updates and deletes are
- * queued in memory and sent in batches by _commit().
- *
- * NOTE(review): q(), dbesc() and datetime_convert() are defined outside this
- * file; confirm their exact contracts against their definitions.
- * NOTE(review): _get_query() uses the hard-coded table "spam" while _commit()
- * uses $config['table_name'] -- confirm which one is intended.
- *
- * @license LGPL
- * @access public
- * @package b8
- * @author Oliver Lillie (aka buggedcom) (original PHP 5 port and optimizations)
- * @author Tobias Leupold
- */
-
-class b8_storage_frndc extends b8_storage_base
-{
-
-    # Backend settings; overridden by the array passed to the constructor.
-    public $config = array(
-        'database'   => 'b8_wordlist',
-        'table_name' => 'b8_wordlist',
-        'host'       => 'localhost',
-        'user'       => FALSE,
-        'pass'       => FALSE,
-        'connection' => NULL
-    );
-
-    # Values handed over from the main b8 configuration.
-    public $b8_config = array(
-        'degenerator' => NULL,
-        'today'       => NULL
-    );
-
-    # Never assigned anywhere in this class; only read by __destruct()
-    private $_connection = NULL;
-    # Escaped terms queued for deletion
-    private $_deletes = array();
-    # '("term", "count", "uid")' tuples queued for insertion
-    private $_puts = array();
-    # '("term", "count", "uid")' tuples queued for insert-or-update
-    private $_updates = array();
-    # uid of the most recent _del() call; scopes the batched DELETE
-    private $uid = 0;
-
-    const DATABASE_CONNECTION_FAIL         = 'DATABASE_CONNECTION_FAIL';
-    const DATABASE_CONNECTION_ERROR        = 'DATABASE_CONNECTION_ERROR';
-    const DATABASE_CONNECTION_BAD_RESOURCE = 'DATABASE_CONNECTION_BAD_RESOURCE';
-    const DATABASE_SELECT_ERROR            = 'DATABASE_SELECT_ERROR';
-    const DATABASE_TABLE_ACCESS_FAIL       = 'DATABASE_TABLE_ACCESS_FAIL';
-    const DATABASE_WRONG_VERSION           = 'DATABASE_WRONG_VERSION';
-
-    /**
-     * Constructs the database layer.
-     *
-     * @access public
-     * @param array $config Backend settings; only the keys present in $config are recognised, all others are ignored
-     * @param mixed $degenerator Stored in $b8_config['degenerator']
-     * @param mixed $today Stored in $b8_config['today']
-     */
-
-    function __construct($config, $degenerator, $today)
-    {
-
-        # Pass some variables of the main b8 config to this class
-        $this->b8_config['degenerator'] = $degenerator;
-        $this->b8_config['today'] = $today;
-
-        # Validate the config items
-        if(count($config) > 0) {
-
-            foreach ($config as $name => $value) {
-
-                switch($name) {
-
-                    case 'table_name':
-                    case 'host':
-                    case 'user':
-                    case 'pass':
-                    case 'database':
-                        $this->config[$name] = (string) $value;
-                        break;
-
-                    case 'connection':
-                        # NULL keeps the default
-                        if($value === NULL)
-                            break;
-                        # Accept only mysql link resources; FALSE marks a rejected value
-                        $is_link = is_resource($value)
-                            && in_array(get_resource_type($value), array('mysql link', 'mysql link persistent'), TRUE);
-                        $this->config['connection'] = $is_link ? $value : FALSE;
-                        break;
-
-                }
-
-            }
-
-        }
-
-    }
-
-    /**
-     * Closes the database connection.
-     *
-     * @access public
-     * @return void
-     */
-
-    function __destruct()
-    {
-
-        # $_connection is never assigned in this class, so this guard
-        # currently always returns and nothing below is reached.
-        if($this->_connection === NULL)
-            return;
-
-        # Commit any changes before closing
-        $this->_commit();
-
-        # Just close the connection if no link-resource was passed and b8 created its own connection
-        if($this->config['connection'] === NULL)
-            mysql_close($this->_connection);
-
-        $this->connected = FALSE;
-
-    }
-
-    /**
-     * Marks the backend as connected. No connection is opened and no checks
-     * are made here; all queries go through q().
-     *
-     * @access public
-     * @return bool Always TRUE
-     */
-
-    public function connect()
-    {
-        $this->connected = TRUE;
-        return TRUE;
-    }
-
-    /**
-     * Fetches the stored rows for the given tokens and creates a row for
-     * every requested token that is not stored yet.
-     *
-     * @access protected
-     * @param array $tokens Terms to look up
-     * @param int $uid Owner whose rows are queried and created
-     * @return mixed Whatever q() returned for the SELECT (rows created by this call are not included), or an empty array if $tokens is empty.
-     */
-
-    protected function _get_query($tokens, $uid)
-    {
-
-        # Without tokens there is nothing to look up
-        if(count($tokens) === 0)
-            return array();
-
-        # uid is used unquoted in the SQL below, so force it to an integer
-        $uid = intval($uid);
-
-        # Construct the query ...
-        $escaped = array();
-
-        foreach ($tokens as $token)
-            $escaped[] = dbesc($token);
-
-        # ... and fetch the data
-        $result = q('SELECT * FROM spam WHERE term IN ("' . implode('", "', $escaped) . '") AND uid = ' . $uid);
-
-        # Collect the terms that are already stored
-        $known = array();
-
-        if(is_array($result)) {
-            foreach($result as $rr)
-                $known[] = $rr['term'];
-        }
-
-        # Build one value tuple per requested token that is missing
-        $rows = array();
-
-        foreach($tokens as $token) {
-            if(! in_array($token, $known)) {
-                $rows[] = sprintf("('%s','%s',%d)",
-                    dbesc(strtolower($token)),
-                    dbesc(datetime_convert()),
-                    $uid
-                );
-            }
-        }
-
-        # A single multi-row INSERT: column list once, then all tuples
-        if(count($rows) > 0)
-            q('insert into spam (term,date,uid) values ' . implode(',', $rows));
-
-        return $result;
-
-    }
-
-    /**
-     * Queues a new token for insertion; it is written by _commit().
-     *
-     * @access protected
-     * @param string $token
-     * @param string $count
-     * @param mixed $uid Owner of the token
-     * @return void
-     */
-
-    protected function _put($token, $count, $uid)
-    {
-        $token = dbesc($token);
-        $count = dbesc($count);
-        $uid = dbesc($uid);
-        array_push($this->_puts, '("' . $token . '", "' . $count . '", "' . $uid . '")');
-    }
-
-    /**
-     * Queues a new count for an existing token; it is written by _commit().
-     *
-     * @access protected
-     * @param string $token
-     * @param string $count
-     * @param mixed $uid Owner of the token
-     * @return void
-     */
-
-    protected function _update($token, $count, $uid)
-    {
-        $token = dbesc($token);
-        $count = dbesc($count);
-        $uid = dbesc($uid);
-        # Goes to the update queue, not the insert queue, so that _commit()
-        # sends it with ON DUPLICATE KEY UPDATE
-        array_push($this->_updates, '("' . $token . '", "' . $count . '", "' . $uid . '")');
-    }
-
-    /**
-     * Queues a token for deletion; it is removed by _commit().
-     *
-     * NOTE: only the uid of the most recent call is remembered, so all
-     * queued deletions are issued for that uid.
-     *
-     * @access protected
-     * @param string $token
-     * @param int $uid Owner of the token
-     * @return void
-     */
-
-    protected function _del($token, $uid)
-    {
-        # uid is used unquoted in the DELETE, so force it to an integer
-        $this->uid = intval($uid);
-        array_push($this->_deletes, dbesc($token));
-    }
-
-    /**
-     * Commits any modification queries.
-     *
-     * @access protected
-     * @param mixed $uid Unused; every queued tuple already carries its uid. Optional so that the argument-less call in __destruct() is valid.
-     * @return void
-     */
-
-    protected function _commit($uid = NULL)
-    {
-
-        $table = $this->config['table_name'];
-
-        if(count($this->_deletes) > 0) {
-
-            q('DELETE FROM ' . $table
-                . ' WHERE term IN ("' . implode('", "', $this->_deletes) . '")'
-                . ' AND uid = ' . intval($this->uid));
-
-            $this->_deletes = array();
-
-        }
-
-        if(count($this->_puts) > 0) {
-
-            # FIXME marker carried over from the original; the concern was not recorded
-            q('INSERT INTO ' . $table . '(term, count, uid)'
-                . ' VALUES ' . implode(', ', $this->_puts));
-
-            $this->_puts = array();
-
-        }
-
-        if(count($this->_updates) > 0) {
-
-            # NOTE(review): ON DUPLICATE KEY UPDATE only updates when the table
-            # has a unique key covering the term and its uid -- confirm the schema.
-            q('INSERT INTO ' . $table . '(term, count, uid)'
-                . ' VALUES ' . implode(', ', $this->_updates)
-                . ' ON DUPLICATE KEY UPDATE ' . $table . '.count = VALUES(count)');
-
-            $this->_updates = array();
-
-        }
-
-    }
-
-}
-
-?> \ No newline at end of file
diff --git a/library/spam/b8/storage/storage_mysql.php b/library/spam/b8/storage/storage_mysql.php
deleted file mode 100644
index 022536350..000000000
--- a/library/spam/b8/storage/storage_mysql.php
+++ /dev/null
@@ -1,351 +0,0 @@
-<?php
-
-# Copyright (C) 2006-2011 Tobias Leupold <tobias.leupold@web.de>
-#
-# This file is part of the b8 package
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation in version 2.1 of the License.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program; if not, write to the Free Software Foundation,
-# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-
-/**
- * The MySQL abstraction layer for communicating with the database.
- * Copyright (C) 2009 Oliver Lillie (aka buggedcom)
- * Copyright (C) 2010-2011 Tobias Leupold <tobias.leupold@web.de>
- *
- * Inserts, updates and deletes are queued in memory and sent to the server
- * in batches by _commit().
- *
- * NOTE(review): this class uses PHP's original mysql_* extension, which no
- * longer exists in PHP 7 and later. Moving to mysqli or PDO would change the
- * type accepted for $config['connection'], so it is not done here.
- *
- * @license LGPL
- * @access public
- * @package b8
- * @author Oliver Lillie (aka buggedcom) (original PHP 5 port and optimizations)
- * @author Tobias Leupold
- */
-
-class b8_storage_mysql extends b8_storage_base
-{
-
-    # Backend settings; overridden by the array passed to the constructor.
-    public $config = array(
-        'database'   => 'b8_wordlist',
-        'table_name' => 'b8_wordlist',
-        'host'       => 'localhost',
-        'user'       => FALSE,
-        'pass'       => FALSE,
-        'connection' => NULL
-    );
-
-    # Values handed over from the main b8 configuration.
-    public $b8_config = array(
-        'degenerator' => NULL,
-        'today'       => NULL
-    );
-
-    # Link used for every query; NULL while no link is held
-    private $_connection = NULL;
-    # Escaped tokens queued for deletion
-    private $_deletes = array();
-    # '("token", "count")' tuples queued for insertion
-    private $_puts = array();
-    # '("token", "count")' tuples queued for insert-or-update
-    private $_updates = array();
-
-    const DATABASE_CONNECTION_FAIL         = 'DATABASE_CONNECTION_FAIL';
-    const DATABASE_CONNECTION_ERROR        = 'DATABASE_CONNECTION_ERROR';
-    const DATABASE_CONNECTION_BAD_RESOURCE = 'DATABASE_CONNECTION_BAD_RESOURCE';
-    const DATABASE_SELECT_ERROR            = 'DATABASE_SELECT_ERROR';
-    const DATABASE_TABLE_ACCESS_FAIL       = 'DATABASE_TABLE_ACCESS_FAIL';
-    const DATABASE_WRONG_VERSION           = 'DATABASE_WRONG_VERSION';
-
-    /**
-     * Constructs the database layer.
-     *
-     * @access public
-     * @param array $config Backend settings; only the keys present in $config are recognised, all others are ignored
-     * @param mixed $degenerator Stored in $b8_config['degenerator']
-     * @param mixed $today Stored in $b8_config['today']
-     */
-
-    function __construct($config, $degenerator, $today)
-    {
-
-        # Pass some variables of the main b8 config to this class
-        $this->b8_config['degenerator'] = $degenerator;
-        $this->b8_config['today'] = $today;
-
-        # Validate the config items
-        if(count($config) > 0) {
-
-            foreach ($config as $name => $value) {
-
-                switch($name) {
-
-                    case 'table_name':
-                    case 'host':
-                    case 'user':
-                    case 'pass':
-                    case 'database':
-                        $this->config[$name] = (string) $value;
-                        break;
-
-                    case 'connection':
-                        # NULL keeps the default, i.e. connect() opens its own link
-                        if($value === NULL)
-                            break;
-                        # Accept only mysql link resources; FALSE marks a rejected
-                        # value and makes connect() report a bad resource
-                        $is_link = is_resource($value)
-                            && in_array(get_resource_type($value), array('mysql link', 'mysql link persistent'), TRUE);
-                        $this->config['connection'] = $is_link ? $value : FALSE;
-                        break;
-
-                }
-
-            }
-
-        }
-
-    }
-
-    /**
-     * Commits queued changes and closes the connection if b8 opened it.
-     *
-     * @access public
-     * @return void
-     */
-
-    function __destruct()
-    {
-
-        if($this->_connection === NULL)
-            return;
-
-        # Commit any changes before closing
-        $this->_commit();
-
-        # Just close the connection if no link-resource was passed and b8 created its own connection
-        if($this->config['connection'] === NULL)
-            mysql_close($this->_connection);
-
-        $this->connected = FALSE;
-
-    }
-
-    /**
-     * Connect to the database and do some checks.
-     *
-     * @access public
-     * @return mixed Returns TRUE if already connected, the result of check_database() after a fresh connection, otherwise one of the DATABASE_* constants (two of them followed by ": " and the MySQL error text).
-     */
-
-    public function connect()
-    {
-
-        # Are we already connected?
-        if($this->connected === TRUE)
-            return TRUE;
-
-        $passed = $this->config['connection'];
-
-        # A link was passed, but the constructor rejected it
-        if($passed === FALSE) {
-            $this->connected = FALSE;
-            return self::DATABASE_CONNECTION_BAD_RESOURCE;
-        }
-
-        if($passed === NULL) {
-
-            # No link was passed, so we have to connect ourselves
-            $link = mysql_connect($this->config['host'], $this->config['user'], $this->config['pass']);
-
-            if(! $link) {
-                # Keep $_connection NULL so that __destruct() does not try
-                # to commit to or close a link that was never opened
-                $this->_connection = NULL;
-                $this->connected = FALSE;
-                return self::DATABASE_CONNECTION_ERROR;
-            }
-
-            $this->_connection = $link;
-
-            if(mysql_select_db($this->config['database'], $this->_connection) === FALSE) {
-                $this->connected = FALSE;
-                return self::DATABASE_SELECT_ERROR . ": " . mysql_error($this->_connection);
-            }
-
-        }
-
-        else {
-            # Use the link that was passed in
-            $this->_connection = $passed;
-        }
-
-        # Just in case ...
-        if($this->_connection === NULL) {
-            $this->connected = FALSE;
-            return self::DATABASE_CONNECTION_FAIL;
-        }
-
-        # Check to see if the wordlist table exists
-        if(mysql_query('DESCRIBE ' . $this->config['table_name'], $this->_connection) === FALSE) {
-            $this->connected = FALSE;
-            return self::DATABASE_TABLE_ACCESS_FAIL . ": " . mysql_error($this->_connection);
-        }
-
-        # Everything is okay and connected
-        $this->connected = TRUE;
-
-        # Let's see if this is a b8 database and the version is okay
-        return $this->check_database();
-
-    }
-
-    /**
-     * Does the actual interaction with the database when fetching data.
-     *
-     * @access protected
-     * @param array $tokens
-     * @return array Map of token => count for the tokens found; empty if $tokens is empty, nothing matched or the query failed.
-     */
-
-    protected function _get_query($tokens)
-    {
-
-        # Without tokens there is nothing to look up
-        if(count($tokens) === 0)
-            return array();
-
-        # Construct the query ...
-        $escaped = array();
-
-        foreach ($tokens as $token)
-            $escaped[] = mysql_real_escape_string($token, $this->_connection);
-
-        # ... and fetch the data
-        $result = mysql_query('
-            SELECT token, count
-            FROM ' . $this->config['table_name'] . '
-            WHERE token IN ("' . implode('", "', $escaped) . '");
-        ', $this->_connection);
-
-        # The query failed, so there is nothing to fetch or free
-        if($result === FALSE)
-            return array();
-
-        $data = array();
-
-        while ($row = mysql_fetch_array($result, MYSQL_ASSOC))
-            $data[$row['token']] = $row['count'];
-
-        mysql_free_result($result);
-
-        return $data;
-
-    }
-
-    /**
-     * Queues a new token for insertion; it is written by _commit().
-     *
-     * @access protected
-     * @param string $token
-     * @param string $count
-     * @return void
-     */
-
-    protected function _put($token, $count)
-    {
-        $token = mysql_real_escape_string($token, $this->_connection);
-        $count = mysql_real_escape_string($count, $this->_connection);
-        array_push($this->_puts, '("' . $token . '", "' . $count . '")');
-    }
-
-    /**
-     * Queues a new count for an existing token; it is written by _commit().
-     *
-     * @access protected
-     * @param string $token
-     * @param string $count
-     * @return void
-     */
-
-    protected function _update($token, $count)
-    {
-        $token = mysql_real_escape_string($token, $this->_connection);
-        $count = mysql_real_escape_string($count, $this->_connection);
-        array_push($this->_updates, '("' . $token . '", "' . $count . '")');
-    }
-
-    /**
-     * Queues a token for deletion; it is removed by _commit().
-     *
-     * @access protected
-     * @param string $token
-     * @return void
-     */
-
-    protected function _del($token)
-    {
-        $token = mysql_real_escape_string($token, $this->_connection);
-        array_push($this->_deletes, $token);
-    }
-
-    /**
-     * Commits any modification queries.
-     *
-     * Sends the queued deletions, insertions and updates, in that order, as
-     * one statement each and empties each queue afterwards. The statements
-     * are DELETE and INSERT, for which mysql_query() returns TRUE or FALSE,
-     * so there is no result to free.
-     *
-     * @access protected
-     * @return void
-     */
-
-    protected function _commit()
-    {
-
-        $table = $this->config['table_name'];
-
-        if(count($this->_deletes) > 0) {
-
-            mysql_query('
-                DELETE FROM ' . $table . '
-                WHERE token IN ("' . implode('", "', $this->_deletes) . '");
-            ', $this->_connection);
-
-            $this->_deletes = array();
-
-        }
-
-        if(count($this->_puts) > 0) {
-
-            mysql_query('
-                INSERT INTO ' . $table . '(token, count)
-                VALUES ' . implode(', ', $this->_puts) . ';', $this->_connection);
-
-            $this->_puts = array();
-
-        }
-
-        if(count($this->_updates) > 0) {
-
-            mysql_query('
-                INSERT INTO ' . $table . '(token, count)
-                VALUES ' . implode(', ', $this->_updates) . '
-                ON DUPLICATE KEY UPDATE ' . $table . '.count = VALUES(count);', $this->_connection);
-
-            $this->_updates = array();
-
-        }
-
-    }
-
-}
-
-?> \ No newline at end of file