aboutsummaryrefslogtreecommitdiffstats
path: root/library/spam/b8/storage/storage_base.php.ORIG
diff options
context:
space:
mode:
Diffstat (limited to 'library/spam/b8/storage/storage_base.php.ORIG')
-rw-r--r--library/spam/b8/storage/storage_base.php.ORIG395
1 files changed, 0 insertions, 395 deletions
diff --git a/library/spam/b8/storage/storage_base.php.ORIG b/library/spam/b8/storage/storage_base.php.ORIG
deleted file mode 100644
index 01f5a69d7..000000000
--- a/library/spam/b8/storage/storage_base.php.ORIG
+++ /dev/null
@@ -1,395 +0,0 @@
-<?php
-
-# Copyright (C) 2010 Tobias Leupold <tobias.leupold@web.de>
-#
-# This file is part of the b8 package
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation in version 2.1 of the License.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program; if not, write to the Free Software Foundation,
-# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-
-/**
- * Functions used by all storage backends
- * Copyright (C) 2010 Tobias Leupold <tobias.leupold@web.de>
- *
- * @license LGPL
- * @access public
- * @package b8
- * @author Tobias Leupold
- */
-
-abstract class b8_storage_base
-{
-
- public $connected = FALSE;
-
- protected $_degenerator = NULL;
-
- const INTERNALS_TEXTS_HAM = 'bayes*texts.ham';
- const INTERNALS_TEXTS_SPAM = 'bayes*texts.spam';
- const INTERNALS_DBVERSION = 'bayes*dbversion';
-
- const BACKEND_NOT_CONNECTED = 'BACKEND_NOT_CONNECTED';
- const DATABASE_WRONG_VERSION = 'DATABASE_WRONG_VERSION';
- const DATABASE_NOT_B8 = 'DATABASE_NOT_B8';
-
- /**
- * Validates the class has all it needs to work.
- *
- * @access protected
- * @return mixed Returns TRUE if everything is okay, otherwise an error code.
- */
-
- protected function validate()
- {
-
- # We set up the degenerator here, as we would have to duplicate code if it
- # was done in the constructor of the respective storage backend.
- $class = 'b8_degenerator_' . $this->b8_config['degenerator'];
- $this->_degenerator = new $class();
-
- if($this->connected !== TRUE)
- return self::BACKEND_NOT_CONNECTED;
-
- return TRUE;
-
- }
-
- /**
- * Checks if a b8 database is used and if it's version is okay
- *
- * @access protected
- * @return mixed Returns TRUE if everything is okay, otherwise an error code.
- */
-
- protected function check_database()
- {
-
- $internals = $this->get_internals();
-
- if(isset($internals['dbversion'])) {
- if($internals['dbversion'] == "2") {
- return TRUE;
- }
- else {
- $this->connected = FALSE;
- return self::DATABASE_WRONG_VERSION;
- }
- }
- else {
- $this->connected = FALSE;
- return self::DATABASE_NOT_B8;
- }
-
- }
-
- /**
- * Parses the "count" data of a token.
- *
- * @access private
- * @param string $data
- * @return array Returns an array of the parsed data: array(count_ham, count_spam, lastseen).
- */
-
- private function _parse_count($data)
- {
-
- list($count_ham, $count_spam, $lastseen) = explode(' ', $data);
-
- $count_ham = (int) $count_ham;
- $count_spam = (int) $count_spam;
-
- return array(
- 'count_ham' => $count_ham,
- 'count_spam' => $count_spam
- );
-
- }
-
- /**
- * Get the database's internal variables.
- *
- * @access public
- * @return array Returns an array of all internals.
- */
-
- public function get_internals()
- {
-
- $internals = $this->_get_query(
- array(
- self::INTERNALS_TEXTS_HAM,
- self::INTERNALS_TEXTS_SPAM,
- self::INTERNALS_DBVERSION
- )
- );
-
- return array(
- 'texts_ham' => (int) $internals[self::INTERNALS_TEXTS_HAM],
- 'texts_spam' => (int) $internals[self::INTERNALS_TEXTS_SPAM],
- 'dbversion' => (int) $internals[self::INTERNALS_DBVERSION]
- );
-
- }
-
- /**
- * Get all data about a list of tags from the database.
- *
- * @access public
- * @param array $tokens
- * @return mixed Returns FALSE on failure, otherwise returns array of returned data in the format array('tokens' => array(token => count), 'degenerates' => array(token => array(degenerate => count))).
- */
-
- public function get($tokens)
- {
-
- # Validate the startup
-
- $started_up = $this->validate();
-
- if($started_up !== TRUE)
- return $started_up;
-
- # First we see what we have in the database.
- $token_data = $this->_get_query($tokens);
-
- # Check if we have to degenerate some tokens
-
- $missing_tokens = array();
-
- foreach($tokens as $token) {
- if(!isset($token_data[$token]))
- $missing_tokens[] = $token;
- }
-
- if(count($missing_tokens) > 0) {
-
- # We have to degenerate some tokens
- $degenerates_list = array();
-
- # Generate a list of degenerated tokens for the missing tokens ...
- $degenerates = $this->_degenerator->degenerate($missing_tokens);
-
- # ... and look them up
-
- foreach($degenerates as $token => $token_degenerates)
- $degenerates_list = array_merge($degenerates_list, $token_degenerates);
-
- $token_data = array_merge($token_data, $this->_get_query($degenerates_list));
-
- }
-
- # Here, we have all availible data in $token_data.
-
- $return_data_tokens = array();
- $return_data_degenerates = array();
-
- foreach($tokens as $token) {
-
- if(isset($token_data[$token]) === TRUE) {
-
- # The token was found in the database
-
- # Add the data ...
- $return_data_tokens[$token] = $this->_parse_count($token_data[$token]);
-
- # ... and update it's lastseen parameter
- $this->_update($token, "{$return_data_tokens[$token]['count_ham']} {$return_data_tokens[$token]['count_spam']} " . $this->b8_config['today']);
-
- }
-
- else {
-
- # The token was not found, so we look if we
- # can return data for degenerated tokens
-
- # Check all degenerated forms of the token
-
- foreach($this->_degenerator->degenerates[$token] as $degenerate) {
-
- if(isset($token_data[$degenerate]) === TRUE) {
-
- # A degeneration of the token way found in the database
-
- # Add the data ...
- $return_data_degenerates[$token][$degenerate] = $this->_parse_count($token_data[$degenerate]);
-
- # ... and update it's lastseen parameter
- $this->_update($degenerate, "{$return_data_degenerates[$token][$degenerate]['count_ham']} {$return_data_degenerates[$token][$degenerate]['count_spam']} " . $this->b8_config['today']);
-
- }
-
- }
-
- }
-
- }
-
- # Now, all token data directly found in the database is in $return_data_tokens
- # and all data for degenerated versions is in $return_data_degenerates
-
- # First, we commit the changes to the lastseen parameters
- $this->_commit();
-
- # Then, we return what we have
- return array(
- 'tokens' => $return_data_tokens,
- 'degenerates' => $return_data_degenerates
- );
-
- }
-
- /**
- * Stores or deletes a list of tokens from the given category.
- *
- * @access public
- * @param array $tokens
- * @param const $category Either b8::HAM or b8::SPAM
- * @param const $action Either b8::LEARN or b8::UNLEARN
- * @return void
- */
-
- public function process_text($tokens, $category, $action)
- {
-
- # Validate the startup
-
- $started_up = $this->validate();
-
- if($started_up !== TRUE)
- return $started_up;
-
- # No matter what we do, we first have to check what data we have.
-
- # First get the internals, including the ham texts and spam texts counter
- $internals = $this->get_internals();
-
- # Then, fetch all data for all tokens we have (and update their lastseen parameters)
- $token_data = $this->_get_query(array_keys($tokens));
-
- # Process all tokens to learn/unlearn
-
- foreach($tokens as $token => $count) {
-
- if(isset($token_data[$token])) {
-
- # We already have this token, so update it's data
-
- # Get the existing data
- list($count_ham, $count_spam, $lastseen) = explode(' ', $token_data[$token]);
- $count_ham = (int) $count_ham;
- $count_spam = (int) $count_spam;
-
- # Increase or decrease the right counter
-
- if($action === b8::LEARN) {
- if($category === b8::HAM)
- $count_ham += $count;
- elseif($category === b8::SPAM)
- $count_spam += $count;
- }
-
- elseif($action == b8::UNLEARN) {
- if($category === b8::HAM)
- $count_ham -= $count;
- elseif($category === b8::SPAM)
- $count_spam -= $count;
- }
-
- # We don't want to have negative values
-
- if($count_ham < 0)
- $count_ham = 0;
-
- if($count_spam < 0)
- $count_spam = 0;
-
- # Now let's see if we have to update or delete the token
- if($count_ham !== 0 or $count_spam !== 0)
- $this->_update($token, "$count_ham $count_spam " . $this->b8_config['today']);
- else
- $this->_del($token);
-
- }
-
- else {
-
- # We don't have the token. If we unlearn a text, we can't delete it
- # as we don't have it anyway, so just do something if we learn a text
-
- if($action === b8::LEARN) {
-
- if($category === b8::HAM)
- $data = '1 0 ';
- elseif($category === b8::SPAM)
- $data = '0 1 ';
-
- $data .= $this->b8_config['today'];
-
- $this->_put($token, $data);
-
- }
-
- }
-
- }
-
- # Now, all token have been processed, so let's update the right text
-
- if($action === b8::LEARN) {
-
- if($category === b8::HAM) {
- $internals['texts_ham']++;
- $this->_update(self::INTERNALS_TEXTS_HAM, $internals['texts_ham']);
- }
-
- elseif($category === b8::SPAM) {
- $internals['texts_spam']++;
- $this->_update(self::INTERNALS_TEXTS_SPAM, $internals['texts_spam']);
- }
-
- }
-
- elseif($action == b8::UNLEARN) {
-
- if($category === b8::HAM) {
-
- $internals['texts_ham']--;
-
- if($internals['texts_ham'] < 0)
- $internals['texts_ham'] = 0;
-
- $this->_update(self::INTERNALS_TEXTS_HAM, $internals['texts_ham']);
-
- }
-
- elseif($category === b8::SPAM) {
-
- $internals['texts_spam']--;
-
- if($internals['texts_spam'] < 0)
- $internals['texts_spam'] = 0;
-
- $this->_update(self::INTERNALS_TEXTS_SPAM, $internals['texts_spam']);
-
- }
-
- }
-
- # We're done and can commit all changes to the database now
- $this->_commit();
-
- }
-
-}
-
-?> \ No newline at end of file