aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Vogel <icarus@dabo.de>2012-03-10 09:52:20 +0100
committerMichael Vogel <icarus@dabo.de>2012-03-10 09:52:20 +0100
commit15916a2cb717828367fc7e0bb74137942836a772 (patch)
treeb1cdcacab4bb67563d0ff2beb52c67438e9c7b21
parentcb1ecf2c5ad649eb372db9eb3bd1da756f207fb6 (diff)
downloadvolse-hubzilla-15916a2cb717828367fc7e0bb74137942836a772.tar.gz
volse-hubzilla-15916a2cb717828367fc7e0bb74137942836a772.tar.bz2
volse-hubzilla-15916a2cb717828367fc7e0bb74137942836a772.zip
Mail: Removing signatures, gpg, unnecessary line breaks
-rwxr-xr-xinclude/email.php14
-rw-r--r--include/msgclean.php225
-rwxr-xr-xinclude/poller.php3
3 files changed, 240 insertions, 2 deletions
diff --git a/include/email.php b/include/email.php
index 659978b6e..a3449a424 100755
--- a/include/email.php
+++ b/include/email.php
@@ -1,5 +1,7 @@
<?php
require_once('include/html2plain.php');
+require_once('include/msgclean.php');
+require_once('include/quoteconvert.php');
function email_connect($mailbox,$username,$password) {
if(! function_exists('imap_open'))
@@ -86,6 +88,7 @@ function email_get_msg($mbox,$uid) {
if(! $struc->parts) {
$ret['body'] = email_get_part($mbox,$uid,$struc,0, 'html');
+ $html = $ret['body'];
if (trim($ret['body']) == '')
$ret['body'] = email_get_part($mbox,$uid,$struc,0, 'plain');
@@ -107,6 +110,17 @@ function email_get_msg($mbox,$uid) {
else
$ret['body'] = $text;
}
+
+ $ret['body'] = removegpg($ret['body']);
+ $msg = removesig($ret['body']);
+ $ret['body'] = $msg['body'];
+ $ret['body'] = convertquote($ret['body'], false);
+
+ if (trim($html) != '')
+ $ret['body'] = removelinebreak($ret['body']);
+
+ $ret['body'] = unifyattributionline($ret['body']);
+
return $ret;
}
diff --git a/include/msgclean.php b/include/msgclean.php
new file mode 100644
index 000000000..284ad1ce4
--- /dev/null
+++ b/include/msgclean.php
@@ -0,0 +1,225 @@
+<?php
+
+function savereplace($pattern, $replace, $text)
+{
+ $save = $text;
+
+ $text = preg_replace($pattern, $replace, $text);
+
+ if ($text == '')
+ $text = $save;
+ return($text);
+}
+
+function unifyattributionline($message)
+{
+ $quotestr = array('quote', 'collapsed');
+ foreach ($quotestr as $quote) {
+
+ $message = savereplace('/----- Original Message -----\s.*?From: "([^<"].*?)" <(.*?)>\s.*?To: (.*?)\s*?Cc: (.*?)\s*?Sent: (.*?)\s.*?Subject: ([^\n].*)\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+ $message = savereplace('/----- Original Message -----\s.*?From: "([^<"].*?)" <(.*?)>\s.*?To: (.*?)\s*?Sent: (.*?)\s.*?Subject: ([^\n].*)\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+
+ $message = savereplace('/-------- Original-Nachricht --------\s*\['.$quote.'\]\nDatum: (.*?)\nVon: (.*?) <(.*?)>\nAn: (.*?)\nBetreff: (.*?)\n/i', "[".$quote."='$2']\n", $message);
+ $message = savereplace('/-------- Original-Nachricht --------\s*\['.$quote.'\]\sDatum: (.*?)\s.*Von: "([^<"].*?)" <(.*?)>\s.*An: (.*?)\n.*/i', "[".$quote."='$2']\n", $message);
+ $message = savereplace('/-------- Original-Nachricht --------\s*\['.$quote.'\]\nDatum: (.*?)\nVon: (.*?)\nAn: (.*?)\nBetreff: (.*?)\n/i', "[".$quote."='$2']\n", $message);
+
+ $message = savereplace('/-----Urspr.*?ngliche Nachricht-----\sVon: "([^<"].*?)" <(.*?)>\s.*Gesendet: (.*?)\s.*An: (.*?)\s.*Betreff: ([^\n].*?).*:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+ $message = savereplace('/-----Urspr.*?ngliche Nachricht-----\sVon: "([^<"].*?)" <(.*?)>\s.*Gesendet: (.*?)\s.*An: (.*?)\s.*Betreff: ([^\n].*?)\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+
+ $message = savereplace('/Am (.*?), schrieb (.*?):\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message);
+
+ $message = savereplace('/Am .*?, \d+ .*? \d+ \d+:\d+:\d+ \+\d+\sschrieb\s(.*?)\s<(.*?)>:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+
+ $message = savereplace('/Am (.*?) schrieb (.*?) <(.*?)>:\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message);
+ $message = savereplace('/Am (.*?) schrieb <(.*?)>:\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message);
+ $message = savereplace('/Am (.*?) schrieb (.*?):\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message);
+ $message = savereplace('/Am (.*?) schrieb (.*?)\n(.*?):\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message);
+
+ $message = savereplace('/(\d+)\/(\d+)\/(\d+) ([^<"].*?) <(.*?)>\s*\['.$quote.'\]/i', "[".$quote."='$4']\n", $message);
+
+ $message = savereplace('/On .*?, \d+ .*? \d+ \d+:\d+:\d+ \+\d+\s(.*?)\s<(.*?)>\swrote:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+
+ $message = savereplace('/On (.*?) at (.*?), (.*?)\s<(.*?)>\swrote:\s*\['.$quote.'\]/i', "[".$quote."='$3']\n", $message);
+ $message = savereplace('/On (.*?)\n([^<].*?)\s<(.*?)>\swrote:\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message);
+ $message = savereplace('/On (.*?), (.*?), (.*?)\s<(.*?)>\swrote:\s*\['.$quote.'\]/i', "[".$quote."='$3']\n", $message);
+ $message = savereplace('/On ([^,].*?), (.*?)\swrote:\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message);
+ $message = savereplace('/On (.*?), (.*?)\swrote\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message);
+
+ // Der loescht manchmal den Body - was eigentlich unmoeglich ist
+ $message = savereplace('/On (.*?),(.*?),(.*?),(.*?), (.*?) wrote:\s*\['.$quote.'\]/i', "[".$quote."='$5']\n", $message);
+
+ $message = savereplace('/Zitat von ([^<].*?) <(.*?)>:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+
+ $message = savereplace('/Quoting ([^<].*?) <(.*?)>:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+
+ $message = savereplace('/From: "([^<"].*?)" <(.*?)>\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+ $message = savereplace('/From: <(.*?)>\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+
+ $message = savereplace('/Du \(([^)].*?)\) schreibst:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+
+ $message = savereplace('/--- (.*?) <.*?> schrieb am (.*?):\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+ $message = savereplace('/--- (.*?) schrieb am (.*?):\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+
+ $message = savereplace('/\* (.*?) <(.*?)> hat geschrieben:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+
+ $message = savereplace('/(.*?) <(.*?)> schrieb (.*?)\):\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+ $message = savereplace('/(.*?) <(.*?)> schrieb am (.*?) um (.*):\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+ $message = savereplace('/(.*?) schrieb am (.*?) um (.*):\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+ $message = savereplace('/(.*?) \((.*?)\) schrieb:\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message);
+ $message = savereplace('/(.*?) schrieb:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+
+ $message = savereplace('/(.*?) <(.*?)> writes:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+ $message = savereplace('/(.*?) \((.*?)\) writes:\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message);
+ $message = savereplace('/(.*?) writes:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+
+ $message = savereplace('/\* (.*?) wrote:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+ $message = savereplace('/(.*?) wrote \(.*?\):\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+ $message = savereplace('/(.*?) wrote:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+
+ $message = savereplace('/([^<].*?) <.*?> hat am (.*?)\sum\s(.*)\sgeschrieben:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+
+ $message = savereplace('/(\d+)\/(\d+)\/(\d+) ([^<"].*?) <(.*?)>:\s*\['.$quote.'\]/i', "[".$quote."='$4']\n", $message);
+ $message = savereplace('/(\d+)\/(\d+)\/(\d+) (.*?) <(.*?)>\s*\['.$quote.'\]/i', "[".$quote."='$4']\n", $message);
+ $message = savereplace('/(\d+)\/(\d+)\/(\d+) <(.*?)>:\s*\['.$quote.'\]/i', "[".$quote."='$4']\n", $message);
+ $message = savereplace('/(\d+)\/(\d+)\/(\d+) <(.*?)>\s*\['.$quote.'\]/i', "[".$quote."='$4']\n", $message);
+
+ $message = savereplace('/(.*?) <(.*?)> schrubselte:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message);
+ $message = savereplace('/(.*?) \((.*?)\) schrubselte:\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message);
+
+ }
+ return($message);
+}
+
+function removegpg($message)
+{
+
+ $pattern = '/(.*)\s*-----BEGIN PGP SIGNED MESSAGE-----\s*[\r\n].*Hash:.*?[\r\n](.*)'.
+ '[\r\n]\s*-----BEGIN PGP SIGNATURE-----\s*[\r\n].*'.
+ '[\r\n]\s*-----END PGP SIGNATURE-----(.*)/is';
+
+ preg_match($pattern, $message, $result);
+
+ $cleaned = trim($result[1].$result[2].$result[3]);
+
+ $cleaned = str_replace(array("\n- --\n", "\n- -"), array("\n-- \n", "\n-"), $cleaned);
+
+
+ if ($cleaned == '')
+ $cleaned = $message;
+
+ return($cleaned);
+}
+
+function removesig($message)
+{
+ $sigpos = strrpos($message, "\n-- \n");
+ $quotepos = strrpos($message, "[/quote]");
+
+ if ($sigpos == 0) {
+ // Speziell fuer web.de, die das als Trenner verwenden
+ $message = str_replace("\n___________________________________________________________\n", "\n-- \n", $message);
+ $sigpos = strrpos($message, "\n-- \n");
+ $quotepos = strrpos($message, "[/quote]");
+ }
+
+ // Sollte sich der Signaturtrenner innerhalb eines Quotes befinden
+ // wird keine Signaturtrennung ausgefuehrt
+ if (($sigpos < $quotepos) and ($sigpos != 0))
+ return(array('body' => $message, 'sig' => ''));
+
+ // To-Do: Regexp umstellen, so dass auf 1 oder kein Leerzeichen
+ // geprueft wird
+ //$message = str_replace("\n--\n", "\n-- \n", $message);
+
+ $pattern = '/(.*)[\r\n]-- [\r\n](.*)/is';
+
+ preg_match($pattern, $message, $result);
+
+ if (($result[1] != '') and ($result[2] != '')) {
+ $cleaned = trim($result[1])."\n";
+ $sig = trim($result[2]);
+ // '[hr][size=x-small][color=darkblue]'.trim($result[2]).'[/color][/size]';
+ } else {
+ $cleaned = $message;
+ $sig = '';
+ }
+
+ return(array('body' => $cleaned, 'sig' => $sig));
+}
+
+function removelinebreak($message)
+{
+ $arrbody = explode("\n", trim($message));
+
+ $lines = array();
+ $lineno = 0;
+
+ foreach($arrbody as $i => $line) {
+ $currquotelevel = 0;
+ $currline = $line;
+ while ((strlen($currline)>0) and ((substr($currline, 0, 1) == '>')
+ or (substr($currline, 0, 1) == ' '))) {
+ if (substr($currline, 0, 1) == '>')
+ $currquotelevel++;
+
+ $currline = ltrim(substr($currline, 1));
+ }
+
+ $quotelevel = 0;
+ $nextline = trim($arrbody[$i+1]);
+ while ((strlen($nextline)>0) and ((substr($nextline, 0, 1) == '>')
+ or (substr($nextline, 0, 1) == ' '))) {
+ if (substr($nextline, 0, 1) == '>')
+ $quotelevel++;
+
+ $nextline = ltrim(substr($nextline, 1));
+ }
+
+ $len = strlen($line);
+ $firstword = strpos($nextline.' ', ' ');
+
+ $specialchars = ((substr(trim($nextline), 0, 1) == '-') or
+ (substr(trim($nextline), 0, 1) == '=') or
+ (substr(trim($nextline), 0, 1) == '*') or
+ (substr(trim($nextline), 0, 1) == '·') or
+ (substr(trim($nextline), 0, 4) == '[url') or
+ (substr(trim($nextline), 0, 5) == '[size') or
+ (substr(trim($nextline), 0, 7) == 'http://') or
+ (substr(trim($nextline), 0, 8) == 'https://'));
+
+ if (!$specialchars)
+ $specialchars = ((substr(rtrim($line), -1) == '-') or
+ (substr(rtrim($line), -1) == '=') or
+ (substr(rtrim($line), -1) == '*') or
+ (substr(rtrim($line), -1) == '·') or
+ (substr(rtrim($line), -6) == '[/url]') or
+ (substr(rtrim($line), -7) == '[/size]'));
+
+ //if ($specialchars)
+ // echo ("Special\n");
+
+ if ($lines[$lineno] != '') {
+ if (substr($lines[$lineno], -1) != ' ')
+ $lines[$lineno] .= ' ';
+
+ while ((strlen($line)>0) and ((substr($line, 0, 1) == '>')
+ or (substr($line, 0, 1) == ' '))) {
+
+ $line = ltrim(substr($line, 1));
+ }
+
+ }
+ //else
+ // $lines[$lineno] = $quotelevel.'-'.$len.'-'.$firstword.'-';
+
+ $lines[$lineno] .= $line;
+ //if ((($len + $firstword < 68) and (substr($line, -1, 1) != ' '))
+ // or ($quotelevel != $currquotelevel) or $specialchars)
+ if (((substr($line, -1, 1) != ' '))
+ or ($quotelevel != $currquotelevel))
+ $lineno++;
+ }
+ return(implode("\n", $lines));
+
+}
+?>
diff --git a/include/poller.php b/include/poller.php
index cfbc46b87..3be6caebd 100755
--- a/include/poller.php
+++ b/include/poller.php
@@ -1,7 +1,6 @@
<?php
require_once("boot.php");
-require_once("include/quoteconvert.php");
function poller_run($argv, $argc){
@@ -494,7 +493,7 @@ function poller_run($argv, $argc){
logger("Mail: can't fetch msg ".$msg_uid);
continue;
}
- $datarray['body'] = escape_tags(convertquote($r['body'], false));
+ $datarray['body'] = escape_tags($r['body']);
logger("Mail: Importing ".$msg_uid);