diff options
Diffstat (limited to 'include')
-rwxr-xr-x | include/email.php | 14 | ||||
-rwxr-xr-x | include/html2bbcode.php | 8 | ||||
-rw-r--r-- | include/msgclean.php | 225 | ||||
-rwxr-xr-x | include/poller.php | 21 | ||||
-rw-r--r-- | include/text.php | 19 |
5 files changed, 279 insertions, 8 deletions
diff --git a/include/email.php b/include/email.php index 659978b6e..a3449a424 100755 --- a/include/email.php +++ b/include/email.php @@ -1,5 +1,7 @@ <?php require_once('include/html2plain.php'); +require_once('include/msgclean.php'); +require_once('include/quoteconvert.php'); function email_connect($mailbox,$username,$password) { if(! function_exists('imap_open')) @@ -86,6 +88,7 @@ function email_get_msg($mbox,$uid) { if(! $struc->parts) { $ret['body'] = email_get_part($mbox,$uid,$struc,0, 'html'); + $html = $ret['body']; if (trim($ret['body']) == '') $ret['body'] = email_get_part($mbox,$uid,$struc,0, 'plain'); @@ -107,6 +110,17 @@ function email_get_msg($mbox,$uid) { else $ret['body'] = $text; } + + $ret['body'] = removegpg($ret['body']); + $msg = removesig($ret['body']); + $ret['body'] = $msg['body']; + $ret['body'] = convertquote($ret['body'], false); + + if (trim($html) != '') + $ret['body'] = removelinebreak($ret['body']); + + $ret['body'] = unifyattributionline($ret['body']); + return $ret; } diff --git a/include/html2bbcode.php b/include/html2bbcode.php index 32a90d7d6..0dafecc71 100755 --- a/include/html2bbcode.php +++ b/include/html2bbcode.php @@ -150,10 +150,14 @@ function html2bbcode($message) node2bbcode($doc, 'font', array('size'=>'/(\d+)/'), '[size=$1]', '[/size]'); node2bbcode($doc, 'font', array('color'=>'/(.+)/'), '[color=$1]', '[/color]'); - node2bbcode($doc, 'span', array('style'=>'/.*color:\s*(.+?)[,;].*/'), '[color="$1"]', '[/color]'); - node2bbcode($doc, 'span', array('style'=>'/.*font-size:\s*(\d+)/'), '[size=$1]', '[/size]'); + // Untested + //node2bbcode($doc, 'span', array('style'=>'/.*font-size:\s*(.+?)[,;].*font-family:\s*(.+?)[,;].*color:\s*(.+?)[,;].*/'), '[size=$1][font=$2][color=$3]', '[/color][/font][/size]'); + //node2bbcode($doc, 'span', array('style'=>'/.*font-size:\s*(\d+)[,;].*/'), '[size=$1]', '[/size]'); + //node2bbcode($doc, 'span', array('style'=>'/.*font-size:\s*(.+?)[,;].*/'), '[size=$1]', '[/size]'); + node2bbcode($doc, 'span', array('style'=>'/.*color:\s*(.+?)[,;].*/'), '[color="$1"]', '[/color]'); //node2bbcode($doc, 'span', array('style'=>'/.*font-family:\s*(.+?)[,;].*/'), '[font=$1]', '[/font]'); + //node2bbcode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*font-size:\s*(\d+?)pt.*/'), '[font=$1][size=$2]', '[/size][/font]'); //node2bbcode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*font-size:\s*(\d+?)px.*/'), '[font=$1][size=$2]', '[/size][/font]'); //node2bbcode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*/'), '[font=$1]', '[/font]'); diff --git a/include/msgclean.php b/include/msgclean.php new file mode 100644 index 000000000..284ad1ce4 --- /dev/null +++ b/include/msgclean.php @@ -0,0 +1,225 @@ +<?php + +function savereplace($pattern, $replace, $text) +{ + $save = $text; + + $text = preg_replace($pattern, $replace, $text); + + if ($text == '') + $text = $save; + return($text); +} + +function unifyattributionline($message) +{ + $quotestr = array('quote', 'collapsed'); + foreach ($quotestr as $quote) { + + $message = savereplace('/----- Original Message -----\s.*?From: "([^<"].*?)" <(.*?)>\s.*?To: (.*?)\s*?Cc: (.*?)\s*?Sent: (.*?)\s.*?Subject: ([^\n].*)\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + $message = savereplace('/----- Original Message -----\s.*?From: "([^<"].*?)" <(.*?)>\s.*?To: (.*?)\s*?Sent: (.*?)\s.*?Subject: ([^\n].*)\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + + $message = savereplace('/-------- Original-Nachricht --------\s*\['.$quote.'\]\nDatum: (.*?)\nVon: (.*?) <(.*?)>\nAn: (.*?)\nBetreff: (.*?)\n/i', "[".$quote."='$2']\n", $message); + $message = savereplace('/-------- Original-Nachricht --------\s*\['.$quote.'\]\sDatum: (.*?)\s.*Von: "([^<"].*?)" <(.*?)>\s.*An: (.*?)\n.*/i', "[".$quote."='$2']\n", $message); + $message = savereplace('/-------- Original-Nachricht --------\s*\['.$quote.'\]\nDatum: (.*?)\nVon: (.*?)\nAn: (.*?)\nBetreff: (.*?)\n/i', "[".$quote."='$2']\n", $message); + + $message = savereplace('/-----Urspr.*?ngliche Nachricht-----\sVon: "([^<"].*?)" <(.*?)>\s.*Gesendet: (.*?)\s.*An: (.*?)\s.*Betreff: ([^\n].*?).*:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + $message = savereplace('/-----Urspr.*?ngliche Nachricht-----\sVon: "([^<"].*?)" <(.*?)>\s.*Gesendet: (.*?)\s.*An: (.*?)\s.*Betreff: ([^\n].*?)\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + + $message = savereplace('/Am (.*?), schrieb (.*?):\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message); + + $message = savereplace('/Am .*?, \d+ .*? \d+ \d+:\d+:\d+ \+\d+\sschrieb\s(.*?)\s<(.*?)>:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + + $message = savereplace('/Am (.*?) schrieb (.*?) <(.*?)>:\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message); + $message = savereplace('/Am (.*?) schrieb <(.*?)>:\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message); + $message = savereplace('/Am (.*?) schrieb (.*?):\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message); + $message = savereplace('/Am (.*?) schrieb (.*?)\n(.*?):\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message); + + $message = savereplace('/(\d+)\/(\d+)\/(\d+) ([^<"].*?) <(.*?)>\s*\['.$quote.'\]/i', "[".$quote."='$4']\n", $message); + + $message = savereplace('/On .*?, \d+ .*? \d+ \d+:\d+:\d+ \+\d+\s(.*?)\s<(.*?)>\swrote:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + + $message = savereplace('/On (.*?) at (.*?), (.*?)\s<(.*?)>\swrote:\s*\['.$quote.'\]/i', "[".$quote."='$3']\n", $message); + $message = savereplace('/On (.*?)\n([^<].*?)\s<(.*?)>\swrote:\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message); + $message = savereplace('/On (.*?), (.*?), (.*?)\s<(.*?)>\swrote:\s*\['.$quote.'\]/i', "[".$quote."='$3']\n", $message); + $message = savereplace('/On ([^,].*?), (.*?)\swrote:\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message); + $message = savereplace('/On (.*?), (.*?)\swrote\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message); + + // Der loescht manchmal den Body - was eigentlich unmoeglich ist + $message = savereplace('/On (.*?),(.*?),(.*?),(.*?), (.*?) wrote:\s*\['.$quote.'\]/i', "[".$quote."='$5']\n", $message); + + $message = savereplace('/Zitat von ([^<].*?) <(.*?)>:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + + $message = savereplace('/Quoting ([^<].*?) <(.*?)>:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + + $message = savereplace('/From: "([^<"].*?)" <(.*?)>\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + $message = savereplace('/From: <(.*?)>\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + + $message = savereplace('/Du \(([^)].*?)\) schreibst:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + + $message = savereplace('/--- (.*?) <.*?> schrieb am (.*?):\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + $message = savereplace('/--- (.*?) schrieb am (.*?):\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + + $message = savereplace('/\* (.*?) <(.*?)> hat geschrieben:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + + $message = savereplace('/(.*?) <(.*?)> schrieb (.*?)\):\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + $message = savereplace('/(.*?) <(.*?)> schrieb am (.*?) um (.*):\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + $message = savereplace('/(.*?) schrieb am (.*?) um (.*):\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + $message = savereplace('/(.*?) \((.*?)\) schrieb:\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message); + $message = savereplace('/(.*?) schrieb:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + + $message = savereplace('/(.*?) <(.*?)> writes:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + $message = savereplace('/(.*?) \((.*?)\) writes:\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message); + $message = savereplace('/(.*?) writes:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + + $message = savereplace('/\* (.*?) wrote:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + $message = savereplace('/(.*?) wrote \(.*?\):\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + $message = savereplace('/(.*?) wrote:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + + $message = savereplace('/([^<].*?) <.*?> hat am (.*?)\sum\s(.*)\sgeschrieben:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + + $message = savereplace('/(\d+)\/(\d+)\/(\d+) ([^<"].*?) <(.*?)>:\s*\['.$quote.'\]/i', "[".$quote."='$4']\n", $message); + $message = savereplace('/(\d+)\/(\d+)\/(\d+) (.*?) <(.*?)>\s*\['.$quote.'\]/i', "[".$quote."='$4']\n", $message); + $message = savereplace('/(\d+)\/(\d+)\/(\d+) <(.*?)>:\s*\['.$quote.'\]/i', "[".$quote."='$4']\n", $message); + $message = savereplace('/(\d+)\/(\d+)\/(\d+) <(.*?)>\s*\['.$quote.'\]/i', "[".$quote."='$4']\n", $message); + + $message = savereplace('/(.*?) <(.*?)> schrubselte:\s*\['.$quote.'\]/i', "[".$quote."='$1']\n", $message); + $message = savereplace('/(.*?) \((.*?)\) schrubselte:\s*\['.$quote.'\]/i', "[".$quote."='$2']\n", $message); + + } + return($message); +} + +function removegpg($message) +{ + + $pattern = '/(.*)\s*-----BEGIN PGP SIGNED MESSAGE-----\s*[\r\n].*Hash:.*?[\r\n](.*)'. + '[\r\n]\s*-----BEGIN PGP SIGNATURE-----\s*[\r\n].*'. + '[\r\n]\s*-----END PGP SIGNATURE-----(.*)/is'; + + preg_match($pattern, $message, $result); + + $cleaned = trim($result[1].$result[2].$result[3]); + + $cleaned = str_replace(array("\n- --\n", "\n- -"), array("\n-- \n", "\n-"), $cleaned); + + + if ($cleaned == '') + $cleaned = $message; + + return($cleaned); +} + +function removesig($message) +{ + $sigpos = strrpos($message, "\n-- \n"); + $quotepos = strrpos($message, "[/quote]"); + + if ($sigpos == 0) { + // Speziell fuer web.de, die das als Trenner verwenden + $message = str_replace("\n___________________________________________________________\n", "\n-- \n", $message); + $sigpos = strrpos($message, "\n-- \n"); + $quotepos = strrpos($message, "[/quote]"); + } + + // Sollte sich der Signaturtrenner innerhalb eines Quotes befinden + // wird keine Signaturtrennung ausgefuehrt + if (($sigpos < $quotepos) and ($sigpos != 0)) + return(array('body' => $message, 'sig' => '')); + + // To-Do: Regexp umstellen, so dass auf 1 oder kein Leerzeichen + // geprueft wird + //$message = str_replace("\n--\n", "\n-- \n", $message); + + $pattern = '/(.*)[\r\n]-- [\r\n](.*)/is'; + + preg_match($pattern, $message, $result); + + if (($result[1] != '') and ($result[2] != '')) { + $cleaned = trim($result[1])."\n"; + $sig = trim($result[2]); + // '[hr][size=x-small][color=darkblue]'.trim($result[2]).'[/color][/size]'; + } else { + $cleaned = $message; + $sig = ''; + } + + return(array('body' => $cleaned, 'sig' => $sig)); +} + +function removelinebreak($message) +{ + $arrbody = explode("\n", trim($message)); + + $lines = array(); + $lineno = 0; + + foreach($arrbody as $i => $line) { + $currquotelevel = 0; + $currline = $line; + while ((strlen($currline)>0) and ((substr($currline, 0, 1) == '>') + or (substr($currline, 0, 1) == ' '))) { + if (substr($currline, 0, 1) == '>') + $currquotelevel++; + + $currline = ltrim(substr($currline, 1)); + } + + $quotelevel = 0; + $nextline = trim($arrbody[$i+1]); + while ((strlen($nextline)>0) and ((substr($nextline, 0, 1) == '>') + or (substr($nextline, 0, 1) == ' '))) { + if (substr($nextline, 0, 1) == '>') + $quotelevel++; + + $nextline = ltrim(substr($nextline, 1)); + } + + $len = strlen($line); + $firstword = strpos($nextline.' ', ' '); + + $specialchars = ((substr(trim($nextline), 0, 1) == '-') or + (substr(trim($nextline), 0, 1) == '=') or + (substr(trim($nextline), 0, 1) == '*') or + (substr(trim($nextline), 0, 1) == '·') or + (substr(trim($nextline), 0, 4) == '[url') or + (substr(trim($nextline), 0, 5) == '[size') or + (substr(trim($nextline), 0, 7) == 'http://') or + (substr(trim($nextline), 0, 8) == 'https://')); + + if (!$specialchars) + $specialchars = ((substr(rtrim($line), -1) == '-') or + (substr(rtrim($line), -1) == '=') or + (substr(rtrim($line), -1) == '*') or + (substr(rtrim($line), -1) == '·') or + (substr(rtrim($line), -6) == '[/url]') or + (substr(rtrim($line), -7) == '[/size]')); + + //if ($specialchars) + // echo ("Special\n"); + + if ($lines[$lineno] != '') { + if (substr($lines[$lineno], -1) != ' ') + $lines[$lineno] .= ' '; + + while ((strlen($line)>0) and ((substr($line, 0, 1) == '>') + or (substr($line, 0, 1) == ' '))) { + + $line = ltrim(substr($line, 1)); + } + + } + //else + // $lines[$lineno] = $quotelevel.'-'.$len.'-'.$firstword.'-'; + + $lines[$lineno] .= $line; + //if ((($len + $firstword < 68) and (substr($line, -1, 1) != ' ')) + // or ($quotelevel != $currquotelevel) or $specialchars) + if (((substr($line, -1, 1) != ' ')) + or ($quotelevel != $currquotelevel)) + $lineno++; + } + return(implode("\n", $lines)); + +} +?> diff --git a/include/poller.php b/include/poller.php index cfbc46b87..3bc98e36f 100755 --- a/include/poller.php +++ b/include/poller.php @@ -1,7 +1,6 @@ <?php require_once("boot.php"); -require_once("include/quoteconvert.php"); function poller_run($argv, $argc){ @@ -70,6 +69,19 @@ function poller_run($argv, $argc){ // clear old cache Cache::clear(); + // clear item cache files if they are older than one day + $cache = get_config('system','itemcache'); + if (($cache != '') and is_dir($cache)) { + if ($dh = opendir($cache)) { + while (($file = readdir($dh)) !== false) { + $fullpath = $cache."/".$file; + if ((filetype($fullpath) == "file") and filectime($fullpath) < (time() - 86400)) + unlink($fullpath); + } + closedir($dh); + } + } + $manual_id = 0; $generation = 0; $hub_update = false; @@ -141,7 +153,10 @@ function poller_run($argv, $argc){ if($manual_id) $contact['last-update'] = '0000-00-00 00:00:00'; - if($contact['network'] === NETWORK_DFRN || $contact['network'] === NETWORK_OSTATUS) + if($contact['network'] === NETWORK_DFRN) + $contact['priority'] = 2; + + if(!get_config('system','ostatus_use_priority') and ($contact['network'] === NETWORK_OSTATUS)) $contact['priority'] = 2; if($contact['priority'] || $contact['subhub']) { @@ -494,7 +509,7 @@ function poller_run($argv, $argc){ logger("Mail: can't fetch msg ".$msg_uid); continue; } - $datarray['body'] = escape_tags(convertquote($r['body'], false)); + $datarray['body'] = escape_tags($r['body']); logger("Mail: Importing ".$msg_uid); diff --git a/include/text.php b/include/text.php index 5ad0154d7..08c5a5424 100644 --- a/include/text.php +++ b/include/text.php @@ -876,14 +876,27 @@ function prepare_body($item,$attach = false) { call_hooks('prepare_body_init', $item); - $s = prepare_text($item['body']); + $cache = get_config('system','itemcache'); + + if (($cache != '')) { + $cachefile = $cache."/".$item["guid"]."-".strtotime($item["edited"])."-".hash("crc32", $item['body']); + + if (file_exists($cachefile)) + $s = file_get_contents($cachefile); + else { + $s = prepare_text($item['body']); + file_put_contents($cachefile, $s); + } + } else + $s = prepare_text($item['body']); $prep_arr = array('item' => $item, 'html' => $s); call_hooks('prepare_body', $prep_arr); $s = $prep_arr['html']; - if(! $attach) + if(! $attach) { return $s; + } $arr = explode(',',$item['attach']); if(count($arr)) { @@ -914,9 +927,9 @@ function prepare_body($item,$attach = false) { $s .= '<div class="clear"></div></div>'; } - $prep_arr = array('item' => $item, 'html' => $s); call_hooks('prepare_body_final', $prep_arr); + return $prep_arr['html']; }} |