about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Max Kostikov <max@kostikov.co> 2018-11-02 22:37:53 +0100
committer Max Kostikov <max@kostikov.co> 2018-11-02 22:37:53 +0100
commit 0d9d0a4b70a2dd73005089f1128f4d6fc10340ce (patch)
tree f8ea96f7c6b94f282a232c906f65df6b7de8eabf
parent 32c4614a324f07133fee1163fff77aab5e9cc258 (diff)
download volse-hubzilla-0d9d0a4b70a2dd73005089f1128f4d6fc10340ce.tar.gz
volse-hubzilla-0d9d0a4b70a2dd73005089f1128f4d6fc10340ce.tar.bz2
volse-hubzilla-0d9d0a4b70a2dd73005089f1128f4d6fc10340ce.zip
more precise codepage detection
-rw-r--r-- Zotlabs/Module/Linkinfo.php 8
1 files changed, 5 insertions, 3 deletions
diff --git a/Zotlabs/Module/Linkinfo.php b/Zotlabs/Module/Linkinfo.php
index 5c3946d4f..4bd1deefb 100644
--- a/Zotlabs/Module/Linkinfo.php
+++ b/Zotlabs/Module/Linkinfo.php
@@ -229,9 +229,11 @@ class Linkinfo extends \Zotlabs\Web\Controller {
$header = $result['header'];
$body = $result['body'];
- $cp = (preg_match('/meta.+content=["|\']text\/html;\s+charset=([^"|\']+)/i', $body, $o) ? $o[1] : 'AUTO');
- if(strtoupper($cp) == 'ISO-8859-5')
- $cp = 'AUTO';
+ // Check codepage in page or in HTTP headers if not exist
+ $cp = (preg_match('/meta.+content=["|\']text\/html;\s+charset=([^"|\']+)/i', $body, $o) ? $o[1] : '');
+ if(empty($cp) || strtoupper($cp) == 'ISO-8859-5')
+ $cp = (preg_match('/Content-Type: text\/html;\s+charset=(.+)/im', $header, $o) ? $o[1] : 'AUTO');
+
$body = mb_convert_encoding($body, 'UTF-8', $cp);
$body = mb_convert_encoding($body, 'HTML-ENTITIES', "UTF-8");