diff options
author | Max Kostikov <max@kostikov.co> | 2018-11-02 22:37:53 +0100 |
---|---|---|
committer | Max Kostikov <max@kostikov.co> | 2018-11-02 22:37:53 +0100 |
commit | 0d9d0a4b70a2dd73005089f1128f4d6fc10340ce (patch) | |
tree | f8ea96f7c6b94f282a232c906f65df6b7de8eabf /Zotlabs | |
parent | 32c4614a324f07133fee1163fff77aab5e9cc258 (diff) | |
download | volse-hubzilla-0d9d0a4b70a2dd73005089f1128f4d6fc10340ce.tar.gz volse-hubzilla-0d9d0a4b70a2dd73005089f1128f4d6fc10340ce.tar.bz2 volse-hubzilla-0d9d0a4b70a2dd73005089f1128f4d6fc10340ce.zip |
more precise codepage detection
Diffstat (limited to 'Zotlabs')
-rw-r--r-- | Zotlabs/Module/Linkinfo.php | 8 |
1 files changed, 5 insertions, 3 deletions
```diff
diff --git a/Zotlabs/Module/Linkinfo.php b/Zotlabs/Module/Linkinfo.php
index 5c3946d4f..4bd1deefb 100644
--- a/Zotlabs/Module/Linkinfo.php
+++ b/Zotlabs/Module/Linkinfo.php
@@ -229,9 +229,11 @@ class Linkinfo extends \Zotlabs\Web\Controller {
 		$header = $result['header'];
 		$body = $result['body'];
 
-		$cp = (preg_match('/meta.+content=["|\']text\/html;\s+charset=([^"|\']+)/i', $body, $o) ? $o[1] : 'AUTO');
-		if(strtoupper($cp) == 'ISO-8859-5')
-			$cp = 'AUTO';
+		// Check codepage in page or in HTTP headers if not exist
+		$cp = (preg_match('/meta.+content=["|\']text\/html;\s+charset=([^"|\']+)/i', $body, $o) ? $o[1] : '');
+		if(empty($cp) || strtoupper($cp) == 'ISO-8859-5')
+			$cp = (preg_match('/Content-Type: text\/html;\s+charset=(.+)/im', $header, $o) ? $o[1] : 'AUTO');
+
 		$body = mb_convert_encoding($body, 'UTF-8', $cp);
 		$body = mb_convert_encoding($body, 'HTML-ENTITIES', "UTF-8");
 
```