From 0d9d0a4b70a2dd73005089f1128f4d6fc10340ce Mon Sep 17 00:00:00 2001
From: Max Kostikov
Date: Fri, 2 Nov 2018 22:37:53 +0100
Subject: more precise codepage detection

---
 Zotlabs/Module/Linkinfo.php | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'Zotlabs/Module/Linkinfo.php')

diff --git a/Zotlabs/Module/Linkinfo.php b/Zotlabs/Module/Linkinfo.php
index 5c3946d4f..4bd1deefb 100644
--- a/Zotlabs/Module/Linkinfo.php
+++ b/Zotlabs/Module/Linkinfo.php
@@ -229,9 +229,11 @@ class Linkinfo extends \Zotlabs\Web\Controller {
 			$header = $result['header'];
 			$body = $result['body'];
 
-			$cp = (preg_match('/meta.+content=["|\']text\/html;\s+charset=([^"|\']+)/i', $body, $o) ? $o[1] : 'AUTO');
-			if(strtoupper($cp) == 'ISO-8859-5')
-				$cp = 'AUTO';
+			// Take the charset from the page's meta tag; if absent (or the unreliable ISO-8859-5), use only the bare charset token of the HTTP Content-Type header (no quotes, extra parameters or trailing CR), else auto-detect
+			$cp = (preg_match('/meta.+content=["\']text\/html;\s*charset=([^"\'\s;]+)/i', $body, $o) ? $o[1] : '');
+			if(empty($cp) || strtoupper($cp) === 'ISO-8859-5')
+				$cp = (preg_match('/^Content-Type:\s*text\/html;\s*charset=["\']?([^"\';\s]+)/im', $header, $o) ? $o[1] : 'AUTO');
+
 			$body = mb_convert_encoding($body, 'UTF-8', $cp);
 			$body = mb_convert_encoding($body, 'HTML-ENTITIES', "UTF-8");
 
-- 
cgit v1.2.3