From 0ad7c8f69eb8b0d6087ef9982af90add02770b1c Mon Sep 17 00:00:00 2001 From: root Date: Fri, 30 Oct 2020 20:01:12 +0000 Subject: HTML parsing lib change to standard PHP in scrape_feed() and scrape_vcard() (cherry picked from commit 16d450fc6980bb70f13e574d1b20406dd313110e) --- include/network.php | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include/network.php') diff --git a/include/network.php b/include/network.php index 4457f59bc..9d4c00ee8 100644 --- a/include/network.php +++ b/include/network.php @@ -1333,14 +1333,15 @@ function fetch_xrd_links($url) { */ function scrape_feed($url) { - require_once('library/HTML5/Parser.php'); $ret = array(); $level = 0; $x = z_fetch_url($url,false,$level,array('novalidate' => true)); - if(! $x['success']) + if(! $x['success']) { + logger('ERROR fetching URL'); return $ret; + } $headers = $x['header']; $code = $x['return_code']; @@ -1374,17 +1375,16 @@ function scrape_feed($url) { } } + $dom = new DOMDocument(); try { - $dom = HTML5_Parser::parse($s); + $dom->loadHTML( $s); } catch (DOMException $e) { - logger('Parse error: ' . $e); - } - - if(! $dom) { - logger('Failed to parse.'); + logger('Feed parse error: ' . $e); + // logger('Feed parse ERROR: ' . libxml_get_last_error()->message); return $ret; } + $head = $dom->getElementsByTagName('base'); if($head) { foreach($head as $head0) { @@ -1846,15 +1846,15 @@ function probe_api_path($host) { function scrape_vcard($url) { - require_once('library/HTML5/Parser.php'); - $ret = array(); logger('url=' . $url); $x = z_fetch_url($url); - if(! $x['success']) + if(! $x['success']) { + logger('ERROR fetching URL'); return $ret; + } $s = $x['body']; @@ -1871,14 +1871,14 @@ function scrape_vcard($url) { } } + $dom = new DOMDocument(); try { - $dom = HTML5_Parser::parse($s); + $dom->loadHTML( $s); } catch (DOMException $e) { - logger('Parse error: ' . $e); - } - - if(! $dom) + logger('hCard parse error: ' . $e); + // logger('hCard fetch ERROR: ' . libxml_get_last_error()->message); return $ret; + } // Pull out hCard profile elements -- cgit v1.2.3