diff options
author | Friendika <info@friendika.com> | 2011-07-04 23:02:04 -0700 |
---|---|---|
committer | Friendika <info@friendika.com> | 2011-07-04 23:02:04 -0700 |
commit | 24d41e2c6e759baf17a10aa2e48d4b1907d5c7a0 (patch) | |
tree | 70546c8afb1992ab06f404bedcfa0c26f612213d | |
parent | 92831c9416c58d57f93b59748534b82ef0fb53cc (diff) | |
download | volse-hubzilla-24d41e2c6e759baf17a10aa2e48d4b1907d5c7a0.tar.gz volse-hubzilla-24d41e2c6e759baf17a10aa2e48d4b1907d5c7a0.tar.bz2 volse-hubzilla-24d41e2c6e759baf17a10aa2e48d4b1907d5c7a0.zip |
Purify HTML before trying to parse wild URLs. This way it should at least parse.
-rw-r--r-- | mod/parse_url.php | 17 |
1 file changed, 13 insertions, 4 deletions
diff --git a/mod/parse_url.php b/mod/parse_url.php index 15a6aced0..ec28d7411 100644 --- a/mod/parse_url.php +++ b/mod/parse_url.php @@ -1,6 +1,7 @@ <?php require_once('library/HTML5/Parser.php'); +require_once('library/HTMLPurifier.auto.php'); function parse_url_content(&$a) { @@ -31,16 +32,25 @@ function parse_url_content(&$a) { killme(); } + logger('parse_url: data: ' . $s, LOGGER_DATA); if(! $s) { echo sprintf($template,$url,$url,''); killme(); } + $config = HTMLPurifier_Config::createDefault(); + $config->set('Cache.DefinitionImpl', null); + + $purifier = new HTMLPurifier($config); + $s = $purifier->purify($s); + $dom = @HTML5_Parser::parse($s); - if(! $dom) - return $ret; + if(! $dom) { + echo sprintf($template,$url,$url,''); + killme(); + } $items = $dom->getElementsByTagName('title'); @@ -51,7 +61,6 @@ function parse_url_content(&$a) { } } - $divs = $dom->getElementsByTagName('div'); if($divs) { foreach($divs as $div) { @@ -94,6 +103,6 @@ function parse_url_content(&$a) { $text = '<br />' . $text; } - echo sprintf($template,$url,$title,$text); + echo sprintf($template,$url,($title) ? $title : $url,$text); killme(); } |