about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Friendika <info@friendika.com> 2011-07-04 23:02:04 -0700
committer Friendika <info@friendika.com> 2011-07-04 23:02:04 -0700
commit 24d41e2c6e759baf17a10aa2e48d4b1907d5c7a0 (patch)
tree 70546c8afb1992ab06f404bedcfa0c26f612213d
parent 92831c9416c58d57f93b59748534b82ef0fb53cc (diff)
download volse-hubzilla-24d41e2c6e759baf17a10aa2e48d4b1907d5c7a0.tar.gz
volse-hubzilla-24d41e2c6e759baf17a10aa2e48d4b1907d5c7a0.tar.bz2
volse-hubzilla-24d41e2c6e759baf17a10aa2e48d4b1907d5c7a0.zip
Purify HTML before trying to parse wild URLs. This way it should at least parse.
-rw-r--r-- mod/parse_url.php 17
1 files changed, 13 insertions, 4 deletions
diff --git a/mod/parse_url.php b/mod/parse_url.php
index 15a6aced0..ec28d7411 100644
--- a/mod/parse_url.php
+++ b/mod/parse_url.php
@@ -1,6 +1,7 @@
<?php
require_once('library/HTML5/Parser.php');
+require_once('library/HTMLPurifier.auto.php');
function parse_url_content(&$a) {
@@ -31,16 +32,25 @@ function parse_url_content(&$a) {
killme();
}
+ logger('parse_url: data: ' . $s, LOGGER_DATA);
if(! $s) {
echo sprintf($template,$url,$url,'');
killme();
}
+ $config = HTMLPurifier_Config::createDefault();
+ $config->set('Cache.DefinitionImpl', null);
+
+ $purifier = new HTMLPurifier($config);
+ $s = $purifier->purify($s);
+
$dom = @HTML5_Parser::parse($s);
- if(! $dom)
- return $ret;
+ if(! $dom) {
+ echo sprintf($template,$url,$url,'');
+ killme();
+ }
$items = $dom->getElementsByTagName('title');
@@ -51,7 +61,6 @@ function parse_url_content(&$a) {
}
}
-
$divs = $dom->getElementsByTagName('div');
if($divs) {
foreach($divs as $div) {
@@ -94,6 +103,6 @@ function parse_url_content(&$a) {
$text = '<br />' . $text;
}
- echo sprintf($template,$url,$title,$text);
+ echo sprintf($template,$url,($title) ? $title : $url,$text);
killme();
}