about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Friendika <info@friendika.com> 2010-12-19 19:04:37 -0800
committer Friendika <info@friendika.com> 2010-12-19 19:04:37 -0800
commit 2d9718fee9f5c07fe8098b892f9b096fcc68c43e (patch)
tree a714631ea94bd57a7becd04f8081edf8cf1b2b3e
parent 24a9a41f969cfd7625847c77a949e4b279f68406 (diff)
download volse-hubzilla-2d9718fee9f5c07fe8098b892f9b096fcc68c43e.tar.gz
volse-hubzilla-2d9718fee9f5c07fe8098b892f9b096fcc68c43e.tar.bz2
volse-hubzilla-2d9718fee9f5c07fe8098b892f9b096fcc68c43e.zip
do a slightly better job at finding relevant content from scraping submitted links
-rw-r--r-- mod/parse_url.php 43
1 files changed, 34 insertions, 9 deletions
diff --git a/mod/parse_url.php b/mod/parse_url.php
index aa71893ab..acfe624cb 100644
--- a/mod/parse_url.php
+++ b/mod/parse_url.php
@@ -6,6 +6,8 @@ function parse_url_content(&$a) {
$url = trim($_GET['url']);
+ $text = null;
+
$template = "<a href=\"%s\" >%s</a>%s";
if($url)
@@ -34,15 +36,38 @@ function parse_url_content(&$a) {
}
}
- $items = $dom->getElementsByTagName('p');
- if($items) {
- foreach($items as $item) {
- $text = $item->textContent;
- $text = strip_tags($text);
- if(strlen($text) < 100)
- continue;
- $text = substr($text,0,250) . '...' ;
- break;
+
+ $divs = $dom->getElementsByTagName('div');
+ if($divs) {
+ foreach($divs as $div) {
+ $class = $div->getAttribute('class');
+ if($class && stristr($class,'article')) {
+ $items = $div->getElementsByTagName('p');
+ if($items) {
+ foreach($items as $item) {
+ $text = $item->textContent;
+ $text = strip_tags($text);
+ if(strlen($text) < 100)
+ continue;
+ $text = substr($text,0,250) . '...' ;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ if(! $text) {
+ $items = $dom->getElementsByTagName('p');
+ if($items) {
+ foreach($items as $item) {
+ $text = $item->textContent;
+ $text = strip_tags($text);
+ if(strlen($text) < 100)
+ continue;
+ $text = substr($text,0,250) . '...' ;
+ break;
+ }
}
}