Added more filters for the description. These target the messy HTML that Blogger sends over, but they seem to help with other feeds as well.

This commit is contained in:
Jacob Haddon 2024-02-20 22:04:12 -05:00
parent 38400c7b82
commit d30f85b57b

View File

@ -112,8 +112,14 @@ while ( my $url = <DATA>) {
$testItem->{'description'} =~ s/<(\w) class=(.*?)>/<$1>/s; # remove class statements from text $testItem->{'description'} =~ s/<(\w) class=(.*?)>/<$1>/s; # remove class statements from text
$testItem->{'description'} =~ s/<figure ((.|\n)+?)figure>//sg; #remove "figure" can use pipe to add more $testItem->{'description'} =~ s/<figure ((.|\n)+?)figure>//sg; #remove "figure" can use pipe to add more
$testItem->{'description'} =~ s/<img(.+?)>//sg; # remove IMG tags $testItem->{'description'} =~ s/<img(.+?)>//sg; # remove IMG tags
$testItem->{'description'} =~ s/<span ((.|\n)+?)>//sg; #remove "span" tags (mostly blogger)
$testItem->{'description'} =~ s/<\/span>//sg; #remove "span" endtags
$testItem->{'description'} =~ s/<div class="separator" style(.+?)<\/div>//sg; # remove blogger DIV tags
$testItem->{'description'} =~ s/<br(.+?)>/<br>/sg; # remove blogger BR tags
$testItem->{'description'} =~ s/(<div><br><\/div>)+/<br>/sg; # remove blogger BR + DIV tags
$testItem->{'description'} =~ s/.{1200}\K.*//s; # limit length $testItem->{'description'} =~ s/.{1200}\K.*//s; # limit length
$testItem->{'description'} =~ s/(<[^<]+)$//s; # link at end of description $testItem->{'description'} =~ s/<\/?div.*?>//sg; # remove div tags
$testItem->{'description'} =~ s/(<a[^<]+)$//s; # link at end of description
#add continue reading to end. #add continue reading to end.
$testItem->{'description'} .= " <a href=\'" . $testItem->{'link'} . "\' target=\'_blank\'>Continue Reading</a>"; $testItem->{'description'} .= " <a href=\'" . $testItem->{'link'} . "\' target=\'_blank\'>Continue Reading</a>";
@ -357,4 +363,5 @@ https://tinyfrights.com/feed/
https://weightlessbooks.com/feed/ https://weightlessbooks.com/feed/
https://www.crystallakepub.com/feed/ https://www.crystallakepub.com/feed/
https://lynnehansen.zenfolio.com/blog.rss https://lynnehansen.zenfolio.com/blog.rss
https://www.bevvincent.com/feed/ https://www.bevvincent.com/feed/
http://liviallewellyn.com/feed/