From 9340ae676104a2df7dd4e0f896ef1cc83461f49a Mon Sep 17 00:00:00 2001 From: Jacob Haddon Date: Wed, 14 Feb 2024 09:59:45 -0500 Subject: [PATCH 1/4] fixed CSS for extra long URLs in the list, so they break on the phone view. --- smhnRSS.pl | 341 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 341 insertions(+) create mode 100755 smhnRSS.pl diff --git a/smhnRSS.pl b/smhnRSS.pl new file mode 100755 index 0000000..902602d --- /dev/null +++ b/smhnRSS.pl @@ -0,0 +1,341 @@ +#!/usr/bin/perl +use cPanelUserConfig; #for cpanel servers + +use 5.010; +use strict; +use warnings; + +################################### +# +# The Sunday Morning Horror News +# +# This script pulls from a list of RSS feeds and aggregates them together into a web page. +# It is designed to run as a cron job and overwrite the HTML file. +# +# license GPLv3.0 https://www.gnu.org/licenses/gpl-3.0.en.html +# Code repository: https://code.jacobhaddon.com/jake/smhn +# Written by Jacob Haddon https://jacobhaddon.com +# +################################### + +# Packages + +use Time::Piece; # https://perldoc.perl.org/Time::Piece +use LWP::Simple; # https://metacpan.org/pod/LWP::Simple +use XML::RSS; # https://metacpan.org/pod/XML::RSS +use HTML::Entities; # https://metacpan.org/pod/HTML::Entities + +# server file folders +# my $rssFilePath = "/home/USER_FOLDER/public_html/feed.xml"; +# my $htmlFilePath = "/home/USER_FOLDER/public_html/index.html"; + +# local file folders +my $rssFilePath = "feed.xml"; +my $htmlFilePath = "index.html"; + +################################### +# RSS Configurations +################################### + +my $title = "Sunday Morning Horror News"; +my $homeLink = "http://sundaymorninghorrornews.com"; +my $feedLink = "http://sundaymorninghorrornews.com/feed.xml"; +my $description = 'A collection of horror writer, editor, poet and publisher blogs from all over the web!'; +my $webmaster = 'webmaster@sundaymorninghorrornews.com'; +my $copyright = 'Copyright respective 
writers'; + +################################### +# Go through list of URLs, get RSS feed, +# take newest 3 that are less than $then old, +# add to new RSS feed object +################################### + +# get today, subtract time to make the cutoff +my $now = localtime; +my $then = $now->add_months(-2); + +#number of items to keep from each feed +my $number_of_items = 2; # +1 since everything starts at 0 + +#list to hold the new RSS items +my %list; + +# Make the list of URLS while parsing DATA +my $listHTML = "
\n"; + +################################### +# Make an RSS Feed! +################################### + +# date format: Thu, 28 Dec 2023 03:51:42 +# $now->strftime("%a, %d %b %Y %H:%M:%S %z"); + +my $rss2 = XML::RSS->new (version => '2.0'); +$rss2->add_module(prefix => 'atom', uri => 'http://www.w3.org/2005/Atom'); + +$rss2->channel(title => $title, + link => $homeLink, + language => 'en-US', + description => $description, + copyright => $copyright, + pubDate => $now->strftime('%a, %d %b %Y %H:%M:%S %z'), + lastBuildDate => $now->strftime('%a, %d %b %Y %H:%M:%S %z'), + webMaster => $webmaster, + atom => { 'link' => { 'href' => $feedLink, 'rel' => 'self', 'type' => 'application/rss+xml' } } + ); # $rss->channel + +# foreach ITEM, newest (highest EPOCH) first +foreach my $name (reverse sort keys %list) { + + $rss2->add_item(title => $list{$name}->{'title'}, + permaLink => $list{$name}->{'link'}, + link => $list{$name}->{'link'}, + description => $list{$name}->{'description'}, + pubDate => $list{$name}->{'pubDate'}, + author => $list{$name}->{'dc'}{'creator'}, + source => $list{$name}->{'feedName'}, + sourceUrl => $list{$name}->{'feedURL'}, + ); # $rss->channel + +} # foreach + +# Save the RSS feed as a file +$rss2->save($rssFilePath); + +################################### +# Format the RSS data for HTML +################################### + +# make the HTML for the processed RSS +my $rssHTML = "
\n"; + +# print the title and link of each RSS item + foreach my $item (@{$rss2->{'items'}}) { + + $rssHTML .= "

" . encode_entities($item->{'title'}) . "

\n\n"; + $rssHTML .= "
" . encode_entities($item->{'author'}) . " - " . formatDate($item->{'pubDate'}) ."
\n\n"; + $rssHTML .= "
" . $item->{'description'} . "
\n\n"; + + } # foreach item + +# close out the rssHTML +$rssHTML .= "
\n\n"; + +# the webpage HTML + +# format the pubDate +my $printDate = formatDate($rss2->{'channel'}{'pubDate'}); + +# header for a direct HTML post +my $html_header = "Status: 200\nContent-type: text/html\n\n"; + +################################### +# Make the HTML Page +################################### + +my $html = <<"HTML_END"; + + + + + + + The Sunday Morning Horror News + + + + + +
+ +

$title

+

$description

+

This site is updated every Sunday morning

+

updated: $printDate

+ +
+ + + +
+

The News!

+ + $rssHTML + +
+ +
+

About $title

+

The $title is an aggregator of RSS feeds from writers, poets and publishers.

+

Too often the world is flying by at the speed of social media. $title is designed to update just once a week (on Sunday!) so you can take your time and catch up on the news without having to scroll.

+ +
+ +
+

Contact

+

Contact $title at: $webmaster +

We are intolerant of intolerance. If you see something hateful, transphobic, homophobic, racist, or the like, let us know.

+

We don't publish the content you see here, just collect it, but we don't need to collect trash.

+
+ +
+

The List!

+

This is the list of the feeds that we are checking. If you have an RSS reader, grab them and follow along!

+ + $listHTML + +
+ +
+ + + +
+ + + + +HTML_END + +# write the file +open(FH, '>', $htmlFilePath) or die $!; +print FH $html; +close(FH); + +# print $html_header . $html; + +################################### +# Functions +################################### + +sub formatDate { + my $testDate = $_[0]; + $testDate =~ s/((\+|\-)(\d\d\d\d))//; + # Convert to a TIME object + my $t = Time::Piece->strptime($testDate); + return($t->strftime('%a, %d %b %Y %H:%M:%S')); +} + +# FIN + +################################### +# DATA is list of the feed URLs +################################### + +__DATA__ +https://ecatherine.com/feed/ +https://redlagoe.com/feed/ +https://diebooth.wordpress.com/feed/ +https://kelliowen.com/feed/ +https://mercedesmyardley.com/feed/ +https://mehitobel.com/feed/ +http://antoncancre.blogspot.com/feeds/posts/default?alt=rss +https://www.toddkeisling.com/news?format=rss +https://www.jfgonzalez.org/news?format=rss +https://www.briankeene.com/news?format=rss +https://ghoulish.rip/feed/ +https://www.cemeterydance.com/extras/feed/ +https://www.gwendolynkiste.com/Blog/feed/ +https://katherinesilvaauthor.substack.com/feed +https://queenofswordspress.com/feed/ +http://fiendlover.blogspot.com/feeds/posts/default +http://jacobhaddon.com/feed/ +http://apokrupha.com/feed/ +https://ellendatlow.com/feed/ +https://paulaguran.com/ \ No newline at end of file From cf59c9248d67f36d6616a9fdfde888f89e36c21d Mon Sep 17 00:00:00 2001 From: Jacob Haddon Date: Sat, 17 Feb 2024 16:29:30 -0500 Subject: [PATCH 2/4] fixed backwards DIV UL tags at the end of the RSS feed list --- smhnRSS.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smhnRSS.pl b/smhnRSS.pl index 902602d..c11f5a5 100755 --- a/smhnRSS.pl +++ b/smhnRSS.pl @@ -120,7 +120,7 @@ while ( my $url = ) { } # while DATA #close out the list URL html -$listHTML .= "\n"; +$listHTML .= "\n"; ################################### # Make an RSS Feed! 
From 38400c7b82cd8aed3b07228d1afd249a0ff936cf Mon Sep 17 00:00:00 2001 From: Jacob Haddon Date: Tue, 20 Feb 2024 20:07:47 -0500 Subject: [PATCH 3/4] Adds a check about the item author and a backup if one is not there. Tidied up some CSS --- smhnRSS.pl | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/smhnRSS.pl b/smhnRSS.pl index c11f5a5..8e9eb04 100755 --- a/smhnRSS.pl +++ b/smhnRSS.pl @@ -1,5 +1,5 @@ #!/usr/bin/perl -use cPanelUserConfig; #for cpanel servers +# use cPanelUserConfig; #for cpanel servers use 5.010; use strict; @@ -63,6 +63,7 @@ my %list; # Make the list of URLS while parsing DATA my $listHTML = "
    \n"; +# Go through each URL in the DATA section and make the new list while ( my $url = ) { chomp $url; @@ -96,6 +97,15 @@ while ( my $url = ) { $testItem->{'feedURL'} = $_; $testItem->{'feedName'} = $rss1->{'channel'}{'title'}; + # Find Author tags + if ($testItem->{'dc'}{'creator'}) { + $testItem->{'itemAuthor'} = $testItem->{'dc'}{'creator'}; + } elsif ($testItem->{'author'}) { + $testItem->{'itemAuthor'} = $testItem->{'author'} + } else { + $testItem->{'itemAuthor'} = $rss1->{'channel'}{'title'}; + } # if author + # Clean up some of the artifacts in the RSS feed 'description' section $testItem->{'description'} =~ s/\n\s*/\n/g; # get rid of excess white space $testItem->{'description'} =~ s/()$//s; # link at end of description @@ -151,7 +161,7 @@ foreach my $name (reverse sort keys %list) { link => $list{$name}->{'link'}, description => $list{$name}->{'description'}, pubDate => $list{$name}->{'pubDate'}, - author => $list{$name}->{'dc'}{'creator'}, + author => $list{$name}->{'itemAuthor'}, source => $list{$name}->{'feedName'}, sourceUrl => $list{$name}->{'feedURL'}, ); # $rss->channel @@ -204,9 +214,7 @@ my $html = <<"HTML_END"; body{ margin:40px auto; max-width:650px; - line-height:1.6; - font-size:18px; - font-family: Baskerville, TimesNewRoman, Times New Roman, Times, Georgia, serif; + font: 18px/1.6 baskerville, palatino, 'palatino linotype', georgia,serif; padding:0 10px: word-break: break-word; } @@ -239,7 +247,7 @@ my $html = <<"HTML_END";

    $title

    $description

    -

    This site is updated every Sunday morning

    +

    This site is in BETA and will update daily as we test things out and add URLs. Let us know if there are sites that should be included!

    updated: $printDate

    @@ -305,12 +313,13 @@ close(FH); ################################### sub formatDate { + # takes a date, and converts it to a formatted string my $testDate = $_[0]; $testDate =~ s/((\+|\-)(\d\d\d\d))//; # Convert to a TIME object my $t = Time::Piece->strptime($testDate); return($t->strftime('%a, %d %b %Y %H:%M:%S')); -} +} # sub formatDate # FIN @@ -326,10 +335,10 @@ https://kelliowen.com/feed/ https://mercedesmyardley.com/feed/ https://mehitobel.com/feed/ http://antoncancre.blogspot.com/feeds/posts/default?alt=rss +https://marysangi.wordpress.com/feed/ https://www.toddkeisling.com/news?format=rss https://www.jfgonzalez.org/news?format=rss https://www.briankeene.com/news?format=rss -https://ghoulish.rip/feed/ https://www.cemeterydance.com/extras/feed/ https://www.gwendolynkiste.com/Blog/feed/ https://katherinesilvaauthor.substack.com/feed @@ -338,4 +347,14 @@ http://fiendlover.blogspot.com/feeds/posts/default http://jacobhaddon.com/feed/ http://apokrupha.com/feed/ https://ellendatlow.com/feed/ -https://paulaguran.com/ \ No newline at end of file +https://paulaguran.com/ +https://amandaheadlee.com/feed/ +https://theimbloglio.wordpress.com/feed/ +https://kennethwcain.com/feed/ +https://wellwortharead.blogspot.com/feeds/posts/default?alt=rss +http://robertfordauthor.com/feed/ +https://tinyfrights.com/feed/ +https://weightlessbooks.com/feed/ +https://www.crystallakepub.com/feed/ +https://lynnehansen.zenfolio.com/blog.rss +https://www.bevvincent.com/feed/ \ No newline at end of file From d30f85b57b8a64e5d004eb06f18ec8989fe2cce3 Mon Sep 17 00:00:00 2001 From: Jacob Haddon Date: Tue, 20 Feb 2024 22:04:12 -0500 Subject: [PATCH 4/4] added more filters for description, these target the mess Blogger sends over, but seem to help others. 
--- smhnRSS.pl | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/smhnRSS.pl b/smhnRSS.pl index 8e9eb04..e7ef79a 100755 --- a/smhnRSS.pl +++ b/smhnRSS.pl @@ -112,8 +112,14 @@ while ( my $url = ) { $testItem->{'description'} =~ s/<(\w) class=(.*?)>/<$1>/s; # remove class statements from text $testItem->{'description'} =~ s/
    //sg; #remove "figure" can use pipe to add more $testItem->{'description'} =~ s///sg; # remove IMG tags + $testItem->{'description'} =~ s///sg; #remove "span" tags (mostly blogger) + $testItem->{'description'} =~ s/<\/span>//sg; #remove "span" endtags + $testItem->{'description'} =~ s/
    //sg; # remove blogger DIV tags + $testItem->{'description'} =~ s//
    /sg; # remove blogger BR tags + $testItem->{'description'} =~ s/(

    <\/div>)+/
    /sg; # remove blogger BR + DIV tags $testItem->{'description'} =~ s/.{1200}\K.*//s; # limit length - $testItem->{'description'} =~ s/(<[^<]+)$//s; # link at end of description + $testItem->{'description'} =~ s/<\/?div.*?>//sg; # remove div tags + $testItem->{'description'} =~ s/({'description'} .= " {'link'} . "\' target=\'_blank\'>Continue Reading"; @@ -357,4 +363,5 @@ https://tinyfrights.com/feed/ https://weightlessbooks.com/feed/ https://www.crystallakepub.com/feed/ https://lynnehansen.zenfolio.com/blog.rss -https://www.bevvincent.com/feed/ \ No newline at end of file +https://www.bevvincent.com/feed/ +http://liviallewellyn.com/feed/ \ No newline at end of file