From c22072829828c54d527bc867517d7478759cc4c9 Mon Sep 17 00:00:00 2001 From: Jacob Haddon Date: Tue, 13 Feb 2024 14:13:11 -0500 Subject: [PATCH] Perl script for aggregating RSS and outputing an HTML and feed file. Blank version and the SMHN specific configuration added --- .gitignore | 2 + blankRSS.pl | 313 +++++++++++++++++++++++++++++++++++++++++++++++ smhnRSS.pl | 340 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 655 insertions(+) create mode 100644 .gitignore create mode 100755 blankRSS.pl create mode 100644 smhnRSS.pl diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5ef4512 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +index.html +feed.xml diff --git a/blankRSS.pl b/blankRSS.pl new file mode 100755 index 0000000..41e0445 --- /dev/null +++ b/blankRSS.pl @@ -0,0 +1,313 @@ +#!/usr/bin/perl +# use cPanelUserConfig; #for cpanel servers + +use 5.010; +use strict; +use warnings; + +################################### +# +# blankRSS.pl +# +# This script pulls from a list of RSS feeds and agregates them together into a web page. +# It is designed to run as a cron and overright the HTML file. 
+# +# license GPLv3.0 https://www.gnu.org/licenses/gpl-3.0.en.html +# Code repository: https://code.jacobhaddon.com/jake/smhn +# Written by Jacob Haddon https://jacobhaddon.com +# +################################### + +# Packages + +use Time::Piece; # https://perldoc.perl.org/Time::Piece +use LWP::Simple; # https://metacpan.org/pod/LWP::Simple +use XML::RSS; # https://metacpan.org/pod/XML::RSS +use HTML::Entities; # https://metacpan.org/pod/HTML::Entities + +# server file folders +# my $rssFilePath = "/home/USER_FOLDER/public_html/feed.xml"; +# my $htmlFilePath = "/home/USER_FOLDER/public_html/index.html"; + +# local file folders +my $rssFilePath = "feed.xml"; +my $htmlFilePath = "index.html"; + +################################### +# RSS Configurations +################################### + +my $title = "The Title of My Site"; +my $homeLink = "http://example.com"; +my $feedLink = "http://example/feed.xml"; +my $description = 'A description of my feed, it should be one line in length.'; +my $webmaster = 'webmaster@example.com'; +my $copyright = 'Copyright respective writers'; + +################################### +# Go through list of URLs, get RSS feed, +# take newest 3 that are less than $then old, +# add to new RSS feed object +################################### + +# get today, subtact time to make cut off +my $now = localtime; +my $then = $now->add_months(-2); + +#number of items to keep from each feed +my $number_of_items = 2; # +1 since everything starts at 0 + +#list to hold the new RSS items +my %list; + +# Make the list of URLS while parsing DATA +my $listHTML = "
\n"; + +################################### +# Make an RSS Feed! +################################### + +# date format: Thu, 28 Dec 2023 03:51:42 +# $now->strftime("%a, %d %b %Y %H:%M:%S %z"); + +my $rss2 = XML::RSS->new (version => '2.0'); +$rss2->add_module(prefix => 'atom', uri => 'http://www.w3.org/2005/Atom'); + +$rss2->channel(title => $title, + link => $homeLink, + language => 'en-US', + description => $description, + copyright => $copyright, + pubDate => $now->strftime('%a, %d %b %Y %H:%M:%S %z'), + lastBuildDate => $now->strftime('%a, %d %b %Y %H:%M:%S %z'), + webMaster => $webmaster, + atom => { 'link' => { 'href' => $feedLink, 'rel' => 'self', 'type' => 'application/rss+xml' } } + ); # $rss->channel + +# foreach ITEM, newest (highest EPOCH) first +foreach my $name (reverse sort keys %list) { + + $rss2->add_item(title => $list{$name}->{'title'}, + permaLink => $list{$name}->{'link'}, + link => $list{$name}->{'link'}, + description => $list{$name}->{'description'}, + pubDate => $list{$name}->{'pubDate'}, + author => $list{$name}->{'dc'}{'creator'}, + source => $list{$name}->{'feedName'}, + sourceUrl => $list{$name}->{'feedURL'}, + ); # $rss->channel + +} # foreach + +# Save the RSS feed as a file +$rss2->save($rssFilePath); + +################################### +# Format the RSS data for HTML +################################### + +# make the HTML for the processed RSS +my $rssHTML = "
\n"; + +# print the title and link of each RSS item + foreach my $item (@{$rss2->{'items'}}) { + + $rssHTML .= "

" . encode_entities($item->{'title'}) . "

\n\n"; + $rssHTML .= "
" . encode_entities($item->{'author'}) . " - " . formatDate($item->{'pubDate'}) ."
\n\n"; + $rssHTML .= "
" . $item->{'description'} . "
\n\n"; + + } # foreach item + +# close out the rssHTML +$rssHTML .= "
\n\n"; + +# the webpage HTML + +# format the pubDate +my $printDate = formatDate($rss2->{'channel'}{'pubDate'}); + +# header for a direct HTML post +my $html_header = "Status: 200\nContent-type: text/html\n\n"; + +################################### +# Make the HTML Page +################################### + +my $html = <<"HTML_END"; + + + + + + + $title + + + + + +
+

$title

+

$description

+

updated: $printDate

+
+ + + +
+

The News!

+ + $rssHTML + +
+ +
+

About $title

+

$description

+
+ +
+

Contact

+

Contact $title at: $webmaster +

+ +
+

The List!

+

This is the list of the feeds that we are checking. If you have an RSS reader, grab them and follow along!

+ + $listHTML + +
+ +
+ + + +
+
+
+
+
+HTML_END
+
+# write the file
+open(my $fh, '>', $htmlFilePath) or die "Cannot open $htmlFilePath: $!";
+print {$fh} $html;
+close($fh) or die "Cannot close $htmlFilePath: $!";
+
+# print $html_header . $html;
+
+###################################
+# Functions
+###################################
+# formatDate(RFC 822 date string): returns it as 'Thu, 28 Dec 2023 03:51:42', time zone removed
+sub formatDate {
+    # strip a trailing zone ('+0000', 'GMT'), then parse with the full format; a bare
+    # strptime() only reads '%a, %d %b %Y' (time of day lost), so it is kept solely as the fallback
+    (my $testDate = $_[0] // '') =~ s/\s*(?:[+-]\d{4}|[A-Z]{1,5})\s*\z//;
+    my $t = eval { Time::Piece->strptime($testDate, '%a, %d %b %Y %H:%M:%S') } // Time::Piece->strptime($testDate);
+    return($t->strftime('%a, %d %b %Y %H:%M:%S'));
+}
+
+# FIN
+
+###################################
+# DATA is list of the feed URLs
+###################################
+
+__DATA__
+https://nnw.ranchero.com/feed.json
+https://feeds.npr.org/1001/rss.xml
\ No newline at end of file
diff --git a/smhnRSS.pl b/smhnRSS.pl
new file mode 100644
index 0000000..be188a0
--- /dev/null
+++ b/smhnRSS.pl
@@ -0,0 +1,340 @@
+#!/usr/bin/perl
+use cPanelUserConfig; #for cpanel servers
+
+use 5.010;
+use strict;
+use warnings;
+
+###################################
+#
+# The Sunday Morning Horror News
+#
+# This script pulls from a list of RSS feeds and aggregates them together into a web page.
+# It is designed to run as a cron and overwrite the HTML file.
+# +# license GPLv3.0 https://www.gnu.org/licenses/gpl-3.0.en.html +# Code repository: https://code.jacobhaddon.com/jake/smhn +# Written by Jacob Haddon https://jacobhaddon.com +# +################################### + +# Packages + +use Time::Piece; # https://perldoc.perl.org/Time::Piece +use LWP::Simple; # https://metacpan.org/pod/LWP::Simple +use XML::RSS; # https://metacpan.org/pod/XML::RSS +use HTML::Entities; # https://metacpan.org/pod/HTML::Entities + +# server file folders +# my $rssFilePath = "/home/USER_FOLDER/public_html/feed.xml"; +# my $htmlFilePath = "/home/USER_FOLDER/public_html/index.html"; + +# local file folders +my $rssFilePath = "feed.xml"; +my $htmlFilePath = "index.html"; + +################################### +# RSS Configurations +################################### + +my $title = "Sunday Morning Horror News"; +my $homeLink = "http://sundaymorninghorrornews.com"; +my $feedLink = "http://sundaymorninghorrornews.com/feed.xml"; +my $description = 'A collection of horror writer, editor, poet and publisher blogs from all over the web!'; +my $webmaster = 'webmaster@sundaymorninghorrornews.com'; +my $copyright = 'Copyright respective writers'; + +################################### +# Go through list of URLs, get RSS feed, +# take newest 3 that are less than $then old, +# add to new RSS feed object +################################### + +# get today, subtact time to make cut off +my $now = localtime; +my $then = $now->add_months(-2); + +#number of items to keep from each feed +my $number_of_items = 2; # +1 since everything starts at 0 + +#list to hold the new RSS items +my %list; + +# Make the list of URLS while parsing DATA +my $listHTML = "
\n"; + +################################### +# Make an RSS Feed! +################################### + +# date format: Thu, 28 Dec 2023 03:51:42 +# $now->strftime("%a, %d %b %Y %H:%M:%S %z"); + +my $rss2 = XML::RSS->new (version => '2.0'); +$rss2->add_module(prefix => 'atom', uri => 'http://www.w3.org/2005/Atom'); + +$rss2->channel(title => $title, + link => $homeLink, + language => 'en-US', + description => $description, + copyright => $copyright, + pubDate => $now->strftime('%a, %d %b %Y %H:%M:%S %z'), + lastBuildDate => $now->strftime('%a, %d %b %Y %H:%M:%S %z'), + webMaster => $webmaster, + atom => { 'link' => { 'href' => $feedLink, 'rel' => 'self', 'type' => 'application/rss+xml' } } + ); # $rss->channel + +# foreach ITEM, newest (highest EPOCH) first +foreach my $name (reverse sort keys %list) { + + $rss2->add_item(title => $list{$name}->{'title'}, + permaLink => $list{$name}->{'link'}, + link => $list{$name}->{'link'}, + description => $list{$name}->{'description'}, + pubDate => $list{$name}->{'pubDate'}, + author => $list{$name}->{'dc'}{'creator'}, + source => $list{$name}->{'feedName'}, + sourceUrl => $list{$name}->{'feedURL'}, + ); # $rss->channel + +} # foreach + +# Save the RSS feed as a file +$rss2->save($rssFilePath); + +################################### +# Format the RSS data for HTML +################################### + +# make the HTML for the processed RSS +my $rssHTML = "
\n"; + +# print the title and link of each RSS item + foreach my $item (@{$rss2->{'items'}}) { + + $rssHTML .= "

" . encode_entities($item->{'title'}) . "

\n\n"; + $rssHTML .= "
" . encode_entities($item->{'author'}) . " - " . formatDate($item->{'pubDate'}) ."
\n\n"; + $rssHTML .= "
" . $item->{'description'} . "
\n\n"; + + } # foreach item + +# close out the rssHTML +$rssHTML .= "
\n\n"; + +# the webpage HTML + +# format the pubDate +my $printDate = formatDate($rss2->{'channel'}{'pubDate'}); + +# header for a direct HTML post +my $html_header = "Status: 200\nContent-type: text/html\n\n"; + +################################### +# Make the HTML Page +################################### + +my $html = <<"HTML_END"; + + + + + + + The Sunday Morning Horror News + + + + + +
+ +

$title

+

$description

+

This site is updated every Sunday morning

+

updated: $printDate

+ +
+ + + +
+

The News!

+ + $rssHTML + +
+ +
+

About $title

+

The $title is an aggregator of RSS feeds from writers, poets and publishers.

+

Too often the world is flying by at the speed of social media. $title is designed to update just once a week (on Sunday!) so you can take your time and catch up on the news without having to scroll.

+ +
+ +
+

Contact

+

Contact $title at: $webmaster +

We are intolerant of intolerance. If you see something hateful, transphobic, homophobic, racist, or the like, let us know.

+

We don't publish the content you see here, just collect it, but we don't need to collect trash.

+
+ +
+

The List!

+

This is the list of the feeds that we are checking. If you have an RSS reader, grab them and follow along!

+ + $listHTML + +
+ +
+ + + +
+
+
+
+
+HTML_END
+
+# write the file
+open(my $fh, '>', $htmlFilePath) or die "Cannot open $htmlFilePath: $!";
+print {$fh} $html;
+close($fh) or die "Cannot close $htmlFilePath: $!";
+
+# print $html_header . $html;
+
+###################################
+# Functions
+###################################
+# formatDate(RFC 822 date string): returns it as 'Thu, 28 Dec 2023 03:51:42', time zone removed
+sub formatDate {
+    # strip a trailing zone ('+0000', 'GMT'), then parse with the full format; a bare
+    # strptime() only reads '%a, %d %b %Y' (time of day lost), so it is kept solely as the fallback
+    (my $testDate = $_[0] // '') =~ s/\s*(?:[+-]\d{4}|[A-Z]{1,5})\s*\z//;
+    my $t = eval { Time::Piece->strptime($testDate, '%a, %d %b %Y %H:%M:%S') } // Time::Piece->strptime($testDate);
+    return($t->strftime('%a, %d %b %Y %H:%M:%S'));
+}
+
+# FIN
+
+###################################
+# DATA is list of the feed URLs
+###################################
+
+__DATA__
+https://ecatherine.com/feed/
+https://redlagoe.com/feed/
+https://diebooth.wordpress.com/feed/
+https://kelliowen.com/feed/
+https://mercedesmyardley.com/feed/
+https://mehitobel.com/feed/
+http://antoncancre.blogspot.com/feeds/posts/default?alt=rss
+https://www.toddkeisling.com/news?format=rss
+https://www.jfgonzalez.org/news?format=rss
+https://www.briankeene.com/news?format=rss
+https://ghoulish.rip/feed/
+https://www.cemeterydance.com/extras/feed/
+https://www.gwendolynkiste.com/Blog/feed/
+https://katherinesilvaauthor.substack.com/feed
+https://queenofswordspress.com/feed/
+http://fiendlover.blogspot.com/feeds/posts/default
+http://jacobhaddon.com/feed/
+http://apokrupha.com/feed/
+https://ellendatlow.com/feed/
+https://paulaguran.com/
\ No newline at end of file