Compare commits

..

6 Commits

Author SHA1 Message Date
Jacob Haddon
e8b01b292c this adds in date math to change from two months to two weeks for the feed length 2024-03-17 19:07:28 -04:00
Jacob Haddon
211dede75a minor corrections 2024-03-07 14:28:03 -05:00
Jacob Haddon
7f0f20d779 updated blank with changes from other file 2024-03-07 14:22:17 -05:00
Jacob Haddon
1cb4d8d03d Merge branch 'smhn' 2024-03-07 14:15:03 -05:00
Jacob Haddon
5efc041ed9 updated readme 2024-03-07 14:14:46 -05:00
Jacob Haddon
889aae9f6a this ads an error log into the RSS feed loop, logging which URLs fail parsing 2024-03-06 19:53:25 -05:00
3 changed files with 93 additions and 12 deletions

View File

@ -10,8 +10,8 @@ The following CPAN modules need to be installed on your server:
* LWP::Simple
* LWP::Protocol::https
* XML::RSS;
* HTML::Entities;
* XML::RSS
* HTML::Entities
# Configuration
@ -28,10 +28,17 @@ This area is for general information on the site and feed.
* feed URL
* copyright
There are two files you have to save, update these with the path of the folders on your server where they will be saved. Examples are given for CPanel type servers:
There are three files you have to save; update these with the paths of the folders on your server where they will be saved.
* Feed - the RSS feed generated for the site, should be an xml file
* HTML - the site itself, should be an html file
* Error Log - this lists URLs that did not work - should be a txt file
Example paths are given for CPanel type servers:
* Feed - /home/USER_NAME/public_html/feed.xml
* HTML - /home/USER_NAME/public_html/index.xml
* HTML - /home/USER_NAME/public_html/index.html
* Error Log - /home/USER_NAME/public_html/feed.log
## The Site

View File

@ -10,7 +10,7 @@ use warnings;
# blankRSS.pl
#
# This script pulls from a list of RSS feeds and aggregates them together into a web page.
# It is designed to run as a cron and overright the HTML file.
# It is designed to run as a cron and overwrite the HTML file.
#
# license GPLv3.0 https://www.gnu.org/licenses/gpl-3.0.en.html
# Code repository: https://code.jacobhaddon.com/jake/smhn
@ -28,10 +28,12 @@ use HTML::Entities; # https://metacpan.org/pod/HTML::Entities
# Output file locations. Use the commented-out absolute paths when running
# on a server (replace USER_FOLDER with the account's folder); the active
# relative paths below are resolved against the current working directory.
# server file folders
# my $rssFilePath = "/home/USER_FOLDER/public_html/feed.xml";
# my $htmlFilePath = "/home/USER_FOLDER/public_html/index.html";
# my $errorFilePath = "/home/USER_FOLDER/public_html/feed.log";
# local file folders
my $rssFilePath = "feed.xml"; # the generated RSS feed
my $htmlFilePath = "index.html"; # the generated HTML page
my $errorFilePath = "feed.log"; # list of feed URLs that failed to parse
###################################
# RSS Configurations
@ -63,6 +65,10 @@ my %list;
# Make the list of URLS while parsing DATA
# (opens a <div> and <ul> here; they are closed once the list is complete)
my $listHTML = "<div class=\'listHTML\'><ul>\n";
# Make a list of URLs that have an error
# Starts with a heading and the current timestamp; one "* URL" line is
# appended for each feed that fails to parse.
my $listURLError = "The following feeds had issues this time:\n\n" . $now->strftime('%a, %d %b %Y %H:%M:%S %z'). "\n\n";
# Go through each URL in the DATA section and make the new list
while ( my $url = <DATA>) {
chomp $url;
@ -75,7 +81,12 @@ while ( my $url = <DATA>) {
# parse the XML
my $rss1 = XML::RSS->new;
eval { $rss1->parse( $xml ) };
next if $@;
# the parse raised an error: record the URL for the error log, skip this feed
if ($@) {
    $listURLError .= "* $url\n";
    next;
}
# go through the items from the XML
for (my $j = 0; $j <= $number_of_items; $j++){
@ -96,14 +107,29 @@ while ( my $url = <DATA>) {
$testItem->{'feedURL'} = $_;
$testItem->{'feedName'} = $rss1->{'channel'}{'title'};
# Pick the item's author: prefer dc:creator, then the item's own author
# tag, and fall back to the feed (channel) title when neither is set
$testItem->{'itemAuthor'} = $testItem->{'dc'}{'creator'}
    || $testItem->{'author'}
    || $rss1->{'channel'}{'title'};
# Clean up some of the artifacts in the RSS feed 'description' section
$testItem->{'description'} =~ s/\n\s*/\n/g; # get rid of excess white space
$testItem->{'description'} =~ s/(<a(.+?)<\/a>)$//s; # link at end of description
$testItem->{'description'} =~ s/<(\w) class=(.*?)>/<$1>/s; # remove class statements from text
$testItem->{'description'} =~ s/<figure ((.|\n)+?)figure>//sg; #remove "figure" can use pipe to add more
$testItem->{'description'} =~ s/<img(.+?)>//sg; # remove IMG tags
$testItem->{'description'} =~ s/<span ((.|\n)+?)>//sg; #remove "span" tags (mostly blogger)
$testItem->{'description'} =~ s/<\/span>//sg; #remove "span" endtags
$testItem->{'description'} =~ s/<div class="separator" style(.+?)<\/div>//sg; # remove blogger DIV tags
$testItem->{'description'} =~ s/<br(.+?)>/<br>/sg; # remove blogger BR tags
$testItem->{'description'} =~ s/(<div><br><\/div>)+/<br>/sg; # remove blogger BR + DIV tags
$testItem->{'description'} =~ s/.{1200}\K.*//s; # limit length
$testItem->{'description'} =~ s/(<[^<]+)$//s; # link at end of description
$testItem->{'description'} =~ s/<\/?div.*?>//sg; # remove div tags
$testItem->{'description'} =~ s/(<a[^<]+)$//s; # link at end of description
#add continue reading to end.
$testItem->{'description'} .= " <a href=\'" . $testItem->{'link'} . "\' target=\'_blank\'>Continue Reading</a>";
@ -122,6 +148,14 @@ while ( my $url = <DATA>) {
# close out the list URL html; the list was opened as
# "<div class='listHTML'><ul>", so the tags must be closed in the reverse
# order (</ul> before </div>) to keep the markup properly nested
$listHTML .= "</ul></div>\n";
###################################
# Write the error file
###################################
# Overwrite the error log with this run's list of feeds that failed to parse.
# A lexical filehandle keeps the handle scoped here rather than living in a
# package-global bareword, and the path is included in any failure message.
open(my $errorFH, '>', $errorFilePath)
    or die "Cannot open $errorFilePath for writing: $!";
print {$errorFH} $listURLError;
# buffered write errors only surface at close, so report them; a warning is
# used so a problem with the log does not stop the feed from being built
close($errorFH)
    or warn "Cannot close $errorFilePath: $!";
###################################
# Make an RSS Feed!
###################################

View File

@ -10,7 +10,7 @@ use warnings;
# The Sunday Morning Horror News
#
# This script pulls from a list of RSS feeds and aggregates them together into a web page.
# It is designed to run as a cron and overright the HTML file.
# It is designed to run as a cron and overwrite the HTML file.
#
# license GPLv3.0 https://www.gnu.org/licenses/gpl-3.0.en.html
# Code repository: https://code.jacobhaddon.com/jake/smhn
@ -21,6 +21,7 @@ use warnings;
# Packages
use Time::Piece; # https://perldoc.perl.org/Time::Piece
use Time::Seconds; # https://perldoc.perl.org/Time::Seconds
use LWP::Simple; # https://metacpan.org/pod/LWP::Simple
use XML::RSS; # https://metacpan.org/pod/XML::RSS
use HTML::Entities; # https://metacpan.org/pod/HTML::Entities
@ -28,10 +29,12 @@ use HTML::Entities; # https://metacpan.org/pod/HTML::Entities
# Output file locations. Use the commented-out absolute paths when running
# on a server (replace USER_FOLDER with the account's folder); the active
# relative paths below are resolved against the current working directory.
# server file folders
# my $rssFilePath = "/home/USER_FOLDER/public_html/feed.xml";
# my $htmlFilePath = "/home/USER_FOLDER/public_html/index.html";
# my $errorFilePath = "/home/USER_FOLDER/public_html/feed.log";
# local file folders
my $rssFilePath = "feed.xml"; # the generated RSS feed
my $htmlFilePath = "index.html"; # the generated HTML page
my $errorFilePath = "feed.log"; # list of feed URLs that failed to parse
###################################
# RSS Configurations
@ -50,9 +53,12 @@ my $copyright = 'Copyright respective writers';
# add to new RSS feed object
###################################
# number of weeks in the past to hold RSS feed
my $num_weeks = 2;
# get today, subtract time to make cut off
my $now = localtime;
my $then = $now->add_months(-2);
my $then = $now - (ONE_WEEK * $num_weeks);
#number of items to keep from each feed
my $number_of_items = 2; # +1 since everything starts at 0
@ -63,6 +69,9 @@ my %list;
# Make the list of URLS while parsing DATA
# (opens a <div> and <ul> here; they are closed once the list is complete)
my $listHTML = "<div class=\'listHTML\'><ul>\n";
# Make a list of URLs that have an error
# Starts with a heading and the current timestamp; one "* URL" line is
# appended for each feed that fails to parse.
my $listURLError = "The following feeds had issues this time:\n\n" . $now->strftime('%a, %d %b %Y %H:%M:%S %z'). "\n\n";
# Go through each URL in the DATA section and make the new list
while ( my $url = <DATA>) {
chomp $url;
@ -76,7 +85,12 @@ while ( my $url = <DATA>) {
# parse the XML
my $rss1 = XML::RSS->new;
eval { $rss1->parse( $xml ) };
next if $@;
# the parse raised an error: record the URL for the error log, skip this feed
if ($@) {
    $listURLError .= "* $url\n";
    next;
}
# go through the items from the XML
for (my $j = 0; $j <= $number_of_items; $j++){
@ -138,6 +152,14 @@ while ( my $url = <DATA>) {
#close out the list URL html
$listHTML .= "</ul></div>\n";
###################################
# Write the error file
###################################
# Overwrite the error log with this run's list of feeds that failed to parse.
# A lexical filehandle keeps the handle scoped here rather than living in a
# package-global bareword, and the path is included in any failure message.
open(my $errorFH, '>', $errorFilePath)
    or die "Cannot open $errorFilePath for writing: $!";
print {$errorFH} $listURLError;
# buffered write errors only surface at close, so report them; a warning is
# used so a problem with the log does not stop the feed from being built
close($errorFH)
    or warn "Cannot close $errorFilePath: $!";
###################################
# Make an RSS Feed!
###################################
@ -204,6 +226,9 @@ my $printDate = formatDate($rss2->{'channel'}{'pubDate'});
# header for a direct HTML post
my $html_header = "Status: 200\nContent-type: text/html\n\n";
###################################
# Make the HTML Page
###################################
@ -353,7 +378,7 @@ http://fiendlover.blogspot.com/feeds/posts/default
http://jacobhaddon.com/feed/
http://apokrupha.com/feed/
https://ellendatlow.com/feed/
https://paulaguran.com/
https://paulaguran.com/feed/
https://amandaheadlee.com/feed/
https://theimbloglio.wordpress.com/feed/
https://kennethwcain.com/feed/
@ -364,4 +389,19 @@ https://weightlessbooks.com/feed/
https://www.crystallakepub.com/feed/
https://lynnehansen.zenfolio.com/blog.rss
https://www.bevvincent.com/feed/
http://liviallewellyn.com/feed/
http://liviallewellyn.com/feed/
https://www.kristidemeester.com/blog-feed.xml
https://www.lucysnyder.com/index.php/feed/
https://www.emilyruthverona.com/blog-feed.xml
https://www.elizabethhand.com/welcome?format=rss
https://www.jamielackey.com/feed/
https://cv-hunt.com/feed/
https://authorjenniferallisprovost.com/feed/
https://jezzywolfe.wordpress.com/feed/
https://lmariewood.com/feed/
https://www.leemurray.info/blog-feed.xml
https://meghanarcuri.com/feed/
https://nicolecushing.wordpress.com/feed/
https://saratantlinger.com/feed/
https://sunnymoraine.com/feed/
https://lauramauro.com/feed/