Compare commits
6 Commits
ecdb015067 ... e8b01b292c
e8b01b292c
211dede75a
7f0f20d779
1cb4d8d03d
5efc041ed9
889aae9f6a
README.md (15 changed lines)
@@ -10,8 +10,8 @@ The following CPAN modules need to be installed on your server:
 * LWP::Simple
 * LWP::Protocol::https
-* XML::RSS;
-* HTML::Entities;
+* XML::RSS
+* HTML::Entities
 
 # Configuration
 
@@ -28,10 +28,17 @@ This area is for general information on the site and feed.
 * feed URL
 * copyright
 
-There are two files you have to save, update these with the path of the folders on your server where they will be saved. Examples are given for CPanel type servers:
+There are three files you have to save, update these with the path of the folders on your server where they will be saved.
 
+* Feed - the RSS feed generated for the site, should be an xml file
+* HTML - the site itself, should be an html file
+* Error Log - this lists URLs that did not work - should be a txt file
+
+Example paths are given for CPanel type servers:
 
 * Feed - /home/USER_NAME/public_html/feed.xml
-* HTML - /home/USER_NAME/public_html/index.xml
+* HTML - /home/USER_NAME/public_html/index.html
+* Error Log - /home/USER_FOLDER/public_html/feed.log
 
 ## The Site
 
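Those three paths correspond to the commented-out "server file folders" variables near the top of blankRSS.pl and smhnRSS.pl shown in the diffs below. A minimal sketch of a cPanel-style configuration, where USER_NAME is a placeholder for your own account and the writability check is an optional extra that is not part of the scripts:

use strict;
use warnings;
use File::Basename qw(dirname);

# Hypothetical cPanel-style paths; USER_NAME stands in for your account name.
my $rssFilePath   = "/home/USER_NAME/public_html/feed.xml";   # Feed: the generated RSS, an xml file
my $htmlFilePath  = "/home/USER_NAME/public_html/index.html"; # HTML: the site itself, an html file
my $errorFilePath = "/home/USER_NAME/public_html/feed.log";   # Error Log: feeds that failed, a text file

# Optional sanity check before wiring the script into cron.
for my $path ($rssFilePath, $htmlFilePath, $errorFilePath) {
    my $dir = dirname($path);
    warn "$dir is missing or not writable; check the path\n" unless -d $dir && -w $dir;
}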
blankRSS.pl (40 changed lines)
@@ -10,7 +10,7 @@ use warnings;
 # blankRSS.pl
 #
 # This script pulls from a list of RSS feeds and agregates them together into a web page.
-# It is designed to run as a cron and overright the HTML file.
+# It is designed to run as a cron and overwrite the HTML file.
 #
 # license GPLv3.0 https://www.gnu.org/licenses/gpl-3.0.en.html
 # Code repository: https://code.jacobhaddon.com/jake/smhn
@@ -28,10 +28,12 @@ use HTML::Entities; # https://metacpan.org/pod/HTML::Entities
 # server file folders
 # my $rssFilePath = "/home/USER_FOLDER/public_html/feed.xml";
 # my $htmlFilePath = "/home/USER_FOLDER/public_html/index.html";
+# my $errorFilePath = "/home/USER_FOLDER/public_html/feed.log";
 
 # local file folders
 my $rssFilePath = "feed.xml";
 my $htmlFilePath = "index.html";
+my $errorFilePath = "feed.log";
 
 ###################################
 # RSS Configurations
@@ -63,6 +65,10 @@ my %list;
 # Make the list of URLS while parsing DATA
 my $listHTML = "<div class=\'listHTML\'><ul>\n";
 
+# Make a list of URLs that have an error
+my $listURLError = "The following feeds had issues this time:\n\n" . $now->strftime('%a, %d %b %Y %H:%M:%S %z'). "\n\n";
+
+# Go through each URL in the DATA section and make the new list
 while ( my $url = <DATA>) {
     chomp $url;
 
@@ -75,7 +81,12 @@ while ( my $url = <DATA>) {
     # parse the XML
     my $rss1 = XML::RSS->new;
     eval { $rss1->parse( $xml ) };
-    next if $@;
+
+    # if empty, add URL to log file variable
+    if ($@) {
+        $listURLError .= "* " . $url . "\n";
+        next;
+    }; # if $@
 
     # go through the items from the XML
     for (my $j = 0; $j <= $number_of_items; $j++){
@@ -96,14 +107,29 @@ while ( my $url = <DATA>) {
         $testItem->{'feedURL'} = $_;
         $testItem->{'feedName'} = $rss1->{'channel'}{'title'};
 
+        # Find Author tags
+        if ($testItem->{'dc'}{'creator'}) {
+            $testItem->{'itemAuthor'} = $testItem->{'dc'}{'creator'};
+        } elsif ($testItem->{'author'}) {
+            $testItem->{'itemAuthor'} = $testItem->{'author'}
+        } else {
+            $testItem->{'itemAuthor'} = $rss1->{'channel'}{'title'};
+        } # if author
+
         # Clean up some of the artifacts in the RSS feed 'description' section
         $testItem->{'description'} =~ s/\n\s*/\n/g; # get rid of excess white space
         $testItem->{'description'} =~ s/(<a(.+?)<\/a>)$//s; # link at end of description
         $testItem->{'description'} =~ s/<(\w) class=(.*?)>/<$1>/s; # remove class statements from text
         $testItem->{'description'} =~ s/<figure ((.|\n)+?)figure>//sg; #remove "figure" can use pipe to add more
         $testItem->{'description'} =~ s/<img(.+?)>//sg; # remove IMG tags
+        $testItem->{'description'} =~ s/<span ((.|\n)+?)>//sg; #remove "span" tags (mostly blogger)
+        $testItem->{'description'} =~ s/<\/span>//sg; #remove "span" endtags
+        $testItem->{'description'} =~ s/<div class="separator" style(.+?)<\/div>//sg; # remove blogger DIV tags
+        $testItem->{'description'} =~ s/<br(.+?)>/<br>/sg; # remove blogger BR tags
+        $testItem->{'description'} =~ s/(<div><br><\/div>)+/<br>/sg; # remove blogger BR + DIV tags
         $testItem->{'description'} =~ s/.{1200}\K.*//s; # limit length
-        $testItem->{'description'} =~ s/(<[^<]+)$//s; # link at end of description
+        $testItem->{'description'} =~ s/<\/?div.*?>//sg; # remove div tags
+        $testItem->{'description'} =~ s/(<a[^<]+)$//s; # link at end of description
 
         #add continue reading to end.
         $testItem->{'description'} .= " <a href=\'" . $testItem->{'link'} . "\' target=\'_blank\'>Continue Reading</a>";
@@ -122,6 +148,14 @@ while ( my $url = <DATA>) {
 #close out the list URL html
 $listHTML .= "</div></ul>\n";
 
+###################################
+# Write the error file
+###################################
+
+open(FH, '>', $errorFilePath) or die $!;
+print FH $listURLError;
+close(FH);
+
 ###################################
 # Make an RSS Feed!
 ###################################
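The main functional change to blankRSS.pl (and, below, to smhnRSS.pl) is that a feed which fails to parse is now recorded in the error log rather than silently skipped. Isolated from the loop, the pattern looks roughly like the following sketch; the URL and the deliberately malformed XML string are illustrative placeholders, not part of the repository:

use strict;
use warnings;
use XML::RSS; # https://metacpan.org/pod/XML::RSS

my $url = "https://example.com/feed/";    # illustrative URL only
my $xml = "<rss><channel><title>broken";  # deliberately malformed feed
my $listURLError = "The following feeds had issues this time:\n\n";

my $rss1 = XML::RSS->new;
eval { $rss1->parse( $xml ) };

# On a parse error, note the URL instead of dying; in the scripts this is
# followed by `next;` to move on to the next feed in the DATA section.
if ($@) {
    $listURLError .= "* " . $url . "\n";
}

print $listURLError;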
smhnRSS.pl (50 changed lines)
@@ -10,7 +10,7 @@ use warnings;
 # The Sunday Morning Horror News
 #
 # This script pulls from a list of RSS feeds and agregates them together into a web page.
-# It is designed to run as a cron and overright the HTML file.
+# It is designed to run as a cron and overwrite the HTML file.
 #
 # license GPLv3.0 https://www.gnu.org/licenses/gpl-3.0.en.html
 # Code repository: https://code.jacobhaddon.com/jake/smhn
@@ -21,6 +21,7 @@ use warnings;
 # Packages
 
 use Time::Piece; # https://perldoc.perl.org/Time::Piece
+use Time::Seconds; # https://perldoc.perl.org/Time::Seconds
 use LWP::Simple; # https://metacpan.org/pod/LWP::Simple
 use XML::RSS; # https://metacpan.org/pod/XML::RSS
 use HTML::Entities; # https://metacpan.org/pod/HTML::Entities
@@ -28,10 +29,12 @@ use HTML::Entities; # https://metacpan.org/pod/HTML::Entities
 # server file folders
 # my $rssFilePath = "/home/USER_FOLDER/public_html/feed.xml";
 # my $htmlFilePath = "/home/USER_FOLDER/public_html/index.html";
+# my $errorFilePath = "/home/USER_FOLDER/public_html/feed.log";
 
 # local file folders
 my $rssFilePath = "feed.xml";
 my $htmlFilePath = "index.html";
+my $errorFilePath = "feed.log";
 
 ###################################
 # RSS Configurations
@@ -50,9 +53,12 @@ my $copyright = 'Copyright respective writers';
 # add to new RSS feed object
 ###################################
 
+# number of weeks in the past to hold RSS feed
+my $num_weeks = 2;
+
 # get today, subtact time to make cut off
 my $now = localtime;
-my $then = $now->add_months(-2);
+my $then = $now - (ONE_WEEK * $num_weeks);
 
 #number of items to keep from each feed
 my $number_of_items = 2; # +1 since everything starts at 0
@@ -63,6 +69,9 @@ my %list;
 # Make the list of URLS while parsing DATA
 my $listHTML = "<div class=\'listHTML\'><ul>\n";
 
+# Make a list of URLs that have an error
+my $listURLError = "The following feeds had issues this time:\n\n" . $now->strftime('%a, %d %b %Y %H:%M:%S %z'). "\n\n";
+
 # Go through each URL in the DATA section and make the new list
 while ( my $url = <DATA>) {
     chomp $url;
@@ -76,7 +85,12 @@ while ( my $url = <DATA>) {
     # parse the XML
     my $rss1 = XML::RSS->new;
     eval { $rss1->parse( $xml ) };
-    next if $@;
+
+    # if empty, add URL to log file variable
+    if ($@) {
+        $listURLError .= "* " . $url . "\n";
+        next;
+    }; # if $@
 
     # go through the items from the XML
     for (my $j = 0; $j <= $number_of_items; $j++){
@@ -138,6 +152,14 @@ while ( my $url = <DATA>) {
 #close out the list URL html
 $listHTML .= "</ul></div>\n";
 
+###################################
+# Write the error file
+###################################
+
+open(FH, '>', $errorFilePath) or die $!;
+print FH $listURLError;
+close(FH);
+
 ###################################
 # Make an RSS Feed!
 ###################################
@@ -204,6 +226,9 @@ my $printDate = formatDate($rss2->{'channel'}{'pubDate'});
 # header for a direct HTML post
 my $html_header = "Status: 200\nContent-type: text/html\n\n";
 
+
+
+
 ###################################
 # Make the HTML Page
 ###################################
@@ -353,7 +378,7 @@ http://fiendlover.blogspot.com/feeds/posts/default
 http://jacobhaddon.com/feed/
 http://apokrupha.com/feed/
 https://ellendatlow.com/feed/
-https://paulaguran.com/
+https://paulaguran.com/feed/
 https://amandaheadlee.com/feed/
 https://theimbloglio.wordpress.com/feed/
 https://kennethwcain.com/feed/
@@ -364,4 +389,19 @@ https://weightlessbooks.com/feed/
 https://www.crystallakepub.com/feed/
 https://lynnehansen.zenfolio.com/blog.rss
 https://www.bevvincent.com/feed/
 http://liviallewellyn.com/feed/
+https://www.kristidemeester.com/blog-feed.xml
+https://www.lucysnyder.com/index.php/feed/
+https://www.emilyruthverona.com/blog-feed.xml
+https://www.elizabethhand.com/welcome?format=rss
+https://www.jamielackey.com/feed/
+https://cv-hunt.com/feed/
+https://authorjenniferallisprovost.com/feed/
+https://jezzywolfe.wordpress.com/feed/
+https://lmariewood.com/feed/
+https://www.leemurray.info/blog-feed.xml
+https://meghanarcuri.com/feed/
+https://nicolecushing.wordpress.com/feed/
+https://saratantlinger.com/feed/
+https://sunnymoraine.com/feed/
+https://lauramauro.com/feed/
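smhnRSS.pl also swaps the old $now->add_months(-2) cut-off for a window measured in weeks via Time::Seconds. A minimal standalone sketch of that calculation, assuming the same two-week window as the commit:

use strict;
use warnings;
use Time::Piece;   # https://perldoc.perl.org/Time::Piece
use Time::Seconds; # https://perldoc.perl.org/Time::Seconds

# number of weeks in the past to hold RSS feed items
my $num_weeks = 2;

my $now  = localtime;                       # Time::Piece object for "now"
my $then = $now - (ONE_WEEK * $num_weeks);  # subtracting seconds gives another Time::Piece

# $then is the cut-off; items older than this are not held in the feed.
print "Cut-off: ", $then->strftime('%a, %d %b %Y %H:%M:%S %z'), "\n";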