Compare commits
6 Commits
ecdb015067
...
e8b01b292c
Author | SHA1 | Date | |
---|---|---|---|
|
e8b01b292c | ||
|
211dede75a | ||
|
7f0f20d779 | ||
|
1cb4d8d03d | ||
|
5efc041ed9 | ||
|
889aae9f6a |
15
README.md
15
README.md
@ -10,8 +10,8 @@ The following CPAN modules need to be installed on your server:
|
||||
|
||||
* LWP::Simple
|
||||
* LWP::Protocol::https
|
||||
* XML::RSS;
|
||||
* HTML::Entities;
|
||||
* XML::RSS
|
||||
* HTML::Entities
|
||||
|
||||
# Configuration
|
||||
|
||||
@ -28,10 +28,17 @@ This area is for general information on the site and feed.
|
||||
* feed URL
|
||||
* copyright
|
||||
|
||||
There are two files you have to save, update these with the path of the folders on your server where they will be saved. Examples are given for CPanel type servers:
|
||||
There are three files you have to save, update these with the path of the folders on your server where they will be saved.
|
||||
|
||||
* Feed - the RSS feed generated for the site, should be an xml file
|
||||
* HTML - the site itself, should be an html file
|
||||
* Error Log - this lists URLs that did not work - should be a txt file
|
||||
|
||||
Example paths are given for CPanel type servers:
|
||||
|
||||
* Feed - /home/USER_NAME/public_html/feed.xml
|
||||
* HTML - /home/USER_NAME/public_html/index.xml
|
||||
* HTML - /home/USER_NAME/public_html/index.html
|
||||
* Error Log - /home/USER_FOLDER/public_html/feed.log
|
||||
|
||||
## The Site
|
||||
|
||||
|
40
blankRSS.pl
40
blankRSS.pl
@ -10,7 +10,7 @@ use warnings;
|
||||
# blankRSS.pl
|
||||
#
|
||||
# This script pulls from a list of RSS feeds and aggregates them together into a web page.
|
||||
# It is designed to run as a cron and overright the HTML file.
|
||||
# It is designed to run as a cron and overwrite the HTML file.
|
||||
#
|
||||
# license GPLv3.0 https://www.gnu.org/licenses/gpl-3.0.en.html
|
||||
# Code repository: https://code.jacobhaddon.com/jake/smhn
|
||||
@ -28,10 +28,12 @@ use HTML::Entities; # https://metacpan.org/pod/HTML::Entities
|
||||
# server file folders
|
||||
# my $rssFilePath = "/home/USER_FOLDER/public_html/feed.xml";
|
||||
# my $htmlFilePath = "/home/USER_FOLDER/public_html/index.html";
|
||||
# my $errorFilePath = "/home/USER_FOLDER/public_html/feed.log";
|
||||
|
||||
# local file folders
|
||||
my $rssFilePath = "feed.xml";
|
||||
my $htmlFilePath = "index.html";
|
||||
my $errorFilePath = "feed.log";
|
||||
|
||||
###################################
|
||||
# RSS Configurations
|
||||
@ -63,6 +65,10 @@ my %list;
|
||||
# Make the list of URLS while parsing DATA
|
||||
my $listHTML = "<div class=\'listHTML\'><ul>\n";
|
||||
|
||||
# Make a list of URLs that have an error
|
||||
my $listURLError = "The following feeds had issues this time:\n\n" . $now->strftime('%a, %d %b %Y %H:%M:%S %z'). "\n\n";
|
||||
|
||||
# Go through each URL in the DATA section and make the new list
|
||||
while ( my $url = <DATA>) {
|
||||
chomp $url;
|
||||
|
||||
@ -75,7 +81,12 @@ while ( my $url = <DATA>) {
|
||||
# parse the XML
|
||||
my $rss1 = XML::RSS->new;
|
||||
eval { $rss1->parse( $xml ) };
|
||||
next if $@;
|
||||
|
||||
# if empty, add URL to log file variable
|
||||
if ($@) {
|
||||
$listURLError .= "* " . $url . "\n";
|
||||
next;
|
||||
}; # if $@
|
||||
|
||||
# go through the items from the XML
|
||||
for (my $j = 0; $j <= $number_of_items; $j++){
|
||||
@ -96,14 +107,29 @@ while ( my $url = <DATA>) {
|
||||
$testItem->{'feedURL'} = $_;
|
||||
$testItem->{'feedName'} = $rss1->{'channel'}{'title'};
|
||||
|
||||
# Find Author tags
|
||||
if ($testItem->{'dc'}{'creator'}) {
|
||||
$testItem->{'itemAuthor'} = $testItem->{'dc'}{'creator'};
|
||||
} elsif ($testItem->{'author'}) {
|
||||
$testItem->{'itemAuthor'} = $testItem->{'author'}
|
||||
} else {
|
||||
$testItem->{'itemAuthor'} = $rss1->{'channel'}{'title'};
|
||||
} # if author
|
||||
|
||||
# Clean up some of the artifacts in the RSS feed 'description' section
|
||||
$testItem->{'description'} =~ s/\n\s*/\n/g; # get rid of excess white space
|
||||
$testItem->{'description'} =~ s/(<a(.+?)<\/a>)$//s; # link at end of description
|
||||
$testItem->{'description'} =~ s/<(\w) class=(.*?)>/<$1>/s; # remove class statements from text
|
||||
$testItem->{'description'} =~ s/<figure ((.|\n)+?)figure>//sg; #remove "figure" can use pipe to add more
|
||||
$testItem->{'description'} =~ s/<img(.+?)>//sg; # remove IMG tags
|
||||
$testItem->{'description'} =~ s/<span ((.|\n)+?)>//sg; #remove "span" tags (mostly blogger)
|
||||
$testItem->{'description'} =~ s/<\/span>//sg; #remove "span" endtags
|
||||
$testItem->{'description'} =~ s/<div class="separator" style(.+?)<\/div>//sg; # remove blogger DIV tags
|
||||
$testItem->{'description'} =~ s/<br(.+?)>/<br>/sg; # remove blogger BR tags
|
||||
$testItem->{'description'} =~ s/(<div><br><\/div>)+/<br>/sg; # remove blogger BR + DIV tags
|
||||
$testItem->{'description'} =~ s/.{1200}\K.*//s; # limit length
|
||||
$testItem->{'description'} =~ s/(<[^<]+)$//s; # link at end of description
|
||||
$testItem->{'description'} =~ s/<\/?div.*?>//sg; # remove div tags
|
||||
$testItem->{'description'} =~ s/(<a[^<]+)$//s; # link at end of description
|
||||
|
||||
#add continue reading to end.
|
||||
$testItem->{'description'} .= " <a href=\'" . $testItem->{'link'} . "\' target=\'_blank\'>Continue Reading</a>";
|
||||
@ -122,6 +148,14 @@ while ( my $url = <DATA>) {
|
||||
#close out the list URL html
|
||||
$listHTML .= "</div></ul>\n";
|
||||
|
||||
###################################
|
||||
# Write the error file
|
||||
###################################
|
||||
|
||||
open(FH, '>', $errorFilePath) or die $!;
|
||||
print FH $listURLError;
|
||||
close(FH);
|
||||
|
||||
###################################
|
||||
# Make an RSS Feed!
|
||||
###################################
|
||||
|
50
smhnRSS.pl
50
smhnRSS.pl
@ -10,7 +10,7 @@ use warnings;
|
||||
# The Sunday Morning Horror News
|
||||
#
|
||||
# This script pulls from a list of RSS feeds and aggregates them together into a web page.
|
||||
# It is designed to run as a cron and overright the HTML file.
|
||||
# It is designed to run as a cron and overwrite the HTML file.
|
||||
#
|
||||
# license GPLv3.0 https://www.gnu.org/licenses/gpl-3.0.en.html
|
||||
# Code repository: https://code.jacobhaddon.com/jake/smhn
|
||||
@ -21,6 +21,7 @@ use warnings;
|
||||
# Packages
|
||||
|
||||
use Time::Piece; # https://perldoc.perl.org/Time::Piece
|
||||
use Time::Seconds; # https://perldoc.perl.org/Time::Seconds
|
||||
use LWP::Simple; # https://metacpan.org/pod/LWP::Simple
|
||||
use XML::RSS; # https://metacpan.org/pod/XML::RSS
|
||||
use HTML::Entities; # https://metacpan.org/pod/HTML::Entities
|
||||
@ -28,10 +29,12 @@ use HTML::Entities; # https://metacpan.org/pod/HTML::Entities
|
||||
# server file folders
|
||||
# my $rssFilePath = "/home/USER_FOLDER/public_html/feed.xml";
|
||||
# my $htmlFilePath = "/home/USER_FOLDER/public_html/index.html";
|
||||
# my $errorFilePath = "/home/USER_FOLDER/public_html/feed.log";
|
||||
|
||||
# local file folders
|
||||
my $rssFilePath = "feed.xml";
|
||||
my $htmlFilePath = "index.html";
|
||||
my $errorFilePath = "feed.log";
|
||||
|
||||
###################################
|
||||
# RSS Configurations
|
||||
@ -50,9 +53,12 @@ my $copyright = 'Copyright respective writers';
|
||||
# add to new RSS feed object
|
||||
###################################
|
||||
|
||||
# number of weeks in the past to hold RSS feed
|
||||
my $num_weeks = 2;
|
||||
|
||||
# get today, subtract time to make cut off
|
||||
my $now = localtime;
|
||||
my $then = $now->add_months(-2);
|
||||
my $then = $now - (ONE_WEEK * $num_weeks);
|
||||
|
||||
#number of items to keep from each feed
|
||||
my $number_of_items = 2; # +1 since everything starts at 0
|
||||
@ -63,6 +69,9 @@ my %list;
|
||||
# Make the list of URLS while parsing DATA
|
||||
my $listHTML = "<div class=\'listHTML\'><ul>\n";
|
||||
|
||||
# Make a list of URLs that have an error
|
||||
my $listURLError = "The following feeds had issues this time:\n\n" . $now->strftime('%a, %d %b %Y %H:%M:%S %z'). "\n\n";
|
||||
|
||||
# Go through each URL in the DATA section and make the new list
|
||||
while ( my $url = <DATA>) {
|
||||
chomp $url;
|
||||
@ -76,7 +85,12 @@ while ( my $url = <DATA>) {
|
||||
# parse the XML
|
||||
my $rss1 = XML::RSS->new;
|
||||
eval { $rss1->parse( $xml ) };
|
||||
next if $@;
|
||||
|
||||
# if empty, add URL to log file variable
|
||||
if ($@) {
|
||||
$listURLError .= "* " . $url . "\n";
|
||||
next;
|
||||
}; # if $@
|
||||
|
||||
# go through the items from the XML
|
||||
for (my $j = 0; $j <= $number_of_items; $j++){
|
||||
@ -138,6 +152,14 @@ while ( my $url = <DATA>) {
|
||||
#close out the list URL html
|
||||
$listHTML .= "</ul></div>\n";
|
||||
|
||||
###################################
|
||||
# Write the error file
|
||||
###################################
|
||||
|
||||
open(FH, '>', $errorFilePath) or die $!;
|
||||
print FH $listURLError;
|
||||
close(FH);
|
||||
|
||||
###################################
|
||||
# Make an RSS Feed!
|
||||
###################################
|
||||
@ -204,6 +226,9 @@ my $printDate = formatDate($rss2->{'channel'}{'pubDate'});
|
||||
# header for a direct HTML post
|
||||
my $html_header = "Status: 200\nContent-type: text/html\n\n";
|
||||
|
||||
|
||||
|
||||
|
||||
###################################
|
||||
# Make the HTML Page
|
||||
###################################
|
||||
@ -353,7 +378,7 @@ http://fiendlover.blogspot.com/feeds/posts/default
|
||||
http://jacobhaddon.com/feed/
|
||||
http://apokrupha.com/feed/
|
||||
https://ellendatlow.com/feed/
|
||||
https://paulaguran.com/
|
||||
https://paulaguran.com/feed/
|
||||
https://amandaheadlee.com/feed/
|
||||
https://theimbloglio.wordpress.com/feed/
|
||||
https://kennethwcain.com/feed/
|
||||
@ -364,4 +389,19 @@ https://weightlessbooks.com/feed/
|
||||
https://www.crystallakepub.com/feed/
|
||||
https://lynnehansen.zenfolio.com/blog.rss
|
||||
https://www.bevvincent.com/feed/
|
||||
http://liviallewellyn.com/feed/
|
||||
http://liviallewellyn.com/feed/
|
||||
https://www.kristidemeester.com/blog-feed.xml
|
||||
https://www.lucysnyder.com/index.php/feed/
|
||||
https://www.emilyruthverona.com/blog-feed.xml
|
||||
https://www.elizabethhand.com/welcome?format=rss
|
||||
https://www.jamielackey.com/feed/
|
||||
https://cv-hunt.com/feed/
|
||||
https://authorjenniferallisprovost.com/feed/
|
||||
https://jezzywolfe.wordpress.com/feed/
|
||||
https://lmariewood.com/feed/
|
||||
https://www.leemurray.info/blog-feed.xml
|
||||
https://meghanarcuri.com/feed/
|
||||
https://nicolecushing.wordpress.com/feed/
|
||||
https://saratantlinger.com/feed/
|
||||
https://sunnymoraine.com/feed/
|
||||
https://lauramauro.com/feed/
|
Loading…
x
Reference in New Issue
Block a user