From 9340ae676104a2df7dd4e0f896ef1cc83461f49a Mon Sep 17 00:00:00 2001
From: Jacob Haddon <jhaddon@apokrupha.com>
Date: Wed, 14 Feb 2024 09:59:45 -0500
Subject: [PATCH 1/4] fixed CSS for extra long URLs in the list, so they break
 on the phone view.

---
 smhnRSS.pl | 341 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 341 insertions(+)
 create mode 100755 smhnRSS.pl
diff --git a/smhnRSS.pl b/smhnRSS.pl
new file mode 100755
index 0000000..902602d
--- /dev/null
+++ b/smhnRSS.pl
@@ -0,0 +1,341 @@
+#!/usr/bin/perl
+use cPanelUserConfig; #for cpanel servers
+
+use 5.010;
+use strict;
+use warnings;
+
+###################################
+#
+# The Sunday Morning Horror News
+# 
+# This script pulls from a list of RSS feeds and aggregates them together into a web page. 
+# It is designed to run as a cron job and overwrite the HTML file.
+# 
+# license GPLv3.0 https://www.gnu.org/licenses/gpl-3.0.en.html
+# Code repository: https://code.jacobhaddon.com/jake/smhn
+# Written by Jacob Haddon https://jacobhaddon.com
+#
+###################################
+
+# Packages 
+
+use Time::Piece; # https://perldoc.perl.org/Time::Piece
+use LWP::Simple; # https://metacpan.org/pod/LWP::Simple
+use XML::RSS; # https://metacpan.org/pod/XML::RSS
+use HTML::Entities; # https://metacpan.org/pod/HTML::Entities
+
+# server file folders
+# my $rssFilePath = "/home/USER_FOLDER/public_html/feed.xml";
+# my $htmlFilePath = "/home/USER_FOLDER/public_html/index.html";
+
+# local file folders
+my $rssFilePath = "feed.xml";
+my $htmlFilePath = "index.html";
+
+###################################
+#  RSS Configurations 
+###################################
+
+my $title = "Sunday Morning Horror News";
+my $homeLink = "http://sundaymorninghorrornews.com";
+my $feedLink = "http://sundaymorninghorrornews.com/feed.xml";
+my $description = 'A collection of horror writer, editor, poet and publisher blogs from all over the web!';
+my $webmaster = 'webmaster@sundaymorninghorrornews.com';
+my $copyright = 'Copyright respective writers';
+
+###################################
+# Go through list of URLs, get RSS feed, 
+# take newest 3 that are less than $then old, 
+# add to new RSS feed object
+###################################
+
+# get today, subtract time to make the cutoff
+my $now = localtime;
+my $then = $now->add_months(-2);
+
+#number of items to keep from each feed
+my $number_of_items = 2; # +1 since everything starts at 0
+
+#list to hold the new RSS items
+my %list; 
+
+# Make the list of URLS while parsing DATA
+my $listHTML = "<div class=\'listHTML\'><ul>\n";
+
+while ( my $url = <DATA>) {
+    chomp $url;
+    
+    # Fetch the feed. get() always returns a response object (even on
+    # failure), so success is tested with is_success, not truthiness.
+	my $ua = LWP::UserAgent->new;
+	$ua->default_header('User-Agent' => 'Mozilla/5.0');
+	my $resp = $ua->get($url);
+	next unless $resp->is_success;
+	my $xml = $resp->decoded_content;
+	
+	# parse the XML, skipping feeds that are not valid RSS
+    my $rss1 = XML::RSS->new;
+    eval { $rss1->parse( $xml ) };
+    next if $@;
+    
+    # go through the newest items; stop early if the feed has fewer
+    for (my $j = 0; $j <= $number_of_items; $j++){
+		my $testItem = $rss1->{'items'}[$j];
+		last unless defined $testItem;
+	
+		# Get the pub date of the article; without one it cannot be dated
+		my $testDate = $testItem->{'pubDate'};
+		next unless defined $testDate;
+		# Strip the trailing timezone (numeric offset or zone name)
+		$testDate =~ s/\s+(?:[+\-]\d{4}|[A-Za-z]{1,5})\s*\z//;
+	
+		# Parse with an explicit format (strptime's default is date-only); skip unparsable dates
+		my $t = eval { Time::Piece->strptime($testDate, '%a, %d %b %Y %H:%M:%S') } or next;
+		if ($t->epoch > $then->epoch) { # only keep posts newer than the $then cutoff
+			$testItem->{'feedURL'} = $_;
+			$testItem->{'feedName'} = $rss1->{'channel'}{'title'};
+			
+			# Clean up some of the artifacts in the RSS feed 'description' section
+			$testItem->{'description'} =~ s/\n\s*/\n/g; # get rid of excess white space
+			$testItem->{'description'} =~ s/(<a(.+?)<\/a>)$//s; # link at end of description
+			$testItem->{'description'} =~ s/<(\w) class=(.*?)>/<$1>/s; # remove class statements from text
+			$testItem->{'description'} =~ s/<figure ((.|\n)+?)figure>//sg; #remove "figure" can use pipe to add more
+			$testItem->{'description'} =~ s/<img(.+?)>//sg; # remove IMG tags
+			$testItem->{'description'} =~ s/.{1200}\K.*//s; # limit length 
+			$testItem->{'description'} =~ s/(<[^<]+)$//s; # link at end of description
+			
+			#add continue reading to end. 
+			$testItem->{'description'} .= " <a href=\'" . $testItem->{'link'} . "\' target=\'_blank\'>Continue Reading</a>"; 
+			# Key by epoch so items sort by time; on a collision nudge the key
+			# so two items with the same timestamp do not overwrite each other
+			my $key = $t->epoch;
+			$key++ while exists $list{$key};
+			$list{$key} = $testItem;
+		} #if epoch
+    } # for i loop
+    
+    # add to the list of URL HTML 
+	$listHTML .= "<li>" . $url . "</li>\n";
+} # while DATA
+
+#close out the list URL html
+$listHTML .= "</div></ul>\n";
+
+###################################
+#  Make an RSS Feed! 
+###################################
+
+# date format: Thu, 28 Dec 2023 03:51:42
+# $now->strftime("%a, %d %b %Y %H:%M:%S %z");
+
+my $rss2 = XML::RSS->new (version => '2.0');
+$rss2->add_module(prefix => 'atom', uri => 'http://www.w3.org/2005/Atom');
+
+$rss2->channel(title          => $title,
+               link           => $homeLink,
+               language       => 'en-US',
+               description    => $description,
+               copyright      => $copyright,
+               pubDate        => $now->strftime('%a, %d %b %Y %H:%M:%S %z'),
+               lastBuildDate  => $now->strftime('%a, %d %b %Y %H:%M:%S %z'),
+               webMaster      => $webmaster,
+               atom           => { 'link' => { 'href' => $feedLink, 'rel' => 'self', 'type' => 'application/rss+xml' } }
+               ); # $rss->channel
+               
+# foreach ITEM, newest (highest EPOCH) first; keys are epochs, so compare them as numbers
+foreach my $name (sort { $b <=> $a } keys %list) {
+
+    $rss2->add_item(title => $list{$name}->{'title'},
+        	permaLink  => $list{$name}->{'link'},
+        	link  => $list{$name}->{'link'},
+        	description => $list{$name}->{'description'},
+        	pubDate => $list{$name}->{'pubDate'},
+        	author => $list{$name}->{'dc'}{'creator'},
+        	source => $list{$name}->{'feedName'},
+        	sourceUrl => $list{$name}->{'feedURL'},
+       ); # $rss->channel
+       
+} # foreach
+
+# Save the RSS feed as a file    
+$rss2->save($rssFilePath);
+
+###################################
+#   Format the RSS data for HTML
+###################################
+
+# Build the HTML for the aggregated items. Feed-supplied title, author and
+# link are entity-encoded; the description is inserted as (filtered) markup.
+my $rssHTML = "<div class=\'rssHTML'>\n";
+
+foreach my $item (@{$rss2->{'items'}}) {
+	# the link is encoded so a quote or ampersand in a URL cannot break out of the href attribute
+	$rssHTML .= "<h2 class=\'headline\'><a href='" . encode_entities($item->{'link'}) . "' target=\'_blank\'>" . encode_entities($item->{'title'}) . "</a></h2>\n\n"; 
+	$rssHTML .= "<div class=\'byline\'>" . encode_entities($item->{'author'}) . " - " . formatDate($item->{'pubDate'}) ."</div>\n\n";
+	$rssHTML .= "<div class=\'description\'>" . $item->{'description'} . "</div>\n\n";
+
+} # foreach item 
+
+# close out the rssHTML
+$rssHTML .= "</div>\n\n";
+
+# the webpage HTML 
+
+# format the pubDate
+my $printDate = formatDate($rss2->{'channel'}{'pubDate'});
+
+# header for a direct HTML post 
+my $html_header = "Status: 200\nContent-type: text/html\n\n";
+
+###################################
+#   Make the HTML Page
+###################################
+ 
+my $html = <<"HTML_END";
+<!DOCTYPE html>
+<html>
+
+<head>
+	<meta charset="utf-8">
+	<meta name="viewport" content="width=device-width, initial-scale=1">
+	<title>The Sunday Morning Horror News</title>
+	<style>
+		body{
+			margin:40px auto;
+			max-width:650px;
+			line-height:1.6;
+			font-size:18px;
+			font-family: Baskerville, TimesNewRoman, Times New Roman, Times, Georgia, serif;
+			padding:0 10px:
+			word-break: break-word;
+		}
+		h1, h2, h3 {font-weight: 200;}
+		hr{width:50%;}
+		.byline{font-style:italic;}
+		nav{
+			font-size:20px;
+			text-align: center;
+		}
+		a {text-decoration:none;}
+		a:hover {text-decoration:underline;}
+		
+		/* Dark Mode Colors */
+		\@media screen and (prefers-color-scheme: dark) {
+			body{ 
+				color: white;
+				background-color: black;
+			}
+			a {color:orange;}
+			a:visited{color:yellow;}
+
+		} /* media screen */
+	</style>
+</head>
+
+<body>
+
+<header>
+
+	<h1>$title</h1>
+	<p>$description</p>
+	<p>This site is updated every Sunday morning</p>
+	<p>updated: $printDate</p>
+
+</header>
+
+<nav>
+<a href="#news">News</a> - <a href="#about">About</a> - <a href="#contact">Contact</a> - <a href="#list">List</a> - <a href="$feedLink">RSS Feed</a>
+</nav>
+
+<section id="news">
+	<h1>The News!</h1>
+	
+	$rssHTML
+
+</section>
+
+<section id="about">
+	<h1>About $title</h1>
+	<p>The $title is an aggregator of RSS feeds from writers, poets and publishers.</p>
+	<p>Too often the world is flying by at the speed of social media. $title is designed to update just once a week (on Sunday!) so you can take your time and catch up on the news without having to scroll.</p>
+	
+</section>
+
+<section id="contact">
+	<h1>Contact</h1>
+	<p>Contact $title at: $webmaster</p>
+	<p>We are intolerant of intolerance. If you see something hateful, transphobic, homophobic, racist, or the like, let us know.</p>
+	<p>We don't publish the content you see here, just collect it, but we don't need to collect trash.</p>
+</section>
+
+<section id="list">
+	<h1>The List!</h1>
+	<p>This is the list of the feeds that we are checking. If you have an RSS reader, grab them and follow along!</p>
+	
+	$listHTML
+	
+</section>
+
+<hr>
+
+<footer>
+	<p>This news feed made with <a href="https://www.perl.org/">Perl</a> and <a href="https://www.barebones.com/products/bbedit/">BBEdit</a>. Best viewable in Netscape Navigator 4 or higher</p>
+	<p>Open web features like RSS make sites like this possible. Support the open web!</p>
+	<p>this is a <a href="http://motherfuckingwebsite.com/">motherfucking website</a> and a <a href="http://bettermotherfuckingwebsite.com/">better motherfucking</a> website.</p>
+	<p><a href="https://code.jacobhaddon.com/jake/smhn">Code</a> by Jacob Haddon - license <a href="https://www.gnu.org/licenses/gpl-3.0.en.html">GPLv3.0</a> - <a href="https://Apokrupha.com">Apokrupha.com</a></p>
+</footer>
+
+<hr>
+
+</body>
+</html> 
+
+HTML_END
+
+# write the file as UTF-8 (the page declares charset utf-8); close is checked because buffered write errors surface there
+open(my $fh, '>:encoding(UTF-8)', $htmlFilePath) or die "Cannot open $htmlFilePath: $!";
+print {$fh} $html or die "Cannot write $htmlFilePath: $!";
+close($fh) or die "Cannot close $htmlFilePath: $!";
+
+# print $html_header . $html;
+
+###################################
+#  Functions 
+###################################
+
+sub formatDate {
+	my $testDate = $_[0];
+	$testDate =~ s/((\+|\-)(\d\d\d\d))//; 
+	# Convert to a TIME object
+	my $t = Time::Piece->strptime($testDate);
+	return($t->strftime('%a, %d %b %Y %H:%M:%S'));
+}
+
+# FIN
+
+###################################
+#  DATA is list of the feed URLs
+###################################
+
+__DATA__
+https://ecatherine.com/feed/
+https://redlagoe.com/feed/
+https://diebooth.wordpress.com/feed/
+https://kelliowen.com/feed/
+https://mercedesmyardley.com/feed/
+https://mehitobel.com/feed/
+http://antoncancre.blogspot.com/feeds/posts/default?alt=rss
+https://www.toddkeisling.com/news?format=rss
+https://www.jfgonzalez.org/news?format=rss
+https://www.briankeene.com/news?format=rss
+https://ghoulish.rip/feed/
+https://www.cemeterydance.com/extras/feed/
+https://www.gwendolynkiste.com/Blog/feed/
+https://katherinesilvaauthor.substack.com/feed
+https://queenofswordspress.com/feed/
+http://fiendlover.blogspot.com/feeds/posts/default
+http://jacobhaddon.com/feed/
+http://apokrupha.com/feed/
+https://ellendatlow.com/feed/
+https://paulaguran.com/
\ No newline at end of file

From cf59c9248d67f36d6616a9fdfde888f89e36c21d Mon Sep 17 00:00:00 2001
From: Jacob Haddon <jhaddon@apokrupha.com>
Date: Sat, 17 Feb 2024 16:29:30 -0500
Subject: [PATCH 2/4] fixed backwards DIV UL tags at the end of the RSS feed
 list

---
 smhnRSS.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/smhnRSS.pl b/smhnRSS.pl
index 902602d..c11f5a5 100755
--- a/smhnRSS.pl
+++ b/smhnRSS.pl
@@ -120,7 +120,7 @@ while ( my $url = <DATA>) {
 } # while DATA
 
 #close out the list URL html
-$listHTML .= "</div></ul>\n";
+$listHTML .= "</ul></div>\n";
 
 ###################################
 #  Make an RSS Feed! 

From 38400c7b82cd8aed3b07228d1afd249a0ff936cf Mon Sep 17 00:00:00 2001
From: Jacob Haddon <jhaddon@apokrupha.com>
Date: Tue, 20 Feb 2024 20:07:47 -0500
Subject: [PATCH 3/4] Adds a check about the item author and a backup if one is
 not there. Tidied up some CSS

---
 smhnRSS.pl | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/smhnRSS.pl b/smhnRSS.pl
index c11f5a5..8e9eb04 100755
--- a/smhnRSS.pl
+++ b/smhnRSS.pl
@@ -1,5 +1,5 @@
 #!/usr/bin/perl
-use cPanelUserConfig; #for cpanel servers
+# use cPanelUserConfig; #for cpanel servers
 
 use 5.010;
 use strict;
@@ -63,6 +63,7 @@ my %list;
 # Make the list of URLS while parsing DATA
 my $listHTML = "<div class=\'listHTML\'><ul>\n";
 
+# Go through each URL in the DATA section and make the new list
 while ( my $url = <DATA>) {
     chomp $url;
     
@@ -96,6 +97,15 @@ while ( my $url = <DATA>) {
-			$testItem->{'feedURL'} = $_;
+			$testItem->{'feedURL'} = $url; # $_ is never set in this loop; the feed address is in $url
 			$testItem->{'feedName'} = $rss1->{'channel'}{'title'};
 			
+			# Pick the author: dc:creator first, then <author>, else fall back to the channel title
+			if ($testItem->{'dc'}{'creator'}) {
+				$testItem->{'itemAuthor'} = $testItem->{'dc'}{'creator'};
+			} elsif ($testItem->{'author'}) {
+				$testItem->{'itemAuthor'} = $testItem->{'author'};
+			} else {
+				$testItem->{'itemAuthor'} = $rss1->{'channel'}{'title'};
+			} # if author  
+						
 			# Clean up some of the artifacts in the RSS feed 'description' section
 			$testItem->{'description'} =~ s/\n\s*/\n/g; # get rid of excess white space
 			$testItem->{'description'} =~ s/(<a(.+?)<\/a>)$//s; # link at end of description
@@ -151,7 +161,7 @@ foreach my $name (reverse sort keys %list) {
         	link  => $list{$name}->{'link'},
         	description => $list{$name}->{'description'},
         	pubDate => $list{$name}->{'pubDate'},
-        	author => $list{$name}->{'dc'}{'creator'},
+        	author => $list{$name}->{'itemAuthor'},
         	source => $list{$name}->{'feedName'},
         	sourceUrl => $list{$name}->{'feedURL'},
        ); # $rss->channel
@@ -204,9 +214,7 @@ my $html = <<"HTML_END";
 		body{
 			margin:40px auto;
 			max-width:650px;
-			line-height:1.6;
-			font-size:18px;
-			font-family: Baskerville, TimesNewRoman, Times New Roman, Times, Georgia, serif;
-			padding:0 10px:
+			font: 18px/1.6 baskerville, palatino, 'palatino linotype', georgia,serif;
+			padding:0 10px;
 			word-break: break-word;
 		}
@@ -239,7 +247,7 @@ my $html = <<"HTML_END";
 
 	<h1>$title</h1>
 	<p>$description</p>
-	<p>This site is updated every Sunday morning</p>
+	<p>This site is in BETA and will update daily as we test things out and add URLs. Let us know if there are sites that should be included!</p>
 	<p>updated: $printDate</p>
 
 </header>
@@ -305,12 +313,13 @@ close(FH);
 ###################################
 
 sub formatDate {
+	# takes an RFC 822 style date string and returns it reformatted without the timezone
 	my $testDate = $_[0];
-	$testDate =~ s/((\+|\-)(\d\d\d\d))//; 
-	# Convert to a TIME object
-	my $t = Time::Piece->strptime($testDate);
+	$testDate =~ s/\s+(?:[+\-]\d{4}|[A-Za-z]{1,5})\s*\z//; # drop the trailing timezone
+	# Parse with an explicit format; strptime's default is date-only and would drop the time
+	my $t = Time::Piece->strptime($testDate, '%a, %d %b %Y %H:%M:%S');
 	return($t->strftime('%a, %d %b %Y %H:%M:%S'));
-}
+} # sub formatDate
 
 # FIN
 
@@ -326,10 +335,10 @@ https://kelliowen.com/feed/
 https://mercedesmyardley.com/feed/
 https://mehitobel.com/feed/
 http://antoncancre.blogspot.com/feeds/posts/default?alt=rss
+https://marysangi.wordpress.com/feed/
 https://www.toddkeisling.com/news?format=rss
 https://www.jfgonzalez.org/news?format=rss
 https://www.briankeene.com/news?format=rss
-https://ghoulish.rip/feed/
 https://www.cemeterydance.com/extras/feed/
 https://www.gwendolynkiste.com/Blog/feed/
 https://katherinesilvaauthor.substack.com/feed
@@ -338,4 +347,14 @@ http://fiendlover.blogspot.com/feeds/posts/default
 http://jacobhaddon.com/feed/
 http://apokrupha.com/feed/
 https://ellendatlow.com/feed/
-https://paulaguran.com/
\ No newline at end of file
+https://paulaguran.com/
+https://amandaheadlee.com/feed/
+https://theimbloglio.wordpress.com/feed/
+https://kennethwcain.com/feed/
+https://wellwortharead.blogspot.com/feeds/posts/default?alt=rss
+http://robertfordauthor.com/feed/
+https://tinyfrights.com/feed/
+https://weightlessbooks.com/feed/
+https://www.crystallakepub.com/feed/
+https://lynnehansen.zenfolio.com/blog.rss
+https://www.bevvincent.com/feed/
\ No newline at end of file

From d30f85b57b8a64e5d004eb06f18ec8989fe2cce3 Mon Sep 17 00:00:00 2001
From: Jacob Haddon <jhaddon@apokrupha.com>
Date: Tue, 20 Feb 2024 22:04:12 -0500
Subject: [PATCH 4/4] added more filters for description, these target the mess
 Blogger sends over, but seem to help others.

---
 smhnRSS.pl | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/smhnRSS.pl b/smhnRSS.pl
index 8e9eb04..e7ef79a 100755
--- a/smhnRSS.pl
+++ b/smhnRSS.pl
@@ -112,8 +112,14 @@ while ( my $url = <DATA>) {
 			$testItem->{'description'} =~ s/<(\w) class=(.*?)>/<$1>/s; # remove class statements from text
 			$testItem->{'description'} =~ s/<figure ((.|\n)+?)figure>//sg; #remove "figure" can use pipe to add more
 			$testItem->{'description'} =~ s/<img(.+?)>//sg; # remove IMG tags
+			$testItem->{'description'} =~ s/<span ((.|\n)+?)>//sg; #remove "span" tags (mostly blogger)
+			$testItem->{'description'} =~ s/<\/span>//sg; #remove "span" endtags 
+			$testItem->{'description'} =~ s/<div class="separator" style(.+?)<\/div>//sg; # remove blogger DIV tags
+			$testItem->{'description'} =~ s/<br\b[^>]*>/<br>/sg; # normalize BR tags; [^>]* keeps a bare <br> from swallowing the text after it
+			$testItem->{'description'} =~ s/(<div><br><\/div>)+/<br>/sg; # remove blogger BR + DIV tags
 			$testItem->{'description'} =~ s/.{1200}\K.*//s; # limit length 
-			$testItem->{'description'} =~ s/(<[^<]+)$//s; # link at end of description
+			$testItem->{'description'} =~ s/<\/?div.*?>//sg; # remove div tags
+			$testItem->{'description'} =~ s/(<a[^<]+)$//s; # link at end of description
 			
 			#add continue reading to end. 
 			$testItem->{'description'} .= " <a href=\'" . $testItem->{'link'} . "\' target=\'_blank\'>Continue Reading</a>"; 
@@ -357,4 +363,5 @@ https://tinyfrights.com/feed/
 https://weightlessbooks.com/feed/
 https://www.crystallakepub.com/feed/
 https://lynnehansen.zenfolio.com/blog.rss
-https://www.bevvincent.com/feed/
\ No newline at end of file
+https://www.bevvincent.com/feed/
+http://liviallewellyn.com/feed/
\ No newline at end of file