updated blank with changes from other file
This commit is contained in:
parent
1cb4d8d03d
commit
7f0f20d779
38
blankRSS.pl
38
blankRSS.pl
@ -28,10 +28,12 @@ use HTML::Entities; # https://metacpan.org/pod/HTML::Entities
|
||||
# server file folders
|
||||
# my $rssFilePath = "/home/USER_FOLDER/public_html/feed.xml";
|
||||
# my $htmlFilePath = "/home/USER_FOLDER/public_html/index.html";
|
||||
# my $errorFilePath = "/home/USER_FOLDER/public_html/feed.log";
|
||||
|
||||
# local file folders
|
||||
my $rssFilePath = "feed.xml";
|
||||
my $htmlFilePath = "index.html";
|
||||
my $errorFilePath = "feed.log";
|
||||
|
||||
###################################
|
||||
# RSS Configurations
|
||||
@ -63,6 +65,10 @@ my %list;
|
||||
# Make the list of URLS while parsing DATA
|
||||
my $listHTML = "<div class=\'listHTML\'><ul>\n";
|
||||
|
||||
# Make a list of URLs that have an error
|
||||
my $listURLError = "The following feeds had issues this time:\n\n" . $now->strftime('%a, %d %b %Y %H:%M:%S %z'). "\n\n";
|
||||
|
||||
# Go through each URL in the DATA section and make the new list
|
||||
while ( my $url = <DATA>) {
|
||||
chomp $url;
|
||||
|
||||
@ -75,7 +81,12 @@ while ( my $url = <DATA>) {
|
||||
# parse the XML
|
||||
my $rss1 = XML::RSS->new;
|
||||
eval { $rss1->parse( $xml ) };
|
||||
next if $@;
|
||||
|
||||
# if parsing failed, add the URL to the error-log text
|
||||
if ($@) {
|
||||
$listURLError .= "* " . $url . "\n";
|
||||
next;
|
||||
}; # if $@
|
||||
|
||||
# go through the items from the XML
|
||||
for (my $j = 0; $j <= $number_of_items; $j++){
|
||||
@ -96,14 +107,29 @@ while ( my $url = <DATA>) {
|
||||
$testItem->{'feedURL'} = $_;
|
||||
$testItem->{'feedName'} = $rss1->{'channel'}{'title'};
|
||||
|
||||
# Find Author tags
|
||||
if ($testItem->{'dc'}{'creator'}) {
|
||||
$testItem->{'itemAuthor'} = $testItem->{'dc'}{'creator'};
|
||||
} elsif ($testItem->{'author'}) {
|
||||
$testItem->{'itemAuthor'} = $testItem->{'author'}
|
||||
} else {
|
||||
$testItem->{'itemAuthor'} = $rss1->{'channel'}{'title'};
|
||||
} # if author
|
||||
|
||||
# Clean up some of the artifacts in the RSS feed 'description' section
|
||||
$testItem->{'description'} =~ s/\n\s*/\n/g; # get rid of excess white space
|
||||
$testItem->{'description'} =~ s/(<a(.+?)<\/a>)$//s; # link at end of description
|
||||
$testItem->{'description'} =~ s/<(\w) class=(.*?)>/<$1>/s; # remove class statements from text
|
||||
$testItem->{'description'} =~ s/<figure ((.|\n)+?)figure>//sg; #remove "figure" can use pipe to add more
|
||||
$testItem->{'description'} =~ s/<img(.+?)>//sg; # remove IMG tags
|
||||
$testItem->{'description'} =~ s/<span ((.|\n)+?)>//sg; #remove "span" tags (mostly blogger)
|
||||
$testItem->{'description'} =~ s/<\/span>//sg; #remove "span" endtags
|
||||
$testItem->{'description'} =~ s/<div class="separator" style(.+?)<\/div>//sg; # remove blogger DIV tags
|
||||
$testItem->{'description'} =~ s/<br(.+?)>/<br>/sg; # remove blogger BR tags
|
||||
$testItem->{'description'} =~ s/(<div><br><\/div>)+/<br>/sg; # remove blogger BR + DIV tags
|
||||
$testItem->{'description'} =~ s/.{1200}\K.*//s; # limit length
|
||||
$testItem->{'description'} =~ s/(<[^<]+)$//s; # link at end of description
|
||||
$testItem->{'description'} =~ s/<\/?div.*?>//sg; # remove div tags
|
||||
$testItem->{'description'} =~ s/(<a[^<]+)$//s; # link at end of description
|
||||
|
||||
# Add a "Continue Reading" link to the end of the description.
|
||||
$testItem->{'description'} .= " <a href=\'" . $testItem->{'link'} . "\' target=\'_blank\'>Continue Reading</a>";
|
||||
@ -122,6 +148,14 @@ while ( my $url = <DATA>) {
|
||||
#close out the list URL html
|
||||
# Close the tags in the reverse order they were opened ("<div ...><ul>"),
# so the generated markup is properly nested.
$listHTML .= "</ul></div>\n";
|
||||
|
||||
###################################
|
||||
# Write the error file
|
||||
###################################
|
||||
|
||||
# Overwrite the error log with this run's list of feeds that failed to parse.
# A lexical filehandle avoids the global bareword FH, and close() is checked
# because buffered write errors only surface when the handle is closed.
open(my $errorFH, '>', $errorFilePath)
    or die "Cannot open $errorFilePath for writing: $!";
print {$errorFH} $listURLError;
close($errorFH)
    or die "Cannot close $errorFilePath: $!";
|
||||
|
||||
###################################
|
||||
# Make an RSS Feed!
|
||||
###################################
|
||||
|
Loading…
x
Reference in New Issue
Block a user