diff --git a/feedcake.py b/feedcake.py index 184eb48..4aaf2bd 100644 --- a/feedcake.py +++ b/feedcake.py @@ -96,7 +96,7 @@ def process_feed(feed_url, output_filename): online_soup = BeautifulSoup(r.text, 'html.parser') - content_soup = BeautifulSoup('<article></article>', 'html.parser') + content_soup = BeautifulSoup('<div></div>', 'html.parser') # Remove all Comments for element in online_soup(text=lambda text: isinstance(text, Comment)): @@ -108,41 +108,41 @@ def process_feed(feed_url, output_filename): if entry.link.startswith('https://or'+'f.a'+'t/sto'+'ries'): if entry.date: article_time = content_soup.new_tag('time', datetime=entry.date) - content_soup.article.append(article_time) + content_soup.div.append(article_time) article_headline = online_soup.find('h1', attrs={'class': 'story-lead-headline'}) - content_soup.article.append(article_headline) + content_soup.div.append(article_headline) article_body = online_soup.find('div', attrs={'class': 'story-content'}) - content_soup.article.append(article_body) + content_soup.div.append(article_body) article_link = content_soup.new_tag('a', href=entry.link) - article_link['class'] = 'source'; + article_link['class'] = 'source' article_link.string = 'Quelle (' + entry.link + ')' - content_soup.article.append(article_link) + content_soup.div.append(article_link) if entry.link.startswith('https://de'+'rst'+'and'+'ard'+'.a'+'t/20'): # url starts with number ... 
too lazy for regex :) if entry.published: article_time = content_soup.new_tag('time', datetime=entry.published) - content_soup.article.append(article_time) + content_soup.div.append(article_time) article_headline = online_soup.find('h1', attrs={'itemprop': 'headline'}) - content_soup.article.append(article_headline) + content_soup.div.append(article_headline) # images etc article_aside = online_soup.find('div', id="content-aside") - content_soup.article.append(article_aside) + content_soup.div.append(article_aside) article_body = online_soup.find('div', attrs={'itemprop': 'articleBody'}) - content_soup.article.append(article_body) - article_link = content_soup.new_tag('a', href=entry.link) - article_link['class'] = 'source'; - article_link.string = 'Quelle (' + entry.link.split('?')[0] + ')' - content_soup.article.append(article_link) + content_soup.div.append(article_body) # modify original link -> mobile version and comment section link_to_comments = re.sub(r'(\/\/)', r'\1mobil.',entry.link.split('?')[0]) + '?_viewMode=forum#' article_comments_link = content_soup.new_tag('a', href=link_to_comments) - article_comments_link['class'] = 'comments'; - article_comments_link.sting = 'Kommentare' - content_soup.article.append(article_comments_link) + article_comments_link['class'] = 'comments' + article_comments_p = content_soup.new_tag('p') + article_comments_link.string = 'Kommentare' + article_comments_p.append(article_comments_link) + content_soup.div.append(article_comments_p) - article_link.string = 'Quelle (' + entry.link.split('?')[0] + ')' - content_soup.article.append(article_link) + article_link = content_soup.new_tag('a', href=entry.link.split('?')[0]) + article_link['class'] = 'source' + article_link.string = 'Quelle: ' + entry.link.split('?')[0] + content_soup.div.append(article_link) # create directory for storing and serving html and images @@ -150,7 +150,6 @@ def process_feed(feed_url, output_filename): # download all article images and replace image 
source for img in content_soup.findAll('img'): - print(img) if img.get('data-src'): old_url = img['data-src'] if not old_url.startswith('data:'):