@@ -96,7 +96,7 @@ def process_feed(feed_url, output_filename):
     online_soup = BeautifulSoup(r.text, 'html.parser')
-    content_soup = BeautifulSoup('<article></article>', 'html.parser')
+    content_soup = BeautifulSoup('<div></div>', 'html.parser')
     # Remove all Comments
     for element in online_soup(text=lambda text: isinstance(text, Comment)):
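The loop body that actually drops each comment node falls outside this hunk. A minimal sketch of the usual BeautifulSoup idiom this loop header belongs to, assuming the body simply calls `extract()`:

```python
from bs4 import BeautifulSoup, Comment

html = '<div>text<!-- ad marker --><p>more</p></div>'
soup = BeautifulSoup(html, 'html.parser')

# calling the soup with a text filter matches Comment nodes only
for element in soup(text=lambda text: isinstance(text, Comment)):
    element.extract()  # detach the comment from the tree

print(soup)  # -> <div>text<p>more</p></div>
```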
@@ -108,41 +108,41 @@ def process_feed(feed_url, output_filename):
     if entry.link.startswith('https://or'+'f.a'+'t/sto'+'ries'):
         if entry.date:
             article_time = content_soup.new_tag('time', datetime=entry.date)
-            content_soup.article.append(article_time)
+            content_soup.div.append(article_time)
         article_headline = online_soup.find('h1', attrs={'class': 'story-lead-headline'})
-        content_soup.article.append(article_headline)
+        content_soup.div.append(article_headline)
         article_body = online_soup.find('div', attrs={'class': 'story-content'})
-        content_soup.article.append(article_body)
+        content_soup.div.append(article_body)
         article_link = content_soup.new_tag('a', href=entry.link)
-        article_link['class'] = 'source';
+        article_link['class'] = 'source'
         article_link.string = 'Quelle (' + entry.link + ')'
-        content_soup.article.append(article_link)
+        content_soup.div.append(article_link)
     if entry.link.startswith('https://de'+'rst'+'and'+'ard'+'.a'+'t/20'): # url starts with number ... too lazy for regex :)
         if entry.published:
             article_time = content_soup.new_tag('time', datetime=entry.published)
-            content_soup.article.append(article_time)
+            content_soup.div.append(article_time)
         article_headline = online_soup.find('h1', attrs={'itemprop': 'headline'})
-        content_soup.article.append(article_headline)
+        content_soup.div.append(article_headline)
         # images etc
         article_aside = online_soup.find('div', id="content-aside")
-        content_soup.article.append(article_aside)
+        content_soup.div.append(article_aside)
         article_body = online_soup.find('div', attrs={'itemprop': 'articleBody'})
-        content_soup.article.append(article_body)
-        article_link = content_soup.new_tag('a', href=entry.link)
-        article_link['class'] = 'source';
-        article_link.string = 'Quelle (' + entry.link.split('?')[0] + ')'
-        content_soup.article.append(article_link)
+        content_soup.div.append(article_body)
         # modify original link -> mobile version and comment section
         link_to_comments = re.sub(r'(\/\/)', r'\1mobil.',entry.link.split('?')[0]) + '?_viewMode=forum#'
         article_comments_link = content_soup.new_tag('a', href=link_to_comments)
-        article_comments_link['class'] = 'comments';
-        article_comments_link.sting = 'Kommentare'
-        content_soup.article.append(article_comments_link)
+        article_comments_link['class'] = 'comments'
+        article_comments_p = content_soup.new_tag('p')
+        article_comments_link.string = 'Kommentare'
+        article_comments_p.append(article_comments_link)
+        content_soup.div.append(article_comments_p)
-        article_link.string = 'Quelle (' + entry.link.split('?')[0] + ')'
-        content_soup.article.append(article_link)
+        article_link = content_soup.new_tag('a', href=entry.link.split('?')[0])
+        article_link['class'] = 'source'
+        article_link.string = 'Quelle: ' + entry.link.split('?')[0]
+        content_soup.div.append(article_link)
     # create directory for storing and serving html and images
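For reference, a standalone sketch of the new_tag()/append() pattern the changed lines rely on, with a placeholder URL and timestamp; it shows the `<div>` wrapper that now replaces `<article>`, including the comments link wrapped in its own `<p>`:

```python
from bs4 import BeautifulSoup

content_soup = BeautifulSoup('<div></div>', 'html.parser')

# timestamp tag (placeholder value)
article_time = content_soup.new_tag('time', datetime='2019-01-01T12:00:00+01:00')
content_soup.div.append(article_time)

# source link, query string stripped
article_link = content_soup.new_tag('a', href='https://example.org/story/123')
article_link['class'] = 'source'
article_link.string = 'Quelle: https://example.org/story/123'
content_soup.div.append(article_link)

# comments link, wrapped in a paragraph of its own
article_comments_p = content_soup.new_tag('p')
article_comments_link = content_soup.new_tag('a', href='https://example.org/story/123?_viewMode=forum#')
article_comments_link['class'] = 'comments'
article_comments_link.string = 'Kommentare'
article_comments_p.append(article_comments_link)
content_soup.div.append(article_comments_p)

# -> <div><time datetime="..."></time><a class="source" ...>Quelle: ...</a>
#        <p><a class="comments" ...>Kommentare</a></p></div>
print(content_soup)
```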
@@ -150,7 +150,6 @@ def process_feed(feed_url, output_filename):
     # download all article images and replace image source
     for img in content_soup.findAll('img'):
-        print(img)
         if img.get('data-src'):
             old_url = img['data-src']
             if not old_url.startswith('data:'):
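The rest of the image loop is not part of this hunk. A hypothetical sketch of the download-and-rewrite step it leads into; `output_dir` and the hashed filename are placeholders, not the project's actual naming scheme:

```python
import os
import hashlib
import requests

def localize_image(img, output_dir):
    """Download the image behind data-src/src and point src at the local copy."""
    old_url = img.get('data-src') or img.get('src')
    if not old_url or old_url.startswith('data:'):
        return  # inline data URIs need no download

    response = requests.get(old_url, timeout=10)
    response.raise_for_status()

    # derive a stable local filename from the URL (placeholder scheme)
    filename = hashlib.sha1(old_url.encode('utf-8')).hexdigest() + '.img'
    with open(os.path.join(output_dir, filename), 'wb') as f:
        f.write(response.content)

    img['src'] = filename          # serve the local copy instead of the remote URL
    if img.get('data-src'):
        del img['data-src']
```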