Browse Source

change parent tag and fix typo

master
Andreas Demmelbauer 5 years ago
parent
commit
58dceb1eaa
1 changed file with 19 additions and 20 deletions
  1. +19
    -20
      feedcake.py

+ 19
- 20
feedcake.py View File

@@ -96,7 +96,7 @@ def process_feed(feed_url, output_filename):

online_soup = BeautifulSoup(r.text, 'html.parser')

content_soup = BeautifulSoup('<article></article>', 'html.parser')
content_soup = BeautifulSoup('<div></div>', 'html.parser')

# Remove all Comments
for element in online_soup(text=lambda text: isinstance(text, Comment)):
@@ -108,41 +108,41 @@ def process_feed(feed_url, output_filename):
if entry.link.startswith('https://or'+'f.a'+'t/sto'+'ries'):
if entry.date:
article_time = content_soup.new_tag('time', datetime=entry.date)
content_soup.article.append(article_time)
content_soup.div.append(article_time)
article_headline = online_soup.find('h1', attrs={'class': 'story-lead-headline'})
content_soup.article.append(article_headline)
content_soup.div.append(article_headline)
article_body = online_soup.find('div', attrs={'class': 'story-content'})
content_soup.article.append(article_body)
content_soup.div.append(article_body)
article_link = content_soup.new_tag('a', href=entry.link)
article_link['class'] = 'source';
article_link['class'] = 'source'
article_link.string = 'Quelle (' + entry.link + ')'
content_soup.article.append(article_link)
content_soup.div.append(article_link)

if entry.link.startswith('https://de'+'rst'+'and'+'ard'+'.a'+'t/20'): # url starts with number ... too lazy for regex :)
if entry.published:
article_time = content_soup.new_tag('time', datetime=entry.published)
content_soup.article.append(article_time)
content_soup.div.append(article_time)
article_headline = online_soup.find('h1', attrs={'itemprop': 'headline'})
content_soup.article.append(article_headline)
content_soup.div.append(article_headline)
# images etc
article_aside = online_soup.find('div', id="content-aside")
content_soup.article.append(article_aside)
content_soup.div.append(article_aside)
article_body = online_soup.find('div', attrs={'itemprop': 'articleBody'})
content_soup.article.append(article_body)
article_link = content_soup.new_tag('a', href=entry.link)
article_link['class'] = 'source';
article_link.string = 'Quelle (' + entry.link.split('?')[0] + ')'
content_soup.article.append(article_link)
content_soup.div.append(article_body)

# modify original link -> mobile version and comment section
link_to_comments = re.sub(r'(\/\/)', r'\1mobil.',entry.link.split('?')[0]) + '?_viewMode=forum#'
article_comments_link = content_soup.new_tag('a', href=link_to_comments)
article_comments_link['class'] = 'comments';
article_comments_link.sting = 'Kommentare'
content_soup.article.append(article_comments_link)
article_comments_link['class'] = 'comments'
article_comments_p = content_soup.new_tag('p')
article_comments_link.string = 'Kommentare'
article_comments_p.append(article_comments_link)
content_soup.div.append(article_comments_p)

article_link.string = 'Quelle (' + entry.link.split('?')[0] + ')'
content_soup.article.append(article_link)
article_link = content_soup.new_tag('a', href=entry.link.split('?')[0])
article_link['class'] = 'source'
article_link.string = 'Quelle: ' + entry.link.split('?')[0]
content_soup.div.append(article_link)


# create directory for storing and serving html and images
@@ -150,7 +150,6 @@ def process_feed(feed_url, output_filename):

# download all article images and replace image source
for img in content_soup.findAll('img'):
print(img)
if img.get('data-src'):
old_url = img['data-src']
if not old_url.startswith('data:'):


Loading…
Cancel
Save