|
@@ -118,7 +118,6 @@ def process_feed(feed_url, output_filename): |
|
|
content_soup.article.append(article_link) |
|
|
content_soup.article.append(article_link) |
|
|
|
|
|
|
|
|
if entry.link.startswith('https://de'+'rst'+'and'+'ard'+'.a'+'t/20'): # url starts with number ... too lazy for regex :) |
|
|
if entry.link.startswith('https://de'+'rst'+'and'+'ard'+'.a'+'t/20'): # url starts with number ... too lazy for regex :) |
|
|
print(entry) |
|
|
|
|
|
if entry.published: |
|
|
if entry.published: |
|
|
article_time = content_soup.new_tag('time', datetime=entry.published) |
|
|
article_time = content_soup.new_tag('time', datetime=entry.published) |
|
|
content_soup.article.append(article_time) |
|
|
content_soup.article.append(article_time) |
|
@@ -153,21 +152,18 @@ def process_feed(feed_url, output_filename): |
|
|
print(img) |
|
|
print(img) |
|
|
if img.get('data-src'): |
|
|
if img.get('data-src'): |
|
|
old_url = img['data-src'] |
|
|
old_url = img['data-src'] |
|
|
print(old_url) |
|
|
|
|
|
if not old_url.startswith('data:'): |
|
|
if not old_url.startswith('data:'): |
|
|
new_filename = filename_from_url(old_url) |
|
|
new_filename = filename_from_url(old_url) |
|
|
img['data-src'] = base_url + '/' + entry_dir + '/' + new_filename |
|
|
img['data-src'] = base_url + '/' + entry_dir + '/' + new_filename |
|
|
download_image(old_url, entry_dir, new_filename) |
|
|
download_image(old_url, entry_dir, new_filename) |
|
|
if img.get('src'): |
|
|
if img.get('src'): |
|
|
old_url = img['src'] |
|
|
old_url = img['src'] |
|
|
print(old_url) |
|
|
|
|
|
if not old_url.startswith('data:'): |
|
|
if not old_url.startswith('data:'): |
|
|
new_filename = filename_from_url(old_url) |
|
|
new_filename = filename_from_url(old_url) |
|
|
img['src'] = base_url + '/' + entry_dir + '/' + new_filename |
|
|
img['src'] = base_url + '/' + entry_dir + '/' + new_filename |
|
|
download_image(old_url, entry_dir, new_filename) |
|
|
download_image(old_url, entry_dir, new_filename) |
|
|
if img.get('data-srcset'): |
|
|
if img.get('data-srcset'): |
|
|
srcset = img['data-srcset'].split(', ') |
|
|
srcset = img['data-srcset'].split(', ') |
|
|
print(old_url) |
|
|
|
|
|
new_srcset = [] |
|
|
new_srcset = [] |
|
|
for src in srcset: |
|
|
for src in srcset: |
|
|
old_url = src.split(' ')[0] |
|
|
old_url = src.split(' ')[0] |
|
|