|
|
@@ -28,7 +28,6 @@ except: |
|
|
|
print("ERROR: Config file not found or invalid!") |
|
|
|
sys.exit(1) |
|
|
|
|
|
|
|
print(filedir) |
|
|
|
public_path = filedir + '/public' |
|
|
|
assets_path = public_path + '/assets' |
|
|
|
feeds_path = public_path + '/feeds' |
|
|
@@ -79,7 +78,11 @@ def download_image(url, entry_dir, filename): |
|
|
|
for chunk in response.iter_content(1024): |
|
|
|
f.write(chunk) |
|
|
|
|
|
|
|
def process_feed(feed_url, output_filename): |
|
|
|
def process_feed(obj): |
|
|
|
feed_url = obj['source'] |
|
|
|
output_filename = obj['destination'] |
|
|
|
|
|
|
|
print('Updating:', obj['destination']) |
|
|
|
|
|
|
|
# Get the feed |
|
|
|
r_feed = requests.get(feed_url, headers=requestheaders) |
|
|
@@ -88,10 +91,10 @@ def process_feed(feed_url, output_filename): |
|
|
|
|
|
|
|
# Store data of new articles |
|
|
|
for entry in feedparser.parse(r_feed.text).entries: |
|
|
|
print(entry.link) |
|
|
|
entry_dir = get_valid_filename(entry.link) # input e.g. https://orf.at/stories/3117136/ |
|
|
|
entry_path = assets_path + '/'+ entry_dir |
|
|
|
if not os.path.exists(entry_path): |
|
|
|
print('New item: ', entry.link) |
|
|
|
r = requests.get(entry.link.split('?')[0], headers=requestheaders) |
|
|
|
|
|
|
|
online_soup = BeautifulSoup(r.text, 'html.parser') |
|
|
@@ -192,6 +195,14 @@ def process_feed(feed_url, output_filename): |
|
|
|
|
|
|
|
feed_soup = BeautifulSoup(r_feed.text, 'lxml-xml') |
|
|
|
|
|
|
|
# Exclude items |
|
|
|
if obj.get('exclude') and isinstance(obj['exclude'], list): |
|
|
|
for e in feed_soup.findAll('item'): |
|
|
|
matches = [x for x in obj['exclude'] if x.lower() in e.title.text.lower()] |
|
|
|
if len(matches) > 0: |
|
|
|
e.extract() |
|
|
|
print('Exclude: ', e.title.text, '->', matches) |
|
|
|
|
|
|
|
for e in feed_soup.findAll('item'): |
|
|
|
entry_dir = get_valid_filename(e.link.text) |
|
|
|
f_content = open(assets_path + '/' + entry_dir + '/index.html', 'r') |
|
|
@@ -213,4 +224,4 @@ def process_feed(feed_url, output_filename): |
|
|
|
# Let's actually fetch the stuff! |
|
|
|
|
|
|
|
for feed in config['feeds']: |
|
|
|
process_feed(feed['source'], feed['destination']) |
|
|
|
process_feed(feed) |