From 38ccbdf3f2d09a0c74d20e6a53d6ba8dce5a12f1 Mon Sep 17 00:00:00 2001 From: Andreas Demmelbauer Date: Tue, 2 Apr 2019 14:45:54 -0700 Subject: [PATCH] assets get their own independent url (no sub of feeds) --- README.md | 6 ++++-- config.example.json | 2 +- feedcake.py | 14 +++++++++----- public/{ => assets}/.gitignore | 0 public/feeds/.gitignore | 2 ++ 5 files changed, 16 insertions(+), 8 deletions(-) rename public/{ => assets}/.gitignore (100%) create mode 100644 public/feeds/.gitignore diff --git a/README.md b/README.md index de84b7b..18e372c 100644 --- a/README.md +++ b/README.md @@ -52,8 +52,10 @@ news pages * make `cron.sh` executable: `chmod +x cron.sh` * add cronjob for `cron.sh`: `crontab -e` * `*/5 * * * * /absolute/path/to/cron.sh > /path/to/logfile 2>&1` -* setup your webserver: the `base_url` must point to the `public` directory - You should add basic http authentication or at least keep the url private +* setup your webserver: + * let your webserver somehow point to the `feeds` directory. + You should protect the HTTP path with basic authentication. + * let the `asset_url` specified in the config point to the `assets` directory. * After running the script the first time, your desired feed is available at `base_url/destination` (e.g. `https://yourdomain.tld/some-url/newspaper.xml`) diff --git a/config.example.json b/config.example.json index e3a42c1..06af43e 100644 --- a/config.example.json +++ b/config.example.json @@ -1,5 +1,5 @@ { - "base_url" : "https://yourdomain.tld/some-url", + "asset_url" : "https://yourdomain.tld/some-url", "feeds" : [ { "source" : "https://a.newspaper.tld/news.xml", diff --git a/feedcake.py b/feedcake.py index 55a3aaf..ed9171f 100644 --- a/feedcake.py +++ b/feedcake.py @@ -31,9 +31,10 @@ except: print(filedir) public_path = filedir + '/public' assets_path = public_path + '/assets' +feeds_path = public_path + '/feeds' # e.g. 
https://example.com/some-string -base_url = config['base_url'] +asset_url = config['asset_url'] @@ -154,13 +155,13 @@ def process_feed(feed_url, output_filename): old_url = img['data-src'] if not old_url.startswith('data:'): new_filename = filename_from_url(old_url) - img['data-src'] = base_url + '/' + entry_dir + '/' + new_filename + img['data-src'] = asset_url + '/' + entry_dir + '/' + new_filename download_image(old_url, entry_dir, new_filename) if img.get('src'): old_url = img['src'] if not old_url.startswith('data:'): new_filename = filename_from_url(old_url) - img['src'] = base_url + '/' + entry_dir + '/' + new_filename + img['src'] = asset_url + '/' + entry_dir + '/' + new_filename download_image(old_url, entry_dir, new_filename) if img.get('data-srcset'): srcset = img['data-srcset'].split(', ') @@ -170,7 +171,7 @@ def process_feed(feed_url, output_filename): src_res = src.split(' ')[1] new_filename = filename_from_url(old_url) download_image(old_url, entry_dir, new_filename) - new_url = base_url + '/' + entry_dir + '/' + new_filename + new_url = asset_url + '/' + entry_dir + '/' + new_filename src = ' '.join([new_url, src_res]) new_srcset.append(src) img['data-srcset'] = ', '.join(new_srcset) @@ -200,7 +201,10 @@ def process_feed(feed_url, output_filename): e.append(content_tag) f_content.close - f = open(public_path + '/' + output_filename, 'w') + # create directory if not present + os.makedirs(feeds_path, exist_ok=True) + + f = open(feeds_path + '/' + output_filename, 'w') f.write(str(feed_soup.prettify())) f.close() diff --git a/public/.gitignore b/public/assets/.gitignore similarity index 100% rename from public/.gitignore rename to public/assets/.gitignore diff --git a/public/feeds/.gitignore b/public/feeds/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/public/feeds/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore