
assets get their own independent url (not a sub-path of feeds)

master
Andreas Demmelbauer, 5 years ago
commit 38ccbdf3f2
5 changed files with 16 additions and 8 deletions:

  1. README.md (+4, -2)
  2. config.example.json (+1, -1)
  3. feedcake.py (+9, -5)
  4. public/assets/.gitignore (+0, -0)
  5. public/feeds/.gitignore (+2, -0)

README.md (+4, -2)

@@ -52,8 +52,10 @@ news pages
 * make `cron.sh` executable: `chmod +x cron.sh`
 * add cronjob for `cron.sh`: `crontab -e`
 * `*/5 * * * * /absolute/path/to/cron.sh > /path/to/logfile 2>&1`
-* setup your webserver: the `base_url` must point to the `public` directory.
-  You should add basic HTTP authentication or at least keep the URL private.
+* setup your webserver:
+  * let your webserver point to the `feeds` directory.
+    You should protect the HTTP path with basic authentication.
+  * let the `assets_url` specified in the config point to the `assets` directory.
 * After running the script the first time, your desired feed is available at
   `base_url/destination` (e.g. `https://yourdomain.tld/some-url/newspaper.xml`)
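
Once the cron job has produced the first feed, the webserver and basic-auth setup can be sanity-checked from Python. A minimal sketch, assuming hypothetical credentials and the example URL from the README:

import base64
import urllib.request

# Hypothetical values -- substitute your own feed URL and basic-auth credentials.
feed_url = 'https://yourdomain.tld/some-url/newspaper.xml'
token = base64.b64encode(b'user:password').decode()

request = urllib.request.Request(feed_url, headers={'Authorization': 'Basic ' + token})
with urllib.request.urlopen(request) as response:
    # 200 plus an XML content type means the feed path and auth are wired up correctly.
    print(response.status, response.headers.get('Content-Type'))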



config.example.json (+1, -1)

@@ -1,5 +1,5 @@
 {
  "base_url" : "https://yourdomain.tld/some-url",
+ "assets_url" : "https://yourdomain.tld/some-url",
  "feeds" : [
   {
    "source" : "https://a.newspaper.tld/news.xml",


feedcake.py (+9, -5)

@@ -31,9 +31,10 @@ except:
 print(filedir)
 public_path = filedir + '/public'
 assets_path = public_path + '/assets'
+feeds_path = public_path + '/feeds'

 # e.g. https://example.com/some-string
 base_url = config['base_url']
+assets_url = config['assets_url']



@@ -154,13 +155,13 @@ def process_feed(feed_url, output_filename):
             old_url = img['data-src']
             if not old_url.startswith('data:'):
                 new_filename = filename_from_url(old_url)
-                img['data-src'] = base_url + '/' + entry_dir + '/' + new_filename
+                img['data-src'] = assets_url + '/' + entry_dir + '/' + new_filename
                 download_image(old_url, entry_dir, new_filename)
         if img.get('src'):
             old_url = img['src']
             if not old_url.startswith('data:'):
                 new_filename = filename_from_url(old_url)
-                img['src'] = base_url + '/' + entry_dir + '/' + new_filename
+                img['src'] = assets_url + '/' + entry_dir + '/' + new_filename
                 download_image(old_url, entry_dir, new_filename)
         if img.get('data-srcset'):
             srcset = img['data-srcset'].split(', ')
@@ -170,7 +171,7 @@ def process_feed(feed_url, output_filename):
                 src_res = src.split(' ')[1]
                 new_filename = filename_from_url(old_url)
                 download_image(old_url, entry_dir, new_filename)
-                new_url = base_url + '/' + entry_dir + '/' + new_filename
+                new_url = assets_url + '/' + entry_dir + '/' + new_filename
                 src = ' '.join([new_url, src_res])
                 new_srcset.append(src)
             img['data-srcset'] = ', '.join(new_srcset)
@@ -200,7 +201,10 @@ def process_feed(feed_url, output_filename):
             e.append(content_tag)
     f_content.close()

-    f = open(public_path + '/' + output_filename, 'w')
+    # create directory if not present
+    os.makedirs(feeds_path, exist_ok=True)
+
+    f = open(feeds_path + '/' + output_filename, 'w')
     f.write(str(feed_soup.prettify()))
     f.close()



public/.gitignore → public/assets/.gitignore (renamed)


public/feeds/.gitignore (+2, -0)

@@ -0,0 +1,2 @@
+*
+!.gitignore
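
These two patterns ignore everything inside `public/feeds/` except the `.gitignore` itself, so the generated feed files stay out of version control while the directory remains in the repository.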
