# Cross-post new entries from a Twitter RSS feed and a soup.io RSS feed to a
# Mastodon account.  Already-syndicated posts are remembered in a SQLite
# database so each entry is tooted at most once.

import os.path
import sys
import feedparser
from mastodon import Mastodon
import json
import requests
import re
import sqlite3
import html2text
from datetime import datetime, date, time, timedelta

# default config location is a 'config.json' next to the script;
# an explicit path may be passed as the first command line argument.
try:
    filedir = os.path.dirname(os.path.abspath(__file__))
    if len(sys.argv) < 2:
        print("Using default config location: %s/config.json" % filedir)
        config = json.load(open(filedir + '/config.json'))
    else:
        config = json.load(open(sys.argv[1]))
except (OSError, ValueError):
    # OSError: file missing/unreadable; ValueError: file is not valid JSON.
    print("ERROR: Config file not found!")
    sys.exit(1)

mastinstance = config['mastodon']['instance']
mastuser = config['mastodon']['user']
mastpasswd = config['mastodon']['password']
twitter = config['sources']['twitter']
soup = config['sources']['soup']
dryrun = config['settings']['dryrun']
days = config['settings']['days']

# sqlite db to store processed tweets (and corresponding toots ids)
sql = sqlite3.connect(config['settings']['databasefilepath'])
db = sql.cursor()
db.execute('''CREATE TABLE IF NOT EXISTS posts (srcpost text, srcuser text, mastpost text, mastuser text, mastinstance text)''')

mastodon_api = None


def register_app(mastuser, mastpasswd, mastinstance, mastodon_api):
    """Return a logged-in Mastodon client for *mastinstance*.

    If *mastodon_api* is already a client, it is returned unchanged (the
    original implementation fell through and returned None here, which made
    callers lose their client after the first post).  Otherwise the app is
    registered on first use (credentials cached in '<instance>.secret' /
    '<user>.secret') and a fresh login is performed.  Exits the program if
    app creation or login fails.
    """
    if mastodon_api is not None:
        # Bugfix: hand the existing client back instead of returning None.
        return mastodon_api

    if not os.path.isfile(mastinstance + '.secret'):
        if Mastodon.create_app(
            'metasyndicator',
            api_base_url='https://' + mastinstance,
            to_file=mastinstance + '.secret'
        ):
            print('app created on instance ' + mastinstance)
        else:
            print('failed to create app on instance ' + mastinstance)
            sys.exit(1)
    try:
        mastodon_api = Mastodon(
            client_id=mastinstance + '.secret',
            api_base_url='https://' + mastinstance
        )
        mastodon_api.log_in(
            username=mastuser,
            password=mastpasswd,
            scopes=['read', 'write'],
            to_file=mastuser + ".secret"
        )
        return mastodon_api
    except Exception:  # boundary: mastodon/network errors are fatal here
        print("ERROR: First Login Failed!")
        sys.exit(1)


# twitter section
print('====== TWITTER ======')
t = feedparser.parse('http://twitrss.me/twitter_user_to_rss/?user=' + twitter)

for p in reversed(t.entries):
    # check if this tweet has been processed
    db.execute(
        'SELECT * FROM posts WHERE srcpost = ? AND srcuser = ? AND mastuser = ? AND mastinstance = ?',
        (p.id, twitter, mastuser, mastinstance))
    last = db.fetchone()

    shouldpost = True
    # feedparser gives a time.struct_time; first six fields feed datetime()
    posttime = datetime(p.published_parsed.tm_year, p.published_parsed.tm_mon,
                        p.published_parsed.tm_mday, p.published_parsed.tm_hour,
                        p.published_parsed.tm_min, p.published_parsed.tm_sec)

    if last is not None:
        shouldpost = False
        print("skip: already posted")

    # process only unprocessed tweets less than n days old
    if datetime.now() - posttime > timedelta(days=days):
        shouldpost = False
        print("skip: Post too old")

    # kill tweets with fb links with fire!
    if "https://www.facebook.com" in p.title or "https://m.facebook.com" in p.title:
        shouldpost = False
        print("skip: a Tweet that links to facebook? ... That's too much.")

    if shouldpost:
        print(posttime)
        # Create application if it does not exist
        mastodon_api = register_app(mastuser, mastpasswd, mastinstance, mastodon_api)

        c = p.title
        if p.author != '(%s)' % twitter:
            # retweet: prefix with original author, stripping the parentheses
            c = ("RT %s\n" % p.author[1:-1]) + c

        # get the pictures and upload them to mastodon
        toot_media = []
        for pic in re.finditer(r"https://pbs.twimg.com/[^ \xa0\"]*", p.summary):
            if not dryrun:
                media = requests.get(pic.group(0))
                media_posted = mastodon_api.media_post(
                    media.content, mime_type=media.headers.get('content-type'))
                toot_media.append(media_posted['id'])
            else:
                print('Dryrun: not fetching ', pic.group(0), ' and not uploading it to mastodon')

        # replace t.co link by original URL
        m = re.search(r"http[^ \xa0]*", c)
        if m is not None:
            l = m.group(0)
            r = requests.get(l, allow_redirects=False)
            if r.status_code in {301, 302}:
                location = r.headers.get('Location')
                # guard: a redirect without a Location header would make
                # str.replace() raise TypeError
                if location is not None:
                    c = c.replace(l, location)

        # remove pic.twitter.com links
        m = re.search(r"pic.twitter.com[^ \xa0]*", c)
        if m is not None:
            l = m.group(0)
            c = c.replace(l, ' ')

        # remove ellipsis
        c = c.replace('\xa0…', ' ')

        print(c)
        if not dryrun:
            toot = mastodon_api.status_post(
                c, in_reply_to_id=None, media_ids=toot_media, sensitive=False,
                visibility='unlisted', spoiler_text=None)
            print('--> toot posted!')
            try:
                db.execute("INSERT INTO posts VALUES ( ? , ? , ? , ? , ? )",
                           (p.id, twitter, toot.id, mastuser, mastinstance))
                sql.commit()
            except sqlite3.Error:
                print('database execution failed.')
                print('p.id: ', p.id)
                print('toot.id: ', toot.id)
        else:
            print('Dryrun: not posting toot and not adding it to database')
        print('------------------------')


# soup.io section
print('====== SOUP ======')
h = html2text.HTML2Text()
h.ignore_links = True
h.ignore_images = True
h.body_width = 0

s = feedparser.parse('http://' + soup + '/rss')
for p in reversed(s.entries):
    # check if this post has been processed
    db.execute(
        'SELECT * FROM posts WHERE srcpost = ? AND srcuser = ? AND mastuser = ? AND mastinstance = ?',
        (p.id, soup, mastuser, mastinstance))
    last = db.fetchone()

    posttime = datetime(p.published_parsed.tm_year, p.published_parsed.tm_mon,
                        p.published_parsed.tm_mday, p.published_parsed.tm_hour,
                        p.published_parsed.tm_min, p.published_parsed.tm_sec)

    # process only unprocessed posts less than n days old
    if last is None and (datetime.now() - posttime < timedelta(days=days)):
        # Create application if it does not exist
        mastodon_api = register_app(mastuser, mastpasswd, mastinstance, mastodon_api)
        print(p.link)
        j = json.loads(p.soup_attributes)

        # get status id and user if twitter is source
        twitterstatus = None
        twitteruser = None
        if isinstance(j['source'], str):
            if (j['source'].startswith('https://twitter.com/')
                    or j['source'].startswith('https://mobile.twitter.com/')):
                twitterurl = j['source'].split('/')
                twitteruser = twitterurl[3]
                # guard: profile links have no status segment (IndexError before)
                if len(twitterurl) > 5 and twitterurl[4] == 'status':
                    twitterstatus = twitterurl[5]

        # get all tweeted statuses
        db.execute('SELECT srcpost FROM posts where srcuser = ?', (twitter,))
        postedtweets = [row[0].split('/')[-1] for row in db.fetchall()]

        # check if already tweeted
        if twitterstatus in postedtweets:
            print('Already tweeted: ', j['source'])
        else:
            # collect information about images
            pics = []
            accepted_filetypes = ('.jpg', '.jpeg', '.png', '.webm',
                                  '.JPG', '.JPEG', '.PNG', '.WEBM')
            # let's don't do mp4 for now.
            if isinstance(j['source'], str) and j['source'].endswith(accepted_filetypes):
                pics.append(j['source'])
            elif 'url' in j and isinstance(j['url'], str) and j['url'].endswith(accepted_filetypes):
                pics.append(j['url'])

            # get the images and post them to mastodon ...
            toot_media = []
            for pic in pics:
                if not dryrun:
                    media = requests.get(pic)
                    print(pic, ' has mimetype ', media.headers.get('content-type'))
                    media_posted = mastodon_api.media_post(
                        media.content, mime_type=media.headers.get('content-type'))
                    toot_media.append(media_posted['id'])
                else:
                    print('Dryrun: not fetching ', pic, ' and not uploading it to mastodon')

            # remove all html stuff - python module in use only supports markdown, not pure plaintext
            # NOTE(review): replacing the empty string inserts a newline between
            # EVERY character; the original separator string was probably lost
            # in transit — confirm against the upstream repository.
            textsrc = h.handle(p.summary_detail.value.replace("", "\n"))

            # free text from lines without visible characters
            cleantextsrc = ''
            for line in textsrc.split('\n'):
                cleantextsrc += line.strip() + '\n'

            # strip newlines, reduce newlines, remove markdown bold (i know, ugly), do some clean up
            text = (cleantextsrc.strip('\n')
                    .replace('\n\n\n', '\n\n')
                    .replace('**', '')
                    .replace('\\--', ''))

            # link directly to source or use soup as source.
            if isinstance(j['source'], str) and j['source'] not in text:
                source = '\n\nSource: ' + j['source']
            else:
                source = '\n\nSource: ' + p.link

            # shorten text if too long (Mastodon's 500-char limit)
            maximum_length = 500 - 1 - len(source) - 50  # 50 ... just in case (if they also count attachement url and so on)
            text = (text[:maximum_length] + '…') if len(text) > maximum_length else text

            # add source
            text += source

            print(text)
            if not dryrun:
                # post toot
                toot = mastodon_api.status_post(
                    text, in_reply_to_id=None, media_ids=toot_media, sensitive=False,
                    visibility='public', spoiler_text=None)
                # add entry to database
                if "id" in toot:
                    db.execute("INSERT INTO posts VALUES ( ? , ? , ? , ? , ? )",
                               (p.id, soup, toot.id, mastuser, mastinstance))
                    sql.commit()
                    print('--> ', p.id, ' posted!')
            else:
                print('Dryrun: not posting toot and not adding it to database')
            print('------------------------')