import os.path
import sys
import json
import re
import sqlite3
import time
from datetime import datetime, timedelta

import feedparser
import html2text
import requests
from mastodon import Mastodon

# default config location is a 'config.json' next to the script;
# an alternative path can be passed as the first command line argument.
try:
    filedir = os.path.dirname(os.path.abspath(__file__))
    if len(sys.argv) < 2:
        configpath = filedir + '/config.json'
        print("Using default config location: ", configpath)
    else:
        configpath = sys.argv[1]
    config = json.load(open(configpath))
except Exception:
    print("ERROR: Config file not found or invalid: ", configpath)
    sys.exit(1)

mastinstance = config['mastodon']['instance']
mastuser = config['mastodon']['user']
mastpasswd = config['mastodon']['password']
twitteruser = config['sources']['twitter']['user']
soupuser = config['sources']['soup']['user']
dryrun = config['settings']['dryrun']
days = config['settings']['days']
delay = config['settings']['delay']

# sqlite db to store processed source posts (and the corresponding toot ids)
sql = sqlite3.connect(config['settings']['databasefilepath'])
db = sql.cursor()
db.execute('''CREATE TABLE IF NOT EXISTS posts
              (srcpost text, srcuser text, mastpost text, mastuser text, mastinstance text)''')

mastodon_api = None


def register_app(mastuser, mastpasswd, mastinstance, mastodon_api):
    # create the app on the instance once, then log in; the api object is
    # cached by the caller, so repeated calls just return it unchanged.
    if mastodon_api is None:
        if not os.path.isfile(mastinstance + '.secret'):
            if Mastodon.create_app(
                'metasyndicator',
                api_base_url='https://' + mastinstance,
                to_file=mastinstance + '.secret'
            ):
                print('app created on instance ' + mastinstance)
            else:
                print('failed to create app on instance ' + mastinstance)
                sys.exit(1)
        try:
            mastodon_api = Mastodon(
                client_id=mastinstance + '.secret',
                api_base_url='https://' + mastinstance
            )
            mastodon_api.log_in(
                username=mastuser,
                password=mastpasswd,
                scopes=['read', 'write'],
                to_file=mastuser + '.secret'
            )
        except Exception:
            print("ERROR: First Login Failed!")
            sys.exit(1)
    return mastodon_api


# twitter section
print('====== TWITTER ======')

t = feedparser.parse('http://twitrss.me/twitter_user_to_rss/?user=' + twitteruser)

# start with the oldest entry
for p in reversed(t.entries):
    # check if this tweet has already been processed
    db.execute(
        'SELECT * FROM posts WHERE srcpost = ? AND srcuser = ? AND mastuser = ? AND mastinstance = ?',
        (p.id, twitteruser, mastuser, mastinstance)
    )
    last = db.fetchone()
    print('Processing: %s' % p.id)

    shouldpost = True
    # convert the feed's struct_time to a datetime
    posttime = datetime(*p.published_parsed[:6])

    if last is not None:
        shouldpost = False
        print("skip: already posted")

    # process only unprocessed tweets less than n days old
    age = datetime.now() - posttime
    if age > timedelta(days=days):
        shouldpost = False
        print("skip: posting older than %s days (%s)" % (days, age))

    # kill tweets with fb links with fire!
    if "https://www.facebook.com" in p.title or "https://m.facebook.com" in p.title:
        shouldpost = False
        print("skip: a tweet that links to facebook? ... That's too much.")

    if shouldpost:
        print(posttime)
        # create the application if it does not exist and log in
        mastodon_api = register_app(mastuser, mastpasswd, mastinstance, mastodon_api)

        c = p.title
        if p.author.lower() != '(@%s)' % twitteruser.lower():
            c = ("RT %s from Twitter:\n" % p.author[1:-1]) + c

        toot_media = []
        # get the pictures...
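        # NOTE: mastodon_api.media_post() uploads raw bytes and returns a media
        # dict; each returned 'id' is collected in toot_media and attached to
        # the status later via the media_ids= parameter of status_post().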
        for pic in re.finditer(r"https://pbs.twimg.com/[^ \xa0\"]*", p.summary):
            if not dryrun:
                media = requests.get(pic.group(0))
                media_posted = mastodon_api.media_post(
                    media.content, mime_type=media.headers.get('content-type'))
                toot_media.append(media_posted['id'])
                media = None
            else:
                print('Dryrun: not fetching ', pic.group(0), ' and not uploading it to mastodon')

        # replace the t.co link by the original URL
        m = re.search(r"http[^ \xa0]*", c)
        if m is not None:
            l = m.group(0)
            r = requests.get(l, allow_redirects=False)
            if r.status_code in {301, 302}:
                c = c.replace(l, r.headers.get('Location'))

        # remove pic.twitter.com links
        m = re.search(r"pic.twitter.com[^ \xa0]*", c)
        if m is not None:
            l = m.group(0)
            c = c.replace(l, ' ')

        # remove ellipsis
        c = c.replace('\xa0…', ' ')

        c += '\n\nSource: %s' % p.link
        print(c)

        if not dryrun:
            toot = mastodon_api.status_post(
                c,
                in_reply_to_id=None,
                media_ids=toot_media,
                sensitive=False,
                visibility=config['sources']['twitter']['visibility'],
                spoiler_text=None)
            print('--> toot posted!')
            try:
                db.execute("INSERT INTO posts VALUES ( ? , ? , ? , ? , ? )",
                           (p.id, twitteruser, toot.id, mastuser, mastinstance))
                sql.commit()
            except Exception:
                print('database execution failed.')
                print('p.id: ', p.id)
                print('toot.id: ', toot.id)
        else:
            print('Dryrun: not posting toot and not adding it to database')

        print('waiting %s seconds ...' % delay)
        time.sleep(delay)

    print('------------------------')

# soup.io section
print('====== SOUP ======')

h = html2text.HTML2Text()
h.ignore_links = True
h.ignore_images = True
h.body_width = 0  # disable hard line wrapping

s = feedparser.parse('http://' + soupuser + '/rss')

# start with the oldest entry
for p in reversed(s.entries):
    # check if this soup post has already been processed
    db.execute(
        'SELECT * FROM posts WHERE srcpost = ? AND srcuser = ? AND mastuser = ? AND mastinstance = ?',
        (p.id, soupuser, mastuser, mastinstance)
    )
    last = db.fetchone()
    print('Processing: %s' % p.id)

    shouldpost = True
    if last is not None:
        shouldpost = False
        print("skip: already posted")

    # process only unprocessed posts less than n days old
    posttime = datetime(*p.published_parsed[:6])
    age = datetime.now() - posttime
    if age > timedelta(days=days):
        shouldpost = False
        print("skip: posting older than %s days (%s)" % (days, age))

    if shouldpost:
        # create the application if it does not exist and log in
        mastodon_api = register_app(mastuser, mastpasswd, mastinstance, mastodon_api)
        print(p.link)

        j = json.loads(p.soup_attributes)

        # get status id and author if twitter is the source
        tweet_id = None
        tweet_author = None
        if isinstance(j['source'], str):
            if (j['source'].startswith('https://twitter.com/')
                    or j['source'].startswith('https://mobile.twitter.com/')):
                twitterurl = j['source'].split('/')
                tweet_author = twitterurl[3]
                if twitterurl[4] == 'status':
                    tweet_id = twitterurl[5]

        # get all tweet ids that have already been posted
        print(twitteruser)
        db.execute('SELECT srcpost FROM posts WHERE srcuser = ?', (twitteruser,))
        postedtweets = []
        for postedtweet in db.fetchall():
            postedtweets.append(postedtweet[0].split('/')[-1])

        # check if the underlying tweet has already been posted
        if tweet_id in postedtweets:
            print('Already posted the Tweet: ', j['source'])
        else:
            # collect information about images
            pics = []
            # let's not do mp4 for now.
            accepted_filetypes = ('.jpg', '.jpeg', '.png', '.webm',
                                  '.JPG', '.JPEG', '.PNG', '.WEBM')
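            # str.endswith() accepts a tuple of suffixes; the comparison is
            # case-sensitive, hence the upper-case variants listed above.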
            if isinstance(j['source'], str) and j['source'].endswith(accepted_filetypes):
                pics.append(j['source'])
            elif ('url' in j and isinstance(j['url'], str)
                    and j['url'].endswith(accepted_filetypes)):
                pics.append(j['url'])

            # get the images and post them to mastodon ...
            toot_media = []
            for pic in pics:
                if not dryrun:
                    media = requests.get(pic)
                    print(pic, ' has mimetype ', media.headers.get('content-type'))
                    media_posted = mastodon_api.media_post(
                        media.content, mime_type=media.headers.get('content-type'))
                    toot_media.append(media_posted['id'])
                else:
                    print('Dryrun: not fetching ', pic, ' and not uploading it to mastodon')

            # the soup feed prefixes titles with '[poster]'
            poster = p.title.split(']')[0].strip('[')
            poster_text = "\n(via %s on soup.io)" % poster

            # remove all html stuff - the python module in use only supports
            # markdown, not pure plaintext. (assumption: the tag replaced here
            # was lost in transit; '<br>' is the most plausible candidate.)
            textsrc = h.handle(p.summary_detail.value.replace("<br>", "\n"))
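            # html2text emits Markdown rather than plain text, so the clean-up
            # below also strips bold markers ('**') and escaped dashes ('\--')
            # that would otherwise leak into the toot.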
            # free the text from lines without visible characters
            cleantextsrc = ''
            for line in textsrc.split('\n'):
                line = line.strip()
                cleantextsrc += line + '\n'

            # strip newlines, reduce newlines, remove markdown bold (i know, ugly), do some clean up
            text = cleantextsrc.strip('\n').replace('\n\n\n', '\n\n').replace('**', '').replace('\\--', '')

            # link directly to the source, or use soup as the source.
            if isinstance(j['source'], str) and j['source'] not in text:
                source = '\n\nSource: ' + j['source']
            else:
                source = '\n\nSource: ' + p.link

            # shorten the text if it is too long;
            # 50 ... just in case (if they also count the attachment url and so on)
            maximumlength = 500 - 1 - len(poster_text) - len(source) - 50
            if len(text) > maximumlength:
                text = text[:maximumlength] + '…'

            # add source
            text += source
            # add soup poster
            text += poster_text
            print(text)

            if not dryrun:
                # post toot
                toot = mastodon_api.status_post(
                    text,
                    in_reply_to_id=None,
                    media_ids=toot_media,
                    sensitive=False,
                    visibility=config['sources']['soup']['visibility'],
                    spoiler_text=None)

                # add entry to the database
                if "id" in toot:
                    db.execute("INSERT INTO posts VALUES ( ? , ? , ? , ? , ? )",
                               (p.id, soupuser, toot.id, mastuser, mastinstance))
                    sql.commit()
                    print('--> ', p.id, ' posted!')
            else:
                print('Dryrun: not posting toot and not adding it to database')

        print('waiting %s seconds ...' % delay)
        time.sleep(delay)

    print('------------------------')
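# Usage sketch (the script filename is assumed, not part of the original source):
#   python metasyndicator.py                      # uses config.json next to the script
#   python metasyndicator.py /path/to/config.json
#
# Example config.json (keys taken from the lookups above; all values are
# illustrative placeholders):
# {
#   "mastodon": {"instance": "mastodon.example", "user": "me@example.org", "password": "secret"},
#   "sources": {
#     "twitter": {"user": "someuser", "visibility": "public"},
#     "soup": {"user": "someuser.soup.io", "visibility": "unlisted"}
#   },
#   "settings": {"dryrun": true, "days": 2, "delay": 30, "databasefilepath": "posts.db"}
# }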