123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292 |
import json
import os.path
import re
import sqlite3
import sys
from datetime import date, datetime, time, timedelta, timezone

import feedparser
import html2text
import requests
from mastodon import Mastodon
# Load the JSON configuration.
# The default config location is a 'config.json' next to the script; a
# different path may be passed as the first command line argument.
filedir = os.path.dirname(os.path.abspath(__file__))
if len(sys.argv) < 2:
    print("Using default config location: %s/config.json" % filedir)
    configpath = filedir + '/config.json'
else:
    configpath = sys.argv[1]
try:
    # 'with' closes the file handle even if parsing fails.
    with open(configpath) as configfile:
        config = json.load(configfile)
except OSError as err:
    # Missing or unreadable file.
    print("ERROR: Config file not found!")
    print(err)
    sys.exit(1)
except ValueError as err:
    # json.JSONDecodeError is a ValueError: the file exists but is malformed,
    # so report that instead of the misleading "not found".
    print("ERROR: Config file is not valid JSON!")
    print(err)
    sys.exit(1)
# Unpack the values the rest of the script needs from the parsed config.
mastinstance, mastuser, mastpasswd = (
    config['mastodon'][key] for key in ('instance', 'user', 'password')
)
twitteruser, soupuser = (
    config['sources'][source]['user'] for source in ('twitter', 'soup')
)
dryrun, days = (config['settings'][key] for key in ('dryrun', 'days'))

# sqlite db to store processed source posts (and the corresponding toot ids)
sql = sqlite3.connect(config['settings']['databasefilepath'])
db = sql.cursor()
db.execute(
    '''CREATE TABLE IF NOT EXISTS posts (srcpost text, srcuser text, mastpost text, mastuser text, mastinstance text)'''
)

# The Mastodon client is created lazily, on the first post that needs it.
mastodon_api = None
def register_app(mastuser, mastpasswd, mastinstance, mastodon_api):
    """Return a logged-in Mastodon client, creating one only if needed.

    Args:
        mastuser: Account name used for the login; also the prefix of the
            '<mastuser>.secret' file passed to ``log_in`` as ``to_file``.
        mastpasswd: Password for ``mastuser``.
        mastinstance: Host name of the instance (without scheme); also the
            prefix of the '<mastinstance>.secret' app credential file.
        mastodon_api: An existing client, or None if none was created yet.

    Returns:
        ``mastodon_api`` unchanged when it is not None, otherwise a newly
        created and logged-in ``Mastodon`` client.

    Exits the process with status 1 if the app cannot be created or the
    login fails.
    """
    # Hand back an existing client as-is. Previously this path fell through
    # and returned None, which wiped the caller's client on the second call.
    if mastodon_api is not None:
        return mastodon_api

    secretfile = mastinstance + '.secret'
    base_url = 'https://' + mastinstance

    # Create application if it does not exist
    if not os.path.isfile(secretfile):
        if Mastodon.create_app(
            'metasyndicator',
            api_base_url=base_url,
            to_file=secretfile
        ):
            print('app created on instance '+mastinstance)
        else:
            print('failed to create app on instance '+mastinstance)
            sys.exit(1)

    try:
        mastodon_api = Mastodon(
            client_id=secretfile,
            api_base_url=base_url
        )
        mastodon_api.log_in(
            username=mastuser,
            password=mastpasswd,
            scopes=['read', 'write'],
            to_file=mastuser+".secret"
        )
    except Exception as err:
        # 'Exception' rather than a bare except, so Ctrl-C still interrupts;
        # the reason is printed to make login problems diagnosable.
        print("ERROR: First Login Failed!")
        print(err)
        sys.exit(1)
    return mastodon_api
# twitter section
# Mirrors recent tweets of `twitteruser` (read through the twitrss.me RSS
# bridge) to Mastodon and records every posted tweet in the `posts` table.
print('====== TWITTER ======')
t = feedparser.parse('http://twitrss.me/twitter_user_to_rss/?user='+twitteruser)
# start with oldest
for p in reversed(t.entries):
    # check if this tweet has been processed
    db.execute(
        'SELECT * FROM posts WHERE srcpost = ? AND srcuser = ? AND mastuser = ? AND mastinstance = ?',
        (p.id, twitteruser, mastuser, mastinstance)
    )
    last = db.fetchone()
    print('Processing: %s' % p.id)
    shouldpost = True
    # First six struct_time fields: year, month, day, hour, minute, second.
    posttime = datetime(*p.published_parsed[:6])
    if last is not None:
        shouldpost = False
        print("skip: already posted")
    # process only unprocessed tweets less than n days old
    # feedparser normalises parsed dates to UTC (per its date-parsing docs),
    # so compare against the current UTC time, not local wall-clock time.
    age = datetime.now(timezone.utc).replace(tzinfo=None) - posttime
    if age > timedelta(days=days):
        shouldpost = False
        print("skip: Posting older than %s days (%s)" % (days, age))
    # kill tweets with fb links with fire!
    if "https://www.facebook.com" in p.title or "https://m.facebook.com" in p.title:
        shouldpost = False
        print("skip: a Tweet that links to facebook? ... That's too much.")
    if shouldpost:
        print(posttime)
        # Create application / log in only once; later posts reuse the client.
        if mastodon_api is None:
            mastodon_api = register_app(mastuser, mastpasswd, mastinstance, mastodon_api)
        c = p.title
        # An author other than the configured user means this is a retweet.
        if p.author.lower() != '(@%s)' % twitteruser.lower():
            c = ("RT %s from Twitter:\n" % p.author[1:-1]) + c
        toot_media = []
        # get the pictures...
        for pic in re.finditer(r"https://pbs.twimg.com/[^ \xa0\"]*", p.summary):
            if not dryrun:
                media = requests.get(pic.group(0))
                media_posted = mastodon_api.media_post(media.content, mime_type=media.headers.get('content-type'))
                toot_media.append(media_posted['id'])
            else:
                print('Dryrun: not fetching ', pic.group(0), ' and not uploading it to mastodon')
        # replace t.co link by original URL (only the first link in the text)
        m = re.search(r"http[^ \xa0]*", c)
        if m is not None:
            shortlink = m.group(0)
            r = requests.get(shortlink, allow_redirects=False)
            target = r.headers.get('Location')
            # A redirect without a Location header would hand None to
            # str.replace(); keep the short link in that case.
            if r.status_code in {301, 302} and target:
                c = c.replace(shortlink, target)
        # remove pic.twitter.com links
        m = re.search(r"pic.twitter.com[^ \xa0]*", c)
        if m is not None:
            c = c.replace(m.group(0), ' ')
        # remove ellipsis
        c = c.replace('\xa0…', ' ')
        c += '\n\nSource: %s' % p.link
        print(c)
        if not dryrun:
            toot = mastodon_api.status_post(c, in_reply_to_id=None, media_ids=toot_media, sensitive=False, visibility=config['sources']['twitter']['visibility'], spoiler_text=None)
            print('--> toot posted!')
            try:
                db.execute("INSERT INTO posts VALUES ( ? , ? , ? , ? , ? )", (p.id, twitteruser, toot.id, mastuser, mastinstance))
                sql.commit()
            except sqlite3.Error as err:
                # Best effort: the toot is already out, so report and carry on.
                print('database execution failed.')
                print(err)
                print('p.id: ', p.id)
                print('toot.id: ', toot.id)
        else:
            print('Dryrun: not posting toot and not adding it to database')
    print('------------------------')
# soup.io section
# Mirrors recent entries of the soup at `soupuser` to Mastodon, skipping
# entries that are just reposts of tweets the twitter section already tooted.
print('====== SOUP ======')
h = html2text.HTML2Text()
h.ignore_links = True
h.ignore_images = True
h.body_width = 0
# Media file extensions that get attached to the toot; no mp4 for now.
accepted_filetypes = ('.jpg', '.jpeg', '.png', '.webm', '.JPG', '.JPEG', '.PNG', '.WEBM')
s = feedparser.parse('http://'+soupuser+'/rss')
# start with oldest
for p in reversed(s.entries):
    # check if this soup entry has been processed
    db.execute(
        'SELECT * FROM posts WHERE srcpost = ? AND srcuser = ? AND mastuser = ? AND mastinstance = ?',
        (p.id, soupuser, mastuser, mastinstance)
    )
    last = db.fetchone()
    print('Processing: %s' % p.id)
    # Must be initialised BEFORE the checks below; it used to be reset to
    # True after the "already posted" check, so posted entries were re-posted.
    shouldpost = True
    if last is not None:
        shouldpost = False
        print("skip: already posted")
    # process only unprocessed entries less than n days old
    # First six struct_time fields: year, month, day, hour, minute, second.
    posttime = datetime(*p.published_parsed[:6])
    # feedparser normalises parsed dates to UTC (per its date-parsing docs),
    # so compare against the current UTC time, not local wall-clock time.
    age = datetime.now(timezone.utc).replace(tzinfo=None) - posttime
    if age > timedelta(days=days):
        shouldpost = False
        print("skip: Posting older than %s days (%s)" % (days, age))
    if shouldpost:
        # Create application / log in only once; later posts reuse the client.
        if mastodon_api is None:
            mastodon_api = register_app(mastuser, mastpasswd, mastinstance, mastodon_api)
        print(p.link)
        j = json.loads(p.soup_attributes)
        # 'source' may be absent or not a string; treat both as "no source".
        source_url = j.get('source')
        source_is_str = isinstance(source_url, str)
        # get status id if twitter is source
        tweet_id = None
        if source_is_str and source_url.startswith(('https://twitter.com/', 'https://mobile.twitter.com/')):
            twitterurl = source_url.split('/')
            # Profile URLs have no '/status/<id>' part; guard the indexing.
            if len(twitterurl) > 5 and twitterurl[4] == 'status':
                tweet_id = twitterurl[5]
        # get all tweeted statuses (last path segment of the stored tweet id)
        print(twitteruser)
        db.execute('SELECT srcpost FROM posts where srcuser = ?', (twitteruser,))
        postedtweets = {row[0].split('/')[-1] for row in db.fetchall()}
        # check if already tweeted
        if tweet_id in postedtweets:
            print('Already posted the Tweet: ', source_url)
        else:
            # collect information about images
            pics = []
            if source_is_str and source_url.endswith(accepted_filetypes):
                pics.append(source_url)
            elif 'url' in j and isinstance(j['url'], str) and j['url'].endswith(accepted_filetypes):
                pics.append(j['url'])
            # get the images and post them to mastodon ...
            toot_media = []
            for pic in pics:
                if not dryrun:
                    media = requests.get(pic)
                    print(pic, ' has mimetype ', media.headers.get('content-type'))
                    media_posted = mastodon_api.media_post(media.content, mime_type=media.headers.get('content-type'))
                    toot_media.append(media_posted['id'])
                else:
                    print('Dryrun: not fetching ', pic, ' and not uploading it to mastodon')
            # The title is expected to start with '[poster]'.
            poster = p.title.split(']')[0].strip('[')
            poster_text = "\n\nvia %s on soup.io" % poster
            # remove all html stuff - python module in use only supports markdown, not pure plaintext
            textsrc = h.handle(p.summary_detail.value.replace("<small>", "<br><small>"))
            # strip surrounding whitespace from every line so whitespace-only lines become empty
            cleantextsrc = '\n'.join(line.strip() for line in textsrc.split('\n'))
            # strip newlines, reduce newlines, remove markdown bold (i know, ugly), do some clean up
            text = cleantextsrc.strip('\n').replace('\n\n\n', '\n\n').replace('**', '').replace('\\--', '')
            # link directly to source or use soup as source.
            if source_is_str and source_url not in text:
                source = '\n\nSource: ' + source_url
            else:
                source = '\n\nSource: ' + p.link
            # shorten text if too long
            # 50 ... just in case (if they also count attachment urls and so on);
            # clamp at 0 so a very long source cannot yield a negative slice.
            max_length = max(0, 500 - 1 - len(poster_text) - len(source) - 50)
            if len(text) > max_length:
                text = text[:max_length] + '…'
            # add poster
            text += poster_text
            # add source
            text += source
            print(text)
            if not dryrun:
                # post toot
                toot = mastodon_api.status_post(text, in_reply_to_id=None, media_ids=toot_media, sensitive=False, visibility=config['sources']['soup']['visibility'], spoiler_text=None)
                # add entry to database
                if "id" in toot:
                    db.execute("INSERT INTO posts VALUES ( ? , ? , ? , ? , ? )", (p.id, soupuser, toot.id, mastuser, mastinstance))
                    sql.commit()
                    print('--> ', p.id, ' posted!')
            else:
                print('Dryrun: not posting toot and not adding it to database')
    print('------------------------')