diff --git a/.gitignore b/.gitignore index 3c983b1..36254d0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ *.secret cron.sh - +config.json diff --git a/README.md b/README.md index 23467bb..33f42ea 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,23 @@ -# TootBot +# TootBot Spicy Edition -A small python 3.x script to replicate tweets on a mastodon account. +A small python 3.x script to replicate tweets and posts from soup.io on a mastodon account. The script only need mastodon login/pass to post toots. -It gets the tweets from RSS available at http://twitrss.me, then does some cleanup on the content: +For Twitter, it gets the tweets from RSS available at http://twitrss.me, then does some cleanup on the content: - twitter tracking links (t.co) are dereferenced - twitter hosted pictures are retrieved and uploaded to mastodon -A sqlite database is used to keep track of tweets than have been tooted. +For Soup, it uses the official RSS feed (e.g. https://metalab.soup.io/rss), then does some cleanup on the content: +- checks if source is twitter and checks for duplicates +- removes html stuff +- looks for one picture in source link or in soup post and uploads it to mastodon +- adds source url (if no source is specified, soup post is source url) -This script is in use for a few accounts: -- cq94 -> https://amicale.net/@cquest -- Etalab -> https://mastodon.etalab.gouv.fr/@etalab -- datagouvfr -> https://mastodon.etalab.gouv.fr/@datagouvfr -- osm_fr -> https://fr.osm.social/@osm_fr -- sotmfr -> https://fr.osm.social/@sotmfr +A sqlite database is used to keep track of items than have been tooted. + + +this script is based on TootBot: https://github.com/cquest/tootbot The script is simply called by a cron job and can run on any server (does not have to be on the mastodon instance server). diff --git a/config-example.json b/config-example.json new file mode 100644 index 0000000..e2e3c42 --- /dev/null +++ b/config-example.json @@ -0,0 +1,16 @@ +{ + "mastodon": { + "instance": "mastodon.social", + "user": "loginemail@example.com", + "password": "correcthorsebatterystaple" + }, + "sources": { + "twitter": "metalabVie", + "soup": "metalab.soup.io" + }, + "settings": { + "days": 1, + "dryrun": true, + "databasefilepath": "tootbot.db" + } +} diff --git a/cron-example.sh b/cron-example.sh new file mode 100644 index 0000000..8e69c98 --- /dev/null +++ b/cron-example.sh @@ -0,0 +1,5 @@ +# activate virtualenv if necessary +# source /home/cquest/.virtualenvs/tootbot/bin/activate + +python tootbot.py config.json +python tootbot.py anotherconfig.json diff --git a/cron-sample.sh b/cron-sample.sh deleted file mode 100644 index a5f7d26..0000000 --- a/cron-sample.sh +++ /dev/null @@ -1,11 +0,0 @@ -# activate virtualenv if necessary -# source /home/cquest/.virtualenvs/tootbot/bin/activate - -# parameters: -# 1- twitter account to clone -# 2- mastodon login -# 3- mastodon password -# 4- instance domain (https:// is automatically added) - -python tootbot.py geonym_fr geonym@amicale.net **password** test.amicale.net -python tootbot.py cq94 cquest@amicale.net **password** test.amicale.net diff --git a/requirements.txt b/requirements.txt index d7b9053..04ecd48 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ feedparser Mastodon.py requests - +html2text diff --git a/tootbot.py b/tootbot.py index fb5f889..8d7557b 100755 --- a/tootbot.py +++ b/tootbot.py @@ -6,80 +6,125 @@ import json import requests import re import sqlite3 +import html2text from datetime import datetime, date, time, timedelta -if len(sys.argv) < 4: - print("Usage: python3 tootbot.py twitter_account mastodon_login mastodon_passwd mastodon_instance") + + +# default config location is a 'config.json' next to the script. +try: + filedir = os.path.dirname(os.path.abspath(__file__)) + if len(sys.argv) < 2: + print("Using default config location: %s/config.json" % filedir) + config = json.load(open(filedir+'/config.json')) + else: + config = json.load(open(sys.argv[1])) + +except: + print("ERROR: Config file not found!") sys.exit(1) + +mastinstance = config['mastodon']['instance'] +mastuser = config['mastodon']['user'] +mastpasswd = config['mastodon']['password'] + +twitter = config['sources']['twitter'] +soup = config['sources']['soup'] + +dryrun = config['settings']['dryrun'] +days = config['settings']['days'] + + # sqlite db to store processed tweets (and corresponding toots ids) -sql = sqlite3.connect('tootbot.db') +sql = sqlite3.connect(config['settings']['databasefilepath']) db = sql.cursor() -db.execute('''CREATE TABLE IF NOT EXISTS tweets (tweet text, toot text, twitter text, mastodon text, instance text)''') +db.execute('''CREATE TABLE IF NOT EXISTS posts (srcpost text, srcuser text, mastpost text, mastuser text, mastinstance text)''') + +mastodon_api = None + +def register_app(mastuser,mastpasswd,mastinstance,mastodon_api): + if mastodon_api is None: + if not os.path.isfile(mastinstance+'.secret'): + if Mastodon.create_app( + 'metasyndicator', + api_base_url='https://'+mastinstance, + to_file = mastinstance+'.secret' + ): + print('app created on instance '+mastinstance) + else: + print('failed to create app on instance '+mastinstance) + sys.exit(1) + + try: + mastodon_api = Mastodon( + client_id=mastinstance+'.secret', + api_base_url='https://'+mastinstance + ) + mastodon_api.log_in( + username=mastuser, + password=mastpasswd, + scopes=['read', 'write'], + to_file=mastuser+".secret" + ) + return mastodon_api + except: + print("ERROR: First Login Failed!") + sys.exit(1) -if len(sys.argv)>4: - instance = sys.argv[4] -else: - instance = 'amicale.net' -if len(sys.argv)>5: - days = int(sys.argv[5]) -else: - days = 1 -twitter = sys.argv[1] -mastodon = sys.argv[2] -passwd = sys.argv[3] -mastodon_api = None -d = feedparser.parse('http://twitrss.me/twitter_user_to_rss/?user='+twitter) +# twitter section -for t in reversed(d.entries): +print('====== TWITTER ======') + +t = feedparser.parse('http://twitrss.me/twitter_user_to_rss/?user='+twitter) + +for p in reversed(t.entries): # check if this tweet has been processed - db.execute('SELECT * FROM tweets WHERE tweet = ? AND twitter = ? and mastodon = ? and instance = ?',(t.id, twitter, mastodon, instance)) + db.execute( + 'SELECT * FROM posts WHERE srcpost = ? AND srcuser = ? AND mastuser = ? AND mastinstance = ?', + (p.id, twitter, mastuser, mastinstance) + ) last = db.fetchone() - # process only unprocessed tweets less than 1 day old - if last is None and (datetime.now()-datetime(t.published_parsed.tm_year, t.published_parsed.tm_mon, t.published_parsed.tm_mday, t.published_parsed.tm_hour, t.published_parsed.tm_min, t.published_parsed.tm_sec) < timedelta(days=days)): - if mastodon_api is None: - # Create application if it does not exist - if not os.path.isfile(instance+'.secret'): - if Mastodon.create_app( - 'tootbot', - api_base_url='https://'+instance, - to_file = instance+'.secret' - ): - print('tootbot app created on instance '+instance) - else: - print('failed to create app on instance '+instance) - sys.exit(1) + shouldpost = True + posttime = datetime(p.published_parsed.tm_year, p.published_parsed.tm_mon, p.published_parsed.tm_mday, p.published_parsed.tm_hour, p.published_parsed.tm_min, p.published_parsed.tm_sec) + if last is not None: + shouldpost = False + print("skip: already posted") + # process only unprocessed tweets less than n days old + if datetime.now() - posttime > timedelta(days=days): + shouldpost = False + print("skip: Post too old") + # kill tweets with fb links with fire! + if "https://www.facebook.com" in p.title or "https://m.facebook.com" in p.title: + shouldpost = False + print("skip: a Tweet that links to facebook? ... That's too much.") + - try: - mastodon_api = Mastodon( - client_id=instance+'.secret', - api_base_url='https://'+instance - ) - mastodon_api.log_in( - username=mastodon, - password=passwd, - scopes=['read', 'write'], - to_file=mastodon+".secret" - ) - except: - print("ERROR: First Login Failed!") - sys.exit(1) - #h = BeautifulSoup(t.summary_detail.value, "html.parser") - c = t.title - if t.author != '(%s)' % twitter: - c = ("RT %s\n" % t.author[1:-1]) + c + if shouldpost: + print(posttime) + # Create application if it does not exist + mastodon_api = register_app(mastuser, mastpasswd, mastinstance, mastodon_api) + + c = p.title + + if p.author != '(%s)' % twitter: + c = ("RT %s\n" % p.author[1:-1]) + c toot_media = [] # get the pictures... - for p in re.finditer(r"https://pbs.twimg.com/[^ \xa0\"]*", t.summary): - media = requests.get(p.group(0)) - media_posted = mastodon_api.media_post(media.content, mime_type=media.headers.get('content-type')) - toot_media.append(media_posted['id']) + for pic in re.finditer(r"https://pbs.twimg.com/[^ \xa0\"]*", p.summary): + if (not dryrun): + media = requests.get(pic.group(0)) + media_posted = mastodon_api.media_post(media.content, mime_type=media.headers.get('content-type')) + toot_media.append(media_posted['id']) + media = None + else: + print('Dryrun: not fetching ', pic.group(0), ' and not uploading it to mastodon') # replace t.co link by original URL m = re.search(r"http[^ \xa0]*", c) @@ -97,10 +142,130 @@ for t in reversed(d.entries): # remove ellipsis c = c.replace('\xa0…',' ') + print(c) - if toot_media is not None: - toot = mastodon_api.status_post(c, in_reply_to_id=None, media_ids=toot_media, sensitive=False, visibility='public', spoiler_text=None) - if "id" in toot: - db.execute("INSERT INTO tweets VALUES ( ? , ? , ? , ? , ? )", - (t.id, toot["id"], twitter, mastodon, instance)) + if (not dryrun): + toot = mastodon_api.status_post(c, in_reply_to_id=None, media_ids=toot_media, sensitive=False, visibility='unlisted', spoiler_text=None) + print( '--> toot posted!') + try: + db.execute("INSERT INTO posts VALUES ( ? , ? , ? , ? , ? )", (p.id, twitter, toot.id, mastuser, mastinstance)) sql.commit() + except: + print('database execution failed.') + print('p.id: ', p.id) + print('toot.id: ', toot.id) + + else: + print('Dryrun: not posting toot and not adding it to database') + print('------------------------') + + + +# soup.io section + +print('====== SOUP ======') + +h = html2text.HTML2Text() +h.ignore_links = True +h.ignore_images = True +h.body_width = 0 + +s = feedparser.parse('http://'+soup+'/rss') + +for p in reversed(s.entries): + # check if this tweet has been processed + db.execute( + 'SELECT * FROM posts WHERE srcpost = ? AND srcuser = ? AND mastuser = ? AND mastinstance = ?', + (p.id, soup, mastuser, mastinstance) + ) + last = db.fetchone() + + # process only unprocessed post less than n days old + posttime = datetime(p.published_parsed.tm_year, p.published_parsed.tm_mon, p.published_parsed.tm_mday, p.published_parsed.tm_hour, p.published_parsed.tm_min, p.published_parsed.tm_sec) + if last is None and (datetime.now() - posttime < timedelta(days=days)): + # Create application if it does not exist + mastodon_api = register_app(mastuser, mastpasswd, mastinstance, mastodon_api) + + print(p.link) + j = json.loads(p.soup_attributes) + + # get status id and user if twitter is source + twitterstatus = None + twitteruser = None + if (isinstance(j['source'], str)): + if ( j['source'].startswith('https://twitter.com/') or j['source'].startswith('https://mobile.twitter.com/')): + twitterurl = j['source'].split('/') + twitteruser = twitterurl[3] + if ( twitterurl[4] == 'status'): + twitterstatus = twitterurl[5] + + # get all tweeted statuses + db.execute('SELECT srcpost FROM posts where srcuser = ?', (twitter,)) + postedtweets = [] + for postedtweet in db.fetchall(): + postedtweets.append(postedtweet[0].split('/')[-1]) + + # check if already tweeted + if twitterstatus in postedtweets: + print('Already tweeted: ', j['source']) + + else: + # collect information about images + pics = [] + accepted_filetypes = ('.jpg', '.jpeg', '.png', '.webm', '.JPG', '.JPEG', '.PNG', '.WEBM') # let's don't do mp4 for now. + if (isinstance(j['source'], str) and j['source'].endswith(accepted_filetypes) ): + pics.append(j['source']) + elif ( 'url' in j and isinstance(j['url'], str) and j['url'].endswith(accepted_filetypes) ): + pics.append(j['url']) + + # get the images and post them to mastadon ... + toot_media = [] + for pic in pics: + if (not dryrun): + media = requests.get(pic) + print(pic, ' has mimetype ', media.headers.get('content-type')) + media_posted = mastodon_api.media_post(media.content, mime_type=media.headers.get('content-type')) + toot_media.append(media_posted['id']) + else: + print('Dryrun: not fetching ', pic, ' and not uploading it to mastodon') + + + # remove all html stuff - python module in use only supports markdown, not pure plaintext + textsrc = h.handle(p.summary_detail.value.replace("", "
")) + # free text from lines without visible characters + cleantextsrc = '' + for line in textsrc.split('\n'): + line = line.strip() + cleantextsrc += line + '\n' + + # strip newlines, reduce newlines, remove markdown bold (i know, ugly), do some clean up + text = cleantextsrc.strip('\n').replace('\n\n\n','\n\n').replace('**','').replace('\\--','') + + # link directly to source or use soup as source. + if (isinstance(j['source'], str) and j['source'] not in text): + source = '\n\nSource: ' + j['source'] + else: + source = '\n\nSource: ' + p.link + + # shorten text if too long + maximumlegth = 500 - 1 - len(source) - 50 # 50 ... just in case (if they also count attachement url and so on) + text = (text[:maximumlegth] + '…') if len(text) > maximumlegth else text + + # add source + text += source + + print(text) + + if (not dryrun): + # post toot + toot = mastodon_api.status_post(text, in_reply_to_id=None, media_ids=toot_media, sensitive=False, visibility='public', spoiler_text=None) + + # add entry to database + if "id" in toot: + db.execute("INSERT INTO posts VALUES ( ? , ? , ? , ? , ? )", (p.id, soup, toot.id, mastuser, mastinstance)) + sql.commit() + print( '--> ', p.id, ' posted!') + else: + print('Dryrun: not posting toot and not adding it to database') + + print('------------------------')