# redplanet/tootbot-spicy

import json
import os.path
import re
import sqlite3
import sys
from datetime import datetime, date, time, timedelta, timezone

import feedparser
import html2text
import requests
from mastodon import Mastodon


# Load the JSON configuration into `config`.
# default config location is a 'config.json' next to the script; an
# alternative path may be given as the first command line argument.
# The script exits with status 1 if the file cannot be read or parsed.
try:
    filedir = os.path.dirname(os.path.abspath(__file__))
    if len(sys.argv) < 2:
        print("Using default config location:  %s/config.json" % filedir)
        configpath = filedir+'/config.json'
    else:
        configpath = sys.argv[1]

    # 'with' guarantees the file handle is closed, even if parsing fails
    with open(configpath) as configfile:
        config = json.load(configfile)

except OSError as err:
    # file missing or not readable
    print("ERROR: Config file not found!")
    print(err)
    sys.exit(1)
except ValueError as err:
    # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors:
    # the file exists but does not contain valid JSON
    print("ERROR: Config file is not valid JSON!")
    print(err)
    sys.exit(1)


# --- mastodon account that publishes the toots ---
_mastodon_cfg = config['mastodon']
mastinstance = _mastodon_cfg['instance']
mastuser = _mastodon_cfg['user']
mastpasswd = _mastodon_cfg['password']

# --- feeds that are mirrored ---
_sources_cfg = config['sources']
twitter = _sources_cfg['twitter']
soup = _sources_cfg['soup']

# --- behaviour switches ---
_settings_cfg = config['settings']
dryrun = _settings_cfg['dryrun']
days = _settings_cfg['days']


# sqlite db remembering which source posts were already mirrored
# (one row per source post together with the id of the resulting toot)
_create_posts_table = '''CREATE TABLE IF NOT EXISTS posts (srcpost text, srcuser text, mastpost text, mastuser text, mastinstance text)'''
sql = sqlite3.connect(_settings_cfg['databasefilepath'])
db = sql.cursor()
db.execute(_create_posts_table)

# mastodon client; stays None until register_app() is called for the first time
mastodon_api = None

def register_app(mastuser,mastpasswd,mastinstance,mastodon_api):
    """Return a logged-in Mastodon client, creating it on first use.

    If ``mastodon_api`` is already set it is returned unchanged, so repeated
    calls do not register or log in again.  Otherwise the app is registered
    on the instance (unless ``<mastinstance>.secret`` already exists) and the
    user is logged in; the access token is written to ``<mastuser>.secret``.

    Parameters:
        mastuser: login name of the mastodon account.
        mastpasswd: password of the mastodon account.
        mastinstance: host name of the mastodon instance (without scheme).
        mastodon_api: an existing client, or None to create a new one.

    Returns:
        The existing or the newly created, logged-in client.

    Exits the process with status 1 if app creation or login fails.
    """
    # reuse the existing session instead of logging in for every single post
    if mastodon_api is not None:
        return mastodon_api

    if not os.path.isfile(mastinstance+'.secret'):
        if Mastodon.create_app(
            'metasyndicator',
            api_base_url='https://'+mastinstance,
            to_file = mastinstance+'.secret'
        ):
            print('app created on instance '+mastinstance)
        else:
            print('failed to create app on instance '+mastinstance)
            sys.exit(1)

    try:
        mastodon_api = Mastodon(
          client_id=mastinstance+'.secret',
          api_base_url='https://'+mastinstance
        )
        mastodon_api.log_in(
            username=mastuser,
            password=mastpasswd,
            scopes=['read', 'write'],
            to_file=mastuser+".secret"
        )
        return mastodon_api
    except Exception as err:
        print("ERROR: First Login Failed!")
        # show the cause instead of hiding it
        print(err)
        sys.exit(1)


# twitter section
#
# Reads the RSS rendering of the configured twitter account and posts every
# entry to mastodon that is not yet recorded in the database, is younger than
# `days` days and does not link to facebook.

print('====== TWITTER ======')

t = feedparser.parse('http://twitrss.me/twitter_user_to_rss/?user='+twitter)

# entries are processed in reverse feed order
for p in reversed(t.entries):
    # check if this tweet has been processed
    db.execute(
        'SELECT * FROM posts WHERE srcpost = ? AND srcuser = ?  AND mastuser = ? AND mastinstance = ?',
        (p.id, twitter, mastuser, mastinstance)
    )
    last = db.fetchone()

    shouldpost = True
    # first six struct_time fields: year, month, day, hour, minute, second
    posttime = datetime(*p.published_parsed[:6])
    if last is not None:
        shouldpost = False
        print("skip: already posted")
    # process only unprocessed tweets less than n days old
    # published_parsed is expressed in UTC, so the age has to be measured
    # against the current UTC time, not against local time
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    if now_utc - posttime > timedelta(days=days):
        shouldpost = False
        print("skip: Post too old")
    # kill tweets with fb links with fire!
    if "https://www.facebook.com" in p.title or "https://m.facebook.com" in p.title:
        shouldpost = False
        print("skip: a Tweet that links to facebook? ... That's too much.")


    if shouldpost:
        print(posttime)
        # Create application if it does not exist
        mastodon_api = register_app(mastuser, mastpasswd, mastinstance, mastodon_api)

        c = p.title

        # entries whose author differs from the configured account are
        # prefixed as retweets; [1:-1] drops the surrounding parentheses
        if p.author != '(%s)' % twitter:
            c = ("RT %s\n" % p.author[1:-1]) + c
        toot_media = []
        # get the pictures...
        for pic in re.finditer(r"https://pbs.twimg.com/[^ \xa0\"]*", p.summary):
            if (not dryrun):
                media = requests.get(pic.group(0))
                media_posted = mastodon_api.media_post(media.content, mime_type=media.headers.get('content-type'))
                toot_media.append(media_posted['id'])
                media = None
            else:
                print('Dryrun: not fetching ', pic.group(0), ' and not uploading it to mastodon')

        # replace t.co link by original URL (only the first link is resolved)
        m = re.search(r"http[^ \xa0]*", c)
        if m is not None:
            l = m.group(0)
            r = requests.get(l, allow_redirects=False)
            location = r.headers.get('Location')
            # a redirect without Location header must not break the replace
            if r.status_code in {301,302} and location:
                c = c.replace(l, location)

        # remove pic.twitter.com links
        m = re.search(r"pic.twitter.com[^ \xa0]*", c)
        if m is not None:
            l = m.group(0)
            c = c.replace(l,' ')

        # remove ellipsis
        c = c.replace('\xa0…',' ')
        print(c)

        if (not dryrun):
            toot = mastodon_api.status_post(c, in_reply_to_id=None, media_ids=toot_media, sensitive=False, visibility='unlisted', spoiler_text=None)
            print( '--> toot posted!')
            try:
                db.execute("INSERT INTO posts VALUES ( ? , ? , ? , ? , ? )", (p.id, twitter, toot.id, mastuser, mastinstance))
                sql.commit()
            except sqlite3.Error as err:
                # the toot is already online; report the ids so the row can
                # be added by hand
                print('database execution failed.')
                print('error: ', err)
                print('p.id: ', p.id)
                print('toot.id: ', toot.id)

        else:
            print('Dryrun: not posting toot and not adding it to database')
        print('------------------------')


# soup.io section
#
# Reads the RSS feed of the configured soup and posts every entry to mastodon
# that is not yet recorded in the database and is younger than `days` days.
# Entries whose source is a tweet already mirrored by the twitter section are
# skipped.

print('====== SOUP ======')

# html to text converter; links and images are dropped, lines are not wrapped
h = html2text.HTML2Text()
h.ignore_links = True
h.ignore_images = True
h.body_width = 0

s = feedparser.parse('http://'+soup+'/rss')

# entries are processed in reverse feed order
for p in reversed(s.entries):
    # check if this soup post has been processed
    db.execute(
        'SELECT * FROM posts WHERE srcpost = ? AND srcuser = ?  AND mastuser = ? AND mastinstance = ?',
        (p.id, soup, mastuser, mastinstance)
    )
    last = db.fetchone()

    # process only unprocessed post less than n days old
    # published_parsed is expressed in UTC, so the age has to be measured
    # against the current UTC time, not against local time
    posttime = datetime(*p.published_parsed[:6])
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    if last is None and (now_utc - posttime < timedelta(days=days)):
        # Create application if it does not exist
        mastodon_api = register_app(mastuser, mastpasswd, mastinstance, mastodon_api)

        print(p.link)
        j = json.loads(p.soup_attributes)
        # a missing 'source' key is handled like a non-string source
        source_url = j.get('source')

        # get status id and user if twitter is source
        twitterstatus = None
        twitteruser = None
        if isinstance(source_url, str):
            if source_url.startswith(('https://twitter.com/', 'https://mobile.twitter.com/')):
                # ['https:', '', host, user, 'status', id, ...]
                twitterurl = source_url.split('/')
                twitteruser = twitterurl[3]
                # links without a /status/<id> part (e.g. profile links)
                # have fewer components
                if len(twitterurl) > 5 and twitterurl[4] == 'status':
                    twitterstatus = twitterurl[5]

        # get all tweeted statuses (last path component of the stored tweet id)
        db.execute('SELECT srcpost FROM posts where srcuser = ?', (twitter,))
        postedtweets = {postedtweet[0].split('/')[-1] for postedtweet in db.fetchall()}

        # check if already tweeted
        if twitterstatus in postedtweets:
            print('Already tweeted: ', source_url)

        else:
            # collect information about images
            pics = []
            accepted_filetypes = ('.jpg', '.jpeg', '.png', '.webm', '.JPG', '.JPEG', '.PNG', '.WEBM') # let's don't do mp4 for now.
            if (isinstance(source_url, str) and source_url.endswith(accepted_filetypes) ):
                pics.append(source_url)
            elif ( 'url' in j and isinstance(j['url'], str) and j['url'].endswith(accepted_filetypes) ):
                pics.append(j['url'])

            # get the images and post them to mastodon ...
            toot_media = []
            for pic in pics:
                if (not dryrun):
                    media = requests.get(pic)
                    print(pic, ' has mimetype ', media.headers.get('content-type'))
                    media_posted = mastodon_api.media_post(media.content, mime_type=media.headers.get('content-type'))
                    toot_media.append(media_posted['id'])
                else:
                    print('Dryrun: not fetching ', pic, ' and not uploading it to mastodon')


            # remove all html stuff - python module in use only supports markdown, not pure plaintext
            textsrc = h.handle(p.summary_detail.value.replace("<small>", "<br><small>"))
            # strip surrounding whitespace from every line, so whitespace-only
            # lines become empty lines
            cleantextsrc = ''
            for line in textsrc.split('\n'):
                line = line.strip()
                cleantextsrc += line + '\n'

            # strip newlines, reduce newlines, remove markdown bold (i know, ugly), do some clean up
            text = cleantextsrc.strip('\n').replace('\n\n\n','\n\n').replace('**','').replace('\\--','')

            # link directly to source or use soup as source.
            if (isinstance(source_url, str) and source_url not in text):
                source = '\n\nSource: ' + source_url
            else:
                source = '\n\nSource: ' + p.link

            # shorten text if too long
            maximumlegth = 500 - 1 - len(source) - 50 # 50 ... just in case (if they also count attachment url and so on)
            text = (text[:maximumlegth] + '…') if len(text) > maximumlegth else text

            # add source
            text += source

            print(text)

            if (not dryrun):
                # post toot
                toot = mastodon_api.status_post(text, in_reply_to_id=None, media_ids=toot_media, sensitive=False, visibility='public', spoiler_text=None)

                # add entry to database
                if "id" in toot:
                    db.execute("INSERT INTO posts VALUES ( ? , ? , ? , ? , ? )", (p.id, soup, toot.id, mastuser, mastinstance))
                    sql.commit()
                    print( '--> ',  p.id, ' posted!')
            else:
                print('Dryrun: not posting toot and not adding it to database')

        print('------------------------')