@@ -6,80 +6,125 @@ import json
import requests
import re
import sqlite3
import html2text
from datetime import datetime, date, time, timedelta
# Usage hint for the positional command line: printed when fewer than three
# arguments follow the script name.
# NOTE(review): the config loader further down treats sys.argv[1] as the path
# of a JSON config file, so this check looks like a leftover of the older
# positional interface - confirm whether it should still be here.
if len(sys.argv) < 4:
print("Usage: python3 tootbot.py twitter_account mastodon_login mastodon_passwd mastodon_instance")
# Load the JSON configuration. The path is taken from the first command-line
# argument; without one, the 'config.json' next to this script is used.
filedir = os.path.dirname(os.path.abspath(__file__))
if len(sys.argv) < 2:
    print("Using default config location: %s/config.json" % filedir)
    configpath = filedir+'/config.json'
else:
    configpath = sys.argv[1]

try:
    # 'with' closes the file handle even when parsing fails.
    with open(configpath) as configfile:
        config = json.load(configfile)
except FileNotFoundError:
    print("ERROR: Config file not found!")
    sys.exit(1)
except OSError as err:
    # Exists but cannot be read (permissions, is a directory, ...).
    print("ERROR: Config file could not be read: %s" % err)
    sys.exit(1)
except ValueError as err:
    # json.JSONDecodeError is a ValueError: the file is there but malformed.
    print("ERROR: Config file is not valid JSON: %s" % err)
    sys.exit(1)

# Pull the settings into module-level names; report a missing entry by name
# instead of dying with a bare traceback.
try:
    mastinstance = config['mastodon']['instance']
    mastuser = config['mastodon']['user']
    mastpasswd = config['mastodon']['password']
    twitter = config['sources']['twitter']
    soup = config['sources']['soup']
    dryrun = config['settings']['dryrun']
    days = config['settings']['days']
except KeyError as err:
    print("ERROR: Missing config entry: %s" % err)
    sys.exit(1)
# sqlite db to store processed tweets (and corresponding toots ids)
# NOTE(review): `sql` is assigned twice in a row; the second connect rebinds
# the name, so every statement below runs against the database at
# config['settings']['databasefilepath']. The hard-coded 'tootbot.db' connect
# looks like a leftover - confirm whether it should be dropped.
sql = sqlite3.connect('tootbot.db')
sql = sqlite3.connect(config['settings']['databasefilepath'] )
db = sql.cursor()
# Both tables are created on first run. The queries that filter on
# srcpost/srcuser/mastuser/mastinstance use `posts`.
db.execute('''CREATE TABLE IF NOT EXISTS tweets (tweet text, toot text, twitter text, mastodon text, instance text)''')
db.execute('''CREATE TABLE IF NOT EXISTS posts (srcpost text, srcuser text, mastpost text, mastuser text, mastinstance text)''')
# Shared Mastodon client handle; the posting loops assign the result of
# register_app() to it before talking to the API.
mastodon_api = None
def register_app(mastuser,mastpasswd,mastinstance,mastodon_api):
    """Return a logged-in Mastodon client, creating one only when needed.

    Args:
        mastuser: account login handed to ``log_in``; also the prefix of the
            ``<mastuser>.secret`` file the login writes.
        mastpasswd: password for that account.
        mastinstance: instance host name without scheme; ``https://`` is
            prepended and ``<mastinstance>.secret`` holds the app credentials.
        mastodon_api: an existing client, or ``None`` if none was created yet.

    Returns:
        ``mastodon_api`` unchanged when it is not ``None``; otherwise a newly
        created and logged-in client.

    Exits the process with status 1 when the app cannot be registered or the
    login fails.
    """
    # Callers assign the return value back to their client variable, so an
    # existing client must be handed back rather than dropped.
    if mastodon_api is not None:
        return mastodon_api

    client_secret = mastinstance+'.secret'
    base_url = 'https://'+mastinstance

    # Register the application once per instance; the secret file marks that
    # this has already happened.
    if not os.path.isfile(client_secret):
        if Mastodon.create_app(
            'metasyndicator',
            api_base_url=base_url,
            to_file=client_secret
        ):
            print('app created on instance '+mastinstance)
        else:
            print('failed to create app on instance '+mastinstance)
            sys.exit(1)

    try:
        mastodon_api = Mastodon(
            client_id=client_secret,
            api_base_url=base_url
        )
        mastodon_api.log_in(
            username=mastuser,
            password=mastpasswd,
            scopes=['read', 'write'],
            to_file=mastuser+".secret"
        )
    except Exception as err:
        # Keep the original message, but also say why the login failed.
        print("ERROR: First Login Failed!")
        print("Reason: %s" % err)
        sys.exit(1)
    return mastodon_api
# Positional command-line handling.
# NOTE(review): these statements read sys.argv[1], [2] and [3] unconditionally
# and rebind `days`, `twitter` and `mastodon_api`, which are also set from the
# config file / initialised earlier in this file. They look like remnants of
# the positional-argument interface - confirm whether they are meant to run.
# sys.argv[4]: Mastodon instance host, falling back to 'amicale.net'.
if len(sys.argv)>4:
instance = sys.argv[4]
else:
instance = 'amicale.net'
# sys.argv[5]: value used for the `timedelta(days=days)` age limit, default 1.
if len(sys.argv)>5:
days = int(sys.argv[5])
else:
days = 1
twitter = sys.argv[1]
mastodon = sys.argv[2]
passwd = sys.argv[3]
mastodon_api = None
# Fetch the account's tweets as an RSS feed through twitrss.me.
d = feedparser.parse('http://twitrss.me/twitter_user_to_rss/?user='+twitter)
# twitter section
for t in reversed(d.entries):
print('====== TWITTER ======')
t = feedparser.parse('http://twitrss.me/twitter_user_to_rss/?user='+twitter)
for p in reversed(t.entries):
# check if this tweet has been processed
db.execute('SELECT * FROM tweets WHERE tweet = ? AND twitter = ? and mastodon = ? and instance = ?',(t.id, twitter, mastodon, instance))
db.execute(
'SELECT * FROM posts WHERE srcpost = ? AND srcuser = ? AND mastuser = ? AND mastinstance = ?',
(p.id, twitter, mastuser, mastinstance)
)
last = db.fetchone()
# process only unprocessed tweets less than 1 day old
if last is None and (datetime.now()-datetime(t.published_parsed.tm_year, t.published_parsed.tm_mon, t.published_parsed.tm_mday, t.published_parsed.tm_hour, t.published_parsed.tm_min, t.published_parsed.tm_sec) < timedelta(days=days)):
if mastodon_api is None:
# Create application if it does not exist
if not os.path.isfile(instance+'.secret'):
if Mastodon.create_app(
'tootbot',
api_base_url='https://'+instance,
to_file = instance+'.secret'
):
print('tootbot app created on instance '+instance)
else:
print('failed to create app on instance '+instance )
sys.exit(1)
shouldpost = True
posttime = datetime(p.published_parsed.tm_year, p.published_parsed.tm_mon, p.published_parsed.tm_mday, p.published_parsed.tm_hour, p.published_parsed.tm_min, p.published_parsed.tm_sec)
if last is not None:
shouldpost = False
print("skip: already posted")
# process only unprocessed tweets less than n days old
if datetime.now() - posttime > timedelta(days=days):
shouldpost = False
print("skip: Post too old")
# kill tweets with fb links with fire!
if "https://www.facebook.com" in p.title or "https://m.facebook.com" in p.title:
shouldpost = False
print("skip: a Tweet that links to facebook? ... That's too much." )
try:
mastodon_api = Mastodon(
client_id=instance+'.secret',
api_base_url='https://'+instance
)
mastodon_api.log_in(
username=mastodon,
password=passwd,
scopes=['read', 'write'],
to_file=mastodon+".secret"
)
except:
print("ERROR: First Login Failed!")
sys.exit(1)
#h = BeautifulSoup(t.summary_detail.value, "html.parser")
c = t.title
if t.author != '(%s)' % twitter:
c = ("RT %s\n" % t.author[1:-1]) + c
if shouldpost:
print(posttime)
# Create application if it does not exist
mastodon_api = register_app(mastuser, mastpasswd, mastinstance, mastodon_api)
c = p.title
if p.author != '(%s)' % twitter:
c = ("RT %s\n" % p.author[1:-1]) + c
toot_media = []
# get the pictures...
for p in re.finditer(r"https://pbs.twimg.com/[^ \xa0\"]*", t.summary):
media = requests.get(p.group(0))
media_posted = mastodon_api.media_post(media.content, mime_type=media.headers.get('content-type'))
toot_media.append(media_posted['id'])
for pic in re.finditer(r"https://pbs.twimg.com/[^ \xa0\"]*", p.summary):
if (not dryrun):
media = requests.get(pic.group(0))
media_posted = mastodon_api.media_post(media.content, mime_type=media.headers.get('content-type'))
toot_media.append(media_posted['id'])
media = None
else:
print('Dryrun: not fetching ', pic.group(0), ' and not uploading it to mastodon')
# replace t.co link by original URL
m = re.search(r"http[^ \xa0]*", c)
@@ -97,10 +142,130 @@ for t in reversed(d.entries):
# remove ellipsis
c = c.replace('\xa0…',' ')
print(c)
if toot_media is not None :
toot = mastodon_api.status_post(c, in_reply_to_id=None, media_ids=toot_media, sensitive=False, visibility='public ', spoiler_text=None)
if "id" in toot:
db.execute("INSERT INTO tweets VALUES ( ? , ? , ? , ? , ? )",
(t.id, toot["id"], twitter, mastodon, instance))
# Publish the tweet text as an unlisted toot and record the tweet -> toot
# mapping in `posts` so the tweet is skipped on later runs. In dry-run mode
# nothing is posted or stored.
if not dryrun:
    # Mastodon.py checks `visibility` against a fixed list of values, so the
    # string must not carry stray whitespace.
    toot = mastodon_api.status_post(c, in_reply_to_id=None, media_ids=toot_media, sensitive=False, visibility='unlisted', spoiler_text=None)
    print( '--> toot posted!')
    try:
        db.execute("INSERT INTO posts VALUES ( ? , ? , ? , ? , ? )", (p.id, twitter, toot["id"], mastuser, mastinstance))
        sql.commit()
    except sqlite3.Error as err:
        # The toot is already online at this point; print both ids so the
        # missing row can be added by hand instead of re-posting next run.
        print('database execution failed.')
        print('error: ', err)
        print('p.id: ', p.id)
        print('toot.id: ', toot["id"])
else:
    print('Dryrun: not posting toot and not adding it to database')
print('------------------------')
# soup.io section
# Mirrors the posts of the configured soup.io blog to Mastodon: every feed
# entry that is new, young enough and not already covered by a mirrored tweet
# is turned into a plain-text toot (plus at most one attached image).
print('====== SOUP ======')

# Needed for the UTC age check below; imported here because the file-level
# datetime import does not provide it.
from datetime import timezone

# html2text can only produce markdown, so links and images are dropped and
# line wrapping is disabled to get as close to plain text as it allows.
h = html2text.HTML2Text()
h.ignore_links = True
h.ignore_images = True
h.body_width = 0

# let's don't do mp4 for now. Compared case-insensitively.
accepted_filetypes = ('.jpg', '.jpeg', '.png', '.webm')

s = feedparser.parse('http://'+soup+'/rss')

for p in reversed(s.entries):
    # check if this post has been processed
    db.execute(
        'SELECT * FROM posts WHERE srcpost = ? AND srcuser = ? AND mastuser = ? AND mastinstance = ?',
        (p.id, soup, mastuser, mastinstance)
    )
    last = db.fetchone()

    # process only unprocessed post less than n days old.
    # feedparser hands out published_parsed in UTC, so the age has to be
    # measured against UTC "now" rather than local time.
    posttime = datetime(*p.published_parsed[:6])
    utcnow = datetime.now(timezone.utc).replace(tzinfo=None)
    if last is None and (utcnow - posttime < timedelta(days=days)):
        # Create application if it does not exist; never replace a client
        # that is already logged in.
        if mastodon_api is None:
            mastodon_api = register_app(mastuser, mastpasswd, mastinstance, mastodon_api)

        print(p.link)
        j = json.loads(p.soup_attributes)

        # 'source' may be missing or not a string; normalise once.
        source_url = j.get('source')
        has_source = isinstance(source_url, str)

        # get status id and user if twitter is source
        twitterstatus = None
        twitteruser = None
        if has_source and source_url.startswith(('https://twitter.com/', 'https://mobile.twitter.com/')):
            twitterurl = source_url.split('/')
            # The matched prefix ends in '/', so index 3 always exists;
            # profile links have no '/status/<id>' part after it.
            twitteruser = twitterurl[3]
            if len(twitterurl) > 5 and twitterurl[4] == 'status':
                twitterstatus = twitterurl[5]

        # get all tweeted statuses (last path segment of the stored tweet id)
        db.execute('SELECT srcpost FROM posts where srcuser = ?', (twitter,))
        postedtweets = [postedtweet[0].split('/')[-1] for postedtweet in db.fetchall()]

        # check if already tweeted
        if twitterstatus in postedtweets:
            print('Already tweeted: ', source_url)
        else:
            # collect information about images
            pics = []
            if has_source and source_url.lower().endswith(accepted_filetypes):
                pics.append(source_url)
            elif isinstance(j.get('url'), str) and j['url'].lower().endswith(accepted_filetypes):
                pics.append(j['url'])

            # get the images and post them to mastodon ...
            toot_media = []
            for pic in pics:
                if not dryrun:
                    media = requests.get(pic)
                    print(pic, ' has mimetype ', media.headers.get('content-type'))
                    media_posted = mastodon_api.media_post(media.content, mime_type=media.headers.get('content-type'))
                    toot_media.append(media_posted['id'])
                else:
                    print('Dryrun: not fetching ', pic, ' and not uploading it to mastodon')

            # remove all html stuff - python module in use only supports markdown, not pure plaintext
            textsrc = h.handle(p.summary_detail.value.replace("<small>", "<br><small>"))

            # trim surrounding whitespace from every line
            cleantextsrc = '\n'.join(line.strip() for line in textsrc.split('\n'))

            # strip newlines, reduce newlines, remove markdown bold (i know, ugly), do some clean up
            text = cleantextsrc.strip('\n').replace('\n\n\n','\n\n').replace('**','').replace('\\--','')

            # link directly to source or use soup as source.
            if has_source and source_url not in text:
                source = '\n\nSource: ' + source_url
            else:
                source = '\n\nSource: ' + p.link

            # shorten text if too long
            # 500 char limit, 1 for the ellipsis, 50 ... just in case (if they
            # also count attachment url and so on). Clamped at 0 so a very
            # long source line cannot turn the slice bound negative.
            maximumlength = max(0, 500 - 1 - len(source) - 50)
            if len(text) > maximumlength:
                text = text[:maximumlength] + '…'

            # add source
            text += source

            print(text)

            if not dryrun:
                # post toot
                toot = mastodon_api.status_post(text, in_reply_to_id=None, media_ids=toot_media, sensitive=False, visibility='public', spoiler_text=None)

                # add entry to database
                if "id" in toot:
                    db.execute("INSERT INTO posts VALUES ( ? , ? , ? , ? , ? )", (p.id, soup, toot["id"], mastuser, mastinstance))
                    sql.commit()
                    print( '--> ', p.id, ' posted!')
            else:
                print('Dryrun: not posting toot and not adding it to database')

        print('------------------------')