You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

299 lines
11 KiB

  1. import os.path
  2. import sys
  3. import feedparser
  4. from mastodon import Mastodon
  5. import json
  6. import requests
  7. import re
  8. import sqlite3
  9. import html2text
  10. import time
  11. from datetime import datetime, date, timedelta
  12. # default config location is a 'config.json' next to the script.
  13. try:
  14. filedir = os.path.dirname(os.path.abspath(__file__))
  15. if len(sys.argv) < 2:
  16. configpath = filedir+'/config.json'
  17. print("Using default config location: ", configpath)
  18. config = json.load(open(configpath))
  19. else:
  20. configpath = sys.argv[1]
  21. config = json.load(open(configpath))
  22. except:
  23. print("Problem reading config file: ", configpath)
  24. print("ERROR: Config file not found or invalid!")
  25. sys.exit(1)
  26. mastinstance = config['mastodon']['instance']
  27. mastuser = config['mastodon']['user']
  28. mastpasswd = config['mastodon']['password']
  29. twitteruser = config['sources']['twitter']['user']
  30. soupuser = config['sources']['soup']['user']
  31. dryrun = config['settings']['dryrun']
  32. days = config['settings']['days']
  33. delay = config['settings']['delay']
  34. # sqlite db to store processed tweets (and corresponding toots ids)
  35. sql = sqlite3.connect(config['settings']['databasefilepath'])
  36. db = sql.cursor()
  37. db.execute('''CREATE TABLE IF NOT EXISTS posts (srcpost text, srcuser text, mastpost text, mastuser text, mastinstance text)''')
  38. mastodon_api = None
  39. def register_app(mastuser,mastpasswd,mastinstance,mastodon_api):
  40. if mastodon_api is None:
  41. if not os.path.isfile(mastinstance+'.secret'):
  42. if Mastodon.create_app(
  43. 'metasyndicator',
  44. api_base_url='https://'+mastinstance,
  45. to_file = mastinstance+'.secret'
  46. ):
  47. print('app created on instance '+mastinstance)
  48. else:
  49. print('failed to create app on instance '+mastinstance)
  50. sys.exit(1)
  51. try:
  52. mastodon_api = Mastodon(
  53. client_id=mastinstance+'.secret',
  54. api_base_url='https://'+mastinstance
  55. )
  56. mastodon_api.log_in(
  57. username=mastuser,
  58. password=mastpasswd,
  59. scopes=['read', 'write'],
  60. to_file=mastuser+".secret"
  61. )
  62. return mastodon_api
  63. except:
  64. print("ERROR: First Login Failed!")
  65. sys.exit(1)
  66. # twitter section
  67. print('====== TWITTER ======')
  68. t = feedparser.parse('http://twitrss.me/twitter_user_to_rss/?user='+twitteruser)
  69. # start with oldest
  70. for p in reversed(t.entries):
  71. # check if this tweet has been processed
  72. db.execute(
  73. 'SELECT * FROM posts WHERE srcpost = ? AND srcuser = ? AND mastuser = ? AND mastinstance = ?',
  74. (p.id, twitteruser, mastuser, mastinstance)
  75. )
  76. last = db.fetchone()
  77. print('Processing: %s' % p.id)
  78. shouldpost = True
  79. posttime = datetime(p.published_parsed.tm_year, p.published_parsed.tm_mon, p.published_parsed.tm_mday, p.published_parsed.tm_hour, p.published_parsed.tm_min, p.published_parsed.tm_sec)
  80. if last is not None:
  81. shouldpost = False
  82. print("skip: already posted")
  83. # process only unprocessed tweets less than n days old
  84. age = datetime.now() - posttime
  85. if age > timedelta(days=days):
  86. shouldpost = False
  87. print("skip: Posting older than %s days (%s)" % (days, age) )
  88. # kill tweets with fb links with fire!
  89. if "https://www.facebook.com" in p.title or "https://m.facebook.com" in p.title:
  90. shouldpost = False
  91. print("skip: a Tweet that links to facebook? ... That's too much.")
  92. if shouldpost:
  93. print(posttime)
  94. # Create application if it does not exist
  95. mastodon_api = register_app(mastuser, mastpasswd, mastinstance, mastodon_api)
  96. c = p.title
  97. if p.author.lower() != '(@%s)' % twitteruser.lower():
  98. c = ("RT %s from Twitter:\n" % p.author[1:-1]) + c
  99. toot_media = []
  100. # get the pictures...
  101. for pic in re.finditer(r"https://pbs.twimg.com/[^ \xa0\"]*", p.summary):
  102. if (not dryrun):
  103. media = requests.get(pic.group(0))
  104. media_posted = mastodon_api.media_post(media.content, mime_type=media.headers.get('content-type'))
  105. toot_media.append(media_posted['id'])
  106. media = None
  107. else:
  108. print('Dryrun: not fetching ', pic.group(0), ' and not uploading it to mastodon')
  109. # replace t.co link by original URL
  110. m = re.search(r"http[^ \xa0]*", c)
  111. if m != None:
  112. l = m.group(0)
  113. r = requests.get(l, allow_redirects=False)
  114. if r.status_code in {301,302}:
  115. c = c.replace(l,r.headers.get('Location'))
  116. # remove pic.twitter.com links
  117. m = re.search(r"pic.twitter.com[^ \xa0]*", c)
  118. if m != None:
  119. l = m.group(0)
  120. c = c.replace(l,' ')
  121. # remove ellipsis
  122. c = c.replace('\xa0…',' ')
  123. c += '\n\nSource: %s' % p.link
  124. print(c)
  125. if (not dryrun):
  126. toot = mastodon_api.status_post(c, in_reply_to_id=None, media_ids=toot_media, sensitive=False, visibility=config['sources']['twitter']['visibility'], spoiler_text=None)
  127. print( '--> toot posted!')
  128. try:
  129. db.execute("INSERT INTO posts VALUES ( ? , ? , ? , ? , ? )", (p.id, twitteruser, toot.id, mastuser, mastinstance))
  130. sql.commit()
  131. except:
  132. print('database execution failed.')
  133. print('p.id: ', p.id)
  134. print('toot.id: ', toot.id)
  135. else:
  136. print('Dryrun: not posting toot and not adding it to database')
  137. print('waiting %s seconds ...' % delay)
  138. time.sleep(delay)
  139. print('------------------------')
  140. # soup.io section
  141. print('====== SOUP ======')
  142. h = html2text.HTML2Text()
  143. h.ignore_links = True
  144. h.ignore_images = True
  145. h.body_width = 0
  146. s = feedparser.parse('http://'+soupuser+'/rss')
  147. # start with oldest
  148. for p in reversed(s.entries):
  149. # check if this tweet has been processed
  150. db.execute(
  151. 'SELECT * FROM posts WHERE srcpost = ? AND srcuser = ? AND mastuser = ? AND mastinstance = ?',
  152. (p.id, soupuser, mastuser, mastinstance)
  153. )
  154. last = db.fetchone()
  155. print('Processing: %s' % p.id)
  156. shouldpost = True
  157. if last is not None:
  158. shouldpost = False
  159. print("skip: already posted")
  160. # process only unprocessed tweets less than n days old
  161. posttime = datetime(p.published_parsed.tm_year, p.published_parsed.tm_mon, p.published_parsed.tm_mday, p.published_parsed.tm_hour, p.published_parsed.tm_min, p.published_parsed.tm_sec)
  162. age = datetime.now() - posttime
  163. if age > timedelta(days=days):
  164. shouldpost = False
  165. print("skip: Posting older than %s days (%s)" % (days, age) )
  166. if shouldpost:
  167. # Create application if it does not exist
  168. mastodon_api = register_app(mastuser, mastpasswd, mastinstance, mastodon_api)
  169. print(p.link)
  170. j = json.loads(p.soup_attributes)
  171. # get status id and user if twitter is source
  172. tweet_id = None
  173. tweet_author = None
  174. if (isinstance(j['source'], str)):
  175. if ( j['source'].startswith('https://twitter.com/') or j['source'].startswith('https://mobile.twitter.com/')):
  176. twitterurl = j['source'].split('/')
  177. tweet_author = twitterurl[3]
  178. if ( twitterurl[4] == 'status'):
  179. tweet_id = twitterurl[5]
  180. # get all tweeted statuses
  181. print(twitteruser)
  182. db.execute('SELECT srcpost FROM posts where srcuser = ?', (twitteruser,))
  183. postedtweets = []
  184. for postedtweet in db.fetchall():
  185. postedtweets.append(postedtweet[0].split('/')[-1])
  186. # check if already tweeted
  187. if tweet_id in postedtweets:
  188. print('Already posted the Tweet: ', j['source'])
  189. else:
  190. # collect information about images
  191. pics = []
  192. accepted_filetypes = ('.jpg', '.jpeg', '.png', '.webm', '.JPG', '.JPEG', '.PNG', '.WEBM') # let's don't do mp4 for now.
  193. if (isinstance(j['source'], str) and j['source'].endswith(accepted_filetypes) ):
  194. pics.append(j['source'])
  195. elif ( 'url' in j and isinstance(j['url'], str) and j['url'].endswith(accepted_filetypes) ):
  196. pics.append(j['url'])
  197. # get the images and post them to mastadon ...
  198. toot_media = []
  199. for pic in pics:
  200. if (not dryrun):
  201. media = requests.get(pic)
  202. print(pic, ' has mimetype ', media.headers.get('content-type'))
  203. media_posted = mastodon_api.media_post(media.content, mime_type=media.headers.get('content-type'))
  204. toot_media.append(media_posted['id'])
  205. else:
  206. print('Dryrun: not fetching ', pic, ' and not uploading it to mastodon')
  207. poster = p.title.split(']')[0].strip('[')
  208. poster_text = "\n(via %s on soup.io)" % poster
  209. # remove all html stuff - python module in use only supports markdown, not pure plaintext
  210. textsrc = h.handle(p.summary_detail.value.replace("<small>", "<br><small>"))
  211. # free text from lines without visible characters
  212. cleantextsrc = ''
  213. for line in textsrc.split('\n'):
  214. line = line.strip()
  215. cleantextsrc += line + '\n'
  216. # strip newlines, reduce newlines, remove markdown bold (i know, ugly), do some clean up
  217. text = cleantextsrc.strip('\n').replace('\n\n\n','\n\n').replace('**','').replace('\\--','')
  218. # link directly to source or use soup as source.
  219. if (isinstance(j['source'], str) and j['source'] not in text):
  220. source = '\n\nSource: ' + j['source']
  221. else:
  222. source = '\n\nSource: ' + p.link
  223. # shorten text if too long
  224. maximumlegth = 500 - 1 - len(poster_text) - len(source) - 50 # 50 ... just in case (if they also count attachement url and so on)
  225. text = (text[:maximumlegth] + '…') if len(text) > maximumlegth else text
  226. # add source
  227. text += source
  228. # add soup poster
  229. text += poster_text
  230. print(text)
  231. if (not dryrun):
  232. # post toot
  233. toot = mastodon_api.status_post(text, in_reply_to_id=None, media_ids=toot_media, sensitive=False, visibility=config['sources']['soup']['visibility'], spoiler_text=None)
  234. # add entry to database
  235. if "id" in toot:
  236. db.execute("INSERT INTO posts VALUES ( ? , ? , ? , ? , ? )", (p.id, soupuser, toot.id, mastuser, mastinstance))
  237. sql.commit()
  238. print( '--> ', p.id, ' posted!')
  239. else:
  240. print('Dryrun: not posting toot and not adding it to database')
  241. print('waiting %s seconds ...' % delay)
  242. time.sleep(delay)
  243. print('------------------------')