You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

272 lines
9.8 KiB

import json
import os.path
import re
import sqlite3
import sys
from datetime import datetime, date, time, timedelta, timezone

import feedparser
import html2text
import requests
from mastodon import Mastodon
  11. # default config location is a 'config.json' next to the script.
  12. try:
  13. filedir = os.path.dirname(os.path.abspath(__file__))
  14. if len(sys.argv) < 2:
  15. print("Using default config location: %s/config.json" % filedir)
  16. config = json.load(open(filedir+'/config.json'))
  17. else:
  18. config = json.load(open(sys.argv[1]))
  19. except:
  20. print("ERROR: Config file not found!")
  21. sys.exit(1)
  22. mastinstance = config['mastodon']['instance']
  23. mastuser = config['mastodon']['user']
  24. mastpasswd = config['mastodon']['password']
  25. twitter = config['sources']['twitter']
  26. soup = config['sources']['soup']
  27. dryrun = config['settings']['dryrun']
  28. days = config['settings']['days']
  29. # sqlite db to store processed tweets (and corresponding toots ids)
  30. sql = sqlite3.connect(config['settings']['databasefilepath'])
  31. db = sql.cursor()
  32. db.execute('''CREATE TABLE IF NOT EXISTS posts (srcpost text, srcuser text, mastpost text, mastuser text, mastinstance text)''')
  33. mastodon_api = None
  34. def register_app(mastuser,mastpasswd,mastinstance,mastodon_api):
  35. if mastodon_api is None:
  36. if not os.path.isfile(mastinstance+'.secret'):
  37. if Mastodon.create_app(
  38. 'metasyndicator',
  39. api_base_url='https://'+mastinstance,
  40. to_file = mastinstance+'.secret'
  41. ):
  42. print('app created on instance '+mastinstance)
  43. else:
  44. print('failed to create app on instance '+mastinstance)
  45. sys.exit(1)
  46. try:
  47. mastodon_api = Mastodon(
  48. client_id=mastinstance+'.secret',
  49. api_base_url='https://'+mastinstance
  50. )
  51. mastodon_api.log_in(
  52. username=mastuser,
  53. password=mastpasswd,
  54. scopes=['read', 'write'],
  55. to_file=mastuser+".secret"
  56. )
  57. return mastodon_api
  58. except:
  59. print("ERROR: First Login Failed!")
  60. sys.exit(1)
  61. # twitter section
  62. print('====== TWITTER ======')
  63. t = feedparser.parse('http://twitrss.me/twitter_user_to_rss/?user='+twitter)
  64. for p in reversed(t.entries):
  65. # check if this tweet has been processed
  66. db.execute(
  67. 'SELECT * FROM posts WHERE srcpost = ? AND srcuser = ? AND mastuser = ? AND mastinstance = ?',
  68. (p.id, twitter, mastuser, mastinstance)
  69. )
  70. last = db.fetchone()
  71. shouldpost = True
  72. posttime = datetime(p.published_parsed.tm_year, p.published_parsed.tm_mon, p.published_parsed.tm_mday, p.published_parsed.tm_hour, p.published_parsed.tm_min, p.published_parsed.tm_sec)
  73. if last is not None:
  74. shouldpost = False
  75. print("skip: already posted")
  76. # process only unprocessed tweets less than n days old
  77. if datetime.now() - posttime > timedelta(days=days):
  78. shouldpost = False
  79. print("skip: Post too old")
  80. # kill tweets with fb links with fire!
  81. if "https://www.facebook.com" in p.title or "https://m.facebook.com" in p.title:
  82. shouldpost = False
  83. print("skip: a Tweet that links to facebook? ... That's too much.")
  84. if shouldpost:
  85. print(posttime)
  86. # Create application if it does not exist
  87. mastodon_api = register_app(mastuser, mastpasswd, mastinstance, mastodon_api)
  88. c = p.title
  89. if p.author != '(%s)' % twitter:
  90. c = ("RT %s\n" % p.author[1:-1]) + c
  91. toot_media = []
  92. # get the pictures...
  93. for pic in re.finditer(r"https://pbs.twimg.com/[^ \xa0\"]*", p.summary):
  94. if (not dryrun):
  95. media = requests.get(pic.group(0))
  96. media_posted = mastodon_api.media_post(media.content, mime_type=media.headers.get('content-type'))
  97. toot_media.append(media_posted['id'])
  98. media = None
  99. else:
  100. print('Dryrun: not fetching ', pic.group(0), ' and not uploading it to mastodon')
  101. # replace t.co link by original URL
  102. m = re.search(r"http[^ \xa0]*", c)
  103. if m != None:
  104. l = m.group(0)
  105. r = requests.get(l, allow_redirects=False)
  106. if r.status_code in {301,302}:
  107. c = c.replace(l,r.headers.get('Location'))
  108. # remove pic.twitter.com links
  109. m = re.search(r"pic.twitter.com[^ \xa0]*", c)
  110. if m != None:
  111. l = m.group(0)
  112. c = c.replace(l,' ')
  113. # remove ellipsis
  114. c = c.replace('\xa0…',' ')
  115. print(c)
  116. if (not dryrun):
  117. toot = mastodon_api.status_post(c, in_reply_to_id=None, media_ids=toot_media, sensitive=False, visibility='unlisted', spoiler_text=None)
  118. print( '--> toot posted!')
  119. try:
  120. db.execute("INSERT INTO posts VALUES ( ? , ? , ? , ? , ? )", (p.id, twitter, toot.id, mastuser, mastinstance))
  121. sql.commit()
  122. except:
  123. print('database execution failed.')
  124. print('p.id: ', p.id)
  125. print('toot.id: ', toot.id)
  126. else:
  127. print('Dryrun: not posting toot and not adding it to database')
  128. print('------------------------')
  129. # soup.io section
  130. print('====== SOUP ======')
  131. h = html2text.HTML2Text()
  132. h.ignore_links = True
  133. h.ignore_images = True
  134. h.body_width = 0
  135. s = feedparser.parse('http://'+soup+'/rss')
  136. for p in reversed(s.entries):
  137. # check if this tweet has been processed
  138. db.execute(
  139. 'SELECT * FROM posts WHERE srcpost = ? AND srcuser = ? AND mastuser = ? AND mastinstance = ?',
  140. (p.id, soup, mastuser, mastinstance)
  141. )
  142. last = db.fetchone()
  143. # process only unprocessed post less than n days old
  144. posttime = datetime(p.published_parsed.tm_year, p.published_parsed.tm_mon, p.published_parsed.tm_mday, p.published_parsed.tm_hour, p.published_parsed.tm_min, p.published_parsed.tm_sec)
  145. if last is None and (datetime.now() - posttime < timedelta(days=days)):
  146. # Create application if it does not exist
  147. mastodon_api = register_app(mastuser, mastpasswd, mastinstance, mastodon_api)
  148. print(p.link)
  149. j = json.loads(p.soup_attributes)
  150. # get status id and user if twitter is source
  151. twitterstatus = None
  152. twitteruser = None
  153. if (isinstance(j['source'], str)):
  154. if ( j['source'].startswith('https://twitter.com/') or j['source'].startswith('https://mobile.twitter.com/')):
  155. twitterurl = j['source'].split('/')
  156. twitteruser = twitterurl[3]
  157. if ( twitterurl[4] == 'status'):
  158. twitterstatus = twitterurl[5]
  159. # get all tweeted statuses
  160. db.execute('SELECT srcpost FROM posts where srcuser = ?', (twitter,))
  161. postedtweets = []
  162. for postedtweet in db.fetchall():
  163. postedtweets.append(postedtweet[0].split('/')[-1])
  164. # check if already tweeted
  165. if twitterstatus in postedtweets:
  166. print('Already tweeted: ', j['source'])
  167. else:
  168. # collect information about images
  169. pics = []
  170. accepted_filetypes = ('.jpg', '.jpeg', '.png', '.webm', '.JPG', '.JPEG', '.PNG', '.WEBM') # let's don't do mp4 for now.
  171. if (isinstance(j['source'], str) and j['source'].endswith(accepted_filetypes) ):
  172. pics.append(j['source'])
  173. elif ( 'url' in j and isinstance(j['url'], str) and j['url'].endswith(accepted_filetypes) ):
  174. pics.append(j['url'])
  175. # get the images and post them to mastadon ...
  176. toot_media = []
  177. for pic in pics:
  178. if (not dryrun):
  179. media = requests.get(pic)
  180. print(pic, ' has mimetype ', media.headers.get('content-type'))
  181. media_posted = mastodon_api.media_post(media.content, mime_type=media.headers.get('content-type'))
  182. toot_media.append(media_posted['id'])
  183. else:
  184. print('Dryrun: not fetching ', pic, ' and not uploading it to mastodon')
  185. # remove all html stuff - python module in use only supports markdown, not pure plaintext
  186. textsrc = h.handle(p.summary_detail.value.replace("<small>", "<br><small>"))
  187. # free text from lines without visible characters
  188. cleantextsrc = ''
  189. for line in textsrc.split('\n'):
  190. line = line.strip()
  191. cleantextsrc += line + '\n'
  192. # strip newlines, reduce newlines, remove markdown bold (i know, ugly), do some clean up
  193. text = cleantextsrc.strip('\n').replace('\n\n\n','\n\n').replace('**','').replace('\\--','')
  194. # link directly to source or use soup as source.
  195. if (isinstance(j['source'], str) and j['source'] not in text):
  196. source = '\n\nSource: ' + j['source']
  197. else:
  198. source = '\n\nSource: ' + p.link
  199. # shorten text if too long
  200. maximumlegth = 500 - 1 - len(source) - 50 # 50 ... just in case (if they also count attachement url and so on)
  201. text = (text[:maximumlegth] + '…') if len(text) > maximumlegth else text
  202. # add source
  203. text += source
  204. print(text)
  205. if (not dryrun):
  206. # post toot
  207. toot = mastodon_api.status_post(text, in_reply_to_id=None, media_ids=toot_media, sensitive=False, visibility='public', spoiler_text=None)
  208. # add entry to database
  209. if "id" in toot:
  210. db.execute("INSERT INTO posts VALUES ( ? , ? , ? , ? , ? )", (p.id, soup, toot.id, mastuser, mastinstance))
  211. sql.commit()
  212. print( '--> ', p.id, ' posted!')
  213. else:
  214. print('Dryrun: not posting toot and not adding it to database')
  215. print('------------------------')