# Reconstructed from git patch 67a00914ef0be25844d7cfaf27f807fd5ae16c13
# ("add jandan / setu bot", Ching <ching@mail.com-equivalent>, 2021-03-11).
# The patch body had lost its newlines/indentation; the code below was
# re-indented from the '+' hunk lines.  The patch added two separate
# always-on scripts, jandan_toot.py and setu.py; both are kept here in one
# module, with setu's colliding names given a ``setu_`` prefix.
#
# SECURITY NOTE(review): the original patch committed three live Mastodon
# access tokens in source (including one in commented-out code, dropped
# here).  The literals are kept only as fallbacks so behavior is unchanged;
# they should be rotated and supplied via the environment.

import os
import random
import time
import logging
import traceback
from urllib.parse import urlparse

from bs4 import BeautifulSoup
import requests
from mastodon import Mastodon
from moviepy.editor import VideoFileClip

# ===================== original file: jandan_toot.py =====================
# Scrapes the jandan.net "pic" board and cross-posts new entries to a
# Mastodon bot account, remembering already-posted item ids in ./id_list.

logging.basicConfig(filename='jandan.log', level=logging.INFO)
logger = logging.getLogger('jandan.log')

mastodon = Mastodon(
    access_token=os.environ.get(
        'JANDAN_MASTODON_TOKEN',
        'TFBqF_7eLHP1Lr8zqegAnKUBT3mq1_qU2zHOn7aX5sU'),  # leaked: rotate!
    api_base_url='https://botsin.space',
)


def ramdom_ua():
    """Return a headers dict with a randomized mobile User-Agent.

    The function name keeps the original's "ramdom" typo so any external
    caller still resolves it.
    """
    ver = '%d.%d.%d' % (random.randrange(2, 6),
                        random.randrange(0, 10),
                        random.randrange(0, 10))
    phone_type = random.choice([
        'Galaxy S4', 'Galaxy S5', 'Galaxy S6', 'Galaxy S7',
        'MI 4LTE', 'HM NOTE 1LTE',
        'Sony Xperia Z', 'Sony Xperia Z1', 'Sony Xperia Z2', 'Sony Xperia Z3',
    ])
    return {'User-Agent':
            'Mozilla/5.0 (Linux; Android %s; %s Build/IMM76B) '
            'AppleWebKit/535.19 (KHTML, like Gecko) '
            'Chrome/18.0.1025.133 Mobile Safari/535.19' % (ver, phone_type)}


def load_id():
    """Read previously-posted ids, one per line; [] when the file is absent.

    The original crashed with FileNotFoundError on the very first run.
    """
    try:
        with open('id_list') as f:
            return f.read().splitlines()
    except FileNotFoundError:
        return []


def save_id(id_list):
    """Persist the posted-id list, one id per line (binary, utf-8)."""
    with open('id_list', 'wb') as f:
        for item in id_list:
            f.write(('%s\n' % item).encode())


session = requests.session()


def toot():
    """Fetch up to two pages of jandan.net/pic and toot unseen entries."""
    jandan_pic_url = 'https://jandan.net/pic'
    page_count = 2
    status_list = []
    id_list = load_id()

    session.headers.update(ramdom_ua())

    while page_count and jandan_pic_url:
        resp = session.get(jandan_pic_url)
        if resp.status_code != 200:
            time.sleep(10)
            continue
        soup = BeautifulSoup(resp.content, 'html.parser')
        # Follow the "older page" link; guard against the anchor being
        # missing (the original indexed a possible None -> TypeError).
        prev = soup.find(class_='previous-comment-page')
        jandan_pic_url = prev['href'] if prev else None
        page_count -= 1
        if jandan_pic_url and jandan_pic_url.startswith('//'):
            jandan_pic_url = 'http:' + jandan_pic_url

        for node in soup.find('ol').find_all('li'):
            try:
                id_ = node.attrs.get('id')
                if not id_ or id_ in id_list:
                    continue
                text = node.p.get_text()
                img_list = [a.get('href')
                            for a in node.find_all('a', class_='view_img_link')]
                status = text.split('[查看原图]')[0] + '#煎蛋无聊图'
                status_list.append(
                    {'id': id_, 'status': status, 'img': img_list})
            except Exception:  # malformed node: log and keep scraping
                logger.warning('bad node: %s', traceback.format_exc())

    status_list.reverse()  # oldest first so the timeline reads forward
    for status in status_list:
        if status['id'] not in id_list:
            id_list.insert(0, status['id'])
        id_list = id_list[:100]  # cap the dedup window at 100 ids
        save_id(id_list)

        media_ids = []
        for img in status['img']:
            if img.startswith('//'):
                img = 'https:' + img
            mime_type = ('image/gif'
                         if img.lower().split('.')[-1] == 'gif'
                         else 'image/jpeg')
            r = session.get(img)
            logger.info((status['id'], status['status'], img, mime_type))
            try:
                toot_resp = mastodon.media_post(r.content, mime_type)
            except Exception:
                logger.warning('media_post failed: %s',
                               traceback.format_exc())
                continue
            if toot_resp.get('id'):
                media_ids.append(toot_resp['id'])
        if not media_ids:
            continue
        if len(media_ids) <= 4:  # Mastodon caps attachments at 4/status
            mastodon.status_post(status['status'], media_ids=media_ids)
            time.sleep(5)
        else:
            total = -(-len(media_ids) // 4)  # ceil(len / 4)
            for x in range(total):
                text = '(%s/%s) %s' % (x + 1, total, status['status'])
                mastodon.status_post(text,
                                     media_ids=media_ids[x * 4:(x + 1) * 4])
                time.sleep(5)


def _jandan_main():
    """Driver loop of the original jandan_toot.py (ran at module level)."""
    while True:
        try:
            logger.info('%s !!!! start', time.asctime())
            toot()
            time.sleep(20)
        except Exception:
            logger.error(traceback.format_exc())
            time.sleep(10)


# ===================== original file: setu.py =====================
# Mastodon bot that mirrors media sent to it in direct mentions.  setu.py
# configured its own root log file (/home/captain/dev/setu.log); merged
# into one module, logging.basicConfig above already won, so setu_logger
# only carries the distinct logger name.

setu_logger = logging.getLogger('/home/captain/dev/setu.log')

setu_mastodon = Mastodon(
    access_token=os.environ.get(
        'SETU_MASTODON_TOKEN',
        'iQHxw2fdVO92q73gg2w9yTpMj0inybgOq5ezR7thffU'),  # leaked: rotate!
    api_base_url='https://botsin.space',
)
account_name = '@setu'

# Resolver service that turns a twitter.com link into a raw video URL.
tvdl_url = 'https://tvdl-api.saif.dev/'


def setu_load_id():
    """Return the last-processed notification id, or None when unknown.

    The writer appends '\\n'; the original reader never stripped it and
    passed the newline along as min_id.
    """
    try:
        with open('/home/captain/dev/setu_dm') as f:
            return f.read().strip() or None
    except FileNotFoundError:
        return None


def setu_save_id(id_):
    """Persist the newest processed notification id."""
    with open('/home/captain/dev/setu_dm', 'wb') as f:
        f.write(('%s\n' % id_).encode())


def _fetch_remote_media(content):
    """Download *content* (a URL) and upload it to Mastodon.

    twitter.com links are first resolved through the tvdl service.
    Returns the Mastodon media id, or None on any failure.
    """
    try:
        url_ = urlparse(content).geturl()
        if 'twitter.com' in url_:
            r_ = requests.post(tvdl_url, data={'url': url_, 'ver': 1306})
            if r_.status_code != 200:
                raise ValueError('tvdl returned %s' % r_.status_code)
            url_ = r_.json().get('high', {}).get('downloadURL')
        r = requests.get(url_)
        ext = urlparse(url_).path.split('.')[-1]
        if ext.lower() in ('jpg', 'jpeg', 'gif'):
            # Original compared ext == 'gif' case-sensitively after a
            # case-insensitive membership test; normalized here.
            mime_type = 'image/gif' if ext.lower() == 'gif' else 'image/jpeg'
            toot_resp = setu_mastodon.media_post(r.content, mime_type)
        else:
            # Unknown extension: spool to disk and let Mastodon.py sniff
            # the mime type from the file (mime_type=None).
            path = '/home/captain/dev/temp_download_file.%s' % ext
            with open(path, 'wb') as f:
                f.write(r.content)
            toot_resp = setu_mastodon.media_post(path, None)
    except Exception:
        setu_logger.warning('url fetch failed: %s', traceback.format_exc())
        return None
    return toot_resp.get('id')


def _post_from_url(mention, soup):
    """Handle a text-only DM: if it contains a URL, mirror that media."""
    content = soup.get_text().split(account_name)[-1].strip()
    if content.startswith('http'):
        # Original called logging.info (root logger) here; logger intended.
        setu_logger.info('get url %s', content)
        media_id = _fetch_remote_media(content)
        if media_id:
            setu_mastodon.status_post('🔞', media_ids=[media_id],
                                      sensitive=True)
    # Always advance the cursor: the original skipped save_id() when the
    # fetch raised, so a permanently-broken link was retried forever.
    setu_save_id(mention['id'])


def _upload_video(url):
    """Upload a video attachment, falling back to a GIF re-encode.

    Returns the Mastodon media id, or None on failure.
    """
    resp = requests.get(url)
    if resp.status_code != 200:
        return None
    with open('/home/captain/dev/temp_video.mp4', 'wb') as f:
        f.write(resp.content)
    try:
        # mime_type 'image/jpeg' for an mp4 reproduces the original; its
        # failure mode is what triggers the GIF fallback below.
        toot_resp = setu_mastodon.media_post(
            '/home/captain/dev/temp_video.mp4', 'image/jpeg')
    except Exception:
        try:
            clip = VideoFileClip('/home/captain/dev/temp_video.mp4')
            clip.write_gif('/home/captain/dev/temp_gif.gif')
            toot_resp = setu_mastodon.media_post(
                '/home/captain/dev/temp_gif.gif', 'image/gif')
        except Exception:
            setu_logger.warning('video upload failed: %s',
                                traceback.format_exc())
            return None
    return toot_resp.get('id')


def _post_attachments(mention, soup):
    """Re-upload every attachment from the DM and toot them back."""
    media_ids = []
    for media in mention['status']['media_attachments']:
        if media['type'] == 'video':
            media_id = _upload_video(media['url'])
        else:
            r = requests.get(media['url'])
            try:
                media_id = setu_mastodon.media_post(
                    r.content, 'image/jpeg').get('id')
            except Exception:
                setu_logger.warning('media upload failed: %s',
                                    traceback.format_exc())
                media_id = None
        if media_id:
            media_ids.append(media_id)
    setu_logger.info('posting %s, %s, %s', mention['id'], soup.get_text(),
                     mention['status']['media_attachments'])
    status = soup.get_text().split(account_name)[-1] or '🔞'
    if media_ids:
        setu_mastodon.status_post(status, media_ids=media_ids, sensitive=True)
    setu_save_id(mention['id'])


def post_mentions():
    """Poll notifications newer than the stored id and serve direct DMs.

    Non-direct or non-mention notifications only advance the stored id.
    """
    min_id = setu_load_id()
    setu_logger.info('!!! %s get %s', time.asctime(), min_id)
    mentions = setu_mastodon.notifications(mentions_only=True, min_id=min_id)
    mentions.reverse()  # oldest first so save_id() advances monotonically
    for mention in mentions:
        if 'status' not in mention:
            continue
        soup = BeautifulSoup(mention['status']['content'], 'html.parser')
        if not (mention['type'] == 'mention'
                and mention['status']['visibility'] == 'direct'):
            setu_save_id(mention['id'])
            continue
        if not mention['status']['media_attachments']:
            _post_from_url(mention, soup)
        else:
            _post_attachments(mention, soup)


def _setu_main():
    """Driver loop of the original setu.py (ran at module level)."""
    while True:
        try:
            post_mentions()
            time.sleep(10)
        except Exception:
            setu_logger.error(traceback.format_exc())
            time.sleep(10)


if __name__ == '__main__':
    # The patch shipped these as two separate always-on scripts whose
    # loops ran at import time; select one via BOT=jandan|setu.
    if os.environ.get('BOT') == 'setu':
        _setu_main()
    else:
        _jandan_main()