toot_bot/jandan_toot.py
2021-06-11 11:05:11 +08:00

144 lines
4.2 KiB
Python

import os
import random
import time
from requests.api import get
from bs4 import BeautifulSoup
import requests
import logging
from mastodon import Mastodon
import traceback
import re
# Log everything at INFO+ to a fixed file path.
logging.basicConfig(filename='/home/captain/dev/log/jandan.log', level=logging.INFO)
# Logger name reuses the log file path — unconventional but harmless.
logger = logging.getLogger('/home/captain/dev/log/jandan.log')
# Mastodon API client for the bot account on botsin.space.
# NOTE(review): the access token is hard-coded in source — it should be moved to
# an environment variable or config file, and the exposed credential rotated.
mastodon = Mastodon(
access_token = 'TFBqF_7eLHP1Lr8zqegAnKUBT3mq1_qU2zHOn7aX5sU',
api_base_url = 'https://botsin.space'
)
# Alternative bot account on o3o.ca, kept for manual switching:
# mastodon = Mastodon(
# access_token = 'd357sMhxQ7GoIbxt3qSGr9YbSx1tVVf9sggA2pVOoMA',
# api_base_url = 'https://o3o.ca'
# )
def ramdom_ua():
    """Build a randomized mobile-Chrome User-Agent header.

    Returns a dict suitable for ``session.headers.update``: the Android
    version is a random ``major.minor.patch`` (major in 2-5, minor/patch
    in 0-9) and the device model is picked from a fixed list of handsets.
    """
    major = random.randrange(2, 6)
    minor = random.randrange(0, 10)
    patch = random.randrange(0, 10)
    android_version = f'{major}.{minor}.{patch}'
    handsets = [
        'Galaxy S4', 'Galaxy S5', 'Galaxy S6', 'Galaxy S7',
        'MI 4LTE', 'HM NOTE 1LTE',
        'Sony Xperia Z', 'Sony Xperia Z1', 'Sony Xperia Z2', 'Sony Xperia Z3',
    ]
    model = random.choice(handsets)
    agent = (
        f'Mozilla/5.0 (Linux; Android {android_version}; {model} Build/IMM76B) '
        f'AppleWebKit/535.19 (KHTML, like Gecko) '
        f'Chrome/18.0.1025.133 Mobile Safari/535.19'
    )
    return {'User-Agent': agent}
def load_id(path='/home/captain/dev/tmp/id_list'):
    """Load the ids of already-tooted posts, one id per line.

    path: file to read; defaults to the bot's persistent id-list location.
    Returns a list of id strings (newest first, as written by save_id),
    or an empty list when the file does not exist yet (e.g. first run) —
    previously this raised FileNotFoundError and the bot's retry loop
    spun forever until the file was created by hand.
    """
    try:
        # `with` closes the file automatically; the old explicit f.close()
        # inside the with-block was redundant.
        with open(path) as f:
            return f.read().splitlines()
    except FileNotFoundError:
        return []
def save_id(id_list, path='/home/captain/dev/tmp/id_list'):
    """Persist the list of tooted post ids, one id per line.

    id_list: iterable of id strings (newest first).
    path: file to overwrite; defaults to the bot's persistent id-list
    location (same default as load_id).
    The file is written in binary mode with UTF-8-encoded lines, matching
    the original on-disk format exactly.
    """
    # `with` closes the file automatically; the old explicit f.close()
    # inside the with-block was redundant.
    with open(path, 'wb') as f:
        f.writelines(('%s\n' % item).encode() for item in id_list)
# Shared HTTP session reused for every jandan.net request (headers and
# proxies are configured per run inside toot()).
session = requests.session()


def get_proxy():
    """Ask the local proxy-pool service for one https-capable proxy.

    Returns the decoded JSON response (expected to contain a 'proxy' key
    with a host:port string — see the caller in the main loop).
    """
    resp = requests.get("http://118.24.52.95:5010/get/?type=https")
    return resp.json()
def toot(proxy=None):
    """Scrape the newest pages of jandan.net/pic and toot each unseen post.

    proxy: optional 'host:port' string; all https traffic of the shared
    session is routed through it.
    Side effects: updates the persisted id list (save_id), uploads media
    and posts statuses via the module-level Mastodon client.
    (Indentation of this block was reconstructed from a whitespace-mangled
    paste — nesting below reflects the most plausible reading.)
    """
    jandan_pic_url = 'https://jandan.net/pic'
    page_count = 2  # scrape at most two pages per run
    status_list = []
    id_list = load_id()  # ids already tooted in previous runs
    if not id_list:
        id_list = []
    # Fresh randomized User-Agent each run; route https through the proxy.
    session.headers.update(ramdom_ua())
    session.proxies.update({"https": "https://{}".format(proxy)})
    while page_count and jandan_pic_url:
        resp = session.get(jandan_pic_url, timeout=2)
        if resp.status_code != 200:
            # NOTE(review): page_count is NOT decremented on failure, so a
            # persistently non-200 URL retries forever at 10s intervals;
            # only a raised exception escapes to the caller's retry loop.
            time.sleep(10)
            continue
        soup = BeautifulSoup(resp.content, 'html.parser')
        # Link to the next-older page; if the element is missing this
        # raises TypeError, which the caller's except logs and retries.
        jandan_pic_url = soup.find(class_='previous-comment-page')['href']
        page_count -= 1
        if jandan_pic_url.startswith('//'):
            # Protocol-relative URL -> absolute https URL.
            jandan_pic_url = 'https:' + jandan_pic_url
        pic_nodes = soup.find('ol').find_all('li')
        for node in pic_nodes:
            try:
                id_ = node.attrs.get('id')
                if not id_:
                    continue  # filler <li> without a comment id
                if id_ in id_list:
                    continue  # already tooted in a previous run
                text = node.p.get_text()
                img_set = node.find_all('a',class_="view_img_link")
                img_list = []
                for img in img_set:
                    img_list.append(img.get('href'))
                # Keep only the caption before the "[查看原图]" (view
                # original image) link text, then append the board hashtag.
                status = text.split('[查看原图]')[0]
                status += '#煎蛋无聊图'
                status_list.append({'id':id_, 'status':status, 'img':img_list})
            except:
                # Best-effort per node: any malformed entry is skipped.
                # NOTE(review): bare except also swallows KeyboardInterrupt;
                # `except Exception` would be safer.
                pass
    # Scraped newest-first; reverse so toots go out in chronological order.
    status_list.reverse()
    for status in status_list:
        if status['id'] not in id_list:
            id_list.insert(0, status['id'])
            if len(id_list) > 100:
                id_list = id_list[:100]  # bound the dedupe window
            # Persist BEFORE posting so a crash mid-post cannot repost.
            save_id(id_list)
            media_ids = []
            for img in status['img']:
                if img.startswith('//'):
                    img = 'https:' + img
                mime_type = 'image/jpeg'
                if img.lower().split('.')[-1] == 'gif':
                    mime_type = 'image/gif'
                resp = session.get(img)
                logger.info((status['id'], status['status'], img, mime_type))
                try:
                    toot_resp = mastodon.media_post(resp.content, mime_type)
                except:
                    continue  # skip images the instance refuses to accept
                if toot_resp.get('id'):
                    media_ids.append(toot_resp['id'])
            if not media_ids:
                continue  # every upload failed: nothing to toot
            if len(media_ids) <= 4:
                # Mastodon allows at most 4 attachments per status.
                toot_resp = mastodon.status_post(status['status'], media_ids=media_ids)
                time.sleep(5)  # pace posts to avoid rate limiting
            else:
                # Split into numbered parts of up to 4 images each.
                total = len(media_ids) // 4
                if len(media_ids) % 4:
                    total += 1
                for x in range(total):
                    text = '(%s/%s) %s' % ((x+1), total, status['status'])
                    toot_resp = mastodon.status_post(text, media_ids=media_ids[x*4:(x+1)*4])
                    time.sleep(5)
# Driver loop: fetch a fresh proxy, run one scrape/toot pass, pause, repeat.
# Any exception (network timeout, parse failure, Mastodon API error) is
# logged with a full traceback and the loop retries after 10 seconds.
while True:
    try:
        logger.info('%s !!!! start', time.asctime())
        # Proxy-pool response is expected to carry a 'proxy' key with a
        # host:port string — a malformed response raises KeyError here,
        # which is caught and logged below.
        proxy = get_proxy()
        toot(proxy=proxy['proxy'])
        time.sleep(20)
    except Exception as ex:
        logger.error(traceback.format_exc())
        time.sleep(10)