"""Scrape jandan.net's picture board and re-post new entries to a Mastodon
bot account, remembering the IDs of already-posted entries on disk.

Runs as a daemon: main() calls toot() forever, sleeping between rounds.
"""
import os
import random
import time
import logging
import traceback

from bs4 import BeautifulSoup
import requests
from mastodon import Mastodon

LOG_PATH = '/home/captain/dev/log/jandan.log'
ID_LIST_PATH = '/home/captain/dev/id_list'
MAX_REMEMBERED_IDS = 100   # how many posted-entry IDs to keep on disk
MEDIA_PER_TOOT = 4         # Mastodon's attachment limit per status

logging.basicConfig(filename=LOG_PATH, level=logging.INFO)
logger = logging.getLogger(LOG_PATH)

# SECURITY NOTE(review): the access token is hard-coded in source; move it to
# an environment variable or config file and rotate the credential.
mastodon = Mastodon(
    access_token='TFBqF_7eLHP1Lr8zqegAnKUBT3mq1_qU2zHOn7aX5sU',
    api_base_url='https://botsin.space'
)
# Alternate account, kept for reference:
# mastodon = Mastodon(
#     access_token = 'd357sMhxQ7GoIbxt3qSGr9YbSx1tVVf9sggA2pVOoMA',
#     api_base_url = 'https://o3o.ca'
# )


def ramdom_ua():
    """Return a headers dict with a randomized Android Chrome User-Agent.

    (The misspelled name is kept so any external callers are unaffected.)
    """
    ver = '%d.%d.%d' % (
        random.randrange(2, 6),
        random.randrange(0, 10),
        random.randrange(0, 10),
    )
    phone_type = random.choice([
        'Galaxy S4', 'Galaxy S5', 'Galaxy S6', 'Galaxy S7',
        'MI 4LTE', 'HM NOTE 1LTE',
        'Sony Xperia Z', 'Sony Xperia Z1', 'Sony Xperia Z2', 'Sony Xperia Z3',
    ])
    # Implicit string concatenation: the runtime UA string is identical to the
    # original single-line literal.
    return {
        'User-Agent': 'Mozilla/5.0 (Linux; Android %s; %s Build/IMM76B) '
                      'AppleWebKit/535.19 (KHTML, like Gecko) '
                      'Chrome/18.0.1025.133 Mobile Safari/535.19'
                      % (ver, phone_type),
    }


def load_id():
    """Load the list of already-posted entry IDs from disk.

    Returns:
        list[str]: saved IDs, newest first; empty on first run when the
        ID file does not exist yet.
    """
    try:
        with open(ID_LIST_PATH) as f:
            return f.read().splitlines()
    except FileNotFoundError:
        # First run: nothing posted yet.
        return []


def save_id(id_list):
    """Persist the posted-entry ID list, one ID per line."""
    with open(ID_LIST_PATH, 'wb') as f:
        for item in id_list:
            f.write(('%s\n' % item).encode())


session = requests.session()


def _scrape_new_statuses(id_list):
    """Scrape up to two pages of jandan.net/pic.

    Args:
        id_list: IDs of entries already posted (these are skipped).

    Returns:
        list[dict]: {'id', 'status', 'img'} dicts for new entries,
        oldest first so toots appear in page order.
    """
    jandan_pic_url = 'https://jandan.net/pic'
    page_count = 2
    status_list = []
    session.headers.update(ramdom_ua())
    while page_count and jandan_pic_url:
        resp = session.get(jandan_pic_url)
        if resp.status_code != 200:
            # Transient failure: wait and retry the same URL.
            time.sleep(10)
            continue
        soup = BeautifulSoup(resp.content, 'html.parser')
        jandan_pic_url = soup.find(class_='previous-comment-page')['href']
        page_count -= 1
        if jandan_pic_url.startswith('//'):
            # Protocol-relative next-page link.
            # NOTE(review): images below are forced to https; consider the
            # same here — confirm the site serves http.
            jandan_pic_url = 'http:' + jandan_pic_url
        for node in soup.find('ol').find_all('li'):
            try:
                id_ = node.attrs.get('id')
                if not id_ or id_ in id_list:
                    continue
                text = node.p.get_text()
                img_list = [a.get('href')
                            for a in node.find_all('a', class_="view_img_link")]
                status = text.split('[查看原图]')[0] + '#煎蛋无聊图'
                status_list.append(
                    {'id': id_, 'status': status, 'img': img_list})
            except Exception:
                # Malformed node: best-effort skip, but leave a trace.
                logger.exception('failed to parse entry node')
    status_list.reverse()
    return status_list


def _upload_media(status):
    """Download each image of *status* and upload it to Mastodon.

    Returns:
        list: Mastodon media IDs; may be empty when every upload failed.
    """
    media_ids = []
    for img in status['img']:
        if img.startswith('//'):
            img = 'https:' + img
        mime_type = ('image/gif'
                     if img.lower().split('.')[-1] == 'gif'
                     else 'image/jpeg')
        resp = session.get(img)
        logger.info((status['id'], status['status'], img, mime_type))
        try:
            upload = mastodon.media_post(resp.content, mime_type)
        except Exception:
            # Best-effort: skip this image, keep the rest.
            logger.exception('media upload failed')
            continue
        if upload.get('id'):
            media_ids.append(upload['id'])
    return media_ids


def _post_status(status, media_ids):
    """Post *status* with its media, splitting into numbered '(i/n)' parts
    when there are more than MEDIA_PER_TOOT attachments."""
    if len(media_ids) <= MEDIA_PER_TOOT:
        mastodon.status_post(status['status'], media_ids=media_ids)
        time.sleep(5)
        return
    # Ceiling division: number of toots needed for all attachments.
    total = -(-len(media_ids) // MEDIA_PER_TOOT)
    for x in range(total):
        text = '(%s/%s) %s' % ((x + 1), total, status['status'])
        mastodon.status_post(
            text,
            media_ids=media_ids[x * MEDIA_PER_TOOT:(x + 1) * MEDIA_PER_TOOT])
        time.sleep(5)


def toot():
    """One scrape-and-post round.

    Finds new entries, records each candidate ID (capped at
    MAX_REMEMBERED_IDS) before posting so a crash never re-posts it,
    then uploads its images and toots.
    """
    id_list = load_id()
    for status in _scrape_new_statuses(id_list):
        if status['id'] in id_list:
            continue
        id_list.insert(0, status['id'])
        if len(id_list) > MAX_REMEMBERED_IDS:
            id_list = id_list[:MAX_REMEMBERED_IDS]
        save_id(id_list)
        media_ids = _upload_media(status)
        if not media_ids:
            # Nothing uploaded: skip entries with no usable media.
            continue
        _post_status(status, media_ids)


def main():
    """Run scrape rounds forever; log any exception and keep going."""
    while True:
        try:
            logger.info('%s !!!! start', time.asctime())
            toot()
            time.sleep(20)
        except Exception:
            logger.error(traceback.format_exc())
            time.sleep(10)


if __name__ == '__main__':
    main()