155 lines
4.6 KiB
Python
155 lines
4.6 KiB
Python
import os
|
|
import random
|
|
import time
|
|
|
|
from requests.api import get
|
|
|
|
from bs4 import BeautifulSoup
|
|
import requests
|
|
import logging
|
|
from mastodon import Mastodon
|
|
import traceback
|
|
import re
|
|
|
|
# Single source of truth for the log destination; the same string is also
# used as the logger's name, exactly as before.
LOG_FILE = '/home/captain/dev/log/jandan.log'

# Root logging goes to LOG_FILE at INFO level.
logging.basicConfig(level=logging.INFO, filename=LOG_FILE)
logger = logging.getLogger(LOG_FILE)
|
|
|
|
# Mastodon API client used for all media uploads and status posts.
#
# SECURITY NOTE(review): an access token committed to source control should be
# treated as leaked. Set MASTODON_ACCESS_TOKEN (and optionally
# MASTODON_API_BASE_URL) in the environment and rotate the token below; the
# literals remain only as fallbacks so existing deployments keep working.
mastodon = Mastodon(
    access_token=os.environ.get(
        'MASTODON_ACCESS_TOKEN',
        'TFBqF_7eLHP1Lr8zqegAnKUBT3mq1_qU2zHOn7aX5sU',
    ),
    api_base_url=os.environ.get(
        'MASTODON_API_BASE_URL',
        'https://botsin.space',
    ),
)
|
|
|
|
# mastodon = Mastodon(
|
|
# access_token = 'd357sMhxQ7GoIbxt3qSGr9YbSx1tVVf9sggA2pVOoMA',
|
|
# api_base_url = 'https://o3o.ca'
|
|
# )
|
|
def ramdom_ua():
    """Build a request-header dict holding a randomised Android User-Agent.

    The Android version is ``<2-5>.<0-9>.<0-9>`` and the device name is drawn
    from a fixed list of ten phone models.

    Returns:
        A dict with the single key ``'User-Agent'``.
    """
    # Draw the three version components first, then the device index, so the
    # sequence of random calls is the same as it has always been.
    major = random.randrange(2, 6)
    minor = random.randrange(0, 10)
    patch = random.randrange(0, 10)
    android_version = '.'.join(str(part) for part in (major, minor, patch))

    device_index = random.randrange(0, 10)
    devices = [
        'Galaxy S4',
        'Galaxy S5',
        'Galaxy S6',
        'Galaxy S7',
        'MI 4LTE',
        'HM NOTE 1LTE',
        'Sony Xperia Z',
        'Sony Xperia Z1',
        'Sony Xperia Z2',
        'Sony Xperia Z3',
    ]
    device = devices[device_index]

    template = 'Mozilla/5.0 (Linux; Android %s; %s Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19'
    return {'User-Agent': template % (android_version, device)}
|
|
|
|
def load_id(path='/home/captain/dev/tmp/id_list'):
    """Read the list of already-seen post ids, one id per line.

    Args:
        path: File to read. Defaults to the bot's id file.

    Returns:
        The ids as a list of strings in file order. An empty list is returned
        when the file does not exist yet (e.g. on the very first run), so the
        caller can start from a clean state instead of crashing.
    """
    try:
        # The ``with`` block closes the file; no explicit close() is needed.
        with open(path, encoding='utf-8') as f:
            return f.read().splitlines()
    except FileNotFoundError:
        return []
|
|
|
|
def save_id(id_list, path='/home/captain/dev/tmp/id_list'):
    """Overwrite the id file with *id_list*, one id per line.

    Args:
        id_list: Iterable of ids; each item is formatted with ``%s``.
        path: File to write. Defaults to the bot's id file.
    """
    # Build the whole payload before opening the file so that a formatting
    # error cannot leave a truncated id file behind.
    payload = ''.join('%s\n' % item for item in id_list).encode()
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(path, 'wb') as f:
        f.write(payload)
|
|
# Shared HTTP session: toot() sets its User-Agent header and proxy on it and
# uses it for both page fetches and image downloads.
# ``requests.Session()`` replaces the deprecated ``requests.session()`` factory.
session = requests.Session()
|
|
|
|
|
|
def get_proxy(timeout=10):
    """Ask the proxy-pool service for an HTTPS proxy.

    Args:
        timeout: Seconds to wait for the pool service before giving up, so an
            unresponsive pool cannot hang the bot indefinitely.

    Returns:
        The decoded JSON body of the response. The main loop reads its
        ``'proxy'`` key; the rest of the schema is not visible from this file.

    Raises:
        requests.RequestException: on connection failure or timeout.
        ValueError: if the response body is not valid JSON.
    """
    resp = requests.get(
        "http://118.24.52.95:5010/get/?type=https",
        timeout=timeout,
    )
    return resp.json()
|
|
|
|
# Proxy address that last produced an HTTP 200 page fetch in toot(); toot()
# resets it to None on a non-200 response, and the main loop requests a new
# proxy from get_proxy() whenever this is falsy.
usable_proxy = None
|
|
|
|
def _parse_post(node, seen_ids):
    """Turn one ``<li>`` node into a post dict, or return ``None`` to skip it.

    Nodes without an ``id`` attribute, or whose id is already in *seen_ids*,
    are skipped.
    """
    post_id = node.attrs.get('id')
    if not post_id or post_id in seen_ids:
        return None
    text = node.p.get_text()
    # Drop anchors without an href so later URL handling never sees None.
    links = (a.get('href') for a in node.find_all('a', class_="view_img_link"))
    images = [href for href in links if href]
    # Keep only the text before the first "view original" marker, then tag it.
    status = text.split('[查看原图]')[0] + '#煎蛋无聊图'
    return {'id': post_id, 'status': status, 'img': images}


def _upload_images(status):
    """Download every image of *status* and upload it; return the media ids."""
    media_ids = []
    for img in status['img']:
        # Protocol-relative URLs need an explicit scheme.
        if img.startswith('//'):
            img = 'https:' + img
        is_gif = img.lower().split('.')[-1] == 'gif'
        mime_type = 'image/gif' if is_gif else 'image/jpeg'
        logger.info((status['id'], status['status'], img, mime_type))
        try:
            resp = session.get(img, timeout=30)
            # Never upload an HTTP error page as if it were an image.
            resp.raise_for_status()
            uploaded = mastodon.media_post(resp.content, mime_type)
        except Exception:
            # Best effort: one broken image must not abort the whole run,
            # because the post id has already been recorded as seen.
            logger.warning('skipping image %s', img, exc_info=True)
            continue
        if uploaded.get('id'):
            media_ids.append(uploaded['id'])
    return media_ids


def _publish(text, media_ids):
    """Post *text* with *media_ids*, at most four attachments per toot.

    More than four attachments are spread over several toots, each prefixed
    with ``(part/total)``. A 5-second pause follows every toot.
    """
    if len(media_ids) <= 4:
        mastodon.status_post(text, media_ids=media_ids)
        time.sleep(5)
        return
    chunks = [media_ids[i:i + 4] for i in range(0, len(media_ids), 4)]
    total = len(chunks)
    for part, chunk in enumerate(chunks, 1):
        mastodon.status_post('(%s/%s) %s' % (part, total, text), media_ids=chunk)
        time.sleep(5)


def toot(proxy=None):
    """Scrape up to two pages of jandan.net/pic and toot every unseen post.

    Page fetches go through the module-level ``session``; when *proxy* is
    given it is installed as the session's HTTPS proxy. The module-level
    ``usable_proxy`` is set to *proxy* after each HTTP 200 page fetch and to
    ``None`` after any other status.

    Each new post id is written to the id file *before* its images are
    uploaded, so a post is attempted at most once. Posts whose images all
    fail to upload are not tooted.

    Args:
        proxy: Optional ``host:port`` proxy address.

    Raises:
        ValueError: if a page fetch returns HTTP 403.
        requests.RequestException: if a page fetch fails or times out.
    """
    global usable_proxy
    jandan_pic_url = 'https://jandan.net/pic'
    page_count = 2
    status_list = []
    id_list = load_id() or []

    session.headers.update(ramdom_ua())
    if proxy:
        logger.info('~~~ proxy %s, usable proxy %s', proxy, usable_proxy)
        session.proxies.update({"https": "https://{}".format(proxy)})

    while page_count and jandan_pic_url:
        page_url = jandan_pic_url
        resp = session.get(page_url, timeout=2)
        if resp.status_code != 200:
            usable_proxy = None
            if resp.status_code == 403:
                raise ValueError('http status code 403')
            # NOTE(review): any other status is retried every 10 seconds
            # without limit, as in the original.
            time.sleep(10)
            continue
        usable_proxy = proxy
        soup = BeautifulSoup(resp.content, 'html.parser')
        page_count -= 1

        # A page without a "previous page" link ends pagination instead of
        # raising before the posts on this page have been collected.
        prev_link = soup.find(class_='previous-comment-page')
        jandan_pic_url = prev_link.get('href') if prev_link is not None else None
        if jandan_pic_url and jandan_pic_url.startswith('//'):
            jandan_pic_url = 'https:' + jandan_pic_url

        listing = soup.find('ol')
        if listing is None:
            logger.warning('no <ol> list found on %s', page_url)
            continue

        for node in listing.find_all('li'):
            try:
                post = _parse_post(node, id_list)
            except Exception:
                # Best effort: skip list items that do not look like a post.
                logger.debug('skipping unparsable node', exc_info=True)
                continue
            if post is not None:
                status_list.append(post)

    # Publish in the reverse of scrape order (presumably oldest first --
    # confirm against the page's ordering).
    status_list.reverse()
    for status in status_list:
        # The same id can be collected twice in one run; toot it only once.
        if status['id'] in id_list:
            continue
        id_list.insert(0, status['id'])
        # Only the 100 most recent ids are remembered.
        del id_list[100:]
        save_id(id_list)

        media_ids = _upload_images(status)
        if media_ids:
            _publish(status['status'], media_ids)
|
|
|
|
# Main loop: run one scrape-and-toot pass every 20 seconds, forever.
# NOTE: this runs at import time; there is no ``__main__`` guard.
while True:
    try:
        logger.info('%s !!!! start', time.asctime())
        if not usable_proxy:
            proxy = get_proxy()
            usable_proxy = proxy.get('proxy')
        toot(proxy=usable_proxy)
        time.sleep(20)
    except Exception:
        logger.error('!!! %s, %s', time.asctime(), traceback.format_exc())
        # Drop the current proxy after any failure. toot() only clears it on
        # a non-200 response, so a connection error or timeout would
        # otherwise make every later pass retry the same dead proxy.
        usable_proxy = None
        time.sleep(10)
|
|
|