feat(crawler): 增加 logger,修改发送逻辑

增加 logger,修改发送逻辑

Signed-off-by: Ching <loooching@gmail.com>
This commit is contained in:
Ching 2023-07-17 11:49:05 +08:00
parent 5dfbfa5c57
commit 129df366ed

View File

@ -6,18 +6,22 @@ from bs4 import BeautifulSoup
import re import re
import redis import redis
import json import json
import logging
from mastodon import Mastodon from mastodon import Mastodon
logging.basicConfig(filename='/root/develop/dev/log/chh-craler.log', level=logging.INFO)
logger = logging.getLogger('/root/develop/dev/log/chh-craler.log')
# connect to redis with password # connect to redis with password
redis_db = redis.StrictRedis(host="localhost", redis_db = redis.StrictRedis(host="localhost",
port=6379, db=0, password='s7LkRNuaLxST5e') port=6379, db=0, password='s7LkRNuaLxST5e')
mastodon_client = Mastodon( mastodon_client = Mastodon(
access_token = '', access_token = '8LIqGXmerhP8QABT3ppe-1HDATfbmJ-8iDY1_QxNkjk',
api_base_url = '' api_base_url = 'https://nofan.xyz/'
) )
@ -118,6 +122,11 @@ def toot():
# get the article info from redis # get the article info from redis
for key in keys: for key in keys:
article = json.loads(redis_db.get(key)) article = json.loads(redis_db.get(key))
# check the redis set 'send-chh-article-id', which holds the ids of articles already sent
# if article['article_id'] is in the set, skip it
if redis_db.sismember('send-chh-article-id', article['article_id']):
continue
# upload article image to mastodon # upload article image to mastodon
media = mastodon_client.media_post(article['img_url']) media = mastodon_client.media_post(article['img_url'])
# toot the article info # toot the article info
@ -131,10 +140,10 @@ def toot():
mastodon_client.toot(toot_content, media_ids=[media['id']]) mastodon_client.toot(toot_content, media_ids=[media['id']])
# add article['article_id'] to the set of sent articles
redis_db.sadd('send-chh-article-id', article['article_id'])
break
if __name__ == '__main__': if __name__ == '__main__':
crawler() crawler()
toot()
article = {'img_url': 'https://static.chiphell.com/portal/202307/16/080741btbodlblx8nwzn74.jpg', 'title': '一点小收藏—AP皇家橡树', 'author': '幼月', 'date': '2023/07/16', 'category': '腕表', 'content': '\n又是我胡汉三,最近不知道怎么捅了腕表品牌的窝了。。三天两头给我塞东西。。所以作业按照混乱发!\r\n没有文笔,只有碎碎念。\r\n\r\n\r\nROYAL OAK 15551OR\r\n\r\n女王的第一块AP最早许愿是蓝盘但是到的是白盘不过白盘也非常美 ...', 'url': 'https://www.chiphell.com/article-30010-1.html', 'article_id': '30010'}