From 129df366edca97be6b09b2f27caa72e4c6399dd8 Mon Sep 17 00:00:00 2001 From: Ching Date: Mon, 17 Jul 2023 11:49:05 +0800 Subject: [PATCH] =?UTF-8?q?feat(crawler):=20=E5=A2=9E=E5=8A=A0=20logger?= =?UTF-8?q?=EF=BC=8C=E4=BF=AE=E6=94=B9=E5=8F=91=E9=80=81=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 增加 logger,修改发送逻辑 Signed-off-by: Ching --- crawler.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/crawler.py b/crawler.py index 4594197..00b2c45 100644 --- a/crawler.py +++ b/crawler.py @@ -6,18 +6,22 @@ from bs4 import BeautifulSoup import re import redis import json +import logging from mastodon import Mastodon +logging.basicConfig(filename='/root/develop/dev/log/chh-craler.log', level=logging.INFO) +logger = logging.getLogger('/root/develop/dev/log/chh-craler.log') + # connect to redis with password redis_db = redis.StrictRedis(host="localhost", port=6379, db=0, password='s7LkRNuaLxST5e') mastodon_client = Mastodon( - access_token = '', - api_base_url = '' + access_token = '8LIqGXmerhP8QABT3ppe-1HDATfbmJ-8iDY1_QxNkjk', + api_base_url = 'https://nofan.xyz/' ) @@ -118,6 +122,11 @@ def toot(): # get the article info from redis for key in keys: article = json.loads(redis_db.get(key)) + # get send article id from redis set 'send-chh-article-id' + # if the article['id'] is in the set, skip it + if redis_db.sismember('send-chh-article-id', article['article_id']): + continue + # upload article image to mastodon media = mastodon_client.media_post(article['img_url']) # toot the article info @@ -131,10 +140,10 @@ def toot(): mastodon_client.toot(toot_content, media_ids=[media['id']]) - + # add the article['id'] to the set + redis_db.sadd('send-chh-article-id', article['article_id']) + break if __name__ == '__main__': crawler() - - -article = {'img_url': 'https://static.chiphell.com/portal/202307/16/080741btbodlblx8nwzn74.jpg', 'title': '一点小收藏—AP皇家橡树', 'author': '幼月', 'date': '2023/07/16', 'category': '腕表', 'content': '\n又是我胡汉三,最近不知道怎么捅了腕表品牌的窝了。。三天两头给我塞东西。。所以作业按照混乱发!\r\n没有文笔,只有碎碎念。\r\n\r\n\r\nROYAL OAK 15551OR\r\n\r\n女王的第一块AP,最早许愿是蓝盘但是到的是白盘,不过白盘也非常美 ...', 'url': 'https://www.chiphell.com/article-30010-1.html', 'article_id': '30010'} + toot()