From c5bf60858c696873dadca00d477c10ec0dcecb75 Mon Sep 17 00:00:00 2001
From: Ching
Date: Wed, 17 Apr 2024 10:18:17 +0800
Subject: [PATCH] =?UTF-8?q?refactor:=20=E4=BF=AE=E6=94=B9=E8=8E=B7?=
 =?UTF-8?q?=E5=8F=96=E7=9F=AD=E9=93=BE=E9=80=BB=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crawler.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/crawler.py b/crawler.py
index 6e1f22b..6899733 100644
--- a/crawler.py
+++ b/crawler.py
@@ -6,18 +6,19 @@ from bs4 import BeautifulSoup
 import re
 import redis
 import json
-import logging
+from loguru import logger
 from mastodon import Mastodon
 
 
-logging.basicConfig(filename='/root/develop/dev/log/chh-craler.log', level=logging.INFO)
-logger = logging.getLogger('/root/develop/dev/log/chh-craler.log')
+# logging.basicConfig(filename='/root/develop/log/chh-craler.log', level=logging.INFO)
+# logger = logging.getLogger('/root/develop/log/chh-craler.log')
+logger.add('/root/develop/log/chh-craler.log', level='INFO')
 
 
 # connect to redis with password
 redis_db = redis.StrictRedis(host="localhost",
-                             port=6379, db=0, password='s7LkRNuaLxST5e')
+                             port=6379, db=0)
 
 
 mastodon_client = Mastodon(
     access_token = '8LIqGXmerhP8QABT3ppe-1HDATfbmJ-8iDY1_QxNkjk',
@@ -28,6 +29,7 @@ mastodon_client = Mastodon(
 def save_to_redis(article):
     key = 'chh-article:%s' % article['article_id']
     if not redis_db.get(key):
+        article['url'] = url_shorten(article['url'])
         redis_db.set(key, json.dumps(article), ex=3600*24*7)
 
     return True
@@ -120,7 +122,7 @@ def crawler():
             'date': date,
             'category': category,
             'content': content,
-            'url': url_shorten(url),
+            'url': url,
             'article_id': article_id
         }
 