diff --git a/crawler.py b/crawler.py
index 6e1f22b..6899733 100644
--- a/crawler.py
+++ b/crawler.py
@@ -6,18 +6,19 @@ from bs4 import BeautifulSoup
 import re
 import redis
 import json
-import logging
+from loguru import logger
 from mastodon import Mastodon
 
-logging.basicConfig(filename='/root/develop/dev/log/chh-craler.log', level=logging.INFO)
-logger = logging.getLogger('/root/develop/dev/log/chh-craler.log')
+# logging.basicConfig(filename='/root/develop/log/chh-craler.log', level=logging.INFO)
+# logger = logging.getLogger('/root/develop/log/chh-craler.log')
+logger.add('/root/develop/log/chh-craler.log', level='INFO')
 
 # connect to redis with password
 redis_db = redis.StrictRedis(host="localhost",
-                             port=6379, db=0, password='s7LkRNuaLxST5e')
+                             port=6379, db=0)
 
 mastodon_client = Mastodon(
     access_token = '8LIqGXmerhP8QABT3ppe-1HDATfbmJ-8iDY1_QxNkjk',
@@ -28,6 +29,7 @@ mastodon_client = Mastodon(
 def save_to_redis(article):
     key = 'chh-article:%s' % article['article_id']
     if not redis_db.get(key):
+        article['url'] = url_shorten(article['url'])
         redis_db.set(key, json.dumps(article), ex=3600*24*7)
         return True
 
@@ -120,7 +122,7 @@ def crawler():
             'date': date,
             'category': category,
             'content': content,
-            'url': url_shorten(url),
+            'url': url,
             'article_id': article_id
         }