refactor: 修改获取短链逻辑
This commit is contained in:
parent
a4c7f76216
commit
c5bf60858c
12
crawler.py
12
crawler.py
@ -6,18 +6,19 @@ from bs4 import BeautifulSoup
|
|||||||
import re
|
import re
|
||||||
import redis
|
import redis
|
||||||
import json
|
import json
|
||||||
import logging
|
from loguru import logger
|
||||||
|
|
||||||
from mastodon import Mastodon
|
from mastodon import Mastodon
|
||||||
|
|
||||||
|
|
||||||
logging.basicConfig(filename='/root/develop/dev/log/chh-craler.log', level=logging.INFO)
|
# logging.basicConfig(filename='/root/develop/log/chh-craler.log', level=logging.INFO)
|
||||||
logger = logging.getLogger('/root/develop/dev/log/chh-craler.log')
|
# logger = logging.getLogger('/root/develop/log/chh-craler.log')
|
||||||
|
logger.add('/root/develop/log/chh-craler.log', level='INFO')
|
||||||
|
|
||||||
|
|
||||||
# connect to redis with password
|
# connect to redis with password
|
||||||
redis_db = redis.StrictRedis(host="localhost",
|
redis_db = redis.StrictRedis(host="localhost",
|
||||||
port=6379, db=0, password='s7LkRNuaLxST5e')
|
port=6379, db=0)
|
||||||
|
|
||||||
mastodon_client = Mastodon(
|
mastodon_client = Mastodon(
|
||||||
access_token = '8LIqGXmerhP8QABT3ppe-1HDATfbmJ-8iDY1_QxNkjk',
|
access_token = '8LIqGXmerhP8QABT3ppe-1HDATfbmJ-8iDY1_QxNkjk',
|
||||||
@ -28,6 +29,7 @@ mastodon_client = Mastodon(
|
|||||||
def save_to_redis(article):
|
def save_to_redis(article):
|
||||||
key = 'chh-article:%s' % article['article_id']
|
key = 'chh-article:%s' % article['article_id']
|
||||||
if not redis_db.get(key):
|
if not redis_db.get(key):
|
||||||
|
article['url'] = url_shorten(article['url'])
|
||||||
redis_db.set(key, json.dumps(article), ex=3600*24*7)
|
redis_db.set(key, json.dumps(article), ex=3600*24*7)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@ -120,7 +122,7 @@ def crawler():
|
|||||||
'date': date,
|
'date': date,
|
||||||
'category': category,
|
'category': category,
|
||||||
'content': content,
|
'content': content,
|
||||||
'url': url_shorten(url),
|
'url': url,
|
||||||
'article_id': article_id
|
'article_id': article_id
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user