feat(crawler): add cloudscraper to bypass Cloudflare protection
- Replace requests with cloudscraper for image downloading
- Update the log file path to point to the home directory's logs folder
- Add a timeout parameter to image requests to prevent hanging
This commit is contained in:
parent
da1969b103
commit
3bbe483c64
@ -7,6 +7,7 @@ import re
|
|||||||
import redis
|
import redis
|
||||||
import json
|
import json
|
||||||
import feedparser
|
import feedparser
|
||||||
|
import cloudscraper
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from mastodon import Mastodon
|
from mastodon import Mastodon
|
||||||
@ -14,8 +15,8 @@ from mastodon import Mastodon
|
|||||||
|
|
||||||
# logging.basicConfig(filename='/root/develop/log/chh-craler.log', level=logging.INFO)
|
# logging.basicConfig(filename='/root/develop/log/chh-craler.log', level=logging.INFO)
|
||||||
# logger = logging.getLogger('/root/develop/log/chh-craler.log')
|
# logger = logging.getLogger('/root/develop/log/chh-craler.log')
|
||||||
logger.add('/root/develop/log/chh-craler.log', level='INFO')
|
logger.add('/home/ching/logs/chh-craler.log', level='INFO')
|
||||||
|
scraper = cloudscraper.create_scraper()
|
||||||
|
|
||||||
# connect to redis with password
|
# connect to redis with password
|
||||||
redis_db = redis.StrictRedis(host="localhost",
|
redis_db = redis.StrictRedis(host="localhost",
|
||||||
@ -105,7 +106,8 @@ def toot():
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# download article image to a temp file
|
# download article image to a temp file
|
||||||
img = requests.get(article['img_url'])
|
#img = requests.get(article['img_url'])
|
||||||
|
img = scraper.get(article['img_url'], timeout=10)
|
||||||
# upload article image to mastodon
|
# upload article image to mastodon
|
||||||
media = mastodon_client.media_post(img.content, 'image/jpeg')
|
media = mastodon_client.media_post(img.content, 'image/jpeg')
|
||||||
# toot the article info
|
# toot the article info
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user