diff --git a/crawler.py b/crawler.py
new file mode 100644
index 0000000..4594197
--- /dev/null
+++ b/crawler.py
@@ -0,0 +1,140 @@
+### a crawler for the website: https://www.chiphell.com/
+
+
import json
import os
import re
from urllib.parse import urljoin

import redis
import requests
from bs4 import BeautifulSoup
from mastodon import Mastodon
+
+
+
+# connect to redis with password
+redis_db = redis.StrictRedis(host="localhost",
+ port=6379, db=0, password='s7LkRNuaLxST5e')
+
+mastodon_client = Mastodon(
+ access_token = '',
+ api_base_url = ''
+)
+
+
+def save_to_redis(article):
+ key = 'chh-article:%s' % article['article_id']
+ if not redis_db.get(key):
+ redis_db.set(key, json.dumps(article), ex=3600*24*7)
+ return True
+
+def crawler():
+ # get article list in html div class name = "acon cl"
+ home_url = 'https://www.chiphell.com/'
+ # a normal chrome user agent
+ headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 8_6_7) AppleWebKit/534.36 (KHTML, like Gecko) Chrome/51.0.2863.218 Safari/601'}
+ # get the html page
+ r = requests.get(home_url, headers=headers)
+ # use BeautifulSoup to parse the html page
+ soup = BeautifulSoup(r.text, features="html.parser")
+ # find the div class name = "acon cl" in the last div name = "chiphell_box cl"
+ div = soup.find_all('div', class_='chiphell_box cl')[-1]
+ div = div.find('div', class_='acon cl')
+ # articles are in the ul div name = "threadulid"
+ ul = div.find('ul', id='threadulid')
+ # find all the li tags
+ li_list = ul.find_all('li')
+ # a list item is like:
+ #
+ #
+ #
+ #
+ #
+ #
一点小收藏—AP皇家橡树
+ #
+ #

+ #
+ #
+ #
2023/07/16
+ #
3231
+ #
48
+ #
腕表
+ #
+ #
+ #
+ # 又是我胡汉三,最近不知道怎么捅了腕表品牌的窝了。。三天两头给我塞东西。。所以作业按照混乱发!
+ # 没有文笔,只有碎碎念。
+ # ROYAL OAK 15551OR
+ # 女王的第一块AP,最早许愿是蓝盘但是到的是白盘,不过白盘也非常美 ...
+ #
+ #
+ # get the article img, title, author, date, category, content and url
+ for li in li_list:
+ # get the article img
+ img = li.find('img')
+ img_url = img['src']
+ # get the article title
+ title = li.find('a', class_='tm03 cl')
+ title = title.text
+ # get the article author
+ author = li.find('a', class_='')
+ author = author.text
+ # get the article date
+ date = li.find('span', style='padding-left: 0px;')
+ date = date.text
+ # get the article category
+ category = li.find('a', class_='asort cl')
+ category = category.text
+ # get the article content
+ content = li.find('div', class_='tm04 cl')
+ content = content.text
+ # get the article url
+ url = li.find('a', class_='tm03 cl')
+ url = home_url + url['href']
+ # get the article id
+ article_id = re.findall(r'article-(\d+)-1.html', url)[0]
+
+
+ # make the article info a dict
+ article = {
+ 'img_url': img_url,
+ 'title': title,
+ 'author': author,
+ 'date': date,
+ 'category': category,
+ 'content': content,
+ 'url': url,
+ 'article_id': article_id
+ }
+
+ # save the article info to redis
+ if save_to_redis(article):
+ print(article)
+
+def toot():
+ # get all the keys in redis
+ keys = redis_db.keys('chh-article:*')
+ # get the article info from redis
+ for key in keys:
+ article = json.loads(redis_db.get(key))
+ # upload article image to mastodon
+ media = mastodon_client.media_post(article['img_url'])
+ # toot the article info
+ toot_content = """{title} - {category} by {author} \n
+ {content} \n
+ {url}""".format(title=article['title'],
+ category=article['category'],
+ author=article['author'],
+ content=article['content'],
+ url=article['url'])
+
+ mastodon_client.toot(toot_content, media_ids=[media['id']])
+
+
+
+if __name__ == '__main__':
+ crawler()
+
+
+article = {'img_url': 'https://static.chiphell.com/portal/202307/16/080741btbodlblx8nwzn74.jpg', 'title': '一点小收藏—AP皇家橡树', 'author': '幼月', 'date': '2023/07/16', 'category': '腕表', 'content': '\n又是我胡汉三,最近不知道怎么捅了腕表品牌的窝了。。三天两头给我塞东西。。所以作业按照混乱发!\r\n没有文笔,只有碎碎念。\r\n\r\n\r\nROYAL OAK 15551OR\r\n\r\n女王的第一块AP,最早许愿是蓝盘但是到的是白盘,不过白盘也非常美 ...', 'url': 'https://www.chiphell.com/article-30010-1.html', 'article_id': '30010'}