# -*- coding: utf-8 -*-
"""Mirror images from the logged-in Instagram account's feed to Mastodon.

The script polls ``Client.self_feed()`` once a minute, and for every item
whose first image candidate has not been recorded in
``settings.MASTODON_SYNCED_IMAGES_LOG`` it downloads the image and posts it
(with the caption as status text) through the Mastodon API.
"""
import os
import sys

# Django must be configured before ``dsite.settings`` is imported below, so
# the remaining imports intentionally follow this bootstrap code.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dsite.settings")
sys.path.insert(0, '../')
sys.path.insert(0, './')
from django.core.wsgi import get_wsgi_application

get_wsgi_application()

import pickle
from urllib.parse import urlparse
import time
from mastodon import Mastodon
from instagram_private_api import Client
import requests
import logging
from dsite import settings

logging.basicConfig(filename='/root/develop/log/ins2mastodon.log',
                    level=logging.INFO)
logger = logging.getLogger('/root/develop/log/ins2mastodon.log')

mastodon_cli = Mastodon(access_token=settings.MASTODON_NOFAN_ACCESS_TOKEN,
                        api_base_url='https://nofan.xyz')

# Seconds to wait for the image download before giving up; without a timeout
# a stalled connection would block the polling loop forever.
_DOWNLOAD_TIMEOUT = 30
# Seconds to sleep between two polls of the feed.
_POLL_INTERVAL = 60


def _image_name(image_url):
    """Return the last path segment of *image_url* (the image file name)."""
    return urlparse(image_url).path.split('/')[-1]


def _load_synced_images():
    """Return the set of image names already recorded in the synced log.

    A missing log file means nothing has been synced yet, so an empty set is
    returned instead of raising.
    """
    try:
        with open(settings.MASTODON_SYNCED_IMAGES_LOG, 'r') as f:
            return {line.strip() for line in f}
    except FileNotFoundError:
        return set()


def send_image_to_mastodon(image_url, text):
    """Download *image_url* and post it to Mastodon with *text* as status.

    On a successful post the image file name is appended to
    ``settings.MASTODON_SYNCED_IMAGES_LOG`` so it is not posted again.

    Raises:
        requests.RequestException: if the download fails, times out, or the
            server answers with an HTTP error status.
    """
    resp = requests.get(image_url, timeout=_DOWNLOAD_TIMEOUT)
    # Do not upload an HTTP error page as if it were image data.
    resp.raise_for_status()

    url = urlparse(image_url)
    ext = url.path.split('.')[-1]
    mime_type = 'image/gif' if ext == 'gif' else 'image/jpeg'

    toot_resp = mastodon_cli.media_post(resp.content, mime_type)
    if not toot_resp.get('id'):
        logger.warning('media upload returned no id for %s', image_url)
        return

    mastodon_cli.status_post(text, media_ids=[toot_resp['id']])
    logger.info('send %s', text)

    # ``url`` is already parsed; re-parsing the ParseResult raised an
    # AttributeError, so the log was never written and images were re-posted.
    image_name = url.path.split('/')[-1]
    with open(settings.MASTODON_SYNCED_IMAGES_LOG, 'a') as f:
        f.write(image_name + '\n')


# write binary file with api.settings
def writeSettings(user, pwd, settings_file):
    """Log in as *user* and pickle the client's settings to *settings_file*."""
    api = Client(user, pwd)
    with open(settings_file, "wb") as FileObj:
        pickle.dump(api.settings, FileObj)


# read binary file to api.settings
def readSettings(settings_file):
    """Return the object unpickled from *settings_file*.

    NOTE(security): ``pickle.load`` can execute arbitrary code; only use this
    on a settings file written by ``writeSettings`` on a trusted machine.
    """
    with open(settings_file, "rb") as FileObj:
        return pickle.load(FileObj)


def main():
    """Poll the Instagram feed forever and forward unseen images."""
    if not os.path.exists(settings.IG_PRIVATE_API_SETTINGS):
        writeSettings(settings.IG_LOGIN_USERNAME,
                      settings.IG_LOGIN_PASSWORD,
                      settings.IG_PRIVATE_API_SETTINGS)

    cache_settings = readSettings(settings.IG_PRIVATE_API_SETTINGS)
    api = Client(settings.IG_LOGIN_USERNAME,
                 settings.IG_LOGIN_PASSWORD,
                 settings=cache_settings)

    while True:
        results = api.self_feed()
        items = results.get('items') or []
        logger.info('getting %s posts', len(items))

        # Load the synced log once per poll instead of once per item.
        synced = _load_synced_images()

        for item in items:
            try:
                # Defensive: tolerate a missing/None caption or an item
                # without image candidates instead of crashing the loop.
                caption = item.get('caption') or {}
                text = caption.get('text') or ''
                versions = item.get('image_versions2') or {}
                candidates = versions.get('candidates') or []
                if not candidates:
                    continue

                image_url = candidates[0]['url']
                image_name = _image_name(image_url)
                if image_name in synced:
                    continue

                send_image_to_mastodon(image_url, text)
                # Avoid re-sending the same image within this poll cycle.
                synced.add(image_name)
            except Exception as e:
                # Best-effort: one bad item must not stop the daemon.
                logger.exception('failed to sync item: %s', e)

        time.sleep(_POLL_INTERVAL)


if __name__ == '__main__':
    main()