# coding=utf-8
"""Look up product information for a barcode (GTIN) from several web services.

Two clients are provided:

* ``BarcodeSpider`` queries the bff.gds.org.cn product-search endpoints and,
  for imported goods, can fall back to a RapidAPI barcode service.
* ``ShowApiSpider`` queries the route.showapi.com barcode endpoint.
"""
import json

import requests
from loguru import logger

# Seconds to wait for any single HTTP request before giving up. Without a
# timeout, ``requests`` waits forever on a stalled server.
DEFAULT_TIMEOUT = 10


class BarcodeSpider:
    """Barcode spider: fetch product data for a barcode from bff.gds.org.cn.

    Barcodes starting with ``69``/``069`` are routed to the domestic product
    endpoints; everything else is treated as an imported product, with an
    optional RapidAPI fallback when the primary lookup yields nothing usable.
    """

    # Prefix added to picture paths that are not already absolute URLs.
    _OSS_BASE_URL = "https://oss.gds.org.cn"

    def __init__(
        self,
        rapid_api_url="https://barcodes1.p.rapidapi.com/",
        x_rapidapi_key="",
        x_rapidapi_host="barcodes1.p.rapidapi.com",
        timeout=DEFAULT_TIMEOUT,
    ):
        """Store endpoint URLs and RapidAPI credentials.

        Args:
            rapid_api_url: URL of the RapidAPI barcode endpoint used as fallback.
            x_rapidapi_key: RapidAPI key; when empty the fallback is disabled.
            x_rapidapi_host: Value sent in the ``X-RapidAPI-Host`` header.
            timeout: Per-request timeout in seconds passed to ``requests``.
        """
        self.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
        self.base_url = (
            "https://bff.gds.org.cn/gds/searching-api/ProductService/homepagestatistic"
        )
        self.domestic_url = (
            "https://bff.gds.org.cn/gds/searching-api/ProductService/ProductListByGTIN"
        )
        self.domestic_url_simple = "https://bff.gds.org.cn/gds/searching-api/ProductService/ProductSimpleInfoByGTIN"
        self.imported_url = "https://bff.gds.org.cn/gds/searching-api/ImportProduct/GetImportProductDataForGtin"
        self.imported_url_blk = "https://www.barcodelookup.com/"
        self.rapid_api_url = rapid_api_url
        self.x_rapidapi_key = x_rapidapi_key
        self.x_rapidapi_host = x_rapidapi_host
        self.timeout = timeout

    def _new_session(self):
        """Return a ``requests.Session`` that sends this spider's User-Agent."""
        session = requests.Session()
        session.headers.update({"User-Agent": self.user_agent})
        return session

    def _get(self, session, url, label, barcode, params=None):
        """GET ``url``; return the response, or ``None`` (after logging) unless it is HTTP 200.

        ``label`` is the endpoint name that appears in the log message.
        """
        response = session.get(url, params=params, timeout=self.timeout)
        if response.status_code != 200:
            logger.error(
                "error in getting {} status_code is {}, barcode is {}".format(
                    label, response.status_code, barcode
                )
            )
            return None
        return response

    def get_domestic_good(self, barcode):
        """Look up a domestic product by barcode.

        Returns:
            The first matching item as a dict (cleaned by ``rework_good`` and,
            when available, enriched with a ``simple_info`` entry), or ``None``
            when any request fails or no item is found.
        """
        with self._new_session() as session:
            # The landing endpoint is requested first on the same session,
            # presumably so the later calls carry its cookies -- TODO confirm.
            if self._get(session, self.base_url, "base_url", barcode) is None:
                return None

            payload = {"PageSize": "30", "PageIndex": "1", "SearchItem": str(barcode)}
            response = self._get(
                session, self.domestic_url, "domestic_url", barcode, params=payload
            )
            if response is None:
                return None

            good = json.loads(response.text)
            if good.get("Code") == 2:
                logger.error(
                    "error, {}, barcode is {}".format(good.get("Msg"), barcode)
                )
                return None
            items = (good.get("Data") or {}).get("Items")
            if good.get("Code") != 1 or not items:
                logger.error("error, item no found, barcode is {}".format(barcode))
                return None

            item = items[0]
            # Enrichment is best-effort: the base item is returned either way.
            self._attach_simple_info(session, barcode, item)
            return self.rework_good(item)

    def _attach_simple_info(self, session, barcode, item):
        """Add ``item["simple_info"]`` from the simple-info endpoint when it answers usefully."""
        base_id = item.get("base_id")
        if base_id is None:
            return
        payload = {"gtin": str(barcode), "id": base_id}
        response = session.get(
            self.domestic_url_simple, params=payload, timeout=self.timeout
        )
        if response.status_code != 200:
            return
        try:
            simple_info = json.loads(response.text)
        except ValueError:
            # An unparsable body is treated like any other failed enrichment.
            return
        if not isinstance(simple_info, dict) or simple_info.get("Code") != 1:
            return
        data = simple_info.get("Data", "")
        if data != "":
            item["simple_info"] = data

    def get_imported_good(self, barcode):
        """Look up an imported product by barcode.

        Falls back to ``get_imorted_good_from_blk`` when the primary lookup
        fails, finds nothing, or finds a single item without ``description_cn``.
        With several items, the first one whose ``realname`` equals its
        ``importer_name`` is preferred, otherwise the first item.

        Returns:
            A product dict, or ``None`` when nothing could be found.
        """
        items = self._query_imported_items(barcode)
        if not items:
            return self.get_imorted_good_from_blk(barcode)

        if len(items) == 1:
            if items[0].get("description_cn") is None:
                return self.get_imorted_good_from_blk(barcode)
            return self.rework_good(items[0])

        for item in items:
            if item.get("realname") == item.get("importer_name"):
                return self.rework_good(item)
        return self.rework_good(items[0])

    def _query_imported_items(self, barcode):
        """Return the non-empty item list from the imported-product endpoint, or ``None``."""
        with self._new_session() as session:
            if self._get(session, self.base_url, "base_url", barcode) is None:
                return None

            payload = {
                "PageSize": "30",
                "PageIndex": "1",
                "Gtin": str(barcode),
                "Description": "",
                "AndOr": "0",
            }
            response = self._get(
                session, self.imported_url, "imported_url", barcode, params=payload
            )
            if response is None:
                return None

            good = json.loads(response.text)
            items = (good.get("Data") or {}).get("Items")
            if good.get("Code") != 1 or not items:
                logger.error("error, item no found, barcode is {}".format(barcode))
                return None
            return items

    def get_imorted_good_from_blk(self, barcode):
        """Look up a product through the RapidAPI barcode endpoint.

        The method name keeps its historical spelling because callers use it;
        ``get_imported_good_from_blk`` is an alias.

        Returns:
            A dict with ``description_cn``, ``picfilename``,
            ``specification_cn`` and ``gtin``, or ``None`` when no RapidAPI key
            is configured, the request is rejected, or the response carries no
            ``product`` entry.
        """
        if not self.x_rapidapi_key:
            return None

        headers = {
            "X-RapidAPI-Key": self.x_rapidapi_key,
            "X-RapidAPI-Host": self.x_rapidapi_host,
        }
        response = requests.get(
            self.rapid_api_url,
            headers=headers,
            params={"query": barcode},
            timeout=self.timeout,
        )
        if not response.ok:
            logger.error(
                "error in getting rapid_api_url status_code is {}, barcode is {}".format(
                    response.status_code, barcode
                )
            )
            return None

        product = response.json().get("product")
        if not product:
            return None

        # Products without pictures or attributes must not abort the lookup.
        images = product.get("images") or []
        attributes = product.get("attributes") or {}
        return {
            "description_cn": product.get("title"),
            "picfilename": images[0] if images else None,
            "specification_cn": ", ".join(
                f"{key}:{value}" for key, value in attributes.items()
            ),
            "gtin": barcode,
        }

    # Correctly spelled alias for the method above.
    get_imported_good_from_blk = get_imorted_good_from_blk

    def rework_good(self, good):
        """Clean a product dict in place and return it.

        Removes the ``id``, ``f_id``, ``brandid`` and ``base_id`` keys, strips
        whitespace from ``branch_code``, and prefixes ``picture_filename`` /
        ``picfilename`` with the OSS host unless they already start with
        ``http``. Missing keys are ignored.
        """
        for key in ("id", "f_id", "brandid", "base_id"):
            good.pop(key, None)

        branch_code = good.get("branch_code")
        if branch_code:
            good["branch_code"] = branch_code.strip()

        for key in ("picture_filename", "picfilename"):
            path = good.get(key)
            if path and not path.startswith("http"):
                good[key] = self._OSS_BASE_URL + path
        return good

    def get_good(self, barcode):
        """Dispatch to the domestic or imported lookup based on the barcode prefix."""
        barcode = str(barcode)
        if barcode.startswith(("69", "069")):
            return self.get_domestic_good(barcode)
        return self.get_imported_good(barcode)


class ShowApiSpider:
    """Barcode lookup through route.showapi.com.

    Sample of the dictionary returned by the service (it appears to be the
    ``showapi_res_body`` object; ``get_good`` removes ``ret_code``, ``remark``
    and ``flag`` before returning it):

    {
        "spec": "300毫升",
        "sptmImg": "",
        "remark": "查询成功!",
        "img": "http://hj2.co/barcode/img/36be2cb461d5aa0e213755368be93af3",
        "ycg": "",
        "nw": "",
        "ret_code": "0",
        "description": "",
        "qs": "",
        "manuAddress": "",
        "note": "checkResult:1;备注:logout_flag:0;login_date:Jul 28 1993 12:00:00:000AM;valid_date:Jul 28 2023 12:00:00:000AM;宽:7.8;单位:CM;高:16.1;深:3.7;英文名称:Johnson's milk+rice bath 300ml;关键字:沐浴露;销售单位:BX;形态描述:个人护理用品;毛重:339;上市时间:2018-08-01;产地:上海;",
        "goodsType": "服装、箱包、个人护理用品>>个人护理用品>>洗浴、身体护理品>>皮肤护理品",
        "gpcType": "身体清洁/洗涤/香皂用品",
        "gw": "",
        "keyword": "沐浴露",
        "width": "",
        "gpc": "10000330",
        "code": "6907376500056",
        "hight": "",
        "depth": "",
        "manuName": "强生(中国)有限公司",
        "price": "",
        "flag": true,
        "imgList": [],
        "trademark": "强生婴儿",
        "goodsName": "强生婴儿牛奶沐浴露300毫升"
    }
    """

    def __init__(self, app_keys, timeout=DEFAULT_TIMEOUT):
        """Store the API keys to try, in order, and the per-request timeout in seconds."""
        self.app_keys = app_keys
        self.base_url = "https://route.showapi.com/66-22"
        self.timeout = timeout

    def get_good(self, barcode):
        """Look up a ``69``/``069``-prefixed barcode, trying each app key in turn.

        Returns:
            The response body dict without ``ret_code``, ``remark`` and
            ``flag``, or ``None`` when the barcode has another prefix or no
            key produced a successful answer.
        """
        barcode = str(barcode)
        if not barcode.startswith(("069", "69")):
            return None
        # Strip the leading zeros before querying.
        barcode = barcode.lstrip("0")

        for app_key in self.app_keys:
            response = requests.get(
                self.base_url,
                params={"appKey": app_key, "code": barcode},
                timeout=self.timeout,
            )
            resp_data = response.json()
            good = resp_data.get("showapi_res_body") or {}
            if resp_data.get("showapi_res_code") == 0 and good.get("ret_code") == "0":
                for key in ("ret_code", "remark", "flag"):
                    good.pop(key, None)
                return good
            # The app key is deliberately left out of the log message.
            logger.error(
                "error in getting showapi, showapi_res_code is {}, barcode is {}".format(
                    resp_data.get("showapi_res_code"), barcode
                )
            )
        return None


def main():
    """Run a sample lookup and print the result."""
    # Sample barcodes:
    #   domestic product:       "06917878036526"
    #   imported product:       "4901201103803"
    #   international product:  "3346476426843"
    spider = BarcodeSpider()
    good = spider.get_good("3346476426843")
    print(good)


if __name__ == "__main__":
    main()

"""
Domestic product dictionary (sample)
"keyword": "农夫山泉",
"branch_code": "3301 ",
"gtin": "06921168593910",
"specification": "900毫升",
"is_private": false,
"firm_name": "农夫山泉股份有限公司",
"brandcn": "农夫山泉",
"picture_filename": "https://oss.gds.org.cn/userfile/uploada/gra/1712072230/06921168593910/06921168593910.1.jpg",
"description": "农夫山泉NFC橙汁900ml",
"logout_flag": "0",
"have_ms_product": 0,
"base_create_time": "2018-07-10T10:01:31.763Z",
"branch_name": "浙江分中心",
"base_source": "Source",
"gpc": "10000201",
"gpcname": "即饮型调味饮料",
"saledate": "2017-11-30T16:00:00Z",
"saledateyear": 2017,
"base_last_updated": "2019-01-09T02:00:00Z",
"base_user_id": "源数据服务",
"code": "69211685",
"levels": null,
"levels_source": null,
"valid_date": "2023-02-16T16:00:00Z",
"logout_date": null,
"gtinstatus": 1
"""

"""
Imported product dictionary (sample)
"gtin": "04901201103803",
"description_cn": "UCC117速溶综合咖啡90g",
"specification_cn": "90克",
"brand_cn": "悠诗诗",
"gpc": "10000115",
"gpc_name": "速溶咖啡",
"origin_cn": "392",
"origin_name": "日本",
"codeNet": null,
"codeNetContent": null,
"suggested_retail_price": 0,
"suggested_retail_price_unit": "人民币",
"txtKeyword": null,
"picfilename": "https://oss.gds.org.cn/userfile/importcpfile/201911301903478446204015916.png",
"realname": "磨禾(厦门)进出口有限公司",
"branch_code": "3501",
"branch_name": "福建分中心",
"importer_name": "磨禾(厦门)进出口有限公司",
"certificatefilename": null,
"certificatestatus": 0,
"isprivary": 0,
"isconfidentiality": 0,
"datasource": 0
"""

"""
International product dictionary (no sample recorded)
"""