2024年3月2日发(作者:五菱宏光官网商城)
der的全部代码# coding=utf-8import jsonimport reimport scrapyfrom s import Rulefrom tractors import LinkExtractorfrom import BrandItem, SerialItem, ModelItem# json替换keydef replacea(matched): return \'\"\' + (\'value\') + \'\":\'# 解析车系Itemdef parse_serial_item(serial, bid, vendor): item = SerialItem() item[\'id\'] = (\'div/div/a/@id\')[0].re(r\'n(d+)\')[0] item[\'bid\'] = bid item[\'name\'] = (\'div/div/a/@title\')[0].extract() item[\'vendor\'] = vendor item[\'logo\'] = (\'div/div/a/img/@src\')[0].extract() sell = (\'div/ul/li[@class=\"price\"]/a/text()\')[0].re(ur\'停售\') item[\'sell\'] = \'0\' if sell else \'1\' return itemclass YiCheSpider(): name = \"yiche\" rules = ( # 所有车系 Rule(LinkExtractor(allow=(r\'/tree_chexing/mb_d+/$\',)), callback=\'parse_serial\', follow=True), # 在售车型 Rule(LinkExtractor(allow=(r\'/w+/$\',)), callback=\'parse_model\', follow=True), # 停售车型 Rule(LinkExtractor(allow=(r\'/AjaxNew/?csID=d+&year=d+$\',)), callback=\'parse_model_selled\', follow=True), ) def start_requests(self): url = \'/CarInfo/?tagtype=chexing&pagetype=masterbrand&objid=0\' yield t(url, callback=, dont_filter=True) # 解析品牌 def parse(self, response): print \'==> %s\' %
result = (\'(?P
# 停售车型 years = (\'//*[@id=\"carlist_nosaleyear\"]/a/@id\').extract() for year in years: url = \'/AjaxNew/?csID=%s&year=%s\' % (sid, year) request = t(url=url, callback=_model_selled, dont_filter=True) [\'sid\'] = sid yield request @staticmethod def parse_model_selled(response): print \'==> %s\' % sid = [\'sid\'] try: datas = (_as_unicode()) for data in datas: classify = data[\'Engine_Exhaust\'] + \'/\' + data[\'MaxPower\'] + \' \' + data[\'InhaleType\'] for car in data[\'carList\']: item = ModelItem() item[\'id\'] = car[\'CarID\'] item[\'sid\'] = sid item[\'name\'] = car[\'YearType\'] + \' \' + car[\'Name\'] item[\'classify\'] = classify item[\'sell\'] = \'0\' yield item except ValueError: print \'model parse error,serial_id[%s].\' % sid pass
更多推荐
车型,爬取,车系,官网,宏光,全部,解析
发布评论