1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
| import requests import urllib.request import time import pymongo
# Connection to the MongoDB instance that stores the crawled pages.
client = pymongo.MongoClient(host='192.168.1.123', port=27000)

# Database and collection handles; `sheet_qunar_zyx` is the collection
# that get_list() inserts into.
book_qunar = client.get_database('qunar')
sheet_qunar_zyx = book_qunar.get_collection('qunar_zyx')
# Number of results requested per page; used both in the URL's
# ``limit=<offset>,<size>`` parameter and as the paging step.
_PAGE_SIZE = 20


def _build_list_url(dep, item, offset):
    """Return the list-API URL for ``dep`` -> ``item`` starting at ``offset``."""
    quoted_item = urllib.request.quote(item)
    return (
        'https://touch.dujia.qunar.com/list'
        '?modules=list,bookingInfo,activityDetail'
        '&dep={}&query={}'
        '&dappDealTrace=true'
        '&mobFunction=%E6%89%A9%E5%B1%95%E8%87%AA%E7%94%B1%E8%A1%8C'
        '&cfrom=zyx&it=dujia_hy_destination&date=&needNoResult=true'
        '&originalquery={}'
        '&width=480&height=320&quality=90'
        '&limit={},{}'
        '&includeAD=true&qsact=search'
    ).format(urllib.request.quote(dep), quoted_item, quoted_item,
             offset, _PAGE_SIZE)


def get_list(dep, item):
    """Download every result page for one departure/destination pair.

    A first request reads ``data.limit.routeCount`` from the JSON response
    to learn how many results exist.  The function then requests the list
    in steps of ``_PAGE_SIZE`` and inserts one document per page into the
    ``sheet_qunar_zyx`` collection.  Each request is preceded by a
    one-second pause.

    Args:
        dep: Departure city name (URL-quoted before use).
        item: Destination query string (URL-quoted before use).

    Returns:
        None.  Returns early, storing nothing, when the first response does
        not contain a usable ``routeCount``.

    Raises:
        Whatever ``requests.get`` raises on network failure, and whatever
        ``Response.json`` raises if a later page is not valid JSON; these
        are not caught here.
    """
    time.sleep(1)
    first_page = requests.get(_build_list_url(dep, item, 0))
    try:
        route_count = int(first_page.json()['data']['limit']['routeCount'])
    except (KeyError, TypeError, ValueError):
        # Missing/None keys, a non-numeric count, or a body that is not
        # JSON (JSON decode errors derive from ValueError): nothing to
        # crawl for this pair.  Unlike a bare ``except``, this lets
        # KeyboardInterrupt/SystemExit propagate.
        return

    for offset in range(0, route_count, _PAGE_SIZE):
        time.sleep(1)
        page = requests.get(_build_list_url(dep, item, offset))
        record = {
            # strftime() without a time tuple formats the current local time.
            'date': time.strftime('%Y-%m-%d'),
            'dep': dep,
            'arrive': item,
            'limit': offset,
            'result': page.json(),
        }
        sheet_qunar_zyx.insert_one(record)
def get_json(url):
    """GET ``url``, wait one second, and return the response body parsed as JSON."""
    response = requests.get(url)
    # Pause after every request before handing the result back.
    time.sleep(1)
    payload = response.json()
    return payload
if __name__ == "__main__":
    # Fetch the departure-city listing; its 'data' mapping groups the
    # departure cities under keys.
    departures = get_json('https://touch.dujia.qunar.com/depCities.qunar')

    for group_key in departures['data']:
        for dep in departures['data'][group_key]:
            # Ask for the recommended destinations for this departure city.
            recommend_url = (
                'https://touch.dujia.qunar.com/golfz/sight/arriveRecommend'
                '?dep={}&exclude=&extensionImg=255,175'
            ).format(urllib.request.quote(dep))
            recommendations = get_json(recommend_url)

            # Walk data -> subModules -> items and pull out each 'query' value.
            candidate_queries = (
                entry['query']
                for module in recommendations['data']
                for sub_module in module['subModules']
                for entry in sub_module['items']
            )

            # Keep the first occurrence of each query, in encounter order.
            destinations = []
            for candidate in candidate_queries:
                if candidate in destinations:
                    continue
                destinations.append(candidate)

            # Crawl the route list for every unique destination.
            for item in destinations:
                get_list(dep, item)
|