# Crawl residential-community (小区) POI data for the whole country from the AMap (高德地图) web API.

import json
import requests
import time

# Create the database table that stores the crawled data.
import pymysql

# Open the database connection. The placeholders below must be replaced with
# real credentials. Keyword arguments are used because PyMySQL 1.0+ no longer
# accepts host/user/password/database as positional arguments.
db = pymysql.connect(
    host="您的主机ip",
    user="用户名",
    password="密码",
    database="数据库名",
    charset="utf8",
)
# Module-level cursor; get_data() uses it to insert rows.
cursor = db.cursor()

# Start from an empty table: drop Community if it already exists.
# NOTE: this discards every row stored by a previous run.
cursor.execute("drop table if exists Community")
# Table definition: an auto-increment surrogate key `i` plus one varchar(500)
# column per POI attribute that get_data() stores.
sql='''
create table Community(
i int(8) not null AUTO_INCREMENT,
id varchar(500),
biz_type varchar(500),
name varchar(500),
type varchar(500),
address varchar(500),
tel varchar(500),
location varchar(500),
p_code varchar(500),
p_name varchar(500),
city_code varchar(500),
city_name varchar(500),
ad_code varchar(500),
ad_name varchar(500),
business_area varchar(500),
PRIMARY KEY(i)
)engine=MyISAM AUTO_INCREMENT=1 default CHARSET=utf8;
'''
cursor.execute(sql)


# Fetch one page of results and save its POIs into the Community table.
def get_data(page_index, url_amap):
    """Request one result page and insert every POI it contains.

    Args:
        page_index: Page number to request; substituted for the literal
            ``page_index`` placeholder inside ``url_amap``.
        url_amap: Request URL template containing the ``page_index``
            placeholder.

    Returns:
        The value of the ``pois`` entry of the JSON response, or ``None``
        when the response has no such entry.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.

    Side effects:
        Sleeps 0.5 s before the request, prints progress, inserts rows through
        the module-level ``cursor`` and, while the module-level
        ``total_record`` is still 0, sets it from the response's ``count``.
    """
    global total_record
    # Throttle so consecutive requests are at least half a second apart.
    time.sleep(0.5)
    print('解析页码: ' + str(page_index) + '... ...')

    # Build the URL for this page and fetch it. The timeout keeps a stalled
    # connection from hanging the whole crawl indefinitely.
    url = url_amap.replace('page_index', str(page_index))
    response = requests.get(url, timeout=10)
    poi_json = response.json()

    # Only the first successful response needs to set the total count.
    if total_record == 0:
        total_record = int(poi_json.get('count', 0))

    # JSON keys to read from each POI, in the same order as the columns of
    # the INSERT statement below.
    poi_fields = (
        'id', 'biz_type', 'name', 'type', 'address', 'tel', 'location',
        'pcode', 'pname', 'citycode', 'cityname', 'adcode', 'adname',
        'business_area',
    )
    insert_sql = (
        "INSERT INTO Community(id,biz_type,name,type,address,tel,location,"
        "p_code,p_name,city_code,city_name,ad_code,ad_name,business_area) "
        "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    )

    poi_lists = poi_json.get("pois")
    # Skip both a missing and an empty result list.
    if poi_lists:
        for poi in poi_lists:
            # Every value is stored via str(), so a missing field is saved
            # as the text "None" (unchanged from the original behaviour).
            row = tuple(str(poi.get(field)) for field in poi_fields)
            cursor.execute(insert_sql, row)
    return poi_lists


def getPOIdata(page_size, url_amap):
    """Crawl every result page for one request URL template.

    Page 1 is fetched first; get_data() sets the module-level
    ``total_record`` from that response, which determines how many further
    pages are requested.

    Args:
        page_size: Number of records per page, used to derive the page count.
        url_amap: Request URL template passed through to get_data().
    """
    print("获取POI数据开始")
    get_data(1, url_amap)

    # Ceiling division: a partially filled last page still counts as a page,
    # while an exact multiple of page_size no longer triggers an extra,
    # empty request.
    page_count = (total_record + page_size - 1) // page_size

    # Page 1 has already been fetched above.
    for each_page in range(2, page_count + 1):
        get_data(each_page, url_amap)


if __name__ == '__main__':
    # Load the adcode of every entry in city.json; the context manager makes
    # sure the file handle is closed again.
    with open("city.json", encoding="utf-8-sig") as data:
        strJson = json.load(data)
    city = [entry['adcode'] for entry in strJson]

    # API keys. Per the original author's note, a free key allows 2000
    # requests per day, so several keys are used in turn. Replace the
    # placeholder below with a list of key strings (18 keys are needed to
    # cover city indexes 0-3600).
    key = [
        您的API密匙
        ]

    page_size = 20
    for y, adcode in enumerate(city):
        print(y)
        # Key assignment by city index: 0-1900 use key[0]; after that each
        # block of 100 cities (1901-2000, 2001-2100, ..., 3501-3600) uses the
        # next key, ending at key[17]. Indexes above 3600 are skipped.
        if y <= 1900:
            key_index = 0
        elif y <= 3600:
            key_index = (y - 1901) // 100 + 1
        else:
            continue

        # 'page_index' is a placeholder that get_data() replaces with the
        # actual page number.
        url_amap = (
            'http://restapi.amap.com/v3/place/text?key=' + key[key_index]
            + '&keywords=&types=120302'
            + '&city=' + adcode
            + '&citylimit=true&children=1&offset=20&page=page_index&extensions=all'
        )

        # Reset the running total so get_data() reads it afresh for this city.
        total_record = 0
        getPOIdata(page_size, url_amap)

# Release the database resources: the cursor first, then the connection.
for resource in (cursor, db):
    resource.close()


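# --- Residue from the web page this script was copied from (not code) ---
# [ri-alerts color="primary"]Download link: local download[/ri-alerts]
#
# © Copyright notice
# THE END
# If you like this, please show your support
# Likes: 0 · Share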