1. PG数据库
lng,lat to geohash7
SELECT distinct gaode_polygon[0]::float as gd_lng,gaode_polygon[1]::float as gd_lat,
st_geohash(st_setsrid(st_geomfromtext('POINT('|| gaode_polygon[0]::decimal ||' '|| gaode_polygon[1]::decimal || ')'), 4326),7) as geohash7
FROM tabel_sample
读数据
#loc polygon
sql = f"""select distinct city as city_name,circle,geo_polygon
from temp.familymart_city_geo_circle
where city = '北京市'
"""
client = psycopg2.connect(dbname=。。。)
bj_city_geo = gpd.GeoDataFrame.from_postgis(sql, client, geom_col='geo_polygon', crs='epsg:4326')
2. geohash 九宫格
geohash package
import geohash
# 包括自己在内的九宫格
def find_neighbors(x):
try:
return geohash.expand(x)
except:
return None
# 一列geohash的九宫格set
def get_around_df(df_gaode):
"""
9宫格set
"""
round_list1 = df_gaode['hashcode_round'].tolist()
round_list2 = sum(round_list1, [])
round_set = list(set(round_list2))
df_around = pd.DataFrame({'geohash7':round_set})
df_around['hashcode_round']= df_around['geohash7'].apply(lambda x: find_neighbors(x))
return df_around
自定义转换geohash
# geohash 字典
base32_value = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31]
base32_key = '0,1,2,3,4,5,6,7,8,9,b,c,d,e,f,g,h,j,k,m,n,p,q,r,s,t,u,v,w,x,y,z'.split(',')
base32_dict = dict(zip(base32_key, base32_value))
reverse_base32_dict = dict(zip(base32_value, base32_key))
# 当前geohash值对应的二进制串
def decodeToBinary(hashcode):
bCoordinate = []
for codes in hashcode:
decimal_value = base32_dict[codes]
binary_value = bin(decimal_value)
length = len(binary_value)
binary_code = '0' * abs(7 - length) + binary_value[2 - length:]
binary_code = binary_code[-5:]
bCoordinate.append(binary_code)
binary = ''.join(bCoordinate)
lat_b = []
lng_b = []
for i in range(len(binary)):
if i % 2 == 1:
lat_b.append(binary[i])
else:
lng_b.append(binary[i])
return ''.join(lat_b), ''.join(lng_b)
# 二进制加
def add_binary_nums(x, y):
max_len = max(len(x), len(y))
x = x.zfill(max_len)
y = y.zfill(max_len)
result = ''
carry = 0
for i in range(max_len - 1, -1, -1):
r = carry
r += 1 if x[i] == '1' else 0
r += 1 if y[i] == '1' else 0
result = ('1' if r % 2 == 1 else '0') + result
carry = 0 if r < 2 else 1
if carry != 0: result = '1' + result
return result.zfill(max_len)
# 二进制减
def reduce_binary(a, b):
max_len = max(len(a), len(b))
a = a.zfill(max_len)
b = b.zfill(max_len)
new_binary = ''
carry = 0
i = max_len - 1
while i >= 0:
s = int(a[i]) - int(b[i])
if s == -1:
if carry == 0:
carry = 1
new_binary = new_binary + "1"
else:
carry = 1
new_binary = new_binary + "0"
if s == 0:
if carry == 0:
carry = 0
new_binary = new_binary + "0"
else:
carry = 1
new_binary = new_binary + "1"
if s == 1:
if carry == 0:
carry = 0
new_binary = new_binary + "1"
else:
carry = 0
new_binary = new_binary + "0"
i = i - 1
if carry > 0:
new_binary = new_binary + "1"
return new_binary[::-1]
# 二进制九宫格
def find_round_Coordinate(lat_b, lng_b):
tile_round = []
# 当前
center = (lat_b, lng_b)
tile_round.append(center)
center_left = (lat_b, reduce_binary(lng_b, '1'))
tile_round.append(center_left)
center_right = (lat_b, add_binary_nums(lng_b, '1'))
tile_round.append(center_right)
center_up = (add_binary_nums(lat_b, '1'), lng_b)
tile_round.append(center_up)
center_low = (reduce_binary(lat_b, '1'), lng_b)
tile_round.append(center_low)
center_leftup = (add_binary_nums(lat_b, '1'), reduce_binary(lng_b, '1'))
tile_round.append(center_leftup)
center_leftlow = (reduce_binary(lat_b, '1'), reduce_binary(lng_b, '1'))
tile_round.append(center_leftlow)
center_rightup = (add_binary_nums(lat_b, '1'), add_binary_nums(lng_b, '1'))
tile_round.append(center_rightup)
center_rightlow = (reduce_binary(lat_b, '1'), add_binary_nums(lng_b, '1'))
tile_round.append(center_rightlow)
return tile_round
# 二进制九宫格转hashcode
def encodeToHashcode(tile_round):
hashcode_tile_round = []
for item in tile_round:
hashcode = []
lat_b, lng_b = item[0], item[1]
binary = []
for i in range(len(lat_b)):
binary.append(lng_b[i])
binary.append(lat_b[i])
binary.append(lng_b[-1]) # 7位hash binary长度35位 经度多一位
binary = ''.join(binary)
for j in range(0, 35, 5):
binary_value = binary[j:j + 5]
decimal_value = int(binary_value, 2)
codes = reverse_base32_dict[decimal_value]
hashcode.append(codes)
hashcode_str = ''.join(hashcode)
hashcode_tile_round.append(hashcode_str)
return hashcode_tile_round
def hashcode_round(hashcode):
if (len(hashcode) > 12):
print("The length of geohash is out of range")
# 当前geohash值对应的二进制串
lat_b, lng_b = decodeToBinary(hashcode)
# 二进制九宫格
tile_round = find_round_Coordinate(lat_b, lng_b)
# 二进制九宫格转hashcode
hashcode_tile_round = encodeToHashcode(tile_round)
return hashcode_tile_round
buffer 画圆
conn = psycopg2.connect(dbname='', user="", password="", host="",port=5432)
sql = f"""select DISTINCT chnl_code,gdlng::float,gdlat::float,
ST_Transform(ST_SetSRID(ST_Buffer(ST_MakePoint(gdlng::float,gdlat::float)::geography,250)::geometry,4326),3857) as circle_polygon,
st_geohash(st_setsrid(st_geomfromtext('POINT('|| gdlng::decimal ||' '|| gdlat::decimal || ')'), 4326),7) as geohash_code
from retail_table """
df = gpd.GeoDataFrame.from_postgis(sql, conn, geom_col='circle_polygon',
crs={'init': 'epsg:3857'}).to_crs(epsg=4326)
conn.close()
df['loc_geo'] = df.apply(lambda x: Point(x.gdlng, x.gdlat), axis=1)
df = gpd.GeoDataFrame(df, geometry='loc_geo')
sjoin intersects,contains
# 都是geopandas 格式
xy_df3 = gpd.sjoin(rowdf,
df_house,
how='left',
op='contains',
lsuffix='circle_polygon',
rsuffix='point_geo')
read_as_json
# HIVE 无法存储polygon,一般存json格式
sql = f""" select id,type,name,code,parent_name,dist_m,gd_boundary
from table
limit 1
"""
df = connect_sql_hive(sql)
# json转polygon
df['geo_polygon'] = df['gd_boundary'].apply(lambda x: shape(json.loads(x)) )
df = gpd.GeoDataFrame(df, geometry='geo_polygon',crs={'init': 'epsg:3857'}).to_crs(epsg=4326)
个人工作总结
参考文章: