如何自动化构建中文标准地址库
地名作为最常用的社会公共信息,不仅与人们的日常生活息息相关,而且是政府行政行为、经济建设不可缺少的基础信息资源。在国家信息化体系中,地名是不可或缺的重要节点和桥梁,在信息传递中发挥着重要作用。
源码学习
https://gitee.com/addresstool/address
上干货——直接撸代码
一、构建中文地址库
// Create the address table that the pipeline below turns into the standard address database.
DataTable data = new DataTable();

// Two complete room-level records that differ only in unit and id.
// "province" and "city" are deliberately not supplied: in the sample run they
// only appear in the printout that follows data.completion().
String[][] completeRooms = { {"1", "5"}, {"2", "6"} }; // each entry is {unit, id}
for (String[] unitAndId : completeRooms) {
    HashMap<String,String> record = new HashMap<>();
    record.put("county","江宁区");
    record.put("town","汤山街道");
    record.put("community","中前社区");
    record.put("aoi","大明湖畔");
    record.put("alias_aois","乾清宫");
    record.put("sub_aoi","北苑");
    record.put("road","宏运大道");
    record.put("road_no","123");
    record.put("alias_roads","天地大道#金山大道:9"); // road aliases
    record.put("building","9");
    record.put("unit", unitAndId[0]);
    record.put("room","1001");
    record.put("id", unitAndId[1]);
    data.addAddressDic(record);
}

// Two records carrying only building/unit/room (no district, road or AOI).
// In the sample run these two ids are no longer printed after data.addressFilter().
for (String bareId : new String[] {"7", "8"}) {
    HashMap<String,String> record = new HashMap<>();
    record.put("building","9");
    record.put("unit","2");
    record.put("room","1001");
    record.put("id", bareId);
    data.addAddressDic(record);
}

// Step 0: dump the table exactly as entered.
System.out.println("原始地址信息");
data.printData();

// Step 1: filter out junk addresses, then dump again.
System.out.println("剔除垃圾地址");
data.addressFilter();
data.printData();

// Step 2: complete the administrative divisions, then dump again.
System.out.println("补全行政区");
data.completion();
data.printData();

// Step 3: produce the final standard address table and dump it.
System.out.println("标准地址表 最终成果");
data.addressFix();
data.printData();
数据打印
原始地址信息
5={room_id=5, town=汤山街道, county=江宁区, community=中前社区, type=room, alias_roads=天地大道#金山大道:9, building=9, room=1001, unit=1, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=5}
6={room_id=6, town=汤山街道, county=江宁区, community=中前社区, type=room, alias_roads=天地大道#金山大道:9, building=9, room=1001, unit=2, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=6}
7={room_id=7, unit=2, id=7, type=room, building=9, room=1001}
8={room_id=8, unit=2, id=8, type=room, building=9, room=1001}
剔除垃圾地址
5={room_id=5, town=汤山街道, county=江宁区, community=中前社区, type=room, alias_roads=天地大道#金山大道:9, building=9, room=1001, unit=1, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=5, is_address=1}
6={room_id=6, town=汤山街道, county=江宁区, community=中前社区, type=room, alias_roads=天地大道#金山大道:9, building=9, room=1001, unit=2, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=6, is_address=1}
补全行政区
5={room_id=5, town=汤山街道, city=南京市, county=江宁区, community=中前社区, type=room, alias_roads=天地大道#金山大道:9, building=9, room=1001, unit=1, province=江苏省, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=5, is_address=1}
6={room_id=6, town=汤山街道, city=南京市, county=江宁区, community=中前社区, type=room, alias_roads=天地大道#金山大道:9, building=9, room=1001, unit=2, province=江苏省, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=6, is_address=1}
标准地址表 最终成果
5={room_id=5, building_id=5_bld, subaoi_id=6_unit_sub, town=汤山街道, city=南京市, county=江宁区, community=中前社区, type=room, aoi_id=6_unit_sub_aoi, alias_roads=天地大道#金山大道:9, building=9, room=1001, unit=1, province=江苏省, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=5, is_address=1, unit_id=5_unit}
6={room_id=6, building_id=5_bld, subaoi_id=6_unit_sub, town=汤山街道, city=南京市, county=江宁区, community=中前社区, type=room, aoi_id=6_unit_sub_aoi, alias_roads=天地大道#金山大道:9, building=9, room=1001, unit=2, province=江苏省, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=6, is_address=1, unit_id=6_unit}
5_bld={building_id=5_bld, subaoi_id=6_unit_sub, town=汤山街道, city=南京市, county=江宁区, community=中前社区, type=building, aoi_id=6_unit_sub_aoi, alias_roads=天地大道#金山大道:9, building=9, province=江苏省, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=5_bld, is_address=1}
6_unit_sub={subaoi_id=6_unit_sub, town=汤山街道, city=南京市, county=江宁区, community=中前社区, type=sub_aoi, aoi_id=6_unit_sub_aoi, alias_roads=天地大道#金山大道:9, province=江苏省, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=6_unit_sub, is_address=1}
6_unit_sub_aoi={town=汤山街道, city=南京市, county=江宁区, community=中前社区, type=aoi, aoi_id=6_unit_sub_aoi, alias_roads=天地大道#金山大道:9, province=江苏省, road=宏运大道, road_no=123, alias_aois=乾清宫, aoi=大明湖畔, id=6_unit_sub_aoi, is_address=1}
6_unit={building_id=5_bld, subaoi_id=6_unit_sub, town=汤山街道, city=南京市, county=江宁区, community=中前社区, type=unit, aoi_id=6_unit_sub, alias_roads=天地大道#金山大道:9, building=9, unit=2, province=江苏省, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=6_unit, is_address=1, unit_id=6_unit}
5_unit={building_id=5_bld, subaoi_id=6_unit_sub, town=汤山街道, city=南京市, county=江宁区, community=中前社区, type=unit, aoi_id=6_unit_sub, alias_roads=天地大道#金山大道:9, building=9, unit=1, province=江苏省, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=5_unit, is_address=1, unit_id=5_unit}
如上述打印结果所示,已成功自动过滤垃圾地址,并为正常地址补全行政区划,最终完美生成中文标准地址库。
二、业务地址关联标准地址库
// Load the processed address database (built in `data`) into an AddressTool for lookups.
AddressTool ss = new AddressTool();
data.initData(ss);

// Resolve several free-text business addresses against the standard address database.
// Each call prints whatever getStdAddress returns for that input.
System.out.println(ss.getStdAddress("大明湖畔北苑9-1-1001"));
System.out.println(ss.getStdAddress("花果山大道大明湖畔北苑9-1-1001"));
System.out.println(ss.getStdAddress("花果山大道大明湖畔9-1-1001"));
System.out.println(ss.getStdAddress("金山大道大明湖畔"));

// Rough throughput check: print a wall-clock timestamp before and after 200,000 lookups.
// The labels match the "开始时间…" / "结束时间…" lines shown in the published output.
SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd 'at' HH:mm:ss z");
Date date = new Date();
System.out.println("开始时间" + formatter.format(date));
for (int i = 0; i < 200000; i++) {
    ss.getStdAddress("金山大道大明湖畔9-1-1001");
}
System.out.println(ss.getStdAddress("大明湖畔9-1-1001"));
date = new Date();
System.out.println("结束时间" + formatter.format(date));
结果打印
{5=江苏省南京市江宁区汤山街道中前社区宏运大道123号大明湖畔北苑9栋1单元1001室}
{5=江苏省南京市江宁区汤山街道中前社区宏运大道123号大明湖畔北苑9栋1单元1001室}
{5=江苏省南京市江宁区汤山街道中前社区宏运大道123号大明湖畔北苑9栋1单元1001室}
{6_unit_sub_aoi=江苏省南京市江宁区汤山街道中前社区宏运大道123号大明湖畔}
开始时间2024-03-26 at 13:51:26 CST
{5=江苏省南京市江宁区汤山街道中前社区宏运大道123号大明湖畔北苑9栋1单元1001室}
结束时间2024-03-26 at 13:51:34 CST
实测速度约25000条/秒(20万次查询耗时约8秒)
使用中有问题或者建议,欢迎联系邮箱addresstool@163.com
Java 资源下载
https://download.youkuaiyun.com/download/u011024436/89035851
源码学习
https://gitee.com/addresstool/address