这是本人在论文中使用的代码,大体上拆成两部分进行:获取同位模式关系、输出同位模式结果。
仅供各位参考
附件:co_location_minner.py
# Standard library
import os
import time
from functools import partial
from multiprocessing import Pool as ThreadPool2

# Third-party
import geohash
import pandas as pd
from efficient_apriori import apriori
from geopy.distance import great_circle
class ColocationMining(object):
    """Load POI data and prepare it for co-location mining around one industry.

    Construction reads ``./settings.xlsx``, loads the POI table named there,
    adds ``location`` / ``geohash`` columns, splits the rows into the centre
    industry and everything else, and builds an industry <-> integer code
    mapping.
    """

    def __init__(self):
        """Read the settings, then load and pre-process the POI table."""
        # Result accumulator; nothing in this part of the class fills it.
        self.co_location_data_lst = []
        self.read_settings()
        self.init_data()

    def init_data(self):
        """Load the input file and derive the lookup structures built from it.

        Raises:
            KeyError: if ``self.center_industry`` is not one of the values of
                the ``行业大类`` column.
        """
        self.process_cnt = 0
        self.read_files()
        self.input_df_etl()
        self.industry_map_dict = self.get_industry_map_dict()
        self.center_industry_code = self.industry_map_dict[self.center_industry]
        # Inverse mapping: integer code -> industry name.
        self.industry_map_dict_reverse = {
            code: name for name, code in self.industry_map_dict.items()
        }
        self.root_path = os.getcwd()

    def read_settings(self):
        """Read the run parameters from ``./settings.xlsx``.

        The sheet is indexed by its ``参数名称`` column and the values are
        taken from its ``参数值`` column.  On any error a message is printed
        and the process pauses; the exception is not re-raised, so attributes
        that were not reached stay unset.
        """
        try:
            df_settings = pd.read_excel('./settings.xlsx')
            df_settings = df_settings.set_index('参数名称')
            self.min_distance = df_settings.loc['距离阈值', '参数值']
            self.center_industry = df_settings.loc['中心poi行业大类', '参数值']
            self.conf_threshold = df_settings.loc['最小置信度阈值', '参数值']
            self.supp_threshold = df_settings.loc['最小支持度阈值', '参数值']
            self.filepath = df_settings.loc['指定文件路径', '参数值']
        except Exception as e:
            print('读取配置文件出错!,错误详情:%s' % e)
            # NOTE(review): 10000-second pause kept from the original;
            # presumably it keeps a console window open so the message can be
            # read -- confirm before changing.
            time.sleep(10000)

    def read_files(self):
        """Load ``self.filepath`` into ``self.df``.

        Paths ending in ``.csv`` are read as UTF-8 CSV; anything else is
        handed to ``pd.read_excel``.  On any error ``self.df`` becomes an
        empty DataFrame, a message is printed and the process pauses.
        """
        try:
            path = str(self.filepath)
            if path.endswith('.csv'):
                self.df = pd.read_csv(path, encoding='utf8')
            else:
                self.df = pd.read_excel(path)
        except Exception as e:
            self.df = pd.DataFrame()
            print('读取指定的文件失败!,错误详情:%s' % e)
            # NOTE(review): same long pause as in read_settings -- confirm intent.
            time.sleep(10000)

    def input_df_etl(self):
        """Add location columns and split rows by industry.

        Reads the ``wgs84_lng``, ``wgs84_lat`` and ``行业大类`` columns of
        ``self.df``.  The new ``location`` (``"lng|lat"``) and ``geohash``
        columns, and the blank-filled ``行业大类`` column, are written onto
        ``self.df`` itself.  Rows with an empty ``行业大类`` are dropped only
        from the two derived frames:

        * ``self.df_select``    -- rows of the centre industry, index reset.
        * ``self.df_no_select`` -- all remaining rows.
        """
        df = self.df
        df['wgs84_lng'] = df['wgs84_lng'].astype(str)
        df['wgs84_lat'] = df['wgs84_lat'].astype(str)
        df['location'] = df['wgs84_lng'] + '|' + df['wgs84_lat']
        df['geohash'] = df['location'].apply(self.location2geohash)
        df['行业大类'] = df['行业大类'].fillna('')
        # From here on `df` is a filtered frame; self.df keeps the blank rows.
        df = df[df['行业大类'] != '']
        df_select = df[df['行业大类'] == self.center_industry]
        self.df_select = df_select.reset_index(drop=True)
        self.df_no_select = df[df['行业大类'] != self.center_industry]

    def location2geohash(self, location):
        """Encode a ``"lng|lat"`` string as a 12-character geohash.

        Args:
            location: longitude and latitude joined by ``|``.

        Returns:
            The value returned by ``geohash.encode`` at precision 12.
        """
        lng, lat = [float(x) for x in location.split('|')]
        _geohash = geohash.encode(latitude=lat, longitude=lng, precision=12)
        return _geohash

    def get_industry_map_dict(self):
        """Assign an integer code to every distinct ``行业大类`` value.

        Codes are consecutive integers starting at 0, handed out in the
        iteration order of a ``set``, so a given industry is not guaranteed
        the same code on every run.

        Returns:
            The mapping ``{industry: code}``; it is also stored on
            ``self.industry_map_dict``.
        """
        industry_set = set(self.df['行业大类'])
        self.industry_map_dict = {
            industry: code for code, industry in enumerate(industry_set)
        }
        return self.industry_map_dict

    def caculate_coords_distance(self, location1, location2):
        """Return the great-circle distance in meters between two locations.

        Args:
            location1: first point as a ``"lng|lat"`` string.
            location2: second point as a ``"lng|lat"`` string.

        Returns:
            ``great_circle(...).meters`` for the two points.
        """
        # The strings hold lng first; reversing passes latitude first to
        # great_circle.
        point1 = [float(x) for x in location1.split("|")][::-1]
        point2 = [float(x) for x in location2.split("|")][::-1]
        d = great_circle(point1, point2).meters
        return d
defget_nearyby_pois(self,_geohash,location):
df_no_select=self.df_no_select
target_geohash=_geohash[:6]
df_nearby=df_no_select[df_no_select[‘geohash’].str.contains(target_geohash)]
ifdf_nearby.shape[0]>=1:
df_nearby[‘distance’]=df_nearby[‘location’].apply(partial(self.caculate_coords_distance,location))
df_nearby=df_nearby[df_nearby[‘distance’]
Original: https://blog.csdn.net/zccccccc1998/article/details/124070857
Author: 宗成1998
Title: poi数据的co-location空间数据挖掘分析
原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/742664/
转载文章受原作者版权保护。转载请注明原作者出处!