poi数据的co-location空间数据挖掘分析

这是本人在论文中使用的代码,大体上拆成两部分进行:获取同位模式关系、输出同位模式结果。

仅供各位参考

附件 co_location_minner.py

# Standard library
import os
import time
from functools import partial
from multiprocessing import Pool as ThreadPool2

# Third-party
import geohash
import pandas as pd
from efficient_apriori import apriori
from geopy.distance import great_circle

classColocationMining(object):

def __init__(self):
    """Read the settings, then load and prepare the POI data.

    The settings are read first because ``init_data`` relies on
    ``self.filepath`` and ``self.center_industry`` having been set.
    """
    # Starts empty; nothing in this method fills it.
    self.co_location_data_lst = []
    self.read_settings()
    self.init_data()

def init_data(self):
    """Load the input file and derive the lookup tables built from it.

    Sets ``process_cnt``, ``industry_map_dict``, ``center_industry_code``,
    ``industry_map_dict_reverse`` and ``root_path`` on the instance, after
    calling ``read_files`` and ``input_df_etl``.

    Raises:
        KeyError: if ``self.center_industry`` is not one of the keys
            returned by ``get_industry_map_dict``.
    """
    self.process_cnt = 0
    self.read_files()
    self.input_df_etl()

    # Industry name -> integer code, plus the inverse mapping.
    self.industry_map_dict = self.get_industry_map_dict()
    self.center_industry_code = self.industry_map_dict[self.center_industry]
    self.industry_map_dict_reverse = {
        code: name for name, code in self.industry_map_dict.items()
    }

    self.root_path = os.getcwd()

def read_settings(self):
    """Read the run parameters from ``./settings.xlsx``.

    The sheet is indexed by its ``参数名称`` column and each value is taken
    from the ``参数值`` column. On success the following attributes are
    set: ``min_distance``, ``center_industry``, ``conf_threshold``,
    ``supp_threshold`` and ``filepath``.

    Any exception is caught, printed, and followed by a long sleep; it is
    not re-raised, so after a failure some or all of the attributes above
    may be missing.
    """
    try:
        df_settings = pd.read_excel('./settings.xlsx')
        df_settings = df_settings.set_index('参数名称')
        self.min_distance = df_settings.loc['距离阈值', '参数值']
        self.center_industry = df_settings.loc['中心poi行业大类', '参数值']
        self.conf_threshold = df_settings.loc['最小置信度阈值', '参数值']
        self.supp_threshold = df_settings.loc['最小支持度阈值', '参数值']
        self.filepath = df_settings.loc['指定文件路径', '参数值']
    except Exception as e:
        print('读取配置文件出错!,错误详情:%s' % e)
        # NOTE(review): presumably this keeps a console window open so the
        # message can be read -- confirm; execution continues afterwards
        # without the settings attributes.
        time.sleep(10000)

def read_files(self):
    """Load the file named by ``self.filepath`` into ``self.df``.

    Paths ending in ``.csv`` are read with ``pd.read_csv`` (UTF-8); every
    other path is handed to ``pd.read_excel``.

    Any exception is caught: ``self.df`` becomes an empty DataFrame, the
    error is printed, and the method sleeps before returning. The
    exception is not re-raised.
    """
    try:
        path = str(self.filepath)
        if path.endswith('.csv'):
            self.df = pd.read_csv(path, encoding='utf8')
        else:
            self.df = pd.read_excel(path)
    except Exception as e:
        self.df = pd.DataFrame()
        print('读取指定的文件失败!,错误详情:%s' % e)
        # NOTE(review): presumably this keeps a console window open so the
        # message can be read -- confirm.
        time.sleep(10000)

def input_df_etl(self):
    """Add derived columns to ``self.df`` and split it by industry.

    ``self.df`` is modified in place: ``wgs84_lng`` / ``wgs84_lat`` are
    converted to strings, a ``location`` column (``"lng|lat"``) and a
    ``geohash`` column are added, and missing ``行业大类`` values are
    replaced by the empty string.

    Rows with an empty ``行业大类`` are then excluded from the two result
    frames (they remain in ``self.df``):

    * ``self.df_select``    -- rows whose industry equals
      ``self.center_industry``, re-indexed from 0.
    * ``self.df_no_select`` -- all other rows, original index kept.
    """
    df = self.df

    # Coordinates are kept as text so they can be joined into one key.
    df['wgs84_lng'] = df['wgs84_lng'].astype(str)
    df['wgs84_lat'] = df['wgs84_lat'].astype(str)
    df['location'] = df['wgs84_lng'] + '|' + df['wgs84_lat']
    df['geohash'] = df['location'].apply(self.location2geohash)

    df['行业大类'] = df['行业大类'].fillna('')
    df = df[df['行业大类'] != '']

    is_center = df['行业大类'] == self.center_industry
    self.df_select = df[is_center].reset_index(drop=True)
    self.df_no_select = df[~is_center]

def location2geohash(self, location):
    """Encode a ``"lng|lat"`` string as a 12-character geohash.

    Args:
        location: longitude and latitude as text, separated by ``|``,
            longitude first.

    Returns:
        The value returned by ``geohash.encode`` at precision 12.

    Raises:
        ValueError: if the string does not split into exactly two parts
            or a part cannot be parsed as a float.
    """
    lng, lat = map(float, location.split('|'))
    return geohash.encode(latitude=lat, longitude=lng, precision=12)

def get_industry_map_dict(self):
    """Assign an integer code to every distinct ``行业大类`` value.

    The distinct values of ``self.df['行业大类']`` are numbered from 0.
    They are numbered in sorted order (by their string form) so that the
    same data always produces the same codes, rather than depending on
    set iteration order.

    Returns:
        dict mapping each industry value to its code; the same dict is
        also stored on ``self.industry_map_dict``.
    """
    industries = sorted(set(self.df['行业大类']), key=str)
    self.industry_map_dict = {
        industry: code for code, industry in enumerate(industries)
    }
    return self.industry_map_dict

def caculate_coords_distance(self, location1, location2):
    """Return the great-circle distance in metres between two points.

    Args:
        location1: first point as a ``"lng|lat"`` string.
        location2: second point as a ``"lng|lat"`` string.

    Returns:
        The ``.meters`` value of ``geopy.distance.great_circle`` for the
        two points.
    """
    # The strings are longitude-first; they are reversed to latitude-first
    # before being passed to great_circle.
    point1 = [float(x) for x in location1.split("|")][::-1]
    point2 = [float(x) for x in location2.split("|")][::-1]
    return great_circle(point1, point2).meters

defget_nearyby_pois(self,_geohash,location):

df_no_select=self.df_no_select

target_geohash=_geohash[:6]

df_nearby=df_no_select[df_no_select[‘geohash’].str.contains(target_geohash)]

ifdf_nearby.shape[0]>=1:

df_nearby[‘distance’]=df_nearby[‘location’].apply(partial(self.caculate_coords_distance,location))

df_nearby=df_nearby[df_nearby[‘distance’]

Original: https://blog.csdn.net/zccccccc1998/article/details/124070857
Author: 宗成1998
Title: poi数据的co-location空间数据挖掘分析

原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/742664/

转载文章受原作者版权保护。转载请注明原作者出处!

(0)

大家都在看

亲爱的 Coder【最近整理,可免费获取】👉 最新必读书单  | 👏 面试题下载  | 🌎 免费的AI知识星球