上篇文章中笔者已经实现了对实体关系的抽取,形成了对应的三元组,这篇主要讲如何根据抽取的三元组去生成对应的医疗知识图谱。
前述部分
- 知识图谱的构建方式有很多。一种是基于Protege构建:它使用本体一层一层地手动定义概念和关系,最终结果保存为RDF或者OWL文件,具体流程可以参考:Protege本体构建。另一种方法是使用工业界比较常用的Neo4j图数据库管理工具,它操作简单,分为桌面版和社区版,可以根据需要自行下载。笔者就是基于Neo4j实现数据的批量读取,最终生成医疗知识图谱。
1、数据的格式:
2、根据已知的数据去构建对应的医疗知识图谱
- 2.1构建实体类型
- 2.2构建实体关系
- 2.3 访问Neo4j图数据库接口[self.g = Graph("http://10.10.108.75:7474", username="neo4j", password="123456789")],这部分根据开发者自行设定,桌面版本改成localhost即可。另外默认的Neo4j登录账号是neo4j,密码是neo4j,首次登录会提示修改密码。默认端口是7474(HTTP)和7687(Bolt)。
class MedicalGraph:
    """Loads medical records from a JSON-lines file and writes them to Neo4j."""

    def __init__(self, uri="http://10.10.108.75:7474", username="neo4j",
                 password="123456789", data_path=None):
        """Remember the data file location and create the py2neo ``Graph``.

        Args:
            uri: HTTP address of the Neo4j server (use ``localhost`` for a
                desktop install).
            username: Neo4j account name.
            password: Neo4j account password.
            data_path: Path of the JSON-lines data file. Defaults to
                ``data/realText.json`` next to this source file.
        """
        if data_path is None:
            # dirname() rather than splitting on '/', so Windows paths work too.
            cur_dir = os.path.dirname(os.path.abspath(__file__))
            data_path = os.path.join(cur_dir, 'data/realText.json')
        self.data_path = data_path
        self.g = Graph(uri, username=username, password=password)
- 2.4 构建图谱中的节点代码如下:
def create_graphnodes(self):
    """Create the disease nodes and one node per distinct entity name.

    Reads everything through ``self.read_nodes()``, creates the central
    ``Disease`` nodes, then creates the nodes of each other label and prints
    how many names that label had. Empty names are dropped for every label so
    that no node with ``name == ''`` is written.
    """
    # Only the first eight values (entity collections) are needed here;
    # the remaining ten are relation lists.
    (_diseases, drugs, prescription_list, zhengji_list, symptom_list,
     disease_infos, cur_way_list, bingji_list, *_relations) = self.read_nodes()

    self.create_diseases_nodes(disease_infos)

    labelled_names = (
        ('Drug', drugs),
        ('Prescription', prescription_list),
        ('Zhenghou', zhengji_list),
        ('Cur_way', cur_way_list),
        ('Bingji', bingji_list),
        ('Symptom', symptom_list),
    )
    for label, names in labelled_names:
        names = {name for name in names if name != ''}
        self.create_node(label, names)
        print(len(names))
    return
构造节点时候需要定义一个中心节点,关联它跟所有的实体之间的关系,这部分是笔者需要,结合自身需求去修改即可。具体代码如下:
def create_diseases_nodes(self, disease_infos):
    """Create one ``Disease`` node per record in ``disease_infos``.

    Args:
        disease_infos: Iterable of dicts; each must contain ``'disease'``
            (used as the node name). The other properties are optional and
            default to an empty string, so a record without e.g. ``'bingji'``
            no longer raises ``KeyError``.
    """
    property_keys = (
        'symptoms', 'zhenghou', 'bingji', 'cur_way', 'prescription',
        'chinese_medicine', 'suggestion', 'desc',
    )
    count = 0
    for disease_dict in disease_infos:
        properties = {key: disease_dict.get(key, '') for key in property_keys}
        node = Node("Disease", name=disease_dict['disease'], **properties)
        self.g.create(node)
        count += 1
        print(count)
    return
- 2.5实体之间关系的构建代码如下:
def create_graphrels(self):
    """Create every relationship edge between the entity nodes.

    Reads the relation lists through ``self.read_nodes()`` and hands each one
    to ``self.create_relationship`` together with its start label, end label,
    relationship type and display name.
    """
    (_diseases, _drugs, _prescriptions, _zhengji, _symptoms, _infos,
     _cur_ways, _bingji,
     rels_disease_symptoms, rels_disease_drug, rels_symptoms_prescription,
     rels_disease_zhengji, rels_disease_prescription, rels_symptoms_drug,
     rels_disease_bingji, rels_zhengji_bingji, rels_disease_cur_way,
     rels_zhengji_cur_way) = self.read_nodes()

    # (start label, end label, edges, relationship type, relationship name)
    relation_specs = [
        ('Disease', 'Prescription', rels_disease_prescription, 'rels_disease_prescription', '疾病对应的方剂'),
        ('Disease', 'Drug', rels_disease_drug, 'rels_disease_drug', '疾病对应药物'),
        ('Disease', 'Zhenghou', rels_disease_zhengji, 'rels_disease_zhenghou', '疾病对应的症候'),
        ('Disease', 'Symptom', rels_disease_symptoms, 'rels_disease_symptoms', '疾病对应症状'),
        ('Symptom', 'Prescription', rels_symptoms_prescription, 'rels_symptoms_prescription', '症状对应的方剂'),
        ('Symptom', 'Drug', rels_symptoms_drug, 'rels_symptoms_drug', '症状对应的药物'),
        ('Zhenghou', 'Bingji', rels_zhengji_bingji, 'rels_zhengji_bingji', '症候对应的病机'),
        ('Disease', 'Bingji', rels_disease_bingji, 'rels_disease_bingji', '疾病对应的病机'),
        ('Disease', 'Cur_way', rels_disease_cur_way, 'rels_disease_cur_way', '疾病对应的治法'),
        ('Zhenghou', 'Cur_way', rels_zhengji_cur_way, 'rels_zhengji_cur_way', '症候对应的治法'),
    ]
    for spec in relation_specs:
        self.create_relationship(*spec)
def create_relationship(self, start_node, end_node, edges, rel_type, rel_name):
    """Create one relationship per distinct ``(start name, end name)`` edge.

    Args:
        start_node: Label of the start nodes, e.g. ``'Disease'``.
        end_node: Label of the end nodes, e.g. ``'Drug'``.
        edges: Iterable of two-item sequences ``[start name, end name]``.
        rel_type: Relationship type written into the graph.
        rel_name: Value stored in the relationship's ``name`` property.

    Failures of individual edges are printed and skipped so that one bad edge
    does not stop the rest of the import.
    """
    # De-duplicate on the pair itself; joining with a '###' separator would
    # break for names that contain the separator.
    unique_edges = list(dict.fromkeys(tuple(edge) for edge in edges))
    total = len(unique_edges)

    # Labels and relationship types cannot be Cypher parameters, so they are
    # formatted in; the names are passed as parameters so that quotes in a
    # name cannot break (or inject into) the query.
    # NOTE: the $param syntax needs Neo4j 3.0 or newer.
    query = (
        "match(p:%s),(q:%s) where p.name=$start and q.name=$end "
        "create (p)-[rel:%s{name:$rel_name}]->(q)" % (start_node, end_node, rel_type)
    )

    count = 0
    for edge in unique_edges:
        parameters = {'start': edge[0], 'end': edge[1], 'rel_name': rel_name}
        try:
            self.g.run(query, parameters)
            count += 1
            print(rel_type, count, total)
        except Exception as e:  # best effort: report and continue with the next edge
            print(e)
    return
3、医疗知识图谱可视化
- 1、实体可视化
- 2、关系可视化
4、完整知识图谱代码如下:
import os
import json
from py2neo import Graph,Node
class MedicalGraph:
    """Build a medical knowledge graph in Neo4j from a JSON-lines data file.

    Each non-blank line of the data file is one JSON object describing a
    disease. ``create_graphnodes`` writes the nodes, ``create_graphrels``
    writes the relationships and ``export_data`` dumps the entity names to
    text files.
    """

    def __init__(self, uri="http://10.10.108.75:7474", username="neo4j",
                 password="123456789", data_path=None):
        """Remember the data file location and create the py2neo ``Graph``.

        Args:
            uri: HTTP address of the Neo4j server (use ``localhost`` for a
                desktop install).
            username: Neo4j account name.
            password: Neo4j account password.
            data_path: Path of the JSON-lines data file. Defaults to
                ``data/realText.json`` next to this source file.
        """
        if data_path is None:
            # dirname() rather than splitting on '/', so Windows paths work too.
            cur_dir = os.path.dirname(os.path.abspath(__file__))
            data_path = os.path.join(cur_dir, 'data/realText.json')
        self.data_path = data_path
        self.g = Graph(uri, username=username, password=password)

    def read_nodes(self):
        """Read the data file and collect entities and relations.

        Fields of one record (only ``disease`` is required; a missing field
        simply contributes no entities or relations):
            disease: disease name (single value)
            symptoms: symptoms (list)
            zhenghou: syndrome (single value)
            bingji: pathogenesis (list)
            cur_way: treatment methods (list of one or more)
            prescription: prescription (single value)
            chinese_medicine: herbal ingredients (list)
            suggestion: advice / precautions
            desc: description of the whole disease

        Returns:
            An 18-tuple: the sets of disease, drug, prescription, zhenghou and
            symptom names, the list of per-disease dicts, the sets of cur_way
            and bingji names, followed by ten lists of ``[start, end]`` pairs
            (disease-symptom, disease-drug, symptom-prescription,
            disease-zhenghou, disease-prescription, symptom-drug,
            disease-bingji, zhenghou-bingji, disease-cur_way,
            zhenghou-cur_way).
        """
        disease_list = []
        symptom_list = []
        zhengji_list = []
        bingji_list = []
        cur_way_list = []
        prescription_list = []
        drugs = []
        disease_infos = []

        rels_disease_drug = []
        rels_disease_prescription = []
        rels_disease_zhengji = []
        rels_disease_symptoms = []
        rels_symptoms_prescription = []
        rels_symptoms_drug = []
        rels_disease_cur_way = []
        rels_disease_bingji = []
        rels_zhengji_bingji = []
        rels_zhengji_cur_way = []

        count = 0
        # "with" closes the file even if a line fails to parse.
        with open(self.data_path, encoding='utf-8') as data_file:
            for line in data_file:
                if not line.strip():
                    continue  # tolerate blank lines, e.g. a trailing newline
                count += 1
                print(count)
                data_json = json.loads(line)

                disease = data_json['disease']
                disease_list.append(disease)
                # Every property read later by create_diseases_nodes gets a
                # default, including 'bingji'.
                disease_dict = {
                    'disease': disease,
                    'symptoms': '',
                    'chinese_medicine': '',
                    'prescription': '',
                    'zhenghou': '',
                    'cur_way': '',
                    'bingji': '',
                    'suggestion': data_json.get('suggestion', ''),
                    'desc': data_json.get('desc', ''),
                }

                # Cross-referenced fields are looked up once with safe
                # defaults, so a branch for one field never fails because a
                # different field is absent.
                symptoms = data_json.get('symptoms', [])
                medicines = data_json.get('chinese_medicine', [])
                has_zhenghou = 'zhenghou' in data_json
                zhenghou = data_json.get('zhenghou')

                if 'symptoms' in data_json:
                    disease_dict['symptoms'] = symptoms
                    symptom_list += symptoms
                    for symptom in symptoms:
                        rels_disease_symptoms.append([disease, symptom])
                        for drug in medicines:
                            rels_symptoms_drug.append([symptom, drug])

                if 'chinese_medicine' in data_json:
                    disease_dict['chinese_medicine'] = medicines
                    drugs += medicines
                    for drug in medicines:
                        rels_disease_drug.append([disease, drug])

                if 'prescription' in data_json:
                    prescription = data_json['prescription']
                    prescription_list.append(prescription)
                    disease_dict['prescription'] = prescription
                    rels_disease_prescription.append([disease, prescription])
                    for symptom in symptoms:
                        rels_symptoms_prescription.append([symptom, prescription])

                if has_zhenghou:
                    disease_dict['zhenghou'] = zhenghou
                    zhengji_list.append(zhenghou)
                    rels_disease_zhengji.append([disease, zhenghou])

                if 'cur_way' in data_json:
                    cur_way = data_json['cur_way']
                    cur_way_list += cur_way
                    disease_dict['cur_way'] = cur_way
                    for cur in cur_way:
                        rels_disease_cur_way.append([disease, cur])
                        if has_zhenghou:
                            rels_zhengji_cur_way.append([zhenghou, cur])

                if 'bingji' in data_json:
                    bingji = data_json['bingji']
                    disease_dict['bingji'] = bingji
                    bingji_list += bingji
                    for bing in bingji:
                        rels_disease_bingji.append([disease, bing])
                        if has_zhenghou:
                            rels_zhengji_bingji.append([zhenghou, bing])

                disease_infos.append(disease_dict)

        return (
            set(disease_list), set(drugs), set(prescription_list),
            set(zhengji_list), set(symptom_list), disease_infos,
            set(cur_way_list), set(bingji_list),
            rels_disease_symptoms, rels_disease_drug,
            rels_symptoms_prescription, rels_disease_zhengji,
            rels_disease_prescription, rels_symptoms_drug,
            rels_disease_bingji, rels_zhengji_bingji,
            rels_disease_cur_way, rels_zhengji_cur_way,
        )

    def create_node(self, label, nodes):
        """Create one node with the given label for every name in ``nodes``.

        Args:
            label: Node label, e.g. ``'Drug'``.
            nodes: Sized collection of node names.
        """
        total = len(nodes)
        for count, node_name in enumerate(nodes, start=1):
            self.g.create(Node(label, name=node_name))
            print(count, total)
        return

    def create_diseases_nodes(self, disease_infos):
        """Create the central ``Disease`` node for every record.

        Args:
            disease_infos: Iterable of dicts; each must contain ``'disease'``
                (used as the node name). The other properties are optional
                and default to an empty string.
        """
        property_keys = (
            'symptoms', 'zhenghou', 'bingji', 'cur_way', 'prescription',
            'chinese_medicine', 'suggestion', 'desc',
        )
        count = 0
        for disease_dict in disease_infos:
            properties = {key: disease_dict.get(key, '') for key in property_keys}
            node = Node("Disease", name=disease_dict['disease'], **properties)
            self.g.create(node)
            count += 1
            print(count)
        return

    def create_graphnodes(self):
        """Create the disease nodes and one node per distinct entity name.

        Empty names are dropped for every label so that no node with
        ``name == ''`` is written. After each label the number of names of
        that label is printed.
        """
        # Only the first eight values (entity collections) are needed here;
        # the remaining ten are relation lists.
        (_diseases, drugs, prescription_list, zhengji_list, symptom_list,
         disease_infos, cur_way_list, bingji_list, *_relations) = self.read_nodes()

        self.create_diseases_nodes(disease_infos)

        labelled_names = (
            ('Drug', drugs),
            ('Prescription', prescription_list),
            ('Zhenghou', zhengji_list),
            ('Cur_way', cur_way_list),
            ('Bingji', bingji_list),
            ('Symptom', symptom_list),
        )
        for label, names in labelled_names:
            names = {name for name in names if name != ''}
            self.create_node(label, names)
            print(len(names))
        return

    def create_graphrels(self):
        """Create every relationship edge between the entity nodes."""
        (_diseases, _drugs, _prescriptions, _zhengji, _symptoms, _infos,
         _cur_ways, _bingji,
         rels_disease_symptoms, rels_disease_drug, rels_symptoms_prescription,
         rels_disease_zhengji, rels_disease_prescription, rels_symptoms_drug,
         rels_disease_bingji, rels_zhengji_bingji, rels_disease_cur_way,
         rels_zhengji_cur_way) = self.read_nodes()

        # (start label, end label, edges, relationship type, relationship name)
        relation_specs = [
            ('Disease', 'Prescription', rels_disease_prescription, 'rels_disease_prescription', '疾病对应的方剂'),
            ('Disease', 'Drug', rels_disease_drug, 'rels_disease_drug', '疾病对应药物'),
            ('Disease', 'Zhenghou', rels_disease_zhengji, 'rels_disease_zhenghou', '疾病对应的症候'),
            ('Disease', 'Symptom', rels_disease_symptoms, 'rels_disease_symptoms', '疾病对应症状'),
            ('Symptom', 'Prescription', rels_symptoms_prescription, 'rels_symptoms_prescription', '症状对应的方剂'),
            ('Symptom', 'Drug', rels_symptoms_drug, 'rels_symptoms_drug', '症状对应的药物'),
            ('Zhenghou', 'Bingji', rels_zhengji_bingji, 'rels_zhengji_bingji', '症候对应的病机'),
            ('Disease', 'Bingji', rels_disease_bingji, 'rels_disease_bingji', '疾病对应的病机'),
            ('Disease', 'Cur_way', rels_disease_cur_way, 'rels_disease_cur_way', '疾病对应的治法'),
            ('Zhenghou', 'Cur_way', rels_zhengji_cur_way, 'rels_zhengji_cur_way', '症候对应的治法'),
        ]
        for spec in relation_specs:
            self.create_relationship(*spec)

    def create_relationship(self, start_node, end_node, edges, rel_type, rel_name):
        """Create one relationship per distinct ``(start name, end name)`` edge.

        Args:
            start_node: Label of the start nodes, e.g. ``'Disease'``.
            end_node: Label of the end nodes, e.g. ``'Drug'``.
            edges: Iterable of two-item sequences ``[start name, end name]``.
            rel_type: Relationship type written into the graph.
            rel_name: Value stored in the relationship's ``name`` property.

        Failures of individual edges are printed and skipped so that one bad
        edge does not stop the rest of the import.
        """
        # De-duplicate on the pair itself; joining with a '###' separator
        # would break for names that contain the separator.
        unique_edges = list(dict.fromkeys(tuple(edge) for edge in edges))
        total = len(unique_edges)

        # Labels and relationship types cannot be Cypher parameters, so they
        # are formatted in; the names are passed as parameters so that quotes
        # in a name cannot break (or inject into) the query.
        # NOTE: the $param syntax needs Neo4j 3.0 or newer.
        query = (
            "match(p:%s),(q:%s) where p.name=$start and q.name=$end "
            "create (p)-[rel:%s{name:$rel_name}]->(q)" % (start_node, end_node, rel_type)
        )

        count = 0
        for edge in unique_edges:
            parameters = {'start': edge[0], 'end': edge[1], 'rel_name': rel_name}
            try:
                self.g.run(query, parameters)
                count += 1
                print(rel_type, count, total)
            except Exception as e:  # best effort: report and continue
                print(e)
        return

    def export_data(self):
        """Write the non-empty names of every entity type to ``dict/*.txt``.

        One name per line, sorted so repeated exports produce identical
        files. The ``dict`` directory (relative to the current working
        directory) is created if it does not exist.
        """
        (disease_list, drugs, prescription_list, zhengji_list, symptom_list,
         _infos, cur_way_list, bingji_list, *_relations) = self.read_nodes()

        os.makedirs('dict', exist_ok=True)
        exports = (
            ('dict/drug.txt', drugs),
            ('dict/zhenghou.txt', zhengji_list),
            ('dict/prescription_list.txt', prescription_list),
            ('dict/symptoms.txt', symptom_list),
            ('dict/disease.txt', disease_list),
            ('dict/cur_way.txt', cur_way_list),
            ('dict/bingji.txt', bingji_list),
        )
        for path, names in exports:
            # "with" guarantees the file is closed even if the write fails.
            with open(path, 'w', encoding='utf-8') as out:
                out.write('\n'.join(sorted(name for name in names if name != '')))
        return
if __name__ == '__main__':
    # Constructing the object already runs __init__; calling it a second time
    # would only rebuild the same attributes, so it is not repeated here.
    handler = MedicalGraph()
    handler.create_graphnodes()
    handler.create_graphrels()
5、总结
- 本篇是基于已有的rdf数据或者json数据去批量写入,最终生成知识图谱。因为数据问题,领域内图谱不是很大,但是笔者在这个图谱上实现了医疗知识图谱的单轮问答和多轮问答。需要相关数据集或者源码可以发邮件给我:sessioncookies@163.com,欢迎大家点赞收藏喔。更多干货下一期继续讲解:针对这些数据去做医疗实体识别的工作,这部分内容会比较多,主要涉及基于规则的前后向最大匹配算法还有BiLSTM-CRF实体识别等,并分别进行实验对比分析。
Original: https://blog.csdn.net/zhangxing6666/article/details/118073823
Author: AI伐木累
Title: 医疗知识图谱构建
原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/545151/
转载文章受原作者版权保护。转载请注明原作者出处!