医疗知识图谱构建

上篇文章中笔者已经实现了对实体关系的抽取,形成了对应的三元组,这篇主要讲如何根据抽取的三元组去生成对应的医疗知识图谱。

前述部分

  • 知识图谱的构建方式有很多。一种是基于Protege构建:它使用本体一层层地构建,需要手动定义每一层的关系,最终结果保存为RDF或者OWL文件,具体流程可以参考:Protege本体构建。另一种是使用工业界比较常用的Neo4j图数据库管理工具,它操作简单,分为桌面版和社区版,可以根据需要自行下载。笔者就是基于Neo4j实现数据的批量读取,最终生成医疗知识图谱。

1、数据的格式:

医疗知识图谱构建
医疗知识图谱构建

2、根据已知的数据去构建对应的医疗知识图谱

  • 2.1构建实体类型
    医疗知识图谱构建
  • 2.2构建实体关系
    医疗知识图谱构建
  • 2.3 访问Neo4j图数据库接口[self.g = Graph("http://10.10.108.75:7474", username="neo4j", password="123456789")]。这部分由开发者根据自己的环境自行设定,桌面版本把地址改成localhost即可。另外,Neo4j默认的登录账号是neo4j,密码是neo4j,首次登录后会提示修改密码。默认端口是7474(HTTP)和7687(Bolt)。
class MedicalGraph:
    """Holds the data-file path and the Neo4j connection used to build the graph."""

    def __init__(self, uri="http://10.10.108.75:7474", username="neo4j", password="123456789"):
        """Locate the data file next to this script and open the Neo4j connection.

        Args:
            uri: address of the Neo4j server (use a localhost address for a
                desktop install).
            username: Neo4j login name.
            password: Neo4j password.
        """
        # os.path.dirname works with any path separator, unlike splitting on '/'.
        cur_dir = os.path.dirname(os.path.abspath(__file__))

        self.data_path = os.path.join(cur_dir, 'data/realText.json')

        # NOTE(review): some py2neo releases expect auth=(user, password) instead
        # of username=/password= - confirm against the installed version.
        self.g = Graph(uri, username=username, password=password)
  • 2.4 构建图谱中的节点代码如下:
    def create_graphnodes(self):
        disease_list, drugs, prescription_list, zhengji_list, symptom_list, disease_infos, cur_way_list, bingji_list, \
        rels_disease_symptoms, rels_disease_drug, rels_symptoms_prescription, rels_disease_zhengji, rels_disease_prescription, rels_symptoms_drug, \
        rels_disease_bingji, rels_zhengji_bingji, rels_disease_cur_way, rels_zhengji_cur_way= self.read_nodes()

        while '' in list(zhengji_list):
            zhengji_list.remove('')
        zhengji_list = set(zhengji_list)
        while '' in list(drugs):
            drugs.remove('')
        drugs = set(drugs)

        self.create_diseases_nodes(disease_infos)

        self.create_node('Drug', drugs)
        print(len(drugs))
        self.create_node('Prescription', prescription_list)
        print(len(prescription_list))
        self.create_node('Zhenghou', zhengji_list)
        print(len(zhengji_list))
        self.create_node('Cur_way', cur_way_list)
        print(len(cur_way_list))
        self.create_node('Bingji', bingji_list)
        print(len(bingji_list))
        self.create_node('Symptom', symptom_list)
        print(len(symptom_list))
        return

构造节点时需要先定义一个中心节点(这里是疾病节点),再把它和其他所有实体关联起来。这样设计是出于笔者自己的需要,读者结合自身需求修改即可。具体代码如下:

  def create_diseases_nodes(self, disease_infos):
        """Create one ``Disease`` node per record, carrying all of its attributes.

        Args:
            disease_infos: iterable of dicts; only the ``'disease'`` key is
                required, every other attribute falls back to ''.
        """
        optional_fields = ('symptoms', 'zhenghou', 'bingji', 'cur_way', 'prescription',
                           'chinese_medicine', 'suggestion', 'desc')

        for count, disease_dict in enumerate(disease_infos, start=1):
            # A record may lack some attributes (read_nodes only stores 'bingji'
            # when the source line has it), so look them up with a default
            # instead of indexing, which raised KeyError.
            properties = {field: disease_dict.get(field, '') for field in optional_fields}
            node = Node("Disease", name=disease_dict['disease'], **properties)
            self.g.create(node)
            print(count)
        return
  • 2.5实体之间关系的构建代码如下:
 '''创建实体关系边'''
    def create_graphrels(self):

        disease_list, drugs, prescription_list, zhengji_list, symptom_list, disease_infos, cur_way_list, bingji_list, \
        rels_disease_symptoms, rels_disease_drug, rels_symptoms_prescription, rels_disease_zhengji, rels_disease_prescription, rels_symptoms_drug, \
        rels_disease_bingji, rels_zhengji_bingji, rels_disease_cur_way, rels_zhengji_cur_way = self.read_nodes()

        self.create_relationship('Disease', 'Prescription', rels_disease_prescription, 'rels_disease_prescription', '疾病对应的方剂')
        self.create_relationship('Disease', 'Drug', rels_disease_drug, 'rels_disease_drug', '疾病对应药物')

        self.create_relationship('Disease', 'Zhenghou', rels_disease_zhengji, 'rels_disease_zhenghou', '疾病对应的症候')
        self.create_relationship('Disease', 'Symptom', rels_disease_symptoms, 'rels_disease_symptoms', '疾病对应症状')

        self.create_relationship('Symptom', 'Prescription', rels_symptoms_prescription, 'rels_symptoms_prescription', '症状对应的方剂')
        self.create_relationship('Symptom', 'Drug', rels_symptoms_drug, 'rels_symptoms_drug', '症状对应的药物')
        self.create_relationship('Zhenghou', 'Bingji', rels_zhengji_bingji, 'rels_zhengji_bingji', '症候对应的病机')
        self.create_relationship('Disease', 'Bingji', rels_disease_bingji, 'rels_disease_bingji', '疾病对应的病机')
        self.create_relationship('Disease', 'Cur_way', rels_disease_cur_way, 'rels_disease_cur_way', '疾病对应的治法')
        self.create_relationship('Zhenghou', 'Cur_way', rels_zhengji_cur_way, 'rels_zhengji_cur_way', '症候对应的治法')

    '''创建实体关联边'''
    def create_relationship(self, start_node, end_node, edges, rel_type, rel_name):
        count = 0

        set_edges = []
        for edge in edges:

            set_edges.append('###'.join(edge))
        all = len(set(set_edges))
        for edge in set(set_edges):
            edge = edge.split('###')
            p = edge[0]
            q = edge[1]

            query = "match(p:%s),(q:%s) where p.name='%s'and q.name='%s' create (p)-[rel:%s{name:'%s'}]->(q)" % (
                start_node, end_node, p, q, rel_type, rel_name)
            try:
                self.g.run(query)
                count += 1
                print(rel_type, count, all)
            except Exception as e:
                print(e)
        return

3、医疗知识图谱可视化

  • 1、实体可视化
    医疗知识图谱构建
  • 2、关系可视化
    医疗知识图谱构建

4、完整知识图谱代码如下:

import os
import json
from py2neo import Graph,Node

class MedicalGraph:
    def __init__(self, uri="http://10.10.108.75:7474", username="neo4j", password="123456789"):
        """Locate the data file next to this script and open the Neo4j connection.

        Args:
            uri: address of the Neo4j server (use a localhost address for a
                desktop install).
            username: Neo4j login name.
            password: Neo4j password.
        """
        # os.path.dirname works with any path separator, unlike splitting on '/'.
        cur_dir = os.path.dirname(os.path.abspath(__file__))

        self.data_path = os.path.join(cur_dir, 'data/realText.json')

        # NOTE(review): some py2neo releases expect auth=(user, password) instead
        # of username=/password= - confirm against the installed version.
        self.g = Graph(uri, username=username, password=password)

    '''读取文件'''
    def read_nodes(self):

"""
        disease:疾病(一个)
        symptoms:症状(列表)
        zhenghou:症候(1个)
        bingji:病机(列表)
        cur_way:治法(一个或者俩个或者多个,是一个列表)
        prescription:方剂(一个)
        chinese_medicine:中药成分(列表)
        suggestion:建议(注意)
        desc:整个疾病的描述
        #一种疾病有多种治法
        :return:
"""

        disease_list=[]
        symptom_list = []
        zhengji_list = []
        bingji_list=[]
        cur_way_list = []
        prescription_list = []
        drugs = []
        disease_infos = []

        rels_disease_drug = []
        rels_disease_prescription = []
        rels_disease_zhengji = []
        rels_disease_symptoms = []
        rels_symptoms_prescription=[]

        rels_symptoms_drug=[]
        rels_disease_cur_way=[]
        rels_disease_bingji=[]
        rels_zhengji_bingji=[]
        rels_zhengji_cur_way=[]

        count=0
        for data in open(self.data_path,encoding='utf-8'):
            disease_dict={}
            count+=1
            print(count)

            data_json=json.loads(data)
            disease=data_json['disease']
            disease_dict['disease']=disease

            disease_list.append(disease)
            disease_dict['symptoms']=''
            disease_dict['chinese_medicine']=''
            disease_dict['prescription']=''
            disease_dict['zhenghou']=''

            disease_dict['cur_way']=''

            disease_dict['suggestion']=data_json['suggestion']

            disease_dict['desc']=data_json['desc']

            if 'symptoms' in data_json:

                disease_dict['symptoms']=data_json['symptoms']
                symptom_list+=data_json['symptoms']
                for symptom in data_json['symptoms']:

                    rels_disease_symptoms.append([disease,symptom])
                    for drug in data_json['chinese_medicine']:

                        rels_symptoms_drug.append([symptom,drug])

            if 'chinese_medicine' in data_json:

                disease_dict['chinese_medicine'] = data_json['chinese_medicine']
                drugs+=data_json['chinese_medicine']
                for drug in data_json['chinese_medicine']:
                    rels_disease_drug.append([disease,drug])

            if 'prescription' in data_json:

                prescription=data_json['prescription']

                prescription_list.append(prescription)
                disease_dict['prescription']=prescription
                rels_disease_prescription.append([disease, data_json['prescription']])
                for symptom in data_json['symptoms']:

                    rels_symptoms_prescription.append([symptom,prescription])

            if 'zhenghou' in data_json:

                disease_dict['zhenghou'] = data_json['zhenghou']
                zhengji_list.append(data_json['zhenghou'])
                zhenghou=data_json['zhenghou']
                rels_disease_zhengji.append([disease,zhenghou])

            if 'cur_way' in data_json:

                cur_way=data_json['cur_way']
                cur_way_list+=cur_way
                disease_dict['cur_way']=data_json['cur_way']

                zhenghou = data_json['zhenghou']
                for cur in cur_way:

                    rels_disease_cur_way.append([disease,cur])
                    rels_zhengji_cur_way.append([zhenghou,cur])

            if 'bingji' in data_json:

                disease_dict['bingji']=data_json['bingji']
                bingji_list+=data_json['bingji']
                zhenghou = data_json['zhenghou']
                for bing in data_json['bingji']:

                    rels_disease_bingji.append([disease,bing])
                    rels_zhengji_bingji.append([zhenghou,bing])

            disease_infos.append(disease_dict)

        return set(disease_list),set(drugs),set(prescription_list),set(zhengji_list),set(symptom_list),disease_infos,set(cur_way_list),set(bingji_list), \
               rels_disease_symptoms,rels_disease_drug,rels_symptoms_prescription,rels_disease_zhengji,rels_disease_prescription,rels_symptoms_drug, \
               rels_disease_bingji,rels_zhengji_bingji,rels_disease_cur_way,rels_zhengji_cur_way

    '''建立节点'''
    def create_node(self, label, nodes):
        """Create one node with the given label for every name in ``nodes``.

        After each node is written a progress line is printed: the number of
        nodes created so far followed by ``len(nodes)``.
        """
        for created, node_name in enumerate(nodes, start=1):
            self.g.create(Node(label, name=node_name))
            print(created, len(nodes))
        return

    '''创建知识图谱中心疾病的节点'''

    def create_diseases_nodes(self, disease_infos):
        """Create one ``Disease`` node per record, carrying all of its attributes.

        Args:
            disease_infos: iterable of dicts; only the ``'disease'`` key is
                required, every other attribute falls back to ''.
        """
        optional_fields = ('symptoms', 'zhenghou', 'bingji', 'cur_way', 'prescription',
                           'chinese_medicine', 'suggestion', 'desc')

        for count, disease_dict in enumerate(disease_infos, start=1):
            # A record may lack some attributes (read_nodes only stores 'bingji'
            # when the source line has it), so look them up with a default
            # instead of indexing, which raised KeyError.
            properties = {field: disease_dict.get(field, '') for field in optional_fields}
            node = Node("Disease", name=disease_dict['disease'], **properties)
            self.g.create(node)
            print(count)
        return

    '''创建知识图谱实体节点类型schema'''
    def create_graphnodes(self):
        disease_list, drugs, prescription_list, zhengji_list, symptom_list, disease_infos, cur_way_list, bingji_list, \
        rels_disease_symptoms, rels_disease_drug, rels_symptoms_prescription, rels_disease_zhengji, rels_disease_prescription, rels_symptoms_drug, \
        rels_disease_bingji, rels_zhengji_bingji, rels_disease_cur_way, rels_zhengji_cur_way= self.read_nodes()

        while '' in list(zhengji_list):
            zhengji_list.remove('')
        zhengji_list = set(zhengji_list)
        while '' in list(drugs):
            drugs.remove('')
        drugs = set(drugs)

        self.create_diseases_nodes(disease_infos)

        self.create_node('Drug', drugs)
        print(len(drugs))
        self.create_node('Prescription', prescription_list)
        print(len(prescription_list))
        self.create_node('Zhenghou', zhengji_list)
        print(len(zhengji_list))
        self.create_node('Cur_way', cur_way_list)
        print(len(cur_way_list))
        self.create_node('Bingji', bingji_list)
        print(len(bingji_list))
        self.create_node('Symptom', symptom_list)
        print(len(symptom_list))
        return

    '''创建实体关系边'''
    def create_graphrels(self):

        disease_list, drugs, prescription_list, zhengji_list, symptom_list, disease_infos, cur_way_list, bingji_list, \
        rels_disease_symptoms, rels_disease_drug, rels_symptoms_prescription, rels_disease_zhengji, rels_disease_prescription, rels_symptoms_drug, \
        rels_disease_bingji, rels_zhengji_bingji, rels_disease_cur_way, rels_zhengji_cur_way = self.read_nodes()

        self.create_relationship('Disease', 'Prescription', rels_disease_prescription, 'rels_disease_prescription', '疾病对应的方剂')
        self.create_relationship('Disease', 'Drug', rels_disease_drug, 'rels_disease_drug', '疾病对应药物')

        self.create_relationship('Disease', 'Zhenghou', rels_disease_zhengji, 'rels_disease_zhenghou', '疾病对应的症候')
        self.create_relationship('Disease', 'Symptom', rels_disease_symptoms, 'rels_disease_symptoms', '疾病对应症状')

        self.create_relationship('Symptom', 'Prescription', rels_symptoms_prescription, 'rels_symptoms_prescription', '症状对应的方剂')
        self.create_relationship('Symptom', 'Drug', rels_symptoms_drug, 'rels_symptoms_drug', '症状对应的药物')
        self.create_relationship('Zhenghou', 'Bingji', rels_zhengji_bingji, 'rels_zhengji_bingji', '症候对应的病机')
        self.create_relationship('Disease', 'Bingji', rels_disease_bingji, 'rels_disease_bingji', '疾病对应的病机')
        self.create_relationship('Disease', 'Cur_way', rels_disease_cur_way, 'rels_disease_cur_way', '疾病对应的治法')
        self.create_relationship('Zhenghou', 'Cur_way', rels_zhengji_cur_way, 'rels_zhengji_cur_way', '症候对应的治法')

    '''创建实体关联边'''
    def create_relationship(self, start_node, end_node, edges, rel_type, rel_name):
        count = 0

        set_edges = []
        for edge in edges:

            set_edges.append('###'.join(edge))
        all = len(set(set_edges))
        for edge in set(set_edges):
            edge = edge.split('###')
            p = edge[0]
            q = edge[1]

            query = "match(p:%s),(q:%s) where p.name='%s'and q.name='%s' create (p)-[rel:%s{name:'%s'}]->(q)" % (
                start_node, end_node, p, q, rel_type, rel_name)
            try:
                self.g.run(query)
                count += 1
                print(rel_type, count, all)
            except Exception as e:
                print(e)
        return

    '''导出数据'''

    def export_data(self):

        disease_list, drugs, prescription_list, zhengji_list, symptom_list, disease_infos, cur_way_list, bingji_list, \
        rels_disease_symptoms, rels_disease_drug, rels_symptoms_prescription, rels_disease_zhengji, rels_disease_prescription, rels_symptoms_drug, \
        rels_disease_bingji, rels_zhengji_bingji, rels_disease_cur_way, rels_zhengji_cur_way = self.read_nodes()
        f_drug = open('dict/drug.txt', 'w+',encoding='utf-8')
        f_zhengji_list=open('dict/zhenghou.txt','w+',encoding='utf-8')
        f_prescription_list = open('dict/prescription_list.txt', 'w+',encoding='utf-8')
        f_symptom = open('dict/symptoms.txt', 'w+',encoding='utf-8')
        f_disease = open('dict/disease.txt', 'w+',encoding='utf-8')
        f_cur_way = open('dict/cur_way.txt', 'w+', encoding='utf-8')
        f_bingji = open('dict/bingji.txt', 'w+', encoding='utf-8')

        while '' in list(zhengji_list):
            zhengji_list.remove('')
        zhengji_list=set(zhengji_list)
        while '' in list(drugs):
            drugs.remove('')
        drugs=set(drugs)
        while '' in list(cur_way_list):
            cur_way_list.remove('')
        while '' in  list(bingji_list):
            bingji_list.remove('')
        while '' in disease_list:
            disease_list.remove('')
        while '' in list(symptom_list):
            symptom_list.remove('')
        while  '' in prescription_list:
            prescription_list.remove('')

        f_drug.write('\n'.join(list(drugs)))
        f_symptom.write('\n'.join(list(symptom_list)))
        f_disease.write('\n'.join(list(disease_list)))
        f_prescription_list.write('\n'.join(list(prescription_list)))
        f_zhengji_list.write('\n'.join(list(zhengji_list)))
        f_bingji.write('\n'.join(list(bingji_list)))
        f_cur_way.write('\n'.join(list(cur_way_list)))

        f_drug.close()
        f_symptom.close()
        f_disease.close()
        f_prescription_list.close()
        f_zhengji_list.close()
        f_bingji.close()
        f_cur_way.close()
        return

if __name__ == '__main__':
    # Constructing the object already runs __init__; calling it again would
    # only repeat the setup and open the Neo4j connection a second time.
    handler = MedicalGraph()

    # Nodes first: create_relationship matches existing nodes by name.
    handler.create_graphnodes()

    handler.create_graphrels()

5、总结

  • 本篇基于已有的rdf数据或者json数据批量写入Neo4j,最终生成知识图谱。由于数据有限,这个领域图谱的规模不是很大,但笔者已在此基础上实现了医疗知识图谱的单轮问答和多轮问答。需要相关数据集或者源码的读者可以发邮件给我:sessioncookies@163.com。欢迎大家点赞收藏,更多干货下一期继续讲解:下一期将针对这些数据做医疗实体识别,这部分内容会比较多,主要设计了基于规则的前后向最大匹配算法以及BiLSTM-CRF实体识别等,并分别进行实验对比分析。

Original: https://blog.csdn.net/zhangxing6666/article/details/118073823
Author: AI伐木累
Title: 医疗知识图谱构建

原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/545151/

转载文章受原作者版权保护。转载请注明原作者出处!

(0)

大家都在看

亲爱的 Coder【最近整理,可免费获取】👉 最新必读书单  | 👏 面试题下载  | 🌎 免费的AI知识星球