Browse Source

1.大模型构建知识图谱初步完成

Air 2 days ago
parent
commit
36d1b34080
1 changed file with 21 additions and 8 deletions
  1. 21 8
      src/kg_construction/manager.py

+ 21 - 8
src/kg_construction/manager.py

@@ -1,10 +1,9 @@
 import math
-
 from data.analyze_data.analyze_xls import readXls, set_become_dict_list, analyze_entity
 from src.kg_construction.llm_construct_kg import get_response_llm, sort_format, entity_relation_generation, \
     realtion_generation, job_duty_skill, standard_format_triple, job_duty_skill_tail
 from src.kg_construction.mongodb_cache import MongoDBConn
-#from src.template_generate.infer_example import gen
+from src.template_generate.infer_example import gen
 from tool.judge_respond_structure import judge_respond_triple_structure
 from tqdm import tqdm
 
@@ -28,19 +27,33 @@ if __name__ == "__main__":
 
     path = '../../data/source/机械相关专业数据汇总/爬虫数据(山东省数据)/【最终】2024_11_03_临沂职业学院-智联招聘和前程无忧数据.xls'
     data_list = readXls(path)
-    data_list_shandong = []
 
     for data in tqdm(data_list):
-        neo4jConn.inserTriple(graph=graph, head_type='公司名称', head_name=data['公司名称'], relation_name='招聘岗位', tail_type='岗位名称', tail_name=data['岗位名称'])
-        neo4jConn.inserTriple(graph=graph, head_type='岗位名称', head_name=data['岗位名称'], relation_name='岗位类别', tail_type='职位类别', tail_name=data['职位类别'])
-        if str(data['岗位职责']) != 'nan':
+        if str(data['岗位职责']) != 'nan' and (data['城市'] == '济南' or data['城市'] == '青岛' or data['城市'] == '淄博' or data['城市'] == '枣庄' or data['城市'] == '东营'
+                                           or data['城市'] == '烟台' or data['城市'] == '潍坊' or data['城市'] == '济宁' or data['城市'] == '泰安' or data['城市'] == '威海'
+                                           or data['城市'] == '日照' or data['城市'] == '临沂' or data['城市'] == '德州' or data['城市'] == '聊城' or data['城市'] == '滨州'
+                                           or data['城市'] == '菏泽'):
+
+            neo4jConn.inserTriple(graph=graph, head_type='公司名称', head_name=data['公司名称'], relation_name='招聘', tail_type='岗位名称', tail_name=data['岗位名称'])
+            #neo4jConn.inserTriple(graph=graph, head_type='公司名称', head_name=data['公司名称'], relation_name='行业', tail_type='公司行业', tail_name=data['公司行业'])
+            neo4jConn.inserTriple(graph=graph, head_type='公司名称', head_name=data['公司名称'], relation_name='城市', tail_type='城市', tail_name=data['城市'])
+
+            neo4jConn.inserNodeCharacter(graph=graph, node_type='公司名称', node_name=data['公司名称'], character_type='公司性质', character_name=data['公司性质'])
+            neo4jConn.inserNodeCharacter(graph=graph, node_type='公司名称', node_name=data['公司名称'], character_type='公司规模', character_name=data['公司规模'])
+            neo4jConn.inserNodeCharacter(graph=graph, node_type='公司名称', node_name=data['公司名称'], character_type='公司行业', character_name=data['公司行业'])
+
+            neo4jConn.inserNodeCharacter(graph=graph, node_type='岗位名称', node_name=data['岗位名称'], character_type='薪资', character_name=data['薪资'])
+            neo4jConn.inserNodeCharacter(graph=graph, node_type='岗位名称', node_name=data['岗位名称'], character_type='学历要求', character_name=data['学历要求'])
+            neo4jConn.inserNodeCharacter(graph=graph, node_type='岗位名称', node_name=data['岗位名称'], character_type='薪资', character_name=data['工作经验'])
+
             respond = job_duty_skill_tail(data['岗位名称'], data['岗位职责'])
             skills = tailSegmentation(respond)
             if skills is False:
                 continue
             for skill in skills:
-                neo4jConn.inserTriple(graph=graph, head_type='岗位名称', head_name=data['岗位名称'],relation_name='需要技能', tail_type='掌握技能', tail_name=skill)
-                print(neo4jConn.findNode(graph=graph, node_type='岗位名称', node_name=data['岗位名称']))
+                neo4jConn.inserTriple(graph=graph, head_type='岗位名称', head_name=data['岗位名称'],relation_name='技能', tail_type='掌握技能', tail_name=skill)
+                #print(neo4jConn.findNode(graph=graph, node_type='岗位名称', node_name=data['岗位名称']))
+
             print(skills)