|
@@ -1,10 +1,9 @@
|
|
|
import math
|
|
|
-
|
|
|
from data.analyze_data.analyze_xls import readXls, set_become_dict_list, analyze_entity
|
|
|
from src.kg_construction.llm_construct_kg import get_response_llm, sort_format, entity_relation_generation, \
|
|
|
realtion_generation, job_duty_skill, standard_format_triple, job_duty_skill_tail
|
|
|
from src.kg_construction.mongodb_cache import MongoDBConn
|
|
|
-#from src.template_generate.infer_example import gen
|
|
|
+from src.template_generate.infer_example import gen
|
|
|
from tool.judge_respond_structure import judge_respond_triple_structure
|
|
|
from tqdm import tqdm
|
|
|
|
|
@@ -28,19 +27,33 @@ if __name__ == "__main__":
|
|
|
|
|
|
path = '../../data/source/机械相关专业数据汇总/爬虫数据(山东省数据)/【最终】2024_11_03_临沂职业学院-智联招聘和前程无忧数据.xls'
|
|
|
data_list = readXls(path)
|
|
|
- data_list_shandong = []
|
|
|
|
|
|
for data in tqdm(data_list):
|
|
|
- neo4jConn.inserTriple(graph=graph, head_type='公司名称', head_name=data['公司名称'], relation_name='招聘岗位', tail_type='岗位名称', tail_name=data['岗位名称'])
|
|
|
- neo4jConn.inserTriple(graph=graph, head_type='岗位名称', head_name=data['岗位名称'], relation_name='岗位类别', tail_type='职位类别', tail_name=data['职位类别'])
|
|
|
- if str(data['岗位职责']) != 'nan':
|
|
|
+ if str(data['岗位职责']) != 'nan' and (data['城市'] == '济南' or data['城市'] == '青岛' or data['城市'] == '淄博' or data['城市'] == '枣庄' or data['城市'] == '东营'
|
|
|
+ or data['城市'] == '烟台' or data['城市'] == '潍坊' or data['城市'] == '济宁' or data['城市'] == '泰安' or data['城市'] == '威海'
|
|
|
+ or data['城市'] == '日照' or data['城市'] == '临沂' or data['城市'] == '德州' or data['城市'] == '聊城' or data['城市'] == '滨州'
|
|
|
+ or data['城市'] == '菏泽'):
|
|
|
+
|
|
|
+ neo4jConn.inserTriple(graph=graph, head_type='公司名称', head_name=data['公司名称'], relation_name='招聘', tail_type='岗位名称', tail_name=data['岗位名称'])
|
|
|
+ #neo4jConn.inserTriple(graph=graph, head_type='公司名称', head_name=data['公司名称'], relation_name='行业', tail_type='公司行业', tail_name=data['公司行业'])
|
|
|
+ neo4jConn.inserTriple(graph=graph, head_type='公司名称', head_name=data['公司名称'], relation_name='城市', tail_type='城市', tail_name=data['城市'])
|
|
|
+
|
|
|
+ neo4jConn.inserNodeCharacter(graph=graph, node_type='公司名称', node_name=data['公司名称'], character_type='公司性质', character_name=data['公司性质'])
|
|
|
+ neo4jConn.inserNodeCharacter(graph=graph, node_type='公司名称', node_name=data['公司名称'], character_type='公司规模', character_name=data['公司规模'])
|
|
|
+ neo4jConn.inserNodeCharacter(graph=graph, node_type='公司名称', node_name=data['公司名称'], character_type='公司行业', character_name=data['公司行业'])
|
|
|
+
|
|
|
+ neo4jConn.inserNodeCharacter(graph=graph, node_type='岗位名称', node_name=data['岗位名称'], character_type='薪资', character_name=data['薪资'])
|
|
|
+ neo4jConn.inserNodeCharacter(graph=graph, node_type='岗位名称', node_name=data['岗位名称'], character_type='学历要求', character_name=data['学历要求'])
|
|
|
+ neo4jConn.inserNodeCharacter(graph=graph, node_type='岗位名称', node_name=data['岗位名称'], character_type='工作经验', character_name=data['工作经验'])  # label matches the 工作经验 (work experience) column; '薪资' (salary) is already written from data['薪资'] above
|
|
|
+
|
|
|
respond = job_duty_skill_tail(data['岗位名称'], data['岗位职责'])
|
|
|
skills = tailSegmentation(respond)
|
|
|
if skills is False:
|
|
|
continue
|
|
|
for skill in skills:
|
|
|
- neo4jConn.inserTriple(graph=graph, head_type='岗位名称', head_name=data['岗位名称'],relation_name='需要技能', tail_type='掌握技能', tail_name=skill)
|
|
|
- print(neo4jConn.findNode(graph=graph, node_type='岗位名称', node_name=data['岗位名称']))
|
|
|
+ neo4jConn.inserTriple(graph=graph, head_type='岗位名称', head_name=data['岗位名称'],relation_name='技能', tail_type='掌握技能', tail_name=skill)
|
|
|
+ #print(neo4jConn.findNode(graph=graph, node_type='岗位名称', node_name=data['岗位名称']))
|
|
|
+
|
|
|
print(skills)
|
|
|
|
|
|
|