import pandas as pd

from src.kg_construction.llm_construct_kg import sort_format


def readXls(path):
    """Read an Excel file and return its rows as a list of dicts.

    Args:
        path: Location of the spreadsheet, passed straight to
            ``pandas.read_excel``.

    Returns:
        A list with one dict per row, keyed by column header
        (``DataFrame.to_dict('records')``).
    """
    # Read the xls file.
    df = pd.read_excel(path)
    # Splitting of multi-valued columns is currently disabled:
    # df.iloc[:, 0] = df.iloc[:, 0].astype(str).str.split('+')
    # df.iloc[:, 2] = df.iloc[:, 2].astype(str).str.split('/')
    # df.iloc[:, 4] = df.iloc[:, 4].astype(str).str.split('/')
    # df.iloc[:, 6] = df.iloc[:, 6].astype(str).str.split('/')
    # Convert the DataFrame into a list of per-row dictionaries.
    return df.to_dict('records')


def analyze_entity(data_list):
    """Collect the distinct values of six fields across all records.

    Args:
        data_list: Iterable of dict-like records. Every record must contain
            the keys '岗位名称', '职位类别', '公司行业', '公司名称',
            '公司性质' and '城市'.

    Returns:
        A 6-tuple of sets, in this order: post names, job categories,
        company industries, company names, company natures, cities.

    Raises:
        KeyError: If a record lacks one of the six keys.
    """
    # The order of this tuple defines the order of the returned sets.
    keys = ('岗位名称', '职位类别', '公司行业', '公司名称', '公司性质', '城市')
    buckets = tuple(set() for _ in keys)
    # Single pass over the records so one-shot iterables keep working.
    for data in data_list:
        for key, bucket in zip(keys, buckets):
            bucket.add(data[key])
    return buckets


def set_become_dict_list(set_job):
    """Wrap each value of an iterable in an ``{'entity': [...]}`` dict.

    Args:
        set_job: Iterable of values (e.g. one of the sets returned by
            ``analyze_entity``).

    Returns:
        A list with one dict per input value; each dict has the single key
        'entity' mapped to a one-element list holding ``sort_format(value)``.
    """
    return [{'entity': [sort_format(data)]} for data in set_job]