# Read the original JSONL file, one JSON record per line.

# Entity categories that are accepted as labels, in no particular priority:
# the first name of an entity that appears here wins.
match_names = ["地点", "人名", "地理实体", "组织"]

# Decode explicitly as UTF-8: the records contain Chinese text and the
# platform's default encoding is not UTF-8 everywhere, whereas the output
# file is always written as UTF-8.
with open(origin_path, "r", encoding="utf-8") as file:
    for line in file:
        # Tolerate blank lines (e.g. a trailing empty line) instead of
        # letting json.loads fail on them.
        if not line.strip():
            continue

        # Parse the JSON payload of this line.
        data = json.loads(line)
        input_text = data["text"]
        entities = data["entities"]

        entity_sentence = ""

        for entity in entities:
            entity_json = dict(entity)
            entity_text = entity_json["entity_text"]
            entity_names = entity_json["entity_names"]
            # NOTE(review): when none of the names is in match_names,
            # entity_label keeps the value from a previous entity (or is
            # unbound for the very first one) — confirm the code consuming
            # it accounts for that.
            for name in entity_names:
                if name in match_names:
                    entity_label = name
                    break
# Save the restructured records as JSONL: one JSON object per line, UTF-8
# encoded, with non-ASCII characters written verbatim rather than escaped.
with open(new_path, mode="w", encoding="utf-8") as file:
    file.writelines(
        f"{json.dumps(message, ensure_ascii=False)}\n" for message in messages
    )