658 lines
25 KiB
Python
658 lines
25 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
高中数学知识图谱数据导入脚本
|
||
将JSON格式的知识图谱数据导入到PostgreSQL数据库
|
||
"""
|
||
|
||
import json
|
||
import os
|
||
import re
|
||
import psycopg2
|
||
from psycopg2.extras import Json
|
||
from typing import Dict, List, Any, Optional, Tuple
|
||
import logging
|
||
from datetime import datetime
|
||
|
||
# 配置日志
|
||
logging.basicConfig(
|
||
level=logging.INFO,
|
||
format='%(asctime)s - %(levelname)s - %(message)s',
|
||
handlers=[
|
||
logging.FileHandler('data_import.log'),
|
||
logging.StreamHandler()
|
||
]
|
||
)
|
||
logger = logging.getLogger(__name__)
|
||
|
||
class KnowledgeGraphImporter:
|
||
"""知识图谱数据导入器"""
|
||
|
||
def __init__(self, db_config: Dict[str, Any]):
|
||
"""
|
||
初始化导入器
|
||
|
||
Args:
|
||
db_config: 数据库连接配置
|
||
"""
|
||
self.db_config = db_config
|
||
self.conn = None
|
||
self.cursor = None
|
||
|
||
def connect(self):
|
||
"""连接数据库"""
|
||
try:
|
||
self.conn = psycopg2.connect(**self.db_config)
|
||
self.cursor = self.conn.cursor()
|
||
logger.info("数据库连接成功")
|
||
except Exception as e:
|
||
logger.error(f"数据库连接失败: {e}")
|
||
raise
|
||
|
||
def disconnect(self):
|
||
"""断开数据库连接"""
|
||
if self.cursor:
|
||
self.cursor.close()
|
||
if self.conn:
|
||
self.conn.close()
|
||
logger.info("数据库连接已关闭")
|
||
|
||
def parse_chapter_info(self, filename: str) -> Tuple[str, str]:
|
||
"""
|
||
从文件名解析章节信息
|
||
|
||
Args:
|
||
filename: 文件名(如 knowledge-必修第一章-集合与常用逻辑用语.json)
|
||
|
||
Returns:
|
||
(volume_type, chapter_name): (必修/选择性必修, 章节名称)
|
||
"""
|
||
# 匹配格式: knowledge-必修第一章-集合与常用逻辑用语.json
|
||
pattern = r'^(knowledge|method|problems)-(必修|选择性必修)(第[一二三四五六七八九十]+章)-(.+)\.json$'
|
||
match = re.match(pattern, filename)
|
||
|
||
if match:
|
||
_, volume_type, chapter_part, chapter_name = match.groups()
|
||
return volume_type, chapter_name
|
||
else:
|
||
# 如果不匹配标准格式,返回默认值
|
||
logger.warning(f"文件名格式不标准: {filename}")
|
||
return "必修", "未知章节"
|
||
|
||
def get_or_create_textbook(self, volume_type: str, volume_number: int) -> int:
|
||
"""
|
||
获取或创建教材记录
|
||
|
||
Args:
|
||
volume_type: 必修/选择性必修
|
||
volume_number: 册数
|
||
|
||
Returns:
|
||
textbook_id: 教材ID
|
||
"""
|
||
# 查询是否已存在
|
||
query = """
|
||
SELECT id FROM textbooks
|
||
WHERE volume_type = %s AND volume_number = %s
|
||
"""
|
||
self.cursor.execute(query, (volume_type, volume_number))
|
||
result = self.cursor.fetchone()
|
||
|
||
if result:
|
||
return result[0]
|
||
|
||
# 创建新教材
|
||
insert_query = """
|
||
INSERT INTO textbooks (name, volume_type, volume_number)
|
||
VALUES (%s, %s, %s)
|
||
RETURNING id
|
||
"""
|
||
self.cursor.execute(insert_query, (
|
||
f"高中数学{volume_type}第{volume_number}册",
|
||
volume_type,
|
||
volume_number
|
||
))
|
||
return self.cursor.fetchone()[0]
|
||
|
||
def get_or_create_chapter(self, textbook_id: int, chapter_name: str) -> int:
|
||
"""
|
||
获取或创建章节记录
|
||
|
||
Args:
|
||
textbook_id: 教材ID
|
||
chapter_name: 章节名称
|
||
|
||
Returns:
|
||
chapter_id: 章节ID
|
||
"""
|
||
# 查询是否已存在
|
||
query = """
|
||
SELECT id FROM chapters
|
||
WHERE textbook_id = %s AND chapter_name = %s
|
||
"""
|
||
self.cursor.execute(query, (textbook_id, chapter_name))
|
||
result = self.cursor.fetchone()
|
||
|
||
if result:
|
||
return result[0]
|
||
|
||
# 创建新章节
|
||
insert_query = """
|
||
INSERT INTO chapters (textbook_id, chapter_name, chapter_number, chapter_order)
|
||
VALUES (%s, %s, %s, %s)
|
||
RETURNING id
|
||
"""
|
||
|
||
# 尝试从章节名称中提取章节号
|
||
chapter_number = self.extract_chapter_number(chapter_name)
|
||
self.cursor.execute(insert_query, (
|
||
textbook_id,
|
||
chapter_name,
|
||
chapter_number,
|
||
chapter_number
|
||
))
|
||
return self.cursor.fetchone()[0]
|
||
|
||
def extract_chapter_number(self, chapter_name: str) -> int:
|
||
"""从章节名称中提取章节号"""
|
||
chinese_numbers = {'一': 1, '二': 2, '三': 3, '四': 4, '五': 5,
|
||
'六': 6, '七': 7, '八': 8, '九': 9, '十': 10}
|
||
|
||
for chinese, number in chinese_numbers.items():
|
||
if chinese in chapter_name:
|
||
return number
|
||
|
||
return 1 # 默认返回1
|
||
|
||
def import_knowledge_data(self, file_path: str) -> int:
|
||
"""
|
||
导入知识点数据
|
||
|
||
Args:
|
||
file_path: JSON文件路径
|
||
|
||
Returns:
|
||
导入的知识点数量
|
||
"""
|
||
with open(file_path, 'r', encoding='utf-8') as f:
|
||
data = json.load(f)
|
||
|
||
filename = os.path.basename(file_path)
|
||
volume_type, chapter_name = self.parse_chapter_info(filename)
|
||
|
||
# 获取教材和章节ID
|
||
volume_number = 1 # 默认第一册,可以根据实际需要调整
|
||
textbook_id = self.get_or_create_textbook(volume_type, volume_number)
|
||
chapter_id = self.get_or_create_chapter(textbook_id, chapter_name)
|
||
|
||
imported_count = 0
|
||
|
||
for knowledge in data.get('knowledge_list', []):
|
||
try:
|
||
# 插入知识点
|
||
insert_query = """
|
||
INSERT INTO knowledge_points (
|
||
code, name, level, type, importance, core_content,
|
||
principle_explanation, application_conditions, related_content,
|
||
examination_methods, textbook_location
|
||
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
|
||
ON CONFLICT (code) DO UPDATE SET
|
||
name = EXCLUDED.name,
|
||
level = EXCLUDED.level,
|
||
type = EXCLUDED.type,
|
||
importance = EXCLUDED.importance,
|
||
core_content = EXCLUDED.core_content,
|
||
principle_explanation = EXCLUDED.principle_explanation,
|
||
application_conditions = EXCLUDED.application_conditions,
|
||
related_content = EXCLUDED.related_content,
|
||
examination_methods = EXCLUDED.examination_methods,
|
||
textbook_location = EXCLUDED.textbook_location,
|
||
updated_at = CURRENT_TIMESTAMP
|
||
RETURNING id
|
||
"""
|
||
|
||
self.cursor.execute(insert_query, (
|
||
knowledge.get('编号'),
|
||
knowledge.get('名称'),
|
||
knowledge.get('层次'),
|
||
knowledge.get('类型'),
|
||
knowledge.get('重要程度'),
|
||
Json(knowledge.get('核心内容', {})),
|
||
Json(knowledge.get('原理说明', {})),
|
||
Json(knowledge.get('适用条件', {})),
|
||
Json(knowledge.get('关联内容', {})),
|
||
knowledge.get('考查方式', []),
|
||
knowledge.get('关联内容', {}).get('教材位置', '')
|
||
))
|
||
|
||
knowledge_id = self.cursor.fetchone()[0]
|
||
imported_count += 1
|
||
|
||
# 处理前置知识关系
|
||
self.import_knowledge_relations(knowledge_id, knowledge.get('前置知识', []), '前置知识')
|
||
|
||
# 处理包含的子知识点
|
||
related_content = knowledge.get('关联内容', {})
|
||
sub_knowledge = related_content.get('包含的子知识点', [])
|
||
self.import_knowledge_relations(knowledge_id, sub_knowledge, '包含关系')
|
||
|
||
except Exception as e:
|
||
logger.error(f"导入知识点失败 {knowledge.get('编号', 'unknown')}: {e}")
|
||
continue
|
||
|
||
logger.info(f"从 {file_path} 导入了 {imported_count} 个知识点")
|
||
return imported_count
|
||
|
||
def import_method_data(self, file_path: str) -> int:
|
||
"""
|
||
导入方法数据
|
||
|
||
Args:
|
||
file_path: JSON文件路径
|
||
|
||
Returns:
|
||
导入的方法数量
|
||
"""
|
||
with open(file_path, 'r', encoding='utf-8') as f:
|
||
data = json.load(f)
|
||
|
||
filename = os.path.basename(file_path)
|
||
volume_type, chapter_name = self.parse_chapter_info(filename)
|
||
|
||
# 获取教材和章节ID
|
||
volume_number = 1 # 默认第一册
|
||
textbook_id = self.get_or_create_textbook(volume_type, volume_number)
|
||
chapter_id = self.get_or_create_chapter(textbook_id, chapter_name)
|
||
|
||
imported_count = 0
|
||
|
||
for method in data.get('method_list', []):
|
||
try:
|
||
# 插入方法主记录
|
||
insert_query = """
|
||
INSERT INTO methods (
|
||
code, name, type, difficulty_level, application_scenarios,
|
||
mathematical_thoughts, problem_solving_strategy, textbook_location, description
|
||
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
|
||
ON CONFLICT (code) DO UPDATE SET
|
||
name = EXCLUDED.name,
|
||
type = EXCLUDED.type,
|
||
difficulty_level = EXCLUDED.difficulty_level,
|
||
application_scenarios = EXCLUDED.application_scenarios,
|
||
mathematical_thoughts = EXCLUDED.mathematical_thoughts,
|
||
problem_solving_strategy = EXCLUDED.problem_solving_strategy,
|
||
textbook_location = EXCLUDED.textbook_location,
|
||
description = EXCLUDED.description,
|
||
updated_at = CURRENT_TIMESTAMP
|
||
RETURNING id
|
||
"""
|
||
|
||
self.cursor.execute(insert_query, (
|
||
method.get('编号'),
|
||
method.get('名称'),
|
||
method.get('类型'),
|
||
method.get('难度等级'),
|
||
Json(method.get('适用场景', {})),
|
||
method.get('数学思想', []),
|
||
method.get('解题策略'),
|
||
method.get('教材位置'),
|
||
method.get('名称') # 用名称作为描述
|
||
))
|
||
|
||
method_id = self.cursor.fetchone()[0]
|
||
imported_count += 1
|
||
|
||
# 插入方法步骤
|
||
self.import_method_steps(method_id, method.get('方法步骤', []))
|
||
|
||
# 插入常见错误
|
||
self.import_method_errors(method_id, method.get('常见错误', []))
|
||
|
||
# 处理支撑知识点
|
||
self.import_method_support_knowledge(method_id, method.get('支撑知识点', []))
|
||
|
||
except Exception as e:
|
||
logger.error(f"导入方法失败 {method.get('编号', 'unknown')}: {e}")
|
||
continue
|
||
|
||
logger.info(f"从 {file_path} 导入了 {imported_count} 个方法")
|
||
return imported_count
|
||
|
||
def import_problem_data(self, file_path: str) -> int:
|
||
"""
|
||
导入题目数据
|
||
|
||
Args:
|
||
file_path: JSON文件路径
|
||
|
||
Returns:
|
||
导入的题目数量
|
||
"""
|
||
with open(file_path, 'r', encoding='utf-8') as f:
|
||
data = json.load(f)
|
||
|
||
filename = os.path.basename(file_path)
|
||
volume_type, chapter_name = self.parse_chapter_info(filename)
|
||
|
||
# 获取教材和章节ID
|
||
volume_number = 1 # 默认第一册
|
||
textbook_id = self.get_or_create_textbook(volume_type, volume_number)
|
||
chapter_id = self.get_or_create_chapter(textbook_id, chapter_name)
|
||
|
||
imported_count = 0
|
||
|
||
for problem in data.get('problem_list', []):
|
||
try:
|
||
# 插入题目主记录
|
||
insert_query = """
|
||
INSERT INTO problems (
|
||
code, title, problem_type, source_info, stem, full_content,
|
||
problem_category, difficulty_info, textbook_location, subquestion_count
|
||
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
|
||
ON CONFLICT (code) DO UPDATE SET
|
||
title = EXCLUDED.title,
|
||
problem_type = EXCLUDED.problem_type,
|
||
source_info = EXCLUDED.source_info,
|
||
stem = EXCLUDED.stem,
|
||
full_content = EXCLUDED.full_content,
|
||
problem_category = EXCLUDED.problem_category,
|
||
difficulty_info = EXCLUDED.difficulty_info,
|
||
textbook_location = EXCLUDED.textbook_location,
|
||
subquestion_count = EXCLUDED.subquestion_count,
|
||
updated_at = CURRENT_TIMESTAMP
|
||
RETURNING id
|
||
"""
|
||
|
||
basic_info = problem.get('题目基本信息', {})
|
||
content = problem.get('题目内容', {})
|
||
difficulty = problem.get('难度评估', {})
|
||
category = problem.get('题型分类', {})
|
||
|
||
self.cursor.execute(insert_query, (
|
||
basic_info.get('编号'),
|
||
content.get('题干'),
|
||
basic_info.get('题目类型'),
|
||
Json(basic_info.get('来源信息', {})),
|
||
content.get('题干'),
|
||
content.get('完整题目'),
|
||
Json(category),
|
||
Json(difficulty),
|
||
basic_info.get('来源信息', {}).get('页码', ''),
|
||
len(content.get('问题', []))
|
||
))
|
||
|
||
problem_id = self.cursor.fetchone()[0]
|
||
imported_count += 1
|
||
|
||
# 插入小题
|
||
subquestion_ids = self.import_problem_subquestions(problem_id, content.get('问题', []))
|
||
|
||
# 插入知识点标注
|
||
self.import_problem_knowledge_tags(problem_id, subquestion_ids,
|
||
problem.get('知识点标注', {}))
|
||
|
||
# 插入方法标注
|
||
self.import_problem_method_tags(problem_id, subquestion_ids,
|
||
problem.get('方法标注', {}))
|
||
|
||
except Exception as e:
|
||
logger.error(f"导入题目失败 {problem.get('题目基本信息', {}).get('编号', 'unknown')}: {e}")
|
||
continue
|
||
|
||
logger.info(f"从 {file_path} 导入了 {imported_count} 个题目")
|
||
return imported_count
|
||
|
||
def import_knowledge_relations(self, source_knowledge_id: int, target_codes: List[str], relation_type: str):
|
||
"""导入知识点关系"""
|
||
for code in target_codes:
|
||
try:
|
||
# 查找目标知识点ID
|
||
self.cursor.execute("SELECT id FROM knowledge_points WHERE code = %s", (code,))
|
||
result = self.cursor.fetchone()
|
||
|
||
if result:
|
||
target_knowledge_id = result[0]
|
||
insert_query = """
|
||
INSERT INTO knowledge_relations (source_knowledge_id, target_knowledge_id, relation_type)
|
||
VALUES (%s, %s, %s)
|
||
ON CONFLICT (source_knowledge_id, target_knowledge_id, relation_type)
|
||
DO NOTHING
|
||
"""
|
||
self.cursor.execute(insert_query, (source_knowledge_id, target_knowledge_id, relation_type))
|
||
except Exception as e:
|
||
logger.error(f"导入知识点关系失败: {e}")
|
||
|
||
def import_method_steps(self, method_id: int, steps: List[Dict]):
|
||
"""导入方法步骤"""
|
||
for i, step in enumerate(steps, 1):
|
||
try:
|
||
insert_query = """
|
||
INSERT INTO method_steps (method_id, step_number, step_description, key_points, common_mistakes)
|
||
VALUES (%s, %s, %s, %s, %s)
|
||
ON CONFLICT (method_id, step_number)
|
||
DO UPDATE SET
|
||
step_description = EXCLUDED.step_description,
|
||
key_points = EXCLUDED.key_points,
|
||
common_mistakes = EXCLUDED.common_mistakes
|
||
"""
|
||
self.cursor.execute(insert_query, (
|
||
method_id,
|
||
i,
|
||
step.get('步骤描述'),
|
||
step.get('关键要点', []),
|
||
step.get('常见错误', [])
|
||
))
|
||
except Exception as e:
|
||
logger.error(f"导入方法步骤失败: {e}")
|
||
|
||
def import_method_errors(self, method_id: int, errors: List[Dict]):
|
||
"""导入方法常见错误"""
|
||
for error in errors:
|
||
try:
|
||
insert_query = """
|
||
INSERT INTO method_common_errors (method_id, error_description, cause_analysis, correct_approach)
|
||
VALUES (%s, %s, %s, %s)
|
||
"""
|
||
self.cursor.execute(insert_query, (
|
||
method_id,
|
||
error.get('错误描述'),
|
||
error.get('原因'),
|
||
error.get('正确做法')
|
||
))
|
||
except Exception as e:
|
||
logger.error(f"导入方法错误失败: {e}")
|
||
|
||
def import_method_support_knowledge(self, method_id: int, knowledge_codes: List[str]):
|
||
"""导入方法支撑知识点"""
|
||
for code in knowledge_codes:
|
||
try:
|
||
# 查找知识点ID
|
||
self.cursor.execute("SELECT id FROM knowledge_points WHERE code = %s", (code,))
|
||
result = self.cursor.fetchone()
|
||
|
||
if result:
|
||
knowledge_id = result[0]
|
||
insert_query = """
|
||
INSERT INTO method_support_knowledge (method_id, knowledge_id, support_type)
|
||
VALUES (%s, %s, %s)
|
||
ON CONFLICT (method_id, knowledge_id) DO NOTHING
|
||
"""
|
||
self.cursor.execute(insert_query, (method_id, knowledge_id, '直接支撑'))
|
||
except Exception as e:
|
||
logger.error(f"导入方法支撑知识点失败: {e}")
|
||
|
||
def import_problem_subquestions(self, problem_id: int, questions: List[str]) -> List[int]:
|
||
"""导入小题"""
|
||
subquestion_ids = []
|
||
for i, question in enumerate(questions, 1):
|
||
try:
|
||
insert_query = """
|
||
INSERT INTO problem_subquestions (problem_id, subquestion_number, content)
|
||
VALUES (%s, %s, %s)
|
||
RETURNING id
|
||
"""
|
||
self.cursor.execute(insert_query, (problem_id, f"({i})", question))
|
||
subquestion_ids.append(self.cursor.fetchone()[0])
|
||
except Exception as e:
|
||
logger.error(f"导入小题失败: {e}")
|
||
return subquestion_ids
|
||
|
||
def import_problem_knowledge_tags(self, problem_id: int, subquestion_ids: List[int], knowledge_tags: Dict):
|
||
"""导入题目知识点标注"""
|
||
for subquestion_key, tag_info in knowledge_tags.items():
|
||
try:
|
||
# 解析小题编号
|
||
subquestion_idx = self.parse_subquestion_key(subquestion_key)
|
||
subquestion_id = subquestion_ids[subquestion_idx - 1] if subquestion_idx <= len(subquestion_ids) else None
|
||
|
||
# 处理主要考查的知识点
|
||
for knowledge in tag_info.get('主要考查', []):
|
||
self.insert_problem_knowledge_tag(problem_id, subquestion_id,
|
||
knowledge.get('知识点编号'), '主要考查')
|
||
|
||
# 处理辅助涉及的知识点
|
||
for knowledge in tag_info.get('辅助涉及', []):
|
||
self.insert_problem_knowledge_tag(problem_id, subquestion_id,
|
||
knowledge.get('知识点编号'), '辅助涉及')
|
||
except Exception as e:
|
||
logger.error(f"导入题目知识点标注失败: {e}")
|
||
|
||
def import_problem_method_tags(self, problem_id: int, subquestion_ids: List[int], method_tags: Dict):
|
||
"""导入题目方法标注"""
|
||
for subquestion_key, methods in method_tags.items():
|
||
try:
|
||
# 解析小题编号
|
||
subquestion_idx = self.parse_subquestion_key(subquestion_key)
|
||
subquestion_id = subquestion_ids[subquestion_idx - 1] if subquestion_idx <= len(subquestion_ids) else None
|
||
|
||
for method in methods:
|
||
self.insert_problem_method_tag(problem_id, subquestion_id,
|
||
method.get('方法编号'), '主要方法')
|
||
except Exception as e:
|
||
logger.error(f"导入题目方法标注失败: {e}")
|
||
|
||
def insert_problem_knowledge_tag(self, problem_id: int, subquestion_id: int, knowledge_code: str, tag_type: str):
|
||
"""插入题目知识点标注"""
|
||
try:
|
||
# 查找知识点ID
|
||
self.cursor.execute("SELECT id FROM knowledge_points WHERE code = %s", (knowledge_code,))
|
||
result = self.cursor.fetchone()
|
||
|
||
if result:
|
||
knowledge_id = result[0]
|
||
insert_query = """
|
||
INSERT INTO problem_knowledge_tags (problem_id, subquestion_id, knowledge_id, tag_type)
|
||
VALUES (%s, %s, %s, %s)
|
||
ON CONFLICT (problem_id, subquestion_id, knowledge_id, tag_type) DO NOTHING
|
||
"""
|
||
self.cursor.execute(insert_query, (problem_id, subquestion_id, knowledge_id, tag_type))
|
||
except Exception as e:
|
||
logger.error(f"插入题目知识点标注失败: {e}")
|
||
|
||
def insert_problem_method_tag(self, problem_id: int, subquestion_id: int, method_code: str, usage_type: str):
|
||
"""插入题目方法标注"""
|
||
try:
|
||
# 查找方法ID
|
||
self.cursor.execute("SELECT id FROM methods WHERE code = %s", (method_code,))
|
||
result = self.cursor.fetchone()
|
||
|
||
if result:
|
||
method_id = result[0]
|
||
insert_query = """
|
||
INSERT INTO problem_method_tags (problem_id, subquestion_id, method_id, usage_frequency)
|
||
VALUES (%s, %s, %s, %s)
|
||
ON CONFLICT (problem_id, subquestion_id, method_id) DO NOTHING
|
||
"""
|
||
self.cursor.execute(insert_query, (problem_id, subquestion_id, method_id, usage_type))
|
||
except Exception as e:
|
||
logger.error(f"插入题目方法标注失败: {e}")
|
||
|
||
def parse_subquestion_key(self, key: str) -> int:
|
||
"""解析小题键名获取编号"""
|
||
if key == '小题1':
|
||
return 1
|
||
elif key == '小题2':
|
||
return 2
|
||
elif key == '小题3':
|
||
return 3
|
||
elif key == '小题1':
|
||
return 1
|
||
else:
|
||
# 尝试从键名中提取数字
|
||
import re
|
||
match = re.search(r'\d+', key)
|
||
return int(match.group()) if match else 1
|
||
|
||
def import_all_files(self, directory: str):
|
||
"""
|
||
导入目录下的所有JSON文件
|
||
|
||
Args:
|
||
directory: JSON文件目录路径
|
||
"""
|
||
if not os.path.exists(directory):
|
||
logger.error(f"目录不存在: {directory}")
|
||
return
|
||
|
||
files = [f for f in os.listdir(directory) if f.endswith('.json')]
|
||
total_imported = {
|
||
'knowledge': 0,
|
||
'method': 0,
|
||
'problem': 0
|
||
}
|
||
|
||
for filename in files:
|
||
file_path = os.path.join(directory, filename)
|
||
|
||
try:
|
||
if filename.startswith('knowledge-'):
|
||
count = self.import_knowledge_data(file_path)
|
||
total_imported['knowledge'] += count
|
||
elif filename.startswith('method-'):
|
||
count = self.import_method_data(file_path)
|
||
total_imported['method'] += count
|
||
elif filename.startswith('problems-'):
|
||
count = self.import_problem_data(file_path)
|
||
total_imported['problem'] += count
|
||
else:
|
||
logger.warning(f"未知文件类型,跳过: {filename}")
|
||
continue
|
||
|
||
self.conn.commit()
|
||
|
||
except Exception as e:
|
||
logger.error(f"导入文件失败 {filename}: {e}")
|
||
self.conn.rollback()
|
||
continue
|
||
|
||
logger.info("导入完成统计:")
|
||
logger.info(f"知识点: {total_imported['knowledge']} 个")
|
||
logger.info(f"方法: {total_imported['method']} 个")
|
||
logger.info(f"题目: {total_imported['problem']} 个")
|
||
|
||
def main():
|
||
"""主函数"""
|
||
# 数据库配置
|
||
db_config = {
|
||
'host': 'localhost',
|
||
'port': 5432,
|
||
'database': 'math_knowledge_graph',
|
||
'user': 'postgres',
|
||
'password': 'your_password' # 请修改为实际密码
|
||
}
|
||
|
||
# JSON文件目录
|
||
json_directory = '/Users/robertmaxwell/note/知识图谱/教科书-数学/all'
|
||
|
||
# 创建导入器并执行导入
|
||
importer = KnowledgeGraphImporter(db_config)
|
||
|
||
try:
|
||
importer.connect()
|
||
importer.import_all_files(json_directory)
|
||
except Exception as e:
|
||
logger.error(f"导入过程中发生错误: {e}")
|
||
finally:
|
||
importer.disconnect()
|
||
|
||
if __name__ == "__main__":
|
||
main() |