193 lines
5.5 KiB
Python
193 lines
5.5 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Quick setup script.

Initializes the knowledge-graph database and imports the data in one step.
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
import subprocess
|
||
import psycopg2
|
||
from config import DATABASE_CONFIG, JSON_DIRECTORY
|
||
|
||
def run_command(command, description):
    """Run an external command and report the outcome on stdout.

    Args:
        command: Either a shell command line (``str``), which is executed
            through the shell, or a sequence of program arguments, which is
            executed directly without a shell.
        description: Human-readable label used in the progress messages.

    Returns:
        True if the command exited with status 0, otherwise False. When the
        command exits non-zero its captured stderr is printed; when it cannot
        be started at all the OS error is printed.
    """
    print(f"\n🔧 {description}...")
    try:
        subprocess.run(
            command,
            # Only strings need the shell; argument lists run the program directly.
            shell=isinstance(command, str),
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"❌ {description}失败: {e}")
        print(f"错误输出: {e.stderr}")
        return False
    except OSError as e:
        # The program could not be launched (e.g. executable not found).
        print(f"❌ {description}失败: {e}")
        return False

    print(f"✅ {description}完成")
    return True
|
||
|
||
def check_dependencies():
    """Check that the required Python packages are importable.

    Tries to import ``psycopg2``. If the import fails, runs pip to install
    ``psycopg2-binary`` for the interpreter that is executing this script.

    Returns:
        True if ``psycopg2`` imported or the install command succeeded,
        False if the install command failed.
    """
    print("🔍 检查系统依赖...")

    # Check Python packages
    try:
        import psycopg2
        print("✅ psycopg2 已安装")
    except ImportError:
        print("❌ psycopg2 未安装,正在安装...")
        # Use "<this interpreter> -m pip" so the package lands in the
        # environment running this script, not whatever "pip" is on PATH.
        interpreter = sys.executable or "python"
        install_command = f'"{interpreter}" -m pip install psycopg2-binary'
        if not run_command(install_command, "安装 psycopg2-binary"):
            return False

    return True
|
||
|
||
def setup_database():
    """Create the target database if needed and load the schema into it.

    Connects to the PostgreSQL server with the settings in ``DATABASE_CONFIG``,
    creates the database named by ``DATABASE_CONFIG['database']`` when it is
    not listed in ``pg_database``, then runs ``database_schema.sql`` against
    it with ``psql``.

    Returns:
        True if the database exists (or was created) and the schema script ran
        successfully. False if the schema file is missing, ``psql`` failed, or
        any exception was raised along the way.
    """
    # Local import: only needed here, for safe quoting of the database name.
    from psycopg2 import sql

    print("\n🗄️ 设置数据库...")

    try:
        database_name = DATABASE_CONFIG['database']

        # Connect to a maintenance database in order to create the target one.
        # NOTE(review): the original comment calls 'edu' the "default
        # database"; a stock PostgreSQL install uses 'postgres' — confirm.
        conn = psycopg2.connect(
            host=DATABASE_CONFIG['host'],
            port=DATABASE_CONFIG['port'],
            database='edu',
            user=DATABASE_CONFIG['user'],
            password=DATABASE_CONFIG['password']
        )
        try:
            # CREATE DATABASE cannot run inside a transaction block.
            conn.autocommit = True
            cursor = conn.cursor()
            try:
                # Create the database if it does not exist yet
                cursor.execute(
                    "SELECT 1 FROM pg_database WHERE datname = %s",
                    (database_name,),
                )
                if cursor.fetchone():
                    print(f"✅ 数据库 {DATABASE_CONFIG['database']} 已存在")
                else:
                    # Quote the identifier so the created name matches exactly
                    # the name used by the lookup above and by later connections.
                    cursor.execute(
                        sql.SQL("CREATE DATABASE {}").format(
                            sql.Identifier(database_name)
                        )
                    )
                    print(f"✅ 数据库 {DATABASE_CONFIG['database']} 创建成功")
            finally:
                cursor.close()
        finally:
            # Always release the connection, even if a statement failed.
            conn.close()

        # Run the schema script
        schema_file = "database_schema.sql"
        if not os.path.exists(schema_file):
            print(f"❌ 找不到架构文件: {schema_file}")
            return False

        command = f'psql -h {DATABASE_CONFIG["host"]} -p {DATABASE_CONFIG["port"]} -U {DATABASE_CONFIG["user"]} -d {DATABASE_CONFIG["database"]} -f {schema_file}'

        # psql takes no password on the command line; hand it the configured
        # one through PGPASSWORD for the duration of the call, unless the
        # caller's environment already provides one.
        password = DATABASE_CONFIG['password']
        inject_password = bool(password) and "PGPASSWORD" not in os.environ
        if inject_password:
            os.environ["PGPASSWORD"] = str(password)
        try:
            schema_loaded = run_command(command, "执行数据库架构脚本")
        finally:
            if inject_password:
                os.environ.pop("PGPASSWORD", None)

        if not schema_loaded:
            return False
        print("✅ 数据库架构创建成功")

    except Exception as e:
        print(f"❌ 数据库设置失败: {e}")
        return False

    return True
|
||
|
||
def import_data():
    """Run the data import script after checking that input files exist.

    Verifies that ``JSON_DIRECTORY`` is a directory containing at least one
    ``.json`` file, then runs ``data_import.py`` with the interpreter that is
    executing this script.

    Returns:
        True if the import script exited successfully. False if the directory
        is missing, contains no ``.json`` files, or the script failed.
    """
    print("\n📊 开始导入数据...")

    # Check the JSON directory. isdir (not exists) so that a regular file at
    # this path is reported here instead of making os.listdir raise.
    if not os.path.isdir(JSON_DIRECTORY):
        print(f"❌ JSON文件目录不存在: {JSON_DIRECTORY}")
        return False

    json_files = [f for f in os.listdir(JSON_DIRECTORY) if f.endswith('.json')]
    if not json_files:
        print(f"❌ 在 {JSON_DIRECTORY} 中没有找到JSON文件")
        return False

    print(f"📁 找到 {len(json_files)} 个JSON文件")

    # Run the import script with the current interpreter; a bare "python" may
    # be missing or may be a different Python than the one running this script.
    interpreter = sys.executable or "python"
    command = f'"{interpreter}" data_import.py'
    if run_command(command, "数据导入"):
        print("✅ 数据导入成功")
        return True

    return False
|
||
|
||
def validate_data():
    """Run the data validation script and report its outcome.

    Runs ``data_validation.py`` with the interpreter that is executing this
    script. On success, also mentions ``validation_report.md`` if that file
    exists in the current directory.

    Returns:
        True if the validation script exited successfully, otherwise False.
    """
    print("\n🔍 开始数据验证...")

    # Use the current interpreter; a bare "python" may be missing or may be a
    # different Python than the one running this script.
    interpreter = sys.executable or "python"
    command = f'"{interpreter}" data_validation.py'
    if not run_command(command, "数据验证"):
        print("⚠️ 数据验证发现问题,请查看详细报告")
        return False

    print("✅ 数据验证完成")

    # Point the user at the validation report when one was written
    if os.path.exists("validation_report.md"):
        print("📄 验证报告已生成: validation_report.md")

    return True
|
||
|
||
def show_statistics():
    """Print the row count of each core table.

    Connects with ``DATABASE_CONFIG`` and prints ``COUNT(*)`` for a fixed list
    of tables. Any failure is reported on stdout rather than raised, and the
    function returns None either way.
    """
    print("\n📈 数据库统计信息:")

    # Basic statistics: (table name, label shown to the user)
    tables = [
        ("knowledge_points", "知识点"),
        ("methods", "方法"),
        ("problems", "题目"),
        ("knowledge_relations", "知识点关系"),
    ]

    try:
        conn = psycopg2.connect(**DATABASE_CONFIG)
        try:
            cursor = conn.cursor()
            try:
                for table, name in tables:
                    # Table names come only from the literal list above.
                    cursor.execute(f"SELECT COUNT(*) FROM {table}")
                    count = cursor.fetchone()[0]
                    print(f" • {name}: {count} 个")
            finally:
                cursor.close()
        finally:
            # Release the connection even when a query fails
            # (for example, when a table does not exist).
            conn.close()

    except Exception as e:
        print(f"❌ 获取统计信息失败: {e}")
|
||
|
||
def main():
    """Drive the whole setup workflow from start to finish.

    Refuses to run while the placeholder password is still configured, then
    checks dependencies, sets up the database and imports the data, stopping
    at the first of those that fails. Validation and statistics run afterwards
    and do not affect the result.

    Returns:
        True when the mandatory steps all succeeded, otherwise False.
    """
    print("🚀 高中数学知识图谱数据库设置向导")
    print("=" * 50)

    # Configuration check: the placeholder password must have been replaced
    placeholder_in_use = DATABASE_CONFIG['password'] == 'your_password'
    if placeholder_in_use:
        print("❌ 请先在 config.py 中配置数据库密码")
        return False

    # Steps 1-3 (dependencies, database, import) are mandatory, in this order
    mandatory_steps = (check_dependencies, setup_database, import_data)
    for step in mandatory_steps:
        if not step():
            return False

    # Step 4: validation; a failure here does not abort the workflow
    validate_data()

    # Step 5: show statistics
    show_statistics()

    closing_messages = (
        "\n🎉 设置完成!",
        "\n📋 下一步操作:",
        "1. 查看 README.md 了解详细使用方法",
        "2. 使用 query_examples.sql 中的查询示例",
        "3. 使用 data_validation.py 进行数据验证",
        "4. 使用 data_import.py 进行增量数据导入",
    )
    for message in closing_messages:
        print(message)

    return True
|
||
|
||
if __name__ == "__main__":
    # Map main()'s truthy/falsy result onto the process exit status
    exit_status = 0 if main() else 1
    sys.exit(exit_status)
|