note/知识图谱/后端/知识图谱数据库设计.md
2025-11-19 10:16:05 +08:00

10 KiB
Raw Blame History

基于你的需求,我来重新梳理一下需要设计的表结构。总共需要7张核心表,使用 PostgreSQL 语法:

知识图谱相关表3张

1. knowledge知识点表

CREATE TABLE knowledge (
    id VARCHAR(20) PRIMARY KEY,        -- K3-1-1-01
    name VARCHAR(100) NOT NULL,        -- 函数的三要素
    type VARCHAR(20),                  -- 概念/定理/公式
    definition TEXT,                   -- 定义内容
    core_features TEXT[],              -- PostgreSQL 数组类型 ["任意性", "唯一性"]
    prerequisites TEXT[],              -- PostgreSQL 数组类型 ["K1-1-01", "K1-2-01"]
    importance VARCHAR(10),            -- 核心/重要/基础
    textbook_location VARCHAR(100),    -- 必修1 P62
    created_at TIMESTAMP DEFAULT NOW()
);

-- 数组字段的 GIN 索引,支持高效的包含查询
CREATE INDEX idx_knowledge_core_features ON knowledge USING GIN(core_features);
CREATE INDEX idx_knowledge_prerequisites ON knowledge USING GIN(prerequisites);

2. method方法表

CREATE TABLE method (
    id VARCHAR(20) PRIMARY KEY,        -- M3-1-1-01
    name VARCHAR(100) NOT NULL,        -- 分式型定义域求解法
    type VARCHAR(20),                  -- 解题方法/计算技巧
    scenario VARCHAR(200),             -- 适用场景
    steps TEXT,                        -- 方法步骤描述,纯文本
    supported_knowledge TEXT[],        -- PostgreSQL 数组类型 ["K3-1-1-02"]
    common_errors TEXT[],              -- PostgreSQL 数组类型 ["遗漏分母"]
    difficulty_level INT,              -- 1-5
    created_at TIMESTAMP DEFAULT NOW()
);

-- 数组字段的 GIN 索引
CREATE INDEX idx_method_supported_knowledge ON method USING GIN(supported_knowledge);
CREATE INDEX idx_method_common_errors ON method USING GIN(common_errors);

3. problem题目表

CREATE TABLE problem (
    id VARCHAR(20) PRIMARY KEY,        -- T3-1-1-E02
    problem_type VARCHAR(20),          -- 例题/练习题/习题
    content TEXT NOT NULL,             -- 完整题目内容
    problem_category VARCHAR(50),      -- 函数基础
    difficulty INT,                    -- 1-5
    source VARCHAR(100),               -- 必修1 P65 例2
    created_at TIMESTAMP DEFAULT NOW()
);

-- 题目-知识点映射表
CREATE TABLE problem_knowledge_mapping (
    problem_id VARCHAR(20),
    question_part VARCHAR(10),        -- 小题1/小题2/整体
    knowledge_id VARCHAR(20),
    is_primary BOOLEAN DEFAULT TRUE,   -- 是否为主要考查点
    weight DECIMAL(3,2) DEFAULT 1.0, -- 权重(0.1-1.0)
    PRIMARY KEY (problem_id, question_part, knowledge_id),
    FOREIGN KEY (problem_id) REFERENCES problem(id),
    FOREIGN KEY (knowledge_id) REFERENCES knowledge(id)
);

-- 题目-方法映射表
CREATE TABLE problem_method_mapping (
    problem_id VARCHAR(20),
    question_part VARCHAR(10),
    method_id VARCHAR(20),
    is_primary BOOLEAN DEFAULT TRUE,
    weight DECIMAL(3,2) DEFAULT 1.0,
    PRIMARY KEY (problem_id, question_part, method_id),
    FOREIGN KEY (problem_id) REFERENCES problem(id),
    FOREIGN KEY (method_id) REFERENCES method(id)
);

-- 性能索引
CREATE INDEX idx_problem_knowledge_problem ON problem_knowledge_mapping(problem_id);
CREATE INDEX idx_problem_knowledge_knowledge ON problem_knowledge_mapping(knowledge_id);
CREATE INDEX idx_problem_method_problem ON problem_method_mapping(problem_id);
CREATE INDEX idx_problem_method_method ON problem_method_mapping(method_id);

学生评分相关表4张

4. student学生表

CREATE TABLE student (
    id VARCHAR(20) PRIMARY KEY,        -- S001
    name VARCHAR(50) NOT NULL,
    grade VARCHAR(10),                 -- 高一/高二/高三
    class_name VARCHAR(20),            -- 班级
    created_at TIMESTAMP DEFAULT NOW()
);

5. learning_record学习记录表 - 核心流水表)

CREATE TABLE learning_record (
    id BIGSERIAL PRIMARY KEY,          -- PostgreSQL 自增类型
    student_id VARCHAR(20) NOT NULL,
    problem_id VARCHAR(20) NOT NULL,   -- T3-1-1-E02
    knowledge_id VARCHAR(20) NOT NULL, -- K3-1-1-02
    method_id VARCHAR(20),             -- M3-1-1-01 (可选)
    question_part VARCHAR(10),         -- 小题1/小题2/整体
    is_correct BOOLEAN NOT NULL,       -- 正确/错误
    score DECIMAL(5,2),                -- 得分(0-100)
    max_score DECIMAL(5,2) NOT NULL,   -- 该题/该小题的满分
    exam_id VARCHAR(20) NOT NULL,      -- 该场考试的id或名称
    exam_date DATE,                    -- 考试/练习日期
    response_time_ms INT,              -- 响应时间(毫秒)
    created_at TIMESTAMP DEFAULT NOW(),

    FOREIGN KEY (student_id) REFERENCES student(id),
    FOREIGN KEY (problem_id) REFERENCES problem(id),
    FOREIGN KEY (knowledge_id) REFERENCES knowledge(id),
    FOREIGN KEY (method_id) REFERENCES method(id)
);

-- 分区表(按月分区,提高查询性能)
CREATE TABLE learning_record_y2024m01 PARTITION OF learning_record
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');

-- 性能索引
CREATE INDEX idx_learning_record_student_knowledge ON learning_record(student_id, knowledge_id);
CREATE INDEX idx_learning_record_exam_date ON learning_record(exam_date);
CREATE INDEX idx_learning_record_student_exam ON learning_record(student_id, exam_date DESC);

6. knowledge_mastery知识点掌握度表 - 汇总表)

CREATE TABLE knowledge_mastery (
    student_id VARCHAR(20),
    knowledge_id VARCHAR(20),
    mastery_score DECIMAL(5,2) DEFAULT 100.0, -- 掌握度分数(0-100)
    total_attempts INT DEFAULT 0,      -- 总尝试次数
    correct_attempts INT DEFAULT 0,    -- 正确次数
    avg_response_time DECIMAL(8,2),    -- 平均答题时间
    last_practiced DATE,               -- 最后练习日期
    confidence_level DECIMAL(3,2),     -- 置信度(0-1)
    updated_at TIMESTAMP DEFAULT NOW(),

    PRIMARY KEY (student_id, knowledge_id),
    FOREIGN KEY (student_id) REFERENCES student(id),
    FOREIGN KEY (knowledge_id) REFERENCES knowledge(id)
);

-- 性能索引
CREATE INDEX idx_knowledge_mastery_score ON knowledge_mastery(mastery_score);
CREATE INDEX idx_knowledge_mastery_updated ON knowledge_mastery(updated_at);

7. method_mastery方法掌握度表 - 汇总表)

CREATE TABLE method_mastery (
    student_id VARCHAR(20),
    method_id VARCHAR(20),
    mastery_score DECIMAL(5,2) DEFAULT 100.0,
    total_usage INT DEFAULT 0,         -- 总使用次数
    successful_usage INT DEFAULT 0,    -- 成功使用次数
    avg_success_rate DECIMAL(5,2),     -- 平均成功率
    updated_at TIMESTAMP DEFAULT NOW(),

    PRIMARY KEY (student_id, method_id),
    FOREIGN KEY (student_id) REFERENCES student(id),
    FOREIGN KEY (method_id) REFERENCES method(id)
);

PostgreSQL 特有的查询优势

1. 数组查询示例

-- 查找包含特定特征的知识点
SELECT * FROM knowledge
WHERE '任意性' = ANY(core_features);

-- 查找前置知识点包含 K1-1-01 的所有知识点
SELECT id, name FROM knowledge
WHERE 'K1-1-01' = ANY(prerequisites);

-- 查找支持特定知识点的方法
SELECT id, name FROM method
WHERE 'K3-1-1-02' = ANY(supported_knowledge);

-- 查找常见错误的方法
SELECT id, name FROM method
WHERE array_length(common_errors, 1) > 2;

2. 规范化查询示例

-- 查找考查 K3-1-1-02 的所有题目
SELECT DISTINCT p.* FROM problem p
JOIN problem_knowledge_mapping pkm ON p.id = pkm.problem_id
WHERE pkm.knowledge_id = 'K3-1-1-02';

-- 查找小题1考查特定知识点的题目
SELECT p.* FROM problem p
JOIN problem_knowledge_mapping pkm ON p.id = pkm.problem_id
WHERE pkm.knowledge_id = 'K3-1-1-02' AND pkm.question_part = '小题1';

-- 查找使用特定方法的所有题目
SELECT DISTINCT p.* FROM problem p
JOIN problem_method_mapping pmm ON p.id = pmm.problem_id
WHERE pmm.method_id = 'M3-1-1-01';

-- 获取题目的完整知识点映射
SELECT p.content, pkm.question_part, k.name as knowledge_name, pkm.is_primary, pkm.weight
FROM problem p
JOIN problem_knowledge_mapping pkm ON p.id = pkm.problem_id
JOIN knowledge k ON pkm.knowledge_id = k.id
WHERE p.id = 'T3-1-1-E02'
ORDER BY pkm.question_part;

-- 全文搜索方法步骤
SELECT id, name, steps FROM method
WHERE steps LIKE '%识别分母%' AND steps LIKE '%令≠0%';

3. 复杂关联查询

-- 获取某个学生的薄弱知识点掌握度低于60且尝试次数大于5
SELECT k.id, k.name, km.mastery_score, km.total_attempts
FROM knowledge k
JOIN knowledge_mastery km ON k.id = km.knowledge_id
WHERE km.student_id = 'S001'
  AND km.mastery_score < 60
  AND km.total_attempts >= 5
ORDER BY km.mastery_score ASC;

-- 获取推荐题目(基于薄弱知识点)
SELECT DISTINCT p.*, pkm.question_part, pkm.weight
FROM problem p
JOIN problem_knowledge_mapping pkm ON p.id = pkm.problem_id
WHERE pkm.knowledge_id IN (
    SELECT knowledge_id
    FROM knowledge_mastery
    WHERE student_id = 'S001' AND mastery_score < 60
)
ORDER BY p.difficulty, pkm.weight DESC;

-- 统计知识点考查频率
SELECT k.id, k.name, COUNT(*) as question_count, AVG(p.difficulty) as avg_difficulty
FROM knowledge k
JOIN problem_knowledge_mapping pkm ON k.id = pkm.knowledge_id
JOIN problem p ON pkm.problem_id = p.id
GROUP BY k.id, k.name
ORDER BY question_count DESC;

表关系说明

知识图谱层:
knowledge ← (一对多) → problem (通过knowledge_mapping关联)
method ← (一对多) → problem (通过method_mapping关联)

学生评分层:
student ← (一对多) → learning_record (流水记录)
student ← (一对多) → knowledge_mastery (汇总统计)
student ← (一对多) → method_mastery (汇总统计)

learning_record 同时关联:
- student_id (哪个学生)
- problem_id (哪道题)
- knowledge_id (哪个知识点)
- method_id (哪个方法)

数据流向

学生答题 → learning_record (记录原始数据)
           ↓
定时任务 → knowledge_mastery (计算掌握度)
           ↓
查询分析 → 个性化推荐

核心优势

  1. 数据完整性 - 记录了每个学生的详细学习轨迹
  2. 实时更新 - learning_record实时记录mastery表定期汇总
  3. 灵活查询 - 可以按学生、知识点、方法、时间等多个维度分析
  4. 扩展性强 - 支持后续的智能推荐和预测分析
  5. PostgreSQL 优势
    • 数组类型简化了多值字段的存储和查询
    • 分区表特性优化了大数据量的查询性能
    • GIN 索引支持复杂的包含查询

这7张表构成了完整的知识图谱+个性化评分系统。需要我帮你设计具体的SQL插入示例或API接口吗