note/知识图谱/教科书-数学/选择性必修/method-第八章-成对数据的统计分析.json
2025-11-19 10:16:05 +08:00

361 lines
15 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"章节信息": {
"章": "第八章",
"节": "8.1 成对数据的统计相关性;8.2 一元线性回归模型及其应用;8.3 列联表与独立性检验",
"小节": "8.1.1 变量的相关关系;8.1.2 样本相关系数;8.2.1 一元线性回归模型;8.2.2 一元线性回归模型参数的最小二乘估计;8.3.1 分类变量与列联表;8.3.2 独立性检验",
"页码范围": "98-154"
},
"method_list": [
{
"编号": "M8-1-01",
"名称": "散点图绘制法",
"类型": "可视化方法",
"目的": "直观展示两个变量之间的关系特征和分布模式",
"步骤": [
"建立直角坐标系,通常以自变量为横轴,因变量为纵轴",
"将成对数据$(x_i, y_i)$表示为坐标系中的点",
"观察点的分布特征,识别关系模式"
],
"原理依据": {
"理论基础": "K8-1-1-04 散点图",
"核心思想": "通过几何直观展示变量间的统计关系",
"数学依据": "坐标几何原理"
},
"应用条件": {
"数据要求": "成对的数值型数据",
"样本容量": "至少需要5-10个数据点才能看出趋势",
"变量类型": "连续型或离散型数值变量"
},
"结果解释": {
"正相关模式": "点分布从左下到右上的趋势",
"负相关模式": "点分布从左上到右下的趋势",
"线性相关": "点分布在一条直线附近",
"非线性相关": "点分布在曲线附近",
"无明显相关": "点分布杂乱无章"
},
"关联知识": ["K8-1-1-01 相关关系", "K8-1-1-02 正相关与负相关", "K8-1-1-03 线性相关与非线性相关"],
"注意事项": [
"坐标轴比例要适当,避免压缩或拉伸过度",
"注意识别异常值对整体趋势的影响",
"散点图只能展示关系,不能确定因果关系"
]
},
{
"编号": "M8-1-02",
"名称": "样本相关系数计算法",
"类型": "计算方法",
"目的": "定量描述两个变量线性相关的程度和方向",
"步骤": [
"计算变量X的样本均值$\\bar{x} = \\frac{1}{n}\\sum_{i=1}^{n}x_i$",
"计算变量Y的样本均值$\\bar{y} = \\frac{1}{n}\\sum_{i=1}^{n}y_i$",
"计算X与Y的偏差乘积和$S_{xy} = \\sum_{i=1}^{n}(x_i - \\bar{x})(y_i - \\bar{y})$",
"计算X的偏差平方和$S_{xx} = \\sum_{i=1}^{n}(x_i - \\bar{x})^2$",
"计算Y的偏差平方和$S_{yy} = \\sum_{i=1}^{n}(y_i - \\bar{y})^2$",
"代入公式计算相关系数$r = \\frac{S_{xy}}{\\sqrt{S_{xx}S_{yy}}}$"
],
"原理依据": {
"理论基础": "K8-1-2-01 样本相关系数",
"核心思想": "通过标准化处理消除量纲影响,构造相关程度指标",
"数学依据": "柯西不等式保证|r|≤1"
},
"应用条件": {
"数据要求": "成对的数值型数据",
"样本容量": "一般要求n≥3",
"变量特征": "变量应该是连续的,无明显异常值"
},
"结果解释": {
"取值范围": "-1 ≤ r ≤ 1",
"相关方向": "r > 0为正相关,r < 0为负相关",
"相关强度": "|r| > 0.8为强相关,0.5 < |r| ≤ 0.8为中度相关,|r| ≤ 0.5为弱相关",
"无线性相关": "r = 0表示无线性相关,但可能有非线性相关"
},
"关联知识": ["K8-1-1-02 正相关与负相关", "K8-1-1-03 线性相关与非线性相关"],
"注意事项": [
"相关系数只反映线性相关程度,不反映非线性相关",
"异常值会对相关系数产生较大影响",
"相关不等于因果,需结合专业知识判断"
]
},
{
"编号": "M8-2-01",
"名称": "最小二乘估计法",
"类型": "参数估计方法",
"目的": "估计一元线性回归模型的参数,找到最佳拟合直线",
"步骤": [
"建立残差平方和函数$Q(a,b) = \\sum_{i=1}^{n}(y_i - bx_i - a)^2$",
"对Q关于a求偏导并令其为0:$\\frac{\\partial Q}{\\partial a} = -2\\sum_{i=1}^{n}(y_i - bx_i - a) = 0$",
"对Q关于b求偏导并令其为0:$\\frac{\\partial Q}{\\partial b} = -2\\sum_{i=1}^{n}x_i(y_i - bx_i - a) = 0$",
"解方程组得到:$\\hat{b} = \\frac{\\sum_{i=1}^{n}(x_i - \\bar{x})(y_i - \\bar{y})}{\\sum_{i=1}^{n}(x_i - \\bar{x})^2}$",
"计算:$\\hat{a} = \\bar{y} - \\hat{b}\\bar{x}$",
"写出经验回归方程:$\\hat{y} = \\hat{b}x + \\hat{a}$"
],
"原理依据": {
"理论基础": "K8-2-2-01 最小二乘估计",
"核心思想": "使观测值与预测值偏差的平方和最小",
"数学依据": "微积分极值理论,二次函数性质"
},
"应用条件": {
"模型假设": "变量间存在线性相关关系",
"数据要求": "成对的数值型数据,样本容量n≥2",
"误差假设": "随机误差相互独立,期望为0,方差相等"
},
"结果解释": {
"斜率$\\hat{b}$": "表示x每增加一个单位,y的平均变化量",
"截距$\\hat{a}$": "表示x=0时y的预测值",
"": "yx",
"": ""
},
"关联知识": ["K8-2-1-01 一元线性回归模型", "K8-2-2-02 "],
"注意事项": [
"",
"线线",
""
]
},
{
"编号": "M8-2-02",
"名称": "残差分析法",
"类型": "",
"目的": "",
"步骤": [
"计算预测值:$\\hat{y}_i = \\hat{b}x_i + \\hat{a}$",
"计算残差:$e_i = y_i - \\hat{y}_i$",
"以x为横轴、残差e为纵轴绘制残差图",
"",
""
],
"原理依据": {
"理论基础": "K8-2-2-02 ",
"核心思想": "",
"数学依据": ""
},
"应用条件": {
"": "",
"": "",
"": ""
},
"结果解释": {
"": "0线",
"": "x",
"线": "线线",
"": "x",
"": ""
},
"关联知识": ["K8-2-2-01 最小二乘估计", "K8-2-2-03 决定系数R²"],
"注意事项": [
"",
"",
""
]
},
{
"编号": "M8-2-03",
"名称": "决定系数计算法",
"类型": "",
"目的": "",
"步骤": [
"计算总偏差平方和:$SST = \\sum_{i=1}^{n}(y_i - \\bar{y})^2$",
"计算残差平方和:$SSE = \\sum_{i=1}^{n}(y_i - \\hat{y}_i)^2$",
"计算回归平方和:$SSR = SST - SSE$",
"计算决定系数:$R^2 = 1 - \\frac{SSE}{SST} = \\frac{SSR}{SST}$",
"根据R²的大小评价模型的拟合效果"
],
"原理依据": {
"理论基础": "K8-2-2-03 决定系数R²",
"核心思想": "",
"数学依据": "总偏差平方和=回归平方和+残差平方和"
},
"应用条件": {
"": "线",
"": "",
"": ""
},
"结果解释": {
"取值范围": "0 ≤ R² ≤ 1",
"": "R²",
"拟合效果": "R² > 0.7拟合较好,0.4 < R² ≤ 0.7拟合一般,R² ≤ 0.4拟合较差",
"": "线R²"
},
"关联知识": ["K8-1-2-01 样本相关系数", "K8-2-2-01 最小二乘估计"],
"注意事项": [
"R²",
"R²R²",
"R²"
]
},
{
"编号": "M8-3-01",
"名称": "2×2列联表构建法",
"类型": "",
"目的": "",
"步骤": [
"确定两个分类变量X和Y",
"建立2×2表格,行对应X的取值,列对应Y的取值",
"统计四种组合的频数:$(X_1,Y_1)$, $(X_1,Y_2)$, $(X_2,Y_1)$, $(X_2,Y_2)$",
"整理成列联表并计算合计:\n| | $Y_1$ | $Y_2$ | 合计 |\n|---|---|---|---|\n| $X_1$ | a | b | a+b |\n| $X_2$ | c | d | c+d |\n| 合计 | a+c | b+d | n=a+b+c+d |",
""
],
"原理依据": {
"理论基础": "K8-3-1-01 2×2列联表",
"核心思想": "",
"数学依据": ""
},
"应用条件": {
"": "",
"": "",
"": "n45"
},
"结果解释": {
"": "",
"": "",
"": "",
"": ""
},
"关联知识": ["K8-3-2-01 独立性检验"],
"注意事项": [
"",
"",
""
]
},
{
"编号": "M8-3-02",
"名称": "独立性检验法",
"类型": "",
"目的": "",
"步骤": [
"提出假设:\n 零假设$H_0$:X与Y相互独立\n 备择假设$H_1$:X与Y不独立",
"确定显著性水平α,常取0.05或0.01",
"根据自由度df=1和显著性水平α确定临界值$\\chi^2_\\alpha$",
"计算检验统计量:$\\chi^2 = \\frac{n(ad-bc)^2}{(a+b)(c+d)(a+c)(b+d)}$",
"作出判断:\n 若$\\chi^2 \\ge \\chi^2_\\alpha$,则拒绝$H_0$\n 若$\\chi^2 < \\chi^2_\\alpha$,则不拒绝$H_0$",
""
],
"原理依据": {
"理论基础": "K8-3-2-01 独立性检验",
"核心思想": "",
"": "",
"": "P(拒绝H₀|H₀为真) = α"
},
"应用条件": {
"": "2×2",
"": "n45",
"": "",
"": ""
},
"结果解释": {
"拒绝H₀": "",
"不拒绝H₀": "",
"": "",
"": ""
},
"关联知识": ["K8-3-1-01 2×2列联表", "K8-3-2-02 "],
"注意事项": [
"",
"",
"",
""
]
},
{
"编号": "M8-3-03",
"名称": "期望频数计算法",
"类型": "",
"目的": "",
"步骤": [
"在独立性假设下:P(X=i,Y=j) = P(X=i)×P(Y=j)",
"估计边缘概率:$\\hat{P}(X=i) = \\frac{\\text{第}i\\text{行合计}}{n}$,$\\hat{P}(Y=j) = \\frac{\\text{第}j\\text{列合计}}{n}$",
"计算期望频数:$E_{ij} = n \\times \\hat{P}(X=i) \\times \\hat{P}(Y=j) = \\frac{\\text{第}i\\text{行合计} \\times \\text{第}j\\text{列合计}}{n}$",
"各单元格的期望频数:\n $E_{11} = \\frac{(a+b)(a+c)}{n}$\n $E_{12} = \\frac{(a+b)(b+d)}{n}$\n $E_{21} = \\frac{(c+d)(a+c)}{n}$\n $E_{22} = \\frac{(c+d)(b+d)}{n}$"
],
"原理依据": {
"": "",
"": "",
"": "",
"": ""
},
"应用条件": {
"": "",
"": "",
"": ""
},
"结果解释": {
"": "",
"": "",
"": "",
"": ""
},
"关联知识": ["K8-3-2-01 独立性检验", "K8-3-1-01 2×2列联表"],
"注意事项": [
"",
"<5",
"",
""
]
}
]
}