note/知识图谱/教科书-数学/knowledge/quick_fix_json.py
2025-11-19 10:16:05 +08:00

113 lines
3.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
import json
import re
# LaTeX command names the repair recognises. Names starting with b/f/n/r/t
# matter most: "\frac", "\theta", "\beta", ... begin with a *valid* JSON
# escape, so they must be identified by name to be escaped. Extend this set
# if the data uses further commands with such a first letter.
_LATEX_COMMANDS = frozenset({
    'frac', 'sqrt', 'sin', 'cos', 'tan', 'log', 'ln',
    'alpha', 'beta', 'gamma', 'delta', 'theta', 'lambda', 'mu', 'pi',
    'sigma', 'phi', 'omega',
    'mathbf', 'vec', 'left', 'right', 'begin', 'end', 'quad',
    'cdot', 'times', 'div', 'pm', 'mp',
    'le', 'ge', 'ne', 'approx', 'equiv', 'perp', 'parallel',
    'in', 'subset', 'supset', 'cup', 'cap', 'emptyset', 'infty',
    'partial', 'nabla', 'exists', 'forall',
    'int', 'sum', 'prod', 'lim',
})

# Characters that form a valid two-character JSON escape after a backslash.
_LETTER_ESCAPES = frozenset('bfnrt')
_SYMBOL_ESCAPES = frozenset('"\\/')

# One backslash plus what follows it: a \uXXXX escape, a run of ASCII
# letters, or any single character (possibly nothing at end of input).
# Scanning left to right consumes "\\" as a pair, so already-escaped
# backslashes are never split or doubled again.
_BACKSLASH_RE = re.compile(
    r'\\(?:(?P<uni>u[0-9a-fA-F]{4})|(?P<word>[A-Za-z]+)|(?P<other>[\s\S]?))'
)


def _repair_escape(match):
    """Return the matched backslash sequence, doubled if it needs escaping."""
    sequence = match.group(0)
    if match.group('uni') is not None:
        return sequence  # valid \uXXXX escape
    word = match.group('word')
    if word is not None:
        # A letter run is kept only when it starts with a valid JSON escape
        # letter and is not a known LaTeX command name.
        if word not in _LATEX_COMMANDS and word[0] in _LETTER_ESCAPES:
            return sequence
        return '\\' + sequence
    if match.group('other') in _SYMBOL_ESCAPES:
        return sequence  # \" or \\ or \/
    return '\\' + sequence  # invalid escape or trailing lone backslash


def _escape_invalid_backslashes(content):
    """Double every backslash in *content* that is not a valid JSON escape."""
    return _BACKSLASH_RE.sub(_repair_escape, content)


def fix_json_backslashes(file_path):
    """Repair unescaped backslashes in a JSON file, rewriting it in place.

    The file is read as UTF-8. If it already parses as JSON it is left
    untouched. Otherwise every backslash that does not start a valid JSON
    escape, or that precedes a known LaTeX command name such as ``frac`` or
    ``theta``, is doubled. The file is overwritten only if the repaired text
    parses as JSON.

    Args:
        file_path: Path of the JSON file to check and repair.

    Returns:
        True if the file was already valid or was repaired and written back;
        False if the repaired text still does not parse or any other error
        occurred (the error is printed, not raised).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Nothing to do when the file is already well-formed.
        try:
            json.loads(content)
        except json.JSONDecodeError:
            pass
        else:
            print(f"{file_path} - 已经是正确的JSON格式")
            return True

        fixed_content = _escape_invalid_backslashes(content)

        # Validate before touching the file; a failure here is reported by
        # the JSONDecodeError handler below and leaves the file unchanged.
        json.loads(fixed_content)

        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(fixed_content)
        print(f"{file_path} - 修复成功")
        return True
    except json.JSONDecodeError as e:
        print(f"{file_path} - 修复失败: {str(e)}")
        return False
    except Exception as e:
        print(f"{file_path} - 处理出错: {str(e)}")
        return False
if __name__ == "__main__":
    # Chapter knowledge files to repair; the names are relative paths, so
    # the script is expected to be run from the directory containing them.
    chapter_files = (
        'knowledge-选择性必修第一章-空间向量与立体几何.json',
        'knowledge-选择性必修第二章-直线和圆的方程.json',
        'knowledge-选择性必修第五章-一元函数的导数及其应用.json',
        'knowledge-选择性必修第八章-成对数据的统计分析.json',
        'knowledge-选择性必修第四章-数列.json',
    )
    # Each file is processed independently; failures are reported by the
    # repair function itself and do not stop the remaining files.
    for chapter_file in chapter_files:
        fix_json_backslashes(chapter_file)