note/work/AI/course/test_hardcoded_values.py
2025-11-19 10:16:05 +08:00

114 lines
4.0 KiB
Python

#!/usr/bin/env python3
"""Test script to verify that hardcoded values have been properly replaced with constants."""
import re
from scraper import (
DEFAULT_PART_FILE,
DEFAULT_OUTPUT_DIR,
DETAIL_BASE_URL,
DEFAULT_MATERIAL_NAME,
DEFAULT_CHAPTER_NAME,
DEFAULT_COURSE_TITLE,
DEFAULT_FILENAME,
TAG_DIMENSION_ID,
MAX_FILENAME_LENGTH,
MAX_DIRNAME_LENGTH,
CHUNK_SIZE,
REQUEST_TIMEOUT,
DETAIL_REQUEST_TIMEOUT,
DEFAULT_TOKEN
)
def test_constants():
    """Assert that each constant imported from ``scraper`` has its expected value.

    The checks run in a fixed order: file paths, the detail API URL, default
    display names, the tag dimension id, length limits, and network settings.

    Raises:
        AssertionError: On the first constant whose value differs from the
            expected literal; the message reports expected and actual values
            (the URL check reports only that a mismatch occurred).
    """
    print("Testing configuration constants...")

    # File paths: (actual, expected) pairs, reported with quoted values.
    path_checks = (
        (DEFAULT_PART_FILE, "example_part_100.json"),
        (DEFAULT_OUTPUT_DIR, "downloaded"),
    )
    for actual, expected in path_checks:
        assert actual == expected, f"Expected '{expected}', got '{actual}'"

    # API endpoint: the URL is long, so the message stays generic.
    expected_url = "https://s-file-2.ykt.cbern.com.cn/zxx/ndrv2/national_lesson/resources/details"
    assert DETAIL_BASE_URL == expected_url, "URL mismatch"

    # Default names and the tag/identifier constant, reported with quoted values.
    text_checks = (
        (DEFAULT_MATERIAL_NAME, "未知教材"),
        (DEFAULT_CHAPTER_NAME, "未知章节"),
        (DEFAULT_COURSE_TITLE, "未知课程"),
        (DEFAULT_FILENAME, "unnamed"),
        (TAG_DIMENSION_ID, "zxxcc"),
    )
    for actual, expected in text_checks:
        assert actual == expected, f"Expected '{expected}', got '{actual}'"

    # Length limits and network settings, reported as bare numbers.
    numeric_checks = (
        (MAX_FILENAME_LENGTH, 100),
        (MAX_DIRNAME_LENGTH, 50),
        (CHUNK_SIZE, 8192),
        (REQUEST_TIMEOUT, 30),
        (DETAIL_REQUEST_TIMEOUT, 20),
    )
    for actual, expected in numeric_checks:
        assert actual == expected, f"Expected {expected}, got {actual}"

    print("✓ All constants have expected values")
def test_no_hardcoded_values(scraper_path='scraper.py'):
    """Scan the scraper source for literals that should now be constants.

    Only the text from the first top-level ``def main(...)`` line to the end
    of the file is scanned, so the constant definitions above that line do
    not produce false positives. Code located before ``main`` is not checked.

    Args:
        scraper_path: Path of the source file to scan. Defaults to
            ``scraper.py``, resolved against the current working directory.

    Returns:
        True when none of the patterns match. False when at least one
        pattern matches, when the file cannot be opened, or when no
        top-level ``main`` definition is found.
    """
    print("Testing for remaining hardcoded values...")
    try:
        with open(scraper_path, 'r', encoding='utf-8') as f:
            lines = f.read().split('\n')
    except OSError as exc:
        # Report an unreadable file the same way as a missing main():
        # a message plus a False result instead of a traceback.
        print(f"❌ Could not read {scraper_path}: {exc}")
        return False

    # Anchored at column 0 (top-level only); tolerates parameters and a
    # return annotation, e.g. ``def main(argv=None) -> int:``.
    main_def = re.compile(r'def\s+main\s*\(')
    main_start = next(
        (index for index, line in enumerate(lines) if main_def.match(line)),
        None,
    )
    if main_start is None:
        print("❌ Could not find main function")
        return False

    # Get only the content from the main function onward.
    function_content = '\n'.join(lines[main_start:])

    # String literals are matched with either quote style.
    string_literals = [
        "example_part_100.json",
        "downloaded",
        "未知教材",
        "未知章节",
        "未知课程",
        "unnamed",
        "zxxcc",
    ]
    hardcoded_patterns = [
        r'https://s-file-2\.ykt\.cbern\.com\.cn/zxx/ndrv2/national_lesson/resources/details',
    ]
    hardcoded_patterns.extend(
        rf'''["']{re.escape(literal)}["']''' for literal in string_literals
    )
    # ``\s*`` accepts spacing around ``=``; ``\b`` keeps ``timeout=200`` from
    # being reported as ``timeout=20``.
    hardcoded_patterns.extend([
        r'timeout\s*=\s*20\b',
        r'timeout\s*=\s*30\b',
        r'chunk_size\s*=\s*8192\b',
    ])

    found_hardcoded = set()
    for pattern in hardcoded_patterns:
        found_hardcoded.update(re.findall(pattern, function_content))

    if found_hardcoded:
        print("⚠️ Found potential hardcoded values in function bodies:")
        # Sorted so the report is stable from run to run.
        for value in sorted(found_hardcoded):
            print(f" - {value}")
    else:
        print("✓ No hardcoded values found in function bodies")
    return not found_hardcoded
if __name__ == "__main__":
    # The constant checks raise AssertionError on a mismatch, so the source
    # scan below only runs once every constant has its expected value.
    test_constants()
    outcome_message = (
        "\n🎉 All tests passed! Hardcoded values have been successfully optimized."
        if test_no_hardcoded_values()
        else "\n❌ Some hardcoded values may still exist. Please review the code."
    )
    print(outcome_message)