#!/usr/bin/env python3
"""Test script to verify that hardcoded values have been properly replaced with constants."""

import re

from scraper import (
    DEFAULT_PART_FILE,
    DEFAULT_OUTPUT_DIR,
    DETAIL_BASE_URL,
    DEFAULT_MATERIAL_NAME,
    DEFAULT_CHAPTER_NAME,
    DEFAULT_COURSE_TITLE,
    DEFAULT_FILENAME,
    TAG_DIMENSION_ID,
    MAX_FILENAME_LENGTH,
    MAX_DIRNAME_LENGTH,
    CHUNK_SIZE,
    REQUEST_TIMEOUT,
    DETAIL_REQUEST_TIMEOUT,
    DEFAULT_TOKEN,  # noqa: F401 - value is not asserted below; a missing name still fails at import
)

# Matches the top-level definition of main() regardless of its parameters
# or return annotation (e.g. "def main():", "def main() -> None:").
_MAIN_DEF = re.compile(r'def main\s*\(')

# String literals that should only appear where the constants are defined.
_QUOTED_LITERALS = (
    "example_part_100.json",
    "downloaded",
    "未知教材",
    "未知章节",
    "未知课程",
    "unnamed",
    "zxxcc",
)

_HARDCODED_PATTERNS = [
    r'https://s-file-2\.ykt\.cbern\.com\.cn/zxx/ndrv2/national_lesson/resources/details',
    # Accept either quote style; the backreference requires the closing
    # quote to match the opening one.
    *("(?P<q>[\"'])" + re.escape(literal) + "(?P=q)" for literal in _QUOTED_LITERALS),
    # Trailing \b keeps e.g. "timeout=200" from being reported as "timeout=20";
    # optional whitespace also catches "timeout = 20".
    r'timeout\s*=\s*20\b',
    r'timeout\s*=\s*30\b',
    r'chunk_size\s*=\s*8192\b',
]


def test_constants():
    """Assert that every configuration constant imported from scraper has its expected value.

    Raises:
        AssertionError: If any constant differs from the expected value.
    """
    print("Testing configuration constants...")

    # Test file paths
    assert DEFAULT_PART_FILE == "example_part_100.json", f"Expected 'example_part_100.json', got '{DEFAULT_PART_FILE}'"
    assert DEFAULT_OUTPUT_DIR == "downloaded", f"Expected 'downloaded', got '{DEFAULT_OUTPUT_DIR}'"

    # Test API endpoints
    assert DETAIL_BASE_URL == "https://s-file-2.ykt.cbern.com.cn/zxx/ndrv2/national_lesson/resources/details", f"URL mismatch, got '{DETAIL_BASE_URL}'"

    # Test default values
    assert DEFAULT_MATERIAL_NAME == "未知教材", f"Expected '未知教材', got '{DEFAULT_MATERIAL_NAME}'"
    assert DEFAULT_CHAPTER_NAME == "未知章节", f"Expected '未知章节', got '{DEFAULT_CHAPTER_NAME}'"
    assert DEFAULT_COURSE_TITLE == "未知课程", f"Expected '未知课程', got '{DEFAULT_COURSE_TITLE}'"
    assert DEFAULT_FILENAME == "unnamed", f"Expected 'unnamed', got '{DEFAULT_FILENAME}'"

    # Test tag and identifier constants
    assert TAG_DIMENSION_ID == "zxxcc", f"Expected 'zxxcc', got '{TAG_DIMENSION_ID}'"

    # Test file and directory constraints
    assert MAX_FILENAME_LENGTH == 100, f"Expected 100, got {MAX_FILENAME_LENGTH}"
    assert MAX_DIRNAME_LENGTH == 50, f"Expected 50, got {MAX_DIRNAME_LENGTH}"

    # Test network settings
    assert CHUNK_SIZE == 8192, f"Expected 8192, got {CHUNK_SIZE}"
    assert REQUEST_TIMEOUT == 30, f"Expected 30, got {REQUEST_TIMEOUT}"
    assert DETAIL_REQUEST_TIMEOUT == 20, f"Expected 20, got {DETAIL_REQUEST_TIMEOUT}"

    print("✓ All constants have expected values")


def test_no_hardcoded_values():
    """Scan scraper.py for literal values that should have been replaced by constants.

    Reads 'scraper.py' relative to the current working directory and inspects
    only the text from the top-level ``def main(`` line onward, so the section
    above it (where the constants are defined) cannot produce false positives.

    Returns:
        bool: True when no hardcoded value is found; False when at least one
        is found or when the main function cannot be located.
    """
    print("Testing for remaining hardcoded values...")

    with open('scraper.py', 'r', encoding='utf-8') as f:
        content = f.read()

    # Find where the main function starts; everything before it is skipped.
    lines = content.split('\n')
    main_start = next(
        (i for i, line in enumerate(lines) if _MAIN_DEF.match(line)),
        None,
    )

    if main_start is None:
        print("❌ Could not find main function")
        return False

    # Get only the content after constant definitions (from main function onward)
    function_content = '\n'.join(lines[main_start:])

    # Collect the exact matched text; a set de-duplicates repeated hits.
    found_hardcoded = {
        match.group(0)
        for pattern in _HARDCODED_PATTERNS
        for match in re.finditer(pattern, function_content)
    }

    if found_hardcoded:
        print("⚠️ Found potential hardcoded values in function bodies:")
        # Sorted so the report is identical from run to run.
        for value in sorted(found_hardcoded):
            print(f" - {value}")
    else:
        print("✓ No hardcoded values found in function bodies")

    return not found_hardcoded


if __name__ == "__main__":
    test_constants()
    all_good = test_no_hardcoded_values()

    if all_good:
        print("\n🎉 All tests passed! Hardcoded values have been successfully optimized.")
    else:
        print("\n❌ Some hardcoded values may still exist. Please review the code.")
        # Non-zero exit status so shells and CI can detect the failure.
        raise SystemExit(1)