"""Configuration constants for scraper.py."""

# File paths
# Default file and directory names. How they are resolved (relative to the
# working directory or otherwise) is decided by the consumer — presumably
# scraper.py; confirm there.
DEFAULT_PART_FILE = "example_part_100.json"  # default "part" input file name
DEFAULT_OUTPUT_DIR = "downloaded"  # default output directory name

# API endpoints
# Base URL of the resource "details" endpoint. It carries no trailing slash,
# so any per-resource suffix has to be appended by the caller.
# NOTE(review): the exact suffix format is not visible in this file — confirm
# in scraper.py.
DETAIL_BASE_URL = "https://s-file-2.ykt.cbern.com.cn/zxx/ndrv2/national_lesson/resources/details"

# Default values for metadata extraction
# Placeholder strings, presumably substituted when the corresponding metadata
# field is missing (confirm in scraper.py). The Chinese literals are runtime
# values and must be kept as-is; English glosses are given for readers.
DEFAULT_MATERIAL_NAME = "未知教材"  # "unknown teaching material / textbook"
DEFAULT_CHAPTER_NAME = "未知章节"  # "unknown chapter"
DEFAULT_COURSE_TITLE = "未知课程"  # "unknown course"
DEFAULT_FILENAME = "unnamed"

# Tag and identifier constants
# Identifier of the tag dimension the scraper is interested in.
# NOTE(review): the meaning of "zxxcc" is defined by the remote API and is not
# visible in this file — confirm against the API payloads.
TAG_DIMENSION_ID = "zxxcc"

# File and directory constraints
# Upper bounds on generated names. Presumably counted in characters rather
# than encoded bytes — TODO confirm where the truncation is applied.
MAX_FILENAME_LENGTH = 100
MAX_DIRNAME_LENGTH = 50

# Network settings
# NOTE(review): units are not stated in this file. CHUNK_SIZE is presumably a
# byte count for streamed downloads and the timeouts are presumably seconds —
# confirm against the HTTP calls in scraper.py.
CHUNK_SIZE = 8192
REQUEST_TIMEOUT = 30
DETAIL_REQUEST_TIMEOUT = 20  # separate, shorter timeout for "detail" requests

# Authentication
# Default authorization value in `MAC id="...",nonce="...",mac="..."` form.
# NOTE(review): this looks like a real credential committed to source control.
# The nonce appears to embed a millisecond timestamp, so the token may also go
# stale. Consider loading it from an environment variable or an untracked
# file and rotating the value; it is left unchanged here so existing callers
# keep working.
DEFAULT_TOKEN = 'MAC id="7F938B205F876FC398BCDC5BCE419D078A9A9DC46BC1C5EB5D458752DA28A954776C4459233C9F6209FA0EC2EC21AE85202FAE132D402538",nonce="1758355290351:STU4ZCMA",mac="cmPIHUYMwn6OiCanuD/OLV75xyyhxyGZzzEwFwMaKbc="'

# Regex patterns
# Matches an http(s) URL made of non-whitespace characters up to a `.pdf` or
# `.m3u8` extension, plus an optional `?query` suffix. Kept as a raw pattern
# string; callers compile or apply it themselves.
# NOTE(review): nothing anchors the end of the extension, so a URL such as
# ".../file.pdfx" matches up to ".pdf" — confirm that is acceptable.
PDF_OR_M3U8_PATTERN = r"https?://\S+\.(?:pdf|m3u8)(?:\?\S*)?"