# Python imports
import base64
import json
import re
# Upper bound on accepted binary data: 10 MB, expressed in bytes.
MAX_SIZE = 10 * 1024**2
# Cap on recursion depth, intended to prevent stack overflow.
MAX_RECURSION_DEPTH = 20
# Regex fragments whose presence in free text could indicate XSS or script
# injection. Every entry must be a non-empty pattern: an empty regex matches
# any string and would flag all input as dangerous.
# NOTE(review): presumably applied with a case-insensitive re.search -- confirm
# at the call sites.
DANGEROUS_TEXT_PATTERNS = [
    # Opening <script> tag. Matching the opening tag alone is enough to flag
    # the text and does not depend on DOTALL for multi-line script bodies.
    r"<script[^>]*>",
    r"javascript\s*:",  # javascript: URL scheme
    r"data\s*:\s*text/html",  # data: URL carrying an HTML document
    r"eval\s*\(",  # eval( call
    r"document\s*\.",  # access to a member of `document`
    r"window\s*\.",  # access to a member of `window`
    r"location\s*\.",  # access to a member of `location`
]
# Regex fragments treated as dangerous when found in HTML attribute values.
DANGEROUS_ATTR_PATTERNS = [
    r"javascript\s*:",  # javascript: URL scheme
    r"data\s*:\s*text/html",  # data: URL carrying an HTML document
    r"eval\s*\(",  # eval( call
    r"alert\s*\(",  # alert( call
    r"document\s*\.",  # access to a member of `document`
    r"window\s*\.",  # access to a member of `window`
]
# Suspicious patterns for binary data content
SUSPICIOUS_BINARY_PATTERNS = [
"]*>",
r"",
# JavaScript URLs in various attributes
r'(?:href|src|action)\s*=\s*["\']?\s*javascript:',
# Data URLs with text/html (potential XSS)
r'(?:href|src|action)\s*=\s*["\']?\s*data:text/html',
# Dangerous event handlers with JavaScript-like content
r'on(?:load|error|click|focus|blur|change|submit|reset|select|resize|scroll|unload|beforeunload|hashchange|popstate|storage|message|offline|online)\s*=\s*["\']?[^"\']*(?:javascript|alert|eval|document\.|window\.|location\.|history\.)[^"\']*["\']?',
# Object and embed tags that could load external content
r"<(?:object|embed)[^>]*(?:data|src)\s*=",
# Base tag that could change relative URL resolution
r"]*href\s*=",
# Dangerous iframe sources
r'