[WEB-4780] chore: changed the html validation (#7648)
* chore: changed the html validation * chore: added requirements for nh3 * chore: removed the json validations
This commit is contained in:
parent
3602ff6930
commit
0af75897f5
10 changed files with 91 additions and 366 deletions
|
|
@ -24,7 +24,6 @@ from plane.db.models import (
|
|||
)
|
||||
from plane.utils.content_validator import (
|
||||
validate_html_content,
|
||||
validate_json_content,
|
||||
validate_binary_data,
|
||||
)
|
||||
|
||||
|
|
@ -89,20 +88,24 @@ class IssueSerializer(BaseSerializer):
|
|||
raise serializers.ValidationError("Invalid HTML passed")
|
||||
|
||||
# Validate description content for security
|
||||
if data.get("description"):
|
||||
is_valid, error_msg = validate_json_content(data["description"])
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description": error_msg})
|
||||
|
||||
if data.get("description_html"):
|
||||
is_valid, error_msg = validate_html_content(data["description_html"])
|
||||
is_valid, error_msg, sanitized_html = validate_html_content(
|
||||
data["description_html"]
|
||||
)
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description_html": error_msg})
|
||||
raise serializers.ValidationError(
|
||||
{"error": "html content is not valid"}
|
||||
)
|
||||
# Update the data with sanitized HTML if available
|
||||
if sanitized_html is not None:
|
||||
data["description_html"] = sanitized_html
|
||||
|
||||
if data.get("description_binary"):
|
||||
is_valid, error_msg = validate_binary_data(data["description_binary"])
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description_binary": error_msg})
|
||||
raise serializers.ValidationError(
|
||||
{"description_binary": "Invalid binary data"}
|
||||
)
|
||||
|
||||
# Validate assignees are from project
|
||||
if data.get("assignees", []):
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@ from plane.db.models import (
|
|||
|
||||
from plane.utils.content_validator import (
|
||||
validate_html_content,
|
||||
validate_json_content,
|
||||
)
|
||||
from .base import BaseSerializer
|
||||
|
||||
|
|
@ -200,27 +199,18 @@ class ProjectSerializer(BaseSerializer):
|
|||
)
|
||||
|
||||
# Validate description content for security
|
||||
if "description" in data and data["description"]:
|
||||
# For Project, description might be text field, not JSON
|
||||
if isinstance(data["description"], dict):
|
||||
is_valid, error_msg = validate_json_content(data["description"])
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description": error_msg})
|
||||
|
||||
if "description_text" in data and data["description_text"]:
|
||||
is_valid, error_msg = validate_json_content(data["description_text"])
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description_text": error_msg})
|
||||
|
||||
if "description_html" in data and data["description_html"]:
|
||||
if isinstance(data["description_html"], dict):
|
||||
is_valid, error_msg = validate_json_content(data["description_html"])
|
||||
else:
|
||||
is_valid, error_msg = validate_html_content(
|
||||
is_valid, error_msg, sanitized_html = validate_html_content(
|
||||
str(data["description_html"])
|
||||
)
|
||||
# Update the data with sanitized HTML if available
|
||||
if sanitized_html is not None:
|
||||
data["description_html"] = sanitized_html
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description_html": error_msg})
|
||||
raise serializers.ValidationError(
|
||||
{"error": "html content is not valid"}
|
||||
)
|
||||
|
||||
return data
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@ from plane.db.models import (
|
|||
)
|
||||
from plane.utils.content_validator import (
|
||||
validate_html_content,
|
||||
validate_json_content,
|
||||
validate_binary_data,
|
||||
)
|
||||
from plane.app.permissions import ROLE
|
||||
|
|
@ -76,20 +75,24 @@ class DraftIssueCreateSerializer(BaseSerializer):
|
|||
raise serializers.ValidationError("Start date cannot exceed target date")
|
||||
|
||||
# Validate description content for security
|
||||
if "description" in attrs and attrs["description"]:
|
||||
is_valid, error_msg = validate_json_content(attrs["description"])
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description": error_msg})
|
||||
|
||||
if "description_html" in attrs and attrs["description_html"]:
|
||||
is_valid, error_msg = validate_html_content(attrs["description_html"])
|
||||
is_valid, error_msg, sanitized_html = validate_html_content(
|
||||
attrs["description_html"]
|
||||
)
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description_html": error_msg})
|
||||
raise serializers.ValidationError(
|
||||
{"error": "html content is not valid"}
|
||||
)
|
||||
# Update the attrs with sanitized HTML if available
|
||||
if sanitized_html is not None:
|
||||
attrs["description_html"] = sanitized_html
|
||||
|
||||
if "description_binary" in attrs and attrs["description_binary"]:
|
||||
is_valid, error_msg = validate_binary_data(attrs["description_binary"])
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description_binary": error_msg})
|
||||
raise serializers.ValidationError(
|
||||
{"description_binary": "Invalid binary data"}
|
||||
)
|
||||
|
||||
# Validate assignees are from project
|
||||
if attrs.get("assignee_ids", []):
|
||||
|
|
|
|||
|
|
@ -43,7 +43,6 @@ from plane.db.models import (
|
|||
)
|
||||
from plane.utils.content_validator import (
|
||||
validate_html_content,
|
||||
validate_json_content,
|
||||
validate_binary_data,
|
||||
)
|
||||
|
||||
|
|
@ -128,20 +127,24 @@ class IssueCreateSerializer(BaseSerializer):
|
|||
raise serializers.ValidationError("Start date cannot exceed target date")
|
||||
|
||||
# Validate description content for security
|
||||
if "description" in attrs and attrs["description"]:
|
||||
is_valid, error_msg = validate_json_content(attrs["description"])
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description": error_msg})
|
||||
|
||||
if "description_html" in attrs and attrs["description_html"]:
|
||||
is_valid, error_msg = validate_html_content(attrs["description_html"])
|
||||
is_valid, error_msg, sanitized_html = validate_html_content(
|
||||
attrs["description_html"]
|
||||
)
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description_html": error_msg})
|
||||
raise serializers.ValidationError(
|
||||
{"error": "html content is not valid"}
|
||||
)
|
||||
# Update the attrs with sanitized HTML if available
|
||||
if sanitized_html is not None:
|
||||
attrs["description_html"] = sanitized_html
|
||||
|
||||
if "description_binary" in attrs and attrs["description_binary"]:
|
||||
is_valid, error_msg = validate_binary_data(attrs["description_binary"])
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description_binary": error_msg})
|
||||
raise serializers.ValidationError(
|
||||
{"description_binary": "Invalid binary data"}
|
||||
)
|
||||
|
||||
# Validate assignees are from project
|
||||
if attrs.get("assignee_ids", []):
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ from .base import BaseSerializer
|
|||
from plane.utils.content_validator import (
|
||||
validate_binary_data,
|
||||
validate_html_content,
|
||||
validate_json_content,
|
||||
)
|
||||
from plane.db.models import (
|
||||
Page,
|
||||
|
|
@ -229,23 +228,13 @@ class PageBinaryUpdateSerializer(serializers.Serializer):
|
|||
return value
|
||||
|
||||
# Use the validation function from utils
|
||||
is_valid, error_message = validate_html_content(value)
|
||||
is_valid, error_message, sanitized_html = validate_html_content(value)
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError(error_message)
|
||||
|
||||
return value
|
||||
# Return sanitized HTML if available, otherwise return original
|
||||
return sanitized_html if sanitized_html is not None else value
|
||||
|
||||
def validate_description(self, value):
|
||||
"""Validate the JSON description"""
|
||||
if not value:
|
||||
return value
|
||||
|
||||
# Use the validation function from utils
|
||||
is_valid, error_message = validate_json_content(value)
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError(error_message)
|
||||
|
||||
return value
|
||||
|
||||
def update(self, instance, validated_data):
|
||||
"""Update the page instance with validated data"""
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@ from plane.db.models import (
|
|||
)
|
||||
from plane.utils.content_validator import (
|
||||
validate_html_content,
|
||||
validate_json_content,
|
||||
validate_binary_data,
|
||||
)
|
||||
|
||||
|
|
@ -65,27 +64,18 @@ class ProjectSerializer(BaseSerializer):
|
|||
|
||||
def validate(self, data):
|
||||
# Validate description content for security
|
||||
if "description" in data and data["description"]:
|
||||
# For Project, description might be text field, not JSON
|
||||
if isinstance(data["description"], dict):
|
||||
is_valid, error_msg = validate_json_content(data["description"])
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description": error_msg})
|
||||
|
||||
if "description_text" in data and data["description_text"]:
|
||||
is_valid, error_msg = validate_json_content(data["description_text"])
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description_text": error_msg})
|
||||
|
||||
if "description_html" in data and data["description_html"]:
|
||||
if isinstance(data["description_html"], dict):
|
||||
is_valid, error_msg = validate_json_content(data["description_html"])
|
||||
else:
|
||||
is_valid, error_msg = validate_html_content(
|
||||
str(data["description_html"])
|
||||
)
|
||||
is_valid, error_msg, sanitized_html = validate_html_content(
|
||||
str(data["description_html"])
|
||||
)
|
||||
# Update the data with sanitized HTML if available
|
||||
if sanitized_html is not None:
|
||||
data["description_html"] = sanitized_html
|
||||
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description_html": error_msg})
|
||||
raise serializers.ValidationError(
|
||||
{"error": "html content is not valid"}
|
||||
)
|
||||
|
||||
return data
|
||||
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@ from plane.utils.constants import RESTRICTED_WORKSPACE_SLUGS
|
|||
from plane.utils.url import contains_url
|
||||
from plane.utils.content_validator import (
|
||||
validate_html_content,
|
||||
validate_json_content,
|
||||
validate_binary_data,
|
||||
)
|
||||
|
||||
|
|
@ -319,20 +318,24 @@ class StickySerializer(BaseSerializer):
|
|||
|
||||
def validate(self, data):
|
||||
# Validate description content for security
|
||||
if "description" in data and data["description"]:
|
||||
is_valid, error_msg = validate_json_content(data["description"])
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description": error_msg})
|
||||
|
||||
if "description_html" in data and data["description_html"]:
|
||||
is_valid, error_msg = validate_html_content(data["description_html"])
|
||||
is_valid, error_msg, sanitized_html = validate_html_content(
|
||||
data["description_html"]
|
||||
)
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description_html": error_msg})
|
||||
raise serializers.ValidationError(
|
||||
{"error": "html content is not valid"}
|
||||
)
|
||||
# Update the data with sanitized HTML if available
|
||||
if sanitized_html is not None:
|
||||
data["description_html"] = sanitized_html
|
||||
|
||||
if "description_binary" in data and data["description_binary"]:
|
||||
is_valid, error_msg = validate_binary_data(data["description_binary"])
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description_binary": error_msg})
|
||||
raise serializers.ValidationError(
|
||||
{"description_binary": "Invalid binary data"}
|
||||
)
|
||||
|
||||
return data
|
||||
|
||||
|
|
|
|||
|
|
@ -30,7 +30,6 @@ from plane.db.models import (
|
|||
)
|
||||
from plane.utils.content_validator import (
|
||||
validate_html_content,
|
||||
validate_json_content,
|
||||
validate_binary_data,
|
||||
)
|
||||
|
||||
|
|
@ -290,20 +289,22 @@ class IssueCreateSerializer(BaseSerializer):
|
|||
raise serializers.ValidationError("Start date cannot exceed target date")
|
||||
|
||||
# Validate description content for security
|
||||
if "description" in data and data["description"]:
|
||||
is_valid, error_msg = validate_json_content(data["description"])
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description": error_msg})
|
||||
|
||||
if "description_html" in data and data["description_html"]:
|
||||
is_valid, error_msg = validate_html_content(data["description_html"])
|
||||
is_valid, error_msg, sanitized_html = validate_html_content(
|
||||
data["description_html"]
|
||||
)
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description_html": error_msg})
|
||||
raise serializers.ValidationError(
|
||||
{"error": "html content is not valid"}
|
||||
)
|
||||
# Update the data with sanitized HTML if available
|
||||
if sanitized_html is not None:
|
||||
data["description_html"] = sanitized_html
|
||||
|
||||
if "description_binary" in data and data["description_binary"]:
|
||||
is_valid, error_msg = validate_binary_data(data["description_binary"])
|
||||
if not is_valid:
|
||||
raise serializers.ValidationError({"description_binary": error_msg})
|
||||
raise serializers.ValidationError({"description_binary": "Invalid binary data"})
|
||||
|
||||
return data
|
||||
|
||||
|
|
|
|||
|
|
@ -1,36 +1,11 @@
|
|||
# Python imports
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
|
||||
import nh3
|
||||
from plane.utils.exception_logger import log_exception
|
||||
|
||||
# Maximum allowed size for binary data (10MB)
|
||||
MAX_SIZE = 10 * 1024 * 1024
|
||||
|
||||
# Maximum recursion depth to prevent stack overflow
|
||||
MAX_RECURSION_DEPTH = 20
|
||||
|
||||
# Dangerous text patterns that could indicate XSS or script injection
|
||||
DANGEROUS_TEXT_PATTERNS = [
|
||||
r"<script[^>]*>.*?</script>",
|
||||
r"javascript\s*:",
|
||||
r"data\s*:\s*text/html",
|
||||
r"eval\s*\(",
|
||||
r"document\s*\.",
|
||||
r"window\s*\.",
|
||||
r"location\s*\.",
|
||||
]
|
||||
|
||||
# Dangerous attribute patterns for HTML attributes
|
||||
DANGEROUS_ATTR_PATTERNS = [
|
||||
r"javascript\s*:",
|
||||
r"data\s*:\s*text/html",
|
||||
r"eval\s*\(",
|
||||
r"alert\s*\(",
|
||||
r"document\s*\.",
|
||||
r"window\s*\.",
|
||||
]
|
||||
|
||||
# Suspicious patterns for binary data content
|
||||
SUSPICIOUS_BINARY_PATTERNS = [
|
||||
"<html",
|
||||
|
|
@ -41,70 +16,6 @@ SUSPICIOUS_BINARY_PATTERNS = [
|
|||
"<iframe",
|
||||
]
|
||||
|
||||
# Malicious HTML patterns for content validation
|
||||
MALICIOUS_HTML_PATTERNS = [
|
||||
# Script tags with any content
|
||||
r"<script[^>]*>",
|
||||
r"</script>",
|
||||
# JavaScript URLs in various attributes
|
||||
r'(?:href|src|action)\s*=\s*["\']?\s*javascript:',
|
||||
# Data URLs with text/html (potential XSS)
|
||||
r'(?:href|src|action)\s*=\s*["\']?\s*data:text/html',
|
||||
# Dangerous event handlers with JavaScript-like content
|
||||
r'on(?:load|error|click|focus|blur|change|submit|reset|select|resize|scroll|unload|beforeunload|hashchange|popstate|storage|message|offline|online)\s*=\s*["\']?[^"\']*(?:javascript|alert|eval|document\.|window\.|location\.|history\.)[^"\']*["\']?',
|
||||
# Object and embed tags that could load external content
|
||||
r"<(?:object|embed)[^>]*(?:data|src)\s*=",
|
||||
# Base tag that could change relative URL resolution
|
||||
r"<base[^>]*href\s*=",
|
||||
# Dangerous iframe sources
|
||||
r'<iframe[^>]*src\s*=\s*["\']?(?:javascript:|data:text/html)',
|
||||
# Meta refresh redirects
|
||||
r'<meta[^>]*http-equiv\s*=\s*["\']?refresh["\']?',
|
||||
# Link tags - simplified patterns
|
||||
r'<link[^>]*rel\s*=\s*["\']?stylesheet["\']?',
|
||||
r'<link[^>]*href\s*=\s*["\']?https?://',
|
||||
r'<link[^>]*href\s*=\s*["\']?//',
|
||||
r'<link[^>]*href\s*=\s*["\']?(?:data:|javascript:)',
|
||||
# Style tags with external imports
|
||||
r"<style[^>]*>.*?@import.*?(?:https?://|//)",
|
||||
# Link tags with dangerous rel types
|
||||
r'<link[^>]*rel\s*=\s*["\']?(?:import|preload|prefetch|dns-prefetch|preconnect)["\']?',
|
||||
# Forms with action attributes
|
||||
r"<form[^>]*action\s*=",
|
||||
]
|
||||
|
||||
# Dangerous JavaScript patterns for event handlers
|
||||
DANGEROUS_JS_PATTERNS = [
|
||||
r"alert\s*\(",
|
||||
r"eval\s*\(",
|
||||
r"document\s*\.",
|
||||
r"window\s*\.",
|
||||
r"location\s*\.",
|
||||
r"fetch\s*\(",
|
||||
r"XMLHttpRequest",
|
||||
r"innerHTML\s*=",
|
||||
r"outerHTML\s*=",
|
||||
r"document\.write",
|
||||
r"script\s*>",
|
||||
]
|
||||
|
||||
# HTML self-closing tags that don't need closing tags
|
||||
SELF_CLOSING_TAGS = {
|
||||
"img",
|
||||
"br",
|
||||
"hr",
|
||||
"input",
|
||||
"meta",
|
||||
"link",
|
||||
"area",
|
||||
"base",
|
||||
"col",
|
||||
"embed",
|
||||
"source",
|
||||
"track",
|
||||
"wbr",
|
||||
}
|
||||
|
||||
|
||||
def validate_binary_data(data):
|
||||
"""
|
||||
|
|
@ -149,191 +60,21 @@ def validate_binary_data(data):
|
|||
return True, None
|
||||
|
||||
|
||||
def validate_html_content(html_content):
|
||||
def validate_html_content(html_content: str):
|
||||
"""
|
||||
Validate that HTML content is safe and doesn't contain malicious patterns.
|
||||
|
||||
Args:
|
||||
html_content (str): The HTML content to validate
|
||||
|
||||
Returns:
|
||||
tuple: (is_valid: bool, error_message: str or None)
|
||||
Sanitize HTML content using nh3.
|
||||
Returns a tuple: (is_valid, error_message, clean_html)
|
||||
"""
|
||||
if not html_content:
|
||||
return True, None # Empty is OK
|
||||
return True, None, None
|
||||
|
||||
# Size check - 10MB limit (consistent with binary validation)
|
||||
if len(html_content.encode("utf-8")) > MAX_SIZE:
|
||||
return False, "HTML content exceeds maximum size limit (10MB)"
|
||||
|
||||
# Check for specific malicious patterns (simplified and more reliable)
|
||||
for pattern in MALICIOUS_HTML_PATTERNS:
|
||||
if re.search(pattern, html_content, re.IGNORECASE | re.DOTALL):
|
||||
return (
|
||||
False,
|
||||
f"HTML content contains potentially malicious patterns: {pattern}",
|
||||
)
|
||||
|
||||
# Additional check for inline event handlers that contain suspicious content
|
||||
# This is more permissive - only blocks if the event handler contains actual dangerous code
|
||||
event_handler_pattern = r'on\w+\s*=\s*["\']([^"\']*)["\']'
|
||||
event_matches = re.findall(event_handler_pattern, html_content, re.IGNORECASE)
|
||||
|
||||
for handler_content in event_matches:
|
||||
for js_pattern in DANGEROUS_JS_PATTERNS:
|
||||
if re.search(js_pattern, handler_content, re.IGNORECASE):
|
||||
return (
|
||||
False,
|
||||
f"HTML content contains dangerous JavaScript in event handler: {handler_content[:100]}",
|
||||
)
|
||||
|
||||
|
||||
return True, None
|
||||
|
||||
|
||||
def validate_json_content(json_content):
|
||||
"""
|
||||
Validate that JSON content is safe and doesn't contain malicious patterns.
|
||||
|
||||
Args:
|
||||
json_content (dict): The JSON content to validate
|
||||
|
||||
Returns:
|
||||
tuple: (is_valid: bool, error_message: str or None)
|
||||
"""
|
||||
if not json_content:
|
||||
return True, None # Empty is OK
|
||||
return False, "HTML content exceeds maximum size limit (10MB)", None
|
||||
|
||||
try:
|
||||
# Size check - 10MB limit (consistent with other validations)
|
||||
json_str = json.dumps(json_content)
|
||||
if len(json_str.encode("utf-8")) > MAX_SIZE:
|
||||
return False, "JSON content exceeds maximum size limit (10MB)"
|
||||
|
||||
# Basic structure validation for page description JSON
|
||||
if isinstance(json_content, dict):
|
||||
# Check for expected page description structure
|
||||
# This is based on ProseMirror/Tiptap JSON structure
|
||||
if "type" in json_content and json_content.get("type") == "doc":
|
||||
# Valid document structure
|
||||
if "content" in json_content and isinstance(
|
||||
json_content["content"], list
|
||||
):
|
||||
# Recursively check content for suspicious patterns
|
||||
is_valid, error_msg = _validate_json_content_array(
|
||||
json_content["content"]
|
||||
)
|
||||
if not is_valid:
|
||||
return False, error_msg
|
||||
elif "type" not in json_content and "content" not in json_content:
|
||||
# Allow other JSON structures but validate for suspicious content
|
||||
is_valid, error_msg = _validate_json_content_recursive(json_content)
|
||||
if not is_valid:
|
||||
return False, error_msg
|
||||
else:
|
||||
return False, "JSON description must be a valid object"
|
||||
|
||||
except (TypeError, ValueError) as e:
|
||||
return False, "Invalid JSON structure"
|
||||
clean_html = nh3.clean(html_content)
|
||||
return True, None, clean_html
|
||||
except Exception as e:
|
||||
return False, "Failed to validate JSON content"
|
||||
|
||||
return True, None
|
||||
|
||||
|
||||
def _validate_json_content_array(content, depth=0):
|
||||
"""
|
||||
Validate JSON content array for suspicious patterns.
|
||||
|
||||
Args:
|
||||
content (list): Array of content nodes to validate
|
||||
depth (int): Current recursion depth (default: 0)
|
||||
|
||||
Returns:
|
||||
tuple: (is_valid: bool, error_message: str or None)
|
||||
"""
|
||||
# Check recursion depth to prevent stack overflow
|
||||
if depth > MAX_RECURSION_DEPTH:
|
||||
return False, f"Maximum recursion depth ({MAX_RECURSION_DEPTH}) exceeded"
|
||||
|
||||
if not isinstance(content, list):
|
||||
return True, None
|
||||
|
||||
for node in content:
|
||||
if isinstance(node, dict):
|
||||
# Check text content for suspicious patterns (more targeted)
|
||||
if node.get("type") == "text" and "text" in node:
|
||||
text_content = node["text"]
|
||||
for pattern in DANGEROUS_TEXT_PATTERNS:
|
||||
if re.search(pattern, text_content, re.IGNORECASE):
|
||||
return (
|
||||
False,
|
||||
"JSON content contains suspicious script patterns in text",
|
||||
)
|
||||
|
||||
# Check attributes for suspicious content (more targeted)
|
||||
if "attrs" in node and isinstance(node["attrs"], dict):
|
||||
for attr_name, attr_value in node["attrs"].items():
|
||||
if isinstance(attr_value, str):
|
||||
# Only check specific attributes that could be dangerous
|
||||
if attr_name.lower() in [
|
||||
"href",
|
||||
"src",
|
||||
"action",
|
||||
"onclick",
|
||||
"onload",
|
||||
"onerror",
|
||||
]:
|
||||
for pattern in DANGEROUS_ATTR_PATTERNS:
|
||||
if re.search(pattern, attr_value, re.IGNORECASE):
|
||||
return (
|
||||
False,
|
||||
f"JSON content contains dangerous pattern in {attr_name} attribute",
|
||||
)
|
||||
|
||||
# Recursively check nested content
|
||||
if "content" in node and isinstance(node["content"], list):
|
||||
is_valid, error_msg = _validate_json_content_array(
|
||||
node["content"], depth + 1
|
||||
)
|
||||
if not is_valid:
|
||||
return False, error_msg
|
||||
|
||||
return True, None
|
||||
|
||||
|
||||
def _validate_json_content_recursive(obj, depth=0):
|
||||
"""
|
||||
Recursively validate JSON object for suspicious content.
|
||||
|
||||
Args:
|
||||
obj: JSON object (dict, list, or primitive) to validate
|
||||
depth (int): Current recursion depth (default: 0)
|
||||
|
||||
Returns:
|
||||
tuple: (is_valid: bool, error_message: str or None)
|
||||
"""
|
||||
# Check recursion depth to prevent stack overflow
|
||||
if depth > MAX_RECURSION_DEPTH:
|
||||
return False, f"Maximum recursion depth ({MAX_RECURSION_DEPTH}) exceeded"
|
||||
if isinstance(obj, dict):
|
||||
for key, value in obj.items():
|
||||
if isinstance(value, str):
|
||||
# Check for dangerous patterns using module constants
|
||||
for pattern in DANGEROUS_TEXT_PATTERNS:
|
||||
if re.search(pattern, value, re.IGNORECASE):
|
||||
return (
|
||||
False,
|
||||
"JSON content contains suspicious script patterns",
|
||||
)
|
||||
elif isinstance(value, (dict, list)):
|
||||
is_valid, error_msg = _validate_json_content_recursive(value, depth + 1)
|
||||
if not is_valid:
|
||||
return False, error_msg
|
||||
elif isinstance(obj, list):
|
||||
for item in obj:
|
||||
is_valid, error_msg = _validate_json_content_recursive(item, depth + 1)
|
||||
if not is_valid:
|
||||
return False, error_msg
|
||||
|
||||
return True, None
|
||||
log_exception(e)
|
||||
return False, "Failed to sanitize HTML", None
|
||||
|
|
|
|||
|
|
@ -69,3 +69,5 @@ opentelemetry-instrumentation-django==0.49b1
|
|||
opentelemetry-exporter-otlp==1.28.1
|
||||
# OpenAPI Specification
|
||||
drf-spectacular==0.28.0
|
||||
# html sanitizer
|
||||
nh3==0.2.18
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue