From 0af75897f517e222e8188c4f205f86b121403de3 Mon Sep 17 00:00:00 2001
From: Bavisetti Narayan <72156168+NarayanBavisetti@users.noreply.github.com>
Date: Wed, 27 Aug 2025 00:38:25 +0530
Subject: [PATCH] [WEB-4780] chore: changed the html validation (#7648)
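* chore: changed the html validation so that validate_html_content also
  returns sanitized HTML (content_validator.py now imports nh3), which the
  serializers write back to description_html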
* chore: added requirements for nh3
* chore: removed the json validations
---
apps/api/plane/api/serializers/issue.py | 21 +-
apps/api/plane/api/serializers/project.py | 24 +-
apps/api/plane/app/serializers/draft.py | 21 +-
apps/api/plane/app/serializers/issue.py | 21 +-
apps/api/plane/app/serializers/page.py | 17 +-
apps/api/plane/app/serializers/project.py | 30 +--
apps/api/plane/app/serializers/workspace.py | 21 +-
apps/api/plane/space/serializer/issue.py | 19 +-
apps/api/plane/utils/content_validator.py | 281 +-------------------
apps/api/requirements/base.txt | 2 +
10 files changed, 91 insertions(+), 366 deletions(-)
diff --git a/apps/api/plane/api/serializers/issue.py b/apps/api/plane/api/serializers/issue.py
index 69c356246..075823cbf 100644
--- a/apps/api/plane/api/serializers/issue.py
+++ b/apps/api/plane/api/serializers/issue.py
@@ -24,7 +24,6 @@ from plane.db.models import (
)
from plane.utils.content_validator import (
validate_html_content,
- validate_json_content,
validate_binary_data,
)
@@ -89,20 +88,24 @@ class IssueSerializer(BaseSerializer):
raise serializers.ValidationError("Invalid HTML passed")
# Validate description content for security
- if data.get("description"):
- is_valid, error_msg = validate_json_content(data["description"])
- if not is_valid:
- raise serializers.ValidationError({"description": error_msg})
-
if data.get("description_html"):
- is_valid, error_msg = validate_html_content(data["description_html"])
+ is_valid, error_msg, sanitized_html = validate_html_content(
+ data["description_html"]
+ )
if not is_valid:
- raise serializers.ValidationError({"description_html": error_msg})
+ raise serializers.ValidationError(
+ {"error": "html content is not valid"}
+ )
+ # Update the data with sanitized HTML if available
+ if sanitized_html is not None:
+ data["description_html"] = sanitized_html
if data.get("description_binary"):
is_valid, error_msg = validate_binary_data(data["description_binary"])
if not is_valid:
- raise serializers.ValidationError({"description_binary": error_msg})
+ raise serializers.ValidationError(
+ {"description_binary": "Invalid binary data"}
+ )
# Validate assignees are from project
if data.get("assignees", []):
diff --git a/apps/api/plane/api/serializers/project.py b/apps/api/plane/api/serializers/project.py
index e6a257f3e..d860c46b2 100644
--- a/apps/api/plane/api/serializers/project.py
+++ b/apps/api/plane/api/serializers/project.py
@@ -12,7 +12,6 @@ from plane.db.models import (
from plane.utils.content_validator import (
validate_html_content,
- validate_json_content,
)
from .base import BaseSerializer
@@ -200,27 +199,18 @@ class ProjectSerializer(BaseSerializer):
)
# Validate description content for security
- if "description" in data and data["description"]:
- # For Project, description might be text field, not JSON
- if isinstance(data["description"], dict):
- is_valid, error_msg = validate_json_content(data["description"])
- if not is_valid:
- raise serializers.ValidationError({"description": error_msg})
-
- if "description_text" in data and data["description_text"]:
- is_valid, error_msg = validate_json_content(data["description_text"])
- if not is_valid:
- raise serializers.ValidationError({"description_text": error_msg})
-
if "description_html" in data and data["description_html"]:
if isinstance(data["description_html"], dict):
- is_valid, error_msg = validate_json_content(data["description_html"])
- else:
- is_valid, error_msg = validate_html_content(
+ is_valid, error_msg, sanitized_html = validate_html_content(
str(data["description_html"])
)
+ # Update the data with sanitized HTML if available
+ if sanitized_html is not None:
+ data["description_html"] = sanitized_html
if not is_valid:
- raise serializers.ValidationError({"description_html": error_msg})
+ raise serializers.ValidationError(
+ {"error": "html content is not valid"}
+ )
return data
diff --git a/apps/api/plane/app/serializers/draft.py b/apps/api/plane/app/serializers/draft.py
index 38fa65527..852caf8bf 100644
--- a/apps/api/plane/app/serializers/draft.py
+++ b/apps/api/plane/app/serializers/draft.py
@@ -23,7 +23,6 @@ from plane.db.models import (
)
from plane.utils.content_validator import (
validate_html_content,
- validate_json_content,
validate_binary_data,
)
from plane.app.permissions import ROLE
@@ -76,20 +75,24 @@ class DraftIssueCreateSerializer(BaseSerializer):
raise serializers.ValidationError("Start date cannot exceed target date")
# Validate description content for security
- if "description" in attrs and attrs["description"]:
- is_valid, error_msg = validate_json_content(attrs["description"])
- if not is_valid:
- raise serializers.ValidationError({"description": error_msg})
-
if "description_html" in attrs and attrs["description_html"]:
- is_valid, error_msg = validate_html_content(attrs["description_html"])
+ is_valid, error_msg, sanitized_html = validate_html_content(
+ attrs["description_html"]
+ )
if not is_valid:
- raise serializers.ValidationError({"description_html": error_msg})
+ raise serializers.ValidationError(
+ {"error": "html content is not valid"}
+ )
+ # Update the attrs with sanitized HTML if available
+ if sanitized_html is not None:
+ attrs["description_html"] = sanitized_html
if "description_binary" in attrs and attrs["description_binary"]:
is_valid, error_msg = validate_binary_data(attrs["description_binary"])
if not is_valid:
- raise serializers.ValidationError({"description_binary": error_msg})
+ raise serializers.ValidationError(
+ {"description_binary": "Invalid binary data"}
+ )
# Validate assignees are from project
if attrs.get("assignee_ids", []):
diff --git a/apps/api/plane/app/serializers/issue.py b/apps/api/plane/app/serializers/issue.py
index 691140eba..1eda37601 100644
--- a/apps/api/plane/app/serializers/issue.py
+++ b/apps/api/plane/app/serializers/issue.py
@@ -43,7 +43,6 @@ from plane.db.models import (
)
from plane.utils.content_validator import (
validate_html_content,
- validate_json_content,
validate_binary_data,
)
@@ -128,20 +127,24 @@ class IssueCreateSerializer(BaseSerializer):
raise serializers.ValidationError("Start date cannot exceed target date")
# Validate description content for security
- if "description" in attrs and attrs["description"]:
- is_valid, error_msg = validate_json_content(attrs["description"])
- if not is_valid:
- raise serializers.ValidationError({"description": error_msg})
-
if "description_html" in attrs and attrs["description_html"]:
- is_valid, error_msg = validate_html_content(attrs["description_html"])
+ is_valid, error_msg, sanitized_html = validate_html_content(
+ attrs["description_html"]
+ )
if not is_valid:
- raise serializers.ValidationError({"description_html": error_msg})
+ raise serializers.ValidationError(
+ {"error": "html content is not valid"}
+ )
+ # Update the attrs with sanitized HTML if available
+ if sanitized_html is not None:
+ attrs["description_html"] = sanitized_html
if "description_binary" in attrs and attrs["description_binary"]:
is_valid, error_msg = validate_binary_data(attrs["description_binary"])
if not is_valid:
- raise serializers.ValidationError({"description_binary": error_msg})
+ raise serializers.ValidationError(
+ {"description_binary": "Invalid binary data"}
+ )
# Validate assignees are from project
if attrs.get("assignee_ids", []):
diff --git a/apps/api/plane/app/serializers/page.py b/apps/api/plane/app/serializers/page.py
index 78762e4b4..9ac6cc414 100644
--- a/apps/api/plane/app/serializers/page.py
+++ b/apps/api/plane/app/serializers/page.py
@@ -7,7 +7,6 @@ from .base import BaseSerializer
from plane.utils.content_validator import (
validate_binary_data,
validate_html_content,
- validate_json_content,
)
from plane.db.models import (
Page,
@@ -229,23 +228,13 @@ class PageBinaryUpdateSerializer(serializers.Serializer):
return value
# Use the validation function from utils
- is_valid, error_message = validate_html_content(value)
+ is_valid, error_message, sanitized_html = validate_html_content(value)
if not is_valid:
raise serializers.ValidationError(error_message)
- return value
+ # Return sanitized HTML if available, otherwise return original
+ return sanitized_html if sanitized_html is not None else value
- def validate_description(self, value):
- """Validate the JSON description"""
- if not value:
- return value
-
- # Use the validation function from utils
- is_valid, error_message = validate_json_content(value)
- if not is_valid:
- raise serializers.ValidationError(error_message)
-
- return value
def update(self, instance, validated_data):
"""Update the page instance with validated data"""
diff --git a/apps/api/plane/app/serializers/project.py b/apps/api/plane/app/serializers/project.py
index dfa541d9f..1d1ea927d 100644
--- a/apps/api/plane/app/serializers/project.py
+++ b/apps/api/plane/app/serializers/project.py
@@ -15,7 +15,6 @@ from plane.db.models import (
)
from plane.utils.content_validator import (
validate_html_content,
- validate_json_content,
validate_binary_data,
)
@@ -65,27 +64,18 @@ class ProjectSerializer(BaseSerializer):
def validate(self, data):
# Validate description content for security
- if "description" in data and data["description"]:
- # For Project, description might be text field, not JSON
- if isinstance(data["description"], dict):
- is_valid, error_msg = validate_json_content(data["description"])
- if not is_valid:
- raise serializers.ValidationError({"description": error_msg})
-
- if "description_text" in data and data["description_text"]:
- is_valid, error_msg = validate_json_content(data["description_text"])
- if not is_valid:
- raise serializers.ValidationError({"description_text": error_msg})
-
if "description_html" in data and data["description_html"]:
- if isinstance(data["description_html"], dict):
- is_valid, error_msg = validate_json_content(data["description_html"])
- else:
- is_valid, error_msg = validate_html_content(
- str(data["description_html"])
- )
+ is_valid, error_msg, sanitized_html = validate_html_content(
+ str(data["description_html"])
+ )
+ # Update the data with sanitized HTML if available
+ if sanitized_html is not None:
+ data["description_html"] = sanitized_html
+
if not is_valid:
- raise serializers.ValidationError({"description_html": error_msg})
+ raise serializers.ValidationError(
+ {"error": "html content is not valid"}
+ )
return data
diff --git a/apps/api/plane/app/serializers/workspace.py b/apps/api/plane/app/serializers/workspace.py
index ec4c4bf63..6b22f59e8 100644
--- a/apps/api/plane/app/serializers/workspace.py
+++ b/apps/api/plane/app/serializers/workspace.py
@@ -26,7 +26,6 @@ from plane.utils.constants import RESTRICTED_WORKSPACE_SLUGS
from plane.utils.url import contains_url
from plane.utils.content_validator import (
validate_html_content,
- validate_json_content,
validate_binary_data,
)
@@ -319,20 +318,24 @@ class StickySerializer(BaseSerializer):
def validate(self, data):
# Validate description content for security
- if "description" in data and data["description"]:
- is_valid, error_msg = validate_json_content(data["description"])
- if not is_valid:
- raise serializers.ValidationError({"description": error_msg})
-
if "description_html" in data and data["description_html"]:
- is_valid, error_msg = validate_html_content(data["description_html"])
+ is_valid, error_msg, sanitized_html = validate_html_content(
+ data["description_html"]
+ )
if not is_valid:
- raise serializers.ValidationError({"description_html": error_msg})
+ raise serializers.ValidationError(
+ {"error": "html content is not valid"}
+ )
+ # Update the data with sanitized HTML if available
+ if sanitized_html is not None:
+ data["description_html"] = sanitized_html
if "description_binary" in data and data["description_binary"]:
is_valid, error_msg = validate_binary_data(data["description_binary"])
if not is_valid:
- raise serializers.ValidationError({"description_binary": error_msg})
+ raise serializers.ValidationError(
+ {"description_binary": "Invalid binary data"}
+ )
return data
diff --git a/apps/api/plane/space/serializer/issue.py b/apps/api/plane/space/serializer/issue.py
index 3549e7626..64f151a2d 100644
--- a/apps/api/plane/space/serializer/issue.py
+++ b/apps/api/plane/space/serializer/issue.py
@@ -30,7 +30,6 @@ from plane.db.models import (
)
from plane.utils.content_validator import (
validate_html_content,
- validate_json_content,
validate_binary_data,
)
@@ -290,20 +289,22 @@ class IssueCreateSerializer(BaseSerializer):
raise serializers.ValidationError("Start date cannot exceed target date")
# Validate description content for security
- if "description" in data and data["description"]:
- is_valid, error_msg = validate_json_content(data["description"])
- if not is_valid:
- raise serializers.ValidationError({"description": error_msg})
-
if "description_html" in data and data["description_html"]:
- is_valid, error_msg = validate_html_content(data["description_html"])
+ is_valid, error_msg, sanitized_html = validate_html_content(
+ data["description_html"]
+ )
if not is_valid:
- raise serializers.ValidationError({"description_html": error_msg})
+ raise serializers.ValidationError(
+ {"error": "html content is not valid"}
+ )
+ # Update the data with sanitized HTML if available
+ if sanitized_html is not None:
+ data["description_html"] = sanitized_html
if "description_binary" in data and data["description_binary"]:
is_valid, error_msg = validate_binary_data(data["description_binary"])
if not is_valid:
- raise serializers.ValidationError({"description_binary": error_msg})
+ raise serializers.ValidationError({"description_binary": "Invalid binary data"})
return data
diff --git a/apps/api/plane/utils/content_validator.py b/apps/api/plane/utils/content_validator.py
index d28b83fc7..47ee663ff 100644
--- a/apps/api/plane/utils/content_validator.py
+++ b/apps/api/plane/utils/content_validator.py
@@ -1,36 +1,11 @@
# Python imports
import base64
-import json
-import re
-
+import nh3
+from plane.utils.exception_logger import log_exception
# Maximum allowed size for binary data (10MB)
MAX_SIZE = 10 * 1024 * 1024
-# Maximum recursion depth to prevent stack overflow
-MAX_RECURSION_DEPTH = 20
-
-# Dangerous text patterns that could indicate XSS or script injection
-DANGEROUS_TEXT_PATTERNS = [
- r"",
- r"javascript\s*:",
- r"data\s*:\s*text/html",
- r"eval\s*\(",
- r"document\s*\.",
- r"window\s*\.",
- r"location\s*\.",
-]
-
-# Dangerous attribute patterns for HTML attributes
-DANGEROUS_ATTR_PATTERNS = [
- r"javascript\s*:",
- r"data\s*:\s*text/html",
- r"eval\s*\(",
- r"alert\s*\(",
- r"document\s*\.",
- r"window\s*\.",
-]
-
# Suspicious patterns for binary data content
SUSPICIOUS_BINARY_PATTERNS = [
"]*>",
- r"",
- # JavaScript URLs in various attributes
- r'(?:href|src|action)\s*=\s*["\']?\s*javascript:',
- # Data URLs with text/html (potential XSS)
- r'(?:href|src|action)\s*=\s*["\']?\s*data:text/html',
- # Dangerous event handlers with JavaScript-like content
- r'on(?:load|error|click|focus|blur|change|submit|reset|select|resize|scroll|unload|beforeunload|hashchange|popstate|storage|message|offline|online)\s*=\s*["\']?[^"\']*(?:javascript|alert|eval|document\.|window\.|location\.|history\.)[^"\']*["\']?',
- # Object and embed tags that could load external content
- r"<(?:object|embed)[^>]*(?:data|src)\s*=",
- # Base tag that could change relative URL resolution
- r"]*href\s*=",
- # Dangerous iframe sources
- r'