[WIKI-553] chore: improved pages components tracking (#7966)

* chore: page components tracking

* chore: changed the transaction task

* chore: added logger for description html
This commit is contained in:
Bavisetti Narayan 2025-10-23 00:29:05 +05:30 committed by GitHub
parent 5fa9943b66
commit 68aa2fe0b8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 129 additions and 53 deletions

View file

@@ -4,7 +4,9 @@ import nh3
from plane.utils.exception_logger import log_exception
from bs4 import BeautifulSoup
from collections import defaultdict
import logging
logger = logging.getLogger("plane.api")
# Maximum allowed size for binary data (10MB)
MAX_SIZE = 10 * 1024 * 1024
@@ -54,7 +56,9 @@ def validate_binary_data(data):
# Check for suspicious text patterns (HTML/JS)
try:
decoded_text = binary_data.decode("utf-8", errors="ignore")[:200]
if any(pattern in decoded_text.lower() for pattern in SUSPICIOUS_BINARY_PATTERNS):
if any(
pattern in decoded_text.lower() for pattern in SUSPICIOUS_BINARY_PATTERNS
):
return False, "Binary data contains suspicious content patterns"
except Exception:
pass # Binary data might not be decodable as text, which is fine
@@ -232,8 +236,9 @@ def validate_html_content(html_content: str):
summary = json.dumps(diff)
except Exception:
summary = str(diff)
logger.warning(f"HTML sanitization removals: {summary}")
log_exception(
f"HTML sanitization removals: {summary}",
ValueError(f"HTML sanitization removals: {summary}"),
warning=True,
)
return True, None, clean_html