[WEB-4452] fix: enhance URL detection logic in contains_url function (#7352)

* fix: enhance URL detection logic in contains_url function
* fix: improve error handling in URL detection logic
* fix: enhance URL detection logic with length limits and edge case handling
* fix: adjust URL line length limit for improved detection accuracy
2025-07-08 18:28:47 +05:30 · 2025-07-08 18:28:47 +05:30 · fd9da3164e
commit fd9da3164e
parent a4ec80ceca
2 changed files with 301 additions and 2 deletions
--- a/apps/api/plane/utils/url.py
+++ b/apps/api/plane/utils/url.py
@ -3,13 +3,50 @@ import re
 from typing import Optional
 from urllib.parse import urlparse, urlunparse

+# Pre-compiled regex pattern, built once at import time rather than on every call.
+# Bounded quantifiers ({0,61}, {2,6}) limit backtracking; no atomic groups are used.
+URL_PATTERN = re.compile(
+    r"(?i)"  # Case insensitive
+    r"(?:"  # Non-capturing group for alternatives
+    r"https?://[^\s]+"  # http:// or https:// followed by non-whitespace
+    r"|"
+    r"www\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*"  # www.domain with proper length limits
+    r"|"
+    r"(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,6}"  # domain.tld with length limits
+    r"|"
+    r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"  # IP address with proper validation
+    r")"
+)
+

 def contains_url(value: str) -> bool:
    """
    Check if the value contains a URL.
+
+    This function is protected against ReDoS attacks by:
+    1. Using a pre-compiled regex pattern
+    2. Limiting input length to prevent excessive processing
+    3. Using bounded quantifiers in the pattern to limit backtracking
+
+    Args:
+        value (str): The input string to check for URLs
+
+    Returns:
+        bool: True if a URL is found in the first 500 characters of any line; False otherwise, including for any input longer than 1000 characters (not scanned)
    """
-    url_pattern = re.compile(r"https?://|www\\.")
-    return bool(url_pattern.search(value))
+    # Prevent ReDoS by limiting input length
+    if len(value) > 1000:  # Reasonable limit for URL detection
+        return False
+
+    # Additional safety: truncate very long lines that might contain URLs
+    lines = value.split("\n")
+    for line in lines:
+        if len(line) > 500:  # Process only reasonable length lines
+            line = line[:500]
+        if URL_PATTERN.search(line):
+            return True
+
+    return False


 def is_valid_url(url: str) -> bool: