bb-plane-fork/apps/api/plane/utils/path_validator.py
Nikhil 6d3d9e6df7
[WEB-4943]: add url has allowed host or scheme for validating valid redirections (#7809)
* feat: enhance path validation and URL safety in path_validator.py

* Added get_allowed_hosts function to retrieve allowed hosts from settings.
* Updated get_safe_redirect_url to validate URLs against allowed hosts.
* Improved URL construction logic for safer redirection handling.

* feat: enhance URL validation in authentication views

* Added url_has_allowed_host_and_scheme checks in SignUpAuthSpaceEndpoint and MagicSignInSpaceEndpoint for safer redirection.
* Updated redirect logic to fallback to base host if the constructed URL is not allowed.
* Improved overall URL safety and handling in authentication flows.

* fix: improve host extraction in get_allowed_hosts function

* Updated get_allowed_hosts to extract only the host from ADMIN_BASE_URL and SPACE_BASE_URL settings for better URL validation.
* Enhanced overall safety and clarity in allowed hosts retrieval.
2025-09-16 21:37:08 +05:30

125 lines
No EOL
3.6 KiB
Python

# Django imports
from django.utils.http import url_has_allowed_host_and_scheme
from django.conf import settings
# Python imports
from urllib.parse import urlparse
def _contains_suspicious_patterns(path: str) -> bool:
"""
Check for suspicious patterns that might indicate malicious intent.
Args:
path (str): The path to check
Returns:
bool: True if suspicious patterns found, False otherwise
"""
suspicious_patterns = [
r'javascript:', # JavaScript injection
r'data:', # Data URLs
r'vbscript:', # VBScript injection
r'file:', # File protocol
r'ftp:', # FTP protocol
r'%2e%2e', # URL encoded path traversal
r'%2f%2f', # URL encoded double slash
r'%5c%5c', # URL encoded backslashes
r'<script', # Script tags
r'<iframe', # Iframe tags
r'<object', # Object tags
r'<embed', # Embed tags
r'<form', # Form tags
r'onload=', # Event handlers
r'onerror=', # Event handlers
r'onclick=', # Event handlers
]
path_lower = path.lower()
for pattern in suspicious_patterns:
if pattern in path_lower:
return True
return False
def get_allowed_hosts() -> list[str]:
    """
    Collect the hosts that redirect URLs are allowed to point at.

    Reads WEB_URL (falling back to APP_BASE_URL), ADMIN_BASE_URL and
    SPACE_BASE_URL from the Django settings. Every configured value is
    reduced to its network location (``host[:port]``), because
    ``url_has_allowed_host_and_scheme`` compares the netloc of the candidate
    URL against these entries; a full origin such as ``https://example.com``
    would never match.

    Returns:
        list[str]: The allowed hosts, without duplicates, in the order
        web/app, admin, space. Unset settings are skipped.
    """
    configured_urls = (
        settings.WEB_URL or settings.APP_BASE_URL,
        settings.ADMIN_BASE_URL,
        settings.SPACE_BASE_URL,
    )
    allowed_hosts = []
    for configured_url in configured_urls:
        if not configured_url:
            continue
        # Get only the host. A value configured without a scheme has no
        # netloc after parsing, so it is kept as-is and treated as a bare host.
        host = urlparse(configured_url).netloc or configured_url
        if host not in allowed_hosts:
            allowed_hosts.append(host)
    return allowed_hosts
def validate_next_path(next_path: str) -> str:
    """
    Validate that next_path is a safe relative path for redirection.

    Args:
        next_path (str): The user-supplied path (or URL) to validate

    Returns:
        str: The validated path starting with a single "/", or an empty
        string when the input is missing, not a string, longer than 500
        characters, unparsable, not rooted at "/", starts with "//",
        contains "..", or contains a suspicious pattern. When an absolute
        URL is supplied only its path component is considered.
    """
    if not next_path or not isinstance(next_path, str):
        return ""
    # Limit input length to prevent DoS attacks
    if len(next_path) > 500:
        return ""
    # Browsers interpret backslashes as forward slashes. Remove all backslashes.
    next_path = next_path.replace("\\", "")
    try:
        parsed_url = urlparse(next_path)
    except ValueError:
        # urlparse raises on malformed authorities (e.g. unbalanced IPv6
        # brackets); such input is never a valid redirect target.
        return ""
    # Block absolute URLs or anything with scheme/netloc
    if parsed_url.scheme or parsed_url.netloc:
        next_path = parsed_url.path  # Extract only the path component
    # Must start with a forward slash and not be empty
    if not next_path.startswith("/"):
        return ""
    # A leading "//" is resolved by browsers as a protocol-relative URL,
    # which would send the user to another host.
    if next_path.startswith("//"):
        return ""
    # Prevent path traversal
    if ".." in next_path:
        return ""
    # Additional security checks
    if _contains_suspicious_patterns(next_path):
        return ""
    return next_path
def get_safe_redirect_url(base_url: str, next_path: str = "", params: "dict | None" = None) -> str:
    """
    Safely construct a redirect URL with validated next_path.

    Args:
        base_url (str): The base URL to redirect to
        next_path (str): The next path to append; it is validated first and
            becomes an empty value when it is not a safe relative path
        params (dict | None): Extra query parameters to append

    Returns:
        str: ``<base_url>/?next_path=<path>[&<params>]`` when that URL points
        at an allowed host, otherwise the base URL without a trailing slash
    """
    from urllib.parse import urlencode, quote

    # Validate the next path
    validated_path = validate_next_path(next_path)
    base_url = base_url.rstrip("/")
    # Percent-encode the path so characters such as "&", "=", "#" stay inside
    # the next_path value instead of leaking into the query string; "/" is
    # left readable.
    query = f"next_path={quote(validated_path, safe='/')}"
    if params:
        query = f"{query}&{urlencode(params)}"
    url = f"{base_url}/?{query}"
    # Check if the URL is allowed
    if url_has_allowed_host_and_scheme(url, allowed_hosts=get_allowed_hosts()):
        return url
    # Return the base URL if the URL is not allowed
    return base_url