[WEB-4751] chore: enhance URL utility functions with IP address validation and cleaned up url extraction (#7636)
* feat: enhance URL utility functions with IP address validation and cleaned up the extraction utilities * fix: remove unnecessary type assertion in isLocalhost function
This commit is contained in:
parent
a2d9e70a83
commit
9a77e383cd
1 changed files with 207 additions and 175 deletions
|
|
@ -1,13 +1,160 @@
|
||||||
import tlds from "tlds";
|
import tlds from "tlds";
|
||||||
|
|
||||||
|
// Matches a leading URL scheme made of letters followed by "://" (e.g. "https://").
const PROTOCOL_REGEX = /^[a-zA-Z]+:\/\//;
// Loose e-mail shape check: local@domain.tld with no whitespace and a single "@".
const EMAIL_REGEX = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
// Hostnames that isLocalhost() treats as the local machine.
const LOCALHOST_ADDRESSES = ["localhost", "127.0.0.1", "0.0.0.0"];
const HTTP_PROTOCOL = "http://";
const MAILTO_PROTOCOL = "mailto:";
// Scheme prepended to inputs that do not carry one of their own.
const DEFAULT_PROTOCOL = HTTP_PROTOCOL;

// IPv4 regex - matches 0.0.0.0 to 255.255.255.255 in strict dotted-decimal form.
// Octets with leading zeros (e.g. "010") are rejected on purpose: URL parsers read
// them as octal, so accepting them here would validate one address and then
// navigate to a different one.
const IPV4_REGEX =
  /^(?:(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])$/;

// IPv6 regex - covers full, "::"-compressed, link-local with zone id, and
// IPv4-mapped/embedded textual forms. Expects the address without brackets.
const IPV6_REGEX =
  /^(?:(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,7}:|(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}|(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}|(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}|(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:(?::[0-9a-fA-F]{1,4}){1,6}|:(?::[0-9a-fA-F]{1,4}){1,7}|::|fe80:(?::[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(?:ffff(?::0{1,4}){0,1}:){0,1}(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?:[0-9a-fA-F]{1,4}:){1,4}:(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9]))$/;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interface representing the components of a URL.
|
* Checks if a string is a valid IPv4 address
|
||||||
|
* @param ip - String to validate as IPv4
|
||||||
|
* @returns True if valid IPv4 address
|
||||||
|
*/
|
||||||
|
export function isValidIPv4(ip: string): boolean {
  // Anything that is not a non-empty string cannot be an address; the runtime
  // type check protects untyped JavaScript callers.
  const isNonEmptyString = typeof ip === "string" && ip.length > 0;
  return isNonEmptyString ? IPV4_REGEX.test(ip) : false;
}
|
||||||
|
|
||||||
|
/**
 * Tells whether the given text is an IPv6 address.
 *
 * The address may be wrapped in square brackets, as it appears inside a URL
 * (e.g. `[::1]`); a leading "[" and a trailing "]" are dropped before matching.
 *
 * @param ip - Candidate address text
 * @returns True when the unbracketed text matches the IPv6 pattern
 */
export function isValidIPv6(ip: string): boolean {
  // Reject non-strings (untyped callers) and the empty string up front.
  if (typeof ip !== "string" || ip === "") return false;

  const unbracketed = ip.replace(/^\[|\]$/g, "");
  return IPV6_REGEX.test(unbracketed);
}
|
||||||
|
|
||||||
|
/**
 * Classifies a string as an IPv4 address, an IPv6 address, or neither.
 *
 * @param ip - Candidate address text
 * @returns `isValid` and `type` describe the outcome. `formatted` is present
 *   only for valid addresses: the input itself for IPv4, and the input with a
 *   leading "[" / trailing "]" removed for IPv6.
 */
export function validateIPAddress(ip: string): {
  isValid: boolean;
  type: "ipv4" | "ipv6" | "invalid";
  formatted?: string;
} {
  const usable = typeof ip === "string" && ip !== "";

  // IPv4 is checked first, matching the order callers have always observed.
  if (usable && isValidIPv4(ip)) {
    return { isValid: true, type: "ipv4", formatted: ip };
  }

  if (usable && isValidIPv6(ip)) {
    // Report the address without URL-style brackets.
    return { isValid: true, type: "ipv6", formatted: ip.replace(/^\[|\]$/g, "") };
  }

  return { isValid: false, type: "invalid" };
}
|
||||||
|
|
||||||
|
/**
 * Checks if a URL string points to a localhost address.
 *
 * The hostname is extracted with `extractHostname` and compared, ignoring
 * letter case, against `LOCALHOST_ADDRESSES`. Host names are case-insensitive,
 * so "LOCALHOST:3000" is treated the same as "localhost:3000".
 *
 * @param url - The URL string to check (with or without protocol, port, path)
 * @returns True if the URL points to localhost, false otherwise (including
 *   for empty or non-string input)
 */
export function isLocalhost(url: string): boolean {
  // Guard untyped callers: a nullish value would otherwise throw while extracting the host.
  if (!url || typeof url !== "string") return false;

  const hostname = extractHostname(url).toLowerCase();
  return LOCALHOST_ADDRESSES.includes(hostname);
}
|
||||||
|
|
||||||
|
/**
 * Extracts the hostname from a URL-like string by removing protocol, auth
 * credentials, path, query, hash, and port.
 *
 * Only the authority part of the input is inspected, so "://", "@" or ":"
 * characters that appear later in the path, query, or hash never influence the
 * result. A bracketed IPv6 host is returned with its brackets (e.g. "[::1]").
 *
 * @param url - The URL string to extract hostname from
 * @returns The cleaned hostname, or an empty string for empty/non-string input
 *
 * @example
 * extractHostname("https://user:pw@example.com:8080/a?b#c") // "example.com"
 * extractHostname("https://evil.com/@localhost")            // "evil.com"
 * extractHostname("http://[::1]:3000/")                     // "[::1]"
 */
export function extractHostname(url: string): string {
  if (!url || typeof url !== "string") return "";

  let rest = url;

  // Remove the protocol, but only when "://" comes before any path/query/hash
  // delimiter. In that case the first delimiter is the "/" right after the ":".
  const schemeEnd = rest.indexOf("://");
  const firstDelimiter = rest.search(/[/?#]/);
  if (schemeEnd !== -1 && firstDelimiter === schemeEnd + 1) {
    rest = rest.slice(schemeEnd + 3);
  }

  // The authority ends at the first "/", "?" or "#".
  const authorityEnd = rest.search(/[/?#]/);
  let authority = authorityEnd === -1 ? rest : rest.slice(0, authorityEnd);

  // Remove auth credentials; the host follows the last "@" of the authority.
  const atIndex = authority.lastIndexOf("@");
  if (atIndex !== -1) {
    authority = authority.slice(atIndex + 1);
  }

  // Bracketed IPv6 literal: the colons inside the brackets are not port separators.
  if (authority.startsWith("[")) {
    const closingBracket = authority.indexOf("]");
    if (closingBracket !== -1) {
      return authority.slice(0, closingBracket + 1);
    }
  }

  // A single ":" separates host and port. Several colons without brackets are
  // left untouched (e.g. a bare IPv6 literal) instead of being truncated.
  const firstColon = authority.indexOf(":");
  if (firstColon !== -1 && firstColon === authority.lastIndexOf(":")) {
    return authority.slice(0, firstColon);
  }

  return authority;
}
|
||||||
|
|
||||||
|
/**
 * Extracts and validates the TLD (Top Level Domain) from a URL string.
 *
 * @param urlString - The string to extract TLD from
 * @returns The valid TLD (lower-cased) if found, empty string otherwise
 *
 * @description
 * The function performs the following steps:
 * 1. Reduces the input to its hostname via `extractHostname` (protocol, auth
 *    credentials, path, query, hash, and port are removed)
 * 2. Rejects empty hostnames and hostnames starting/ending with a dot
 * 3. Validates the last hostname label against the list of known TLDs
 *
 * The dot checks are applied to the hostname rather than the raw input, so a
 * path that happens to end with "." does not hide a valid TLD, and a protocol
 * prefix does not hide a malformed host.
 *
 * @example
 * // Valid cases (returns the TLD)
 * extractTLD('example.com')          // returns 'com'
 * extractTLD('sub.example.com')      // returns 'com'
 * extractTLD('example.com/path')     // returns 'com'
 * extractTLD('example.com:8080')     // returns 'com'
 * extractTLD('example.com?query=1')  // returns 'com'
 * extractTLD('example.com#hash')     // returns 'com'
 *
 * // Invalid cases (returns empty string)
 * extractTLD('')                     // returns ''
 * extractTLD('.example.com')         // returns ''
 * extractTLD('https://.example.com') // returns ''
 * extractTLD('example.com.')         // returns ''
 * extractTLD('example.invalid')      // returns ''
 * extractTLD('localhost')            // returns ''
 */
export function extractTLD(urlString: string): string {
  if (!urlString || typeof urlString !== "string") {
    return "";
  }

  const hostname = extractHostname(urlString);
  if (!hostname || hostname.startsWith(".") || hostname.endsWith(".")) {
    return "";
  }

  // A hostname without any dot (e.g. "localhost") has no TLD.
  const lastDot = hostname.lastIndexOf(".");
  if (lastDot === -1) {
    return "";
  }

  const potentialTLD = hostname.slice(lastDot + 1).toLowerCase();
  return tlds.includes(potentialTLD) ? potentialTLD : "";
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Interface representing the cleaned components of a URL.
|
||||||
* @interface IURLComponents
|
* @interface IURLComponents
|
||||||
* @property {string} protocol - The URL protocol (e.g., 'http', 'https'), empty if protocol is not present
|
* @property {string} protocol - The URL protocol without the trailing colon (e.g., 'http', 'https'). Always contains the actual protocol used, even when it was added because the input had none.
|
||||||
* @property {string} subdomain - The subdomain part of the URL (e.g., 'blog' in 'blog.example.com')
|
* @property {string} subdomain - The subdomain part of the URL (e.g., 'blog' in 'blog.example.com')
|
||||||
* @property {string} rootDomain - The root domain name (e.g., 'example' in 'blog.example.com')
|
* @property {string} rootDomain - The root domain name (e.g., 'example' in 'blog.example.com')
|
||||||
* @property {string} tld - The top-level domain (e.g., 'com', 'org')
|
* @property {string} tld - The top-level domain (e.g., 'com', 'org')
|
||||||
* @property {string} path - The URL path including search params and hash
|
* @property {string} pathname - The URL path excluding search params and hash, empty if pathname is '/'
|
||||||
* @property {URL} full - The original URL object with all native URL properties
|
* @property {URL} full - The original URL object with all native URL properties
|
||||||
*/
|
*/
|
||||||
export interface IURLComponents {
|
export interface IURLComponents {
|
||||||
|
|
@ -15,10 +162,42 @@ export interface IURLComponents {
|
||||||
subdomain: string;
|
subdomain: string;
|
||||||
rootDomain: string;
|
rootDomain: string;
|
||||||
tld: string;
|
tld: string;
|
||||||
path: string;
|
pathname: string;
|
||||||
full: URL;
|
full: URL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Process a URL object to extract its components.
 *
 * - `protocol` is `url.protocol` without the trailing ":".
 * - A hostname with a single label (e.g. "localhost", "[::1]") or an IPv4
 *   address is returned whole in `rootDomain`, with empty `subdomain` and `tld`.
 *   An IP address has no domain structure, so it is never split on its dots.
 * - Otherwise the last label is the `tld`, the one before it the `rootDomain`,
 *   and any remaining leading labels form the `subdomain`.
 * - `pathname` is `url.pathname`, or an empty string when it is just "/".
 *
 * @param url - Parsed URL to decompose
 * @returns The extracted components; `full` is the same URL object that was passed in
 */
export function processURL(url: URL): IURLComponents {
  const protocol = url.protocol.slice(0, -1);
  const hostname = url.hostname;
  const hostnameParts = hostname.split(".");

  // Four dot-separated numeric groups: an IPv4 host as serialized by the URL parser.
  const isIPv4Host = /^\d{1,3}(?:\.\d{1,3}){3}$/.test(hostname);

  let subdomain = "";
  let rootDomain = "";
  let tld = "";

  if (isIPv4Host || hostnameParts.length === 1) {
    rootDomain = hostname; // For cases like 'localhost' or '192.168.1.1'
  } else {
    tld = hostnameParts[hostnameParts.length - 1] ?? "";
    rootDomain = hostnameParts[hostnameParts.length - 2] ?? "";
    // Empty when the hostname has exactly two labels.
    subdomain = hostnameParts.slice(0, -2).join(".");
  }

  return {
    protocol,
    subdomain,
    rootDomain,
    tld,
    pathname: url.pathname === "/" ? "" : url.pathname,
    full: url,
  };
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extracts components from a URL object or string.
|
* Extracts components from a URL object or string.
|
||||||
*
|
*
|
||||||
|
|
@ -39,189 +218,42 @@ export interface IURLComponents {
|
||||||
* // subdomain: 'blog',
|
* // subdomain: 'blog',
|
||||||
* // rootDomain: 'example',
|
* // rootDomain: 'example',
|
||||||
* // tld: 'com',
|
* // tld: 'com',
|
||||||
* // path: 'posts',
|
* // pathname: '/posts',
|
||||||
* // full: URL {} // The parsed URL object
|
* // full: URL {} // The parsed URL object
|
||||||
* // }
|
* // }
|
||||||
*/
|
*/
|
||||||
|
|
||||||
export function extractURLComponents(url: URL | string): IURLComponents | undefined {
  // Nullish/empty input (possible from untyped callers) yields no components
  // instead of throwing.
  if (!url) return undefined;

  // If URL object is passed directly
  if (typeof url !== "string") {
    return processURL(url);
  }

  // Work on the trimmed text so surrounding whitespace cannot defeat the
  // anchored pattern checks below.
  const trimmed = url.trim();
  if (trimmed === "") return undefined;

  // Input length validation for security
  if (trimmed.length > 2048) return undefined;

  // Lower-cased copy used ONLY for classification. The URL itself is always
  // built from `trimmed`, because path, query, and hash are case-sensitive.
  const lowered = trimmed.toLowerCase();

  try {
    // 1. Handle URLs with protocols (including mailto, http, https, ftp, etc.)
    if (PROTOCOL_REGEX.test(lowered) || lowered.startsWith(MAILTO_PROTOCOL)) {
      return processURL(new URL(trimmed));
    }

    // 2. Check if it's an email address
    if (EMAIL_REGEX.test(lowered)) {
      return processURL(new URL(`${MAILTO_PROTOCOL}${trimmed}`));
    }

    // 3. IPv6 literal without protocol: it must be bracketed to be a valid URL host.
    if (isValidIPv6(lowered)) {
      const host = trimmed.startsWith("[") ? trimmed : `[${trimmed}]`;
      return processURL(new URL(`${DEFAULT_PROTOCOL}${host}`));
    }

    // 4. URL without protocol but with localhost, an IPv4 address, or a valid TLD.
    //    The extracted hostname is also checked for IPv4 so that inputs such as
    //    "192.168.1.1:8080/admin" are accepted.
    if (
      isLocalhost(lowered) ||
      isValidIPv4(lowered) ||
      isValidIPv4(extractHostname(lowered)) ||
      extractTLD(lowered)
    ) {
      return processURL(new URL(`${DEFAULT_PROTOCOL}${trimmed}`));
    }

    return undefined;
  } catch {
    // `new URL` throws on input it cannot parse; treat that as "not a URL".
    return undefined;
  }
}
|
||||||
|
|
||||||
/**
 * Reports whether a URL-like string ends its host part with a known TLD.
 *
 * @param urlString - The string to check
 * @returns True when the last dot-separated label of the host part is in the
 *   known TLD list, false otherwise
 *
 * @description
 * 1. Empty input and input that starts or ends with "." is rejected.
 * 2. The host part is everything before the first "/", "?", "#" or ":".
 * 3. The host part must contain at least one dot; its last label is compared,
 *    lower-cased, against the known TLDs.
 *
 * @example
 * hasValidTLD('example.com')         // true
 * hasValidTLD('example.com/path')    // true (path is stripped)
 * hasValidTLD('example.com:8080')    // true (port is stripped)
 * hasValidTLD('')                    // false (empty string)
 * hasValidTLD('.example.com')        // false (starts with dot)
 * hasValidTLD('example.com.')        // false (ends with dot)
 * hasValidTLD('localhost')           // false (no dot, hence no TLD)
 */
function hasValidTLD(urlString: string): boolean {
  const malformed = !urlString || urlString.startsWith(".") || urlString.endsWith(".");
  if (malformed) return false;

  // Cutting at the first of "/", "?", "#", ":" gives the same result as
  // stripping path, query, hash, and port one after another.
  const cutAt = urlString.search(/[/?#:]/);
  const host = cutAt === -1 ? urlString : urlString.substring(0, cutAt);

  const labels = host.split(".");
  if (labels.length < 2) return false;

  const lastLabel = labels[labels.length - 1].toLowerCase();
  return tlds.includes(lastLabel);
}
|
|
||||||
|
|
||||||
/**
 * Checks if a string is a valid URL.
 *
 * @param urlString - The string to validate as URL
 * @returns True if the string is accepted as a URL, false otherwise
 *
 * @description
 * Three kinds of input are handled, in this order:
 * 1. Strings starting with "localhost" are valid when `http://` + the string
 *    can be parsed by the URL constructor.
 * 2. Strings containing "://" must start with a letters-only protocol, must
 *    parse, and must have a non-empty hostname other than ".com".
 * 3. Anything else is valid only when `hasValidTLD` accepts it.
 *
 * @example
 * isUrlValid('https://example.com') // true
 * isUrlValid('localhost:3000')      // true
 * isUrlValid('://example.com')      // false (malformed protocol)
 * isUrlValid('not-a-url')           // false (no protocol, no TLD)
 * isUrlValid('')                    // false
 */
export function isUrlValid(urlString: string): boolean {
  // Basic input validation
  if (!urlString || urlString.trim() === "") return false;

  // Parses without throwing; undefined signals an unparsable candidate.
  const tryParse = (candidate: string): URL | undefined => {
    try {
      return new URL(candidate);
    } catch {
      return undefined;
    }
  };

  // Handle localhost separately
  if (urlString.startsWith("localhost")) {
    return tryParse(`http://${urlString}`) !== undefined;
  }

  if (urlString.includes("://")) {
    // Reject invalid protocol formats (e.g. "://example.com")
    const hasWellFormedProtocol = /^[a-zA-Z]+:\/\//.test(urlString);
    if (!hasWellFormedProtocol) return false;

    const parsed = tryParse(urlString);
    return parsed !== undefined && parsed.hostname !== "" && parsed.hostname !== ".com";
  }

  return hasValidTLD(urlString);
}
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue