From 841388e437f78040d64b6ffaa45999e122ce4205 Mon Sep 17 00:00:00 2001 From: Jayash Tripathy <76092296+JayashTripathy@users.noreply.github.com> Date: Sat, 23 Aug 2025 01:07:35 +0530 Subject: [PATCH] [WEB-4751] refactor: added tld validation for urls (#7622) * refactor: added tld validation for urls * refactor: improve TLD validation and update parameter naming in URL utility functions * refactor: enhance URL component extraction and validation logic * fix: lint * chore: remove unused lodash filter import in existing issues list modal --------- Co-authored-by: Sriram Veeraghanta --- packages/propel/src/command/command.tsx | 2 +- packages/utils/package.json | 1 + packages/utils/src/url.ts | 181 ++++++++++++++++++++---- pnpm-lock.yaml | 9 ++ 4 files changed, 167 insertions(+), 26 deletions(-) diff --git a/packages/propel/src/command/command.tsx b/packages/propel/src/command/command.tsx index 865771423..e691e3d76 100644 --- a/packages/propel/src/command/command.tsx +++ b/packages/propel/src/command/command.tsx @@ -1,6 +1,6 @@ +import * as React from "react"; import { Command as CommandPrimitive } from "cmdk"; import { SearchIcon } from "lucide-react"; -import * as React from "react"; import { cn } from "@plane/utils"; function CommandComponent({ className, ...props }: React.ComponentProps) { diff --git a/packages/utils/package.json b/packages/utils/package.json index fce54bb23..e4f72157c 100644 --- a/packages/utils/package.json +++ b/packages/utils/package.json @@ -30,6 +30,7 @@ "lucide-react": "^0.469.0", "react": "^18.3.1", "tailwind-merge": "^2.5.5", + "tlds": "1.259.0", "uuid": "^10.0.0" }, "devDependencies": { diff --git a/packages/utils/src/url.ts b/packages/utils/src/url.ts index 5c6351f73..f24818a6f 100644 --- a/packages/utils/src/url.ts +++ b/packages/utils/src/url.ts @@ -1,7 +1,9 @@ +import tlds from "tlds"; + /** * Interface representing the components of a URL. * @interface IURLComponents - * @property {string} protocol - The URL protocol (e.g., 'http', 'https') + * @property {string} protocol - The URL protocol (e.g., 'http', 'https'), empty if protocol is not present * @property {string} subdomain - The subdomain part of the URL (e.g., 'blog' in 'blog.example.com') * @property {string} rootDomain - The root domain name (e.g., 'example' in 'blog.example.com') * @property {string} tld - The top-level domain (e.g., 'com', 'org') @@ -18,36 +20,65 @@ export interface IURLComponents { } /** - * Extracts components from a URL object. + * Extracts components from a URL object or string. * - * @param {URL} url - The URL object to extract components from + * @param {URL | string} url - The URL object or string to extract components from * @returns {IURLComponents | undefined} URL components or undefined if invalid * * @example + * // With URL object * const url = new URL('https://blog.example.com/posts'); * extractURLComponents(url); + * + * // With string + * extractURLComponents('blog.example.com/posts'); + * + * // Example output: * // { - * // protocol: 'https', + * // protocol: 'https', // empty string if protocol is not present * // subdomain: 'blog', * // rootDomain: 'example', * // tld: 'com', * // path: 'posts', - * // full: URL {} // The original URL object + * // full: URL {} // The parsed URL object * // } */ -export function extractURLComponents(url: URL): IURLComponents | undefined { +export function extractURLComponents(url: URL | string): IURLComponents | undefined { + if (!url) return undefined; + + let cleanedUrl: URL; + let wasProtocolAdded = false; + try { - const protocol = url.protocol.slice(0, -1); - const pathname = url.pathname.replace(/^\/+/, "").replace(/\/{2,}/g, "/"); - const path = pathname + url.search + url.hash; - const hostnameParts = url.hostname.split("."); + if (typeof url === "string") { + if (url.trim() === "") return undefined; + + // Check for valid protocol pattern: some characters followed by :// + if (/^[a-zA-Z]+:\/\//.test(url)) { + cleanedUrl = new URL(url); + } else if (hasValidTLD(url) || url.includes("localhost")) { + wasProtocolAdded = true; + cleanedUrl = new URL(`http://${url}`); + } else { + return undefined; + } + } else { + cleanedUrl = url; + } + + const protocol = cleanedUrl.protocol.slice(0, -1); + const pathname = cleanedUrl.pathname.replace(/^\/+/, "").replace(/\/{2,}/g, "/"); + const path = pathname + cleanedUrl.search + cleanedUrl.hash; + const hostnameParts = cleanedUrl.hostname.split("."); let subdomain = ""; let rootDomain = ""; let tld = ""; - if (hostnameParts.length >= 2) { + if (hostnameParts.length === 1) { + rootDomain = hostnameParts[0]; // For cases like 'localhost' + } else if (hostnameParts.length >= 2) { tld = hostnameParts[hostnameParts.length - 1]; rootDomain = hostnameParts[hostnameParts.length - 2]; @@ -57,19 +88,90 @@ export function extractURLComponents(url: URL): IURLComponents | undefined { } return { - protocol, + protocol: wasProtocolAdded ? "" : protocol, subdomain, rootDomain, tld, path, - full: url, + full: cleanedUrl, }; } catch (error) { - console.error(`Error extracting URL components: ${url.href}`, error); + console.error(`Error extracting URL components: ${url?.toString() || url}`, error); return undefined; } } +/** + * Checks if a string contains a valid TLD (Top Level Domain) by cleaning the URL and validating against known TLDs. + * + * @param {string} urlString - The string to check for valid TLD + * @returns {boolean} True if the string contains a valid TLD, false otherwise + * + * @description + * The function performs the following steps: + * 1. Basic validation (rejects empty strings, strings starting/ending with dots) + * 2. URL component cleaning: + * - Removes path component (everything after '/') + * - Removes query parameters (everything after '?') + * - Removes hash fragments (everything after '#') + * - Removes port numbers (everything after ':') + * 3. Validates the TLD against a list of known TLDs + * + * @example + * // Valid cases + * hasValidTLD('example.com') // returns true + * hasValidTLD('sub.example.com') // returns true + * hasValidTLD('example.com/path') // returns true (path is stripped) + * hasValidTLD('example.com:8080') // returns true (port is stripped) + * hasValidTLD('example.com?query=1') // returns true (query is stripped) + * hasValidTLD('example.com#hash') // returns true (hash is stripped) + * + * // Invalid cases + * hasValidTLD('') // returns false (empty string) + * hasValidTLD('.example.com') // returns false (starts with dot) + * hasValidTLD('example.com.') // returns false (ends with dot) + * hasValidTLD('example.invalid') // returns false (invalid TLD) + * hasValidTLD('localhost') // returns false (no TLD) + */ + +function hasValidTLD(urlString: string): boolean { + if (!urlString || urlString.startsWith(".") || urlString.endsWith(".")) { + return false; + } + + let hostname = urlString; + + // Remove path, query, and hash if present + const pathIndex = hostname.indexOf("/"); + if (pathIndex !== -1) { + hostname = hostname.substring(0, pathIndex); + } + + const queryIndex = hostname.indexOf("?"); + if (queryIndex !== -1) { + hostname = hostname.substring(0, queryIndex); + } + + const hashIndex = hostname.indexOf("#"); + if (hashIndex !== -1) { + hostname = hostname.substring(0, hashIndex); + } + + // Remove port if present + const portIndex = hostname.indexOf(":"); + if (portIndex !== -1) { + hostname = hostname.substring(0, portIndex); + } + + const hostnameParts = hostname.split("."); + if (hostnameParts.length >= 2) { + const potentialTLD = hostnameParts[hostnameParts.length - 1].toLowerCase(); + return tlds.includes(potentialTLD); + } + + return false; +} + /** * Checks if a string is a valid URL. * @@ -78,19 +180,48 @@ export function extractURLComponents(url: URL): IURLComponents | undefined { * * @example * // Valid URLs - * getValidURL('https://example.com') // returns URL object - * getValidURL('http://example.com') // returns URL object - * getValidURL('https://sub.example.com') // returns URL object + * isUrlValid('https://example.com') // returns true + * isUrlValid('http://example.com') // returns true + * isUrlValid('https://sub.example.com') // returns true * * // Invalid URLs - * getValidURL('not-a-url') // returns undefined - * getValidURL('example.com') // returns undefined (no protocol) - * getValidURL('https://invalid.') // returns undefined + * isUrlValid('not-a-url') // returns false + * isUrlValid('https://invalid.') // returns false + * isUrlValid('example.invalid') // returns false (invalid TLD) + * + * // Test cases: + * // isUrlValid('google.com') // ✅ returns true + * // isUrlValid('github.io') // ✅ returns true + * // isUrlValid('invalid.tld') // ❌ returns false (invalid TLD) */ -export function getValidURL(urlString: string): URL | undefined { - try { - return new URL(urlString); - } catch { - return undefined; + +export function isUrlValid(urlString: string): boolean { + // Basic input validation + if (!urlString || urlString.trim() === "") return false; + + // Handle localhost separately + if (urlString.startsWith("localhost")) { + try { + new URL(`http://${urlString}`); + return true; + } catch { + return false; + } } + + // Check for valid protocol format if protocol is present + if (urlString.includes("://")) { + // Reject invalid protocol formats (e.g. "://example.com") + if (!/^[a-zA-Z]+:\/\//.test(urlString)) return false; + try { + const url = new URL(urlString); + return !!url.hostname && url.hostname !== ".com"; + } catch { + return false; + } + } + + if (hasValidTLD(urlString)) return true; + + return false; } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index b4d469b21..1526fccf0 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1280,6 +1280,9 @@ importers: tailwind-merge: specifier: ^2.5.5 version: 2.6.0 + tlds: + specifier: 1.259.0 + version: 1.259.0 uuid: specifier: ^10.0.0 version: 10.0.0 @@ -7161,6 +7164,10 @@ packages: peerDependencies: '@tiptap/core': ^2.0.3 + tlds@1.259.0: + resolution: {integrity: sha512-AldGGlDP0PNgwppe2quAvuBl18UcjuNtOnDuUkqhd6ipPqrYYBt3aTxK1QTsBVknk97lS2JcafWMghjGWFtunw==} + hasBin: true + tldts-core@6.1.86: resolution: {integrity: sha512-Je6p7pkk+KMzMv2XXKmAE3McmolOQFdxkKw0R8EYNr7sELW46JqnNeTX8ybPiQgvg1ymCoF8LXs5fzFaZvJPTA==} @@ -14253,6 +14260,8 @@ snapshots: markdown-it-task-lists: 2.1.1 prosemirror-markdown: 1.13.2 + tlds@1.259.0: {} + tldts-core@6.1.86: {} tldts@6.1.86: