refactor: sanitize HTML function (#8307)

* refactor: replace isomorphic-dompurify with sanitize-html

* dompurify fixes

* more fixes with fallback and title

* build

---------

Co-authored-by: Prateek Shourya <prateekshourya29@gmail.com>
This commit is contained in:
M. Palanikannan 2025-12-11 13:30:31 +05:30 committed by GitHub
parent 76ebf395e6
commit e0c97c5471
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 235 additions and 260 deletions

View file

@ -66,7 +66,6 @@
"emoji-regex": "^10.3.0",
"highlight.js": "^11.8.0",
"is-emoji-supported": "^0.0.5",
"isomorphic-dompurify": "^1.8.0",
"jsx-dom-cjs": "^8.0.3",
"linkifyjs": "^4.3.2",
"lowlight": "^3.0.0",

View file

@ -1,5 +1,5 @@
import { Buffer } from "buffer";
import type { Extensions } from "@tiptap/core";
import type { Extensions, JSONContent } from "@tiptap/core";
import { getSchema } from "@tiptap/core";
import { generateHTML, generateJSON } from "@tiptap/html";
import { prosemirrorJSONToYDoc, yXmlFragmentToProseMirrorRootNode } from "y-prosemirror";
@ -11,7 +11,7 @@ import {
DocumentEditorExtensionsWithoutProps,
} from "@/extensions/core-without-props";
import { TitleExtensions } from "@/extensions/title-extension";
import DOMPurify from "isomorphic-dompurify";
import { sanitizeHTML } from "@plane/utils";
// editor extension configs
const RICH_TEXT_EDITOR_EXTENSIONS = CoreEditorExtensionsWithoutProps;
@ -69,16 +69,49 @@ export const getBinaryDataFromRichTextEditorHTMLString = (descriptionHTML: strin
return encodedData;
};
/**
 * Builds the ProseMirror JSON for a title document: a `doc` node wrapping a
 * single level-1 `heading`.
 *
 * @param text - The title text. When it is empty (falsy), the heading is
 *   emitted without a `content` array instead of with an empty text node
 *   (presumably because ProseMirror rejects empty text nodes).
 * @returns The JSON document representing the title.
 */
export const generateTitleProsemirrorJson = (text: string): JSONContent => {
  // Start with the bare heading; children are attached only when there is text.
  const headingNode: JSONContent = {
    type: "heading",
    attrs: { level: 1 },
  };

  if (text) {
    headingNode.content = [{ type: "text", text }];
  }

  return { type: "doc", content: [headingNode] };
};
/**
* @description this function generates the binary equivalent of html content for the document editor
* @param {string} descriptionHTML
* @param {string} descriptionHTML - The HTML content to convert
* @param {string} [title] - Optional title to append to the document
* @returns {Uint8Array}
*/
export const getBinaryDataFromDocumentEditorHTMLString = (descriptionHTML: string): Uint8Array => {
export const getBinaryDataFromDocumentEditorHTMLString = (descriptionHTML: string, title?: string): Uint8Array => {
// convert HTML to JSON
const contentJSON = generateJSON(descriptionHTML ?? "<p></p>", DOCUMENT_EDITOR_EXTENSIONS);
// convert JSON to Y.Doc format
const transformedData = prosemirrorJSONToYDoc(documentEditorSchema, contentJSON, "default");
// If title is provided, merge it into the document
if (title != null) {
const titleJSON = generateTitleProsemirrorJson(title);
const titleField = prosemirrorJSONToYDoc(documentEditorSchema, titleJSON, "title");
// Encode the title YDoc to updates and apply them to the main document
const titleUpdates = Y.encodeStateAsUpdate(titleField);
Y.applyUpdate(transformedData, titleUpdates);
}
// convert Y.Doc to Uint8Array format
const encodedData = Y.encodeStateAsUpdate(transformedData);
return encodedData;
@ -210,6 +243,6 @@ export const extractTextFromHTML = (html: string): string => {
// Use sanitizeHTML to safely extract text and remove all HTML tags
// This is more secure than regex as it handles edge cases and prevents injection
// Note: sanitizeHTML trims whitespace, which is acceptable for title extraction
const sanitizedText = DOMPurify.sanitize(html, { ALLOWED_TAGS: [] }); // sanitize the string to remove all HTML tags
const sanitizedText = sanitizeHTML(html); // sanitize the string to remove all HTML tags
return sanitizedText.trim() || ""; // trim the string to remove leading and trailing whitespaces
};