refactor: sanitize HTML function (#8307)

* refactor: replace isomorphic-dompurify with sanitize-html

* dompurify fixes

* more fixes with fallback and title

* build

---------

Co-authored-by: Prateek Shourya <prateekshourya29@gmail.com>
This commit is contained in:
M. Palanikannan 2025-12-11 13:30:31 +05:30 committed by GitHub
parent 76ebf395e6
commit e0c97c5471
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 235 additions and 260 deletions

View file

@ -45,7 +45,6 @@
"express-ws": "^5.0.2",
"helmet": "^7.1.0",
"ioredis": "5.7.0",
"isomorphic-dompurify": "^1.8.0",
"uuid": "catalog:",
"ws": "^8.18.3",
"y-prosemirror": "^1.3.7",

View file

@ -20,24 +20,32 @@ const fetchDocument = async ({ context, documentName: pageId, instance }: FetchP
try {
const service = getPageService(context.documentType, context);
// fetch details
const response = await service.fetchDescriptionBinary(pageId);
const response = (await service.fetchDescriptionBinary(pageId)) as Buffer;
const binaryData = new Uint8Array(response);
// if binary data is empty, convert HTML to binary data
if (binaryData.byteLength === 0) {
const pageDetails = await service.fetchDetails(pageId);
const convertedBinaryData = getBinaryDataFromDocumentEditorHTMLString(pageDetails.description_html ?? "<p></p>");
const convertedBinaryData = getBinaryDataFromDocumentEditorHTMLString(
pageDetails.description_html ?? "<p></p>",
pageDetails.name
);
if (convertedBinaryData) {
// save the converted binary data back to the database
const { contentBinaryEncoded, contentHTML, contentJSON } = getAllDocumentFormatsFromDocumentEditorBinaryData(
convertedBinaryData,
true
);
const payload = {
description_binary: contentBinaryEncoded,
description_html: contentHTML,
description: contentJSON,
};
await service.updateDescriptionBinary(pageId, payload);
try {
const { contentBinaryEncoded, contentHTML, contentJSON } = getAllDocumentFormatsFromDocumentEditorBinaryData(
convertedBinaryData,
true
);
const payload = {
description_binary: contentBinaryEncoded,
description_html: contentHTML,
description: contentJSON,
};
await service.updateDescriptionBinary(pageId, payload);
} catch (e) {
const error = new AppError(e);
logger.error("Failed to save binary after first convertion from html:", error);
}
return convertedBinaryData;
}
}

View file

@ -1,5 +1,13 @@
import { Database } from "./database";
import { ForceCloseHandler } from "./force-close-handler";
import { Logger } from "./logger";
import { Redis } from "./redis";
import { TitleSyncExtension } from "./title-sync";
export const getExtensions = () => [new Logger(), new Database(), new Redis()];
/**
 * Creates the server extension instances.
 * A fresh array with new instances is produced on every call.
 */
export const getExtensions = () => {
  // Order matters: ForceCloseHandler must be after Redis to receive broadcasts
  return [new Logger(), new Database(), new Redis(), new TitleSyncExtension(), new ForceCloseHandler()];
};

View file

@ -1,18 +1,22 @@
// hocuspocus
import type { Extension, Hocuspocus, Document } from "@hocuspocus/server";
import { TiptapTransformer } from "@hocuspocus/transformer";
import type { AnyExtension, JSONContent } from "@tiptap/core";
import type * as Y from "yjs";
// editor extensions
import { TITLE_EDITOR_EXTENSIONS, createRealtimeEvent } from "@plane/editor";
import {
TITLE_EDITOR_EXTENSIONS,
createRealtimeEvent,
extractTextFromHTML,
generateTitleProsemirrorJson,
} from "@plane/editor";
import { logger } from "@plane/logger";
import { AppError } from "@/lib/errors";
// helpers
import { getPageService } from "@/services/page/handler";
import type { HocusPocusServerContext, OnLoadDocumentPayloadWithContext } from "@/types";
import { generateTitleProsemirrorJson } from "@/utils";
import { broadcastMessageToPage } from "@/utils/broadcast-message";
import { TitleUpdateManager } from "./title-update/title-update-manager";
import { extractTextFromHTML } from "./title-update/title-utils";
/**
* Hocuspocus extension for synchronizing document titles
@ -41,15 +45,11 @@ export class TitleSyncExtension implements Extension {
// in the yjs binary
if (document.isEmpty("title")) {
const service = getPageService(context.documentType, context);
// const title = await service.fe
const title = (await service.fetchDetails?.(documentName)).name;
const pageDetails = await service.fetchDetails(documentName);
const title = pageDetails.name;
if (title == null) return;
const titleField = TiptapTransformer.toYdoc(
generateTitleProsemirrorJson(title),
"title",
// editor
TITLE_EDITOR_EXTENSIONS as any
);
const titleJson = (generateTitleProsemirrorJson as (text: string) => JSONContent)(title);
const titleField = TiptapTransformer.toYdoc(titleJson, "title", TITLE_EDITOR_EXTENSIONS as AnyExtension[]);
document.merge(titleField);
}
} catch (error) {

View file

@ -1,21 +0,0 @@
import DOMPurify from "isomorphic-dompurify";
/**
 * Reduces an HTML string to its plain text by stripping every tag.
 * @param htmlString - The markup to strip
 * @returns The tag-free text, with leading and trailing whitespace removed
 */
const sanitizeHTML = (htmlString: string): string =>
  // An empty ALLOWED_TAGS list tells DOMPurify to remove all HTML tags
  DOMPurify.sanitize(htmlString, { ALLOWED_TAGS: [] }).trim();
/**
 * Returns the plain-text content of an HTML string.
 *
 * Delegates to sanitizeHTML instead of regex-based tag stripping, which is
 * safer around edge cases and injection attempts. The result is trimmed,
 * which is acceptable for title extraction.
 */
export const extractTextFromHTML = (html: string): string => {
  const plainText = sanitizeHTML(html);
  return plainText || "";
};

View file

@ -1,21 +0,0 @@
/**
 * Builds a document JSON object holding a single level-1 heading.
 * @param text - The title text; when empty, the heading gets no `content` key
 * @returns A `doc` node whose only child is the heading
 */
export const generateTitleProsemirrorJson = (text: string) => {
  // Only attach a text child when there is text to put in it
  const headingContent = text ? { content: [{ type: "text", text }] } : {};

  return {
    type: "doc",
    content: [{ type: "heading", attrs: { level: 1 }, ...headingContent }],
  };
};

View file

@ -1 +0,0 @@
export * from "./document";

View file

@ -69,6 +69,7 @@ export const PageRoot = observer((props: TPageRootProps) => {
const { isFetchingFallbackBinary } = usePageFallback({
editorRef,
fetchPageDescription: handlers.fetchDescriptionBinary,
page,
collaborationState,
updatePageDescription: handlers.updateDescription,
});

View file

@ -2,22 +2,22 @@ import { useCallback, useEffect, useRef, useState } from "react";
import type { EditorRefApi, CollaborationState } from "@plane/editor";
// plane editor
import { convertBinaryDataToBase64String, getBinaryDataFromDocumentEditorHTMLString } from "@plane/editor";
// plane propel
import { setToast, TOAST_TYPE } from "@plane/propel/toast";
// plane types
import type { TDocumentPayload } from "@plane/types";
// hooks
import useAutoSave from "@/hooks/use-auto-save";
import type { TPageInstance } from "@/store/pages/base-page";
type TArgs = {
editorRef: React.RefObject<EditorRefApi>;
fetchPageDescription: () => Promise<ArrayBuffer>;
collaborationState: CollaborationState | null;
updatePageDescription: (data: TDocumentPayload) => Promise<void>;
page: TPageInstance;
};
export const usePageFallback = (args: TArgs) => {
const { editorRef, fetchPageDescription, collaborationState, updatePageDescription } = args;
const { editorRef, fetchPageDescription, collaborationState, updatePageDescription, page } = args;
const hasShownFallbackToast = useRef(false);
const [isFetchingFallbackBinary, setIsFetchingFallbackBinary] = useState(false);
@ -32,12 +32,7 @@ export const usePageFallback = (args: TArgs) => {
// Show toast notification when fallback mechanism kicks in (only once)
if (!hasShownFallbackToast.current) {
// setToast({
// type: TOAST_TYPE.WARNING,
// title: "Connection lost",
// message: "Your changes are being saved using backup mechanism. ",
// });
console.log("Connection lost");
console.warn("Websocket Connection lost, your changes are being saved using backup mechanism.");
hasShownFallbackToast.current = true;
}
@ -49,7 +44,11 @@ export const usePageFallback = (args: TArgs) => {
if (latestEncodedDescription && latestEncodedDescription.byteLength > 0) {
latestDecodedDescription = new Uint8Array(latestEncodedDescription);
} else {
latestDecodedDescription = getBinaryDataFromDocumentEditorHTMLString("<p></p>");
const pageDescriptionHtml = page.description_html;
latestDecodedDescription = getBinaryDataFromDocumentEditorHTMLString(
pageDescriptionHtml ?? "<p></p>",
page.name
);
}
editor.setProviderDocument(latestDecodedDescription);
@ -64,15 +63,10 @@ export const usePageFallback = (args: TArgs) => {
});
} catch (error: any) {
console.error(error);
// setToast({
// type: TOAST_TYPE.ERROR,
// title: "Error",
// message: `Failed to update description using backup mechanism, ${error?.message}`,
// });
} finally {
setIsFetchingFallbackBinary(false);
}
}, [editorRef, fetchPageDescription, hasConnectionFailed, updatePageDescription]);
}, [editorRef, fetchPageDescription, hasConnectionFailed, updatePageDescription, page.description_html, page.name]);
useEffect(() => {
if (hasConnectionFailed) {