Browse Source

refac: enhanced response content sanitisation

'<' and '>' can be correctly displayed now
Timothy J. Baek 8 months ago
parent
commit
5a6ece9513

+ 4 - 3
package-lock.json

@@ -18,6 +18,7 @@
 				"codemirror": "^6.0.1",
 				"codemirror": "^6.0.1",
 				"crc-32": "^1.2.2",
 				"crc-32": "^1.2.2",
 				"dayjs": "^1.11.10",
 				"dayjs": "^1.11.10",
+				"dompurify": "^3.1.6",
 				"eventsource-parser": "^1.1.2",
 				"eventsource-parser": "^1.1.2",
 				"file-saver": "^2.0.5",
 				"file-saver": "^2.0.5",
 				"fuse.js": "^7.0.0",
 				"fuse.js": "^7.0.0",
@@ -3918,9 +3919,9 @@
 			}
 			}
 		},
 		},
 		"node_modules/dompurify": {
 		"node_modules/dompurify": {
-			"version": "3.1.5",
-			"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.1.5.tgz",
-			"integrity": "sha512-lwG+n5h8QNpxtyrJW/gJWckL+1/DQiYMX8f7t8Z2AZTPw1esVrqjI63i7Zc2Gz0aKzLVMYC1V1PL/ky+aY/NgA=="
+			"version": "3.1.6",
+			"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.1.6.tgz",
+			"integrity": "sha512-cTOAhc36AalkjtBpfG6O8JimdTMWNXjiePT2xQH/ppBGi/4uIpmj8eKyIkMJErXWARyINV/sB38yf8JCLF5pbQ=="
 		},
 		},
 		"node_modules/domutils": {
 		"node_modules/domutils": {
 			"version": "3.1.0",
 			"version": "3.1.0",

+ 1 - 0
package.json

@@ -59,6 +59,7 @@
 		"codemirror": "^6.0.1",
 		"codemirror": "^6.0.1",
 		"crc-32": "^1.2.2",
 		"crc-32": "^1.2.2",
 		"dayjs": "^1.11.10",
 		"dayjs": "^1.11.10",
+		"dompurify": "^3.1.6",
 		"eventsource-parser": "^1.1.2",
 		"eventsource-parser": "^1.1.2",
 		"file-saver": "^2.0.5",
 		"file-saver": "^2.0.5",
 		"fuse.js": "^7.0.0",
 		"fuse.js": "^7.0.0",

+ 1 - 1
src/lib/components/chat/Messages/CodeBlock.svelte

@@ -261,7 +261,7 @@ __builtins__.input = input`);
 		<div
 		<div
 			class="flex justify-between bg-[#202123] text-white text-xs px-4 pt-1 pb-0.5 rounded-t-lg overflow-x-auto"
 			class="flex justify-between bg-[#202123] text-white text-xs px-4 pt-1 pb-0.5 rounded-t-lg overflow-x-auto"
 		>
 		>
-			<div class="p-1">{@html lang}</div>
+			<div class="p-1">{lang}</div>
 
 
 			<div class="flex items-center">
 			<div class="flex items-center">
 				{#if lang.toLowerCase() === 'python' || lang.toLowerCase() === 'py' || (lang === '' && checkPythonCode(code))}
 				{#if lang.toLowerCase() === 'python' || lang.toLowerCase() === 'py' || (lang === '' && checkPythonCode(code))}

+ 7 - 1
src/lib/components/chat/Messages/MarkdownInlineTokens.svelte

@@ -1,4 +1,5 @@
 <script lang="ts">
 <script lang="ts">
+	import DOMPurify from 'dompurify';
 	import type { Token } from 'marked';
 	import type { Token } from 'marked';
 	import { revertSanitizedResponseContent, unescapeHtml } from '$lib/utils';
 	import { revertSanitizedResponseContent, unescapeHtml } from '$lib/utils';
 	import { onMount } from 'svelte';
 	import { onMount } from 'svelte';
@@ -14,7 +15,12 @@
 	{#if token.type === 'escape'}
 	{#if token.type === 'escape'}
 		{unescapeHtml(token.text)}
 		{unescapeHtml(token.text)}
 	{:else if token.type === 'html'}
 	{:else if token.type === 'html'}
-		{@html token.text}
+		{@const html = DOMPurify.sanitize(token.text)}
+		{#if html}
+			{@html html}
+		{:else}
+			{token.text}
+		{/if}
 	{:else if token.type === 'link'}
 	{:else if token.type === 'link'}
 		<a href={token.href} target="_blank" rel="nofollow" title={token.title}>{token.text}</a>
 		<a href={token.href} target="_blank" rel="nofollow" title={token.title}>{token.text}</a>
 	{:else if token.type === 'image'}
 	{:else if token.type === 'image'}

+ 7 - 1
src/lib/components/chat/Messages/MarkdownTokens.svelte

@@ -1,4 +1,5 @@
 <script lang="ts">
 <script lang="ts">
+	import DOMPurify from 'dompurify';
 	import { onMount } from 'svelte';
 	import { onMount } from 'svelte';
 	import type { Token } from 'marked';
 	import type { Token } from 'marked';
 	import { revertSanitizedResponseContent, unescapeHtml } from '$lib/utils';
 	import { revertSanitizedResponseContent, unescapeHtml } from '$lib/utils';
@@ -91,7 +92,12 @@
 			</ul>
 			</ul>
 		{/if}
 		{/if}
 	{:else if token.type === 'html'}
 	{:else if token.type === 'html'}
-		{@html token.text}
+		{@const html = DOMPurify.sanitize(token.text)}
+		{#if html}
+			{@html html}
+		{:else}
+			{token.text}
+		{/if}
 	{:else if token.type === 'paragraph'}
 	{:else if token.type === 'paragraph'}
 		<p>
 		<p>
 			<MarkdownInlineTokens id={`${id}-${tokenIdx}-p`} tokens={token.tokens ?? []} />
 			<MarkdownInlineTokens id={`${id}-${tokenIdx}-p`} tokens={token.tokens ?? []} />

+ 2 - 3
src/lib/components/chat/Messages/ResponseMessage.svelte

@@ -18,8 +18,7 @@
 		approximateToHumanReadable,
 		approximateToHumanReadable,
 		extractSentences,
 		extractSentences,
 		replaceTokens,
 		replaceTokens,
-		revertSanitizedResponseContent,
-		sanitizeResponseContent
+		processResponseContent
 	} from '$lib/utils';
 	} from '$lib/utils';
 	import { WEBUI_BASE_URL } from '$lib/constants';
 	import { WEBUI_BASE_URL } from '$lib/constants';
 
 
@@ -88,7 +87,7 @@
 	$: (async () => {
 	$: (async () => {
 		if (message?.content) {
 		if (message?.content) {
 			tokens = marked.lexer(
 			tokens = marked.lexer(
-				replaceTokens(sanitizeResponseContent(message?.content), model?.name, $user?.name)
+				replaceTokens(processResponseContent(message?.content), model?.name, $user?.name)
 			);
 			);
 		}
 		}
 	})();
 	})();

+ 17 - 34
src/lib/utils/index.ts

@@ -23,39 +23,6 @@ const convertLatexToSingleLine = (content) => {
 	return content;
 	return content;
 };
 };
 
 
-export const sanitizeResponseContent = (content: string) => {
-	// replace single backslash with double backslash
-	content = content.replace(/\\\\/g, '\\\\\\\\');
-
-	content = convertLatexToSingleLine(content);
-
-	// First, temporarily replace valid <video> tags with a placeholder
-	const videoTagRegex = /<video\s+src="([^"]+)"\s+controls><\/video>/gi;
-	const placeholders: string[] = [];
-	content = content.replace(videoTagRegex, (_, src) => {
-		const placeholder = `{{VIDEO_${placeholders.length}}}`;
-		placeholders.push(`<video src="${src}" controls></video>`);
-		return placeholder;
-	});
-
-	// Now apply the sanitization to the rest of the content
-	content = content
-		.replace(/<\|[a-z]*$/, '')
-		.replace(/<\|[a-z]+\|$/, '')
-		.replace(/<$/, '')
-		.replaceAll(/<\|[a-z]+\|>/g, ' ')
-		.replaceAll('<', '&lt;')
-		.replaceAll('>', '&gt;')
-		.trim();
-
-	// Replace placeholders with original <video> tags
-	placeholders.forEach((placeholder, index) => {
-		content = content.replace(`{{VIDEO_${index}}}`, placeholder);
-	});
-
-	return content.trim();
-};
-
 export const replaceTokens = (content, char, user) => {
 export const replaceTokens = (content, char, user) => {
 	const charToken = /{{char}}/gi;
 	const charToken = /{{char}}/gi;
 	const userToken = /{{user}}/gi;
 	const userToken = /{{user}}/gi;
@@ -87,8 +54,24 @@ export const replaceTokens = (content, char, user) => {
 	return content;
 	return content;
 };
 };
 
 
+export const sanitizeResponseContent = (content: string) => {
+	return content
+		.replace(/<\|[a-z]*$/, '')
+		.replace(/<\|[a-z]+\|$/, '')
+		.replace(/<$/, '')
+		.replaceAll(/<\|[a-z]+\|>/g, ' ')
+		.replaceAll('<', '&lt;')
+		.replaceAll('>', '&gt;')
+		.trim();
+};
+
+export const processResponseContent = (content: string) => {
+	content = convertLatexToSingleLine(content);
+	return content.trim();
+};
+
 export const revertSanitizedResponseContent = (content: string) => {
 export const revertSanitizedResponseContent = (content: string) => {
-	return content.replaceAll('&lt;', '<').replaceAll('&gt;', '>').replaceAll('\\\\', '\\');
+	return content.replaceAll('&lt;', '<').replaceAll('&gt;', '>');
 };
 };
 
 
 export function unescapeHtml(html: string) {
 export function unescapeHtml(html: string) {