Leaderboard.svelte 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. <script lang="ts">
  2. import * as ort from 'onnxruntime-web';
  3. import { env, AutoModel, AutoTokenizer } from '@huggingface/transformers';
  4. env.backends.onnx.wasm.wasmPaths = '/wasm/';
  5. import { onMount, getContext } from 'svelte';
  6. import { models } from '$lib/stores';
  7. import Spinner from '$lib/components/common/Spinner.svelte';
  8. import Tooltip from '$lib/components/common/Tooltip.svelte';
  9. import MagnifyingGlass from '$lib/components/icons/MagnifyingGlass.svelte';
  // i18n store obtained from Svelte context; read as `$i18n.t(...)` in the markup.
  const i18n = getContext('i18n');

  // Component prop: the feedback records the ranking is computed from.
  export let feedbacks = [];

  // --- Embedding state -------------------------------------------------------
  // Model id handed to AutoTokenizer / AutoModel.from_pretrained.
  const EMBEDDING_MODEL = 'TaylorAI/bge-micro-v2';
  // Both stay null until loadEmbeddingModel() has run.
  let tokenizer = null;
  let model = null;
  // Cache of tag -> embedding, filled by getTagEmbeddings().
  let tagEmbeddings = new Map();

  // --- Search state ----------------------------------------------------------
  // Text bound to the search input; an empty string means "no topic filter".
  let query = '';
  // Handle of the pending debounce timeout, if any.
  let debounceTimer;

  // --- View state ------------------------------------------------------------
  // Rows rendered by the table, already sorted.
  let rankedModels = [];
  // True while a ranking is being (re)computed; drives the spinner overlay.
  let loadingLeaderboard = true;
  /** Payload of a single feedback entry. */
  type FeedbackData = {
    /** Verdict for `model_id`: 1 counts as a win, -1 as a loss, anything else is skipped. */
    rating: number;
    /** The model this feedback is about. */
    model_id: string;
    /** Models that `model_id` was compared against; null when there were none. */
    sibling_model_ids: string[] | null;
    reason: string;
    comment: string;
    /** Topic tags; embedded and compared with the search query. */
    tags: string[];
  };

  /** Author of a feedback entry. */
  type FeedbackUser = {
    name: string;
    profile_image_url: string;
  };

  /** One feedback record as received through the `feedbacks` prop. */
  type Feedback = {
    /** Key under which the per-feedback similarity is stored. */
    id: string;
    data: FeedbackData;
    user: FeedbackUser;
    updated_at: number;
  };

  /** Running Elo state for one model. */
  type ModelStats = {
    rating: number;
    won: number;
    lost: number;
  };
  41. //////////////////////
  42. //
  43. // Rank models by Elo rating
  44. //
  45. //////////////////////
  /**
   * Rebuild `rankedModels` from the current `$models` store and `feedbacks`.
   *
   * @param similarities Per-feedback weight keyed by feedback id; defaults to an
   *   empty map.
   *
   * Rated models come first, highest rating on top; models without any stats
   * follow, ordered by name. Clears `loadingLeaderboard` when done.
   */
  const rankHandler = async (similarities: Map<string, number> = new Map()) => {
    const statsById = calculateModelStats(feedbacks, similarities);

    // Arena pseudo-models and models flagged as hidden never show up.
    const visibleModels = $models.filter((m) => {
      const isArena = m?.owned_by === 'arena';
      const isHidden = m?.info?.meta?.hidden === true;
      return !isArena && !isHidden;
    });

    const rows = visibleModels.map((entry) => {
      const stats = statsById.get(entry.id);
      if (!stats) {
        // No feedback involved this model: show placeholders.
        return { ...entry, rating: '-', stats: { count: 0, won: '-', lost: '-' } };
      }
      return {
        ...entry,
        rating: Math.round(stats.rating),
        stats: {
          count: stats.won + stats.lost,
          won: stats.won.toString(),
          lost: stats.lost.toString()
        }
      };
    });

    rankedModels = rows.sort((a, b) => {
      const aUnrated = a.rating === '-';
      const bUnrated = b.rating === '-';
      if (aUnrated && bUnrated) return a.name.localeCompare(b.name);
      if (aUnrated) return 1;
      if (bUnrated) return -1;
      return b.rating - a.rating;
    });

    loadingLeaderboard = false;
  };
  /**
   * Replay the feedback list as a sequence of pairwise Elo matches.
   *
   * Each feedback pits `data.model_id` against every entry of
   * `data.sibling_model_ids`. A rating of 1 is a win for `model_id`, -1 a loss;
   * any other value (including a missing rating) skips the feedback.
   *
   * @param feedbacks    Feedback records, processed in array order.
   * @param similarities Weight per feedback id. Only consulted while a search
   *   query is active; feedbacks missing from the map then weigh 0.
   * @returns Stats for every model that took part in at least one pairing.
   *   Models start from a rating of 1000.
   */
  function calculateModelStats(
    feedbacks: Feedback[],
    similarities: Map<string, number>
  ): Map<string, ModelStats> {
    const stats = new Map<string, ModelStats>();
    const K = 32;
    const INITIAL_RATING = 1000;

    // A blank (whitespace-only) query is "no query", matching how
    // debouncedQueryHandler decides whether to compute similarities at all.
    const hasQuery = query.trim() !== '';

    // Always read the rating from the map so that updates made by an earlier
    // pairing are visible to the next one, even for models first seen in this
    // very feedback.
    const currentRating = (modelId: string): number =>
      stats.get(modelId)?.rating ?? INITIAL_RATING;

    function updateStats(modelId: string, ratingChange: number, outcome: number) {
      const entry = stats.get(modelId) ?? { rating: INITIAL_RATING, won: 0, lost: 0 };
      entry.rating += ratingChange;
      if (outcome === 1) entry.won++;
      else if (outcome === 0) entry.lost++;
      stats.set(modelId, entry);
    }

    function calculateEloChange(
      ratingA: number,
      ratingB: number,
      outcome: number,
      similarity: number
    ): number {
      const expectedScore = 1 / (1 + Math.pow(10, (ratingB - ratingA) / 400));
      return K * (outcome - expectedScore) * similarity;
    }

    for (const feedback of feedbacks) {
      const modelA = feedback.data.model_id;

      let outcome: number;
      // String() instead of .toString() so a null/undefined rating is skipped
      // rather than throwing.
      switch (String(feedback.data.rating)) {
        case '1':
          outcome = 1;
          break;
        case '-1':
          outcome = 0;
          break;
        default:
          continue; // Skip invalid ratings
      }

      // Without a query every feedback counts fully; with one, it is scaled by
      // its similarity to the query.
      const similarity = hasQuery ? similarities.get(feedback.id) || 0 : 1;

      const opponents = feedback.data.sibling_model_ids || [];
      for (const modelB of opponents) {
        // Both changes are derived from the same pre-match ratings.
        const ratingA = currentRating(modelA);
        const ratingB = currentRating(modelB);
        const changeA = calculateEloChange(ratingA, ratingB, outcome, similarity);
        const changeB = calculateEloChange(ratingB, ratingA, 1 - outcome, similarity);

        updateStats(modelA, changeA, outcome);
        updateStats(modelB, changeB, 1 - outcome);
      }
    }

    return stats;
  }
  122. //////////////////////
  123. //
  124. // Calculate cosine similarity
  125. //
  126. //////////////////////
  /**
   * Cosine similarity of two numeric vectors.
   *
   * @param vecA First vector (any indexable sequence of numbers with `length`).
   * @param vecB Second vector; must have the same length as `vecA`.
   * @returns dot(a, b) / (|a| * |b|), or 0 when either vector has zero magnitude.
   * @throws Error when the two lengths differ.
   */
  const cosineSimilarity = (vecA, vecB) => {
    if (vecA.length !== vecB.length) {
      throw new Error('Vectors must be the same length');
    }

    // Single pass: accumulate the dot product and both squared magnitudes.
    let dot = 0;
    let squaredA = 0;
    let squaredB = 0;
    let idx = 0;
    while (idx < vecA.length) {
      dot += vecA[idx] * vecB[idx];
      squaredA += vecA[idx] ** 2;
      squaredB += vecB[idx] ** 2;
      idx += 1;
    }

    const magnitudeA = Math.sqrt(squaredA);
    const magnitudeB = Math.sqrt(squaredB);

    // A zero vector has no direction; report "no similarity" instead of NaN.
    const degenerate = magnitudeA === 0 || magnitudeB === 0;
    return degenerate ? 0 : dot / (magnitudeA * magnitudeB);
  };
  /**
   * Best cosine similarity between the query embedding and any of the given
   * tag embeddings.
   *
   * @param queryEmbedding Embedding of the search text.
   * @param tagEmbeddings  Tag -> embedding map for one feedback.
   * @returns The largest similarity found, never below the starting value 0
   *   (so an empty map, or only negative similarities, gives 0).
   */
  const calculateMaxSimilarity = (queryEmbedding, tagEmbeddings: Map<string, number[]>) =>
    Array.from(tagEmbeddings.values()).reduce(
      (best, candidate) => Math.max(best, cosineSimilarity(queryEmbedding, candidate)),
      0
    );
  159. //////////////////////
  160. //
  161. // Embedding functions
  162. //
  163. //////////////////////
  /**
   * Make the tokenizer and embedding model available, then embed every tag
   * that occurs in `feedbacks`.
   *
   * The loaded instances are kept on `window` and reused on later calls, so
   * `from_pretrained` is only invoked while the corresponding `window` slot
   * is still empty.
   */
  const loadEmbeddingModel = async () => {
    const shared = window;

    // Load whichever of the two pieces is still missing.
    if (!shared.tokenizer) {
      shared.tokenizer = await AutoTokenizer.from_pretrained(EMBEDDING_MODEL);
    }
    if (!shared.model) {
      shared.model = await AutoModel.from_pretrained(EMBEDDING_MODEL);
    }

    // Point the component-level references at the shared instances.
    tokenizer = shared.tokenizer;
    model = shared.model;

    // Warm the tag cache with each distinct tag.
    const distinctTags = new Set(feedbacks.map((feedback) => feedback.data.tags || []).flat());
    await getTagEmbeddings([...distinctTags]);
  };
  /**
   * Embed a piece of text with the loaded tokenizer and model.
   *
   * @param text Text to embed.
   * @returns The raw data of the model's `last_hidden_state` after taking the
   *   mean over dimension 1.
   *
   * Requires `tokenizer` and `model` to have been set by loadEmbeddingModel().
   */
  const getEmbeddings = async (text: string) => {
    const encoded = await tokenizer(text);
    const { last_hidden_state: hiddenStates } = await model(encoded);
    // Mean pooling over the last hidden states.
    return hiddenStates.mean(1).ort_tensor.data;
  };
  /**
   * Look up (and, where missing, compute and cache) the embedding of each tag.
   *
   * @param tags Tags to resolve; processed one after another.
   * @returns A new map tag -> embedding covering exactly the given tags.
   */
  const getTagEmbeddings = async (tags: string[]) => {
    const resolved = new Map();
    for (const tag of tags) {
      if (!tagEmbeddings.has(tag)) {
        // Cache miss: embed once and remember it for later calls.
        const vector = await getEmbeddings(tag);
        tagEmbeddings.set(tag, vector);
      }
      resolved.set(tag, tagEmbeddings.get(tag));
    }
    return resolved;
  };
  /**
   * React to a change of the search text.
   *
   * - Blank query: cancel any pending similarity run and rank immediately
   *   without weights.
   * - Otherwise: wait 1.5 s after the last change, embed the query, score each
   *   feedback by its best-matching tag and re-rank with those weights.
   *
   * Shows the loading state right away; it is cleared by rankHandler(), or
   * here if the similarity run fails.
   */
  const debouncedQueryHandler = async () => {
    loadingLeaderboard = true;

    // Cancel first, so that clearing the input also drops a run that was
    // scheduled for the previous text.
    clearTimeout(debounceTimer);

    if (query.trim() === '') {
      rankHandler();
      return;
    }

    debounceTimer = setTimeout(async () => {
      // Remember which text this run is for; the input may change while the
      // awaits below are in flight.
      const submittedQuery = query;
      try {
        // The model is loaded on input focus, but that load may not have
        // finished (or may have failed) by the time the debounce fires.
        if (!tokenizer || !model) {
          await loadEmbeddingModel();
        }

        const queryEmbedding = await getEmbeddings(submittedQuery);
        const similarities = new Map<string, number>();

        for (const feedback of feedbacks) {
          const feedbackTagEmbeddings = await getTagEmbeddings(feedback.data.tags || []);
          similarities.set(
            feedback.id,
            calculateMaxSimilarity(queryEmbedding, feedbackTagEmbeddings)
          );
        }

        // A newer query has taken over; its own run will publish the ranking.
        if (query !== submittedQuery) return;

        await rankHandler(similarities);
      } catch (error) {
        console.error('Failed to rank models by topic similarity:', error);
        // Keep the previous ranking visible instead of a spinner that never
        // goes away, unless a newer run is already pending.
        if (query === submittedQuery) {
          loadingLeaderboard = false;
        }
      }
    }, 1500); // Debounce for 1.5 seconds
  };
  // Re-evaluated by Svelte whenever `query` changes: kick off a (debounced)
  // re-ranking for the new search text.
  $: {
    query;
    debouncedQueryHandler();
  }

  // Initial ranking once the component is mounted.
  const showInitialRanking = async () => {
    rankHandler();
  };
  onMount(showInitialRanking);
  219. </script>
  220. <div class="mt-0.5 mb-2 gap-1 flex flex-col md:flex-row justify-between">
  221. <div class="flex md:self-center text-lg font-medium px-0.5 shrink-0 items-center">
  222. <div class=" gap-1">
  223. {$i18n.t('Leaderboard')}
  224. </div>
  225. <div class="flex self-center w-[1px] h-6 mx-2.5 bg-gray-50 dark:bg-gray-850" />
  226. <span class="text-lg font-medium text-gray-500 dark:text-gray-300 mr-1.5"
  227. >{rankedModels.length}</span
  228. >
  229. </div>
  230. <div class=" flex space-x-2">
  231. <Tooltip content={$i18n.t('Re-rank models by topic similarity')}>
  232. <div class="flex flex-1">
  233. <div class=" self-center ml-1 mr-3">
  234. <MagnifyingGlass className="size-3" />
  235. </div>
  236. <input
  237. class=" w-full text-sm pr-4 py-1 rounded-r-xl outline-hidden bg-transparent"
  238. bind:value={query}
  239. placeholder={$i18n.t('Search')}
  240. on:focus={() => {
  241. loadEmbeddingModel();
  242. }}
  243. />
  244. </div>
  245. </Tooltip>
  246. </div>
  247. </div>
  248. <div
  249. class="scrollbar-hidden relative whitespace-nowrap overflow-x-auto max-w-full rounded-sm pt-0.5"
  250. >
  251. {#if loadingLeaderboard}
  252. <div class=" absolute top-0 bottom-0 left-0 right-0 flex">
  253. <div class="m-auto">
  254. <Spinner />
  255. </div>
  256. </div>
  257. {/if}
  258. {#if (rankedModels ?? []).length === 0}
  259. <div class="text-center text-xs text-gray-500 dark:text-gray-400 py-1">
  260. {$i18n.t('No models found')}
  261. </div>
  262. {:else}
  263. <table
  264. class="w-full text-sm text-left text-gray-500 dark:text-gray-400 table-auto max-w-full rounded {loadingLeaderboard
  265. ? 'opacity-20'
  266. : ''}"
  267. >
  268. <thead
  269. class="text-xs text-gray-700 uppercase bg-gray-50 dark:bg-gray-850 dark:text-gray-400 -translate-y-0.5"
  270. >
  271. <tr class="">
  272. <th scope="col" class="px-3 py-1.5 cursor-pointer select-none w-3">
  273. {$i18n.t('RK')}
  274. </th>
  275. <th scope="col" class="px-3 py-1.5 cursor-pointer select-none">
  276. {$i18n.t('Model')}
  277. </th>
  278. <th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-fit">
  279. {$i18n.t('Rating')}
  280. </th>
  281. <th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-5">
  282. {$i18n.t('Won')}
  283. </th>
  284. <th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-5">
  285. {$i18n.t('Lost')}
  286. </th>
  287. </tr>
  288. </thead>
  289. <tbody class="">
  290. {#each rankedModels as model, modelIdx (model.id)}
  291. <tr class="bg-white dark:bg-gray-900 dark:border-gray-850 text-xs group">
  292. <td class="px-3 py-1.5 text-left font-medium text-gray-900 dark:text-white w-fit">
  293. <div class=" line-clamp-1">
  294. {model?.rating !== '-' ? modelIdx + 1 : '-'}
  295. </div>
  296. </td>
  297. <td class="px-3 py-1.5 flex flex-col justify-center">
  298. <div class="flex items-center gap-2">
  299. <div class="shrink-0">
  300. <img
  301. src={model?.info?.meta?.profile_image_url ?? '/favicon.png'}
  302. alt={model.name}
  303. class="size-5 rounded-full object-cover shrink-0"
  304. />
  305. </div>
  306. <div class="font-medium text-gray-800 dark:text-gray-200 pr-4">
  307. {model.name}
  308. </div>
  309. </div>
  310. </td>
  311. <td class="px-3 py-1.5 text-right font-medium text-gray-900 dark:text-white w-max">
  312. {model.rating}
  313. </td>
  314. <td class=" px-3 py-1.5 text-right font-semibold text-green-500">
  315. <div class=" w-10">
  316. {#if model.stats.won === '-'}
  317. -
  318. {:else}
  319. <span class="hidden group-hover:inline"
  320. >{((model.stats.won / model.stats.count) * 100).toFixed(1)}%</span
  321. >
  322. <span class=" group-hover:hidden">{model.stats.won}</span>
  323. {/if}
  324. </div>
  325. </td>
  326. <td class="px-3 py-1.5 text-right font-semibold text-red-500">
  327. <div class=" w-10">
  328. {#if model.stats.lost === '-'}
  329. -
  330. {:else}
  331. <span class="hidden group-hover:inline"
  332. >{((model.stats.lost / model.stats.count) * 100).toFixed(1)}%</span
  333. >
  334. <span class=" group-hover:hidden">{model.stats.lost}</span>
  335. {/if}
  336. </div>
  337. </td>
  338. </tr>
  339. {/each}
  340. </tbody>
  341. </table>
  342. {/if}
  343. </div>
  344. <div class=" text-gray-500 text-xs mt-1.5 w-full flex justify-end">
  345. <div class=" text-right">
  346. <div class="line-clamp-1">
  347. ⓘ {$i18n.t(
  348. 'The evaluation leaderboard is based on the Elo rating system and is updated in real-time.'
  349. )}
  350. </div>
  351. {$i18n.t(
  352. 'The leaderboard is currently in beta, and we may adjust the rating calculations as we refine the algorithm.'
  353. )}
  354. </div>
  355. </div>