Evaluations.svelte 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. <script lang="ts">
  2. import { onMount, getContext } from 'svelte';
  3. import dayjs from 'dayjs';
  4. import relativeTime from 'dayjs/plugin/relativeTime';
  5. dayjs.extend(relativeTime);
  6. import { models } from '$lib/stores';
  7. import { getAllFeedbacks } from '$lib/apis/evaluations';
  8. import FeedbackMenu from './Evaluations/FeedbackMenu.svelte';
  9. import EllipsisHorizontal from '../icons/EllipsisHorizontal.svelte';
  10. import Tooltip from '../common/Tooltip.svelte';
  11. import Badge from '../common/Badge.svelte';
  // Component state rendered by the markup: the feedback records loaded on
  // mount and the per-model leaderboard rows derived from them.
  let feedbacks = [];
  let rankedModels = [];

  // Translation store looked up from the component context under the 'i18n'
  // key; the markup reads it as `$i18n.t(...)`.
  const i18n = getContext('i18n');
  /**
   * The part of a feedback record that the rating calculation reads.
   * `rating` is compared by its string form ('1', '0', '-1'), so both numeric
   * and string encodings are accepted.
   */
  type Feedback = {
    data: {
      model_id: string;
      sibling_model_ids?: string[] | null;
      rating: number | string;
    };
  };

  /** Running Elo rating plus win/draw/loss counters for one model. */
  type ModelStats = {
    rating: number;
    won: number;
    draw: number;
    lost: number;
  };

  /**
   * Replays every feedback as a set of pairwise matches between the rated model
   * and each of its sibling models, and returns the resulting Elo statistics.
   *
   * - Every model starts at 1000; the K-factor is 32.
   * - Rating 1 is a win for `model_id`, 0 a draw, -1 a loss. Any other rating
   *   (including a missing one) makes the feedback be skipped.
   * - A model only appears in the returned map once it has taken part in at
   *   least one pairing; feedback without siblings adds nothing.
   *
   * @param feedbacks Feedback records, processed in array order.
   * @returns Map from model id to its accumulated statistics.
   */
  function calculateModelStats(feedbacks: Feedback[]): Map<string, ModelStats> {
    const stats = new Map<string, ModelStats>();
    const K = 32;
    const INITIAL_RATING = 1000;

    // Score of `model_id` for each accepted rating value, keyed by its string form.
    const outcomeByRating = new Map<string, number>([
      ['1', 1],
      ['0', 0.5],
      ['-1', 0]
    ]);

    // Returns the stored stats, or a fresh unstored default for an unseen model.
    function getOrDefaultStats(modelId: string): ModelStats {
      return stats.get(modelId) ?? { rating: INITIAL_RATING, won: 0, draw: 0, lost: 0 };
    }

    // Applies one match result to a model and makes sure it is stored in the map.
    function updateStats(modelId: string, ratingChange: number, outcome: number) {
      const currentStats = getOrDefaultStats(modelId);
      currentStats.rating += ratingChange;
      if (outcome === 1) currentStats.won++;
      else if (outcome === 0.5) currentStats.draw++;
      else if (outcome === 0) currentStats.lost++;
      stats.set(modelId, currentStats);
    }

    // Standard Elo update: K * (actual score - expected score).
    function calculateEloChange(ratingA: number, ratingB: number, outcome: number): number {
      const expectedScore = 1 / (1 + Math.pow(10, (ratingB - ratingA) / 400));
      return K * (outcome - expectedScore);
    }

    for (const feedback of feedbacks ?? []) {
      const data = feedback?.data;
      if (!data) continue;

      // String() instead of .toString() so a null/undefined rating is skipped
      // rather than throwing.
      const outcome = outcomeByRating.get(String(data.rating));
      if (outcome === undefined) continue; // Skip invalid ratings

      const modelA = data.model_id;
      for (const modelB of data.sibling_model_ids ?? []) {
        // Read both ratings from the map for every pairing. A snapshot taken
        // before the loop goes stale as soon as the first pairing stores a new
        // stats object, which made later pairings use the initial rating.
        const ratingA = getOrDefaultStats(modelA).rating;
        const ratingB = getOrDefaultStats(modelB).rating;

        // Both deltas are computed from the pre-match ratings before either
        // side is updated.
        const changeA = calculateEloChange(ratingA, ratingB, outcome);
        const changeB = calculateEloChange(ratingB, ratingA, 1 - outcome);

        updateStats(modelA, changeA, outcome);
        updateStats(modelB, changeB, 1 - outcome);
      }
    }

    return stats;
  }
  // Stays false until the mount handler has finished building the leaderboard;
  // the markup is wrapped in `{#if loaded}`.
  let loaded = false;

  /**
   * On mount: fetch all feedback with the token from localStorage, compute the
   * per-model Elo stats and build the leaderboard rows from the `$models` store.
   *
   * Arena models and models flagged `info.meta.hidden` are left out. Models
   * without any stats show '-' in every numeric column and are listed after all
   * rated models, ordered by name.
   */
  onMount(async () => {
    // Fall back to an empty list so a null/undefined response cannot crash
    // the stats calculation.
    feedbacks = (await getAllFeedbacks(localStorage.token)) ?? [];
    const modelStats = calculateModelStats(feedbacks);

    rankedModels = $models
      .filter((m) => m?.owned_by !== 'arena' && (m?.info?.meta?.hidden ?? false) !== true)
      .map((model) => {
        // The stats map is keyed by feedback.data.model_id, so it has to be
        // queried with the model's id; the display name only matches when it
        // happens to equal the id.
        const stats = modelStats.get(model.id);
        return {
          ...model,
          rating: stats ? Math.round(stats.rating) : '-',
          stats: {
            won: stats ? stats.won.toString() : '-',
            draw: stats ? stats.draw.toString() : '-',
            lost: stats ? stats.lost.toString() : '-'
          }
        };
      })
      .sort((a, b) => {
        // Both rated: highest rating first.
        if (typeof a.rating === 'number' && typeof b.rating === 'number') {
          return b.rating - a.rating;
        }
        // Exactly one rated: the unrated ('-') entry goes to the end.
        if (typeof a.rating === 'number') return -1;
        if (typeof b.rating === 'number') return 1;
        // Neither rated: alphabetical by name.
        return a.name.localeCompare(b.name);
      });

    loaded = true;
  });
  101. </script>
  102. {#if loaded}
  <!-- Leaderboard heading: title, a thin divider and the number of listed models. -->
  <div class="mt-0.5 mb-2 gap-1 flex flex-col md:flex-row justify-between">
    <div class="flex md:self-center text-lg font-medium px-0.5">
      {$i18n.t('Leaderboard')}
      <div class="flex self-center w-[1px] h-6 mx-2.5 bg-gray-50 dark:bg-gray-850"></div>
      <span class="text-lg font-medium text-gray-500 dark:text-gray-300">{rankedModels.length}</span
      >
    </div>
  </div>

  <!-- Leaderboard body: one row per entry of rankedModels, or a placeholder when empty. -->
  <div
    class="scrollbar-hidden relative whitespace-nowrap overflow-x-auto max-w-full rounded pt-0.5"
  >
    {#if (rankedModels ?? []).length > 0}
      <table
        class="w-full text-sm text-left text-gray-500 dark:text-gray-400 table-auto max-w-full rounded"
      >
        <thead
          class="text-xs text-gray-700 uppercase bg-gray-50 dark:bg-gray-850 dark:text-gray-400 -translate-y-0.5"
        >
          <tr class="">
            <th scope="col" class="px-3 py-1.5 cursor-pointer select-none w-3">
              {$i18n.t('RK')}
            </th>
            <th scope="col" class="px-3 py-1.5 cursor-pointer select-none">
              {$i18n.t('Model')}
            </th>
            <th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-fit">
              {$i18n.t('Rating')}
            </th>
            <th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-fit">
              {$i18n.t('Won')}
            </th>
            <th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-fit">
              {$i18n.t('Draw')}
            </th>
            <th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-fit">
              {$i18n.t('Lost')}
            </th>
          </tr>
        </thead>
        <tbody class="">
          {#each rankedModels as model, modelIdx (model.id)}
            <tr class="bg-white dark:bg-gray-900 dark:border-gray-850 text-xs">
              <!-- Rank: the 1-based row position, or '-' for a model without a rating. -->
              <td class="px-3 py-1.5 text-left font-medium text-gray-900 dark:text-white w-fit">
                <div class=" line-clamp-1">
                  {model?.rating === '-' ? '-' : modelIdx + 1}
                </div>
              </td>
              <!-- Model avatar (falls back to the favicon) and display name. -->
              <td class="px-3 py-1.5 flex flex-col justify-center">
                <div class="flex items-center gap-2">
                  <div class="flex-shrink-0">
                    <img
                      src={model?.info?.meta?.profile_image_url ?? '/favicon.png'}
                      alt={model.name}
                      class="size-5 rounded-full object-cover shrink-0"
                    />
                  </div>
                  <div class="font-medium text-gray-800 dark:text-gray-200 pr-4">
                    {model.name}
                  </div>
                </div>
              </td>
              <td class="px-3 py-1.5 text-right font-medium text-gray-900 dark:text-white w-max">
                {model.rating}
              </td>
              <td class=" px-3 py-1.5 text-right font-semibold text-green-500">
                {model.stats.won}
              </td>
              <td class=" px-3 py-1.5 text-right font-semibold">
                {model.stats.draw}
              </td>
              <td class="px-3 py-1.5 text-right font-semibold text-red-500">
                {model.stats.lost}
              </td>
            </tr>
          {/each}
        </tbody>
      </table>
    {:else}
      <div class="text-center text-xs text-gray-500 dark:text-gray-400 py-1">
        {$i18n.t('No models found')}
      </div>
    {/if}
  </div>

  <div class="pb-4"></div>
  <!-- Feedback history heading. -->
  <div class="mt-0.5 mb-2 gap-1 flex flex-col md:flex-row justify-between">
    <div class="flex md:self-center text-lg font-medium px-0.5">
      {$i18n.t('Feedback History')}
    </div>
  </div>

  <!-- Feedback history body: one row per feedback record, or a placeholder when empty. -->
  <div
    class="scrollbar-hidden relative whitespace-nowrap overflow-x-auto max-w-full rounded pt-0.5"
  >
    {#if (feedbacks ?? []).length === 0}
      <div class="text-center text-xs text-gray-500 dark:text-gray-400 py-1">
        {$i18n.t('No feedbacks found')}
      </div>
    {:else}
      <table
        class="w-full text-sm text-left text-gray-500 dark:text-gray-400 table-auto max-w-full rounded"
      >
        <thead
          class="text-xs text-gray-700 uppercase bg-gray-50 dark:bg-gray-850 dark:text-gray-400 -translate-y-0.5"
        >
          <tr class="">
            <th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-0">
              {$i18n.t('User')}
            </th>
            <th scope="col" class="px-3 py-1.5 cursor-pointer select-none">
              {$i18n.t('Models')}
            </th>
            <th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-fit">
              {$i18n.t('Result')}
            </th>
            <th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-0">
              {$i18n.t('Updated At')}
            </th>
            <th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-0"> </th>
          </tr>
        </thead>
        <tbody class="">
          {#each feedbacks as feedback (feedback.id)}
            <tr class="bg-white dark:bg-gray-900 dark:border-gray-850 text-xs">
              <!-- Author avatar (falls back to /user.png) with the name as tooltip. -->
              <td class=" py-1 text-right font-semibold">
                <div class="flex justify-center">
                  <Tooltip content={feedback?.user?.name}>
                    <div class="flex-shrink-0">
                      <img
                        src={feedback?.user?.profile_image_url ?? '/user.png'}
                        alt={feedback?.user?.name}
                        class="size-6 rounded-full object-cover shrink-0"
                      />
                    </div>
                  </Tooltip>
                </div>
              </td>
              <!-- Rated model, with its sibling models listed underneath when present. -->
              <td class="px-3 py-1 flex flex-col">
                <div class="flex flex-col items-start gap-0.5 h-full">
                  <div class="flex flex-col h-full">
                    {#if feedback.data?.sibling_model_ids}
                      <div class="font-semibold text-gray-600 dark:text-gray-400 flex-1">
                        {feedback.data?.model_id}
                      </div>
                      <div class=" text-[0.65rem] text-gray-600 dark:text-gray-400 line-clamp-1">
                        {feedback.data.sibling_model_ids.join(', ')}
                      </div>
                    {:else}
                      <div
                        class=" text-sm font-medium text-gray-600 dark:text-gray-400 flex-1 py-2"
                      >
                        {feedback.data?.model_id}
                      </div>
                    {/if}
                  </div>
                </div>
              </td>
              <!--
                Result badge. The rating is read through optional chaining and String(),
                like the other `feedback.data?.` accesses in this row, so a record
                without data or rating renders no badge instead of throwing and
                aborting the whole table.
              -->
              <td class="px-3 py-1 text-right font-medium text-gray-900 dark:text-white w-max">
                <div class=" flex justify-end">
                  {#if String(feedback?.data?.rating) === '1'}
                    <Badge type="info" content={$i18n.t('Won')} />
                  {:else if String(feedback?.data?.rating) === '0'}
                    <Badge type="muted" content={$i18n.t('Draw')} />
                  {:else if String(feedback?.data?.rating) === '-1'}
                    <Badge type="error" content={$i18n.t('Lost')} />
                  {/if}
                </div>
              </td>
              <!-- updated_at is multiplied by 1000 before being handed to dayjs. -->
              <td class=" px-3 py-1 text-right font-medium">
                {dayjs(feedback.updated_at * 1000).fromNow()}
              </td>
              <td class=" px-3 py-1 text-right font-semibold">
                <FeedbackMenu>
                  <button
                    class="self-center w-fit text-sm p-1.5 dark:text-gray-300 dark:hover:text-white hover:bg-black/5 dark:hover:bg-white/5 rounded-xl"
                  >
                    <EllipsisHorizontal />
                  </button>
                </FeedbackMenu>
              </td>
            </tr>
          {/each}
        </tbody>
      </table>
    {/if}
  </div>

  <div class="pb-8"></div>
  288. {/if}