Added first token latency and replaced latency with total generation time.

#2
by KingNish - opened
src/lib/components/InferencePlayground/InferencePlayground.svelte CHANGED
@@ -42,7 +42,8 @@
42
  let viewSettings = false;
43
  let showTokenModal = false;
44
  let loading = false;
45
- let latency = 0;
 
46
  let generatedTokensCount = 0;
47
  let abortController: AbortController | undefined = undefined;
48
  let waitForNonStreaming = true;
@@ -91,12 +92,16 @@
91
 
92
  (document.activeElement as HTMLElement).blur();
93
  loading = true;
 
 
 
94
 
95
  try {
96
  const startTime = performance.now();
97
  const hf = createHfInference(hfToken);
98
 
99
  if (conversation.streaming) {
 
100
  const streamingMessage = { role: "assistant", content: "" };
101
  conversation.messages = [...conversation.messages, streamingMessage];
102
  abortController = new AbortController();
@@ -109,6 +114,11 @@
109
  streamingMessage.content = content;
110
  conversation.messages = [...conversation.messages];
111
  generatedTokensCount += 1;
 
 
 
 
 
112
  }
113
  },
114
  abortController
@@ -123,11 +133,11 @@
123
  if (waitForNonStreaming) {
124
  conversation.messages = [...conversation.messages, newMessage];
125
  generatedTokensCount += newTokensCount;
 
126
  }
127
  }
128
 
129
- const endTime = performance.now();
130
- latency = Math.round(endTime - startTime);
131
  } catch (error) {
132
  if (conversation.messages.at(-1)?.role === "assistant" && !conversation.messages.at(-1)?.content?.trim()) {
133
  conversation.messages.pop();
@@ -261,7 +271,9 @@
261
  <IconDelete />
262
  </button>
263
  <div class="flex-1 items-center justify-center text-center text-sm text-gray-500">
264
- <span class="max-xl:hidden">{generatedTokensCount} tokens · Latency {latency}ms</span>
 
 
265
  </div>
266
  <button
267
  type="button"
@@ -269,8 +281,8 @@
269
  class="flex h-[39px] items-center gap-2 rounded-lg border border-gray-200 bg-white px-3 py-2.5 text-sm font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:outline-none focus:ring-4 focus:ring-gray-100 dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700"
270
  >
271
  <IconCode />
272
- {!viewCode ? "View Code" : "Hide Code"}</button
273
- >
274
  <button
275
  on:click={() => {
276
  viewCode = false;
@@ -357,4 +369,4 @@
357
  >
358
  <IconInfo classNames="text-xs" />
359
  Give feedback
360
- </a>
 
42
  let viewSettings = false;
43
  let showTokenModal = false;
44
  let loading = false;
45
+ let latency = 0; // Total generation time in ms (variable name kept; displayed as "Total Generation")
46
+ let firstTokenLatency = 0; // New variable for first token latency
47
  let generatedTokensCount = 0;
48
  let abortController: AbortController | undefined = undefined;
49
  let waitForNonStreaming = true;
 
92
 
93
  (document.activeElement as HTMLElement).blur();
94
  loading = true;
95
+ firstTokenLatency = 0; // Reset before each submission
96
+ generatedTokensCount = 0; // Reset before each submission
97
+
98
 
99
  try {
100
  const startTime = performance.now();
101
  const hf = createHfInference(hfToken);
102
 
103
  if (conversation.streaming) {
104
+ let firstTokenReceived = false; // Flag to track first token
105
  const streamingMessage = { role: "assistant", content: "" };
106
  conversation.messages = [...conversation.messages, streamingMessage];
107
  abortController = new AbortController();
 
114
  streamingMessage.content = content;
115
  conversation.messages = [...conversation.messages];
116
  generatedTokensCount += 1;
117
+
118
+ if (!firstTokenReceived) { // Check if it's the first token
119
+ firstTokenLatency = Math.round(performance.now() - startTime);
120
+ firstTokenReceived = true;
121
+ }
122
  }
123
  },
124
  abortController
 
133
  if (waitForNonStreaming) {
134
  conversation.messages = [...conversation.messages, newMessage];
135
  generatedTokensCount += newTokensCount;
136
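+ firstTokenLatency = latency; // NOTE(review): intended to equal this request's total time in non-streaming mode, but `latency` is only assigned after this block, so this copies the previous request's value — compute the elapsed time before this assignment.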
137
  }
138
  }
139
 
140
+ latency = Math.round(performance.now() - startTime); // Total generation time
 
141
  } catch (error) {
142
  if (conversation.messages.at(-1)?.role === "assistant" && !conversation.messages.at(-1)?.content?.trim()) {
143
  conversation.messages.pop();
 
271
  <IconDelete />
272
  </button>
273
  <div class="flex-1 items-center justify-center text-center text-sm text-gray-500">
274
+ <span class="max-xl:hidden">
275
+ {generatedTokensCount} tokens · First Token: {firstTokenLatency}ms · Total Generation: {latency}ms
276
+ </span>
277
  </div>
278
  <button
279
  type="button"
 
281
  class="flex h-[39px] items-center gap-2 rounded-lg border border-gray-200 bg-white px-3 py-2.5 text-sm font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:outline-none focus:ring-4 focus:ring-gray-100 dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700"
282
  >
283
  <IconCode />
284
+ {!viewCode ? "View Code" : "Hide Code"}
285
+ </button>
286
  <button
287
  on:click={() => {
288
  viewCode = false;
 
369
  >
370
  <IconInfo classNames="text-xs" />
371
  Give feedback
372
+ </a>