diff --git a/lib/llm/CerebrasClient.ts b/lib/llm/CerebrasClient.ts
index 4d5a0daca..4b12c0380 100644
--- a/lib/llm/CerebrasClient.ts
+++ b/lib/llm/CerebrasClient.ts
@@ -183,7 +183,9 @@ export class CerebrasClient extends LLMClient {
             ? [
                 {
                   role: "system" as const,
-                  content: `IMPORTANT: Your response must be valid JSON that matches this schema: ${JSON.stringify(options.response_model.schema)}`,
+                  content: `IMPORTANT: Your response must be valid JSON that matches this schema: ${JSON.stringify(
+                    options.response_model.schema,
+                  )}`,
                 },
               ]
             : []),
@@ -234,77 +236,91 @@ export class CerebrasClient extends LLMClient {
         },
       });
 
-      if (options.response_model) {
-        // First try standard function calling format
-        const toolCall = response.choices[0]?.message?.tool_calls?.[0];
-        if (toolCall?.function?.arguments) {
-          try {
-            const result = JSON.parse(toolCall.function.arguments);
-            if (this.enableCaching) {
-              this.cache.set(cacheOptions, result, options.requestId);
-            }
-            return result as T;
-          } catch (e) {
-            // If JSON parse fails, the model might be returning a different format
-            logger({
-              category: "cerebras",
-              message: "failed to parse tool call arguments as JSON, retrying",
-              level: 0,
-              auxiliary: {
-                error: {
-                  value: e.message,
-                  type: "string",
-                },
-              },
-            });
-          }
+      // If we have no response model, just return the entire LLMResponse
+      if (!options.response_model) {
+        if (this.enableCaching) {
+          await this.cache.set(cacheOptions, response, options.requestId);
         }
+        return response as T;
+      }
 
-        // If we have content but no tool calls, try to parse the content as JSON
-        const content = response.choices[0]?.message?.content;
-        if (content) {
-          try {
-            // Try to extract JSON from the content
-            const jsonMatch = content.match(/\{[\s\S]*\}/);
-            if (jsonMatch) {
-              const result = JSON.parse(jsonMatch[0]);
-              if (this.enableCaching) {
-                this.cache.set(cacheOptions, result, options.requestId);
-              }
-              return result as T;
-            }
-          } catch (e) {
-            logger({
-              category: "cerebras",
-              message: "failed to parse content as JSON",
-              level: 0,
-              auxiliary: {
-                error: {
-                  value: e.message,
-                  type: "string",
-                },
-              },
-            });
+      // If we have a response model, parse JSON from tool calls or content
+      const toolCall = response.choices[0]?.message?.tool_calls?.[0];
+      if (toolCall?.function?.arguments) {
+        try {
+          const result = JSON.parse(toolCall.function.arguments);
+          const finalResponse = {
+            data: result,
+            usage: response.usage,
+          };
+          if (this.enableCaching) {
+            await this.cache.set(
+              cacheOptions,
+              finalResponse,
+              options.requestId,
+            );
           }
+          return finalResponse as T;
+        } catch (e) {
+          logger({
+            category: "cerebras",
+            message: "failed to parse tool call arguments as JSON, retrying",
+            level: 0,
+            auxiliary: {
+              error: {
+                value: e.message,
+                type: "string",
+              },
+            },
+          });
         }
+      }
 
-        // If we still haven't found valid JSON and have retries left, try again
-        if (!retries || retries < 5) {
-          return this.createChatCompletion({
-            options,
-            logger,
-            retries: (retries ?? 0) + 1,
+      // If we have content but no tool calls, try to parse the content as JSON
+      const content = response.choices[0]?.message?.content;
+      if (content) {
+        try {
+          const jsonMatch = content.match(/\{[\s\S]*\}/);
+          if (jsonMatch) {
+            const result = JSON.parse(jsonMatch[0]);
+            const finalResponse = {
+              data: result,
+              usage: response.usage,
+            };
+            if (this.enableCaching) {
+              await this.cache.set(
+                cacheOptions,
+                finalResponse,
+                options.requestId,
+              );
+            }
+            return finalResponse as T;
+          }
+        } catch (e) {
+          logger({
+            category: "cerebras",
+            message: "failed to parse content as JSON",
+            level: 0,
+            auxiliary: {
+              error: {
+                value: e.message,
+                type: "string",
+              },
+            },
           });
         }
-
-        throw new CreateChatCompletionResponseError("Invalid response schema");
       }
 
-      if (this.enableCaching) {
-        this.cache.set(cacheOptions, response, options.requestId);
+      // If we still haven't found valid JSON and have retries left, try again
+      if (!retries || retries < 5) {
+        return this.createChatCompletion({
+          options,
+          logger,
+          retries: (retries ?? 0) + 1,
+        });
       }
 
-      return response as T;
+      throw new CreateChatCompletionResponseError("Invalid response schema");
     } catch (error) {
       logger({
         category: "cerebras",
diff --git a/lib/llm/GroqClient.ts b/lib/llm/GroqClient.ts
index fe91d06ba..d512ed6f9 100644
--- a/lib/llm/GroqClient.ts
+++ b/lib/llm/GroqClient.ts
@@ -183,7 +183,9 @@ export class GroqClient extends LLMClient {
             ? [
                 {
                   role: "system" as const,
-                  content: `IMPORTANT: Your response must be valid JSON that matches this schema: ${JSON.stringify(options.response_model.schema)}`,
+                  content: `IMPORTANT: Your response must be valid JSON that matches this schema: ${JSON.stringify(
+                    options.response_model.schema,
+                  )}`,
                 },
               ]
             : []),
@@ -234,77 +236,92 @@ export class GroqClient extends LLMClient {
         },
       });
 
-      if (options.response_model) {
-        // First try standard function calling format
-        const toolCall = response.choices[0]?.message?.tool_calls?.[0];
-        if (toolCall?.function?.arguments) {
-          try {
-            const result = JSON.parse(toolCall.function.arguments);
-            if (this.enableCaching) {
-              this.cache.set(cacheOptions, result, options.requestId);
-            }
-            return result as T;
-          } catch (e) {
-            // If JSON parse fails, the model might be returning a different format
-            logger({
-              category: "groq",
-              message: "failed to parse tool call arguments as JSON, retrying",
-              level: 0,
-              auxiliary: {
-                error: {
-                  value: e.message,
-                  type: "string",
-                },
-              },
-            });
-          }
+      // If there's no response model, return the entire response object
+      if (!options.response_model) {
+        if (this.enableCaching) {
+          await this.cache.set(cacheOptions, response, options.requestId);
         }
+        return response as T;
+      }
 
-        // If we have content but no tool calls, try to parse the content as JSON
-        const content = response.choices[0]?.message?.content;
-        if (content) {
-          try {
-            // Try to extract JSON from the content
-            const jsonMatch = content.match(/\{[\s\S]*\}/);
-            if (jsonMatch) {
-              const result = JSON.parse(jsonMatch[0]);
-              if (this.enableCaching) {
-                this.cache.set(cacheOptions, result, options.requestId);
-              }
-              return result as T;
-            }
-          } catch (e) {
-            logger({
-              category: "groq",
-              message: "failed to parse content as JSON",
-              level: 0,
-              auxiliary: {
-                error: {
-                  value: e.message,
-                  type: "string",
-                },
-              },
-            });
+      // Otherwise, try parsing the JSON from the tool call or content
+      const toolCall = response.choices[0]?.message?.tool_calls?.[0];
+      if (toolCall?.function?.arguments) {
+        try {
+          const result = JSON.parse(toolCall.function.arguments);
+          const finalResponse = {
+            data: result,
+            usage: response.usage,
+          };
+          if (this.enableCaching) {
+            await this.cache.set(
+              cacheOptions,
+              finalResponse,
+              options.requestId,
+            );
           }
+          return finalResponse as T;
+        } catch (e) {
+          logger({
+            category: "groq",
+            message: "failed to parse tool call arguments as JSON, retrying",
+            level: 0,
+            auxiliary: {
+              error: {
+                value: e.message,
+                type: "string",
+              },
+            },
+          });
         }
+      }
 
-        // If we still haven't found valid JSON and have retries left, try again
-        if (!retries || retries < 5) {
-          return this.createChatCompletion({
-            options,
-            logger,
-            retries: (retries ?? 0) + 1,
+      // If we have content but no tool calls, try to parse the content as JSON
+      const content = response.choices[0]?.message?.content;
+      if (content) {
+        try {
+          // Try to extract JSON from the content
+          const jsonMatch = content.match(/\{[\s\S]*\}/);
+          if (jsonMatch) {
+            const result = JSON.parse(jsonMatch[0]);
+            const finalResponse = {
+              data: result,
+              usage: response.usage,
+            };
+            if (this.enableCaching) {
+              await this.cache.set(
+                cacheOptions,
+                finalResponse,
+                options.requestId,
+              );
+            }
+            return finalResponse as T;
+          }
+        } catch (e) {
+          logger({
+            category: "groq",
+            message: "failed to parse content as JSON",
+            level: 0,
+            auxiliary: {
+              error: {
+                value: e.message,
+                type: "string",
+              },
+            },
           });
         }
-
-        throw new CreateChatCompletionResponseError("Invalid response schema");
       }
 
-      if (this.enableCaching) {
-        this.cache.set(cacheOptions, response, options.requestId);
+      // If we still haven't found valid JSON and have retries left, try again
+      if (!retries || retries < 5) {
+        return this.createChatCompletion({
+          options,
+          logger,
+          retries: (retries ?? 0) + 1,
+        });
       }
 
-      return response as T;
+      throw new CreateChatCompletionResponseError("Invalid response schema");
     } catch (error) {
       logger({
         category: "groq",