Skip to content

Commit 6f4412c

Browse files
authored
Merge pull request #9 from Zipstack/update-client_for_status_api
Changes to handle status API change
2 parents 6149a91 + bd99737 commit 6f4412c

15 files changed

+202
-161
lines changed

.vscode/launch.json

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,17 @@
1+
{
2+
// Use IntelliSense to learn about possible attributes.
3+
// Hover to view descriptions of existing attributes.
4+
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5+
"version": "0.2.0",
6+
"configurations": [
7+
{
8+
"type": "node",
9+
"request": "launch",
10+
"name": "Debug Jest Tests",
11+
"program": "${workspaceFolder}/node_modules/jest/bin/jest.js",
12+
"args": ["--runInBand"],
13+
"console": "integratedTerminal",
14+
"internalConsoleOptions": "neverOpen"
15+
}
16+
]
17+
}

index.js

Lines changed: 28 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -11,12 +11,11 @@
1111
*
1212
1313
*/
14-
14+
require("dotenv").config();
1515
const axios = require("axios");
1616
const winston = require("winston");
1717
const fs = require("fs");
1818
const { register } = require("module");
19-
2019
const BASE_URL = "https://llmwhisperer-api.unstract.com/v1";
2120
const BASE_URL_V2 = "https://llmwhisperer-api.us-central.unstract.com/api/v2";
2221

@@ -374,8 +373,12 @@ class LLMWhispererClientV2 {
374373

375374
this.headers = {
376375
"unstract-key": this.apiKey,
377-
"Subscription-Id": "test", //TODO: Remove this line. For testing only
378-
"Start-Date": "9-07-2024", //TODO: Remove this line. For testing only
376+
// "Subscription-Id": "jsclient-client",
377+
// "Subscription-Name": "jsclient-client",
378+
// "User-Id": "jsclient-client-user",
379+
// "Product-Id": "jsclient-client-product",
380+
// "Product-Name": "jsclient-client-product",
381+
// "Start-Date": "2024-07-09",
379382
};
380383
}
381384

@@ -532,34 +535,30 @@ class LLMWhispererClientV2 {
532535
message["extraction"] = {};
533536
message["status_code"] = -1;
534537
message["message"] = "Whisper client operation timed out";
535-
break;
538+
return message;
536539
}
537540
const whisperStatus = await this.whisperStatus(whisperHash);
541+
this.logger.debug(`whisperStatus: ${JSON.stringify(whisperStatus)}`);
542+
538543
if (whisperStatus.statusCode !== 200) {
539544
message["extraction"] = {};
540545
message["status_code"] = whisperStatus.statusCode;
541546
message["message"] = "Whisper client operation failed";
542-
break;
547+
return message;
543548
}
544-
if (whisperStatus.status === "processing") {
545-
this.logger.debug("Status: processing");
546-
} else if (whisperStatus.status === "delivered") {
547-
this.logger.debug("Status: delivered");
548-
throw new LLMWhispererClientException(
549-
"Whisper operation already delivered",
550-
-1,
549+
if (whisperStatus.status === "accepted") {
550+
this.logger.debug("Status: accepted...");
551+
} else if (whisperStatus.status === "processing") {
552+
this.logger.debug("Status: processing...");
553+
} else if (whisperStatus.status === "error") {
554+
this.logger.debug("Status: error");
555+
this.logger.error(
556+
"Whisper-hash: ${whisperHash} | STATUS: failed with ${whisperStatus.message}",
551557
);
552-
} else if (whisperStatus.status === "unknown") {
553-
this.logger.debug("Status: unknown");
554-
throw new LLMWhispererClientException(
555-
"Whisper operation status unknown",
556-
-1,
557-
);
558-
} else if (whisperStatus.status === "failed") {
559-
this.logger.debug("Status: failed");
560558
message["extraction"] = {};
561559
message["status_code"] = -1;
562-
message["message"] = "Whisper client operation failed";
560+
message["status"] = "error";
561+
message["message"] = whisperStatus.message;
563562
break;
564563
} else if (whisperStatus.status === "processed") {
565564
this.logger.debug("Status: processed");
@@ -602,25 +601,30 @@ class LLMWhispererClientV2 {
602601
* @throws {LLMWhispererClientException} Throws an LLMWhispererClientException if an error occurs during the operation.
603602
*/
604603
async whisperStatus(whisperHash) {
605-
this.logger.debug("whisper_status called");
604+
this.logger.debug(`whisper_status called for ${whisperHash}`);
606605
const url = `${this.baseUrl}/whisper-status`;
607606
const params = { whisper_hash: whisperHash };
608607
this.logger.debug(`url: ${url}`);
608+
this.logger.debug(`params: ${JSON.stringify(params)}`);
609+
delete this.headers["Content-Length"];
610+
this.logger.debug(`headers: ${JSON.stringify(this.headers)}`);
609611

610612
try {
611613
const response = await axios.get(url, {
612614
headers: this.headers,
613615
params,
614616
timeout: this.apiTimeout * 1000,
615617
});
616-
617618
const message = response.data;
618619
message.statusCode = response.status;
619620
return message;
620621
} catch (error) {
622+
this.logger.debug("Hel00000000002");
623+
this.logger.debug(`error: ${JSON.stringify(error)}`);
621624
const err = error.response
622625
? error.response.data
623626
: { message: error.message };
627+
this.logger.debug(`error: ${JSON.stringify(err)}`);
624628
err.statusCode = error.response ? error.response.status : -1;
625629
throw new LLMWhispererClientException(err.message, err.statusCode);
626630
}

package-lock.json

Lines changed: 23 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "llmwhisperer-client",
3-
"version": "2.0.1",
3+
"version": "2.1.0",
44
"description": "LLMWhisper JS Client",
55
"main": "index.js",
66
"scripts": {
@@ -14,11 +14,13 @@
1414
"license": "MIT",
1515
"dependencies": {
1616
"axios": "~1.7.2",
17+
"llmwhisperer-client": "^2.0.1",
18+
"string-similarity": "^4.0.4",
1719
"winston": "~3.13.0"
1820
},
1921
"devDependencies": {
2022
"@eslint/js": "^9.4.0",
21-
"dotenv": "^16.4.5",
23+
"dotenv": "^16.4.7",
2224
"eslint": "^9.4.0",
2325
"eslint-config-prettier": "^9.1.0",
2426
"eslint-config-turbo": "^1.13.3",

sample.env

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
LLMWHISPERER_BASE_URL_V2=https://llmwhisperer-api.us-central.unstract.com/api/v2
2+
LLMWHISPERER_LOG_LEVEL=DEBUG
3+
LLMWHISPERER_API_KEY=

test/data/test.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"test": "HelloWorld"}

test/sample.env

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
export LLMWHISPERER_BASE_URL=https://llmwhisperer-api.unstract.com/v1
2-
export LLMWHISPERER_LOG_LEVEL=DEBUG
3-
export LLMWHISPERER_API_KEY=
1+
LLMWHISPERER_BASE_URL_V2=https://llmwhisperer-api.us-central.unstract.com/api/v2
2+
LLMWHISPERER_LOG_LEVEL=DEBUG
3+
LLMWHISPERER_API_KEY=

test/test.js

Lines changed: 28 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1+
12
const fs = require("fs");
23
const path = require("path");
3-
const LLMWhispererClient = require("../index").LLMWhispererClient;
4-
const client = new LLMWhispererClient({
5-
apiKey: process.env.LLMWHISPERER_API_KEY,
6-
});
7-
describe("LLMWhispererClient", () => {
4+
const stringSimilarity = require("string-similarity");
5+
const LLMWhispererClientV2 = require("../index").LLMWhispererClientV2;
6+
7+
const client = new LLMWhispererClientV2();
8+
9+
describe("LLMWhispererClientV2", () => {
810
test("get_usage_info", async () => {
911
const usage_info = await client.getUsageInfo();
1012
console.info(usage_info);
@@ -22,74 +24,44 @@ describe("LLMWhispererClient", () => {
2224
);
2325
});
2426

27+
2528
const test_cases = [
26-
["ocr", "line-printer", "restaurant_invoice_photo.pdf"],
27-
["ocr", "line-printer", "credit_card.pdf"],
28-
["ocr", "line-printer", "handwritten-form.pdf"],
29-
["ocr", "text", "restaurant_invoice_photo.pdf"],
30-
["text", "line-printer", "restaurant_invoice_photo.pdf"],
31-
["text", "text", "handwritten-form.pdf"],
29+
["high_quality", "layout_preserving", "restaurant_invoice_photo.pdf", 99],
30+
["native_text", "layout_preserving", "credit_card.pdf", 99],
31+
["form", "layout_preserving", "handwritten-form.pdf", 99],
32+
["high_quality", "layout_preserving", "handwritten-form.pdf", 80],
3233
];
3334

3435
test.each(test_cases)(
3536
"whisper(%s, %s, %s)",
36-
async (processing_mode, output_mode, input_file) => {
37+
async (mode, output_mode, input_file, percent_simlarity) => {
3738
const data_dir = path.join(__dirname, "data");
3839
const file_path = path.join(data_dir, input_file);
3940
const response = await client.whisper({
40-
processingMode: processing_mode,
41+
mode: mode,
4142
outputMode: output_mode,
4243
filePath: file_path,
4344
timeout: 200,
45+
waitForCompletion: true
4446
});
45-
console.debug(response);
4647

47-
const exp_basename = `${path.parse(input_file).name}.${processing_mode}.${output_mode}.txt`;
48+
49+
const exp_basename = `${path.parse(input_file).name}.${mode}.${output_mode}.txt`;
4850
const exp_file = path.join(data_dir, "expected", exp_basename);
49-
const exp = await fs.promises.readFile(exp_file, "utf-8");
51+
const expected_text = await fs.promises.readFile(exp_file, "utf-8");
5052

5153
expect(typeof response).toBe("object");
52-
expect(response.statusCode).toBe(200);
53-
// expect(response.extracted_text).toBe(exp);
54-
},
55-
200000,
56-
);
5754

58-
// TODO: Review and port to Jest based tests
59-
test.skip("whisper", () => {
60-
// response = client.whisper(
61-
// 'https://storage.googleapis.com/pandora-static/samples/bill.jpg.pdf'
62-
// );
63-
const response = client.whisper("test_files/restaurant_invoice_photo.pdf", {
64-
timeout: 200,
65-
store_metadata_for_highlighting: true,
66-
});
67-
console.info(response);
68-
// expect(typeof response).toBe('object');
69-
});
55+
const extracted_text = response.extraction.result_text
7056

71-
test.skip("whisper_status", () => {
72-
const response = client.whisper_status(
73-
"7cfa5cbb|5f1d285a7cf18d203de7af1a1abb0a3a",
74-
);
75-
console.info(response);
76-
expect(typeof response).toBe("object");
77-
});
57+
console.log(`Extracted Text: ${extracted_text}`);
58+
expect(response.status_code).toBe(200);
59+
const similarity = stringSimilarity.compareTwoStrings(extracted_text, expected_text);
60+
console.log(`Similarity: ${(similarity * 100).toFixed(2)}%`);
61+
expect(similarity * 100).toBeGreaterThan(percent_simlarity); // Expect similarity above this test case's threshold
7862

79-
test.skip("whisper_retrieve", () => {
80-
const response = client.whisper_retrieve(
81-
"7cfa5cbb|5f1d285a7cf18d203de7af1a1abb0a3a",
82-
);
83-
console.info(response);
84-
expect(typeof response).toBe("object");
85-
});
86-
87-
test.skip("whisper_highlight_data", () => {
88-
const response = client.highlight_data(
89-
"9924d865|5f1d285a7cf18d203de7af1a1abb0a3a",
90-
"Indiranagar",
91-
);
92-
console.info(response);
93-
expect(typeof response).toBe("object");
94-
});
63+
},
64+
200000,
65+
);
9566
});
67+

0 commit comments

Comments
 (0)