Zipstack · jagadeeswaran-zipstack · Jan 9, 2025 · Jan 13, 2025 · Jan 13, 2025 · Jan 13, 2025
diff --git a/pyproject.toml b/pyproject.toml
@@ -55,6 +55,7 @@ dependencies = [
     "httpx>=0.25.2",
     "pdfplumber>=0.11.2",
     "redis>=5.2.1",
+    "llmwhisperer-client>=2.1.0",
 ]
 readme = "README.md"
 urls = { Homepage = "https://unstract.com", "Release notes" = "https://github.com/Zipstack/unstract-sdk/releases", Source = "https://github.com/Zipstack/unstract-sdk" }

diff --git a/src/unstract/sdk/__init__.py b/src/unstract/sdk/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.57.0rc3"
+__version__ = "0.58.0rc1"
 
 
 def get_sdk_version():

diff --git a/src/unstract/sdk/adapters/x2text/llm_whisperer/README.md b/src/unstract/sdk/adapters/x2text/llm_whisperer/README.md
@@ -4,7 +4,55 @@
 
 The below env variables are resolved by LLMWhisperer adapter
 
-| Variable                     | Description                                                                                  |
-| ---------------------------- | -------------------------------------------------------------------------------------------- |
-| `ADAPTER_LLMW_POLL_INTERVAL` | Time in seconds to wait before polling LLMWhisperer's status API. Defaults to 30s            |
-| `ADAPTER_LLMW_MAX_POLLS`     | Total number of times to poll the status API. Defaults to 30                                 |
+| Variable                     | Description                                                                       |
+| ---------------------------- | --------------------------------------------------------------------------------- |
+| `ADAPTER_LLMW_POLL_INTERVAL` | Time in seconds to wait before polling LLMWhisperer's status API. Defaults to 30s |
+| `ADAPTER_LLMW_MAX_POLLS`     | Total number of times to poll the status API. Defaults to 30                      |
+
+---
+
+## id: llm_whisperer_apis_changelog
+
+# Changelog
+
+## Version 2.0.0
+
+:::warning
+This version of the API is not backward compatible with the previous version.
+:::
+
+### API endpoint
+
+- The base URL for the **V2** APIs is `https://llmwhisperer-api.unstract.com/api/v2`
+
+### Global change in parameter naming
+
+- All use of `whisper-hash` as a parameter has been replaced with `whisper_hash` for consistency.
+
+### Whisper parameters
+
+#### Added
+
+- `mode` (str, optional): The processing mode.
+- `mark_vertical_lines` (bool, optional): Whether to reproduce vertical lines in the document.
+- `mark_horizontal_lines` (bool, optional): Whether to reproduce horizontal lines in the document.
+- `line_splitter_strategy` (str, optional): The line splitter strategy to use. An advanced option for customizing the line splitting process.
+- `lang` (str, optional): The language of the document.
+- `tag` (str, optional): A tag to associate with the document. Used for auditing and tracking purposes.
+- `file_name` (str, optional): The name of the file being processed. Used for auditing and tracking purposes.
+- `use_webhook` (str, optional): The name of the webhook to call after the document is processed.
+- `webhook_metadata` (str, optional): Metadata to send to the webhook after the document is processed.
+
+#### Removed
+
+- `timeout` (int, optional): The timeout for API requests. _There is no sync mode now. All requests are async._
+- `force_text_processing` (bool, optional): Whether to force text processing. _This feature is removed_
+- `ocr_provider` (str, optional): The OCR provider to use. _This is superseded by `mode`_
+- `processing_mode` (str, optional): The processing mode. _This is superseded by `mode`_
+- `store_metadata_for_highlighting` (bool, optional): Whether to store metadata for highlighting. _Feature is removed. Data is still available and is sent back when retrieve is called_
+
+### New features
+
+#### Webhooks
+
+- Added support for webhooks. You can now register a webhook and use it to receive the processed document.
diff --git a/src/unstract/sdk/adapters/x2text/llm_whisperer/src/constants.py b/src/unstract/sdk/adapters/x2text/llm_whisperer/src/constants.py
@@ -18,6 +18,7 @@ class OutputModes(Enum):
     LINE_PRINTER = "line-printer"
     DUMP_TEXT = "dump-text"
     TEXT = "text"
+    LAYOUT_PRESERVING = "layout_preserving"
 
 
 class HTTPMethod(Enum):
@@ -48,10 +49,16 @@ class WhispererEnv:
             LLMWhisperer's status API. Defaults to 30s
         MAX_POLLS: Total number of times to poll the status API.
             Set to -1 to poll indefinitely. Defaults to -1
+        STATUS_RETRIES: Number of times to retry calling LLMWhisperer's status API
+            on failure during polling. Defaults to 5.
     """
 
     POLL_INTERVAL = "ADAPTER_LLMW_POLL_INTERVAL"
     MAX_POLLS = "ADAPTER_LLMW_MAX_POLLS"
+    POLL_INTERVAL_V2 = "ADAPTER_LLMW_POLL_INTERVAL_V2"
+    MAX_POLLS_V2 = "ADAPTER_LLMW_MAX_POLLS_V2"
+    STATUS_RETRIES = "ADAPTER_LLMW_STATUS_RETRIES"
+    WAIT_TIMEOUT = "ADAPTER_LLMW_WAIT_TIMEOUT"
 
 
 class WhispererConfig:
@@ -66,6 +73,7 @@ class WhispererConfig:
     GAUSSIAN_BLUR_RADIUS = "gaussian_blur_radius"
     FORCE_TEXT_PROCESSING = "force_text_processing"
     LINE_SPLITTER_TOLERANCE = "line_splitter_tolerance"
+    LINE_SPLITTER_STRATEGY = "line_spitter_strategy"
     HORIZONTAL_STRETCH_FACTOR = "horizontal_stretch_factor"
     PAGES_TO_EXTRACT = "pages_to_extract"
     STORE_METADATA_FOR_HIGHLIGHTING = "store_metadata_for_highlighting"
@@ -74,6 +82,13 @@ class WhispererConfig:
     PAGE_SEPARATOR = "page_seperator"
     MARK_VERTICAL_LINES = "mark_vertical_lines"
     MARK_HORIZONTAL_LINES = "mark_horizontal_lines"
+    TAG = "tag"
+    USE_WEBHOOK = "use_webhook"
+    WEBHOOK_METADATA = "webhook_metadata"
+    TEXT_ONLY = "text_only"
+    VERSION = "version"
+    WAIT_TIMEOUT = "wait_timeout"
+    WAIT_FOR_COMPLETION = "wait_for_completion"
 
 
 class WhisperStatus:
@@ -86,6 +101,7 @@ class WhisperStatus:
     # Used for async processing
     WHISPER_HASH = "whisper-hash"
     STATUS = "status"
+    WHISPER_HASH_V2 = "whisper_hash"
 
 
 class WhispererDefaults:
@@ -95,12 +111,19 @@ class WhispererDefaults:
     GAUSSIAN_BLUR_RADIUS = 0.0
     FORCE_TEXT_PROCESSING = False
     LINE_SPLITTER_TOLERANCE = 0.75
+    LINE_SPLITTER_STRATEGY = "left-priority"
     HORIZONTAL_STRETCH_FACTOR = 1.0
-    POLL_INTERVAL = int(os.getenv(WhispererEnv.POLL_INTERVAL, 30))
-    MAX_POLLS = int(os.getenv(WhispererEnv.MAX_POLLS, 30))
     PAGES_TO_EXTRACT = ""
     ADD_LINE_NOS = True
     OUTPUT_JSON = True
     PAGE_SEPARATOR = "<<< >>>"
     MARK_VERTICAL_LINES = False
     MARK_HORIZONTAL_LINES = False
+    URL_IN_POST = False
+    TAG = "default"
+    TEXT_ONLY = False
+    WAIT_TIMEOUT = int(os.getenv(WhispererEnv.WAIT_TIMEOUT, 300))
+    WAIT_FOR_COMPLETION = True
+    POLL_INTERVAL = int(os.getenv(WhispererEnv.POLL_INTERVAL, 30))
+    MAX_POLLS = int(os.getenv(WhispererEnv.MAX_POLLS, 30))
+    STATUS_RETRIES = int(os.getenv(WhispererEnv.STATUS_RETRIES, 5))
diff --git a/...apters/x2text/llm_whisperer_v2/src/dto.py → .../adapters/x2text/llm_whisperer/src/dto.py b/...apters/x2text/llm_whisperer_v2/src/dto.py → .../adapters/x2text/llm_whisperer/src/dto.py
@@ -17,4 +17,4 @@ class WhispererRequestParams:
     def __post_init__(self) -> None:
         # TODO: Allow list of tags once its supported in LLMW v2
         if isinstance(self.tag, list):
-            self.tag = self.tag[0] if self.tag else None
+            self.tag = self.tag[0] if self.tag else None
diff --git a/src/unstract/sdk/adapters/x2text/llm_whisperer/src/helper.py b/src/unstract/sdk/adapters/x2text/llm_whisperer/src/helper.py
@@ -0,0 +1,90 @@
+import logging
+from typing import Any
+from unstract.sdk.adapters.x2text.llm_whisperer.src.constants import (
+    Modes,
+    OutputModes,
+    WhispererConfig,
+    WhispererDefaults,
+)
+from unstract.sdk.adapters.x2text.llm_whisperer.src.dto import WhispererRequestParams
+
+logger = logging.getLogger(__name__)
+
+
+class LLMWhispererHelper:
+
+    @staticmethod
+    def get_whisperer_params(
+        config: dict[str, Any], extra_params: WhispererRequestParams
+    ) -> dict[str, Any]:
+        """Gets query params meant for /whisper endpoint.
+
+        The params is filled based on the configuration passed.
+
+        Returns:
+            dict[str, Any]: Query params
+        """
+        params = {
+            WhispererConfig.MODE: config.get(WhispererConfig.MODE, Modes.FORM.value),
+            WhispererConfig.OUTPUT_MODE: config.get(
+                WhispererConfig.OUTPUT_MODE, OutputModes.LAYOUT_PRESERVING.value
+            ),
+            WhispererConfig.LINE_SPLITTER_TOLERANCE: config.get(
+                WhispererConfig.LINE_SPLITTER_TOLERANCE,
+                WhispererDefaults.LINE_SPLITTER_TOLERANCE,
+            ),
+            WhispererConfig.LINE_SPLITTER_STRATEGY: config.get(
+                WhispererConfig.LINE_SPLITTER_STRATEGY,
+                WhispererDefaults.LINE_SPLITTER_STRATEGY,
+            ),
+            WhispererConfig.HORIZONTAL_STRETCH_FACTOR: config.get(
+                WhispererConfig.HORIZONTAL_STRETCH_FACTOR,
+                WhispererDefaults.HORIZONTAL_STRETCH_FACTOR,
+            ),
+            WhispererConfig.PAGES_TO_EXTRACT: config.get(
+                WhispererConfig.PAGES_TO_EXTRACT,
+                WhispererDefaults.PAGES_TO_EXTRACT,
+            ),
+            WhispererConfig.MARK_VERTICAL_LINES: config.get(
+                WhispererConfig.MARK_VERTICAL_LINES,
+                WhispererDefaults.MARK_VERTICAL_LINES,
+            ),
+            WhispererConfig.MARK_HORIZONTAL_LINES: config.get(
+                WhispererConfig.MARK_HORIZONTAL_LINES,
+                WhispererDefaults.MARK_HORIZONTAL_LINES,
+            ),
+            WhispererConfig.PAGE_SEPARATOR: config.get(
+                WhispererConfig.PAGE_SEPARATOR,
+                WhispererDefaults.PAGE_SEPARATOR,
+            ),
+            # Not providing default value to maintain legacy compatablity
+            # these are optional params and identifiers for audit
+            WhispererConfig.TAG: extra_params.tag
+            or config.get(
+                WhispererConfig.TAG,
+                WhispererDefaults.TAG,
+            ),
+            WhispererConfig.USE_WEBHOOK: config.get(WhispererConfig.USE_WEBHOOK),
+            WhispererConfig.WEBHOOK_METADATA: config.get(
+                WhispererConfig.WEBHOOK_METADATA
+            ),
+            WhispererConfig.WAIT_TIMEOUT: config.get(
+                WhispererConfig.WAIT_TIMEOUT,
+                WhispererDefaults.WAIT_TIMEOUT,
+            ),
+            WhispererConfig.WAIT_FOR_COMPLETION: WhispererDefaults.WAIT_FOR_COMPLETION,
+        }
+        if params[WhispererConfig.MODE] == Modes.LOW_COST.value:
+            params.update(
+                {
+                    WhispererConfig.MEDIAN_FILTER_SIZE: config.get(
+                        WhispererConfig.MEDIAN_FILTER_SIZE,
+                        WhispererDefaults.MEDIAN_FILTER_SIZE,
+                    ),
+                    WhispererConfig.GAUSSIAN_BLUR_RADIUS: config.get(
+                        WhispererConfig.GAUSSIAN_BLUR_RADIUS,
+                        WhispererDefaults.GAUSSIAN_BLUR_RADIUS,
+                    ),
+                }
+            )
+        return params