diff --git a/pyproject.toml b/pyproject.toml index f483a2d..9adc14e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ dependencies = [ "scipy>=1.14.1", "nmrglue>=0.11", "navani @ git+https://github.com/the-grey-group/navani.git", + "h5py", ] [project.optional-dependencies] local = ["datalab-api >= 0.2.8"] diff --git a/src/datalab_app_plugin_insitu/blocks.py b/src/datalab_app_plugin_insitu/blocks.py index a0ec85e..6f822fc 100644 --- a/src/datalab_app_plugin_insitu/blocks.py +++ b/src/datalab_app_plugin_insitu/blocks.py @@ -1,8 +1,9 @@ +import json import logging import os import zipfile from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Tuple import bokeh.embed import numpy as np @@ -22,6 +23,12 @@ from bokeh.plotting import figure from .nmr_insitu import process_local_data +from .utils import ( + load_data_from_cache, + load_plot_from_cache, + save_data_to_cache, + save_plot_to_cache, +) try: from pydatalab.blocks.base import DataBlock @@ -64,6 +71,9 @@ def __init__(self, *args, **kwargs): for key, value in self.defaults.items(): self.data.setdefault(key, value) + self._figures_cache = None + self._plot_data_cache = None + @property def plot_functions(self): return (self.generate_insitu_nmr_plot,) @@ -82,7 +92,6 @@ def get_available_folders(self) -> List[str]: raise ValueError("No file_id in data") main_folder = self.data.get("folder_name") - LOGGER.info(f"Main folder name: {main_folder}") if not main_folder: raise ValueError("Main folder name not specified") @@ -90,7 +99,6 @@ def get_available_folders(self) -> List[str]: try: file_info = get_file_info_by_id(self.data["file_id"]) file_path = file_info.get("location") - LOGGER.info(f"File path: {file_path}") if not file_path or not os.path.exists(file_path): raise FileNotFoundError(f"File not found: {file_path}") @@ -107,7 +115,6 @@ def get_available_folders(self) -> List[str]: folders.add(sub_folder) folder_list = sorted(list(folders)) - LOGGER.info(f"Found folders in '{main_folder}': {folder_list}") return folder_list except Exception as e: @@ -122,75 +129,118 @@ def process_and_store_data(self) -> bool: Returns: bool: True if processing was successful, False otherwise. + + Raises: + ValueError: If required parameters are missing + FileNotFoundError: If required files are not found + RuntimeError: For other processing errors """ + folders = self.get_available_folders() self.data["available_folders"] = folders nmr_folder_name = self.data.get("nmr_folder_name") echem_folder_name = self.data.get("echem_folder_name") + file_id = self.data.get("file_id") - file_path = get_file_info_by_id(self.data["file_id"])["location"] + if not file_id: + raise ValueError("No file ID specified") - if not nmr_folder_name or not echem_folder_name: - raise ValueError("Both NMR and Echem folder names must be specified") + cached_data = load_data_from_cache(file_id) - try: - nmr_folder_name = self.data.get("nmr_folder_name") - echem_folder_name = self.data.get("echem_folder_name") + if cached_data: + ppm_values = np.array(cached_data["nmr_spectra"]["ppm"]) + min_ppm = min(ppm_values) + max_ppm = max(ppm_values) + + self.data.update( + { + "metadata": cached_data["metadata"], + "ppm1": min_ppm, + "ppm2": max_ppm, + "processing_params": { + "ppm1": float(self.data.get("ppm1", min_ppm)), + "ppm2": float(self.data.get("ppm2", max_ppm)), + "file_id": file_id, + "start_exp": int(self.data.get("start_exp", self.defaults["start_exp"])), + "exclude_exp": self.data.get("exclude_exp", self.defaults["exclude_exp"]), + }, + "cached_data_id": file_id, + } + ) - if not all([nmr_folder_name, echem_folder_name]): - raise ValueError("Both NMR and Echem folder names are required") + return True - start_exp = int(self.data.get("start_exp", self.defaults["start_exp"])) - exclude_exp = self.data.get("exclude_exp", self.defaults["exclude_exp"]) + file_info = get_file_info_by_id(file_id) + if not file_info: + raise ValueError(f"No file info found for ID: {file_id}") - try: - result = process_local_data( - folder_name=file_path, - nmr_folder_name=nmr_folder_name, - echem_folder_name=echem_folder_name, - start_at=start_exp, - exclude_exp=exclude_exp, - ) + file_path = file_info.get("location") + if not file_path: + raise ValueError(f"No file location found for ID: {file_id}") + + if not nmr_folder_name or not echem_folder_name: + raise ValueError("Both NMR and Echem folder names must be specified") + + if not all([nmr_folder_name, echem_folder_name]): + raise ValueError("Both NMR and Echem folder names are required") + + start_exp = int(self.data.get("start_exp", self.defaults["start_exp"])) + exclude_exp = self.data.get("exclude_exp", self.defaults["exclude_exp"]) - except FileNotFoundError as e: - raise FileNotFoundError(f"Folder not found: {str(e)}") + try: + result = process_local_data( + folder_name=file_path, + nmr_folder_name=nmr_folder_name, + echem_folder_name=echem_folder_name, + start_at=start_exp, + exclude_exp=exclude_exp, + ) - except Exception as e: - raise RuntimeError(f"Error processing data: {str(e)}") + if result is None: + raise RuntimeError("Processing returned None instead of expected data") - nmr_data = result["nmr_spectra"] - ppm_values = np.array(nmr_data.get("ppm", [])) + except FileNotFoundError as e: + raise FileNotFoundError(f"Folder not found: {str(e)}") + except Exception as e: + raise RuntimeError(f"Error processing data: {str(e)}") - ppm1 = self.data["ppm1"] = min(ppm_values) - ppm2 = self.data["ppm2"] = max(ppm_values) + try: + save_data_to_cache(file_id, result) + except Exception as e: + raise RuntimeError(f"Error saving data to cache: {str(e)}") + try: + ppm_values = np.array(result["nmr_spectra"]["ppm"]) self.data.update( { - "nmr_data": result["nmr_spectra"], - "echem_data": result.get("echem", {}), "metadata": result["metadata"], + "ppm1": min(ppm_values), + "ppm2": max(ppm_values), "processing_params": { - "ppm1": ppm1, - "ppm2": ppm2, - "file_id": self.data.get("file_id"), + "ppm1": min(ppm_values), + "ppm2": max(ppm_values), + "file_id": file_id, "start_exp": start_exp, "exclude_exp": exclude_exp, }, + "cached_data_id": file_id, } ) return True - except Exception as e: - raise RuntimeError(f"Error processing data: {str(e)}") + raise RuntimeError(f"Error updating data attributes: {str(e)}") def should_reprocess_data(self) -> bool: """ Determine if data needs to be reprocessed based on parameter changes. PPM changes should not trigger reprocessing. """ - if "processing_params" not in self.data or "nmr_data" not in self.data: + if not self.data.get("nmr_folder_name") or not self.data.get("echem_folder_name"): + return False + + if "processing_params" not in self.data or "cached_data_id" not in self.data: return True params = self.data["processing_params"] @@ -202,7 +252,6 @@ def should_reprocess_data(self) -> bool: for key in current_params: if params.get(key) != current_params[key]: - LOGGER.info(f"Parameter {key} changed, reprocessing data...") return True return False @@ -216,116 +265,161 @@ def generate_insitu_nmr_plot(self) -> Tuple[pd.DataFrame, List[str]]: Returns: Tuple[pd.DataFrame, List[str]]: Time data and status messages. + + Raises: + ValueError: If required parameters are missing or invalid + FileNotFoundError: If required files are not found + RuntimeError: For other processing errors """ if "file_id" not in self.data: - raise ValueError("No file set in the DataBlock") + return None, ["No file set. Please select a file first."] - try: - file_info = get_file_info_by_id(self.data["file_id"], update_if_live=True) - if Path(file_info["location"]).suffix.lower() not in self.accepted_file_extensions: - raise ValueError( - f"Unsupported file extension (must be one of {self.accepted_file_extensions})" - ) - - needs_reprocessing = self.should_reprocess_data() - if needs_reprocessing: - LOGGER.info("Processing new data...") - if not self.process_and_store_data(): - return None, [] - else: - LOGGER.info("Using stored data...") - self.data["processing_params"]["ppm1"] = float( - self.data.get("ppm1", self.defaults["ppm1"]) - ) - self.data["processing_params"]["ppm2"] = float( - self.data.get("ppm2", self.defaults["ppm2"]) - ) - - plot_data = self._prepare_plot_data() - if not plot_data: - return None, [] - - shared_ranges = self._create_shared_ranges(plot_data) - - heatmap_figure = self._create_heatmap_figure(plot_data, shared_ranges) - nmrplot_figure = self._create_nmr_line_figure(plot_data, shared_ranges) - echemplot_figure = self._create_echem_figure(plot_data, shared_ranges) - - heatmap_figure.js_on_event( - DoubleTap, CustomJS(args=dict(p=heatmap_figure), code="p.reset.emit()") + nmr_folder_name = self.data.get("nmr_folder_name") + echem_folder_name = self.data.get("echem_folder_name") + + if not nmr_folder_name or not echem_folder_name: + folders = self.get_available_folders() + self.data["available_folders"] = folders + return None, ["Please select both NMR and Echem folders before processing."] + + file_id = self.data.get("file_id") + nmr_folder_name = self.data.get("nmr_folder_name") + echem_folder_name = self.data.get("echem_folder_name") + + cache_key = f"{file_id}_{nmr_folder_name}_{echem_folder_name}" + + cached_plot = load_plot_from_cache(cache_key) + if cached_plot: + self.data["bokeh_plot_data"] = cached_plot + return self.data.get("time_data"), ["Plot loaded from cache"] + + file_info = get_file_info_by_id(self.data["file_id"], update_if_live=True) + if not file_info: + raise ValueError(f"No file info found for ID: {self.data['file_id']}") + + if Path(file_info["location"]).suffix.lower() not in self.accepted_file_extensions: + raise ValueError( + f"Unsupported file extension (must be one of {self.accepted_file_extensions})" ) - nmrplot_figure.js_on_event( - DoubleTap, CustomJS(args=dict(p=nmrplot_figure), code="p.reset.emit()") + + needs_reprocessing = self.should_reprocess_data() + + if needs_reprocessing: + self.process_and_store_data() + else: + self.data["processing_params"]["ppm1"] = float( + self.data.get("ppm1", self.defaults["ppm1"]) ) - echemplot_figure.js_on_event( - DoubleTap, CustomJS(args=dict(p=echemplot_figure), code="p.reset.emit()") + self.data["processing_params"]["ppm2"] = float( + self.data.get("ppm2", self.defaults["ppm2"]) ) - self._link_plots(heatmap_figure, nmrplot_figure, echemplot_figure, plot_data) + plot_data = self._prepare_plot_data() + + shared_ranges = self._create_shared_ranges(plot_data) + + heatmap_figure = self._create_heatmap_figure(plot_data, shared_ranges) + nmrplot_figure = self._create_nmr_line_figure(plot_data, shared_ranges) + echemplot_figure = self._create_echem_figure(plot_data, shared_ranges) + + heatmap_figure.js_on_event( + DoubleTap, CustomJS(args=dict(p=heatmap_figure), code="p.reset.emit()") + ) + nmrplot_figure.js_on_event( + DoubleTap, CustomJS(args=dict(p=nmrplot_figure), code="p.reset.emit()") + ) + echemplot_figure.js_on_event( + DoubleTap, CustomJS(args=dict(p=echemplot_figure), code="p.reset.emit()") + ) + + self._link_plots(heatmap_figure, nmrplot_figure, echemplot_figure, plot_data) + + grid = [[None, nmrplot_figure], [echemplot_figure, heatmap_figure]] + gp = gridplot(grid, merge_tools=True, toolbar_location="right") - grid = [[None, nmrplot_figure], [echemplot_figure, heatmap_figure]] - gp = gridplot(grid, merge_tools=True) + try: + if DATALAB_BOKEH_THEME is not None: + json_item = bokeh.embed.json_item(gp, theme=DATALAB_BOKEH_THEME) + else: + json_item = bokeh.embed.json_item(gp) - self.data["bokeh_plot_data"] = bokeh.embed.json_item(gp, theme=DATALAB_BOKEH_THEME) + except Exception: + try: + from bokeh.core.json_encoder import serialize_json - return self.data.get("time_data"), ["Plot successfully generated"] + if DATALAB_BOKEH_THEME is not None: + serialized = serialize_json(gp, theme=DATALAB_BOKEH_THEME) + else: + serialized = serialize_json(gp) + json_item = json.loads(serialized) + except Exception as inner_e: + raise ValueError(f"Alternative serialization failed: {str(inner_e)}") + + try: + save_plot_to_cache(cache_key, json_item) except Exception as e: - raise RuntimeError(f"Failed to generate insitu NMR plot: {str(e)}") + raise ValueError(f"Error caching plot: {str(e)}") + + self.data["bokeh_plot_data"] = json_item + + return self.data.get("time_data"), ["Plot successfully generated"] - def _prepare_plot_data(self) -> Optional[Dict[str, Any]]: + def _prepare_plot_data(self) -> Dict[str, Any]: """ Extract and prepare data for plotting. Returns: - Optional[Dict[str, Any]]: Dictionary containing prepared plot data, - or None if data extraction fails. + Dict[str, Any]: Dictionary containing prepared plot data + + Raises: + ValueError: If required data is missing or invalid + RuntimeError: For processing errors """ - try: - nmr_data = self.data["nmr_data"] - echem_data = self.data["echem_data"] - metadata = self.data["metadata"] + file_id = self.data.get("cached_data_id") + if not file_id: + raise ValueError("No cached data ID found") - ppm_values = np.array(nmr_data.get("ppm", [])) - if len(ppm_values) == 0: - raise ValueError("No PPM values found in NMR data") + cached_data = load_data_from_cache(file_id) + if not cached_data: + raise ValueError(f"No cached data found for ID: {file_id}") - spectra = nmr_data.get("spectra", []) - if not spectra: - raise ValueError("No spectra found in NMR data") + nmr_data = cached_data["nmr_spectra"] + echem_data = cached_data.get("echem", {}) + metadata = cached_data["metadata"] - try: - spectra_intensities = [ - np.array(spectrum["intensity"]).tolist() for spectrum in spectra - ] - - intensity_matrix = np.array( - [np.array(spectrum["intensity"]) for spectrum in spectra] - ) - - except Exception as e: - raise ValueError(f"Error processing spectrum intensities: {e}") - - time_range = metadata["time_range"] - first_spectrum_intensities = np.array(spectra[0]["intensity"]) - - intensity_min = np.min(intensity_matrix) - intensity_max = np.max(intensity_matrix) - - return { - "ppm_values": ppm_values, - "spectra": spectra, - "spectra_intensities": spectra_intensities, - "intensity_matrix": intensity_matrix, - "time_range": time_range, - "first_spectrum_intensities": first_spectrum_intensities, - "intensity_min": intensity_min, - "intensity_max": intensity_max, - "echem_data": echem_data, - } + ppm_values = np.array(nmr_data.get("ppm", [])) + if len(ppm_values) == 0: + raise ValueError("No PPM values found in NMR data") + + spectra = nmr_data.get("spectra", []) + if not spectra: + raise ValueError("No spectra found in NMR data") + + try: + spectra_intensities = [np.array(spectrum["intensity"]).tolist() for spectrum in spectra] + intensity_matrix = np.array([np.array(spectrum["intensity"]) for spectrum in spectra]) except Exception as e: - raise RuntimeError(f"Error preparing plot data: {str(e)}") + raise RuntimeError(f"Error processing spectrum intensities: {str(e)}") + + time_range = metadata["time_range"] + first_spectrum_intensities = np.array(spectra[0]["intensity"]) + + intensity_min = np.min(intensity_matrix) + intensity_max = np.max(intensity_matrix) + + return { + "ppm_values": ppm_values, + "spectra": spectra, + "spectra_intensities": spectra_intensities, + "intensity_matrix": intensity_matrix, + "time_range": time_range, + "first_spectrum_intensities": first_spectrum_intensities, + "intensity_min": intensity_min, + "intensity_max": intensity_max, + "echem_data": echem_data, + } def _create_shared_ranges(self, plot_data: Dict[str, Any]) -> Dict[str, Range1d]: """ @@ -391,6 +485,7 @@ def _create_heatmap_figure( y_range=ranges["shared_y_range"], height=400, tools=tools, + toolbar_location=None, ) color_mapper = LinearColorMapper(palette="Turbo256", low=intensity_min, high=intensity_max) @@ -490,6 +585,7 @@ def _create_nmr_line_figure( x_range=ranges["shared_x_range"], y_range=ranges["intensity_range"], tools=tools, + toolbar_location=None, ) nmrplot_figure.line( @@ -539,6 +635,7 @@ def _create_echem_figure(self, plot_data: Dict[str, Any], ranges: Dict[str, Rang height=400, width=250, tools=tools, + toolbar_location=None, ) if echem_data and "Voltage" in echem_data and "time" in echem_data: @@ -585,13 +682,8 @@ def _link_plots( ) -> None: """ Link the plots together with interactive tools and callbacks. - - Args: - heatmap_figure: The heatmap figure component - nmrplot_figure: The NMR line plot figure component - echemplot_figure: The electrochemical figure component - plot_data: Dictionary containing prepared plot data """ + line_source = plot_data["line_source"] clicked_spectra_source = plot_data["clicked_spectra_source"] spectra_intensities = plot_data["spectra_intensities"] @@ -599,6 +691,11 @@ def _link_plots( intensity_matrix = plot_data["intensity_matrix"] heatmap_source = plot_data.get("heatmap_source") + ppm_list = ppm_values.tolist() if isinstance(ppm_values, np.ndarray) else ppm_values + + global_min = float(np.min(intensity_matrix)) + global_max = float(np.max(intensity_matrix)) + colors = [ "red", "green", @@ -656,42 +753,34 @@ def _link_plots( heatmap_source=heatmap_source, clicked_spectra_source=clicked_spectra_source, spectra_intensities=spectra_intensities, - ppm_values=ppm_values.tolist(), + ppm_values=ppm_list, colors=colors, ), code=""" - const indices = cb_obj.indices; - if (indices.length === 0) return; - - const index = indices[0]; - const exp_num = heatmap_source.data.exp_num[index]; - - const existing_indices = clicked_spectra_source.data.exp_index; - if (existing_indices.includes(exp_num)) return; - - const color_index = existing_indices.length % colors.length; - - const new_xs = [...clicked_spectra_source.data['δ (ppm)']]; - const new_ys = [...clicked_spectra_source.data.intensity]; - const new_indices = [...clicked_spectra_source.data.exp_index]; - const new_colors = [...clicked_spectra_source.data.color]; - - new_xs.push(ppm_values); - new_ys.push(spectra_intensities[index]); - new_indices.push(exp_num); - new_colors.push(colors[color_index]); - - clicked_spectra_source.data = { - 'δ (ppm)': new_xs, - 'intensity': new_ys, - 'exp_index': new_indices, - 'color': new_colors - }; - - clicked_spectra_source.change.emit(); - """, + const indices = cb_obj.indices; + if (indices.length === 0) return; + const index = indices[0]; + const exp_num = heatmap_source.data.exp_num[index]; + const existing_indices = clicked_spectra_source.data.exp_index; + if (existing_indices.includes(exp_num)) return; + const color_index = existing_indices.length % colors.length; + const new_xs = [...clicked_spectra_source.data['δ (ppm)']]; + const new_ys = [...clicked_spectra_source.data.intensity]; + const new_indices = [...clicked_spectra_source.data.exp_index]; + const new_colors = [...clicked_spectra_source.data.color]; + new_xs.push(ppm_values); + new_ys.push(spectra_intensities[index]); + new_indices.push(exp_num); + new_colors.push(colors[color_index]); + clicked_spectra_source.data = { + 'δ (ppm)': new_xs, + 'intensity': new_ys, + 'exp_index': new_indices, + 'color': new_colors + }; + clicked_spectra_source.change.emit(); + """, ) - heatmap_source.selected.js_on_change("indices", tap_callback) heatmap_figure.x_range.js_on_change( @@ -699,68 +788,28 @@ def _link_plots( CustomJS( args=dict( color_mapper=heatmap_figure.select_one(LinearColorMapper), - intensity_matrix=intensity_matrix.tolist(), - ppm_array=ppm_values.tolist(), - global_min=np.min(intensity_matrix), - global_max=np.max(intensity_matrix), + global_min=global_min, + global_max=global_max, ), code=""" - const start_index = ppm_array.findIndex(ppm => ppm <= cb_obj.end); - const end_index = ppm_array.findIndex(ppm => ppm <= cb_obj.start); - - if (start_index < 0 || end_index < 0 || start_index >= ppm_array.length || end_index >= ppm_array.length) { - color_mapper.low = global_min; - color_mapper.high = global_max; - return; - } - - if (Math.abs(end_index - start_index) < 5) { - return; - } - - let min_intensity = Infinity; - let max_intensity = -Infinity; - - for (let i = 0; i < intensity_matrix.length; i++) { - for (let j = Math.min(start_index, end_index); j <= Math.max(start_index, end_index); j++) { - if (j >= 0 && j < intensity_matrix[i].length) { - const value = intensity_matrix[i][j]; - min_intensity = Math.min(min_intensity, value); - max_intensity = Math.max(max_intensity, value); - } - } - } - - if (Math.abs(max_intensity - min_intensity) < 0.1 * Math.abs(global_max - global_min)) { - const padding = 0.1 * Math.abs(global_max - global_min); - min_intensity = Math.max(min_intensity - padding, global_min); - max_intensity = Math.min(max_intensity + padding, global_max); - } - - color_mapper.low = min_intensity; - color_mapper.high = max_intensity; - """, + color_mapper.low = global_min; + color_mapper.high = global_max; + """, ), ) - heatmap_figure.x_range.tags = [ppm_values.tolist(), intensity_matrix.tolist()] - line_y_range = nmrplot_figure.y_range line_y_range.js_link("start", heatmap_figure.select_one(LinearColorMapper), "low") line_y_range.js_link("end", heatmap_figure.select_one(LinearColorMapper), "high") tap_tool = TapTool() - nmrplot_figure.add_tools(tap_tool) - remove_line_callback = CustomJS( args=dict(clicked_spectra_source=clicked_spectra_source), code=""" const indices = clicked_spectra_source.selected.indices; if (indices.length === 0) return; - let data = clicked_spectra_source.data; - for (let i = indices.length - 1; i >= 0; i--) { let index = indices[i]; data['δ (ppm)'].splice(index, 1); @@ -768,9 +817,7 @@ def _link_plots( data['exp_index'].splice(index, 1); data['color'].splice(index, 1); } - clicked_spectra_source.change.emit(); - """, + """, ) - clicked_spectra_source.selected.js_on_change("indices", remove_line_callback) diff --git a/src/datalab_app_plugin_insitu/nmr_insitu.py b/src/datalab_app_plugin_insitu/nmr_insitu.py index 1a93cd3..20cc080 100644 --- a/src/datalab_app_plugin_insitu/nmr_insitu.py +++ b/src/datalab_app_plugin_insitu/nmr_insitu.py @@ -1,6 +1,5 @@ import os import tempfile -import warnings import zipfile from pathlib import Path from typing import Dict, List, Optional @@ -36,9 +35,6 @@ def process_local_data( if folder_name.endswith(".zip"): with zipfile.ZipFile(folder_name, "r") as zip_ref: zip_ref.extractall(tmpdir) - base_path = Path(tmpdir) - else: - base_path = Path(folder_name) folder_name = Path(folder_name).stem nmr_folder_name = Path(nmr_folder_name).stem @@ -50,10 +46,6 @@ def process_local_data( if not nmr_folder_path.exists(): raise FileNotFoundError(f"NMR folder not found: {nmr_folder_name}") - echem_folder_path = base_path / echem_folder_name if echem_folder_name else None - if echem_folder_path and not echem_folder_path.exists(): - warnings.warn(f"Echem folder not found: {echem_folder_name}") - return _process_data( Path(tmpdir) / folder_name, nmr_folder_path, diff --git a/src/datalab_app_plugin_insitu/utils.py b/src/datalab_app_plugin_insitu/utils.py index cd5a2f7..d7d141e 100644 --- a/src/datalab_app_plugin_insitu/utils.py +++ b/src/datalab_app_plugin_insitu/utils.py @@ -1,9 +1,11 @@ +import json +import os import re -import warnings from datetime import datetime from pathlib import Path from typing import Dict, List, Optional, Tuple +import h5py import nmrglue as ng import numpy as np import pandas as pd @@ -208,14 +210,12 @@ def process_echem_data(base_folder: Path, echem_folder_name: str) -> Optional[pd echem_folder_path = Path(base_folder) / echem_folder_name if not echem_folder_path.exists(): - warnings.warn(f"Echem folder not found at {echem_folder_path}") - return None + raise FileNotFoundError(f"Echem folder not found at {echem_folder_path}") mpr_files = [f for f in echem_folder_path.iterdir() if f.suffix.upper() == ".MPR"] if not mpr_files: - warnings.warn(f"No MPR files found in {echem_folder_path}") - return None + raise FileNotFoundError(f"No MPR files found in {echem_folder_path}") if len(mpr_files) == 1: file_to_process = mpr_files[0] @@ -252,6 +252,9 @@ def prepare_for_bokeh( ) -> Dict: """Prepare data for Bokeh visualization, with optional echem data.""" + if nmr_data is None or df is None: + raise ValueError("Required NMR data or integrated data is None") + result = { "metadata": { "time_range": {"start": df["time"].min(), "end": df["time"].max()}, @@ -339,7 +342,146 @@ def _process_data( process_echem_data(base_folder, echem_folder_name) if echem_folder_name else None ) - return prepare_for_bokeh(nmr_data, df, merged_df, num_experiments) + result = prepare_for_bokeh(nmr_data, df, merged_df, num_experiments) + + if result is None: + raise RuntimeError("prepare_for_bokeh returned None instead of expected data") + + return result except Exception as e: raise RuntimeError(f"Error in common processing: {str(e)}") + + +def get_cache_path(key: str) -> str: + """ + Get the path to the cache file for a given key. + If key is a composite key (e.g. "file_id_subfolder1_subfolder2"), + it extracts the file_id and uses it to determine the cache location. + Uses the parent directory of the original file location for cache storage. + """ + + file_id = key.split("_")[0] if "_" in key else key + + try: + from pydatalab.file_utils import get_file_info_by_id + + file_info = get_file_info_by_id(file_id) + if file_info and "location" in file_info: + file_location = Path(file_info["location"]) + cache_dir = file_location.parent / ".datalab_cache" + else: + cache_dir = Path(os.path.expanduser("~")) / ".datalab_cache" + except Exception: + cache_dir = Path(os.path.expanduser("~")) / ".datalab_cache" + + os.makedirs(cache_dir, exist_ok=True) + return str(cache_dir / key) + + +def save_data_to_cache(file_id: str, data: Dict) -> None: + """Save processed data to HDF5 cache file.""" + cache_path = get_cache_path(file_id) + + try: + os.makedirs(os.path.dirname(cache_path), exist_ok=True) + + with h5py.File(cache_path, "w") as f: + if "metadata" in data: + f.attrs["metadata"] = json.dumps(data["metadata"]) + + if "nmr_spectra" in data: + nmr_group = f.create_group("nmr_spectra") + nmr_group.create_dataset("ppm", data=data["nmr_spectra"]["ppm"]) + + spectra_group = nmr_group.create_group("spectra") + for i, spectrum in enumerate(data["nmr_spectra"]["spectra"]): + spectrum_group = spectra_group.create_group(str(i)) + spectrum_group.attrs["time"] = spectrum["time"] + spectrum_group.attrs["experiment_number"] = spectrum.get( + "experiment_number", i + 1 + ) + spectrum_group.create_dataset("intensity", data=spectrum["intensity"]) + + if "integrated_data" in data: + integrated_group = f.create_group("integrated_data") + for key, values in data["integrated_data"].items(): + integrated_group.create_dataset(key, data=values) + + if "echem" in data and data["echem"]: + echem_group = f.create_group("echem") + for key, values in data["echem"].items(): + echem_group.create_dataset(key, data=values) + + except Exception as e: + raise RuntimeError(f"Error saving to cache: {str(e)}") + + +def load_data_from_cache(file_id: str) -> Optional[Dict]: + """Load processed data from HDF5 cache file.""" + cache_path = get_cache_path(file_id) + if not os.path.exists(cache_path): + return None + + try: + with h5py.File(cache_path, "r") as f: + data = {} + + if "metadata" in f.attrs: + data["metadata"] = json.loads(f.attrs["metadata"]) + + if "nmr_spectra" in f: + nmr_group = f["nmr_spectra"] + data["nmr_spectra"] = {"ppm": nmr_group["ppm"][:].tolist(), "spectra": []} + + for i in range(len(nmr_group["spectra"])): + spectrum_group = nmr_group["spectra"][str(i)] + data["nmr_spectra"]["spectra"].append( + { + "time": float(spectrum_group.attrs["time"]), + "experiment_number": int(spectrum_group.attrs["experiment_number"]), + "intensity": spectrum_group["intensity"][:].tolist(), + } + ) + + if "integrated_data" in f: + integrated_group = f["integrated_data"] + data["integrated_data"] = {} + for key in integrated_group.keys(): + data["integrated_data"][key] = integrated_group[key][:].tolist() + + if "echem" in f: + echem_group = f["echem"] + data["echem"] = {} + for key in echem_group.keys(): + data["echem"][key] = echem_group[key][:].tolist() + + return data + except Exception as e: + raise RuntimeError(f"Error loading from cache: {str(e)}") + + +def save_plot_to_cache(cache_key: str, plot_data: Dict) -> None: + """Save plot data to a separate JSON cache file.""" + plot_cache_path = f"{get_cache_path(cache_key)}.json" + + try: + os.makedirs(os.path.dirname(plot_cache_path), exist_ok=True) + with open(plot_cache_path, "w") as f: + json.dump(plot_data, f) + except Exception as e: + raise RuntimeError(f"Error saving plot to cache: {str(e)}") + + +def load_plot_from_cache(cache_key: str) -> Optional[Dict]: + """Load plot data from the JSON cache file.""" + plot_cache_path = f"{get_cache_path(cache_key)}.json" + + if not os.path.exists(plot_cache_path): + return None + + try: + with open(plot_cache_path) as f: + return json.load(f) + except Exception as e: + raise RuntimeError(f"Error loading plot from cache: {str(e)}") diff --git a/uv.lock b/uv.lock index 95b93da..79c9802 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,4 @@ version = 1 -revision = 1 requires-python = ">=3.10" resolution-markers = [ "python_full_version < '3.11'", @@ -329,8 +328,10 @@ wheels = [ [[package]] name = "datalab-app-plugin-insitu" +version = "0.1.4.post7+gd25a0ad" source = { editable = "." } dependencies = [ + { name = "h5py" }, { name = "lmfit" }, { name = "matplotlib" }, { name = "navani" }, @@ -361,6 +362,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "datalab-api", marker = "extra == 'local'", specifier = ">=0.2.8" }, + { name = "h5py" }, { name = "lmfit", specifier = ">=1.3.2" }, { name = "matplotlib", specifier = ">=3.9.2" }, { name = "navani", git = "https://github.com/the-grey-group/navani.git" }, @@ -370,7 +372,6 @@ requires-dist = [ { name = "pyreadr", specifier = ">=0.5.2" }, { name = "scipy", specifier = ">=1.14.1" }, ] -provides-extras = ["local"] [package.metadata.requires-dev] dev = [ @@ -571,6 +572,37 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 }, ] +[[package]] +name = "h5py" +version = "3.13.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/03/2e/a22d6a8bfa6f8be33e7febd985680fba531562795f0a9077ed1eb047bfb0/h5py-3.13.0.tar.gz", hash = "sha256:1870e46518720023da85d0895a1960ff2ce398c5671eac3b1a41ec696b7105c3", size = 414876 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/8a/bc76588ff1a254e939ce48f30655a8f79fac614ca8bd1eda1a79fa276671/h5py-3.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5540daee2b236d9569c950b417f13fd112d51d78b4c43012de05774908dff3f5", size = 3413286 }, + { url = "https://files.pythonhosted.org/packages/19/bd/9f249ecc6c517b2796330b0aab7d2351a108fdbd00d4bb847c0877b5533e/h5py-3.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:10894c55d46df502d82a7a4ed38f9c3fdbcb93efb42e25d275193e093071fade", size = 2915673 }, + { url = "https://files.pythonhosted.org/packages/72/71/0dd079208d7d3c3988cebc0776c2de58b4d51d8eeb6eab871330133dfee6/h5py-3.13.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb267ce4b83f9c42560e9ff4d30f60f7ae492eacf9c7ede849edf8c1b860e16b", size = 4283822 }, + { url = "https://files.pythonhosted.org/packages/d8/fa/0b6a59a1043c53d5d287effa02303bd248905ee82b25143c7caad8b340ad/h5py-3.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2cf6a231a07c14acd504a945a6e9ec115e0007f675bde5e0de30a4dc8d86a31", size = 4548100 }, + { url = "https://files.pythonhosted.org/packages/12/42/ad555a7ff7836c943fe97009405566dc77bcd2a17816227c10bd067a3ee1/h5py-3.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:851ae3a8563d87a5a0dc49c2e2529c75b8842582ccaefbf84297d2cfceeacd61", size = 2950547 }, + { url = "https://files.pythonhosted.org/packages/86/2b/50b15fdefb577d073b49699e6ea6a0a77a3a1016c2b67e2149fc50124a10/h5py-3.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8a8e38ef4ceb969f832cc230c0cf808c613cc47e31e768fd7b1106c55afa1cb8", size = 3422922 }, + { url = "https://files.pythonhosted.org/packages/94/59/36d87a559cab9c59b59088d52e86008d27a9602ce3afc9d3b51823014bf3/h5py-3.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f35640e81b03c02a88b8bf99fb6a9d3023cc52f7c627694db2f379e0028f2868", size = 2921619 }, + { url = "https://files.pythonhosted.org/packages/37/ef/6f80b19682c0b0835bbee7b253bec9c16af9004f2fd6427b1dd858100273/h5py-3.13.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:337af114616f3656da0c83b68fcf53ecd9ce9989a700b0883a6e7c483c3235d4", size = 4259366 }, + { url = "https://files.pythonhosted.org/packages/03/71/c99f662d4832c8835453cf3476f95daa28372023bda4aa1fca9e97c24f09/h5py-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:782ff0ac39f455f21fd1c8ebc007328f65f43d56718a89327eec76677ebf238a", size = 4509058 }, + { url = "https://files.pythonhosted.org/packages/56/89/e3ff23e07131ff73a72a349be9639e4de84e163af89c1c218b939459a98a/h5py-3.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:22ffe2a25770a2d67213a1b94f58006c14dce06933a42d2aaa0318c5868d1508", size = 2966428 }, + { url = "https://files.pythonhosted.org/packages/d8/20/438f6366ba4ded80eadb38f8927f5e2cd6d2e087179552f20ae3dbcd5d5b/h5py-3.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:477c58307b6b9a2509c59c57811afb9f598aedede24a67da808262dfa0ee37b4", size = 3384442 }, + { url = "https://files.pythonhosted.org/packages/10/13/cc1cb7231399617d9951233eb12fddd396ff5d4f7f057ee5d2b1ca0ee7e7/h5py-3.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:57c4c74f627c616f02b7aec608a8c706fe08cb5b0ba7c08555a4eb1dde20805a", size = 2917567 }, + { url = "https://files.pythonhosted.org/packages/9e/d9/aed99e1c858dc698489f916eeb7c07513bc864885d28ab3689d572ba0ea0/h5py-3.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:357e6dc20b101a805ccfd0024731fbaf6e8718c18c09baf3b5e4e9d198d13fca", size = 4669544 }, + { url = "https://files.pythonhosted.org/packages/a7/da/3c137006ff5f0433f0fb076b1ebe4a7bf7b5ee1e8811b5486af98b500dd5/h5py-3.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6f13f9b5ce549448c01e4dfe08ea8d1772e6078799af2c1c8d09e941230a90d", size = 4932139 }, + { url = "https://files.pythonhosted.org/packages/25/61/d897952629cae131c19d4c41b2521e7dd6382f2d7177c87615c2e6dced1a/h5py-3.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:21daf38171753899b5905f3d82c99b0b1ec2cbbe282a037cad431feb620e62ec", size = 2954179 }, + { url = "https://files.pythonhosted.org/packages/60/43/f276f27921919a9144074320ce4ca40882fc67b3cfee81c3f5c7df083e97/h5py-3.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e520ec76de00943dd017c8ea3f354fa1d2f542eac994811943a8faedf2a7d5cb", size = 3358040 }, + { url = "https://files.pythonhosted.org/packages/1b/86/ad4a4cf781b08d4572be8bbdd8f108bb97b266a14835c640dc43dafc0729/h5py-3.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e79d8368cd9295045956bfb436656bea3f915beaa11d342e9f79f129f5178763", size = 2892766 }, + { url = "https://files.pythonhosted.org/packages/69/84/4c6367d6b58deaf0fa84999ec819e7578eee96cea6cbd613640d0625ed5e/h5py-3.13.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56dd172d862e850823c4af02dc4ddbc308f042b85472ffdaca67f1598dff4a57", size = 4664255 }, + { url = "https://files.pythonhosted.org/packages/fd/41/bc2df86b72965775f6d621e0ee269a5f3ac23e8f870abf519de9c7d93b4d/h5py-3.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be949b46b7388074c5acae017fbbe3e5ba303fd9daaa52157fdfef30bbdacadd", size = 4927580 }, + { url = "https://files.pythonhosted.org/packages/97/34/165b87ea55184770a0c1fcdb7e017199974ad2e271451fd045cfe35f3add/h5py-3.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:4f97ecde7ac6513b21cd95efdfc38dc6d19f96f6ca6f2a30550e94e551458e0a", size = 2940890 }, +] + [[package]] name = "httpcore" version = "1.0.7"