Skip to content

Commit dee9567

Browse files
committed
v0.0.4 - new params
1 parent b57b7d7 commit dee9567

File tree

4 files changed

+105
-61
lines changed

4 files changed

+105
-61
lines changed

pandas_llm/example-chatbot.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def main():
5555
print("No OpenAI API key provided. Exiting.")
5656
return
5757

58-
conv_df = PandasLLM(data=df, openai_api_key = openai_key)
58+
conv_df = PandasLLM(data=df, llm_api_key = openai_key)
5959
print()
6060
banner = """
6161
Welcome to the Donation Data CLI.

pandas_llm/example.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
('Olivia Jackson', 29, 55)]
1818
df = pd.DataFrame(data, columns=['name', 'age', 'donation'])
1919

20-
conv_df = PandasLLM(data=df, openai_api_key = os.environ.get("OPENAI_API_KEY"))
20+
conv_df = PandasLLM(data=df, llm_api_key = os.environ.get("OPENAI_API_KEY"))
2121
result = conv_df.prompt("What is the average donation of people older than 30 who donated more than $50?")
2222

2323
print(f"Result ({type(result)}):\n {result}")

pandas_llm/pandas_llm.py

+101-57
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
import pandas as pd
22
import datetime
33
import numpy as np
4-
import time
54
import openai
65
import os
76
from sandbox import Sandbox
87
import re
98
import json
109

1110
class PandasLLM(pd.DataFrame):
11+
"""
12+
PandasLLM is a subclass of the Pandas DataFrame class. It is designed to provide a
13+
wrapper around the OpenAI API.
14+
"""
1215

1316
code_blocks = [r'```python(.*?)```',r'```(.*?)```']
1417

@@ -17,31 +20,63 @@ class PandasLLM(pd.DataFrame):
1720
llm_engine = "openai"
1821
llm_default_params = { "model": llm_default_model,
1922
"temperature": llm_default_temperature}
20-
openai_api_key = None
23+
llm_api_key = None
2124

2225
prompt_override = False
2326
custom_prompt = ""
2427
data_privacy = True
2528
path = None
2629
verbose = False
2730
code_block = ""
31+
force_sandbox = False
2832
def __init__(self,
29-
data=None,
30-
llm_engine = "openai", llm_params=llm_default_params,
31-
prompt_override = False,
32-
custom_prompt = "",
33-
path = None,
34-
verbose = False,
35-
data_privacy = True,
36-
openai_api_key = None,
33+
data,
34+
llm_engine:str = "openai", llm_params=llm_default_params,
35+
prompt_override:bool = False,
36+
custom_prompt:str = "",
37+
path:str = None,
38+
verbose:bool = False,
39+
data_privacy:bool = True,
40+
llm_api_key:str = None,
41+
force_sandbox:bool = False,
3742
*args, **kwargs):
43+
"""
44+
This is the constructor for the PandasLLM class. It takes in the following arguments:
45+
data: The data to be used. It can be a Pandas DataFrame, a list of lists, a list of tuples,
46+
a list of dictionaries, a dictionary, a string, or a list.
47+
llm_engine: The name of the OpenAI engine to use.
48+
llm_params: A dictionary of parameters to be used with the OpenAI API.
49+
prompt_override: A boolean that determines whether or not the prompt is overridden.
50+
custom_prompt: A string that overrides the prompt.
51+
path: The path to the file to be used.
52+
verbose: A boolean that determines whether or not the output is verbose.
53+
data_privacy: A boolean that determines whether or not the data is private.
54+
llm_api_key: The OpenAI API key to be used.
55+
force_sandbox: if False and the sandbox fails, it will retry using eval (less safe)
56+
57+
The constructor also calls the parent class's constructor.
58+
3859
60+
Args:
61+
data (pandas dataframe, mandatory): dataset to query.
62+
llm_engine (str, optional): LLM engine, currently only OpenAI is supported. Defaults to "openai".
63+
llm_params (dict, optional): LLM engine parameters. Defaults to model=gpt-3.5-turbo and temperature=0.2.
64+
prompt_override (bool, optional): if True, the custom prompt is mandatory and it will become the main prompt. Defaults to False.
65+
custom_prompt (str, optional): if prompt_override is False, the custom prompt will be added to the default pandas_llm prompt. Defaults to "".
66+
path (str, optional): the path where the files containing debug data will be saved. Defaults to None.
67+
verbose (bool, optional): if True debugging info will be printed. Defaults to False.
68+
data_privacy (bool, optional): if True, the function will not send the data content to OpenAI. Defaults to True.
69+
llm_api_key (str, optional): the OpenAI API key. Defaults to None, in which case the OPENAI_API_KEY environment variable is used.
70+
force_sandbox (bool, optional): if False and the sandbox fails, it will retry using eval (less safe). Defaults to False.
71+
"""
72+
73+
3974
super().__init__(data, *args, **kwargs)
4075

4176
self.llm_params = llm_params or {}
4277

4378
# Set up OpenAI API key from the environment or the config
44-
self.openai_api_key = openai_api_key or os.environ.get("OPENAI_API_KEY")
79+
self.llm_api_key = llm_api_key or os.environ.get("OPENAI_API_KEY")
4580

4681
self.llm_engine = llm_engine
4782
self.llm_params = llm_params or {}
@@ -54,8 +89,9 @@ def __init__(self,
5489
self.data_privacy = data_privacy
5590
self.path = path
5691
self.verbose = verbose
92+
self.force_sandbox = force_sandbox
5793

58-
def buildPromptForRole(self):
94+
def _buildPromptForRole(self):
5995
prompt_role = f"""
6096
I want you to act as a data scientist and Python coder. I want you code for me.
6197
I have a dataset of {len(self)} rows and {len(self.columns)} columns.
@@ -68,7 +104,7 @@ def buildPromptForRole(self):
68104

69105
return prompt_role
70106

71-
def buildPromptForProblemSolving(self, request):
107+
def _buildPromptForProblemSolving(self, request):
72108

73109
if self.prompt_override:
74110
return self.custom_prompt
@@ -105,7 +141,7 @@ def buildPromptForProblemSolving(self, request):
105141

106142
return prompt_problem
107143

108-
def extractPythonCode(self, text: str, regexp: str) -> str:
144+
def _extractPythonCode(self, text: str, regexp: str) -> str:
109145
# Define the regular expression pattern for the Python code block
110146
pattern = regexp
111147

@@ -119,44 +155,44 @@ def extractPythonCode(self, text: str, regexp: str) -> str:
119155
# If no match is found, return an empty string
120156
return ""
121157

122-
def print(self, *args, **kwargs):
158+
def _print(self, *args, **kwargs):
123159
if self.verbose:
124160
print(*args, **kwargs)
125161

126-
def variable_to_string(self, variable):
127-
if variable is None: return None
128-
try:
129-
130-
if isinstance(variable, pd.Series):
131-
# convert to dataframe
132-
variable = variable.to_frame()
133-
134-
if isinstance(variable, pd.DataFrame):
135-
variable = variable.drop_duplicates()
136-
if len(variable) == 0: return None
137-
return str(variable)
138-
139-
elif isinstance(variable, np.ndarray):
140-
if len(variable) == 0: return None
141-
return np.array2string(variable)
142-
else:
143-
# Convert the variable to a string
144-
return str(variable)
145-
except Exception as e:
146-
return str(variable)
162+
# def _variable_to_string(self, variable):
163+
# if variable is None: return None
164+
# try:
165+
166+
# if isinstance(variable, pd.Series):
167+
# # convert to dataframe
168+
# variable = variable.to_frame()
169+
170+
# if isinstance(variable, pd.DataFrame):
171+
# variable = variable.drop_duplicates()
172+
# if len(variable) == 0: return None
173+
# return str(variable)
174+
175+
# elif isinstance(variable, np.ndarray):
176+
# if len(variable) == 0: return None
177+
# return np.array2string(variable)
178+
# else:
179+
# # Convert the variable to a string
180+
# return str(variable)
181+
# except Exception as e:
182+
# return str(variable)
147183

148184

149-
def save(self,name,value):
185+
def _save(self,name,value):
150186
if self.path is None or self.path == "":
151187
return
152188
try:
153189
with open(f"{self.path}/{name}", 'w') as file:
154190
file.write(value)
155191
except Exception as e:
156-
self.print(f"error {e}")
192+
self._print(f"error {e}")
157193
return
158194

159-
def execInSandbox(self, df, generated_code:str):
195+
def _execInSandbox(self, df, generated_code:str):
160196

161197
# Create a Sandbox instance and allow pandas to be imported
162198
sandbox = Sandbox()
@@ -175,25 +211,32 @@ def execInSandbox(self, df, generated_code:str):
175211
# Combine the initial code and the generated code
176212
full_code = initial_code + "\n" + generated_code
177213

178-
self.save("temp/prompt_code.py",full_code)
214+
self._save("temp/prompt_code.py",full_code)
179215
# Execute the combined code in the Sandbox
180216
sandbox_result = sandbox.execute(full_code, {"df":df})
181217

182218
# Get the result from the local_vars dictionary
183219
result = sandbox_result.get("result")
184220
return result
185221

222+
def prompt(self, request: str):
223+
"""
186224
225+
Args:
226+
request (str): prompt containing the request. it must be expressed as a question or a problem to solve
187227
188-
def prompt(self, request: str):
228+
Returns:
229+
Any: contains the result or solution of the problem. Typically the result data type is a dataframe, a Series or a float
230+
"""
231+
189232
# Set up OpenAI API key
190-
openai.api_key = self.openai_api_key
233+
openai.api_key = self.llm_api_key
191234

192235
messages=[
193236
{"role": "system",
194-
"content": self.buildPromptForRole()},
237+
"content": self._buildPromptForRole()},
195238
{"role": "user",
196-
"content": self.buildPromptForProblemSolving(request)
239+
"content": self._buildPromptForProblemSolving(request)
197240
}
198241
]
199242

@@ -207,13 +250,13 @@ def prompt(self, request: str):
207250
)
208251
break;
209252
except Exception as e:
210-
self.print(f"error {e}")
253+
self._print(f"error {e}")
211254
continue
212255

213256
if response is None:
214257
return "Please try later"
215258

216-
self.save("temp/prompt_cmd.json",json.dumps(messages, indent=4))
259+
self._save("temp/prompt_cmd.json",json.dumps(messages, indent=4))
217260

218261
generated_code = response.choices[0].message.content
219262
if generated_code == "" or generated_code is None:
@@ -224,7 +267,7 @@ def prompt(self, request: str):
224267

225268
results=[]
226269
for regexp in self.code_blocks:
227-
cleaned_code = self.extractPythonCode(generated_code,regexp)
270+
cleaned_code = self._extractPythonCode(generated_code,regexp)
228271
if cleaned_code == "" or cleaned_code is None:
229272
continue
230273
results.append(cleaned_code)
@@ -233,19 +276,20 @@ def prompt(self, request: str):
233276
if len(results) == 0:
234277
return None
235278

279+
result = None
236280
for cleaned_code in results:
237-
238-
result = None
281+
239282
try:
240283
result = self.execInSandbox(self, cleaned_code)
241284
except Exception as e:
242-
self.print(f"error {e}")
243-
try:
244-
expression = re.sub(r"^\s*result\s*=", "", cleaned_code).strip()
245-
result = eval(expression, {'df': self, 'pd': pd, 'np': np, 'datetime': datetime, 'result': result})
246-
except Exception as e:
247-
self.print(f"error {e}")
248-
pass
285+
self._print(f"error {e}")
286+
if not self.force_sandbox:
287+
try:
288+
expression = re.sub(r"^\s*result\s*=", "", cleaned_code).strip()
289+
result = eval(expression, {'df': self, 'pd': pd, 'np': np, 'datetime': datetime, 'result': result})
290+
except Exception as e:
291+
self._print(f"error {e}")
292+
pass
249293

250294
if result is not None and str(result) != "":
251295
break
@@ -257,6 +301,6 @@ def prompt(self, request: str):
257301
# currently the privacy option is not needed.
258302
# in the future, we can choose to send data to LLM if privacy is set to false
259303

260-
return None
304+
return result
261305

262306

setup.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
setup(
88
name='pandas_llm', # should match the package folder
9-
version='0.0.3', # important for updates
9+
version='0.0.4', # important for updates
1010
license='MIT', # should match your chosen license
1111
description='Conversational Pandas Dataframes',
1212
long_description=long_description, # loads your README.md
@@ -51,5 +51,5 @@
5151
"urllib3",
5252
"yarl",
5353
],
54-
download_url="https://github.com/DashyDashOrg/pandas-llm/releases/tag/v0.0.3",
54+
download_url="https://github.com/DashyDashOrg/pandas-llm/releases/tag/v0.0.4",
5555
)

0 commit comments

Comments
 (0)