Commit ea7d754

update app, add model param
1 parent 242613c commit ea7d754

File tree

Dockerfile
app.py
example/ollama_app.py

3 files changed: +23 -9 lines changed

Dockerfile

Lines changed: 7 additions & 6 deletions

@@ -3,21 +3,22 @@ FROM python:3.10
 ADD . /workspace/code-repo
 WORKDIR /workspace/code-repo
 
-RUN pip install fastapi uvicorn
+RUN pip3 install fastapi uvicorn
 RUN pip3 install requests
+RUN pip3 install jinja2
 
 ENV PYTHONPATH /workspace/code-repo
 
 RUN apt-get update && apt-get install -y curl
 
-RUN curl -fsSL https://ollama.com/install.sh | sh
-
-RUN ollama serve
-RUN ollama run llama3.2
+#RUN curl -fsSL https://ollama.com/install.sh | sh
+#RUN ollama serve
+#RUN ollama run llama3.2
 
 ENV FLASK_RUN_HOST=0.0.0.0
 
 EXPOSE 8000
 
 CMD sh -c "ollama serve & ollama run llama3.2"
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000", "--load-balancer", "sunrpc"]
+#CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000", "--load-balancer", "sunrpc"]
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]

app.py

Lines changed: 14 additions & 2 deletions

@@ -73,13 +73,25 @@ async def generate(request: Request):
     request_dict = await request.json()
     model = request_dict.get("model", "")
     messages = request_dict.get("messages", dict())
-    stream = request_dict.get("stream", False)
     prompt = messages[-1].get("content", "")
+    stream = request_dict.get("stream", False)
+
+    infer_param = request_dict.get("infer_param", dict())
+    max_tokens = infer_param.get('max_tokens', 4096)
+    temperature = infer_param.get('temperature', 0.9)
+    top_p = infer_param.get('top_p', 0.9)
+    n = infer_param.get('n', 1)
+    stop = infer_param.get('stop', [])
 
     data = {
         "model": model,
         "prompt": prompt,
-        "stream": stream
+        "stream": stream,
+        "max_tokens": max_tokens,
+        "temperature": temperature,
+        "top_p": top_p,
+        "n": n,
+        "stop": stop,
     }
     # url_generate = "http://127.0.0.1:11434/api/generate"
 
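
For reference, a sketch of the request body the updated generate() handler parses. The field names come straight from the diff; the values shown are the defaults the handler falls back to when an infer_param key is omitted (the route path itself is outside this hunk):

# Hypothetical payload for the updated generate() handler; keys mirror
# the diff above, values are the handler's own defaults.
payload = {
    "model": "llama3.2",
    "messages": [{"role": "user", "content": "hello"}],
    "stream": False,
    "infer_param": {
        "max_tokens": 4096,
        "temperature": 0.9,
        "top_p": 0.9,
        "n": 1,
        "stop": [],
    },
}

Note that only messages[-1]["content"] is forwarded as the prompt; earlier turns in messages are ignored by this handler.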

example/ollama_app.py

Lines changed: 2 additions & 1 deletion

@@ -8,7 +8,8 @@ def run():
     prompt = 'hello'
     model = 'llama3.2'
     messages = [{"role": "user", "content": prompt}]
-    data = {'model': model, 'messages': messages}
+    infer_param = {}
+    data = {'engine': 'ollama', 'model': model, 'messages': messages, 'infer_param': infer_param}
     headers = {"Content-Type": "application/json"}
     response = requests.post(url, headers=headers, data=json.dumps(data))
 
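
The example sends an empty infer_param, so the server-side defaults apply. A sketch of the same call with explicit parameters; the key names follow the ones app.py reads, and the values here are purely illustrative:

# Hypothetical variant of the example above with infer_param filled in.
infer_param = {'max_tokens': 1024, 'temperature': 0.7, 'top_p': 0.9, 'n': 1, 'stop': []}
data = {'engine': 'ollama', 'model': model, 'messages': messages, 'infer_param': infer_param}
response = requests.post(url, headers=headers, data=json.dumps(data))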
