62 lines
2.2 KiB
Python
62 lines
2.2 KiB
Python
import ollama
|
|
|
|
class OllamaProvider:
    """Thin wrapper around ``ollama.chat`` for streaming chat completions.

    Both public methods request a streamed response for ``self.model`` and
    convert any exception into a user-facing error string instead of
    propagating it.
    """

    def __init__(self, model):
        """Remember the model name that is passed to every ``ollama.chat`` call."""
        self.model = model
        # Default system prompt; the methods of this class do not read it.
        self.sys_prompt = "You are a helpful assistant."

    def _open_stream(self, messages, temperature, max_tokens):
        """Start a streaming ``ollama.chat`` request and return its chunk iterator."""
        return ollama.chat(
            model=self.model,
            messages=messages,
            stream=True,
            options={
                "temperature": temperature,
                "num_predict": max_tokens,
            },
        )

    @staticmethod
    def _format_error(error):
        """Turn an exception into the error string shown to the user."""
        error_msg = str(error)
        # A "502" anywhere in the message is reported as a VPN/proxy hint.
        if "502" in error_msg:
            return "Ошибка: Проверьте наличие VPN или Proxy."
        return f"Ошибка: {error_msg}"

    def generate_stream(self, messages, temperature=0.8, max_tokens=300):
        """Stream a reply to stdout and return the complete text.

        Args:
            messages: Chat messages forwarded unchanged to ``ollama.chat``.
            temperature: Value sent as the ``temperature`` option.
            max_tokens: Value sent as the ``num_predict`` option.

        Returns:
            The concatenated content of all streamed chunks, or an error
            string starting with ``"Ошибка: "`` if any exception occurs
            (text received before the failure is discarded).
        """
        try:
            parts = []
            for chunk in self._open_stream(messages, temperature, max_tokens):
                content = chunk['message']['content']
                # Echo each piece immediately so the console shows live output.
                print(content, end='', flush=True)
                parts.append(content)
            return "".join(parts)
        except Exception as e:
            return self._format_error(e)

    def generate_stream_tokens(self, messages, temperature=0.8, max_tokens=300):
        """Yield the reply piece by piece as it is streamed.

        Args:
            messages: Chat messages forwarded unchanged to ``ollama.chat``.
            temperature: Value sent as the ``temperature`` option.
            max_tokens: Value sent as the ``num_predict`` option.

        Yields:
            The content string of each streamed chunk. If an exception
            occurs, a single error string starting with ``"Ошибка: "`` is
            yielded as the final item.
        """
        try:
            for chunk in self._open_stream(messages, temperature, max_tokens):
                yield chunk['message']['content']
        except Exception as e:
            # This is a generator: a ``return <value>`` here would only set
            # StopIteration.value and the consumer would never see the
            # message, so the error text is yielded instead.
            yield self._format_error(e)