# Deploy with Ollama: install the runtime, then pull & run Mistral Small.
curl -fsSL https://ollama.com/install.sh | sh
# Default tag; the explicit tags below pick a quantization level
# (q4_K_M = smallest, q8_0 = higher fidelity, fp16 = full precision).
ollama run mistral-small
ollama run mistral-small:24b-instruct-2501-q4_K_M
ollama run mistral-small:24b-instruct-2501-q8_0
ollama run mistral-small:24b-instruct-2501-fp16
# Deploy with vLLM: isolated conda env, install dependencies, then serve.
conda create -n mistral_env python=3.10
conda activate mistral_env
pip install --upgrade torchvision vllm xformers
pip install --upgrade vllm
pip install --upgrade mistral_common
# Required to download the gated model weights from Hugging Face.
huggingface-cli login
# Serve using Mistral-native tokenizer/config/weight formats,
# with automatic tool-choice and the Mistral tool-call parser enabled.
vllm serve mistralai/Mistral-Small-24B-Instruct-2501 --tokenizer_mode mistral --config_format mistral --load_format mistral --enable-auto-tool-choice --tool-call-parser mistral
# Install client libraries (Colab/IPython cell syntax).
!pip install mistralai gradio requests
import os
from mistralai import Mistral
from google.colab import userdata

# Read the API key from Colab's secret store instead of hard-coding it.
api_key = userdata.get('MISTRAL_API_KEY')
model = "mistral-small-latest"
client = Mistral(api_key=api_key)
# Single-turn request to the Mistral chat API; print the model's reply.
prompt_messages = [{"role": "user", "content": "写一篇200字的科幻小说?"}]
chat_response = client.chat.complete(model=model, messages=prompt_messages)
print(chat_response.choices[0].message.content)
import os
import json
import requests
import gradio as gr
from google.colab import userdata

# API configuration: key comes from Colab's secret store (never hard-coded).
API_KEY = userdata.get('FIREWORKS_API_KEY')
# OpenAI-compatible chat-completions endpoint on Fireworks AI.
API_URL = "https://api.fireworks.ai/inference/v1/chat/completions"
def generate_response(message, history):
    """Generate a single-turn reply via the Fireworks AI chat-completions API.

    Args:
        message: The user's latest message text.
        history: Prior chat turns. Currently unused — only the latest
            message is sent, so the model sees no conversation context.

    Returns:
        The assistant's reply text on success, or a human-readable error
        string on failure (the Gradio UI renders either one).
    """
    payload = {
        "model": "accounts/fireworks/models/mistral-small-24b-instruct-2501",
        "max_tokens": 4096,
        "top_p": 1,
        "top_k": 40,
        "presence_penalty": 0,
        "frequency_penalty": 0,
        "temperature": 0.6,
        "messages": [
            {
                "role": "user",
                "content": message
            }
        ]
    }
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_KEY}"
    }
    try:
        # json= serializes the body and sets Content-Type in one step;
        # the timeout keeps the UI from hanging forever if the API stalls.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
        # Raise on HTTP 4xx/5xx so the except branch reports it.
        response.raise_for_status()
        result = response.json()
        return result['choices'][0]['message']['content']
    except Exception as e:
        # Surface the error in the chat window instead of crashing the UI.
        return f"发生错误: {str(e)}"
# 自定义CSS样式
custom_css = """
#component-0 {
position: fixed;
top: 0;
left: 0;
right: 0;
bottom: 0;
display: flex;
flex-direction: column;
}
#chatbot {
flex-grow: 1;
overflow-y: auto;
margin: 20px;
padding: 20px;
}
#input-container {
position: fixed;
bottom: 0;
left: 0;
right: 0;
padding: 20px;
background-color: white;
box-shadow: 0 -2px 5px rgba(0,0,0,0.1);
}
.message {
padding: 15px;
margin: 10px 0;
border-radius: 10px;
}
.user-message {
background-color: #f0f0f0;
}
.bot-message {
background-color: #e6f3ff;
}
"""
# Build the Gradio chat interface.
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("# Fireworks AI 聊天界面")
    # Main conversation pane; #chatbot CSS makes it fill the remaining space.
    chatbot = gr.Chatbot(
        elem_id="chatbot",
        bubble_full_width=False,
        show_label=False,
        height=600
    )
    # Input row pinned to the bottom of the viewport (see #input-container CSS).
    with gr.Row(elem_id="input-container"):
        with gr.Column(scale=20):
            input_text = gr.Textbox(
                placeholder="输入消息...",
                label="输入",
                lines=1,
                max_lines=5,
                show_label=False
            )
        with gr.Column(scale=1, min_width=50):
            submit_btn = gr.Button("发送", variant="primary")
    # Button to reset the conversation.
    clear_btn = gr.Button("清空对话", variant="secondary")
    # Event handlers
def user_input(user_message, history):
    """Append the user's turn (reply slot left as None) and clear the textbox."""
    updated_history = list(history) + [[user_message, None]]
    return "", updated_history
def bot_response(history):
    """Fill in the pending assistant reply for the most recent chat turn."""
    latest_turn = history[-1]
    # Query the API with the newest user message and store the answer in
    # the placeholder slot that user_input() left as None.
    latest_turn[1] = generate_response(latest_turn[0], history)
    return history
def clear_history():
    """Return an empty transcript, resetting the chatbot display."""
    return []
# Wire events: the Enter key and the send button share the same two-step
# flow — first echo the user's message into the transcript (queue=False so
# this step is not queued), then fill in the bot's reply.
input_text.submit(
    user_input,
    [input_text, chatbot],
    [input_text, chatbot],
    queue=False
).then(
    bot_response,
    chatbot,
    chatbot
)
submit_btn.click(
    user_input,
    [input_text, chatbot],
    [input_text, chatbot],
    queue=False
).then(
    bot_response,
    chatbot,
    chatbot
)
# Clearing takes no inputs and replaces the chatbot contents with [].
clear_btn.click(clear_history, None, chatbot)
# Launch the UI; share=True creates a public link (needed to reach the
# app from Colab), height sets the inline display height.
demo.launch(
    share=True,
    height=800,
)