1 change: 1 addition & 0 deletions README.md
@@ -91,6 +91,7 @@ CodeFuse-ChatBot is an open-source AI assistant developed by the Ant Group CodeFuse team, dedicated to
| model_name | model_size | gpu_memory | quantize | HFhub | ModelScope |
| ------------------ | ---------- | ---------- | -------- | ----- | ---------- |
| chatgpt | - | - | - | - | - |
| [MiniMax-M2.7](https://platform.minimax.io) | - | - | - | - | - |
| codellama-34b-int4 | 34b | 20g | int4 | coming soon| [link](https://modelscope.cn/models/codefuse-ai/CodeFuse-CodeLlama-34B-4bits/summary) |


1 change: 1 addition & 0 deletions README_en.md
@@ -78,6 +78,7 @@ If you need to integrate a specific model, please inform us of your requirements
| model_name | model_size | gpu_memory | quantize | HFhub | ModelScope |
| ------------------ | ---------- | ---------- | -------- | ----- | ---------- |
| chatgpt | - | - | - | - | - |
| [MiniMax-M2.7](https://platform.minimax.io) | - | - | - | - | - |
| codellama-34b-int4 | 34b | 20g | int4 | coming soon| [link](https://modelscope.cn/models/codefuse-ai/CodeFuse-CodeLlama-34B-4bits/summary) |


9 changes: 9 additions & 0 deletions configs/model_config.py.example
@@ -116,6 +116,15 @@ ONLINE_LLM_MODEL = ONLINE_LLM_MODEL or {
"api_key": "",
"provider": "ExampleWorker",
},

# MiniMax OpenAI-compatible API
# Docs: https://platform.minimax.io/docs/api-reference/text-openai-api
"minimax-api": {
"version": "MiniMax-M2.7", # or "MiniMax-M2.7-highspeed"
"api_base_url": "https://api.minimax.io/v1",
"api_key": os.environ.get("MINIMAX_API_KEY", ""),
"provider": "MiniMaxWorker",
},
}

# It is recommended to use a chat model rather than a base model; a base model cannot produce correct output.
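
For reference, the "minimax-api" entry above targets MiniMax's OpenAI-compatible endpoint, so it can be smoke-tested outside the chatbot stack. A minimal sketch, assuming the openai Python package (v1+) is installed and MINIMAX_API_KEY is exported; the base URL, model name, and temperature range are taken from this PR, everything else is illustrative:

import os
from openai import OpenAI

# Point the standard OpenAI client at MiniMax's OpenAI-compatible endpoint.
client = OpenAI(
    base_url="https://api.minimax.io/v1",
    api_key=os.environ["MINIMAX_API_KEY"],
)

# Stream a short reply from MiniMax-M2.7, mirroring what MiniMaxWorker.do_chat sends.
stream = client.chat.completions.create(
    model="MiniMax-M2.7",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    temperature=0.7,   # MiniMax expects temperature in (0.0, 1.0]
    max_tokens=128,
    stream=True,
)
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)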
3 changes: 2 additions & 1 deletion configs/server_config.py.example
@@ -135,7 +135,8 @@ FSCHAT_MODEL_WORKERS = FSCHAT_MODEL_WORKERS or {
'Qwen-72B-Chat-Int4': {'host': DEFAULT_BIND_HOST, 'port': 20020},
'gpt-3.5-turbo': {'host': DEFAULT_BIND_HOST, 'port': 20021},
'example': {'host': DEFAULT_BIND_HOST, 'port': 20022},
'openai-api': {'host': DEFAULT_BIND_HOST, 'port': 20023}
'openai-api': {'host': DEFAULT_BIND_HOST, 'port': 20023},
'minimax-api': {'host': DEFAULT_BIND_HOST, 'port': 20024},
}
# fastchat multi model worker server
FSCHAT_MULTI_MODEL_WORKERS = {
190 changes: 83 additions & 107 deletions examples/model_workers/minimax.py
@@ -4,158 +4,134 @@
import sys
import os
import json
# from server.utils import get_httpx_client
from typing import List, Dict
from loguru import logger
# from configs import logger, log_verbose

log_verbose = os.environ.get("log_verbose", "").lower() in ("1", "true", "yes")  # env vars are strings, so parse as a boolean flag

# MiniMax supported models
MINIMAX_MODELS = ["MiniMax-M2.7", "MiniMax-M2.7-highspeed"]

# Default base URL for MiniMax OpenAI-compatible API
MINIMAX_DEFAULT_BASE_URL = "https://api.minimax.io/v1"


def _clamp_temperature(temperature: float) -> float:
"""Clamp temperature to MiniMax's valid range (0.0, 1.0].

MiniMax does not accept temperature=0. Values at or below 0 are
clamped to a small positive epsilon; values above 1.0 are clamped to 1.0.
"""
if temperature is None:
return 1.0
if temperature <= 0:
return 0.01
if temperature > 1.0:
return 1.0
return temperature
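

# Illustrative behavior of the clamping above (values follow directly from the code):
#   _clamp_temperature(None) -> 1.0
#   _clamp_temperature(0.0)  -> 0.01
#   _clamp_temperature(0.7)  -> 0.7
#   _clamp_temperature(1.5)  -> 1.0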


class MiniMaxWorker(ApiModelWorker):
DEFAULT_EMBED_MODEL = "embo-01"
"""MiniMax model worker using the OpenAI-compatible Chat Completions API.

Supports MiniMax-M2.7 and MiniMax-M2.7-highspeed models via the
standard /v1/chat/completions endpoint at api.minimax.io.

Configuration:
api_key: MiniMax API key (or set MINIMAX_API_KEY env var)
api_base_url: API base URL (default: https://api.minimax.io/v1)
version: Model name (default: MiniMax-M2.7)
"""

def __init__(
self,
*,
model_names: List[str] = ["minimax-api"],
controller_addr: str = None,
worker_addr: str = None,
version: str = "abab5.5-chat",
version: str = "MiniMax-M2.7",
**kwargs,
):
kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr)
kwargs.setdefault("context_len", 16384)
kwargs.update(
model_names=model_names,
controller_addr=controller_addr,
worker_addr=worker_addr,
)
kwargs.setdefault("context_len", 204800)
super().__init__(**kwargs)
self.version = version

def validate_messages(self, messages: List[Dict]) -> List[Dict]:
role_maps = {
"user": self.user_role,
"assistant": self.ai_role,
"system": "system",
}
messages = [{"sender_type": role_maps[x["role"]], "text": x["content"]} for x in messages]
return messages

def do_chat(self, params: ApiChatParams) -> Dict:
        # Following the official site's recommendation, call the abab 5.5 model directly
        # TODO: support specifying reply requirements, and specifying user and AI names
params.load_config(self.model_names[0])

url = 'https://api.minimax.chat/v1/text/chatcompletion{pro}?GroupId={group_id}'
pro = "_pro" if params.is_pro else ""
api_key = params.api_key or os.environ.get("MINIMAX_API_KEY", "")
base_url = (params.api_base_url or MINIMAX_DEFAULT_BASE_URL).rstrip("/")
url = f"{base_url}/chat/completions"

headers = {
"Authorization": f"Bearer {params.api_key}",
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
}
messages = self.validate_messages(params.messages)

temperature = _clamp_temperature(params.temperature)

data = {
"model": params.version,
"model": params.version or self.version,
"messages": params.messages,
"stream": True,
"mask_sensitive_info": True,
"messages": messages,
"temperature": params.temperature,
"top_p": params.top_p,
"tokens_to_generate": params.max_tokens or 1024,
            # TODO: the parameters below are MiniMax-specific; passing empty values causes errors.
# "prompt": params.system_message or self.conv.system_message,
# "bot_setting": [],
# "role_meta": params.role_meta,
"temperature": temperature,
"max_tokens": params.max_tokens or 1024,
}

if log_verbose:
logger.info(f'{self.__class__.__name__}:data: {data}')
logger.info(f'{self.__class__.__name__}:url: {url.format(pro=pro, group_id=params.group_id)}')
logger.info(f'{self.__class__.__name__}:headers: {headers}')
logger.info(f"{self.__class__.__name__}:url: {url}")
logger.info(f"{self.__class__.__name__}:data: {data}")

with get_httpx_client() as client:
response = client.stream("POST",
url.format(pro=pro, group_id=params.group_id),
headers=headers,
json=data)
response = client.stream("POST", url, headers=headers, json=data)
with response as r:
text = ""
for e in r.iter_text():
                    if not e.startswith("data: "):  # truly an excellent response format
for line in r.iter_lines():
if not line or not line.startswith("data: "):
continue
payload = line[6:].strip()
if payload == "[DONE]":
break
try:
chunk = json.loads(payload)
except json.JSONDecodeError:
continue

if error := chunk.get("error"):
data = {
"error_code": 500,
"text": f"minimax返回错误的结果:{e}",
"error": {
"message": f"minimax返回错误的结果:{e}",
"type": "invalid_request_error",
"param": None,
"code": None,
}
"error_code": 500,
"text": error.get("message", str(error)),
"error": error,
}
self.logger.error(f"请求 MiniMax API 时发生错误:{data}")
self.logger.error(
f"MiniMax API error: {data}"
)
yield data
continue

data = json.loads(e[6:])
if data.get("usage"):
break
return

if choices := data.get("choices"):
if chunk := choices[0].get("delta", ""):
text += chunk
if choices := chunk.get("choices"):
delta = choices[0].get("delta", {})
if content := delta.get("content", ""):
text += content
yield {"error_code": 0, "text": text}

def do_embeddings(self, params: ApiEmbeddingsParams) -> Dict:
params.load_config(self.model_names[0])
url = f"https://api.minimax.chat/v1/embeddings?GroupId={params.group_id}"

headers = {
"Authorization": f"Bearer {params.api_key}",
"Content-Type": "application/json",
}

data = {
"model": params.embed_model or self.DEFAULT_EMBED_MODEL,
"texts": [],
"type": "query" if params.to_query else "db",
}
if log_verbose:
logger.info(f'{self.__class__.__name__}:data: {data}')
logger.info(f'{self.__class__.__name__}:url: {url}')
logger.info(f'{self.__class__.__name__}:headers: {headers}')

with get_httpx_client() as client:
result = []
i = 0
batch_size = 10
while i < len(params.texts):
texts = params.texts[i:i+batch_size]
data["texts"] = texts
r = client.post(url, headers=headers, json=data).json()
if embeddings := r.get("vectors"):
result += embeddings
elif error := r.get("base_resp"):
data = {
"code": error["status_code"],
"msg": error["status_msg"],
"error": {
"message": error["status_msg"],
"type": "invalid_request_error",
"param": None,
"code": None,
}
}
self.logger.error(f"请求 MiniMax API 时发生错误:{data}")
return data
i += batch_size
return {"code": 200, "data": embeddings}

def get_embeddings(self, params):
        # TODO: support embeddings
print("embedding")
print(params)

def make_conv_template(self, conv_template: str = None, model_path: str = None) -> Conversation:
        # TODO: confirm whether the template needs to be modified
def make_conv_template(
self, conv_template: str = None, model_path: str = None
) -> Conversation:
return conv.Conversation(
name=self.model_names[0],
system_message="你是MiniMax自主研发的大型语言模型,回答问题简洁有条理。",
system_message="You are MiniMax, a helpful AI assistant.",
messages=[],
roles=["USER", "BOT"],
roles=["user", "assistant"],
sep="\n### ",
stop_str="###",
)