heaven_base.unified_chat
import json
import os
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, BaseMessage
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_anthropic import ChatAnthropic
from langchain_openai import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_groq import ChatGroq
from langchain_deepseek import ChatDeepSeek
from enum import Enum
from .utils.get_env_value import EnvConfigUtil, DynamicString
import requests


def get_uni_api_url():
    """Resolve the chat-completions URL of the uni-api service.

    Resolution order:

    1. ``uni_api_url`` from ``configs/uni_api_config.json`` next to this
       module, when the file loads and the value is non-empty.  The URL is
       returned as-is; it is not probed for reachability.
    2. The IP address of the Docker container named ``uni-api`` (port 8000),
       obtained with ``docker inspect``.
    3. The fixed fallback ``http://host.docker.internal:8002/...``.

    Failures in steps 1 and 2 are printed to stdout and never raised.

    Returns:
        str: URL of the ``/v1/chat/completions`` endpoint.
    """

    def discover_uni_api_ip():
        """Return the uni-api URL built from the container IP, or None."""
        try:
            import subprocess
            result = subprocess.run(
                # The space after {{.IPAddress}} separates the addresses of a
                # container attached to several networks; without it they
                # are glued together into one invalid host name.
                ['docker', 'inspect', 'uni-api', '--format',
                 '{{range .NetworkSettings.Networks}}{{.IPAddress}} {{end}}'],
                capture_output=True, text=True, check=True,
                # Do not hang forever on an unresponsive Docker daemon;
                # TimeoutExpired is handled by the except clause below.
                timeout=10,
            )
            addresses = result.stdout.split()
            if addresses:
                return f'http://{addresses[0]}:8000/v1/chat/completions'
        except Exception as e:
            print(f"Warning: Could not discover uni-api container IP: {e}")
        return None

    try:
        config_path = os.path.join(os.path.dirname(__file__), 'configs', 'uni_api_config.json')
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
        configured_url = config.get('uni_api_url')

        # A configured URL always wins; discovery only runs when the config
        # is missing, unreadable, or has no 'uni_api_url' value.
        if configured_url:
            return configured_url

    except Exception as e:
        print(f"Warning: Could not load uni-api config: {e}")

    # Try automatic discovery
    discovered_url = discover_uni_api_ip()
    if discovered_url:
        print(f"Auto-discovered uni-api URL: {discovered_url}")
        return discovered_url

    # Final fallback
    print("Using fallback uni-api URL")
    return 'http://host.docker.internal:8002/v1/chat/completions'


class ProviderEnum(Enum):
    """Chat-model providers that ``UnifiedChat`` can instantiate.

    ``create`` uses the member *name* to build environment keys such as
    ``ANTHROPIC_API_KEY`` and ``ANTHROPIC_MODEL``.
    """

    ANTHROPIC = "anthropic"
    OPENAI = "openai"
    GOOGLE = "google"
    GROQ = "groq"
    DEEPSEEK = "deepseek"


class UnifiedChat:
    """Factory for provider chat models plus a direct uni-api client."""

    # Provider -> LangChain chat-model class instantiated by ``create``.
    PROVIDERS = {
        ProviderEnum.ANTHROPIC: ChatAnthropic,
        ProviderEnum.OPENAI: ChatOpenAI,
        ProviderEnum.GOOGLE: ChatGoogleGenerativeAI,
        ProviderEnum.GROQ: ChatGroq,
        ProviderEnum.DEEPSEEK: ChatDeepSeek
    }

    @classmethod
    def create(
        cls,
        provider: ProviderEnum = ProviderEnum.ANTHROPIC,
        model: str = None,
        **kwargs
    ) -> BaseChatModel:
        """
        Create a unified chat model instance from any supported provider.

        The API key always comes from the ``<PROVIDER>_API_KEY`` environment
        value (any ``api_key`` in ``kwargs`` is replaced).  When ``model`` is
        falsy, the model name comes from ``<PROVIDER>_MODEL``.

        An optional ``thinking_budget`` in ``kwargs`` is translated per provider:

        - OpenAI: removed from ``kwargs``; for models whose name contains
          ``o1``/``o3``/``o4`` it selects the reasoning effort
          (> 6144 "high", > 3072 "medium", otherwise "low").
        - Google: forwarded together with ``include_thoughts=True`` only for
          model names containing ``2.5`` or ``3.``; otherwise both keys are
          removed.
        - Anthropic: enables ``thinking`` when ``extract_model_number(model)``
          is above 3.6.
        - Other providers: left in ``kwargs`` unchanged.

        Args:
            provider (ProviderEnum): The provider Enum (anthropic, openai, google, groq, deepseek).
            model (str, optional): Specific model name.
            **kwargs: Additional configuration for the model.

        Returns:
            BaseChatModel: Instantiated chat model.

        Raises:
            ValueError: If ``provider`` is not a key of ``PROVIDERS``.
        """
        if provider not in cls.PROVIDERS:
            raise ValueError(f"Unsupported provider: {provider}. Supported providers: {list(cls.PROVIDERS.keys())}")
        model_cls = cls.PROVIDERS[provider]

        # Provider-specific settings are resolved lazily from the environment.
        env_prefix = provider.name
        kwargs["api_key"] = DynamicString(EnvConfigUtil.get_env_value, f"{env_prefix}_API_KEY")
        if not model:
            model = DynamicString(EnvConfigUtil.get_env_value, f"{env_prefix}_MODEL")
        kwargs["model"] = model

        # Read the budget but leave it in kwargs; each branch decides
        # whether the underlying model class should still receive it.
        budget = kwargs.get('thinking_budget')

        if provider == ProviderEnum.OPENAI:
            kwargs.pop("thinking_budget", None)
            kwargs.update(
                callbacks=None,
                verbose=True,
                use_responses_api=True,
                model_kwargs={"reasoning": {"effort": None, "summary": "auto"}},
            )
            is_o_series = any(tag in model for tag in ("o3", "o1", "o4"))
            if not is_o_series:
                # e.g. 4o / 4o-mini: make sure a temperature is present.
                if 'temperature' not in kwargs:
                    kwargs['temperature'] = float(EnvConfigUtil.get_env_value("MODEL_DEFAULT_TEMP", 0.7))
            else:
                # o-series models do not take a temperature.
                kwargs.pop("temperature", None)
                # TODO we might need to make a dedicated chatgpt ui for setting reasoning-effort
                if budget is None:
                    effort = None
                elif budget > 1024 * 6:
                    effort = "high"
                elif budget > 1024 * 3:
                    effort = "medium"
                else:
                    effort = "low"
                if effort:
                    kwargs.setdefault("model_kwargs", {})
                    kwargs["model_kwargs"]["reasoning"] = {
                        "effort": effort,
                        "summary": "auto",
                    }

        elif provider == ProviderEnum.GOOGLE:
            gemini_can_think = any(marker in model for marker in ("2.5", "3."))  # Gemini >= 2.5
            if budget is None or not gemini_can_think:
                # Strip both keys so no thought tracing is requested.  An
                # earlier revision forwarded the budget to every model and
                # defaulted include_thoughts to True; it was marked unsafe.
                kwargs.pop("include_thoughts", None)
                kwargs.pop("thinking_budget", None)
            else:
                kwargs["thinking_budget"] = budget
                kwargs["include_thoughts"] = True

        elif provider == ProviderEnum.ANTHROPIC:
            # MiniMax models use Anthropic-compatible API with different key and URL
            if model and model.lower().startswith("minimax"):
                kwargs["api_key"] = DynamicString(EnvConfigUtil.get_env_value, "MINIMAX_API_KEY")
                kwargs.setdefault("anthropic_api_url", "https://api.minimax.io/anthropic")
            # Assumption carried over from the original: only Claude 3.7+ can think.
            thinking_allowed = extract_model_number(model) > 3.6
            if thinking_allowed and budget is not None:
                kwargs['thinking'] = {"type": "enabled", "budget_tokens": budget}
                # NOTE(review): Anthropic documents extended thinking as
                # incompatible with temperature changes - confirm 0.7 is accepted.
                kwargs['temperature'] = 0.7

        return model_cls(**kwargs)

    # ---------------------------- helper: cleanse -------------------------- #
    @staticmethod
    def _cleanse_messages_for_uni_api(messages: list[dict]) -> list[dict]:
        """Normalise tool-call messages before they are sent to uni-api.

        For every dict in ``messages`` with a truthy ``tool_calls`` value,
        only the first tool call is kept and a ``None``/missing ``content``
        becomes ``""``.  Non-dict entries and messages without tool calls
        are left alone.

        The dicts are modified in place and the same list object is returned.
        """
        for entry in messages:
            calls = entry.get("tool_calls") if isinstance(entry, dict) else None
            if not calls:
                continue
            if len(calls) > 1:
                # keep only the first request
                entry["tool_calls"] = [calls[0]]
            if entry.get("content") is None:
                # OpenAI requires content to be ""
                entry["content"] = ""
        return messages

    # --------------------------- uni-api direct call ----------------------- #
    @classmethod
    def invoke_uni_api(
        cls,
        model: str,
        uni_messages: list[dict],
        uni_api_url: str | None = None,
        **kwargs,
    ):
        """POST a chat-completions request straight to uni-api.

        Args:
            model: Model name placed in the request payload.
            uni_messages: Conversation messages; they are passed through
                ``_cleanse_messages_for_uni_api`` (which edits them in place).
            uni_api_url: Endpoint URL; ``get_uni_api_url()`` is used when None.
            **kwargs: Extra top-level payload fields.

        Returns:
            The decoded JSON body of the response.

        Raises:
            requests.HTTPError: Via ``raise_for_status`` on a 4xx/5xx reply.
        """
        endpoint = get_uni_api_url() if uni_api_url is None else uni_api_url
        cleaned = cls._cleanse_messages_for_uni_api(uni_messages)

        request_headers = {
            "Content-Type": "application/json",
            # NOTE(review): hard-coded bearer token - looks like a test key.
            "Authorization": "Bearer sk-heaven-uni-api-test-12345",
        }
        body = {"model": model, "messages": cleaned, **kwargs}

        reply = requests.post(endpoint, headers=request_headers, json=body, timeout=120)

        if reply.status_code != 200:
            print(f"🚨 uni-api ERROR {reply.status_code}")
            print(reply.text)
            # dump conversation for fast debugging
            for index, message in enumerate(body["messages"]):
                print(f"[{index}] {json.dumps(message, indent=2)}")
            reply.raise_for_status()
        return reply.json()


def extract_model_number(model: str) -> float:
    """Extract a ``major.minor`` version number from a model name.

    The name is split on ``-``.  The first all-digit part is the integer
    portion; the part right after it is the decimal portion when it is also
    all-digit and is not a release-date stamp.  Examples::

        'claude-3-7-something-20250219' -> 3.7
        'claude-4-something-20250219'   -> 4.0
        'claude-sonnet-4-20250514'      -> 4.0

    Args:
        model: Model name such as ``'claude-3-7-sonnet-20250219'``.

    Returns:
        float: The version, or ``0.0`` when the name has no all-digit part.
        Because the result is a float, a two-digit minor such as ``3-10``
        reads as ``3.1``.
    """
    # Digit runs this long are release-date stamps (e.g. 20250219), not a
    # minor version; taking them as the decimal part gave 4.20250514.
    date_stamp_min_digits = 6

    parts = model.split("-")
    for i, part in enumerate(parts):
        if not part.isdigit():
            continue
        decimal_part = "0"  # default when no usable second numeric part follows
        if i + 1 < len(parts):
            candidate = parts[i + 1]
            if candidate.isdigit() and len(candidate) < date_stamp_min_digits:
                decimal_part = candidate
        return float(f"{part}.{decimal_part}")

    # No numeric part at all.
    return 0.0


# Example usage:
# if __name__ == "__main__":
#     # Create a chat model instance using UnifiedChat.
#     chat = UnifiedChat.create(
#         provider=ProviderEnum.GROQ,
#         model='deepseek-r1-distill-llama-70b',
#         temperature=0,
#         max_tokens=None,
#         timeout=None,
#         max_retries=2
#     )
#     response = chat.invoke([
#         (
#             "system",
#             "You are a helpful assistant that translates English to French. Translate the user sentence.",
#         ),
#         ("human", "I love programming."),
#     ])
#
#     print(response.content)
def
get_uni_api_url():
def get_uni_api_url():
    """Resolve the chat-completions URL of the uni-api service.

    Resolution order:

    1. ``uni_api_url`` from ``configs/uni_api_config.json`` next to this
       module, when the file loads and the value is non-empty.  The URL is
       returned as-is; it is not probed for reachability.
    2. The IP address of the Docker container named ``uni-api`` (port 8000),
       obtained with ``docker inspect``.
    3. The fixed fallback ``http://host.docker.internal:8002/...``.

    Failures in steps 1 and 2 are printed to stdout and never raised.

    Returns:
        str: URL of the ``/v1/chat/completions`` endpoint.
    """

    def discover_uni_api_ip():
        """Return the uni-api URL built from the container IP, or None."""
        try:
            import subprocess
            result = subprocess.run(
                # The space after {{.IPAddress}} separates the addresses of a
                # container attached to several networks; without it they
                # are glued together into one invalid host name.
                ['docker', 'inspect', 'uni-api', '--format',
                 '{{range .NetworkSettings.Networks}}{{.IPAddress}} {{end}}'],
                capture_output=True, text=True, check=True,
                # Do not hang forever on an unresponsive Docker daemon;
                # TimeoutExpired is handled by the except clause below.
                timeout=10,
            )
            addresses = result.stdout.split()
            if addresses:
                return f'http://{addresses[0]}:8000/v1/chat/completions'
        except Exception as e:
            print(f"Warning: Could not discover uni-api container IP: {e}")
        return None

    try:
        config_path = os.path.join(os.path.dirname(__file__), 'configs', 'uni_api_config.json')
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
        configured_url = config.get('uni_api_url')

        # A configured URL always wins; discovery only runs when the config
        # is missing, unreadable, or has no 'uni_api_url' value.
        if configured_url:
            return configured_url

    except Exception as e:
        print(f"Warning: Could not load uni-api config: {e}")

    # Try automatic discovery
    discovered_url = discover_uni_api_ip()
    if discovered_url:
        print(f"Auto-discovered uni-api URL: {discovered_url}")
        return discovered_url

    # Final fallback
    print("Using fallback uni-api URL")
    return 'http://host.docker.internal:8002/v1/chat/completions'
Load uni-api URL from config file with automatic container IP discovery
class
ProviderEnum(enum.Enum):
class ProviderEnum(Enum):
    """Chat-model providers that ``UnifiedChat`` can instantiate.

    Each member's value is the provider's lowercase identifier.
    """

    ANTHROPIC = "anthropic"
    OPENAI = "openai"
    GOOGLE = "google"
    GROQ = "groq"
    DEEPSEEK = "deepseek"
ANTHROPIC =
<ProviderEnum.ANTHROPIC: 'anthropic'>
OPENAI =
<ProviderEnum.OPENAI: 'openai'>
GOOGLE =
<ProviderEnum.GOOGLE: 'google'>
GROQ =
<ProviderEnum.GROQ: 'groq'>
DEEPSEEK =
<ProviderEnum.DEEPSEEK: 'deepseek'>
class
UnifiedChat:
class UnifiedChat:
    """Factory for provider chat models plus a direct uni-api client."""

    # Provider -> LangChain chat-model class instantiated by ``create``.
    PROVIDERS = {
        ProviderEnum.ANTHROPIC: ChatAnthropic,
        ProviderEnum.OPENAI: ChatOpenAI,
        ProviderEnum.GOOGLE: ChatGoogleGenerativeAI,
        ProviderEnum.GROQ: ChatGroq,
        ProviderEnum.DEEPSEEK: ChatDeepSeek
    }

    @classmethod
    def create(
        cls,
        provider: ProviderEnum = ProviderEnum.ANTHROPIC,
        model: str = None,
        **kwargs
    ) -> BaseChatModel:
        """
        Create a unified chat model instance from any supported provider.

        The API key always comes from the ``<PROVIDER>_API_KEY`` environment
        value (any ``api_key`` in ``kwargs`` is replaced).  When ``model`` is
        falsy, the model name comes from ``<PROVIDER>_MODEL``.

        An optional ``thinking_budget`` in ``kwargs`` is translated per provider:

        - OpenAI: removed from ``kwargs``; for models whose name contains
          ``o1``/``o3``/``o4`` it selects the reasoning effort
          (> 6144 "high", > 3072 "medium", otherwise "low").
        - Google: forwarded together with ``include_thoughts=True`` only for
          model names containing ``2.5`` or ``3.``; otherwise both keys are
          removed.
        - Anthropic: enables ``thinking`` when ``extract_model_number(model)``
          is above 3.6.
        - Other providers: left in ``kwargs`` unchanged.

        Args:
            provider (ProviderEnum): The provider Enum (anthropic, openai, google, groq, deepseek).
            model (str, optional): Specific model name.
            **kwargs: Additional configuration for the model.

        Returns:
            BaseChatModel: Instantiated chat model.

        Raises:
            ValueError: If ``provider`` is not a key of ``PROVIDERS``.
        """
        if provider not in cls.PROVIDERS:
            raise ValueError(f"Unsupported provider: {provider}. Supported providers: {list(cls.PROVIDERS.keys())}")
        model_cls = cls.PROVIDERS[provider]

        # Provider-specific settings are resolved lazily from the environment.
        env_prefix = provider.name
        kwargs["api_key"] = DynamicString(EnvConfigUtil.get_env_value, f"{env_prefix}_API_KEY")
        if not model:
            model = DynamicString(EnvConfigUtil.get_env_value, f"{env_prefix}_MODEL")
        kwargs["model"] = model

        # Read the budget but leave it in kwargs; each branch decides
        # whether the underlying model class should still receive it.
        budget = kwargs.get('thinking_budget')

        if provider == ProviderEnum.OPENAI:
            kwargs.pop("thinking_budget", None)
            kwargs.update(
                callbacks=None,
                verbose=True,
                use_responses_api=True,
                model_kwargs={"reasoning": {"effort": None, "summary": "auto"}},
            )
            is_o_series = any(tag in model for tag in ("o3", "o1", "o4"))
            if not is_o_series:
                # e.g. 4o / 4o-mini: make sure a temperature is present.
                if 'temperature' not in kwargs:
                    kwargs['temperature'] = float(EnvConfigUtil.get_env_value("MODEL_DEFAULT_TEMP", 0.7))
            else:
                # o-series models do not take a temperature.
                kwargs.pop("temperature", None)
                # TODO we might need to make a dedicated chatgpt ui for setting reasoning-effort
                if budget is None:
                    effort = None
                elif budget > 1024 * 6:
                    effort = "high"
                elif budget > 1024 * 3:
                    effort = "medium"
                else:
                    effort = "low"
                if effort:
                    kwargs.setdefault("model_kwargs", {})
                    kwargs["model_kwargs"]["reasoning"] = {
                        "effort": effort,
                        "summary": "auto",
                    }

        elif provider == ProviderEnum.GOOGLE:
            gemini_can_think = any(marker in model for marker in ("2.5", "3."))  # Gemini >= 2.5
            if budget is None or not gemini_can_think:
                # Strip both keys so no thought tracing is requested.
                kwargs.pop("include_thoughts", None)
                kwargs.pop("thinking_budget", None)
            else:
                kwargs["thinking_budget"] = budget
                kwargs["include_thoughts"] = True

        elif provider == ProviderEnum.ANTHROPIC:
            # MiniMax models use Anthropic-compatible API with different key and URL
            if model and model.lower().startswith("minimax"):
                kwargs["api_key"] = DynamicString(EnvConfigUtil.get_env_value, "MINIMAX_API_KEY")
                kwargs.setdefault("anthropic_api_url", "https://api.minimax.io/anthropic")
            # Assumption carried over from the original: only Claude 3.7+ can think.
            thinking_allowed = extract_model_number(model) > 3.6
            if thinking_allowed and budget is not None:
                kwargs['thinking'] = {"type": "enabled", "budget_tokens": budget}
                # NOTE(review): Anthropic documents extended thinking as
                # incompatible with temperature changes - confirm 0.7 is accepted.
                kwargs['temperature'] = 0.7

        return model_cls(**kwargs)

    # ---------------------------- helper: cleanse -------------------------- #
    @staticmethod
    def _cleanse_messages_for_uni_api(messages: list[dict]) -> list[dict]:
        """Normalise tool-call messages before they are sent to uni-api.

        For every dict in ``messages`` with a truthy ``tool_calls`` value,
        only the first tool call is kept and a ``None``/missing ``content``
        becomes ``""``.  Non-dict entries and messages without tool calls
        are left alone.

        The dicts are modified in place and the same list object is returned.
        """
        for entry in messages:
            calls = entry.get("tool_calls") if isinstance(entry, dict) else None
            if not calls:
                continue
            if len(calls) > 1:
                # keep only the first request
                entry["tool_calls"] = [calls[0]]
            if entry.get("content") is None:
                # OpenAI requires content to be ""
                entry["content"] = ""
        return messages

    # --------------------------- uni-api direct call ----------------------- #
    @classmethod
    def invoke_uni_api(
        cls,
        model: str,
        uni_messages: list[dict],
        uni_api_url: str | None = None,
        **kwargs,
    ):
        """POST a chat-completions request straight to uni-api.

        Args:
            model: Model name placed in the request payload.
            uni_messages: Conversation messages; they are passed through
                ``_cleanse_messages_for_uni_api`` (which edits them in place).
            uni_api_url: Endpoint URL; ``get_uni_api_url()`` is used when None.
            **kwargs: Extra top-level payload fields.

        Returns:
            The decoded JSON body of the response.

        Raises:
            requests.HTTPError: Via ``raise_for_status`` on a 4xx/5xx reply.
        """
        endpoint = get_uni_api_url() if uni_api_url is None else uni_api_url
        cleaned = cls._cleanse_messages_for_uni_api(uni_messages)

        request_headers = {
            "Content-Type": "application/json",
            # NOTE(review): hard-coded bearer token - looks like a test key.
            "Authorization": "Bearer sk-heaven-uni-api-test-12345",
        }
        body = {"model": model, "messages": cleaned, **kwargs}

        reply = requests.post(endpoint, headers=request_headers, json=body, timeout=120)

        if reply.status_code != 200:
            print(f"🚨 uni-api ERROR {reply.status_code}")
            print(reply.text)
            # dump conversation for fast debugging
            for index, message in enumerate(body["messages"]):
                print(f"[{index}] {json.dumps(message, indent=2)}")
            reply.raise_for_status()
        return reply.json()
PROVIDERS =
{<ProviderEnum.ANTHROPIC: 'anthropic'>: <class 'langchain_anthropic.chat_models.ChatAnthropic'>, <ProviderEnum.OPENAI: 'openai'>: <class 'langchain_openai.chat_models.base.ChatOpenAI'>, <ProviderEnum.GOOGLE: 'google'>: <class 'langchain_google_genai.chat_models.ChatGoogleGenerativeAI'>, <ProviderEnum.GROQ: 'groq'>: <class 'langchain_groq.chat_models.ChatGroq'>, <ProviderEnum.DEEPSEEK: 'deepseek'>: <class 'langchain_deepseek.chat_models.ChatDeepSeek'>}
@classmethod
def
create( cls, provider: ProviderEnum = <ProviderEnum.ANTHROPIC: 'anthropic'>, model: str = None, **kwargs) -> langchain_core.language_models.chat_models.BaseChatModel:
@classmethod
def create(
    cls,
    provider: ProviderEnum = ProviderEnum.ANTHROPIC,
    model: str = None,
    **kwargs
) -> BaseChatModel:
    """
    Create a unified chat model instance from any supported provider.

    The API key always comes from the ``<PROVIDER>_API_KEY`` environment
    value (any ``api_key`` in ``kwargs`` is replaced).  When ``model`` is
    falsy, the model name comes from ``<PROVIDER>_MODEL``.

    An optional ``thinking_budget`` in ``kwargs`` is translated per provider:

    - OpenAI: removed from ``kwargs``; for models whose name contains
      ``o1``/``o3``/``o4`` it selects the reasoning effort
      (> 6144 "high", > 3072 "medium", otherwise "low").
    - Google: forwarded together with ``include_thoughts=True`` only for
      model names containing ``2.5`` or ``3.``; otherwise both keys are
      removed.
    - Anthropic: enables ``thinking`` when ``extract_model_number(model)``
      is above 3.6.
    - Other providers: left in ``kwargs`` unchanged.

    Args:
        provider (ProviderEnum): The provider Enum (anthropic, openai, google, groq, deepseek).
        model (str, optional): Specific model name.
        **kwargs: Additional configuration for the model.

    Returns:
        BaseChatModel: Instantiated chat model.

    Raises:
        ValueError: If ``provider`` is not a key of ``PROVIDERS``.
    """
    if provider not in cls.PROVIDERS:
        raise ValueError(f"Unsupported provider: {provider}. Supported providers: {list(cls.PROVIDERS.keys())}")
    model_cls = cls.PROVIDERS[provider]

    # Provider-specific settings are resolved lazily from the environment.
    env_prefix = provider.name
    kwargs["api_key"] = DynamicString(EnvConfigUtil.get_env_value, f"{env_prefix}_API_KEY")
    if not model:
        model = DynamicString(EnvConfigUtil.get_env_value, f"{env_prefix}_MODEL")
    kwargs["model"] = model

    # Read the budget but leave it in kwargs; each branch decides
    # whether the underlying model class should still receive it.
    budget = kwargs.get('thinking_budget')

    if provider == ProviderEnum.OPENAI:
        kwargs.pop("thinking_budget", None)
        kwargs.update(
            callbacks=None,
            verbose=True,
            use_responses_api=True,
            model_kwargs={"reasoning": {"effort": None, "summary": "auto"}},
        )
        is_o_series = any(tag in model for tag in ("o3", "o1", "o4"))
        if not is_o_series:
            # e.g. 4o / 4o-mini: make sure a temperature is present.
            if 'temperature' not in kwargs:
                kwargs['temperature'] = float(EnvConfigUtil.get_env_value("MODEL_DEFAULT_TEMP", 0.7))
        else:
            # o-series models do not take a temperature.
            kwargs.pop("temperature", None)
            # TODO we might need to make a dedicated chatgpt ui for setting reasoning-effort
            if budget is None:
                effort = None
            elif budget > 1024 * 6:
                effort = "high"
            elif budget > 1024 * 3:
                effort = "medium"
            else:
                effort = "low"
            if effort:
                kwargs.setdefault("model_kwargs", {})
                kwargs["model_kwargs"]["reasoning"] = {
                    "effort": effort,
                    "summary": "auto",
                }

    elif provider == ProviderEnum.GOOGLE:
        gemini_can_think = any(marker in model for marker in ("2.5", "3."))  # Gemini >= 2.5
        if budget is None or not gemini_can_think:
            # Strip both keys so no thought tracing is requested.
            kwargs.pop("include_thoughts", None)
            kwargs.pop("thinking_budget", None)
        else:
            kwargs["thinking_budget"] = budget
            kwargs["include_thoughts"] = True

    elif provider == ProviderEnum.ANTHROPIC:
        # MiniMax models use Anthropic-compatible API with different key and URL
        if model and model.lower().startswith("minimax"):
            kwargs["api_key"] = DynamicString(EnvConfigUtil.get_env_value, "MINIMAX_API_KEY")
            kwargs.setdefault("anthropic_api_url", "https://api.minimax.io/anthropic")
        # Assumption carried over from the original: only Claude 3.7+ can think.
        thinking_allowed = extract_model_number(model) > 3.6
        if thinking_allowed and budget is not None:
            kwargs['thinking'] = {"type": "enabled", "budget_tokens": budget}
            # NOTE(review): Anthropic documents extended thinking as
            # incompatible with temperature changes - confirm 0.7 is accepted.
            kwargs['temperature'] = 0.7

    return model_cls(**kwargs)
Create a unified chat model instance from any supported provider.
Args: provider (ProviderEnum): The provider Enum (anthropic, openai, google, groq, deepseek). model (str, optional): Specific model name. **kwargs: Additional configuration for the model.
Returns: BaseChatModel: Instantiated chat model.
@classmethod
def
invoke_uni_api( cls, model: str, uni_messages: list[dict], uni_api_url: str | None = None, **kwargs):
@classmethod
def invoke_uni_api(
    cls,
    model: str,
    uni_messages: list[dict],
    uni_api_url: str | None = None,
    **kwargs,
):
    """POST a chat-completions request straight to uni-api.

    Args:
        model: Model name placed in the request payload.
        uni_messages: Conversation messages; they are passed through
            ``_cleanse_messages_for_uni_api`` before being sent.
        uni_api_url: Endpoint URL; ``get_uni_api_url()`` is used when None.
        **kwargs: Extra top-level payload fields.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: Via ``raise_for_status`` on a 4xx/5xx reply.
    """
    endpoint = get_uni_api_url() if uni_api_url is None else uni_api_url

    # Single-tool guard: applied to every outgoing conversation.
    cleaned = cls._cleanse_messages_for_uni_api(uni_messages)

    request_headers = {
        "Content-Type": "application/json",
        # NOTE(review): hard-coded bearer token - looks like a test key.
        "Authorization": "Bearer sk-heaven-uni-api-test-12345",
    }
    body = {"model": model, "messages": cleaned, **kwargs}

    reply = requests.post(endpoint, headers=request_headers, json=body, timeout=120)

    if reply.status_code != 200:
        print(f"🚨 uni-api ERROR {reply.status_code}")
        print(reply.text)
        # dump conversation for fast debugging
        for index, message in enumerate(body["messages"]):
            print(f"[{index}] {json.dumps(message, indent=2)}")
        reply.raise_for_status()
    return reply.json()
Low-level POST to uni-api with built-in single-tool guard.
def
extract_model_number(model: str) -> float:
def extract_model_number(model: str) -> float:
    """Extract a ``major.minor`` version number from a model name.

    The name is split on ``-``.  The first all-digit part is the integer
    portion; the part right after it is the decimal portion when it is also
    all-digit and is not a release-date stamp.  Examples::

        'claude-3-7-something-20250219' -> 3.7
        'claude-4-something-20250219'   -> 4.0
        'claude-sonnet-4-20250514'      -> 4.0

    Args:
        model: Model name such as ``'claude-3-7-sonnet-20250219'``.

    Returns:
        float: The version, or ``0.0`` when the name has no all-digit part.
        Because the result is a float, a two-digit minor such as ``3-10``
        reads as ``3.1``.
    """
    # Digit runs this long are release-date stamps (e.g. 20250219), not a
    # minor version; taking them as the decimal part gave 4.20250514.
    date_stamp_min_digits = 6

    parts = model.split("-")
    for i, part in enumerate(parts):
        if not part.isdigit():
            continue
        decimal_part = "0"  # default when no usable second numeric part follows
        if i + 1 < len(parts):
            candidate = parts[i + 1]
            if candidate.isdigit() and len(candidate) < date_stamp_min_digits:
                decimal_part = candidate
        return float(f"{part}.{decimal_part}")

    # No numeric part at all.
    return 0.0
Given a string of the form 'claude-3-7-something-20250219' or 'claude-4-something-20250219', extract the numeric parts and combine into a decimal (e.g., 3.7 or 4.0).
Rules:
- The first numeric part we encounter is the integer portion.
- The next numeric part (if present) is the decimal portion.
- If no second numeric part is found, use 0 as the decimal portion.