heaven_base.unified_chat

  1import json
  2import os
  3from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, BaseMessage
  4from langchain_core.language_models.chat_models import BaseChatModel
  5from langchain_anthropic import ChatAnthropic
  6from langchain_openai import ChatOpenAI
  7from langchain_google_genai import ChatGoogleGenerativeAI
  8from langchain_groq import ChatGroq
  9from langchain_deepseek import ChatDeepSeek
 10from enum import Enum
 11from .utils.get_env_value import EnvConfigUtil, DynamicString
 12import requests
 13
 14def get_uni_api_url():
 15    """Load uni-api URL from config file with automatic container IP discovery"""
 16    
 17    def discover_uni_api_ip():
 18        """Automatically discover uni-api container IP"""
 19        try:
 20            import subprocess
 21            result = subprocess.run(
 22                ['docker', 'inspect', 'uni-api', '--format', '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}'],
 23                capture_output=True, text=True, check=True
 24            )
 25            container_ip = result.stdout.strip()
 26            if container_ip:
 27                return f'http://{container_ip}:8000/v1/chat/completions'
 28        except Exception as e:
 29            print(f"Warning: Could not discover uni-api container IP: {e}")
 30        return None
 31    
 32    try:
 33        config_path = os.path.join(os.path.dirname(__file__), 'configs', 'uni_api_config.json')
 34        with open(config_path, 'r') as f:
 35            config = json.load(f)
 36        configured_url = config.get('uni_api_url')
 37        
 38        # If config exists, try it first, but fall back to auto-discovery if it fails
 39        if configured_url:
 40            return configured_url
 41            
 42    except Exception as e:
 43        print(f"Warning: Could not load uni-api config: {e}")
 44    
 45    # Try automatic discovery
 46    discovered_url = discover_uni_api_ip()
 47    if discovered_url:
 48        print(f"Auto-discovered uni-api URL: {discovered_url}")
 49        return discovered_url
 50    
 51    # Final fallback
 52    print("Using fallback uni-api URL")
 53    return 'http://host.docker.internal:8002/v1/chat/completions'
 54
 55# Define the Provider Enum 
 56class ProviderEnum(Enum):
 57    ANTHROPIC = 'anthropic'
 58    OPENAI = 'openai'
 59    GOOGLE = 'google'
 60    GROQ = 'groq'
 61    DEEPSEEK = 'deepseek'
 62
 63# Default model mapping
 64# DEFAULT_MODELS = {
 65#     ProviderEnum.ANTHROPIC: "claude-3-5-sonnet-20241022",
 66#    ProviderEnum.OPENAI: "gpt-4o",
 67#    ProviderEnum.GOOGLE: "gemini-2.0-pro-exp-02-05",
 68#    ProviderEnum.GROQ: "deepseek-r1-distill-llama-70b-specdec",
 69#    ProviderEnum.DEEPSEEK: "deepseek-reasoner"
 70# }
 71
 72class UnifiedChat:
 73    PROVIDERS = {
 74        ProviderEnum.ANTHROPIC: ChatAnthropic,
 75        ProviderEnum.OPENAI: ChatOpenAI,
 76        ProviderEnum.GOOGLE: ChatGoogleGenerativeAI,
 77        ProviderEnum.GROQ: ChatGroq,
 78        ProviderEnum.DEEPSEEK: ChatDeepSeek
 79    }
 80
 81    @classmethod
 82    def create(
 83        cls, 
 84        provider: ProviderEnum = ProviderEnum.ANTHROPIC, 
 85        model: str = None,
 86        **kwargs
 87    ) -> BaseChatModel:
 88        """
 89        Create a unified chat model instance from any supported provider.
 90        
 91        Args:
 92            provider (ProviderEnum): The provider Enum (anthropic, openai, google, groq).
 93            model (str, optional): Specific model name.
 94            **kwargs: Additional configuration for the model.
 95        
 96        Returns:
 97            BaseChatModel: Instantiated chat model.
 98        """
 99        if provider not in cls.PROVIDERS:
100            raise ValueError(f"Unsupported provider: {provider}. Supported providers: {list(cls.PROVIDERS.keys())}")
101        
102        ModelClass = cls.PROVIDERS[provider]
103        
104        # Load provider-specific settings ENV values, for now it is only the api key
105        kwargs["api_key"] = DynamicString(EnvConfigUtil.get_env_value, f"{provider.name}_API_KEY")
106        # If a model name is specified, add it to kwargs.
107                
108        # Use default model if none is specified
109        if not model:
110            model =  DynamicString(EnvConfigUtil.get_env_value, f"{provider.name}_MODEL")
111        
112        # this is an Anthropic specific value and should be removed and remapped in the kwargs
113        thinking_budget = kwargs.get('thinking_budget', None)
114        # kwargs.pop("thinking_budget", None) # dont pop them, langchain needs it
115
116        # Add OpenAI-specific settings
117        if provider == ProviderEnum.OPENAI:
118            # kwargs.update({
119            #     'model': model,
120            #     'callbacks': None,
121            #     'verbose': True
122            # })
123            kwargs.pop("thinking_budget", None) 
124            kwargs.update({
125                'model': model,
126                'callbacks': None,
127                'verbose': True,
128                'use_responses_api': True,
129                'model_kwargs': {
130                    "reasoning": {
131                        "effort": None,
132                        "summary": "auto",
133                    }
134                }
135            })
136            # remove temperature from o series model params
137            if "o3" in model or "o1" in model or "o4" in model:
138                kwargs.pop("temperature", None)
139                reasoning_effort = None
140                # enable thinking if thinking should be enabled (budget is not none)
141                if thinking_budget is not None:
142                    # TODO we might need to make a dedicated chatgpt ui for setting reasoning-effort
143                    if thinking_budget > 1024 * 6:
144                        reasoning_effort = "high"
145                    elif thinking_budget > 1024 * 3:
146                        reasoning_effort = "medium"
147                    else:
148                        reasoning_effort = "low"
149                
150                if reasoning_effort:
151                    kwargs.setdefault("model_kwargs", {})
152                    kwargs["model_kwargs"]["reasoning"] = {
153                        "effort": reasoning_effort,
154                        "summary": "auto",
155                    }
156                    
157            else: # for 4o or 4o-mini model temperature
158                if 'temperature' not in kwargs:
159                    kwargs['temperature'] = float(EnvConfigUtil.get_env_value("MODEL_DEFAULT_TEMP", 0.7))
160        
161        elif provider == ProviderEnum.GOOGLE:
162
163            kwargs["model"] = model
164
165            supports_thinking = "2.5" in model or "3." in model  # Gemini ≥2.5
166
167
168            if thinking_budget is not None and supports_thinking:
169
170                kwargs["thinking_budget"] = thinking_budget
171
172                kwargs["include_thoughts"] = True
173
174            else:
175
176                # Explicitly disable any thought tracing
177                # remove both keys completely
178
179                kwargs.pop("include_thoughts", None)
180
181                kwargs.pop("thinking_budget", None)
182        # OLD unsafe
183        # elif provider == ProviderEnum.GOOGLE:
184        #     kwargs["model"] = model           # Gemini model name
185
186        #     # 1) forward the budget (0 disables thoughts per Google docs)
187        #     if thinking_budget is not None:
188        #         kwargs["thinking_budget"] = thinking_budget     # int
189        #         kwargs["include_thoughts"] = True               # turn trace on
190        #     else:
191        #         # expose thoughts unless the caller explicitly opted out
192        #         kwargs.setdefault("include_thoughts", True)
193
194        elif provider == ProviderEnum.ANTHROPIC:
195            kwargs['model'] = model
196            # MiniMax models use Anthropic-compatible API with different key and URL
197            if model and model.lower().startswith("minimax"):
198                kwargs["api_key"] = DynamicString(EnvConfigUtil.get_env_value, "MINIMAX_API_KEY")
199                kwargs.setdefault("anthropic_api_url", "https://api.minimax.io/anthropic")
200            if extract_model_number(model) > 3.6 and thinking_budget is not None: # assuming only claude model 3.7 or higher allow thinking
201                kwargs['thinking'] = {"type": "enabled", "budget_tokens": thinking_budget}
202                kwargs['temperature'] = 0.7
203        else:
204            kwargs['model'] = model
205        
206        # print(f"\nCreating {provider} model: {model}")
207        # print(f"With kwargs: {kwargs}\n")
208        # print(f">>> DEBUG: UnifiedChat: FINAL KWARGS: {kwargs}")
209        return ModelClass(**kwargs)
210
211    # @classmethod
212    # def invoke_uni_api(
213    #     cls,
214    #     model: str,
215    #     uni_messages,
216    #     uni_api_url: str = None,
217    #     **kwargs
218    # ):
219    #     """Invoke uni-api directly"""
220    #     import requests
221        
222    #     # Use config URL if none provided
223    #     if uni_api_url is None:
224    #         uni_api_url = get_uni_api_url()
225        
226    #     headers = {
227    #         "Content-Type": "application/json",
228    #         "Authorization": "Bearer sk-heaven-uni-api-test-12345"
229    #     }
230        
231    #     payload = {
232    #         "model": model,
233    #         "messages": uni_messages,
234    #         **kwargs
235    #     }
236        
237    #     response = requests.post(uni_api_url, headers=headers, json=payload, timeout=120)
238        
239    #     # DEBUG: Print full request/response details on error
240    #     if response.status_code != 200:
241    #         print(f"🚨 400 ERROR DEBUG 🚨")
242    #         print(f"Request URL: {uni_api_url}")
243    #         print(f"Response status: {response.status_code}")
244    #         print(f"Response body: {response.text}")
245            
246    #         # Print EXACT JSON of each message
247    #         messages = payload.get("messages", [])
248    #         print(f"\n📨 EXACT JSON CONVERSATION HISTORY ({len(messages)} messages):")
249            
250    #         for i, msg in enumerate(messages):
251    #             print(f"Message {i}: {json.dumps(msg, indent=2)}")
252    #             print("---")
253            
254    #         print(f"🚨 END DEBUG 🚨")
255        
256    #     if response.status_code != 200:
257    #         print(f"🚨 uni-api ERROR {response.status_code}: {response.text}")
258    #     response.raise_for_status()
259    #     return response.json()
260
261    # ---------------------------- helper: cleanse -------------------------- #
262    @staticmethod
263    def _cleanse_messages_for_uni_api(messages: list[dict]) -> list[dict]:
264        """Enforce *one* tool_call per assistant message and fix null content."""
265        for msg in messages:
266            if not isinstance(msg, dict):
267                continue
268            if msg.get("tool_calls"):
269                # keep only the first request
270                if len(msg["tool_calls"]) > 1:
271                    msg["tool_calls"] = [msg["tool_calls"][0]]
272                # OpenAI requires content to be ""
273                if msg.get("content") is None:
274                    msg["content"] = ""
275        return messages
276
277    # --------------------------- uni-api direct call ----------------------- #
278    @classmethod
279    def invoke_uni_api(
280        cls,
281        model: str,
282        uni_messages: list[dict],
283        uni_api_url: str | None = None,
284        **kwargs,
285    ):
286        """Low-level POST to uni-api with built-in single-tool guard."""
287        if uni_api_url is None:
288            uni_api_url = get_uni_api_url()
289
290        # --- cleansing guard here ----------------------------------------- #
291        uni_messages = cls._cleanse_messages_for_uni_api(uni_messages)
292
293        headers = {
294            "Content-Type": "application/json",
295            "Authorization": "Bearer sk-heaven-uni-api-test-12345",
296        }
297        payload = {"model": model, "messages": uni_messages, **kwargs}
298
299        response = requests.post(uni_api_url, headers=headers, json=payload, timeout=120)
300
301        if response.status_code != 200:
302            print(f"🚨 uni-api ERROR {response.status_code}")
303            print(response.text)
304            # dump conversation for fast debugging
305            for i, m in enumerate(payload["messages"]):
306                print(f"[{i}] {json.dumps(m, indent=2)}")
307        response.raise_for_status()
308        return response.json()
309
310
311
def extract_model_number(model: str) -> float:
    """
    Extract a ``major.minor`` version number from a dash-separated model name.

    Examples: 'claude-3-7-something-20250219' -> 3.7,
    'claude-4-something-20250219' -> 4.0, 'claude-opus-4-20250514' -> 4.0.

    Rules:
      - The first all-digit part we encounter is the integer portion.
      - The part right after it is the decimal portion when it is all digits
        and shorter than a date stamp (fewer than 8 digits).
      - Otherwise the decimal portion is 0.
      - If the name has no all-digit part at all, 0.0 is returned.

    Args:
        model: Model name such as 'claude-3-7-sonnet-20250219'.

    Returns:
        float: The version as ``float("<major>.<minor>")``.
    """
    date_stamp_digits = 8  # e.g. '20250219'; such a part is a release date, not a minor version

    parts = model.split("-")
    for i, part in enumerate(parts):
        if not part.isdigit():
            continue
        decimal_part = "0"
        if i + 1 < len(parts):
            candidate = parts[i + 1]
            if candidate.isdigit() and len(candidate) < date_stamp_digits:
                decimal_part = candidate
        # Only the first numeric part (and its neighbour) is considered.
        return float(f"{part}.{decimal_part}")

    # No numeric part was found at all
    return 0.0
341
342
343# Example usage:
344# if __name__ == "__main__":
345#     # Create a chat model instance using UnifiedChat.
346#     chat = UnifiedChat.create(
347#         provider=ProviderEnum.GROQ, 
348#         model='deepseek-r1-distill-llama-70b',
349#         temperature=0,
350#         max_tokens=None,
351#         timeout=None,
352#         max_retries=2
353#     )
354#     response = chat.invoke([
355#         (
356#             "system",
357#             "You are a helpful assistant that translates English to French. Translate the user sentence.",
358#         ),
359#         ("human", "I love programming."),
360#     ])
361
362#     print(response.content)
def get_uni_api_url():
def get_uni_api_url():
    """Resolve the uni-api chat-completions endpoint URL.

    Resolution order:
      1. ``uni_api_url`` from ``configs/uni_api_config.json`` next to this module.
      2. The IP address of the ``uni-api`` Docker container (port 8000).
      3. A fixed ``host.docker.internal:8002`` fallback.

    The configured URL is returned as-is; it is not probed for reachability.

    Returns:
        str: Full URL of the ``/v1/chat/completions`` endpoint.
    """

    def discover_uni_api_ip():
        """Return the endpoint URL built from the uni-api container IP, or None."""
        try:
            import subprocess
            result = subprocess.run(
                # The trailing space after each address keeps the IPs separable
                # when the container is attached to more than one network.
                ['docker', 'inspect', 'uni-api', '--format',
                 '{{range .NetworkSettings.Networks}}{{.IPAddress}} {{end}}'],
                capture_output=True, text=True, check=True,
                timeout=10,  # do not hang forever on an unresponsive Docker daemon
            )
            # split() also drops empty addresses (networks without an IP).
            addresses = result.stdout.split()
            if addresses:
                return f'http://{addresses[0]}:8000/v1/chat/completions'
        except Exception as e:
            # Best effort: missing docker binary, missing container, timeout, ...
            print(f"Warning: Could not discover uni-api container IP: {e}")
        return None

    try:
        config_path = os.path.join(os.path.dirname(__file__), 'configs', 'uni_api_config.json')
        with open(config_path, 'r') as f:
            config = json.load(f)
        configured_url = config.get('uni_api_url')

        # A configured URL always wins; discovery only runs when the config is
        # missing, unreadable, or has no 'uni_api_url' entry.
        if configured_url:
            return configured_url

    except Exception as e:
        print(f"Warning: Could not load uni-api config: {e}")

    # Try automatic discovery
    discovered_url = discover_uni_api_ip()
    if discovered_url:
        print(f"Auto-discovered uni-api URL: {discovered_url}")
        return discovered_url

    # Final fallback
    print("Using fallback uni-api URL")
    return 'http://host.docker.internal:8002/v1/chat/completions'

Load uni-api URL from config file with automatic container IP discovery

class ProviderEnum(enum.Enum):
class ProviderEnum(Enum):
    """Identifiers of the chat-model providers known to this module.

    The member *name* (e.g. ``ANTHROPIC``) is used to build environment
    variable names such as ``ANTHROPIC_API_KEY``; the *value* is the
    lowercase provider identifier.
    """

    ANTHROPIC = "anthropic"
    OPENAI = "openai"
    GOOGLE = "google"
    GROQ = "groq"
    DEEPSEEK = "deepseek"
ANTHROPIC = <ProviderEnum.ANTHROPIC: 'anthropic'>
OPENAI = <ProviderEnum.OPENAI: 'openai'>
GOOGLE = <ProviderEnum.GOOGLE: 'google'>
GROQ = <ProviderEnum.GROQ: 'groq'>
DEEPSEEK = <ProviderEnum.DEEPSEEK: 'deepseek'>
class UnifiedChat:
 73class UnifiedChat:
 74    PROVIDERS = {
 75        ProviderEnum.ANTHROPIC: ChatAnthropic,
 76        ProviderEnum.OPENAI: ChatOpenAI,
 77        ProviderEnum.GOOGLE: ChatGoogleGenerativeAI,
 78        ProviderEnum.GROQ: ChatGroq,
 79        ProviderEnum.DEEPSEEK: ChatDeepSeek
 80    }
 81
 82    @classmethod
 83    def create(
 84        cls, 
 85        provider: ProviderEnum = ProviderEnum.ANTHROPIC, 
 86        model: str = None,
 87        **kwargs
 88    ) -> BaseChatModel:
 89        """
 90        Create a unified chat model instance from any supported provider.
 91        
 92        Args:
 93            provider (ProviderEnum): The provider Enum (anthropic, openai, google, groq).
 94            model (str, optional): Specific model name.
 95            **kwargs: Additional configuration for the model.
 96        
 97        Returns:
 98            BaseChatModel: Instantiated chat model.
 99        """
100        if provider not in cls.PROVIDERS:
101            raise ValueError(f"Unsupported provider: {provider}. Supported providers: {list(cls.PROVIDERS.keys())}")
102        
103        ModelClass = cls.PROVIDERS[provider]
104        
105        # Load provider-specific settings ENV values, for now it is only the api key
106        kwargs["api_key"] = DynamicString(EnvConfigUtil.get_env_value, f"{provider.name}_API_KEY")
107        # If a model name is specified, add it to kwargs.
108                
109        # Use default model if none is specified
110        if not model:
111            model =  DynamicString(EnvConfigUtil.get_env_value, f"{provider.name}_MODEL")
112        
113        # this is an Anthropic specific value and should be removed and remapped in the kwargs
114        thinking_budget = kwargs.get('thinking_budget', None)
115        # kwargs.pop("thinking_budget", None) # dont pop them, langchain needs it
116
117        # Add OpenAI-specific settings
118        if provider == ProviderEnum.OPENAI:
119            # kwargs.update({
120            #     'model': model,
121            #     'callbacks': None,
122            #     'verbose': True
123            # })
124            kwargs.pop("thinking_budget", None) 
125            kwargs.update({
126                'model': model,
127                'callbacks': None,
128                'verbose': True,
129                'use_responses_api': True,
130                'model_kwargs': {
131                    "reasoning": {
132                        "effort": None,
133                        "summary": "auto",
134                    }
135                }
136            })
137            # remove temperature from o series model params
138            if "o3" in model or "o1" in model or "o4" in model:
139                kwargs.pop("temperature", None)
140                reasoning_effort = None
141                # enable thinking if thinking should be enabled (budget is not none)
142                if thinking_budget is not None:
143                    # TODO we might need to make a dedicated chatgpt ui for setting reasoning-effort
144                    if thinking_budget > 1024 * 6:
145                        reasoning_effort = "high"
146                    elif thinking_budget > 1024 * 3:
147                        reasoning_effort = "medium"
148                    else:
149                        reasoning_effort = "low"
150                
151                if reasoning_effort:
152                    kwargs.setdefault("model_kwargs", {})
153                    kwargs["model_kwargs"]["reasoning"] = {
154                        "effort": reasoning_effort,
155                        "summary": "auto",
156                    }
157                    
158            else: # for 4o or 4o-mini model temperature
159                if 'temperature' not in kwargs:
160                    kwargs['temperature'] = float(EnvConfigUtil.get_env_value("MODEL_DEFAULT_TEMP", 0.7))
161        
162        elif provider == ProviderEnum.GOOGLE:
163
164            kwargs["model"] = model
165
166            supports_thinking = "2.5" in model or "3." in model  # Gemini ≥2.5
167
168
169            if thinking_budget is not None and supports_thinking:
170
171                kwargs["thinking_budget"] = thinking_budget
172
173                kwargs["include_thoughts"] = True
174
175            else:
176
177                # Explicitly disable any thought tracing
178                # remove both keys completely
179
180                kwargs.pop("include_thoughts", None)
181
182                kwargs.pop("thinking_budget", None)
183        # OLD unsafe
184        # elif provider == ProviderEnum.GOOGLE:
185        #     kwargs["model"] = model           # Gemini model name
186
187        #     # 1) forward the budget (0 disables thoughts per Google docs)
188        #     if thinking_budget is not None:
189        #         kwargs["thinking_budget"] = thinking_budget     # int
190        #         kwargs["include_thoughts"] = True               # turn trace on
191        #     else:
192        #         # expose thoughts unless the caller explicitly opted out
193        #         kwargs.setdefault("include_thoughts", True)
194
195        elif provider == ProviderEnum.ANTHROPIC:
196            kwargs['model'] = model
197            # MiniMax models use Anthropic-compatible API with different key and URL
198            if model and model.lower().startswith("minimax"):
199                kwargs["api_key"] = DynamicString(EnvConfigUtil.get_env_value, "MINIMAX_API_KEY")
200                kwargs.setdefault("anthropic_api_url", "https://api.minimax.io/anthropic")
201            if extract_model_number(model) > 3.6 and thinking_budget is not None: # assuming only claude model 3.7 or higher allow thinking
202                kwargs['thinking'] = {"type": "enabled", "budget_tokens": thinking_budget}
203                kwargs['temperature'] = 0.7
204        else:
205            kwargs['model'] = model
206        
207        # print(f"\nCreating {provider} model: {model}")
208        # print(f"With kwargs: {kwargs}\n")
209        # print(f">>> DEBUG: UnifiedChat: FINAL KWARGS: {kwargs}")
210        return ModelClass(**kwargs)
211
212    # @classmethod
213    # def invoke_uni_api(
214    #     cls,
215    #     model: str,
216    #     uni_messages,
217    #     uni_api_url: str = None,
218    #     **kwargs
219    # ):
220    #     """Invoke uni-api directly"""
221    #     import requests
222        
223    #     # Use config URL if none provided
224    #     if uni_api_url is None:
225    #         uni_api_url = get_uni_api_url()
226        
227    #     headers = {
228    #         "Content-Type": "application/json",
229    #         "Authorization": "Bearer sk-heaven-uni-api-test-12345"
230    #     }
231        
232    #     payload = {
233    #         "model": model,
234    #         "messages": uni_messages,
235    #         **kwargs
236    #     }
237        
238    #     response = requests.post(uni_api_url, headers=headers, json=payload, timeout=120)
239        
240    #     # DEBUG: Print full request/response details on error
241    #     if response.status_code != 200:
242    #         print(f"🚨 400 ERROR DEBUG 🚨")
243    #         print(f"Request URL: {uni_api_url}")
244    #         print(f"Response status: {response.status_code}")
245    #         print(f"Response body: {response.text}")
246            
247    #         # Print EXACT JSON of each message
248    #         messages = payload.get("messages", [])
249    #         print(f"\n📨 EXACT JSON CONVERSATION HISTORY ({len(messages)} messages):")
250            
251    #         for i, msg in enumerate(messages):
252    #             print(f"Message {i}: {json.dumps(msg, indent=2)}")
253    #             print("---")
254            
255    #         print(f"🚨 END DEBUG 🚨")
256        
257    #     if response.status_code != 200:
258    #         print(f"🚨 uni-api ERROR {response.status_code}: {response.text}")
259    #     response.raise_for_status()
260    #     return response.json()
261
262    # ---------------------------- helper: cleanse -------------------------- #
263    @staticmethod
264    def _cleanse_messages_for_uni_api(messages: list[dict]) -> list[dict]:
265        """Enforce *one* tool_call per assistant message and fix null content."""
266        for msg in messages:
267            if not isinstance(msg, dict):
268                continue
269            if msg.get("tool_calls"):
270                # keep only the first request
271                if len(msg["tool_calls"]) > 1:
272                    msg["tool_calls"] = [msg["tool_calls"][0]]
273                # OpenAI requires content to be ""
274                if msg.get("content") is None:
275                    msg["content"] = ""
276        return messages
277
278    # --------------------------- uni-api direct call ----------------------- #
279    @classmethod
280    def invoke_uni_api(
281        cls,
282        model: str,
283        uni_messages: list[dict],
284        uni_api_url: str | None = None,
285        **kwargs,
286    ):
287        """Low-level POST to uni-api with built-in single-tool guard."""
288        if uni_api_url is None:
289            uni_api_url = get_uni_api_url()
290
291        # --- cleansing guard here ----------------------------------------- #
292        uni_messages = cls._cleanse_messages_for_uni_api(uni_messages)
293
294        headers = {
295            "Content-Type": "application/json",
296            "Authorization": "Bearer sk-heaven-uni-api-test-12345",
297        }
298        payload = {"model": model, "messages": uni_messages, **kwargs}
299
300        response = requests.post(uni_api_url, headers=headers, json=payload, timeout=120)
301
302        if response.status_code != 200:
303            print(f"🚨 uni-api ERROR {response.status_code}")
304            print(response.text)
305            # dump conversation for fast debugging
306            for i, m in enumerate(payload["messages"]):
307                print(f"[{i}] {json.dumps(m, indent=2)}")
308        response.raise_for_status()
309        return response.json()
PROVIDERS = {<ProviderEnum.ANTHROPIC: 'anthropic'>: <class 'langchain_anthropic.chat_models.ChatAnthropic'>, <ProviderEnum.OPENAI: 'openai'>: <class 'langchain_openai.chat_models.base.ChatOpenAI'>, <ProviderEnum.GOOGLE: 'google'>: <class 'langchain_google_genai.chat_models.ChatGoogleGenerativeAI'>, <ProviderEnum.GROQ: 'groq'>: <class 'langchain_groq.chat_models.ChatGroq'>, <ProviderEnum.DEEPSEEK: 'deepseek'>: <class 'langchain_deepseek.chat_models.ChatDeepSeek'>}
@classmethod
def create( cls, provider: ProviderEnum = <ProviderEnum.ANTHROPIC: 'anthropic'>, model: str = None, **kwargs) -> langchain_core.language_models.chat_models.BaseChatModel:
 82    @classmethod
 83    def create(
 84        cls, 
 85        provider: ProviderEnum = ProviderEnum.ANTHROPIC, 
 86        model: str = None,
 87        **kwargs
 88    ) -> BaseChatModel:
 89        """
 90        Create a unified chat model instance from any supported provider.
 91        
 92        Args:
 93            provider (ProviderEnum): The provider Enum (anthropic, openai, google, groq).
 94            model (str, optional): Specific model name.
 95            **kwargs: Additional configuration for the model.
 96        
 97        Returns:
 98            BaseChatModel: Instantiated chat model.
 99        """
100        if provider not in cls.PROVIDERS:
101            raise ValueError(f"Unsupported provider: {provider}. Supported providers: {list(cls.PROVIDERS.keys())}")
102        
103        ModelClass = cls.PROVIDERS[provider]
104        
105        # Load provider-specific settings ENV values, for now it is only the api key
106        kwargs["api_key"] = DynamicString(EnvConfigUtil.get_env_value, f"{provider.name}_API_KEY")
107        # If a model name is specified, add it to kwargs.
108                
109        # Use default model if none is specified
110        if not model:
111            model =  DynamicString(EnvConfigUtil.get_env_value, f"{provider.name}_MODEL")
112        
113        # this is an Anthropic specific value and should be removed and remapped in the kwargs
114        thinking_budget = kwargs.get('thinking_budget', None)
115        # kwargs.pop("thinking_budget", None) # dont pop them, langchain needs it
116
117        # Add OpenAI-specific settings
118        if provider == ProviderEnum.OPENAI:
119            # kwargs.update({
120            #     'model': model,
121            #     'callbacks': None,
122            #     'verbose': True
123            # })
124            kwargs.pop("thinking_budget", None) 
125            kwargs.update({
126                'model': model,
127                'callbacks': None,
128                'verbose': True,
129                'use_responses_api': True,
130                'model_kwargs': {
131                    "reasoning": {
132                        "effort": None,
133                        "summary": "auto",
134                    }
135                }
136            })
137            # remove temperature from o series model params
138            if "o3" in model or "o1" in model or "o4" in model:
139                kwargs.pop("temperature", None)
140                reasoning_effort = None
141                # enable thinking if thinking should be enabled (budget is not none)
142                if thinking_budget is not None:
143                    # TODO we might need to make a dedicated chatgpt ui for setting reasoning-effort
144                    if thinking_budget > 1024 * 6:
145                        reasoning_effort = "high"
146                    elif thinking_budget > 1024 * 3:
147                        reasoning_effort = "medium"
148                    else:
149                        reasoning_effort = "low"
150                
151                if reasoning_effort:
152                    kwargs.setdefault("model_kwargs", {})
153                    kwargs["model_kwargs"]["reasoning"] = {
154                        "effort": reasoning_effort,
155                        "summary": "auto",
156                    }
157                    
158            else: # for 4o or 4o-mini model temperature
159                if 'temperature' not in kwargs:
160                    kwargs['temperature'] = float(EnvConfigUtil.get_env_value("MODEL_DEFAULT_TEMP", 0.7))
161        
162        elif provider == ProviderEnum.GOOGLE:
163
164            kwargs["model"] = model
165
166            supports_thinking = "2.5" in model or "3." in model  # Gemini ≥2.5
167
168
169            if thinking_budget is not None and supports_thinking:
170
171                kwargs["thinking_budget"] = thinking_budget
172
173                kwargs["include_thoughts"] = True
174
175            else:
176
177                # Explicitly disable any thought tracing
178                # remove both keys completely
179
180                kwargs.pop("include_thoughts", None)
181
182                kwargs.pop("thinking_budget", None)
183        # OLD unsafe
184        # elif provider == ProviderEnum.GOOGLE:
185        #     kwargs["model"] = model           # Gemini model name
186
187        #     # 1) forward the budget (0 disables thoughts per Google docs)
188        #     if thinking_budget is not None:
189        #         kwargs["thinking_budget"] = thinking_budget     # int
190        #         kwargs["include_thoughts"] = True               # turn trace on
191        #     else:
192        #         # expose thoughts unless the caller explicitly opted out
193        #         kwargs.setdefault("include_thoughts", True)
194
195        elif provider == ProviderEnum.ANTHROPIC:
196            kwargs['model'] = model
197            # MiniMax models use Anthropic-compatible API with different key and URL
198            if model and model.lower().startswith("minimax"):
199                kwargs["api_key"] = DynamicString(EnvConfigUtil.get_env_value, "MINIMAX_API_KEY")
200                kwargs.setdefault("anthropic_api_url", "https://api.minimax.io/anthropic")
201            if extract_model_number(model) > 3.6 and thinking_budget is not None: # assuming only claude model 3.7 or higher allow thinking
202                kwargs['thinking'] = {"type": "enabled", "budget_tokens": thinking_budget}
203                kwargs['temperature'] = 0.7
204        else:
205            kwargs['model'] = model
206        
207        # print(f"\nCreating {provider} model: {model}")
208        # print(f"With kwargs: {kwargs}\n")
209        # print(f">>> DEBUG: UnifiedChat: FINAL KWARGS: {kwargs}")
210        return ModelClass(**kwargs)

Create a unified chat model instance from any supported provider.

Args: provider (ProviderEnum): The provider Enum (anthropic, openai, google, groq, deepseek). model (str, optional): Specific model name. **kwargs: Additional configuration for the model.

Returns: BaseChatModel: Instantiated chat model.

@classmethod
def invoke_uni_api( cls, model: str, uni_messages: list[dict], uni_api_url: str | None = None, **kwargs):
279    @classmethod
280    def invoke_uni_api(
281        cls,
282        model: str,
283        uni_messages: list[dict],
284        uni_api_url: str | None = None,
285        **kwargs,
286    ):
287        """Low-level POST to uni-api with built-in single-tool guard."""
288        if uni_api_url is None:
289            uni_api_url = get_uni_api_url()
290
291        # --- cleansing guard here ----------------------------------------- #
292        uni_messages = cls._cleanse_messages_for_uni_api(uni_messages)
293
294        headers = {
295            "Content-Type": "application/json",
296            "Authorization": "Bearer sk-heaven-uni-api-test-12345",
297        }
298        payload = {"model": model, "messages": uni_messages, **kwargs}
299
300        response = requests.post(uni_api_url, headers=headers, json=payload, timeout=120)
301
302        if response.status_code != 200:
303            print(f"🚨 uni-api ERROR {response.status_code}")
304            print(response.text)
305            # dump conversation for fast debugging
306            for i, m in enumerate(payload["messages"]):
307                print(f"[{i}] {json.dumps(m, indent=2)}")
308        response.raise_for_status()
309        return response.json()

Low-level POST to uni-api with built-in single-tool guard.

def extract_model_number(model: str) -> float:
def extract_model_number(model: str) -> float:
    """Extract a ``major.minor`` version number from a dash-separated model name.

    Examples:
        'claude-3-7-sonnet-20250219'  -> 3.7
        'claude-4-something-20250219' -> 4.0
        'claude-sonnet-4-20250514'    -> 4.0
        'claude-opus-4-1-20250805'    -> 4.1

    Rules:
      - The first strictly numeric dash-separated part is the integer portion.
      - The part immediately after it, if strictly numeric and not an
        8-digit release date stamp (YYYYMMDD), is the decimal portion.
      - Otherwise the decimal portion is 0.

    Args:
        model: Model name. A falsy value (``None`` or ``""``) is tolerated.

    Returns:
        The version as a float, or 0.0 when ``model`` is falsy or contains
        no strictly numeric part.
    """
    # A missing model name is treated the same as "no version found" so the
    # caller's numeric comparison does not crash on None.
    if not model:
        return 0.0

    date_stamp_length = 8  # YYYYMMDD suffix such as 20250219

    parts = model.split("-")
    for index, part in enumerate(parts):
        if not part.isdigit():
            continue
        following = parts[index + 1] if index + 1 < len(parts) else ""
        # A date stamp directly after the major version (e.g.
        # 'claude-sonnet-4-20250514') is not a minor version.
        is_minor = following.isdigit() and len(following) != date_stamp_length
        decimal_part = following if is_minor else "0"
        return float(f"{part}.{decimal_part}")

    # No numeric part anywhere in the name.
    return 0.0

Given a string of the form 'claude-3-7-something-20250219' or 'claude-4-something-20250219', extract the numeric parts and combine into a decimal (e.g., 3.7 or 4.0).

Rules:

  • The first numeric part we encounter is the integer portion.
  • The next numeric part (if present) is the decimal portion.
  • If no second numeric part is found, use 0 as the decimal portion.