# Copyright (c) Saga Inc.
# Distributed under the terms of the GNU Affero General Public License v3.0 License.

"""Kernel code run in IPython/Jupyter to list user variables for AI context.

This file is the single source of truth. The JupyterLab extension embeds it via
mito-ai/scripts/emit-kernel-variable-inspection.mjs; mito-ai-python-tool-executor
loads the same text at runtime.
"""

import json
import inspect

# Prefer IPython user namespace when available (matches notebook globals).
# If IPython cannot be imported, or get_ipython() returns None, fall back to
# this module's own globals so the rest of the script still has a namespace
# to inspect.
try:
    from IPython import get_ipython

    _ip = get_ipython()
    _ns = _ip.user_ns if _ip is not None else globals()
except Exception:
    _ns = globals()

# We need to know whether pandas can be imported so we know if it's safe
# to check for pandas DataFrames (the `pd` name only exists when it can).
_is_pandas_imported = False
try:
    import pandas as pd

    _is_pandas_imported = True
except Exception:
    # pandas is optional here: leave the flag False so DataFrame checks are skipped.
    pass


# Function to convert dataframe to structured format
def get_dataframe_structure(df, sample_size=5):
    """Summarise a DataFrame as a JSON-friendly ``{column: info}`` dict.

    Args:
        df: The pandas DataFrame to describe.
        sample_size: Number of leading rows sampled from each column.

    Returns:
        A dict with one entry per column label. Each entry holds the column's
        dtype as a string under ``"dtype"`` and up to ``sample_size``
        converted leading values under ``"samples"``. Labels that are not
        valid JSON object keys (e.g. tuples or timestamps) are converted with
        ``str()``. If several columns share a label, the last one wins.
    """
    max_text_length = 50
    infinities = (float("inf"), float("-inf"))

    def truncate(text):
        # Keep long text short so a single cell cannot bloat the output.
        if len(text) > max_text_length:
            return text[:max_text_length] + "..."
        return text

    def is_missing(value):
        try:
            result = pd.isna(value)
        except (TypeError, ValueError):
            return False
        # For list-like cells pd.isna returns an element-wise array whose
        # truth value is ambiguous; such a cell is a real value, not missing.
        if getattr(result, "ndim", 0) != 0:
            return False
        return bool(result)

    def convert_value(value):
        if is_missing(value):
            # Handle None and NaN (convert to None, which maps to null in JSON)
            return None
        if isinstance(value, float) and value in infinities:
            # json.dumps would emit the bare token Infinity/-Infinity, which
            # is not valid JSON; send the textual form instead.
            return str(value)
        if not isinstance(value, (str, int, float, bool)):
            return truncate(str(value))
        if isinstance(value, str):
            return truncate(value)
        return value

    def column_key(label):
        # json.dumps only accepts str/int/float/bool/None as object keys.
        if label is None or isinstance(label, (str, int, float, bool)):
            return label
        return str(label)

    structure = {}
    for position, column in enumerate(df.columns):
        # Select by position: df[column] returns a DataFrame (which has no
        # .dtype) when several columns share the same label.
        series = df.iloc[:, position]
        structure[column_key(column)] = {
            "dtype": str(series.dtype),
            "samples": [convert_value(x) for x in series.head(sample_size)],
        }
    return structure


def is_from_mitosheet(obj):
    """Return True when ``obj`` appears to originate from mitosheet.

    Two signals are accepted: the module that ``inspect.getmodule`` reports
    for the object has a name starting with "mitosheet", or the object is a
    dict containing every one of a few known mito function names. Any error
    raised while checking is treated as "not from mitosheet".
    """
    try:
        owner = inspect.getmodule(obj)
        if owner and owner.__name__.startswith("mitosheet"):
            return True

        # Only dicts can qualify through the function-name signal.
        if not isinstance(obj, dict):
            return False

        # A dict holding all of these mito functions is assumed to come from mitosheet.
        for required_name in ("STRIPTIMETOMONTHS", "GETNEXTVALUE", "FILLNAN"):
            if required_name not in obj:
                return False
        return True
    except Exception:
        return False


def structured_globals():
    """Return a JSON string listing the variables found in ``_ns``.

    Each listed variable is a dict with the keys "variable_name", "type" and
    "value". DataFrames (when pandas could be imported) are described by
    ``get_dataframe_structure``; every other value is described by its
    ``repr``, truncated in the middle when it is longer than 4000 characters.

    Skipped entirely: objects recognised by ``is_from_mitosheet``, names
    starting with "scratch_" or "_", the names "In", "Out", "json" and
    "inspect", callables, and any variable whose description cannot be built
    or serialised to JSON.

    Returns:
        A JSON array (as a string) of the variable descriptions.
    """
    # `json` and `inspect` are both imported by this script itself, so neither
    # should be reported back as if it were one of the user's variables.
    skipped_names = ("In", "Out", "json", "inspect")
    max_value_length = 4000  # This is roughly 1000 tokens in the LLM

    output = []
    for k, v in _ns.items():

        # Skip mitosheet functions
        if is_from_mitosheet(v):
            continue

        if k.startswith("scratch_"):
            # Skip scratchpad variables the agent creates
            continue

        if k.startswith("_") or k in skipped_names or callable(v):
            continue

        try:
            # Everything that touches the user's object (isinstance, repr,
            # DataFrame sampling) lives inside this try block. We handle each
            # variable individually so that one misbehaving object cannot
            # crash the entire variable inspection; it is simply skipped.
            if _is_pandas_imported and isinstance(v, pd.DataFrame):
                new_variable = {
                    "variable_name": k,
                    "type": "pd.DataFrame",
                    "value": get_dataframe_structure(v),
                }
            else:
                truncated_value = str(repr(v))  # Force to string

                if len(truncated_value) > max_value_length:
                    split_length = max_value_length // 2
                    truncated_value = (
                        truncated_value[:split_length]
                        + "... <middle of value truncated> ..."
                        + truncated_value[-split_length:]
                    )

                new_variable = {
                    "variable_name": k,
                    "type": str(type(v)),
                    "value": truncated_value,
                }

            # Check if the variable can be converted to JSON.
            # If it can, add it to the outputs. If it can't, we just skip it.
            json.dumps(new_variable["value"])
        except Exception:
            continue

        output.append(new_variable)

    return json.dumps(output)


# Executing this script writes the JSON-encoded variable list to stdout.
print(structured_globals())
