Generated by Cython 3.2.4
Yellow lines hint at Python interaction.
Click on a line that starts with a "+" to see the C code that Cython generated for it.
Raw output: guidedrollout_cy.c
+001: from libc.math cimport sqrt
__pyx_t_2 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 1, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_test, __pyx_t_2) < (0)) __PYX_ERR(0, 1, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
002: from lean_reinforcement.agent.mcts.mcts_cy.base_mcts_cy cimport Node, BaseMCTS
+003: import math
__pyx_t_1 = __Pyx_Import(__pyx_mstate_global->__pyx_n_u_math, 0, 0, NULL, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 3, __pyx_L1_error) __pyx_t_2 = __pyx_t_1; __Pyx_GOTREF(__pyx_t_2); if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_math, __pyx_t_2) < (0)) __PYX_ERR(0, 3, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+004: from lean_dojo import TacticState, ProofFinished, LeanError, ProofGivenUp
{
PyObject* const __pyx_imported_names[] = {__pyx_mstate_global->__pyx_n_u_TacticState,__pyx_mstate_global->__pyx_n_u_ProofFinished,__pyx_mstate_global->__pyx_n_u_LeanError,__pyx_mstate_global->__pyx_n_u_ProofGivenUp};
__pyx_t_1 = __Pyx_Import(__pyx_mstate_global->__pyx_n_u_lean_dojo, __pyx_imported_names, 4, NULL, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 4, __pyx_L1_error)
}
__pyx_t_2 = __pyx_t_1;
__Pyx_GOTREF(__pyx_t_2);
{
PyObject* const __pyx_imported_names[] = {__pyx_mstate_global->__pyx_n_u_TacticState,__pyx_mstate_global->__pyx_n_u_ProofFinished,__pyx_mstate_global->__pyx_n_u_LeanError,__pyx_mstate_global->__pyx_n_u_ProofGivenUp};
for (__pyx_t_3=0; __pyx_t_3 < 4; __pyx_t_3++) {
__pyx_t_4 = __Pyx_ImportFrom(__pyx_t_2, __pyx_imported_names[__pyx_t_3]); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 4, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_4);
if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_imported_names[__pyx_t_3], __pyx_t_4) < (0)) __PYX_ERR(0, 4, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
}
}
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
005:
+006: cdef class MCTS_GuidedRollout(BaseMCTS):
struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout {
struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_BaseMCTS __pyx_base;
};
/* … */
struct __pyx_vtabstruct_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout {
struct __pyx_vtabstruct_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_BaseMCTS __pyx_base;
float (*_puct_score)(struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *, struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *, int __pyx_skip_dispatch);
};
static struct __pyx_vtabstruct_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *__pyx_vtabptr_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout;
007:
+008: def __init__(
/* Python wrapper */
static int __pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_1__init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static int __pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_1__init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
PyObject *__pyx_v_env = 0;
PyObject *__pyx_v_transformer = 0;
float __pyx_v_exploration_weight;
int __pyx_v_max_tree_nodes;
int __pyx_v_batch_size;
int __pyx_v_num_tactics_to_expand;
int __pyx_v_max_rollout_depth;
CYTHON_UNUSED double __pyx_v_max_time;
CYTHON_UNUSED PyObject *__pyx_v_kwargs = 0;
CYTHON_UNUSED Py_ssize_t __pyx_nargs;
CYTHON_UNUSED PyObject *const *__pyx_kwvalues;
int __pyx_r;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("__init__ (wrapper)", 0);
#if CYTHON_ASSUME_SAFE_SIZE
__pyx_nargs = PyTuple_GET_SIZE(__pyx_args);
#else
__pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return -1;
#endif
__pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs);
{
PyObject ** const __pyx_pyargnames[] = {&__pyx_mstate_global->__pyx_n_u_env,&__pyx_mstate_global->__pyx_n_u_transformer,&__pyx_mstate_global->__pyx_n_u_exploration_weight,&__pyx_mstate_global->__pyx_n_u_max_tree_nodes,&__pyx_mstate_global->__pyx_n_u_batch_size,&__pyx_mstate_global->__pyx_n_u_num_tactics_to_expand,&__pyx_mstate_global->__pyx_n_u_max_rollout_depth,&__pyx_mstate_global->__pyx_n_u_max_time,0};
PyObject* values[8] = {0,0,0,0,0,0,0,0};
const Py_ssize_t __pyx_kwds_len = (__pyx_kwds) ? __Pyx_NumKwargs_VARARGS(__pyx_kwds) : 0;
if (unlikely(__pyx_kwds_len) < 0) __PYX_ERR(0, 8, __pyx_L3_error)
if (__pyx_kwds_len > 0) {
switch (__pyx_nargs) {
case 8:
values[7] = __Pyx_ArgRef_VARARGS(__pyx_args, 7);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[7])) __PYX_ERR(0, 8, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 7:
values[6] = __Pyx_ArgRef_VARARGS(__pyx_args, 6);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[6])) __PYX_ERR(0, 8, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 6:
values[5] = __Pyx_ArgRef_VARARGS(__pyx_args, 5);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[5])) __PYX_ERR(0, 8, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 5:
values[4] = __Pyx_ArgRef_VARARGS(__pyx_args, 4);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[4])) __PYX_ERR(0, 8, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 4:
values[3] = __Pyx_ArgRef_VARARGS(__pyx_args, 3);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[3])) __PYX_ERR(0, 8, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 3:
values[2] = __Pyx_ArgRef_VARARGS(__pyx_args, 2);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[2])) __PYX_ERR(0, 8, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 2:
values[1] = __Pyx_ArgRef_VARARGS(__pyx_args, 1);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[1])) __PYX_ERR(0, 8, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 1:
values[0] = __Pyx_ArgRef_VARARGS(__pyx_args, 0);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 8, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 0: break;
default: goto __pyx_L5_argtuple_error;
}
const Py_ssize_t kwd_pos_args = __pyx_nargs;
if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, __pyx_v_kwargs, values, kwd_pos_args, __pyx_kwds_len, "__init__", 1) < (0)) __PYX_ERR(0, 8, __pyx_L3_error)
for (Py_ssize_t i = __pyx_nargs; i < 2; i++) {
if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("__init__", 0, 2, 8, i); __PYX_ERR(0, 8, __pyx_L3_error) }
}
} else {
switch (__pyx_nargs) {
case 8:
values[7] = __Pyx_ArgRef_VARARGS(__pyx_args, 7);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[7])) __PYX_ERR(0, 8, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 7:
values[6] = __Pyx_ArgRef_VARARGS(__pyx_args, 6);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[6])) __PYX_ERR(0, 8, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 6:
values[5] = __Pyx_ArgRef_VARARGS(__pyx_args, 5);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[5])) __PYX_ERR(0, 8, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 5:
values[4] = __Pyx_ArgRef_VARARGS(__pyx_args, 4);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[4])) __PYX_ERR(0, 8, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 4:
values[3] = __Pyx_ArgRef_VARARGS(__pyx_args, 3);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[3])) __PYX_ERR(0, 8, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 3:
values[2] = __Pyx_ArgRef_VARARGS(__pyx_args, 2);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[2])) __PYX_ERR(0, 8, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 2:
values[1] = __Pyx_ArgRef_VARARGS(__pyx_args, 1);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[1])) __PYX_ERR(0, 8, __pyx_L3_error)
values[0] = __Pyx_ArgRef_VARARGS(__pyx_args, 0);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 8, __pyx_L3_error)
break;
default: goto __pyx_L5_argtuple_error;
}
}
__pyx_v_env = values[0];
__pyx_v_transformer = values[1];
if (values[2]) {
__pyx_v_exploration_weight = __Pyx_PyFloat_AsFloat(values[2]); if (unlikely((__pyx_v_exploration_weight == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 12, __pyx_L3_error)
} else {
__pyx_v_exploration_weight = ((float)1.41421356);
}
if (values[3]) {
__pyx_v_max_tree_nodes = __Pyx_PyLong_As_int(values[3]); if (unlikely((__pyx_v_max_tree_nodes == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 13, __pyx_L3_error)
} else {
__pyx_v_max_tree_nodes = ((int)0x2710);
}
if (values[4]) {
__pyx_v_batch_size = __Pyx_PyLong_As_int(values[4]); if (unlikely((__pyx_v_batch_size == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 14, __pyx_L3_error)
} else {
__pyx_v_batch_size = ((int)8);
}
if (values[5]) {
__pyx_v_num_tactics_to_expand = __Pyx_PyLong_As_int(values[5]); if (unlikely((__pyx_v_num_tactics_to_expand == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 15, __pyx_L3_error)
} else {
__pyx_v_num_tactics_to_expand = ((int)8);
}
if (values[6]) {
__pyx_v_max_rollout_depth = __Pyx_PyLong_As_int(values[6]); if (unlikely((__pyx_v_max_rollout_depth == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 16, __pyx_L3_error)
} else {
__pyx_v_max_rollout_depth = ((int)30);
}
if (values[7]) {
__pyx_v_max_time = __Pyx_PyFloat_AsDouble(values[7]); if (unlikely((__pyx_v_max_time == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 17, __pyx_L3_error)
} else {
__pyx_v_max_time = ((double)300.0);
}
}
goto __pyx_L6_skip;
__pyx_L5_argtuple_error:;
__Pyx_RaiseArgtupleInvalid("__init__", 0, 2, 8, __pyx_nargs); __PYX_ERR(0, 8, __pyx_L3_error)
__pyx_L6_skip:;
goto __pyx_L4_argument_unpacking_done;
__pyx_L3_error:;
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
__Pyx_XDECREF(__pyx_v_kwargs); __pyx_v_kwargs = 0;
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_RefNannyFinishContext();
return -1;
__pyx_L4_argument_unpacking_done:;
__pyx_r = __pyx_pf_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout___init__(((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self), __pyx_v_env, __pyx_v_transformer, __pyx_v_exploration_weight, __pyx_v_max_tree_nodes, __pyx_v_batch_size, __pyx_v_num_tactics_to_expand, __pyx_v_max_rollout_depth, __pyx_v_max_time, __pyx_v_kwargs);
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
/* function exit code */
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
__Pyx_XDECREF(__pyx_v_kwargs);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
static int __pyx_pf_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout___init__(struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *__pyx_v_self, PyObject *__pyx_v_env, PyObject *__pyx_v_transformer, float __pyx_v_exploration_weight, int __pyx_v_max_tree_nodes, int __pyx_v_batch_size, int __pyx_v_num_tactics_to_expand, int __pyx_v_max_rollout_depth, CYTHON_UNUSED double __pyx_v_max_time, CYTHON_UNUSED PyObject *__pyx_v_kwargs) {
int __pyx_r;
/* … */
/* function exit code */
__pyx_r = 0;
goto __pyx_L0;
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_XDECREF(__pyx_t_2);
__Pyx_XDECREF(__pyx_t_3);
__Pyx_XDECREF(__pyx_t_4);
__Pyx_XDECREF(__pyx_t_6);
__Pyx_XDECREF(__pyx_t_7);
__Pyx_XDECREF(__pyx_t_8);
__Pyx_XDECREF(__pyx_t_9);
__Pyx_XDECREF(__pyx_t_10);
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = -1;
__pyx_L0:;
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
009: self,
010: env,
011: transformer,
012: float exploration_weight=1.41421356,
013: int max_tree_nodes=10000,
014: int batch_size=8,
015: int num_tactics_to_expand=8,
016: int max_rollout_depth=30,
017: max_time: float = 300.0,
018: **kwargs,
019: ):
+020: super().__init__(
__pyx_t_4 = NULL;
__pyx_t_5 = 1;
{
PyObject *__pyx_callargs[3] = {__pyx_t_4, ((PyObject *)__pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout), ((PyObject *)__pyx_v_self)};
__pyx_t_3 = __Pyx_PyObject_FastCall((PyObject*)__pyx_builtin_super, __pyx_callargs+__pyx_t_5, (3-__pyx_t_5) | (__pyx_t_5*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET));
__Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 20, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_3);
}
__pyx_t_2 = __pyx_t_3;
__Pyx_INCREF(__pyx_t_2);
021: env=env,
022: transformer=transformer,
+023: exploration_weight=exploration_weight,
__pyx_t_4 = PyFloat_FromDouble(__pyx_v_exploration_weight); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 23, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4);
+024: max_tree_nodes=max_tree_nodes,
__pyx_t_6 = __Pyx_PyLong_From_int(__pyx_v_max_tree_nodes); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 24, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6);
+025: batch_size=batch_size,
__pyx_t_7 = __Pyx_PyLong_From_int(__pyx_v_batch_size); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 25, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7);
+026: num_tactics_to_expand=num_tactics_to_expand,
__pyx_t_8 = __Pyx_PyLong_From_int(__pyx_v_num_tactics_to_expand); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 26, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8);
+027: max_rollout_depth=max_rollout_depth,
__pyx_t_9 = __Pyx_PyLong_From_int(__pyx_v_max_rollout_depth); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 27, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); __pyx_t_5 = 0; { PyObject *__pyx_callargs[2 + ((CYTHON_VECTORCALL) ? 7 : 0)] = {__pyx_t_2, NULL}; __pyx_t_10 = __Pyx_MakeVectorcallBuilderKwds(7); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 20, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_env, __pyx_v_env, __pyx_t_10, __pyx_callargs+1, 0) < (0)) __PYX_ERR(0, 20, __pyx_L1_error) if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_transformer, __pyx_v_transformer, __pyx_t_10, __pyx_callargs+1, 1) < (0)) __PYX_ERR(0, 20, __pyx_L1_error) if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_exploration_weight, __pyx_t_4, __pyx_t_10, __pyx_callargs+1, 2) < (0)) __PYX_ERR(0, 20, __pyx_L1_error) if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_max_tree_nodes, __pyx_t_6, __pyx_t_10, __pyx_callargs+1, 3) < (0)) __PYX_ERR(0, 20, __pyx_L1_error) if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_batch_size, __pyx_t_7, __pyx_t_10, __pyx_callargs+1, 4) < (0)) __PYX_ERR(0, 20, __pyx_L1_error) if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_num_tactics_to_expand, __pyx_t_8, __pyx_t_10, __pyx_callargs+1, 5) < (0)) __PYX_ERR(0, 20, __pyx_L1_error) if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_max_rollout_depth, __pyx_t_9, __pyx_t_10, __pyx_callargs+1, 6) < (0)) __PYX_ERR(0, 20, __pyx_L1_error) __pyx_t_1 = __Pyx_Object_VectorcallMethod_CallFromBuilder((PyObject*)__pyx_mstate_global->__pyx_n_u_init, __pyx_callargs+__pyx_t_5, (1-__pyx_t_5) | (1*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET), __pyx_t_10); __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 20, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
028: )
029:
+030: cpdef float _puct_score(self, Node node):
static PyObject *__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_3_puct_score(PyObject *__pyx_v_self,
#if CYTHON_METH_FASTCALL
PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
#else
PyObject *__pyx_args, PyObject *__pyx_kwds
#endif
); /*proto*/
static float __pyx_f_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout__puct_score(struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *__pyx_v_self, struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_node, int __pyx_skip_dispatch) {
float __pyx_v_q_value;
float __pyx_v_exploration;
int __pyx_v_v_loss;
int __pyx_v_visit_count;
struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_parent = 0;
float __pyx_r;
/* Check if called by wrapper */
if (unlikely(__pyx_skip_dispatch)) ;
/* Check if overridden in Python */
else if (
#if !CYTHON_USE_TYPE_SLOTS
unlikely(Py_TYPE(((PyObject *)__pyx_v_self)) != __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout &&
__Pyx_PyType_HasFeature(Py_TYPE(((PyObject *)__pyx_v_self)), Py_TPFLAGS_HAVE_GC))
#else
unlikely(Py_TYPE(((PyObject *)__pyx_v_self))->tp_dictoffset != 0 || __Pyx_PyType_HasFeature(Py_TYPE(((PyObject *)__pyx_v_self)), (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)))
#endif
) {
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
static PY_UINT64_T __pyx_tp_dict_version = __PYX_DICT_VERSION_INIT, __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT;
if (unlikely(!__Pyx_object_dict_version_matches(((PyObject *)__pyx_v_self), __pyx_tp_dict_version, __pyx_obj_dict_version))) {
PY_UINT64_T __pyx_typedict_guard = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self));
#endif
__pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_mstate_global->__pyx_n_u_puct_score); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 30, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
if (!__Pyx_IsSameCFunction(__pyx_t_1, (void(*)(void)) __pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_3_puct_score)) {
__pyx_t_3 = NULL;
__Pyx_INCREF(__pyx_t_1);
__pyx_t_4 = __pyx_t_1;
__pyx_t_5 = 1;
#if CYTHON_UNPACK_METHODS
if (unlikely(PyMethod_Check(__pyx_t_4))) {
__pyx_t_3 = PyMethod_GET_SELF(__pyx_t_4);
assert(__pyx_t_3);
PyObject* __pyx__function = PyMethod_GET_FUNCTION(__pyx_t_4);
__Pyx_INCREF(__pyx_t_3);
__Pyx_INCREF(__pyx__function);
__Pyx_DECREF_SET(__pyx_t_4, __pyx__function);
__pyx_t_5 = 0;
}
#endif
{
PyObject *__pyx_callargs[2] = {__pyx_t_3, ((PyObject *)__pyx_v_node)};
__pyx_t_2 = __Pyx_PyObject_FastCall((PyObject*)__pyx_t_4, __pyx_callargs+__pyx_t_5, (2-__pyx_t_5) | (__pyx_t_5*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET));
__Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 30, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
}
__pyx_t_6 = __Pyx_PyFloat_AsFloat(__pyx_t_2); if (unlikely((__pyx_t_6 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 30, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
__pyx_r = __pyx_t_6;
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
goto __pyx_L0;
}
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
__pyx_tp_dict_version = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self));
__pyx_obj_dict_version = __Pyx_get_object_dict_version(((PyObject *)__pyx_v_self));
if (unlikely(__pyx_typedict_guard != __pyx_tp_dict_version)) {
__pyx_tp_dict_version = __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT;
}
#endif
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
}
#endif
}
/* … */
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_XDECREF(__pyx_t_2);
__Pyx_XDECREF(__pyx_t_3);
__Pyx_XDECREF(__pyx_t_4);
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout._puct_score", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = 0;
__pyx_L0:;
__Pyx_XDECREF((PyObject *)__pyx_v_parent);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* Python wrapper */
static PyObject *__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_3_puct_score(PyObject *__pyx_v_self,
#if CYTHON_METH_FASTCALL
PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
#else
PyObject *__pyx_args, PyObject *__pyx_kwds
#endif
); /*proto*/
static PyMethodDef __pyx_mdef_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_3_puct_score = {"_puct_score", (PyCFunction)(void(*)(void))(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_3_puct_score, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_3_puct_score(PyObject *__pyx_v_self,
#if CYTHON_METH_FASTCALL
PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
#else
PyObject *__pyx_args, PyObject *__pyx_kwds
#endif
) {
struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_node = 0;
#if !CYTHON_METH_FASTCALL
CYTHON_UNUSED Py_ssize_t __pyx_nargs;
#endif
CYTHON_UNUSED PyObject *const *__pyx_kwvalues;
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("_puct_score (wrapper)", 0);
#if !CYTHON_METH_FASTCALL
#if CYTHON_ASSUME_SAFE_SIZE
__pyx_nargs = PyTuple_GET_SIZE(__pyx_args);
#else
__pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL;
#endif
#endif
__pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs);
{
PyObject ** const __pyx_pyargnames[] = {&__pyx_mstate_global->__pyx_n_u_node,0};
PyObject* values[1] = {0};
const Py_ssize_t __pyx_kwds_len = (__pyx_kwds) ? __Pyx_NumKwargs_FASTCALL(__pyx_kwds) : 0;
if (unlikely(__pyx_kwds_len) < 0) __PYX_ERR(0, 30, __pyx_L3_error)
if (__pyx_kwds_len > 0) {
switch (__pyx_nargs) {
case 1:
values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 30, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 0: break;
default: goto __pyx_L5_argtuple_error;
}
const Py_ssize_t kwd_pos_args = __pyx_nargs;
if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "_puct_score", 0) < (0)) __PYX_ERR(0, 30, __pyx_L3_error)
for (Py_ssize_t i = __pyx_nargs; i < 1; i++) {
if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("_puct_score", 1, 1, 1, i); __PYX_ERR(0, 30, __pyx_L3_error) }
}
} else if (unlikely(__pyx_nargs != 1)) {
goto __pyx_L5_argtuple_error;
} else {
values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 30, __pyx_L3_error)
}
__pyx_v_node = ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)values[0]);
}
goto __pyx_L6_skip;
__pyx_L5_argtuple_error:;
__Pyx_RaiseArgtupleInvalid("_puct_score", 1, 1, 1, __pyx_nargs); __PYX_ERR(0, 30, __pyx_L3_error)
__pyx_L6_skip:;
goto __pyx_L4_argument_unpacking_done;
__pyx_L3_error:;
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout._puct_score", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_RefNannyFinishContext();
return NULL;
__pyx_L4_argument_unpacking_done:;
if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_node), __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node, 1, "node", 0))) __PYX_ERR(0, 30, __pyx_L1_error)
__pyx_r = __pyx_pf_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_2_puct_score(((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self), __pyx_v_node);
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
/* function exit code */
goto __pyx_L0;
__pyx_L1_error:;
__pyx_r = NULL;
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
goto __pyx_L7_cleaned_up;
__pyx_L0:;
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
__pyx_L7_cleaned_up:;
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
static PyObject *__pyx_pf_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_2_puct_score(struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *__pyx_v_self, struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_node) {
PyObject *__pyx_r = NULL;
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = __pyx_f_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout__puct_score(__pyx_v_self, __pyx_v_node, 1); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 30, __pyx_L1_error)
__pyx_t_2 = PyFloat_FromDouble(__pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 30, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
__pyx_r = __pyx_t_2;
__pyx_t_2 = 0;
goto __pyx_L0;
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_2);
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout._puct_score", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = NULL;
__pyx_L0:;
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* … */
__pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_3_puct_score, __Pyx_CYFUNCTION_CCLASS, __pyx_mstate_global->__pyx_n_u_MCTS_GuidedRollout__puct_score, NULL, __pyx_mstate_global->__pyx_n_u_lean_reinforcement_agent_mcts_mc, __pyx_mstate_global->__pyx_d, ((PyObject *)__pyx_mstate_global->__pyx_codeobj_tab[0])); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 30, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030E0000
PyUnstable_Object_EnableDeferredRefcount(__pyx_t_2);
#endif
if (__Pyx_SetItemOnTypeDict(__pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout, __pyx_mstate_global->__pyx_n_u_puct_score, __pyx_t_2) < (0)) __PYX_ERR(0, 30, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
031: cdef float q_value
032: cdef float exploration
033: cdef int v_loss
034: cdef int visit_count
+035: cdef Node parent = node.parent
__pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_node), __pyx_mstate_global->__pyx_n_u_parent); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 35, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node))))) __PYX_ERR(0, 35, __pyx_L1_error) __pyx_v_parent = ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)__pyx_t_1); __pyx_t_1 = 0;
036:
+037: if parent is None:
__pyx_t_7 = (((PyObject *)__pyx_v_parent) == Py_None);
if (__pyx_t_7) {
/* … */
}
+038: return 0.0
__pyx_r = 0.0;
goto __pyx_L0;
039:
+040: v_loss = self._get_virtual_loss(node)
__pyx_t_8 = ((struct __pyx_vtabstruct_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self->__pyx_base.__pyx_vtab)->__pyx_base._get_virtual_loss(((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_BaseMCTS *)__pyx_v_self), __pyx_v_node, 0); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 40, __pyx_L1_error)
__pyx_v_v_loss = __pyx_t_8;
+041: visit_count = node.visit_count + v_loss
__pyx_v_visit_count = (__pyx_v_node->visit_count + __pyx_v_v_loss);
042:
+043: if visit_count == 0:
__pyx_t_7 = (__pyx_v_visit_count == 0);
if (__pyx_t_7) {
/* … */
goto __pyx_L4;
}
+044: q_value = 0.0
__pyx_v_q_value = 0.0;
045: else:
+046: q_value = node.max_value - (v_loss / <float>visit_count)
/*else*/ {
__pyx_v_q_value = (__pyx_v_node->max_value - (((float)__pyx_v_v_loss) / ((float)__pyx_v_visit_count)));
}
__pyx_L4:;
047:
048: exploration = (
049: self.exploration_weight
050: * node.prior_p
+051: * (sqrt(parent.visit_count) / (1 + visit_count))
__pyx_v_exploration = ((__pyx_v_self->__pyx_base.exploration_weight * __pyx_v_node->prior_p) * (sqrt(__pyx_v_parent->visit_count) / ((double)(1 + __pyx_v_visit_count))));
052: )
053:
+054: return q_value + exploration
__pyx_r = (__pyx_v_q_value + __pyx_v_exploration); goto __pyx_L0;
055:
+056: cpdef Node _get_best_child(self, Node node):
static PyObject *__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_5_get_best_child(PyObject *__pyx_v_self,
#if CYTHON_METH_FASTCALL
PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
#else
PyObject *__pyx_args, PyObject *__pyx_kwds
#endif
); /*proto*/
static struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_f_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout__get_best_child(struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *__pyx_v_self, struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_node, int __pyx_skip_dispatch) {
struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_child = 0;
struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_best_child = 0;
float __pyx_v_max_score;
float __pyx_v_score;
struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_r = NULL;
/* Check if called by wrapper */
if (unlikely(__pyx_skip_dispatch)) ;
/* Check if overridden in Python */
else if (
#if !CYTHON_USE_TYPE_SLOTS
unlikely(Py_TYPE(((PyObject *)__pyx_v_self)) != __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout &&
__Pyx_PyType_HasFeature(Py_TYPE(((PyObject *)__pyx_v_self)), Py_TPFLAGS_HAVE_GC))
#else
unlikely(Py_TYPE(((PyObject *)__pyx_v_self))->tp_dictoffset != 0 || __Pyx_PyType_HasFeature(Py_TYPE(((PyObject *)__pyx_v_self)), (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)))
#endif
) {
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
static PY_UINT64_T __pyx_tp_dict_version = __PYX_DICT_VERSION_INIT, __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT;
if (unlikely(!__Pyx_object_dict_version_matches(((PyObject *)__pyx_v_self), __pyx_tp_dict_version, __pyx_obj_dict_version))) {
PY_UINT64_T __pyx_typedict_guard = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self));
#endif
__pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_mstate_global->__pyx_n_u_get_best_child); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 56, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
if (!__Pyx_IsSameCFunction(__pyx_t_1, (void(*)(void)) __pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_5_get_best_child)) {
__Pyx_XDECREF((PyObject *)__pyx_r);
__pyx_t_3 = NULL;
__Pyx_INCREF(__pyx_t_1);
__pyx_t_4 = __pyx_t_1;
__pyx_t_5 = 1;
#if CYTHON_UNPACK_METHODS
if (unlikely(PyMethod_Check(__pyx_t_4))) {
__pyx_t_3 = PyMethod_GET_SELF(__pyx_t_4);
assert(__pyx_t_3);
PyObject* __pyx__function = PyMethod_GET_FUNCTION(__pyx_t_4);
__Pyx_INCREF(__pyx_t_3);
__Pyx_INCREF(__pyx__function);
__Pyx_DECREF_SET(__pyx_t_4, __pyx__function);
__pyx_t_5 = 0;
}
#endif
{
PyObject *__pyx_callargs[2] = {__pyx_t_3, ((PyObject *)__pyx_v_node)};
__pyx_t_2 = __Pyx_PyObject_FastCall((PyObject*)__pyx_t_4, __pyx_callargs+__pyx_t_5, (2-__pyx_t_5) | (__pyx_t_5*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET));
__Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 56, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
}
if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node))))) __PYX_ERR(0, 56, __pyx_L1_error)
__pyx_r = ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)__pyx_t_2);
__pyx_t_2 = 0;
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
goto __pyx_L0;
}
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
__pyx_tp_dict_version = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self));
__pyx_obj_dict_version = __Pyx_get_object_dict_version(((PyObject *)__pyx_v_self));
if (unlikely(__pyx_typedict_guard != __pyx_tp_dict_version)) {
__pyx_tp_dict_version = __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT;
}
#endif
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
}
#endif
}
/* … */
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_XDECREF(__pyx_t_2);
__Pyx_XDECREF(__pyx_t_3);
__Pyx_XDECREF(__pyx_t_4);
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout._get_best_child", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = 0;
__pyx_L0:;
__Pyx_XDECREF((PyObject *)__pyx_v_child);
__Pyx_XDECREF((PyObject *)__pyx_v_best_child);
__Pyx_XGIVEREF((PyObject *)__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* Python wrapper */
static PyObject *__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_5_get_best_child(PyObject *__pyx_v_self,
#if CYTHON_METH_FASTCALL
PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
#else
PyObject *__pyx_args, PyObject *__pyx_kwds
#endif
); /*proto*/
static PyMethodDef __pyx_mdef_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_5_get_best_child = {"_get_best_child", (PyCFunction)(void(*)(void))(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_5_get_best_child, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_5_get_best_child(PyObject *__pyx_v_self,
#if CYTHON_METH_FASTCALL
PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
#else
PyObject *__pyx_args, PyObject *__pyx_kwds
#endif
) {
struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_node = 0;
#if !CYTHON_METH_FASTCALL
CYTHON_UNUSED Py_ssize_t __pyx_nargs;
#endif
CYTHON_UNUSED PyObject *const *__pyx_kwvalues;
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("_get_best_child (wrapper)", 0);
#if !CYTHON_METH_FASTCALL
#if CYTHON_ASSUME_SAFE_SIZE
__pyx_nargs = PyTuple_GET_SIZE(__pyx_args);
#else
__pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL;
#endif
#endif
__pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs);
{
PyObject ** const __pyx_pyargnames[] = {&__pyx_mstate_global->__pyx_n_u_node,0};
PyObject* values[1] = {0};
const Py_ssize_t __pyx_kwds_len = (__pyx_kwds) ? __Pyx_NumKwargs_FASTCALL(__pyx_kwds) : 0;
if (unlikely(__pyx_kwds_len) < 0) __PYX_ERR(0, 56, __pyx_L3_error)
if (__pyx_kwds_len > 0) {
switch (__pyx_nargs) {
case 1:
values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 56, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 0: break;
default: goto __pyx_L5_argtuple_error;
}
const Py_ssize_t kwd_pos_args = __pyx_nargs;
if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "_get_best_child", 0) < (0)) __PYX_ERR(0, 56, __pyx_L3_error)
for (Py_ssize_t i = __pyx_nargs; i < 1; i++) {
if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("_get_best_child", 1, 1, 1, i); __PYX_ERR(0, 56, __pyx_L3_error) }
}
} else if (unlikely(__pyx_nargs != 1)) {
goto __pyx_L5_argtuple_error;
} else {
values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 56, __pyx_L3_error)
}
__pyx_v_node = ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)values[0]);
}
goto __pyx_L6_skip;
__pyx_L5_argtuple_error:;
__Pyx_RaiseArgtupleInvalid("_get_best_child", 1, 1, 1, __pyx_nargs); __PYX_ERR(0, 56, __pyx_L3_error)
__pyx_L6_skip:;
goto __pyx_L4_argument_unpacking_done;
__pyx_L3_error:;
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout._get_best_child", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_RefNannyFinishContext();
return NULL;
__pyx_L4_argument_unpacking_done:;
if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_node), __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node, 1, "node", 0))) __PYX_ERR(0, 56, __pyx_L1_error)
__pyx_r = __pyx_pf_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_4_get_best_child(((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self), __pyx_v_node);
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
/* function exit code */
goto __pyx_L0;
__pyx_L1_error:;
__pyx_r = NULL;
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
goto __pyx_L7_cleaned_up;
__pyx_L0:;
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
__pyx_L7_cleaned_up:;
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
static PyObject *__pyx_pf_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_4_get_best_child(struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *__pyx_v_self, struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_node) {
PyObject *__pyx_r = NULL;
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = ((PyObject *)__pyx_f_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout__get_best_child(__pyx_v_self, __pyx_v_node, 1)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 56, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_r = __pyx_t_1;
__pyx_t_1 = 0;
goto __pyx_L0;
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout._get_best_child", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = NULL;
__pyx_L0:;
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* … */
__pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_5_get_best_child, __Pyx_CYFUNCTION_CCLASS, __pyx_mstate_global->__pyx_n_u_MCTS_GuidedRollout__get_best_chi, NULL, __pyx_mstate_global->__pyx_n_u_lean_reinforcement_agent_mcts_mc, __pyx_mstate_global->__pyx_d, ((PyObject *)__pyx_mstate_global->__pyx_codeobj_tab[1])); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 56, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030E0000
PyUnstable_Object_EnableDeferredRefcount(__pyx_t_2);
#endif
if (__Pyx_SetItemOnTypeDict(__pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout, __pyx_mstate_global->__pyx_n_u_get_best_child, __pyx_t_2) < (0)) __PYX_ERR(0, 56, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
057: cdef Node child
+058: cdef Node best_child = None
__Pyx_INCREF(Py_None);
__pyx_v_best_child = ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)Py_None);
+059: cdef float max_score = -1e9
__pyx_v_max_score = -1e9;
060: cdef float score
061:
+062: if not node.children:
if (__pyx_v_node->children == Py_None) __pyx_t_6 = 0;
else
{
Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_v_node->children);
if (unlikely(((!CYTHON_ASSUME_SAFE_SIZE) && __pyx_temp < 0))) __PYX_ERR(0, 62, __pyx_L1_error)
__pyx_t_6 = (__pyx_temp != 0);
}
__pyx_t_7 = (!__pyx_t_6);
if (unlikely(__pyx_t_7)) {
/* … */
}
+063: raise ValueError("Node has no children")
__pyx_t_2 = NULL;
__pyx_t_5 = 1;
{
PyObject *__pyx_callargs[2] = {__pyx_t_2, __pyx_mstate_global->__pyx_kp_u_Node_has_no_children};
__pyx_t_1 = __Pyx_PyObject_FastCall((PyObject*)(((PyTypeObject*)PyExc_ValueError)), __pyx_callargs+__pyx_t_5, (2-__pyx_t_5) | (__pyx_t_5*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET));
__Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 63, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
}
__Pyx_Raise(__pyx_t_1, 0, 0, 0);
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
__PYX_ERR(0, 63, __pyx_L1_error)
064:
+065: for child in node.children:
if (unlikely(__pyx_v_node->children == Py_None)) {
PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable");
__PYX_ERR(0, 65, __pyx_L1_error)
}
__pyx_t_1 = __pyx_v_node->children; __Pyx_INCREF(__pyx_t_1);
__pyx_t_8 = 0;
for (;;) {
{
Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_1);
#if !CYTHON_ASSUME_SAFE_SIZE
if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 65, __pyx_L1_error)
#endif
if (__pyx_t_8 >= __pyx_temp) break;
}
__pyx_t_2 = __Pyx_PyList_GetItemRefFast(__pyx_t_1, __pyx_t_8, __Pyx_ReferenceSharing_OwnStrongReference);
++__pyx_t_8;
if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 65, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node))))) __PYX_ERR(0, 65, __pyx_L1_error)
__Pyx_XDECREF_SET(__pyx_v_child, ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)__pyx_t_2));
__pyx_t_2 = 0;
/* … */
}
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+066: score = self._puct_score(child)
__pyx_t_9 = ((struct __pyx_vtabstruct_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self->__pyx_base.__pyx_vtab)->_puct_score(__pyx_v_self, __pyx_v_child, 0); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 66, __pyx_L1_error)
__pyx_v_score = __pyx_t_9;
+067: if best_child is None or score > max_score:
__pyx_t_6 = (((PyObject *)__pyx_v_best_child) == Py_None);
if (!__pyx_t_6) {
} else {
__pyx_t_7 = __pyx_t_6;
goto __pyx_L7_bool_binop_done;
}
__pyx_t_6 = (__pyx_v_score > __pyx_v_max_score);
__pyx_t_7 = __pyx_t_6;
__pyx_L7_bool_binop_done:;
if (__pyx_t_7) {
/* … */
}
+068: max_score = score
__pyx_v_max_score = __pyx_v_score;
+069: best_child = child
__Pyx_INCREF((PyObject *)__pyx_v_child); __Pyx_DECREF_SET(__pyx_v_best_child, __pyx_v_child);
070:
+071: return best_child
__Pyx_XDECREF((PyObject *)__pyx_r); __Pyx_INCREF((PyObject *)__pyx_v_best_child); __pyx_r = __pyx_v_best_child; goto __pyx_L0;
072:
+073: cpdef Node _expand(self, Node node):
static PyObject *__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_7_expand(PyObject *__pyx_v_self,
#if CYTHON_METH_FASTCALL
PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
#else
PyObject *__pyx_args, PyObject *__pyx_kwds
#endif
); /*proto*/
static struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_f_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout__expand(struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *__pyx_v_self, struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_node, int __pyx_skip_dispatch) {
PyObject *__pyx_v_state_key = 0;
PyObject *__pyx_v_next_state = 0;
struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_child = 0;
struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_existing_node = 0;
PyObject *__pyx_v_state_str = NULL;
PyObject *__pyx_v_tactics_with_probs = NULL;
PyObject *__pyx_v_tactic = NULL;
PyObject *__pyx_v_prob = NULL;
struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_r = NULL;
/* Check if called by wrapper */
if (unlikely(__pyx_skip_dispatch)) ;
/* Check if overridden in Python */
else if (
#if !CYTHON_USE_TYPE_SLOTS
unlikely(Py_TYPE(((PyObject *)__pyx_v_self)) != __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout &&
__Pyx_PyType_HasFeature(Py_TYPE(((PyObject *)__pyx_v_self)), Py_TPFLAGS_HAVE_GC))
#else
unlikely(Py_TYPE(((PyObject *)__pyx_v_self))->tp_dictoffset != 0 || __Pyx_PyType_HasFeature(Py_TYPE(((PyObject *)__pyx_v_self)), (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)))
#endif
) {
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
static PY_UINT64_T __pyx_tp_dict_version = __PYX_DICT_VERSION_INIT, __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT;
if (unlikely(!__Pyx_object_dict_version_matches(((PyObject *)__pyx_v_self), __pyx_tp_dict_version, __pyx_obj_dict_version))) {
PY_UINT64_T __pyx_typedict_guard = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self));
#endif
__pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_mstate_global->__pyx_n_u_expand); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 73, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
if (!__Pyx_IsSameCFunction(__pyx_t_1, (void(*)(void)) __pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_7_expand)) {
__Pyx_XDECREF((PyObject *)__pyx_r);
__pyx_t_3 = NULL;
__Pyx_INCREF(__pyx_t_1);
__pyx_t_4 = __pyx_t_1;
__pyx_t_5 = 1;
#if CYTHON_UNPACK_METHODS
if (unlikely(PyMethod_Check(__pyx_t_4))) {
__pyx_t_3 = PyMethod_GET_SELF(__pyx_t_4);
assert(__pyx_t_3);
PyObject* __pyx__function = PyMethod_GET_FUNCTION(__pyx_t_4);
__Pyx_INCREF(__pyx_t_3);
__Pyx_INCREF(__pyx__function);
__Pyx_DECREF_SET(__pyx_t_4, __pyx__function);
__pyx_t_5 = 0;
}
#endif
{
PyObject *__pyx_callargs[2] = {__pyx_t_3, ((PyObject *)__pyx_v_node)};
__pyx_t_2 = __Pyx_PyObject_FastCall((PyObject*)__pyx_t_4, __pyx_callargs+__pyx_t_5, (2-__pyx_t_5) | (__pyx_t_5*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET));
__Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 73, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
}
if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node))))) __PYX_ERR(0, 73, __pyx_L1_error)
__pyx_r = ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)__pyx_t_2);
__pyx_t_2 = 0;
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
goto __pyx_L0;
}
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
__pyx_tp_dict_version = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self));
__pyx_obj_dict_version = __Pyx_get_object_dict_version(((PyObject *)__pyx_v_self));
if (unlikely(__pyx_typedict_guard != __pyx_tp_dict_version)) {
__pyx_tp_dict_version = __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT;
}
#endif
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
}
#endif
}
/* … */
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_XDECREF(__pyx_t_2);
__Pyx_XDECREF(__pyx_t_3);
__Pyx_XDECREF(__pyx_t_4);
__Pyx_XDECREF(__pyx_t_10);
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout._expand", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = 0;
__pyx_L0:;
__Pyx_XDECREF(__pyx_v_state_key);
__Pyx_XDECREF(__pyx_v_next_state);
__Pyx_XDECREF((PyObject *)__pyx_v_child);
__Pyx_XDECREF((PyObject *)__pyx_v_existing_node);
__Pyx_XDECREF(__pyx_v_state_str);
__Pyx_XDECREF(__pyx_v_tactics_with_probs);
__Pyx_XDECREF(__pyx_v_tactic);
__Pyx_XDECREF(__pyx_v_prob);
__Pyx_XGIVEREF((PyObject *)__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* Python wrapper */
static PyObject *__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_7_expand(PyObject *__pyx_v_self,
#if CYTHON_METH_FASTCALL
PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
#else
PyObject *__pyx_args, PyObject *__pyx_kwds
#endif
); /*proto*/
static PyMethodDef __pyx_mdef_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_7_expand = {"_expand", (PyCFunction)(void(*)(void))(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_7_expand, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_7_expand(PyObject *__pyx_v_self,
#if CYTHON_METH_FASTCALL
PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
#else
PyObject *__pyx_args, PyObject *__pyx_kwds
#endif
) {
struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_node = 0;
#if !CYTHON_METH_FASTCALL
CYTHON_UNUSED Py_ssize_t __pyx_nargs;
#endif
CYTHON_UNUSED PyObject *const *__pyx_kwvalues;
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("_expand (wrapper)", 0);
#if !CYTHON_METH_FASTCALL
#if CYTHON_ASSUME_SAFE_SIZE
__pyx_nargs = PyTuple_GET_SIZE(__pyx_args);
#else
__pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL;
#endif
#endif
__pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs);
{
PyObject ** const __pyx_pyargnames[] = {&__pyx_mstate_global->__pyx_n_u_node,0};
PyObject* values[1] = {0};
const Py_ssize_t __pyx_kwds_len = (__pyx_kwds) ? __Pyx_NumKwargs_FASTCALL(__pyx_kwds) : 0;
if (unlikely(__pyx_kwds_len) < 0) __PYX_ERR(0, 73, __pyx_L3_error)
if (__pyx_kwds_len > 0) {
switch (__pyx_nargs) {
case 1:
values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 73, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 0: break;
default: goto __pyx_L5_argtuple_error;
}
const Py_ssize_t kwd_pos_args = __pyx_nargs;
if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "_expand", 0) < (0)) __PYX_ERR(0, 73, __pyx_L3_error)
for (Py_ssize_t i = __pyx_nargs; i < 1; i++) {
if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("_expand", 1, 1, 1, i); __PYX_ERR(0, 73, __pyx_L3_error) }
}
} else if (unlikely(__pyx_nargs != 1)) {
goto __pyx_L5_argtuple_error;
} else {
values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 73, __pyx_L3_error)
}
__pyx_v_node = ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)values[0]);
}
goto __pyx_L6_skip;
__pyx_L5_argtuple_error:;
__Pyx_RaiseArgtupleInvalid("_expand", 1, 1, 1, __pyx_nargs); __PYX_ERR(0, 73, __pyx_L3_error)
__pyx_L6_skip:;
goto __pyx_L4_argument_unpacking_done;
__pyx_L3_error:;
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout._expand", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_RefNannyFinishContext();
return NULL;
__pyx_L4_argument_unpacking_done:;
if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_node), __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node, 1, "node", 0))) __PYX_ERR(0, 73, __pyx_L1_error)
__pyx_r = __pyx_pf_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_6_expand(((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self), __pyx_v_node);
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
/* function exit code */
goto __pyx_L0;
__pyx_L1_error:;
__pyx_r = NULL;
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
goto __pyx_L7_cleaned_up;
__pyx_L0:;
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
__pyx_L7_cleaned_up:;
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
static PyObject *__pyx_pf_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_6_expand(struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *__pyx_v_self, struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_node) {
PyObject *__pyx_r = NULL;
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = ((PyObject *)__pyx_f_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout__expand(__pyx_v_self, __pyx_v_node, 1)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 73, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_r = __pyx_t_1;
__pyx_t_1 = 0;
goto __pyx_L0;
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout._expand", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = NULL;
__pyx_L0:;
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* … */
__pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_7_expand, __Pyx_CYFUNCTION_CCLASS, __pyx_mstate_global->__pyx_n_u_MCTS_GuidedRollout__expand, NULL, __pyx_mstate_global->__pyx_n_u_lean_reinforcement_agent_mcts_mc, __pyx_mstate_global->__pyx_d, ((PyObject *)__pyx_mstate_global->__pyx_codeobj_tab[2])); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 73, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030E0000
PyUnstable_Object_EnableDeferredRefcount(__pyx_t_2);
#endif
if (__Pyx_SetItemOnTypeDict(__pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout, __pyx_mstate_global->__pyx_n_u_expand, __pyx_t_2) < (0)) __PYX_ERR(0, 73, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
074: cdef object state_key
075: cdef object next_state
076: cdef Node child
077: cdef Node existing_node
078:
+079: if not isinstance(node.state, TacticState):
__pyx_t_1 = __pyx_v_node->state; __Pyx_INCREF(__pyx_t_1); __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_mstate_global->__pyx_n_u_TacticState); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 79, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_6 = PyObject_IsInstance(__pyx_t_1, __pyx_t_2); if (unlikely(__pyx_t_6 == ((int)-1))) __PYX_ERR(0, 79, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_t_7 = (!__pyx_t_6); if (unlikely(__pyx_t_7)) { /* … */ }
+080: raise TypeError("Cannot expand a node without a TacticState.")
__pyx_t_1 = NULL;
__pyx_t_5 = 1;
{
PyObject *__pyx_callargs[2] = {__pyx_t_1, __pyx_mstate_global->__pyx_kp_u_Cannot_expand_a_node_without_a_T};
__pyx_t_2 = __Pyx_PyObject_FastCall((PyObject*)(((PyTypeObject*)PyExc_TypeError)), __pyx_callargs+__pyx_t_5, (2-__pyx_t_5) | (__pyx_t_5*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET));
__Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 80, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
}
__Pyx_Raise(__pyx_t_2, 0, 0, 0);
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
__PYX_ERR(0, 80, __pyx_L1_error)
081:
+082: state_str = node.state.pp
__pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_node->state, __pyx_mstate_global->__pyx_n_u_pp); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 82, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_v_state_str = __pyx_t_2; __pyx_t_2 = 0;
083:
+084: tactics_with_probs = self.transformer.generate_tactics_with_probs(
__pyx_t_1 = __pyx_v_self->__pyx_base.transformer;
__Pyx_INCREF(__pyx_t_1);
+085: state_str, n=self.num_tactics_to_expand
__pyx_t_4 = __Pyx_PyLong_From_int(__pyx_v_self->__pyx_base.num_tactics_to_expand); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 85, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_5 = 0; { PyObject *__pyx_callargs[2 + ((CYTHON_VECTORCALL) ? 1 : 0)] = {__pyx_t_1, __pyx_v_state_str}; __pyx_t_3 = __Pyx_MakeVectorcallBuilderKwds(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 84, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_n, __pyx_t_4, __pyx_t_3, __pyx_callargs+2, 0) < (0)) __PYX_ERR(0, 84, __pyx_L1_error) __pyx_t_2 = __Pyx_Object_VectorcallMethod_CallFromBuilder((PyObject*)__pyx_mstate_global->__pyx_n_u_generate_tactics_with_probs, __pyx_callargs+__pyx_t_5, (2-__pyx_t_5) | (1*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET), __pyx_t_3); __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 84, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); } __pyx_v_tactics_with_probs = __pyx_t_2; __pyx_t_2 = 0;
086: )
087:
+088: for tactic, prob in tactics_with_probs:
if (likely(PyList_CheckExact(__pyx_v_tactics_with_probs)) || PyTuple_CheckExact(__pyx_v_tactics_with_probs)) { __pyx_t_2 = __pyx_v_tactics_with_probs; __Pyx_INCREF(__pyx_t_2); __pyx_t_8 = 0; __pyx_t_9 = NULL; } else { __pyx_t_8 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_v_tactics_with_probs); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 88, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_9 = (CYTHON_COMPILING_IN_LIMITED_API) ? PyIter_Next : __Pyx_PyObject_GetIterNextFunc(__pyx_t_2); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 88, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_9)) { if (likely(PyList_CheckExact(__pyx_t_2))) { { Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_2); #if !CYTHON_ASSUME_SAFE_SIZE if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 88, __pyx_L1_error) #endif if (__pyx_t_8 >= __pyx_temp) break; } __pyx_t_3 = __Pyx_PyList_GetItemRefFast(__pyx_t_2, __pyx_t_8, __Pyx_ReferenceSharing_OwnStrongReference); ++__pyx_t_8; } else { { Py_ssize_t __pyx_temp = __Pyx_PyTuple_GET_SIZE(__pyx_t_2); #if !CYTHON_ASSUME_SAFE_SIZE if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 88, __pyx_L1_error) #endif if (__pyx_t_8 >= __pyx_temp) break; } #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS __pyx_t_3 = __Pyx_NewRef(PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_8)); #else __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_8); #endif ++__pyx_t_8; } if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 88, __pyx_L1_error) } else { __pyx_t_3 = __pyx_t_9(__pyx_t_2); if (unlikely(!__pyx_t_3)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (unlikely(!__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) __PYX_ERR(0, 88, __pyx_L1_error) PyErr_Clear(); } break; } } __Pyx_GOTREF(__pyx_t_3); if ((likely(PyTuple_CheckExact(__pyx_t_3))) || (PyList_CheckExact(__pyx_t_3))) { PyObject* sequence = __pyx_t_3; Py_ssize_t size = __Pyx_PySequence_SIZE(sequence); if (unlikely(size != 2)) { if (size > 2) __Pyx_RaiseTooManyValuesError(2); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); __PYX_ERR(0, 88, __pyx_L1_error) } #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS if (likely(PyTuple_CheckExact(sequence))) { __pyx_t_4 = PyTuple_GET_ITEM(sequence, 0); __Pyx_INCREF(__pyx_t_4); __pyx_t_1 = PyTuple_GET_ITEM(sequence, 1); __Pyx_INCREF(__pyx_t_1); } else { __pyx_t_4 = __Pyx_PyList_GetItemRefFast(sequence, 0, __Pyx_ReferenceSharing_SharedReference); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 88, __pyx_L1_error) __Pyx_XGOTREF(__pyx_t_4); __pyx_t_1 = __Pyx_PyList_GetItemRefFast(sequence, 1, __Pyx_ReferenceSharing_SharedReference); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 88, __pyx_L1_error) __Pyx_XGOTREF(__pyx_t_1); } #else __pyx_t_4 = __Pyx_PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 88, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_1 = __Pyx_PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 88, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { Py_ssize_t index = -1; __pyx_t_10 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 88, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_11 = (CYTHON_COMPILING_IN_LIMITED_API) ? PyIter_Next : __Pyx_PyObject_GetIterNextFunc(__pyx_t_10); index = 0; __pyx_t_4 = __pyx_t_11(__pyx_t_10); if (unlikely(!__pyx_t_4)) goto __pyx_L6_unpacking_failed; __Pyx_GOTREF(__pyx_t_4); index = 1; __pyx_t_1 = __pyx_t_11(__pyx_t_10); if (unlikely(!__pyx_t_1)) goto __pyx_L6_unpacking_failed; __Pyx_GOTREF(__pyx_t_1); if (__Pyx_IternextUnpackEndCheck(__pyx_t_11(__pyx_t_10), 2) < (0)) __PYX_ERR(0, 88, __pyx_L1_error) __pyx_t_11 = NULL; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; goto __pyx_L7_unpacking_done; __pyx_L6_unpacking_failed:; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __pyx_t_11 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); __PYX_ERR(0, 88, __pyx_L1_error) __pyx_L7_unpacking_done:; } __Pyx_XDECREF_SET(__pyx_v_tactic, __pyx_t_4); __pyx_t_4 = 0; __Pyx_XDECREF_SET(__pyx_v_prob, __pyx_t_1); __pyx_t_1 = 0; /* … */ __pyx_L4_continue:; } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+089: next_state = self.env.run_tactic_stateless(node.state, tactic)
__pyx_t_1 = __pyx_v_self->__pyx_base.env;
__Pyx_INCREF(__pyx_t_1);
__pyx_t_5 = 0;
{
PyObject *__pyx_callargs[3] = {__pyx_t_1, __pyx_v_node->state, __pyx_v_tactic};
__pyx_t_3 = __Pyx_PyObject_FastCallMethod((PyObject*)__pyx_mstate_global->__pyx_n_u_run_tactic_stateless, __pyx_callargs+__pyx_t_5, (3-__pyx_t_5) | (1*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET));
__Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 89, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_3);
}
__Pyx_XDECREF_SET(__pyx_v_next_state, __pyx_t_3);
__pyx_t_3 = 0;
090:
091: # Check for duplicate states
+092: state_key = self._get_state_key(next_state)
__pyx_t_3 = ((struct __pyx_vtabstruct_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self->__pyx_base.__pyx_vtab)->__pyx_base._get_state_key(((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_BaseMCTS *)__pyx_v_self), __pyx_v_next_state, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 92, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_XDECREF_SET(__pyx_v_state_key, __pyx_t_3); __pyx_t_3 = 0;
+093: if state_key is not None and state_key in self.seen_states:
__pyx_t_6 = (__pyx_v_state_key != Py_None);
if (__pyx_t_6) {
} else {
__pyx_t_7 = __pyx_t_6;
goto __pyx_L9_bool_binop_done;
}
if (unlikely(__pyx_v_self->__pyx_base.seen_states == Py_None)) {
PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable");
__PYX_ERR(0, 93, __pyx_L1_error)
}
__pyx_t_6 = (__Pyx_PyDict_ContainsTF(__pyx_v_state_key, __pyx_v_self->__pyx_base.seen_states, Py_EQ)); if (unlikely((__pyx_t_6 < 0))) __PYX_ERR(0, 93, __pyx_L1_error)
__pyx_t_7 = __pyx_t_6;
__pyx_L9_bool_binop_done:;
if (__pyx_t_7) {
/* … */
}
094: # Reuse existing node - add as child with additional parent edge
+095: existing_node = self.seen_states[state_key]
if (unlikely(__pyx_v_self->__pyx_base.seen_states == Py_None)) {
PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
__PYX_ERR(0, 95, __pyx_L1_error)
}
__pyx_t_3 = __Pyx_PyDict_GetItem(__pyx_v_self->__pyx_base.seen_states, __pyx_v_state_key); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 95, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_3);
if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node))))) __PYX_ERR(0, 95, __pyx_L1_error)
__Pyx_XDECREF_SET(__pyx_v_existing_node, ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)__pyx_t_3));
__pyx_t_3 = 0;
+096: existing_node.add_parent(node, tactic)
__pyx_t_12.__pyx_n = 1;
__pyx_t_12.action = __pyx_v_tactic;
((struct __pyx_vtabstruct_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)__pyx_v_existing_node->__pyx_vtab)->add_parent(__pyx_v_existing_node, __pyx_v_node, 0, &__pyx_t_12); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 96, __pyx_L1_error)
+097: if existing_node not in node.children:
__pyx_t_7 = (__Pyx_PySequence_ContainsTF(((PyObject *)__pyx_v_existing_node), __pyx_v_node->children, Py_NE)); if (unlikely((__pyx_t_7 < 0))) __PYX_ERR(0, 97, __pyx_L1_error) if (__pyx_t_7) { /* … */ }
+098: node.children.append(existing_node)
if (unlikely(__pyx_v_node->children == Py_None)) {
PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "append");
__PYX_ERR(0, 98, __pyx_L1_error)
}
__pyx_t_13 = __Pyx_PyList_Append(__pyx_v_node->children, ((PyObject *)__pyx_v_existing_node)); if (unlikely(__pyx_t_13 == ((int)-1))) __PYX_ERR(0, 98, __pyx_L1_error)
+099: continue
goto __pyx_L4_continue;
100:
+101: child = Node(next_state, parent=node, action=tactic)
__pyx_t_1 = NULL;
__pyx_t_5 = 1;
{
PyObject *__pyx_callargs[2 + ((CYTHON_VECTORCALL) ? 2 : 0)] = {__pyx_t_1, __pyx_v_next_state};
__pyx_t_4 = __Pyx_MakeVectorcallBuilderKwds(2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 101, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_4);
if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_parent, ((PyObject *)__pyx_v_node), __pyx_t_4, __pyx_callargs+2, 0) < (0)) __PYX_ERR(0, 101, __pyx_L1_error)
if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_action, __pyx_v_tactic, __pyx_t_4, __pyx_callargs+2, 1) < (0)) __PYX_ERR(0, 101, __pyx_L1_error)
__pyx_t_3 = __Pyx_Object_Vectorcall_CallFromBuilder((PyObject*)__pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node, __pyx_callargs+__pyx_t_5, (2-__pyx_t_5) | (__pyx_t_5*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET), __pyx_t_4);
__Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 101, __pyx_L1_error)
__Pyx_GOTREF((PyObject *)__pyx_t_3);
}
__Pyx_XDECREF_SET(__pyx_v_child, ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)__pyx_t_3));
__pyx_t_3 = 0;
+102: child.prior_p = prob
__pyx_t_14 = __Pyx_PyFloat_AsFloat(__pyx_v_prob); if (unlikely((__pyx_t_14 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 102, __pyx_L1_error) __pyx_v_child->prior_p = __pyx_t_14;
+103: node.children.append(child)
if (unlikely(__pyx_v_node->children == Py_None)) {
PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "append");
__PYX_ERR(0, 103, __pyx_L1_error)
}
__pyx_t_13 = __Pyx_PyList_Append(__pyx_v_node->children, ((PyObject *)__pyx_v_child)); if (unlikely(__pyx_t_13 == ((int)-1))) __PYX_ERR(0, 103, __pyx_L1_error)
+104: self.node_count += 1
__pyx_v_self->__pyx_base.node_count = (__pyx_v_self->__pyx_base.node_count + 1);
105:
106: # Register new state in seen_states
+107: if state_key is not None:
__pyx_t_7 = (__pyx_v_state_key != Py_None);
if (__pyx_t_7) {
/* … */
}
+108: self.seen_states[state_key] = child
if (unlikely(__pyx_v_self->__pyx_base.seen_states == Py_None)) {
PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
__PYX_ERR(0, 108, __pyx_L1_error)
}
if (unlikely((PyDict_SetItem(__pyx_v_self->__pyx_base.seen_states, __pyx_v_state_key, ((PyObject *)__pyx_v_child)) < 0))) __PYX_ERR(0, 108, __pyx_L1_error)
109:
+110: node.untried_actions = []
__pyx_t_2 = PyList_New(0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 110, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); __Pyx_GOTREF(__pyx_v_node->untried_actions); __Pyx_DECREF(__pyx_v_node->untried_actions); __pyx_v_node->untried_actions = ((PyObject*)__pyx_t_2); __pyx_t_2 = 0;
111:
+112: if node.children:
if (__pyx_v_node->children == Py_None) __pyx_t_7 = 0;
else
{
Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_v_node->children);
if (unlikely(((!CYTHON_ASSUME_SAFE_SIZE) && __pyx_temp < 0))) __PYX_ERR(0, 112, __pyx_L1_error)
__pyx_t_7 = (__pyx_temp != 0);
}
if (__pyx_t_7) {
/* … */
}
+113: return self._get_best_child(node)
__Pyx_XDECREF((PyObject *)__pyx_r); __pyx_t_2 = ((PyObject *)((struct __pyx_vtabstruct_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self->__pyx_base.__pyx_vtab)->__pyx_base._get_best_child(((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_BaseMCTS *)__pyx_v_self), __pyx_v_node, 0)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 113, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_r = ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)__pyx_t_2); __pyx_t_2 = 0; goto __pyx_L0;
+114: return node
__Pyx_XDECREF((PyObject *)__pyx_r); __Pyx_INCREF((PyObject *)__pyx_v_node); __pyx_r = __pyx_v_node; goto __pyx_L0;
115:
+116: cpdef list _expand_batch(self, list nodes):
static PyObject *__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_9_expand_batch(PyObject *__pyx_v_self,
#if CYTHON_METH_FASTCALL
PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
#else
PyObject *__pyx_args, PyObject *__pyx_kwds
#endif
); /*proto*/
static PyObject *__pyx_f_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout__expand_batch(struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *__pyx_v_self, PyObject *__pyx_v_nodes, int __pyx_skip_dispatch) {
PyObject *__pyx_v_states = 0;
PyObject *__pyx_v_nodes_to_generate = 0;
struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_node = 0;
PyObject *__pyx_v_batch_tactics_with_probs = 0;
PyObject *__pyx_v_tasks = 0;
PyObject *__pyx_v_results = 0;
int __pyx_v_i;
PyObject *__pyx_v_tactic = 0;
float __pyx_v_prob;
PyObject *__pyx_v_next_state = 0;
struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_child = 0;
struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_existing_node = 0;
PyObject *__pyx_v_state_key = 0;
PyObject *__pyx_v_tactics_probs = NULL;
struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_7genexpr__pyx_v_node = NULL;
PyObject *__pyx_r = NULL;
/* Check if called by wrapper */
if (unlikely(__pyx_skip_dispatch)) ;
/* Check if overridden in Python */
else if (
#if !CYTHON_USE_TYPE_SLOTS
unlikely(Py_TYPE(((PyObject *)__pyx_v_self)) != __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout &&
__Pyx_PyType_HasFeature(Py_TYPE(((PyObject *)__pyx_v_self)), Py_TPFLAGS_HAVE_GC))
#else
unlikely(Py_TYPE(((PyObject *)__pyx_v_self))->tp_dictoffset != 0 || __Pyx_PyType_HasFeature(Py_TYPE(((PyObject *)__pyx_v_self)), (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)))
#endif
) {
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
static PY_UINT64_T __pyx_tp_dict_version = __PYX_DICT_VERSION_INIT, __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT;
if (unlikely(!__Pyx_object_dict_version_matches(((PyObject *)__pyx_v_self), __pyx_tp_dict_version, __pyx_obj_dict_version))) {
PY_UINT64_T __pyx_typedict_guard = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self));
#endif
__pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_mstate_global->__pyx_n_u_expand_batch); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 116, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
if (!__Pyx_IsSameCFunction(__pyx_t_1, (void(*)(void)) __pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_9_expand_batch)) {
__Pyx_XDECREF(__pyx_r);
__pyx_t_3 = NULL;
__Pyx_INCREF(__pyx_t_1);
__pyx_t_4 = __pyx_t_1;
__pyx_t_5 = 1;
#if CYTHON_UNPACK_METHODS
if (unlikely(PyMethod_Check(__pyx_t_4))) {
__pyx_t_3 = PyMethod_GET_SELF(__pyx_t_4);
assert(__pyx_t_3);
PyObject* __pyx__function = PyMethod_GET_FUNCTION(__pyx_t_4);
__Pyx_INCREF(__pyx_t_3);
__Pyx_INCREF(__pyx__function);
__Pyx_DECREF_SET(__pyx_t_4, __pyx__function);
__pyx_t_5 = 0;
}
#endif
{
PyObject *__pyx_callargs[2] = {__pyx_t_3, __pyx_v_nodes};
__pyx_t_2 = __Pyx_PyObject_FastCall((PyObject*)__pyx_t_4, __pyx_callargs+__pyx_t_5, (2-__pyx_t_5) | (__pyx_t_5*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET));
__Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 116, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
}
if (!(likely(PyList_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("list", __pyx_t_2))) __PYX_ERR(0, 116, __pyx_L1_error)
__pyx_r = ((PyObject*)__pyx_t_2);
__pyx_t_2 = 0;
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
goto __pyx_L0;
}
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
__pyx_tp_dict_version = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self));
__pyx_obj_dict_version = __Pyx_get_object_dict_version(((PyObject *)__pyx_v_self));
if (unlikely(__pyx_typedict_guard != __pyx_tp_dict_version)) {
__pyx_tp_dict_version = __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT;
}
#endif
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
}
#endif
}
/* … */
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_XDECREF(__pyx_t_2);
__Pyx_XDECREF(__pyx_t_3);
__Pyx_XDECREF(__pyx_t_4);
__Pyx_XDECREF(__pyx_t_14);
__Pyx_XDECREF(__pyx_t_17);
__Pyx_XDECREF(__pyx_t_18);
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout._expand_batch", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = 0;
__pyx_L0:;
__Pyx_XDECREF(__pyx_v_states);
__Pyx_XDECREF(__pyx_v_nodes_to_generate);
__Pyx_XDECREF((PyObject *)__pyx_v_node);
__Pyx_XDECREF(__pyx_v_batch_tactics_with_probs);
__Pyx_XDECREF(__pyx_v_tasks);
__Pyx_XDECREF(__pyx_v_results);
__Pyx_XDECREF(__pyx_v_tactic);
__Pyx_XDECREF(__pyx_v_next_state);
__Pyx_XDECREF((PyObject *)__pyx_v_child);
__Pyx_XDECREF((PyObject *)__pyx_v_existing_node);
__Pyx_XDECREF(__pyx_v_state_key);
__Pyx_XDECREF(__pyx_v_tactics_probs);
__Pyx_XDECREF((PyObject *)__pyx_7genexpr__pyx_v_node);
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* Python wrapper */
static PyObject *__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_9_expand_batch(PyObject *__pyx_v_self,
#if CYTHON_METH_FASTCALL
PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
#else
PyObject *__pyx_args, PyObject *__pyx_kwds
#endif
); /*proto*/
static PyMethodDef __pyx_mdef_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_9_expand_batch = {"_expand_batch", (PyCFunction)(void(*)(void))(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_9_expand_batch, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_9_expand_batch(PyObject *__pyx_v_self,
#if CYTHON_METH_FASTCALL
PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
#else
PyObject *__pyx_args, PyObject *__pyx_kwds
#endif
) {
PyObject *__pyx_v_nodes = 0;
#if !CYTHON_METH_FASTCALL
CYTHON_UNUSED Py_ssize_t __pyx_nargs;
#endif
CYTHON_UNUSED PyObject *const *__pyx_kwvalues;
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("_expand_batch (wrapper)", 0);
#if !CYTHON_METH_FASTCALL
#if CYTHON_ASSUME_SAFE_SIZE
__pyx_nargs = PyTuple_GET_SIZE(__pyx_args);
#else
__pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL;
#endif
#endif
__pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs);
{
PyObject ** const __pyx_pyargnames[] = {&__pyx_mstate_global->__pyx_n_u_nodes,0};
PyObject* values[1] = {0};
const Py_ssize_t __pyx_kwds_len = (__pyx_kwds) ? __Pyx_NumKwargs_FASTCALL(__pyx_kwds) : 0;
if (unlikely(__pyx_kwds_len) < 0) __PYX_ERR(0, 116, __pyx_L3_error)
if (__pyx_kwds_len > 0) {
switch (__pyx_nargs) {
case 1:
values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 116, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 0: break;
default: goto __pyx_L5_argtuple_error;
}
const Py_ssize_t kwd_pos_args = __pyx_nargs;
if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "_expand_batch", 0) < (0)) __PYX_ERR(0, 116, __pyx_L3_error)
for (Py_ssize_t i = __pyx_nargs; i < 1; i++) {
if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("_expand_batch", 1, 1, 1, i); __PYX_ERR(0, 116, __pyx_L3_error) }
}
} else if (unlikely(__pyx_nargs != 1)) {
goto __pyx_L5_argtuple_error;
} else {
values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 116, __pyx_L3_error)
}
__pyx_v_nodes = ((PyObject*)values[0]);
}
goto __pyx_L6_skip;
__pyx_L5_argtuple_error:;
__Pyx_RaiseArgtupleInvalid("_expand_batch", 1, 1, 1, __pyx_nargs); __PYX_ERR(0, 116, __pyx_L3_error)
__pyx_L6_skip:;
goto __pyx_L4_argument_unpacking_done;
__pyx_L3_error:;
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout._expand_batch", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_RefNannyFinishContext();
return NULL;
__pyx_L4_argument_unpacking_done:;
if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_nodes), (&PyList_Type), 1, "nodes", 1))) __PYX_ERR(0, 116, __pyx_L1_error)
__pyx_r = __pyx_pf_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_8_expand_batch(((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self), __pyx_v_nodes);
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
/* function exit code */
goto __pyx_L0;
__pyx_L1_error:;
__pyx_r = NULL;
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
goto __pyx_L7_cleaned_up;
__pyx_L0:;
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
__pyx_L7_cleaned_up:;
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
static PyObject *__pyx_pf_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_8_expand_batch(struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *__pyx_v_self, PyObject *__pyx_v_nodes) {
PyObject *__pyx_r = NULL;
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = __pyx_f_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout__expand_batch(__pyx_v_self, __pyx_v_nodes, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 116, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_r = __pyx_t_1;
__pyx_t_1 = 0;
goto __pyx_L0;
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout._expand_batch", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = NULL;
__pyx_L0:;
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* … */
__pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_9_expand_batch, __Pyx_CYFUNCTION_CCLASS, __pyx_mstate_global->__pyx_n_u_MCTS_GuidedRollout__expand_batch, NULL, __pyx_mstate_global->__pyx_n_u_lean_reinforcement_agent_mcts_mc, __pyx_mstate_global->__pyx_d, ((PyObject *)__pyx_mstate_global->__pyx_codeobj_tab[3])); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 116, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030E0000
PyUnstable_Object_EnableDeferredRefcount(__pyx_t_2);
#endif
if (__Pyx_SetItemOnTypeDict(__pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout, __pyx_mstate_global->__pyx_n_u_expand_batch, __pyx_t_2) < (0)) __PYX_ERR(0, 116, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+117: cdef list states = []
__pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 117, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_v_states = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0;
+118: cdef list nodes_to_generate = []
__pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 118, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_v_nodes_to_generate = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0;
119: cdef Node node
120: cdef list batch_tactics_with_probs
+121: cdef list tasks = []
__pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 121, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_v_tasks = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0;
+122: cdef list results = []
__pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 122, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_v_results = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0;
123: cdef int i
124: cdef object tactic
125: cdef float prob
126: cdef object next_state
127: cdef Node child
128: cdef Node existing_node
129: cdef object state_key
130:
+131: for node in nodes:
if (unlikely(__pyx_v_nodes == Py_None)) {
PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable");
__PYX_ERR(0, 131, __pyx_L1_error)
}
__pyx_t_1 = __pyx_v_nodes; __Pyx_INCREF(__pyx_t_1);
__pyx_t_6 = 0;
for (;;) {
{
Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_1);
#if !CYTHON_ASSUME_SAFE_SIZE
if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 131, __pyx_L1_error)
#endif
if (__pyx_t_6 >= __pyx_temp) break;
}
__pyx_t_2 = __Pyx_PyList_GetItemRefFast(__pyx_t_1, __pyx_t_6, __Pyx_ReferenceSharing_OwnStrongReference);
++__pyx_t_6;
if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 131, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node))))) __PYX_ERR(0, 131, __pyx_L1_error)
__Pyx_XDECREF_SET(__pyx_v_node, ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)__pyx_t_2));
__pyx_t_2 = 0;
/* … */
}
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+132: if isinstance(node.state, TacticState):
__pyx_t_2 = __pyx_v_node->state;
__Pyx_INCREF(__pyx_t_2);
__Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_mstate_global->__pyx_n_u_TacticState); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 132, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_4);
__pyx_t_7 = PyObject_IsInstance(__pyx_t_2, __pyx_t_4); if (unlikely(__pyx_t_7 == ((int)-1))) __PYX_ERR(0, 132, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
if (__pyx_t_7) {
/* … */
}
+133: states.append(node.state.pp)
__pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_node->state, __pyx_mstate_global->__pyx_n_u_pp); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 133, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_8 = __Pyx_PyList_Append(__pyx_v_states, __pyx_t_4); if (unlikely(__pyx_t_8 == ((int)-1))) __PYX_ERR(0, 133, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+134: nodes_to_generate.append(node)
__pyx_t_8 = __Pyx_PyList_Append(__pyx_v_nodes_to_generate, ((PyObject *)__pyx_v_node)); if (unlikely(__pyx_t_8 == ((int)-1))) __PYX_ERR(0, 134, __pyx_L1_error)
135:
+136: if not states:
{
Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_v_states);
if (unlikely(((!CYTHON_ASSUME_SAFE_SIZE) && __pyx_temp < 0))) __PYX_ERR(0, 136, __pyx_L1_error)
__pyx_t_7 = (__pyx_temp != 0);
}
__pyx_t_9 = (!__pyx_t_7);
if (__pyx_t_9) {
/* … */
}
+137: return nodes
__Pyx_XDECREF(__pyx_r); __Pyx_INCREF(__pyx_v_nodes); __pyx_r = __pyx_v_nodes; goto __pyx_L0;
138:
139: # Check timeout before expensive model call
+140: if self._is_timeout():
__pyx_t_9 = ((struct __pyx_vtabstruct_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self->__pyx_base.__pyx_vtab)->__pyx_base._is_timeout(((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_BaseMCTS *)__pyx_v_self), 0); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 140, __pyx_L1_error)
if (__pyx_t_9) {
/* … */
}
+141: return nodes
__Pyx_XDECREF(__pyx_r); __Pyx_INCREF(__pyx_v_nodes); __pyx_r = __pyx_v_nodes; goto __pyx_L0;
142:
+143: batch_tactics_with_probs = self.transformer.generate_tactics_with_probs_batch(
__pyx_t_4 = __pyx_v_self->__pyx_base.transformer; __Pyx_INCREF(__pyx_t_4); /* … */ if (!(likely(PyList_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("list", __pyx_t_1))) __PYX_ERR(0, 143, __pyx_L1_error) __pyx_v_batch_tactics_with_probs = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0;
+144: states, n=self.num_tactics_to_expand
__pyx_t_2 = __Pyx_PyLong_From_int(__pyx_v_self->__pyx_base.num_tactics_to_expand); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 144, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_5 = 0; { PyObject *__pyx_callargs[2 + ((CYTHON_VECTORCALL) ? 1 : 0)] = {__pyx_t_4, __pyx_v_states}; __pyx_t_3 = __Pyx_MakeVectorcallBuilderKwds(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 143, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_n, __pyx_t_2, __pyx_t_3, __pyx_callargs+2, 0) < (0)) __PYX_ERR(0, 143, __pyx_L1_error) __pyx_t_1 = __Pyx_Object_VectorcallMethod_CallFromBuilder((PyObject*)__pyx_mstate_global->__pyx_n_u_generate_tactics_with_probs_batc, __pyx_callargs+__pyx_t_5, (2-__pyx_t_5) | (1*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET), __pyx_t_3); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 143, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); }
145: )
146:
147: # Check timeout after model call
+148: if self._is_timeout():
__pyx_t_9 = ((struct __pyx_vtabstruct_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self->__pyx_base.__pyx_vtab)->__pyx_base._is_timeout(((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_BaseMCTS *)__pyx_v_self), 0); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 148, __pyx_L1_error)
if (__pyx_t_9) {
/* … */
}
+149: return nodes
__Pyx_XDECREF(__pyx_r); __Pyx_INCREF(__pyx_v_nodes); __pyx_r = __pyx_v_nodes; goto __pyx_L0;
150:
+151: for i in range(len(batch_tactics_with_probs)):
if (unlikely(__pyx_v_batch_tactics_with_probs == Py_None)) {
PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
__PYX_ERR(0, 151, __pyx_L1_error)
}
__pyx_t_6 = __Pyx_PyList_GET_SIZE(__pyx_v_batch_tactics_with_probs); if (unlikely(__pyx_t_6 == ((Py_ssize_t)-1))) __PYX_ERR(0, 151, __pyx_L1_error)
__pyx_t_10 = __pyx_t_6;
for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_10; __pyx_t_11+=1) {
__pyx_v_i = __pyx_t_11;
+152: tactics_probs = batch_tactics_with_probs[i]
if (unlikely(__pyx_v_batch_tactics_with_probs == Py_None)) {
PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
__PYX_ERR(0, 152, __pyx_L1_error)
}
__pyx_t_1 = __Pyx_PyList_GET_ITEM(__pyx_v_batch_tactics_with_probs, __pyx_v_i);
__Pyx_INCREF(__pyx_t_1);
__Pyx_XDECREF_SET(__pyx_v_tactics_probs, __pyx_t_1);
__pyx_t_1 = 0;
+153: node = nodes_to_generate[i]
__pyx_t_1 = __Pyx_PyList_GET_ITEM(__pyx_v_nodes_to_generate, __pyx_v_i); __Pyx_INCREF(__pyx_t_1); if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node))))) __PYX_ERR(0, 153, __pyx_L1_error) __Pyx_XDECREF_SET(__pyx_v_node, ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)__pyx_t_1)); __pyx_t_1 = 0;
+154: for tactic, prob in tactics_probs:
if (likely(PyList_CheckExact(__pyx_v_tactics_probs)) || PyTuple_CheckExact(__pyx_v_tactics_probs)) { __pyx_t_1 = __pyx_v_tactics_probs; __Pyx_INCREF(__pyx_t_1); __pyx_t_12 = 0; __pyx_t_13 = NULL; } else { __pyx_t_12 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_v_tactics_probs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 154, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_13 = (CYTHON_COMPILING_IN_LIMITED_API) ? PyIter_Next : __Pyx_PyObject_GetIterNextFunc(__pyx_t_1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 154, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_13)) { if (likely(PyList_CheckExact(__pyx_t_1))) { { Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_1); #if !CYTHON_ASSUME_SAFE_SIZE if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 154, __pyx_L1_error) #endif if (__pyx_t_12 >= __pyx_temp) break; } __pyx_t_3 = __Pyx_PyList_GetItemRefFast(__pyx_t_1, __pyx_t_12, __Pyx_ReferenceSharing_OwnStrongReference); ++__pyx_t_12; } else { { Py_ssize_t __pyx_temp = __Pyx_PyTuple_GET_SIZE(__pyx_t_1); #if !CYTHON_ASSUME_SAFE_SIZE if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 154, __pyx_L1_error) #endif if (__pyx_t_12 >= __pyx_temp) break; } #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS __pyx_t_3 = __Pyx_NewRef(PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_12)); #else __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_1, __pyx_t_12); #endif ++__pyx_t_12; } if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 154, __pyx_L1_error) } else { __pyx_t_3 = __pyx_t_13(__pyx_t_1); if (unlikely(!__pyx_t_3)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (unlikely(!__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) __PYX_ERR(0, 154, __pyx_L1_error) PyErr_Clear(); } break; } } __Pyx_GOTREF(__pyx_t_3); if ((likely(PyTuple_CheckExact(__pyx_t_3))) || (PyList_CheckExact(__pyx_t_3))) { PyObject* sequence = __pyx_t_3; Py_ssize_t size = __Pyx_PySequence_SIZE(sequence); if (unlikely(size != 2)) { if (size > 2) __Pyx_RaiseTooManyValuesError(2); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); __PYX_ERR(0, 154, __pyx_L1_error) } #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS if (likely(PyTuple_CheckExact(sequence))) { __pyx_t_2 = PyTuple_GET_ITEM(sequence, 0); __Pyx_INCREF(__pyx_t_2); __pyx_t_4 = PyTuple_GET_ITEM(sequence, 1); __Pyx_INCREF(__pyx_t_4); } else { __pyx_t_2 = __Pyx_PyList_GetItemRefFast(sequence, 0, __Pyx_ReferenceSharing_SharedReference); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 154, __pyx_L1_error) __Pyx_XGOTREF(__pyx_t_2); __pyx_t_4 = __Pyx_PyList_GetItemRefFast(sequence, 1, __Pyx_ReferenceSharing_SharedReference); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 154, __pyx_L1_error) __Pyx_XGOTREF(__pyx_t_4); } #else __pyx_t_2 = __Pyx_PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 154, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_4 = __Pyx_PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 154, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); #endif __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { Py_ssize_t index = -1; __pyx_t_14 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 154, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_15 = (CYTHON_COMPILING_IN_LIMITED_API) ? PyIter_Next : __Pyx_PyObject_GetIterNextFunc(__pyx_t_14); index = 0; __pyx_t_2 = __pyx_t_15(__pyx_t_14); if (unlikely(!__pyx_t_2)) goto __pyx_L14_unpacking_failed; __Pyx_GOTREF(__pyx_t_2); index = 1; __pyx_t_4 = __pyx_t_15(__pyx_t_14); if (unlikely(!__pyx_t_4)) goto __pyx_L14_unpacking_failed; __Pyx_GOTREF(__pyx_t_4); if (__Pyx_IternextUnpackEndCheck(__pyx_t_15(__pyx_t_14), 2) < (0)) __PYX_ERR(0, 154, __pyx_L1_error) __pyx_t_15 = NULL; __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; goto __pyx_L15_unpacking_done; __pyx_L14_unpacking_failed:; __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __pyx_t_15 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); __PYX_ERR(0, 154, __pyx_L1_error) __pyx_L15_unpacking_done:; } __pyx_t_16 = __Pyx_PyFloat_AsFloat(__pyx_t_4); if (unlikely((__pyx_t_16 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 154, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_XDECREF_SET(__pyx_v_tactic, __pyx_t_2); __pyx_t_2 = 0; __pyx_v_prob = __pyx_t_16; /* … */ } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; }
+155: tasks.append((node, tactic, prob))
__pyx_t_3 = PyFloat_FromDouble(__pyx_v_prob); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 155, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_4 = PyTuple_New(3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 155, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_INCREF((PyObject *)__pyx_v_node); __Pyx_GIVEREF((PyObject *)__pyx_v_node); if (__Pyx_PyTuple_SET_ITEM(__pyx_t_4, 0, ((PyObject *)__pyx_v_node)) != (0)) __PYX_ERR(0, 155, __pyx_L1_error); __Pyx_INCREF(__pyx_v_tactic); __Pyx_GIVEREF(__pyx_v_tactic); if (__Pyx_PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_v_tactic) != (0)) __PYX_ERR(0, 155, __pyx_L1_error); __Pyx_GIVEREF(__pyx_t_3); if (__Pyx_PyTuple_SET_ITEM(__pyx_t_4, 2, __pyx_t_3) != (0)) __PYX_ERR(0, 155, __pyx_L1_error); __pyx_t_3 = 0; __pyx_t_8 = __Pyx_PyList_Append(__pyx_v_tasks, __pyx_t_4); if (unlikely(__pyx_t_8 == ((int)-1))) __PYX_ERR(0, 155, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
156:
+157: for node, tactic, prob in tasks:
__pyx_t_1 = __pyx_v_tasks; __Pyx_INCREF(__pyx_t_1); __pyx_t_6 = 0; for (;;) { { Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_1); #if !CYTHON_ASSUME_SAFE_SIZE if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 157, __pyx_L1_error) #endif if (__pyx_t_6 >= __pyx_temp) break; } __pyx_t_4 = __Pyx_PyList_GetItemRefFast(__pyx_t_1, __pyx_t_6, __Pyx_ReferenceSharing_OwnStrongReference); ++__pyx_t_6; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 157, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); if ((likely(PyTuple_CheckExact(__pyx_t_4))) || (PyList_CheckExact(__pyx_t_4))) { PyObject* sequence = __pyx_t_4; Py_ssize_t size = __Pyx_PySequence_SIZE(sequence); if (unlikely(size != 3)) { if (size > 3) __Pyx_RaiseTooManyValuesError(3); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); __PYX_ERR(0, 157, __pyx_L1_error) } #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS if (likely(PyTuple_CheckExact(sequence))) { __pyx_t_3 = PyTuple_GET_ITEM(sequence, 0); __Pyx_INCREF(__pyx_t_3); __pyx_t_2 = PyTuple_GET_ITEM(sequence, 1); __Pyx_INCREF(__pyx_t_2); __pyx_t_14 = PyTuple_GET_ITEM(sequence, 2); __Pyx_INCREF(__pyx_t_14); } else { __pyx_t_3 = __Pyx_PyList_GetItemRefFast(sequence, 0, __Pyx_ReferenceSharing_SharedReference); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 157, __pyx_L1_error) __Pyx_XGOTREF(__pyx_t_3); __pyx_t_2 = __Pyx_PyList_GetItemRefFast(sequence, 1, __Pyx_ReferenceSharing_SharedReference); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 157, __pyx_L1_error) __Pyx_XGOTREF(__pyx_t_2); __pyx_t_14 = __Pyx_PyList_GetItemRefFast(sequence, 2, __Pyx_ReferenceSharing_SharedReference); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 157, __pyx_L1_error) __Pyx_XGOTREF(__pyx_t_14); } #else __pyx_t_3 = __Pyx_PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 157, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_2 = __Pyx_PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 157, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_14 = __Pyx_PySequence_ITEM(sequence, 2); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 157, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); #endif __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; } else { Py_ssize_t index = -1; __pyx_t_17 = PyObject_GetIter(__pyx_t_4); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 157, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_t_15 = (CYTHON_COMPILING_IN_LIMITED_API) ? PyIter_Next : __Pyx_PyObject_GetIterNextFunc(__pyx_t_17); index = 0; __pyx_t_3 = __pyx_t_15(__pyx_t_17); if (unlikely(!__pyx_t_3)) goto __pyx_L19_unpacking_failed; __Pyx_GOTREF(__pyx_t_3); index = 1; __pyx_t_2 = __pyx_t_15(__pyx_t_17); if (unlikely(!__pyx_t_2)) goto __pyx_L19_unpacking_failed; __Pyx_GOTREF(__pyx_t_2); index = 2; __pyx_t_14 = __pyx_t_15(__pyx_t_17); if (unlikely(!__pyx_t_14)) goto __pyx_L19_unpacking_failed; __Pyx_GOTREF(__pyx_t_14); if (__Pyx_IternextUnpackEndCheck(__pyx_t_15(__pyx_t_17), 3) < (0)) __PYX_ERR(0, 157, __pyx_L1_error) __pyx_t_15 = NULL; __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; goto __pyx_L20_unpacking_done; __pyx_L19_unpacking_failed:; __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __pyx_t_15 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); __PYX_ERR(0, 157, __pyx_L1_error) __pyx_L20_unpacking_done:; } if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node))))) __PYX_ERR(0, 157, __pyx_L1_error) __pyx_t_16 = __Pyx_PyFloat_AsFloat(__pyx_t_14); if (unlikely((__pyx_t_16 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 157, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __Pyx_XDECREF_SET(__pyx_v_node, ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)__pyx_t_3)); __pyx_t_3 = 0; __Pyx_XDECREF_SET(__pyx_v_tactic, __pyx_t_2); __pyx_t_2 = 0; __pyx_v_prob = __pyx_t_16; /* … */ } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L22_for_end; __pyx_L18_break:; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L22_for_end; __pyx_L22_for_end:;
158: # Check timeout before each Lean call
+159: if self._is_timeout():
__pyx_t_9 = ((struct __pyx_vtabstruct_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self->__pyx_base.__pyx_vtab)->__pyx_base._is_timeout(((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_BaseMCTS *)__pyx_v_self), 0); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 159, __pyx_L1_error)
if (__pyx_t_9) {
/* … */
}
+160: break
goto __pyx_L18_break;
+161: next_state = self.env.run_tactic_stateless(node.state, tactic)
__pyx_t_14 = __pyx_v_self->__pyx_base.env;
__Pyx_INCREF(__pyx_t_14);
__pyx_t_5 = 0;
{
PyObject *__pyx_callargs[3] = {__pyx_t_14, __pyx_v_node->state, __pyx_v_tactic};
__pyx_t_4 = __Pyx_PyObject_FastCallMethod((PyObject*)__pyx_mstate_global->__pyx_n_u_run_tactic_stateless, __pyx_callargs+__pyx_t_5, (3-__pyx_t_5) | (1*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET));
__Pyx_XDECREF(__pyx_t_14); __pyx_t_14 = 0;
if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 161, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_4);
}
__Pyx_XDECREF_SET(__pyx_v_next_state, __pyx_t_4);
__pyx_t_4 = 0;
+162: results.append((node, tactic, prob, next_state))
__pyx_t_4 = PyFloat_FromDouble(__pyx_v_prob); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 162, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_14 = PyTuple_New(4); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 162, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); __Pyx_INCREF((PyObject *)__pyx_v_node); __Pyx_GIVEREF((PyObject *)__pyx_v_node); if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 0, ((PyObject *)__pyx_v_node)) != (0)) __PYX_ERR(0, 162, __pyx_L1_error); __Pyx_INCREF(__pyx_v_tactic); __Pyx_GIVEREF(__pyx_v_tactic); if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 1, __pyx_v_tactic) != (0)) __PYX_ERR(0, 162, __pyx_L1_error); __Pyx_GIVEREF(__pyx_t_4); if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 2, __pyx_t_4) != (0)) __PYX_ERR(0, 162, __pyx_L1_error); __Pyx_INCREF(__pyx_v_next_state); __Pyx_GIVEREF(__pyx_v_next_state); if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 3, __pyx_v_next_state) != (0)) __PYX_ERR(0, 162, __pyx_L1_error); __pyx_t_4 = 0; __pyx_t_8 = __Pyx_PyList_Append(__pyx_v_results, __pyx_t_14); if (unlikely(__pyx_t_8 == ((int)-1))) __PYX_ERR(0, 162, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0;
163:
164: # Create children (reusing existing nodes for duplicates - DAG structure)
+165: for node, tactic, prob, next_state in results:
__pyx_t_1 = __pyx_v_results; __Pyx_INCREF(__pyx_t_1); __pyx_t_6 = 0; for (;;) { { Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_1); #if !CYTHON_ASSUME_SAFE_SIZE if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 165, __pyx_L1_error) #endif if (__pyx_t_6 >= __pyx_temp) break; } __pyx_t_14 = __Pyx_PyList_GetItemRefFast(__pyx_t_1, __pyx_t_6, __Pyx_ReferenceSharing_OwnStrongReference); ++__pyx_t_6; if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 165, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); if ((likely(PyTuple_CheckExact(__pyx_t_14))) || (PyList_CheckExact(__pyx_t_14))) { PyObject* sequence = __pyx_t_14; Py_ssize_t size = __Pyx_PySequence_SIZE(sequence); if (unlikely(size != 4)) { if (size > 4) __Pyx_RaiseTooManyValuesError(4); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); __PYX_ERR(0, 165, __pyx_L1_error) } #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS if (likely(PyTuple_CheckExact(sequence))) { __pyx_t_4 = PyTuple_GET_ITEM(sequence, 0); __Pyx_INCREF(__pyx_t_4); __pyx_t_2 = PyTuple_GET_ITEM(sequence, 1); __Pyx_INCREF(__pyx_t_2); __pyx_t_3 = PyTuple_GET_ITEM(sequence, 2); __Pyx_INCREF(__pyx_t_3); __pyx_t_17 = PyTuple_GET_ITEM(sequence, 3); __Pyx_INCREF(__pyx_t_17); } else { __pyx_t_4 = __Pyx_PyList_GetItemRefFast(sequence, 0, __Pyx_ReferenceSharing_SharedReference); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 165, __pyx_L1_error) __Pyx_XGOTREF(__pyx_t_4); __pyx_t_2 = __Pyx_PyList_GetItemRefFast(sequence, 1, __Pyx_ReferenceSharing_SharedReference); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 165, __pyx_L1_error) __Pyx_XGOTREF(__pyx_t_2); __pyx_t_3 = __Pyx_PyList_GetItemRefFast(sequence, 2, __Pyx_ReferenceSharing_SharedReference); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 165, __pyx_L1_error) __Pyx_XGOTREF(__pyx_t_3); __pyx_t_17 = __Pyx_PyList_GetItemRefFast(sequence, 3, __Pyx_ReferenceSharing_SharedReference); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 165, __pyx_L1_error) __Pyx_XGOTREF(__pyx_t_17); } #else { Py_ssize_t i; PyObject** temps[4] = {&__pyx_t_4,&__pyx_t_2,&__pyx_t_3,&__pyx_t_17}; for (i=0; i < 4; i++) { PyObject* item = __Pyx_PySequence_ITEM(sequence, i); if (unlikely(!item)) __PYX_ERR(0, 165, __pyx_L1_error) __Pyx_GOTREF(item); *(temps[i]) = item; } } #endif __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; } else { Py_ssize_t index = -1; PyObject** temps[4] = {&__pyx_t_4,&__pyx_t_2,&__pyx_t_3,&__pyx_t_17}; __pyx_t_18 = PyObject_GetIter(__pyx_t_14); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 165, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_18); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __pyx_t_15 = (CYTHON_COMPILING_IN_LIMITED_API) ? PyIter_Next : __Pyx_PyObject_GetIterNextFunc(__pyx_t_18); for (index=0; index < 4; index++) { PyObject* item = __pyx_t_15(__pyx_t_18); if (unlikely(!item)) goto __pyx_L25_unpacking_failed; __Pyx_GOTREF(item); *(temps[index]) = item; } if (__Pyx_IternextUnpackEndCheck(__pyx_t_15(__pyx_t_18), 4) < (0)) __PYX_ERR(0, 165, __pyx_L1_error) __pyx_t_15 = NULL; __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; goto __pyx_L26_unpacking_done; __pyx_L25_unpacking_failed:; __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; __pyx_t_15 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); __PYX_ERR(0, 165, __pyx_L1_error) __pyx_L26_unpacking_done:; } if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node))))) __PYX_ERR(0, 165, __pyx_L1_error) __pyx_t_16 = __Pyx_PyFloat_AsFloat(__pyx_t_3); if (unlikely((__pyx_t_16 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 165, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_XDECREF_SET(__pyx_v_node, ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)__pyx_t_4)); __pyx_t_4 = 0; __Pyx_XDECREF_SET(__pyx_v_tactic, __pyx_t_2); __pyx_t_2 = 0; __pyx_v_prob = __pyx_t_16; __Pyx_XDECREF_SET(__pyx_v_next_state, __pyx_t_17); __pyx_t_17 = 0; /* … */ __pyx_L23_continue:; } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
166: # Check for duplicate states
+167: state_key = self._get_state_key(next_state)
__pyx_t_14 = ((struct __pyx_vtabstruct_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self->__pyx_base.__pyx_vtab)->__pyx_base._get_state_key(((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_BaseMCTS *)__pyx_v_self), __pyx_v_next_state, 0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 167, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); __Pyx_XDECREF_SET(__pyx_v_state_key, __pyx_t_14); __pyx_t_14 = 0;
+168: if state_key is not None and state_key in self.seen_states:
__pyx_t_7 = (__pyx_v_state_key != Py_None);
if (__pyx_t_7) {
} else {
__pyx_t_9 = __pyx_t_7;
goto __pyx_L28_bool_binop_done;
}
if (unlikely(__pyx_v_self->__pyx_base.seen_states == Py_None)) {
PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable");
__PYX_ERR(0, 168, __pyx_L1_error)
}
__pyx_t_7 = (__Pyx_PyDict_ContainsTF(__pyx_v_state_key, __pyx_v_self->__pyx_base.seen_states, Py_EQ)); if (unlikely((__pyx_t_7 < 0))) __PYX_ERR(0, 168, __pyx_L1_error)
__pyx_t_9 = __pyx_t_7;
__pyx_L28_bool_binop_done:;
if (__pyx_t_9) {
/* … */
}
169: # Reuse existing node - add as child with additional parent edge
+170: existing_node = self.seen_states[state_key]
if (unlikely(__pyx_v_self->__pyx_base.seen_states == Py_None)) {
PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
__PYX_ERR(0, 170, __pyx_L1_error)
}
__pyx_t_14 = __Pyx_PyDict_GetItem(__pyx_v_self->__pyx_base.seen_states, __pyx_v_state_key); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 170, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_14);
if (!(likely(((__pyx_t_14) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_14, __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node))))) __PYX_ERR(0, 170, __pyx_L1_error)
__Pyx_XDECREF_SET(__pyx_v_existing_node, ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)__pyx_t_14));
__pyx_t_14 = 0;
+171: existing_node.add_parent(node, tactic)
__pyx_t_19.__pyx_n = 1;
__pyx_t_19.action = __pyx_v_tactic;
((struct __pyx_vtabstruct_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)__pyx_v_existing_node->__pyx_vtab)->add_parent(__pyx_v_existing_node, __pyx_v_node, 0, &__pyx_t_19); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 171, __pyx_L1_error)
+172: if existing_node not in node.children:
__pyx_t_9 = (__Pyx_PySequence_ContainsTF(((PyObject *)__pyx_v_existing_node), __pyx_v_node->children, Py_NE)); if (unlikely((__pyx_t_9 < 0))) __PYX_ERR(0, 172, __pyx_L1_error) if (__pyx_t_9) { /* … */ }
+173: node.children.append(existing_node)
if (unlikely(__pyx_v_node->children == Py_None)) {
PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "append");
__PYX_ERR(0, 173, __pyx_L1_error)
}
__pyx_t_8 = __Pyx_PyList_Append(__pyx_v_node->children, ((PyObject *)__pyx_v_existing_node)); if (unlikely(__pyx_t_8 == ((int)-1))) __PYX_ERR(0, 173, __pyx_L1_error)
+174: continue
goto __pyx_L23_continue;
175:
+176: child = Node(next_state, parent=node, action=tactic)
__pyx_t_17 = NULL;
__pyx_t_5 = 1;
{
PyObject *__pyx_callargs[2 + ((CYTHON_VECTORCALL) ? 2 : 0)] = {__pyx_t_17, __pyx_v_next_state};
__pyx_t_3 = __Pyx_MakeVectorcallBuilderKwds(2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 176, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_3);
if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_parent, ((PyObject *)__pyx_v_node), __pyx_t_3, __pyx_callargs+2, 0) < (0)) __PYX_ERR(0, 176, __pyx_L1_error)
if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_action, __pyx_v_tactic, __pyx_t_3, __pyx_callargs+2, 1) < (0)) __PYX_ERR(0, 176, __pyx_L1_error)
__pyx_t_14 = __Pyx_Object_Vectorcall_CallFromBuilder((PyObject*)__pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node, __pyx_callargs+__pyx_t_5, (2-__pyx_t_5) | (__pyx_t_5*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET), __pyx_t_3);
__Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0;
__Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 176, __pyx_L1_error)
__Pyx_GOTREF((PyObject *)__pyx_t_14);
}
__Pyx_XDECREF_SET(__pyx_v_child, ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)__pyx_t_14));
__pyx_t_14 = 0;
+177: child.prior_p = prob
__pyx_v_child->prior_p = __pyx_v_prob;
+178: node.children.append(child)
if (unlikely(__pyx_v_node->children == Py_None)) {
PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "append");
__PYX_ERR(0, 178, __pyx_L1_error)
}
__pyx_t_8 = __Pyx_PyList_Append(__pyx_v_node->children, ((PyObject *)__pyx_v_child)); if (unlikely(__pyx_t_8 == ((int)-1))) __PYX_ERR(0, 178, __pyx_L1_error)
+179: self.node_count += 1
__pyx_v_self->__pyx_base.node_count = (__pyx_v_self->__pyx_base.node_count + 1);
180:
181: # Register new state in seen_states
+182: if state_key is not None:
__pyx_t_9 = (__pyx_v_state_key != Py_None);
if (__pyx_t_9) {
/* … */
}
+183: self.seen_states[state_key] = child
if (unlikely(__pyx_v_self->__pyx_base.seen_states == Py_None)) {
PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
__PYX_ERR(0, 183, __pyx_L1_error)
}
if (unlikely((PyDict_SetItem(__pyx_v_self->__pyx_base.seen_states, __pyx_v_state_key, ((PyObject *)__pyx_v_child)) < 0))) __PYX_ERR(0, 183, __pyx_L1_error)
184:
+185: for node in nodes_to_generate:
__pyx_t_1 = __pyx_v_nodes_to_generate; __Pyx_INCREF(__pyx_t_1); __pyx_t_6 = 0; for (;;) { { Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_1); #if !CYTHON_ASSUME_SAFE_SIZE if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 185, __pyx_L1_error) #endif if (__pyx_t_6 >= __pyx_temp) break; } __pyx_t_14 = __Pyx_PyList_GetItemRefFast(__pyx_t_1, __pyx_t_6, __Pyx_ReferenceSharing_OwnStrongReference); ++__pyx_t_6; if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 185, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); if (!(likely(((__pyx_t_14) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_14, __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node))))) __PYX_ERR(0, 185, __pyx_L1_error) __Pyx_XDECREF_SET(__pyx_v_node, ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)__pyx_t_14)); __pyx_t_14 = 0; /* … */ } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+186: node.untried_actions = []
__pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 186, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); __Pyx_GIVEREF(__pyx_t_14); __Pyx_GOTREF(__pyx_v_node->untried_actions); __Pyx_DECREF(__pyx_v_node->untried_actions); __pyx_v_node->untried_actions = ((PyObject*)__pyx_t_14); __pyx_t_14 = 0;
187:
+188: return [self._get_best_child(node) if node.children else node for node in nodes]
__Pyx_XDECREF(__pyx_r); { /* enter inner scope */ __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 188, __pyx_L38_error) __Pyx_GOTREF(__pyx_t_1); if (unlikely(__pyx_v_nodes == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); __PYX_ERR(0, 188, __pyx_L38_error) } __pyx_t_14 = __pyx_v_nodes; __Pyx_INCREF(__pyx_t_14); __pyx_t_6 = 0; for (;;) { { Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_14); #if !CYTHON_ASSUME_SAFE_SIZE if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 188, __pyx_L38_error) #endif if (__pyx_t_6 >= __pyx_temp) break; } __pyx_t_3 = __Pyx_PyList_GetItemRefFast(__pyx_t_14, __pyx_t_6, __Pyx_ReferenceSharing_OwnStrongReference); ++__pyx_t_6; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 188, __pyx_L38_error) __Pyx_GOTREF(__pyx_t_3); if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node))))) __PYX_ERR(0, 188, __pyx_L38_error) __Pyx_XDECREF_SET(__pyx_7genexpr__pyx_v_node, ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)__pyx_t_3)); __pyx_t_3 = 0; if (__pyx_7genexpr__pyx_v_node->children == Py_None) __pyx_t_9 = 0; else { Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_7genexpr__pyx_v_node->children); if (unlikely(((!CYTHON_ASSUME_SAFE_SIZE) && __pyx_temp < 0))) __PYX_ERR(0, 188, __pyx_L38_error) __pyx_t_9 = (__pyx_temp != 0); } if (__pyx_t_9) { __pyx_t_17 = ((PyObject *)((struct __pyx_vtabstruct_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self->__pyx_base.__pyx_vtab)->__pyx_base._get_best_child(((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_BaseMCTS *)__pyx_v_self), __pyx_7genexpr__pyx_v_node, 0)); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 188, __pyx_L38_error) __Pyx_GOTREF(__pyx_t_17); __pyx_t_3 = __pyx_t_17; __pyx_t_17 = 0; } else { __Pyx_INCREF((PyObject *)__pyx_7genexpr__pyx_v_node); __pyx_t_3 = ((PyObject *)__pyx_7genexpr__pyx_v_node); } if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_t_3))) __PYX_ERR(0, 188, __pyx_L38_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __Pyx_XDECREF((PyObject *)__pyx_7genexpr__pyx_v_node); __pyx_7genexpr__pyx_v_node = 0; goto __pyx_L42_exit_scope; __pyx_L38_error:; __Pyx_XDECREF((PyObject *)__pyx_7genexpr__pyx_v_node); __pyx_7genexpr__pyx_v_node = 0; goto __pyx_L1_error; __pyx_L42_exit_scope:; } /* exit inner scope */ __pyx_r = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L0;
189:
+190: cpdef float _simulate(self, Node node):
static PyObject *__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_11_simulate(PyObject *__pyx_v_self,
#if CYTHON_METH_FASTCALL
PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
#else
PyObject *__pyx_args, PyObject *__pyx_kwds
#endif
); /*proto*/
static float __pyx_f_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout__simulate(struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *__pyx_v_self, struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_node, int __pyx_skip_dispatch) {
PyObject *__pyx_v_current_state = 0;
PyObject *__pyx_v_sim_env = 0;
int __pyx_v_step_idx;
PyObject *__pyx_v_state_str = 0;
PyObject *__pyx_v_tactic = 0;
PyObject *__pyx_v_result = 0;
float __pyx_r;
/* Check if called by wrapper */
if (unlikely(__pyx_skip_dispatch)) ;
/* Check if overridden in Python */
else if (
#if !CYTHON_USE_TYPE_SLOTS
unlikely(Py_TYPE(((PyObject *)__pyx_v_self)) != __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout &&
__Pyx_PyType_HasFeature(Py_TYPE(((PyObject *)__pyx_v_self)), Py_TPFLAGS_HAVE_GC))
#else
unlikely(Py_TYPE(((PyObject *)__pyx_v_self))->tp_dictoffset != 0 || __Pyx_PyType_HasFeature(Py_TYPE(((PyObject *)__pyx_v_self)), (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)))
#endif
) {
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
static PY_UINT64_T __pyx_tp_dict_version = __PYX_DICT_VERSION_INIT, __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT;
if (unlikely(!__Pyx_object_dict_version_matches(((PyObject *)__pyx_v_self), __pyx_tp_dict_version, __pyx_obj_dict_version))) {
PY_UINT64_T __pyx_typedict_guard = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self));
#endif
__pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_mstate_global->__pyx_n_u_simulate); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 190, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
if (!__Pyx_IsSameCFunction(__pyx_t_1, (void(*)(void)) __pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_11_simulate)) {
__pyx_t_3 = NULL;
__Pyx_INCREF(__pyx_t_1);
__pyx_t_4 = __pyx_t_1;
__pyx_t_5 = 1;
#if CYTHON_UNPACK_METHODS
if (unlikely(PyMethod_Check(__pyx_t_4))) {
__pyx_t_3 = PyMethod_GET_SELF(__pyx_t_4);
assert(__pyx_t_3);
PyObject* __pyx__function = PyMethod_GET_FUNCTION(__pyx_t_4);
__Pyx_INCREF(__pyx_t_3);
__Pyx_INCREF(__pyx__function);
__Pyx_DECREF_SET(__pyx_t_4, __pyx__function);
__pyx_t_5 = 0;
}
#endif
{
PyObject *__pyx_callargs[2] = {__pyx_t_3, ((PyObject *)__pyx_v_node)};
__pyx_t_2 = __Pyx_PyObject_FastCall((PyObject*)__pyx_t_4, __pyx_callargs+__pyx_t_5, (2-__pyx_t_5) | (__pyx_t_5*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET));
__Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 190, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
}
__pyx_t_6 = __Pyx_PyFloat_AsFloat(__pyx_t_2); if (unlikely((__pyx_t_6 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 190, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
__pyx_r = __pyx_t_6;
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
goto __pyx_L0;
}
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
__pyx_tp_dict_version = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self));
__pyx_obj_dict_version = __Pyx_get_object_dict_version(((PyObject *)__pyx_v_self));
if (unlikely(__pyx_typedict_guard != __pyx_tp_dict_version)) {
__pyx_tp_dict_version = __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT;
}
#endif
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
}
#endif
}
/* … */
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_XDECREF(__pyx_t_2);
__Pyx_XDECREF(__pyx_t_3);
__Pyx_XDECREF(__pyx_t_4);
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout._simulate", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = 0;
__pyx_L0:;
__Pyx_XDECREF(__pyx_v_current_state);
__Pyx_XDECREF(__pyx_v_sim_env);
__Pyx_XDECREF(__pyx_v_state_str);
__Pyx_XDECREF(__pyx_v_tactic);
__Pyx_XDECREF(__pyx_v_result);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* Python wrapper */
static PyObject *__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_11_simulate(PyObject *__pyx_v_self,
#if CYTHON_METH_FASTCALL
PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
#else
PyObject *__pyx_args, PyObject *__pyx_kwds
#endif
); /*proto*/
static PyMethodDef __pyx_mdef_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_11_simulate = {"_simulate", (PyCFunction)(void(*)(void))(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_11_simulate, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_11_simulate(PyObject *__pyx_v_self,
#if CYTHON_METH_FASTCALL
PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
#else
PyObject *__pyx_args, PyObject *__pyx_kwds
#endif
) {
struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_node = 0;
#if !CYTHON_METH_FASTCALL
CYTHON_UNUSED Py_ssize_t __pyx_nargs;
#endif
CYTHON_UNUSED PyObject *const *__pyx_kwvalues;
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("_simulate (wrapper)", 0);
#if !CYTHON_METH_FASTCALL
#if CYTHON_ASSUME_SAFE_SIZE
__pyx_nargs = PyTuple_GET_SIZE(__pyx_args);
#else
__pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL;
#endif
#endif
__pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs);
{
PyObject ** const __pyx_pyargnames[] = {&__pyx_mstate_global->__pyx_n_u_node,0};
PyObject* values[1] = {0};
const Py_ssize_t __pyx_kwds_len = (__pyx_kwds) ? __Pyx_NumKwargs_FASTCALL(__pyx_kwds) : 0;
if (unlikely(__pyx_kwds_len) < 0) __PYX_ERR(0, 190, __pyx_L3_error)
if (__pyx_kwds_len > 0) {
switch (__pyx_nargs) {
case 1:
values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 190, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 0: break;
default: goto __pyx_L5_argtuple_error;
}
const Py_ssize_t kwd_pos_args = __pyx_nargs;
if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "_simulate", 0) < (0)) __PYX_ERR(0, 190, __pyx_L3_error)
for (Py_ssize_t i = __pyx_nargs; i < 1; i++) {
if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("_simulate", 1, 1, 1, i); __PYX_ERR(0, 190, __pyx_L3_error) }
}
} else if (unlikely(__pyx_nargs != 1)) {
goto __pyx_L5_argtuple_error;
} else {
values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 190, __pyx_L3_error)
}
__pyx_v_node = ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)values[0]);
}
goto __pyx_L6_skip;
__pyx_L5_argtuple_error:;
__Pyx_RaiseArgtupleInvalid("_simulate", 1, 1, 1, __pyx_nargs); __PYX_ERR(0, 190, __pyx_L3_error)
__pyx_L6_skip:;
goto __pyx_L4_argument_unpacking_done;
__pyx_L3_error:;
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout._simulate", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_RefNannyFinishContext();
return NULL;
__pyx_L4_argument_unpacking_done:;
if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_node), __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node, 1, "node", 0))) __PYX_ERR(0, 190, __pyx_L1_error)
__pyx_r = __pyx_pf_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_10_simulate(((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self), __pyx_v_node);
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
/* function exit code */
goto __pyx_L0;
__pyx_L1_error:;
__pyx_r = NULL;
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
goto __pyx_L7_cleaned_up;
__pyx_L0:;
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
__pyx_L7_cleaned_up:;
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
static PyObject *__pyx_pf_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_10_simulate(struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *__pyx_v_self, struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *__pyx_v_node) {
PyObject *__pyx_r = NULL;
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = __pyx_f_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout__simulate(__pyx_v_self, __pyx_v_node, 1); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 190, __pyx_L1_error)
__pyx_t_2 = PyFloat_FromDouble(__pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 190, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
__pyx_r = __pyx_t_2;
__pyx_t_2 = 0;
goto __pyx_L0;
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_2);
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout._simulate", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = NULL;
__pyx_L0:;
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* … */
__pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_11_simulate, __Pyx_CYFUNCTION_CCLASS, __pyx_mstate_global->__pyx_n_u_MCTS_GuidedRollout__simulate, NULL, __pyx_mstate_global->__pyx_n_u_lean_reinforcement_agent_mcts_mc, __pyx_mstate_global->__pyx_d, ((PyObject *)__pyx_mstate_global->__pyx_codeobj_tab[4])); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 190, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030E0000
PyUnstable_Object_EnableDeferredRefcount(__pyx_t_2);
#endif
if (__Pyx_SetItemOnTypeDict(__pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout, __pyx_mstate_global->__pyx_n_u_simulate, __pyx_t_2) < (0)) __PYX_ERR(0, 190, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
191: cdef object current_state
192: cdef object sim_env
193: cdef int step_idx
194: cdef str state_str
195: cdef str tactic
196: cdef object result
197:
198: # Check timeout at start of simulation
+199: if self._is_timeout():
__pyx_t_7 = ((struct __pyx_vtabstruct_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self->__pyx_base.__pyx_vtab)->__pyx_base._is_timeout(((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_BaseMCTS *)__pyx_v_self), 0); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 199, __pyx_L1_error)
if (__pyx_t_7) {
/* … */
}
+200: return 0.0 # Neutral reward on timeout
__pyx_r = 0.0;
goto __pyx_L0;
201:
+202: if node.is_terminal:
if (__pyx_v_node->is_terminal) {
/* … */
}
+203: if isinstance(node.state, ProofFinished):
__pyx_t_1 = __pyx_v_node->state;
__Pyx_INCREF(__pyx_t_1);
__Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_mstate_global->__pyx_n_u_ProofFinished); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 203, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
__pyx_t_7 = PyObject_IsInstance(__pyx_t_1, __pyx_t_2); if (unlikely(__pyx_t_7 == ((int)-1))) __PYX_ERR(0, 203, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
if (__pyx_t_7) {
/* … */
}
+204: return 1.0
__pyx_r = 1.0;
goto __pyx_L0;
+205: if isinstance(node.state, (LeanError, ProofGivenUp)):
__Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_mstate_global->__pyx_n_u_LeanError); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 205, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_mstate_global->__pyx_n_u_ProofGivenUp); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 205, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_4 = __pyx_v_node->state; __Pyx_INCREF(__pyx_t_4); __pyx_t_8 = PyObject_IsInstance(__pyx_t_4, __pyx_t_2); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (!__pyx_t_8) { } else { __pyx_t_7 = __pyx_t_8; goto __pyx_L7_bool_binop_done; } __pyx_t_4 = __pyx_v_node->state; __Pyx_INCREF(__pyx_t_4); __pyx_t_8 = PyObject_IsInstance(__pyx_t_4, __pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_t_7 = __pyx_t_8; __pyx_L7_bool_binop_done:; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; if (__pyx_t_7) { /* … */ }
+206: return -1.0
__pyx_r = -1.0;
goto __pyx_L0;
207:
+208: if not isinstance(node.state, TacticState):
__pyx_t_2 = __pyx_v_node->state; __Pyx_INCREF(__pyx_t_2); __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_mstate_global->__pyx_n_u_TacticState); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 208, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_7 = PyObject_IsInstance(__pyx_t_2, __pyx_t_1); if (unlikely(__pyx_t_7 == ((int)-1))) __PYX_ERR(0, 208, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_8 = (!__pyx_t_7); if (__pyx_t_8) { /* … */ }
+209: return -1.0
__pyx_r = -1.0;
goto __pyx_L0;
210:
+211: current_state = node.state
__pyx_t_1 = __pyx_v_node->state;
__Pyx_INCREF(__pyx_t_1);
__pyx_v_current_state = __pyx_t_1;
__pyx_t_1 = 0;
+212: sim_env = self.env
__pyx_t_1 = __pyx_v_self->__pyx_base.env;
__Pyx_INCREF(__pyx_t_1);
__pyx_v_sim_env = __pyx_t_1;
__pyx_t_1 = 0;
213:
+214: for step_idx in range(self.max_rollout_depth):
__pyx_t_9 = __pyx_v_self->__pyx_base.max_rollout_depth;
__pyx_t_10 = __pyx_t_9;
for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_10; __pyx_t_11+=1) {
__pyx_v_step_idx = __pyx_t_11;
215: # Check timeout at each rollout step
+216: if self._is_timeout():
__pyx_t_8 = ((struct __pyx_vtabstruct_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self->__pyx_base.__pyx_vtab)->__pyx_base._is_timeout(((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_BaseMCTS *)__pyx_v_self), 0); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 216, __pyx_L1_error)
if (__pyx_t_8) {
/* … */
}
+217: return 0.0 # Neutral reward on timeout
__pyx_r = 0.0;
goto __pyx_L0;
218:
+219: state_str = current_state.pp
__pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_current_state, __pyx_mstate_global->__pyx_n_u_pp); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 219, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (!(likely(PyUnicode_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("str", __pyx_t_1))) __PYX_ERR(0, 219, __pyx_L1_error) __Pyx_XDECREF_SET(__pyx_v_state_str, ((PyObject*)__pyx_t_1)); __pyx_t_1 = 0;
+220: tactic = self.transformer.generate_tactics(state_str, n=1)[0]
__pyx_t_2 = __pyx_v_self->__pyx_base.transformer;
__Pyx_INCREF(__pyx_t_2);
__pyx_t_5 = 0;
{
PyObject *__pyx_callargs[2 + ((CYTHON_VECTORCALL) ? 1 : 0)] = {__pyx_t_2, __pyx_v_state_str};
__pyx_t_4 = __Pyx_MakeVectorcallBuilderKwds(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 220, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_4);
if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_n, __pyx_mstate_global->__pyx_int_1, __pyx_t_4, __pyx_callargs+2, 0) < (0)) __PYX_ERR(0, 220, __pyx_L1_error)
__pyx_t_1 = __Pyx_Object_VectorcallMethod_CallFromBuilder((PyObject*)__pyx_mstate_global->__pyx_n_u_generate_tactics, __pyx_callargs+__pyx_t_5, (2-__pyx_t_5) | (1*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET), __pyx_t_4);
__Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 220, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
}
__pyx_t_4 = __Pyx_GetItemInt(__pyx_t_1, 0, long, 1, __Pyx_PyLong_From_long, 0, 0, 0, 1, __Pyx_ReferenceSharing_OwnStrongReference); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 220, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_4);
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
if (!(likely(PyUnicode_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None) || __Pyx_RaiseUnexpectedTypeError("str", __pyx_t_4))) __PYX_ERR(0, 220, __pyx_L1_error)
__Pyx_XDECREF_SET(__pyx_v_tactic, ((PyObject*)__pyx_t_4));
__pyx_t_4 = 0;
221:
222: # Check timeout after model call
+223: if self._is_timeout():
__pyx_t_8 = ((struct __pyx_vtabstruct_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self->__pyx_base.__pyx_vtab)->__pyx_base._is_timeout(((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_BaseMCTS *)__pyx_v_self), 0); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 223, __pyx_L1_error)
if (__pyx_t_8) {
/* … */
}
+224: return 0.0
__pyx_r = 0.0;
goto __pyx_L0;
225:
+226: result = sim_env.run_tactic_stateless(current_state, tactic)
__pyx_t_1 = __pyx_v_sim_env;
__Pyx_INCREF(__pyx_t_1);
__pyx_t_5 = 0;
{
PyObject *__pyx_callargs[3] = {__pyx_t_1, __pyx_v_current_state, __pyx_v_tactic};
__pyx_t_4 = __Pyx_PyObject_FastCallMethod((PyObject*)__pyx_mstate_global->__pyx_n_u_run_tactic_stateless, __pyx_callargs+__pyx_t_5, (3-__pyx_t_5) | (1*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET));
__Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 226, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_4);
}
__Pyx_XDECREF_SET(__pyx_v_result, __pyx_t_4);
__pyx_t_4 = 0;
227:
+228: if isinstance(result, ProofFinished):
__Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_mstate_global->__pyx_n_u_ProofFinished); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 228, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_8 = PyObject_IsInstance(__pyx_v_result, __pyx_t_4); if (unlikely(__pyx_t_8 == ((int)-1))) __PYX_ERR(0, 228, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_8) { /* … */ }
+229: return 1.0 - 0.01 * (step_idx + 1)
__pyx_r = (1.0 - (0.01 * (__pyx_v_step_idx + 1)));
goto __pyx_L0;
+230: if isinstance(result, (LeanError, ProofGivenUp)):
__Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_mstate_global->__pyx_n_u_LeanError); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 230, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_mstate_global->__pyx_n_u_ProofGivenUp); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 230, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_7 = PyObject_IsInstance(__pyx_v_result, __pyx_t_4); if (!__pyx_t_7) { } else { __pyx_t_8 = __pyx_t_7; goto __pyx_L16_bool_binop_done; } __pyx_t_7 = PyObject_IsInstance(__pyx_v_result, __pyx_t_1); __pyx_t_8 = __pyx_t_7; __pyx_L16_bool_binop_done:; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_8) { /* … */ }
+231: return -1.0
__pyx_r = -1.0;
goto __pyx_L0;
+232: if not isinstance(result, TacticState):
__Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_mstate_global->__pyx_n_u_TacticState); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 232, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_8 = PyObject_IsInstance(__pyx_v_result, __pyx_t_4); if (unlikely(__pyx_t_8 == ((int)-1))) __PYX_ERR(0, 232, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_t_7 = (!__pyx_t_8); if (__pyx_t_7) { /* … */ }
+233: return -1.0
__pyx_r = -1.0;
goto __pyx_L0;
234:
+235: current_state = result
__Pyx_INCREF(__pyx_v_result); __Pyx_DECREF_SET(__pyx_v_current_state, __pyx_v_result); }
236:
+237: return 0.0
__pyx_r = 0.0; goto __pyx_L0;
238:
+239: cpdef list _simulate_batch(self, list nodes):
static PyObject *__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_13_simulate_batch(PyObject *__pyx_v_self,
#if CYTHON_METH_FASTCALL
PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
#else
PyObject *__pyx_args, PyObject *__pyx_kwds
#endif
); /*proto*/
static PyObject *__pyx_f_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout__simulate_batch(struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *__pyx_v_self, PyObject *__pyx_v_nodes, int __pyx_skip_dispatch) {
PyObject *__pyx_8genexpr1__pyx_v_node = NULL;
PyObject *__pyx_r = NULL;
/* Check if called by wrapper */
if (unlikely(__pyx_skip_dispatch)) ;
/* Check if overridden in Python */
else if (
#if !CYTHON_USE_TYPE_SLOTS
unlikely(Py_TYPE(((PyObject *)__pyx_v_self)) != __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout &&
__Pyx_PyType_HasFeature(Py_TYPE(((PyObject *)__pyx_v_self)), Py_TPFLAGS_HAVE_GC))
#else
unlikely(Py_TYPE(((PyObject *)__pyx_v_self))->tp_dictoffset != 0 || __Pyx_PyType_HasFeature(Py_TYPE(((PyObject *)__pyx_v_self)), (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)))
#endif
) {
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
static PY_UINT64_T __pyx_tp_dict_version = __PYX_DICT_VERSION_INIT, __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT;
if (unlikely(!__Pyx_object_dict_version_matches(((PyObject *)__pyx_v_self), __pyx_tp_dict_version, __pyx_obj_dict_version))) {
PY_UINT64_T __pyx_typedict_guard = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self));
#endif
__pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_mstate_global->__pyx_n_u_simulate_batch); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 239, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
if (!__Pyx_IsSameCFunction(__pyx_t_1, (void(*)(void)) __pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_13_simulate_batch)) {
__Pyx_XDECREF(__pyx_r);
__pyx_t_3 = NULL;
__Pyx_INCREF(__pyx_t_1);
__pyx_t_4 = __pyx_t_1;
__pyx_t_5 = 1;
#if CYTHON_UNPACK_METHODS
if (unlikely(PyMethod_Check(__pyx_t_4))) {
__pyx_t_3 = PyMethod_GET_SELF(__pyx_t_4);
assert(__pyx_t_3);
PyObject* __pyx__function = PyMethod_GET_FUNCTION(__pyx_t_4);
__Pyx_INCREF(__pyx_t_3);
__Pyx_INCREF(__pyx__function);
__Pyx_DECREF_SET(__pyx_t_4, __pyx__function);
__pyx_t_5 = 0;
}
#endif
{
PyObject *__pyx_callargs[2] = {__pyx_t_3, __pyx_v_nodes};
__pyx_t_2 = __Pyx_PyObject_FastCall((PyObject*)__pyx_t_4, __pyx_callargs+__pyx_t_5, (2-__pyx_t_5) | (__pyx_t_5*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET));
__Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 239, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
}
if (!(likely(PyList_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("list", __pyx_t_2))) __PYX_ERR(0, 239, __pyx_L1_error)
__pyx_r = ((PyObject*)__pyx_t_2);
__pyx_t_2 = 0;
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
goto __pyx_L0;
}
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
__pyx_tp_dict_version = __Pyx_get_tp_dict_version(((PyObject *)__pyx_v_self));
__pyx_obj_dict_version = __Pyx_get_object_dict_version(((PyObject *)__pyx_v_self));
if (unlikely(__pyx_typedict_guard != __pyx_tp_dict_version)) {
__pyx_tp_dict_version = __pyx_obj_dict_version = __PYX_DICT_VERSION_INIT;
}
#endif
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
}
#endif
}
/* … */
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_XDECREF(__pyx_t_2);
__Pyx_XDECREF(__pyx_t_3);
__Pyx_XDECREF(__pyx_t_4);
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout._simulate_batch", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = 0;
__pyx_L0:;
__Pyx_XDECREF(__pyx_8genexpr1__pyx_v_node);
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* Python wrapper */
static PyObject *__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_13_simulate_batch(PyObject *__pyx_v_self,
#if CYTHON_METH_FASTCALL
PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
#else
PyObject *__pyx_args, PyObject *__pyx_kwds
#endif
); /*proto*/
static PyMethodDef __pyx_mdef_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_13_simulate_batch = {"_simulate_batch", (PyCFunction)(void(*)(void))(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_13_simulate_batch, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_13_simulate_batch(PyObject *__pyx_v_self,
#if CYTHON_METH_FASTCALL
PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
#else
PyObject *__pyx_args, PyObject *__pyx_kwds
#endif
) {
PyObject *__pyx_v_nodes = 0;
#if !CYTHON_METH_FASTCALL
CYTHON_UNUSED Py_ssize_t __pyx_nargs;
#endif
CYTHON_UNUSED PyObject *const *__pyx_kwvalues;
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("_simulate_batch (wrapper)", 0);
#if !CYTHON_METH_FASTCALL
#if CYTHON_ASSUME_SAFE_SIZE
__pyx_nargs = PyTuple_GET_SIZE(__pyx_args);
#else
__pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL;
#endif
#endif
__pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs);
{
PyObject ** const __pyx_pyargnames[] = {&__pyx_mstate_global->__pyx_n_u_nodes,0};
PyObject* values[1] = {0};
const Py_ssize_t __pyx_kwds_len = (__pyx_kwds) ? __Pyx_NumKwargs_FASTCALL(__pyx_kwds) : 0;
if (unlikely(__pyx_kwds_len) < 0) __PYX_ERR(0, 239, __pyx_L3_error)
if (__pyx_kwds_len > 0) {
switch (__pyx_nargs) {
case 1:
values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 239, __pyx_L3_error)
CYTHON_FALLTHROUGH;
case 0: break;
default: goto __pyx_L5_argtuple_error;
}
const Py_ssize_t kwd_pos_args = __pyx_nargs;
if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "_simulate_batch", 0) < (0)) __PYX_ERR(0, 239, __pyx_L3_error)
for (Py_ssize_t i = __pyx_nargs; i < 1; i++) {
if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("_simulate_batch", 1, 1, 1, i); __PYX_ERR(0, 239, __pyx_L3_error) }
}
} else if (unlikely(__pyx_nargs != 1)) {
goto __pyx_L5_argtuple_error;
} else {
values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0);
if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 239, __pyx_L3_error)
}
__pyx_v_nodes = ((PyObject*)values[0]);
}
goto __pyx_L6_skip;
__pyx_L5_argtuple_error:;
__Pyx_RaiseArgtupleInvalid("_simulate_batch", 1, 1, 1, __pyx_nargs); __PYX_ERR(0, 239, __pyx_L3_error)
__pyx_L6_skip:;
goto __pyx_L4_argument_unpacking_done;
__pyx_L3_error:;
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout._simulate_batch", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_RefNannyFinishContext();
return NULL;
__pyx_L4_argument_unpacking_done:;
if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_nodes), (&PyList_Type), 1, "nodes", 1))) __PYX_ERR(0, 239, __pyx_L1_error)
__pyx_r = __pyx_pf_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_12_simulate_batch(((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self), __pyx_v_nodes);
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
/* function exit code */
goto __pyx_L0;
__pyx_L1_error:;
__pyx_r = NULL;
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
goto __pyx_L7_cleaned_up;
__pyx_L0:;
for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) {
Py_XDECREF(values[__pyx_temp]);
}
__pyx_L7_cleaned_up:;
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
static PyObject *__pyx_pf_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_12_simulate_batch(struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *__pyx_v_self, PyObject *__pyx_v_nodes) {
PyObject *__pyx_r = NULL;
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = __pyx_f_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout__simulate_batch(__pyx_v_self, __pyx_v_nodes, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 239, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_r = __pyx_t_1;
__pyx_t_1 = 0;
goto __pyx_L0;
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_AddTraceback("lean_reinforcement.agent.mcts.mcts_cy.guidedrollout_cy.MCTS_GuidedRollout._simulate_batch", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = NULL;
__pyx_L0:;
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* … */
__pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_18MCTS_GuidedRollout_13_simulate_batch, __Pyx_CYFUNCTION_CCLASS, __pyx_mstate_global->__pyx_n_u_MCTS_GuidedRollout__simulate_bat, NULL, __pyx_mstate_global->__pyx_n_u_lean_reinforcement_agent_mcts_mc, __pyx_mstate_global->__pyx_d, ((PyObject *)__pyx_mstate_global->__pyx_codeobj_tab[5])); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 239, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030E0000
PyUnstable_Object_EnableDeferredRefcount(__pyx_t_2);
#endif
if (__Pyx_SetItemOnTypeDict(__pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout, __pyx_mstate_global->__pyx_n_u_simulate_batch, __pyx_t_2) < (0)) __PYX_ERR(0, 239, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+240: return [self._simulate(node) for node in nodes]
__Pyx_XDECREF(__pyx_r); { /* enter inner scope */ __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 240, __pyx_L5_error) __Pyx_GOTREF(__pyx_t_1); if (unlikely(__pyx_v_nodes == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); __PYX_ERR(0, 240, __pyx_L5_error) } __pyx_t_2 = __pyx_v_nodes; __Pyx_INCREF(__pyx_t_2); __pyx_t_6 = 0; for (;;) { { Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_2); #if !CYTHON_ASSUME_SAFE_SIZE if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 240, __pyx_L5_error) #endif if (__pyx_t_6 >= __pyx_temp) break; } __pyx_t_4 = __Pyx_PyList_GetItemRefFast(__pyx_t_2, __pyx_t_6, __Pyx_ReferenceSharing_OwnStrongReference); ++__pyx_t_6; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 240, __pyx_L5_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_XDECREF_SET(__pyx_8genexpr1__pyx_v_node, __pyx_t_4); __pyx_t_4 = 0; if (!(likely(((__pyx_8genexpr1__pyx_v_node) == Py_None) || likely(__Pyx_TypeTest(__pyx_8genexpr1__pyx_v_node, __pyx_mstate_global->__pyx_ptype_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node))))) __PYX_ERR(0, 240, __pyx_L5_error) __pyx_t_7 = ((struct __pyx_vtabstruct_18lean_reinforcement_5agent_4mcts_7mcts_cy_16guidedrollout_cy_MCTS_GuidedRollout *)__pyx_v_self->__pyx_base.__pyx_vtab)->__pyx_base._simulate(((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_BaseMCTS *)__pyx_v_self), ((struct __pyx_obj_18lean_reinforcement_5agent_4mcts_7mcts_cy_12base_mcts_cy_Node *)__pyx_8genexpr1__pyx_v_node), 0); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 240, __pyx_L5_error) __pyx_t_4 = PyFloat_FromDouble(__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 240, __pyx_L5_error) __Pyx_GOTREF(__pyx_t_4); if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_t_4))) __PYX_ERR(0, 240, __pyx_L5_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_XDECREF(__pyx_8genexpr1__pyx_v_node); __pyx_8genexpr1__pyx_v_node = 0; goto __pyx_L9_exit_scope; __pyx_L5_error:; __Pyx_XDECREF(__pyx_8genexpr1__pyx_v_node); __pyx_8genexpr1__pyx_v_node = 0; goto __pyx_L1_error; __pyx_L9_exit_scope:; } /* exit inner scope */ __pyx_r = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L0;