LICENSE
README.md
pyproject.toml
setup.py
examples/__init__.py
examples/detect_bad_code.py
examples/detect_bias.py
examples/detect_gender_bias.py
examples/detect_hallucinations.py
examples/detect_hallucinations_is_harmful_vs_blocked.py
examples/detect_harmful_content.py
examples/detect_personal_info.py
examples/detect_scheming.py
examples/evaluate_personal.py
examples/just_inference.py
examples/livecodebench_evaluation_example.py
examples/measure_steering_methods_latency.py
examples/synthetic_steering_optimization.py
examples/test_llama_steering.py
examples/test_multi_property.py
examples/train_classifiers_for_different_models.py
examples/train_different_detectors.py
examples/utils/__init__.py
examples/utils/content_detector.py
examples/utils/load_model.py
wisent_guard/__init__.py
wisent_guard/__main__.py
wisent_guard/cli.py
wisent_guard.egg-info/PKG-INFO
wisent_guard.egg-info/SOURCES.txt
wisent_guard.egg-info/dependency_links.txt
wisent_guard.egg-info/requires.txt
wisent_guard.egg-info/top_level.txt
wisent_guard/cli/__init__.py
wisent_guard/cli/cli_logger.py
wisent_guard/cli/classifiers/__init__.py
wisent_guard/cli/classifiers/classifier_rotator.py
wisent_guard/cli/cli_examples/__init__.py
wisent_guard/cli/cli_examples/ex_classifiers.py
wisent_guard/cli/cli_examples/ex_data_loading.py
wisent_guard/cli/cli_examples/ex_evaluation.py
wisent_guard/cli/cli_examples/ex_model.py
wisent_guard/cli/cli_examples/ex_steering.py
wisent_guard/cli/cli_examples/ex_training.py
wisent_guard/cli/data_loaders/__init__.py
wisent_guard/cli/data_loaders/data_loader_rotator.py
wisent_guard/cli/evaluators/__init__.py
wisent_guard/cli/evaluators/evaluator_rotator.py
wisent_guard/cli/steering_methods/__init__.py
wisent_guard/cli/steering_methods/steering_rotator.py
wisent_guard/core/__init__.py
wisent_guard/core/autonomous_agent.py
wisent_guard/core/bigcode_integration.py
wisent_guard/core/detection_handling.py
wisent_guard/core/download_full_benchmarks.py
wisent_guard/core/hyperparameter_optimizer.py
wisent_guard/core/lm_eval_harness_ground_truth.py
wisent_guard/core/log_likelihoods_evaluator.py
wisent_guard/core/managed_cached_benchmarks.py
wisent_guard/core/mixed_benchmark_sampler.py
wisent_guard/core/model_config_manager.py
wisent_guard/core/model_persistence.py
wisent_guard/core/multi_steering.py
wisent_guard/core/parser.py
wisent_guard/core/representation.py
wisent_guard/core/sample_size_optimizer.py
wisent_guard/core/sample_size_optimizer_v2.py
wisent_guard/core/save_results.py
wisent_guard/core/secure_code_evaluator.py
wisent_guard/core/steering.py
wisent_guard/core/steering_method.py
wisent_guard/core/steering_optimizer.py
wisent_guard/core/task_interface.py
wisent_guard/core/task_selector.py
wisent_guard/core/time_estimator.py
wisent_guard/core/timing_calibration.py
wisent_guard/core/user_model_config.py
wisent_guard/core/activations/__init__.py
wisent_guard/core/activations/core/__init__.py
wisent_guard/core/activations/core/activations_collector.py
wisent_guard/core/activations/core/atoms.py
wisent_guard/core/agent/__init__.py
wisent_guard/core/agent/budget.py
wisent_guard/core/agent/device_benchmarks.py
wisent_guard/core/agent/diagnose.py
wisent_guard/core/agent/steer.py
wisent_guard/core/agent/timeout.py
wisent_guard/core/agent/diagnose/__init__.py
wisent_guard/core/agent/diagnose/agent_classifier_decision.py
wisent_guard/core/agent/diagnose/classifier_marketplace.py
wisent_guard/core/agent/diagnose/create_classifier.py
wisent_guard/core/agent/diagnose/response_diagnostics.py
wisent_guard/core/agent/diagnose/select_classifiers.py
wisent_guard/core/agent/diagnose/synthetic_classifier_option.py
wisent_guard/core/agent/diagnose/test_synthetic_classifier.py
wisent_guard/core/agent/diagnose/tasks/__init__.py
wisent_guard/core/agent/diagnose/tasks/task_manager.py
wisent_guard/core/agent/diagnose/tasks/task_relevance.py
wisent_guard/core/agent/diagnose/tasks/task_selector.py
wisent_guard/core/classifiers/__init__.py
wisent_guard/core/classifiers/core/__init__.py
wisent_guard/core/classifiers/core/atoms.py
wisent_guard/core/classifiers/models/__init__.py
wisent_guard/core/classifiers/models/logistic.py
wisent_guard/core/classifiers/models/mlp.py
wisent_guard/core/contrastive_pairs/__init__.py
wisent_guard/core/contrastive_pairs/core/__init__.py
wisent_guard/core/contrastive_pairs/core/atoms.py
wisent_guard/core/contrastive_pairs/core/buliders.py
wisent_guard/core/contrastive_pairs/core/pair.py
wisent_guard/core/contrastive_pairs/core/response.py
wisent_guard/core/contrastive_pairs/core/serialization.py
wisent_guard/core/contrastive_pairs/core/set.py
wisent_guard/core/contrastive_pairs/diagnostics/__init__.py
wisent_guard/core/contrastive_pairs/diagnostics/activations.py
wisent_guard/core/contrastive_pairs/diagnostics/base.py
wisent_guard/core/contrastive_pairs/diagnostics/control_vectors.py
wisent_guard/core/contrastive_pairs/diagnostics/coverage.py
wisent_guard/core/contrastive_pairs/diagnostics/divergence.py
wisent_guard/core/contrastive_pairs/diagnostics/duplicates.py
wisent_guard/core/contrastive_pairs/lm_eval_pairs/__init__.py
wisent_guard/core/contrastive_pairs/lm_eval_pairs/atoms.py
wisent_guard/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py
wisent_guard/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py
wisent_guard/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py
wisent_guard/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/__init__.py
wisent_guard/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py
wisent_guard/core/data_loaders/__init__.py
wisent_guard/core/data_loaders/core/__init__.py
wisent_guard/core/data_loaders/core/atoms.py
wisent_guard/core/data_loaders/loaders/__init__.py
wisent_guard/core/data_loaders/loaders/custom.py
wisent_guard/core/data_loaders/loaders/lm_loader.py
wisent_guard/core/docker/__init__.py
wisent_guard/core/docker/optimized_docker_executor.py
wisent_guard/core/evaluate/__init__.py
wisent_guard/core/evaluate/single_prompt_evaluator.py
wisent_guard/core/evaluate/stop_nonsense.py
wisent_guard/core/evaluators/__init__.py
wisent_guard/core/evaluators/oracles/__init__.py
wisent_guard/core/evaluators/oracles/interactive.py
wisent_guard/core/evaluators/oracles/nlp_evaluator.py
wisent_guard/core/evaluators/oracles/user_specified.py
wisent_guard/core/models/__init__.py
wisent_guard/core/models/wisent_model.py
wisent_guard/core/models/core/__init__.py
wisent_guard/core/models/core/atoms.py
wisent_guard/core/optuna/__init__.py
wisent_guard/core/optuna/classifier/__init__.py
wisent_guard/core/optuna/classifier/activation_generator.py
wisent_guard/core/optuna/classifier/classifier_cache.py
wisent_guard/core/optuna/classifier/optuna_classifier_optimizer.py
wisent_guard/core/optuna/steering/__init__.py
wisent_guard/core/optuna/steering/bigcode_evaluator_wrapper.py
wisent_guard/core/optuna/steering/data_utils.py
wisent_guard/core/optuna/steering/metrics.py
wisent_guard/core/optuna/steering/optuna_pipeline.py
wisent_guard/core/optuna/steering/steering_optimization.py
wisent_guard/core/prompts/__init__.py
wisent_guard/core/prompts/core/__init__.py
wisent_guard/core/prompts/core/atom.py
wisent_guard/core/prompts/core/prompt_formater.py
wisent_guard/core/prompts/prompt_stratiegies/__init__.py
wisent_guard/core/prompts/prompt_stratiegies/direct_completion.py
wisent_guard/core/prompts/prompt_stratiegies/instruction_following.py
wisent_guard/core/prompts/prompt_stratiegies/multiple_choice.py
wisent_guard/core/prompts/prompt_stratiegies/role_playing.py
wisent_guard/core/steering_methods/__init__.py
wisent_guard/core/steering_methods/core/__init__.py
wisent_guard/core/steering_methods/core/atoms.py
wisent_guard/core/steering_methods/methods/__init__.py
wisent_guard/core/steering_methods/methods/caa.py
wisent_guard/core/tasks/__init__.py
wisent_guard/core/tasks/aime_task.py
wisent_guard/core/tasks/file_task.py
wisent_guard/core/tasks/hle_task.py
wisent_guard/core/tasks/hmmt_task.py
wisent_guard/core/tasks/livecodebench_task.py
wisent_guard/core/tasks/livemathbench_task.py
wisent_guard/core/tasks/lm_eval_task.py
wisent_guard/core/tasks/math500_task.py
wisent_guard/core/tasks/polymath_task.py
wisent_guard/core/tasks/supergpqa_task.py
wisent_guard/core/tracking/__init__.py
wisent_guard/core/tracking/latency.py
wisent_guard/core/tracking/memory.py
wisent_guard/core/trainers/__init__.py
wisent_guard/core/trainers/steering_trainer.py
wisent_guard/core/trainers/core/__init__.py
wisent_guard/core/trainers/core/atoms.py
wisent_guard/parameters/__init__.py
wisent_guard/parameters/task_config.py
wisent_guard/utils/__init__.py
wisent_guard/utils/evaluate_layer_performance.py
wisent_guard/utils/evaluate_layer_performance_truthfulqa.py
wisent_guard/utils/helpers.py
wisent_guard/utils/logger.py