LICENSE
MANIFEST.in
README.md
pyproject.toml
src/examples/basic_example.py
src/examples/ddp_example.py
src/examples/fsdp_gpu_example.py
src/examples/hf-trainer-minimal.py
src/examples/input-stall.py
src/examples/advanced/bert_ddp.py
src/examples/advanced/bert_gradient_accum.py
src/examples/advanced/bert_single_gpu.py
src/examples/advanced/cnn_mnist.py
src/examples/advanced/hf_trainer_integration.py
src/examples/advanced/hf_trainer_vision.py
src/examples/advanced/llama_finetuning.py
src/examples/advanced/test_lightning_traceml.py
src/examples/advanced/vit_ddp.py
src/trainlens/__init__.py
src/trainlens/cli.py
src/trainlens/decorators.py
src/trainlens/sdk.py
src/trainlens/server_launcher.py
src/trainlens/aggregator/__init__.py
src/trainlens/aggregator/action_engine.py
src/trainlens/aggregator/aggregator_main.py
src/trainlens/aggregator/auth.py
src/trainlens/aggregator/correlation_engine.py
src/trainlens/aggregator/fault_interpreter.py
src/trainlens/aggregator/fault_model.py
src/trainlens/aggregator/fault_signatures.py
src/trainlens/aggregator/final_summary.py
src/trainlens/aggregator/k8s_rest_api.py
src/trainlens/aggregator/project_registry.py
src/trainlens/aggregator/serve_api.py
src/trainlens/aggregator/sqlite_writer.py
src/trainlens/aggregator/thresholds.py
src/trainlens/aggregator/trace_aggregator.py
src/trainlens/aggregator/training_context.py
src/trainlens/aggregator/display_drivers/__init__.py
src/trainlens/aggregator/display_drivers/base.py
src/trainlens/aggregator/display_drivers/cli.py
src/trainlens/aggregator/display_drivers/layout.py
src/trainlens/aggregator/display_drivers/page_layout.py
src/trainlens/aggregator/ml/__init__.py
src/trainlens/aggregator/ml/attribution.py
src/trainlens/aggregator/ml/auto_labeler.py
src/trainlens/aggregator/ml/auto_terminator.py
src/trainlens/aggregator/ml/batch_logger.py
src/trainlens/aggregator/ml/batch_logging_pipeline.py
src/trainlens/aggregator/ml/bootstrap.py
src/trainlens/aggregator/ml/calibration_pipeline.py
src/trainlens/aggregator/ml/causal_attribution.py
src/trainlens/aggregator/ml/checkpoint_writer.py
src/trainlens/aggregator/ml/cold_start.py
src/trainlens/aggregator/ml/cpd_shadow.py
src/trainlens/aggregator/ml/cross_run_attributor.py
src/trainlens/aggregator/ml/cross_run_store.py
src/trainlens/aggregator/ml/deploy.py
src/trainlens/aggregator/ml/drift_detector.py
src/trainlens/aggregator/ml/emitter_bus.py
src/trainlens/aggregator/ml/ensemble_predictor.py
src/trainlens/aggregator/ml/feature_extractor.py
src/trainlens/aggregator/ml/gate.py
src/trainlens/aggregator/ml/model_registry.py
src/trainlens/aggregator/ml/model_store.py
src/trainlens/aggregator/ml/model_trainer.py
src/trainlens/aggregator/ml/odal_queue.py
src/trainlens/aggregator/ml/pipeline_manager.py
src/trainlens/aggregator/ml/prediction_pipeline.py
src/trainlens/aggregator/ml/prediction_store.py
src/trainlens/aggregator/ml/rocket_predictor.py
src/trainlens/aggregator/ml/rocket_transform.py
src/trainlens/aggregator/ml/run_store.py
src/trainlens/aggregator/ml/self_calibration.py
src/trainlens/aggregator/ml/step_buffer.py
src/trainlens/aggregator/ml/straggler_classifier.py
src/trainlens/aggregator/ml/survival.py
src/trainlens/aggregator/ml/synthetic_data.py
src/trainlens/aggregator/ml/tcn_predictor.py
src/trainlens/aggregator/ml/tcn_trainer.py
src/trainlens/aggregator/ml/uncertain_path.py
src/trainlens/aggregator/ml/xgboost_predictor.py
src/trainlens/aggregator/ml/emitters/__init__.py
src/trainlens/aggregator/ml/emitters/alert_emitter.py
src/trainlens/aggregator/ml/emitters/mlflow_emitter.py
src/trainlens/aggregator/ml/emitters/otel_emitter.py
src/trainlens/aggregator/ml/emitters/prometheus_emitter.py
src/trainlens/aggregator/ml/emitters/wandb_emitter.py
src/trainlens/aggregator/ml/emitters/webhook_emitter.py
src/trainlens/aggregator/sqlite_writers/__init__.py
src/trainlens/aggregator/sqlite_writers/batch_record.py
src/trainlens/aggregator/sqlite_writers/hang_event.py
src/trainlens/aggregator/sqlite_writers/run_record.py
src/trainlens/aggregator/sqlite_writers/sdc.py
src/trainlens/aggregator/sqlite_writers/sdc_guard.py
src/trainlens/aggregator/sqlite_writers/step_signal.py
src/trainlens/aggregator/sqlite_writers/step_time.py
src/trainlens/aggregator/sqlite_writers/straggler.py
src/trainlens/aggregator/sqlite_writers/system.py
src/trainlens/aggregator/summaries/__init__.py
src/trainlens/aggregator/summaries/cross_run_attribution.py
src/trainlens/aggregator/summaries/dataloader_savings.py
src/trainlens/aggregator/summaries/step_time.py
src/trainlens/aggregator/summaries/system.py
src/trainlens/database/__init__.py
src/trainlens/database/database.py
src/trainlens/database/database_sender.py
src/trainlens/database/database_writer.py
src/trainlens/database/remote_database_store.py
src/trainlens/dataloader_autotuner/__init__.py
src/trainlens/dataloader_autotuner/candidates.py
src/trainlens/dataloader_autotuner/cloud_emitter.py
src/trainlens/dataloader_autotuner/config_store.py
src/trainlens/dataloader_autotuner/sh_search.py
src/trainlens/dataloader_autotuner/tune.py
src/trainlens/finetune/__init__.py
src/trainlens/finetune/alerts.py
src/trainlens/finetune/capture.py
src/trainlens/finetune/cohort_confidence.py
src/trainlens/finetune/rule_detectors.py
src/trainlens/finetune/runtime.py
src/trainlens/finetune/trajectory_store.py
src/trainlens/integrations/__init__.py
src/trainlens/integrations/huggingface.py
src/trainlens/integrations/lightning.py
src/trainlens/k8s/__init__.py
src/trainlens/k8s/node_agent.py
src/trainlens/k8s/operator.py
src/trainlens/k8s/webhook.py
src/trainlens/loggers/__init__.py
src/trainlens/loggers/error_log.py
src/trainlens/renderers/__init__.py
src/trainlens/renderers/base_renderer.py
src/trainlens/renderers/scale_utils.py
src/trainlens/renderers/stdout_stderr_renderer.py
src/trainlens/renderers/user_time_renderer.py
src/trainlens/renderers/utils.py
src/trainlens/renderers/comm_overlap/__init__.py
src/trainlens/renderers/comm_overlap/renderer.py
src/trainlens/renderers/diagnostic/__init__.py
src/trainlens/renderers/diagnostic/renderer.py
src/trainlens/renderers/fsdp/__init__.py
src/trainlens/renderers/fsdp/renderer.py
src/trainlens/renderers/grad_norm/__init__.py
src/trainlens/renderers/grad_norm/renderer.py
src/trainlens/renderers/layer_combined_memory/__init__.py
src/trainlens/renderers/layer_combined_memory/compute.py
src/trainlens/renderers/layer_combined_memory/renderer.py
src/trainlens/renderers/layer_combined_memory/schema.py
src/trainlens/renderers/layer_combined_time/__init__.py
src/trainlens/renderers/layer_combined_time/compute.py
src/trainlens/renderers/layer_combined_time/renderer.py
src/trainlens/renderers/layer_combined_time/schema.py
src/trainlens/renderers/mfu/__init__.py
src/trainlens/renderers/mfu/renderer.py
src/trainlens/renderers/nan_inf/__init__.py
src/trainlens/renderers/nan_inf/renderer.py
src/trainlens/renderers/pp_bubble/__init__.py
src/trainlens/renderers/pp_bubble/renderer.py
src/trainlens/renderers/prediction/__init__.py
src/trainlens/renderers/prediction/renderer.py
src/trainlens/renderers/process/__init__.py
src/trainlens/renderers/process/compute.py
src/trainlens/renderers/process/renderer.py
src/trainlens/renderers/step_memory/compute.py
src/trainlens/renderers/step_memory/renderer.py
src/trainlens/renderers/step_memory/schema.py
src/trainlens/renderers/step_time/__init__.py
src/trainlens/renderers/step_time/compute.py
src/trainlens/renderers/step_time/diagnostics.py
src/trainlens/renderers/step_time/renderer.py
src/trainlens/renderers/step_time/schema.py
src/trainlens/renderers/system/__init__.py
src/trainlens/renderers/system/compute.py
src/trainlens/renderers/system/renderer.py
src/trainlens/renderers/tp/__init__.py
src/trainlens/renderers/tp/renderer.py
src/trainlens/runtime/__init__.py
src/trainlens/runtime/auto_terminate.py
src/trainlens/runtime/config.py
src/trainlens/runtime/config_bucket.py
src/trainlens/runtime/executor.py
src/trainlens/runtime/runtime.py
src/trainlens/runtime/session.py
src/trainlens/runtime/settings.py
src/trainlens/runtime/stdout_stderr_capture.py
src/trainlens/samplers/__init__.py
src/trainlens/samplers/base_sampler.py
src/trainlens/samplers/batch_element_sampler.py
src/trainlens/samplers/comm_overlap_sampler.py
src/trainlens/samplers/ddp_grad_consistency_sampler.py
src/trainlens/samplers/expert_load_sampler.py
src/trainlens/samplers/fsdp_hook_sampler.py
src/trainlens/samplers/grad_norm_sampler.py
src/trainlens/samplers/hardware_sampler.py
src/trainlens/samplers/inv_violation_sampler.py
src/trainlens/samplers/layer_backward_memory_sampler.py
src/trainlens/samplers/layer_backward_time_sampler.py
src/trainlens/samplers/layer_forward_memory_sampler.py
src/trainlens/samplers/layer_forward_time_sampler.py
src/trainlens/samplers/layer_memory_sampler.py
src/trainlens/samplers/mfu_sampler.py
src/trainlens/samplers/model_forward_memory_sampler.py
src/trainlens/samplers/nan_inf_sampler.py
src/trainlens/samplers/network_sampler.py
src/trainlens/samplers/optimizer_state_sampler.py
src/trainlens/samplers/pp_bubble_sampler.py
src/trainlens/samplers/process_sampler.py
src/trainlens/samplers/router_entropy_sampler.py
src/trainlens/samplers/sdc_guard_sampler.py
src/trainlens/samplers/stdout_stderr_sampler.py
src/trainlens/samplers/step_memory_sampler.py
src/trainlens/samplers/step_time_sampler.py
src/trainlens/samplers/system_sampler.py
src/trainlens/samplers/tp_comm_sampler.py
src/trainlens/samplers/tp_hook_sampler.py
src/trainlens/samplers/schema/layer_forward_backward_memory.py
src/trainlens/samplers/schema/layer_forward_backward_time.py
src/trainlens/samplers/schema/layer_memory.py
src/trainlens/samplers/schema/process.py
src/trainlens/samplers/schema/step_memory.py
src/trainlens/samplers/schema/step_time_schema.py
src/trainlens/samplers/schema/system.py
src/trainlens/static/favicon.svg
src/trainlens/static/icons.svg
src/trainlens/static/index.html
src/trainlens/static/assets/index-Bgejll0k.css
src/trainlens/static/assets/index-XBbkBOJt.js
src/trainlens/transport/__init__.py
src/trainlens/transport/distributed.py
src/trainlens/transport/tcp_transport.py
src/trainlens/utils/__init__.py
src/trainlens/utils/base_trace_event.py
src/trainlens/utils/batch_element_queue.py
src/trainlens/utils/comm_overlap.py
src/trainlens/utils/cuda_event_pool.py
src/trainlens/utils/ddp_grad_consistency.py
src/trainlens/utils/entry_hook.py
src/trainlens/utils/flight_recorder_reader.py
src/trainlens/utils/flush_buffers.py
src/trainlens/utils/formatting.py
src/trainlens/utils/fsdp_events.py
src/trainlens/utils/grad_norm.py
src/trainlens/utils/hang_attributor.py
src/trainlens/utils/hang_watchdog.py
src/trainlens/utils/layer_parameter_memory.py
src/trainlens/utils/mfu.py
src/trainlens/utils/nan_inf.py
src/trainlens/utils/nccl_log_parser.py
src/trainlens/utils/optimizer_state.py
src/trainlens/utils/pp_bubble.py
src/trainlens/utils/rank_stall_detector.py
src/trainlens/utils/sdc_canary.py
src/trainlens/utils/sdc_guard.py
src/trainlens/utils/shared_utils.py
src/trainlens/utils/step_loss.py
src/trainlens/utils/step_memory.py
src/trainlens/utils/straggler_tracker.py
src/trainlens/utils/termination_poller.py
src/trainlens/utils/timing.py
src/trainlens/utils/tp_events.py
src/trainlens/utils/traincheck_reader.py
src/trainlens/utils/training_loop_invariants.py
src/trainlens/utils/hooks/__init__.py
src/trainlens/utils/hooks/layer_backward_memory_hook.py
src/trainlens/utils/hooks/layer_backward_time_hooks.py
src/trainlens/utils/hooks/layer_forward_memory_hook.py
src/trainlens/utils/hooks/layer_forward_time_hooks.py
src/trainlens/utils/hooks/model_forward_memory_hook.py
src/trainlens/utils/hooks/optimizer_hook.py
src/trainlens/utils/patches/__init__.py
src/trainlens/utils/patches/backward_auto_timer_patch.py
src/trainlens/utils/patches/dataloader_patch.py
src/trainlens/utils/patches/forward_auto_timer_patch.py
src/trainlens_ai.egg-info/PKG-INFO
src/trainlens_ai.egg-info/SOURCES.txt
src/trainlens_ai.egg-info/dependency_links.txt
src/trainlens_ai.egg-info/entry_points.txt
src/trainlens_ai.egg-info/requires.txt
src/trainlens_ai.egg-info/top_level.txt
tests/test_build_extensions.py
tests/test_cli_args.py
tests/test_cli_attribution.py
tests/test_cli_history.py
tests/test_cli_inspect.py
tests/test_database_writer_rank.py
tests/test_grad_accum.py
tests/test_hf_trainer.py
tests/test_msgpack_roundtrip.py
tests/test_multinode_ddp.py
tests/test_run_store.py
tests/test_seq_counter.py
tests/test_server_launcher.py