.gitattributes
.gitignore
.nojekyll
.pre-commit-config.yaml
LICENSE
README.md
env.yml
index.html
pyproject.toml
requirements.txt
uv.lock
.github/workflows/publish-pypi.yml
.github/workflows/push.yml
assets/lobster.png
docs/CONTRIBUTORS.md
examples/inference.py
examples/intervene.py
notebooks/01-inference.ipynb
notebooks/02-intervention.ipynb
notebooks/03-architecture-analyzer.ipynb
notebooks/04-ume-multimodal-embeddings.ipynb
slurm/README.md
slurm/scripts/train_ume.sh
src/lbster.egg-info/PKG-INFO
src/lbster.egg-info/SOURCES.txt
src/lbster.egg-info/dependency_links.txt
src/lbster.egg-info/entry_points.txt
src/lbster.egg-info/requires.txt
src/lbster.egg-info/top_level.txt
src/lobster/__init__.py
src/lobster/assets/__init__.py
src/lobster/assets/ab_vocab.txt
src/lobster/assets/pmlm_vocab.txt
src/lobster/assets/protein_vocab.txt
src/lobster/assets/3di_tokenizer/__init__.py
src/lobster/assets/3di_tokenizer/special_tokens_map.json
src/lobster/assets/3di_tokenizer/tokenizer_config.json
src/lobster/assets/3di_tokenizer/vocab.txt
src/lobster/assets/amino_acid_tokenizer/__init__.py
src/lobster/assets/amino_acid_tokenizer/special_tokens_map.json
src/lobster/assets/amino_acid_tokenizer/tokenizer.json
src/lobster/assets/amino_acid_tokenizer/tokenizer_config.json
src/lobster/assets/cdna_tokenizer/__init__.py
src/lobster/assets/cdna_tokenizer/special_tokens_map.json
src/lobster/assets/cdna_tokenizer/tokenizer_config.json
src/lobster/assets/cdna_tokenizer/vocab.txt
src/lobster/assets/codon_tables/codon_table.json
src/lobster/assets/codon_tables/vendor_codon_table.json
src/lobster/assets/concepts/biological_process_unique_values.txt
src/lobster/assets/concepts/cellular_component_unique_values.txt
src/lobster/assets/concepts/cluster_name_unique_values.txt
src/lobster/assets/concepts/members_unique_values.txt
src/lobster/assets/concepts/molecular_function_unique_values.txt
src/lobster/assets/concepts/organism_unique_values.txt
src/lobster/assets/concepts/rep_id_unique_values.txt
src/lobster/assets/concepts/taxon_id_unique_values.txt
src/lobster/assets/concepts/taxon_unique_values.txt
src/lobster/assets/concepts/unique_identifier_unique_values.txt
src/lobster/assets/cortex_ab_tokenizer/__init__.py
src/lobster/assets/cortex_ab_tokenizer/special_tokens_map.json
src/lobster/assets/cortex_ab_tokenizer/tokenizer_config.json
src/lobster/assets/cortex_ab_tokenizer/vocab.txt
src/lobster/assets/hyena_tokenizer/__init__.py
src/lobster/assets/hyena_tokenizer/special_tokens_map.json
src/lobster/assets/hyena_tokenizer/tokenizer_config.json
src/lobster/assets/hyena_tokenizer/vocab.txt
src/lobster/assets/latent_generator_tokenizer/special_tokens_map.json
src/lobster/assets/latent_generator_tokenizer/tokenizer.json
src/lobster/assets/latent_generator_tokenizer/tokenizer_config.json
src/lobster/assets/latent_generator_tokenizer/vocab.txt
src/lobster/assets/mgm_tokenizer/__init__.py
src/lobster/assets/mgm_tokenizer/special_tokens_map.json
src/lobster/assets/mgm_tokenizer/tokenizer_config.json
src/lobster/assets/mgm_tokenizer/vocab.txt
src/lobster/assets/nucleotide_tokenizer/special_tokens_map.json
src/lobster/assets/nucleotide_tokenizer/tokenizer.json
src/lobster/assets/nucleotide_tokenizer/tokenizer_config.json
src/lobster/assets/pmlm_tokenizer/__init__.py
src/lobster/assets/pmlm_tokenizer/special_tokens_map.json
src/lobster/assets/pmlm_tokenizer/tokenizer_config.json
src/lobster/assets/pmlm_tokenizer/vocab.txt
src/lobster/assets/pmlm_tokenizer_32/__init__.py
src/lobster/assets/pmlm_tokenizer_32/special_tokens_map.json
src/lobster/assets/pmlm_tokenizer_32/tokenizer_config.json
src/lobster/assets/pmlm_tokenizer_32/vocab.txt
src/lobster/assets/smiles_tokenizer/special_tokens_map.json
src/lobster/assets/smiles_tokenizer/tokenizer.json
src/lobster/assets/smiles_tokenizer/tokenizer_config.json
src/lobster/assets/smiles_tokenizer/vocab.txt
src/lobster/assets/ume_tokenizers/special_tokens.txt
src/lobster/assets/ume_tokenizers/amino_acid_tokenizer/special_tokens_map.json
src/lobster/assets/ume_tokenizers/amino_acid_tokenizer/tokenizer.json
src/lobster/assets/ume_tokenizers/amino_acid_tokenizer/tokenizer_config.json
src/lobster/assets/ume_tokenizers/amino_acid_tokenizer/vocab.txt
src/lobster/assets/ume_tokenizers/latent_generator_tokenizer/special_tokens_map.json
src/lobster/assets/ume_tokenizers/latent_generator_tokenizer/tokenizer.json
src/lobster/assets/ume_tokenizers/latent_generator_tokenizer/tokenizer_config.json
src/lobster/assets/ume_tokenizers/nucleotide_tokenizer/special_tokens_map.json
src/lobster/assets/ume_tokenizers/nucleotide_tokenizer/tokenizer.json
src/lobster/assets/ume_tokenizers/nucleotide_tokenizer/tokenizer_config.json
src/lobster/assets/ume_tokenizers/nucleotide_tokenizer/vocab.txt
src/lobster/assets/ume_tokenizers/smiles_tokenizer/special_tokens_map.json
src/lobster/assets/ume_tokenizers/smiles_tokenizer/tokenizer.json
src/lobster/assets/ume_tokenizers/smiles_tokenizer/tokenizer_config.json
src/lobster/assets/uniref_tokenzier/biological_process_unique_values_100.txt
src/lobster/assets/uniref_tokenzier/cellular_component_unique_values_100.txt
src/lobster/assets/uniref_tokenzier/cluster_name_unique_values_100.txt
src/lobster/assets/uniref_tokenzier/members_unique_values.txt
src/lobster/assets/uniref_tokenzier/molecular_function_unique_values_100.txt
src/lobster/assets/uniref_tokenzier/organism_unique_values_100.txt
src/lobster/assets/uniref_tokenzier/taxon_id_unique_values.txt
src/lobster/assets/uniref_tokenzier/taxon_unique_values_100.txt
src/lobster/callbacks/__init__.py
src/lobster/callbacks/_calm_linear_probe_callback.py
src/lobster/callbacks/_dataloader_checkpoint_callback.py
src/lobster/callbacks/_linear_probe_callback.py
src/lobster/callbacks/_moleculeace_linear_probe_callback.py
src/lobster/callbacks/_peer_evaluation_callback.py
src/lobster/callbacks/_tokens_per_second_callback.py
src/lobster/cmdline/__init__.py
src/lobster/cmdline/_embed.py
src/lobster/cmdline/_eval_embed.py
src/lobster/cmdline/_intervene.py
src/lobster/cmdline/_intervene_multiproperty.py
src/lobster/cmdline/_perplexity.py
src/lobster/cmdline/_predict.py
src/lobster/cmdline/_train.py
src/lobster/cmdline/_utils.py
src/lobster/concepts/__init__.py
src/lobster/concepts/_descriptors.py
src/lobster/concepts/_large_molecule_descriptors.py
src/lobster/concepts/_utils.py
src/lobster/constants/.gitkeep
src/lobster/constants/__init__.py
src/lobster/constants/_architecture_analyzer.py
src/lobster/constants/_calm_tasks.py
src/lobster/constants/_modality.py
src/lobster/constants/_moleculeace_tasks.py
src/lobster/constants/_peer_tasks.py
src/lobster/constants/_split.py
src/lobster/constants/_weighted_concat_sampler_chunk_size.py
src/lobster/data/__init__.py
src/lobster/data/_calm_datamodule.py
src/lobster/data/_chembl_datamodule.py
src/lobster/data/_collate.py
src/lobster/data/_constants.py
src/lobster/data/_dataframe_dataset_in_memory.py
src/lobster/data/_dyab_data.py
src/lobster/data/_farthest_first_traversal.py
src/lobster/data/_fasta_datamodule.py
src/lobster/data/_imports.py
src/lobster/data/_m3_20m_datamodule.py
src/lobster/data/_minhasher.py
src/lobster/data/_mmseqs.py
src/lobster/data/_ppi_sequence_datamodule.py
src/lobster/data/_structure_datamodule.py
src/lobster/data/_ume_datamodule.py
src/lobster/data/_utils.py
src/lobster/datasets/__init__.py
src/lobster/datasets/_amplify_dataset.py
src/lobster/datasets/_atomica_dataset.py
src/lobster/datasets/_calm_dataset.py
src/lobster/datasets/_calm_property_dataset.py
src/lobster/datasets/_distributed_environment_utils.py
src/lobster/datasets/_fasta_dataset.py
src/lobster/datasets/_huggingface_iterable_dataset.py
src/lobster/datasets/_latent_generator_3d_coordinates_dataset.py
src/lobster/datasets/_m3_20m_dataset.py
src/lobster/datasets/_moleculeace_dataset.py
src/lobster/datasets/_multiplexed_sampling_dataset.py
src/lobster/datasets/_open_genome_2.py
src/lobster/datasets/_peer_dataset.py
src/lobster/datasets/_round_robin_concat_iterable_dataset.py
src/lobster/datasets/_shuffled_iterable_dataset.py
src/lobster/datasets/_zinc_dataset.py
src/lobster/extern/openfold_utils/__init__.py
src/lobster/extern/openfold_utils/_data_pipeline.py
src/lobster/extern/openfold_utils/_data_transforms.py
src/lobster/extern/openfold_utils/_fape.py
src/lobster/extern/openfold_utils/_feats.py
src/lobster/extern/openfold_utils/_protein.py
src/lobster/extern/openfold_utils/_residue_constants.py
src/lobster/extern/openfold_utils/_rigids.py
src/lobster/extern/openfold_utils/_tensor_utils.py
src/lobster/extern/openfold_utils/resources/stereo_chemical_props.txt
src/lobster/features/__init__.py
src/lobster/features/_feature.py
src/lobster/hydra_config/__init__.py
src/lobster/hydra_config/embed.yaml
src/lobster/hydra_config/intervene.yaml
src/lobster/hydra_config/intervene_multiproperty.yaml
src/lobster/hydra_config/perplexity.yaml
src/lobster/hydra_config/predict.yaml
src/lobster/hydra_config/train.yaml
src/lobster/hydra_config/callbacks/base.yaml
src/lobster/hydra_config/callbacks/calm_linear_probe.yaml
src/lobster/hydra_config/callbacks/calm_linear_probe_fast.yaml
src/lobster/hydra_config/callbacks/default.yaml
src/lobster/hydra_config/callbacks/default_eval.yaml
src/lobster/hydra_config/callbacks/early_stopping.yaml
src/lobster/hydra_config/callbacks/lr_monitor.yaml
src/lobster/hydra_config/callbacks/model_checkpoint.yaml
src/lobster/hydra_config/callbacks/moleculeace_linear_probe.yaml
src/lobster/hydra_config/callbacks/moleculeace_linear_probe_fast.yaml
src/lobster/hydra_config/callbacks/naturalness.yaml
src/lobster/hydra_config/callbacks/progress_bar.yaml
src/lobster/hydra_config/callbacks/throughput.yaml
src/lobster/hydra_config/callbacks/timer.yaml
src/lobster/hydra_config/callbacks/tokens_per_second.yaml
src/lobster/hydra_config/data/base.yaml
src/lobster/hydra_config/data/calm.yaml
src/lobster/hydra_config/data/cath.yaml
src/lobster/hydra_config/data/chembl.yaml
src/lobster/hydra_config/data/cmap.yaml
src/lobster/hydra_config/data/cyno_pk.yaml
src/lobster/hydra_config/data/dyab.yaml
src/lobster/hydra_config/data/fasta.yaml
src/lobster/hydra_config/data/m320m.yaml
src/lobster/hydra_config/data/ppi.yaml
src/lobster/hydra_config/data/ppi_infer.yaml
src/lobster/hydra_config/data/ume.yaml
src/lobster/hydra_config/data/transform_fn/hyena_tokenizer_transform.yaml
src/lobster/hydra_config/data/transform_fn/mgm_tokenizer_transform.yaml
src/lobster/hydra_config/data/transform_fn/nucleotide_tokenizer_transform.yaml
src/lobster/hydra_config/data/transform_fn/pmlm_tokenizer_transform.yaml
src/lobster/hydra_config/data/transform_fn/pt5_teacher_forcing_transform.yaml
src/lobster/hydra_config/data/transform_fn/smiles_tokenizer_transform.yaml
src/lobster/hydra_config/experiment/train_ume.yaml
src/lobster/hydra_config/logger/csv.yaml
src/lobster/hydra_config/logger/wandb.yaml
src/lobster/hydra_config/lr_scheduler/default.yaml
src/lobster/hydra_config/lr_scheduler/wsd.yaml
src/lobster/hydra_config/model/clip.yaml
src/lobster/hydra_config/model/clm.yaml
src/lobster/hydra_config/model/cmap.yaml
src/lobster/hydra_config/model/dyab.yaml
src/lobster/hydra_config/model/hyena.yaml
src/lobster/hydra_config/model/lobsterfold.yaml
src/lobster/hydra_config/model/mgm.yaml
src/lobster/hydra_config/model/mlm.yaml
src/lobster/hydra_config/model/mlp.yaml
src/lobster/hydra_config/model/modern_bert.yaml
src/lobster/hydra_config/model/ppi.yaml
src/lobster/hydra_config/model/seq2seq.yaml
src/lobster/hydra_config/model/ume.yaml
src/lobster/hydra_config/paths/default.yaml
src/lobster/hydra_config/plugins/base.yaml
src/lobster/hydra_config/plugins/bitsandbytes.yaml
src/lobster/hydra_config/plugins/default.yaml
src/lobster/hydra_config/setup/default.yaml
src/lobster/hydra_config/setup/seed/default.yaml
src/lobster/hydra_config/setup/torch/default.yaml
src/lobster/hydra_config/trainer/default.yaml
src/lobster/metrics/__init__.py
src/lobster/metrics/_binary_classification.py
src/lobster/model/__init__.py
src/lobster/model/_cbmlm.py
src/lobster/model/_clip.py
src/lobster/model/_clm.py
src/lobster/model/_clm_configuration.py
src/lobster/model/_cmap.py
src/lobster/model/_conditioanalclassifiermlm.py
src/lobster/model/_conditioanalmlm.py
src/lobster/model/_dyab.py
src/lobster/model/_linear_probe.py
src/lobster/model/_lobster_fold.py
src/lobster/model/_lobster_fold_base.py
src/lobster/model/_lobster_fold_configuration.py
src/lobster/model/_mgm.py
src/lobster/model/_mlm.py
src/lobster/model/_mlm_configuration.py
src/lobster/model/_mlp.py
src/lobster/model/_peft_lightning_module.py
src/lobster/model/_pooler.py
src/lobster/model/_pooling_layers.py
src/lobster/model/_ppi_clf.py
src/lobster/model/_seq2seq.py
src/lobster/model/_seq2seq_configuration.py
src/lobster/model/_ume.py
src/lobster/model/_utils.py
src/lobster/model/hyena/__init__.py
src/lobster/model/hyena/_hyena.py
src/lobster/model/hyena/_hyena_base.py
src/lobster/model/hyena/_hyena_configuration.py
src/lobster/model/llama_base/__init__.py
src/lobster/model/llama_base/_attn_mask_utils.py
src/lobster/model/llama_base/_llama_encoder.py
src/lobster/model/lm_base/__init__.py
src/lobster/model/lm_base/_lm_base.py
src/lobster/model/lm_base/_lm_base_heads.py
src/lobster/model/lm_base/_utils.py
src/lobster/model/modern_bert/LICENSE
src/lobster/model/modern_bert/__init__.py
src/lobster/model/modern_bert/_activation.py
src/lobster/model/modern_bert/_attention.py
src/lobster/model/modern_bert/_config.py
src/lobster/model/modern_bert/_embedding.py
src/lobster/model/modern_bert/_initialization.py
src/lobster/model/modern_bert/_layers.py
src/lobster/model/modern_bert/_mlp.py
src/lobster/model/modern_bert/_model.py
src/lobster/model/modern_bert/_modern_bert.py
src/lobster/model/modern_bert/_modern_bert_configuration.py
src/lobster/model/modern_bert/_normalization.py
src/lobster/model/modern_bert/_padding.py
src/lobster/model/modern_bert/_rotary.py
src/lobster/model/modern_bert/_utils.py
src/lobster/model/openfold_utils/__init__.py
src/lobster/model/openfold_utils/_data_pipeline.py
src/lobster/model/openfold_utils/_data_transforms.py
src/lobster/model/openfold_utils/_fape.py
src/lobster/model/openfold_utils/_feats.py
src/lobster/model/openfold_utils/_protein.py
src/lobster/model/openfold_utils/_residue_constants.py
src/lobster/model/openfold_utils/_rigids.py
src/lobster/model/openfold_utils/_tensor_utils.py
src/lobster/model/openfold_utils/resources/stereo_chemical_props.txt
src/lobster/model/utils/__init__.py
src/lobster/model/utils/_architecture_analyzer.py
src/lobster/tokenization/__init__.py
src/lobster/tokenization/_amino_acid.py
src/lobster/tokenization/_cached_bert_tokenizer.py
src/lobster/tokenization/_hyena_tokenizer.py
src/lobster/tokenization/_hyena_tokenizer_transform.py
src/lobster/tokenization/_latent_generator_3d_coord_tokenizer.py
src/lobster/tokenization/_load_vocab_file.py
src/lobster/tokenization/_make_pretrained_tokenizer_fast.py
src/lobster/tokenization/_mgm_tokenizer.py
src/lobster/tokenization/_mgm_tokenizer_transform.py
src/lobster/tokenization/_nucleotide_tokenizer.py
src/lobster/tokenization/_pmlm_custom_concept_tokenizer_transform.py
src/lobster/tokenization/_pmlm_tokenizer.py
src/lobster/tokenization/_pmlm_tokenizer_transform.py
src/lobster/tokenization/_smiles_tokenizer.py
src/lobster/tokenization/_ume_tokenizers.py
src/lobster/transforms/__init__.py
src/lobster/transforms/_atom3d_ppi_transforms.py
src/lobster/transforms/_auto_tokenizer_transform.py
src/lobster/transforms/_binarize.py
src/lobster/transforms/_convert_seqs.py
src/lobster/transforms/_lambda.py
src/lobster/transforms/_structure_featurizer.py
src/lobster/transforms/_tokenizer_transform.py
src/lobster/transforms/_transform.py
src/lobster/transforms/_utils.py
src/lobster/transforms/functional/__init__.py
src/lobster/transforms/functional/_biopython.py
src/lobster/transforms/functional/_sample_item.py
src/lobster/transforms/functional/_sample_tokenized_input.py
static/css/bulma-carousel.min.css
static/css/bulma-slider.min.css
static/css/bulma.css.map.txt
static/css/bulma.min.css
static/css/fontawesome.all.min.css
static/css/index.css
static/images/carousel1.png
static/images/carousel2.png
static/images/favicon.ico
static/js/bulma-carousel.js
static/js/bulma-carousel.min.js
static/js/bulma-slider.js
static/js/bulma-slider.min.js
static/js/fontawesome.all.min.js
static/js/index.js
static/pdfs/Cramming_MLSB_Neurips_2023_poster.pdf
static/videos/banner_video.mp4
static/videos/carousel1.mp4
static/videos/carousel2.mp4
static/videos/carousel3.mp4
test_data/2ah5A02.pdb
test_data/fv.pdb
test_data/query.fasta
test_data/test.pdb
test_data/pdbs/1a2oA01
test_data/pdbs/1ae2A00
tests/__init__.py
tests/test_placeholder.py
tests/lobster/__init__.py
tests/lobster/conftest.py
tests/lobster/test__imports.py
tests/lobster/callbacks/test__calm_linear_probe_callback.py
tests/lobster/callbacks/test__dataloader_checkpoint_callback.py
tests/lobster/callbacks/test__peer_evaluation_callback.py
tests/lobster/callbacks/test__tokens_per_second_callback.py
tests/lobster/cmdline/__init__.py
tests/lobster/cmdline/test__cmdline.py
tests/lobster/data/__init__.py
tests/lobster/data/test__calm_datamodule.py
tests/lobster/data/test__chembl_datamodule.py
tests/lobster/data/test__farthest_first_traversal.py
tests/lobster/data/test__fasta_lightning_datamodule.py
tests/lobster/data/test__minhasher.py
tests/lobster/data/test__mmseqs.py
tests/lobster/data/test__ume_datamodule.py
tests/lobster/datasets/__init__.py
tests/lobster/datasets/test__amplify_dataset.py
tests/lobster/datasets/test__atomica_dataset.py
tests/lobster/datasets/test__calm_dataset.py
tests/lobster/datasets/test__concat_iterable_dataset.py
tests/lobster/datasets/test__huggingface_iterable_dataset.py
tests/lobster/datasets/test__latent_generator_3d_coord_dataset.py
tests/lobster/datasets/test__m3_20m_dataset.py
tests/lobster/datasets/test__multiplexed_sampling_dataset.py
tests/lobster/datasets/test__shuffled_iterable_dataset.py
tests/lobster/datasets/test__zinc_dataset.py
tests/lobster/model/__init__.py
tests/lobster/model/test__cbm.py
tests/lobster/model/test__clm.py
tests/lobster/model/test__dyab.py
tests/lobster/model/test__foldseek_transform.py
tests/lobster/model/test__linear_probe.py
tests/lobster/model/test__lobsterfold.py
tests/lobster/model/test__mgm.py
tests/lobster/model/test__mlm.py
tests/lobster/model/test__mlp.py
tests/lobster/model/test__peft_lightning_module.py
tests/lobster/model/test__pooler.py
tests/lobster/model/test__ppi.py
tests/lobster/model/test__ume.py
tests/lobster/model/hyena/__init__.py
tests/lobster/model/hyena/test__hyena.py
tests/lobster/model/modern_bert/test__modern_bert.py
tests/lobster/model/openfold_utils/test__fape.py
tests/lobster/model/utils/test__architecture_analyzer.py
tests/lobster/tokenization/__init__.py
tests/lobster/tokenization/test__hyena_tokenizer.py
tests/lobster/tokenization/test__hyena_tokenizer_transform.py
tests/lobster/tokenization/test__latent_generator_tokenizer.py
tests/lobster/tokenization/test__make_pretrained_tokenizer_fast.py
tests/lobster/tokenization/test__mgm_tokenizer.py
tests/lobster/tokenization/test__mgm_tokenizer_transform.py
tests/lobster/tokenization/test__nucleotide_tokenizer.py
tests/lobster/tokenization/test__pmlm_tokenizer.py
tests/lobster/tokenization/test__smiles_tokenizer.py
tests/lobster/tokenization/test__ume_tokenizers.py
tests/lobster/transforms/__init__.py
tests/lobster/transforms/test__binarize_transform.py
tests/lobster/transforms/test__convert_seqs.py
tests/lobster/transforms/test__structure_featurizer.py
tests/lobster/transforms/test__tokenizer_transform.py
tests/lobster/transforms/test__utils.py