.gitignore
.readthedocs.yaml
LICENSE
MANIFEST.in
README.md
pyproject.toml
.github/workflows/python-publish.yml
assets/Figure2_segmentation.png
assets/Figure2_tokenization.png
assets/Figure3_masking2.jpg
assets/Figure4_promoter_db.png
assets/Figure5_umaps.jpg
assets/Figure6_prom_res.png
assets/Figure7_phag_res.png
docs/conf.py
docs/config_utils.rst
docs/datacollator.rst
docs/general_utils.rst
docs/index.rst
docs/prok_datasets.rst
docs/prokbert_logo.png
docs/prokbert_tokenizer.rst
docs/requirements.in
docs/requirements.txt
docs/sequtils.rst
docs/training_utils.rst
docs/api/prokbert.sequtils.load_contigs.rst
docs/api/prokbert.sequtils.pretty_print_overlapping_sequence.rst
envs/Dockerfile
envs/prokbert.def
examples/Embedding_visualization.ipynb
examples/Finetuning.ipynb
examples/Inference.ipynb
examples/ProkBERT_Training_Helper.ipynb
examples/Segmentation.ipynb
examples/Tokenization.ipynb
examples/finetuning.py
examples/prokbert_pretrain.py
examples/prokbert_seqpreprocess.py
examples/data/ESKAPE_sample.fasta
examples/data/pretraining_sample.h5
examples/data/pretraining/ESKAPE_sample00.fasta.gz
examples/data/pretraining/ESKAPE_sample01.fasta.gz
examples/data/pretraining/ESKAPE_sample02.fasta.gz
examples/data/pretraining/ESKAPE_sample03.fasta.gz
examples/data/pretraining/ESKAPE_sample04.fasta.gz
examples/data/pretraining/ESKAPE_sample05.fasta.gz
examples/data/pretraining/ESKAPE_sample06.fasta.gz
examples/data/pretraining/ESKAPE_sample07.fasta.gz
examples/data/pretraining/ESKAPE_sample08.fasta.gz
examples/data/pretraining/ESKAPE_sample09.fasta.gz
examples/data/pretraining/ESKAPE_sample10.fasta.gz
src/prokbert/ProkBERTDataCollator.py
src/prokbert/__init__.py
src/prokbert/config_utils.py
src/prokbert/general_utils.py
src/prokbert/models.py
src/prokbert/parser_utils.py
src/prokbert/prok_datasets.py
src/prokbert/prokbert_tokenizer.py
src/prokbert/sequtils.py
src/prokbert/tokenizer.py
src/prokbert/training_utils.py
src/prokbert/traininghelper_utils.py
src/prokbert.egg-info/PKG-INFO
src/prokbert.egg-info/SOURCES.txt
src/prokbert.egg-info/dependency_links.txt
src/prokbert.egg-info/requires.txt
src/prokbert.egg-info/top_level.txt
src/prokbert/configs/pretraining.yaml
src/prokbert/configs/sequence_processing.yaml
src/prokbert/data/ESKAPE_sample.fasta
src/prokbert/data/pretraining_sample.h5
src/prokbert/data/preprocessed/pretraining.h5
src/prokbert/data/preprocessed/pretraining_k1s1.h5
src/prokbert/data/preprocessed/pretraining_k6s1.h5
src/prokbert/data/preprocessed/pretraining_k6s2.h5
src/prokbert/data/preprocessed/pretraininge.h5
src/prokbert/data/pretraining/ESKAPE_sample00.fasta.gz
src/prokbert/data/pretraining/ESKAPE_sample01.fasta.gz
src/prokbert/data/pretraining/ESKAPE_sample02.fasta.gz
src/prokbert/data/pretraining/ESKAPE_sample03.fasta.gz
src/prokbert/data/pretraining/ESKAPE_sample04.fasta.gz
src/prokbert/data/pretraining/ESKAPE_sample05.fasta.gz
src/prokbert/data/pretraining/ESKAPE_sample06.fasta.gz
src/prokbert/data/pretraining/ESKAPE_sample07.fasta.gz
src/prokbert/data/pretraining/ESKAPE_sample08.fasta.gz
src/prokbert/data/pretraining/ESKAPE_sample09.fasta.gz
src/prokbert/data/pretraining/ESKAPE_sample10.fasta.gz
src/prokbert/data/prokbert_vocabs/prokbert-base-dna1/vocab.txt
src/prokbert/data/prokbert_vocabs/prokbert-base-dna2/vocab.txt
src/prokbert/data/prokbert_vocabs/prokbert-base-dna3/vocab.txt
src/prokbert/data/prokbert_vocabs/prokbert-base-dna4/vocab.txt
src/prokbert/data/prokbert_vocabs/prokbert-base-dna5/vocab.txt
src/prokbert/data/prokbert_vocabs/prokbert-base-dna6/vocab.txt
src/prokbert/data/prokbert_vocabs/prokbert-base-dna7/vocab.txt
src/prokbert/data/prokbert_vocabs/prokbert-base-dna8/vocab.txt
src/prokbert/data/prokbert_vocabs/prokbert-base-dna9/vocab.txt
src/prokbert/data/sample_data/ESKAPE_sample.fasta
src/prokbert/data/sample_data/pretraining/ESKAPE_sample00.fasta.gz
src/prokbert/data/sample_data/pretraining/ESKAPE_sample01.fasta.gz
src/prokbert/data/sample_data/pretraining/ESKAPE_sample02.fasta.gz
src/prokbert/data/sample_data/pretraining/ESKAPE_sample03.fasta.gz
src/prokbert/data/sample_data/pretraining/ESKAPE_sample04.fasta.gz
src/prokbert/data/sample_data/pretraining/ESKAPE_sample05.fasta.gz
src/prokbert/data/sample_data/pretraining/ESKAPE_sample06.fasta.gz
src/prokbert/data/sample_data/pretraining/ESKAPE_sample07.fasta.gz
src/prokbert/data/sample_data/pretraining/ESKAPE_sample08.fasta.gz
src/prokbert/data/sample_data/pretraining/ESKAPE_sample09.fasta.gz
src/prokbert/data/sample_data/pretraining/ESKAPE_sample10.fasta.gz
src/prokbert/tests/__init__.py
src/prokbert/tests/test_configutils.py
src/prokbert/tests/test_general_utils.py
src/prokbert/tests/test_prokBERTDataCollator.py
src/prokbert/tests/test_prok_datasets.py
src/prokbert/tests/test_prokbert_tokenizer.py
src/prokbert/tests/test_sequtils.py