# TEST — one-epoch smoke runs to validate the pipeline before full training.
# All four runs share the exact same flag tail (only the model and the
# train/unsup batch sizes differ), so the shared flags live in one array:
# a change to the smoke configuration is now made in one place instead of
# four. Quoted "${smoke_args[@]}" expansion keeps each flag a separate word.
smoke_args=(
  --training_mode weakly_supervised
  --num_epochs 1 --num_unsup_epochs 1 --num_iter 1 --num_rules 4
  --eval_batch_size 16
  --learning_rate 2e-5 --optimizer adamw --weight_decay 0.01
  --use_scheduler --scheduler_type cosine --warmup_epochs 1
  --max_grad_norm 1.0
  --append_n_mask --nsw_detector --lower_case
  --hard_student_rule --soft_labels
  --sample_size 128 --remove_accents --rm_accent_ratio 1.0
)

python main.py --base_model visobert --pretrained_model_name uitnlp/visobert \
  --train_batch_size 64 --unsup_batch_size 64 "${smoke_args[@]}"

python main.py --base_model bartpho --pretrained_model_name vinai/bartpho-syllable-base \
  --train_batch_size 32 --unsup_batch_size 32 "${smoke_args[@]}"

python main.py --base_model bartpho --pretrained_model_name vinai/bartpho-syllable \
  --train_batch_size 32 --unsup_batch_size 32 "${smoke_args[@]}"

python main.py --base_model vit5 --pretrained_model_name VietAI/vit5-base \
  --train_batch_size 32 --unsup_batch_size 32 "${smoke_args[@]}"

# TRAINING — full weakly-supervised runs. Flags identical across every run
# live in train_common; per-model differences (epochs, iterations, rules,
# batch sizes, warmup, sample size, optional loss weights) stay on the
# individual command lines so each run's hyperparameters remain visible
# at a glance.
train_common=(
  --training_mode weakly_supervised
  --num_unsup_epochs 5
  --learning_rate 2e-5 --optimizer adamw --weight_decay 0.01
  --use_scheduler --scheduler_type cosine
  --max_grad_norm 1.0
  --append_n_mask --nsw_detector --lower_case
  --hard_student_rule --soft_labels
  --remove_accents --rm_accent_ratio 1.0
)

python main.py --base_model visobert --pretrained_model_name uitnlp/visobert \
  --num_epochs 5 --num_iter 10 --num_rules 4 \
  --train_batch_size 1024 --unsup_batch_size 1024 --eval_batch_size 32 \
  --warmup_epochs 2 --sample_size 16192 \
  --nsw_loss_weight 0.5 --n_mask_loss_weight 0.5 \
  "${train_common[@]}"

python main.py --base_model bartpho --pretrained_model_name vinai/bartpho-syllable-base \
  --num_epochs 10 --num_iter 20 --num_rules 4 \
  --train_batch_size 128 --unsup_batch_size 128 --eval_batch_size 16 \
  --warmup_epochs 3 --sample_size 4096 \
  --nsw_loss_weight 0.5 --n_mask_loss_weight 0.5 \
  "${train_common[@]}"

python main.py --base_model bartpho --pretrained_model_name vinai/bartpho-syllable \
  --num_epochs 10 --num_iter 20 --num_rules 4 \
  --train_batch_size 128 --unsup_batch_size 128 --eval_batch_size 16 \
  --warmup_epochs 3 --sample_size 4096 \
  --nsw_loss_weight 0.5 --n_mask_loss_weight 0.5 \
  "${train_common[@]}"

python main.py --base_model phobert --pretrained_model_name vinai/phobert-base-v2 \
  --num_epochs 8 --num_iter 40 --num_rules 3 \
  --train_batch_size 32 --unsup_batch_size 32 --eval_batch_size 8 \
  --warmup_epochs 3 --sample_size 1024 \
  --nsw_loss_weight 0.5 --n_mask_loss_weight 0.5 \
  "${train_common[@]}"

# NOTE(review): the two ViT5 runs below omit --nsw_loss_weight /
# --n_mask_loss_weight (present on the four runs above) — preserved as-is,
# but confirm this asymmetry is intentional.
python main.py --base_model vit5 --pretrained_model_name VietAI/vit5-base \
  --num_epochs 10 --num_iter 20 --num_rules 4 \
  --train_batch_size 128 --unsup_batch_size 128 --eval_batch_size 16 \
  --warmup_epochs 3 --sample_size 4096 \
  "${train_common[@]}"

python main.py --base_model vit5 --pretrained_model_name VietAI/vit5-large \
  --num_epochs 10 --num_iter 20 --num_rules 4 \
  --train_batch_size 64 --unsup_batch_size 64 --eval_batch_size 16 \
  --warmup_epochs 3 --sample_size 4096 \
  "${train_common[@]}"

# Try to export the tokenizer from your trained model first.
python export_tokenizer.py \
  --model_path experiments/visobert/weakly_supervised/student_best/pytorch_model.bin \
  --student_name visobert \
  --output_dir experiments/visobert/weakly_supervised/student_best

# Upload the exported model. The original line used literal <user/repo> and
# <HF_TOKEN> placeholders, which bash parses as I/O redirections (the command
# could never run as written). The values now come from the environment;
# the ${VAR:?message} expansion aborts with the message if a variable is
# unset or empty, so the command fails loudly instead of uploading garbage.
python upload_to_huggingface.py \
  --student_name visobert \
  --repo_name "${HF_REPO:?set HF_REPO to your user/repo before running}" \
  --model_path experiments/visobert/weakly_supervised/student_best/pytorch_model.bin \
  --token "${HF_TOKEN:?set HF_TOKEN to a Hugging Face access token}"

# SECURITY: a previous revision of this file embedded a real Hugging Face
# access token (hf_eWyW...) in plain text on every upload command. That token
# is compromised and must be revoked at https://huggingface.co/settings/tokens.
# All uploads now read the token from the HF_TOKEN environment variable;
# the ${HF_TOKEN:?message} expansion aborts with the message if it is unset,
# so no command runs with an empty credential.

# TEST model
python upload_to_huggingface.py \
  --model_path ./experiments/vit5_base_mix100/weakly_supervised/student_best \
  --base_model vit5 \
  --repo_name hadung1802/test \
  --token "${HF_TOKEN:?export HF_TOKEN before running uploads}"

# ViT5 model
python upload_to_huggingface.py \
  --model_path ./experiments/vit5_base_mix100/weakly_supervised/student_best \
  --base_model vit5 \
  --repo_name hadung1802/vit5-base-normalizer-mix100 \
  --token "${HF_TOKEN:?export HF_TOKEN before running uploads}"

python upload_to_huggingface.py \
  --model_path ./experiments/vit5_large_mix100/weakly_supervised/student_best \
  --base_model vit5 \
  --repo_name hadung1802/vit5-large-normalizer-mix100 \
  --token "${HF_TOKEN:?export HF_TOKEN before running uploads}"

# ViSoBERT model
python upload_to_huggingface.py \
  --model_path ./experiments/visobert/weakly_supervised/student_best \
  --base_model visobert \
  --repo_name hadung1802/visobert-normalizer \
  --token "${HF_TOKEN:?export HF_TOKEN before running uploads}"

python upload_to_huggingface.py \
  --model_path ./experiments/visobert_mix100/weakly_supervised/student_best \
  --base_model visobert \
  --repo_name hadung1802/visobert-normalizer-mix100 \
  --token "${HF_TOKEN:?export HF_TOKEN before running uploads}"

# BartPho model
python upload_to_huggingface.py \
  --model_path ./experiments/bartpho_large_mix100/weakly_supervised/student_best \
  --base_model bartpho \
  --repo_name hadung1802/bartpho-large-normalizer-mix100 \
  --token "${HF_TOKEN:?export HF_TOKEN before running uploads}"

# PhoBERT model — the repo name is a placeholder: override it via
# HF_REPO_PHOBERT, or the literal default is passed through unchanged.
# NOTE(review): this command had no --token in the original; preserved as-is.
python upload_to_huggingface.py \
  --model_path ./experiments/phobert/weakly_supervised/student_best \
  --base_model phobert \
  --repo_name "${HF_REPO_PHOBERT:-your-username/phobert-normalizer}"