LICENSE
README.md
pyproject.toml
requirements.txt
dataflow/__init__.py
dataflow/cli.py
dataflow/logger.py
dataflow/version.py
dataflow/agent/__init__.py
dataflow/cli_funcs/__init__.py
dataflow/cli_funcs/cli_env.py
dataflow/cli_funcs/cli_init.py
dataflow/cli_funcs/copy_funcs.py
dataflow/cli_funcs/paths.py
dataflow/core/LLMServing.py
dataflow/core/Operator.py
dataflow/core/__init__.py
dataflow/llmserving/APILLMServing_aisuite.py
dataflow/llmserving/APILLMServing_request.py
dataflow/llmserving/LocalModelLLMServing.py
dataflow/llmserving/__init__.py
dataflow/operators/__init__.py
dataflow/operators/db/db_operator.py
dataflow/operators/eval/__init__.py
dataflow/operators/eval/GeneralText/__init__.py
dataflow/operators/eval/GeneralText/APIcaller/alpagasus_scorer.py
dataflow/operators/eval/GeneralText/models/debertav3_scorer.py
dataflow/operators/eval/GeneralText/models/deita_complexity_scorer.py
dataflow/operators/eval/GeneralText/models/deita_quality_scorer.py
dataflow/operators/eval/GeneralText/models/fineweb_edu_scorer.py
dataflow/operators/eval/GeneralText/models/instag_scorer.py
dataflow/operators/eval/GeneralText/models/pair_qual_scorer.py
dataflow/operators/eval/GeneralText/models/qurating_scorer.py
dataflow/operators/eval/GeneralText/models/superfiltering_scorer.py
dataflow/operators/eval/GeneralText/models/textbook_scorer.py
dataflow/operators/eval/GeneralText/models/Kenlm/model.py
dataflow/operators/eval/GeneralText/models/Qurating/qurater_annotate.py
dataflow/operators/eval/GeneralText/models/Qurating/modeling/modeling_flash_llama.py
dataflow/operators/eval/GeneralText/models/Superfiltering/data_analysis.py
dataflow/operators/eval/GeneralText/statistics/__init__.py
dataflow/operators/eval/GeneralText/statistics/langkit_scorer.py
dataflow/operators/eval/GeneralText/statistics/lexical_diversity_scorer.py
dataflow/operators/eval/GeneralText/statistics/ngram_scorer.py
dataflow/operators/generate/__init__.py
dataflow/operators/generate/AgenticRAG/AutoPromptGenerator.py
dataflow/operators/generate/AgenticRAG/QAGenerator.py
dataflow/operators/generate/AgenticRAG/QAScorer.py
dataflow/operators/generate/AgenticRAG/__init__.py
dataflow/operators/generate/GeneralText/PretrainGenerator.py
dataflow/operators/generate/GeneralText/SupervisedFinetuneGenerator.py
dataflow/operators/generate/GeneralText/__init__.py
dataflow/operators/generate/KnowledgeCleaning/CorpusTextSplitter.py
dataflow/operators/generate/KnowledgeCleaning/KnowledgeCleaner.py
dataflow/operators/generate/KnowledgeCleaning/KnowledgeExtractor.py
dataflow/operators/generate/KnowledgeCleaning/MultiHopQAGenerator.py
dataflow/operators/generate/KnowledgeCleaning/__init__.py
dataflow/operators/generate/Reasoning/AnswerExtraction_QwenMathEval.py
dataflow/operators/generate/Reasoning/AnswerGenerator.py
dataflow/operators/generate/Reasoning/PretrainFormatConverter.py
dataflow/operators/generate/Reasoning/PseudoAnswerGenerator.py
dataflow/operators/generate/Reasoning/QuestionCategoryClassifier.py
dataflow/operators/generate/Reasoning/QuestionDifficultyClassifier.py
dataflow/operators/generate/Reasoning/QuestionGenerator.py
dataflow/operators/generate/Reasoning/__init__.py
dataflow/operators/generate/Text2SQL/DatabaseSchemaExtractor.py
dataflow/operators/generate/Text2SQL/ExtraKnowledgeGenerator.py
dataflow/operators/generate/Text2SQL/PromptGenerator.py
dataflow/operators/generate/Text2SQL/QuestionRefiner.py
dataflow/operators/generate/Text2SQL/SQLDifficultyClassifier.py
dataflow/operators/generate/Text2SQL/SQLFilter.py
dataflow/operators/generate/Text2SQL/SchemaLinking.py
dataflow/operators/generate/Text2SQL/Text2SQLDifficultyClassifier.py
dataflow/operators/generate/Text2SQL/__init__.py
dataflow/operators/process/__init__.py
dataflow/operators/process/AgenticRAG/ContentChooser.py
dataflow/operators/process/AgenticRAG/__init__.py
dataflow/operators/process/GeneralText/__init__.py
dataflow/operators/process/GeneralText/deduplicators/minhash_deduplicator.py
dataflow/operators/process/GeneralText/filters/__init__.py
dataflow/operators/process/GeneralText/filters/alpagasus_filter.py
dataflow/operators/process/GeneralText/filters/deita_complexity_filter.py
dataflow/operators/process/GeneralText/filters/deita_quality_filter.py
dataflow/operators/process/GeneralText/filters/fineweb_edu_filter.py
dataflow/operators/process/GeneralText/filters/heuristics.py
dataflow/operators/process/GeneralText/filters/instag_filter.py
dataflow/operators/process/GeneralText/filters/language_filter.py
dataflow/operators/process/GeneralText/filters/ngram_filter.py
dataflow/operators/process/GeneralText/filters/pair_qual_filter.py
dataflow/operators/process/GeneralText/filters/qurating_filter.py
dataflow/operators/process/GeneralText/filters/superfiltering_filter.py
dataflow/operators/process/GeneralText/filters/text_book_filter.py
dataflow/operators/process/Reasoning/AnswerFormatterFilter.py
dataflow/operators/process/Reasoning/AnswerGroundTruthFilter.py
dataflow/operators/process/Reasoning/AnswerJudger_MathVerify.py
dataflow/operators/process/Reasoning/AnswerNgramFilter.py
dataflow/operators/process/Reasoning/AnswerPipelineRoot.py
dataflow/operators/process/Reasoning/AnswerTokenLengthFilter.py
dataflow/operators/process/Reasoning/QuestionFilter.py
dataflow/operators/process/Reasoning/__init__.py
dataflow/operators/refine/__init__.py
dataflow/operators/refine/GeneralText/__init__.py
dataflow/operators/refine/GeneralText/html_remove_refiner.py
dataflow/operators/refine/GeneralText/remove_emoji_refiner.py
dataflow/operators/refine/GeneralText/remove_extra_spaces_refiner.py
dataflow/prompts/__init__.py
dataflow/prompts/agenticrag.py
dataflow/prompts/general_text.py
dataflow/prompts/kbcleaning.py
dataflow/prompts/multihopqa.py
dataflow/prompts/reasoning.py
dataflow/prompts/text2sql.py
dataflow/statics/pipelines/api_pipelines/agenticrag_pipeline.py
dataflow/statics/pipelines/api_pipelines/reasoning_pipeline.py
dataflow/statics/pipelines/api_pipelines/test_dockbcleaning.py
dataflow/statics/pipelines/api_pipelines/test_pdfkbcleaning.py
dataflow/statics/pipelines/api_pipelines/test_urlkbcleaning.py
dataflow/statics/pipelines/api_pipelines/text2sql_pipeline.py
dataflow/statics/pipelines/api_pipelines/text_sft_filter.py
dataflow/statics/pipelines/cpu_pipelines/reasoning_pipeline.py
dataflow/statics/pipelines/cpu_pipelines/test_urlkbcleaning.py
dataflow/statics/pipelines/cpu_pipelines/text2sql_pipeline.py
dataflow/statics/pipelines/cpu_pipelines/text_pt_filter.py
dataflow/statics/pipelines/cpu_pipelines/text_sft_filter.py
dataflow/statics/pipelines/gpu_pipelines/agenticrag_pipeline.py
dataflow/statics/pipelines/gpu_pipelines/reasoning_pipeline.py
dataflow/statics/pipelines/gpu_pipelines/test_dockbcleaning.py
dataflow/statics/pipelines/gpu_pipelines/test_pdfkbcleaning.py
dataflow/statics/pipelines/gpu_pipelines/test_urlkbcleaning.py
dataflow/statics/pipelines/gpu_pipelines/text2sql_pipeline.py
dataflow/statics/pipelines/gpu_pipelines/text_pt_filter.py
dataflow/statics/pipelines/gpu_pipelines/text_pt_synthetic.py
dataflow/statics/pipelines/gpu_pipelines/text_sft_filter.py
dataflow/statics/pipelines/gpu_pipelines/text_sft_synthetic.py
dataflow/utils/__init__.py
dataflow/utils/kbcleaning.py
dataflow/utils/registry.py
dataflow/utils/storage.py
dataflow/utils/utils.py
dataflow/utils/reasoning/AnswerExtraction.py
dataflow/utils/reasoning/CategoryFuzz.py
open_dataflow_adp.egg-info/PKG-INFO
open_dataflow_adp.egg-info/SOURCES.txt
open_dataflow_adp.egg-info/dependency_links.txt
open_dataflow_adp.egg-info/entry_points.txt
open_dataflow_adp.egg-info/requires.txt
open_dataflow_adp.egg-info/top_level.txt
test/test_agentic_rag.py
test/test_dockbcleaning.py
test/test_general_text.py
test/test_pdfkbcleaning.py
test/test_pipelines.py
test/test_pt_filter.py
test/test_pt_synthetic.py
test/test_reasoning.py
test/test_reasoning_pretrain.py
test/test_sft_filter.py
test/test_sft_synthetic.py
test/test_text2sql.py
test/test_urlkbcleaning.py