LICENSE
MANIFEST.in
README.md
pyproject.toml
requirements.txt
dataflow/__init__.py
dataflow/cli.py
dataflow/logger.py
dataflow/version.py
dataflow/webui.py
dataflow/agent/__init__.py
dataflow/agent/agentrole/__init__.py
dataflow/agent/agentrole/analyst.py
dataflow/agent/agentrole/debugger.py
dataflow/agent/agentrole/executioner.py
dataflow/agent/agentrole/planner.py
dataflow/agent/promptstemplates/__init__.py
dataflow/agent/promptstemplates/prompt_template.py
dataflow/agent/promptstemplates/resources/__init__.py
dataflow/agent/promptstemplates/resources/code_debug_template.json
dataflow/agent/promptstemplates/resources/json_form_template.json
dataflow/agent/promptstemplates/resources/operator_template.json
dataflow/agent/promptstemplates/resources/template.json
dataflow/agent/servicemanager/__init__.py
dataflow/agent/servicemanager/analysis_service.py
dataflow/agent/servicemanager/memory_service.py
dataflow/agent/servicemanager/storage_service.py
dataflow/agent/taskcenter/__init__.py
dataflow/agent/taskcenter/task_definitions.py
dataflow/agent/taskcenter/task_dispatcher.py
dataflow/agent/taskcenter/task_reg.py
dataflow/agent/taskcenter/resources/TaskInfo.yaml
dataflow/agent/toolkits/__init__.py
dataflow/agent/toolkits/minio_tookits.py
dataflow/agent/toolkits/operator_processor.py
dataflow/agent/toolkits/pipeline_processor.py
dataflow/agent/toolkits/post_processor.py
dataflow/agent/toolkits/tool_factory.py
dataflow/agent/toolkits/tools.py
dataflow/agent/toolkits/resources/Operator_patched.json
dataflow/agent/webui/__init__.py
dataflow/agent/webui/run_agent_fronted_gradio.py
dataflow/agent/webui/run_dataflow_agent_with_ui.py
dataflow/cli_funcs/__init__.py
dataflow/cli_funcs/cli_env.py
dataflow/cli_funcs/cli_init.py
dataflow/cli_funcs/copy_funcs.py
dataflow/cli_funcs/paths.py
dataflow/core/LLMServing.py
dataflow/core/Operator.py
dataflow/core/Wrapper.py
dataflow/core/__init__.py
dataflow/example/AgenticRAGPipeline/eval_test_data.jsonl
dataflow/example/AgenticRAGPipeline/pipeline_small_chunk.json
dataflow/example/DataflowAgent/agent_test_data.json
dataflow/example/DataflowAgent/mq_test_data.jsonl
dataflow/example/FuncCallPipeline/chat_data.jsonl
dataflow/example/GeneralTextPipeline/abbreviation.jsonl
dataflow/example/GeneralTextPipeline/filtering.json
dataflow/example/GeneralTextPipeline/gen_input.jsonl
dataflow/example/GeneralTextPipeline/math_100.jsonl
dataflow/example/GeneralTextPipeline/pandas.json
dataflow/example/GeneralTextPipeline/pt_input.jsonl
dataflow/example/GeneralTextPipeline/sft_input.jsonl
dataflow/example/GeneralTextPipeline/translation.jsonl
dataflow/example/KBCleaningPipeline/bitter_lesson.pdf
dataflow/example/KBCleaningPipeline/kbc_placeholder.json
dataflow/example/KBCleaningPipeline/kbc_playground.jsonl
dataflow/example/KBCleaningPipeline/kbc_test.jsonl
dataflow/example/KBCleaningPipeline/questionextract_test.pdf
dataflow/example/KBCleaningPipeline/test.doc
dataflow/example/KBCleaningPipeline/test.pdf
dataflow/example/ReasoningPipeline/pipeline_general.json
dataflow/example/ReasoningPipeline/pipeline_general_short.json
dataflow/example/ReasoningPipeline/pipeline_math.json
dataflow/example/ReasoningPipeline/pipeline_math_short.json
dataflow/example/Text2SQLPipeline/pipeline_refine.jsonl
dataflow/example/VQA/pic_path.json
dataflow/example/VQA/pdfimages/page_0.jpg
dataflow/example/VQA/pdfimages/page_1.jpg
dataflow/example/VQA/pdfimages/page_2.jpg
dataflow/operators/__init__.py
dataflow/operators/pandas_operator.py
dataflow/operators/conversations/__init__.py
dataflow/operators/conversations/consistent_chat.py
dataflow/operators/conversations/func_call_operators.py
dataflow/operators/db/__init__.py
dataflow/operators/db/db_operator.py
dataflow/operators/eval/__init__.py
dataflow/operators/eval/AgenticRAG/f1_scorer.py
dataflow/operators/eval/GeneralText/__init__.py
dataflow/operators/eval/GeneralText/APIcaller/alpagasus_scorer.py
dataflow/operators/eval/GeneralText/APIcaller/meta_scorer.py
dataflow/operators/eval/GeneralText/APIcaller/perspective_scorer.py
dataflow/operators/eval/GeneralText/APIcaller/treeinstruct_scorer.py
dataflow/operators/eval/GeneralText/diversity/task2vec_scorer.py
dataflow/operators/eval/GeneralText/diversity/vendi_scorer.py
dataflow/operators/eval/GeneralText/diversity/task2vec/task2vec.py
dataflow/operators/eval/GeneralText/diversity/task2vec/task_similarity.py
dataflow/operators/eval/GeneralText/diversity/task2vec/utils.py
dataflow/operators/eval/GeneralText/gen/bert_scorer.py
dataflow/operators/eval/GeneralText/gen/bleu_scorer.py
dataflow/operators/eval/GeneralText/gen/cider_scorer.py
dataflow/operators/eval/GeneralText/gen/bleu/__init__.py
dataflow/operators/eval/GeneralText/gen/bleu/bleu.py
dataflow/operators/eval/GeneralText/gen/cider/__init__.py
dataflow/operators/eval/GeneralText/gen/cider/cider.py
dataflow/operators/eval/GeneralText/models/debertav3_scorer.py
dataflow/operators/eval/GeneralText/models/deita_complexity_scorer.py
dataflow/operators/eval/GeneralText/models/deita_quality_scorer.py
dataflow/operators/eval/GeneralText/models/fineweb_edu_scorer.py
dataflow/operators/eval/GeneralText/models/instag_scorer.py
dataflow/operators/eval/GeneralText/models/pair_qual_scorer.py
dataflow/operators/eval/GeneralText/models/perplexity_scorer.py
dataflow/operators/eval/GeneralText/models/presidio_scorer.py
dataflow/operators/eval/GeneralText/models/qurating_scorer.py
dataflow/operators/eval/GeneralText/models/rm_scorer.py
dataflow/operators/eval/GeneralText/models/superfiltering_scorer.py
dataflow/operators/eval/GeneralText/models/textbook_scorer.py
dataflow/operators/eval/GeneralText/models/Kenlm/model.py
dataflow/operators/eval/GeneralText/models/Qurating/qurater_annotate.py
dataflow/operators/eval/GeneralText/models/Qurating/modeling/modeling_flash_llama.py
dataflow/operators/eval/GeneralText/models/Superfiltering/data_analysis.py
dataflow/operators/eval/GeneralText/statistics/__init__.py
dataflow/operators/eval/GeneralText/statistics/langkit_scorer.py
dataflow/operators/eval/GeneralText/statistics/lexical_diversity_scorer.py
dataflow/operators/eval/GeneralText/statistics/ngram_scorer.py
dataflow/operators/eval/Reasoning/category_info.py
dataflow/operators/eval/Reasoning/difficulty_info.py
dataflow/operators/eval/Reasoning/token_info.py
dataflow/operators/eval/Text2SQL/sql_component_classifier.py
dataflow/operators/eval/Text2SQL/sql_execution_classifier.py
dataflow/operators/filter/__init__.py
dataflow/operators/filter/AgenticRAG/__init__.py
dataflow/operators/filter/AgenticRAG/content_chooser.py
dataflow/operators/filter/GeneralText/__init__.py
dataflow/operators/filter/GeneralText/alpagasus_filter.py
dataflow/operators/filter/GeneralText/ccnet_deduplicator.py
dataflow/operators/filter/GeneralText/debertav3_filter.py
dataflow/operators/filter/GeneralText/deita_complexity_filter.py
dataflow/operators/filter/GeneralText/deita_quality_filter.py
dataflow/operators/filter/GeneralText/fineweb_edu_filter.py
dataflow/operators/filter/GeneralText/general_filter.py
dataflow/operators/filter/GeneralText/hash_deduplicator.py
dataflow/operators/filter/GeneralText/heuristics.py
dataflow/operators/filter/GeneralText/instag_filter.py
dataflow/operators/filter/GeneralText/langkit_filter.py
dataflow/operators/filter/GeneralText/language_filter.py
dataflow/operators/filter/GeneralText/lexical_diversity_filter.py
dataflow/operators/filter/GeneralText/llm_language_filter.py
dataflow/operators/filter/GeneralText/minhash_deduplicator.py
dataflow/operators/filter/GeneralText/ngram_filter.py
dataflow/operators/filter/GeneralText/ngramhash_deduplicator.py
dataflow/operators/filter/GeneralText/pair_qual_filter.py
dataflow/operators/filter/GeneralText/perplexity_filter.py
dataflow/operators/filter/GeneralText/perspective_filter.py
dataflow/operators/filter/GeneralText/presidio_filter.py
dataflow/operators/filter/GeneralText/qurating_filter.py
dataflow/operators/filter/GeneralText/reward_model_filter.py
dataflow/operators/filter/GeneralText/sem_deduplicator.py
dataflow/operators/filter/GeneralText/simhash_deduplicator.py
dataflow/operators/filter/GeneralText/superfiltering_filter.py
dataflow/operators/filter/GeneralText/text_book_filter.py
dataflow/operators/filter/GeneralText/treeinstruct_filter.py
dataflow/operators/filter/GeneralText/blocklist/en.txt
dataflow/operators/filter/GeneralText/blocklist/zh.txt
dataflow/operators/filter/Reasoning/__init__.py
dataflow/operators/filter/Reasoning/answer_formatter_filter.py
dataflow/operators/filter/Reasoning/answer_groundtruth_filter.py
dataflow/operators/filter/Reasoning/answer_judger_mathverify.py
dataflow/operators/filter/Reasoning/answer_model_judge.py
dataflow/operators/filter/Reasoning/answer_ngram_filter.py
dataflow/operators/filter/Reasoning/answer_pipeline_root.py
dataflow/operators/filter/Reasoning/answer_token_length_filter.py
dataflow/operators/filter/Reasoning/question_filter.py
dataflow/operators/filter/Text2SQL/sql_consistency_filter.py
dataflow/operators/filter/Text2SQL/sql_execution_filter.py
dataflow/operators/generate/__init__.py
dataflow/operators/generate/AgenticRAG/__init__.py
dataflow/operators/generate/AgenticRAG/atomic_task_generator.py
dataflow/operators/generate/AgenticRAG/auto_prompt_generator.py
dataflow/operators/generate/AgenticRAG/depth_qa_generator.py
dataflow/operators/generate/AgenticRAG/qa_generator.py
dataflow/operators/generate/AgenticRAG/qa_scorer.py
dataflow/operators/generate/AgenticRAG/width_qa_generator.py
dataflow/operators/generate/GeneralText/__init__.py
dataflow/operators/generate/GeneralText/condor_generator.py
dataflow/operators/generate/GeneralText/pretrain_generator.py
dataflow/operators/generate/GeneralText/prompted_generator.py
dataflow/operators/generate/GeneralText/sft_generator_from_seed.py
dataflow/operators/generate/KnowledgeCleaning/__init__.py
dataflow/operators/generate/KnowledgeCleaning/corpus_text_splitter.py
dataflow/operators/generate/KnowledgeCleaning/corpus_text_splitter_batch.py
dataflow/operators/generate/KnowledgeCleaning/file_or_url_to_markdown_converter.py
dataflow/operators/generate/KnowledgeCleaning/file_or_url_to_markdown_converter_batch.py
dataflow/operators/generate/KnowledgeCleaning/knowledge_cleaner.py
dataflow/operators/generate/KnowledgeCleaning/knowledge_cleaner_batch.py
dataflow/operators/generate/KnowledgeCleaning/mathbook_question_extract.py
dataflow/operators/generate/KnowledgeCleaning/multihop_qa_generator.py
dataflow/operators/generate/KnowledgeCleaning/multihop_qa_generator_batch.py
dataflow/operators/generate/RARE/__init__.py
dataflow/operators/generate/RARE/bm25_hard_negative.py
dataflow/operators/generate/RARE/doc_to_query.py
dataflow/operators/generate/RARE/reason_distill.py
dataflow/operators/generate/Reasoning/__init__.py
dataflow/operators/generate/Reasoning/answer_extraction_qwenmatheval.py
dataflow/operators/generate/Reasoning/answer_generator.py
dataflow/operators/generate/Reasoning/pretrain_format_converter.py
dataflow/operators/generate/Reasoning/pseudo_answer_generator.py
dataflow/operators/generate/Reasoning/question_category_classifier.py
dataflow/operators/generate/Reasoning/question_difficulty_classifier.py
dataflow/operators/generate/Reasoning/question_generator.py
dataflow/operators/generate/Text2SQL/sql_generator.py
dataflow/operators/generate/Text2SQL/sql_variation_generator.py
dataflow/operators/generate/Text2SQL/text2sql_cot_generator.py
dataflow/operators/generate/Text2SQL/text2sql_prompt_generator.py
dataflow/operators/generate/Text2SQL/text2sql_question_generator.py
dataflow/operators/generate/VQA/PromptedVQAGenerator.py
dataflow/operators/refine/__init__.py
dataflow/operators/refine/GeneralText/__init__.py
dataflow/operators/refine/GeneralText/condor_refiner.py
dataflow/operators/refine/GeneralText/html_entity_refiner.py
dataflow/operators/refine/GeneralText/html_url_remover_refiner.py
dataflow/operators/refine/GeneralText/lowercase_refiner.py
dataflow/operators/refine/GeneralText/ner_refiner.py
dataflow/operators/refine/GeneralText/pii_anonymize_refiner.py
dataflow/operators/refine/GeneralText/ref_removal_refiner.py
dataflow/operators/refine/GeneralText/remove_contractions_refiner.py
dataflow/operators/refine/GeneralText/remove_emoji_refiner.py
dataflow/operators/refine/GeneralText/remove_emoticons_refiner.py
dataflow/operators/refine/GeneralText/remove_extra_spaces_refiner.py
dataflow/operators/refine/GeneralText/remove_image_ref_refiner.py
dataflow/operators/refine/GeneralText/remove_number_refiner.py
dataflow/operators/refine/GeneralText/remove_punctuation_refiner.py
dataflow/operators/refine/GeneralText/remove_repetitions_punctuation_refiner.py
dataflow/operators/refine/GeneralText/remove_stopwords_refiner.py
dataflow/operators/refine/GeneralText/spelling_correction_refiner.py
dataflow/operators/refine/GeneralText/stemming_lemmatization_refiner.py
dataflow/operators/refine/GeneralText/text_normalization_refiner.py
dataflow/prompts/__init__.py
dataflow/prompts/agenticrag.py
dataflow/prompts/func_call.py
dataflow/prompts/general_text.py
dataflow/prompts/kbcleaning.py
dataflow/prompts/multihopqa.py
dataflow/prompts/reasoning.py
dataflow/prompts/text2sql.py
dataflow/prompts/reasoning/__init__.py
dataflow/prompts/reasoning/diy.py
dataflow/prompts/reasoning/general.py
dataflow/prompts/reasoning/math.py
dataflow/serving/APILLMServing_request.py
dataflow/serving/APIVLMServing_openai.py
dataflow/serving/GoogleAPIServing.py
dataflow/serving/LiteLLMServing.py
dataflow/serving/LocalModelLLMServing.py
dataflow/serving/__init__.py
dataflow/statics/dataflow_agent/dataflow_agent/run_agent_fronted_gradio.py
dataflow/statics/dataflow_agent/dataflow_agent/run_dataflow_agen_with_console.py
dataflow/statics/dataflow_agent/dataflow_agent/run_dataflow_agent_with_ui.py
dataflow/statics/dataflow_agent/dataflow_agent/agent_generated_files/medica_pipeline.py
dataflow/statics/dataflow_agent/dataflow_agent/agent_generated_files/operator_a_distractor_gen.py
dataflow/statics/dataflow_agent/dataflow_agent/agent_generated_files/operator_code_formula.py
dataflow/statics/dataflow_agent/dataflow_agent/agent_generated_files/operator_keywords.py
dataflow/statics/dataflow_agent/dataflow_agent/agent_generated_files/operator_length_density.py
dataflow/statics/dataflow_agent/dataflow_agent/agent_generated_files/operator_llm_translate.py
dataflow/statics/dataflow_agent/dataflow_agent/agent_generated_files/operator_ner.py
dataflow/statics/dataflow_agent/dataflow_agent/agent_generated_files/operator_q_context_expand.py
dataflow/statics/dataflow_agent/dataflow_agent/agent_generated_files/operator_q_cot.py
dataflow/statics/dataflow_agent/dataflow_agent/agent_generated_files/operator_q_para.py
dataflow/statics/dataflow_agent/dataflow_agent/agent_generated_files/operator_question_gen.py
dataflow/statics/dataflow_agent/dataflow_agent/agent_generated_files/operator_sentiment.py
dataflow/statics/dataflow_agent/dataflow_agent/agent_generated_files/operator_spam_detect.py
dataflow/statics/dataflow_agent/dataflow_agent/agent_generated_files/operator_summarization.py
dataflow/statics/dataflow_agent/dataflow_agent/agent_generated_files/operator_text_generate.py
dataflow/statics/pipelines/__init__.py
dataflow/statics/pipelines/api_pipelines/agentic_rag_eval_pipeline.py
dataflow/statics/pipelines/api_pipelines/agenticrag_pipeline.py
dataflow/statics/pipelines/api_pipelines/func_call_synthesis.py
dataflow/statics/pipelines/api_pipelines/kbcleaning_pipeline_pdf.py
dataflow/statics/pipelines/api_pipelines/kbcleaning_pipeline_url.py
dataflow/statics/pipelines/api_pipelines/rare_pipeline.py
dataflow/statics/pipelines/api_pipelines/reasoning_diy_pipeline.py
dataflow/statics/pipelines/api_pipelines/reasoning_general_pipeline.py
dataflow/statics/pipelines/api_pipelines/reasoning_math_pipeline.py
dataflow/statics/pipelines/api_pipelines/reasoning_pretrain_pipeline.py
dataflow/statics/pipelines/api_pipelines/text2sql_pipeline_gen.py
dataflow/statics/pipelines/api_pipelines/text2sql_pipeline_refine.py
dataflow/statics/pipelines/api_pipelines/text_conversation_synthesis_pipeline.py
dataflow/statics/pipelines/api_pipelines/text_sft_synthesis_pipeline.py
dataflow/statics/pipelines/cpu_pipelines/kbcleaning_pipeline_url.py
dataflow/statics/pipelines/cpu_pipelines/reasoning_pipeline.py
dataflow/statics/pipelines/cpu_pipelines/text2sql_pipeline.py
dataflow/statics/pipelines/cpu_pipelines/text_pt_filter.py
dataflow/statics/pipelines/cpu_pipelines/text_sft_filter.py
dataflow/statics/pipelines/gpu_pipelines/agenticrag_pipeline.py
dataflow/statics/pipelines/gpu_pipelines/rare_pipeline.py
dataflow/statics/pipelines/gpu_pipelines/reasoning_math_pipeline.py
dataflow/statics/pipelines/gpu_pipelines/text2sql_pipeline_gen.py
dataflow/statics/pipelines/gpu_pipelines/text2sql_pipeline_refine.py
dataflow/statics/pipelines/gpu_pipelines/text_pt_filter.py
dataflow/statics/pipelines/gpu_pipelines/text_pt_synthetic.py
dataflow/statics/pipelines/gpu_pipelines/text_sft_filter.py
dataflow/statics/pipelines/gpu_pipelines/text_sft_synthetic.py
dataflow/statics/pipelines/gpu_pipelines/kbcleaning/kbcleaning_pipeline_batch_sglang.py
dataflow/statics/pipelines/gpu_pipelines/kbcleaning/kbcleaning_pipeline_batch_vllm.py
dataflow/statics/pipelines/gpu_pipelines/kbcleaning/kbcleaning_pipeline_pdf_sglang.py
dataflow/statics/pipelines/gpu_pipelines/kbcleaning/kbcleaning_pipeline_pdf_vllm.py
dataflow/statics/pipelines/gpu_pipelines/kbcleaning/kbcleaning_pipeline_url_sglang.py
dataflow/statics/pipelines/gpu_pipelines/kbcleaning/kbcleaning_pipeline_url_vllm.py
dataflow/statics/playground/playground/abbreviation_qa_api.py
dataflow/statics/playground/playground/abbreviation_qa_local.py
dataflow/statics/playground/playground/generate_qa_api.py
dataflow/statics/playground/playground/generate_qa_local.py
dataflow/statics/playground/playground/kbcleaning_pipeline_batch_sglang.py
dataflow/statics/playground/playground/mathbook_extract.py
dataflow/statics/playground/playground/quick_evaluate.py
dataflow/statics/playground/playground/reasoning_general_pipeline.py
dataflow/statics/playground/playground/text_conversation_synthesis_pipeline.py
dataflow/statics/playground/playground/text_sft_synthesis_from_scratch.py
dataflow/statics/playground/playground/text_sft_synthesis_from_seed.py
dataflow/statics/playground/playground/translation_api.py
dataflow/statics/playground/playground/translation_local.py
dataflow/statics/playground/playground/vqa.py
dataflow/utils/__init__.py
dataflow/utils/registry.py
dataflow/utils/storage.py
dataflow/utils/utils.py
dataflow/utils/reasoning/AnswerExtraction.py
dataflow/utils/reasoning/CategoryFuzz.py
dataflow/utils/text2sql/database_manager.py
dataflow/wrapper/__init__.py
dataflow/wrapper/batch_wrapper.py
open_dataflow.egg-info/PKG-INFO
open_dataflow.egg-info/SOURCES.txt
open_dataflow.egg-info/dependency_links.txt
open_dataflow.egg-info/entry_points.txt
open_dataflow.egg-info/requires.txt
open_dataflow.egg-info/top_level.txt
test/test_API_conversations.py
test/test_agent_fronted_gradio.py
test/test_agentic_rag.py
test/test_agentic_rag_eval.py
test/test_batchify.py
test/test_dataflow_agent.py
test/test_dataflow_agent_with_ui.py
test/test_dockbcleaning.py
test/test_general_text.py
test/test_general_text_refine.py
test/test_llm_lang_filter_and_meta_scorer.py
test/test_pdfkbcleaning.py
test/test_pipelines.py
test/test_pt_filter.py
test/test_pt_synthetic.py
test/test_question_extract.py
test/test_reasoning_diy.py
test/test_reasoning_general.py
test/test_reasoning_math.py
test/test_reasoning_math_sglang.py
test/test_reasoning_pretrain.py
test/test_sft_filter.py
test/test_sft_synthetic.py
test/test_taskcraft.py
test/test_text2sql.py
test/test_urlkbcleaning.py