.pre-commit-config.yaml
.readthedocs.yml
LICENSE
NOTICE
README.md
example.py
pyproject.toml
.github/workflows/release.yml
.github/workflows/test.yml
docs/sphinx/Makefile
docs/sphinx/README.rst
docs/sphinx/autogenerate.sh
docs/sphinx/make.bat
docs/sphinx/source/api_index.rst
docs/sphinx/source/conf.py
docs/sphinx/source/emm.aggregation.rst
docs/sphinx/source/emm.base.rst
docs/sphinx/source/emm.data.rst
docs/sphinx/source/emm.features.rst
docs/sphinx/source/emm.helper.rst
docs/sphinx/source/emm.indexing.rst
docs/sphinx/source/emm.loggers.rst
docs/sphinx/source/emm.pipeline.rst
docs/sphinx/source/emm.preprocessing.rst
docs/sphinx/source/emm.rst
docs/sphinx/source/emm.supervised_model.rst
docs/sphinx/source/emm.threshold.rst
docs/sphinx/source/fitting.rst
docs/sphinx/source/index.rst
docs/sphinx/source/overview.rst
docs/sphinx/source/parameters.rst
docs/sphinx/source/persistence.rst
docs/sphinx/source/pipeline.rst
docs/sphinx/source/spark.rst
emm/__init__.py
emm/parameters.py
emm/resources.py
emm/version.py
emm.egg-info/PKG-INFO
emm.egg-info/SOURCES.txt
emm.egg-info/dependency_links.txt
emm.egg-info/requires.txt
emm.egg-info/top_level.txt
emm/aggregation/__init__.py
emm/aggregation/base_entity_aggregation.py
emm/aggregation/pandas_entity_aggregation.py
emm/aggregation/spark_entity_aggregation.py
emm/base/__init__.py
emm/base/module.py
emm/base/pipeline.py
emm/data/README.md
emm/data/__init__.py
emm/data/create_data.py
emm/data/negative_data_creation.py
emm/data/noiser.py
emm/data/prepare_name_pairs.py
emm/data/unittest_sample_namepairs.csv.gz
emm/features/__init__.py
emm/features/base_feature_extractor.py
emm/features/features_extra.py
emm/features/features_lef.py
emm/features/features_name.py
emm/features/features_rank.py
emm/features/features_vocabulary.py
emm/features/pandas_feature_extractor.py
emm/helper/__init__.py
emm/helper/blocking_functions.py
emm/helper/custom_path.py
emm/helper/io.py
emm/helper/sklearn_pipeline.py
emm/helper/spark_custom_reader_writer.py
emm/helper/spark_ml_pipeline.py
emm/helper/spark_utils.py
emm/helper/util.py
emm/indexing/__init__.py
emm/indexing/base_indexer.py
emm/indexing/pandas_candidate_selection.py
emm/indexing/pandas_cos_sim_matcher.py
emm/indexing/pandas_naive_indexer.py
emm/indexing/pandas_normalized_tfidf.py
emm/indexing/pandas_sni.py
emm/indexing/spark_candidate_selection.py
emm/indexing/spark_character_tokenizer.py
emm/indexing/spark_cos_sim_matcher.py
emm/indexing/spark_indexing_utils.py
emm/indexing/spark_normalized_tfidf.py
emm/indexing/spark_sni.py
emm/indexing/spark_word_tokenizer.py
emm/loggers/__init__.py
emm/loggers/logger.py
emm/loggers/timer.py
emm/pipeline/__init__.py
emm/pipeline/base_entity_matching.py
emm/pipeline/pandas_entity_matching.py
emm/pipeline/spark_entity_matching.py
emm/preprocessing/__init__.py
emm/preprocessing/abbreviation_util.py
emm/preprocessing/base_name_preprocessor.py
emm/preprocessing/functions.py
emm/preprocessing/pandas_functions.py
emm/preprocessing/pandas_preprocessor.py
emm/preprocessing/spark_functions.py
emm/preprocessing/spark_preprocessor.py
emm/supervised_model/__init__.py
emm/supervised_model/base_supervised_model.py
emm/supervised_model/pandas_supervised_model.py
emm/supervised_model/spark_supervised_model.py
emm/threshold/__init__.py
emm/threshold/threshold_decision.py
notebooks/01-entity-matching-pandas-version.ipynb
notebooks/02-entity-matching-spark-version.ipynb
notebooks/03-entity-matching-training-pandas-version.ipynb
notebooks/04-entity-matching-aggregation-pandas-version.ipynb
tests/__init__.py
tests/conftest.py
tests/utils.py
tests/benchmark/__init__.py
tests/benchmark/test_bench.py
tests/integration/__init__.py
tests/integration/test_artificial_integration.py
tests/integration/test_em_add_model.py
tests/integration/test_entity_matching.py
tests/integration/test_indexers.py
tests/integration/test_normalized_tfidf.py
tests/integration/test_pandas_em.py
tests/integration/test_readme_example.py
tests/integration/test_spark_vs_pandas.py
tests/integration/test_supervised.py
tests/integration/test_training_classifier.py
tests/notebooks/__init__.py
tests/notebooks/test_notebooks.py
tests/unit/__init__.py
tests/unit/test_abbreviations.py
tests/unit/test_carry_on_cols.py
tests/unit/test_commonshorthands.py
tests/unit/test_cos_sim_matcher.py
tests/unit/test_custom_path.py
tests/unit/test_data.py
tests/unit/test_entity_aggregation.py
tests/unit/test_feature_extractor.py
tests/unit/test_features_abbreviations.py
tests/unit/test_features_lef.py
tests/unit/test_features_vocabulary.py
tests/unit/test_name_preprocessing.py
tests/unit/test_negative_sample_creation.py
tests/unit/test_serialization.py
tests/unit/test_threshold_decision.py
tests/unit/test_timer.py
tests/unit/test_util.py