bash: cannot set terminal process group (-1): Inappropriate ioctl for device
bash: no job control in this shell
2020-09-13 13:19:13,651 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training
2020-09-13 13:19:13,654 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2020-09-13 13:19:13,664 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.
2020-09-13 13:19:14,284 sagemaker_pytorch_container.training INFO     Invoking user training script.
2020-09-13 13:19:14,550 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2020-09-13 13:19:14,563 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2020-09-13 13:19:14,576 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2020-09-13 13:19:14,586 sagemaker-training-toolkit INFO     Invoking user script

Training Env:

{
    "additional_framework_parameters": {},
    "channel_input_dirs": {
        "bucket": "/opt/ml/input/data/bucket",
        "data": "/opt/ml/input/data/data"
    },
    "current_host": "algo-1",
    "framework_module": "sagemaker_pytorch_container.training:main",
    "hosts": [
        "algo-1"
    ],
    "hyperparameters": {},
    "input_config_dir": "/opt/ml/input/config",
    "channel_data_config": {
        "bucket": {
            "TrainingInputMode": "File",
            "S3DistributionType": "FullyReplicated",
            "RecordWrapperType": "None"
        },
        "data": {
            "TrainingInputMode": "File",
            "S3DistributionType": "ShardedByS3Key",
            "RecordWrapperType": "None"
        }
    },
    "input_dir": "/opt/ml/input",
    "is_master": true,
    "job_name": "task4-2020-09-13-13-16-15-JsEqM0p4",
    "log_level": 20,
    "master_hostname": "algo-1",
    "model_dir": "/opt/ml/model",
    "module_dir": "s3://sagemaker-us-east-1-667232328135/tests/simple-sagemaker-example-cli_2020-09-13-13-16-10_py37/task4/task4-2020-09-13-13-16-15-JsEqM0p4/source/sourcedir.tar.gz",
    "module_name": "worker4",
    "network_interface_name": "eth0",
    "num_cpus": 2,
    "num_gpus": 0,
    "output_data_dir": "/opt/ml/output/data",
    "output_dir": "/opt/ml/output",
    "output_intermediate_dir": "/opt/ml/output/intermediate",
    "resource_config": {
        "current_host": "algo-1",
        "hosts": [
            "algo-1"
        ],
        "network_interface_name": "eth0"
    },
    "user_entry_point": "worker4.py"
}

Environment variables:

SM_HOSTS=["algo-1"]
SM_NETWORK_INTERFACE_NAME=eth0
SM_HPS={}
SM_USER_ENTRY_POINT=worker4.py
SM_FRAMEWORK_PARAMS={}
SM_RESOURCE_CONFIG={"current_host":"algo-1","hosts":["algo-1"],"network_interface_name":"eth0"}
SM_channel_data_CONFIG={"bucket":{"RecordWrapperType":"None","S3DistributionType":"FullyReplicated","TrainingInputMode":"File"},"data":{"RecordWrapperType":"None","S3DistributionType":"ShardedByS3Key","TrainingInputMode":"File"}}
SM_OUTPUT_DATA_DIR=/opt/ml/output/data
SM_CHANNELS=["bucket","data"]
SM_CURRENT_HOST=algo-1
SM_MODULE_NAME=worker4
SM_LOG_LEVEL=20
SM_FRAMEWORK_MODULE=sagemaker_pytorch_container.training:main
SM_INPUT_DIR=/opt/ml/input
SM_INPUT_CONFIG_DIR=/opt/ml/input/config
SM_OUTPUT_DIR=/opt/ml/output
SM_NUM_CPUS=2
SM_NUM_GPUS=0
SM_MODEL_DIR=/opt/ml/model
SM_MODULE_DIR=s3://sagemaker-us-east-1-667232328135/tests/simple-sagemaker-example-cli_2020-09-13-13-16-10_py37/task4/task4-2020-09-13-13-16-15-JsEqM0p4/source/sourcedir.tar.gz
SM_TRAINING_ENV={"additional_framework_parameters":{},"channel_input_dirs":{"bucket":"/opt/ml/input/data/bucket","data":"/opt/ml/input/data/data"},"current_host":"algo-1","framework_module":"sagemaker_pytorch_container.training:main","hosts":["algo-1"],"hyperparameters":{},"input_config_dir":"/opt/ml/input/config","channel_data_config":{"bucket":{"RecordWrapperType":"None","S3DistributionType":"FullyReplicated","TrainingInputMode":"File"},"data":{"RecordWrapperType":"None","S3DistributionType":"ShardedByS3Key","TrainingInputMode":"File"}},"input_dir":"/opt/ml/input","is_master":true,"job_name":"task4-2020-09-13-13-16-15-JsEqM0p4","log_level":20,"master_hostname":"algo-1","model_dir":"/opt/ml/model","module_dir":"s3://sagemaker-us-east-1-667232328135/tests/simple-sagemaker-example-cli_2020-09-13-13-16-10_py37/task4/task4-2020-09-13-13-16-15-JsEqM0p4/source/sourcedir.tar.gz","module_name":"worker4","network_interface_name":"eth0","num_cpus":2,"num_gpus":0,"output_data_dir":"/opt/ml/output/data","output_dir":"/opt/ml/output","output_intermediate_dir":"/opt/ml/output/intermediate","resource_config":{"current_host":"algo-1","hosts":["algo-1"],"network_interface_name":"eth0"},"user_entry_point":"worker4.py"}
SM_USER_ARGS=[]
SM_OUTPUT_INTERMEDIATE_DIR=/opt/ml/output/intermediate
SM_CHANNEL_BUCKET=/opt/ml/input/data/bucket
SM_CHANNEL_DATA=/opt/ml/input/data/data
PYTHONPATH=/opt/ml/code:/opt/conda/bin:/opt/conda/lib/python36.zip:/opt/conda/lib/python3.6:/opt/conda/lib/python3.6/lib-dynload:/opt/conda/lib/python3.6/site-packages

Invoking script with the following command:

/opt/conda/bin/python worker4.py


INFO:worker_toolkit.worker_lib:Args: Namespace(batch_size=64, channel_names=['bucket', 'data'], current_host='algo-1', epochs=50, hosts=['algo-1'], hps={}, channel_bucket='/opt/ml/input/data/bucket', input_config_dir='/opt/ml/input/config', channel_data='/opt/ml/input/data/data', channel_data_config='{"bucket":{"RecordWrapperType":"None","S3DistributionType":"FullyReplicated","TrainingInputMode":"File"},"data":{"RecordWrapperType":"None","S3DistributionType":"ShardedByS3Key","TrainingInputMode":"File"}}', input_dir='/opt/ml/input', channel_model='', job_name='task4-2020-09-13-13-16-15-JsEqM0p4', learning_rate=0.05, model_dir='/opt/ml/model', network_interface='eth0', num_cpus=2, num_gpus=0, output_data_dir='/opt/ml/output/data', output_dir='/opt/ml/output', resource_config='{"current_host":"algo-1","hosts":["algo-1"],"network_interface_name":"eth0"}', state='/state', use_cuda=False)
INFO:worker_toolkit.worker_lib:Unmatched: []
INFO:__main__:*** START listing files in /opt/ml/input/data/data
INFO:__main__:/opt/ml/input/data/data:
total 12
drwxr-xr-x 2 root root 4096 Sep 13 13:18 .
drwxr-xr-x 4 root root 4096 Sep 13 13:18 ..
-rw-r--r-- 1 root root   19 Sep 13 13:18 sample_data.txt

INFO:__main__:*** END file listing /opt/ml/input/data/data
INFO:__main__:*** START listing files in /opt/ml/input/data/bucket
INFO:__main__:/opt/ml/input/data/bucket:
total 7796
drwxr-xr-x 2 root root    4096 Sep 13 13:18 .
drwxr-xr-x 4 root root    4096 Sep 13 13:18 ..
-rw-r--r-- 1 root root 7973806 Sep 13 13:18 persons.json

INFO:__main__:*** END file listing /opt/ml/input/data/bucket
2020-09-13 13:19:14,666 sagemaker-training-toolkit INFO     Reporting training SUCCESS
