gaitsetpy.dataset.utils
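This file contains the utility functions to download and extract the datasets. Supported datasets: - Daphnet - HAR-UP. MobiFall and Arduous are accepted by the dispatchers, but their download/extract helpers are still empty placeholders; PhysioNet is accepted too and is handled by the PhysioNetLoader itself.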
Maintainer: @aharshit123456
'''
This file contains the utility functions to download and extract the datasets.
Supported datasets:
- Daphnet
- HAR-UP
MobiFall and Arduous are accepted by the dispatchers but their helpers are
still empty placeholders; PhysioNet is handled by the PhysioNetLoader itself.

Maintainer: @aharshit123456
'''

## imports
import os
import requests
import zipfile
import tarfile
import json
import pandas as pd
import numpy as np
from glob import glob

#################################################################################
############################## DATASET DOWNLOAD #################################
#################################################################################

def download_dataset(dataset_name, data_dir):
    """Run the downloader that belongs to ``dataset_name``.

    Args:
        dataset_name (str): One of "daphnet", "mobifall", "arduous", "harup"
            or "physionet".
        data_dir (str): Directory passed on to the dataset-specific downloader.

    Raises:
        ValueError: If ``dataset_name`` is none of the names listed above.
    """
    if dataset_name == "daphnet":
        download_daphnet_data(data_dir)
        return
    if dataset_name == "mobifall":
        download_mobifall_data(data_dir)
        return
    if dataset_name == "arduous":
        download_arduous_data(data_dir)
        return
    if dataset_name == "harup":
        download_harup_data(data_dir)
        return
    if dataset_name == "physionet":
        # PhysioNet dataset is handled by the PhysioNetLoader itself
        return
    raise ValueError(f"Dataset {dataset_name} not supported.")


def download_daphnet_data(data_dir):
    """Download the Daphnet Freezing of Gait archive into ``data_dir``.

    The archive is fetched from the UCI Machine Learning Repository and stored
    as ``daphnet.zip``. A non-empty ``daphnet.zip`` that is already present is
    reused without touching the network.

    Args:
        data_dir (str): Directory where the dataset will be downloaded

    Returns:
        str: Path to the downloaded file

    Raises:
        ConnectionError: If the HTTP request fails (connection, timeout, bad status)
        IOError: If the file cannot be written or the download is empty
        Exception: For other unexpected errors during download
    """
    url = "https://archive.ics.uci.edu/static/public/245/daphnet+freezing+of+gait.zip"
    file_path = os.path.join(data_dir, "daphnet.zip")

    # Check if file already exists
    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        print(f"Dataset already exists at: {file_path}")
        return file_path

    # tqdm is only needed once a real download starts
    from tqdm import tqdm

    # Stream into a sibling ".part" file: an interrupted run must never leave
    # a truncated daphnet.zip that the check above would accept as complete.
    partial_path = file_path + ".part"

    try:
        # Create directory if it doesn't exist
        os.makedirs(data_dir, exist_ok=True)
        print(f"Downloading Daphnet dataset to: {file_path}")

        # (connect, read) timeouts keep a stalled server from hanging forever;
        # the context manager releases the streamed connection on every path.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()  # Raise an exception for bad status codes
            total_size = int(response.headers.get('content-length', 0))

            # total=None lets tqdm show plain counters when the size is unknown
            with open(partial_path, "wb") as file, tqdm(
                total=total_size or None,
                unit='iB',
                unit_scale=True,
                desc='Download Progress',
            ) as progress_bar:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        progress_bar.update(file.write(chunk))

        # Verify download completed successfully
        if os.path.getsize(partial_path) == 0:
            raise IOError("Downloaded file is empty")

        os.replace(partial_path, file_path)
        print(f"Download completed successfully! File saved to: {file_path}")
        return file_path

    except requests.exceptions.RequestException as e:
        print(f"Error connecting to download URL: {e}")
        raise ConnectionError(f"Failed to download dataset: {e}") from e

    except IOError as e:
        print(f"Error writing download file: {e}")
        raise IOError(f"Failed to save dataset: {e}") from e

    except Exception as e:
        print(f"Unexpected error during download: {e}")
        raise Exception(f"Download failed: {e}") from e

    finally:
        # After a successful rename nothing is left here; otherwise this
        # removes the partial download, including on Ctrl-C.
        if os.path.exists(partial_path):
            os.remove(partial_path)


def download_mobifall_data(data_dir):
    """Download the MobiFall dataset (placeholder: currently does nothing)."""
    pass


def download_arduous_data(data_dir):
    """Download the Arduous dataset (placeholder: currently does nothing)."""
    pass


#################################################################################
############################## EXTRACT DOWNLOAD #################################
#################################################################################

def extract_dataset(dataset_name, data_dir):
    """Run the extractor that belongs to ``dataset_name``.

    Args:
        dataset_name (str): One of "daphnet", "mobifall", "arduous", "harup"
            or "physionet".
        data_dir (str): Directory passed on to the dataset-specific extractor.

    Raises:
        ValueError: If ``dataset_name`` is none of the names listed above.
    """
    if dataset_name == "daphnet":
        extract_daphnet_data(data_dir)
        return
    if dataset_name == "mobifall":
        extract_mobifall_data(data_dir)
        return
    if dataset_name == "arduous":
        extract_arduous_data(data_dir)
        return
    if dataset_name == "harup":
        extract_harup_data(data_dir)
        return
    if dataset_name == "physionet":
        # PhysioNet dataset is handled by the PhysioNetLoader itself
        return
    raise ValueError(f"Dataset {dataset_name} not supported.")


def extract_daphnet_data(data_dir):
    """Unpack ``daphnet.zip`` located in ``data_dir`` into that same directory."""
    archive_path = os.path.join(data_dir, "daphnet.zip")
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(data_dir)


def extract_mobifall_data(data_dir):
    """Extract the MobiFall dataset (placeholder: currently does nothing)."""
    pass


def extract_arduous_data(data_dir):
    """Extract the Arduous dataset (placeholder: currently does nothing)."""
    pass


#################################################################################
############################ OTHER UTILS DOWNLOAD ###############################
#################################################################################


def sliding_window(data, window_size, step_size):
    """Cut ``data`` into consecutive slices of ``window_size`` items.

    Window ``i`` is ``data[i * step_size : i * step_size + window_size]``.
    Only full windows are produced, so trailing items that do not fill a
    window are dropped and an input shorter than ``window_size`` yields ``[]``.

    Args:
        data: Any sized, sliceable sequence.
        window_size (int): Number of items per window.
        step_size (int): Offset between the starts of neighbouring windows.

    Returns:
        list: The windows, in order.
    """
    num_windows = (len(data) - window_size) // step_size + 1
    return [
        data[i * step_size:i * step_size + window_size]
        for i in range(num_windows)
    ]


def download_harup_data(data_dir):
    """
    Download the HAR-UP dataset.

    If ``data_dir`` already contains the ``DataSet`` folder it is returned
    as-is. If it already contains a valid ``HAR-UP_Dataset.zip`` (for example
    from a manual download) that archive is extracted without prompting.
    Otherwise the user is asked on stdin whether to download automatically
    from Google Drive, open the download page in a browser, or skip.

    Args:
        data_dir (str): Directory where the dataset will be downloaded

    Returns:
        str: Path to the extracted dataset directory or None if not performed
    """
    import shutil
    import webbrowser

    # Create directory if it doesn't exist
    os.makedirs(data_dir, exist_ok=True)

    # Define file paths
    zip_filename = "HAR-UP_Dataset.zip"
    zip_path = os.path.join(data_dir, zip_filename)
    dataset_dir = os.path.join(data_dir, "DataSet")

    # Check if dataset directory already exists
    if os.path.exists(dataset_dir):
        print(f"HAR-UP dataset directory already exists at: {dataset_dir}")
        return dataset_dir

    def _extract_archive():
        """Unpack zip_path; return dataset_dir, or None if DataSet is missing."""
        print("\nExtracting the downloaded ZIP file...")
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(data_dir)
        if not os.path.exists(dataset_dir):
            # Sometimes the zip may contain a top-level folder, e.g., HAR-UP_Dataset/DataSet/...
            for entry in os.listdir(data_dir):
                entry_path = os.path.join(data_dir, entry)
                nested_dir = os.path.join(entry_path, "DataSet")
                if os.path.isdir(entry_path) and os.path.exists(nested_dir):
                    shutil.move(nested_dir, dataset_dir)
                    break
        if os.path.exists(dataset_dir):
            print(f"Extraction complete. DataSet directory at: {dataset_dir}")
            return dataset_dir
        print("Extraction failed: DataSet directory not found after extraction.")
        return None

    # The manual option below asks the user to drop the zip here and rerun,
    # so an archive that is already present only needs extracting.
    if zipfile.is_zipfile(zip_path):
        print(f"Found existing HAR-UP archive at: {zip_path}")
        return _extract_archive()

    # Direct download URL from Google Drive (update if needed)
    url = "https://drive.usercontent.google.com/download?id=1Y2MSUijPcB7--PcGoAKhGeqI8GxKK0Pm&export=download&authuser=0"
    print("\n" + "="*80)
    print("HAR-UP DATASET DOWNLOAD")
    print("="*80)
    print("The HAR-UP dataset can be downloaded automatically or manually.")
    print("\nOptions:")
    print("1. Automatic download (recommended)")
    print("2. Manual download")
    print("3. Skip download (if you already have the dataset elsewhere)")

    try:
        choice = input("\nEnter your choice (1-3): ").strip()
    except EOFError:
        # No interactive stdin (scripts, CI): nothing can be chosen
        print("\nNo interactive input available; skipping download.")
        return None

    if choice == "1":
        # tqdm is only needed once a real download starts
        from tqdm import tqdm

        try:
            print(f"\nDownloading HAR-UP dataset ZIP to: {zip_path}")
            print("This may take some time depending on your internet connection...")
            # (connect, read) timeouts keep a stalled server from hanging forever;
            # the context managers release the connection, file and bar on errors.
            with requests.get(url, stream=True, timeout=(10, 60)) as response:
                response.raise_for_status()  # Raise an exception for bad status codes
                total_size = int(response.headers.get('content-length', 0))
                with open(zip_path, "wb") as file, tqdm(
                    total=total_size or None,
                    unit='iB',
                    unit_scale=True,
                    desc='Download Progress',
                ) as progress_bar:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            progress_bar.update(file.write(chunk))
            if os.path.getsize(zip_path) == 0:
                raise IOError("Downloaded file is empty")
            # Guards against a non-archive response (for example an HTML page)
            # having been saved under the zip name.
            if not zipfile.is_zipfile(zip_path):
                raise IOError("Downloaded file is not a valid ZIP archive")
            print(f"Download completed successfully! File saved to: {zip_path}")
            return _extract_archive()
        except Exception as e:
            print(f"\nError during download: {e}")
            print("\nPlease try the manual download option instead.")
            if os.path.exists(zip_path):
                os.remove(zip_path)  # Clean up partial download
            return None

    elif choice == "2":
        print("\nOpening the HAR-UP dataset download page in your browser...")
        print("Please download the ZIP file and save it to the following location:")
        print(f" {zip_path}")
        webbrowser.open("https://sites.google.com/up.edu.mx/har-up/download")
        print("\nAfter downloading, please ensure the ZIP file is named 'HAR-UP_Dataset.zip' and placed in your data directory.")
        print("Then, rerun this function or choose option 1 to extract.")
        return None

    elif choice == "3":
        print("\nSkipping download. Please ensure the dataset is available at:")
        print(f" {os.path.join(data_dir, 'DataSet')}")
        return None

    else:
        print("\nInvalid choice. Please run again and select a valid option.")
        return None


def extract_harup_data(data_dir):
    """
    Extract the HAR-UP dataset zip file if not already extracted.

    Does nothing when ``data_dir`` already contains ``DataSet`` or when
    ``HAR-UP_Dataset.zip`` is missing (a hint is printed in both cases).
    After unpacking, a ``DataSet`` folder nested one level down (for example
    ``HAR-UP_Dataset/DataSet``) is moved up to ``data_dir/DataSet``, the same
    layout ``download_harup_data`` produces.

    Args:
        data_dir (str): Directory that holds ``HAR-UP_Dataset.zip``
    """
    import shutil

    dataset_dir = os.path.join(data_dir, "DataSet")
    if os.path.exists(dataset_dir):
        print(f"HAR-UP dataset already extracted at: {dataset_dir}")
        return
    zip_path = os.path.join(data_dir, "HAR-UP_Dataset.zip")
    if not os.path.exists(zip_path):
        print(f"HAR-UP zip file not found at: {zip_path}")
        print("Please run download_harup_data first.")
        return
    print(f"Extracting HAR-UP dataset zip to: {data_dir}")
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(data_dir)
    if not os.path.exists(dataset_dir):
        # The archive may wrap everything in one top-level folder
        for entry in os.listdir(data_dir):
            nested_dir = os.path.join(data_dir, entry, "DataSet")
            if os.path.isdir(nested_dir):
                shutil.move(nested_dir, dataset_dir)
                break
    if os.path.exists(dataset_dir):
        print("Extraction complete.")
    else:
        print("Extraction failed: DataSet directory not found after extraction.")
def download_dataset(dataset_name, data_dir):
    """Run the downloader that belongs to ``dataset_name``.

    Args:
        dataset_name (str): One of "daphnet", "mobifall", "arduous", "harup"
            or "physionet".
        data_dir (str): Directory passed on to the dataset-specific downloader.

    Raises:
        ValueError: If ``dataset_name`` is none of the names listed above.
    """
    if dataset_name == "daphnet":
        download_daphnet_data(data_dir)
        return
    if dataset_name == "mobifall":
        download_mobifall_data(data_dir)
        return
    if dataset_name == "arduous":
        download_arduous_data(data_dir)
        return
    if dataset_name == "harup":
        download_harup_data(data_dir)
        return
    if dataset_name == "physionet":
        # PhysioNet dataset is handled by the PhysioNetLoader itself
        return
    raise ValueError(f"Dataset {dataset_name} not supported.")
Download the dataset.
def download_daphnet_data(data_dir):
    """Download the Daphnet Freezing of Gait archive into ``data_dir``.

    The archive is fetched from the UCI Machine Learning Repository and stored
    as ``daphnet.zip``. A non-empty ``daphnet.zip`` that is already present is
    reused without touching the network.

    Args:
        data_dir (str): Directory where the dataset will be downloaded

    Returns:
        str: Path to the downloaded file

    Raises:
        ConnectionError: If the HTTP request fails (connection, timeout, bad status)
        IOError: If the file cannot be written or the download is empty
        Exception: For other unexpected errors during download
    """
    url = "https://archive.ics.uci.edu/static/public/245/daphnet+freezing+of+gait.zip"
    file_path = os.path.join(data_dir, "daphnet.zip")

    # Check if file already exists
    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        print(f"Dataset already exists at: {file_path}")
        return file_path

    # Network helpers are only needed once a real download starts
    import requests
    from tqdm import tqdm

    # Stream into a sibling ".part" file: an interrupted run must never leave
    # a truncated daphnet.zip that the check above would accept as complete.
    partial_path = file_path + ".part"

    try:
        # Create directory if it doesn't exist
        os.makedirs(data_dir, exist_ok=True)
        print(f"Downloading Daphnet dataset to: {file_path}")

        # (connect, read) timeouts keep a stalled server from hanging forever;
        # the context manager releases the streamed connection on every path.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()  # Raise an exception for bad status codes
            total_size = int(response.headers.get('content-length', 0))

            # total=None lets tqdm show plain counters when the size is unknown
            with open(partial_path, "wb") as file, tqdm(
                total=total_size or None,
                unit='iB',
                unit_scale=True,
                desc='Download Progress',
            ) as progress_bar:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        progress_bar.update(file.write(chunk))

        # Verify download completed successfully
        if os.path.getsize(partial_path) == 0:
            raise IOError("Downloaded file is empty")

        os.replace(partial_path, file_path)
        print(f"Download completed successfully! File saved to: {file_path}")
        return file_path

    except requests.exceptions.RequestException as e:
        print(f"Error connecting to download URL: {e}")
        raise ConnectionError(f"Failed to download dataset: {e}") from e

    except IOError as e:
        print(f"Error writing download file: {e}")
        raise IOError(f"Failed to save dataset: {e}") from e

    except Exception as e:
        print(f"Unexpected error during download: {e}")
        raise Exception(f"Download failed: {e}") from e

    finally:
        # After a successful rename nothing is left here; otherwise this
        # removes the partial download, including on Ctrl-C.
        if os.path.exists(partial_path):
            os.remove(partial_path)
Download the Daphnet dataset.
This function downloads the Daphnet Freezing of Gait dataset from the UCI Machine Learning Repository. It shows a progress bar during download and handles various potential errors. If the file already exists in the specified directory, it skips the download.
Args: data_dir (str): Directory where the dataset will be downloaded
Returns: str: Path to the downloaded file
Raises: ConnectionError: If unable to connect to the download URL IOError: If unable to create or write to the download directory/file Exception: For other unexpected errors during download
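Download the MobiFall dataset. Note: this is not implemented yet; the function body is an empty placeholder, so calling it does nothing.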
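Download the Arduous dataset. Note: this is not implemented yet; the function body is an empty placeholder, so calling it does nothing.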
def extract_dataset(dataset_name, data_dir):
    """Run the extractor that belongs to ``dataset_name``.

    Args:
        dataset_name (str): One of "daphnet", "mobifall", "arduous", "harup"
            or "physionet".
        data_dir (str): Directory passed on to the dataset-specific extractor.

    Raises:
        ValueError: If ``dataset_name`` is none of the names listed above.
    """
    if dataset_name == "daphnet":
        extract_daphnet_data(data_dir)
        return
    if dataset_name == "mobifall":
        extract_mobifall_data(data_dir)
        return
    if dataset_name == "arduous":
        extract_arduous_data(data_dir)
        return
    if dataset_name == "harup":
        extract_harup_data(data_dir)
        return
    if dataset_name == "physionet":
        # PhysioNet dataset is handled by the PhysioNetLoader itself
        return
    raise ValueError(f"Dataset {dataset_name} not supported.")
Extract the dataset.
def extract_daphnet_data(data_dir):
    """Unpack ``daphnet.zip`` located in ``data_dir`` into that same directory.

    Args:
        data_dir (str): Directory that holds ``daphnet.zip`` and receives
            the extracted files.
    """
    archive_path = os.path.join(data_dir, "daphnet.zip")
    with zipfile.ZipFile(archive_path, mode="r") as archive:
        archive.extractall(path=data_dir)
Extract the Daphnet dataset.
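Extract the MobiFall dataset. Note: this is not implemented yet; the function body is an empty placeholder, so calling it does nothing.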
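Extract the Arduous dataset. Note: this is not implemented yet; the function body is an empty placeholder, so calling it does nothing.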
def download_harup_data(data_dir):
    """
    Download the HAR-UP dataset.

    If ``data_dir`` already contains the ``DataSet`` folder it is returned
    as-is. If it already contains a valid ``HAR-UP_Dataset.zip`` (for example
    from a manual download) that archive is extracted without prompting.
    Otherwise the user is asked on stdin whether to download automatically
    from Google Drive, open the download page in a browser, or skip.

    Args:
        data_dir (str): Directory where the dataset will be downloaded

    Returns:
        str: Path to the extracted dataset directory or None if not performed
    """
    import shutil
    import webbrowser

    # Create directory if it doesn't exist
    os.makedirs(data_dir, exist_ok=True)

    # Define file paths
    zip_filename = "HAR-UP_Dataset.zip"
    zip_path = os.path.join(data_dir, zip_filename)
    dataset_dir = os.path.join(data_dir, "DataSet")

    # Check if dataset directory already exists
    if os.path.exists(dataset_dir):
        print(f"HAR-UP dataset directory already exists at: {dataset_dir}")
        return dataset_dir

    def _extract_archive():
        """Unpack zip_path; return dataset_dir, or None if DataSet is missing."""
        print("\nExtracting the downloaded ZIP file...")
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(data_dir)
        if not os.path.exists(dataset_dir):
            # Sometimes the zip may contain a top-level folder, e.g., HAR-UP_Dataset/DataSet/...
            for entry in os.listdir(data_dir):
                entry_path = os.path.join(data_dir, entry)
                nested_dir = os.path.join(entry_path, "DataSet")
                if os.path.isdir(entry_path) and os.path.exists(nested_dir):
                    shutil.move(nested_dir, dataset_dir)
                    break
        if os.path.exists(dataset_dir):
            print(f"Extraction complete. DataSet directory at: {dataset_dir}")
            return dataset_dir
        print("Extraction failed: DataSet directory not found after extraction.")
        return None

    # The manual option below asks the user to drop the zip here and rerun,
    # so an archive that is already present only needs extracting.
    if zipfile.is_zipfile(zip_path):
        print(f"Found existing HAR-UP archive at: {zip_path}")
        return _extract_archive()

    # Direct download URL from Google Drive (update if needed)
    url = "https://drive.usercontent.google.com/download?id=1Y2MSUijPcB7--PcGoAKhGeqI8GxKK0Pm&export=download&authuser=0"
    print("\n" + "="*80)
    print("HAR-UP DATASET DOWNLOAD")
    print("="*80)
    print("The HAR-UP dataset can be downloaded automatically or manually.")
    print("\nOptions:")
    print("1. Automatic download (recommended)")
    print("2. Manual download")
    print("3. Skip download (if you already have the dataset elsewhere)")

    try:
        choice = input("\nEnter your choice (1-3): ").strip()
    except EOFError:
        # No interactive stdin (scripts, CI): nothing can be chosen
        print("\nNo interactive input available; skipping download.")
        return None

    if choice == "1":
        # Network helpers are only needed once a real download starts
        import requests
        from tqdm import tqdm

        try:
            print(f"\nDownloading HAR-UP dataset ZIP to: {zip_path}")
            print("This may take some time depending on your internet connection...")
            # (connect, read) timeouts keep a stalled server from hanging forever;
            # the context managers release the connection, file and bar on errors.
            with requests.get(url, stream=True, timeout=(10, 60)) as response:
                response.raise_for_status()  # Raise an exception for bad status codes
                total_size = int(response.headers.get('content-length', 0))
                with open(zip_path, "wb") as file, tqdm(
                    total=total_size or None,
                    unit='iB',
                    unit_scale=True,
                    desc='Download Progress',
                ) as progress_bar:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            progress_bar.update(file.write(chunk))
            if os.path.getsize(zip_path) == 0:
                raise IOError("Downloaded file is empty")
            # Guards against a non-archive response (for example an HTML page)
            # having been saved under the zip name.
            if not zipfile.is_zipfile(zip_path):
                raise IOError("Downloaded file is not a valid ZIP archive")
            print(f"Download completed successfully! File saved to: {zip_path}")
            return _extract_archive()
        except Exception as e:
            print(f"\nError during download: {e}")
            print("\nPlease try the manual download option instead.")
            if os.path.exists(zip_path):
                os.remove(zip_path)  # Clean up partial download
            return None

    elif choice == "2":
        print("\nOpening the HAR-UP dataset download page in your browser...")
        print("Please download the ZIP file and save it to the following location:")
        print(f" {zip_path}")
        webbrowser.open("https://sites.google.com/up.edu.mx/har-up/download")
        print("\nAfter downloading, please ensure the ZIP file is named 'HAR-UP_Dataset.zip' and placed in your data directory.")
        print("Then, rerun this function or choose option 1 to extract.")
        return None

    elif choice == "3":
        print("\nSkipping download. Please ensure the dataset is available at:")
        print(f" {os.path.join(data_dir, 'DataSet')}")
        return None

    else:
        print("\nInvalid choice. Please run again and select a valid option.")
        return None
Download the HAR-UP dataset.
This function provides instructions for downloading the HAR-UP dataset and offers an option to download it directly from Google Drive as a ZIP file.
Args: data_dir (str): Directory where the dataset will be downloaded
Returns: str: Path to the extracted dataset directory or None if not performed
def extract_harup_data(data_dir):
    """
    Extract the HAR-UP dataset zip file if not already extracted.

    Does nothing when ``data_dir`` already contains ``DataSet`` or when
    ``HAR-UP_Dataset.zip`` is missing (a hint is printed in both cases).
    After unpacking, a ``DataSet`` folder nested one level down (for example
    ``HAR-UP_Dataset/DataSet``) is moved up to ``data_dir/DataSet``, the same
    layout ``download_harup_data`` produces.

    Args:
        data_dir (str): Directory that holds ``HAR-UP_Dataset.zip``
    """
    import shutil

    dataset_dir = os.path.join(data_dir, "DataSet")
    if os.path.exists(dataset_dir):
        print(f"HAR-UP dataset already extracted at: {dataset_dir}")
        return
    zip_path = os.path.join(data_dir, "HAR-UP_Dataset.zip")
    if not os.path.exists(zip_path):
        print(f"HAR-UP zip file not found at: {zip_path}")
        print("Please run download_harup_data first.")
        return
    print(f"Extracting HAR-UP dataset zip to: {data_dir}")
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(data_dir)
    if not os.path.exists(dataset_dir):
        # The archive may wrap everything in one top-level folder
        for entry in os.listdir(data_dir):
            nested_dir = os.path.join(data_dir, entry, "DataSet")
            if os.path.isdir(nested_dir):
                shutil.move(nested_dir, dataset_dir)
                break
    if os.path.exists(dataset_dir):
        print("Extraction complete.")
    else:
        print("Extraction failed: DataSet directory not found after extraction.")
Extract the HAR-UP dataset zip file if not already extracted.