# Source code for locpix.scripts.preprocessing.annotate

#!/usr/bin/env python
"""Annotate module

Take in items, convert to histograms, annotate,
visualise histo mask, save the exported annotation .parquet

N.B. Preprocess just converts to datastructure
It is annotate that converts to image etc.
i.e. preprocess doesn't assume histogram
Therefore, for scripts which use image info such as x pixel they need to
take in the annotate parquets
"""

import yaml
import os
from locpix.preprocessing import datastruc

# from locpix.visualise import vis_img
import argparse
import json
import time
import numpy as np

# import numpy as np


def main():
    """Annotate localisation data items for a project.

    Parses the command line, loads the .yaml config and project
    metadata, then for each preprocessed .parquet item: renders it as a
    histogram, runs manual segmentation to obtain annotation markers,
    saves the markers (.npy) and the annotated .parquet, and finally
    writes the config used to ``annotate.yaml`` in the project folder.

    Raises:
        ValueError: if the input folder has no preprocessed files, or
            if config["dim"] is neither 2 nor 3.
    """
    parser = argparse.ArgumentParser(description="Annotate the data.")
    parser.add_argument(
        "-i",
        "--project_directory",
        action="store",
        type=str,
        help="the location of the project directory",
        required=True,
    )
    parser.add_argument(
        "-c",
        "--config",
        action="store",
        type=str,
        help="the location of the .yaml configuration file for preprocessing",
        required=True,
    )
    parser.add_argument(
        "-m",
        "--project_metadata",
        action="store_true",
        help="check the metadata for the specified project and"
        "seek confirmation!",
    )
    parser.add_argument(
        "-r",
        "--relabel",
        action="store_true",
        default=False,
        help="If true will relabel and assume labels are present (default = False)",
    )

    args = parser.parse_args()
    project_folder = args.project_directory

    # load config
    with open(args.config, "r") as ymlfile:
        config = yaml.safe_load(ymlfile)

    metadata_path = os.path.join(project_folder, "metadata.json")
    with open(metadata_path) as file:
        metadata = json.load(file)

    # optionally show the project metadata and require explicit confirmation
    if args.project_metadata:
        print("".join([f"{key} : {value} \n" for key, value in metadata.items()]))
        check = input("Are you happy with this? (YES)")
        if check != "YES":
            exit()

    # record the time this script was run in the metadata file
    file = os.path.basename(__file__)
    if file in metadata:
        print("Overwriting metadata...")
    metadata[file] = time.asctime(time.gmtime(time.time()))
    with open(metadata_path, "w") as outfile:
        json.dump(metadata, outfile)

    # choose input: previously-annotated items when relabelling,
    # otherwise the unlabelled preprocessed items
    if args.relabel:
        input_folder = os.path.join(project_folder, "annotate/annotated")
    else:
        input_folder = os.path.join(project_folder, "preprocess/no_gt_label")
    print(input_folder)
    try:
        files = os.listdir(input_folder)
    except FileNotFoundError as err:
        # chain the cause so the missing path is visible in the traceback
        raise ValueError("There should be some preprocessed files to open") from err

    # if output directory not present create it
    output_folder = os.path.join(project_folder, "annotate/annotated")
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # if markers directory not present create it
    markers_folder = os.path.join(project_folder, "markers")
    if not os.path.exists(markers_folder):
        os.makedirs(markers_folder)

    # histogram shape from the config dimensionality
    if config["dim"] == 2:
        histo_size = (config["x_bins"], config["y_bins"])
    elif config["dim"] == 3:
        histo_size = (config["x_bins"], config["y_bins"], config["z_bins"])
    else:
        raise ValueError("Dim should be 2 or 3")

    for file in files:
        item = datastruc.item(None, None, None, None, None)
        item.load_from_parquet(os.path.join(input_folder, file))

        # skip items already annotated, unless relabelling
        # N.B. assumes save_to_parquet's naming convention:
        # os.path.join(save_folder, self.name + '.parquet')
        parquet_save_loc = os.path.join(output_folder, item.name + ".parquet")
        if os.path.exists(parquet_save_loc) and not args.relabel:
            print(f"Skipping file as already present: {parquet_save_loc}")
            continue

        # convert coordinates to a histogram of the configured size
        item.coord_2_histo(histo_size)

        # location the annotation markers are saved to / loaded from
        markers_loc = os.path.join(markers_folder, item.name + ".npy")

        # manual segmentation produces the markers for this item
        markers = item.manual_segment(relabel=args.relabel, markers_loc=markers_loc)

        # save markers
        np.save(markers_loc, markers)

        # save df to parquet with mapping metadata
        item.save_to_parquet(
            output_folder,
            drop_zero_label=config["drop_zero_label"],
            gt_label_map=config["gt_label_map"],
            overwrite=args.relabel,
        )

    # save the config used for this run alongside the project
    yaml_save_loc = os.path.join(project_folder, "annotate.yaml")
    with open(yaml_save_loc, "w") as outfile:
        yaml.dump(config, outfile)
# Script entry point: run annotation when executed directly.
if __name__ == "__main__":
    main()