
Commit

Merge pull request #20 from dsp-uga/develop
adds version 1.0
vamsi3309 authored Mar 28, 2018
2 parents 1fafa67 + 4da850e commit f619da0
Showing 14 changed files with 822 additions and 0 deletions.
15 changes: 15 additions & 0 deletions .gitignore
@@ -1,3 +1,18 @@
# pycharm

.idea/

# ignore data files

data/*.txt
data/train/
data/test/

# ignore output files

output/
export/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
17 changes: 17 additions & 0 deletions data/README.MD
@@ -0,0 +1,17 @@
# Downloader

The file `downloader.sh` is a bash script that downloads the data files from Google Storage; the URLs follow the project instructions.

If other sources are being considered, the URLs should be changed.

To run the downloader, use:

$ bash downloader.sh

## Directory structure

After the script is done with the downloads, the directory will contain the following folders (see the sketch below):

* train -> data : contains the training samples. The ground truth for each sample is in the sample's directory and is named `mask.png`.
* test -> data : contains the test set, for which there is no ground truth.
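
A sketch of the resulting layout (`<hash>` is a placeholder for the sample IDs listed in `train.txt` / `test.txt`):

    train.txt
    test.txt
    train/
      data/
        <hash>/    (sample images plus mask.png)
    test/
      data/
        <hash>/    (sample images, no mask)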

69 changes: 69 additions & 0 deletions data/downloader.sh
@@ -0,0 +1,69 @@
#!/usr/bin/env bash


# this script downloads the files into the current directory and creates the following structure:
# --train
# --|--data ( contains folders named by sample hashes )
# --test
# --|--data ( contains folders named by sample hashes )
#
# masks are downloaded as "mask.png" into each training folder


# download the guide files if test.txt and train.txt are not already present
if [ ! -f "test.txt" ]
then
wget "https://storage.googleapis.com/uga-dsp/project4/test.txt"
fi
if [ ! -f "train.txt" ]
then
wget "https://storage.googleapis.com/uga-dsp/project4/train.txt"
fi

# create folders
if [ ! -d "train" ]
then
mkdir "train"
fi

if [ ! -d "test" ]
then
mkdir "test"
fi

echo "downloading train"

cd train

while read -r line || [[ -n "$line" ]]; do

# if the sample folder is already there, skip the download
if [ ! -d "data/$line" ]
then
wget -nv "https://storage.googleapis.com/uga-dsp/project4/data/$line.tar"
tar -xf "$line.tar"
rm "$line.tar"
cd "data/$line"
wget -v -O "mask.png" "https://storage.googleapis.com/uga-dsp/project4/masks/$line.png"
cd ../..
echo "$line"
fi

done < "../train.txt"


echo "downloading training"
cd ../test

while read -r line || [[ -n "$line" ]]; do
if [ ! -d "data/$line" ]
then
wget -nv "https://storage.googleapis.com/uga-dsp/project4/data/$line.tar"
tar -xf "$line.tar"
rm "$line.tar"
echo "$line"
fi
done < "../test.txt"


echo "over and out"
Empty file added src/__init__.py
Empty file.
125 changes: 125 additions & 0 deletions src/main.py
@@ -0,0 +1,125 @@
"""
This file contains the main runner for the project.
"""

import argparse
import sys
import os
import logging
import numpy as np
from src.preprocessing import preprocessor
from src.preprocessing import EveryOther
from src.segmentation.segmenter import Segmenter
from src.segmentation.UnetSegmenter import UNET_Segmenter
# from src.postprocessing.postprocessing import postProcess
# from src.Classifiers.Classifier import Classifier
# from src.Classifiers.FCN import FCN_Classifier
# from src.Classifiers.UNet_Classifier import UNET_Classifier
from src.postprocessing.Postprocessor import postProcess
description = 'Main runner for the segmentation pipeline'

parser = argparse.ArgumentParser(description=description, add_help='How to use', prog='python main.py <options>')

parser.add_argument("-d", "--dataset", default="../data/tarin/",
help='Path to the training data [DEFAULT: "data/tarin/"]')

parser.add_argument("-ts", "--testset", default=None,
help='Path to the testing data [DEFAULT: None]')

parser.add_argument("-m", "--model", default="unet",
help='model to be used in the segmentation can be UNET/FCN/NMF [DEFAULT: "FCN"]')

parser.add_argument("-t", "--train", action="store_true",
help='To ensure a model is being trained')

parser.add_argument("-p", "--predict", action="store_true",
help='To ensure a segmentation is performed on the test set (This requires --testset to have value)')

parser.add_argument("-e", "--epoch", default="1024",
help='Sets number of epochs for which the network will be trained')

parser.add_argument("-b", "--batch", default="4",
help='sets the batch size for training the models')

parser.add_argument("-pp", "--preprocessor", default="sum",
help='Chooses the Preprcessor to be applied ')

parser.add_argument("-ep", "--exportpath", default=None,

help='Chooses the path to export model and numpy files')

parser.add_argument("-o", "--output", default=None,

help='sets the path for the output files to be stored')


parser.add_argument("-lf", "--logfile", default="log.log",
help="Path to the log file, this file will contain the log records")


# compile arguments
args = parser.parse_args()

# setup logging
logging.basicConfig(filename=args.logfile, level=logging.INFO, filemode="w",
format=" %(asctime)s - %(module)s.%(funcName)s - %(levelname)s : %(message)s ")

the_preprocessor = None
the_Segmenter = None

# set the preprocessor
if args.preprocessor == "everyother":
    the_preprocessor = EveryOther.EveryOther(images_size=[640, 640], trainingPath=args.dataset, testPath=args.testset,
                                             exportPath=args.exportpath, importPath=args.exportpath)
else:
    the_preprocessor = preprocessor.preprocessor()

# set the segmenter:

if args.model == "unet":
the_Segmenter = UNET_Segmenter()
else:
the_Segmenter = Segmenter()

# -------------- Loading the data

# try to load pre-calculated data:
# try:
#     x_train, y_train, x_test, test_size_ref = the_preprocessor.load_from_files()
#
# except FileNotFoundError:
# if there is no file to load, set them to None; they will be loaded automatically
x_train, y_train, x_test, test_size_ref = None, None, None, None

# if there is no data, read it from the original input (this will take time!)
if x_train is None:
    logging.info("Loading data from original data")
    x_train, y_train, x_test, test_size_ref = the_preprocessor.preprocess()
    logging.info("Done loading data from original data")
else:
    logging.info("data loaded from pre-calculated files")
# --------------- Loading the data


# --------------- train model!
if args.train:
    logging.info("Starting training")
    model = the_Segmenter.train(x_train=x_train, y_train=y_train, epochs=int(args.epoch), batch_size=int(args.batch))
    the_Segmenter.saveModel(args.exportpath)
    logging.info("Done with training")
else:
    model = the_Segmenter.load_model(args.exportpath)

# --------------- train model!

# ------------ predict
if args.predict and x_test:
    # run the prediction
    predicted = {}
    for key in x_test:
        print(x_test[key].shape, np.max(x_test[key]), np.min(x_test[key]))
        predicted[key] = the_Segmenter.predict(x_test[key])


    # save the results
    postProcess(theDic=predicted, output_path=args.output, size_dic=test_size_ref)
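
main.py drives its segmenters through a small shared interface. The sketch below is inferred from the call sites above; it is a hedged reconstruction, not the repository's actual base class in src.segmentation:

```python
# Interface implied by main.py's call sites; names and signatures follow
# the usage in this file, not the repository's real Segmenter class.
class SegmenterInterface:
    def train(self, x_train, y_train, epochs, batch_size):
        """Fit the model on the training arrays and return it."""
        raise NotImplementedError

    def predict(self, x):
        """Return the predicted mask for one preprocessed sample."""
        raise NotImplementedError

    def saveModel(self, export_path):
        """Persist the trained weights under export_path."""
        raise NotImplementedError

    def load_model(self, export_path):
        """Load previously saved weights and return the model."""
        raise NotImplementedError
```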
59 changes: 59 additions & 0 deletions src/postprocessing/Postprocessor.py
@@ -0,0 +1,59 @@
""""This file manages the postprocessing of the objects in the images
this is based on script :
https://github.com/dsp-uga/team-ball/blob/master/src/postprocessing/postprocessing.py
"""

import numpy as np
import cv2
import json
import os


def postProcess(theDic, output_path, size_dic):
    """
    Handles the postprocessing of the predicted values.
    :param theDic: dictionary of predictions in the format {"sampleName": valueArray}
    :param output_path: directory to which the output PNG files are written
    :param size_dic: dictionary mapping each sample name to its original image size
    """

    def downsize(inp, size):
        # crop the (padded) prediction back to the original image size
        return inp[:size[0], :size[1]]

    # validate input:
    if not theDic:
        raise ValueError('theDic has to be set!')


    file_name_values_dic = theDic

    final_dic = []

    for key in file_name_values_dic:
        theImage = downsize(file_name_values_dic[key], size_dic[key])
        theImage = 2 * theImage  # scale the predicted values before writing
        cv2.imwrite(os.path.join(output_path, key + ".png"), theImage)
# find connected components
# x, markers = cv2.connectedComponents( file_name_values_dic[key] )
#
# #convert them to the writeable format
# temp = []
# for i in range (1,x) :
# temp.append( list( [int(pick[0]),int(pick[1])] for pick in np.argwhere(markers==i) ))
#
# # convert lists to dictionaries
# temp = [ {"coordinates": pick } for pick in temp ]
#
# # add sample name info to the dictionary
# temp_dic = { "dataset": key,
# "regions": temp
# }
#
# # add to the completed object
# final_dic.append( temp_dic )
#
# # create json file string
# json_dump = json.dumps( final_dic, indent=4 )
# # save the json file string
# with open( output_file_name,'w' ) as file:
# file.write(json_dump)
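
A hypothetical call, to show the expected shapes: keys are sample hashes, values are predictions padded to the preprocessor's working size, and size_dic holds each sample's original (rows, cols). The sample name and sizes below are placeholders:

```python
# Hypothetical usage of postProcess; "abc123" and the sizes are placeholders.
import os
import numpy as np
from src.postprocessing.Postprocessor import postProcess

predicted = {"abc123": np.random.randint(0, 2, (640, 640), dtype=np.uint8)}
sizes = {"abc123": (480, 512)}  # original size before padding to 640x640

os.makedirs("output", exist_ok=True)  # postProcess does not create the directory
postProcess(theDic=predicted, output_path="output", size_dic=sizes)
# writes output/abc123.png, cropped back to 480x512
```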
Empty file added src/postprocessing/__init__.py
Empty file.