dssg · courtneyjean · May 10, 2020 · May 10, 2020 · May 10, 2020 · May 10, 2020
diff --git a/conf/infra/models_schema.sql b/conf/infra/models_schema.sql
@@ -164,6 +164,9 @@ create table segmentation.predictions(
    output_image_seg varchar,
    output_image_orig varchar,
    output_image_overlay varchar,
+   min_pixel_intensity float,
+   max_pixel_intensity float,
+   np_prediction_total float,
    primary key(prediction_id)
 );
 
@@ -182,6 +185,8 @@ create table segmentation.evaluations(
     score_value float,
     gt_view_name varchar,
     pred_view_name varchar,
+    dataset varchar,
+    seg_model varchar,
     primary key(evaluation_id)
 );
 

diff --git a/requirements.txt b/requirements.txt
@@ -10,3 +10,4 @@ black>=19.3b0
 shapely>=1.6
 pyinquirer>=1.0
 tqdm>=4.34
+medpy>=0.4.0
diff --git a/src/usal_echo/d00_utils/db_utils.py b/src/usal_echo/d00_utils/db_utils.py
@@ -183,10 +183,15 @@ def __init__(self):
             self.engine.execute(CreateSchema(self.schema))
 
     def save_prediction_numpy_array_to_db(self, binary_data_array, column_names):
-        # Columns names are:prediction_id	study_id	instance_id	file_name
-        # num_frames	model_name	date_run	output_np_lv	output_np_la
-        # output_np_lvo	output_image_seg	output_image_orig	output_image_overlay
-        sql = "insert into {}.{} ({}) values ('{}', '{}', '{}', '{}', '{}', '{}', {}, {}, {}, '{}', '{}', '{}')".format(
+        # Column names and types are:
+        # prediction_id serial, study_id integer, instance_id integer, file_name varchar,
+        # num_frames integer, model_name varchar, date_run timestamp with time zone,
+        # output_np_lv bytea, output_np_la bytea, output_np_lvo bytea, output_image_seg varchar,
+        # output_image_orig varchar, output_image_overlay varchar, min_pixel_intensity float,
+        # max_pixel_intensity float, np_prediction_total float
+
+
+        sql = "insert into {}.{} ({}) values ('{}', '{}', '{}', '{}', '{}', '{}', {}, {}, {}, '{}', '{}', '{}', '{}', '{}', '{}')".format(
             self.schema,
             "predictions",
             ",".join(column_names),
@@ -202,6 +207,9 @@ def save_prediction_numpy_array_to_db(self, binary_data_array, column_names):
             binary_data_array[9],
             binary_data_array[10],
             binary_data_array[11],
+            binary_data_array[12],
+            binary_data_array[13],
+            binary_data_array[14]
         )
         self.cursor.execute(sql)
         self.raw_conn.commit()
@@ -266,7 +274,7 @@ def save_seg_evaluation_to_db(self, df, column_names, if_exists="append"):
 
         # Create new database table from empty dataframe
         # df.to_sql('evaluation', self.engine, self.schema, if_exists, index=False)
-        sql = "insert into {}.{} ({}) values ('{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}')".format(
+        sql = "insert into {}.{} ({}) values ('{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}')".format(
             self.schema,
             "evaluations",
             ",".join(column_names),
@@ -278,6 +286,9 @@ def save_seg_evaluation_to_db(self, df, column_names, if_exists="append"):
             df[5],
             df[6],
             df[7],
+            df[8],
+            df[9],
+            df[10]
         )
         self.cursor.execute(sql)
         self.raw_conn.commit()

diff --git a/src/usal_echo/d02_intermediate/clean_dcm.py b/src/usal_echo/d02_intermediate/clean_dcm.py
@@ -41,4 +41,14 @@ def clean_dcm_meta():
     meta_lite = metadata[metadata["tags"].isin(dicom_tags.values())]
 
     io_clean.save_to_db(meta_lite, "meta_lite")
+
+    # Build a per-file colour scheme lookup from the rows whose DICOM tag is
+    # (0028,0004), keeping the first value seen for each filename.
+    colour_scheme_lookup =  meta_lite[(meta_lite['tag1'] == '0028') & (meta_lite['tag2'] == '0004')].copy()
+    colour_scheme_lookup =  colour_scheme_lookup.drop_duplicates()
+    colour_scheme_lookup =  colour_scheme_lookup.drop_duplicates(subset='filename', keep='first')
+    colour_scheme_lookup =  colour_scheme_lookup.rename(columns={'value':'colour_scheme'})
+
+    io_clean.save_to_db(colour_scheme_lookup, "colour_scheme_lookup")
+
     logger.info("Metadata filtered.")
diff --git a/src/usal_echo/d02_intermediate/download_dcm.py b/src/usal_echo/d02_intermediate/download_dcm.py
@@ -6,10 +6,10 @@
 import numpy as np
 import subprocess
 
-from scipy.misc import imresize
+from skimage.transform import resize
+from PIL import Image
 import cv2
 import pydicom
-from skimage.color import rgb2gray
 
 from usal_echo.d00_utils.db_utils import dbReadWriteViews
 from usal_echo.d00_utils.s3_utils import download_s3_objects
@@ -18,13 +18,7 @@
 logger = setup_logging(__name__, __name__)
 
 
-def _ybr2gray(y, u, v):
-    r = y + 1.402 * (v - 128)
-    g = y - 0.34414 * (u - 128) - 0.71414 * (v - 128)
-    b = y + 1.772 * (u - 128)
-    gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
 
-    return np.array(gray, dtype="int8")
 
 
 def decompress_dcm(dcm_filepath, dcmraw_filepath):
@@ -60,7 +54,7 @@ def _split_train_test(ratio, table_name):
     df = io_views.get_table(table_name)
 
     np.random.seed(0)
-    msk = np.random.rand(len(df)) < ratio
+    msk = np.random.rand(len(df)) > ratio
     df_train = df[msk].reset_index(drop=True)
     df_test = df[~msk].reset_index(drop=True)
 
@@ -110,7 +104,7 @@ def s3_download_decomp_dcm(
     :param downsample_ratio (float): percentage by which to downsample dataset
                          e.g. if ratio=0.1, will downsample by a factor of 10
     :param train_test_ratio (float): ratio for splitting into train/test
-    :param table_name (str): name of views.table with master instancest
+    :param table_name (str): name of views.table with master instances
     :param train (bool): download train set instead of test set, default=False
 
     """
@@ -192,46 +186,18 @@ def _dcmraw_to_np(dcmraw_obj):
 
     if len(pxl_array.shape) == 4:  # format 3, nframes, nrow, ncol
         nframes = pxl_array.shape[1]
-        maxframes = nframes * 3
     elif len(pxl_array.shape) == 3:  # format nframes, nrow, ncol
         nframes = pxl_array.shape[0]
-        maxframes = nframes * 1
 
     nrow = int(dcmraw_obj.Rows)
     ncol = int(dcmraw_obj.Columns)
     ArrayDicom = np.zeros((nrow, ncol), dtype=pxl_array.dtype)
     framedict = {}
-
-    for counter in range(0, maxframes, 3):  # iterate through all subframes
-        k = counter % nframes
-        j = (counter) // nframes
-        m = (counter + 1) % nframes
-        l = (counter + 1) // nframes
-        o = (counter + 2) % nframes
-        n = (counter + 2) // nframes
-
-        if len(pxl_array.shape) == 4:
-            a = pxl_array[j, k, :, :]
-            b = pxl_array[l, m, :, :]
-            c = pxl_array[n, o, :, :]
-            ArrayDicom[:, :] = _ybr2gray(a, b, c)
-            ArrayDicom[0 : int(nrow / 10), 0 : int(ncol)] = 0  # blanks out name
-            counter = counter + 1
-            ArrayDicom.clip(0)
-            nrowout = nrow
-            ncolout = ncol
-            x = int(counter / 3)
-            framedict[x] = imresize(ArrayDicom, (nrowout, ncolout))
-        elif len(pxl_array.shape) == 3:
-            ArrayDicom[:, :] = pxl_array[counter, :, :]
-            ArrayDicom[0 : int(nrow / 10), 0 : int(ncol)] = 0  # blanks out name
-            counter = counter + 1
-            ArrayDicom.clip(0)
-            nrowout = nrow
-            ncolout = ncol
-            x = int(counter / 3)
-            framedict[x] = imresize(ArrayDicom, (nrowout, ncolout))
-
+
+    for i in range(nframes):
+        ArrayDicom[:, :] = pxl_array[0, i, :, :].copy()
+        ArrayDicom[0 : int(nrow / 10), 0 : int(ncol)] = 0
+        framedict[i] = np.resize(ArrayDicom, (nrow,ncol))
     return framedict
 
 
@@ -332,8 +298,9 @@ def dcm_to_segmentation_arrays(dcm_dir, filename):
 
         for key in list(framedict.keys()):
             image = np.zeros((384, 384))
-            image[:, :] = imresize(rgb2gray(framedict[key]), (384, 384, 1))
-            images.append(image)
+            image[:, :] = resize(framedict[key], (384, 384), anti_aliasing=True)
+            image = 255 * image
+            images.append(image.astype(np.uint8))
             orig_images.append(framedict[key])
 
         images = np.array(images).reshape((len(images), 384, 384, 1))

diff --git a/src/usal_echo/d02_intermediate/instance_filters/filter_views.py b/src/usal_echo/d02_intermediate/instance_filters/filter_views.py
@@ -256,7 +256,7 @@ def filter_by_views():
     inst_3 = df_ultra_color_filt["instanceidk"].tolist()
 
     # Get instances that passed all filtering steps
-    inst_final = list(set(inst_1) & set(inst_2) & set(inst_3))
+    inst_final = list(set().union(inst_1, inst_2, inst_3))
 
     # Filter out instances that do not meet the dicom metadata criteria
     df = df_inst_all

diff --git a/src/usal_echo/d04_segmentation/create_seg_view.py b/src/usal_echo/d04_segmentation/create_seg_view.py
@@ -6,11 +6,14 @@
 
 @author: court
 """
+from usal_echo.d00_utils.log_utils import setup_logging
 from usal_echo.d00_utils.db_utils import dbReadWriteClean, dbReadWriteViews
 from usal_echo.d04_segmentation.segment_utils import *
 
 import pandas as pd
 
+logger = setup_logging(__name__, __name__)
+
 
 def create_seg_view():
     """
@@ -191,3 +194,53 @@ def create_seg_view():
     del a_modvolume_df
 
     io_views.save_to_db(df_9, "chords_by_volume_mask")
+
+    # create studies_w_segmentation_labels table
+    df_10 = df_9.groupby(
+        ["studyidk", "instanceidk", "indexinmglist"]).agg(
+        {
+            "x1coordinate": list,
+            "y1coordinate": list,
+            "x2coordinate": list,
+            "y2coordinate": list,
+            "chamber": pd.Series.unique,
+            "frame": pd.Series.unique,
+            "view": pd.Series.unique,
+            "instancefilename": pd.Series.unique,
+        }
+    )
+    df_10 = df_10.reset_index()
+    df_11 = df_10[df_10['chamber'] != ""]
+
+    #get unique study ids
+    df_12 = pd.DataFrame(df_11['studyidk'].unique())
+    df_12.columns = ['studyidk']
+    io_views.save_to_db(df_12, "studies_w_segmentation_labels")
+
+    # Get the study ids that have a pair of segmentation masks (lv and la) on the same frame
+
+    gt_LA = df_11[df_11['chamber'] == 'la']
+    gt_LV = df_11[df_11['chamber'] == 'lv']
+
+    logger.info('gt_LA Shape : {} rows {} columns'.format(gt_LA.shape[0], gt_LA.shape[1]))
+    logger.info('gt_LV Shape : {} rows {} columns'.format(gt_LV.shape[0], gt_LV.shape[1]))
+
+    #inner join only includes rows where there is a match on the stated columns
+    gt_lv_la_pairs = pd.merge(gt_LA, gt_LV, how='inner', on=['studyidk', 'instanceidk', 'instancefilename', 
+                                                             'frame', 'view'])
+
+    gt_lv_la_pairs = gt_lv_la_pairs.rename(columns={'ground_truth_id_x':'ground_truth_id_la',
+                                   'chamber_x':'chamber_la',
+                                   'numpy_array_x':'numpy_array_la',
+                                   'ground_truth_id_y':'ground_truth_id_lv',
+                                   'chamber_y':'chamber_lv',
+                                   'numpy_array_y':'numpy_array_lv'})
+
+    instances_w_lv_la_segmentation_pairs = gt_lv_la_pairs[['studyidk']].copy()  
+    instances_w_lv_la_segmentation_pairs = instances_w_lv_la_segmentation_pairs.drop_duplicates()  
+
+    logger.info('instances_w_lv_la_segmentation_pairs, shape: {}'.format(instances_w_lv_la_segmentation_pairs.shape))
+    logger.info('instances_w_lv_la_segmentation_pairs format is {}'.format(",".join(instances_w_lv_la_segmentation_pairs.columns)))
+
+    io_views.save_to_db(instances_w_lv_la_segmentation_pairs, "instances_w_lv_la_segmentation_pairs")
+