Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CI May update #1

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions conf/infra/models_schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@ create table segmentation.predictions(
output_image_seg varchar,
output_image_orig varchar,
output_image_overlay varchar,
min_pixel_intensity float,
max_pixel_intensity float,
np_prediction_total float,
primary key(prediction_id)
);

Expand All @@ -182,6 +185,8 @@ create table segmentation.evaluations(
score_value float,
gt_view_name varchar,
pred_view_name varchar,
dataset varchar,
seg_model varchar,
primary key(evaluation_id)
);

Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ black>=19.3b0
shapely>=1.6
pyinquirer>=1.0
tqdm>=4.34
medpy>=0.4.0
21 changes: 16 additions & 5 deletions src/usal_echo/d00_utils/db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,10 +183,15 @@ def __init__(self):
self.engine.execute(CreateSchema(self.schema))

def save_prediction_numpy_array_to_db(self, binary_data_array, column_names):
# Columns names are:prediction_id study_id instance_id file_name
# num_frames model_name date_run output_np_lv output_np_la
# output_np_lvo output_image_seg output_image_orig output_image_overlay
sql = "insert into {}.{} ({}) values ('{}', '{}', '{}', '{}', '{}', '{}', {}, {}, {}, '{}', '{}', '{}')".format(
# Column names and their types are:
# prediction_id serial, study_id integer, instance_id integer, file_name varchar,
# num_frames integer, model_name varchar, date_run timestamp with time zone,
# output_np_lv bytea, output_np_la bytea, output_np_lvo bytea, output_image_seg varchar,
# output_image_orig varchar, output_image_overlay varchar, min_pixel_intensity float,
# max_pixel_intensity float, np_prediction_total float


sql = "insert into {}.{} ({}) values ('{}', '{}', '{}', '{}', '{}', '{}', {}, {}, {}, '{}', '{}', '{}', '{}', '{}', '{}')".format(
self.schema,
"predictions",
",".join(column_names),
Expand All @@ -202,6 +207,9 @@ def save_prediction_numpy_array_to_db(self, binary_data_array, column_names):
binary_data_array[9],
binary_data_array[10],
binary_data_array[11],
binary_data_array[12],
binary_data_array[13],
binary_data_array[14]
)
self.cursor.execute(sql)
self.raw_conn.commit()
Expand Down Expand Up @@ -266,7 +274,7 @@ def save_seg_evaluation_to_db(self, df, column_names, if_exists="append"):

# Create new database table from empty dataframe
# df.to_sql('evaluation', self.engine, self.schema, if_exists, index=False)
sql = "insert into {}.{} ({}) values ('{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}')".format(
sql = "insert into {}.{} ({}) values ('{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}')".format(
self.schema,
"evaluations",
",".join(column_names),
Expand All @@ -278,6 +286,9 @@ def save_seg_evaluation_to_db(self, df, column_names, if_exists="append"):
df[5],
df[6],
df[7],
df[8],
df[9],
df[10]
)
self.cursor.execute(sql)
self.raw_conn.commit()
Expand Down
10 changes: 10 additions & 0 deletions src/usal_echo/d02_intermediate/clean_dcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,14 @@ def clean_dcm_meta():
meta_lite = metadata[metadata["tags"].isin(dicom_tags.values())]

io_clean.save_to_db(meta_lite, "meta_lite")

# Create a colour scheme lookup for filenames
colour_scheme_lookup = meta_lite[(meta_lite['tag1'] == '0028') & (meta_lite['tag2'] == '0004')].copy()
colour_scheme_lookup = colour_scheme_lookup.drop_duplicates()
colour_scheme_lookup = colour_scheme_lookup.drop_duplicates(subset='filename', keep='first')
colour_scheme_lookup = colour_scheme_lookup.rename(columns={'value':'colour_scheme'})

io_clean.save_to_db(colour_scheme_lookup, "colour_scheme_lookup")

logger.info("Metadata filtered.")
57 changes: 12 additions & 45 deletions src/usal_echo/d02_intermediate/download_dcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
import numpy as np
import subprocess

from scipy.misc import imresize
from skimage.transform import resize
from PIL import Image
import cv2
import pydicom
from skimage.color import rgb2gray

from usal_echo.d00_utils.db_utils import dbReadWriteViews
from usal_echo.d00_utils.s3_utils import download_s3_objects
Expand All @@ -18,13 +18,7 @@
logger = setup_logging(__name__, __name__)


def _ybr2gray(y, u, v):
    """Convert YBR (luma / blue-difference / red-difference) planes to grayscale.

    The planes are first converted to RGB using offsets of 128 on the two
    chroma planes, then the RGB channels are combined into a single
    luminance value with the weights 0.2989 / 0.5870 / 0.1140.

    :param y: luma plane (scalar or array-like)
    :param u: blue-difference chroma plane, same shape as ``y``
    :param v: red-difference chroma plane, same shape as ``y``
    :return: ``numpy.ndarray`` of dtype ``uint8`` with values in 0..255

    """
    # Compute in float: on unsigned 8-bit input ``u - 128`` would otherwise
    # wrap around for every chroma value below 128.
    y = np.asarray(y, dtype=np.float64)
    u = np.asarray(u, dtype=np.float64)
    v = np.asarray(v, dtype=np.float64)

    cb = u - 128
    cr = v - 128

    r = y + 1.402 * cr
    g = y - 0.34414 * cb - 0.71414 * cr
    b = y + 1.772 * cb
    gray = 0.2989 * r + 0.5870 * g + 0.1140 * b

    # Clamp before the cast, and use an unsigned type: a signed 8-bit result
    # cannot represent grey levels 128..255.
    return np.clip(gray, 0, 255).astype(np.uint8)


def decompress_dcm(dcm_filepath, dcmraw_filepath):
Expand Down Expand Up @@ -60,7 +54,7 @@ def _split_train_test(ratio, table_name):
df = io_views.get_table(table_name)

np.random.seed(0)
msk = np.random.rand(len(df)) < ratio
msk = np.random.rand(len(df)) > ratio
df_train = df[msk].reset_index(drop=True)
df_test = df[~msk].reset_index(drop=True)

Expand Down Expand Up @@ -110,7 +104,7 @@ def s3_download_decomp_dcm(
:param downsample_ratio (float): percentage by which to downsample dataset
e.g. if ratio=0.1, will downsample by a factor of 10
:param train_test_ratio (float): ratio for splitting into train/test
:param table_name (str): name of views.table with master instancest
:param table_name (str): name of views.table with master instances
:param train (bool): download train set instead of test set, default=False

"""
Expand Down Expand Up @@ -192,46 +186,18 @@ def _dcmraw_to_np(dcmraw_obj):

if len(pxl_array.shape) == 4: # format 3, nframes, nrow, ncol
nframes = pxl_array.shape[1]
maxframes = nframes * 3
elif len(pxl_array.shape) == 3: # format nframes, nrow, ncol
nframes = pxl_array.shape[0]
maxframes = nframes * 1

nrow = int(dcmraw_obj.Rows)
ncol = int(dcmraw_obj.Columns)
ArrayDicom = np.zeros((nrow, ncol), dtype=pxl_array.dtype)
framedict = {}

for counter in range(0, maxframes, 3): # iterate through all subframes
k = counter % nframes
j = (counter) // nframes
m = (counter + 1) % nframes
l = (counter + 1) // nframes
o = (counter + 2) % nframes
n = (counter + 2) // nframes

if len(pxl_array.shape) == 4:
a = pxl_array[j, k, :, :]
b = pxl_array[l, m, :, :]
c = pxl_array[n, o, :, :]
ArrayDicom[:, :] = _ybr2gray(a, b, c)
ArrayDicom[0 : int(nrow / 10), 0 : int(ncol)] = 0 # blanks out name
counter = counter + 1
ArrayDicom.clip(0)
nrowout = nrow
ncolout = ncol
x = int(counter / 3)
framedict[x] = imresize(ArrayDicom, (nrowout, ncolout))
elif len(pxl_array.shape) == 3:
ArrayDicom[:, :] = pxl_array[counter, :, :]
ArrayDicom[0 : int(nrow / 10), 0 : int(ncol)] = 0 # blanks out name
counter = counter + 1
ArrayDicom.clip(0)
nrowout = nrow
ncolout = ncol
x = int(counter / 3)
framedict[x] = imresize(ArrayDicom, (nrowout, ncolout))


for i in range(nframes):
ArrayDicom[:, :] = pxl_array[0, i, :, :].copy()
ArrayDicom[0 : int(nrow / 10), 0 : int(ncol)] = 0
framedict[i] = np.resize(ArrayDicom, (nrow,ncol))
return framedict


Expand Down Expand Up @@ -332,8 +298,9 @@ def dcm_to_segmentation_arrays(dcm_dir, filename):

for key in list(framedict.keys()):
image = np.zeros((384, 384))
image[:, :] = imresize(rgb2gray(framedict[key]), (384, 384, 1))
images.append(image)
image[:, :] = resize(framedict[key], (384, 384), anti_aliasing=True)
image = 255 * image
images.append(image.astype(np.uint8))
orig_images.append(framedict[key])

images = np.array(images).reshape((len(images), 384, 384, 1))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def filter_by_views():
inst_3 = df_ultra_color_filt["instanceidk"].tolist()

# Get instances that passed all filtering steps
inst_final = list(set(inst_1) & set(inst_2) & set(inst_3))
inst_final = list(set().union(inst_1, inst_2, inst_3))

# Filter out instances that do not meet the dicom metadata criteria
df = df_inst_all
Expand Down
53 changes: 53 additions & 0 deletions src/usal_echo/d04_segmentation/create_seg_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@

@author: court
"""
from usal_echo.d00_utils.log_utils import setup_logging
from usal_echo.d00_utils.db_utils import dbReadWriteClean, dbReadWriteViews
from usal_echo.d04_segmentation.segment_utils import *

import pandas as pd

logger = setup_logging(__name__, __name__)


def create_seg_view():
"""
Expand Down Expand Up @@ -191,3 +194,53 @@ def create_seg_view():
del a_modvolume_df

io_views.save_to_db(df_9, "chords_by_volume_mask")

# create studies_w_segmentation_labels table
df_10 = df_9.groupby(
["studyidk", "instanceidk", "indexinmglist"]).agg(
{
"x1coordinate": list,
"y1coordinate": list,
"x2coordinate": list,
"y2coordinate": list,
"chamber": pd.Series.unique,
"frame": pd.Series.unique,
"view": pd.Series.unique,
"instancefilename": pd.Series.unique,
}
)
df_10 = df_10.reset_index()
df_11 = df_10[df_10['chamber'] != ""]

#get unique study ids
df_12 = pd.DataFrame(df_11['studyidk'].unique())
df_12.columns = ['studyidk']
io_views.save_to_db(df_12, "studies_w_segmentation_labels")

# Get the study ids that have a pair of segmentation masks (lv and la) on the same frame

gt_LA = df_11[df_11['chamber'] == 'la']
gt_LV = df_11[df_11['chamber'] == 'lv']

logger.info('gt_LA Shape : {} rows {} columns'.format(gt_LA.shape[0], gt_LA.shape[1]))
logger.info('gt_LV Shape : {} rows {} columns'.format(gt_LV.shape[0], gt_LV.shape[1]))

#inner join only includes rows where there is a match on the stated columns
gt_lv_la_pairs = pd.merge(gt_LA, gt_LV, how='inner', on=['studyidk', 'instanceidk', 'instancefilename',
'frame', 'view'])

gt_lv_la_pairs = gt_lv_la_pairs.rename(columns={'ground_truth_id_x':'ground_truth_id_la',
'chamber_x':'chamber_la',
'numpy_array_x':'numpy_array_la',
'ground_truth_id_y':'ground_truth_id_lv',
'chamber_y':'chamber_lv',
'numpy_array_y':'numpy_array_lv'})

instances_w_lv_la_segmentation_pairs = gt_lv_la_pairs[['studyidk']].copy()
instances_w_lv_la_segmentation_pairs = instances_w_lv_la_segmentation_pairs.drop_duplicates()

logger.info('instances_w_lv_la_segmentation_pairs, shape: {}'.format(instances_w_lv_la_segmentation_pairs.shape))
logger.info('instances_w_lv_la_segmentation_pairs format is {}'.format(",".join(instances_w_lv_la_segmentation_pairs.columns)))

io_views.save_to_db(instances_w_lv_la_segmentation_pairs, "instances_w_lv_la_segmentation_pairs")

Loading