Skip to content

Commit

Permalink
[DC-3645] Update Union QC notebook with fixes for column name and moving notes check (#1830)
Browse files Browse the repository at this point in the history

[DC-3645] updated QC notebook for ehr_union and moved 1 check to combined.
  • Loading branch information
ratuagga committed Jan 29, 2024
1 parent 02fca65 commit 939624f
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 32 deletions.
24 changes: 24 additions & 0 deletions data_steward/analytics/cdr_ops/combined.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,30 @@
execute(client, query)
# -

# ## Verify Note text data

# +
# Build a single result set that tallies the distinct values found in three
# columns of the `note` table: note_text, note_title and note_source_value.
# Each UNION ALL branch labels its rows with the column name in `field`, puts
# the distinct value in `field_value`, and a per-value count in `row_count`.
# NOTE(review): COUNT(<column>) ignores NULLs, so the group whose value is NULL
# reports row_count = 0 rather than the number of NULL rows -- confirm that is
# the intended output for this check.
# PROJECT_ID and DATASET_ID are interpolated by the f-string; neither is
# defined in this cell.
query = f'''
SELECT 'note_text' AS field, note_text AS field_value, COUNT(note_text) AS row_count,
FROM `{PROJECT_ID}.{DATASET_ID}.note`
GROUP BY note_text
UNION ALL
SELECT 'note_title' AS field, note_title AS field_value, COUNT(note_title) AS row_count,
FROM `{PROJECT_ID}.{DATASET_ID}.note`
GROUP BY note_title
UNION ALL
SELECT 'note_source_value' AS field, note_source_value AS field_value, COUNT(note_source_value) AS row_count,
FROM `{PROJECT_ID}.{DATASET_ID}.note`
GROUP BY note_source_value
'''

# Hand the assembled SQL and the client to the `execute` helper.
execute(client, query)
# -

# ## Date and datetime fields should have the same date
# The date represented by associated `_date` and `_datetime` fields of the same
# row should be the same. If there are any discrepancies, there may be a bug in the
Expand Down
40 changes: 8 additions & 32 deletions data_steward/analytics/cdr_ops/ehr_union_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@
GROUP BY
src_hpo_id)
SELECT
cc.src_hpo_id,
src_hpo_id,
coalesce(previous_unioned_ct, 0) as previous_unioned_ct,
coalesce(current_unioned_ct, 0) AS current_unioned_ct,
coalesce(cast((current_unioned_ct - previous_unioned_ct)/ current_unioned_ct * 100 as INT64), 100) as percentage_change
Expand All @@ -172,77 +172,53 @@
execute(client, query)
# -

# ## Verify Note text data

# +
# Build a single result set that tallies the distinct values found in three
# columns of the `note` table: note_text, note_title and note_source_value.
# Each UNION ALL branch labels its rows with the column name in `field`, puts
# the distinct value in `field_value`, and a per-value count in `row_count`.
# NOTE(review): COUNT(<column>) ignores NULLs, so the group whose value is NULL
# reports row_count = 0 rather than the number of NULL rows -- confirm that is
# the intended output for this check.
# PROJECT_ID and CURRENT_UNIONED_EHR_DATASET_ID are interpolated by the
# f-string; neither is defined in this cell.
query = f'''
SELECT 'note_text' AS field, note_text AS field_value, COUNT(note_text) AS row_count,
FROM `{PROJECT_ID}.{CURRENT_UNIONED_EHR_DATASET_ID}.note`
GROUP BY note_text
UNION ALL
SELECT 'note_title' AS field, note_title AS field_value, COUNT(note_title) AS row_count,
FROM `{PROJECT_ID}.{CURRENT_UNIONED_EHR_DATASET_ID}.note`
GROUP BY note_title
UNION ALL
SELECT 'note_source_value' AS field, note_source_value AS field_value, COUNT(note_source_value) AS row_count,
FROM `{PROJECT_ID}.{CURRENT_UNIONED_EHR_DATASET_ID}.note`
GROUP BY note_source_value
'''

# Hand the assembled SQL and the client to the `execute` helper.
execute(client, query)
# -

# ## Verifying no data past cut-off date

# +
query = f'''
SELECT
'observation' AS TABLE,
COUNT(*) AS non_clompling_rows
COUNT(*) AS non_complying_rows
FROM
`{PROJECT_ID}.{CURRENT_UNIONED_EHR_DATASET_ID}.observation`
WHERE
observation_date > DATE('{EHR_CUTOFF_DATE}')
UNION ALL
SELECT
'measurement' AS TABLE,
COUNT(*) AS non_clompling_rows
COUNT(*) AS non_complying_rows
FROM
`{PROJECT_ID}.{CURRENT_UNIONED_EHR_DATASET_ID}.measurement`
WHERE
measurement_date > DATE('{EHR_CUTOFF_DATE}')
UNION ALL
SELECT
'visit_occurrence' AS TABLE,
COUNT(*) AS non_clompling_rows
COUNT(*) AS non_complying_rows
FROM
`{PROJECT_ID}.{CURRENT_UNIONED_EHR_DATASET_ID}.visit_occurrence`
WHERE
visit_end_date > DATE('{EHR_CUTOFF_DATE}')
UNION ALL
SELECT
'drug_exposure' AS TABLE,
COUNT(*) AS non_clompling_rows
COUNT(*) AS non_complying_rows
FROM
`{PROJECT_ID}.{CURRENT_UNIONED_EHR_DATASET_ID}.drug_exposure`
WHERE
drug_exposure_end_date > DATE('{EHR_CUTOFF_DATE}')
UNION ALL
SELECT
'procedure' AS TABLE,
COUNT(*) AS non_clompling_rows
COUNT(*) AS non_complying_rows
FROM
`{PROJECT_ID}.{CURRENT_UNIONED_EHR_DATASET_ID}.procedure_occurrence`
WHERE
procedure_date > DATE('{EHR_CUTOFF_DATE}')
UNION ALL
SELECT
'visit_detail' AS TABLE,
COUNT(*) AS non_clompling_rows
COUNT(*) AS non_complying_rows
FROM
`{PROJECT_ID}.{CURRENT_UNIONED_EHR_DATASET_ID}.visit_detail`
WHERE
Expand Down Expand Up @@ -353,7 +329,7 @@
SELECT
aou_death_id,
CASE WHEN aou_death_id IN (
SELECT aou_death_id FROM `{{project_id}}.{{dataset_id}}.aou_death`
SELECT aou_death_id FROM `{{project_id}}.{{dataset}}.aou_death`
WHERE death_date IS NOT NULL -- NULL death_date records must not become primary --
QUALIFY RANK() OVER (
PARTITION BY person_id
Expand Down

0 comments on commit 939624f

Please sign in to comment.