Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DC-3786] [DC-3788] Add self reported population columns #1862

Merged
merged 4 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 38 additions & 3 deletions data_steward/cdr_cleaner/cleaning_rules/create_person_ext_table.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Original Issues: DC-1012, DC-1514, DC-3260
Original Issues: DC-1012, DC-1514, DC-3260, DC-3786

Background
In order to avoid further changes to the standard OMOP person table, five non-standard fields will be housed in a
Expand All @@ -16,6 +16,10 @@
sex_at_birth_source_concept_id: value_source_concept_id in observation where observation_source_concept_id = 1585845
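sex_at_birth_source_value: concept_code from the concept table, joined from observation where
observation_source_concept_id = 1585845
self_reported_population_concept_id: value_as_concept_id in observation where observation_source_concept_id = 1586140
(a single answer is remapped per the CASE list in the query, e.g. 1586143 -> 8516; otherwise 2000000008)
self_reported_population_source_concept_id: value_source_concept_id in observation where observation_source_concept_id = 1586140
(2000000008 when the participant does not have exactly one answer)
self_reported_population_source_value: concept_code from the concept table, joined from observation where
observation_source_concept_id = 1586140 ("WhatRaceEthnicity_GeneralizedMultPopulations" when the participant does
not have exactly one answer)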
"""
import logging

Expand All @@ -35,7 +39,10 @@
t.state_of_residence_source_value = c.concept_name,
t.sex_at_birth_concept_id = COALESCE(os.value_as_concept_id, 0),
t.sex_at_birth_source_concept_id = COALESCE(os.value_source_concept_id, 0),
t.sex_at_birth_source_value = COALESCE(sc.concept_code, 'No matching concept')
t.sex_at_birth_source_value = COALESCE(sc.concept_code, 'No matching concept'),
t.self_reported_population_concept_id = COALESCE(srp.value_as_concept_id, 0),
t.self_reported_population_source_concept_id = COALESCE(srp.value_source_concept_id, 0),
t.self_reported_population_source_value = COALESCE(srp.value_source_value, 'No matching concept')
FROM
`{{project}}.{{dataset}}.person` p
LEFT JOIN
Expand All @@ -53,6 +60,34 @@
ON
o.observation_id = e.observation_id
AND o.observation_source_concept_id = 1585249
LEFT JOIN
(SELECT person_id,
CASE WHEN num_ans = 1 THEN (
CASE WHEN value_as_concept_id = 1586143 THEN 8516 /*black/aa*/
WHEN value_as_concept_id = 45877987 THEN 8527 /*white*/
WHEN value_as_concept_id = 45879439 THEN 8515 /*asian*/
WHEN value_as_concept_id = 45877442 THEN 8657 /*aian*/
WHEN value_as_concept_id = 45878240 THEN 8557 /*nhpi*/
WHEN value_as_concept_id = 1586144 THEN 38003615 /*mena*/
ELSE value_as_concept_id END
) ELSE 2000000008
END AS value_as_concept_id,
CASE WHEN num_ans = 1 THEN value_source_concept_id ELSE 2000000008
END AS value_source_concept_id,
CASE WHEN num_ans = 1 THEN co.concept_code ELSE "WhatRaceEthnicity_GeneralizedMultPopulations"
END AS value_source_value
FROM (SELECT person_id,
MIN(value_source_concept_id) value_source_concept_id,
MIN(value_as_concept_id) value_as_concept_id,
COUNT(value_source_concept_id) num_ans
FROM (SELECT person_id, value_as_concept_id, value_source_concept_id
FROM `{{project}}.{{dataset}}.observation`
WHERE observation_source_concept_id = 1586140)
GROUP BY 1) up
JOIN `{{project}}.{{dataset}}.concept` co
ON up.value_source_concept_id = co.concept_id) srp
ON
p.person_id = srp.person_id
LEFT JOIN
(SELECT DISTINCT person_id, observation_source_concept_id, value_as_concept_id, value_source_concept_id
FROM `{{project}}.{{dataset}}.observation`) os
Expand Down Expand Up @@ -90,7 +125,7 @@ def __init__(self,
DO NOT REMOVE ORIGINAL JIRA ISSUE NUMBERS!
"""
desc = ('Create person_ext table')
super().__init__(issue_numbers=['DC1012', 'DC1514', 'DC3260'],
super().__init__(issue_numbers=['DC1012', 'DC1514', 'DC3260', 'DC3786'],
description=desc,
affected_datasets=[
cdr_consts.REGISTERED_TIER_DEID_BASE,
Expand Down
15 changes: 13 additions & 2 deletions data_steward/tools/recreate_person.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

LOGGER = logging.getLogger(__name__)

ISSUE_NUMBERS = ["DC-3788"]

ADD_COLUMNS_QUERY = JINJA_ENV.from_string("""
ALTER TABLE `{{person.project}}.{{person.dataset_id}}.{{person.table_id}}`
ADD COLUMN IF NOT EXISTS state_of_residence_concept_id INT64
Expand All @@ -19,7 +21,13 @@
ADD COLUMN IF NOT EXISTS sex_at_birth_source_concept_id INT64
OPTIONS(description="[All of Us OMOP extension] A foreign key to the biological sex at birth source concept."),
ADD COLUMN IF NOT EXISTS sex_at_birth_source_value STRING
OPTIONS(description="[All of Us OMOP extension] The source code for the biological sex at birth.")
OPTIONS(description="[All of Us OMOP extension] The source code for the biological sex at birth."),
ADD COLUMN IF NOT EXISTS self_reported_population_concept_id INT64
OPTIONS(description="[All of Us OMOP extension] A foreign key to the participant's self-reported population concept."),
ADD COLUMN IF NOT EXISTS self_reported_population_source_concept_id INT64
OPTIONS(description="[All of Us OMOP extension] A foreign key to the participant's self-reported population source concept."),
ADD COLUMN IF NOT EXISTS self_reported_population_source_value STRING
OPTIONS(description="[All of Us OMOP extension] The source code for the participant's self reported population.")
""")

UPDATE_PERSON_QUERY = JINJA_ENV.from_string("""
Expand All @@ -29,7 +37,10 @@
state_of_residence_source_value = ext.state_of_residence_source_value,
sex_at_birth_concept_id = ext.sex_at_birth_concept_id,
sex_at_birth_source_concept_id = ext.sex_at_birth_source_concept_id,
sex_at_birth_source_value = ext.sex_at_birth_source_value
sex_at_birth_source_value = ext.sex_at_birth_source_value,
self_reported_population_concept_id = ext.self_reported_population_concept_id,
self_reported_population_source_concept_id = ext.self_reported_population_source_concept_id,
self_reported_population_source_value = ext.self_reported_population_source_value
FROM
`{{person_ext.project}}.{{person_ext.dataset_id}}.{{person_ext.table_id}}` ext
WHERE p.person_id = ext.person_id
Expand Down