Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
PHS Data Library
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Josef Hardi
PHS Data Library
Commits
656b5c81
Commit
656b5c81
authored
5 years ago
by
Josef Hardi
Browse files
Options
Downloads
Patches
Plain Diff
feature(optum): allow specifying the starting occurrence date
parent
d238c8a0
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
phs/data/optum.py
+83
-56
83 additions, 56 deletions
phs/data/optum.py
with
83 additions
and
56 deletions
phs/data/optum.py
+
83
−
56
View file @
656b5c81
...
...
@@ -158,23 +158,9 @@ class Optum:
t1.observation_period_start_date,
t1.observation_period_end_date
FROM (
SELECT
t0.person_id,
t0.gender_source_value,
t0.year_of_birth,
t0.zip,
t0.condition_source_value,
t0.index_date,
t0.observation_period_start_date,
t0.observation_period_end_date,
row_number() OVER (PARTITION BY t0.person_id ORDER BY t0.index_date, t0.condition_source_value) AS ordinal
FROM (
-- START
'
Entry Criteria
'
>>>>
{entry_criteria}
-- <<< END
'
Entry Criteria
'
) AS t0
WHERE DATE_ADD(t0.observation_period_start_date, interval {obs_prior} DAY) <= t0.index_date
AND DATE_ADD(t0.index_date, interval {obs_subsequent} DAY) <= t0.observation_period_end_date
-- START
'
Entry Criteria
'
>>>>
{entry_criteria}
-- <<< END
'
Entry Criteria
'
) AS t1
WHERE t1.ordinal = 1
)
...
...
@@ -187,40 +173,54 @@ class Optum:
# SQL template for getting a population with a certain disease
_ENTRY_CRITERIA
=
"""
WITH {person_table}, {person_claims_table}, {condition_occurrence_table}
SELECT
DISTINCT
t
3
.person_id,
t
3
.gender_source_value,
t
3
.year_of_birth,
t
3.index_date
,
t
3.observation_period_start_dat
e,
t
3.observation_period_end
_date,
t
3.zip
,
t
4.claim_id
,
t4
.condition_source_value
SELECT
t
5
.person_id,
t
5
.gender_source_value,
t
5
.year_of_birth,
t
5.zip
,
t
5.condition_source_valu
e,
t
5.index
_date,
t
5.observation_period_start_date
,
t
5.observation_period_end_date
,
row_number() OVER (PARTITION BY t5.person_id ORDER BY t5.index_date, t5
.condition_source_value
) AS ordinal
FROM (
SELECT
t1.*,
t2.claim_id,
t2.index_date
SELECT DISTINCT
t3.person_id,
t3.gender_source_value,
t3.year_of_birth,
t3.index_date,
t3.observation_period_start_date,
t3.observation_period_end_date,
t3.zip,
t4.claim_id,
t4.condition_source_value
FROM (
SELECT
t0.*
FROM
person AS t0
WHERE
t0.zip in ({zipcode_valueset})
) AS t1
INNER JOIN
person_claims AS t2
ON t1.person_id = t2.person_id
AND t2.index_date >= t1.observation_period_start_date
AND t2.index_date <= t1.observation_period_end_date
) AS t3
INNER JOIN (
{event_occurrence}
) AS t4
ON t3.person_id = t4.person_id
AND t3.claim_id = t4.claim_id
"""
SELECT
t1.*,
t2.claim_id,
t2.index_date
FROM (
SELECT
t0.*
FROM
person AS t0
WHERE
t0.zip in ({zipcode_valueset})
) AS t1
INNER JOIN
person_claims AS t2
ON t1.person_id = t2.person_id
AND t2.index_date >= t1.observation_period_start_date
AND t2.index_date <= t1.observation_period_end_date
) AS t3
INNER JOIN (
{event_occurrence}
) AS t4
ON t3.person_id = t4.person_id
AND t3.claim_id = t4.claim_id
{event_occurrence_filters}
) AS t5
{continuous_observation_filters}
"""
def
info
(
self
):
"""
Returns a brief description about the dataset.
...
...
@@ -301,13 +301,38 @@ class Optum:
"""
event_occurrence_sql
=
""
event_occurrence_filters_sql
=
""
continuous_observation_filters_sql
=
""
for
k
,
v
in
entry_criteria
.
items
():
if
k
==
'
condition
'
:
condition_valueset
=
"
,
"
.
join
(
f
'"
{
x
}
"'
for
x
in
self
.
_CONDITIONS
[
v
][
'
codes
'
])
event_occurrence_sql
=
f
'
SELECT t0.*
\n
FROM condition_occurrence AS t0
\n
WHERE t0.condition_source_value in (
{
condition_valueset
}
)
'
elif
k
==
'
conditionCodes
'
:
condition_valueset
=
"
,
"
.
join
(
f
'"
{
x
}
"'
for
x
in
v
)
event_occurrence_sql
=
f
'
SELECT t0.*
\n
FROM condition_occurrence AS t0
\n
WHERE t0.condition_source_value in (
{
condition_valueset
}
)
'
if
k
==
'
event
'
:
event_type
=
list
(
v
.
keys
())[
0
]
if
event_type
==
'
condition
'
:
event_value
=
v
[
'
condition
'
]
if
type
(
event_value
)
is
str
:
condition_valueset
=
"
,
"
.
join
(
f
'"
{
x
}
"'
for
x
in
self
.
_CONDITIONS
[
event_value
][
'
codes
'
])
event_occurrence_sql
=
f
'
SELECT t0.*
\n
FROM condition_occurrence AS t0
\n
WHERE t0.condition_source_value in (
{
condition_valueset
}
)
'
elif
type
(
event_value
)
is
list
:
condition_valueset
=
"
,
"
.
join
(
f
'"
{
x
}
"'
for
x
in
event_value
)
event_occurrence_sql
=
f
'
SELECT t0.*
\n
FROM condition_occurrence AS t0
\n
WHERE t0.condition_source_value in (
{
condition_valueset
}
)
'
elif
k
==
'
occurrence
'
:
occurrence_type
=
list
(
v
.
keys
())[
0
]
if
occurrence_type
==
'
dateStartBefore
'
:
occurrence_date
=
v
[
'
dateStartBefore
'
]
event_occurrence_filters_sql
=
f
'
WHERE t3.index_date <
"
{
occurrence_date
}
"'
elif
occurrence_type
==
'
dateStartAfter
'
:
occurrence_date
=
v
[
'
dateStartAfter
'
]
event_occurrence_filters_sql
=
f
'
WHERE t3.index_date >
"
{
occurrence_date
}
"'
elif
occurrence_type
==
'
dateStartAt
'
:
occurrence_date
=
v
[
'
dateStartAt
'
]
event_occurrence_filters_sql
=
f
'
WHERE t3.index_date =
"
{
occurrence_date
}
"'
elif
occurrence_type
==
'
dateStartRange
'
:
occurrence_start_date
=
v
[
'
dateStartRange
'
].
split
(
'
/
'
)[
0
]
occurrence_end_date
=
v
[
'
dateStartRange
'
].
split
(
'
/
'
)[
1
]
event_occurrence_filters_sql
=
f
'
WHERE t3.index_date >=
"
{
occurrence_start_date
}
"
AND t3.index_date <=
"
{
occurrence_end_date
}
"'
elif
k
==
'
continuousObservations
'
:
days_before
=
v
[
'
daysBeforeOccurrence
'
]
if
'
daysBeforeOccurrence
'
in
v
else
0
days_after
=
v
[
'
daysAfterOccurrence
'
]
if
'
daysAfterOccurrence
'
in
v
else
0
continuous_observation_filters_sql
=
f
'
WHERE DATE_ADD(t5.observation_period_start_date, interval
{
days_before
}
DAY) <= t5.index_date AND DATE_ADD(t5.index_date, interval
{
days_after
}
DAY) <= t5.observation_period_end_date
'
entry_criteria_sql
=
textwrap
.
dedent
(
self
.
_ENTRY_CRITERIA
.
format
(
...
...
@@ -315,7 +340,9 @@ class Optum:
person_claims_table
=
self
.
_PERSON_CLAIMS
,
condition_occurrence_table
=
self
.
_CONDITION_OCCURRENCE
,
zipcode_valueset
=
"
,
"
.
join
(
str
(
z
)
for
z
in
zips
),
event_occurrence
=
event_occurrence_sql
))
event_occurrence
=
event_occurrence_sql
,
event_occurrence_filters
=
event_occurrence_filters_sql
,
continuous_observation_filters
=
continuous_observation_filters_sql
))
inclusion_criteria_sql
=
""
if
inclusion_criteria
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment