From 45d2cbe35e8917138913d547414e62a2abb3eed9 Mon Sep 17 00:00:00 2001 From: rlittle08 Date: Fri, 7 Jul 2023 16:42:31 -0500 Subject: [PATCH 1/5] draft normalize scores using xwalks --- ..._ef3__student_assessments_long_results.sql | 37 ++++++++++++++++--- .../core_warehouse/fct_student_assessment.sql | 2 +- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/models/build/edfi_3/assessments/bld_ef3__student_assessments_long_results.sql b/models/build/edfi_3/assessments/bld_ef3__student_assessments_long_results.sql index 39e4b74f..24a3648b 100644 --- a/models/build/edfi_3/assessments/bld_ef3__student_assessments_long_results.sql +++ b/models/build/edfi_3/assessments/bld_ef3__student_assessments_long_results.sql @@ -4,6 +4,12 @@ with score_results as ( xwalk_scores as ( select * from {{ ref('xwalk_assessment_scores') }} ), +xwalk_score_values as ( + select * from {{ ref('xwalk_assessment_score_values') }} +), +xwalk_score_value_thresholds as ( + select * from {{ ref('xwalk_assessment_score_value_thresholds') }} +), performance_levels as ( select tenant_code, @@ -34,16 +40,35 @@ dedupe_results as ( ), merged_xwalk as ( select - tenant_code, - api_year, - k_student_assessment, - score_name as original_score_name, - coalesce(normalized_score_name, 'other') as normalized_score_name, - score_result + dedupe_results.tenant_code, + dedupe_results.api_year, + dedupe_results.k_student_assessment, + dedupe_results.score_name as original_score_name, + coalesce(xwalk_scores.normalized_score_name, 'other') as normalized_score_name, + -- todo should we rename this to original_? + dedupe_results.score_result, + coalesce(xwalk_score_value_thresholds.normalized_score_result::varchar, + xwalk_score_values.normalized_score_result::varchar, + score_result::varchar + ) as normalized_score_result from dedupe_results left join xwalk_scores on dedupe_results.assessment_identifier = xwalk_scores.assessment_identifier and dedupe_results.namespace = xwalk_scores.namespace and dedupe_results.score_name = xwalk_scores.original_score_name + left join xwalk_score_values + on dedupe_results.assessment_identifier = xwalk_score_values.assessment_identifier + and dedupe_results.namespace = xwalk_score_values.namespace + and xwalk_scores.normalized_score_name = xwalk_score_values.normalized_score_name + and dedupe_results.score_result = xwalk_score_values.original_score_result + left join xwalk_score_value_thresholds + on dedupe_results.assessment_identifier = xwalk_score_value_thresholds.assessment_identifier + and dedupe_results.namespace = xwalk_score_value_thresholds.namespace + and xwalk_scores.normalized_score_name = xwalk_score_value_thresholds.normalized_score_name + -- todo check these comparators -- what if there's a value between the upper and next lower? eg value is 20.4 and the cutoffs are 20 and 21 + -- todo review my use of try_to_numeric here -- the idea is to allow numeric values to merge, otherwise don't merge without error + and try_to_numeric(dedupe_results.score_result) >= xwalk_score_value_thresholds.lower_bound + and try_to_numeric(dedupe_results.score_result) <= xwalk_score_value_thresholds.upper_bound + ) select * from merged_xwalk \ No newline at end of file diff --git a/models/core_warehouse/fct_student_assessment.sql b/models/core_warehouse/fct_student_assessment.sql index a767c843..e20bac83 100644 --- a/models/core_warehouse/fct_student_assessment.sql +++ b/models/core_warehouse/fct_student_assessment.sql @@ -46,7 +46,7 @@ student_assessments_wide as ( {{ dbt_utils.pivot( 'normalized_score_name', dbt_utils.get_column_values(ref('xwalk_assessment_scores'), 'normalized_score_name'), - then_value='score_result', + then_value='normalized_score_result', else_value='NULL', agg='max', quote_identifiers=False From 0b0034c7c05d8efd77e581a99b12cbf30a20518b Mon Sep 17 00:00:00 2001 From: rlittle08 Date: Wed, 12 Jul 2023 10:55:55 -0500 Subject: [PATCH 2/5] add seasons and normalized scores --- .../core_warehouse/fct_student_assessment.sql | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/models/core_warehouse/fct_student_assessment.sql b/models/core_warehouse/fct_student_assessment.sql index e20bac83..eab01f49 100644 --- a/models/core_warehouse/fct_student_assessment.sql +++ b/models/core_warehouse/fct_student_assessment.sql @@ -14,6 +14,9 @@ with student_assessments_long_results as ( student_assessments as ( select * from {{ ref('stg_ef3__student_assessments') }} ), +xwalk_assessment_seasons as ( + select * from {{ ref('xwalk_assessment_seasons') }} +), object_agg_other_results as ( select k_student_assessment, @@ -30,9 +33,11 @@ student_assessments_wide as ( student_assessments.tenant_code, student_assessments.student_assessment_identifier, student_assessments.serial_number, - school_year, - administration_date, - administration_end_date, + student_assessments.school_year, + student_assessments.administration_date, + student_assessments.administration_end_date, + xwalk_assessment_seasons.season_name as administration_season, + xwalk_assessment_seasons.season_num as administration_season_num, event_description, administration_environment, administration_language, @@ -43,11 +48,20 @@ student_assessments_wide as ( when_assessed_grade_level, v_other_results {%- if not is_empty_model('xwalk_assessment_scores') -%}, + {{ dbt_utils.pivot( + 'normalized_score_name', + dbt_utils.get_column_values(ref('xwalk_assessment_scores'), 'normalized_score_name'), + then_value='score_result', + else_value='NULL', + agg='max', + quote_identifiers=False + ) }}, {{ dbt_utils.pivot( 'normalized_score_name', dbt_utils.get_column_values(ref('xwalk_assessment_scores'), 'normalized_score_name'), then_value='normalized_score_result', else_value='NULL', + prefix='normalized_', agg='max', quote_identifiers=False ) }} @@ -58,7 +72,11 @@ student_assessments_wide as ( and student_assessments_long_results.normalized_score_name != 'other' left join object_agg_other_results on student_assessments.k_student_assessment = object_agg_other_results.k_student_assessment - {{ dbt_utils.group_by(n=18) }} + left join xwalk_assessment_seasons + on student_assessments.school_year = xwalk_assessment_seasons.school_year + and student_assessments.administration_date >= xwalk_assessment_seasons.start_date + and student_assessments.administration_date <= xwalk_assessment_seasons.end_date + {{ dbt_utils.group_by(n=20) }} ) select * from student_assessments_wide From b7b56f5b2dcd86644ae7bacead6316f85a6c386c Mon Sep 17 00:00:00 2001 From: rlittle08 Date: Wed, 12 Jul 2023 13:33:26 -0500 Subject: [PATCH 3/5] only add normalize cols if specified in any xwalk --- models/core_warehouse/fct_student_assessment.sql | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/models/core_warehouse/fct_student_assessment.sql b/models/core_warehouse/fct_student_assessment.sql index eab01f49..3b173f95 100644 --- a/models/core_warehouse/fct_student_assessment.sql +++ b/models/core_warehouse/fct_student_assessment.sql @@ -1,3 +1,5 @@ +-- depends_on: {{ ref('xwalk_assessment_score_values') }} +-- depends_on: {{ ref('xwalk_assessment_score_value_thresholds') }} {{ config( post_hook=[ @@ -56,9 +58,12 @@ student_assessments_wide as ( agg='max', quote_identifiers=False ) }}, + {#- find distinct score names that are in one of the normalize_result xwalks (distinct scores to add normalized_ column for) -#} + {% set normalized_names_values = dbt_utils.get_column_values(ref('xwalk_assessment_score_values'), 'normalized_score_name') %} + {% set normalized_names_thresholds = dbt_utils.get_column_values(ref('xwalk_assessment_score_value_thresholds'), 'normalized_score_name') or [] %} {{ dbt_utils.pivot( 'normalized_score_name', - dbt_utils.get_column_values(ref('xwalk_assessment_scores'), 'normalized_score_name'), + (score_values_names + score_value_threshold_names) | unique, then_value='normalized_score_result', else_value='NULL', prefix='normalized_', From 0b500982d0df153785addab9052b27c07213209d Mon Sep 17 00:00:00 2001 From: rlittle08 Date: Wed, 12 Jul 2023 14:04:20 -0500 Subject: [PATCH 4/5] remove seasons from here, add todo comment --- .../bld_ef3__student_assessments_long_results.sql | 1 + models/core_warehouse/fct_student_assessment.sql | 13 ++----------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/models/build/edfi_3/assessments/bld_ef3__student_assessments_long_results.sql b/models/build/edfi_3/assessments/bld_ef3__student_assessments_long_results.sql index 24a3648b..ae9da382 100644 --- a/models/build/edfi_3/assessments/bld_ef3__student_assessments_long_results.sql +++ b/models/build/edfi_3/assessments/bld_ef3__student_assessments_long_results.sql @@ -69,6 +69,7 @@ merged_xwalk as ( -- todo review my use of try_to_numeric here -- the idea is to allow numeric values to merge, otherwise don't merge without error and try_to_numeric(dedupe_results.score_result) >= xwalk_score_value_thresholds.lower_bound and try_to_numeric(dedupe_results.score_result) <= xwalk_score_value_thresholds.upper_bound + -- todo in future, may need to include subject & grade level in this join (with options to join across subjects) ) select * from merged_xwalk \ No newline at end of file diff --git a/models/core_warehouse/fct_student_assessment.sql b/models/core_warehouse/fct_student_assessment.sql index 3b173f95..adac44cd 100644 --- a/models/core_warehouse/fct_student_assessment.sql +++ b/models/core_warehouse/fct_student_assessment.sql @@ -16,9 +16,6 @@ with student_assessments_long_results as ( student_assessments as ( select * from {{ ref('stg_ef3__student_assessments') }} ), -xwalk_assessment_seasons as ( - select * from {{ ref('xwalk_assessment_seasons') }} -), object_agg_other_results as ( select k_student_assessment, @@ -38,8 +35,6 @@ student_assessments_wide as ( student_assessments.school_year, student_assessments.administration_date, student_assessments.administration_end_date, - xwalk_assessment_seasons.season_name as administration_season, - xwalk_assessment_seasons.season_num as administration_season_num, event_description, administration_environment, administration_language, @@ -63,7 +58,7 @@ student_assessments_wide as ( {% set normalized_names_thresholds = dbt_utils.get_column_values(ref('xwalk_assessment_score_value_thresholds'), 'normalized_score_name') or [] %} {{ dbt_utils.pivot( 'normalized_score_name', - (score_values_names + score_value_threshold_names) | unique, + (normalized_names_values + normalized_names_thresholds) | unique, then_value='normalized_score_result', else_value='NULL', prefix='normalized_', @@ -77,11 +72,7 @@ student_assessments_wide as ( and student_assessments_long_results.normalized_score_name != 'other' left join object_agg_other_results on student_assessments.k_student_assessment = object_agg_other_results.k_student_assessment - left join xwalk_assessment_seasons - on student_assessments.school_year = xwalk_assessment_seasons.school_year - and student_assessments.administration_date >= xwalk_assessment_seasons.start_date - and student_assessments.administration_date <= xwalk_assessment_seasons.end_date - {{ dbt_utils.group_by(n=20) }} + {{ dbt_utils.group_by(n=18) }} ) select * from student_assessments_wide From 6aa0dc7945f7035adce8cecb2d455c5e3a8a21b6 Mon Sep 17 00:00:00 2001 From: rlittle08 Date: Wed, 12 Jul 2023 15:14:30 -0500 Subject: [PATCH 5/5] add normalized results to stu obj assess --- ..._ef3__student_assessments_long_results.sql | 1 - ...ent_objective_assessments_long_results.sql | 28 +++++++++++++++++-- .../core_warehouse/fct_student_assessment.sql | 2 +- .../fct_student_objective_assessment.sql | 14 ++++++++++ 4 files changed, 41 insertions(+), 4 deletions(-) diff --git a/models/build/edfi_3/assessments/bld_ef3__student_assessments_long_results.sql b/models/build/edfi_3/assessments/bld_ef3__student_assessments_long_results.sql index ae9da382..0a19af26 100644 --- a/models/build/edfi_3/assessments/bld_ef3__student_assessments_long_results.sql +++ b/models/build/edfi_3/assessments/bld_ef3__student_assessments_long_results.sql @@ -45,7 +45,6 @@ merged_xwalk as ( dedupe_results.k_student_assessment, dedupe_results.score_name as original_score_name, coalesce(xwalk_scores.normalized_score_name, 'other') as normalized_score_name, - -- todo should we rename this to original_? dedupe_results.score_result, coalesce(xwalk_score_value_thresholds.normalized_score_result::varchar, xwalk_score_values.normalized_score_result::varchar, diff --git a/models/build/edfi_3/assessments/bld_ef3__student_objective_assessments_long_results.sql b/models/build/edfi_3/assessments/bld_ef3__student_objective_assessments_long_results.sql index 9061582f..c155a91c 100644 --- a/models/build/edfi_3/assessments/bld_ef3__student_objective_assessments_long_results.sql +++ b/models/build/edfi_3/assessments/bld_ef3__student_objective_assessments_long_results.sql @@ -4,6 +4,12 @@ with score_results as ( xwalk_scores as ( select * from {{ ref('xwalk_objective_assessment_scores') }} ), +xwalk_score_values as ( + select * from {{ ref('xwalk_assessment_score_values') }} +), +xwalk_score_value_thresholds as ( + select * from {{ ref('xwalk_assessment_score_value_thresholds') }} +), performance_levels as ( select tenant_code, @@ -39,13 +45,31 @@ merged_xwalk as ( api_year, k_student_objective_assessment, score_name as original_score_name, - coalesce(normalized_score_name, 'other') as normalized_score_name, - score_result + coalesce(xwalk_scores.normalized_score_name, 'other') as normalized_score_name, + score_result, + coalesce(xwalk_score_value_thresholds.normalized_score_result::varchar, + xwalk_score_values.normalized_score_result::varchar, + score_result::varchar + ) as normalized_score_result from dedupe_results left join xwalk_scores on dedupe_results.assessment_identifier = xwalk_scores.assessment_identifier and dedupe_results.namespace = xwalk_scores.namespace and dedupe_results.objective_assessment_identification_code = xwalk_scores.objective_assessment_identification_code and dedupe_results.score_name = xwalk_scores.original_score_name + left join xwalk_score_values + on dedupe_results.assessment_identifier = xwalk_score_values.assessment_identifier + and dedupe_results.namespace = xwalk_score_values.namespace + and xwalk_scores.normalized_score_name = xwalk_score_values.normalized_score_name + and dedupe_results.score_result = xwalk_score_values.original_score_result + left join xwalk_score_value_thresholds + on dedupe_results.assessment_identifier = xwalk_score_value_thresholds.assessment_identifier + and dedupe_results.namespace = xwalk_score_value_thresholds.namespace + and xwalk_scores.normalized_score_name = xwalk_score_value_thresholds.normalized_score_name + -- todo check these comparators -- what if there's a value between the upper and next lower? eg value is 20.4 and the cutoffs are 20 and 21 + -- todo review my use of try_to_numeric here -- the idea is to allow numeric values to merge, otherwise don't merge without error + and try_to_numeric(dedupe_results.score_result) >= xwalk_score_value_thresholds.lower_bound + and try_to_numeric(dedupe_results.score_result) <= xwalk_score_value_thresholds.upper_bound + -- todo in future, may need to include subject & grade level in this join (with options to join across subjects) ) select * from merged_xwalk \ No newline at end of file diff --git a/models/core_warehouse/fct_student_assessment.sql b/models/core_warehouse/fct_student_assessment.sql index adac44cd..eb8ebb53 100644 --- a/models/core_warehouse/fct_student_assessment.sql +++ b/models/core_warehouse/fct_student_assessment.sql @@ -54,7 +54,7 @@ student_assessments_wide as ( quote_identifiers=False ) }}, {#- find distinct score names that are in one of the normalize_result xwalks (distinct scores to add normalized_ column for) -#} - {% set normalized_names_values = dbt_utils.get_column_values(ref('xwalk_assessment_score_values'), 'normalized_score_name') %} + {% set normalized_names_values = dbt_utils.get_column_values(ref('xwalk_assessment_score_values'), 'normalized_score_name') or [] %} {% set normalized_names_thresholds = dbt_utils.get_column_values(ref('xwalk_assessment_score_value_thresholds'), 'normalized_score_name') or [] %} {{ dbt_utils.pivot( 'normalized_score_name', diff --git a/models/core_warehouse/fct_student_objective_assessment.sql b/models/core_warehouse/fct_student_objective_assessment.sql index 93c4611f..67c2639a 100644 --- a/models/core_warehouse/fct_student_objective_assessment.sql +++ b/models/core_warehouse/fct_student_objective_assessment.sql @@ -1,3 +1,5 @@ +-- depends_on: {{ ref('xwalk_objective_assessment_score_values') }} +-- depends_on: {{ ref('xwalk_objective_assessment_score_value_thresholds') }} {{ config( post_hook=[ @@ -51,6 +53,18 @@ student_obj_assessments_wide as ( else_value='NULL', agg='max', quote_identifiers=False + ) }}, + {#- find distinct score names that are in one of the normalize_result xwalks (distinct scores to add normalized_ column for) -#} + {% set normalized_names_values = dbt_utils.get_column_values(ref('xwalk_objective_assessment_score_values'), 'normalized_score_name') or [] %} + {% set normalized_names_thresholds = dbt_utils.get_column_values(ref('xwalk_objective_assessment_score_value_thresholds'), 'normalized_score_name') or [] %} + {{ dbt_utils.pivot( + 'normalized_score_name', + (normalized_names_values + normalized_names_thresholds) | unique, + then_value='normalized_score_result', + else_value='NULL', + prefix='normalized_', + agg='max', + quote_identifiers=False ) }} {%- endif %} from student_obj_assessments