diff --git a/Model/lib/dst/cellularLocalization.dst b/Model/lib/dst/cellularLocalization.dst index 824979b34e..8cd2925e37 100644 --- a/Model/lib/dst/cellularLocalization.dst +++ b/Model/lib/dst/cellularLocalization.dst @@ -125,6 +125,35 @@ WHERE av.attribute_stable_id = ag.stable_id >templateTextEnd< +[templateStart] +name=cellularLocalizationAllDataGeneTableSql +anchorFile=ApiCommonModel/Model/lib/wdk/model/records/geneTableQueries.xml +prop=datasetName +prop=edaStudyStableId +prop=edaEntityAbbrev +prop=datasetDisplayName +prop=shortAttribution +>templateTextStart< +UNION +SELECT genes.string_value AS gene, + '${datasetName}' AS dataset_presenter_id, + '${datasetDisplayName}' AS dataset_presenter_display_name, + '${shortAttribution}' AS short_attribution, + ag.display_name AS variable, + coalesce(av.string_value, coalesce(round(av.number_value::numeric, 4)::text, av.date_value::text)) AS value +FROM eda.attributevalue_${edaStudyStableId}_${edaEntityAbbrev} av, + eda.attributegraph_${edaStudyStableId}_${edaEntityAbbrev} ag, + (SELECT av.${edaEntityAbbrev}_stable_id, MIN(gi.gene) as string_value + FROM eda.attributevalue_${edaStudyStableId}_${edaEntityAbbrev} av + JOIN apidbtuning.GeneId gi ON gi.id = av.string_value + WHERE av.attribute_stable_id = 'VEUPATHDB_GENE_ID' + GROUP BY av.${edaEntityAbbrev}_stable_id) genes +WHERE av.attribute_stable_id = ag.stable_id + AND av.${edaEntityAbbrev}_stable_id = genes.${edaEntityAbbrev}_stable_id + AND av.attribute_stable_id != 'VEUPATHDB_GENE_ID' +>templateTextEnd< + + [templateStart] name=cellularLocalizationEdaAttributeQueriesNumeric anchorFile=ApiCommonModel/Model/lib/wdk/model/records/transcriptAttributeQueries.xml diff --git a/Model/lib/dst/phenotype.dst b/Model/lib/dst/phenotype.dst index 9484b70642..cbfa36560d 100644 --- a/Model/lib/dst/phenotype.dst +++ b/Model/lib/dst/phenotype.dst @@ -110,6 +110,35 @@ WHERE av.attribute_stable_id = ag.stable_id >templateTextEnd< +[templateStart] +name=phenotypeAllDataGeneTableSql 
+anchorFile=ApiCommonModel/Model/lib/wdk/model/records/geneTableQueries.xml +prop=datasetName +prop=edaStudyStableId +prop=edaEntityAbbrev +prop=datasetDisplayName +prop=shortAttribution +>templateTextStart< +UNION +SELECT genes.string_value AS gene, + '${datasetName}' AS dataset_presenter_id, + '${datasetDisplayName}' AS dataset_presenter_display_name, + '${shortAttribution}' AS short_attribution, + ag.display_name AS variable, + coalesce(av.string_value, coalesce(round(av.number_value::numeric, 4)::text, av.date_value::text)) AS value +FROM eda.attributevalue_${edaStudyStableId}_${edaEntityAbbrev} av, + eda.attributegraph_${edaStudyStableId}_${edaEntityAbbrev} ag, + (SELECT av.${edaEntityAbbrev}_stable_id, MIN(gi.gene) as string_value + FROM eda.attributevalue_${edaStudyStableId}_${edaEntityAbbrev} av + JOIN apidbtuning.GeneId gi ON gi.id = av.string_value + WHERE av.attribute_stable_id = 'VEUPATHDB_GENE_ID' + GROUP BY av.${edaEntityAbbrev}_stable_id) genes +WHERE av.attribute_stable_id = ag.stable_id + AND av.${edaEntityAbbrev}_stable_id = genes.${edaEntityAbbrev}_stable_id + AND av.attribute_stable_id != 'VEUPATHDB_GENE_ID' +>templateTextEnd< + + [templateStart] name=phenotypeEdaAttributeQueriesNumeric anchorFile=ApiCommonModel/Model/lib/wdk/model/records/transcriptAttributeQueries.xml diff --git a/Model/lib/wdk/model/records/geneRecord.xml b/Model/lib/wdk/model/records/geneRecord.xml index f3ed52f795..0894129ff4 100644 --- a/Model/lib/wdk/model/records/geneRecord.xml +++ b/Model/lib/wdk/model/records/geneRecord.xml @@ -813,6 +813,48 @@ + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Model/lib/wdk/ontology/individuals.txt b/Model/lib/wdk/ontology/individuals.txt index 2fee398d65..bfa8746dc3 100644 --- a/Model/lib/wdk/ontology/individuals.txt +++ b/Model/lib/wdk/ontology/individuals.txt @@ -222,6 +222,7 @@ GeneRecordClasses.GeneRecordClass.Antibody http://edamontology.org/topic_0121 Pr GeneRecordClasses.GeneRecordClass.CommunityExpComments http://edamontology.org/topic_0219 Curation and Annotation GeneRecordClasses.GeneRecordClass table CommunityExpComments gene record download GeneRecordClasses.GeneRecordClass.EdaCellularLocalizationDatasets http://edamontology.org/topic_0140 Protein targeting and localization GeneRecordClasses.GeneRecordClass table EdaCellularLocalizationDatasets gene record GeneRecordClasses.GeneRecordClass.EdaCellularLocalizationGraphsDataTable http://edamontology.org/topic_0140 Protein targeting and localization GeneRecordClasses.GeneRecordClass table EdaCellularLocalizationGraphsDataTable gene record-internal download +GeneRecordClasses.GeneRecordClass.EdaCellularLocalizationDataTable http://edamontology.org/topic_0140 Protein targeting and localization GeneRecordClasses.GeneRecordClass table EdaCellularLocalizationDataTable gene record download GeneRecordClasses.GeneRecordClass.Products http://edamontology.org/topic_0219 Curation and Annotation GeneRecordClasses.GeneRecordClass table Products transcript record download GeneRecordClasses.GeneRecordClass.SNPsAlignment http://edamontology.org/topic_0199 Genetic Variation GeneRecordClasses.GeneRecordClass table SNPsAlignment gene record GeneRecordClasses.GeneRecordClass.MercatorTable http://edamontology.org/topic_3299 EvoBiol Comparison GeneRecordClasses.GeneRecordClass table MercatorTable gene record @@ -262,6 +263,7 @@ GeneRecordClasses.GeneRecordClass.eQTLPhenotypeGraphs http://edamontology.org/to GeneRecordClasses.GeneRecordClass.PhenotypeGraphs http://edamontology.org/topic_3298 phenotype 
GeneRecordClasses.GeneRecordClass table PhenotypeGraphs gene record GeneRecordClasses.GeneRecordClass.PhenotypeScoreGraphsDataTable http://edamontology.org/topic_3298 phenotype GeneRecordClasses.GeneRecordClass table PhenotypeScoreGraphsDataTable gene record-internal GeneRecordClasses.GeneRecordClass.EdaPhenotypeGraphsDataTable http://edamontology.org/topic_3298 phenotype GeneRecordClasses.GeneRecordClass table EdaPhenotypeGraphsDataTable gene record-internal download +GeneRecordClasses.GeneRecordClass.EdaPhenotypeDataTable http://edamontology.org/topic_3298 phenotype GeneRecordClasses.GeneRecordClass table EdaPhenotypeDataTable gene record download GeneRecordClasses.GeneRecordClass.HostResponseGraphsDataTable http://edamontology.org/topic_0804 Immunology GeneRecordClasses.GeneRecordClass table HostResponseGraphsDataTable gene record-internal download GeneRecordClasses.GeneRecordClass.RodMalPhenotype http://edamontology.org/topic_3298 phenotype GeneRecordClasses.GeneRecordClass table RodMalPhenotype transcript record download GeneRecordClasses.GeneRecordClass.Plasmo_eQTL_Table http://edamontology.org/topic_3298 phenotype GeneRecordClasses.GeneRecordClass table Plasmo_eQTL_Table transcript record download diff --git a/docs/superpowers/plans/2026-05-19-eda-all-data-tables.md b/docs/superpowers/plans/2026-05-19-eda-all-data-tables.md new file mode 100644 index 0000000000..65360b2d12 --- /dev/null +++ b/docs/superpowers/plans/2026-05-19-eda-all-data-tables.md @@ -0,0 +1,745 @@ +# EDA All-Data Tables Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Add `EdaPhenotypeDataTable` and `EdaCellularLocalizationDataTable` WDK data tables to the gene record page, exposing all EDA variable/value data for each category across all datasets (not just graph-backed ones), with a linked dataset name, attribution, variable, and value columns. + +**Architecture:** Two new TEMPLATE_ANCHOR-backed WDK sqlQuery elements in `geneTableQueries.xml` drive the data; two new `.dst` templates generate per-dataset UNION SQL; two new table definitions in `geneRecord.xml` expose the data with a `linkAttribute` for the dataset name; ontology entries in `individuals.txt` categorize the tables; and two Java dataset injector classes are updated to wire the templates and model references. + +**Tech Stack:** WDK XML config, PostgreSQL SQL, Java (dataset injectors), tab-delimited ontology file. + +--- + +## File Map + +| File | Action | Responsibility | +|---|---|---| +| `Model/lib/dst/phenotype.dst` | Modify | Add `phenotypeAllDataGeneTableSql` template | +| `Model/lib/dst/cellularLocalization.dst` | Modify | Add `cellularLocalizationAllDataGeneTableSql` template | +| `Model/lib/wdk/model/records/geneTableQueries.xml` | Modify | Add `EdaPhenotypeDataTable` and `EdaCellularLocalizationDataTable` sqlQuery elements | +| `Model/lib/wdk/model/records/geneRecord.xml` | Modify | Add table definitions with `linkAttribute` for both new tables | +| `Model/lib/wdk/ontology/individuals.txt` | Modify | Add ontology entries for both new tables (`record` + `download` scope) | +| `EbrcModelCommon/Model/src/main/java/org/apidb/apicommon/model/datasetInjector/PhenotypeEDAStudy.java` | Modify | Inject `phenotypeAllDataGeneTableSql` + add model reference | +| `EbrcModelCommon/Model/src/main/java/org/apidb/apicommon/model/datasetInjector/CellularLocalizationEDAStudy.java` | Modify | Inject `cellularLocalizationAllDataGeneTableSql` + add model reference | + +> **Note:** Tasks 1–7 are in the `ApiCommonModel` worktree at: +> 
`/home/jbrestel/workspaces/dataLoad/project_home/ApiCommonModel/.claude/worktrees/eda-data-tables` +> +> Tasks 8–9 are in the separate `EbrcModelCommon` repo at: +> `/home/jbrestel/workspaces/dataLoad/project_home/EbrcModelCommon` +> These should be done on a feature branch in that repo. + +--- + +### Task 1: Add `phenotypeAllDataGeneTableSql` template to phenotype.dst + +**Files:** +- Modify: `Model/lib/dst/phenotype.dst` (after line 110, after `phenotypeDataTableGeneTableSql` templateTextEnd) + +- [ ] **Step 1: Insert the new template block** + +In `Model/lib/dst/phenotype.dst`, find the exact text after the `phenotypeDataTableGeneTableSql` template ends: + +``` + AND av.attribute_stable_id != 'VEUPATHDB_GENE_ID' +>templateTextEnd< + + +[templateStart] +name=phenotypeEdaAttributeQueriesNumeric +``` + +Replace with: + +``` + AND av.attribute_stable_id != 'VEUPATHDB_GENE_ID' +>templateTextEnd< + + +[templateStart] +name=phenotypeAllDataGeneTableSql +anchorFile=ApiCommonModel/Model/lib/wdk/model/records/geneTableQueries.xml +prop=datasetName +prop=edaStudyStableId +prop=edaEntityAbbrev +prop=datasetDisplayName +prop=shortAttribution +>templateTextStart< +UNION +SELECT genes.string_value AS gene, + '${datasetName}' AS dataset_presenter_id, + '${datasetDisplayName}' AS dataset_presenter_display_name, + '${shortAttribution}' AS short_attribution, + ag.display_name AS variable, + coalesce(av.string_value, coalesce(round(av.number_value::numeric, 4)::text, av.date_value::text)) AS value +FROM eda.attributevalue_${edaStudyStableId}_${edaEntityAbbrev} av, + eda.attributegraph_${edaStudyStableId}_${edaEntityAbbrev} ag, + (SELECT av.${edaEntityAbbrev}_stable_id, MIN(gi.gene) as string_value + FROM eda.attributevalue_${edaStudyStableId}_${edaEntityAbbrev} av + JOIN apidbtuning.GeneId gi ON gi.id = av.string_value + WHERE av.attribute_stable_id = 'VEUPATHDB_GENE_ID' + GROUP BY av.${edaEntityAbbrev}_stable_id) genes +WHERE av.attribute_stable_id = ag.stable_id + AND 
av.${edaEntityAbbrev}_stable_id = genes.${edaEntityAbbrev}_stable_id + AND av.attribute_stable_id != 'VEUPATHDB_GENE_ID' +>templateTextEnd< + + +[templateStart] +name=phenotypeEdaAttributeQueriesNumeric +``` + +- [ ] **Step 2: Verify the file is structurally intact** + +```bash +grep -c "templateStart" Model/lib/dst/phenotype.dst +``` + +Expected: one more `templateStart` than before the edit (count was 8 before, now 9). + +```bash +grep -n "phenotypeAllDataGeneTableSql" Model/lib/dst/phenotype.dst +``` + +Expected: one line — the `name=` line. (The `anchorFile=` line holds the XML path, not the template name, so it does not match.) + +- [ ] **Step 3: Commit** + +```bash +git add Model/lib/dst/phenotype.dst +git commit -m "feat: add phenotypeAllDataGeneTableSql template to phenotype.dst" +``` + +--- + +### Task 2: Add `cellularLocalizationAllDataGeneTableSql` template to cellularLocalization.dst + +**Files:** +- Modify: `Model/lib/dst/cellularLocalization.dst` (after line 125, after `cellularLocalizationDataTableGeneTableSql` templateTextEnd) + +- [ ] **Step 1: Insert the new template block** + +In `Model/lib/dst/cellularLocalization.dst`, find: + +``` + AND av.attribute_stable_id != 'VEUPATHDB_GENE_ID' +>templateTextEnd< + + +[templateStart] +name=cellularLocalizationEdaAttributeQueriesNumeric +``` + +Replace with: + +``` + AND av.attribute_stable_id != 'VEUPATHDB_GENE_ID' +>templateTextEnd< + + +[templateStart] +name=cellularLocalizationAllDataGeneTableSql +anchorFile=ApiCommonModel/Model/lib/wdk/model/records/geneTableQueries.xml +prop=datasetName +prop=edaStudyStableId +prop=edaEntityAbbrev +prop=datasetDisplayName +prop=shortAttribution +>templateTextStart< +UNION +SELECT genes.string_value AS gene, + '${datasetName}' AS dataset_presenter_id, + '${datasetDisplayName}' AS dataset_presenter_display_name, + '${shortAttribution}' AS short_attribution, + ag.display_name AS variable, + coalesce(av.string_value, coalesce(round(av.number_value::numeric, 4)::text, av.date_value::text)) AS value +FROM 
eda.attributevalue_${edaStudyStableId}_${edaEntityAbbrev} av, + eda.attributegraph_${edaStudyStableId}_${edaEntityAbbrev} ag, + (SELECT av.${edaEntityAbbrev}_stable_id, MIN(gi.gene) as string_value + FROM eda.attributevalue_${edaStudyStableId}_${edaEntityAbbrev} av + JOIN apidbtuning.GeneId gi ON gi.id = av.string_value + WHERE av.attribute_stable_id = 'VEUPATHDB_GENE_ID' + GROUP BY av.${edaEntityAbbrev}_stable_id) genes +WHERE av.attribute_stable_id = ag.stable_id + AND av.${edaEntityAbbrev}_stable_id = genes.${edaEntityAbbrev}_stable_id + AND av.attribute_stable_id != 'VEUPATHDB_GENE_ID' +>templateTextEnd< + + +[templateStart] +name=cellularLocalizationEdaAttributeQueriesNumeric +``` + +- [ ] **Step 2: Verify the file is structurally intact** + +```bash +grep -c "templateStart" Model/lib/dst/cellularLocalization.dst +``` + +Expected: one more than before (was 8, now 9). + +```bash +grep -n "cellularLocalizationAllDataGeneTableSql" Model/lib/dst/cellularLocalization.dst +``` + +Expected: one line — the `name=` line (the `anchorFile=` line does not contain the template name). + +- [ ] **Step 3: Commit** + +```bash +git add Model/lib/dst/cellularLocalization.dst +git commit -m "feat: add cellularLocalizationAllDataGeneTableSql template to cellularLocalization.dst" +``` + +--- + +### Task 3: Add `EdaPhenotypeDataTable` sqlQuery to geneTableQueries.xml + +**Files:** +- Modify: `Model/lib/wdk/model/records/geneTableQueries.xml` (after line 4599, after `EdaCellularLocalizationGraphsDataTable` query) + +- [ ] **Step 1: Insert the new sqlQuery element** + +In `Model/lib/wdk/model/records/geneTableQueries.xml`, find the closing tag of `EdaCellularLocalizationGraphsDataTable` (around line 4599): + +```xml + +``` + +Find the text immediately after that query ends: + +```xml + AND ga.org_abbrev IN (%%PARTITION_KEYS%%) + ]]> + + + + + + + + + + + + + + + + + + + + + + + + &1 +``` + +Expected: no output (no errors). 
+ +- [ ] **Step 3: Verify the new query is present** + +```bash +grep -n "EdaPhenotypeDataTable\|phenotypeAllDataGeneTableSql" Model/lib/wdk/model/records/geneTableQueries.xml +``` + +Expected: lines showing the new `sqlQuery name` and the `TEMPLATE_ANCHOR` comment. + +- [ ] **Step 4: Commit** + +```bash +git add Model/lib/wdk/model/records/geneTableQueries.xml +git commit -m "feat: add EdaPhenotypeDataTable sqlQuery to geneTableQueries.xml" +``` + +--- + +### Task 4: Add `EdaCellularLocalizationDataTable` sqlQuery to geneTableQueries.xml + +**Files:** +- Modify: `Model/lib/wdk/model/records/geneTableQueries.xml` (immediately after the `EdaPhenotypeDataTable` query added in Task 3) + +- [ ] **Step 1: Insert the new sqlQuery element** + +In `Model/lib/wdk/model/records/geneTableQueries.xml`, find the closing of `EdaPhenotypeDataTable` (the text added in Task 3): + +```xml +ORDER BY gd.dataset_presenter_display_name, gd.variable, gd.value + ]]> + + + + + + + + + + + + + + + + + + + + + + + + &1 +``` + +Expected: no output. + +- [ ] **Step 3: Verify both new queries are present** + +```bash +grep -n "EdaPhenotypeDataTable\|EdaCellularLocalizationDataTable" Model/lib/wdk/model/records/geneTableQueries.xml +``` + +Expected: 4 lines total — one `sqlQuery name` and one `TEMPLATE_ANCHOR` for each. + +- [ ] **Step 4: Commit** + +```bash +git add Model/lib/wdk/model/records/geneTableQueries.xml +git commit -m "feat: add EdaCellularLocalizationDataTable sqlQuery to geneTableQueries.xml" +``` + +--- + +### Task 5: Add `EdaPhenotypeDataTable` table definition to geneRecord.xml + +**Files:** +- Modify: `Model/lib/wdk/model/records/geneRecord.xml` (after line 813, after `EdaCellularLocalizationGraphsDataTable` table) + +- [ ] **Step 1: Insert the new table definition** + +In `Model/lib/wdk/model/records/geneRecord.xml`, find the closing of `EdaCellularLocalizationGraphsDataTable`: + +```xml +
+ + + +
+ + + + + + +
+ + + + + + + + + + + + + + + + +
+ + + &1 +``` + +Expected: no output. + +- [ ] **Step 3: Verify the table is present** + +```bash +grep -n "EdaPhenotypeDataTable" Model/lib/wdk/model/records/geneRecord.xml +``` + +Expected: lines for `table name`, `queryRef`, and column/link attributes. + +- [ ] **Step 4: Commit** + +```bash +git add Model/lib/wdk/model/records/geneRecord.xml +git commit -m "feat: add EdaPhenotypeDataTable table definition to geneRecord.xml" +``` + +--- + +### Task 6: Add `EdaCellularLocalizationDataTable` table definition to geneRecord.xml + +**Files:** +- Modify: `Model/lib/wdk/model/records/geneRecord.xml` (immediately after `EdaPhenotypeDataTable` added in Task 5) + +- [ ] **Step 1: Insert the new table definition** + +In `Model/lib/wdk/model/records/geneRecord.xml`, find the closing of `EdaPhenotypeDataTable` (added in Task 5): + +```xml + +
+ + + +
+ + + + + + + + + + + + + + + + +
+ + + &1 +``` + +Expected: no output. + +- [ ] **Step 3: Verify both new tables are present** + +```bash +grep -n "EdaPhenotypeDataTable\|EdaCellularLocalizationDataTable" Model/lib/wdk/model/records/geneRecord.xml +``` + +Expected: multiple lines for each — `table name`, `queryRef`, `linkAttribute`. + +- [ ] **Step 4: Commit** + +```bash +git add Model/lib/wdk/model/records/geneRecord.xml +git commit -m "feat: add EdaCellularLocalizationDataTable table definition to geneRecord.xml" +``` + +--- + +### Task 7: Add ontology entries to individuals.txt + +**Files:** +- Modify: `Model/lib/wdk/ontology/individuals.txt` + +The file uses tab-separated columns. The column layout is: +``` + gene [] +``` + +- [ ] **Step 1: Add the cellular localization entry after line 224** + +In `Model/lib/wdk/ontology/individuals.txt`, find the line for `EdaCellularLocalizationGraphsDataTable` (line 224): + +``` +GeneRecordClasses.GeneRecordClass.EdaCellularLocalizationGraphsDataTable http://edamontology.org/topic_0140 Protein targeting and localization GeneRecordClasses.GeneRecordClass table EdaCellularLocalizationGraphsDataTable gene record-internal download +``` + +Replace with (adding the new entry immediately after): + +``` +GeneRecordClasses.GeneRecordClass.EdaCellularLocalizationGraphsDataTable http://edamontology.org/topic_0140 Protein targeting and localization GeneRecordClasses.GeneRecordClass table EdaCellularLocalizationGraphsDataTable gene record-internal download +GeneRecordClasses.GeneRecordClass.EdaCellularLocalizationDataTable http://edamontology.org/topic_0140 Protein targeting and localization GeneRecordClasses.GeneRecordClass table EdaCellularLocalizationDataTable gene record download +``` + +**Important:** The separator between columns is a literal tab character (`\t`), not spaces. Use your editor's tab insertion, not spaces. 
+ +- [ ] **Step 2: Add the phenotype entry after the `EdaPhenotypeGraphsDataTable` line** + +In `Model/lib/wdk/ontology/individuals.txt`, find the line for `EdaPhenotypeGraphsDataTable` (now around line 265 after the previous insert): + +``` +GeneRecordClasses.GeneRecordClass.EdaPhenotypeGraphsDataTable http://edamontology.org/topic_3298 phenotype GeneRecordClasses.GeneRecordClass table EdaPhenotypeGraphsDataTable gene record-internal download +``` + +Replace with: + +``` +GeneRecordClasses.GeneRecordClass.EdaPhenotypeGraphsDataTable http://edamontology.org/topic_3298 phenotype GeneRecordClasses.GeneRecordClass table EdaPhenotypeGraphsDataTable gene record-internal download +GeneRecordClasses.GeneRecordClass.EdaPhenotypeDataTable http://edamontology.org/topic_3298 phenotype GeneRecordClasses.GeneRecordClass table EdaPhenotypeDataTable gene record download +``` + +- [ ] **Step 3: Verify both entries are present and use tabs** + +```bash +grep "EdaPhenotypeDataTable\|EdaCellularLocalizationDataTable" Model/lib/wdk/ontology/individuals.txt | cat -A | head -5 +``` + +Expected: both lines appear, each column separated by `^I` (tab character), ending with `record^Idownload$`. + +- [ ] **Step 4: Commit** + +```bash +git add Model/lib/wdk/ontology/individuals.txt +git commit -m "feat: add ontology entries for EdaPhenotypeDataTable and EdaCellularLocalizationDataTable" +``` + +--- + +### Task 8: Update `PhenotypeEDAStudy.java` in EbrcModelCommon + +**Files:** +- Modify: `EbrcModelCommon/Model/src/main/java/org/apidb/apicommon/model/datasetInjector/PhenotypeEDAStudy.java` + +Full path: `/home/jbrestel/workspaces/dataLoad/project_home/EbrcModelCommon/Model/src/main/java/org/apidb/apicommon/model/datasetInjector/PhenotypeEDAStudy.java` + +> **Note:** This file is in a different repo (`EbrcModelCommon`). 
Create a feature branch before editing: +> ```bash +> cd /home/jbrestel/workspaces/dataLoad/project_home/EbrcModelCommon +> git checkout -b eda-all-data-tables +> ``` + +- [ ] **Step 1: Add template injection in `injectTemplates()`** + +Find: + +```java + injectTemplate("phenotypeDataTableGeneTableSql"); + injectTemplate("phenotypeEdaAttributeQueriesNumeric"); +``` + +Replace with: + +```java + injectTemplate("phenotypeDataTableGeneTableSql"); + injectTemplate("phenotypeAllDataGeneTableSql"); + injectTemplate("phenotypeEdaAttributeQueriesNumeric"); +``` + +- [ ] **Step 2: Add model reference in `addModelReferences()`** + +Find: + +```java + addWdkReference("GeneRecordClasses.GeneRecordClass", "table", "EdaPhenotypeGraphsDataTable"); + } +``` + +Replace with: + +```java + addWdkReference("GeneRecordClasses.GeneRecordClass", "table", "EdaPhenotypeGraphsDataTable"); + addWdkReference("GeneRecordClasses.GeneRecordClass", "table", "EdaPhenotypeDataTable"); + } +``` + +- [ ] **Step 3: Verify the file compiles** + +```bash +cd /home/jbrestel/workspaces/dataLoad/project_home/EbrcModelCommon +mvn compile -pl Model -am -q 2>&1 | tail -20 +``` + +Expected: `BUILD SUCCESS` with no errors. 
+ +- [ ] **Step 4: Commit in EbrcModelCommon** + +```bash +cd /home/jbrestel/workspaces/dataLoad/project_home/EbrcModelCommon +git add Model/src/main/java/org/apidb/apicommon/model/datasetInjector/PhenotypeEDAStudy.java +git commit -m "feat: inject phenotypeAllDataGeneTableSql and add EdaPhenotypeDataTable model reference" +``` + +--- + +### Task 9: Update `CellularLocalizationEDAStudy.java` in EbrcModelCommon + +**Files:** +- Modify: `EbrcModelCommon/Model/src/main/java/org/apidb/apicommon/model/datasetInjector/CellularLocalizationEDAStudy.java` + +Full path: `/home/jbrestel/workspaces/dataLoad/project_home/EbrcModelCommon/Model/src/main/java/org/apidb/apicommon/model/datasetInjector/CellularLocalizationEDAStudy.java` + +> **Note:** Should be on the `eda-all-data-tables` branch created in Task 8. + +- [ ] **Step 1: Add template injection in `injectTemplates()`** + +Find: + +```java + injectTemplate("cellularLocalizationDataTableGeneTableSql"); + injectTemplate("cellularLocalizationEdaAttributeQueriesNumeric"); +``` + +Replace with: + +```java + injectTemplate("cellularLocalizationDataTableGeneTableSql"); + injectTemplate("cellularLocalizationAllDataGeneTableSql"); + injectTemplate("cellularLocalizationEdaAttributeQueriesNumeric"); +``` + +- [ ] **Step 2: Add model reference in `addModelReferences()`** + +Find: + +```java + addWdkReference("GeneRecordClasses.GeneRecordClass", "table", "EdaCellularLocalizationGraphsDataTable"); + } +``` + +Replace with: + +```java + addWdkReference("GeneRecordClasses.GeneRecordClass", "table", "EdaCellularLocalizationGraphsDataTable"); + addWdkReference("GeneRecordClasses.GeneRecordClass", "table", "EdaCellularLocalizationDataTable"); + } +``` + +- [ ] **Step 3: Verify the file compiles** + +```bash +cd /home/jbrestel/workspaces/dataLoad/project_home/EbrcModelCommon +mvn compile -pl Model -am -q 2>&1 | tail -20 +``` + +Expected: `BUILD SUCCESS` with no errors. 
+ +- [ ] **Step 4: Commit in EbrcModelCommon** + +```bash +cd /home/jbrestel/workspaces/dataLoad/project_home/EbrcModelCommon +git add Model/src/main/java/org/apidb/apicommon/model/datasetInjector/CellularLocalizationEDAStudy.java +git commit -m "feat: inject cellularLocalizationAllDataGeneTableSql and add EdaCellularLocalizationDataTable model reference" +``` + +--- + +## Functional Testing (post-deploy) + +After deploying both repos to a dev instance: + +1. Navigate to a gene record page in ToxoDB for a gene known to have phenotype data (e.g., TGGT1_248070 in T. gondii GT1). +2. Verify the **Phenotype Data** table appears in the Phenotype section of the gene record. +3. Confirm rows show: a clickable dataset name (linking to the dataset page), attribution, variable, and value. +4. Confirm rows are sorted by dataset name → variable → value. +5. Repeat for a TriTrypDB gene with LOPIT data to verify **Cellular Localization Data** table. +6. Verify the tables appear in gene downloads (check download tool for both tables). +7. Spot-check that existing **Phenotype Graphs Data Table** and **Cellular Localization Graphs Data Table** are unaffected. diff --git a/docs/superpowers/specs/2026-05-19-eda-all-data-tables-design.md b/docs/superpowers/specs/2026-05-19-eda-all-data-tables-design.md new file mode 100644 index 0000000000..582dafb5ad --- /dev/null +++ b/docs/superpowers/specs/2026-05-19-eda-all-data-tables-design.md @@ -0,0 +1,193 @@ +# EDA All-Data Tables: EdaPhenotypeDataTable & EdaCellularLocalizationDataTable + +## Overview + +Add two new WDK data tables to the gene record page — one for all phenotype EDA variable/value data and one for all cellular localization EDA variable/value data. These tables cover all datasets in their respective categories, including datasets that lack graph configuration. 
They are intentionally redundant with the existing `EdaPhenotypeGraphsDataTable` and `EdaCellularLocalizationGraphsDataTable` sub-tables, which back graph rendering and are untouched. + +Each table exposes: a linked dataset name (to the dataset record page), short attribution, variable, and value. Rows are sorted by dataset display name → variable → value. + +## Motivation + +Additional EDA datasets exist for both phenotype and cellular localization that have variable/value data but are not suitable for graphs. The existing graph data sub-tables only cover graph-backed datasets. A single unified "all data" table per category is preferred over a separate table per dataset (the Rodent Malaria anti-pattern). + +## Files Changed (7 total) + +| File | Change | +|---|---| +| `Model/lib/dst/phenotype.dst` | Add `phenotypeAllDataGeneTableSql` template | +| `Model/lib/dst/cellularLocalization.dst` | Add `cellularLocalizationAllDataGeneTableSql` template | +| `Model/lib/wdk/model/records/geneTableQueries.xml` | Add `EdaPhenotypeDataTable` and `EdaCellularLocalizationDataTable` sqlQuery elements | +| `Model/lib/wdk/model/records/geneRecord.xml` | Add `EdaPhenotypeDataTable` and `EdaCellularLocalizationDataTable` table definitions with linkAttribute | +| `Model/lib/wdk/ontology/individuals.txt` | Add ontology entries for both new tables with `record` and `download` scope | +| `EbrcModelCommon/Model/src/main/java/.../PhenotypeEDAStudy.java` | Inject new template + add model reference | +| `EbrcModelCommon/Model/src/main/java/.../CellularLocalizationEDAStudy.java` | Inject new template + add model reference | + +## Template SQL (.dst files) + +Both templates mirror the existing `phenotypeDataTableGeneTableSql` / `cellularLocalizationDataTableGeneTableSql` but add `dataset_presenter_id`, `dataset_presenter_display_name`, and `short_attribution` as literal columns from template props. 
The `value` coalesce is applied in the template (not the outer query) so the raw string/number/date columns are not exposed. + +### phenotype.dst — new template + +``` +[templateStart] +name=phenotypeAllDataGeneTableSql +anchorFile=ApiCommonModel/Model/lib/wdk/model/records/geneTableQueries.xml +prop=datasetName +prop=edaStudyStableId +prop=edaEntityAbbrev +prop=datasetDisplayName +prop=shortAttribution +>templateTextStart< +UNION +SELECT genes.string_value AS gene, + '${datasetName}' AS dataset_presenter_id, + '${datasetDisplayName}' AS dataset_presenter_display_name, + '${shortAttribution}' AS short_attribution, + ag.display_name AS variable, + coalesce(av.string_value, coalesce(round(av.number_value::numeric, 4)::text, av.date_value::text)) AS value +FROM eda.attributevalue_${edaStudyStableId}_${edaEntityAbbrev} av, + eda.attributegraph_${edaStudyStableId}_${edaEntityAbbrev} ag, + (SELECT av.${edaEntityAbbrev}_stable_id, MIN(gi.gene) as string_value + FROM eda.attributevalue_${edaStudyStableId}_${edaEntityAbbrev} av + JOIN apidbtuning.GeneId gi ON gi.id = av.string_value + WHERE av.attribute_stable_id = 'VEUPATHDB_GENE_ID' + GROUP BY av.${edaEntityAbbrev}_stable_id) genes +WHERE av.attribute_stable_id = ag.stable_id + AND av.${edaEntityAbbrev}_stable_id = genes.${edaEntityAbbrev}_stable_id + AND av.attribute_stable_id != 'VEUPATHDB_GENE_ID' +>templateTextEnd< + +Note: no ORDER BY in the template — it belongs on the outer query in geneTableQueries.xml. +``` + +### cellularLocalization.dst — new template + +Identical to above with name `cellularLocalizationAllDataGeneTableSql`. 
+ +## WDK Queries (geneTableQueries.xml) + +### EdaPhenotypeDataTable + +- `includeProjects="ToxoDB,PlasmoDB,TriTrypDB,FungiDB,AmoebaDB,UniDB"` +- Includes the ME49 gene remapping CTE via `RefSynOrthologousGenes_P` (same as existing phenotype graph data table) +- TEMPLATE_ANCHOR: `phenotypeAllDataGeneTableSql` +- Columns: `source_id`, `project_id`, `dataset_presenter_id`, `dataset_presenter_display_name`, `short_attribution`, `variable`, `value` +- Stub row uses empty strings / sentinel values (matching existing pattern) + +```xml + + + + + + + + + + +``` + +### EdaCellularLocalizationDataTable + +- `includeProjects="ToxoDB,TriTrypDB,UniDB"` +- No ME49 remapping CTE (matching existing cellular localization graph data table) +- TEMPLATE_ANCHOR: `cellularLocalizationAllDataGeneTableSql` +- Same columns as phenotype table + +## WDK Table Definitions (geneRecord.xml) + +### EdaPhenotypeDataTable + +```xml +
+ + + + + + + + + +
+``` + +### EdaCellularLocalizationDataTable + +Identical structure with `includeProjects="ToxoDB,TriTrypDB,UniDB"` and `displayName="Cellular Localization Data"`. + +## Ontology (individuals.txt) + +Two new tab-delimited entries, placed near their existing counterparts (lines ~224 and ~264): + +``` +GeneRecordClasses.GeneRecordClass.EdaPhenotypeDataTable http://edamontology.org/topic_3298 phenotype GeneRecordClasses.GeneRecordClass table EdaPhenotypeDataTable gene record download + +GeneRecordClasses.GeneRecordClass.EdaCellularLocalizationDataTable http://edamontology.org/topic_0140 Protein targeting and localization GeneRecordClasses.GeneRecordClass table EdaCellularLocalizationDataTable gene record download +``` + +## Injector Changes + +### PhenotypeEDAStudy.java + +In `injectTemplates()`, add after the existing `phenotypeDataTableGeneTableSql` inject: +```java +injectTemplate("phenotypeAllDataGeneTableSql"); +``` + +In `addModelReferences()`, add: +```java +addWdkReference("GeneRecordClasses.GeneRecordClass", "table", "EdaPhenotypeDataTable"); +``` + +### CellularLocalizationEDAStudy.java + +In `injectTemplates()`, add after the existing `cellularLocalizationDataTableGeneTableSql` inject: +```java +injectTemplate("cellularLocalizationAllDataGeneTableSql"); +``` + +In `addModelReferences()`, add: +```java +addWdkReference("GeneRecordClasses.GeneRecordClass", "table", "EdaCellularLocalizationDataTable"); +``` + +## Data Source + +Datasets are identified via `apidb.datasource.type IN ('phenotype', 'cellularLocalization')`. The TEMPLATE_ANCHOR mechanism drives per-dataset SQL generation — the injectors are called for each dataset presenter that uses `PhenotypeEDAStudy` or `CellularLocalizationEDAStudy`, so only datasets wired to those injectors will contribute rows. + +## Sorting + +Sorting (dataset display name → variable → value) is applied via `ORDER BY` at the end of the outer `sqlQuery` SQL in `geneTableQueries.xml`. 
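`ORDER BY` cannot appear inside an individual (unparenthesized) UNION branch — PostgreSQL accepts it only after the final branch, where it sorts the combined result — so it is not in the template.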