Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 18 additions & 27 deletions Model/lib/psql/webready/orgSpecific/GeneProduct_p.psql
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,6 @@ create unlogged table :SCHEMA.:CLEAN_ORG_ABBREVGeneFeatureProductTmp as

:CREATE_AND_POPULATE
WITH
-- Identify 1:1 gene:transcript relationships for this organism
one_to_one_genes AS (
SELECT gf.na_feature_id as gene_na_feature_id,
gf.source_id as gene_source_id,
MAX(t.na_feature_id) as transcript_na_feature_id
FROM :SCHEMA.:CLEAN_ORG_ABBREVGeneFeatureProductTmp gf
LEFT JOIN dots.Transcript t ON t.parent_id = gf.na_feature_id
GROUP BY gf.na_feature_id, gf.source_id
HAVING COUNT(t.na_feature_id) = 1
),

-- Informative gene feature products (uninformative products excluded)
informative_gfp AS (
SELECT gfp.na_feature_id, gfp.product, gfp.assigned_by, gfp.is_preferred
Expand Down Expand Up @@ -101,35 +90,37 @@ create unlogged table :SCHEMA.:CLEAN_ORG_ABBREVGeneFeatureProductTmp as
GROUP BY gf.source_id
),

-- Priority 3: 1:1 + Preferred curated transcript products
-- Priority 3: Preferred curated transcript products with 1 distinct informative product across all transcripts
transcript_curated_preferred_one_to_one AS (
SELECT oto.gene_source_id as source_id,
SUBSTR(STRING_AGG(DISTINCT itp.product, ', ' ORDER BY itp.product), 1, 4000) as product,
SELECT gf.source_id,
MIN(itp.product) as product,
3 as source_rule,
COUNT(DISTINCT itp.product) as value_count,
'preferred_curated_transcript_1to1' as rule_description
FROM one_to_one_genes oto
INNER JOIN informative_tp itp ON itp.gene_na_feature_id = oto.gene_na_feature_id
1 as value_count,
'preferred_curated_transcript_1distinct' as rule_description
FROM :SCHEMA.:CLEAN_ORG_ABBREVGeneFeatureProductTmp gf
INNER JOIN informative_tp itp ON itp.gene_na_feature_id = gf.na_feature_id
WHERE LOWER(itp.assigned_by) IN ('veupathdb', 'genedb', 'hagai', 'liverpool', 'beverleylab',
'kissingerlab', 'robello_lab', 'sanger', 'sgd', 'cgd',
'pombase', 'dictybase', 'flybase', 'tair')
AND itp.is_preferred = 1
GROUP BY oto.gene_source_id
GROUP BY gf.na_feature_id, gf.source_id
HAVING COUNT(DISTINCT itp.product) = 1
),

-- Priority 4: 1:1 + Any curated transcript products
-- Priority 4: Any curated transcript products with 1 distinct informative product across all transcripts
transcript_curated_any_one_to_one AS (
SELECT oto.gene_source_id as source_id,
SUBSTR(STRING_AGG(DISTINCT itp.product, ', ' ORDER BY itp.product), 1, 4000) as product,
SELECT gf.source_id,
MIN(itp.product) as product,
4 as source_rule,
COUNT(DISTINCT itp.product) as value_count,
'curated_transcript_1to1' as rule_description
FROM one_to_one_genes oto
INNER JOIN informative_tp itp ON itp.gene_na_feature_id = oto.gene_na_feature_id
1 as value_count,
'curated_transcript_1distinct' as rule_description
FROM :SCHEMA.:CLEAN_ORG_ABBREVGeneFeatureProductTmp gf
INNER JOIN informative_tp itp ON itp.gene_na_feature_id = gf.na_feature_id
WHERE LOWER(itp.assigned_by) IN ('veupathdb', 'genedb', 'hagai', 'liverpool', 'beverleylab',
'kissingerlab', 'robello_lab', 'sanger', 'sgd', 'cgd',
'pombase', 'dictybase', 'flybase', 'tair')
GROUP BY oto.gene_source_id
GROUP BY gf.na_feature_id, gf.source_id
HAVING COUNT(DISTINCT itp.product) = 1
),

-- Priority 5: ARBA annotations
Expand Down
16 changes: 8 additions & 8 deletions Model/lib/xml/tuningManager/apiTuningManager.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<!-- unpartitioned version of this table -->
<tuningTable name="GeneAttributes">
<externalDependency name="webready.GeneAttributes_p"/>
<externalDependency name="webready.GeneProduct_p"/>
<externalDependency name="apidb.tempGeneProduct"/>
<sql>
<![CDATA[
create table geneattributes&1 as select * from webready.geneattributes_p;
Expand All @@ -16,7 +16,7 @@
<![CDATA[
UPDATE geneattributes&1 ga
SET product = gp.product
FROM webready.GeneProduct_p gp
FROM apidb.tempGeneProduct gp
WHERE ga.source_id = gp.source_id
AND gp.product IS NOT NULL
]]>
Expand Down Expand Up @@ -46,7 +46,7 @@
<!-- unpartitioned version of this table -->
<tuningTable name="TranscriptAttributes">
<externalDependency name="webready.TranscriptAttributes_p"/>
<externalDependency name="webready.GeneProduct_p"/>
<externalDependency name="apidb.tempGeneProduct"/>
<!--externalDependency name="webready.TranscriptProduct_p"/-->
<sql>
<![CDATA[
Expand All @@ -57,7 +57,7 @@
<![CDATA[
UPDATE transcriptattributes&1 ta
SET gene_product = gp.product
FROM webready.GeneProduct_p gp
FROM apidb.tempGeneProduct gp
WHERE ta.gene_source_id = gp.source_id
AND gp.product IS NOT NULL
]]>
Expand Down Expand Up @@ -2807,7 +2807,7 @@ create index Organism_projectId_idx&1 ON OrganismAttributes&1 (project_id, sourc
<externalDependency name="webready.GeneAttributes_p"/>
<externalDependency name="webready.TranscriptAttributes_p"/>
<externalDependency name="webready.OrganismAbbreviation_p"/>
<externalDependency name="webready.GeneProduct_p"/>
<externalDependency name="apidb.tempGeneProduct"/>
<externalDependency name="dots.GoAssocInstEvidCode"/>
<externalDependency name="dots.Transcript"/>
<externalDependency name="sres.OntologyTerm"/>
Expand Down Expand Up @@ -2887,7 +2887,7 @@ create index Organism_projectId_idx&1 ON OrganismAttributes&1 (project_id, sourc
null as transcript_ids, null as reference, null as evidence_code,
null as evidence_code_parameter, null as assigned_by,
'dots.GeneFeature' as source, orgrecord.org_abbrev
from dots.GeneFeature gf, webready.GeneAttributes_p ga, webready.GeneProduct_p gp
from dots.GeneFeature gf, webready.GeneAttributes_p ga, apidb.tempGeneProduct gp
where ga.na_feature_id = gf.na_feature_id
and gf.product is not null
and ga.source_id = gp.source_id
Expand All @@ -2907,7 +2907,7 @@ create index Organism_projectId_idx&1 ON OrganismAttributes&1 (project_id, sourc
and ta.org_abbrev = orgrecord.org_abbrev
and ta.gene_source_id
not in (select source_id
from webready.GeneProduct_p
from apidb.tempGeneProduct
where source_rule < 6
and org_abbrev = orgrecord.org_abbrev) -- product only in dots.Transcript
group by ta.gene_source_id, ta.project_id, t.product;
Expand All @@ -2918,7 +2918,7 @@ create index Organism_projectId_idx&1 ON OrganismAttributes&1 (project_id, sourc
null as transcript_ids, null as reference, null as evidence_code,
null as evidence_code_parameter, null as assigned_by,
'unspecified product' as source, orgrecord.org_abbrev
from webready.GeneProduct_p gp, webready.GeneAttributes_p ga
from apidb.tempGeneProduct gp, webready.GeneAttributes_p ga
where gp.source_rule = 7
and gp.source_id = ga.source_id
and ga.org_abbrev = orgrecord.org_abbrev
Expand Down