From 921858ddc745decc7e0e9721365521ad4fbd5821 Mon Sep 17 00:00:00 2001
From: Colleen Xu <colleenhxu@gmail.com>
Date: Mon, 24 Jul 2023 17:17:52 -0700
Subject: [PATCH 01/11] multiple service provider kps: change pubmed keyword to
 pmid

---
 CTD/smartapi.yaml                        | 18 +++++++++---------
 EBIgene2phenotype/smartapi.yaml          |  2 +-
 MGIgene2phenotype/smartapi.yaml          |  2 +-
 bindingdb/smartapi.yaml                  |  4 ++--
 dgidb/openapi.yml                        |  4 ++--
 gtrx/gtrx.yaml                           |  4 ++--
 mychem.info/openapi_full.yml             |  4 ++--
 mydisease.info/smartapi.yaml             |  8 ++++----
 mygene.info/openapi_full.yml             |  6 +++---
 myvariant.info/openapi_full.yml          |  6 +++---
 pharmgkb/smartapi.yaml                   | 10 +++++-----
 rhea/smartapi.yaml                       | 10 +++++-----
 semmeddb/smartapi.yaml                   |  4 ++--
 semmeddb/version_without_operations.yaml |  4 ++--
 14 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/CTD/smartapi.yaml b/CTD/smartapi.yaml
index 262e66b8..b6ff566f 100644
--- a/CTD/smartapi.yaml
+++ b/CTD/smartapi.yaml
@@ -139,7 +139,7 @@ paths:
       - "$ref": "#/components/x-bte-kgs-operations/pathway2gene_2"
 components:
   x-bte-kgs-operations:
-    ## - pubmed IDs are |-delimited strings but BTE seems to parse these correctly
+    ## - pubmed IDs are pipe-delimited (|) strings but BTE seems to parse these correctly
     ## - while the API supports multiple IDs as input (batch-query)...we aren't using it because
     ##   response doesn't separate info from 1 input ID vs another. So BTE has trouble processing it
     ##   (would need a custom api-response-transform)
@@ -537,29 +537,29 @@ components:
   x-bte-response-mapping:
     chemical2gene:
       NCBIGene: data.GeneId            ## no prefix
-      pubmed: data.PubMedIds           ## no prefix
+      pmid: data.PubMedIds             ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # inTaxonName: data.Organism       ## human-readable label 
       # inTaxon: data.OrganismId         ## no prefix
     gene2chemical:
       MESH: data.ChemicalId            ## no prefix
-      pubmed: data.PubMedIds           
+      pmid: data.PubMedIds           
       ## commenting out because data-processing / biolink-modeling issues
       # inTaxonName: data.Organism       ## human-readable label 
       # inTaxon: data.OrganismId
     # chemical2disease_1:
     #   MESH: data.DiseaseID          ## HAS prefix
     #   ctd_chemical_disease_interaction_types: data.DirectEvidence
-    #   pubmed: data.PubMedIDs
+    #   pmid: data.PubMedIDs
     disease2chemical:
       MESH: data.ChemicalID
-      pubmed: data.PubMedIDs
+      pmid: data.PubMedIDs
       ## commenting out because data-processing / biolink-modeling issues
       # ctd_chemical_disease_interaction_types: data.DirectEvidence  ## |-delimited string is kept right now
     # chemical2disease_2:
     #   OMIM: data.DiseaseID          ## HAS PREFIX
     #   ctd_chemical_disease_interaction_types: data.DirectEvidence
-    #   pubmed: data.PubMedIDs
+    #   pmid: data.PubMedIDs
     chemical2go:
       GO: data.GoTermID  ## HAS PREFIX
       ## commenting out because data-processing / biolink-modeling issues
@@ -567,17 +567,17 @@ components:
       # related_gene_ncbigene: data.GeneID  ## no prefix
     disease2gene:
       NCBIGene: data.GeneID
-      pubmed: data.PubMedIDs
+      pmid: data.PubMedIDs
       ## commenting out because data-processing / biolink-modeling issues
       # ctd_gene_disease_interaction_types: data.DirectEvidence
     # gene2disease_1:
     #   MESH: data.DiseaseID  ## HAS PREFIX
     #   ctd_gene_disease_interaction_types: data.DirectEvidence
-    #   pubmed: data.PubMedIDs
+    #   pmid: data.PubMedIDs
     # gene2disease_2:
     #   OMIM: data.DiseaseID  ## HAS PREFIX
     #   ctd_gene_disease_interaction_types: data.DirectEvidence
-    #   pubmed: data.PubMedIDs
+    #   pmid: data.PubMedIDs
     # gene2pathway_1:
     #   REACT: data.PathwayID  ## HAS PREFIX
     #   output_name: data.PathwayName
diff --git a/EBIgene2phenotype/smartapi.yaml b/EBIgene2phenotype/smartapi.yaml
index 6956b9be..88e2e05f 100644
--- a/EBIgene2phenotype/smartapi.yaml
+++ b/EBIgene2phenotype/smartapi.yaml
@@ -659,7 +659,7 @@ components:
     disease-object:
       OMIM: gene2phenotype.disease.disease_mim   ## no prefix
       output_name: gene2phenotype.disease.disease_name
-      pubmed: gene2phenotype.pmids   ## no prefix
+      pmid: gene2phenotype.pmids   ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # ## similar to "biolink:has_mode_of_inheritance" (predicate) but doesn't use the expected ontology terms and seems a bit different
       # g2p_allelic_requirement: gene2phenotype.allelic_requirement
diff --git a/MGIgene2phenotype/smartapi.yaml b/MGIgene2phenotype/smartapi.yaml
index d7351919..f7549bd3 100644
--- a/MGIgene2phenotype/smartapi.yaml
+++ b/MGIgene2phenotype/smartapi.yaml
@@ -677,7 +677,7 @@ components:
       DOID: mgi.associated_with_disease.doid  ## has prefix
     gene_related_to_phenotype:
       MP: mgi.associated_with_phenotype.mp  ## has prefix
-      pubmed: mgi.associated_with_phenotype.pubmed  ## no prefix
+      pmid: mgi.associated_with_phenotype.pubmed  ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # mgi_allele_id: mgi.associated_with_phenotype.allele_id  ## MGI curie
       # mgi_allele_symbol: mgi.associated_with_phenotype.allele_symbol  ## free text?
diff --git a/bindingdb/smartapi.yaml b/bindingdb/smartapi.yaml
index a0e29310..fa07653c 100644
--- a/bindingdb/smartapi.yaml
+++ b/bindingdb/smartapi.yaml
@@ -642,7 +642,7 @@ components:
     pubchem-object:
       "PUBCHEM.COMPOUND": object.pubchem_cid       ## no prefix
       "biolink:original_subject": subject.name     ## was called "Target Name Assigned by Curator or DataSource"
-      pubmed: relation.pmid
+      pmid: relation.pmid                          ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # in_taxon: subject.organism                 ## was called "Target Source Organism According to Curator or DataSource"
       # bindingdb_curation_datasource: relation.curation_datasource    ## Curation/DataSource
@@ -652,7 +652,7 @@ components:
     uniprot-subject:
       UniProtKB: subject.uniprot.accession  ## no prefix
       "biolink:original_subject": subject.name
-      pubmed: relation.pmid
+      pmid: relation.pmid                          ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # in_taxon: subject.organism
       # bindingdb_curation_datasource: relation.curation_datasource
diff --git a/dgidb/openapi.yml b/dgidb/openapi.yml
index ee1f8648..4a367d49 100644
--- a/dgidb/openapi.yml
+++ b/dgidb/openapi.yml
@@ -1979,14 +1979,14 @@ components:
   x-bte-response-mapping:
     forward:
       NCBIGene: object.NCBIGene
-      pubmed: association.pmids
+      pmid: association.pmids     # no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # dgidb_interaction_claim_source: association.interaction_claim_source
       # "biolink:original_predicate": association.interaction_types  ## or dgidb interaction types
       # dgidb_interaction_group_score: association.interaction_group_score
     reverse:
       "CHEMBL.COMPOUND": subject.CHEMBL_COMPOUND
-      pubmed: association.pmids
+      pmid: association.pmids
       ## commenting out because data-processing / biolink-modeling issues
       # dgidb_interaction_claim_source: association.interaction_claim_source
       # "biolink:original_predicate": association.interaction_types  ## or dgidb interaction types
diff --git a/gtrx/gtrx.yaml b/gtrx/gtrx.yaml
index 87694bdb..812afc34 100644
--- a/gtrx/gtrx.yaml
+++ b/gtrx/gtrx.yaml
@@ -666,7 +666,7 @@ components:
     drug:
     ## using order and language from gtrx website for these fields
       UNII: object.intervention.inxight   ## no prefix
-      pubmed: references.pmid  ## no prefix
+      pmid: references.pmid  ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # type_of_intervention: object.intervention.int_class
       # intervention_MUST_be_started_within: object.timeframe
@@ -679,7 +679,7 @@ components:
       # clinical_summary: subject.clinical_summary
     disease:
       OMIM: subject.omim   ## no prefix
-      pubmed: references.pmid  ## no prefix
+      pmid: references.pmid  ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # type_of_intervention: object.intervention.int_class
       # intervention_MUST_be_started_within: object.timeframe
diff --git a/mychem.info/openapi_full.yml b/mychem.info/openapi_full.yml
index d5484433..b3f75de3 100644
--- a/mychem.info/openapi_full.yml
+++ b/mychem.info/openapi_full.yml
@@ -631,7 +631,7 @@ components:
       RHEA: chebi.xrefs.rhea
     drugMechChembl_EnsemblOutput:
       ENSEMBL: chembl.drug_mechanisms.target_components.ensembl_gene  ## no prefix
-      pubmed: chembl.drug_mechanisms.mechanism_refs.PubMed  ## no prefix
+      pmid: chembl.drug_mechanisms.mechanism_refs.PubMed  ## no prefix
       output_name: chembl.drug_mechanisms.target_name
       ## commenting out because data-processing / biolink-modeling issues
       # chembl_drug_action_type: chembl.drug_mechanisms.action_type
@@ -645,7 +645,7 @@ components:
       input_name: chembl.drug_mechanisms.target_name
     drugMechChembl_UniprotOutput:
       UniProtKB: chembl.drug_mechanisms.target_components.uniprot  ## no prefix
-      pubmed: chembl.drug_mechanisms.mechanism_refs.PubMed  ## no prefix
+      pmid: chembl.drug_mechanisms.mechanism_refs.PubMed  ## no prefix
       output_name: chembl.drug_mechanisms.target_name
       ## commenting out because data-processing / biolink-modeling issues
       # chembl_drug_action_type: chembl.drug_mechanisms.action_type
diff --git a/mydisease.info/smartapi.yaml b/mydisease.info/smartapi.yaml
index 03f7c1aa..4f1ac011 100644
--- a/mydisease.info/smartapi.yaml
+++ b/mydisease.info/smartapi.yaml
@@ -605,7 +605,7 @@ components:
     ## not including "name" field since BTE isn't ingesting / using it to annotate nodes...
     disease-gene:
       NCBIGene: disgenet.genes_related_to_disease.gene_id  ## no prefix
-      pubmed: disgenet.genes_related_to_disease.pubmed     ## no prefix
+      pmid: disgenet.genes_related_to_disease.pubmed     ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # disgenet_source: disgenet.genes_related_to_disease.source
       # ## could remove "disgenet-" prefix from the key
@@ -616,7 +616,7 @@ components:
       # disgenet-score: disgenet.genes_related_to_disease.score
     disease-variant:
       DBSNP: disgenet.variants_related_to_disease.rsid       ## no prefix
-      pubmed: disgenet.variants_related_to_disease.pubmed    ## no prefix
+      pmid: disgenet.variants_related_to_disease.pubmed    ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # disgenet_source: disgenet.variants_related_to_disease.source
       # ## could remove "disgenet-" prefix from the key
@@ -628,7 +628,7 @@ components:
     disease-phenotype:
       HP: hpo.phenotype_related_to_disease.hpo_id                                   ## HAS PREFIX (HP)
       ## note: there are 6 reference-related fields (parser created based on ID namespace)
-      pubmed: hpo.phenotype_related_to_disease.pmid_refs                            ## HAS PREFIX (PMID)
+      pmid: hpo.phenotype_related_to_disease.pmid_refs                            ## HAS PREFIX (PMID)
       ## note: there are 4 frequency-related fields (parser created based on different data types)
       "biolink:has_quotient": hpo.phenotype_related_to_disease.numeric_freq         ## decimal freq
       "biolink:frequency_qualifier": hpo.phenotype_related_to_disease.hp_freq       ## HP ontology freq term
@@ -652,7 +652,7 @@ components:
       # hp_sex: hpo.phenotype_related_to_disease.sex
     disease-chemical:
       MESH: ctd.chemical_related_to_disease.mesh_chemical_id    ## no prefix
-      pubmed: ctd.chemical_related_to_disease.pubmed     ## no prefix
+      pmid: ctd.chemical_related_to_disease.pubmed     ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # # name: ctd.chemical_related_to_disease.chemical_name
       # ## direct_evidence has two values, 'marker/mechanism' or 'therapeutic'
diff --git a/mygene.info/openapi_full.yml b/mygene.info/openapi_full.yml
index 5041db2e..1ed6773c 100644
--- a/mygene.info/openapi_full.yml
+++ b/mygene.info/openapi_full.yml
@@ -1235,19 +1235,19 @@ components:
       output_name: pathway.biocarta.name
     biologicalProcess: 
       GO: go.BP.id  ## HAS PREFIX (GO)
-      pubmed: go.BP.pubmed
+      pmid: go.BP.pubmed
       ## commenting out because data-processing / biolink-modeling issues
       # evidence: go.BP.evidence  ## categorical variable?
       # go_qualifier: go.BP.qualifier  ## seems to be like a relation
     molecularFunction: 
       GO: go.MF.id  ## HAS PREFIX (GO)
-      pubmed: go.MF.pubmed
+      pmid: go.MF.pubmed
       ## commenting out because data-processing / biolink-modeling issues
       # evidence: go.MF.evidence  ## categorical variable?
       # go_qualifier: go.MF.qualifier  ## seems to be like a relation
     cellularComponent: 
       GO: go.CC.id  ## HAS PREFIX (GO)
-      pubmed: go.CC.pubmed
+      pmid: go.CC.pubmed
       ## commenting out because data-processing / biolink-modeling issues
       # evidence: go.CC.evidence  ## categorical variable?
       # go_qualifier: go.CC.qualifier  ## seems to be like a relation
diff --git a/myvariant.info/openapi_full.yml b/myvariant.info/openapi_full.yml
index 041228e0..9d80b4cd 100644
--- a/myvariant.info/openapi_full.yml
+++ b/myvariant.info/openapi_full.yml
@@ -615,7 +615,7 @@ components:
   x-bte-response-mapping:
     civic-geneDisease:
       DOID: civic.evidence_items.disease.doid
-      pubmed: civic.evidence_items.source.pubmed
+      pmid: civic.evidence_items.source.pubmed    ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # ## categorical var / relation
       # civic_clinical_significance: civic.evidence_items.clinical_significance
@@ -641,7 +641,7 @@ components:
       DOID: civic.evidence_items.disease.doid
       ## variant name
       input_name: _id
-      pubmed: civic.evidence_items.source.pubmed
+      pmid: civic.evidence_items.source.pubmed    ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # ## categorical var / relation
       # civic_clinical_significance: civic.evidence_items.clinical_significance
@@ -704,7 +704,7 @@ components:
       input_name: _id      ## variant name
       ## commenting out because data-processing / biolink-modeling issues
       ## not taking notes on a bunch of fields about the gene and variant
-      # pubmed: docm.pubmed_id   ## issue: lists are ", "-delimited strings
+      # pmid: docm.pubmed_id   ## no prefix. issue: lists are ", "-delimited strings
       # docm_source: docm.source   ## issue: sometimes it's just the value "-" or `null`
       # source_url: docm.url       ##: sometimes it's just `null`
       # variant_consequence_type: docm.trv_type
diff --git a/pharmgkb/smartapi.yaml b/pharmgkb/smartapi.yaml
index 89fae4d8..2f911001 100644
--- a/pharmgkb/smartapi.yaml
+++ b/pharmgkb/smartapi.yaml
@@ -1936,34 +1936,34 @@ components:
       "PHARMGKB.CHEMICAL": data.obj2Id
       output_name: data.obj2Name
       input_name: data.obj1Name
-      pubmed: data.pmid
+      pmid: data.pmid  ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # pharmgkb_source_of_connection: data.relType
     connection-obj-pathway:
       "PHARMGKB.PATHWAYS": data.obj2Id
       output_name: data.obj2Name
       input_name: data.obj1Name
-      pubmed: data.pmid
+      pmid: data.pmid
       ## commenting out because data-processing / biolink-modeling issues
       # pharmgkb_source_of_connection: data.relType
     connection-obj-disease:
       "PHARMGKB.DISEASE": data.obj2Id
       output_name: data.obj2Name
       input_name: data.obj1Name
-      pubmed: data.pmid
+      pmid: data.pmid
       ## commenting out because data-processing / biolink-modeling issues
       # pharmgkb_source_of_connection: data.relType
     connection-obj-gene:
       "PHARMGKB.GENE": data.obj2Id
       output_name: data.obj2Name
       input_name: data.obj1Name
-      pubmed: data.pmid
+      pmid: data.pmid
       ## commenting out because data-processing / biolink-modeling issues
       # pharmgkb_source_of_connection: data.relType
     connection-obj-dbsnp:
       DBSNP: data.obj2Name    ## DBSNP rs ID is put there
       input_name: data.obj1Name
-      pubmed: data.pmid
+      pmid: data.pmid
       ## commenting out because data-processing / biolink-modeling issues
       # pharmgkb_source_of_connection: data.relType
     pathEnd-chem:
diff --git a/rhea/smartapi.yaml b/rhea/smartapi.yaml
index 388b97fa..5acde2c9 100644
--- a/rhea/smartapi.yaml
+++ b/rhea/smartapi.yaml
@@ -766,35 +766,35 @@ components:
   x-bte-response-mapping:
     side_l-object:
       CHEBI: side_l.chebi_id     ## has CHEBI prefix
-      pubmed: citations          ## has PMID prefix
+      pmid: citations          ## has PMID prefix
       ## commenting out because data-processing / biolink-modeling issues
       # equation: equation
       # is_transport: is_transport
       # reaction_status: status
     side_r-object:
       CHEBI: side_r.chebi_id     ## has CHEBI prefix
-      pubmed: citations          ## has PMID prefix
+      pmid: citations          ## has PMID prefix
       ## commenting out because data-processing / biolink-modeling issues
       # equation: equation
       # is_transport: is_transport
       # reaction_status: status
     side_l_reactive-object:
       CHEBI: side_l.reactive_parts.chebi_id     ## has CHEBI prefix
-      pubmed: citations                         ## has PMID prefix
+      pmid: citations                         ## has PMID prefix
       ## commenting out because data-processing / biolink-modeling issues
       # equation: equation
       # is_transport: is_transport
       # reaction_status: status
     side_r_reactive-object:
       CHEBI: side_r.reactive_parts.chebi_id     ## has CHEBI prefix
-      pubmed: citations                         ## has PMID prefix
+      pmid: citations                         ## has PMID prefix
       ## commenting out because data-processing / biolink-modeling issues
       # equation: equation
       # is_transport: is_transport
       # reaction_status: status
     reaction-object:
       RHEA: _id     ## has RHEA prefix
-      pubmed: citations          ## has PMID prefix
+      pmid: citations          ## has PMID prefix
       ## commenting out because data-processing / biolink-modeling issues
       # equation: equation
       # is_transport: is_transport
diff --git a/semmeddb/smartapi.yaml b/semmeddb/smartapi.yaml
index 703c7698..0b20faf2 100644
--- a/semmeddb/smartapi.yaml
+++ b/semmeddb/smartapi.yaml
@@ -3125,7 +3125,7 @@ components:
   x-bte-response-mapping:
     umls-obj:
       UMLS: object.umls  ## no prefix
-      pubmed: predication.pmid       ## no prefix
+      pmid: predication.pmid       ## no prefix
       "biolink:supporting_text": predication.sentence
       "biolink:original_subject": subject.umls
       "biolink:original_object": object.umls
@@ -3137,7 +3137,7 @@ components:
       # "biolink:original_predicate": predicate
     umls-subj:
       UMLS: subject.umls  ## no prefix
-      pubmed: predication.pmid       ## no prefix
+      pmid: predication.pmid       ## no prefix
       "biolink:supporting_text": predication.sentence
       "biolink:original_subject": subject.umls
       "biolink:original_object": object.umls
diff --git a/semmeddb/version_without_operations.yaml b/semmeddb/version_without_operations.yaml
index cb5be128..613bd342 100644
--- a/semmeddb/version_without_operations.yaml
+++ b/semmeddb/version_without_operations.yaml
@@ -627,7 +627,7 @@ components:
   x-bte-response-mapping:
     umls-obj:
       UMLS: object.umls  ## no prefix
-      pubmed: predication.pmid       ## no prefix
+      pmid: predication.pmid       ## no prefix
       "biolink:supporting_text": predication.sentence
       "biolink:original_subject": subject.umls
       "biolink:original_object": object.umls
@@ -639,7 +639,7 @@ components:
       # "biolink:original_predicate": predicate
     umls-subj:
       UMLS: subject.umls  ## no prefix
-      pubmed: predication.pmid       ## no prefix
+      pmid: predication.pmid       ## no prefix
       "biolink:supporting_text": predication.sentence
       "biolink:original_subject": subject.umls
       "biolink:original_object": object.umls

From 8b8a0052d30c8491116a7d927e7a310c25d01ec7 Mon Sep 17 00:00:00 2001
From: Colleen Xu <colleenhxu@gmail.com>
Date: Mon, 24 Jul 2023 19:35:47 -0700
Subject: [PATCH 02/11] multiple service provider kps: change pmid keyword to
 ref_pmid

---
 CTD/smartapi.yaml                        | 16 ++++++++--------
 EBIgene2phenotype/smartapi.yaml          |  2 +-
 MGIgene2phenotype/smartapi.yaml          |  2 +-
 bindingdb/smartapi.yaml                  |  4 ++--
 dgidb/openapi.yml                        |  4 ++--
 gtrx/gtrx.yaml                           |  4 ++--
 mychem.info/openapi_full.yml             |  4 ++--
 mydisease.info/smartapi.yaml             |  8 ++++----
 mygene.info/openapi_full.yml             |  6 +++---
 myvariant.info/openapi_full.yml          |  6 +++---
 pharmgkb/smartapi.yaml                   | 10 +++++-----
 rhea/smartapi.yaml                       | 10 +++++-----
 semmeddb/smartapi.yaml                   |  4 ++--
 semmeddb/version_without_operations.yaml |  4 ++--
 14 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/CTD/smartapi.yaml b/CTD/smartapi.yaml
index b6ff566f..9ad6c153 100644
--- a/CTD/smartapi.yaml
+++ b/CTD/smartapi.yaml
@@ -537,29 +537,29 @@ components:
   x-bte-response-mapping:
     chemical2gene:
       NCBIGene: data.GeneId            ## no prefix
-      pmid: data.PubMedIds             ## no prefix
+      ref_pmid: data.PubMedIds             ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # inTaxonName: data.Organism       ## human-readable label 
       # inTaxon: data.OrganismId         ## no prefix
     gene2chemical:
       MESH: data.ChemicalId            ## no prefix
-      pmid: data.PubMedIds           
+      ref_pmid: data.PubMedIds           
       ## commenting out because data-processing / biolink-modeling issues
       # inTaxonName: data.Organism       ## human-readable label 
       # inTaxon: data.OrganismId
     # chemical2disease_1:
     #   MESH: data.DiseaseID          ## HAS prefix
     #   ctd_chemical_disease_interaction_types: data.DirectEvidence
-    #   pmid: data.PubMedIDs
+    #   ref_pmid: data.PubMedIDs
     disease2chemical:
       MESH: data.ChemicalID
-      pmid: data.PubMedIDs
+      ref_pmid: data.PubMedIDs
       ## commenting out because data-processing / biolink-modeling issues
       # ctd_chemical_disease_interaction_types: data.DirectEvidence  ## |-delimited string is kept right now
     # chemical2disease_2:
     #   OMIM: data.DiseaseID          ## HAS PREFIX
     #   ctd_chemical_disease_interaction_types: data.DirectEvidence
-    #   pmid: data.PubMedIDs
+    #   ref_pmid: data.PubMedIDs
     chemical2go:
       GO: data.GoTermID  ## HAS PREFIX
       ## commenting out because data-processing / biolink-modeling issues
@@ -567,17 +567,17 @@ components:
       # related_gene_ncbigene: data.GeneID  ## no prefix
     disease2gene:
       NCBIGene: data.GeneID
-      pmid: data.PubMedIDs
+      ref_pmid: data.PubMedIDs
       ## commenting out because data-processing / biolink-modeling issues
       # ctd_gene_disease_interaction_types: data.DirectEvidence
     # gene2disease_1:
     #   MESH: data.DiseaseID  ## HAS PREFIX
     #   ctd_gene_disease_interaction_types: data.DirectEvidence
-    #   pmid: data.PubMedIDs
+    #   ref_pmid: data.PubMedIDs
     # gene2disease_2:
     #   OMIM: data.DiseaseID  ## HAS PREFIX
     #   ctd_gene_disease_interaction_types: data.DirectEvidence
-    #   pmid: data.PubMedIDs
+    #   ref_pmid: data.PubMedIDs
     # gene2pathway_1:
     #   REACT: data.PathwayID  ## HAS PREFIX
     #   output_name: data.PathwayName
diff --git a/EBIgene2phenotype/smartapi.yaml b/EBIgene2phenotype/smartapi.yaml
index 88e2e05f..24bea767 100644
--- a/EBIgene2phenotype/smartapi.yaml
+++ b/EBIgene2phenotype/smartapi.yaml
@@ -659,7 +659,7 @@ components:
     disease-object:
       OMIM: gene2phenotype.disease.disease_mim   ## no prefix
       output_name: gene2phenotype.disease.disease_name
-      pmid: gene2phenotype.pmids   ## no prefix
+      ref_pmid: gene2phenotype.pmids   ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # ## similar to "biolink:has_mode_of_inheritance" (predicate) but doesn't use the expected ontology terms and seems a bit different
       # g2p_allelic_requirement: gene2phenotype.allelic_requirement
diff --git a/MGIgene2phenotype/smartapi.yaml b/MGIgene2phenotype/smartapi.yaml
index f7549bd3..e72d2847 100644
--- a/MGIgene2phenotype/smartapi.yaml
+++ b/MGIgene2phenotype/smartapi.yaml
@@ -677,7 +677,7 @@ components:
       DOID: mgi.associated_with_disease.doid  ## has prefix
     gene_related_to_phenotype:
       MP: mgi.associated_with_phenotype.mp  ## has prefix
-      pmid: mgi.associated_with_phenotype.pubmed  ## no prefix
+      ref_pmid: mgi.associated_with_phenotype.pubmed  ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # mgi_allele_id: mgi.associated_with_phenotype.allele_id  ## MGI curie
       # mgi_allele_symbol: mgi.associated_with_phenotype.allele_symbol  ## free text?
diff --git a/bindingdb/smartapi.yaml b/bindingdb/smartapi.yaml
index fa07653c..636dc820 100644
--- a/bindingdb/smartapi.yaml
+++ b/bindingdb/smartapi.yaml
@@ -642,7 +642,7 @@ components:
     pubchem-object:
       "PUBCHEM.COMPOUND": object.pubchem_cid       ## no prefix
       "biolink:original_subject": subject.name     ## was called "Target Name Assigned by Curator or DataSource"
-      pmid: relation.pmid                          ## no prefix
+      ref_pmid: relation.pmid                          ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # in_taxon: subject.organism                 ## was called "Target Source Organism According to Curator or DataSource"
       # bindingdb_curation_datasource: relation.curation_datasource    ## Curation/DataSource
@@ -652,7 +652,7 @@ components:
     uniprot-subject:
       UniProtKB: subject.uniprot.accession  ## no prefix
       "biolink:original_subject": subject.name
-      pmid: relation.pmid                          ## no prefix
+      ref_pmid: relation.pmid                          ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # in_taxon: subject.organism
       # bindingdb_curation_datasource: relation.curation_datasource
diff --git a/dgidb/openapi.yml b/dgidb/openapi.yml
index 4a367d49..e54827b3 100644
--- a/dgidb/openapi.yml
+++ b/dgidb/openapi.yml
@@ -1979,14 +1979,14 @@ components:
   x-bte-response-mapping:
     forward:
       NCBIGene: object.NCBIGene
-      pmid: association.pmids     # no prefix
+      ref_pmid: association.pmids     # no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # dgidb_interaction_claim_source: association.interaction_claim_source
       # "biolink:original_predicate": association.interaction_types  ## or dgidb interaction types
       # dgidb_interaction_group_score: association.interaction_group_score
     reverse:
       "CHEMBL.COMPOUND": subject.CHEMBL_COMPOUND
-      pmid: association.pmids
+      ref_pmid: association.pmids
       ## commenting out because data-processing / biolink-modeling issues
       # dgidb_interaction_claim_source: association.interaction_claim_source
       # "biolink:original_predicate": association.interaction_types  ## or dgidb interaction types
diff --git a/gtrx/gtrx.yaml b/gtrx/gtrx.yaml
index 812afc34..a456855f 100644
--- a/gtrx/gtrx.yaml
+++ b/gtrx/gtrx.yaml
@@ -666,7 +666,7 @@ components:
     drug:
     ## using order and language from gtrx website for these fields
       UNII: object.intervention.inxight   ## no prefix
-      pmid: references.pmid  ## no prefix
+      ref_pmid: references.pmid  ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # type_of_intervention: object.intervention.int_class
       # intervention_MUST_be_started_within: object.timeframe
@@ -679,7 +679,7 @@ components:
       # clinical_summary: subject.clinical_summary
     disease:
       OMIM: subject.omim   ## no prefix
-      pmid: references.pmid  ## no prefix
+      ref_pmid: references.pmid  ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # type_of_intervention: object.intervention.int_class
       # intervention_MUST_be_started_within: object.timeframe
diff --git a/mychem.info/openapi_full.yml b/mychem.info/openapi_full.yml
index b3f75de3..a4a69997 100644
--- a/mychem.info/openapi_full.yml
+++ b/mychem.info/openapi_full.yml
@@ -631,7 +631,7 @@ components:
       RHEA: chebi.xrefs.rhea
     drugMechChembl_EnsemblOutput:
       ENSEMBL: chembl.drug_mechanisms.target_components.ensembl_gene  ## no prefix
-      pmid: chembl.drug_mechanisms.mechanism_refs.PubMed  ## no prefix
+      ref_pmid: chembl.drug_mechanisms.mechanism_refs.PubMed  ## no prefix
       output_name: chembl.drug_mechanisms.target_name
       ## commenting out because data-processing / biolink-modeling issues
       # chembl_drug_action_type: chembl.drug_mechanisms.action_type
@@ -645,7 +645,7 @@ components:
       input_name: chembl.drug_mechanisms.target_name
     drugMechChembl_UniprotOutput:
       UniProtKB: chembl.drug_mechanisms.target_components.uniprot  ## no prefix
-      pmid: chembl.drug_mechanisms.mechanism_refs.PubMed  ## no prefix
+      ref_pmid: chembl.drug_mechanisms.mechanism_refs.PubMed  ## no prefix
       output_name: chembl.drug_mechanisms.target_name
       ## commenting out because data-processing / biolink-modeling issues
       # chembl_drug_action_type: chembl.drug_mechanisms.action_type
diff --git a/mydisease.info/smartapi.yaml b/mydisease.info/smartapi.yaml
index 4f1ac011..b3dc89e6 100644
--- a/mydisease.info/smartapi.yaml
+++ b/mydisease.info/smartapi.yaml
@@ -605,7 +605,7 @@ components:
     ## not including "name" field since BTE isn't ingesting / using it to annotate nodes...
     disease-gene:
       NCBIGene: disgenet.genes_related_to_disease.gene_id  ## no prefix
-      pmid: disgenet.genes_related_to_disease.pubmed     ## no prefix
+      ref_pmid: disgenet.genes_related_to_disease.pubmed     ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # disgenet_source: disgenet.genes_related_to_disease.source
       # ## could remove "disgenet-" prefix from the key
@@ -616,7 +616,7 @@ components:
       # disgenet-score: disgenet.genes_related_to_disease.score
     disease-variant:
       DBSNP: disgenet.variants_related_to_disease.rsid       ## no prefix
-      pmid: disgenet.variants_related_to_disease.pubmed    ## no prefix
+      ref_pmid: disgenet.variants_related_to_disease.pubmed    ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # disgenet_source: disgenet.variants_related_to_disease.source
       # ## could remove "disgenet-" prefix from the key
@@ -628,7 +628,7 @@ components:
     disease-phenotype:
       HP: hpo.phenotype_related_to_disease.hpo_id                                   ## HAS PREFIX (HP)
       ## note: there are 6 reference-related fields (parser created based on ID namespace)
-      pmid: hpo.phenotype_related_to_disease.pmid_refs                            ## HAS PREFIX (PMID)
+      ref_pmid: hpo.phenotype_related_to_disease.pmid_refs                            ## HAS PREFIX (PMID)
       ## note: there are 4 frequency-related fields (parser created based on different data types)
       "biolink:has_quotient": hpo.phenotype_related_to_disease.numeric_freq         ## decimal freq
       "biolink:frequency_qualifier": hpo.phenotype_related_to_disease.hp_freq       ## HP ontology freq term
@@ -652,7 +652,7 @@ components:
       # hp_sex: hpo.phenotype_related_to_disease.sex
     disease-chemical:
       MESH: ctd.chemical_related_to_disease.mesh_chemical_id    ## no prefix
-      pmid: ctd.chemical_related_to_disease.pubmed     ## no prefix
+      ref_pmid: ctd.chemical_related_to_disease.pubmed     ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # # name: ctd.chemical_related_to_disease.chemical_name
       # ## direct_evidence has two values, 'marker/mechanism' or 'therapeutic'
diff --git a/mygene.info/openapi_full.yml b/mygene.info/openapi_full.yml
index 1ed6773c..9496ff08 100644
--- a/mygene.info/openapi_full.yml
+++ b/mygene.info/openapi_full.yml
@@ -1235,19 +1235,19 @@ components:
       output_name: pathway.biocarta.name
     biologicalProcess: 
       GO: go.BP.id  ## HAS PREFIX (GO)
-      pmid: go.BP.pubmed
+      ref_pmid: go.BP.pubmed
       ## commenting out because data-processing / biolink-modeling issues
       # evidence: go.BP.evidence  ## categorical variable?
       # go_qualifier: go.BP.qualifier  ## seems to be like a relation
     molecularFunction: 
       GO: go.MF.id  ## HAS PREFIX (GO)
-      pmid: go.MF.pubmed
+      ref_pmid: go.MF.pubmed
       ## commenting out because data-processing / biolink-modeling issues
       # evidence: go.MF.evidence  ## categorical variable?
       # go_qualifier: go.MF.qualifier  ## seems to be like a relation
     cellularComponent: 
       GO: go.CC.id  ## HAS PREFIX (GO)
-      pmid: go.CC.pubmed
+      ref_pmid: go.CC.pubmed
       ## commenting out because data-processing / biolink-modeling issues
       # evidence: go.CC.evidence  ## categorical variable?
       # go_qualifier: go.CC.qualifier  ## seems to be like a relation
diff --git a/myvariant.info/openapi_full.yml b/myvariant.info/openapi_full.yml
index 9d80b4cd..5bc01922 100644
--- a/myvariant.info/openapi_full.yml
+++ b/myvariant.info/openapi_full.yml
@@ -615,7 +615,7 @@ components:
   x-bte-response-mapping:
     civic-geneDisease:
       DOID: civic.evidence_items.disease.doid
-      pmid: civic.evidence_items.source.pubmed    ## no prefix
+      ref_pmid: civic.evidence_items.source.pubmed    ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # ## categorical var / relation
       # civic_clinical_significance: civic.evidence_items.clinical_significance
@@ -641,7 +641,7 @@ components:
       DOID: civic.evidence_items.disease.doid
       ## variant name
       input_name: _id
-      pmid: civic.evidence_items.source.pubmed    ## no prefix
+      ref_pmid: civic.evidence_items.source.pubmed    ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # ## categorical var / relation
       # civic_clinical_significance: civic.evidence_items.clinical_significance
@@ -704,7 +704,7 @@ components:
       input_name: _id      ## variant name
       ## commenting out because data-processing / biolink-modeling issues
       ## not taking notes on a bunch of fields about the gene and variant
-      # pmid: docm.pubmed_id   ## no prefix. issue: lists are ", "-delimited strings
+      # ref_pmid: docm.pubmed_id   ## no prefix. issue: lists are ", "-delimited strings
       # docm_source: docm.source   ## issue: sometimes it's just the value "-" or `null`
       # source_url: docm.url       ##: sometimes it's just `null`
       # variant_consequence_type: docm.trv_type
diff --git a/pharmgkb/smartapi.yaml b/pharmgkb/smartapi.yaml
index 2f911001..c6d2f73e 100644
--- a/pharmgkb/smartapi.yaml
+++ b/pharmgkb/smartapi.yaml
@@ -1936,34 +1936,34 @@ components:
       "PHARMGKB.CHEMICAL": data.obj2Id
       output_name: data.obj2Name
       input_name: data.obj1Name
-      pmid: data.pmid  ## no prefix
+      ref_pmid: data.pmid  ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
       # pharmgkb_source_of_connection: data.relType
     connection-obj-pathway:
       "PHARMGKB.PATHWAYS": data.obj2Id
       output_name: data.obj2Name
       input_name: data.obj1Name
-      pmid: data.pmid
+      ref_pmid: data.pmid
       ## commenting out because data-processing / biolink-modeling issues
       # pharmgkb_source_of_connection: data.relType
     connection-obj-disease:
       "PHARMGKB.DISEASE": data.obj2Id
       output_name: data.obj2Name
       input_name: data.obj1Name
-      pmid: data.pmid
+      ref_pmid: data.pmid
       ## commenting out because data-processing / biolink-modeling issues
       # pharmgkb_source_of_connection: data.relType
     connection-obj-gene:
       "PHARMGKB.GENE": data.obj2Id
       output_name: data.obj2Name
       input_name: data.obj1Name
-      pmid: data.pmid
+      ref_pmid: data.pmid
       ## commenting out because data-processing / biolink-modeling issues
       # pharmgkb_source_of_connection: data.relType
     connection-obj-dbsnp:
       DBSNP: data.obj2Name    ## DBSNP rs ID is put there
       input_name: data.obj1Name
-      pmid: data.pmid
+      ref_pmid: data.pmid
       ## commenting out because data-processing / biolink-modeling issues
       # pharmgkb_source_of_connection: data.relType
     pathEnd-chem:
diff --git a/rhea/smartapi.yaml b/rhea/smartapi.yaml
index 5acde2c9..e3e2fe93 100644
--- a/rhea/smartapi.yaml
+++ b/rhea/smartapi.yaml
@@ -766,35 +766,35 @@ components:
   x-bte-response-mapping:
     side_l-object:
       CHEBI: side_l.chebi_id     ## has CHEBI prefix
-      pmid: citations          ## has PMID prefix
+      ref_pmid: citations          ## has PMID prefix
       ## commenting out because data-processing / biolink-modeling issues
       # equation: equation
       # is_transport: is_transport
       # reaction_status: status
     side_r-object:
       CHEBI: side_r.chebi_id     ## has CHEBI prefix
-      pmid: citations          ## has PMID prefix
+      ref_pmid: citations          ## has PMID prefix
       ## commenting out because data-processing / biolink-modeling issues
       # equation: equation
       # is_transport: is_transport
       # reaction_status: status
     side_l_reactive-object:
       CHEBI: side_l.reactive_parts.chebi_id     ## has CHEBI prefix
-      pmid: citations                         ## has PMID prefix
+      ref_pmid: citations                         ## has PMID prefix
       ## commenting out because data-processing / biolink-modeling issues
       # equation: equation
       # is_transport: is_transport
       # reaction_status: status
     side_r_reactive-object:
       CHEBI: side_r.reactive_parts.chebi_id     ## has CHEBI prefix
-      pmid: citations                         ## has PMID prefix
+      ref_pmid: citations                         ## has PMID prefix
       ## commenting out because data-processing / biolink-modeling issues
       # equation: equation
       # is_transport: is_transport
       # reaction_status: status
     reaction-object:
       RHEA: _id     ## has RHEA prefix
-      pmid: citations          ## has PMID prefix
+      ref_pmid: citations          ## has PMID prefix
       ## commenting out because data-processing / biolink-modeling issues
       # equation: equation
       # is_transport: is_transport
diff --git a/semmeddb/smartapi.yaml b/semmeddb/smartapi.yaml
index 0b20faf2..005dbad0 100644
--- a/semmeddb/smartapi.yaml
+++ b/semmeddb/smartapi.yaml
@@ -3125,7 +3125,7 @@ components:
   x-bte-response-mapping:
     umls-obj:
       UMLS: object.umls  ## no prefix
-      pmid: predication.pmid       ## no prefix
+      ref_pmid: predication.pmid       ## no prefix
       "biolink:supporting_text": predication.sentence
       "biolink:original_subject": subject.umls
       "biolink:original_object": object.umls
@@ -3137,7 +3137,7 @@ components:
       # "biolink:original_predicate": predicate
     umls-subj:
       UMLS: subject.umls  ## no prefix
-      pmid: predication.pmid       ## no prefix
+      ref_pmid: predication.pmid       ## no prefix
       "biolink:supporting_text": predication.sentence
       "biolink:original_subject": subject.umls
       "biolink:original_object": object.umls
diff --git a/semmeddb/version_without_operations.yaml b/semmeddb/version_without_operations.yaml
index 613bd342..d0e52a5d 100644
--- a/semmeddb/version_without_operations.yaml
+++ b/semmeddb/version_without_operations.yaml
@@ -627,7 +627,7 @@ components:
   x-bte-response-mapping:
     umls-obj:
       UMLS: object.umls  ## no prefix
-      pmid: predication.pmid       ## no prefix
+      ref_pmid: predication.pmid       ## no prefix
       "biolink:supporting_text": predication.sentence
       "biolink:original_subject": subject.umls
       "biolink:original_object": object.umls
@@ -639,7 +639,7 @@ components:
       # "biolink:original_predicate": predicate
     umls-subj:
       UMLS: subject.umls  ## no prefix
-      pmid: predication.pmid       ## no prefix
+      ref_pmid: predication.pmid       ## no prefix
       "biolink:supporting_text": predication.sentence
       "biolink:original_subject": subject.umls
       "biolink:original_object": object.umls

From 8c685db80bd02de381f1ae4309f678e1306ad090 Mon Sep 17 00:00:00 2001
From: Colleen Xu <colleenhxu@gmail.com>
Date: Mon, 24 Jul 2023 22:16:48 -0700
Subject: [PATCH 03/11] multiple service provider kps: add edge url info under
 keyword ref_url

---
 bindingdb/smartapi.yaml         |  8 ++++----
 litvar/smartapi.yaml            |  3 +--
 mychem.info/openapi_full.yml    | 12 ++++++++----
 mydisease.info/smartapi.yaml    |  6 ++++--
 mygene.info/openapi_full.yml    |  2 +-
 myvariant.info/openapi_full.yml |  4 ++--
 ncats_rare_source/smartapi.yaml | 16 +++++-----------
 pfocr/smartapi.yaml             | 18 +++++++++---------
 pharmgkb/smartapi.yaml          | 28 ++++++++++++----------------
 9 files changed, 46 insertions(+), 51 deletions(-)

diff --git a/bindingdb/smartapi.yaml b/bindingdb/smartapi.yaml
index 636dc820..4b7e40cd 100644
--- a/bindingdb/smartapi.yaml
+++ b/bindingdb/smartapi.yaml
@@ -599,7 +599,7 @@ components:
         #     relation.curation_datasource,relation.pmid,relation.bindingdb_link,relation.article_doi,relation.patent_number
           fields: >-
             object.pubchem_cid,
-            subject.name,relation.pmid
+            subject.name,relation.pmid,relation.bindingdb_link
           size: 1000
         predicate: physically_interacts_with
         source: "infores:bindingdb"
@@ -629,7 +629,7 @@ components:
           #   relation.curation_datasource,relation.pmid,relation.bindingdb_link,relation.article_doi,relation.patent_number
           fields: >-
             subject.uniprot.accession,
-            subject.name,relation.pmid
+            subject.name,relation.pmid,relation.bindingdb_link
           size: 1000
         predicate: physically_interacts_with
         source: "infores:bindingdb"
@@ -643,19 +643,19 @@ components:
       "PUBCHEM.COMPOUND": object.pubchem_cid       ## no prefix
       "biolink:original_subject": subject.name     ## was called "Target Name Assigned by Curator or DataSource"
       ref_pmid: relation.pmid                          ## no prefix
+      ref_url: relation.bindingdb_link
       ## commenting out because data-processing / biolink-modeling issues
       # in_taxon: subject.organism                 ## was called "Target Source Organism According to Curator or DataSource"
       # bindingdb_curation_datasource: relation.curation_datasource    ## Curation/DataSource
-      # "biolink:source_web_page": relation.bindingdb_link
       # article_doi: relation.article_doi
       # patent_number: relation.patent_number
     uniprot-subject:
       UniProtKB: subject.uniprot.accession  ## no prefix
       "biolink:original_subject": subject.name
       ref_pmid: relation.pmid                          ## no prefix
+      ref_url: relation.bindingdb_link
       ## commenting out because data-processing / biolink-modeling issues
       # in_taxon: subject.organism
       # bindingdb_curation_datasource: relation.curation_datasource
-      # "biolink:source_web_page": relation.bindingdb_link
       # article_doi: relation.article_doi
       # patent_number: relation.patent_number
diff --git a/litvar/smartapi.yaml b/litvar/smartapi.yaml
index 2976a883..53293991 100644
--- a/litvar/smartapi.yaml
+++ b/litvar/smartapi.yaml
@@ -69,5 +69,4 @@ components:
   x-bte-response-mapping:
     variant_located_in_gene:
       NCBIGene: gene.id  ## no prefix
-      ## commenting out because data-processing / biolink-modeling issues
-      # "biolink:source_web_page": links.url
+      ref_url: links.url
diff --git a/mychem.info/openapi_full.yml b/mychem.info/openapi_full.yml
index a4a69997..ed881112 100644
--- a/mychem.info/openapi_full.yml
+++ b/mychem.info/openapi_full.yml
@@ -632,11 +632,11 @@ components:
     drugMechChembl_EnsemblOutput:
       ENSEMBL: chembl.drug_mechanisms.target_components.ensembl_gene  ## no prefix
       ref_pmid: chembl.drug_mechanisms.mechanism_refs.PubMed  ## no prefix
+      ref_url: chembl.drug_mechanisms.mechanism_refs.url
       output_name: chembl.drug_mechanisms.target_name
       ## commenting out because data-processing / biolink-modeling issues
       # chembl_drug_action_type: chembl.drug_mechanisms.action_type
       # chembl_binding_site_name: chembl.drug_mechanisms.binding_site_name
-      # reference_url: chembl.drug_mechanisms.mechanism_refs.url
       # reference_isbn_note: chembl.drug_mechanisms.mechanism_refs.ISBN
       # in_taxon: chembl.drug_mechanisms.target_organism
       # chembl_target_type: chembl.drug_mechanisms.target_type
@@ -646,22 +646,22 @@ components:
     drugMechChembl_UniprotOutput:
       UniProtKB: chembl.drug_mechanisms.target_components.uniprot  ## no prefix
       ref_pmid: chembl.drug_mechanisms.mechanism_refs.PubMed  ## no prefix
+      ref_url: chembl.drug_mechanisms.mechanism_refs.url
       output_name: chembl.drug_mechanisms.target_name
       ## commenting out because data-processing / biolink-modeling issues
       # chembl_drug_action_type: chembl.drug_mechanisms.action_type
       # chembl_binding_site_name: chembl.drug_mechanisms.binding_site_name
-      # reference_url: chembl.drug_mechanisms.mechanism_refs.url
       # reference_isbn_note: chembl.drug_mechanisms.mechanism_refs.ISBN
       # in_taxon: chembl.drug_mechanisms.target_organism
       # chembl_target_type: chembl.drug_mechanisms.target_type
     chembl-treats:
       MESH: chembl.drug_indications.mesh_id
+      ref_url: chembl.drug_indications.indication_refs.url
       ## commenting out because data-processing / biolink-modeling issues
       # ## edge attributes
       # max_clinical_phase_for_indication: chembl.drug_indications.max_phase_for_ind
       # year_first_approved: chembl.drug_indications.first_approval
       # ## provenance-related field. Sources are ATC, ClinicalTrial, DailyMed, FDA
-      # "biolink:source_web_page": chembl.drug_indications.indication_refs.url
       # chembl_source: chembl.drug_indications.indication_refs.type
     chembl:
       "CHEMBL.COMPOUND": chembl.molecule_chembl_id  ## no prefix
@@ -903,6 +903,7 @@ components:
         #   chembl.drug_mechanisms.target_type
         fields: >-
           chembl.drug_mechanisms.target_components.ensembl_gene,
+          chembl.drug_mechanisms.mechanism_refs.url,
           chembl.drug_mechanisms.mechanism_refs.PubMed,
           chembl.drug_mechanisms.target_name
         size: 1000  ## note size limit; added just in case
@@ -968,6 +969,7 @@ components:
         #   chembl.drug_mechanisms.target_type
         fields: >-
           chembl.drug_mechanisms.target_components.uniprot,
+          chembl.drug_mechanisms.mechanism_refs.url,
           chembl.drug_mechanisms.mechanism_refs.PubMed,
           chembl.drug_mechanisms.target_name
         size: 1000  ## note size limit; added just in case
@@ -1026,7 +1028,9 @@ components:
         #   chembl.drug_indications.indication_refs.url,
         #   chembl.drug_indications.indication_refs.type,
         #   chembl.drug_indications.first_approval
-        fields: chembl.drug_indications.mesh_id
+        fields: >-
+          chembl.drug_indications.mesh_id,
+          chembl.drug_indications.indication_refs.url
         size: 1000  ## note size limit; added just in case
       predicate: treats
       source: "infores:chembl"
diff --git a/mydisease.info/smartapi.yaml b/mydisease.info/smartapi.yaml
index b3dc89e6..1f7e467e 100644
--- a/mydisease.info/smartapi.yaml
+++ b/mydisease.info/smartapi.yaml
@@ -628,7 +628,8 @@ components:
     disease-phenotype:
       HP: hpo.phenotype_related_to_disease.hpo_id                                   ## HAS PREFIX (HP)
       ## note: there are 6 reference-related fields (parser created based on ID namespace)
-      ref_pmid: hpo.phenotype_related_to_disease.pmid_refs                            ## HAS PREFIX (PMID)
+      ref_pmid: hpo.phenotype_related_to_disease.pmid_refs                          ## HAS PREFIX (PMID)
+      ref_url: hpo.phenotype_related_to_disease.website_refs
       ## note: there are 4 frequency-related fields (parser created based on different data types)
       "biolink:has_quotient": hpo.phenotype_related_to_disease.numeric_freq         ## decimal freq
       "biolink:frequency_qualifier": hpo.phenotype_related_to_disease.hp_freq       ## HP ontology freq term
@@ -637,7 +638,6 @@ components:
       ## commenting out because data-processing / biolink-modeling issues
       # ## note: there are 6 reference-related fields (parser created based on ID namespace)
       # isbn_refs: hpo.phenotype_related_to_disease.isbn_refs                         ## HAS PREFIX (ISBN)
-      # "biolink:source_web_page": hpo.phenotype_related_to_disease.website_refs      ## no prefix
       # decipher_refs: hpo.phenotype_related_to_disease.decipher_refs                 ## HAS PREFIX (DECIPHER)
       # omim_refs: hpo.phenotype_related_to_disease.omim_refs                         ## HAS PREFIX (OMIM)
       # orphanet_refs: hpo.phenotype_related_to_disease.orphanet_refs                 ## HAS PREFIX (ORPHANET)
@@ -809,6 +809,7 @@ components:
           fields: >-
             hpo.phenotype_related_to_disease.hpo_id,
             hpo.phenotype_related_to_disease.pmid_refs,
+            hpo.phenotype_related_to_disease.website_refs,
             hpo.phenotype_related_to_disease.numeric_freq,
             hpo.phenotype_related_to_disease.hp_freq,
             hpo.phenotype_related_to_disease.freq_numerator,
@@ -858,6 +859,7 @@ components:
           fields: >-
             hpo.phenotype_related_to_disease.hpo_id,
             hpo.phenotype_related_to_disease.pmid_refs,
+            hpo.phenotype_related_to_disease.website_refs,
             hpo.phenotype_related_to_disease.numeric_freq,
             hpo.phenotype_related_to_disease.hp_freq,
             hpo.phenotype_related_to_disease.freq_numerator,
diff --git a/mygene.info/openapi_full.yml b/mygene.info/openapi_full.yml
index 9496ff08..d31bf75e 100644
--- a/mygene.info/openapi_full.yml
+++ b/mygene.info/openapi_full.yml
@@ -1253,10 +1253,10 @@ components:
       # go_qualifier: go.CC.qualifier  ## seems to be like a relation
     clingen: 
       MONDO: clingen.clinical_validity.mondo  ## HAS PREFIX (MONDO)
+      ref_url: clingen.clinical_validity.online_report
       ## commenting out because data-processing / biolink-modeling issues
       # ## possible values for classification: definitive, moderate, limited, disputed, refuted (!!!), no known disease relationship (!!!)
       # clingen_clinical_validity_classification: clingen.clinical_validity.classification  
-      # "biolink:source_web_page": clingen.clinical_validity.online_report
     pantherMGI: 
       MGI: pantherdb.ortholog.MGI  ## no prefix
       ## commenting out because data-processing / biolink-modeling issues
diff --git a/myvariant.info/openapi_full.yml b/myvariant.info/openapi_full.yml
index 5bc01922..54e87d37 100644
--- a/myvariant.info/openapi_full.yml
+++ b/myvariant.info/openapi_full.yml
@@ -702,11 +702,11 @@ components:
     docm-disease-variant-start:
       DOID: docm.doid
       input_name: _id      ## variant name
+      ref_url: docm.url        ## issue: sometimes it's just `null`
       ## commenting out because data-processing / biolink-modeling issues
       ## not taking notes on a bunch of fields about the gene and variant
       # ref_pmid: docm.pubmed_id   ## no prefix. issue: lists are ", "-delimited strings
       # docm_source: docm.source   ## issue: sometimes it's just the value "-" or `null`
-      # source_url: docm.url       ##: sometimes it's just `null`
       # variant_consequence_type: docm.trv_type
       # variant_type: docm.type
       # docm_variant_in_gene: docm.genename
@@ -1138,7 +1138,7 @@ components:
         #   docm.doid,
         #   _id,
         #   docm.genename
-        fields: docm.doid,_id
+        fields: docm.doid,_id,docm.url
         size: 1000  ## note size limit; added just in case
       predicate: related_to
       ## it's curated but there's no relationship info...
diff --git a/ncats_rare_source/smartapi.yaml b/ncats_rare_source/smartapi.yaml
index 22bf2e70..5133ff61 100644
--- a/ncats_rare_source/smartapi.yaml
+++ b/ncats_rare_source/smartapi.yaml
@@ -602,10 +602,8 @@ components:
       - id: ORPHANET
         semantic: Disease
       parameters:
-      ## commenting out because data-processing / biolink-modeling issues
       ## orphanet ID has no prefix
-        # fields: raresource.disease.orphanet,raresource.disease.cooccurrence_url
-        fields: raresource.disease.orphanet
+        fields: raresource.disease.orphanet,raresource.disease.cooccurrence_url
         size: 1000              ## note size limit
       predicate: gene_associated_with_condition
       source: "infores:rare-source"
@@ -653,10 +651,8 @@ components:
       - id: UMLS
         semantic: Disease
       parameters:
-      ## commenting out because data-processing / biolink-modeling issues
       ## umls ID has no prefix
-        # fields: raresource.disease.umls,raresource.disease.cooccurrence_url
-        fields: raresource.disease.umls
+        fields: raresource.disease.umls,raresource.disease.cooccurrence_url
         size: 1000              ## note size limit
       predicate: gene_associated_with_condition
       source: "infores:rare-source"
@@ -697,13 +693,11 @@ components:
   ##   when they're supposed to show up only on the edge they correspond to
     diseaseOrphanet-object:
       ORPHANET: raresource.disease.orphanet    ## no prefix
-      ## commenting out because data-processing / biolink-modeling issues
-      # "biolink:xref": raresource.disease.cooccurrence_url    
-      # ## this url leads to a webpage with literature supporting the gene-disease relationship 
+      ref_url: raresource.disease.cooccurrence_url
+      ## this url leads to a webpage with literature supporting the gene-disease relationship 
     diseaseUMLS-object:
       UMLS: raresource.disease.umls    ## no prefix
-      ## commenting out because data-processing / biolink-modeling issues
-      # "biolink:xref": raresource.disease.cooccurrence_url    
+      ref_url: raresource.disease.cooccurrence_url
     gene-object:
       NCBIGene: entrezgene    ## no prefix
       output_name: symbol   ## SRI Node Norm sometimes doesn't know the NCBIGene name. Using "gene" symbol as name
diff --git a/pfocr/smartapi.yaml b/pfocr/smartapi.yaml
index 71925022..7da22f32 100644
--- a/pfocr/smartapi.yaml
+++ b/pfocr/smartapi.yaml
@@ -598,7 +598,7 @@ components:
           # fields: >-
           #   associatedWith.mentions.chemicals.mesh,
           #   associatedWith.figureUrl,associatedWith.pmc,associatedWith.title
-          fields: associatedWith.mentions.chemicals.mesh
+          fields: associatedWith.mentions.chemicals.mesh,associatedWith.figureUrl
           size: 1000
         predicate: occurs_together_in_literature_with
         source: "infores:pfocr"
@@ -628,7 +628,7 @@ components:
           # fields: >-
           #   associatedWith.mentions.genes.ncbigene,
           #   associatedWith.figureUrl,associatedWith.pmc,associatedWith.title
-          fields: associatedWith.mentions.genes.ncbigene
+          fields: associatedWith.mentions.genes.ncbigene,associatedWith.figureUrl
           size: 1000
         predicate: occurs_together_in_literature_with
         source: "infores:pfocr"
@@ -660,7 +660,7 @@ components:
           # fields: >-
           #   associatedWith.mentions.chemicals.mesh,
           #   associatedWith.figureUrl,associatedWith.pmc,associatedWith.title
-          fields: associatedWith.mentions.chemicals.mesh
+          fields: associatedWith.mentions.chemicals.mesh,associatedWith.figureUrl
           size: 1000
         predicate: occurs_together_in_literature_with
         source: "infores:pfocr"
@@ -690,7 +690,7 @@ components:
           # fields: >-
           #   associatedWith.mentions.diseases.mesh,
           #   associatedWith.figureUrl,associatedWith.pmc,associatedWith.title
-          fields: associatedWith.mentions.diseases.mesh
+          fields: associatedWith.mentions.diseases.mesh,associatedWith.figureUrl
           size: 1000
         predicate: occurs_together_in_literature_with
         source: "infores:pfocr"
@@ -722,7 +722,7 @@ components:
           # fields: >-
           #   associatedWith.mentions.diseases.mesh,
           #   associatedWith.figureUrl,associatedWith.pmc,associatedWith.title
-          fields: associatedWith.mentions.genes.ncbigene
+          fields: associatedWith.mentions.genes.ncbigene,associatedWith.figureUrl
           size: 1000
         predicate: occurs_together_in_literature_with
         source: "infores:pfocr"
@@ -752,7 +752,7 @@ components:
           # fields: >-
           #   associatedWith.mentions.diseases.mesh,
           #   associatedWith.figureUrl,associatedWith.pmc,associatedWith.title
-          fields: associatedWith.mentions.diseases.mesh
+          fields: associatedWith.mentions.diseases.mesh,associatedWith.figureUrl
           size: 1000
         predicate: occurs_together_in_literature_with
         source: "infores:pfocr"
@@ -767,19 +767,19 @@ components:
     ## not easy to make it clear with biolink-model terms: the publication (PMC ID) vs the figure (title and url)
     chem:
       MESH: associatedWith.mentions.chemicals.mesh   ## no prefix
+      ref_url: associatedWith.figureUrl
       ## commenting out because data-processing / biolink-modeling issues
-      # figure_download_url: associatedWith.figureUrl
       # figure_title: associatedWith.title
       # pmc_reference: associatedWith.pmc
     gene:
       NCBIGene: associatedWith.mentions.genes.ncbigene   ## no prefix
+      ref_url: associatedWith.figureUrl
       ## commenting out because data-processing / biolink-modeling issues
-      # figure_download_url: associatedWith.figureUrl
       # figure_title: associatedWith.title
       # pmc_reference: associatedWith.pmc
     disease:
       MESH: associatedWith.mentions.diseases.mesh   ## no prefix
+      ref_url: associatedWith.figureUrl
       ## commenting out because data-processing / biolink-modeling issues
-      # figure_download_url: associatedWith.figureUrl
       # figure_title: associatedWith.title
       # pmc_reference: associatedWith.pmc
diff --git a/pharmgkb/smartapi.yaml b/pharmgkb/smartapi.yaml
index c6d2f73e..4f3c59e5 100644
--- a/pharmgkb/smartapi.yaml
+++ b/pharmgkb/smartapi.yaml
@@ -1988,15 +1988,14 @@ components:
       input_name: data.name
     guidelineEnd-gene:
       "PHARMGKB.GENE": data.relatedGenes.id
-      ## guideline annotation info
-      "biolink:supporting_text": data.summaryMarkdown.html
+      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url (sometimes an expanded PMID, PMCID)
       ## commenting out because data-processing / biolink-modeling issues
       # ## related gene alleles
       # related_gene_alleles: data.relatedAlleles.symbol
       # ## provenance
-      # guideline_literature_urls: data.literature._sameAs
       # clinical_guideline_source: data.source      ## probably an enum of {cpic, dpwg, pro}
       # ## guideline annotation info
+      # guideline_summary: data.summaryMarkdown.html
       # clinical_guideline_name: data.name
       # flag_has_recommendation: data.recommendation
       # flag_has_alternative_drug_info: data.alternateDrugAvailable
@@ -2006,15 +2005,14 @@ components:
       # flag_has_pediatric_information: data.pediatric
     guidelineEnd-chem:
       "PHARMGKB.CHEMICAL": data.relatedChemicals.id
-      ## guideline annotation info
-      "biolink:supporting_text": data.summaryMarkdown.html
+      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url (sometimes an expanded PMID, PMCID)
       ## commenting out because data-processing / biolink-modeling issues
       # ## related gene alleles
       # related_gene_alleles: data.relatedAlleles.symbol
       # ## provenance
-      # guideline_literature_urls: data.literature._sameAs
       # clinical_guideline_source: data.source      ## probably an enum of {cpic, dpwg, pro}
       # ## guideline annotation info
+      # guideline_summary: data.summaryMarkdown.html
       # clinical_guideline_name: data.name
       # flag_has_recommendation: data.recommendation
       # flag_has_alternative_drug_info: data.alternateDrugAvailable
@@ -2025,15 +2023,14 @@ components:
     labelEnd-gene:
       ## not sure if prescribingGenes provides different info or not. using relatedGenes section instead
       "PHARMGKB.GENE": data.relatedGenes.id
-      ## label annotation info
-      "biolink:supporting_text": data.summaryMarkdown.html
+      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url
       ## commenting out because data-processing / biolink-modeling issues
       # ## related gene alleles
       # related_gene_alleles: data.relatedAlleles.symbol
       # ## provenance
-      # label_literature_urls: data.literature._sameAs
       # label_source: data.source      ## probably an enum of {fda, ema, pmda, hcsc}
       # ## label annotation info
+      # label_summary: data.summaryMarkdown.html
       # label_title: data.name
       # pgx_level: data.testing.term
       # flag_has_alternative_drug_info: data.alternateDrugAvailable
@@ -2042,15 +2039,14 @@ components:
       # flag_has_pediatric_information: data.pediatric
     labelEnd-chem:
       "PHARMGKB.CHEMICAL": data.relatedChemicals.id
-      ## label annotation info
-      "biolink:supporting_text": data.summaryMarkdown.html
+      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url
       ## commenting out because data-processing / biolink-modeling issues
       # ## related gene alleles
       # related_gene_alleles: data.relatedAlleles.symbol
       # ## provenance
-      # label_literature_urls: data.literature._sameAs
       # label_source: data.source      ## probably an enum of {fda, ema, pmda, hcsc}
       # ## label annotation info
+      # label_summary: data.summaryMarkdown.html
       # label_title: data.name
       # pgx_level: data.testing.term
       # flag_has_alternative_drug_info: data.alternateDrugAvailable
@@ -2087,12 +2083,12 @@ components:
     variantAnnotEnd-chem:
       "PHARMGKB.CHEMICAL": data.relatedChemicals.id
       ## provenance
-      "biolink:supporting_text": data.sentence
+      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url
       ## commenting out because data-processing / biolink-modeling issues
       # related_genes: data.location.genes.symbol
       # flag_significant_association: data.significance.term  ## yes/no values?
       # ## provenance
-      # annotation_literature_urls: data.literature._sameAs
+      # pharmgkb_sentence_summary: data.sentence
       # description: data.description
       # ## phenotypic effect
       # phenotype_name: data.phenotypes.phenotype
@@ -2104,12 +2100,12 @@ components:
     variantAnnotEnd-variant:
       DBSNP: data.location.rsid
       ## provenance
-      "biolink:supporting_text": data.sentence
+      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url
       ## commenting out because data-processing / biolink-modeling issues
       # related_genes: data.location.genes.symbol
       # flag_significant_association: data.significance.term  ## yes/no values?
       # ## provenance
-      # annotation_literature_urls: data.literature._sameAs
+      # pharmgkb_sentence_summary: data.sentence
       # description: data.description
       # ## phenotypic effect
       # phenotype_name: data.phenotypes.phenotype

From ad41f3147dded47900531e1805c4096b652c751b Mon Sep 17 00:00:00 2001
From: Colleen Xu <colleenhxu@gmail.com>
Date: Tue, 25 Jul 2023 00:04:11 -0700
Subject: [PATCH 04/11] mychem,myvariant,pharmgkb: expand annotation beyond
 urls, add comments when urls could be expansions of curies

---
 mychem.info/openapi_full.yml    | 56 +++++++++++++++++++++++++--------
 myvariant.info/openapi_full.yml |  2 +-
 pharmgkb/smartapi.yaml          | 12 +++----
 3 files changed, 50 insertions(+), 20 deletions(-)

diff --git a/mychem.info/openapi_full.yml b/mychem.info/openapi_full.yml
index ed881112..fbdac44b 100644
--- a/mychem.info/openapi_full.yml
+++ b/mychem.info/openapi_full.yml
@@ -630,39 +630,63 @@ components:
     chebiXrefs-rhea:
       RHEA: chebi.xrefs.rhea
     drugMechChembl_EnsemblOutput:
-      ENSEMBL: chembl.drug_mechanisms.target_components.ensembl_gene  ## no prefix
-      ref_pmid: chembl.drug_mechanisms.mechanism_refs.PubMed  ## no prefix
+      ENSEMBL: chembl.drug_mechanisms.target_components.ensembl_gene               ## no prefix
+      ref_pmid: chembl.drug_mechanisms.mechanism_refs.PubMed                       ## no prefix
+      ref_clinicaltrials: chembl.drug_mechanisms.mechanism_refs.ClinicalTrials     ## no prefix
+      ref_doi: chembl.drug_mechanisms.mechanism_refs.DOI                           ## no prefix
+      ref_pmc: chembl.drug_mechanisms.mechanism_refs.PMC                           ## no prefix (europepcm IDs)
+      ## this url field will include expanded PMID / clinicaltrials / doi / PMC (aka duplicates!)
+      ##   but is required for other references: 
+      ##   DailyMed, Expert, FDA, ISBN,
+      ##   IUPHAR, InterPro, KEGG, Other,
+      ##   Patent, PubChem, UniProt, Wikipedia
+      ## notes on some specific fields:
+      ## - chembl.drug_mechanisms.mechanism_refs.ISBN: includes ID and page number
+      ## - chembl.drug_mechanisms.mechanism_refs.Other: seem to be urls
+      ## - chembl.drug_mechanisms.mechanism_refs.Patent: related urls seem to be from surechembl
+      ## - chembl.drug_mechanisms.mechanism_refs.PubMed: related urls seem to be from europepmc...
       ref_url: chembl.drug_mechanisms.mechanism_refs.url
       output_name: chembl.drug_mechanisms.target_name
       ## commenting out because data-processing / biolink-modeling issues
       # chembl_drug_action_type: chembl.drug_mechanisms.action_type
       # chembl_binding_site_name: chembl.drug_mechanisms.binding_site_name
-      # reference_isbn_note: chembl.drug_mechanisms.mechanism_refs.ISBN
       # in_taxon: chembl.drug_mechanisms.target_organism
       # chembl_target_type: chembl.drug_mechanisms.target_type
     drugMechChembl-rev:
       "CHEMBL.COMPOUND": chembl.molecule_chembl_id  ## no prefix
       input_name: chembl.drug_mechanisms.target_name
     drugMechChembl_UniprotOutput:
-      UniProtKB: chembl.drug_mechanisms.target_components.uniprot  ## no prefix
-      ref_pmid: chembl.drug_mechanisms.mechanism_refs.PubMed  ## no prefix
+      UniProtKB: chembl.drug_mechanisms.target_components.uniprot                  ## no prefix
+      ref_pmid: chembl.drug_mechanisms.mechanism_refs.PubMed                       ## no prefix
+      ref_clinicaltrials: chembl.drug_mechanisms.mechanism_refs.ClinicalTrials     ## no prefix
+      ref_doi: chembl.drug_mechanisms.mechanism_refs.DOI                           ## no prefix
+      ref_pmc: chembl.drug_mechanisms.mechanism_refs.PMC                           ## no prefix (europepcm IDs)
+      ## this url field will include expanded PMID / clinicaltrials / doi / PMC (aka duplicates!)
+      ##   but is required for other references: 
+      ##   DailyMed, Expert, FDA, ISBN,
+      ##   IUPHAR, InterPro, KEGG, Other,
+      ##   Patent, PubChem, UniProt, Wikipedia
+      ## notes on some specific fields:
+      ## - chembl.drug_mechanisms.mechanism_refs.ISBN: includes ID and page number
+      ## - chembl.drug_mechanisms.mechanism_refs.Other: seem to be urls
+      ## - chembl.drug_mechanisms.mechanism_refs.Patent: related urls seem to be from surechembl
+      ## - chembl.drug_mechanisms.mechanism_refs.PubMed: related urls seem to be from europepcm...
       ref_url: chembl.drug_mechanisms.mechanism_refs.url
       output_name: chembl.drug_mechanisms.target_name
       ## commenting out because data-processing / biolink-modeling issues
       # chembl_drug_action_type: chembl.drug_mechanisms.action_type
       # chembl_binding_site_name: chembl.drug_mechanisms.binding_site_name
-      # reference_isbn_note: chembl.drug_mechanisms.mechanism_refs.ISBN
       # in_taxon: chembl.drug_mechanisms.target_organism
       # chembl_target_type: chembl.drug_mechanisms.target_type
     chembl-treats:
       MESH: chembl.drug_indications.mesh_id
-      ref_url: chembl.drug_indications.indication_refs.url
+      ref_clinicaltrials: chembl.drug_mechanisms.mechanism_refs.ClinicalTrials     ## no prefix
+      ## this url field will include expanded clinicaltrials (aka duplicates!)
+      ##   but is required for other references: ATC, DailyMed, FDA
+      ref_url: chembl.drug_mechanisms.mechanism_refs.url
       ## commenting out because data-processing / biolink-modeling issues
-      # ## edge attributes
       # max_clinical_phase_for_indication: chembl.drug_indications.max_phase_for_ind
       # year_first_approved: chembl.drug_indications.first_approval
-      # ## provenance-related field. Sources are ATC, ClinicalTrial, DailyMed, FDA
-      # chembl_source: chembl.drug_indications.indication_refs.type
     chembl:
       "CHEMBL.COMPOUND": chembl.molecule_chembl_id  ## no prefix
     drugcentral-bioactivity:
@@ -903,8 +927,11 @@ components:
         #   chembl.drug_mechanisms.target_type
         fields: >-
           chembl.drug_mechanisms.target_components.ensembl_gene,
-          chembl.drug_mechanisms.mechanism_refs.url,
           chembl.drug_mechanisms.mechanism_refs.PubMed,
+          chembl.drug_mechanisms.mechanism_refs.ClinicalTrials,
+          chembl.drug_mechanisms.mechanism_refs.DOI,
+          chembl.drug_mechanisms.mechanism_refs.PMC,
+          chembl.drug_mechanisms.mechanism_refs.url,
           chembl.drug_mechanisms.target_name
         size: 1000  ## note size limit; added just in case
       ## using the same predicate as dgidb's no-relationship-specified. 
@@ -969,8 +996,11 @@ components:
         #   chembl.drug_mechanisms.target_type
         fields: >-
           chembl.drug_mechanisms.target_components.uniprot,
-          chembl.drug_mechanisms.mechanism_refs.url,
           chembl.drug_mechanisms.mechanism_refs.PubMed,
+          chembl.drug_mechanisms.mechanism_refs.ClinicalTrials,
+          chembl.drug_mechanisms.mechanism_refs.DOI,
+          chembl.drug_mechanisms.mechanism_refs.PMC,
+          chembl.drug_mechanisms.mechanism_refs.url,
           chembl.drug_mechanisms.target_name
         size: 1000  ## note size limit; added just in case
       ## see drugMechChemblEnsembl operation for more info on predicate
@@ -1026,10 +1056,10 @@ components:
         #   chembl.drug_indications.mesh_id,
         #   chembl.drug_indications.max_phase_for_ind,
         #   chembl.drug_indications.indication_refs.url,
-        #   chembl.drug_indications.indication_refs.type,
         #   chembl.drug_indications.first_approval
         fields: >-
           chembl.drug_indications.mesh_id,
+          chembl.drug_mechanisms.mechanism_refs.ClinicalTrials,
           chembl.drug_indications.indication_refs.url
         size: 1000  ## note size limit; added just in case
       predicate: treats
diff --git a/myvariant.info/openapi_full.yml b/myvariant.info/openapi_full.yml
index 54e87d37..0a2368a8 100644
--- a/myvariant.info/openapi_full.yml
+++ b/myvariant.info/openapi_full.yml
@@ -702,7 +702,7 @@ components:
     docm-disease-variant-start:
       DOID: docm.doid
       input_name: _id      ## variant name
-      ref_url: docm.url        ## issue: sometimes it's just `null`
+      ref_url: docm.url        ## only MyCancerGenome urls? Also, an issue: sometimes it's just `null`
       ## commenting out because data-processing / biolink-modeling issues
       ## not taking notes on a bunch of fields about the gene and variant
       # ref_pmid: docm.pubmed_id   ## no prefix. issue: lists are ", "-delimited strings
diff --git a/pharmgkb/smartapi.yaml b/pharmgkb/smartapi.yaml
index 4f3c59e5..03b29a5b 100644
--- a/pharmgkb/smartapi.yaml
+++ b/pharmgkb/smartapi.yaml
@@ -1988,7 +1988,7 @@ components:
       input_name: data.name
     guidelineEnd-gene:
       "PHARMGKB.GENE": data.relatedGenes.id
-      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url (sometimes an expanded PMID, PMCID)
+      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url - often an expanded PMID or PMCID (NLM)
       ## commenting out because data-processing / biolink-modeling issues
       # ## related gene alleles
       # related_gene_alleles: data.relatedAlleles.symbol
@@ -2005,7 +2005,7 @@ components:
       # flag_has_pediatric_information: data.pediatric
     guidelineEnd-chem:
       "PHARMGKB.CHEMICAL": data.relatedChemicals.id
-      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url (sometimes an expanded PMID, PMCID)
+      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url - often an expanded PMID or PMCID (NLM)
       ## commenting out because data-processing / biolink-modeling issues
       # ## related gene alleles
       # related_gene_alleles: data.relatedAlleles.symbol
@@ -2023,7 +2023,7 @@ components:
     labelEnd-gene:
       ## not sure if prescribingGenes provides different info or not. using relatedGenes section instead
       "PHARMGKB.GENE": data.relatedGenes.id
-      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url
+      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url (NOT expanded PMID/PMCID)
       ## commenting out because data-processing / biolink-modeling issues
       # ## related gene alleles
       # related_gene_alleles: data.relatedAlleles.symbol
@@ -2039,7 +2039,7 @@ components:
       # flag_has_pediatric_information: data.pediatric
     labelEnd-chem:
       "PHARMGKB.CHEMICAL": data.relatedChemicals.id
-      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url
+      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url (NOT expanded PMID/PMCID)
       ## commenting out because data-processing / biolink-modeling issues
       # ## related gene alleles
       # related_gene_alleles: data.relatedAlleles.symbol
@@ -2083,7 +2083,7 @@ components:
     variantAnnotEnd-chem:
       "PHARMGKB.CHEMICAL": data.relatedChemicals.id
       ## provenance
-      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url
+      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url that's an expanded PMID or PMCID (NLM)
       ## commenting out because data-processing / biolink-modeling issues
       # related_genes: data.location.genes.symbol
       # flag_significant_association: data.significance.term  ## yes/no values?
@@ -2100,7 +2100,7 @@ components:
     variantAnnotEnd-variant:
       DBSNP: data.location.rsid
       ## provenance
-      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url
+      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url that's an expanded PMID or PMCID (NLM)
       ## commenting out because data-processing / biolink-modeling issues
       # related_genes: data.location.genes.symbol
       # flag_significant_association: data.significance.term  ## yes/no values?

From 7ab848f85dc824de4070473734ef268dffa903a6 Mon Sep 17 00:00:00 2001
From: Colleen Xu <colleenhxu@gmail.com>
Date: Tue, 25 Jul 2023 00:09:58 -0700
Subject: [PATCH 05/11] mygene: add field retrieval for ref_url field

---
 mygene.info/openapi_full.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mygene.info/openapi_full.yml b/mygene.info/openapi_full.yml
index d31bf75e..f0662f7c 100644
--- a/mygene.info/openapi_full.yml
+++ b/mygene.info/openapi_full.yml
@@ -1111,7 +1111,7 @@ components:
           #   clingen.clinical_validity.mondo,
           #   clingen.clinical_validity.classification,
           #   clingen.clinical_validity.online_report
-          fields: clingen.clinical_validity.mondo
+          fields: clingen.clinical_validity.mondo,clingen.clinical_validity.online_report
           species: human
           size: 1000
         ## using related_to because....

From 4ee3bd7a66620d55f50eb0feadb634ba45f51e88 Mon Sep 17 00:00:00 2001
From: Colleen Xu <colleenhxu@gmail.com>
Date: Tue, 25 Jul 2023 16:57:49 -0700
Subject: [PATCH 06/11] multiple service provider kps: add keyword ref_pmcid
 for PMC fields, adjust comments

comments on publications / urls
---
 mychem.info/openapi_full.yml | 30 ++++-------------
 mydisease.info/smartapi.yaml |  1 -
 pfocr/smartapi.yaml          | 64 +++++++++++++-----------------------
 pharmgkb/smartapi.yaml       | 24 +++++++++-----
 4 files changed, 45 insertions(+), 74 deletions(-)

diff --git a/mychem.info/openapi_full.yml b/mychem.info/openapi_full.yml
index fbdac44b..9bb3e96c 100644
--- a/mychem.info/openapi_full.yml
+++ b/mychem.info/openapi_full.yml
@@ -634,8 +634,8 @@ components:
       ref_pmid: chembl.drug_mechanisms.mechanism_refs.PubMed                       ## no prefix
       ref_clinicaltrials: chembl.drug_mechanisms.mechanism_refs.ClinicalTrials     ## no prefix
       ref_doi: chembl.drug_mechanisms.mechanism_refs.DOI                           ## no prefix
-      ref_pmc: chembl.drug_mechanisms.mechanism_refs.PMC                           ## no prefix (europepcm IDs)
-      ## this url field will include expanded PMID / clinicaltrials / doi / PMC (aka duplicates!)
+      ref_pmcid: chembl.drug_mechanisms.mechanism_refs.PMC                         ## no prefix (but IDs start with "PMC")
+      ## this url field will include expanded PMID / clinicaltrials / doi / PMCID (aka duplicates!)
       ##   but is required for other references: 
       ##   DailyMed, Expert, FDA, ISBN,
       ##   IUPHAR, InterPro, KEGG, Other,
@@ -656,28 +656,14 @@ components:
       "CHEMBL.COMPOUND": chembl.molecule_chembl_id  ## no prefix
       input_name: chembl.drug_mechanisms.target_name
     drugMechChembl_UniprotOutput:
+    ## basically the same response-mapping as drugMechChembl_EnsemblOutput
       UniProtKB: chembl.drug_mechanisms.target_components.uniprot                  ## no prefix
       ref_pmid: chembl.drug_mechanisms.mechanism_refs.PubMed                       ## no prefix
       ref_clinicaltrials: chembl.drug_mechanisms.mechanism_refs.ClinicalTrials     ## no prefix
       ref_doi: chembl.drug_mechanisms.mechanism_refs.DOI                           ## no prefix
-      ref_pmc: chembl.drug_mechanisms.mechanism_refs.PMC                           ## no prefix (europepcm IDs)
-      ## this url field will include expanded PMID / clinicaltrials / doi / PMC (aka duplicates!)
-      ##   but is required for other references: 
-      ##   DailyMed, Expert, FDA, ISBN,
-      ##   IUPHAR, InterPro, KEGG, Other,
-      ##   Patent, PubChem, UniProt, Wikipedia
-      ## notes on some specific fields:
-      ## - chembl.drug_mechanisms.mechanism_refs.ISBN: includes ID and page number
-      ## - chembl.drug_mechanisms.mechanism_refs.Other: seem to be urls
-      ## - chembl.drug_mechanisms.mechanism_refs.Patent: related urls seem to be from surechembl
-      ## - chembl.drug_mechanisms.mechanism_refs.PubMed: related urls seem to be from europepcm...
+      ref_pmcid: chembl.drug_mechanisms.mechanism_refs.PMC                         ## no prefix (but IDs start with "PMC")
       ref_url: chembl.drug_mechanisms.mechanism_refs.url
       output_name: chembl.drug_mechanisms.target_name
-      ## commenting out because data-processing / biolink-modeling issues
-      # chembl_drug_action_type: chembl.drug_mechanisms.action_type
-      # chembl_binding_site_name: chembl.drug_mechanisms.binding_site_name
-      # in_taxon: chembl.drug_mechanisms.target_organism
-      # chembl_target_type: chembl.drug_mechanisms.target_type
     chembl-treats:
       MESH: chembl.drug_indications.mesh_id
       ref_clinicaltrials: chembl.drug_mechanisms.mechanism_refs.ClinicalTrials     ## no prefix
@@ -909,11 +895,7 @@ components:
       - id: ENSEMBL
         semantic: Gene
       parameters:
-      ## chembl.drug_mechanisms.mechanism_refs.url will get info from:
-      ##   ClinicalTrials, DOI, DailyMed, Expert, FDA, ISBN, IUPHAR, InterPro, KEGG, Other, 
-      ##   PMC, Patent, PubChem, Pubmed, UniProt, Wikipedia
-      ## chembl.drug_mechanisms.mechanism_refs.ISBN will include page number. ex: "9780702034718 PP. 164"
-      ## chembl.drug_mechanisms.mechanism_refs.PubMed will include only the PMID (no prefix)
+      ## see response-mapping for more info on fields
       ## commenting out because data-processing / biolink-modeling issues
         # fields: >-
         #   chembl.drug_mechanisms.target_components.ensembl_gene,
@@ -982,7 +964,7 @@ components:
       - id: UniProtKB
         semantic: Gene
       parameters:
-      ## see drugMechChemblEnsembl operation for more info on fields
+      ## see response-mapping drugMechChembl_EnsemblOutput for more info on fields
       ## commenting out because data-processing / biolink-modeling issues
         # fields: >-
         #   chembl.drug_mechanisms.target_components.uniprot,
diff --git a/mydisease.info/smartapi.yaml b/mydisease.info/smartapi.yaml
index 1f7e467e..695a4d1d 100644
--- a/mydisease.info/smartapi.yaml
+++ b/mydisease.info/smartapi.yaml
@@ -627,7 +627,6 @@ components:
       # disgenet-score: disgenet.variants_related_to_disease.score
     disease-phenotype:
       HP: hpo.phenotype_related_to_disease.hpo_id                                   ## HAS PREFIX (HP)
-      ## note: there are 6 reference-related fields (parser created based on ID namespace)
       ref_pmid: hpo.phenotype_related_to_disease.pmid_refs                          ## HAS PREFIX (PMID)
       ref_url: hpo.phenotype_related_to_disease.website_refs
       ## note: there are 4 frequency-related fields (parser created based on different data types)
diff --git a/pfocr/smartapi.yaml b/pfocr/smartapi.yaml
index 7da22f32..c8efcb4f 100644
--- a/pfocr/smartapi.yaml
+++ b/pfocr/smartapi.yaml
@@ -594,11 +594,9 @@ components:
           - id: MESH
             semantic: SmallMolecule
         parameters:
-        ## commenting out because data-processing / biolink-modeling issues
-          # fields: >-
-          #   associatedWith.mentions.chemicals.mesh,
-          #   associatedWith.figureUrl,associatedWith.pmc,associatedWith.title
-          fields: associatedWith.mentions.chemicals.mesh,associatedWith.figureUrl
+          fields: >-
+            associatedWith.mentions.chemicals.mesh,
+            associatedWith.figureUrl,associatedWith.pmc
           size: 1000
         predicate: occurs_together_in_literature_with
         source: "infores:pfocr"
@@ -624,11 +622,9 @@ components:
           - id: NCBIGene
             semantic: Gene
         parameters:
-        ## commenting out because data-processing / biolink-modeling issues
-          # fields: >-
-          #   associatedWith.mentions.genes.ncbigene,
-          #   associatedWith.figureUrl,associatedWith.pmc,associatedWith.title
-          fields: associatedWith.mentions.genes.ncbigene,associatedWith.figureUrl
+          fields: >-
+            associatedWith.mentions.genes.ncbigene,
+            associatedWith.figureUrl,associatedWith.pmc
           size: 1000
         predicate: occurs_together_in_literature_with
         source: "infores:pfocr"
@@ -656,11 +652,9 @@ components:
           - id: MESH
             semantic: SmallMolecule
         parameters:
-        ## commenting out because data-processing / biolink-modeling issues
-          # fields: >-
-          #   associatedWith.mentions.chemicals.mesh,
-          #   associatedWith.figureUrl,associatedWith.pmc,associatedWith.title
-          fields: associatedWith.mentions.chemicals.mesh,associatedWith.figureUrl
+          fields: >-
+            associatedWith.mentions.chemicals.mesh,
+            associatedWith.figureUrl,associatedWith.pmc
           size: 1000
         predicate: occurs_together_in_literature_with
         source: "infores:pfocr"
@@ -686,11 +680,9 @@ components:
           - id: MESH
             semantic: Disease
         parameters:
-        ## commenting out because data-processing / biolink-modeling issues
-          # fields: >-
-          #   associatedWith.mentions.diseases.mesh,
-          #   associatedWith.figureUrl,associatedWith.pmc,associatedWith.title
-          fields: associatedWith.mentions.diseases.mesh,associatedWith.figureUrl
+          fields: >-
+            associatedWith.mentions.diseases.mesh,
+            associatedWith.figureUrl,associatedWith.pmc
           size: 1000
         predicate: occurs_together_in_literature_with
         source: "infores:pfocr"
@@ -718,11 +710,9 @@ components:
           - id: NCBIGene
             semantic: Gene
         parameters:
-        ## commenting out because data-processing / biolink-modeling issues
-          # fields: >-
-          #   associatedWith.mentions.diseases.mesh,
-          #   associatedWith.figureUrl,associatedWith.pmc,associatedWith.title
-          fields: associatedWith.mentions.genes.ncbigene,associatedWith.figureUrl
+          fields: >-
+            associatedWith.mentions.genes.ncbigene,
+            associatedWith.figureUrl,associatedWith.pmc
           size: 1000
         predicate: occurs_together_in_literature_with
         source: "infores:pfocr"
@@ -748,11 +738,9 @@ components:
           - id: MESH
             semantic: Disease
         parameters:
-        ## commenting out because data-processing / biolink-modeling issues
-          # fields: >-
-          #   associatedWith.mentions.diseases.mesh,
-          #   associatedWith.figureUrl,associatedWith.pmc,associatedWith.title
-          fields: associatedWith.mentions.diseases.mesh,associatedWith.figureUrl
+          fields: >-
+            associatedWith.mentions.diseases.mesh,
+            associatedWith.figureUrl,associatedWith.pmc
           size: 1000
         predicate: occurs_together_in_literature_with
         source: "infores:pfocr"
@@ -763,23 +751,17 @@ components:
         #     oneOutput: "MESH:D002318"    ## Cardiovascular Diseases
         ## should have 1 figure: https://biothings.ncats.io/pfocr/query?q=associatedWith.mentions.genes.ncbigene:3791%20AND%20associatedWith.mentions.diseases.mesh:D002318
   x-bte-response-mapping:
-    ## pmc_reference kinda matches "biolink:publications". However, it doesn't have a PMC prefix so it doesn't quite have the right value...
-    ## not easy to make it clear with biolink-model terms: the publication (PMC ID) vs the figure (title and url)
+    ## not including because of data-processing / biolink-modeling issues
+    ## - associatedWith.title (title of figure)
     chem:
       MESH: associatedWith.mentions.chemicals.mesh   ## no prefix
       ref_url: associatedWith.figureUrl
-      ## commenting out because data-processing / biolink-modeling issues
-      # figure_title: associatedWith.title
-      # pmc_reference: associatedWith.pmc
+      ref_pmcid: associatedWith.pmc                  ## no prefix (but IDs start with "PMC")
     gene:
       NCBIGene: associatedWith.mentions.genes.ncbigene   ## no prefix
       ref_url: associatedWith.figureUrl
-      ## commenting out because data-processing / biolink-modeling issues
-      # figure_title: associatedWith.title
-      # pmc_reference: associatedWith.pmc
+      ref_pmcid: associatedWith.pmc
     disease:
       MESH: associatedWith.mentions.diseases.mesh   ## no prefix
       ref_url: associatedWith.figureUrl
-      ## commenting out because data-processing / biolink-modeling issues
-      # figure_title: associatedWith.title
-      # pmc_reference: associatedWith.pmc
+      ref_pmcid: associatedWith.pmc
diff --git a/pharmgkb/smartapi.yaml b/pharmgkb/smartapi.yaml
index 03b29a5b..835d1b16 100644
--- a/pharmgkb/smartapi.yaml
+++ b/pharmgkb/smartapi.yaml
@@ -1988,7 +1988,9 @@ components:
       input_name: data.name
     guidelineEnd-gene:
       "PHARMGKB.GENE": data.relatedGenes.id
-      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url - often an expanded PMID or PMCID (NLM)
+      ## in responses I've reviewed, this is always a url
+      ## - often an expanded PMID or a PMCID (NLM) like https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3994233
+      ref_url: data.literature._sameAs
       ## commenting out because data-processing / biolink-modeling issues
       # ## related gene alleles
       # related_gene_alleles: data.relatedAlleles.symbol
@@ -2005,7 +2007,9 @@ components:
       # flag_has_pediatric_information: data.pediatric
     guidelineEnd-chem:
       "PHARMGKB.CHEMICAL": data.relatedChemicals.id
-      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url - often an expanded PMID or PMCID (NLM)
+      ## in responses I've reviewed, this is always a url
+      ## - often an expanded PMID or a PMCID (NLM) like https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3994233
+      ref_url: data.literature._sameAs
       ## commenting out because data-processing / biolink-modeling issues
       # ## related gene alleles
       # related_gene_alleles: data.relatedAlleles.symbol
@@ -2023,7 +2027,8 @@ components:
     labelEnd-gene:
       ## not sure if prescribingGenes provides different info or not. using relatedGenes section instead
       "PHARMGKB.GENE": data.relatedGenes.id
-      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url (NOT expanded PMID/PMCID)
+      ## in responses I've reviewed, this is always a plain url (NOT an expanded PMID/PMCID)
+      ref_url: data.literature._sameAs
       ## commenting out because data-processing / biolink-modeling issues
       # ## related gene alleles
       # related_gene_alleles: data.relatedAlleles.symbol
@@ -2039,7 +2044,8 @@ components:
       # flag_has_pediatric_information: data.pediatric
     labelEnd-chem:
       "PHARMGKB.CHEMICAL": data.relatedChemicals.id
-      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url (NOT expanded PMID/PMCID)
+      ## in responses I've reviewed, this is always a plain url (NOT an expanded PMID/PMCID)
+      ref_url: data.literature._sameAs
       ## commenting out because data-processing / biolink-modeling issues
       # ## related gene alleles
       # related_gene_alleles: data.relatedAlleles.symbol
@@ -2082,8 +2088,9 @@ components:
       # related_diseases: data.relatedDiseases.name
     variantAnnotEnd-chem:
       "PHARMGKB.CHEMICAL": data.relatedChemicals.id
-      ## provenance
-      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url that's an expanded PMID or PMCID (NLM)
+      ## in responses I've reviewed, this is always a url that's an expanded PMID or 
+      ##   a PMCID (NLM) like https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3165308
+      ref_url: data.literature._sameAs
       ## commenting out because data-processing / biolink-modeling issues
       # related_genes: data.location.genes.symbol
       # flag_significant_association: data.significance.term  ## yes/no values?
@@ -2099,8 +2106,9 @@ components:
       # population_phenotype_name: data.populationPhenotypes.phenotype
     variantAnnotEnd-variant:
       DBSNP: data.location.rsid
-      ## provenance
-      ref_url: data.literature._sameAs    ## in responses I've reviewed, this is always a url that's an expanded PMID or PMCID (NLM)
+      ## in responses I've reviewed, this is always a url that's an expanded PMID or 
+      ##   a PMCID (NLM) like https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3165308
+      ref_url: data.literature._sameAs
       ## commenting out because data-processing / biolink-modeling issues
       # related_genes: data.location.genes.symbol
       # flag_significant_association: data.significance.term  ## yes/no values?

From f281b27d5ce576b11e9b20303337a7e6a7806270 Mon Sep 17 00:00:00 2001
From: Colleen Xu <colleenhxu@gmail.com>
Date: Tue, 25 Jul 2023 17:32:11 -0700
Subject: [PATCH 07/11] mychem,myvariant: add clinicaltrials ref keyword and
 fields

---
 mychem.info/openapi_full.yml    |  6 +++---
 myvariant.info/openapi_full.yml | 21 +++++++++++++--------
 2 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/mychem.info/openapi_full.yml b/mychem.info/openapi_full.yml
index 9bb3e96c..edc2e0f2 100644
--- a/mychem.info/openapi_full.yml
+++ b/mychem.info/openapi_full.yml
@@ -666,10 +666,10 @@ components:
       output_name: chembl.drug_mechanisms.target_name
     chembl-treats:
       MESH: chembl.drug_indications.mesh_id
-      ref_clinicaltrials: chembl.drug_mechanisms.mechanism_refs.ClinicalTrials     ## no prefix
+      ref_clinicaltrials: chembl.drug_indications.indication_refs.ClinicalTrials     ## no prefix
       ## this url field will include expanded clinicaltrials (aka duplicates!)
       ##   but is required for other references: ATC, DailyMed, FDA
-      ref_url: chembl.drug_mechanisms.mechanism_refs.url
+      ref_url: chembl.drug_indications.indication_refs.url
       ## commenting out because data-processing / biolink-modeling issues
       # max_clinical_phase_for_indication: chembl.drug_indications.max_phase_for_ind
       # year_first_approved: chembl.drug_indications.first_approval
@@ -1041,7 +1041,7 @@ components:
         #   chembl.drug_indications.first_approval
         fields: >-
           chembl.drug_indications.mesh_id,
-          chembl.drug_mechanisms.mechanism_refs.ClinicalTrials,
+          chembl.drug_indications.indication_refs.ClinicalTrials,
           chembl.drug_indications.indication_refs.url
         size: 1000  ## note size limit; added just in case
       predicate: treats
diff --git a/myvariant.info/openapi_full.yml b/myvariant.info/openapi_full.yml
index 0a2368a8..27a7f86e 100644
--- a/myvariant.info/openapi_full.yml
+++ b/myvariant.info/openapi_full.yml
@@ -615,7 +615,8 @@ components:
   x-bte-response-mapping:
     civic-geneDisease:
       DOID: civic.evidence_items.disease.doid
-      ref_pmid: civic.evidence_items.source.pubmed    ## no prefix
+      ref_pmid: civic.evidence_items.source.pubmed                              ## no prefix
+      ref_clinicaltrials: civic.evidence_items.source.clinical_trials.nct_id    ## no prefix (ID starts with "NCT")
       ## commenting out because data-processing / biolink-modeling issues
       # ## categorical var / relation
       # civic_clinical_significance: civic.evidence_items.clinical_significance
@@ -628,8 +629,6 @@ components:
       # civic_evidence_direction: civic.evidence_items.evidence_direction
       # civic_evidence_level: civic.evidence_items.evidence_level
       # civic_evidence_type: civic.evidence_items.evidence_type
-      # ## provenance
-      # clinical_trial_url: civic.evidence_items.source.clinical_trials.clinical_trial_url
       # ## context: variant involved, could use "biolink:sequence_variant_qualifier"
       # ##   but then ID-namespace would be unclear (this ID doesn't have a prefix) 
       # civic_variant_id: civic.evidence_items.variant_id
@@ -641,7 +640,8 @@ components:
       DOID: civic.evidence_items.disease.doid
       ## variant name
       input_name: _id
-      ref_pmid: civic.evidence_items.source.pubmed    ## no prefix
+      ref_pmid: civic.evidence_items.source.pubmed                              ## no prefix
+      ref_clinicaltrials: civic.evidence_items.source.clinical_trials.nct_id    ## no prefix (ID starts with "NCT")
       ## commenting out because data-processing / biolink-modeling issues
       # ## categorical var / relation
       # civic_clinical_significance: civic.evidence_items.clinical_significance
@@ -656,8 +656,6 @@ components:
       # civic_evidence_direction: civic.evidence_items.evidence_direction
       # civic_evidence_level: civic.evidence_items.evidence_level
       # civic_evidence_type: civic.evidence_items.evidence_type
-      # ## provenance
-      # clinical_trial_url: civic.evidence_items.source.clinical_trials.clinical_trial_url
       # ## context: variant involved was Somatic, unknown...
       # civic_variant_origin: civic.evidence_items.variant_origin
     dbsnp-rsid:
@@ -749,7 +747,10 @@ components:
         ## only some records have drug information, and only some drugs have NCIT IDs
         ## commenting out because data-processing / biolink-modeling issues
           # fields: civic.evidence_items
-          fields: civic.evidence_items.disease.doid,civic.evidence_items.source.pubmed
+          fields: >-
+            civic.evidence_items.disease.doid,
+            civic.evidence_items.source.pubmed,
+            civic.evidence_items.source.clinical_trials.nct_id
           size: 1000        ## note size limit; added just in case
         predicate: affects
         source: "infores:civic"
@@ -807,7 +808,11 @@ components:
           #   civic.evidence_items,
           #   civic.entrez_name,
           #   _id
-          fields: civic.evidence_items.disease.doid,_id,civic.evidence_items.source.pubmed
+          fields: >-
+            civic.evidence_items.disease.doid,
+            _id,
+            civic.evidence_items.source.pubmed,
+            civic.evidence_items.source.clinical_trials.nct_id
           size: 1000        ## note size limit; added just in case
         predicate: affects
         source: "infores:civic"

From fe2209b067d68efacdd2fcc1045d29255ab14ee0 Mon Sep 17 00:00:00 2001
From: Colleen Xu <colleenhxu@gmail.com>
Date: Tue, 25 Jul 2023 18:21:10 -0700
Subject: [PATCH 08/11] bindingdb: add ref_doi. mychem already annotated for
 doi

---
 bindingdb/smartapi.yaml | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/bindingdb/smartapi.yaml b/bindingdb/smartapi.yaml
index 4b7e40cd..b481a7c1 100644
--- a/bindingdb/smartapi.yaml
+++ b/bindingdb/smartapi.yaml
@@ -599,7 +599,7 @@ components:
         #     relation.curation_datasource,relation.pmid,relation.bindingdb_link,relation.article_doi,relation.patent_number
           fields: >-
             object.pubchem_cid,
-            subject.name,relation.pmid,relation.bindingdb_link
+            subject.name,relation.pmid,relation.bindingdb_link,relation.article_doi
           size: 1000
         predicate: physically_interacts_with
         source: "infores:bindingdb"
@@ -629,7 +629,7 @@ components:
           #   relation.curation_datasource,relation.pmid,relation.bindingdb_link,relation.article_doi,relation.patent_number
           fields: >-
             subject.uniprot.accession,
-            subject.name,relation.pmid,relation.bindingdb_link
+            subject.name,relation.pmid,relation.bindingdb_link,relation.article_doi
           size: 1000
         predicate: physically_interacts_with
         source: "infores:bindingdb"
@@ -639,23 +639,28 @@ components:
         #   - qInput: "PUBCHEM.COMPOUND:134553288"     ## US10736883, Example 387.0
         #     oneOutput: "UniProtKB:P35414"            ## Apelin receptor
   x-bte-response-mapping:
+  ## note on references:
+  ##   sometimes the doi and pmid refer to the same publication (i.e. they are "duplicate" references),
+  ##   but there are plenty of cases where a relationship has only 1 of these fields:
+  ##   only doi: https://biothings.transltr.io/bindingdb/query?q=_exists_:relation.article_doi%20AND%20(NOT%20_exists_:relation.pmid)
+  ##   only pmid: https://biothings.transltr.io/bindingdb/query?q=_exists_:relation.pmid%20AND%20(NOT%20_exists_:relation.article_doi)
     pubchem-object:
       "PUBCHEM.COMPOUND": object.pubchem_cid       ## no prefix
       "biolink:original_subject": subject.name     ## was called "Target Name Assigned by Curator or DataSource"
       ref_pmid: relation.pmid                          ## no prefix
+      ref_doi: relation.article_doi
       ref_url: relation.bindingdb_link
       ## commenting out because data-processing / biolink-modeling issues
       # in_taxon: subject.organism                 ## was called "Target Source Organism According to Curator or DataSource"
       # bindingdb_curation_datasource: relation.curation_datasource    ## Curation/DataSource
-      # article_doi: relation.article_doi
       # patent_number: relation.patent_number
     uniprot-subject:
       UniProtKB: subject.uniprot.accession  ## no prefix
       "biolink:original_subject": subject.name
       ref_pmid: relation.pmid                          ## no prefix
+      ref_doi: relation.article_doi
       ref_url: relation.bindingdb_link
       ## commenting out because data-processing / biolink-modeling issues
       # in_taxon: subject.organism
       # bindingdb_curation_datasource: relation.curation_datasource
-      # article_doi: relation.article_doi
       # patent_number: relation.patent_number

From ca4eca271cb4f1383d904105f1566be9d37d4bca Mon Sep 17 00:00:00 2001
From: Colleen Xu <colleenhxu@gmail.com>
Date: Tue, 25 Jul 2023 18:42:03 -0700
Subject: [PATCH 09/11] mydisease: add ref_isbn

---
 mydisease.info/smartapi.yaml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mydisease.info/smartapi.yaml b/mydisease.info/smartapi.yaml
index 695a4d1d..f7a6dd82 100644
--- a/mydisease.info/smartapi.yaml
+++ b/mydisease.info/smartapi.yaml
@@ -628,6 +628,7 @@ components:
     disease-phenotype:
       HP: hpo.phenotype_related_to_disease.hpo_id                                   ## HAS PREFIX (HP)
       ref_pmid: hpo.phenotype_related_to_disease.pmid_refs                          ## HAS PREFIX (PMID)
+      ref_isbn: hpo.phenotype_related_to_disease.isbn_refs                          ## HAS PREFIX (ISBN)
       ref_url: hpo.phenotype_related_to_disease.website_refs
       ## note: there are 4 frequency-related fields (parser created based on different data types)
       "biolink:has_quotient": hpo.phenotype_related_to_disease.numeric_freq         ## decimal freq
@@ -636,7 +637,6 @@ components:
       "biolink:has_total": hpo.phenotype_related_to_disease.freq_denominator        ## frequency as a fraction (denominator)
       ## commenting out because data-processing / biolink-modeling issues
       # ## note: there are 6 reference-related fields (parser created based on ID namespace)
-      # isbn_refs: hpo.phenotype_related_to_disease.isbn_refs                         ## HAS PREFIX (ISBN)
       # decipher_refs: hpo.phenotype_related_to_disease.decipher_refs                 ## HAS PREFIX (DECIPHER)
       # omim_refs: hpo.phenotype_related_to_disease.omim_refs                         ## HAS PREFIX (OMIM)
       # orphanet_refs: hpo.phenotype_related_to_disease.orphanet_refs                 ## HAS PREFIX (ORPHANET)
@@ -808,6 +808,7 @@ components:
           fields: >-
             hpo.phenotype_related_to_disease.hpo_id,
             hpo.phenotype_related_to_disease.pmid_refs,
+            hpo.phenotype_related_to_disease.isbn_refs,
             hpo.phenotype_related_to_disease.website_refs,
             hpo.phenotype_related_to_disease.numeric_freq,
             hpo.phenotype_related_to_disease.hp_freq,
@@ -858,6 +859,7 @@ components:
           fields: >-
             hpo.phenotype_related_to_disease.hpo_id,
             hpo.phenotype_related_to_disease.pmid_refs,
+            hpo.phenotype_related_to_disease.isbn_refs,
             hpo.phenotype_related_to_disease.website_refs,
             hpo.phenotype_related_to_disease.numeric_freq,
             hpo.phenotype_related_to_disease.hp_freq,

From 6f7319c3bdbd705719975d277451789211d29a02 Mon Sep 17 00:00:00 2001
From: Colleen Xu <colleenhxu@gmail.com>
Date: Wed, 26 Jul 2023 17:14:51 -0700
Subject: [PATCH 10/11] biolink/monarch api: update to use ref_pmid

---
 biolink/openapi.yml | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/biolink/openapi.yml b/biolink/openapi.yml
index 54883776..bb5d007d 100644
--- a/biolink/openapi.yml
+++ b/biolink/openapi.yml
@@ -853,7 +853,7 @@ components:
   x-bte-response-mapping:
     gene-hgnc:
       HGNC: associations.object.HGNC
-      pubmed: associations.publications.id
+      ref_pmid: associations.publications.id
       ## commenting out because data-processing / biolink-modeling issues
       # monarch_source_database: associations.provided_by
       # ## there's also a relation.id and relation.inverse.
@@ -862,7 +862,7 @@ components:
     gene-hgnc-dbsnp-start:
       HGNC: associations.object.HGNC
       input_name: associations.subject.label   ## for the dbsnp input
-      pubmed: associations.publications.id
+      ref_pmid: associations.publications.id
       ## commenting out because data-processing / biolink-modeling issues
       # monarch_source_database: associations.provided_by
       # ## there's also a relation.id and relation.inverse.
@@ -870,52 +870,52 @@ components:
       # "biolink:original_predicate": associations.relation.label
     pathway-react:
       REACT: associations.object.REACT
-      pubmed: associations.publications.id
+      ref_pmid: associations.publications.id
       ## commenting out because data-processing / biolink-modeling issues
       # monarch_source_database: associations.provided_by
       # "biolink:original_predicate": associations.relation.label
     phenotype-hp:
       HP: associations.object.HP
-      pubmed: associations.publications.id
+      ref_pmid: associations.publications.id
       ## commenting out because data-processing / biolink-modeling issues
       # monarch_source_database: associations.provided_by
       # "biolink:original_predicate": associations.relation.label
     phenotype-hp-dbsnp-start:
       HP: associations.object.HP
       input_name: associations.subject.label   ## for the dbsnp input
-      pubmed: associations.publications.id
+      ref_pmid: associations.publications.id
       ## commenting out because data-processing / biolink-modeling issues
       # monarch_source_database: associations.provided_by
       # "biolink:original_predicate": associations.relation.label
     variant-dbsnp:
       DBSNP: associations.object.dbSNP
       output_name: associations.object.label
-      pubmed: associations.publications.id
+      ref_pmid: associations.publications.id
       ## commenting out because data-processing / biolink-modeling issues
       # monarch_source_database: associations.provided_by
       # "biolink:original_predicate": associations.relation.label
     anatomy-uberon:
       UBERON: associations.object.UBERON
-      pubmed: associations.publications.id
+      ref_pmid: associations.publications.id
       ## commenting out because data-processing / biolink-modeling issues
       # monarch_source_database: associations.provided_by
       # "biolink:original_predicate": associations.relation.label
     disease-mondo:
       MONDO: associations.object.MONDO
-      pubmed: associations.publications.id
+      ref_pmid: associations.publications.id
       ## commenting out because data-processing / biolink-modeling issues
       # monarch_source_database: associations.provided_by
       # "biolink:original_predicate": associations.relation.label
     disease-mondo-dbsnp-start:
       MONDO: associations.object.MONDO
       input_name: associations.subject.label   ## for the dbsnp input
-      pubmed: associations.publications.id
+      ref_pmid: associations.publications.id
       ## commenting out because data-processing / biolink-modeling issues
       # monarch_source_database: associations.provided_by
       # "biolink:original_predicate": associations.relation.label
     orthologGene2diseaseMondo:
       MONDO: associations.object.MONDO
-      pubmed: associations.publications.id    ## not all of these are pubmed though...saw some WormBase:WBPaper ones
+      ref_pmid: associations.publications.id    ## NOTE: not all of these are PMIDs; some are WormBase:WBPaper IDs
       ## commenting out because data-processing / biolink-modeling issues
       # homologous_gene_model_id: associations.subject.id  ## what is directly annotated to the disease
       # homologous_gene_model_name: associations.subject.label
@@ -923,7 +923,7 @@ components:
       # "biolink:original_predicate": associations.relation.label
     orthologGene2phenoHP:
       HP: associations.object.HP
-      pubmed: associations.publications.id    ## not all of these are pubmed though...saw some WormBase:WBPaper ones
+      ref_pmid: associations.publications.id    ## NOTE: not all of these are PMIDs; some are WormBase:WBPaper IDs
       ## commenting out because data-processing / biolink-modeling issues
       # homologous_gene_model_id: associations.subject.id  ## what is directly annotated to the disease
       # homologous_gene_model_name: associations.subject.label

From 16b13cd741ee022e6e2ebede82471d8eefff454d Mon Sep 17 00:00:00 2001
From: Colleen Xu <colleenhxu@gmail.com>
Date: Mon, 14 Aug 2023 16:00:18 -0700
Subject: [PATCH 11/11] ebi g2p: update examples in comments

---
 EBIgene2phenotype/smartapi.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/EBIgene2phenotype/smartapi.yaml b/EBIgene2phenotype/smartapi.yaml
index 24bea767..82d90a17 100644
--- a/EBIgene2phenotype/smartapi.yaml
+++ b/EBIgene2phenotype/smartapi.yaml
@@ -623,8 +623,8 @@ components:
       response_mapping:
         "$ref": "#/components/x-bte-response-mapping/disease-object"
       # testExamples:
-      #   - qInput: "HGNC:10031"        ## RMRP
-      #     oneOutput: "OMIM:250250"    ## CARTILAGE-HAIR HYPOPLASIA
+      #   - qInput: "HGNC:1020"         ## BCS1L
+      #     oneOutput: "OMIM:603358"    ## GRACILE SYNDROME
     disease_to_gene:
     - supportBatch: true
       useTemplating: true