-
Notifications
You must be signed in to change notification settings - Fork 2
/
Makefile
109 lines (83 loc) · 4.49 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Base PURL for OBO ontologies. Not referenced in the visible part of this
# file; presumably consumed by the included sub-makefile -- TODO confirm.
OBO = http://purl.obolibrary.org/obo

# Default goal: create the output directory, build every per-source OFN
# file, then the merged ontology in both OBO and OWL form.
# Declared phony so a stray file named "all" can never mask the build.
.PHONY: all
all: target all_ofn neo.obo neo.owl
# Remove the trigger file, the downloaded metadata, mirrored archives,
# temp files and generated OBO files. rm exits non-zero when any of the
# listed files is missing; the echo fallback keeps the target from failing.
# Declared phony so a file named "clean" cannot disable this target.
# NOTE(review): only target/*.obo is removed, while the rules in this file
# produce target/neo-*.ofn -- confirm whether *.ofn should be cleaned too.
.PHONY: clean
clean:
	rm trigger datasets.json mirror/*gz mirror/*tmp target/*.obo || echo "not all files present, perhaps last build did not complete"
# Sources used by the reduced test build (test_ofn).
# Override on the command line or in the environment.
TEST_SRCS ?= sgd pombase

# Sources that make up the full build; each name N maps to target/neo-N.ofn.
# Override on the command line or in the environment.
SRCS ?= \
  sgd pombase mgi zfin rgd dictybase fb tair wb \
  goa_human goa_human_complex goa_human_rna goa_human_isoform goa_pig \
  xenbase pseudocap ecocyc goa_sars-cov-2 uniprot_reviewed

# Environment prefix placed in front of every robot invocation; passes the
# JVM option -Xmx12G through ROBOT_JAVA_ARGS.
ROBOT_ENV = ROBOT_JAVA_ARGS=-Xmx12G
ROBOT = $(ROBOT_ENV) robot

# One OFN file per entry in SRCS.
OFN_SRCS = $(SRCS:%=target/neo-%.ofn)
# Aggregate targets: neither produces a file of its own name, so both are
# declared phony.
.PHONY: all_ofn test_ofn

# Build the OFN file for every source in SRCS.
all_ofn: $(OFN_SRCS)

# Build the OFN files for the reduced TEST_SRCS set, after making sure the
# output directory exists.
test_ofn: target $(patsubst %,target/neo-%.ofn,$(TEST_SRCS))
# Former test entry point, kept for reference while tests are disabled:
#test: touch_trigger test_obo
# Placeholder that only prints a notice; it always succeeds.
# Declared phony so a file or directory named "test" cannot mask it.
.PHONY: test
test:
	echo "tests disabled until its easier to run perl on travis"
# Force a refresh: bump the mtime of the "trigger" file so that everything
# depending on it (datasets.json in this file) is considered out of date.
# No file named touch_trigger is ever created, hence phony.
.PHONY: touch_trigger
touch_trigger:
	touch trigger

# Create the trigger file if it does not exist yet.
trigger:
	touch $@
# Extra ontology files merged into the final product alongside the
# per-source OFN files.
IMPORTS = imports/pr_import.obo

# Merge every per-source OFN file and the imports into one ontology, set
# its ontology IRI, and serialise as OWL.
# robot writes to $@.tmp first and the result is renamed into place only on
# success, so an aborted run never leaves a truncated neo.owl behind.
neo.owl: $(OFN_SRCS) $(IMPORTS)
	$(ROBOT) merge $(addprefix -i ,$^) annotate --ontology-iri 'http://purl.obolibrary.org/obo/go/noctua/neo.owl' convert -f owl -o $@.tmp && mv $@.tmp $@
## datasets.json is created as a throwaway in the NEO versions of the
## pipeline and is based on the go-site master data.
## It is re-fetched whenever the "trigger" file is newer.
## The download goes to $@.tmp and is renamed only on success: wget -O
## creates its output file even when the transfer fails, which would
## otherwise leave an empty datasets.json that looks up to date.
## The final touch makes sure the file is newer than "trigger".
datasets.json: trigger
	wget http://s3.amazonaws.com/go-build/metadata/datasets.json -O $@.tmp && mv $@.tmp $@ && touch $@
# Output directory for the generated neo-*.ofn / neo-*.owl files.
# -p makes the recipe idempotent: it succeeds if the directory already
# exists (e.g. when the target is forced or created concurrently).
target:
	mkdir -p $@
# Debug helper: print the directory make is running in.
# Declared phony because it never creates a file named "foo".
.PHONY: foo
foo:
	pwd
# BUG: temporary hardcode until https://github.com/geneontology/go-site/issues/1431 is resolved and stable GPI URL is established
# Fetch the uncompressed SARS-CoV-2 GPI into mirror/ (the target name minus
# its .gz suffix), then gzip it in place to produce the target.
# NOTE(review): --no-check-certificate disables TLS verification for this
# download -- confirm it is still required.
mirror/goa_sars-cov-2.gpi.gz:
	wget --no-check-certificate https://raw.githubusercontent.com/Knowledge-Graph-Hub/kg-covid-19/master/curated/ORFs/uniprot_sars-cov-2.gpi -O $(basename $@) && gzip $(basename $@)
# Convert the mirrored SARS-CoV-2 GPI into OFN by streaming the
# decompressed file through gpi2ofn.pl.
# Output goes to $@.tmp and is renamed only on success, so a failed
# conversion never leaves a partial .ofn that looks up to date.
target/neo-goa_sars-cov-2.ofn: mirror/goa_sars-cov-2.gpi.gz
	gzip -dc $< | ./gpi2ofn.pl -s Scov2 -n sars-cov-2 > $@.tmp && mv $@.tmp $@
# ## In support of including viruses and bacteria
# ## (https://github.com/geneontology/neo/issues/77).
# ## http://ftp.ebi.ac.uk/pub/contrib/goa/uniprot_reviewed_virus_bacteria.gpi.gz
# mirror/uniprot_reviewed_virus_bacteria.gpi.gz:
# wget --no-check-certificate http://ftp.ebi.ac.uk/pub/contrib/goa/uniprot_reviewed_virus_bacteria.gpi.gz -O mirror/uniprot_reviewed_virus_bacteria.gpi.gz
# target/neo-uniprot_reviewed_virus_bacteria.obo: mirror/uniprot_reviewed_virus_bacteria.gpi.gz
# gzip -dc $< | ./gpi2obo.pl -F -n reviewed_virus_bacteria > [email protected] && mv [email protected] $@
## In support of including all swissprot reviewed.
## Download and /filter out by species/.
## (https://github.com/geneontology/neo/issues/82).
## http://ftp.ebi.ac.uk/pub/contrib/goa/uniprot_reviewed.gpi.gz
## The filter_list.txt (and option) should not be needed in the future
## as we should be drawing exclusively from datasets.json.
# Build the species-filtered mirror of uniprot_reviewed.gpi.gz:
#   1. download the full archive to $@.tmp
#   2. decompress it to mirror/uniprot_reviewed.gpi.tmp
#   3. filter it with filter.pl, using datasets.json ($<) and filter_list.txt
#   4. recompress the filtered file
#   5. move the recompressed file onto the target
# All intermediates end in "tmp", which the clean target's mirror/*tmp
# pattern matches.
mirror/uniprot_reviewed.gpi.gz: datasets.json
	wget --no-check-certificate http://ftp.ebi.ac.uk/pub/contrib/goa/uniprot_reviewed.gpi.gz -O $@.tmp
	gzip -dc $@.tmp > $(basename $@).tmp
	perl filter.pl -v --metadata $< --filter filter_list.txt --input $(basename $@).tmp > $(@D)/filtered_uniprot_reviewed.gpi.tmp
	gzip -c $(@D)/filtered_uniprot_reviewed.gpi.tmp > $(@D)/filtered_uniprot_reviewed.gpi.gz.tmp
	mv $(@D)/filtered_uniprot_reviewed.gpi.gz.tmp $@
# Convert the filtered reviewed-UniProt GPI into OFN by streaming the
# decompressed file through gpi2ofn.pl.
# Output goes to $@.tmp and is renamed only on success, so a failed
# conversion never leaves a partial .ofn that looks up to date.
target/neo-uniprot_reviewed.ofn: mirror/uniprot_reviewed.gpi.gz
	gzip -dc $< | ./gpi2ofn.pl -F -n reviewed > $@.tmp && mv $@.tmp $@
# Sub-makefile
#
# contains targets:
# - neo-{Gspe}.obo
# NOTE(review): the rules in this file build target/neo-*.ofn, so the
# ".obo" above may be out of date -- confirm against the generated file.
#
# Makefile-gafs is itself a target of this Makefile (see the Makefile-gafs
# rule further down, which generates it from datasets.json). Because it is
# included without a leading "-", make stops with an error if the file is
# missing and cannot be built.
include Makefile-gafs
# The neo solr index has an ID field (which is a CURIE), but no URI
# Minerva requires OWL which uses URIs
#
# When loading solr, owltools will use the oboInOwl:id field as priority to load the ID field (see https://github.com/owlcollab/owltools/pull/247)
# Otherwise, the owltools built-in URI contraction method is used, which assumes OBO PURLs, with unpredictable behavior for non-OBO PURLs
# Derive the OBO serialisation from neo.owl.
# robot writes the converted file to $@.tmp; grep then copies it to the
# target while dropping every line that starts with "owl-axioms".
neo.obo: neo.owl
	$(ROBOT) convert -i $< -o $@.tmp -f obo && grep -v ^owl-axioms $@.tmp >$@
# Regenerate the included sub-makefile from datasets.json.
# The generator's output goes to $@.tmp and is renamed only on success, so
# a failing script never leaves a half-written makefile to be included.
Makefile-gafs: datasets.json
	./build-neo-makefile.py -i $< > $@.tmp && mv $@.tmp $@
# Remote base locations. Both values end with a trailing slash.
GCRP=ftp://ftp.ebi.ac.uk/pub/contrib/goa/gcrp/
RNACFTP=ftp://ftp.ebi.ac.uk/pub/databases/RNAcentral/releases/3.0/genome_coordinates/

# Download the human genome-coordinates GFF3 from the RNAcentral release
# directory.
# The recipe previously referenced the undefined variable RNCFTP, which
# expands to nothing and produced the URL "/Homo_sapiens.GRCh38.gff3.gz".
# RNACFTP already ends in "/", so the file name is appended directly.
# The download goes to $@.tmp and is renamed only on success, because
# wget -O creates its output file even when the transfer fails.
Homo_sapiens.GRCh38.gff3.gz:
	wget $(RNACFTP)$@ -O $@.tmp && mv $@.tmp $@
# Download the current RNAcentral GPI archive.
# The transfer is written to $@.tmp and renamed only on success, so an
# interrupted download cannot leave a partial file under the target name
# that make would treat as complete.
rnacentral.gpi.gz:
	wget ftp://ftp.ebi.ac.uk/pub/databases/RNAcentral/current_release/gpi/rnacentral.gpi.gz -O $@.tmp && mv $@.tmp $@
# Decompressed copy of the RNAcentral GPI archive.
# Written via $@.tmp and renamed on success, matching the other rules in
# this file, so a failed gzip never leaves a truncated target behind.
rnacentral.gpi: rnacentral.gpi.gz
	gzip -dc $< > $@.tmp && mv $@.tmp $@
# Convert the RNAcentral GPI archive into OFN by streaming the
# decompressed file through rnacgpi2ofn.pl.
# Output goes to $@.tmp and is renamed only on success, so a failed
# conversion never leaves a partial .ofn that looks up to date.
target/neo-rnac.ofn: rnacentral.gpi.gz
	gzip -dc $< | ./rnacgpi2ofn.pl > $@.tmp && mv $@.tmp $@
# Pattern rule: convert any per-source OFN file to OWL with robot.
# robot writes to $@.tmp and the result is renamed only on success, so a
# failed conversion never leaves a partial .owl that looks up to date.
target/neo-%.owl: target/neo-%.ofn
	$(ROBOT) convert -i $< -o $@.tmp -f owl && mv $@.tmp $@