Merge pull request #83 from reality/master

new stable-ish
reality · Jul 8, 2020 · 6320942 · 6320942
2 parents e270fc6 + 5bddf4c
commit 6320942
Show file tree

Hide file tree

Showing 23 changed files with 181,344 additions and 479 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,5 @@
+language: groovy
+dist: bionic
+sudo: required
+install: gradle --no-daemon assemble
+script: gradle --no-daemon check
diff --git a/README.md b/README.md
@@ -1,12 +1,21 @@
 # Komenti
 
+[![Build Status](https://travis-ci.com/reality/komenti.svg?branch=master)](https://travis-ci.com/reality/komenti)
+![LINE](https://img.shields.io/badge/line--coverage-49%25-orange.svg)
+![BRANCH](https://img.shields.io/badge/branch--coverage-21%25-red.svg)
+![COMPLEXITY](https://img.shields.io/badge/complexity-5.50-brightgreen.svg)
+
 Komenti is a tool for semantic query, annotation, and analysis of text using ontologies. 
 
 It enables querying multiple ontologies with complex class descriptions using AberOWL. These can be used to build a vocabulary for text annotation, including new methods for synonym and label expansion. Annotation is performed using Stanford CoreNLP, and includes novel methods for the detection and disambiguation of concept negation and uncertainty. Annotations of text corpora can be used for analysis, within or without Komenti. These components are in development, but currently include summarisation of the co-occurrence of groups of concepts across text, and use of annotations to suggest description logic axioms for classes. These more complex uses can be described by a series of parameters to be passed to the tool in the form of a serialised 'roster,' defining a natural language processing pipeline.
 
+We are working on papers discussing the novel components. I will post them here:
+
+* [A fast, accurate, and generalisable heuristic-based negation detection algorithm for clinical text](https://www.biorxiv.org/content/10.1101/2020.07.03.187054v1)
+
 ## Installation
 
-You can find the latest release here: https://github.com/reality/komenti/releases/tag/0.0.4-SNAPSHOT-5
+You can find the latest stable-ish release here: https://github.com/reality/komenti/releases/tag/0.1.0
 
 You can add the bin/ directory to your PATH, to be able to use it easily from anywhere. It should also work on Windows, but I haven't tested that.
 
@@ -26,6 +35,14 @@ komenti query -q "'part of' some 'apoptotic process'" -o GO --out labels.txt
 komenti query -c toxicity,asbestos -o ENM --out labels.txt
 ```
 
+### All classes
+
+You can get all classes in an ontology by running a subclass query on owl#Thing:
+
+```bash
+komenti query -q "<http://www.w3.org/2002/07/owl#Thing>" --ontology HP
+```
+
 ### Parameters
 
 * The labels can be extended by the power of lemmatisation, by passing --lemmatise

diff --git a/build.gradle b/build.gradle
@@ -12,6 +12,9 @@ plugins {
 
     // Apply the application plugin to add support for building a CLI application.
     id 'application'
+
+    id 'jacoco'
+    id 'com.github.dawnwords.jacoco.badge' version '0.2.0'
 }
 
 repositories {
@@ -33,6 +36,12 @@ dependencies {
     compile 'org.apache.pdfbox:pdfbox:2.0.15'
     compile 'org.codehaus.gpars:gpars:1.2.1'
 
+    compile 'net.sourceforge.owlapi:owlapi-api:5.1.4'
+    compile 'net.sourceforge.owlapi:owlapi-apibinding:5.1.4'
+    compile 'net.sourceforge.owlapi:owlapi-impl:5.1.4'
+    compile 'net.sourceforge.owlapi:owlapi-parsers:5.1.4'
+    compile 'org.apache.commons:commons-rdf-api:0.5.0'
+
     // Use the awesome Spock testing and specification framework
     testImplementation 'org.spockframework:spock-core:1.3-groovy-2.5'
 }
@@ -57,8 +66,28 @@ task release {
 
 gradle.taskGraph.whenReady { taskGraph ->
     if (taskGraph.hasTask(":release")) {
-        version = '0.0.4'
+        version = '0.1.0'
     } else {
-        version = '0.0.4-SNAPSHOT'
+        version = '0.1.0-SNAPSHOT'
     }
 }
+
+test {
+  maxHeapSize = "6G"
+  testLogging {
+    events "started", "skipped", "failed"
+    exceptionFormat "full"
+  }
+  finalizedBy jacocoTestReport // report is always generated after tests run
+}
+
+jacocoTestReport {
+  dependsOn test // tests are required to run before generating the report
+  reports {
+    xml.enabled true
+  }
+}
+
+
+
+
diff --git a/gradle.properties b/gradle.properties
@@ -0,0 +1 @@
+org.gradle.jvmargs=-Xmx6G -Xms6G
diff --git a/src/main/groovy/komenti/App.groovy b/src/main/groovy/komenti/App.groovy
@@ -4,7 +4,7 @@
 package komenti
 
 class App {
- static void main(String[] args) {
+ static void main(args) {
     def cliBuilder = new CliBuilder(
       usage: 'komenti <command> [<options>]',
       header: 'Options:'
@@ -22,6 +22,7 @@ class App {
       _ longOpt: 'override-group', 'Override group in labels output with given text', args: 1
       _ longOpt: 'priority', 'RegexNER priority in output. Default is 1.', args: 1
       _ longOpt: 'expand-synonyms', 'Expand synonyms using AberOWL', type: Boolean
+      _ longOpt: 'label-extension', 'Run a named label extension, e.g. cmo', args: 1
 
       // annotation options
       t longOpt: 'text', 'A file or directory of files to annotate.', args: 1
@@ -33,6 +34,9 @@ class App {
       _ longOpt: 'allergy-modifier', 'Evaluate sentences for whether or not they mention an allergy', type: Boolean
       _ longOpt: 'exclude', 'A list of phrases, which when matched in a sentence, will cause that sentence not to be annotated. One phrase per line.', args: 1
       _ longOpt: 'write-pdfs-to-dir', 'If set, write the converted PDF text into the given directory.', args: 1
+      _ longOpt: 'extract-triples', 'Extract triples from text', type: Boolean
+      _ longOpt: 'allow-unmatched-relations', 'If there are two terms, output a mocked Annotation in AnnotationTriple, allowing triples without ', type: Boolean
+      // _ longOpt: 'require-full-match', ' require a full match for extraction of tripels'
 
       // summary options
       a longOpt: 'annotation-file', 'Annotation file to summarise', args: 1
@@ -69,13 +73,132 @@ class App {
       // diagnose options
       _ longOpt: 'by-group', 'Group items for diagnosis by the query group, rather than by term IRI', type: Boolean
 
+      // ontologise options
+      _ longOpt: 'triples', 'Triples file to turn into --ontolog', args: 1
+
       // all options
       _ longOpt: 'out', 'Where to write the annotation results.', args: 1
       _ longOpt: 'append', 'Append output file, instead of replacing it', type: Boolean
-      _ longOpt: 'verbose', 'Verbose output, mostly progress', type: Boolean
+      _ longOpt: 'verbose', 'Verbose output, mostly progress', type: Boolean, args: 0
       _ longOpt: 'threads', 'Number of threads to use for query/annotation processes', type: Integer, args: 1
     }
 
-    Komenti.run(cliBuilder, args)
+    if(args.contains('--verbose')) {
+      println args
+    }
+
+    if(!args[0]) { println "Must provide command." }
+    if(args[0] == '-h' || args[0] == '--help') {
+      cliBuilder.usage(); return;
+    }
+
+    def command = args[0]
+    def o = cliBuilder.parse(args.drop(1))
+
+    if(o.h) { 
+      cliBuilder.usage()
+    }
+
+    def aCheck = checkArguments(command, o)
+    if(aCheck) {
+      Komenti."$command"(o)
+    }
   }
+
+  // TODO this could be a bit smarter, eh
+  /**
+   * Validates the parsed options for the given command before it is dispatched.
+   *
+   * Each failed check prints a message to standard output. Checks are not
+   * short-circuited, so a single call may print several messages. A command
+   * without a dedicated branch below is only checked for being resolvable
+   * through Komenti.metaClass.getMetaMethod.
+   *
+   * @param command name of the Komenti command to run
+   * @param o parsed command-line options, read by short and long option name
+   * @return true when no check failed, false otherwise
+   */
+  static def checkArguments(command, o) {
+    def valid = true
+
+    // Report a problem to the user and mark the whole check as failed.
+    def reject = { message ->
+      println message
+      valid = false
+    }
+
+    // Number of comma-separated entries in a class list option.
+    def classCount = { classes -> classes.split(',').size() }
+
+    if(!Komenti.metaClass.getMetaMethod(command)) {
+      reject("Command ${command} not found.")
+    }
+
+    if(command == 'gen_roster') {
+      if(!(o.q || o.c)) {
+        reject("You must provide a --query or --class-list")
+      }
+
+      // A roster needs either text to annotate or some download/analysis step.
+      def hasInput = o['with-abstracts-download'] || o['with-metadata-download'] || o['mine-relationship'] || o.t
+      if(!hasInput) {
+        reject("Must either download abstracts, metadata, or provide text to annotate")
+      }
+
+      if(o['mine-relationship'] && (!o.c || classCount(o.c) != 2)) {
+        reject("to --mine-relationship you must pass exactly two concept names with -c/--class-list")
+      }
+
+      if(o['suggest-axiom']) {
+        if(!o.o) {
+          reject("Must pass an ontology to query with -o/--ontology")
+        }
+
+        if(!o.c || classCount(o.c) != 1) {
+          reject("You must pass a class into -c to suggest axiom")
+        }
+
+        if(!(o.entity && o.quality && o['default-entity'] && o['default-relation'])) {
+          reject("To suggest axiom you must pass class lists with --entity, --quality. You must also pass --default-relation and --default-entity.")
+        }
+      }
+    } else if(command == 'auto') {
+      if(!o.r) {
+        reject("Must pass a roster")
+      }
+    } else if(command == 'query') {
+      // --object-properties and a query/class list are mutually exclusive,
+      // but one of the two forms has to be present.
+      if(!o['object-properties'] && !(o.q || o.c)) {
+        reject("You must pass a query or class list")
+      }
+      if(o['object-properties'] && (o.q || o.c)) {
+        reject("Cannot pass a query or class list for --object-properties query")
+      }
+    } else if(command == 'get_metadata') { // TODO: needs to be expanded
+      if(!o.l) {
+        reject("Must pass label file")
+      }
+    } else if(command == 'annotate') {
+      if(!(o.t || o['file-list'])) {
+        reject("Must either pass texts to parse, or a --file-list containing paths of texts to analyse.")
+      }
+      if(!o.l) {
+        reject("Must pass label file")
+      }
+      if(!o.out) {
+        reject("Must provide output filename via --out")
+      }
+    } else if(command == 'add_modifiers') {
+      if(!(o.out && o.a && o.l)) {
+        reject("Must provide annotation file via -a, and labels file with -l, and output filename via --out")
+      }
+    } else if(command == 'get_abstracts') {
+      if(!o.l) {
+        reject("Must pass label file")
+      }
+    } else if(command == 'summarise_entity_pair') {
+      if(!(o.l && o.a && o.c)) {
+        reject("Must provide annotation file via -a, and labels file with -l, and two classes with -c")
+      }
+    } else if(command == 'suggest_axiom') {
+      if(!(o.l && o.a)) {
+        reject("Must provide a --label file and a --annotations file")
+      }
+    }
+
+    valid
+  }
 }