Skip to content

Commit

Permalink
Merge pull request #83 from reality/master
Browse files Browse the repository at this point in the history
new stable-ish
  • Loading branch information
reality authored Jul 8, 2020
2 parents e270fc6 + 5bddf4c commit 6320942
Show file tree
Hide file tree
Showing 23 changed files with 181,344 additions and 479 deletions.
5 changes: 5 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
language: groovy
dist: bionic
sudo: required
install: gradle --no-daemon assemble
script: gradle --no-daemon check
19 changes: 18 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
# Komenti

[![Build Status](https://travis-ci.com/reality/komenti.svg?branch=master)](https://travis-ci.com/reality/komenti)
![LINE](https://img.shields.io/badge/line--coverage-49%25-orange.svg)
![BRANCH](https://img.shields.io/badge/branch--coverage-21%25-red.svg)
![COMPLEXITY](https://img.shields.io/badge/complexity-5.50-brightgreen.svg)

Komenti is a tool for semantic query, annotation, and analysis of text using ontologies.

It enables querying multiple ontologies with complex class descriptions using AberOWL. These can be used to build a vocabulary for text annotation, including new methods for synonym and label expansion. Annotation is performed using Stanford CoreNLP, and includes novel methods for the detection and disambiguation of concept negation and uncertainty. Annotations of text corpora can be used for analysis, within or without Komenti. These components are in development, but currently include summarisation of the co-occurrence of groups of concepts across text, and use of annotations to suggest description logic axioms for classes. These more complex uses can be described by a series of parameters to be passed to the tool in the form of a serialised 'roster,' defining a natural language processing pipeline.

We are working on papers discussing the novel components. I will post them here:

* [A fast, accurate, and generalisable heuristic-based negation detection algorithm for clinical text](https://www.biorxiv.org/content/10.1101/2020.07.03.187054v1)

## Installation

You can find the latest release here: https://github.com/reality/komenti/releases/tag/0.0.4-SNAPSHOT-5
You can find the latest stable-ish release here: https://github.com/reality/komenti/releases/tag/0.1.0

You can add the bin/ directory to your PATH, to be able to use it easily from anywhere. It should also work on Windows, but I haven't tested that.

Expand All @@ -26,6 +35,14 @@ komenti query -q "'part of' some 'apoptotic process'" -o GO --out labels.txt
komenti query -c toxicity,asbestos -o ENM --out labels.txt
```

### All classes

You can get all classes in an ontology by running a subclass query on owl#Thing:

```bash
komenti query -q "<http://www.w3.org/2002/07/owl#Thing>" --ontology HP
```

### Parameters

* The labels can be extended by the power of lemmatisation, by passing --lemmatise
Expand Down
33 changes: 31 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ plugins {

// Apply the application plugin to add support for building a CLI application.
id 'application'

id 'jacoco'
id 'com.github.dawnwords.jacoco.badge' version '0.2.0'
}

repositories {
Expand All @@ -33,6 +36,12 @@ dependencies {
compile 'org.apache.pdfbox:pdfbox:2.0.15'
compile 'org.codehaus.gpars:gpars:1.2.1'

compile 'net.sourceforge.owlapi:owlapi-api:5.1.4'
compile 'net.sourceforge.owlapi:owlapi-apibinding:5.1.4'
compile 'net.sourceforge.owlapi:owlapi-impl:5.1.4'
compile 'net.sourceforge.owlapi:owlapi-parsers:5.1.4'
compile 'org.apache.commons:commons-rdf-api:0.5.0'

// Use the awesome Spock testing and specification framework
testImplementation 'org.spockframework:spock-core:1.3-groovy-2.5'
}
Expand All @@ -57,8 +66,28 @@ task release {

gradle.taskGraph.whenReady { taskGraph ->
if (taskGraph.hasTask(":release")) {
version = '0.0.4'
version = '0.1.0'
} else {
version = '0.0.4-SNAPSHOT'
version = '0.1.0-SNAPSHOT'
}
}

// Configure the test task: heap limit, test logging, and the follow-up coverage report.
test {
// Maximum heap size for the JVM that runs the tests.
maxHeapSize = "6G"
testLogging {
// Log an entry whenever a test starts, is skipped, or fails.
events "started", "skipped", "failed"
// Use the "full" exception format when logging test failures.
exceptionFormat "full"
}
finalizedBy jacocoTestReport // report is always generated after tests run
}

// Configure the JaCoCo coverage report task.
jacocoTestReport {
dependsOn test // tests are required to run before generating the report
reports {
// Enable the XML report output.
// NOTE(review): presumably consumed by the jacoco badge plugin applied in this build — confirm.
xml.enabled true
}
}




1 change: 1 addition & 0 deletions gradle.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
org.gradle.jvmargs=-Xmx6G -Xms6G
129 changes: 126 additions & 3 deletions src/main/groovy/komenti/App.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
package komenti

class App {
static void main(String[] args) {
static void main(args) {
def cliBuilder = new CliBuilder(
usage: 'komenti <command> [<options>]',
header: 'Options:'
Expand All @@ -22,6 +22,7 @@ class App {
_ longOpt: 'override-group', 'Override group in labels output with given text', args: 1
_ longOpt: 'priority', 'RegexNER priority in output. Default is 1.', args: 1
_ longOpt: 'expand-synonyms', 'Expand synonyms using AberOWL', type: Boolean
_ longOpt: 'label-extension', 'Run a named label extension, e.g. cmo', args: 1

// annotation options
t longOpt: 'text', 'A file or directory of files to annotate.', args: 1
Expand All @@ -33,6 +34,9 @@ class App {
_ longOpt: 'allergy-modifier', 'Evaluate sentences for whether or not they mention an allergy', type: Boolean
_ longOpt: 'exclude', 'A list of phrases, which when matched in a sentence, will cause that sentence not to be annotated. One phrase per line.', args: 1
_ longOpt: 'write-pdfs-to-dir', 'If set, write the converted PDF text into the given directory.', args: 1
_ longOpt: 'extract-triples', 'Extract triples from text', type: Boolean
_ longOpt: 'allow-unmatched-relations', 'If there are two terms, output a mocked Annotation in AnnotationTriple, allowing triples without ', type: Boolean
// _ longOpt: 'require-full-match', ' require a full match for extraction of tripels'

// summary options
a longOpt: 'annotation-file', 'Annotation file to summarise', args: 1
Expand Down Expand Up @@ -69,13 +73,132 @@ class App {
// diagnose options
_ longOpt: 'by-group', 'Group items for diagnosis by the query group, rather than by term IRI', type: Boolean

// ontologise options
_ longOpt: 'triples', 'Triples file to turn into --ontolog', args: 1

// all options
_ longOpt: 'out', 'Where to write the annotation results.', args: 1
_ longOpt: 'append', 'Append output file, instead of replacing it', type: Boolean
_ longOpt: 'verbose', 'Verbose output, mostly progress', type: Boolean
_ longOpt: 'verbose', 'Verbose output, mostly progress', type: Boolean, args: 0
_ longOpt: 'threads', 'Number of threads to use for query/annotation processes', type: Integer, args: 1
}

Komenti.run(cliBuilder, args)
if(args.contains('--verbose')) {
println args
}

if(!args[0]) { println "Must provide command." }
if(args[0] == '-h' || args[0] == '--help') {
cliBuilder.usage(); return;
}

def command = args[0]
def o = cliBuilder.parse(args.drop(1))

if(o.h) {
cliBuilder.usage()
}

def aCheck = checkArguments(command, o)
if(aCheck) {
Komenti."$command"(o)
}
}

// TODO this could be a bit smarter, eh
/**
 * Checks that the options required by the given command are present.
 *
 * Every problem found is printed to standard output as soon as it is
 * detected; checking continues afterwards so that all problems of the
 * selected command are reported in one pass.
 *
 * @param command name of the Komenti command to run (e.g. 'query', 'annotate')
 * @param o       parsed command-line options, accessed by short name
 *                (o.q, o.c, ...) or by long name (o['file-list'], ...)
 * @return true when no problem was reported, false otherwise
 */
static def checkArguments(command, o) {
    def valid = true

    // Print the problem immediately and remember that validation failed.
    def reject = { message ->
        println message
        valid = false
    }

    // Number of comma-separated entries given via -c/--class-list (0 when the option is absent).
    def classCount = { o.c ? o.c.split(',').size() : 0 }

    // The command must correspond to a method on Komenti.
    if(!Komenti.metaClass.getMetaMethod(command)) {
        reject("Command ${command} not found.")
    }

    if(command == 'gen_roster') {
        if(!(o.q || o.c)) {
            reject("You must provide a --query or --class-list")
        }

        // A roster needs either text to annotate or some analysis/download step to work on.
        def hasWork = o['with-abstracts-download'] || o['with-metadata-download'] || o['mine-relationship'] || o.t
        if(!hasWork) {
            reject("Must either download abstracts, metadata, or provide text to annotate")
        }

        if(o['mine-relationship'] && classCount() != 2) {
            reject("to --mine-relationship you must pass exactly two concept names with -c/--class-list")
        }

        if(o['suggest-axiom']) {
            if(!o.o) {
                reject("Must pass an ontology to query with -o/--ontology")
            }

            if(classCount() != 1) {
                reject("You must pass a class into -c to suggest axiom")
            }

            if(!(o.entity && o.quality && o['default-entity'] && o['default-relation'])) {
                reject("To suggest axiom you must pass class lists with --entity, --quality. You must also pass --default-relation and --default-entity.")
            }
        }
    } else if(command == 'auto') {
        if(!o.r) {
            reject("Must pass a roster")
        }
    } else if(command == 'query') {
        // An --object-properties query and a query/class list are mutually exclusive,
        // but one of the two must be given.
        if(!(o['object-properties'] || o.q || o.c)) {
            reject("You must pass a query or class list")
        }
        if(o['object-properties'] && (o.q || o.c)) {
            reject("Cannot pass a query or class list for --object-properties query")
        }
    } else if(command == 'get_metadata') { // TODO: needs to be expanded
        if(!o.l) {
            reject("Must pass label file")
        }
    } else if(command == 'annotate') {
        if(!(o.t || o['file-list'])) {
            reject("Must either pass texts to parse, or a --file-list containing paths of texts to analyse.")
        }
        if(!o.l) {
            reject("Must pass label file")
        }
        if(!o.out) {
            reject("Must provide output filename via --out")
        }
    } else if(command == 'add_modifiers') {
        if(!(o.out && o.a && o.l)) {
            reject("Must provide annotation file via -a, and labels file with -l, and output filename via --out")
        }
    } else if(command == 'get_abstracts') {
        if(!o.l) {
            reject("Must pass label file")
        }
    } else if(command == 'summarise_entity_pair') {
        if(!(o.l && o.a && o.c)) {
            reject("Must provide annotation file via -a, and labels file with -l, and two classes with -c")
        }
    } else if(command == 'suggest_axiom') {
        if(!(o.l && o.a)) {
            reject("Must provide a --label file and a --annotations file")
        }
    }

    return valid
}
}
Loading

0 comments on commit 6320942

Please sign in to comment.