Skip to content

Commit

Permalink
small fix for file-list, pdf parse
Browse files Browse the repository at this point in the history
  • Loading branch information
reality committed May 19, 2020
1 parent 0c6d340 commit 4dd7d9d
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/main/groovy/komenti/Komenti.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ public class Komenti {

def fList
if(o['file-list']) {
fList = new File(o['file-list']).text.split('\n')
fList = new File(o['file-list']).text.split('\n').collect { new File(it) }
}

def outWriter = new BufferedWriter(new FileWriter(o.out))
Expand Down
2 changes: 1 addition & 1 deletion src/main/groovy/komenti/klib/PDFReader.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ public class PDFReader {

def text = stripper.getText(reader).toLowerCase()

text = text.replaceAll('\n\n', '. ')
text = text.replaceAll('\u2022', '. ')
text = text.replaceAll('–', '. ')
text = text.replaceAll('\b-', '. ')
Expand All @@ -27,7 +28,6 @@ public class PDFReader {
text = text.replaceAll('\\s+', ' ')
text = text.replaceAll(', \\?', '. ?')
text = text.replaceAll('\\.', '. ')
text = text.replaceAll('\n\n', '. ')

pages << text
}
Expand Down

0 comments on commit 4dd7d9d

Please sign in to comment.