forked from JabRef/jabref
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add APS Fetcher (refactored) (JabRef#6143)
* Add APS fetcher * Fix case sensitivity bug * Refactor ApsFetcher * Add note about APS fetcher * Refactor findFulltext() * Refactor getId() * Parameterize ApsFetcherTest * Add link to APS changelog entry * Refactor APS Fetcher * make separate tests Co-authored-by: August Janse <[email protected]>
- Loading branch information
1 parent
edec608
commit 99183e1
Showing
4 changed files
with
145 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
93 changes: 93 additions & 0 deletions
93
src/main/java/org/jabref/logic/importer/fetcher/ApsFetcher.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
package org.jabref.logic.importer.fetcher; | ||
|
||
import java.io.IOException; | ||
import java.net.MalformedURLException; | ||
import java.net.URL; | ||
import java.net.URLConnection; | ||
import java.util.Objects; | ||
import java.util.Optional; | ||
|
||
import org.jabref.logic.importer.FulltextFetcher; | ||
import org.jabref.model.entry.BibEntry; | ||
import org.jabref.model.entry.field.StandardField; | ||
import org.jabref.model.entry.identifier.DOI; | ||
|
||
import kong.unirest.Unirest; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
/** | ||
* FulltextFetcher implementation that attempts to find a PDF URL at APS. Also see the <a | ||
* href="https://harvest.aps.org/docs/harvest-api">API</a>, although it isn't currently used. | ||
*/ | ||
public class ApsFetcher implements FulltextFetcher { | ||
|
||
private static final Logger LOGGER = LoggerFactory.getLogger(ApsFetcher.class); | ||
|
||
// The actual API needs either an API key or a header. This is a workaround. | ||
private static final String DOI_URL = "https://www.doi.org/"; | ||
private static final String PDF_URL = "https://journals.aps.org/prl/pdf/"; | ||
|
||
@Override | ||
public Optional<URL> findFullText(BibEntry entry) throws IOException { | ||
Objects.requireNonNull(entry); | ||
|
||
Optional<DOI> doi = entry.getField(StandardField.DOI).flatMap(DOI::parse); | ||
|
||
if (!doi.isPresent()) { | ||
return Optional.empty(); | ||
} | ||
|
||
Optional<String> id = getId(doi.get().getDOI()); | ||
|
||
if (id.isPresent()) { | ||
|
||
String pdfRequestUrl = PDF_URL + id.get(); | ||
int code = Unirest.head(pdfRequestUrl).asJson().getStatus(); | ||
|
||
if (code == 200) { | ||
LOGGER.info("Fulltext PDF found @ APS."); | ||
try { | ||
return Optional.of(new URL(pdfRequestUrl)); | ||
} catch (MalformedURLException e) { | ||
LOGGER.warn("APS returned malformed URL, cannot find PDF."); | ||
} | ||
} | ||
} | ||
return Optional.empty(); | ||
} | ||
|
||
@Override | ||
public TrustLevel getTrustLevel() { | ||
return TrustLevel.PUBLISHER; | ||
} | ||
|
||
/** | ||
* Convert a DOI into an appropriate APS id. | ||
* | ||
* @param doi A case insensitive DOI | ||
* @return A DOI cased as APS likes it | ||
*/ | ||
private Optional<String> getId(String doi) { | ||
// DOI is not case sensitive, but the id for the PDF URL is, | ||
// so we follow DOI.org redirects to get the proper id. | ||
// https://stackoverflow.com/a/5270162/1729441 | ||
|
||
String doiRequest = DOI_URL + doi; | ||
|
||
URLConnection con; | ||
try { | ||
con = new URL(doiRequest).openConnection(); | ||
con.connect(); | ||
con.getInputStream(); | ||
String[] urlParts = con.getURL().toString().split("abstract/"); | ||
if (urlParts.length == 2) { | ||
return Optional.of(urlParts[1]); | ||
} | ||
|
||
} catch (IOException e) { | ||
LOGGER.warn("Error connecting to APS", e); | ||
} | ||
return Optional.empty(); | ||
} | ||
} |
48 changes: 48 additions & 0 deletions
48
src/test/java/org/jabref/logic/importer/fetcher/ApsFetcherTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
package org.jabref.logic.importer.fetcher; | ||
|
||
import java.net.URL; | ||
import java.util.Optional; | ||
|
||
import org.jabref.model.entry.BibEntry; | ||
import org.jabref.model.entry.field.StandardField; | ||
import org.jabref.testutils.category.FetcherTest; | ||
|
||
import org.junit.jupiter.api.BeforeEach; | ||
import org.junit.jupiter.api.Test; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
|
||
@FetcherTest | ||
class ApsFetcherTest { | ||
|
||
private ApsFetcher finder; | ||
|
||
@BeforeEach | ||
void setUp() { | ||
finder = new ApsFetcher(); | ||
} | ||
|
||
@Test | ||
void findFullTextFromDoi() throws Exception { | ||
BibEntry entry = new BibEntry().withField(StandardField.DOI, "10.1103/PhysRevLett.116.061102"); | ||
assertEquals(Optional.of(new URL("https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.116.061102")), finder.findFullText(entry)); | ||
} | ||
|
||
@Test | ||
void findFullTextFromLowercaseDoi() throws Exception { | ||
BibEntry entry = new BibEntry().withField(StandardField.DOI, "10.1103/physrevlett.124.029002"); | ||
assertEquals(Optional.of(new URL("https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.124.029002")), finder.findFullText(entry)); | ||
} | ||
|
||
@Test | ||
void notFindFullTextForUnauthorized() throws Exception { | ||
BibEntry entry = new BibEntry().withField(StandardField.DOI, "10.1103/PhysRevLett.89.127401"); | ||
assertEquals(Optional.empty(), finder.findFullText(entry)); | ||
} | ||
|
||
@Test | ||
void notFindFullTextForUnknownEntry() throws Exception { | ||
BibEntry entry = new BibEntry().withField(StandardField.DOI, "10.1016/j.aasri.2014.0559.002"); | ||
assertEquals(Optional.empty(), finder.findFullText(entry)); | ||
} | ||
} |