Skip to content

Commit

Permalink
fix(citoid): remove non-numeric chars from oclc
Browse files Browse the repository at this point in the history
  • Loading branch information
5j9 committed Dec 12, 2024
1 parent 3edd2f5 commit 17e9e43
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 0 deletions.
8 changes: 8 additions & 0 deletions lib/citoid.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from functools import partial
from urllib.parse import quote_plus

from regex import compile as rc

from lib import four_digit_num, request
from lib.commons import find_any_date

Expand All @@ -19,6 +22,8 @@
'PMCID': 'pmcid',
}

# Remove every character matching r'\D' (i.e. every non-digit) from a string,
# e.g. to reduce an OCLC value such as 'on1021182894' to '1021182894'.
rm_non_numeric = partial(rc(r'\D').sub, '')


def citoid_data(query: str, quote=False, /) -> dict:
if quote is True:
Expand All @@ -38,6 +43,9 @@ def citoid_data(query: str, quote=False, /) -> dict:
if (value := get(citoid_key)) is not None:
d[citer_key] = value

if (oclc := d.get('oclc')) is not None:
d['oclc'] = rm_non_numeric(oclc)

authors = get('author')
contributors = get('contributor')

Expand Down
6 changes: 6 additions & 0 deletions tests/isbn_oclc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,9 @@ def test_not_found_isbn(_m1, _m2):
with raises(ReturnError) as e:
isbn_scr('9798863646336')
assert e.value.args == ('Error: ISBN not found', '', '')


def test_oclc_no_leading_letters():
    """The rendered citation must carry a digits-only OCLC number.

    NOTE(review): the citoid fixture recorded for this ISBN appears to report
    the OCLC as 'on1021182894'; the expected output below has the letter
    prefix removed.
    """
    expected_citation = (
        '* {{cite book | title=The European fortune of the Roman Veronica in the Middle Ages | publisher=Brepols | publication-place=Turnhout | date=2017 | isbn=978-80-210-8779-8 | oclc=1021182894 | ref={{sfnref|Brepols|2017}}}}'
    )
    result = isbn_scr('978-80-210-8779-8')
    # Index 1 of the returned sequence holds the full citation text.
    assert result[1] == expected_citation
11 changes: 11 additions & 0 deletions tests/testdata/7b8675b5b8a89b52eaa2a91e1e8c08b10242642c.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<!DOCTYPE html>
<html lang=en>
<meta charset=utf-8>
<meta name=viewport content="initial-scale=1, minimum-scale=1, width=device-width">
<title>Error 403 (Forbidden)!!1</title>
<style>
*{margin:0;padding:0}html,code{font:15px/22px arial,sans-serif}html{background:#fff;color:#222;padding:15px}body{margin:7% auto 0;max-width:390px;min-height:180px;padding:30px 0 15px}* > body{background:url(//www.google.com/images/errors/robot.png) 100% 5px no-repeat;padding-right:205px}p{margin:11px 0 22px;overflow:hidden}ins{color:#777;text-decoration:none}a img{border:0}@media screen and (max-width:772px){body{background:none;margin-top:0;max-width:none;padding-right:0}}#logo{background:url(//www.google.com/images/branding/googlelogo/1x/googlelogo_color_150x54dp.png) no-repeat;margin-left:-5px}@media only screen and (min-resolution:192dpi){#logo{background:url(//www.google.com/images/branding/googlelogo/2x/googlelogo_color_150x54dp.png) no-repeat 0% 0%/100% 100%;-moz-border-image:url(//www.google.com/images/branding/googlelogo/2x/googlelogo_color_150x54dp.png) 0}}@media only screen and (-webkit-min-device-pixel-ratio:2){#logo{background:url(//www.google.com/images/branding/googlelogo/2x/googlelogo_color_150x54dp.png) no-repeat;-webkit-background-size:100% 100%}}#logo{display:inline-block;height:54px;width:150px}
</style>
<a href=//www.google.com/><span id=logo aria-label=Google></span></a>
<p><b>403.</b> <ins>That’s an error.</ins>
<p>Your client does not have permission to get URL <code>/books/v1/volumes</code> from this server. <ins>That’s all we know.</ins>
12 changes: 12 additions & 0 deletions tests/testdata/7b8675b5b8a89b52eaa2a91e1e8c08b10242642c.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"encoding": "UTF-8",
"headers": {
"alt-svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000",
"content-length": "1595",
"content-type": "text/html; charset=UTF-8",
"date": "Thu, 12 Dec 2024 10:12:11 GMT",
"referrer-policy": "no-referrer"
},
"status_code": 403,
"url": "https://www.googleapis.com/books/v1/volumes?q=isbn:9788021087798"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"key":"ZIMS77FJ","version":0,"itemType":"book","tags":[{"tag":"Jesus Christ","type":1},{"tag":"Art","type":1},{"tag":"Veronica","type":1},{"tag":"Art","type":1},{"tag":"Jesus Christ","type":1},{"tag":"Veronica","type":1},{"tag":"Veil of Veronica in art","type":1},{"tag":"Veil of Veronica in art","type":1},{"tag":"Art","type":1}],"ISBN":["978-80-210-8779-8"],"title":"The European fortune of the Roman Veronica in the Middle Ages","numPages":"303","series":"Convivium (Brno, Czech Republic). Supplementum","seriesNumber":"2017","callNumber":"N8050 .E88 2017","extra":"OCLC: on1021182894","place":"Turnhout","publisher":"Brepols","date":"2017","libraryCatalog":"Library of Congress ISBN","accessDate":"2024-12-12","editor":[["Amanda C.","Murphy"],["Herbert L.","Kessler"],["Marco","Petoletti"],["Eamon","Duffy"],["Guido","Milanese"],["Veronika","Tvrzníková"]],"oclc":"on1021182894","source":["Zotero","Library of Congress ISBN"],"url":"https://www.worldcat.org/title/on1021182894"}]
36 changes: 36 additions & 0 deletions tests/testdata/cabe52f3d12d2b279e7f745d9d6ba70e2085782b.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"encoding": "utf-8",
"headers": {
"accept-ranges": "bytes",
"access-control-allow-headers": "accept, content-type, content-length, cache-control, accept-language, api-user-agent, if-match, if-modified-since, if-none-match, dnt, accept-encoding",
"access-control-allow-methods": "GET,HEAD",
"access-control-allow-origin": "*",
"access-control-expose-headers": "etag",
"age": "1",
"cache-control": "private, max-age=0, s-maxage=0, must-revalidate",
"content-encoding": "gzip",
"content-length": "536",
"content-location": "https://en.wikipedia.org/api/rest_v1/data/citation/mediawiki/978-80-210-8779-8",
"content-security-policy": "default-src 'none'; frame-ancestors 'none'",
"content-type": "application/json; charset=utf-8",
"date": "Thu, 12 Dec 2024 10:12:12 GMT",
"nel": "{ \"report_to\": \"wm_nel\", \"max_age\": 604800, \"failure_fraction\": 0.05, \"success_fraction\": 0.0}",
"referrer-policy": "origin-when-cross-origin",
"report-to": "{ \"group\": \"wm_nel\", \"max_age\": 604800, \"endpoints\": [{ \"url\": \"https://intake-logging.wikimedia.org/v1/events?stream=w3c.reportingapi.network_error&schema_uri=/w3c/reportingapi/network_error/1.0.0\" }] }",
"server": "restbase1032",
"server-timing": "cache;desc=\"pass\", host;desc=\"cp3066\"",
"set-cookie": "WMF-Last-Access=12-Dec-2024;Path=/;HttpOnly;secure;Expires=Mon, 13 Jan 2025 00:00:00 GMT, WMF-Last-Access-Global=12-Dec-2024;Path=/;Domain=.wikipedia.org;HttpOnly;secure;Expires=Mon, 13 Jan 2025 00:00:00 GMT, GeoIP=IR:09:Mashhad:36.30:59.59:v4; Path=/; secure; Domain=.wikipedia.org, NetworkProbeLimit=0.001;Path=/;Secure;SameSite=Lax;Max-Age=3600",
"strict-transport-security": "max-age=106384710; includeSubDomains; preload",
"vary": "Accept-Encoding",
"x-cache": "cp3066 miss, cp3066 pass",
"x-cache-status": "pass",
"x-client-ip": "31.14.150.2",
"x-content-security-policy": "default-src 'none'; frame-ancestors 'none'",
"x-content-type-options": "nosniff",
"x-frame-options": "SAMEORIGIN",
"x-webkit-csp": "default-src 'none'; frame-ancestors 'none'",
"x-xss-protection": "1; mode=block"
},
"status_code": 200,
"url": "https://en.wikipedia.org/api/rest_v1/data/citation/mediawiki/978-80-210-8779-8"
}

0 comments on commit 17e9e43

Please sign in to comment.