Skip to content

Commit

Permalink
Merge pull request #7 from PerryAgent/main
Browse files Browse the repository at this point in the history
Pull Request for Sci-Hub Changes
  • Loading branch information
hack-r authored Mar 19, 2024
2 parents d75fe2e + 91da89e commit b7de821
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 7 deletions.
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ habanero
python-box
PyYAML
pyzotero
urllib3
urllib3
pandas
74 changes: 68 additions & 6 deletions src/pyserpZotero/pyserpZotero.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from collections import Counter
from datetime import date
from habanero import Crossref
import pdb
from io import BytesIO
from pyzotero import zotero
from serpapi import GoogleSearch
Expand All @@ -26,6 +27,7 @@


class serpZot:

"""
:param API_KEY: serpAPI API key
:type API_KEY: str
Expand Down Expand Up @@ -371,6 +373,8 @@ def cleanZot(self, ZOT_ID = "", ZOT_KEY = "",SEARCH_TERM="", FIELD="title"):
return 0

def arxivDownload(self, ZOT_ID = "", ZOT_KEY = "",SEARCH_TERM="",GET_SOURCE=False,DOWNLOAD_DEST="."):
FIELD = 'publicationTitle'

'''
:param ZOT_ID: Zotero user (aka library) Id
:type ZOT_ID: str
Expand Down Expand Up @@ -407,26 +411,84 @@ def arxivDownload(self, ZOT_ID = "", ZOT_KEY = "",SEARCH_TERM="",GET_SOURCE=Fals
text1 = item['data'][FIELD]
string = re.sub('[ ](?=[ ])|[^-_,A-Za-z0-9 ]+','',text1)
vector1 = self.text_to_vector(string)

search = arxiv.Search(
query = 'ti:'+"'"+string+"'",
max_results = 10,
sort_by = arxiv.SortCriterion.Relevance
)
#cosine_holder = []
pdf_downloaded = 0
for result in search.results():
vector2 = self.text_to_vector(result.title)
cosine = self.get_cosine(vector1, vector2)
#cosine_holder.append({result.title:cosine})
if cosine > .9:
#result.doi
pdf_downloaded += 1
print("Match found!: ")
print(text1)
print(result.entry_id)
result.download_pdf(dirpath=DOWNLOAD_DEST)
files = [os.path.join(DOWNLOAD_DEST, x) for x in os.listdir(DOWNLOAD_DEST) if x.endswith(".pdf")]
newest = max(files , key = os.path.getctime)
zot.attachment_simple([newest],item['key'])
except:


if pdf_downloaded == 0:
sci_hub_url = "https://sci-hub.se/"
# DOI = item['data'].get('DOI')
DOI = "10.1002/neu.1021"
sci_hub_url += DOI

response = requests.get(sci_hub_url)


name = DOI.replace("/", "_") + ".pdf"
path = os.path.join(DOWNLOAD_DEST, name)
# pdb.set_trace()

if response.headers['content-type'] == "application/pdf":
with open(path, "wb") as f:
f.write(response.content)
f.close()
elif re.findall("application/pdf", response.text):
pdf_link = "https:" + re.findall('src=".*\.pdf.*"', response.text)[0].split('"')[1].split('#')[0]
# pdf_link = "https://zero.sci-hub.se/182/46a2ed6f529ae730db224547694eb48b/lee2001.pdf?download=true"
pdf_response = requests.get(pdf_link)
if pdf_response.headers['content-type'] == "application/pdf":
with open(path, "wb") as pf:
pf.write(pdf_response.content)
pf.close()

files = [os.path.join(DOWNLOAD_DEST, x) for x in os.listdir(DOWNLOAD_DEST) if x.endswith(".pdf")]
print(files)
newest = max(files , key = os.path.getctime)
zot.attachment_simple([newest],item['key'])

except Exception as e:
print(e)
pass
return 0
return 0


# """Load libraries"""
# import os
# import yaml

# from box import Box

# # Load a YAML with the following 3 values (Optional)
# with open("/home/perry/Documents/Coding/Upwork/pyserpZotero/src/pyserpZotero/config.yaml", "r") as ymlfile:
# cfg = Box(yaml.safe_load(ymlfile), default_box=True, default_box_attr=None)

# API_KEY = cfg.API_KEY
# ZOT_ID = cfg.ZOT_ID
# ZOT_KEY = cfg.ZOT_KEY

# # Instantiate a serpZot object for API management
# citeObj = serpZot(API_KEY = API_KEY,
# ZOT_ID = ZOT_ID,
# ZOT_KEY = ZOT_KEY,
# DOWNLOAD_DEST = "." # Optional (for destinations other than the current directory)
# )

# # Check Arxiv for Free PDFs of Papers and Attach / Upload Them To Zotero
# citeObj.arxivDownload()

0 comments on commit b7de821

Please sign in to comment.