Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor repository and unit tests #29

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@

# ignore files created by `pip install -e .`
*.egg-info
9 changes: 3 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@

test: pdftitle.py test.sh test_max2.sh test_eliot.sh testc.sh
pylint pdftitle.py
bash test.sh
bash test_max2.sh
bash test_eliot.sh
bash testc.sh
test: tests/__init__.py
pylint src/pdftitle/pdftitle.py
python -m unittest discover

upload: pdftitle.py setup.py
rm -rf dist
Expand Down
57 changes: 34 additions & 23 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,48 @@
from codecs import open
from os import path

here = path.abspath(path.dirname(__file__))

with open(path.join(here, 'README.md'), encoding='utf-8') as f:
packagename = "pdftitle"

# consider the path of `setup.py` as root directory:
PROJECTROOT = path.dirname(__file__)
release_path = path.join(PROJECTROOT, "src", packagename, "release.py")
with open(release_path, encoding="utf8") as release_file:
__version__ = release_file.read().split('__version__ = "', 1)[1].split('"', 1)[0]


# Resolve requirements.txt relative to setup.py (like release.py and README.md)
# so that the build does not depend on the current working directory.
with open(path.join(PROJECTROOT, "requirements.txt"), encoding="utf-8") as requirements_file:
    requirements = requirements_file.read()


with open(path.join(PROJECTROOT, "README.md"), encoding="utf-8") as f:
long_description = f.read()

setup(
name='pdftitle',
version='0.11',
description='pdftitle is a small utility to extract the title from a PDF file.',
name=packagename,
version=__version__,
description="pdftitle is a small utility to extract the title from a PDF file.",
long_description=long_description,
long_description_content_type='text/markdown',
url='https://github.com/metebalci/pdftitle',
author='Mete Balci',
author_email='[email protected]',
license='GPLv3',
long_description_content_type="text/markdown",
url="https://github.com/metebalci/pdftitle",
author="Mete Balci",
author_email="[email protected]",
license="GPLv3",
classifiers=[
'Development Status :: 3 - Alpha',
'Environment :: Console',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
'Topic :: Utilities',
'Programming Language :: Python :: 3.6',
"Development Status :: 3 - Alpha",
"Environment :: Console",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Topic :: Utilities",
"Programming Language :: Python :: 3.6",
],

keywords='pdf text extract',
py_modules=['pdftitle'],
install_requires=['pdfminer.six>=20201018'],

keywords="pdf text extract",
packages=find_packages("src"),
package_dir={"": "src"},
install_requires=requirements,
entry_points={
'console_scripts': [
'pdftitle=pdftitle:run',
"console_scripts": [
"pdftitle=pdftitle:run",
],
},
)
2 changes: 2 additions & 0 deletions src/pdftitle/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# this allows `import pdftitle` instead of `import pdftitle.pdftitle`
from .pdftitle import *
File renamed without changes.
1 change: 1 addition & 0 deletions src/pdftitle/release.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = "0.11.0"
10 changes: 0 additions & 10 deletions test.sh

This file was deleted.

10 changes: 0 additions & 10 deletions test_eliot.sh

This file was deleted.

10 changes: 0 additions & 10 deletions test_max2.sh

This file was deleted.

20 changes: 0 additions & 20 deletions testc.sh

This file was deleted.

Empty file added tests/__init__.py
Empty file.
File renamed without changes.
Binary file not shown.
File renamed without changes.
File renamed without changes.
103 changes: 103 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import os
import shlex
import shutil
import subprocess
import sys
import unittest
from subprocess import PIPE


def command_with_output(cmd):
    """Run *cmd* as a subprocess and return the result with decoded output.

    Args:
        cmd: Either a list/tuple of arguments or a single command string.
            A string is tokenised with ``shlex.split``, so quoted arguments
            may contain spaces and repeated spaces do not produce empty
            arguments.

    Returns:
        The ``subprocess.CompletedProcess`` of the call. Its ``stdout`` and
        ``stderr`` attributes are replaced by their UTF-8 decoded text.

    Raises:
        TypeError: If *cmd* is neither a string nor a list/tuple.
    """
    if isinstance(cmd, str):
        cmd = shlex.split(cmd)

    if not isinstance(cmd, (list, tuple)):
        raise TypeError(
            "cmd must be a str, list or tuple, not {}".format(type(cmd).__name__)
        )

    # stdout=PIPE/stderr=PIPE is exactly what capture_output=True expands to,
    # and it also works on python3.6, so no version switch is needed.
    res = subprocess.run(cmd, stdout=PIPE, stderr=PIPE)

    res.stdout = res.stdout.decode("utf8")
    res.stderr = res.stderr.decode("utf8")
    return res


class TestCore(unittest.TestCase):
    """End-to-end tests that invoke the ``pdftitle`` command line tool.

    Each test runs with the directory of this file as working directory so
    that the relative ``data/*.pdf`` paths resolve.
    """

    def setUp(self):
        """Remember the current working directory and switch to this file's directory."""
        self.orig_path = os.path.abspath(os.curdir)
        os.chdir(os.path.abspath(os.path.dirname(__file__)))

    def tearDown(self):
        """Restore the working directory that was active before the test."""
        os.chdir(self.orig_path)

    @staticmethod
    def _remove_if_present(fpath):
        """Delete *fpath* if it is an existing file; do nothing otherwise."""
        if os.path.isfile(fpath):
            os.remove(fpath)

    def test_option_c(self):
        """With ``-c`` the input file is expected to be replaced by a file named after the title."""
        fpath = "data/knuth65.pdf"
        expected_name = "on_the_translation_of_languages_from_left_to_right.pdf"

        # Cleanups run after tearDown() has already restored the original
        # working directory, hence they must be given absolute paths.
        scratch_copy = os.path.abspath("test.pdf")
        renamed_copy = os.path.abspath(expected_name)
        self.addCleanup(self._remove_if_present, scratch_copy)
        self.addCleanup(self._remove_if_present, renamed_copy)

        # work on a copy because the original fixture must stay in place
        shutil.copy(fpath, scratch_copy)

        res = command_with_output("pdftitle -p test.pdf -c")
        self.assertEqual(res.returncode, 0)

        title = res.stdout.strip()  # remove trailing newline
        self.assertEqual(title, expected_name)
        self.assertFalse(os.path.isfile(scratch_copy))
        self.assertTrue(os.path.isfile(renamed_copy))

    def test_knuth65(self):
        """The default invocation prints the title of the knuth65 paper."""
        fpath = "data/knuth65.pdf"

        res = command_with_output("pdftitle -p {}".format(fpath))
        self.assertEqual(res.returncode, 0)

        title = res.stdout.strip()  # remove trailing newline
        self.assertEqual(title, "On the Translation of Languages from Left to Right")

    def test_max2(self):
        """``-a max2 -t`` prints the expected title for the paran2010 paper."""
        fpath = "data/paran2010.pdf"

        res = command_with_output("pdftitle -p {} -a max2 -t".format(fpath))
        self.assertEqual(res.returncode, 0)

        title = res.stdout.strip()  # remove trailing newline
        self.assertEqual(
            title,
            "Settlement Remains From The Bronze And Iron Ages At Horbat Menorim (El-Manara), Lower Galilee",
        )

    def test_eliot(self):
        """``-a eliot`` with ``--eliot-tfs 1`` prints the expected title for the woo2019 paper."""
        fpath = "data/woo2019.pdf"

        # here we must pass the arguments as list because one of them is a
        # single space character
        res = command_with_output(
            [
                "pdftitle",
                "-a",
                "eliot",
                "--eliot-tfs",
                "1",
                "-p",
                fpath,
                "--replace-missing-char",
                " ",
            ]
        )
        self.assertEqual(res.returncode, 0)

        title = res.stdout.strip()  # remove trailing newline
        self.assertEqual(
            title,
            "Lactobacillus HY2782 and Bifidobacterium HY8002 Decrease Airway Hyperresponsiveness Induced by Chronic PM2.5 Inhalation in Mice",
        )