Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX - Replace time.clock with time.perf_counter #174

Merged
merged 4 commits into from
Nov 19, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 37 additions & 22 deletions fido/fido.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@
import sys
import tarfile
import tempfile
import time
try:
from time import perf_counter
except ImportError:
from time import clock as perf_counter

from xml.etree import cElementTree as ET
import zipfile

Expand Down Expand Up @@ -56,6 +60,21 @@
}


class PerfTimer(object):
    """Utility class that carries out simple process timings.

    Readings come from the module-level ``perf_counter`` name, which is
    ``time.perf_counter`` where available and falls back to ``time.clock``
    on interpreters that lack it.

    Inherits explicitly from ``object`` so the class is new-style on
    Python 2 as well as Python 3.
    """

    def __init__(self):
        """New instance with start time running."""
        # Delegate to start() so the reference point is set in one place.
        self.start()

    def start(self):
        """Start new timer by resetting the reference point to now."""
        self.start_time = perf_counter()

    def duration(self):
        """Return the duration since instantiation or start() was last called.

        The value is the difference between two ``perf_counter`` readings
        (fractional seconds); the timer keeps running, so this may be
        called repeatedly.
        """
        return perf_counter() - self.start_time


class Fido:
def __init__(self, quiet=False, bufsize=None, container_bufsize=None, printnomatch=None, printmatch=None, zip=False, nocontainer=False, handle_matches=None, conf_dir=CONFIG_DIR, format_files=None, containersignature_file=None):
global defaults
Expand Down Expand Up @@ -326,7 +345,7 @@ def identify_file(self, filename, extension=True):
self.current_file = filename
self.matchtype = "signature"
try:
t0 = time.clock()
timer = PerfTimer()
f = open(filename, 'rb')
size = os.stat(filename)[6]
self.current_filesize = size
Expand All @@ -342,7 +361,7 @@ def identify_file(self, filename, extension=True):
else:
container_matches = self.match_container("OLE2", OlePackage, filename, container_file)
if len(container_matches) > 0:
self.handle_matches(filename, container_matches, time.clock() - t0, "container")
self.handle_matches(filename, container_matches, timer.duration(), "container")
return
# from here is also repeated in walk_zip
# we should make this uniform in a next version!
Expand All @@ -351,10 +370,10 @@ def identify_file(self, filename, extension=True):
# are falsely characterised being 'rtf' (due to wacky sig)
# in these cases we try to match the extension instead
if len(matches) > 0 and self.current_filesize > 0:
self.handle_matches(filename, matches, time.clock() - t0, self.matchtype)
self.handle_matches(filename, matches, timer.duration(), self.matchtype)
elif extension and (len(matches) == 0 or self.current_filesize == 0):
matches = self.match_extensions(filename)
self.handle_matches(filename, matches, time.clock() - t0, "extension")
self.handle_matches(filename, matches, timer.duration(), "extension")
# only recurse into certain containers, like ZIP or TAR
container = self.container_type(matches)
# till here matey!
Expand Down Expand Up @@ -398,7 +417,7 @@ def identify_multi_object_stream(self, stream, extension=True):
"""
offset = 0
while True:
t0 = time.clock()
timer = PerfTimer()
content_length = -1
for line in stream:
offset += len(line)
Expand All @@ -419,25 +438,25 @@ def identify_multi_object_stream(self, stream, extension=True):
matches = self.match_formats(bofbuffer, eofbuffer)
# MdR: this needs attention
if len(matches) > 0:
self.handle_matches(self.current_file, matches, time.clock() - t0, "signature")
self.handle_matches(self.current_file, matches, timer.duration(), "signature")
elif extension and (len(matches) == 0 or self.current_filesize == 0):
matches = self.match_extensions(self.current_file)
self.handle_matches(self.current_file, matches, time.clock() - t0, "extension")
self.handle_matches(self.current_file, matches, timer.duration(), "extension")

def identify_stream(self, stream, filename, extension=True):
"""
Identify the type of @param stream.
Call self.handle_matches instead of returning a value.
Does not close stream.
"""
t0 = time.clock()
timer = PerfTimer()
bofbuffer, eofbuffer, bytes_read = self.get_buffers(stream, length=None)
self.current_filesize = bytes_read
self.current_file = 'STDIN'
matches = self.match_formats(bofbuffer, eofbuffer)
# MdR: this needs attention
if len(matches) > 0:
self.handle_matches(self.current_file, matches, time.clock() - t0, "signature")
self.handle_matches(self.current_file, matches, timer.duration(), "signature")
elif extension and (len(matches) == 0 or self.current_filesize == 0):
# we can only determine the filename from the STDIN stream
# on Linux, on Windows there is not a (simple) way to do that
Expand All @@ -456,7 +475,7 @@ def identify_stream(self, stream, filename, extension=True):
# we have to reset self.current_file if not on Windows
if os.name != "nt":
self.current_file = 'STDIN'
self.handle_matches(self.current_file, matches, time.clock() - t0, "extension")
self.handle_matches(self.current_file, matches, timer.duration(), "extension")

def container_type(self, matches):
"""
Expand Down Expand Up @@ -559,7 +578,7 @@ def walk_zip(self, filename, fileobj=None, extension=True):
for item in zipstream.infolist():
if item.file_size == 0:
continue # TODO: Find a better test for isdir, Python 3.6 adds is_dir() test to ZipInfo class
t0 = time.clock()
timer = PerfTimer()
with zipstream.open(item) as f:
item_name = filename + '!' + item.filename
self.current_file = item_name
Expand All @@ -569,10 +588,10 @@ def walk_zip(self, filename, fileobj=None, extension=True):
bofbuffer, eofbuffer, _ = self.get_buffers(f, item.file_size)
matches = self.match_formats(bofbuffer, eofbuffer)
if len(matches) > 0 and self.current_filesize > 0:
self.handle_matches(item_name, matches, time.clock() - t0, "signature")
self.handle_matches(item_name, matches, timer.duration(), "signature")
elif extension and (len(matches) == 0 or self.current_filesize == 0):
matches = self.match_extensions(item_name)
self.handle_matches(item_name, matches, time.clock() - t0, "extension")
self.handle_matches(item_name, matches, timer.duration(), "extension")
if self.container_type(matches):
target = tempfile.SpooledTemporaryFile(prefix='Fido')
with zipstream.open(item) as source:
Expand All @@ -596,14 +615,14 @@ def walk_tar(self, filename, fileobj, extension=True):
for item in tarstream.getmembers():
if not item.isfile():
continue
t0 = time.clock()
timer = PerfTimer()
with closing(tarstream.extractfile(item)) as f:
tar_item_name = filename + '!' + item.name
self.current_file = tar_item_name
self.current_filesize = item.size
bofbuffer, eofbuffer, _ = self.get_buffers(f, item.size)
matches = self.match_formats(bofbuffer, eofbuffer)
self.handle_matches(tar_item_name, matches, time.clock() - t0)
self.handle_matches(tar_item_name, matches, timer.duration())
if self.container_type(matches):
f.seek(0)
self.identify_contents(tar_item_name, f, self.container_type(matches), extension=extension)
Expand Down Expand Up @@ -650,7 +669,6 @@ def match_formats(self, bofbuffer, eofbuffer):
The list has inferior matches removed.
"""
self.current_count += 1
# t0 = time.clock()
result = []
for format in self.formats:
try:
Expand Down Expand Up @@ -689,9 +707,6 @@ def match_formats(self, bofbuffer, eofbuffer):
# print "Unexpected error:", sys.exc_info()[0], e
# sys.stdout.write('***', self.get_puid(format), regex)

# t1 = time.clock()
# if t1 - t0 > 0.02:
# print >> sys.stderr, "FIDO: Slow ID", self.current_file
result = [match for match in result if self.as_good_as_any(match[0], result)]
return result

Expand Down Expand Up @@ -768,7 +783,7 @@ def main(args=None):
sys.exit(1)
args = parser.parse_args(args)

t0 = time.clock()
timer = PerfTimer()

versions = get_local_pronom_versions(args.confdir)

Expand Down Expand Up @@ -851,7 +866,7 @@ def main(args=None):

if not args.q:
sys.stdout.flush()
fido.print_summary(time.clock() - t0)
fido.print_summary(timer.duration())
sys.stderr.flush()


Expand Down
13 changes: 13 additions & 0 deletions tests/test_fido.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from time import sleep

from fido.fido import PerfTimer


def test_perf_timer():
    """PerfTimer reports a positive elapsed time and can be restarted."""
    timer = PerfTimer()
    # A short pause is enough to get a non-zero reading; a multi-second
    # sleep only slows the test suite down without checking anything more.
    sleep(0.05)
    duration = timer.duration()
    assert duration > 0

    # start() must move the reference point forward (never backwards) and
    # leave the timer in a usable state.
    first_start = timer.start_time
    timer.start()
    assert timer.start_time >= first_start
    assert timer.duration() >= 0