Skip to content

pythonPerformance

Marcel Schmalzl edited this page Dec 16, 2024 · 5 revisions

Timing Framework

Decorator version

def timeit(log_file_path: Optional[str] = None, ignore_measurement_overhead: bool = False) -> Callable:
    """
    Decorator to measure the execution time of a function

    Args:
        log_file_path: Log file directory; default = None. If None, no log files will be written
        ignore_measurement_overhead: Consider measurement overhead correction factor if False

    Returns:
        Callable: The wrapped function with timing measurement.
    """
    measurement_overhead_correction_factor: Optional[float] = None

    def measure_overhead() -> None:
        nonlocal measurement_overhead_correction_factor
        test_runs = 20
        total_time = 0.0
        for _ in range(test_runs):
            start_time = time.time()
            end_time = time.time()
            total_time += (end_time - start_time)
        measurement_overhead_correction_factor = total_time / test_runs

    def decorator(func: Callable) -> Callable:
        nonlocal measurement_overhead_correction_factor
        if measurement_overhead_correction_factor is None and not ignore_measurement_overhead:
            measure_overhead()

        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            start_time = time.time()
            start_timestamp = int(start_time)
            result = func(*args, **kwargs)
            end_time = time.time()
            end_timestamp = int(end_time)
            delta = end_time - start_time
            if not ignore_measurement_overhead:
                delta -= measurement_overhead_correction_factor
            runtime = time.strftime("%H:%M:%S", time.gmtime(delta))
            fqdn = f"{func.__module__}.{func.__qualname__}"

            current_time = datetime.datetime.now().strftime("%Y-%m-%d_-_%H:%M:%S")
            log_message = f"{current_time}: Function: {fqdn}: -start + end time (Unix): {start_timestamp} - {end_timestamp} = {delta:.2f}; runtime [HH:MM:SS]: {runtime}"

            print(log_message)

            if log_file_path is not None:
                if not os.path.exists(log_file_path):
                    os.makedirs(log_file_path)
                log_file_name = f"{func.__name__}_timing_{datetime.datetime.now().strftime('%Y-%m-%d')}.log"
                full_log_file_path = os.path.join(log_file_path, log_file_name)
                with open(full_log_file_path, 'a') as log_file:
                    log_file.write(log_message + "\n")

            return result
        return wrapper
    return decorator

Function version

This is a helpful snippet in order to measure and compare different implementations.

import timeit
import pypiscout as sc
import datetime

FACTOR = 10**7+1

class aClass:
    def __init__(self):
        self.a = "a"
        self.b = "b"
        self.c = "c"
        self.d = "d"
        self.e = "e"
        self.f = "f"

anObj = aClass()


def function1ToMeasure():
    """
    YOUR CODE
    :return: None
    """
    pass


def function2ToMeasure():
    """
    YOUR CODE
    :return: None
    """
    pass


if __name__ == "__main__":
    sc.header("Let's do this!")

    timeStart_empty = timeit.default_timer()
    for i in range(0, FACTOR):
        pass
    timeEnd_empty = timeit.default_timer()
    sc.info("Job no load finished at:  {}  (duration: " "{:.12f} s)".format(datetime.datetime.now().strftime("%H:%M:%S"), (timeEnd_empty - timeStart_empty)))

    # ########################

    # Function 1
    timeStart = timeit.default_timer()
    for i in range(0, FACTOR):
        function1ToMeasure()
    timeEnd = timeit.default_timer()
    sc.info("Job 1 finished at:            {}  (duration: " "{:.12f} s)".format(datetime.datetime.now().strftime("%H:%M:%S"), (timeEnd - timeStart)))

    # Function 2
    timeStart = timeit.default_timer()
    for i in range(0, FACTOR):
        function1ToMeasure()
    timeEnd = timeit.default_timer()
    sc.info("Job 2 at:            {}  (duration: " "{:.12f} s)".format(datetime.datetime.now().strftime("%H:%M:%S"), (timeEnd - timeStart)))

Tracing Tools

  • pyinstrument (statistical profiler (polls every 1 ms the call stack))
  • Pythons build-in cProfile

Performance hints

Loops

  • List comprehensions are generally faster than ordinary for-loops

Strings

Concatenation

F-strings are usually fastest (and (!) most readable). See also: https://stackoverflow.com/a/38362140/4773274

Additional Resources

JIT

Cython

Actually this is not really JIT since you compile before you run.

Linux

Process monitoring

top -p PID

-> However only live results (no history)

Windows

Windows Perfmon

Perf equivalents

https://stackoverflow.com/questions/34641644/is-there-a-windows-equivalent-of-the-linux-command-perf-stat

Tracing

Windows performance toolkit and analyser: https://learn.microsoft.com/en-us/windows-hardware/test/wpt/

Clone this wiki locally