diff --git a/lexical-benchmark/etc/plot.py b/lexical-benchmark/etc/plot.py index 66e3cf5f..3bcdb39a 100644 --- a/lexical-benchmark/etc/plot.py +++ b/lexical-benchmark/etc/plot.py @@ -10,7 +10,6 @@ import argparse import json import os -import re import matplotlib.pyplot as plt import matplotlib.ticker as ticker @@ -21,6 +20,7 @@ etc = os.path.dirname(os.path.realpath(__file__)) home = os.path.dirname(etc) + def parse_args(argv=None): '''Create and parse our command line arguments.''' @@ -42,6 +42,7 @@ def parse_args(argv=None): ) return parser.parse_args(argv) + def filename(basename, args): '''Get a resilient name for the benchmark data.''' @@ -52,6 +53,7 @@ def filename(basename, args): name = f'{name}_features={args.features}' return name + def format_time(time): '''Format time to be a nice value.''' @@ -71,14 +73,17 @@ def strip_zero(value): time /= 1000 return f'{strip_zero(str(round(time, 3)))} s' + def float_sort_key(x): '''Sort key for an float value.''' return (x[0], int(x[1:])) + def integer_sort_key(x): '''Sort key for an integral value.''' return (x[0], int(x[1:])) + def plot_bar( prefix=None, xlabel=None, @@ -115,13 +120,13 @@ def plot_ax(ax, xticks): ax.legend(libraries, fancybox=True, framealpha=1, shadow=True, borderpad=1) fig = plt.figure(figsize=(10, 8)) - index = 1 ax = fig.add_subplot(1, 1, 1) plot_ax(ax, xticks) fig.savefig(path, format='svg') fig.clf() + def plot_scatter( prefix=None, xlabel=None, @@ -146,7 +151,7 @@ def plot_ax(ax, xticks): for library in libraries: ys = [data[f'{prefix}_{i}_{library}'] for i in xticks] - points = ax.semilogy( + _ = ax.semilogy( xticks, ys, '-o', mec='k', ms=15, mew=1, alpha=.8, label=library ) @@ -170,6 +175,7 @@ def plot_ax(ax, xticks): fig.savefig(path, format='svg') fig.clf() + def plot_write_float(args): '''Plot the write float dataset.''' @@ -235,6 +241,7 @@ def plot_write_float(args): title='Random Data: BigInts', ) + def plot_write_integer(args): '''Plot the write integer dataset.''' @@ -301,6 +308,7 @@ def plot_write_integer(args): title='Random Data: Large Negative', ) + def plot_parse_float(args): '''Plot the parse float dataset.''' @@ -392,6 +400,7 @@ def plot_parse_float(args): key=lambda x: int(x), ) + def plot_parse_integer(args): '''Plot the parse integer dataset.''' @@ -452,6 +461,7 @@ def plot_parse_integer(args): title='Random Data: Large Negative', ) + def main(argv=None): '''Entry point.''' @@ -469,5 +479,6 @@ def main(argv=None): else: raise NotImplementedError + if __name__ == '__main__': main() diff --git a/lexical-benchmark/etc/run.py b/lexical-benchmark/etc/run.py index 2dd7a132..f75caf03 100644 --- a/lexical-benchmark/etc/run.py +++ b/lexical-benchmark/etc/run.py @@ -16,6 +16,7 @@ etc = os.path.dirname(os.path.realpath(__file__)) home = os.path.dirname(etc) + def parse_args(argv=None): '''Create and parse our command line arguments.''' @@ -42,6 +43,7 @@ def parse_args(argv=None): ) return parser.parse_args(argv) + def filename(basename, args): '''Get a resilient name for the benchmark data.''' @@ -52,6 +54,7 @@ def filename(basename, args): name = f'{name}_features={args.features}' return name + @contextlib.contextmanager def change_directory(path): '''Change directory and return to the original directory afterwards.''' @@ -63,6 +66,7 @@ def change_directory(path): finally: os.chdir(cwd) + def process_rust_benchmark(line): '''Process the result of an individual Rust benchmark.''' @@ -83,6 +87,7 @@ def process_rust_benchmark(line): return group, name, speed + def run_benchmark(args): '''Run a single benchmark.''' @@ -109,6 +114,7 @@ def run_benchmark(args): return data + def main(argv=None): '''Entry point.''' @@ -121,5 +127,6 @@ def main(argv=None): with open(f'{home}/results/{filename(bench, args)}.json', 'w') as file: json.dump(data, file) + if __name__ == '__main__': main() diff --git a/lexical-parse-float/etc/correctness/test-parse-unittests/to_toml.py b/lexical-parse-float/etc/correctness/test-parse-unittests/to_toml.py index 79c8418e..c3e3c7d8 100644 --- a/lexical-parse-float/etc/correctness/test-parse-unittests/to_toml.py +++ b/lexical-parse-float/etc/correctness/test-parse-unittests/to_toml.py @@ -37,6 +37,7 @@ # FLOAT HELPERS # ------------- + class FloatMixin: '''Mixing for floating-point methods.''' @@ -87,13 +88,13 @@ def mantissa(self): class Float32(FloatMixin): '''Wrapper around a 32-bit floating point value.''' - SIGN_MASK = np.uint32(0x80000000) - EXPONENT_MASK = np.uint32(0x7F800000) - HIDDEN_BIT_MASK = np.uint32(0x00800000) - MANTISSA_MASK = np.uint32(0x007FFFFF) - MANTISSA_SIZE = np.int32(23) - EXPONENT_BIAS = np.int32(127 + MANTISSA_SIZE) - DENORMAL_EXPONENT = np.int32(1 - EXPONENT_BIAS) + SIGN_MASK = np.uint32(0x80000000) # noqa + EXPONENT_MASK = np.uint32(0x7F800000) # noqa + HIDDEN_BIT_MASK = np.uint32(0x00800000) # noqa + MANTISSA_MASK = np.uint32(0x007FFFFF) # noqa + MANTISSA_SIZE = np.int32(23) # noqa + EXPONENT_BIAS = np.int32(127 + MANTISSA_SIZE) # noqa + DENORMAL_EXPONENT = np.int32(1 - EXPONENT_BIAS) # noqa def __init__(self, value): self.value = np.float32(value) @@ -103,13 +104,13 @@ def __init__(self, value): class Float64(FloatMixin): '''Wrapper around a 64-bit floating point value.''' - SIGN_MASK = np.uint64(0x8000000000000000) - EXPONENT_MASK = np.uint64(0x7FF0000000000000) - HIDDEN_BIT_MASK = np.uint64(0x0010000000000000) - MANTISSA_MASK = np.uint64(0x000FFFFFFFFFFFFF) - MANTISSA_SIZE = np.int32(52) - EXPONENT_BIAS = np.int32(1023 + MANTISSA_SIZE) - DENORMAL_EXPONENT = np.int32(1 - EXPONENT_BIAS) + SIGN_MASK = np.uint64(0x8000000000000000) # noqa + EXPONENT_MASK = np.uint64(0x7FF0000000000000) # noqa + HIDDEN_BIT_MASK = np.uint64(0x0010000000000000) # noqa + MANTISSA_MASK = np.uint64(0x000FFFFFFFFFFFFF) # noqa + MANTISSA_SIZE = np.int32(52) # noqa + EXPONENT_BIAS = np.int32(1023 + MANTISSA_SIZE) # noqa + DENORMAL_EXPONENT = np.int32(1 - EXPONENT_BIAS) # noqa def __init__(self, value): self.value = np.float64(value) @@ -180,6 +181,7 @@ def test_mantissa(self): float32 = Float32("1e-45") self.assertEqual(float32.mantissa(), np.uint32(1)) + class TestFloat64(unittest.TestCase): def test_to_bits(self): @@ -249,6 +251,7 @@ def run_tests(): '''Run unittest suite.''' unittest.main(argv=sys.argv[:1]) + def create_test(test): '''Create conversion test table.''' @@ -263,6 +266,7 @@ def create_test(test): return conversion_test + def main(source, destination): '''Run main script.''' @@ -284,6 +288,7 @@ def main(source, destination): with open(destination, 'w') as fout: print(tomlkit.dumps(document), file=fout) + if __name__ == '__main__': args = parser.parse_args() if args.test: diff --git a/lexical-parse-float/etc/powers_table.py b/lexical-parse-float/etc/powers_table.py index f5daa8c5..b15c1918 100644 --- a/lexical-parse-float/etc/powers_table.py +++ b/lexical-parse-float/etc/powers_table.py @@ -81,19 +81,19 @@ def print_large(radix, max_exp): print(f'pub const LARGE_POW{radix}: [u32; {len(limb32)}] = [') for value in limb32: print(f' {value},') - print(f'];') - print(f'') + print('];') + print('') print(f'/// Pre-computed large power-of-{radix} for 64-bit limbs.') print('#[cfg(all(target_pointer_width = "64", not(target_arch = "sparc")))]') print(f'pub const LARGE_POW{radix}: [u64; {len(limb64)}] = [') for value in limb64: print(f' {value},') - print(f'];') - print(f'') + print('];') + print('') print(f'/// Step for large power-of-{radix} for 32-bit limbs.') print(f'pub const LARGE_POW{radix}_STEP: u32 = {5 * max_exp};') - print(f'') + print('') def print_tables(radix, f64_pow_limit, f32_exp_limit, f64_exp_limit): @@ -106,11 +106,11 @@ def print_tables(radix, f64_pow_limit, f32_exp_limit, f64_exp_limit): def f32_exponent_limit(radix): return { - 3 : (-15, 15), - 5 : (-10, 10), - 6 : (-15, 15), - 7 : (-8, 8), - 9 : (-7, 7), + 3 : (-15, 15), # noqa + 5 : (-10, 10), # noqa + 6 : (-15, 15), # noqa + 7 : (-8, 8), # noqa + 9 : (-7, 7), # noqa 11: (-6, 6), 12: (-15, 15), 13: (-6, 6), @@ -206,7 +206,10 @@ def f64_power_limit(radix): }[radix] -radixes = [3, 5, 6, 7, 9, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36] +radixes = [ + 3, 5, 6, 7, 9, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36 +] for radix in radixes: f64_pow_limit = f64_power_limit(radix) f32_exp_limit = f32_exponent_limit(radix)[1] diff --git a/lexical-write-float/etc/log.py b/lexical-write-float/etc/log.py index 4353b6ec..151aa2fd 100644 --- a/lexical-write-float/etc/log.py +++ b/lexical-write-float/etc/log.py @@ -14,10 +14,12 @@ # GENERATORS # ---------- + def floor(x): # Valid even when x is negative return int(math.floor(x)) + # Does a quick generation of `x * log(a, b)` for the floor, # and validates it over the entire range of values. def calc_fast_log(max_exp, bitshift, log_base, radix, cb): @@ -30,6 +32,7 @@ def calc_fast_log(max_exp, bitshift, log_base, radix, cb): raise ValueError(f'exp={exp}, exact={exact}, guess={guess}') return num, bitshift + # Does a quick generation of `x * log(r1, b1) - log(r2, b2)` for the # floor, and validates it over the entire range of values. def calc_fast_log_sub(max_exp, bitshift, b1, r1, b2, r2, cb): @@ -43,6 +46,7 @@ def calc_fast_log_sub(max_exp, bitshift, b1, r1, b2, r2, cb): raise ValueError(f'exp={exp}, exact={exact}, guess={guess}') return num, sub, bitshift + # Does a quick generation of `x * log(r1, b1) - log(r2, b2) / div` for the # floor, and validates it over the entire range of values. def calc_fast_log_sub_div(max_exp, bitshift, b1, r1, b2, r2, div, cb): @@ -56,6 +60,7 @@ def calc_fast_log_sub_div(max_exp, bitshift, b1, r1, b2, r2, div, cb): raise ValueError(f'exp={exp}, exact={exact}, guess={guess}') return num, sub, bitshift + # Iterates over the range of valid bitshifts to try to calculate # the log constants. def gen_fast_log(max_exp, log_base, radix, cb): @@ -68,6 +73,7 @@ def gen_fast_log(max_exp, log_base, radix, cb): raise ValueError('Calculating constants for log failed.') + def gen_fast_log_sub(max_exp, b1, r1, b2, r2, cb): bitshift = 1 while bitshift <= 25: @@ -78,6 +84,7 @@ def gen_fast_log_sub(max_exp, b1, r1, b2, r2, cb): raise ValueError('Calculating constants for log failed.') + def gen_fast_log_sub_div(max_exp, b1, r1, b2, r2, div, cb): bitshift = 1 while bitshift <= 25: @@ -88,22 +95,27 @@ def gen_fast_log_sub_div(max_exp, b1, r1, b2, r2, div, cb): raise ValueError('Calculating constants for log failed.') + # This is for generating x * log2(10) def gen_log2_10(max_exp=1233): return gen_fast_log(max_exp, 2, 10, dragonbox_log2_10) + # This is for generating x * log10(2) def gen_log10_2(max_exp=1700): return gen_fast_log(max_exp, 10, 2, dragonbox_log10_2) + # This is for generating x * log5(2) def gen_log5_2(max_exp=1492): return gen_fast_log(max_exp, 5, 2, dragonbox_log5_2) + # This is for generating x * log5(2) - log5(3) def gen_log5_2_sub_log5_3(max_exp=2427): return gen_fast_log_sub(max_exp, 5, 2, 5, 3, dragonbox_log5_2_sub_log5_3) + # This is for generating x * log10(2) - log10(4 / 3) def gen_log10_2_sub_log10_4_div3(max_exp=1700): return gen_fast_log_sub_div(max_exp, 10, 2, 10, 4, 3, dragonbox_log10_2_sub_log10_4_div3) @@ -111,26 +123,32 @@ def gen_log10_2_sub_log10_4_div3(max_exp=1700): # GENERIC # ------- + # Generic, for other radixes. def exact_log(exponent, log_base, radix): return floor(exponent * math.log(radix, log_base)) + def exact_sub_log(exponent, b1, r1, b2, r2): v1 = exponent * math.log(r1, b1) v2 = math.log(r2, b2) return floor(v1 - v2) + def exact_sub_log_div(exponent, b1, r1, b2, r2, den): v1 = exponent * math.log(r1, b1) v2 = math.log(r2, b2) / den return floor(v1 - v2) + def lemire_log(exponent, multiplier, bitshift): return int((exponent * multiplier) >> bitshift) + def lemire_sub_log(exponent, multiplier, sub, bitshift): return int((exponent * multiplier - sub) >> bitshift) + def floor_shift(integer, fraction, shift): integer = np.uint32(integer) fraction = np.uint64(fraction) @@ -140,65 +158,80 @@ def floor_shift(integer, fraction, shift): # COMPUTE # ------- + # These all do `x * log5(2)` def exact_log5_2(exponent): return exact_log(exponent, 5, 2) + def lemire_log5_2(exponent): return lemire_log(exponent, 225799, 19) + def dragonbox_log5_2(exponent): q = np.int32(exponent) c = floor_shift(0, 0x6e40d1a4143dcb94, 20) s = floor_shift(0, 0, 20) return (q * c - s) >> 20 + # These all do `x * log10(2)` def exact_log10_2(exponent): return exact_log(exponent, 10, 2) + def lemire_log10_2(exponent): return lemire_log(exponent, 315653, 20) + def dragonbox_log10_2(exponent): q = np.int32(exponent) c = floor_shift(0, 0x4d104d427de7fbcc, 22) s = floor_shift(0, 0, 22) return (q * c - s) >> 22 + # These all do `x * log2(10)` def exact_log2_10(exponent): return exact_log(exponent, 2, 10) + def lemire_log2_10(exponent): return lemire_log(exponent, 1741647, 19) + def dragonbox_log2_10(exponent): q = np.int32(exponent) c = floor_shift(3, 0x5269e12f346e2bf9, 19) s = floor_shift(0, 0, 19) return (q * c - s) >> 19 + # These all do `x * log5(2) - log5(3)` def exact_log5_2_sub_log5_3(exponent): return exact_sub_log(exponent, 5, 2, 5, 3) + def lemire_log5_2_sub_log5_3(exponent): return lemire_sub_log(exponent, 451597, 715764, 20) + def dragonbox_log5_2_sub_log5_3(exponent): q = np.int32(exponent) c = floor_shift(0, 0x6e40d1a4143dcb94, 20) s = floor_shift(0, 0xaebf47915d443b24, 20) return (q * c - s) >> 20 + # These all do `x * log10(2) - log10(4) / 3` def exact_log10_2_sub_log10_4_div3(exponent): return exact_sub_log_div(exponent, 10, 2, 10, 4, 3) + def lemire_log10_2_sub_log10_4_div3(exponent): return lemire_sub_log(exponent, 1262611, 524031, 22) + def dragonbox_log10_2_sub_log10_4_div3(exponent): # This value **isn't** actually exact, so it can't work # with any automated generator. @@ -210,6 +243,7 @@ def dragonbox_log10_2_sub_log10_4_div3(exponent): # VALIDATE # -------- + def check_ratio( exact_cb, lemire_cb, @@ -262,7 +296,7 @@ def main(): max_exp=2427, ) - #print(f'gen_log10_2_sub_log10_4_div3={gen_log10_2_sub_log10_4_div3()}') + # print(f'gen_log10_2_sub_log10_4_div3={gen_log10_2_sub_log10_4_div3()}') check_ratio( exact_log10_2_sub_log10_4_div3, lemire_log10_2_sub_log10_4_div3, @@ -272,5 +306,6 @@ def main(): max_exp=1700, ) + if __name__ == '__main__': main() diff --git a/scripts/size.py b/scripts/size.py index 1acee690..319448bb 100755 --- a/scripts/size.py +++ b/scripts/size.py @@ -12,7 +12,6 @@ import argparse import json -import mimetypes import subprocess import os @@ -57,6 +56,7 @@ lto = true ''' + def parse_args(argv=None): '''Create and parse our command line arguments.''' @@ -88,6 +88,7 @@ def parse_args(argv=None): ) return parser.parse_args(argv) + def filename(basename, args): '''Get a resilient name for the benchmark data.''' @@ -98,6 +99,7 @@ def filename(basename, args): name = f'{name}_features={args.features}' return name + def plot_bar( xlabel=None, data=None, @@ -133,13 +135,13 @@ def plot_ax(ax, xticks): ax.legend(libraries, fancybox=True, framealpha=1, shadow=True, borderpad=1) fig = plt.figure(figsize=(10, 8)) - index = 1 ax = fig.add_subplot(1, 1, 1) plot_ax(ax, xticks) fig.savefig(path, format='svg') fig.clf() + def clean(): '''Clean the project''' @@ -151,6 +153,7 @@ def clean(): stderr=subprocess.DEVNULL, ) + def write_manifest(level): '''Write the manifest for the given optimization level.''' @@ -169,11 +172,12 @@ def write_manifest(level): with open(manifest, 'w') as file: file.write(contents) + def build(args, level, is_lexical): '''Build the project.''' os.chdir(f'{home}/lexical-size') - command = f'cargo +nightly build' + command = 'cargo +nightly build' if args.no_default_features: command = f'{command} --no-default-features' features = args.features @@ -195,6 +199,7 @@ def build(args, level, is_lexical): stdout=subprocess.DEVNULL, ) + def is_executable(path): '''Determine if a file is a binary executable.''' if os.name == 'nt': @@ -202,6 +207,7 @@ def is_executable(path): else: return magic.from_file(path, mime=True) == 'application/x-pie-executable' + def prettyify(size): '''Get the human readable filesize from bytes.''' @@ -216,6 +222,7 @@ def prettyify(size): return f'{size:0.1f}PB' + def get_file_size(path): '''Read the file size of a given binary.''' @@ -240,6 +247,7 @@ def get_file_size(path): return text + data + rodata + def get_sizes(level): '''Get the binary sizes for all targets.''' @@ -258,6 +266,7 @@ def get_sizes(level): return {k: v - empty for k, v in data.items()} + def strip(level): '''Strip all the binaries''' @@ -276,10 +285,12 @@ def strip(level): stdout=subprocess.DEVNULL, ) + def plot_level(args, data, level): '''Print markdown-based report for the file sizes.''' print(f'Plotting binary sizes for optimization level {level}.') + def sort_key(x): split = x.split('-') ctype = split[-1] @@ -369,6 +380,7 @@ def flatten(lib, key, filter): title=f'Write Stripped Data -- Optimization Level "{level}"', ) + def run_level(args, level, is_lexical): '''Generate the size data for a given build configuration.''' @@ -386,6 +398,7 @@ def run_level(args, level, is_lexical): return data + def run(args): '''Run the size calculations.''' @@ -399,6 +412,7 @@ def run(args): with open(f'{assets}/{file}.json', 'w') as file: json.dump(data, file) + def plot(args): '''Plot the size calculations.''' @@ -410,6 +424,7 @@ def plot(args): data = json.load(file) plot_level(args, data, level) + def main(argv=None): '''Entry point.''' @@ -419,5 +434,6 @@ def main(argv=None): if args.plot: plot(args) + if __name__ == '__main__': main() diff --git a/scripts/timings.py b/scripts/timings.py index 71d5abfc..0a9ffe1a 100755 --- a/scripts/timings.py +++ b/scripts/timings.py @@ -13,13 +13,13 @@ import matplotlib.pyplot as plt from matplotlib import patches -from matplotlib import textpath plt.style.use('ggplot') scripts = os.path.dirname(os.path.realpath(__file__)) home = os.path.dirname(scripts) + def parse_args(argv=None): '''Create and parse our command line arguments.''' @@ -41,6 +41,7 @@ def parse_args(argv=None): ) return parser.parse_args(argv) + def clean(directory=home): '''Clean the project''' @@ -52,6 +53,7 @@ def clean(directory=home): stderr=subprocess.DEVNULL, ) + def build(args, directory=home): '''Build the project and get the timings output.''' @@ -81,6 +83,7 @@ def build(args, directory=home): return data + def filename(basename, args): '''Get a resilient name for the benchmark data.''' @@ -91,6 +94,7 @@ def filename(basename, args): name = f'{name}_features={args.features}' return name + def plot_timings(timings, output, workspace=''): '''Plot our timings data.''' @@ -190,6 +194,7 @@ def max_duration(*keys): fig.savefig(output, format='svg') fig.clf() + def plot_all(args): '''Build and plot the timings for the root module.''' @@ -198,6 +203,7 @@ def plot_all(args): path = f'{home}/assets/timings_{filename("all", args)}_{os.name}.svg' plot_timings(timings, path) + def plot_workspace(args, workspace): '''Build and plot the timings for the root module.''' @@ -207,6 +213,7 @@ def plot_workspace(args, workspace): path = f'{home}/assets/{basename}.svg' plot_timings(timings, path, workspace) + def main(argv=None): '''Entry point.''' @@ -216,5 +223,6 @@ def main(argv=None): for workspace in workspaces: plot_workspace(args, workspace) + if __name__ == '__main__': main()