diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index 1852188c718a9e..fc62767c330b65 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -1892,10 +1892,50 @@ const std::pair &jl_get_llvm_disasm_target(void) llvm::SmallVector jl_get_llvm_clone_targets(void) { - if (jit_targets.empty()) - jl_error("JIT targets not initialized"); - llvm::SmallVector res; - for (auto &target: jit_targets) { + + auto &cmdline = get_cmdline_targets(); + check_cmdline(cmdline, true); + static llvm::SmallVector, 0> image_targets; + for (auto &arg: cmdline) { + auto data = arg_target_data(arg, image_targets.empty()); + image_targets.push_back(std::move(data)); + } + auto ntargets = image_targets.size(); + // Now decide the clone condition. + for (size_t i = 1; i < ntargets; i++) { + auto &t = image_targets[i]; + if (t.en.flags & JL_TARGET_CLONE_ALL) + continue; + auto &features0 = image_targets[t.base].en.features; + // Always clone when code checks CPU features + t.en.flags |= JL_TARGET_CLONE_CPU; + static constexpr uint32_t clone_fp16[] = {Feature::fp16fml,Feature::fullfp16}; + for (auto fe: clone_fp16) { + if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { + t.en.flags |= JL_TARGET_CLONE_FLOAT16; + break; + } + } + // The most useful one in general... + t.en.flags |= JL_TARGET_CLONE_LOOP; +#ifdef _CPU_ARM_ + static constexpr uint32_t clone_math[] = {Feature::vfp3, Feature::vfp4, Feature::neon}; + for (auto fe: clone_math) { + if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { + t.en.flags |= JL_TARGET_CLONE_MATH; + break; + } + } + static constexpr uint32_t clone_simd[] = {Feature::neon}; + for (auto fe: clone_simd) { + if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { + t.en.flags |= JL_TARGET_CLONE_SIMD; + break; + } + } +#endif + } + for (auto &target: image_targets) { auto features_en = target.en.features; auto features_dis = target.dis.features; for (auto &fename: feature_names) { diff --git a/src/processor_fallback.cpp b/src/processor_fallback.cpp index 87d72d5ba79588..50715f55f94239 100644 --- a/src/processor_fallback.cpp +++ b/src/processor_fallback.cpp @@ -147,10 +147,24 @@ const std::pair &jl_get_llvm_disasm_target(void) llvm::SmallVector jl_get_llvm_clone_targets(void) { - if (jit_targets.empty()) - jl_error("JIT targets not initialized"); + + auto &cmdline = get_cmdline_targets(); + check_cmdline(cmdline, true); + static llvm::SmallVector, 0> image_targets; + for (auto &arg: cmdline) { + auto data = arg_target_data(arg, image_targets.empty()); + image_targets.push_back(std::move(data)); + } + auto ntargets = image_targets.size(); + // Now decide the clone condition. + for (size_t i = 1; i < ntargets; i++) { + auto &t = image_targets[i]; + t.en.flags |= JL_TARGET_CLONE_ALL; + } + if (image_targets.empty()) + jl_error("No image targets found"); llvm::SmallVector res; - for (auto &target: jit_targets) { + for (auto &target: image_targets) { jl_target_spec_t ele; std::tie(ele.cpu_name, ele.cpu_features) = get_llvm_target_str(target); ele.data = serialize_target_data(target.name, target.en.features, diff --git a/src/processor_x86.cpp b/src/processor_x86.cpp index 7f173440c8b9b4..334b61cbd75fc2 100644 --- a/src/processor_x86.cpp +++ b/src/processor_x86.cpp @@ -910,6 +910,8 @@ static uint32_t pkgimg_init_cb(const void *id, jl_value_t **rejection_reason) return match.best_idx; } +//This function serves as a fallback during bootstrapping, at that point we don't have a sysimage with native code +// so we won't call sysimg_init_cb, else this function shouldn't do anything. static void ensure_jit_target(bool imaging) { auto &cmdline = get_cmdline_targets(); @@ -1102,13 +1104,81 @@ const std::pair &jl_get_llvm_disasm_target(void) {feature_masks, 0}, {{}, 0}, 0}); return res; } - +//This function parses the -C command line to figure out which targets to multiversion to. llvm::SmallVector jl_get_llvm_clone_targets(void) { - if (jit_targets.empty()) - jl_error("JIT targets not initialized"); + auto &cmdline = get_cmdline_targets(); + check_cmdline(cmdline, true); + static llvm::SmallVector, 0> image_targets; + for (auto &arg: cmdline) { + auto data = arg_target_data(arg, image_targets.empty()); + image_targets.push_back(std::move(data)); + } + + auto ntargets = image_targets.size(); + // Now decide the clone condition. + for (size_t i = 1; i < ntargets; i++) { + auto &t = image_targets[i]; + if (t.en.flags & JL_TARGET_CLONE_ALL) + continue; + // Always clone when code checks CPU features + t.en.flags |= JL_TARGET_CLONE_CPU; + // The most useful one in general... + t.en.flags |= JL_TARGET_CLONE_LOOP; + auto &features0 = image_targets[t.base].en.features; + // Special case for KNL/KNM since they're so different + if (!(t.dis.flags & JL_TARGET_CLONE_ALL)) { + if ((t.name == "knl" || t.name == "knm") && + image_targets[t.base].name != "knl" && image_targets[t.base].name != "knm") { + t.en.flags |= JL_TARGET_CLONE_ALL; + break; + } + } + static constexpr uint32_t clone_math[] = {Feature::fma, Feature::fma4}; + static constexpr uint32_t clone_simd[] = {Feature::sse3, Feature::ssse3, + Feature::sse41, Feature::sse42, + Feature::avx, Feature::avx2, + Feature::vaes, Feature::vpclmulqdq, + Feature::sse4a, Feature::avx512f, + Feature::avx512dq, Feature::avx512ifma, + Feature::avx512pf, Feature::avx512er, + Feature::avx512cd, Feature::avx512bw, + Feature::avx512vl, Feature::avx512vbmi, + Feature::avx512vpopcntdq, Feature::avxvnni, + Feature::avx512vbmi2, Feature::avx512vnni, + Feature::avx512bitalg, Feature::avx512bf16, + Feature::avx512vp2intersect, Feature::avx512fp16}; + for (auto fe: clone_math) { + if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { + t.en.flags |= JL_TARGET_CLONE_MATH; + break; + } + } + for (auto fe: clone_simd) { + if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { + t.en.flags |= JL_TARGET_CLONE_SIMD; + break; + } + } + static constexpr uint32_t clone_fp16[] = {Feature::avx512fp16}; + for (auto fe: clone_fp16) { + if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { + t.en.flags |= JL_TARGET_CLONE_FLOAT16; + break; + } + } + static constexpr uint32_t clone_bf16[] = {Feature::avx512bf16}; + for (auto fe: clone_bf16) { + if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { + t.en.flags |= JL_TARGET_CLONE_BFLOAT16; + break; + } + } + } + if (image_targets.empty()) + jl_error("No targets specifiec"); llvm::SmallVector res; - for (auto &target: jit_targets) { + for (auto &target: image_targets) { auto features_en = target.en.features; auto features_dis = target.dis.features; for (auto &fename: feature_names) { @@ -1126,6 +1196,13 @@ llvm::SmallVector jl_get_llvm_clone_targets(void) ele.base = target.base; res.push_back(ele); } + // print results + for (auto &ele: res) { + jl_safe_printf("Target: %s\n", ele.cpu_name.c_str()); + jl_safe_printf("Features: %s\n", ele.cpu_features.c_str()); + jl_safe_printf("Flags: %x\n", ele.flags); + jl_safe_printf("Base: %d\n", ele.base); + } return res; }