Skip to content

Commit

Permalink
Merge pull request rust-lang#299 from rust-lang/feature/jit-cpu-features
Browse files Browse the repository at this point in the history
Add support for detecting CPU features
  • Loading branch information
antoyo authored Jul 5, 2023
2 parents 48a3613 + 91e0400 commit c4e86b6
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 52 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@ A secondary goal is to check if using the gcc backend will provide any run-time
## Building

**This requires a patched libgccjit in order to work.
The patches in [this repository](https://github.com/antoyo/libgccjit-patches) need to be applied.
(Those patches should work when applied on master, but in case they don't, they are known to work when applied on 079c23cfe079f203d5df83fea8e92a60c7d7e878.)
You can also use my [fork of gcc](https://github.com/antoyo/gcc) which already includes these patches.**
You need to use my [fork of gcc](https://github.com/antoyo/gcc) which already includes these patches.**

To build it (most of these instructions come from [here](https://gcc.gnu.org/onlinedocs/jit/internals/index.html), so don't hesitate to take a look there if you encounter an issue):

Expand Down
1 change: 0 additions & 1 deletion failing-ui-tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ tests/ui/sepcomp/sepcomp-fns-backwards.rs
tests/ui/sepcomp/sepcomp-fns.rs
tests/ui/sepcomp/sepcomp-statics.rs
tests/ui/simd/intrinsic/generic-arithmetic-pass.rs
tests/ui/sse2.rs
tests/ui/target-feature/missing-plusminus.rs
tests/ui/asm/x86_64/may_unwind.rs
tests/ui/backtrace.rs
Expand Down
51 changes: 30 additions & 21 deletions src/base.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
use std::collections::HashSet;
use std::env;
use std::sync::Arc;
use std::time::Instant;

use gccjit::{
Context,
FunctionType,
GlobalKind,
GlobalKind, TargetInfo,
};
use rustc_middle::dep_graph;
use rustc_middle::ty::TyCtxt;
Expand Down Expand Up @@ -63,15 +65,15 @@ pub fn linkage_to_gcc(linkage: Linkage) -> FunctionType {
}
}

pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_integers: bool) -> (ModuleCodegen<GccContext>, u64) {
pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, target_info: Arc<TargetInfo>) -> (ModuleCodegen<GccContext>, u64) {
let prof_timer = tcx.prof.generic_activity("codegen_module");
let start_time = Instant::now();

let dep_node = tcx.codegen_unit(cgu_name).codegen_dep_node(tcx);
let (module, _) = tcx.dep_graph.with_task(
dep_node,
tcx,
(cgu_name, supports_128bit_integers),
(cgu_name, target_info),
module_codegen,
Some(dep_graph::hash_result),
);
Expand All @@ -82,7 +84,7 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
// the time we needed for codegenning it.
let cost = time_to_codegen.as_secs() * 1_000_000_000 + time_to_codegen.subsec_nanos() as u64;

fn module_codegen(tcx: TyCtxt<'_>, (cgu_name, supports_128bit_integers): (Symbol, bool)) -> ModuleCodegen<GccContext> {
fn module_codegen(tcx: TyCtxt<'_>, (cgu_name, target_info): (Symbol, Arc<TargetInfo>)) -> ModuleCodegen<GccContext> {
let cgu = tcx.codegen_unit(cgu_name);
// Instantiate monomorphizations without filling out definitions yet...
//let llvm_module = ModuleLlvm::new(tcx, &cgu_name.as_str());
Expand All @@ -91,29 +93,36 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
context.add_command_line_option("-fexceptions");
context.add_driver_option("-fexceptions");

let disabled_features: HashSet<_> = tcx.sess.opts.cg.target_feature.split(',')
.filter(|feature| feature.starts_with('-'))
.map(|string| &string[1..])
.collect();

let add_cpu_feature_flag = |feature: &str| {
// FIXME(antoyo): some tests cause a segfault in GCC when not enabling all these
// features.
if (true || target_info.cpu_supports(feature)) && !disabled_features.contains(feature) {
context.add_command_line_option(&format!("-m{}", feature));
}
};

// TODO(antoyo): only set on x86 platforms.
context.add_command_line_option("-masm=intel");
// TODO(antoyo): only add the following cli argument if the feature is supported.
context.add_command_line_option("-msse2");
context.add_command_line_option("-mavx2");
// FIXME(antoyo): the following causes an illegal instruction on vmovdqu64 in std_example on my CPU.
// Only add if the CPU supports it.
context.add_command_line_option("-msha");

let features = ["sse2", "avx", "avx2", "sha", "fma", "gfni", "f16c", "aes", "bmi2", "rtm",
"vaes", "vpclmulqdq", "xsavec",
];

for feature in &features {
add_cpu_feature_flag(feature);
}

// TODO(antoyo): only add the following cli arguments if the feature is supported.
context.add_command_line_option("-mpclmul");
context.add_command_line_option("-mfma");
context.add_command_line_option("-mfma4");
context.add_command_line_option("-m64");
context.add_command_line_option("-mbmi");
context.add_command_line_option("-mgfni");
//context.add_command_line_option("-mavxvnni"); // The CI doesn't support this option.
context.add_command_line_option("-mf16c");
context.add_command_line_option("-maes");
context.add_command_line_option("-mxsavec");
context.add_command_line_option("-mbmi2");
context.add_command_line_option("-mrtm");
context.add_command_line_option("-mvaes");
context.add_command_line_option("-mvpclmulqdq");
context.add_command_line_option("-mavx");

for arg in &tcx.sess.opts.cg.llvm_args {
context.add_command_line_option(arg);
Expand Down Expand Up @@ -156,7 +165,7 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
context.set_allow_unreachable_blocks(true);

{
let cx = CodegenCx::new(&context, cgu, tcx, supports_128bit_integers);
let cx = CodegenCx::new(&context, cgu, tcx, target_info.supports_128bit_int());

let mono_items = cgu.items_in_deterministic_order(tcx);
for &(mono_item, (linkage, visibility)) in &mono_items {
Expand Down
42 changes: 17 additions & 25 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ extern crate rustc_middle;
extern crate rustc_session;
extern crate rustc_span;
extern crate rustc_target;
extern crate tempfile;

// This prevents duplicating functions and statics that are already part of the host rustc process.
#[allow(unused_extern_crates)]
Expand Down Expand Up @@ -64,10 +63,10 @@ mod type_;
mod type_of;

use std::any::Any;
use std::sync::{Arc, Mutex};
use std::sync::Arc;

use crate::errors::LTONotSupported;
use gccjit::{Context, OptimizationLevel, CType};
use gccjit::{Context, OptimizationLevel, TargetInfo};
use rustc_ast::expand::allocator::AllocatorKind;
use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen};
use rustc_codegen_ssa::base::codegen_crate;
Expand All @@ -86,7 +85,6 @@ use rustc_session::config::{Lto, OptLevel, OutputFilenames};
use rustc_session::Session;
use rustc_span::Symbol;
use rustc_span::fatal_error::FatalError;
use tempfile::TempDir;

fluent_messages! { "../messages.ftl" }

Expand All @@ -102,7 +100,7 @@ impl<F: Fn() -> String> Drop for PrintOnPanic<F> {

#[derive(Clone)]
pub struct GccCodegenBackend {
supports_128bit_integers: Arc<Mutex<bool>>,
target_info: Arc<TargetInfo>,
}

impl CodegenBackend for GccCodegenBackend {
Expand All @@ -116,15 +114,6 @@ impl CodegenBackend for GccCodegenBackend {
if sess.lto() != Lto::No {
sess.emit_warning(LTONotSupported {});
}

let temp_dir = TempDir::new().expect("cannot create temporary directory");
let temp_file = temp_dir.into_path().join("result.asm");
let check_context = Context::default();
check_context.set_print_errors_to_stderr(false);
let _int128_ty = check_context.new_c_type(CType::UInt128t);
// NOTE: we cannot just call compile() as this would require other files than libgccjit.so.
check_context.compile_to_file(gccjit::OutputKind::Assembler, temp_file.to_str().expect("path to str"));
*self.supports_128bit_integers.lock().expect("lock") = check_context.get_last_error() == Ok(None);
}

fn provide(&self, providers: &mut Providers) {
Expand Down Expand Up @@ -160,7 +149,7 @@ impl CodegenBackend for GccCodegenBackend {
}

fn target_features(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
target_features(sess, allow_unstable)
target_features(sess, allow_unstable, &self.target_info)
}
}

Expand All @@ -174,7 +163,7 @@ impl ExtraBackendMethods for GccCodegenBackend {
}

fn compile_codegen_unit(&self, tcx: TyCtxt<'_>, cgu_name: Symbol) -> (ModuleCodegen<Self::Module>, u64) {
base::compile_codegen_unit(tcx, cgu_name, *self.supports_128bit_integers.lock().expect("lock"))
base::compile_codegen_unit(tcx, cgu_name, Arc::clone(&self.target_info))
}

fn target_machine_factory(&self, _sess: &Session, _opt_level: OptLevel, _features: &[String]) -> TargetMachineFactoryFn<Self> {
Expand Down Expand Up @@ -273,8 +262,17 @@ impl WriteBackendMethods for GccCodegenBackend {
/// This is the entrypoint for a hot plugged rustc_codegen_gccjit
#[no_mangle]
pub fn __rustc_codegen_backend() -> Box<dyn CodegenBackend> {
// Get the native arch from a default context; 128-bit integer support is queried later from the TargetInfo built below.
let context = Context::default();
let arch = context.get_target_info().arch().unwrap();

// Get the second TargetInfo with the correct CPU features by setting the arch.
let context = Context::default();
context.add_driver_option(&format!("-march={}", arch.to_str().unwrap()));
let target_info = Arc::new(context.get_target_info());

Box::new(GccCodegenBackend {
supports_128bit_integers: Arc::new(Mutex::new(false)),
target_info,
})
}

Expand Down Expand Up @@ -308,7 +306,7 @@ pub fn target_cpu(sess: &Session) -> &str {
}
}

pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
pub fn target_features(sess: &Session, allow_unstable: bool, target_info: &Arc<TargetInfo>) -> Vec<Symbol> {
supported_target_features(sess)
.iter()
.filter_map(
Expand All @@ -317,14 +315,9 @@ pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
},
)
.filter(|_feature| {
// TODO(antoyo): implement a way to get enabled feature in libgccjit.
// Probably using the equivalent of __builtin_cpu_supports.
// TODO(antoyo): maybe use whatever outputs the following command:
// gcc -march=native -Q --help=target
#[cfg(feature="master")]
{
// NOTE: the CPU in the CI doesn't support sse4a, so disable it to make the stdarch tests pass in the CI.
(_feature.contains("sse") || _feature.contains("avx")) && !_feature.contains("avx512") && !_feature.contains("sse4a")
target_info.cpu_supports(_feature)
}
#[cfg(not(feature="master"))]
{
Expand All @@ -336,7 +329,6 @@ pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm,
sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves
*/
//false
})
.map(|feature| Symbol::intern(feature))
.collect()
Expand Down

0 comments on commit c4e86b6

Please sign in to comment.