diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64SHA1Op.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64SHA1Op.java index 001ed10687d9..ad5b41295b3e 100644 --- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64SHA1Op.java +++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64SHA1Op.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,8 +24,16 @@ */ package jdk.graal.compiler.lir.amd64; +import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.pointerConstant; +import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.recordExternalAddress; import static jdk.vm.ci.amd64.AMD64.xmm0; import static jdk.vm.ci.amd64.AMD64.xmm1; +import static jdk.vm.ci.amd64.AMD64.xmm10; +import static jdk.vm.ci.amd64.AMD64.xmm11; +import static jdk.vm.ci.amd64.AMD64.xmm12; +import static jdk.vm.ci.amd64.AMD64.xmm13; +import static jdk.vm.ci.amd64.AMD64.xmm14; +import static jdk.vm.ci.amd64.AMD64.xmm15; import static jdk.vm.ci.amd64.AMD64.xmm2; import static jdk.vm.ci.amd64.AMD64.xmm3; import static jdk.vm.ci.amd64.AMD64.xmm4; @@ -35,20 +43,18 @@ import static jdk.vm.ci.amd64.AMD64.xmm8; import static jdk.vm.ci.amd64.AMD64.xmm9; import static jdk.vm.ci.code.ValueUtil.asRegister; -import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.pointerConstant; -import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.recordExternalAddress; import jdk.graal.compiler.asm.Label; import jdk.graal.compiler.asm.amd64.AMD64Address; import jdk.graal.compiler.asm.amd64.AMD64Assembler.ConditionFlag; import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler; +import jdk.graal.compiler.core.amd64.AMD64LIRGenerator; import 
jdk.graal.compiler.debug.GraalError; import jdk.graal.compiler.lir.LIRInstructionClass; import jdk.graal.compiler.lir.SyncPort; import jdk.graal.compiler.lir.asm.ArrayDataPointerConstant; import jdk.graal.compiler.lir.asm.CompilationResultBuilder; -import jdk.graal.compiler.lir.gen.LIRGeneratorTool; - +import jdk.vm.ci.amd64.AMD64.CPUFeature; import jdk.vm.ci.amd64.AMD64Kind; import jdk.vm.ci.code.Register; import jdk.vm.ci.meta.AllocatableValue; @@ -76,11 +82,11 @@ public final class AMD64SHA1Op extends AMD64LIRInstruction { @Temp({OperandFlag.REG}) private Value[] temps; private final boolean multiBlock; - public AMD64SHA1Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue) { + public AMD64SHA1Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue) { this(tool, bufValue, stateValue, Value.ILLEGAL, Value.ILLEGAL, Value.ILLEGAL, false); } - public AMD64SHA1Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue, + public AMD64SHA1Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue, AllocatableValue limitValue, AllocatableValue resultValue, boolean multiBlock) { super(TYPE); @@ -92,18 +98,40 @@ public AMD64SHA1Op(LIRGeneratorTool tool, AllocatableValue bufValue, Allocatable this.multiBlock = multiBlock; - this.temps = new Value[]{ - xmm0.asValue(), - xmm1.asValue(), - xmm2.asValue(), - xmm3.asValue(), - xmm4.asValue(), - xmm5.asValue(), - xmm6.asValue(), - xmm7.asValue(), - xmm8.asValue(), - xmm9.asValue(), - }; + if (tool.supportsCPUFeature(CPUFeature.AVX)) { + // vzeroupper clears upper bits of xmm0-xmm15 + this.temps = new Value[]{ + xmm0.asValue(), + xmm1.asValue(), + xmm2.asValue(), + xmm3.asValue(), + xmm4.asValue(), + xmm5.asValue(), + xmm6.asValue(), + xmm7.asValue(), + xmm8.asValue(), + xmm9.asValue(), + xmm10.asValue(), + xmm11.asValue(), + xmm12.asValue(), + xmm13.asValue(), + xmm14.asValue(), 
+ xmm15.asValue(), + }; + } else { + this.temps = new Value[]{ + xmm0.asValue(), + xmm1.asValue(), + xmm2.asValue(), + xmm3.asValue(), + xmm4.asValue(), + xmm5.asValue(), + xmm6.asValue(), + xmm7.asValue(), + xmm8.asValue(), + xmm9.asValue(), + }; + } if (multiBlock) { this.bufTempValue = tool.newVariable(bufValue.getValueKind()); @@ -168,6 +196,12 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { Label labelDoneHash = new Label(); Label labelLoop0 = new Label(); + if (masm.supports(CPUFeature.AVX)) { + // Insert vzeroupper here to avoid performance penalty of SSE-AVX transition between + // previously executed AVX instructions and the following SHA-1 instructions. + masm.vzeroupper(); + } + masm.movdqu(abcd, new AMD64Address(state, 0)); masm.pinsrd(e0, new AMD64Address(state, 16), 3); masm.movdqu(shufMask, recordExternalAddress(crb, upperWordMask)); diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64SHA256Op.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64SHA256Op.java index c9679faedb82..ab7b35c320b2 100644 --- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64SHA256Op.java +++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64SHA256Op.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -24,9 +24,17 @@ */ package jdk.graal.compiler.lir.amd64; +import static jdk.graal.compiler.asm.amd64.AMD64Assembler.ConditionFlag.BelowEqual; +import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.pointerConstant; +import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.recordExternalAddress; import static jdk.vm.ci.amd64.AMD64.xmm0; import static jdk.vm.ci.amd64.AMD64.xmm1; import static jdk.vm.ci.amd64.AMD64.xmm10; +import static jdk.vm.ci.amd64.AMD64.xmm11; +import static jdk.vm.ci.amd64.AMD64.xmm12; +import static jdk.vm.ci.amd64.AMD64.xmm13; +import static jdk.vm.ci.amd64.AMD64.xmm14; +import static jdk.vm.ci.amd64.AMD64.xmm15; import static jdk.vm.ci.amd64.AMD64.xmm2; import static jdk.vm.ci.amd64.AMD64.xmm3; import static jdk.vm.ci.amd64.AMD64.xmm4; @@ -36,20 +44,17 @@ import static jdk.vm.ci.amd64.AMD64.xmm8; import static jdk.vm.ci.amd64.AMD64.xmm9; import static jdk.vm.ci.code.ValueUtil.asRegister; -import static jdk.graal.compiler.asm.amd64.AMD64Assembler.ConditionFlag.BelowEqual; -import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.pointerConstant; -import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.recordExternalAddress; import jdk.graal.compiler.asm.Label; import jdk.graal.compiler.asm.amd64.AMD64Address; import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler; +import jdk.graal.compiler.core.amd64.AMD64LIRGenerator; import jdk.graal.compiler.debug.GraalError; import jdk.graal.compiler.lir.LIRInstructionClass; import jdk.graal.compiler.lir.SyncPort; import jdk.graal.compiler.lir.asm.ArrayDataPointerConstant; import jdk.graal.compiler.lir.asm.CompilationResultBuilder; -import jdk.graal.compiler.lir.gen.LIRGeneratorTool; - +import jdk.vm.ci.amd64.AMD64.CPUFeature; import jdk.vm.ci.amd64.AMD64Kind; import jdk.vm.ci.code.Register; import jdk.vm.ci.meta.AllocatableValue; @@ -79,11 +84,11 @@ public final class AMD64SHA256Op extends AMD64LIRInstruction { private 
final boolean multiBlock; - public AMD64SHA256Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue) { + public AMD64SHA256Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue) { this(tool, bufValue, stateValue, Value.ILLEGAL, Value.ILLEGAL, Value.ILLEGAL, false); } - public AMD64SHA256Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue, + public AMD64SHA256Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue, AllocatableValue limitValue, AllocatableValue resultValue, boolean multiBlock) { super(TYPE); @@ -97,19 +102,42 @@ public AMD64SHA256Op(LIRGeneratorTool tool, AllocatableValue bufValue, Allocatab this.keyTempValue = tool.newVariable(bufValue.getValueKind()); - this.temps = new Value[]{ - xmm0.asValue(), - xmm1.asValue(), - xmm2.asValue(), - xmm3.asValue(), - xmm4.asValue(), - xmm5.asValue(), - xmm6.asValue(), - xmm7.asValue(), - xmm8.asValue(), - xmm9.asValue(), - xmm10.asValue(), - }; + if (tool.supportsCPUFeature(CPUFeature.AVX)) { + // vzeroupper clears upper bits of xmm0-xmm15 + this.temps = new Value[]{ + xmm0.asValue(), + xmm1.asValue(), + xmm2.asValue(), + xmm3.asValue(), + xmm4.asValue(), + xmm5.asValue(), + xmm6.asValue(), + xmm7.asValue(), + xmm8.asValue(), + xmm9.asValue(), + xmm10.asValue(), + xmm11.asValue(), + xmm12.asValue(), + xmm13.asValue(), + xmm14.asValue(), + xmm15.asValue(), + }; + } else { + // Without AVX no vzeroupper is emitted, so keep the original xmm0-xmm10 temp set + this.temps = new Value[]{ + xmm0.asValue(), + xmm1.asValue(), + xmm2.asValue(), + xmm3.asValue(), + xmm4.asValue(), + xmm5.asValue(), + xmm6.asValue(), + xmm7.asValue(), + xmm8.asValue(), + xmm9.asValue(), + xmm10.asValue(), + }; + } if (multiBlock) { this.bufTempValue = tool.newVariable(bufValue.getValueKind()); @@ -199,6 +227,12 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { // keyTemp replaces the hardcoded rax in the original stub. 
Register keyTemp = asRegister(keyTempValue); + if (masm.supports(CPUFeature.AVX)) { + // Insert vzeroupper here to avoid performance penalty of SSE-AVX transition between + // previously executed AVX instructions and the following SHA-256 instructions. + masm.vzeroupper(); + } + masm.movdqu(state0, new AMD64Address(state, 0)); masm.movdqu(state1, new AMD64Address(state, 16));