From aed981c4d4395aff666f968739cb8ffe373d5236 Mon Sep 17 00:00:00 2001 From: ZhouGuangyuan Date: Mon, 25 Aug 2025 15:01:40 +0800 Subject: [PATCH] asm readbarrier Change-Id: I01280592b6f721e95dcbd276de42b0b3225d0591 --- common_components/base/globals.h | 6 ++ common_components/common_runtime/hooks.h | 2 + .../heap/allocator/region_desc.h | 4 +- .../heap/collector/region_bitmap.h | 3 + .../assembler/aarch64/assembler_aarch64.cpp | 16 ++++++ .../assembler/aarch64/assembler_aarch64.h | 2 + .../aarch64/assembler_aarch64_constants.h | 5 ++ .../compiler/assembler/x64/assembler_x64.cpp | 33 +++++++++++ .../compiler/assembler/x64/assembler_x64.h | 6 ++ ecmascript/compiler/barrier_stub_builder.cpp | 32 ++--------- ecmascript/compiler/call_signature.cpp | 49 +++++++++++++--- ecmascript/compiler/call_signature.h | 3 +- .../codegen/llvm/aarch64/aarch64_builder.cpp | 45 ++++++++++++++- .../compiler/codegen/llvm/llvm_ir_builder.cpp | 19 +++++-- .../compiler/codegen/llvm/llvm_ir_builder.h | 4 ++ .../compiler/codegen/llvm/x64/x64_builder.cpp | 41 ++++++++++++++ .../codegen/maple/litecg_ir_builder.cpp | 6 +- .../maple_be/include/litecg/lmir_builder.h | 2 +- .../src/cg/aarch64/aarch64_cgfunc.cpp | 14 ++++- .../maple_be/src/cg/x86_64/x64_MPIsel.cpp | 14 ++++- .../maple_be/src/litecg/lmir_builder.cpp | 7 ++- .../maple/maple_ir/include/intrinsic_js.def | 2 + ecmascript/compiler/post_schedule.cpp | 16 +++--- ecmascript/compiler/stub_builder-inl.h | 3 +- ecmascript/compiler/stub_builder.cpp | 2 +- .../aarch64/asm_interpreter_call.cpp | 51 ++++++++++++++++- .../compiler/trampoline/aarch64/common_call.h | 4 +- .../trampoline/x64/asm_interpreter_call.cpp | 56 +++++++++++++++++-- .../compiler/trampoline/x64/common_call.h | 7 ++- ecmascript/stubs/runtime_stub_list.h | 3 +- ecmascript/stubs/runtime_stubs.cpp | 10 ++-- ecmascript/stubs/runtime_stubs.h | 2 +- 32 files changed, 393 insertions(+), 76 deletions(-) diff --git a/common_components/base/globals.h b/common_components/base/globals.h index 
6321c95796..ee5b5aacd0 100755 --- a/common_components/base/globals.h +++ b/common_components/base/globals.h @@ -52,6 +52,12 @@ constexpr bool IsPowerOfTwo(T x) return ret; } +template +static constexpr int Log2(T n, T acc = 0) +{ + return (n == 1) ? acc : Log2(n >> 1, acc + 1); +} + template T RoundDown(T x, typename Identity::type n) { diff --git a/common_components/common_runtime/hooks.h b/common_components/common_runtime/hooks.h index 4fe9a9487d..fe6183ce43 100644 --- a/common_components/common_runtime/hooks.h +++ b/common_components/common_runtime/hooks.h @@ -20,6 +20,7 @@ #include "common_interfaces/heap/heap_visitor.h" #include "common_interfaces/thread/mutator_base.h" +#include "common_components/heap/collector/gc_request.h" // Visitor that iterate all `RefField`s in a TaggedObject and add them to // `WorkStack` Should be moved to BaseRT and panda namespace later @@ -45,6 +46,7 @@ PUBLIC_API void VisitDynamicThreadPreforwardRoot(const RefFieldVisitor &visitorF PUBLIC_API void VisitJSThread(void *jsThread, CommonRootVisitor visitor); PUBLIC_API void SynchronizeGCPhaseToJSThread(void *jsThread, GCPhase gcPhase); +PUBLIC_API void UpdateCMCWriteBarrierStub(void *jsThread, GCPhase gcPhase, GCReason gcReason); // CMC-GC dependent interface PUBLIC_API void FillFreeObject(void *object, size_t size); diff --git a/common_components/heap/allocator/region_desc.h b/common_components/heap/allocator/region_desc.h index 8fd7191254..6f9f5cf8a9 100755 --- a/common_components/heap/allocator/region_desc.h +++ b/common_components/heap/allocator/region_desc.h @@ -94,13 +94,15 @@ public: // default common region unit size. static constexpr size_t UNIT_SIZE = 256 * KB; + static constexpr int UNIT_SIZE_LOG2 = Log2(UNIT_SIZE); // result == 18 + // threshold for object to unique a region static constexpr size_t LARGE_OBJECT_DEFAULT_THRESHOLD = UNIT_SIZE * 2 / 3; // release a large object when the size is greater than 4096KB. 
static constexpr size_t LARGE_OBJECT_RELEASE_THRESHOLD = 4096 * KB; - static constexpr size_t DEFAULT_REGION_UNIT_MASK = RegionDesc::UNIT_SIZE - 1; + static constexpr uint64_t DEFAULT_REGION_UNIT_MASK = RegionDesc::UNIT_SIZE - 1; RegionDesc() { diff --git a/common_components/heap/collector/region_bitmap.h b/common_components/heap/collector/region_bitmap.h index 0aeac4f6f4..5ac877d171 100755 --- a/common_components/heap/collector/region_bitmap.h +++ b/common_components/heap/collector/region_bitmap.h @@ -26,8 +26,11 @@ namespace common { static constexpr size_t kBitsPerByte = 8; +static constexpr size_t kBitsPerByteLog2 = Log2(kBitsPerByte); static constexpr size_t kMarkedBytesPerBit = 8; +static constexpr size_t kMarkedBytesPerBitLog2 = Log2(kMarkedBytesPerBit); static constexpr size_t kBitsPerWord = sizeof(uint64_t) * kBitsPerByte; +static constexpr size_t kBitsPerWordLog2 = Log2(kBitsPerWord); static constexpr size_t kBytesPerWord = sizeof(uint64_t) / sizeof(uint8_t); struct RegionBitmap { static constexpr uint8_t factor = 16; diff --git a/ecmascript/compiler/assembler/aarch64/assembler_aarch64.cpp b/ecmascript/compiler/assembler/aarch64/assembler_aarch64.cpp index 6bc9cda990..d3a9383975 100644 --- a/ecmascript/compiler/assembler/aarch64/assembler_aarch64.cpp +++ b/ecmascript/compiler/assembler/aarch64/assembler_aarch64.cpp @@ -296,6 +296,22 @@ void AssemblerAarch64::Ldr(const Register &rt, const MemoryOperand &operand, Sca } } +void AssemblerAarch64::Ldxr(const Register &rt, const Register &rn) +{ + bool regX = !rt.IsW(); + uint32_t op = ExclusiveOpCode::LDXR; + uint32_t instructionCode = ((regX << 30) | op | Rn(rn.GetId()) | Rt(rt.GetId())); + EmitU32(instructionCode); +} + +void AssemblerAarch64::Stxr(const Register &rm, const Register &rt, const Register &rn) +{ + bool regX = !rt.IsW(); + uint32_t op = ExclusiveOpCode::STXR; + uint32_t instructionCode = ((regX << 30) | op | Rm(rm.GetId()) | Rn(rn.GetId()) | Rt(rt.GetId())); + EmitU32(instructionCode); +} + 
void AssemblerAarch64::Ldr(const Register &rt, const MemoryOperand &operand) { Ldr(rt, operand, Scale::Q); diff --git a/ecmascript/compiler/assembler/aarch64/assembler_aarch64.h b/ecmascript/compiler/assembler/aarch64/assembler_aarch64.h index 8e84f4e87d..ccc18b1530 100644 --- a/ecmascript/compiler/assembler/aarch64/assembler_aarch64.h +++ b/ecmascript/compiler/assembler/aarch64/assembler_aarch64.h @@ -354,6 +354,8 @@ public: void Ret(const Register &rn); void Brk(const Immediate &imm); void Bind(Label *target); + void Ldxr(const Register &rt, const Register &rn); + void Stxr(const Register &rm, const Register &rt, const Register &rn); private: // common reg field defines inline uint32_t Rd(uint32_t id) diff --git a/ecmascript/compiler/assembler/aarch64/assembler_aarch64_constants.h b/ecmascript/compiler/assembler/aarch64/assembler_aarch64_constants.h index 3145a24ce1..7baf1aefa8 100644 --- a/ecmascript/compiler/assembler/aarch64/assembler_aarch64_constants.h +++ b/ecmascript/compiler/assembler/aarch64/assembler_aarch64_constants.h @@ -116,6 +116,11 @@ enum BitwiseOpCode { ORR_Shift = 0x2a000000, }; +enum ExclusiveOpCode { + LDXR = 0x885F7C00, + STXR = 0x88007C00, +}; + // branch code enum BranchOpCode { BranchFMask = 0x7C000000, diff --git a/ecmascript/compiler/assembler/x64/assembler_x64.cpp b/ecmascript/compiler/assembler/x64/assembler_x64.cpp index db984e9260..de4ec21da3 100644 --- a/ecmascript/compiler/assembler/x64/assembler_x64.cpp +++ b/ecmascript/compiler/assembler/x64/assembler_x64.cpp @@ -892,6 +892,14 @@ void AssemblerX64::Testq(Immediate src, Register dst) } } +void AssemblerX64::Test(Register src, Register dst) +{ + EmitRexPrefix(src, dst); + // 21 : And r/m64, r64 + EmitU8(0x85); + EmitModrm(src, dst); +} + void AssemblerX64::Testb(Immediate src, Register dst) { ASSERT(InRange8(src.Value())); @@ -1316,6 +1324,16 @@ void AssemblerX64::Btq(Immediate src, Register dst) EmitModrm(4, dst); EmitI8(static_cast(src.Value())); } + +void 
AssemblerX64::Btq(Register src, const Operand& dst) +{ + EmitRexPrefix(src, dst); + EmitU8(0x0F); + EmitU8(0xA3); + + EmitOperand(src, dst); +} + void AssemblerX64::Btl(Immediate src, Register dst) { EmitRexPrefix(dst); @@ -1365,6 +1383,21 @@ void AssemblerX64::Btsl(Register src, Register dst) EmitModrm(src, dst); } +void AssemblerX64::LockPrefix() +{ + EmitU8(0xF0); +} + +void AssemblerX64::Btsq(Register src, const Operand& dst) +{ + EmitRexPrefix(src, dst); + // 0F AB: bts r32, r32; + EmitU8(0x0F); + EmitU8(0xAB); + + EmitOperand(src, dst); +} + void AssemblerX64::Int3() { // CC :: INT3 diff --git a/ecmascript/compiler/assembler/x64/assembler_x64.h b/ecmascript/compiler/assembler/x64/assembler_x64.h index aad96bde62..663a0c0971 100644 --- a/ecmascript/compiler/assembler/x64/assembler_x64.h +++ b/ecmascript/compiler/assembler/x64/assembler_x64.h @@ -124,6 +124,7 @@ public: void Or(Immediate src, Register dst); void Orq(Register src, Register dst); void Btq(Immediate src, Register dst); + void Btq(Register src, const Operand &dst); void Btl(Immediate src, Register dst); void Cmpl(Register src, Register dst); void CMovbe(Register src, Register dst); @@ -154,7 +155,12 @@ public: void Shll(Immediate src, Register dst); void Shlq(Immediate src, Register dst); void Btsl(Register src, Register dst); + void LockPrefix(); + void Btsq(Register src, const Operand &dst); void Testq(Immediate src, Register dst); + + void Test(Register src, Register dst); + void Testb(Immediate src, Register dst); void Int3(); void Movzwq(const Operand &src, Register dst); diff --git a/ecmascript/compiler/barrier_stub_builder.cpp b/ecmascript/compiler/barrier_stub_builder.cpp index 3702fbf4c7..43a9fda0ce 100644 --- a/ecmascript/compiler/barrier_stub_builder.cpp +++ b/ecmascript/compiler/barrier_stub_builder.cpp @@ -751,12 +751,8 @@ void BarrierStubBuilder::DoReverseBarrier() Label markInBuffer(env); Label continueProcessing(env); Label isTaggedObject(env); - Label RefisTaggedObject(env); - 
Label markRSet(env); Label continueLoopHead(env); Label continueLoopEnd(env); - Label notMarkRSetLoopHead(env); - Label notMarkRSetLoopEnd(env); Label iLessLength(env); Label indexLessLength(env); Label notIdlePhase(env); @@ -799,29 +795,10 @@ void BarrierStubBuilder::DoReverseBarrier() } Bind(¬MarkRSet); { - DEFVARIABLE(index, VariableType::INT32(), Int32(0)); - GateRef shouldProcessSATB = ShouldProcessSATB(gcPhase); - Jump(¬MarkRSetLoopHead); - LoopBegin(¬MarkRSetLoopHead); - { - BRANCH_LIKELY(Int32UnsignedLessThan(*index, slotCount_), &indexLessLength, &exit); - Bind(&indexLessLength); - GateRef offset = PtrMul(ZExtInt32ToPtr(*index), IntPtr(JSTaggedValue::TaggedTypeSize())); - GateRef ref = LoadPrimitive(VariableType::JS_ANY(), dstAddr_, offset); - BRANCH(TaggedIsHeapObject(ref), &RefisTaggedObject, ¬MarkRSetLoopEnd); - Bind(&RefisTaggedObject); - BRANCH_UNLIKELY(shouldProcessSATB, &markInBuffer, &exit); - Bind(&markInBuffer); - { - ASSERT(RuntimeStubCSigns::Get(RTSTUB_ID(MarkInBuffer))->IsNoTailCall()); - CallNGCRuntime(glue_, RTSTUB_ID(MarkInBuffer), {ref}); - Jump(¬MarkRSetLoopEnd); - } - - Bind(¬MarkRSetLoopEnd); - index = Int32Add(*index, Int32(1)); - LoopEnd(¬MarkRSetLoopHead); - } + BRANCH_UNLIKELY(shouldProcessSATB, &markInBuffer, &exit); + Bind(&markInBuffer); + CallNGCRuntime(glue_, RTSTUB_ID(BatchMarkInBuffer), {TaggedCastToIntPtr(dstAddr_), slotCount_}); + Jump(&exit); } } } @@ -846,7 +823,6 @@ void BarrierStubBuilder::DoReverseBarrier() env->SubCfgExit(); } - void BarrierStubBuilder::DoReverseBarrierInternal() { auto env = GetEnvironment(); diff --git a/ecmascript/compiler/call_signature.cpp b/ecmascript/compiler/call_signature.cpp index faff89b72d..9219bc2ea7 100644 --- a/ecmascript/compiler/call_signature.cpp +++ b/ecmascript/compiler/call_signature.cpp @@ -2693,17 +2693,24 @@ DEF_CALL_SIGNATURE(CMCGCMarkingBarrier) DEF_CALL_SIGNATURE(GetValueWithBarrier) { // 2 : 2 input parameters - CallSignature getValueWithBarrier("GetValueWithBarrier", 0, 2, 
ArgumentsOrder::DEFAULT_ORDER, + CallSignature getValueWithBarrier("GetValueWithBarrier", 0, 3, ArgumentsOrder::DEFAULT_ORDER, VariableType::JS_ANY()); *callSign = getValueWithBarrier; // 2 : 2 input parameters - std::array params = { + std::array params = { VariableType::NATIVE_POINTER(), - VariableType::JS_POINTER() + VariableType::JS_POINTER(), + VariableType::INT64(), }; callSign->SetParameters(params.data()); callSign->SetGCLeafFunction(true); callSign->SetCallConv(CallSignature::CallConv::CCallConv); + std::vector paramAttrs = { + CallSignature::ParamAttr::NoAttr, + CallSignature::ParamAttr::NoAttr, + CallSignature::ParamAttr::Dead, + }; + callSign->SetParamAttr(std::move(paramAttrs)); } DEF_CALL_SIGNATURE(ReadBarrier) @@ -3503,12 +3510,15 @@ DEF_CALL_SIGNATURE(MarkRSetCardTable) DEF_CALL_SIGNATURE(MarkInBuffer) { - // 3 : 3 input parameters - CallSignature MarkInBuffer("MarkInBuffer", 0, 1, ArgumentsOrder::DEFAULT_ORDER, - VariableType::BOOL()); + // 4 : 4 input parameters + CallSignature MarkInBuffer("MarkInBuffer", 0, 4, ArgumentsOrder::DEFAULT_ORDER, + VariableType::VOID()); *callSign = MarkInBuffer; - std::array params = { // 1 : 1 input parameters + std::array params = { // 4 : 4 input parameters + VariableType::NATIVE_POINTER(), VariableType::JS_POINTER(), + VariableType::NATIVE_POINTER(), + VariableType::JS_ANY(), }; callSign->SetParameters(params.data()); callSign->SetGCLeafFunction(true); @@ -3530,4 +3540,29 @@ DEF_CALL_SIGNATURE(BatchMarkInBuffer) callSign->SetGCLeafFunction(true); callSign->SetTargetKind(CallSignature::TargetKind::RUNTIME_STUB_NO_GC); } + +DEF_CALL_SIGNATURE(ASMFastReadBarrier) +{ + // 2 : 2 input parameters + CallSignature signature("ASMFastReadBarrier", 0, 3, ArgumentsOrder::DEFAULT_ORDER, + VariableType::JS_ANY()); + *callSign = signature; + // 2 : 2 input parameters + std::array params = { + VariableType::NATIVE_POINTER(), + VariableType::JS_POINTER(), + VariableType::INT64(), + }; + + callSign->SetParameters(params.data()); + 
callSign->SetGCLeafFunction(true); + callSign->SetCallConv(CallSignature::CallConv::CCallConv); + callSign->SetTargetKind(CallSignature::TargetKind::ASM_CALL_BARRIER_STUB); + std::vector paramAttrs = { + CallSignature::ParamAttr::NoAttr, + CallSignature::ParamAttr::NoAttr, + CallSignature::ParamAttr::Dead, + }; + callSign->SetParamAttr(std::move(paramAttrs)); +} } // namespace panda::ecmascript::kungfu diff --git a/ecmascript/compiler/call_signature.h b/ecmascript/compiler/call_signature.h index a39c47be8a..5d41dfc4df 100644 --- a/ecmascript/compiler/call_signature.h +++ b/ecmascript/compiler/call_signature.h @@ -720,7 +720,8 @@ private: V(MarkInBuffer) \ V(BatchMarkInBuffer) \ V(CMCSetValueWithBarrier) \ - V(UpdateSharedModule) + V(UpdateSharedModule) \ + V(ASMFastReadBarrier) #define DECL_CALL_SIGNATURE(name) \ class name##CallSignature final { \ diff --git a/ecmascript/compiler/codegen/llvm/aarch64/aarch64_builder.cpp b/ecmascript/compiler/codegen/llvm/aarch64/aarch64_builder.cpp index 4924202bdf..e728b1e01e 100644 --- a/ecmascript/compiler/codegen/llvm/aarch64/aarch64_builder.cpp +++ b/ecmascript/compiler/codegen/llvm/aarch64/aarch64_builder.cpp @@ -36,7 +36,7 @@ public: // input registers, first is the runtime check barrier stub. // others are same with the sign of runtime check barrier stub. } - std::string constraints = inputRegs + "~{x15},~{nzcv},~{fpsr},~{x30}" + std::string constraints = inputRegs + "~{x15},~{nzcv},~{fpsr},~{x30}," // x15 will be used as scratch register, so mark it as clobbered, all the flag registers are also clobbered. // lr will be early clobbered at call. 
"~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7}," @@ -63,6 +63,49 @@ public: constraints.size(), true, true, LLVMInlineAsmDialectATT, false); #endif } + LLVMValueRef GetASMReadBarrierCall(LLVMModule* llvmModule, bool isDirectCall) override + { + std::string asmCall; + std::string inputRegs; + if (isDirectCall) { + asmCall = "bl " + RuntimeStubCSigns::GetRTName(RuntimeStubCSigns::ID_ASMFastReadBarrier); + inputRegs = "{x0},{x1},"; + // input registers are same with the sign of runtime check barrier stub. + } else { + asmCall = "blr $1"; // call to the first input register. + inputRegs = "r,{x0},{x1},"; + // input registers, first is the runtime check barrier stub. + // others are same with the sign of runtime check barrier stub. + } + std::string constraints = "={x15}," + inputRegs + "~{x15},~{x16},~{nzcv},~{fpsr},~{x30}," + // x15 will be used as scratch register, so mark it as clobbered, all the flag registers are also clobbered. + // lr will be early clobbered at call. + "~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7}," + // d8 ~ d15 are callee saved in C calling conv, do not mark q8 ~ q15 clobbered, but we can't use q8 ~ q15 + // cross such call site + "~{q16},~{q17},~{q18},~{q19},~{q20},~{q21},~{q22},~{q23},~{q24},~{q25},~{q26},~{q27},~{q28},~{q29},~{q30}," + "~{q31}"; + const CallSignature* cs = RuntimeStubCSigns::Get(RuntimeStubCSigns::ID_ASMFastReadBarrier); + std::vector paramTys; + if (!isDirectCall) { + paramTys.push_back(llvmModule->GetRawPtrT()); // add the runtime check barrier stub as the first arg. 
+ } + const size_t count = cs->GetParametersCount(); + const VariableType* originParamType = cs->GetParametersType(); + for (size_t i = 0; i < count; i++) { + paramTys.push_back(llvmModule->ConvertLLVMTypeFromVariableType(originParamType[i])); + } + LLVMTypeRef returnType = llvmModule->ConvertLLVMTypeFromVariableType( cs->GetReturnType()); + LLVMTypeRef functype = LLVMFunctionType(returnType, paramTys.data(), paramTys.size(), false); +#if defined(PANDA_TARGET_MACOS) + return LLVMGetInlineAsm(functype, asmCall.data(), asmCall.size(), constraints.data(), + constraints.size(), true, true, LLVMInlineAsmDialectATT); +#else + return LLVMGetInlineAsm(functype, asmCall.data(), asmCall.size(), constraints.data(), + constraints.size(), true, true, LLVMInlineAsmDialectATT, false); +#endif + } + }; class Aarch64TargetBuilderRegistry { diff --git a/ecmascript/compiler/codegen/llvm/llvm_ir_builder.cpp b/ecmascript/compiler/codegen/llvm/llvm_ir_builder.cpp index 0d9475fbbb..2719fd787c 100644 --- a/ecmascript/compiler/codegen/llvm/llvm_ir_builder.cpp +++ b/ecmascript/compiler/codegen/llvm/llvm_ir_builder.cpp @@ -83,6 +83,9 @@ LLVMIRBuilder::LLVMIRBuilder(const std::vector> *schedule, ASSERT(GlobalTargetBuilders().count(triple) && "unsupported target"); targetBuilder_ = GlobalTargetBuilders()[triple](); ASMBarrierCall_ = targetBuilder_->GetASMBarrierCall(module, enableOptDirectCall_); + ASMBarrierIndirectCall_ = targetBuilder_->GetASMBarrierCall(module, false); + ASMReadBarrierCall_ = targetBuilder_->GetASMReadBarrierCall(module, enableOptDirectCall_); + ASMReadBarrierIndirectCall_ = targetBuilder_->GetASMReadBarrierCall(module, false); const char* attrName = "no-builtin-memset"; const char* attrValue = ""; LLVMAddAttributeAtIndex( @@ -1165,11 +1168,19 @@ void LLVMIRBuilder::VisitCall(GateRef gate, const std::vector &inList, LLVMValueRef call = nullptr; if (op == OpCode::ASM_CALL_BARRIER) { - if (!enableOptDirectCall_) { - callee = LLVMBuildPointerCast(builder_, callee, 
llvmModule_->GetRawPtrT(), ""); - params.insert(params.begin(), callee); + if (acc_.GetConstantValue(inList[targetIndex]) == RTSTUB_ID(ASMFastReadBarrier)) { + if (!enableOptDirectCall_) { + callee = LLVMBuildPointerCast(builder_, callee, llvmModule_->GetRawPtrT(), ""); + params.insert(params.begin(), callee); + } + call = LLVMBuildCall(builder_, ASMReadBarrierCall_, params.data(), params.size(), ""); + } else { + if (!enableOptDirectCall_) { + callee = LLVMBuildPointerCast(builder_, callee, llvmModule_->GetRawPtrT(), ""); + params.insert(params.begin(), callee); + } + call = LLVMBuildCall(builder_, ASMBarrierCall_, params.data(), params.size(), ""); } - call = LLVMBuildCall(builder_, ASMBarrierCall_, params.data(), params.size(), ""); } else { LLVMTypeRef funcType = llvmModule_->GenerateFuncType(params, calleeDescriptor); callee = LLVMBuildPointerCast(builder_, callee, LLVMPointerType(funcType, 0), ""); diff --git a/ecmascript/compiler/codegen/llvm/llvm_ir_builder.h b/ecmascript/compiler/codegen/llvm/llvm_ir_builder.h index cef89978e9..9a8bafae2f 100644 --- a/ecmascript/compiler/codegen/llvm/llvm_ir_builder.h +++ b/ecmascript/compiler/codegen/llvm/llvm_ir_builder.h @@ -276,6 +276,7 @@ class LLVMTargetBuilder { public: virtual ~LLVMTargetBuilder() = default; virtual LLVMValueRef GetASMBarrierCall(LLVMModule *llvmModule_, bool isDirectCall) = 0; + virtual LLVMValueRef GetASMReadBarrierCall(LLVMModule *llvmModule_, bool isDirectCall) = 0; }; class LLVMIRBuilder { @@ -503,6 +504,9 @@ private: bool enableOptBranchProfiling_ {true}; bool isStwCopyStub_ {false}; LLVMValueRef ASMBarrierCall_ {nullptr}; + LLVMValueRef ASMBarrierIndirectCall_ {nullptr}; + LLVMValueRef ASMReadBarrierCall_ {nullptr}; + LLVMValueRef ASMReadBarrierIndirectCall_ {nullptr}; LLVMTargetBuilder* targetBuilder_ {nullptr}; static constexpr std::string_view COLD_ATTR = "cold"; static constexpr std::string_view READONLY_ATTR = "readonly"; diff --git a/ecmascript/compiler/codegen/llvm/x64/x64_builder.cpp 
b/ecmascript/compiler/codegen/llvm/x64/x64_builder.cpp index 0fed28255e..683d970b37 100644 --- a/ecmascript/compiler/codegen/llvm/x64/x64_builder.cpp +++ b/ecmascript/compiler/codegen/llvm/x64/x64_builder.cpp @@ -59,6 +59,47 @@ public: #else return LLVMGetInlineAsm(functype, asmCall.data(), asmCall.size(), constraints.data(), constraints.size(), true, true, LLVMInlineAsmDialectATT, false); +#endif + } + LLVMValueRef GetASMReadBarrierCall(LLVMModule *llvmModule, bool isDirectCall) override + { + std::string asmCall; + std::string inputRegs; + if (isDirectCall) { + asmCall = "call " + RuntimeStubCSigns::GetRTName(RuntimeStubCSigns::ID_ASMFastReadBarrier); + inputRegs = "{rdi},{rsi},"; + // input registers are same with the sign of runtime check barrier stub. + } else { + asmCall = "call *${1:c}"; // call to the first input register. + inputRegs = "r,{rdi},{rsi},"; + // input registers, first is the runtime check barrier stub. + // others are same with the sign of runtime check barrier stub. + } + std::string constraints = "={r11}," + inputRegs + "~{r11},~{r12},~{dirflag},~{fpsr},~{flags}," + // r11 will be used as scratch register, so mark it as clobbered, all the flag registers are also clobbered. + "~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7}," + "~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"; + // can't promise the vector registers are preserved, so mark them clobbered. + // NOTE: if AVX512 or more vector registers are enabled, need add them to clobber list. + const CallSignature *cs = RuntimeStubCSigns::Get(RuntimeStubCSigns::ID_ASMFastReadBarrier); + std::vector paramTys; + if (!isDirectCall) { + paramTys.push_back(llvmModule->GetRawPtrT()); // add the runtime check barrier stub as the first arg. 
+ } + const size_t count = cs->GetParametersCount(); + const VariableType* originParamType = cs->GetParametersType(); + for (size_t i = 0; i < count; i++) { + paramTys.push_back(llvmModule->ConvertLLVMTypeFromVariableType(originParamType[i])); + } + LLVMTypeRef returnType = llvmModule->ConvertLLVMTypeFromVariableType( cs->GetReturnType()); + + LLVMTypeRef functype = LLVMFunctionType(returnType, paramTys.data(), paramTys.size(), false); +#if defined(PANDA_TARGET_MACOS) + return LLVMGetInlineAsm(functype, asmCall.data(), asmCall.size(), constraints.data(), + constraints.size(), true, true, LLVMInlineAsmDialectATT); +#else + return LLVMGetInlineAsm(functype, asmCall.data(), asmCall.size(), constraints.data(), + constraints.size(), true, true, LLVMInlineAsmDialectATT, false); #endif } }; diff --git a/ecmascript/compiler/codegen/maple/litecg_ir_builder.cpp b/ecmascript/compiler/codegen/maple/litecg_ir_builder.cpp index fa2e3a0426..033bea4699 100644 --- a/ecmascript/compiler/codegen/maple/litecg_ir_builder.cpp +++ b/ecmascript/compiler/codegen/maple/litecg_ir_builder.cpp @@ -1722,9 +1722,11 @@ void LiteCGIRBuilder::VisitCall(GateRef gate, const std::vector &inList bool returnVoid = (returnType == lmirBuilder_->voidType); PregIdx returnPregIdx = returnVoid ? 
-1 : lmirBuilder_->CreatePreg(returnType); if (op == OpCode::ASM_CALL_BARRIER) { - if (!returnVoid) LOG_JIT(INFO) << "barrier has return use\n"; - Stmt &pureCall = lmirBuilder_->PureCall(callee, params); + Stmt &pureCall = lmirBuilder_->PureCall(callee, params, returnPregIdx); lmirBuilder_->AppendStmt(bb, pureCall); + if (!returnVoid) { + SaveGate2Expr(gate, lmirBuilder_->Regread(returnPregIdx)); + } return; } Stmt &callNode = diff --git a/ecmascript/compiler/codegen/maple/maple_be/include/litecg/lmir_builder.h b/ecmascript/compiler/codegen/maple/maple_be/include/litecg/lmir_builder.h index 8ee509945b..82b311ac99 100644 --- a/ecmascript/compiler/codegen/maple/maple_be/include/litecg/lmir_builder.h +++ b/ecmascript/compiler/codegen/maple/maple_be/include/litecg/lmir_builder.h @@ -366,7 +366,7 @@ public: Stmt &TailICall(Expr funcAddr, Args &args); - Stmt &PureCall(Expr funcAddr, Args &args, Var *result = nullptr); + Stmt &PureCall(Expr funcAddr, Args &args, PregIdx pregIdx); Stmt &ICall(Expr funcAddr, Args &args, Var *result = nullptr); diff --git a/ecmascript/compiler/codegen/maple/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp b/ecmascript/compiler/codegen/maple/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp index bc2fbfaa41..6573d358e6 100644 --- a/ecmascript/compiler/codegen/maple/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp +++ b/ecmascript/compiler/codegen/maple/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp @@ -4452,21 +4452,31 @@ RegOperand &AArch64CGFunc::LoadOpndIntoPhysicalRegister(const IntrinsiccallNode void AArch64CGFunc::SelectPureCall(const IntrinsiccallNode &intrnNode) { - DEBUG_ASSERT(intrnNode.NumOpnds() == 6, "must be 6 operands"); // must be 6 operands // deal with parms ListOperand *srcOpnds = CreateListOpnd(*GetFuncScopeAllocator()); auto &callee = *intrnNode.Opnd(0); auto ptyp = callee.GetPrimType(); RegOperand &calleeReg = LoadIntoRegister(*HandleExpr(intrnNode, callee), ptyp); uint32 i = 1; - for (; i < kSeventhReg; i++) { + for (; i < 
intrnNode.NumOpnds(); i++) { srcOpnds->PushOpnd(LoadOpndIntoPhysicalRegister(intrnNode, i)); } + const CallReturnVector& retVec = intrnNode.GetReturnVec(); // R15 is used in asm call srcOpnds->PushOpnd(GetOrCreatePhysicalRegisterOperand(static_cast(R15), GetPointerSize() * kBitsPerByte, kRegTyInt)); + if (!retVec.empty()) { + srcOpnds->PushOpnd(GetOrCreatePhysicalRegisterOperand(static_cast(R16), + GetPointerSize() * kBitsPerByte, kRegTyInt)); + } Insn &callInsn = GetInsnBuilder()->BuildInsn(MOP_pure_call, calleeReg, *srcOpnds); GetCurBB()->AppendInsn(callInsn); + if (!retVec.empty()) { + PregIdx pregIdx = retVec[0].second.GetPregIdx(); + RegOperand &resReg = GetOrCreateVirtualRegisterOperand(GetVirtualRegNOFromPseudoRegIdx(pregIdx)); + RegOperand &srcReg = GetOrCreatePhysicalRegisterOperand(static_cast(R15), k64BitSize, kRegTyInt); + GetCurBB()->AppendInsn(GetInsnBuilder()->BuildInsn(MOP_xmovrr, resReg, srcReg)); + } } void AArch64CGFunc::SelectIntrinsicCall(IntrinsiccallNode &intrinsiccallNode) diff --git a/ecmascript/compiler/codegen/maple/maple_be/src/cg/x86_64/x64_MPIsel.cpp b/ecmascript/compiler/codegen/maple/maple_be/src/cg/x86_64/x64_MPIsel.cpp index ddae43e965..2b6acfbabd 100644 --- a/ecmascript/compiler/codegen/maple/maple_be/src/cg/x86_64/x64_MPIsel.cpp +++ b/ecmascript/compiler/codegen/maple/maple_be/src/cg/x86_64/x64_MPIsel.cpp @@ -376,22 +376,32 @@ void X64MPIsel::SelectOverFlowCall(const IntrinsiccallNode &intrnNode) void X64MPIsel::SelectPureCall(const IntrinsiccallNode &intrnNode) { - DEBUG_ASSERT(intrnNode.NumOpnds() == 6, "must be 6 operands"); // must be 6 operands ListOperand &srcOpnds = cgFunc->GetOpndBuilder()->CreateList(); auto &callee = *intrnNode.Opnd(0); auto ptyp = callee.GetPrimType(); RegOperand &calleeReg = SelectCopy2Reg(*HandleExpr(intrnNode, callee), ptyp); uint32 i = 1; - for (; i < kSeventhReg; i++) { + for (; i < intrnNode.NumOpnds(); i++) { srcOpnds.PushOpnd(LoadOpndIntoPhysicalRegister(intrnNode, i)); } // R11 is used in asm 
call srcOpnds.PushOpnd(cgFunc->GetOpndBuilder()->CreatePReg(x64::R11, GetPrimTypeBitSize(PTY_i64), kRegTyInt)); + const CallReturnVector& retVec = intrnNode.GetReturnVec(); + if (!retVec.empty()) { + srcOpnds.PushOpnd(cgFunc->GetOpndBuilder()->CreatePReg(x64::R12, GetPrimTypeBitSize(PTY_i64), kRegTyInt)); + cgFunc->AddtoCalleeSaved(x64::R12); + } MOperator mOp = x64::MOP_pure_call; Insn &callInsn = cgFunc->GetInsnBuilder()->BuildInsn(mOp, X64CG::kMd[mOp]); callInsn.AddOpndChain(calleeReg); callInsn.AddOpndChain(srcOpnds); cgFunc->GetCurBB()->AppendInsn(callInsn); + if (!retVec.empty()) { + PregIdx pregIdx = retVec[0].second.GetPregIdx(); + RegOperand &resReg = cgFunc->GetOpndBuilder()->CreateVReg(cgFunc->GetVirtualRegNOFromPseudoRegIdx(pregIdx),k64BitSize,kRegTyInt); + RegOperand &srcReg = cgFunc->GetOpndBuilder()->CreatePReg(x64::R11, k64BitSize, kRegTyInt); + cgFunc->GetCurBB()->AppendInsn(cgFunc->GetInsnBuilder()->BuildInsn(x64::MOP_movq_r_r, srcReg,resReg)); + } return; } diff --git a/ecmascript/compiler/codegen/maple/maple_be/src/litecg/lmir_builder.cpp b/ecmascript/compiler/codegen/maple/maple_be/src/litecg/lmir_builder.cpp index 9941e91333..9b272dd6d2 100644 --- a/ecmascript/compiler/codegen/maple/maple_be/src/litecg/lmir_builder.cpp +++ b/ecmascript/compiler/codegen/maple/maple_be/src/litecg/lmir_builder.cpp @@ -466,13 +466,16 @@ Stmt &LMIRBuilder::TailICall(Expr funcAddr, Args &args_) return *mirBuilder.CreateStmtTailIcall(args); } -Stmt &LMIRBuilder::PureCall(Expr funcAddr, Args &args_, Var *result) +Stmt &LMIRBuilder::PureCall(Expr funcAddr, Args &args_, PregIdx pregIdx) { MapleVector args(mirBuilder.GetCurrentFuncCodeMpAllocator()->Adapter()); args.push_back(funcAddr.GetNode()); - for (const auto &arg : args_) { + for (const auto &arg: args_) { args.emplace_back(arg.GetNode()); } + if (pregIdx > 0) { + return *mirBuilder.CreateStmtIntrinsicCallAssigned(MIRIntrinsicID::INTRN_JS_PURE_CALL, args, pregIdx, 0); + } return 
*mirBuilder.CreateStmtIntrinsicCall(MIRIntrinsicID::INTRN_JS_PURE_CALL, args); } diff --git a/ecmascript/compiler/codegen/maple/maple_ir/include/intrinsic_js.def b/ecmascript/compiler/codegen/maple/maple_ir/include/intrinsic_js.def index f716a6e6cf..6cdaac1cf4 100644 --- a/ecmascript/compiler/codegen/maple/maple_ir/include/intrinsic_js.def +++ b/ecmascript/compiler/codegen/maple/maple_ir/include/intrinsic_js.def @@ -22,6 +22,8 @@ DEF_MIR_INTRINSIC(MUL_WITH_OVERFLOW,\ "__mul_with_overflow", kArgTyI32, kArgTyI32, kArgTyI32) DEF_MIR_INTRINSIC(JS_PURE_CALL,\ "__jsop_purecall", kArgTyDynany, kArgTyDynany, kArgTyDynany, kArgTyDynany, kArgTyDynany, kArgTyDynany, kArgTyDynany, kArgTyDynany) +DEF_MIR_INTRINSIC(JS_PURE_CALL_WITH_RETURN,\ + "__jsop_purecall_with_return", kArgTyDynany, kArgTyDynany, kArgTyDynany, kArgTyDynany, kArgTyDynany, kArgTyDynany, kArgTyDynany, kArgTyDynany) DEF_MIR_INTRINSIC(HEAP_CONSTANT,\ "__heap_constant", kArgTyI64, kArgTyI64, kArgTyI64) DEF_MIR_INTRINSIC(GET_HEAP_CONSTANT_TABLE,\ diff --git a/ecmascript/compiler/post_schedule.cpp b/ecmascript/compiler/post_schedule.cpp index d05772ca16..943ccd7197 100644 --- a/ecmascript/compiler/post_schedule.cpp +++ b/ecmascript/compiler/post_schedule.cpp @@ -951,14 +951,15 @@ void PostSchedule::LoweringLoadWithBarrierAndPrepareScheduleGate(GateRef gate, builder_.Bind(&callRuntime); { GateRef ifFalse = builder_.GetState(); - int index = CommonStubCSigns::GetValueWithBarrier; - const CallSignature *cs = CommonStubCSigns::Get(index); - ASSERT(cs->IsCommonStub()); + // int index = CommonStubCSigns::GetValueWithBarrier; + // const CallSignature *cs = CommonStubCSigns::Get(index); + // ASSERT(cs->IsCommonStub()); + int index = RuntimeStubCSigns::ID_ASMFastReadBarrier; + const CallSignature *cs = RuntimeStubCSigns::Get(index); GateRef target = circuit_->GetConstantGateWithoutCache(MachineType::ARCH, index, GateType::NJSValue()); - GateRef reservedFrameArgs = circuit_->GetConstantGateWithoutCache(MachineType::I64, 0, 
GateType::NJSValue()); - GateRef reservedPc = circuit_->GetConstantGateWithoutCache(MachineType::I64, 0, GateType::NJSValue()); + GateRef reservedFrameState = circuit_->GetConstantGateWithoutCache(MachineType::I64, 0, GateType::NJSValue()); GateRef loadBarrier = builder_.Call(cs, glue, target, builder_.GetDepend(), - { glue, addr, reservedFrameArgs, reservedPc }, + { glue, addr, reservedFrameState}, Circuit::NullGate(), "load barrier"); result = loadBarrier; builder_.Jump(&exit); @@ -966,8 +967,7 @@ void PostSchedule::LoweringLoadWithBarrierAndPrepareScheduleGate(GateRef gate, GateRef ordinaryBlock = callRuntime.GetControl(); PrepareToScheduleNewGate(ordinaryBlock, failBBGates); PrepareToScheduleNewGate(loadBarrier, failBBGates); - PrepareToScheduleNewGate(reservedPc, failBBGates); - PrepareToScheduleNewGate(reservedFrameArgs, failBBGates); + PrepareToScheduleNewGate(reservedFrameState, failBBGates); PrepareToScheduleNewGate(target, failBBGates); PrepareToScheduleNewGate(ifFalse, failBBGates); } diff --git a/ecmascript/compiler/stub_builder-inl.h b/ecmascript/compiler/stub_builder-inl.h index 3881f4606e..899ddab8f4 100644 --- a/ecmascript/compiler/stub_builder-inl.h +++ b/ecmascript/compiler/stub_builder-inl.h @@ -426,7 +426,8 @@ inline GateRef StubBuilder::Load(VariableType type, GateRef glue, GateRef base, type = env_->IsArch64Bit() ? 
VariableType::INT64() : VariableType::INT32(); } if (type == VariableType::JS_ANY() || type == VariableType::JS_POINTER()) { - return env_->GetBuilder()->Load(type, glue, base, offset); + GateRef res = env_->GetBuilder()->Load(type, glue, base, offset); + return res; } return LoadPrimitive(type, base, offset); } diff --git a/ecmascript/compiler/stub_builder.cpp b/ecmascript/compiler/stub_builder.cpp index d2559c6c0e..5f38d63462 100644 --- a/ecmascript/compiler/stub_builder.cpp +++ b/ecmascript/compiler/stub_builder.cpp @@ -2061,7 +2061,7 @@ void StubBuilder::CMCSetValueWithBarrier(GateRef glue, GateRef obj, [[maybe_unus BRANCH_UNLIKELY(shouldProcessSATB, &markInBuffer, &exit); Bind(&markInBuffer); { - CallNGCRuntime(glue, RTSTUB_ID(MarkInBuffer), {value}); + CallNGCRuntime(glue, RTSTUB_ID(MarkInBuffer), {glue, obj, offset, value}); Jump(&exit); } Bind(&exit); diff --git a/ecmascript/compiler/trampoline/aarch64/asm_interpreter_call.cpp b/ecmascript/compiler/trampoline/aarch64/asm_interpreter_call.cpp index ff7358cd2f..cb24275962 100644 --- a/ecmascript/compiler/trampoline/aarch64/asm_interpreter_call.cpp +++ b/ecmascript/compiler/trampoline/aarch64/asm_interpreter_call.cpp @@ -17,6 +17,7 @@ #include "ecmascript/js_generator_object.h" #include "ecmascript/message_string.h" +#include "common_components/heap/allocator/region_desc.h" namespace panda::ecmascript::aarch64 { using Label = panda::ecmascript::Label; @@ -1109,9 +1110,50 @@ void AsmInterpreterCall::CallReturnWithArgv(ExtendedAssembler *assembler) } } +// ASMFastReadBarrier(GateRef glue, GateRef addr) +// c calling convention, but preserve all general registers except %x15 +// %x0 - glue +// %x1 - addr +void AsmInterpreterCall::ASMFastReadBarrier(ExtendedAssembler *assembler) +{ + __ BindAssemblerStub(RTSTUB_ID(ASMFastReadBarrier)); + Label done; + Label needCall; + __ Ldr(X15, MemoryOperand(X1, 0)); + __ Mov(X16, Immediate(JSTaggedValue::TAG_HEAPOBJECT_MASK)); + __ Tst(X15, Register(X16)); + __ B(NE, &done); + 
__ Ldr(X16, MemoryOperand(X0, JSThread::GlueData::GetHeapStartAddrOffset(false))); + __ Cmp(X16, Register(X15)); + __ B(GT, &done); + __ Ldr(X16, MemoryOperand(X0, JSThread::GlueData::GetHeapCurrentEndOffset(false))); + __ Cmp(X16, Register(X15)); + __ B(LE, &done); + __ And(X16, X15, LogicalImmediate::Create(0xfffffffc0000, RegXSize)); + __ Ldrb(Register(X16, W), MemoryOperand(X16, 16)); + __ Cmp(Register(X16, W), Immediate(0x4)); + __ B(NE, &done); + __ And(X16, X15, LogicalImmediate::Create(0xfffffffffffffffe, RegXSize)); + __ Ldr(X16, MemoryOperand(X16, 0)); + __ Tbz(X16, 63, &needCall); + __ And(X15, X15, LogicalImmediate::Create(1, RegXSize)); + __ And(X16, X16, LogicalImmediate::Create(0xffffffffffff, RegXSize)); + __ Orr(X15, X15, Register(X16)); + __ Ret(); + __ Bind(&needCall); + { + int32_t readBarrier = static_cast<int32_t>(JSThread::GlueData::GetRTStubEntriesOffset(false)) + + RTSTUB_ID(ReadBarrier) * FRAME_SLOT_SIZE; + __ Mov(X15, readBarrier); + __ Ldr(X15, MemoryOperand(X0, Register(X15), UXTX)); + PreserveMostCall(assembler, true, X15); + } + __ Bind(&done); + __ Ret(); +} // preserve all the general registers, except x15 and callee saved registers/ // and call x15 -void AsmInterpreterCall::PreserveMostCall(ExtendedAssembler* assembler) +void AsmInterpreterCall::PreserveMostCall(ExtendedAssembler* assembler, bool needReturn, Register retReg) { // * layout as the following: // +--------------------------+ --------- @@ -1182,6 +1224,9 @@ void AsmInterpreterCall::PreserveMostCall(ExtendedAssembler* assembler) __ Stp(X16, X17, MemoryOperand(SP, DOUBLE_SLOT_SIZE * (--PreserveRegPairIndex))); __ Str(X18, MemoryOperand(SP, FRAME_SLOT_SIZE)); __ Blr(X15); + if (needReturn) { + __ Mov(retReg, Register(X0)); + } __ Ldr(X18, MemoryOperand(SP, FRAME_SLOT_SIZE)); __ Ldp(X16, X17, MemoryOperand(SP, DOUBLE_SLOT_SIZE * (PreserveRegPairIndex++))); __ Ldp(X13, X14, MemoryOperand(SP, DOUBLE_SLOT_SIZE * (PreserveRegPairIndex++))); @@ -1198,7 +1243,6 @@ void 
AsmInterpreterCall::PreserveMostCall(ExtendedAssembler* assembler) __ Add(SP, SP, Immediate(DOUBLE_SLOT_SIZE * PreserveRegPairIndex + FRAME_SLOT_SIZE + FRAME_SLOT_SIZE)); __ Ldp(FP, X30, MemoryOperand(SP, DOUBLE_SLOT_SIZE, AddrMode::POSTINDEX)); - __ Ret(); } } @@ -1308,7 +1352,8 @@ void AsmInterpreterCall::ASMFastWriteBarrier(ExtendedAssembler* assembler) __ Bind(&needCall); { __ Ldr(X15, MemoryOperand(X0, Register(X15), UXTX)); - PreserveMostCall(assembler); + PreserveMostCall(assembler, false, INVALID_REG); + __ Ret(); } __ Bind(&needShareBarrier); { diff --git a/ecmascript/compiler/trampoline/aarch64/common_call.h b/ecmascript/compiler/trampoline/aarch64/common_call.h index 2bb07bff4c..5c22d77c8e 100644 --- a/ecmascript/compiler/trampoline/aarch64/common_call.h +++ b/ecmascript/compiler/trampoline/aarch64/common_call.h @@ -215,6 +215,8 @@ public: static void CallReturnWithArgv([[maybe_unused]]ExtendedAssembler *assembler); + static void ASMFastReadBarrier(ExtendedAssembler *assembler); + static void ASMFastWriteBarrier(ExtendedAssembler *assembler); static void ASMFastSharedWriteBarrier(ExtendedAssembler *assembler, Label& needCall); @@ -271,7 +273,7 @@ private: static void CallNativeEntry(ExtendedAssembler *assembler, bool isJSFunction); static void CallNativeWithArgv(ExtendedAssembler *assembler, bool callNew, bool hasNewTarget = false); - static void PreserveMostCall(ExtendedAssembler* assembler); + static void PreserveMostCall(ExtendedAssembler* assembler, bool needReturn, Register retReg); friend class OptimizedCall; friend class BaselineCall; }; diff --git a/ecmascript/compiler/trampoline/x64/asm_interpreter_call.cpp b/ecmascript/compiler/trampoline/x64/asm_interpreter_call.cpp index aece3f6475..af4672f8ac 100644 --- a/ecmascript/compiler/trampoline/x64/asm_interpreter_call.cpp +++ b/ecmascript/compiler/trampoline/x64/asm_interpreter_call.cpp @@ -13,11 +13,11 @@ * limitations under the License. 
*/ - #include "ecmascript/compiler/trampoline/x64/common_call.h" #include "ecmascript/js_generator_object.h" #include "ecmascript/message_string.h" +#include "common_components/heap/allocator/region_desc.h" namespace panda::ecmascript::x64 { #define __ assembler-> @@ -1436,7 +1436,7 @@ void AsmInterpreterCall::ResumeRspAndRollback(ExtendedAssembler *assembler) // preserve all the general registers, except r11 and callee saved registers/ // and call r11 -void AsmInterpreterCall::PreserveMostCall(ExtendedAssembler* assembler) +void AsmInterpreterCall::PreserveMostCall(ExtendedAssembler* assembler, bool needReturn, Register retReg) { // * layout as the following: // +--------------------------+ --------- @@ -1485,6 +1485,9 @@ void AsmInterpreterCall::PreserveMostCall(ExtendedAssembler* assembler) __ Movq(r10, Operand(rsp, FRAME_SLOT_SIZE * (--PreserveRegisterIndex))); __ Movq(rax, Operand(rsp, FRAME_SLOT_SIZE * (--PreserveRegisterIndex))); __ Callq(r11); + if (needReturn) { + __ Movq(rax, retReg); + } __ Movq(Operand(rsp, FRAME_SLOT_SIZE * (PreserveRegisterIndex++)), rax); __ Movq(Operand(rsp, FRAME_SLOT_SIZE * (PreserveRegisterIndex++)), r10); __ Movq(Operand(rsp, FRAME_SLOT_SIZE * (PreserveRegisterIndex++)), r9); @@ -1498,7 +1501,6 @@ void AsmInterpreterCall::PreserveMostCall(ExtendedAssembler* assembler) // need add the frametype slot __ Addq(PreserveRegisterIndex * FRAME_SLOT_SIZE + FRAME_SLOT_SIZE, rsp); __ Popq(rbp); - __ Ret(); } } @@ -1604,7 +1606,8 @@ void AsmInterpreterCall::ASMFastWriteBarrier(ExtendedAssembler* assembler) } __ Bind(&needCall); { - PreserveMostCall(assembler); + PreserveMostCall(assembler, false, rInvalid); + __ Ret(); } __ Bind(&needShareBarrier); { @@ -1742,6 +1745,51 @@ void AsmInterpreterCall::ASMFastSharedWriteBarrier(ExtendedAssembler* assembler, } } +void AsmInterpreterCall::ASMFastReadBarrier(ExtendedAssembler *assembler) +{ + __ BindAssemblerStub(RTSTUB_ID(ASMFastReadBarrier)); + Label done; + Label needCall; + __ Movq(Operand(rsi, 
0),r11); + __ Movabs(JSTaggedValue::TAG_HEAPOBJECT_MASK,r12); + __ Test( r12,r11); + __ Jne(&done); + __ Movq(Operand(rdi, JSThread::GlueData::GetHeapStartAddrOffset(false)), r12); + __ Cmpq(r11, r12); + __ Jg(&done); + __ Movq(Operand(rdi, JSThread::GlueData::GetHeapCurrentEndOffset(false)), r12); + __ Cmpq(r11, r12); + __ Jle( &done); + __ Movabs(0xfffffffc0000,r12); + __ And(r11, r12); + __ Movq(Operand(r12, 16), r12); + __ Cmpb(0x4, r12); + __ Jne(&done); + __ Movabs(0xfffffffffffffffe,r12); + __ And(r11, r12); + __ Movq(Operand(r12, 0), r12); + __ Shrq(0x3e, r12); + __ Cmp(0x2, r12); + __ Jne(&needCall); + __ Movabs(0xfffffffffffffffe,r12); + __ And(r11, r12); + __ Movq(Operand(r12, 0), r12); + __ Andq(1, r11); + __ Orq(r12, r11); + __ Movabs(0xffffffffffff,r12); + __ And(r12, r11); + __ Ret(); + __ Bind(&needCall); + { + int32_t readBarrier = static_cast<int32_t>(JSThread::GlueData::GetRTStubEntriesOffset(false)) + + RTSTUB_ID(ReadBarrier) * FRAME_SLOT_SIZE; + __ Movq(Operand(rdi, readBarrier), r11); + PreserveMostCall(assembler, true, r11); + } + __ Bind(&done); + __ Ret(); +} + void AsmInterpreterCall::PushUndefinedWithArgcAndCheckStack(ExtendedAssembler *assembler, Register glue, Register argc, Register op1, Register op2, Label *stackOverflow) { diff --git a/ecmascript/compiler/trampoline/x64/common_call.h b/ecmascript/compiler/trampoline/x64/common_call.h index 2143c13bc8..b645a39b18 100644 --- a/ecmascript/compiler/trampoline/x64/common_call.h +++ b/ecmascript/compiler/trampoline/x64/common_call.h @@ -185,6 +185,9 @@ public: static void ResumeRspAndRollback(ExtendedAssembler *assembler); static void ASMFastWriteBarrier(ExtendedAssembler *assembler); + + static void ASMFastReadBarrier(ExtendedAssembler * extended_assembler); + private: static void PushFrameState(ExtendedAssembler *assembler, Register prevSpRegister, Register fpRegister, Register callTargetRegister, Register thisRegister, Register methodRegister, Register pcRegister, @@ -221,8 +224,10 @@ 
static void JSCallCommonFastPath(ExtendedAssembler *assembler, JSCallMode mode, Label *stackOverflow); static void JSCallCommonSlowPath(ExtendedAssembler *assembler, JSCallMode mode, Label *fastPathEntry, Label *pushCallThis, Label *stackOverflow); - static void PreserveMostCall(ExtendedAssembler* assembler); + static void PreserveMostCall(ExtendedAssembler* assembler, bool needReturn, Register retReg); static void ASMFastSharedWriteBarrier(ExtendedAssembler *assembler, Label &needcall); + static void ASMCMCFastUpdateRSet(ExtendedAssembler *assembler, + const std::function &doShortcut); friend class OptimizedCall; friend class BaselineCall; }; diff --git a/ecmascript/stubs/runtime_stub_list.h b/ecmascript/stubs/runtime_stub_list.h index f9dcd9c4f0..34f5d2091b 100644 --- a/ecmascript/stubs/runtime_stub_list.h +++ b/ecmascript/stubs/runtime_stub_list.h @@ -56,7 +56,8 @@ namespace panda::ecmascript { V(CallContainersArgs2) \ V(CallContainersArgs3) \ V(CallReturnWithArgv) \ - V(ASMFastWriteBarrier) + V(ASMFastWriteBarrier) \ + V(ASMFastReadBarrier) #define BASELINE_TRAMPOLINE_LIST(V) \ V(CallArg0AndCheckToBaseline) \ diff --git a/ecmascript/stubs/runtime_stubs.cpp b/ecmascript/stubs/runtime_stubs.cpp index 4cc4a00550..0aa29975c1 100644 --- a/ecmascript/stubs/runtime_stubs.cpp +++ b/ecmascript/stubs/runtime_stubs.cpp @@ -975,7 +975,7 @@ DEF_RUNTIME_STUBS(DumpObject) RUNTIME_STUBS_HEADER(DumpObject); JSHandle<JSTaggedValue> target = GetHArg<JSTaggedValue>(argv, argc, 0); // 0: means the zeroth parameter JSHandle<JSTaggedValue> targetId = GetHArg<JSTaggedValue>(argv, argc, 1); // 1: means the first parameter - LOG_ECMA(INFO) << "InstanceOf Stability Testing Num: " << targetId->GetInt(); + LOG_ECMA(INFO) << "InstanceOf Stability Testing Num: " << targetId->GetInt() <<" "<<target->GetRawData(); std::ostringstream oss; target->Dump(thread, oss); LOG_ECMA(INFO) << "dump log for instance of target: " << oss.str(); @@ -5042,10 +5042,12 @@ bool RuntimeStubs::MarkRSetCardTable(BaseObject* obj) return region->MarkRSetCardTable(obj); } -void 
RuntimeStubs::MarkInBuffer(BaseObject* ref) +void RuntimeStubs::MarkInBuffer(uintptr_t argGlue, [[maybe_unused]] BaseObject *obj, [[maybe_unused]] uintptr_t offset, + BaseObject *ref) { - ref = reinterpret_cast<BaseObject *>(reinterpret_cast<uintptr_t>(ref) & ~(common::Barrier::TAG_WEAK)); - common::Mutator* mutator = common::Mutator::GetMutator(); + ref = reinterpret_cast<BaseObject *>(reinterpret_cast<uintptr_t>(ref) & ~(common::Barrier::TAG_WEAK)); + auto thread = JSThread::GlueToJSThread(argGlue); + common::Mutator *mutator = static_cast<common::Mutator *>(thread->GetThreadHolder()->GetMutator()); mutator->RememberObjectInSatbBuffer(ref); } diff --git a/ecmascript/stubs/runtime_stubs.h b/ecmascript/stubs/runtime_stubs.h index 9305cb5491..1cb5a63406 100644 --- a/ecmascript/stubs/runtime_stubs.h +++ b/ecmascript/stubs/runtime_stubs.h @@ -183,7 +183,7 @@ public: static void TraceLazyDeoptCommitSuccess(uintptr_t argGlue, JSHandle func); static JSTaggedValue GetExternalModuleVar(uintptr_t argGlue, JSFunction *jsFunc, int32_t index); static bool MarkRSetCardTable(BaseObject* obj); - static void MarkInBuffer(BaseObject* ref); + static void MarkInBuffer(uintptr_t argGlue, BaseObject* obj, uintptr_t offset, BaseObject* ref); static void BatchMarkInBuffer(void* src, size_t count); private: -- Gitee