diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md
index db3a1ea967e5d4..2b01bfc54f42b2 100644
--- a/docs/design/coreclr/botr/clr-abi.md
+++ b/docs/design/coreclr/botr/clr-abi.md
@@ -709,7 +709,7 @@ Managed code uses the same linear stack as C code. The stack grows down.
 
 ## Incoming argument ABI
 
-The linear stack pointer `$sp` is the first argument to all methods. At a native->managed transition it is the value of the `$__stack_pointer` global. This global may be updated to the current `$sp` within managed code, and must be up to date with the current `$sp` at managed->native boundaries. Within the method the stack pointer always points at the bottom (lowest address) of the stack; generally this is a fixed offset from the value the stack pointer held on entry, except in methods that can do dynamic allocation.
+The linear stack pointer `$sp` is the first argument to static methods, and the second argument (immediately after the managed `this`) to instance methods. At a native->managed transition it is the value of the `$__stack_pointer` global. This global may be updated to the current `$sp` within managed code, and must be up to date with the current `$sp` at managed->native boundaries. Within the method the stack pointer always points at the bottom (lowest address) of the stack; generally this is a fixed offset from the value the stack pointer held on entry, except in methods that can do dynamic allocation.
 
 A frame pointer, if used, points at the bottom of the "fixed" portion of the stack to facilitate use of Wasm addressing modes, which only allow positive offsets.
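As a rough illustration of the parameter order described above: the C-level signatures, names, and shadow-stack type below are assumptions for illustration only, not the actual lowering emitted for Wasm.

```cpp
#include <cstdint>

struct ManagedObject { int field; };   // hypothetical stand-in for a managed object reference

// static int Add(int x, int y): the linear stack pointer is the first parameter.
static int Static_Add(uint8_t* /*sp*/, int x, int y) { return x + y; }

// int Counter.Add(int x): the managed `this` comes first, then the linear stack pointer.
static int Instance_Add(ManagedObject* thisPtr, uint8_t* /*sp*/, int x) { return thisPtr->field + x; }

int main()
{
    uint8_t shadowStack[64];           // pretend linear stack area
    ManagedObject obj{40};
    // 2 + 42 - 44 == 0
    return Static_Add(shadowStack, 1, 1) + Instance_Add(&obj, shadowStack, 2) - 44;
}
```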
diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp
index e7884e486c5820..64612b0e40db3d 100644
--- a/src/coreclr/interpreter/compiler.cpp
+++ b/src/coreclr/interpreter/compiler.cpp
@@ -5,6 +5,7 @@
 #include "interpreter.h"
 #include "stackmap.h"
 
+#include <algorithm>
 #include
 #include <new> // for std::bad_alloc
@@ -2023,18 +2024,13 @@ int32_t InterpCompiler::GetInterpTypeStackSize(CORINFO_CLASS_HANDLE clsHnd, Inte
     if (interpType == InterpTypeVT)
     {
         size = m_compHnd->getClassSize(clsHnd);
-        align = m_compHnd->getClassAlignmentRequirement(clsHnd);
-
-        // All vars are stored at 8 byte aligned offsets
-        if (align < INTERP_STACK_SLOT_SIZE)
-            align = INTERP_STACK_SLOT_SIZE;
+        // All vars are stored at offsets aligned to at least 8 bytes
 
         // We do not align beyond the stack alignment
         // (This is relevant for structs with very high alignment requirements,
         // where we align within struct layout, but the structs are not actually
         // aligned on the stack)
-        if (align > INTERP_STACK_ALIGNMENT)
-            align = INTERP_STACK_ALIGNMENT;
+        align = std::clamp(m_compHnd->getClassAlignmentRequirement(clsHnd), INTERP_STACK_SLOT_SIZE, INTERP_STACK_ALIGNMENT);
     }
     else
     {
diff --git a/src/coreclr/interpreter/compileropt.cpp b/src/coreclr/interpreter/compileropt.cpp
index 2636afdb468be9..5dc917ae0c75b9 100644
--- a/src/coreclr/interpreter/compileropt.cpp
+++ b/src/coreclr/interpreter/compileropt.cpp
@@ -3,6 +3,8 @@
 #include "interpreter.h"
 #include "stackmap.h"
 
+#include <algorithm>
+
 // Allocates the offset for var at the stack position identified by
 // *pPos while bumping the pointer to point to the next stack location
 int32_t InterpCompiler::AllocVarOffset(int var, int32_t *pPos)
@@ -17,11 +19,7 @@ int32_t InterpCompiler::AllocVarOffset(int var, int32_t *pPos)
     if (size > INTERP_STACK_SLOT_SIZE)
     {
         assert(m_pVars[var].interpType == InterpTypeVT);
-        align = m_compHnd->getClassAlignmentRequirement(m_pVars[var].clsHnd);
-        if (align < INTERP_STACK_SLOT_SIZE)
-            align = INTERP_STACK_SLOT_SIZE;
-        if (align > INTERP_STACK_ALIGNMENT)
-            align = INTERP_STACK_ALIGNMENT;
+        align = std::clamp(m_compHnd->getClassAlignmentRequirement(m_pVars[var].clsHnd), INTERP_STACK_SLOT_SIZE, INTERP_STACK_ALIGNMENT);
     }
     else
     {
diff --git a/src/coreclr/interpreter/inc/interpretershared.h b/src/coreclr/interpreter/inc/interpretershared.h
index a25c24f4cc1be3..f9b19392204805 100644
--- a/src/coreclr/interpreter/inc/interpretershared.h
+++ b/src/coreclr/interpreter/inc/interpretershared.h
@@ -14,8 +14,8 @@
 #define INTERP_API __attribute__ ((visibility ("default")))
 #endif // _MSC_VER
 
-#define INTERP_STACK_SLOT_SIZE 8   // Alignment of each var offset on the interpreter stack
-#define INTERP_STACK_ALIGNMENT 16  // Alignment of interpreter stack at the start of a frame
+#define INTERP_STACK_SLOT_SIZE 8u   // Alignment of each var offset on the interpreter stack
+#define INTERP_STACK_ALIGNMENT 16u  // Alignment of interpreter stack at the start of a frame
 
 struct InterpHelperData
 {
     uint32_t addressDataItemIndex : 29;
diff --git a/src/coreclr/vm/callhelpers.cpp b/src/coreclr/vm/callhelpers.cpp
index 783c1a239cc4ad..b86aa01cf75ff1 100644
--- a/src/coreclr/vm/callhelpers.cpp
+++ b/src/coreclr/vm/callhelpers.cpp
@@ -223,6 +223,7 @@ void* DispatchCallSimple(
 #ifdef TARGET_WASM
     static_assert(2*sizeof(ARGHOLDER_TYPE) == INTERP_STACK_SLOT_SIZE);
     callDescrData.nArgsSize = numStackSlotsToCopy * sizeof(ARGHOLDER_TYPE)*2;
+    callDescrData.hasRetBuff = false;
     LPVOID pOrigSrc = callDescrData.pSrc;
    callDescrData.pSrc = (LPVOID)_alloca(callDescrData.nArgsSize);
     for (int i = 0; i < numStackSlotsToCopy; i++)
@@ -540,6 +541,8 @@ void MethodDescCallSite::CallTargetWorker(const ARG_SLOT *pArguments, ARG_SLOT *
     callDescrData.pTarget = m_pCallTarget;
 #ifdef TARGET_WASM
     callDescrData.nArgsSize = nStackBytes;
+    callDescrData.hasRetBuff = false;
+    _ASSERTE(!m_argIt.HasRetBuffArg());
 #endif // TARGET_WASM
 
     CallDescrWorkerWithHandler(&callDescrData);
diff --git a/src/coreclr/vm/callhelpers.h b/src/coreclr/vm/callhelpers.h
index 9526da60dbbe39..3b37373a8d4314 100644
--- a/src/coreclr/vm/callhelpers.h
+++ b/src/coreclr/vm/callhelpers.h
@@ -28,7 +28,9 @@ struct CallDescrData
 #ifdef TARGET_WASM
     // size of the arguments and the transition block are used to execute the method with the interpreter
     size_t nArgsSize;
-#endif
+    bool hasThis;
+    bool hasRetBuff;
+#endif // TARGET_WASM
 
 #ifdef CALLDESCR_RETBUFFARGREG
     // Pointer to return buffer arg location
diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h
index 2a609a7ad4e604..297997d0753c50 100644
--- a/src/coreclr/vm/callingconvention.h
+++ b/src/coreclr/vm/callingconvention.h
@@ -13,6 +13,7 @@
 #define __CALLING_CONVENTION_INCLUDED
 
 #ifdef FEATURE_INTERPRETER
+#include <algorithm>
 #include
 #endif // FEATURE_INTERPRETER
@@ -1902,6 +1903,17 @@ int ArgIteratorTemplate::GetNextOffset()
     return argOfs;
 #elif defined(TARGET_WASM)
+
+    unsigned align;
+    if (argType == ELEMENT_TYPE_VALUETYPE)
+    {
+        align = std::clamp(CEEInfo::getClassAlignmentRequirementStatic(thValueType.GetMethodTable()), INTERP_STACK_SLOT_SIZE, INTERP_STACK_ALIGNMENT);
+    } else {
+        align = INTERP_STACK_SLOT_SIZE;
+    }
+
+    m_ofsStack = ALIGN_UP(m_ofsStack, align);
+
     int cbArg = ALIGN_UP(argSize, INTERP_STACK_SLOT_SIZE);
 
     int argOfs = TransitionBlock::GetOffsetOfArgs() + m_ofsStack;
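The same clamp pattern appears in compiler.cpp, compileropt.cpp, callingconvention.h and callstubgenerator.cpp. A minimal, self-contained sketch of it is below; the loop values and the stand-in for the class alignment query are assumptions for illustration, while the two macros mirror interpretershared.h.

```cpp
#include <algorithm>         // std::clamp
#include <cassert>
#include <initializer_list>

#define INTERP_STACK_SLOT_SIZE 8u   // minimum alignment of a var offset on the interpreter stack
#define INTERP_STACK_ALIGNMENT 16u  // maximum alignment honored at the start of a frame

int main()
{
    // Hypothetical class alignment requirements, unsigned like the JIT-EE query's return value.
    for (unsigned classAlign : {1u, 4u, 8u, 16u, 32u, 64u})
    {
        // Clamp into [8, 16]: smaller alignments are padded up to a stack slot,
        // larger ones are capped at the frame alignment.
        unsigned align = std::clamp(classAlign, INTERP_STACK_SLOT_SIZE, INTERP_STACK_ALIGNMENT);
        assert(align == 8u || align == 16u);
    }
    // std::clamp deduces a single type for all three arguments, so an unsigned value
    // with plain int bounds would not compile.
    return 0;
}
```

That single-type deduction appears to be why interpretershared.h switches the macros to unsigned literals (`8u`/`16u`): the clamped alignment query returns an unsigned value.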
diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp
index 169906f1fb5316..c80bd011f9f7fe 100644
--- a/src/coreclr/vm/callstubgenerator.cpp
+++ b/src/coreclr/vm/callstubgenerator.cpp
@@ -2941,15 +2941,7 @@ void CallStubGenerator::ComputeCallStubWorker(bool hasUnmanagedCallConv, CorInfo
         CorElementType corType = argIt.GetArgType(&thArgTypeHandle);
         if ((corType == ELEMENT_TYPE_VALUETYPE) && thArgTypeHandle.GetSize() > INTERP_STACK_SLOT_SIZE)
         {
-            unsigned align = CEEInfo::getClassAlignmentRequirementStatic(thArgTypeHandle);
-            if (align < INTERP_STACK_SLOT_SIZE)
-            {
-                align = INTERP_STACK_SLOT_SIZE;
-            }
-            else if (align > INTERP_STACK_ALIGNMENT)
-            {
-                align = INTERP_STACK_ALIGNMENT;
-            }
+            unsigned align = std::clamp(CEEInfo::getClassAlignmentRequirementStatic(thArgTypeHandle), INTERP_STACK_SLOT_SIZE, INTERP_STACK_ALIGNMENT);
             assert(align == 8 || align == 16); // At the moment, we can only have an 8 or 16 byte alignment requirement here
             if (interpreterStackOffset != ALIGN_UP(interpreterStackOffset, align))
             {
diff --git a/src/coreclr/vm/reflectioninvocation.cpp b/src/coreclr/vm/reflectioninvocation.cpp
index 461dbf1a4a1d01..a17819ca0fb581 100644
--- a/src/coreclr/vm/reflectioninvocation.cpp
+++ b/src/coreclr/vm/reflectioninvocation.cpp
@@ -406,7 +406,7 @@ extern "C" void QCALLTYPE RuntimeMethodHandle_InvokeMethod(
     callDescrData.pSrc = pTransitionBlock + sizeof(TransitionBlock);
 
     _ASSERTE((nStackBytes % TARGET_POINTER_SIZE) == 0);
-    callDescrData.numStackSlots = nStackBytes / TARGET_POINTER_SIZE;
+    callDescrData.numStackSlots = ALIGN_UP(nStackBytes, TARGET_REGISTER_SIZE) / TARGET_REGISTER_SIZE;
 #ifdef CALLDESCR_ARGREGS
     callDescrData.pArgumentRegisters = (ArgumentRegisters*)(pTransitionBlock + TransitionBlock::GetOffsetOfArgumentRegisters());
 #endif
@@ -423,6 +423,8 @@ extern "C" void QCALLTYPE RuntimeMethodHandle_InvokeMethod(
 #ifdef TARGET_WASM
     // WASM-TODO: this is now called from the interpreter, so the arguments layout is OK. reconsider with codegen
     callDescrData.nArgsSize = nStackBytes;
+    callDescrData.hasThis = argit.HasThis();
+    callDescrData.hasRetBuff = argit.HasRetBuffArg();
 #endif // TARGET_WASM
 
     // This is duplicated logic from MethodDesc::GetCallTarget
diff --git a/src/coreclr/vm/wasm/calldescrworkerwasm.cpp b/src/coreclr/vm/wasm/calldescrworkerwasm.cpp
index 6f0fbefb682f56..61cc94b12b2ab3 100644
--- a/src/coreclr/vm/wasm/calldescrworkerwasm.cpp
+++ b/src/coreclr/vm/wasm/calldescrworkerwasm.cpp
@@ -7,6 +7,8 @@
 // Forward declaration
 void ExecuteInterpretedMethodWithArgs(TADDR targetIp, int8_t* args, size_t argSize, void* retBuff, PCODE callerIp);
 
+#define SPECIAL_ARG_ADDR(pos) (void**)(((int8_t*)pCallDescrData->pSrc) + ((pos)*INTERP_STACK_SLOT_SIZE))
+
 extern "C" void STDCALL CallDescrWorkerInternal(CallDescrData* pCallDescrData)
 {
     _ASSERTE(pCallDescrData != NULL);
@@ -26,5 +28,24 @@ extern "C" void STDCALL CallDescrWorkerInternal(CallDescrData* pCallDescrData)
         targetIp = pMethod->GetInterpreterCode();
     }
 
-    ExecuteInterpretedMethodWithArgs((TADDR)targetIp, (int8_t*)pCallDescrData->pSrc, pCallDescrData->nArgsSize, (int8_t*)pCallDescrData->returnValue, (PCODE)&CallDescrWorkerInternal);
+    size_t argsSize = pCallDescrData->nArgsSize;
+    void* retBuff;
+    int8_t* args;
+    if (pCallDescrData->hasRetBuff)
+    {
+        argsSize -= INTERP_STACK_SLOT_SIZE;
+        if (pCallDescrData->hasThis)
+        {
+            retBuff = *SPECIAL_ARG_ADDR(1);
+            *SPECIAL_ARG_ADDR(1) = *SPECIAL_ARG_ADDR(0);
+        } else {
+            retBuff = *SPECIAL_ARG_ADDR(0);
+        }
+        args = (int8_t*)SPECIAL_ARG_ADDR(1);
+    } else {
+        args = (int8_t*)pCallDescrData->pSrc;
+        retBuff = pCallDescrData->returnValue;
+    }
+
+    ExecuteInterpretedMethodWithArgs((TADDR)targetIp, args, argsSize, retBuff, (PCODE)&CallDescrWorkerInternal);
 }
diff --git a/src/coreclr/vm/wasm/cgencpu.h b/src/coreclr/vm/wasm/cgencpu.h
index 1e076e9d01e089..5829f5fd798592 100644
--- a/src/coreclr/vm/wasm/cgencpu.h
+++ b/src/coreclr/vm/wasm/cgencpu.h
@@ -67,7 +67,8 @@ struct ArgumentRegisters {
 #define NUM_ARGUMENT_REGISTERS 0
 #define ARGUMENTREGISTERS_SIZE sizeof(ArgumentRegisters)
 
-#define ENREGISTERED_RETURNTYPE_MAXSIZE 16 // not sure here, 16 bytes is v128
+#define ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE 8 // bytes
+#define ENREGISTERED_RETURNTYPE_MAXSIZE 16 // bytes, so that v128 can be returned without retbuff
 
 #define STACKWALK_CONTROLPC_ADJUST_OFFSET 1
diff --git a/src/tests/JIT/Directed/VectorABI/VectorMgdMgdArray.cs b/src/tests/JIT/Directed/VectorABI/VectorMgdMgdArray.cs
index 517b875bdb2e6a..c042e690f51b84 100644
--- a/src/tests/JIT/Directed/VectorABI/VectorMgdMgdArray.cs
+++ b/src/tests/JIT/Directed/VectorABI/VectorMgdMgdArray.cs
@@ -344,7 +344,7 @@ private void checkValues(string msg, Vector128 v, int index)
                 if (!printedMsg)
                 {
-                    Console.WriteLine("{0}: FAILED - Vector64 checkValues(index = {1}, i = {2}) {3}",
+                    Console.WriteLine("{0}: FAILED - Vector128 checkValues(index = {1}, i = {2}) {3}",
                                       msg, index, i, isReflection ? "(via reflection)" : "" );
                     printedMsg = true;
                 }
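The argument reshuffling added to CallDescrWorkerInternal can be hard to follow from the diff alone. The stand-alone sketch below mimics the `hasRetBuff && hasThis` path with a plain array in place of `pCallDescrData->pSrc`; the slot layout ([this][retbuff][args...]) is taken from the patch, while the pointer values and helper lambda are made up for illustration.

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

constexpr size_t INTERP_STACK_SLOT_SIZE = 8;   // mirrors interpretershared.h (8u)

int main()
{
    // Pretend incoming slots for an instance call with a return buffer:
    // slot 0 = this, slot 1 = retbuff, slot 2.. = remaining args.
    void* thisPtr = reinterpret_cast<void*>(0x1000);
    void* retBuf  = reinterpret_cast<void*>(0x2000);
    void* arg0    = reinterpret_cast<void*>(0x3000);

    alignas(16) uint8_t srcArea[3 * INTERP_STACK_SLOT_SIZE] = {};
    std::memcpy(srcArea + 0 * INTERP_STACK_SLOT_SIZE, &thisPtr, sizeof(void*));
    std::memcpy(srcArea + 1 * INTERP_STACK_SLOT_SIZE, &retBuf,  sizeof(void*));
    std::memcpy(srcArea + 2 * INTERP_STACK_SLOT_SIZE, &arg0,    sizeof(void*));

    // Stand-in for SPECIAL_ARG_ADDR(pos).
    auto slot = [&](size_t pos) { return reinterpret_cast<void**>(srcArea + pos * INTERP_STACK_SLOT_SIZE); };

    // hasRetBuff && hasThis: pull the return buffer out of slot 1 and slide `this`
    // down into it, so the interpreter sees a contiguous [this][args...] area that
    // is one slot shorter than the original argument area.
    void* retBuff = *slot(1);
    *slot(1) = *slot(0);
    uint8_t* args = reinterpret_cast<uint8_t*>(slot(1));
    size_t argsSize = 3 * INTERP_STACK_SLOT_SIZE - INTERP_STACK_SLOT_SIZE;

    assert(retBuff == retBuf);
    assert(*reinterpret_cast<void**>(args) == thisPtr);
    assert(*reinterpret_cast<void**>(args + INTERP_STACK_SLOT_SIZE) == arg0);
    (void)argsSize;
    return 0;
}
```

The return buffer is stripped from the slot area because the interpreter entry point receives it as a separate parameter, which is also why `argsSize` shrinks by one stack slot.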