Jump to content


SSE: Bizarre alignment error


22 replies to this topic

#21 Sandevil

    New Member

  • Members
  • Pip
  • 7 posts

Posted 16 February 2011 - 03:38 PM

Console Application Source Code:

#include "stdafx.h"

#pragma inline_depth(0);
#pragma inline_recursion(off);

// Computes (((a * b) + c) * d) - e.
// Declared without an explicit calling-convention keyword.
int test1(const int a, const int b, const int c, const int d, const int e)
{
	const int product = a * b;
	const int scaled = (product + c) * d;

	return scaled - e;
}

// Computes (((a * b) + c) * d) - e.
// Same arithmetic as the other test functions, declared __cdecl.
int __cdecl test2(const int a, const int b, const int c, const int d, const int e)
{
	int acc = a * b;	// a*b
	acc = acc + c;		// (a*b) + c
	acc = acc * d;		// ((a*b) + c) * d

	return acc - e;
}

// Computes (((a * b) + c) * d) - e.
// Same arithmetic as the other test functions, declared __stdcall.
int __stdcall test3(const int a, const int b, const int c, const int d, const int e)
{
	const int sum = (a * b) + c;

	return (sum * d) - e;
}

// Computes (((a * b) + c) * d) - e.
// Same arithmetic as the other test functions, declared __fastcall.
int __fastcall test4(const int a, const int b, const int c, const int d, const int e)
{
	const int beforeSubtract = ((a * b) + c) * d;
	const int answer = beforeSubtract - e;

	return answer;
}

// Computes (((a * b) + c) * d) - e.
// Same arithmetic as the other test functions, marked __declspec(dllexport)
// and declared without an explicit calling-convention keyword.
__declspec(dllexport) int test5(const int a, const int b, const int c, const int d, const int e)
{
	return (((a * b) + c) * d) - e;
}

// Wraps the same arithmetic as the free test functions in member functions,
// once without a calling-convention keyword and once declared __fastcall.
class test6
{
public:
	// Empty constructor/destructor (the stray ';' after each body was removed).
	test6() {}
	~test6() {}

	// Returns (((a * b) + c) * d) - e.
	// The original declared the first parameter as `const const int a`; the
	// repeated qualifier is redundant and diagnosed by compilers, so it is
	// written once here. Top-level const on a by-value parameter is not part
	// of the function type, so callers are unaffected.
	int exec_test(const int a, int b, const int c, const int d, const int e)
	{
		int result = (((a*b) + c) * d) - e;

		return result;
	}

	// Same computation as exec_test, declared __fastcall.
	int __fastcall exec_test2(const int a, int b, const int c, const int d, const int e)
	{
		int result = (((a*b) + c) * d) - e;

		return result;
	}
};

// Reads five integers from standard input, feeds them to every test function
// (free functions test1..test5 and both test6 members), and prints each
// result on its own line. The inputs come from scanf so the calls depend on
// values that are only known at run time.
int _tmain(int argc, _TCHAR* argv[])
{
	// Zero-initialized so the values are defined even if an input conversion fails.
	int a = 0;
	int b = 0;
	int c = 0;
	int d = 0;
	int e = 0;
	test6 ctest6;

	// scanf stores through its arguments, so each one must be the ADDRESS of
	// the destination. The original passed the ints by value, which is
	// undefined behaviour and never filled in a..e.
	scanf("%d", &a);
	scanf("%d", &b);
	scanf("%d", &c);
	scanf("%d", &d);
	scanf("%d", &e);

	int r1,r2,r3,r4,r5,r6, r7;

	r1 = test1(a,b,c,d,e);
	r2 = test2(a,b,c,d,e);
	r3 = test3(a,b,c,d,e);
	r4 = test4(a,b,c,d,e);
	r5 = test5(a,b,c,d,e);
	r6 = ctest6.exec_test(a,b,c,d,e);
	r7 = ctest6.exec_test2(a,b,c,d,e);

	printf("%d\n",r1);
	printf("%d\n",r2);
	printf("%d\n",r3);
	printf("%d\n",r4);
	printf("%d\n",r5);
	printf("%d\n",r6);
	printf("%d\n",r7);

	// Reads one more character before returning. The address is required here
	// too. NOTE(review): %c does not skip whitespace, so this may just consume
	// the newline left after the last number rather than pause - confirm intent.
	char ch = 0;
	scanf("%c", &ch);

	return 0;
}


Release Assembly Output:
; 80   : 
; 81   : 	int r1,r2,r3,r4,r5,r6, r7;
; 82   : 
; 83   : 	r1 = test1(a,b,c,d,e);

	mov	edx, DWORD PTR _e$[ebp]
	mov	eax, DWORD PTR _d$[ebp]
	push	edx
	push	eax
	push	ebx
	push	edi
	mov	eax, esi
	call	?test1@@YAHHHHHH@Z			; test1

; 84   : 	r2 = test2(a,b,c,d,e);
; 85   : 	r3 = test3(a,b,c,d,e);
; 86   : 	r4 = test4(a,b,c,d,e);
; 87   : 	r5 = test5(a,b,c,d,e);
; 88   : 	r6 = ctest6.exec_test(a,b,c,d,e);
; 89   : 	r7 = ctest6.exec_test2(a,b,c,d,e);
; 90   : 
; 91   : 	printf("%d\n",r1);

	push	eax
	push	OFFSET ??_C@_03PMGGPEJJ@?$CFd?6?$AA@
	call	DWORD PTR __imp__printf
	mov	ecx, DWORD PTR _e$[ebp]
	mov	edx, DWORD PTR _d$[ebp]
	add	esp, 64					; 00000040H
	push	ecx
	push	edx
	push	ebx
	push	edi
	mov	eax, esi
	call	?test2@@YAHHHHHH@Z			; test2

; 92   : 	printf("%d\n",r2);

	push	eax
	push	OFFSET ??_C@_03PMGGPEJJ@?$CFd?6?$AA@
	call	DWORD PTR __imp__printf
	mov	eax, DWORD PTR _e$[ebp]
	mov	ecx, DWORD PTR _d$[ebp]
	add	esp, 24					; 00000018H
	push	eax
	push	ecx
	push	ebx
	push	edi
	mov	eax, esi
	call	?test3@@YGHHHHHH@Z			; test3

; 93   : 	printf("%d\n",r3);

	push	eax
	push	OFFSET ??_C@_03PMGGPEJJ@?$CFd?6?$AA@
	call	DWORD PTR __imp__printf
	mov	edx, DWORD PTR _e$[ebp]
	mov	eax, DWORD PTR _d$[ebp]
	add	esp, 8
	push	edx
	push	eax
	push	ebx
	push	edi
	mov	eax, esi
	call	?test4@@YIHHHHHH@Z			; test4

; 94   : 	printf("%d\n",r4);

	push	eax
	push	OFFSET ??_C@_03PMGGPEJJ@?$CFd?6?$AA@
	call	DWORD PTR __imp__printf
	mov	ecx, DWORD PTR _e$[ebp]
	mov	edx, DWORD PTR _d$[ebp]
	push	ecx
	push	edx
	push	ebx
	push	edi
	push	esi
	call	?test5@@YAHHHHHH@Z			; test5

; 95   : 	printf("%d\n",r5);

	push	eax
	push	OFFSET ??_C@_03PMGGPEJJ@?$CFd?6?$AA@
	call	DWORD PTR __imp__printf
	mov	eax, DWORD PTR _e$[ebp]
	mov	ecx, DWORD PTR _d$[ebp]
	add	esp, 36					; 00000024H
	push	eax
	push	ecx
	push	ebx
	push	edi
	mov	eax, esi
	call	?exec_test@test6@@QAEHHHHHH@Z		; test6::exec_test

; 96   : 	printf("%d\n",r6);

	push	eax
	push	OFFSET ??_C@_03PMGGPEJJ@?$CFd?6?$AA@
	call	DWORD PTR __imp__printf
	mov	edx, DWORD PTR _e$[ebp]
	mov	eax, DWORD PTR _d$[ebp]
	add	esp, 8
	push	edx
	push	eax
	push	ebx
	push	edi
	mov	eax, esi
	call	?exec_test2@test6@@QAIHHHHHH@Z		; test6::exec_test2

...

As you can see, __cdecl and __stdcall generate the same code: all parameters are pushed right to left, so evaluation is left to right (the last push will be the first to be popped).

In this test case the compiler wasn't able to optimize __fastcall, and it was ignored (__fastcall works like the inline keyword).
By the way, __fastcall will try to pass 3 arguments in registers; if it can't do that, it will try with 2; otherwise everything goes onto the stack.
But if you generate a DLL, every function that is marked as __declspec(dllexport) will use __stdcall = __cdecl; __fastcall will be ignored.
The only difference between Embarcadero C++ Builder and Microsoft Visual C++ is that Embarcadero, for __fastcall, passes either 3 arguments in registers or none, in which case everything goes onto the stack. But this will create a problem only if you create a library and try static linking, which is impossible by default because they use two different object formats, and gcc uses yet another object format.
So using a DLL in another language will usually never cause a problem.
I think that the MSDN documentation is not updated for calling conventions.
I used Visual C++ 2010.

#22 .oisyn

    DevMaster Staff

  • Moderators
  • 1842 posts

Posted 16 February 2011 - 03:46 PM

That's an awful lot of words just to say you were wrong and we were right :).
Also, cdecl is not the same as stdcall. Name mangling is different, and with stdcall the callee is responsible for stack cleanup (using the retn instruction), while with cdecl the caller is responsible.

Quote

I think that the MSDN documentation is not updated for calling conventions.
What you describe is perfectly to specifications in the MSDN.

My test, clean and simple. Compiled in release without whole program optimization (otherwise, all bets are off)
#include <iostream>
#include <intrin.h>
#include <stdlib.h>

// Writes `a` to std::cout, then a newline, and flushes the stream.
// b, c and d are accepted but not used. Declared __cdecl and marked noinline.
__declspec(noinline) void __cdecl test1(int a, int b, int c, int d)
{
	std::cout << a;
	std::cout << std::endl;
}
// Writes `a` to std::cout, then a newline, and flushes the stream.
// b, c and d are accepted but not used. Declared __stdcall and marked noinline.
__declspec(noinline) void __stdcall test2(int a, int b, int c, int d)
{
	std::cout << a;
	std::cout << std::endl;
}
// Writes `a` to std::cout, then a newline, and flushes the stream.
// b, c and d are accepted but not used. Declared __fastcall and marked noinline.
__declspec(noinline) void __fastcall test3(int a, int b, int c, int d)
{
	std::cout << a;
	std::cout << std::endl;
}

// Writes `a` to std::cout, then a newline, and flushes the stream.
// b, c and d are accepted but not used.
// Declared __cdecl and marked both noinline and dllexport.
__declspec(noinline, dllexport) void __cdecl test4(int a, int b, int c, int d)
{
	std::cout << a;
	std::cout << std::endl;
}
// Writes `a` to std::cout, then a newline, and flushes the stream.
// b, c and d are accepted but not used.
// Declared __stdcall and marked both noinline and dllexport.
__declspec(noinline, dllexport) void __stdcall test5(int a, int b, int c, int d)
{
	std::cout << a;
	std::cout << std::endl;
}
// Writes `a` to std::cout, then a newline, and flushes the stream.
// b, c and d are accepted but not used.
// Declared __fastcall and marked both noinline and dllexport.
__declspec(noinline, dllexport) void __fastcall test6(int a, int b, int c, int d)
{
	std::cout << a;
	std::cout << std::endl;
}


// Calls each of the six test functions once with the same constant
// arguments (0, 1, 2, 3), so the call sites differ only in the declared
// calling convention and in whether the callee is marked dllexport.
int main()
{
	// test1..test3: __cdecl, __stdcall, __fastcall (noinline only).
	test1(0, 1, 2, 3);
	test2(0, 1, 2, 3);
	test3(0, 1, 2, 3);
	// test4..test6: the same three conventions, additionally marked dllexport.
	test4(0, 1, 2, 3);
	test5(0, 1, 2, 3);
	test6(0, 1, 2, 3);
}

Asm:
int main()
{
	test1(0, 1, 2, 3);
00381070  push        3    
00381072  push        2    
00381074  push        1    
00381076  push        0    
00381078  call        test4 (381000h) 
0038107D  add         esp,10h 
	test2(0, 1, 2, 3);
00381080  push        3    
00381082  push        2    
00381084  push        1    
00381086  push        0    
00381088  call        test5 (381020h) 
	test3(0, 1, 2, 3);
0038108D  push        3    
0038108F  push        2    
00381091  mov         edx,1 
00381096  xor         ecx,ecx 
00381098  call        test6 (381050h) 
	test4(0, 1, 2, 3);
0038109D  push        3    
0038109F  push        2    
003810A1  push        1    
003810A3  push        0    
003810A5  call        test4 (381000h) 
003810AA  add         esp,10h 
	test5(0, 1, 2, 3);
003810AD  push        3    
003810AF  push        2    
003810B1  push        1    
003810B3  push        0    
003810B5  call        test5 (381020h) 
	test6(0, 1, 2, 3);
003810BA  push        3    
003810BC  push        2    
003810BE  mov         edx,1 
003810C3  xor         ecx,ecx 
003810C5  call        test6 (381050h) 
}
003810CA  xor         eax,eax 
003810CC  ret     

dllexport makes no difference whatsoever. __fastcall uses two registers.
C++ addict
-
Currently working on: the 3D engine for Tomb Raider.

#23 Sandevil

    New Member

  • Members
  • Pip
  • 7 posts

Posted 16 February 2011 - 04:28 PM

If you put it this way, you are right and I'm wrong.
Sorry for the posts.
I know that the stack cleanup is different, but the name mangling is not always different.
Look at the assembly code or try it yourself.
But i was saying that:

The parameters are always pushed right to left for both __stdcall and __cdecl, so the parameter passing is the same; if you want to believe that they are different, believe what you want.
If you want to believe that 64-bit Linux and Windows will have the same calling convention, believe it, but gcc is following AMD and Microsoft will follow Intel's advice.
If you want to believe that __fastcall will try to pass 2 and not 3 values in registers when it can, believe it. By the way, everything started with the 4th parameter, so why was the 3rd passed in a register?
If you want to believe that Microsoft is doing something good for you by removing inline assembly in 64-bit, believe it.
And if you want to demonstrate that I was wrong about everything, write some code and post it.





1 user(s) are reading this topic

0 members, 1 guests, 0 anonymous users