(revised 02NOV2006)
In this exercise you will improve the performance of a C function that has been translated into an assembly language function, and compare the performance of the two implementations. In addition, you will compare your results against compiler-optimized code.
The folks at Numerical Recipes Software provide a function called ran0 that generates uniformly distributed random numbers. The code is provided below:
/* Generator constants. Note that IM == IA*IQ + IR, which is what lets the
 * update below form (IA * state) mod IM from a quotient and a remainder
 * by IQ instead of from the full product IA * state. */
#define IA 16807
#define IM 2147483647
#define AM (1.0/IM)
#define IQ 127773
#define IR 2836
#define MASK 123459876

/*
 * ran0 - advance a multiplicative congruential generator by one step.
 *
 * idum : in/out seed. It is XORed with MASK on entry and again on exit,
 *        so the caller always sees the masked form of the state.
 *
 * Returns the new (unmasked) state scaled by AM = 1.0/IM, converted to
 * float.
 */
float ran0(long *idum)
{
    long state;   /* unmasked generator state */
    long quot;    /* state / IQ */
    long rem;     /* state - quot*IQ, i.e. the remainder by IQ */
    float deviate;

    state = *idum ^ MASK;

    /* state <- IA*rem - IR*quot, wrapped back into range when negative. */
    quot = state / IQ;
    rem = state - quot * IQ;
    state = IA * rem - IR * quot;
    if (state < 0) {
        state += IM;
    }

    /* Scale in double precision, then narrow to the float result. */
    deviate = (float)(AM * state);

    /* Hand the state back to the caller in masked form. */
    *idum = state ^ MASK;
    return deviate;
}

#undef IA
#undef IM
#undef AM
#undef IQ
#undef IR
#undef MASK
A detailed description of the nature of the generator is provided here.
TITLE AsmRan0 Procedure (AsmRan0.asm)

; An assembly language version of the Numerical Recipes ran0
; random number generator.
;
; This is a statement-by-statement translation of the C routine: every C
; statement reloads *idum from memory and stores it back, and k is kept
; in a stack local rather than in a register.

.586
.model flat,C

AsmRan0 PROTO, idum:PTR DWORD

; program constants (same values as the #defines in the C version)
IA = 16807
IM = 2147483647
IQ = 127773
IR = 2836
_MASK = 123459876

.data
AM REAL8 4.656612875E-10        ; (1.0/IM), written out to 10 significant digits

.code
;-----------------------------------------------
AsmRan0 PROC C USES edi, idum:PTR DWORD
LOCAL k:DWORD, temp:DWORD, ans:DWORD
;
; Generate random deviates over the interval (0,1)
;
; Receives: idum - pointer to the 32-bit seed, updated in place
; Returns:  ans is left on the FPU stack by the final fld (the C
;           prototype in ran0.h declares a float return value)
;
; NOTE(review): edi is listed in USES but is never referenced below,
; while esi, which is used, is saved and restored by hand with push/pop.
;-----------------------------------------------
    push esi

; *idum ^= mask
    mov esi, idum               ; get pointer to idum
    mov eax, [esi]              ; get idum
    xor eax, _MASK              ; idum ^ mask
    mov [esi], eax              ; idum = idum ^ mask

; k=(*idum)/IQ;
    mov eax, [esi]              ; get idum
    cdq                         ; sign-extend eax into edx:eax for idiv
    mov ecx, IQ                 ; get IQ
    idiv ecx                    ; eax = (*idum)/IQ (edx receives the remainder)
    mov k, eax                  ; k=(*idum)/IQ;

; *idum=IA*(*idum-k*IQ)-IR*k;
    mov eax, k                  ; get k
    imul eax, IQ                ; k*IQ
    mov edx, [esi]              ; get idum
    sub edx, eax                ; *idum-k*IQ
    imul edx, IA                ; IA*(*idum-k*IQ)
    mov eax, k                  ; get k
    imul eax, IR                ; IR*k
    sub edx, eax                ; IA*(*idum-k*IQ)-IR*k
    mov [esi], edx              ; *idum=IA*(*idum-k*IQ)-IR*k

; if (*idum < 0) *idum += IM;
    mov eax, [esi]              ; get idum
    cmp eax, 0                  ; if (*idum < 0)
    jge Label1                  ; non-negative: skip the correction
    mov eax, [esi]              ; get idum
    add eax, IM                 ; *idum + IM
    mov [esi], eax              ; *idum += IM
Label1:

; ans=AM*(*idum);
    mov eax, [esi]              ; get idum
    mov temp, eax               ; save idum for the conversion (fild reads from memory)
    fild temp                   ; convert idum to real
    fmul AM                     ; AM*(*idum)
    fstp ans                    ; ans=AM*(*idum), stored as a 32-bit value

; *idum ^= MASK;
    mov eax, [esi]              ; get idum
    xor eax, _MASK              ; idum ^ mask
    mov [esi], eax              ; idum = idum ^ mask

; return ans;
    fld ans                     ; reload ans onto the FPU stack as the result
    pop esi                     ; undo the manual push at the top
; NOTE(review): confirm how the assembler treats "ret 0" (as opposed to a
; plain "ret") with respect to the epilogue generated for USES/LOCAL.
    ret 0
AsmRan0 ENDP
END
// main.cpp - Testing ran0 and AsmRan0
#include <iostream>
#include <time.h>
#include "ran0.h"
#include <Windows.h>
using namespace std;
int main()
{
// Fill an array with pseudorandom integers.
const unsigned LOOP_SIZE = 10000000;
long idum;
LARGE_INTEGER start, end;
LARGE_INTEGER freq;
SetThreadAffinityMask(GetCurrentThread(), 1);
// make sure installed hardware supports a high-resolution performance counter
if(!QueryPerformanceFrequency(&freq)) return 1;
// Test the C++ function:
QueryPerformanceCounter(&start);
idum = 123456;
for( int n = 0; n < LOOP_SIZE; n++) ran0(&idum);
QueryPerformanceCounter(&end);
cout << " c code " << (end.QuadPart - start.QuadPart)/(double) freq.QuadPart
<< " seconds" << endl;
// Test the Assembly language procedure:
QueryPerformanceCounter(&start);
idum = 123456;
for( int n = 0; n < LOOP_SIZE; n++) AsmRan0(&idum);
QueryPerformanceCounter(&end);
cout << " asm code " << (end.QuadPart - start.QuadPart)/(double) freq.QuadPart
<< " seconds" << endl;
return 0;
}
// ran0.h
// Declarations for the two random number generator implementations that
// main.cpp times against each other.
#ifndef RAN0_H
#define RAN0_H

// The linkage wrapper is emitted only for C++ translation units so that the
// same header can also be included from C code.
#ifdef __cplusplus
extern "C" {
#endif

// C/C++ version.
// idum points to the seed, which is updated in place on every call.
float ran0(long *idum);

// Assembly language version (AsmRan0.asm); same calling contract as ran0.
float AsmRan0(long *idum);

#ifdef __cplusplus
}
#endif

#endif // RAN0_H
Your task is:
Using a zip-utility, zip up the following items into a single archive (call it mp08.zip) and submit it to the homework submission system.