// Packs four 2-bit selectors into a single 8-bit value: x lands in bits 7-6,
// y in bits 5-4, z in bits 3-2 and w in bits 1-0. Each argument is masked
// with 3 first, so only its low two bits contribute. In this file the result
// is passed as the third operand of shufps.
48#define SHUFFLE(x, y, z, w) (((x)&3)<< 6|((y)&3)<<4|((z)&3)<< 2|((w)&3))
// Emits one inline-asm shufps with XMM as both the first and second operand.
// The third operand repeats INDEX (masked to its low two bits) in all four
// 2-bit fields, i.e. the same bit layout SHUFFLE(INDEX, INDEX, INDEX, INDEX)
// would produce, written out by hand here.
// NOTE(review): presumably this replicates element INDEX of XMM into every
// lane, as the name suggests - confirm against the shufps documentation.
49#define BROADCAST(XMM, INDEX) __asm shufps XMM,XMM,(((INDEX)&3)<< 6|((INDEX)&3)<<4|((INDEX)&3)<< 2|((INDEX)&3))
// Expands to a run of inline-asm unpcklps / unpckhps / shufps instructions
// over the five register arguments. BX, BY, BZ and BW each appear as the first
// operand of at least one instruction; TV is the first operand of one unpckhps
// and the second operand of the last two shufps.
// NOTE(review): presumably BX..BW hold the four rows of a 4x4 float matrix
// that is transposed in place, with TV as scratch - the name suggests this,
// but the visible lines alone do not establish it. The embedded line numbers
// skip 52, 55, 58 and 61, so instructions may be missing from this view;
// check the complete macro before editing it.
51#define TRANSPOSE(BX, BY, BZ, BW, TV) \
53 __asm unpcklps BZ,BW \
54 __asm unpckhps TV,BW \
56 __asm unpcklps BX,BY \
57 __asm unpckhps BW,BY \
59 __asm shufps BX,BZ,SHUFFLE(1, 0, 1, 0) \
60 __asm shufps BY,BZ,SHUFFLE(3, 2, 3, 2) \
62 __asm shufps BZ,TV,SHUFFLE(1, 0, 1, 0) \
63 __asm shufps BW,TV,SHUFFLE(3, 2, 3, 2)
69 if (CPUDetectClass::_Has_SSE_Instruction_Set()) {
79static Vector4 lastrow(0.0f,0.0f,0.0f,1.0f);
85 if (CPUDetectClass::_Has_SSE_Instruction_Set()) {
100 shufps xmm4,xmm4,
SHUFFLE(2,1,0,0)
101 shufps xmm5,xmm5,
SHUFFLE(2,1,0,0)
102 shufps xmm6,xmm6,
SHUFFLE(2,1,0,0)
103 shufps xmm7,xmm7,
SHUFFLE(2,1,0,0)
145 cmp dword ptr [ebx+12],0
147 cmp dword ptr [ebx+28],0
149 cmp dword ptr [ebx+44],0
156 prefetchnta [eax+ecx+48]
157 prefetchnta [eax+ecx+48+32]
161 movss xmm1,[eax+ecx+4]
163 movss xmm2,[eax+ecx+8]
171 movss xmm1,[eax+ecx+12]
173 movss xmm2,[eax+ecx+16]
175 movss xmm3,[eax+ecx+20]
184 shufps xmm0,xmm0,
SHUFFLE(0,3,2,1)
186 movaps [edx+ecx],xmm0
188 prefetcht0 [edx+ecx+48]
189 prefetcht0 [edx+ecx+48+32]
191 movss xmm0,[eax+ecx+24]
193 movss xmm1,[eax+ecx+24+4]
195 movss xmm2,[eax+ecx+24+8]
203 shufps xmm3,xmm0,
SHUFFLE(2,1,3,2)
204 movaps [edx+ecx+16],xmm3
206 movss xmm1,[eax+ecx+24+12]
208 movss xmm2,[eax+ecx+24+16]
210 movss xmm3,[eax+ecx+24+20]
218 shufps xmm0,xmm0,
SHUFFLE(2,1,0,3)
220 movaps [edx+ecx+32],xmm1
230 prefetchnta [eax+ecx+48]
231 prefetchnta [eax+ecx+48+32]
235 movss xmm1,[eax+ecx+4]
237 movss xmm2,[eax+ecx+8]
246 movss xmm1,[eax+ecx+12]
248 movss xmm2,[eax+ecx+16]
250 movss xmm3,[eax+ecx+20]
260 shufps xmm0,xmm0,
SHUFFLE(0,3,2,1)
261 movaps [edx+ecx],xmm0
263 prefetcht0 [edx+ecx+48]
264 prefetcht0 [edx+ecx+48+32]
266 movss xmm0,[eax+ecx+24]
268 movss xmm1,[eax+ecx+24+4]
270 movss xmm2,[eax+ecx+24+8]
279 shufps xmm3,xmm0,
SHUFFLE(2,1,3,2)
280 movaps [edx+ecx+16],xmm3
282 movss xmm1,[eax+ecx+24+12]
284 movss xmm2,[eax+ecx+24+16]
286 movss xmm3,[eax+ecx+24+20]
295 shufps xmm0,xmm0,
SHUFFLE(2,1,0,3)
298 movaps [edx+ecx+32],xmm1
317 if (count<=0)
return;
321 for (i=0; i<count; i++)
323 dst[i]=matrix*src[i];
329 if (count<=0)
return;
330 memcpy(dst,src,
sizeof(
Vector2)*count);
335 if (count<=0)
return;
336 memcpy(dst,src,
sizeof(
unsigned)*count);
341 if (count<=0)
return;
342 memcpy(dst,src,
sizeof(
Vector3)*count);
347 if (count<=0)
return;
348 memcpy(dst,src,
sizeof(
Vector4)*count);
353 if (count<=0)
return;
356 for (i=0; i<count; i++)
367 if (count<=0)
return;
370 for (i=0; i<count; i++)
381 if (count<=0)
return;
384 for (i=0; i<count; i++)
395 if (count<=0)
return;
398 for (i=0; i<count; i++)
400 dst[i]=src[index[i]];
406 if (count<=0)
return;
409 for (i=0; i<count; i++)
411 dst[i]=src[index[i]];
417 if (count<=0)
return;
420 for (i=0; i<count; i++)
422 dst[i]=src[index[i]];
428 if (count<=0)
return;
431 for (i=0; i<count; i++)
433 dst[i]=src[index[i]];
439 if (count<=0)
return;
442 for (i=0; i<count; i++)
444 dst[i]=src[index[i]];
450 if (count<=0)
return;
453 for (i=0; i<count; i++)
455 dst[i]=src[index[i]];
461 if (count<=0)
return;
464 for (i=0; i<count; i++)
482 if (count<=0)
return;
483 memset(dst,0,
sizeof(
Vector3)*count);
489 if (count<=0)
return;
492 for (i=0; i<count; i++)
498 if (count<=0)
return;
504 for (i=1; i<count; i++)
518 for (
int i=0; i<count; i++) {
519 dest[i] = dest[i] * multiplier +
add;
525 for (
int i=0; i<count; i++)
531 for (
int i=0; i<count; i++)
532 dst[i]=(src[i]>
min?src[i]:
min);
537 for (
int i=0; i<count; i++)
538 dst[i]=powf(src[i],pow);
void add(float *sum, float *addend)
void mulVector3Array(const Vector3 *in, Vector3 *out, int count) const
static WWINLINE float Dot_Product(const Vector3 &a, const Vector3 &b)
static void ClampMin(float *dst, float *src, const float min, const int count)
static void Normalize(Vector3 *dst, const int count)
static void Transform(Vector3 *dst, const Vector3 *src, const Matrix3D &matrix, const int count)
static void Clamp(Vector4 *dst, const Vector4 *src, const float min, const float max, const int count)
static void MulAdd(float *dest, float multiplier, float add, int count)
static void Clear(Vector3 *dst, const int count)
static void Power(float *dst, float *src, const float pow, const int count)
static void Prefetch(void *address)
static void MinMax(Vector3 *src, Vector3 &min, Vector3 &max, const int count)
static void DotProduct(float *dst, const Vector3 &a, const Vector3 *b, const int count)
static void CopyIndexed(unsigned *dst, const unsigned *src, const unsigned int *index, const int count)
static void Copy(unsigned *dst, const unsigned *src, const int count)
// NOTE(review): these three definitions have empty replacement lists, unlike
// the BROADCAST, SHUFFLE and TRANSPOSE definitions at the top of this listing.
// They look like an index of macro names rather than real redefinitions -
// confirm against the complete file before relying on them.
#define BROADCAST(XMM, INDEX)
#define SHUFFLE(x, y, z, w)
#define TRANSPOSE(BX, BY, BZ, BW, TV)