Richard Boegli's CnC_Generals_Zero_Hour Fork WIP
This is documentation of Richard Boegli's Zero Hour Fork
 
Loading...
Searching...
No Matches
vp.cpp
Go to the documentation of this file.
1/*
2** Command & Conquer Generals Zero Hour(tm)
3** Copyright 2025 Electronic Arts Inc.
4**
5** This program is free software: you can redistribute it and/or modify
6** it under the terms of the GNU General Public License as published by
7** the Free Software Foundation, either version 3 of the License, or
8** (at your option) any later version.
9**
10** This program is distributed in the hope that it will be useful,
11** but WITHOUT ANY WARRANTY; without even the implied warranty of
12** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13** GNU General Public License for more details.
14**
15** You should have received a copy of the GNU General Public License
16** along with this program. If not, see <http://www.gnu.org/licenses/>.
17*/
18
19/***********************************************************************************************
20 *** C O N F I D E N T I A L --- W E S T W O O D S T U D I O S ***
21 ***********************************************************************************************
22 * *
23 * Project Name : wwmath *
24 * *
25 * $Archive:: /Commando/Code/WWMath/vp.cpp $*
26 * *
27 * Org Author:: Hector Yee *
28 * *
29 * Author : Kenny Mitchell *
30 * *
31 * $Modtime:: 06/26/02 4:04p $*
32 * *
33 * $Revision:: 12 $*
34 * *
35 * 06/26/02 KM Matrix name change to avoid MAX conflicts *
36 *---------------------------------------------------------------------------------------------*/
37
38#include "vp.h"
39#include "vector2.h"
40#include "vector3.h"
41#include "vector4.h"
42#include "matrix3d.h"
43#include "matrix4.h"
44#include "wwdebug.h"
45#include "cpudetect.h"
46#include <memory.h>
47
// SHUFFLE packs four 2-bit lane selectors into one 8-bit immediate for shufps:
// x lands in bits 7..6, y in bits 5..4, z in bits 3..2 and w in bits 1..0.
// Each argument is masked with 3, so only values 0..3 are meaningful.
48#define SHUFFLE(x, y, z, w) (((x)&3)<< 6|((y)&3)<<4|((z)&3)<< 2|((w)&3))
// BROADCAST emits a single "shufps XMM,XMM,imm" whose four selectors are all INDEX,
// which replicates lane INDEX of XMM into all four lanes of XMM.
49#define BROADCAST(XMM, INDEX) __asm shufps XMM,XMM,(((INDEX)&3)<< 6|((INDEX)&3)<<4|((INDEX)&3)<< 2|((INDEX)&3))
50
// TRANSPOSE transposes, in place, the 4x4 float matrix whose rows are held in the
// registers BX, BY, BZ, BW. TV is a scratch register and is overwritten.
// The unpcklps/unpckhps steps interleave row pairs (BX with BY, BZ with BW); the final
// shufps steps combine the low/high halves of those interleaved pairs so that on exit
// BX, BY, BZ, BW hold columns 0, 1, 2, 3 of the original matrix.
// Each step is its own "__asm" statement so the macro can be used inside an __asm block.
51#define TRANSPOSE(BX, BY, BZ, BW, TV) \
52 __asm movaps TV,BZ \
53 __asm unpcklps BZ,BW \
54 __asm unpckhps TV,BW \
55 __asm movaps BW,BX \
56 __asm unpcklps BX,BY \
57 __asm unpckhps BW,BY \
58 __asm movaps BY,BX \
59 __asm shufps BX,BZ,SHUFFLE(1, 0, 1, 0) \
60 __asm shufps BY,BZ,SHUFFLE(3, 2, 3, 2) \
61 __asm movaps BZ,BW \
62 __asm shufps BZ,TV,SHUFFLE(1, 0, 1, 0) \
63 __asm shufps BW,TV,SHUFFLE(3, 2, 3, 2)
64
65
67{
68#if defined (__ICL) // Detect Intel compiler
69 if (CPUDetectClass::_Has_SSE_Instruction_Set()) {
70 __asm {
71// mov edx,address
72// mov eax,[edx]
73// prefetchT1 address
74 }
75 }
76#endif
77}
78
// Constant row (0,0,0,1). The SSE path below loads three 16-byte rows from mtx and uses
// this as the fourth row so that a full 4x4 can be transposed.
79static Vector4 lastrow(0.0f,0.0f,0.0f,1.0f);
// Transforms `count` Vector3 values read from src by mtx and writes the results to dst.
//
// A count <= 0 returns immediately. When compiled with the Intel compiler (__ICL) and
// CPUDetectClass::_Has_SSE_Instruction_Set() reports SSE, the inline-assembly path is used;
// in every other case the work is delegated to mtx.mulVector3Array(src, dst, count).
//
// Outline of the SSE path:
//  1. Load the three matrix rows plus `lastrow`, transpose them so xmm4..xmm7 hold the
//     matrix columns, then reorder each column's lanes to (c0, c0, c1, c2).
//  2. Process vertices one at a time until the dst pointer is 16-byte aligned.
//  3. Process groups of four vertices (48 bytes) with three aligned 16-byte stores.
//     Two loop variants exist: _noxlatelp skips the translation add when the three
//     dwords at mtx+12, mtx+28 and mtx+44 are all zero, _xlatelp adds it.
//  4. Process the remaining (count & 3) vertices one at a time.
80void VectorProcessorClass::Transform (Vector3* dst,const Vector3 *src, const Matrix3D& mtx, const int count)
81{
82 if (count<=0) return;
83
84#if defined (__ICL) // Detect Intel compiler
85 if (CPUDetectClass::_Has_SSE_Instruction_Set()) {
86
87 __asm {
// Register roles: edx = dst cursor, eax = src cursor, ebx = matrix, edi = vertices left.
88 mov edx,dst
89 mov eax,src
90 mov ebx,mtx
91 mov edi,count
92
// Unaligned loads of the three matrix rows; the fourth row comes from `lastrow`.
93 movups xmm4,[ebx+0]
94 movups xmm5,[ebx+16]
95 movups xmm6,[ebx+32]
96 movups xmm7,lastrow //[ebx+48]
97
// After the transpose xmm4, xmm5, xmm6 hold matrix columns 0..2 and xmm7 holds column 3
// (the elements at row offsets +12, +28, +44, followed by the 1 from lastrow).
// xmm0 is scratch.
98 TRANSPOSE(xmm4, xmm5, xmm6, xmm7, xmm0);
99
// Reorder each column's lanes to (c0, c0, c1, c2). A transformed vertex therefore comes out
// as (X, X, Y, Z), which lets the stores below pick X from lane 0 and Y,Z from lanes 2..3.
100 shufps xmm4,xmm4,SHUFFLE(2,1,0,0)
101 shufps xmm5,xmm5,SHUFFLE(2,1,0,0)
102 shufps xmm6,xmm6,SHUFFLE(2,1,0,0)
103 shufps xmm7,xmm7,SHUFFLE(2,1,0,0)
104
// esi shadows the dst address and is used only for the 16-byte alignment test.
105 mov esi,edx
// _lp: single-vertex loop. Exits when no vertices remain; switches to the four-at-a-time
// code once esi is 16-byte aligned.
106 _lp:
107 test edi,edi
108 jz _ulos
109 test esi,0xf
110 jz _aligned
// Broadcast the source x, y, z and accumulate x*col0 + y*col1 + z*col2 + col3.
111 movss xmm0,[eax]
112 movss xmm1,[eax+4]
113 movss xmm2,[eax+8]
114 BROADCAST(xmm0,0)
115 BROADCAST(xmm1,0)
116 BROADCAST(xmm2,0)
117 mulps xmm0,xmm4
118 mulps xmm1,xmm5
119 mulps xmm2,xmm6
120 addps xmm0,xmm1
121 addps xmm0,xmm2
122 addps xmm0,xmm7
// xmm0 = (X, X, Y, Z): movss writes lane 0 (X), movhps writes the upper two lanes (Y, Z).
123 movss [edx],xmm0
124 movhps [edx+4],xmm0
// Advance by one Vector3 (12 bytes).
125 add eax,12
126 add edx,12
127 add esi,12
128 dec edi
129 jmp _lp
130 _aligned:
131
// Make esi non-aligned from here on so that any later pass through _lp takes the
// single-vertex path (used for the leftover vertices after the block loop).
132 mov esi,1
133
// ecx = number of vertices in whole groups of four, edi = leftover (count & 3).
// If there is no whole group, go back to _lp to finish one vertex at a time.
134 mov ecx,edi
135 and edi,3
136 and ecx,~3
137 jz _lp
138
// ecx = ecx * 12 bytes. Both cursors are moved to the end of the block and ecx is negated,
// so [eax+ecx] / [edx+ecx] walk the block while ecx counts up towards zero.
139 lea ecx,[ecx+ecx*2]
140 shl ecx,2
141 add eax,ecx
142 add edx,ecx
143 neg ecx
144
// Pick the loop variant: if any of the three dwords at +12, +28, +44 of the matrix is
// non-zero, the translation column must be added (_xlatelp); otherwise it is skipped.
// The comparison is on the raw 32-bit pattern, not a floating-point compare.
145 cmp dword ptr [ebx+12],0
146 jne _xlatelp
147 cmp dword ptr [ebx+28],0
148 jne _xlatelp
149 cmp dword ptr [ebx+44],0
150 jne _xlatelp
151 jmp _noxlatelp
152
153 align 16
154
// _noxlatelp: four vertices per iteration, no translation add.
155 _noxlatelp:
// Prefetch the source data of the following group.
156 prefetchnta [eax+ecx+48]
157 prefetchnta [eax+ecx+48+32]
158
// Vertex 0 -> xmm0 = (X0, X0, Y0, Z0)
159 movss xmm0,[eax+ecx]
160 BROADCAST(xmm0,0)
161 movss xmm1,[eax+ecx+4]
162 BROADCAST(xmm1,0)
163 movss xmm2,[eax+ecx+8]
164 BROADCAST(xmm2,0)
165 mulps xmm0,xmm4
166 mulps xmm1,xmm5
167 mulps xmm2,xmm6
168 addps xmm0,xmm1
169 addps xmm0,xmm2
170
// Vertex 1 -> xmm3 = (X1, X1, Y1, Z1)
171 movss xmm1,[eax+ecx+12]
172 BROADCAST(xmm1,0)
173 movss xmm2,[eax+ecx+16]
174 BROADCAST(xmm2,0)
175 movss xmm3,[eax+ecx+20]
176 BROADCAST(xmm3,0)
177 mulps xmm1,xmm4
178 mulps xmm2,xmm5
179 mulps xmm3,xmm6
180 addps xmm1,xmm2
181 addps xmm3,xmm1
182
// Drop X1 into lane 0 of xmm0, then rotate to (X0, Y0, Z0, X1): first 16 output bytes.
183 movss xmm0,xmm3
184 shufps xmm0,xmm0,SHUFFLE(0,3,2,1)
185
186 movaps [edx+ecx],xmm0
187
// Prefetch the destination of the following group.
188 prefetcht0 [edx+ecx+48]
189 prefetcht0 [edx+ecx+48+32]
190
// Vertex 2 -> xmm0 = (X2, X2, Y2, Z2)
191 movss xmm0,[eax+ecx+24]
192 BROADCAST(xmm0,0)
193 movss xmm1,[eax+ecx+24+4]
194 BROADCAST(xmm1,0)
195 movss xmm2,[eax+ecx+24+8]
196 BROADCAST(xmm2,0)
197 mulps xmm0,xmm4
198 mulps xmm1,xmm5
199 mulps xmm2,xmm6
200 addps xmm0,xmm1
201 addps xmm0,xmm2
202
// xmm3 = (Y1, Z1, X2, Y2): second 16 output bytes.
203 shufps xmm3,xmm0,SHUFFLE(2,1,3,2)
204 movaps [edx+ecx+16],xmm3
205
// Vertex 3 -> xmm1 = (X3, X3, Y3, Z3)
206 movss xmm1,[eax+ecx+24+12]
207 BROADCAST(xmm1,0)
208 movss xmm2,[eax+ecx+24+16]
209 BROADCAST(xmm2,0)
210 movss xmm3,[eax+ecx+24+20]
211 BROADCAST(xmm3,0)
212 mulps xmm1,xmm4
213 mulps xmm2,xmm5
214 mulps xmm3,xmm6
215 addps xmm1,xmm2
216 addps xmm1,xmm3
217
// Move Z2 into lane 0 of xmm0 and then into xmm1 -> (Z2, X3, Y3, Z3): last 16 output bytes.
218 shufps xmm0,xmm0,SHUFFLE(2,1,0,3)
219 movss xmm1,xmm0
220 movaps [edx+ecx+32],xmm1
221
// Next group of four; loop while the negative byte offset has not reached zero.
222 add ecx,48
223 js _noxlatelp
224
// Block finished: return to _lp for the leftover vertices (or to exit when edi is 0).
225 jmp _lp
226
227 align 16
228
// _xlatelp: same as _noxlatelp, but xmm7 (translation column) is added to every vertex.
229 _xlatelp:
// Prefetch the source data of the following group.
230 prefetchnta [eax+ecx+48]
231 prefetchnta [eax+ecx+48+32]
232
// Vertex 0 -> xmm0 = (X0, X0, Y0, Z0)
233 movss xmm0,[eax+ecx]
234 BROADCAST(xmm0,0)
235 movss xmm1,[eax+ecx+4]
236 BROADCAST(xmm1,0)
237 movss xmm2,[eax+ecx+8]
238 BROADCAST(xmm2,0)
239 mulps xmm0,xmm4
240 mulps xmm1,xmm5
241 mulps xmm2,xmm6
242 addps xmm0,xmm1
243 addps xmm0,xmm2
244 addps xmm0,xmm7
245
// Vertex 1 -> xmm3 = (X1, X1, Y1, Z1)
246 movss xmm1,[eax+ecx+12]
247 BROADCAST(xmm1,0)
248 movss xmm2,[eax+ecx+16]
249 BROADCAST(xmm2,0)
250 movss xmm3,[eax+ecx+20]
251 BROADCAST(xmm3,0)
252 mulps xmm1,xmm4
253 mulps xmm2,xmm5
254 mulps xmm3,xmm6
255 addps xmm1,xmm2
256 addps xmm3,xmm1
257 addps xmm3,xmm7
258
// xmm0 = (X0, Y0, Z0, X1): first 16 output bytes.
259 movss xmm0,xmm3
260 shufps xmm0,xmm0,SHUFFLE(0,3,2,1)
261 movaps [edx+ecx],xmm0
262
// Prefetch the destination of the following group.
263 prefetcht0 [edx+ecx+48]
264 prefetcht0 [edx+ecx+48+32]
265
// Vertex 2 -> xmm0 = (X2, X2, Y2, Z2)
266 movss xmm0,[eax+ecx+24]
267 BROADCAST(xmm0,0)
268 movss xmm1,[eax+ecx+24+4]
269 BROADCAST(xmm1,0)
270 movss xmm2,[eax+ecx+24+8]
271 BROADCAST(xmm2,0)
272 mulps xmm0,xmm4
273 mulps xmm1,xmm5
274 mulps xmm2,xmm6
275 addps xmm0,xmm1
276 addps xmm0,xmm2
277 addps xmm0,xmm7
278
// xmm3 = (Y1, Z1, X2, Y2): second 16 output bytes.
279 shufps xmm3,xmm0,SHUFFLE(2,1,3,2)
280 movaps [edx+ecx+16],xmm3
281
// Vertex 3 -> xmm1 = (X3, X3, Y3, Z3)
282 movss xmm1,[eax+ecx+24+12]
283 BROADCAST(xmm1,0)
284 movss xmm2,[eax+ecx+24+16]
285 BROADCAST(xmm2,0)
286 movss xmm3,[eax+ecx+24+20]
287 BROADCAST(xmm3,0)
288 mulps xmm1,xmm4
289 mulps xmm2,xmm5
290 mulps xmm3,xmm6
291 addps xmm1,xmm2
292 addps xmm1,xmm3
293 addps xmm1,xmm7
294
// xmm1 = (Z2, X3, Y3, Z3): last 16 output bytes.
295 shufps xmm0,xmm0,SHUFFLE(2,1,0,3)
296 movss xmm1,xmm0
297
298 movaps [edx+ecx+32],xmm1
299
// Next group of four; loop while the negative byte offset has not reached zero.
300 add ecx,48
301 js _xlatelp
302
// Block finished: return to _lp for the leftover vertices (or to exit when edi is 0).
303 jmp _lp
// _ulos: exit label of the assembly block.
304 _ulos:
305 }
306
307 }
308 else
309#endif
// Generic path: used when not built with __ICL or when SSE is not reported.
310 {
311 mtx.mulVector3Array(src, dst, count);
312 }
313}
314
315void VectorProcessorClass::Transform(Vector4* dst,const Vector3 *src, const Matrix4x4& matrix, const int count)
316{
317 if (count<=0) return;
318
319 int i;
320
321 for (i=0; i<count; i++)
322 {
323 dst[i]=matrix*src[i];
324 }
325}
326
327void VectorProcessorClass::Copy(Vector2 *dst, const Vector2 *src, int count)
328{
329 if (count<=0) return;
330 memcpy(dst,src,sizeof(Vector2)*count);
331}
332
333void VectorProcessorClass::Copy(unsigned *dst, const unsigned *src, int count)
334{
335 if (count<=0) return;
336 memcpy(dst,src,sizeof(unsigned)*count);
337}
338
339void VectorProcessorClass::Copy(Vector3 *dst, const Vector3 *src, int count)
340{
341 if (count<=0) return;
342 memcpy(dst,src,sizeof(Vector3)*count);
343}
344
345void VectorProcessorClass::Copy(Vector4 *dst, const Vector4 *src, int count)
346{
347 if (count<=0) return;
348 memcpy(dst,src,sizeof(Vector4)*count);
349}
350
351void VectorProcessorClass::Copy(Vector4 *dst,const Vector3 *src, const float * srca, const int count)
352{
353 if (count<=0) return;
354 int i;
355
356 for (i=0; i<count; i++)
357 {
358 dst[i].X=src[i].X;
359 dst[i].Y=src[i].Y;
360 dst[i].Z=src[i].Z;
361 dst[i].W=srca[i];
362 }
363}
364
365void VectorProcessorClass::Copy(Vector4 *dst,const Vector3 *src, const float srca, const int count)
366{
367 if (count<=0) return;
368 int i;
369
370 for (i=0; i<count; i++)
371 {
372 dst[i].X=src[i].X;
373 dst[i].Y=src[i].Y;
374 dst[i].Z=src[i].Z;
375 dst[i].W=srca;
376 }
377}
378
379void VectorProcessorClass::Copy(Vector4 *dst,const Vector3 &src, const float * srca, const int count)
380{
381 if (count<=0) return;
382 int i;
383
384 for (i=0; i<count; i++)
385 {
386 dst[i].X=src.X;
387 dst[i].Y=src.Y;
388 dst[i].Z=src.Z;
389 dst[i].W=srca[i];
390 }
391}
392
393void VectorProcessorClass::CopyIndexed (unsigned *dst,const unsigned *src, const unsigned int *index, int count)
394{
395 if (count<=0) return;
396 int i;
397
398 for (i=0; i<count; i++)
399 {
400 dst[i]=src[index[i]];
401 }
402}
403
404void VectorProcessorClass::CopyIndexed (Vector2 *dst,const Vector2 *src, const unsigned int *index, int count)
405{
406 if (count<=0) return;
407 int i;
408
409 for (i=0; i<count; i++)
410 {
411 dst[i]=src[index[i]];
412 }
413}
414
415void VectorProcessorClass::CopyIndexed (Vector3 *dst,const Vector3 *src, const unsigned int *index, int count)
416{
417 if (count<=0) return;
418 int i;
419
420 for (i=0; i<count; i++)
421 {
422 dst[i]=src[index[i]];
423 }
424}
425
426void VectorProcessorClass::CopyIndexed (Vector4 *dst,const Vector4 *src, const unsigned int *index, int count)
427{
428 if (count<=0) return;
429 int i;
430
431 for (i=0; i<count; i++)
432 {
433 dst[i]=src[index[i]];
434 }
435}
436
437void VectorProcessorClass::CopyIndexed(unsigned char* dst, const unsigned char* src, const unsigned int *index, int count)
438{
439 if (count<=0) return;
440 int i;
441
442 for (i=0; i<count; i++)
443 {
444 dst[i]=src[index[i]];
445 }
446}
447
448void VectorProcessorClass::CopyIndexed(float* dst, float* src, const unsigned int *index, int count)
449{
450 if (count<=0) return;
451 int i;
452
453 for (i=0; i<count; i++)
454 {
455 dst[i]=src[index[i]];
456 }
457}
458
459void VectorProcessorClass::Clamp(Vector4 *dst,const Vector4 *src, const float min, const float max, const int count)
460{
461 if (count<=0) return;
462 int i;
463
464 for (i=0; i<count; i++)
465 {
466 dst[i].X=(src[i].X<min)?min:src[i].X;
467 dst[i].X=(src[i].X>max)?max:src[i].X;
468
469 dst[i].Y=(src[i].Y<min)?min:src[i].Y;
470 dst[i].Y=(src[i].Y>max)?max:src[i].Y;
471
472 dst[i].Z=(src[i].Z<min)?min:src[i].Z;
473 dst[i].Z=(src[i].Z>max)?max:src[i].Z;
474
475 dst[i].W=(src[i].W<min)?min:src[i].W;
476 dst[i].W=(src[i].W>max)?max:src[i].W;
477 }
478}
479
480void VectorProcessorClass::Clear(Vector3*dst, const int count)
481{
482 if (count<=0) return;
483 memset(dst,0,sizeof(Vector3)*count);
484}
485
486
487void VectorProcessorClass::Normalize(Vector3 *dst, const int count)
488{
489 if (count<=0) return;
490 int i;
491
492 for (i=0; i<count; i++)
493 dst[i].Normalize();
494}
495
497{
498 if (count<=0) return;
499 min=*src;
500 max=*src;
501
502 int i;
503
504 for (i=1; i<count; i++)
505 {
506 min.X=MIN(min.X,src[i].X);
507 min.Y=MIN(min.Y,src[i].Y);
508 min.Z=MIN(min.Z,src[i].Z);
509
510 max.X=MAX(max.X,src[i].X);
511 max.Y=MAX(max.Y,src[i].Y);
512 max.Z=MAX(max.Z,src[i].Z);
513 }
514}
515
516void VectorProcessorClass::MulAdd(float * dest,float multiplier,float add,int count)
517{
518 for (int i=0; i<count; i++) {
519 dest[i] = dest[i] * multiplier + add;
520 }
521}
522
523void VectorProcessorClass::DotProduct(float *dst, const Vector3 &a, const Vector3 *b,const int count)
524{
525 for (int i=0; i<count; i++)
526 dst[i]=Vector3::Dot_Product(a,b[i]);
527}
528
529void VectorProcessorClass::ClampMin(float *dst, float *src, const float min, const int count)
530{
531 for (int i=0; i<count; i++)
532 dst[i]=(src[i]>min?src[i]:min);
533}
534
535void VectorProcessorClass::Power(float *dst, float *src, const float pow, const int count)
536{
537 for (int i=0; i<count; i++)
538 dst[i]=powf(src[i],pow);
539}
#define min(x, y)
Definition BaseType.h:101
#define max(x, y)
Definition BaseType.h:105
#define MIN(a, b)
Definition always.h:189
#define MAX(a, b)
Definition always.h:185
void add(float *sum, float *addend)
void mulVector3Array(const Vector3 *in, Vector3 *out, int count) const
Definition matrix3d.h:1661
static WWINLINE float Dot_Product(const Vector3 &a, const Vector3 &b)
Definition vector3.h:293
float X
Definition vector3.h:90
float Z
Definition vector3.h:92
float Y
Definition vector3.h:91
float Y
Definition vector4.h:67
float Z
Definition vector4.h:68
float X
Definition vector4.h:66
float W
Definition vector4.h:69
static void ClampMin(float *dst, float *src, const float min, const int count)
Definition vp.cpp:529
static void Normalize(Vector3 *dst, const int count)
Definition vp.cpp:487
static void Transform(Vector3 *dst, const Vector3 *src, const Matrix3D &matrix, const int count)
Definition vp.cpp:80
static void Clamp(Vector4 *dst, const Vector4 *src, const float min, const float max, const int count)
Definition vp.cpp:459
static void MulAdd(float *dest, float multiplier, float add, int count)
Definition vp.cpp:516
static void Clear(Vector3 *dst, const int count)
Definition vp.cpp:480
static void Power(float *dst, float *src, const float pow, const int count)
Definition vp.cpp:535
static void Prefetch(void *address)
Definition vp.cpp:66
static void MinMax(Vector3 *src, Vector3 &min, Vector3 &max, const int count)
Definition vp.cpp:496
static void DotProduct(float *dst, const Vector3 &a, const Vector3 *b, const int count)
Definition vp.cpp:523
static void CopyIndexed(unsigned *dst, const unsigned *src, const unsigned int *index, const int count)
Definition vp.cpp:393
static void Copy(unsigned *dst, const unsigned *src, const int count)
Definition vp.cpp:333
int test
Definition test6.cpp:32
#define BROADCAST(XMM, INDEX)
Definition vp.cpp:49
#define SHUFFLE(x, y, z, w)
Definition vp.cpp:48
#define TRANSPOSE(BX, BY, BZ, BW, TV)
Definition vp.cpp:51