First draft of an AltiVec copy with cache prefetch
This commit is contained in:
parent
529748c7b7
commit
36e785bc77
|
@ -0,0 +1,162 @@
|
|||
/*
|
||||
* Copyright (c) 2002 Light Weight Java Game Library Project
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name of 'Light Weight Java Game Library' nor the names of
|
||||
* its contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* remember to turn on the -faltivec flag for gcc compilation */
|
||||
|
||||
|
||||
/**
|
||||
* $Id$
|
||||
*
|
||||
* math library.
|
||||
*
|
||||
* @author cix_foo <cix_foo@users.sourceforge.net>
|
||||
* @version $Revision$
|
||||
*/
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
#include <string.h>
|
||||
#include "org_lwjgl_Math_MatrixOpCopy_MatrixOpSafe.h"
|
||||
#include "MatrixOpCommon.h"
|
||||
|
||||
|
||||
/* Byte copy helper defined further down in this file; it copies length bytes from src to dst and uses AltiVec vector loads/stores when both pointers share the same 16 byte alignment. */
void altivec_CopyPackedSafe (char * src, char * dst, int length);
|
||||
|
||||
/*
|
||||
* Class: org_lwjgl_Math_MatrixOpCopy_MatrixOpSafe
|
||||
* Method: execute
|
||||
* Signature: (IIIIIZIIZ)V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_org_lwjgl_Math_00024MatrixOpCopy_00024MatrixOpSafe_execute
|
||||
(
|
||||
JNIEnv * env,
|
||||
jobject obj,
|
||||
jint sourceAddress,
|
||||
jint sourceStride,
|
||||
jint numElements,
|
||||
jint sourceWidth,
|
||||
jint sourceHeight,
|
||||
jboolean transposeSource,
|
||||
jint destAddress,
|
||||
jint destStride,
|
||||
jboolean transposeDest
|
||||
)
|
||||
{
|
||||
// remove any unnecessary copying
|
||||
if (transposeSource == transposeDest)
|
||||
{
|
||||
transposeSource = false;
|
||||
transposeDest = false;
|
||||
}
|
||||
|
||||
/* handle all cases where the data is packed and transposition is not necessary */
|
||||
if ((transposeSource == transposeDest)
|
||||
&& ((sourceWidth * sourceHeight * 4) == sourceStride))
|
||||
{
|
||||
altivec_CopyPackedSafe((char *)sourceAddress, (char *)destAddress, numElements*sourceWidth*sourceHeight*4);
|
||||
return;
|
||||
}
|
||||
|
||||
MatrixSrc source (sourceAddress, sourceStride, sourceWidth, sourceHeight, numElements, transposeSource);
|
||||
MatrixDst dest (destAddress, destStride, source.width, source.height, source.elements, transposeDest);
|
||||
|
||||
float * srcMatrix, * destMatrix;
|
||||
int matrixByteCount = source.width*source.height*sizeof(jfloat);
|
||||
|
||||
for (int i = 0; i < source.elements; i++)
|
||||
{
|
||||
srcMatrix = source.nextMatrix();
|
||||
destMatrix = dest.nextMatrix();
|
||||
|
||||
// just do a straight memory copy
|
||||
memcpy(destMatrix, srcMatrix, matrixByteCount);
|
||||
dest.writeComplete();
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Copies length bytes from src to dst.
 *
 * When the compiler has AltiVec enabled and both pointers have the same offset
 * within a 16 byte block, the copy is split in three parts:
 *   1. a byte-wise head that brings both pointers up to a 16 byte boundary,
 *   2. whole 16 byte vectors moved with vec_ldl / vec_stl (the LRU variants
 *      of the vector load / store),
 *   3. a byte-wise tail.
 * In every other case (different alignment, or no AltiVec support at compile
 * time) the data is copied one byte at a time.
 *
 * A length of zero or less copies nothing.
 */
void altivec_CopyPackedSafe (char * src, char * dst, int length)
{
    // Nothing to do; this also keeps the count-down loops below from
    // running away when a negative length is passed in.
    if (length <= 0)
        return;

#if defined(__ALTIVEC__) || defined(__VEC__)
    // offset of each pointer inside its 16 byte block
    // (size_t instead of int so the pointer value is not truncated)
    const size_t src_offset = (size_t) src & 0x0F;
    const size_t dst_offset = (size_t) dst & 0x0F;

    if (src_offset == dst_offset)
    {
        // bytes needed to reach the next 16 byte boundary: 0 when already
        // aligned, and never more than the number of bytes requested
        int first_bytes = (int) ((16 - src_offset) & 0x0F);
        if (first_bytes > length)
            first_bytes = length;

        // the loop counter is taken AFTER clamping so the head never
        // reads or writes past the end of the buffers
        int i = first_bytes;
        while (i--)
            dst[i] = src[i];

        src += first_bytes;
        dst += first_bytes;
        length -= first_bytes;

        // figure out how many 16 byte chunks there are
        int middle_cycles = (length >> 4);
        length -= (middle_cycles << 4);

        while (middle_cycles--)
        {
            // load a vector, marking the cache line as least recently used
            vector float a = (vector float) vec_ldl(0, (float *) src);
            src += 16;

            // store it, again marking the cache line as least recently used
            vec_stl(a, 0, (float *) dst);
            dst += 16;
        }
    }
#endif

    // Byte-wise copy of whatever is left: the tail of an aligned copy, or
    // the whole range when the vector path above was not taken.
    while (length--)
        dst[length] = src[length];
}
|
Loading…
Reference in New Issue