From 36e785bc77ce5a234fe234d1a72876e73dfed81c Mon Sep 17 00:00:00 2001 From: Tristan Campbell Date: Fri, 31 Jan 2003 01:17:19 +0000 Subject: [PATCH] First draft of a altivec copy with cache prefetch --- ...g_lwjgl_Math_MatrixOpCopy_MatrixOpSafe.cpp | 162 ++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 src/native/macosx/altivec/org_lwjgl_Math_MatrixOpCopy_MatrixOpSafe.cpp diff --git a/src/native/macosx/altivec/org_lwjgl_Math_MatrixOpCopy_MatrixOpSafe.cpp b/src/native/macosx/altivec/org_lwjgl_Math_MatrixOpCopy_MatrixOpSafe.cpp new file mode 100644 index 00000000..306786c9 --- /dev/null +++ b/src/native/macosx/altivec/org_lwjgl_Math_MatrixOpCopy_MatrixOpSafe.cpp @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2002 Light Weight Java Game Library Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of 'Light Weight Java Game Library' nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* remember to turn on the -faltivec flag for gcc compilation */ + + +/** + * $Id$ + * + * math library. + * + * @author cix_foo + * @version $Revision$ + */ + +#ifdef _WIN32 +#include +#endif +#include +#include "org_lwjgl_Math_MatrixOpCopy_MatrixOpSafe.h" +#include "MatrixOpCommon.h" + + +void altivec_CopyPackedSafe (char * src, char * dst, int length); + +/* + * Class: org_lwjgl_Math_MatrixOpCopy_MatrixOpSafe + * Method: execute + * Signature: (IIIIIZIIZ)V + */ +JNIEXPORT void JNICALL Java_org_lwjgl_Math_00024MatrixOpCopy_00024MatrixOpSafe_execute + ( + JNIEnv * env, + jobject obj, + jint sourceAddress, + jint sourceStride, + jint numElements, + jint sourceWidth, + jint sourceHeight, + jboolean transposeSource, + jint destAddress, + jint destStride, + jboolean transposeDest + ) +{ + // remove any unnecessary copying + if (transposeSource == transposeDest) + { + transposeSource = false; + transposeDest = false; + } + + /* handle all cases where the data is packed and transposition is not necessary */ + if ((transposeSource == transposeDest) + && ((sourceWidth * sourceHeight * 4) == sourceStride)) + { + altivec_CopyPackedSafe((char *)sourceAddress, (char *)destAddress, numElements*sourceWidth*sourceHeight*4); + return; + } + + MatrixSrc source (sourceAddress, sourceStride, sourceWidth, sourceHeight, numElements, transposeSource); + MatrixDst dest (destAddress, destStride, source.width, source.height, source.elements, transposeDest); + + float * srcMatrix, * destMatrix; + int matrixByteCount = source.width*source.height*sizeof(jfloat); + + for (int i = 0; i < source.elements; i++) + { + srcMatrix = source.nextMatrix(); + destMatrix = dest.nextMatrix(); + + // just do a straight memory copy + memcpy(destMatrix, srcMatrix, matrixByteCount); + dest.writeComplete(); + } +} + +void altivec_CopyPackedSafe (char * src, char * dst, int length) +{ + int src_a = (int)src; + int dst_a = (int)dst; + + //std::cout << "src: " << src_a << " dst: " << dst_a <<"\n"; + + if ((src_a & 0x0F) == (dst_a & 0x0F)) + { + + //std::cout << "same alignment\n" << "\n"; + int first_bytes = 16 - ((int)(src) & 0x0000000F); + + //std::cout << "first bytes" << first_bytes << "\n"; + int i = first_bytes; + if (first_bytes > length) + first_bytes = length; + if (first_bytes == 16) + first_bytes = 0; + + while (i--) + dst[i] = src[i]; + + src = &src[first_bytes]; + dst = &dst[first_bytes]; + + length -= first_bytes; + //std::cout << "new length" << length << "\n"; + + // figure out how many 16 byte chunks there are + int middle_cycles = (length >> 4); // ignore any other bytes + length -= (middle_cycles << 4); + + while (middle_cycles --) + { + // load a vector, set the cache line to be LRU + vector float a = (vector float) vec_ldl(0, (float *) src); + src += 16; + + // write it back, set cache line LRU, gets flushed back to RAM (not L2 or L3) + vec_stl(a, 0, (float *) dst); + dst += 16; + } + + // write back any remaining bytes + while(length--) + dst[length] = src[length]; + + } + else + { //std::cout << "different alignment\n"; + // differing offsets (byte by byte copy) + while (length--) + dst[length] = src[length]; + } +}