diff --git a/src/native/win32/MatrixOpCommon.cpp b/src/native/win32/MatrixOpCommon.cpp
index a8907647..36347099 100644
--- a/src/native/win32/MatrixOpCommon.cpp
+++ b/src/native/win32/MatrixOpCommon.cpp
@@ -90,9 +90,7 @@ MatrixSrc::MatrixSrc ( jint addr, jint s,
     record = new float[width * height];
     
     // vectors do not need to be transposed 
-    transpose = (t == JNI_TRUE) 
-                && (w != 1) 
-                && (h != 1);
+    transpose = (t == JNI_TRUE) && (w != 1) && (h != 1);
     
     if (transpose && (width != height))
         // only need temp storage for transpose if the matrix is not square
@@ -175,7 +173,7 @@ MatrixDst::MatrixDst (jint addr, jint s, jint w, jint h, jint e, jboolean t):
     else
         transpose_record = 0;
         
-    data_buffered = JNI_FALSE;
+    data_buffered   = JNI_FALSE;
     record_buffered = JNI_FALSE;
         
     record_offset = address - stride;
@@ -244,8 +242,9 @@ void MatrixDst::createBuffer()
 float * MatrixDst::nextMatrix()
 {
     record_offset = &record_offset[stride];
+    int alignment = ((unsigned int)(record_offset)) & FLOAT_ALIGNMENT;
     
-    if ((((unsigned int)(record_offset)) & FLOAT_ALIGNMENT) || transpose || record_buffered)
+    if (transpose || record_buffered || alignment)
     {
         last_record_in_temp = JNI_TRUE;
         return record;
@@ -276,7 +275,7 @@ void MatrixDst::writeComplete()
         }
         else if (transpose)
         {
-            transposeMatrix(record, (float *) record_offset,  width, height);
+            transposeMatrix(record, (float *) &record_offset[0],  width, height);
         }
         else
             memcpy (record_offset, record, record_size * sizeof(jfloat));
diff --git a/src/native/win32/MatrixOpCommon.h b/src/native/win32/MatrixOpCommon.h
index 07d525b9..181058ec 100755
--- a/src/native/win32/MatrixOpCommon.h
+++ b/src/native/win32/MatrixOpCommon.h
@@ -9,10 +9,30 @@
  
 
 
+////////////////////////////////////////////////////////////////////////////////////////
+// Utility Functions
+////////////////////////////////////////////////////////////////////////////////////////
+
 #define FLOAT_ALIGNMENT 0x00000003
 
+// 23 bit mantisa on a float (we need error for checking if two nums are equal)
+// for now use error of 1/2^18, this could be refined up to 1/2^22 if needed
+#define FLOATING_POINT_ERROR (1.0f/262144.0f)
+
+// check if two numbers are approximately equal, used when floating point errors
+// occur.  Should NEVER check to see if two floats are identical
+
+inline bool approxEqual(float a, float b)
+{
+    a -= b;
+    a = (a < 0) ? -a: a;
+    return (a < FLOATING_POINT_ERROR);
+}
+
 float determinant (const float * matrix , int side);
-void subMatrix (const float * src, int side, float * dst , int col_omit, int row_omit);
+void subMatrix    (const float * src, int side, float * dst , int col_omit, int row_omit);
+
+
 
 ///////////////////////////////////////////////////////////////////////////////////////
 // Matrix
diff --git a/src/native/win32/org_lwjgl_Math_MatrixOpInvert_MatrixOpDirect.cpp b/src/native/win32/org_lwjgl_Math_MatrixOpInvert_MatrixOpDirect.cpp
index b7ae1b36..4c5f9e0a 100644
--- a/src/native/win32/org_lwjgl_Math_MatrixOpInvert_MatrixOpDirect.cpp
+++ b/src/native/win32/org_lwjgl_Math_MatrixOpInvert_MatrixOpDirect.cpp
@@ -42,6 +42,9 @@
 #include <windows.h>
 #include "org_lwjgl_Math_MatrixOpInvert_MatrixOpDirect.h"
 #include "MatrixOpCommon.h"
+#ifdef _DEBUG
+#include <stdio.h>
+#endif
 /*
  * Class:     org_lwjgl_Math_MatrixOpInvert_MatrixOpDirect
  * Method:    execute
@@ -62,6 +65,13 @@ JNIEXPORT void JNICALL Java_org_lwjgl_Math_00024MatrixOpInvert_00024MatrixOpDire
 	jboolean transposeDest
   )
 {
+        if (transposeSource == transposeDest)
+        {
+            transposeSource = JNI_FALSE;
+            transposeDest   = JNI_FALSE;
+        }
+
+
         // We are under the assumption that sourceWidth == sourceHeight and the matrix 
         // defined within is invertable
         
@@ -79,46 +89,90 @@ JNIEXPORT void JNICALL Java_org_lwjgl_Math_00024MatrixOpInvert_00024MatrixOpDire
  
        for (int i = 0; i < source.elements; i++)
         {
-            srcMatrix = source.nextMatrix();
-            destMatrix   = dest.nextMatrix();
+            srcMatrix    = source.nextMatrix();
+            destMatrix   = dest  .nextMatrix();
 
             // calculate the determinant
             float det = determinant(srcMatrix, source.width);
             
 #ifdef _DEBUG
-            printf("Determinant: %f\n", det);
+            printf("Matrix Determinant: %f\n", det);
+            printf("Matrix Determinant - 1 = %f\n", det -1);
+            printf("FLOATING POINT ERROR: %f\n", FLOATING_POINT_ERROR);
 #endif
-
-            float sign;
-            
-            for (int col = 0; col < source.width; col++)
+        
+            // use approxEqual to avoid direct comparisons
+            if (approxEqual(det, 1.0f) || 
+                approxEqual(det, -1.0f))
             {
-                /*
-                    Maintain sign:
-                    
-                    + - + - ...
-                    - + - + ..
-                    + - + - ..
-                    - + - + ..
-                    : : : : \
-                */
             
-                sign = (col & 1) ? -1.0f : 1.0f;
+#ifdef _DEBUG
+                printf("Matrix is Orthogonal\n");
+#endif
+                /* this matrix is orthogonal
                 
-                for (int row = 0; row < source.height; row++)
+                    since   inv(M) * M = I
+                    when orthogonal
+                            trans(M) * M = I
+                            
+                    proper orthogonal
+                            inv(M) = trans(M)
+                    improper orthogonal
+                            inv(M) = -trans(M)
+                */
+                
+                if (approxEqual(det, 1))
                 {
-                    // get the sub matrix
-                    subMatrix(srcMatrix, source.width, temp_matrix, col, row);
-                    
-                    // transpose the result
-                    destMatrix[col + row * source.height] 
-                            = (sign / det) * determinant(temp_matrix, temp_side); 
-                    
-                    // swap signs
-                    sign = (sign == 1) ? -1.0f : 1.0f;
+                    // proper orthogonal
+                    int srcIndex = 0;
+                    for (int col = 0; col < source.width; col++)
+                        for (int row = 0; row < source.height; row++)
+                            destMatrix[col + row * source.width] = srcMatrix[srcIndex++];
+                }
+                else
+                {
+                    // improper orthogonal
+                    int srcIndex = 0;
+                    for (int col = 0; col < source.width; col++)
+                        for (int row = 0; row < source.height; row++)
+                            destMatrix[col + row * source.width] = -srcMatrix[srcIndex++];
                 }
             }
-    
+            else
+            {
+                
+                float sign;
+                
+                for (int col = 0; col < source.width; col++)
+                {
+                    /*
+                        Maintain sign:
+                        
+                        + - + - ...
+                        - + - + ..
+                        + - + - ..
+                        - + - + ..
+                        : : : : \
+                    */
+                
+                    sign = (col & 1) ? -1.0f : 1.0f;
+                    
+                    for (int row = 0; row < source.height; row++)
+                    {
+                        // get the sub matrix
+                        subMatrix(srcMatrix, source.width, temp_matrix, col, row);
+                        
+                        // transpose the result
+                        destMatrix[col + row * source.height] 
+                                = (sign / det) * determinant(temp_matrix, temp_side); 
+                        
+                        // swap signs
+                        sign = (sign == 1) ? -1.0f : 1.0f;
+                    }
+                }
+        
+                
+            }
             dest.writeComplete();
         }
 }
diff --git a/src/native/win32/org_lwjgl_Math_MatrixOpInvert_MatrixOpSafe.cpp b/src/native/win32/org_lwjgl_Math_MatrixOpInvert_MatrixOpSafe.cpp
index 8ad43c98..ba74e7ef 100644
--- a/src/native/win32/org_lwjgl_Math_MatrixOpInvert_MatrixOpSafe.cpp
+++ b/src/native/win32/org_lwjgl_Math_MatrixOpInvert_MatrixOpSafe.cpp
@@ -39,15 +39,14 @@
  * @version $Revision$
  */
  
- 
-#ifdef _DEBUG
-#include <stdio.h>
-#endif
-
 #include <windows.h>
 #include "org_lwjgl_Math_MatrixOpInvert_MatrixOpSafe.h"
 #include "MatrixOpCommon.h"
 
+#ifdef _DEBUG
+#include <stdio.h>
+#endif
+
 /*
  * Class:     org_lwjgl_Math_MatrixOpInvert_MatrixOpSafe
  * Method:    execute
@@ -68,6 +67,13 @@ JNIEXPORT void JNICALL Java_org_lwjgl_Math_00024MatrixOpInvert_00024MatrixOpSafe
 	jboolean transposeDest
   )
 {
+
+        if (transposeSource == transposeDest)
+        {
+            transposeSource = JNI_FALSE;
+            transposeDest   = JNI_FALSE;
+        }
+        
         // We are under the assumption that sourceWidth == sourceHeight and the matrix 
         // defined within is invertable
         
@@ -90,39 +96,82 @@ JNIEXPORT void JNICALL Java_org_lwjgl_Math_00024MatrixOpInvert_00024MatrixOpSafe
             float det = determinant(srcMatrix, source.width);
             
 #ifdef _DEBUG
-            printf("Determinant: %f\n", det);
+            printf("Matrix Determinant: %f\n", det);
+            printf("Matrix Determinant - 1: %f\n", det-1);
+            printf("FLOATING POINT ERROR: %f\n", FLOATING_POINT_ERROR);
 #endif
-
-            float sign;
             
-            for (int col = 0; col < source.width; col++)
+            // use approxEqual to avoid direct comparisons
+            if (approxEqual(det,1) || approxEqual(det, -1))
             {
-                /*
-                    Maintain sign:
-                    
-                    + - + - ...
-                    - + - + ..
-                    + - + - ..
-                    - + - + ..
-                    : : : : \
-                */
             
-                sign = (col & 1) ? -1.0f : 1.0f;
+#ifdef _DEBUG
+                printf("Matrix is Orthogonal\n");
+#endif
+                /* this matrix is orthogonal
                 
-                for (int row = 0; row < source.height; row++)
+                    since   inv(M) * M = I
+                    when orthogonal
+                            trans(M) * M = I
+                            
+                    proper orthogonal
+                            inv(M) = trans(M)
+                    improper orthogonal
+                            inv(M) = -trans(M)
+                */
+                
+                if (approxEqual(det, 1))
                 {
-                    // get the sub matrix
-                    subMatrix(srcMatrix, source.width, temp_matrix, col, row);
-                    
-                    // transpose the result
-                    destMatrix[col + row * source.height] 
-                            = (sign / det) * determinant(temp_matrix, temp_side); 
-                    
-                    // swap signs
-                    sign = (sign == 1) ? -1.0f : 1.0f;
+                    // proper orthogonal
+                    int srcIndex = 0;
+                    for (int col = 0; col < source.width; col++)
+                        for (int row = 0; row < source.height; row++)
+                            destMatrix[col + row * source.width] = srcMatrix[srcIndex++];
+                }
+                else
+                {
+                    // improper orthogonal
+                    int srcIndex = 0;
+                    for (int col = 0; col < source.width; col++)
+                        for (int row = 0; row < source.height; row++)
+                            destMatrix[col + row * source.width] = -srcMatrix[srcIndex++];
                 }
             }
-    
+            else
+            {
+                
+                float sign;
+                
+                for (int col = 0; col < source.width; col++)
+                {
+                    /*
+                        Maintain sign:
+                        
+                        + - + - ...
+                        - + - + ..
+                        + - + - ..
+                        - + - + ..
+                        : : : : \
+                    */
+                
+                    sign = (col & 1) ? -1.0f : 1.0f;
+                    
+                    for (int row = 0; row < source.height; row++)
+                    {
+                        // get the sub matrix
+                        subMatrix(srcMatrix, source.width, temp_matrix, col, row);
+                        
+                        // transpose the result
+                        destMatrix[col + row * source.height] 
+                                = (sign / det) * determinant(temp_matrix, temp_side); 
+                        
+                        // swap signs
+                        sign = (sign == 1) ? -1.0f : 1.0f;
+                    }
+                }
+        
+            }
             dest.writeComplete();
+
         }
 }