From 900eb4e4d2354c8d6de97ec571be19264da6b5ba Mon Sep 17 00:00:00 2001 From: Ioannis Tsakpinis Date: Wed, 10 Aug 2011 16:05:46 +0000 Subject: [PATCH] Added support for cache-line padding and alignment. Made byteOffset() in MappedField optional. Misc fixes and improvements. --- src/java/org/lwjgl/LWJGLUtil.java | 34 ++- .../lwjgl/test/mapped/MappedObjectTests3.java | 10 + .../lwjgl/test/mapped/MappedObjectTests4.java | 109 +++++++++- .../lwjgl/test/mapped/MappedSomething.java | 5 +- .../lwjgl/test/mapped/TestMappedObject.java | 5 + .../test/opengl/sprites/SpriteShootout.java | 8 + .../opengl/sprites/SpriteShootoutMapped.java | 20 +- .../org/lwjgl/util/mapped/CacheLinePad.java | 66 ++++++ .../org/lwjgl/util/mapped/CacheLineSize.java | 141 +++++++++++++ src/java/org/lwjgl/util/mapped/CacheUtil.java | 193 ++++++++++++++++++ .../org/lwjgl/util/mapped/MappedField.java | 2 +- .../org/lwjgl/util/mapped/MappedHelper.java | 6 +- .../util/mapped/MappedObjectClassLoader.java | 34 ++- .../util/mapped/MappedObjectTransformer.java | 159 ++++++++++++--- .../lwjgl/util/mapped/MappedObjectUnsafe.java | 3 +- .../org/lwjgl/util/mapped/MappedType.java | 17 +- 16 files changed, 749 insertions(+), 63 deletions(-) create mode 100644 src/java/org/lwjgl/util/mapped/CacheLinePad.java create mode 100644 src/java/org/lwjgl/util/mapped/CacheLineSize.java create mode 100644 src/java/org/lwjgl/util/mapped/CacheUtil.java diff --git a/src/java/org/lwjgl/LWJGLUtil.java b/src/java/org/lwjgl/LWJGLUtil.java index e1c60d36..a04edaad 100644 --- a/src/java/org/lwjgl/LWJGLUtil.java +++ b/src/java/org/lwjgl/LWJGLUtil.java @@ -449,12 +449,42 @@ public class LWJGLUtil { * Gets a boolean property as a privileged action. 
*/ public static boolean getPrivilegedBoolean(final String property_name) { - Boolean value = AccessController.doPrivileged(new PrivilegedAction() { + return AccessController.doPrivileged(new PrivilegedAction() { public Boolean run() { return Boolean.getBoolean(property_name); } }); - return value; + } + + /** + * Gets an integer property as a privileged action. + * + * @param property_name the integer property name + * + * @return the property value + */ + public static Integer getPrivilegedInteger(final String property_name) { + return AccessController.doPrivileged(new PrivilegedAction() { + public Integer run() { + return Integer.getInteger(property_name); + } + }); + } + + /** + * Gets an integer property as a privileged action. + * + * @param property_name the integer property name + * @param default_val the default value to use if the property is not defined + * + * @return the property value + */ + public static Integer getPrivilegedInteger(final String property_name, final int default_val) { + return AccessController.doPrivileged(new PrivilegedAction() { + public Integer run() { + return Integer.getInteger(property_name, default_val); + } + }); } /** diff --git a/src/java/org/lwjgl/test/mapped/MappedObjectTests3.java b/src/java/org/lwjgl/test/mapped/MappedObjectTests3.java index 13cb0fdb..01ec9c20 100644 --- a/src/java/org/lwjgl/test/mapped/MappedObjectTests3.java +++ b/src/java/org/lwjgl/test/mapped/MappedObjectTests3.java @@ -60,6 +60,16 @@ public class MappedObjectTests3 { assert (addr2 - addr1 == 4); assert (mapped.capacity() == MappedSomething.SIZEOF - 4); + { + assert (some.shared == 0); + assert (mapped.getInt(8) == 0); + + some.shared = 1234; + + assert (some.shared == 1234); + assert (mapped.getInt(8) == 1234); + } + some.view++; mapped = some.data; // creates new ByteBuffer instance diff --git a/src/java/org/lwjgl/test/mapped/MappedObjectTests4.java b/src/java/org/lwjgl/test/mapped/MappedObjectTests4.java index 6beceede..f6ea670c 100644 --- 
a/src/java/org/lwjgl/test/mapped/MappedObjectTests4.java +++ b/src/java/org/lwjgl/test/mapped/MappedObjectTests4.java @@ -34,10 +34,10 @@ package org.lwjgl.test.mapped; import org.lwjgl.MemoryUtil; import org.lwjgl.PointerBuffer; import org.lwjgl.opengl.Display; -import org.lwjgl.util.mapped.MappedObject; -import org.lwjgl.util.mapped.Pointer; +import org.lwjgl.util.mapped.*; import java.io.File; +import java.lang.reflect.Field; import java.nio.ByteBuffer; /** @author Riven */ @@ -145,4 +145,109 @@ public class MappedObjectTests4 { } } + @MappedType(cacheLinePadding = true) + public static class MappedCacheLinePadded extends MappedObject { + + int foo; + int bar; + + } + + public static void testCacheLineAlignment() { + MappedCacheLinePadded data = MappedCacheLinePadded.malloc(10); + + assert (data.backingByteBuffer().capacity() == 10 * CacheUtil.getCacheLineSize()); + assert (MemoryUtil.getAddress(data.backingByteBuffer()) % CacheUtil.getCacheLineSize() == 0); + + for ( int i = 0; i < 10; i++ ) { + data.view = i; + + data.foo = i; + data.bar = i * 2; + } + + for ( int i = 0; i < 10; i++ ) { + data.view = i; + + assert (data.foo == i); + assert (data.bar == i * 2); + } + } + + public static class MappedFieldCacheLinePadded extends MappedObject { + + // If we assume CacheUtil.getCacheLineSize() == 64 + // 0 - 63 + @CacheLinePad long longBar; + // 64 - 71 + long longFoo; + // 72 - 75 + int intFoo; + // 128 - 131 + @CacheLinePad(before = true) int intBar; + // 192 - 195 + int foo; + // 256 - 267 + @CacheLinePad(before = true, after = false) + @MappedField(byteLength = 12) + ByteBuffer buffer; + // 268 - 271 + int bar; + + } + + public static void testCacheLinePadding() { + MappedFieldCacheLinePadded data = MappedFieldCacheLinePadded.map(CacheUtil.createByteBuffer(10 * MappedFieldCacheLinePadded.SIZEOF)); + + final int sizeof = + CacheUtil.getCacheLineSize() + + 8 + + (CacheUtil.getCacheLineSize() - 8) + + CacheUtil.getCacheLineSize() + + 4 + + 
(CacheUtil.getCacheLineSize() - 4) + + 12 + + 4; + + assert (MappedFieldCacheLinePadded.SIZEOF == sizeof); + assert (data.backingByteBuffer().capacity() == sizeof * 10); + + for ( int i = 0; i < 10; i++ ) { + data.view = i; + + data.longFoo = i * 1000000000L; + data.longBar = i * 2000000000L; + data.intFoo = i * 1000; + data.intBar = i * 2000; + data.foo = i; + } + + for ( int i = 0; i < 10; i++ ) { + data.view = i; + + assert (data.longFoo == i * 1000000000L); + assert (data.longBar == i * 2000000000L); + assert (data.intFoo == i * 1000); + assert (data.intBar == i * 2000); + assert (data.foo == i); + } + } + + public static class POJOFieldCacheLinePadded { + + @CacheLinePad long longBar; + long longFoo; + int intFoo; + @CacheLinePad(before = true) int intBar; + int foo; + @CacheLinePad boolean bool; + int bar; + + } + + public static void testCacheLinePaddingPOJO() { + Field[] fields = new POJOFieldCacheLinePadded().getClass().getDeclaredFields(); + assert (fields.length == (1 + 7) + 1 + 1 + (15 + 1 + 15) + 1 + (1 + 63) + 1); + } + } \ No newline at end of file diff --git a/src/java/org/lwjgl/test/mapped/MappedSomething.java b/src/java/org/lwjgl/test/mapped/MappedSomething.java index bf3a0624..95590ad5 100644 --- a/src/java/org/lwjgl/test/mapped/MappedSomething.java +++ b/src/java/org/lwjgl/test/mapped/MappedSomething.java @@ -42,9 +42,12 @@ public class MappedSomething extends MappedObject { @MappedField(byteOffset = 0) public int used; - @MappedField(byteOffset = 4, byteLength = 64 - 4) + @MappedField(byteLength = 64 - 4) // optional byteOffset public ByteBuffer data; + @MappedField(byteOffset = 12) // inside data + public int shared; + @Override public String toString() { return "MappedSomething[" + used + "]"; diff --git a/src/java/org/lwjgl/test/mapped/TestMappedObject.java b/src/java/org/lwjgl/test/mapped/TestMappedObject.java index 5d620316..0cee7f57 100644 --- a/src/java/org/lwjgl/test/mapped/TestMappedObject.java +++ 
b/src/java/org/lwjgl/test/mapped/TestMappedObject.java @@ -52,6 +52,8 @@ public class TestMappedObject { MappedObjectTransformer.register(MappedSomething.class); MappedObjectTransformer.register(MappedObjectTests3.Xyz.class); MappedObjectTransformer.register(MappedObjectTests4.MappedPointer.class); + MappedObjectTransformer.register(MappedObjectTests4.MappedCacheLinePadded.class); + MappedObjectTransformer.register(MappedObjectTests4.MappedFieldCacheLinePadded.class); if ( MappedObjectClassLoader.fork(TestMappedObject.class, args) ) { return; @@ -75,6 +77,9 @@ public class TestMappedObject { MappedObjectTests4.testLocalView(); //MappedObjectTests4.testLWJGL(); MappedObjectTests4.testPointer(); + MappedObjectTests4.testCacheLineAlignment(); + MappedObjectTests4.testCacheLinePadding(); + MappedObjectTests4.testCacheLinePaddingPOJO(); System.out.println("done"); } diff --git a/src/java/org/lwjgl/test/opengl/sprites/SpriteShootout.java b/src/java/org/lwjgl/test/opengl/sprites/SpriteShootout.java index a7d7057e..2a0cc264 100644 --- a/src/java/org/lwjgl/test/opengl/sprites/SpriteShootout.java +++ b/src/java/org/lwjgl/test/opengl/sprites/SpriteShootout.java @@ -86,6 +86,8 @@ public final class SpriteShootout { private int texBigID; private int texSmallID; + long animateTime; + private SpriteShootout() { } @@ -276,6 +278,8 @@ public final class SpriteShootout { long timeUsed = 5000 + (startTime - System.currentTimeMillis()); startTime = System.currentTimeMillis() + 5000; System.out.println("FPS: " + (Math.round(fps / (timeUsed / 1000.0) * 10) / 10.0) + ", Balls: " + ballCount); + System.out.println("\tAnimation: " + (animateTime / fps / 1000) + "us"); + animateTime = 0; fps = 0; } } @@ -582,7 +586,11 @@ public final class SpriteShootout { if ( animate ) { final ByteBuffer buffer = animVBO.map(batchSize * (2 * 4)); + long t0 = System.nanoTime(); animate(transform, buffer.asFloatBuffer(), ballSize, ballIndex, batchSize, delta); + long t1 = System.nanoTime(); + + animateTime 
+= t1 - t0; animVBO.unmap(); } diff --git a/src/java/org/lwjgl/test/opengl/sprites/SpriteShootoutMapped.java b/src/java/org/lwjgl/test/opengl/sprites/SpriteShootoutMapped.java index 7a839cf8..5062e15a 100644 --- a/src/java/org/lwjgl/test/opengl/sprites/SpriteShootoutMapped.java +++ b/src/java/org/lwjgl/test/opengl/sprites/SpriteShootoutMapped.java @@ -89,6 +89,8 @@ public final class SpriteShootoutMapped { private int texBigID; private int texSmallID; + long animateTime; + private SpriteShootoutMapped() { } @@ -312,6 +314,8 @@ public final class SpriteShootoutMapped { long timeUsed = 5000 + (startTime - System.currentTimeMillis()); startTime = System.currentTimeMillis() + 5000; System.out.println("FPS: " + (Math.round(fps / (timeUsed / 1000.0) * 10) / 10.0) + ", Balls: " + ballCount); + System.out.println("Animation: " + animateTime / fps); + animateTime = 0; fps = 0; } } @@ -414,8 +418,8 @@ public final class SpriteShootoutMapped { public static class Sprite extends MappedObject { - public float x, dx; - public float y, dy; + public float dx, x; + public float dy, y; } @@ -527,8 +531,8 @@ public final class SpriteShootoutMapped { final Sprite[] sprites = sprite.asArray(); final SpriteRender[] spritesRender = spriteRender.asArray(); for ( int b = ballIndex, r = 0, len = (ballIndex + batchSize); b < len; b++, r++ ) { - float x = sprites[b].x; float dx = sprites[b].dx; + float x = sprites[b].x; x += dx * delta; if ( x < ballRadius ) { @@ -539,12 +543,12 @@ public final class SpriteShootoutMapped { dx = -dx; } - sprites[b].x = x; sprites[b].dx = dx; + sprites[b].x = x; spritesRender[r].x = x; - float y = sprites[b].y; float dy = sprites[b].dy; + float y = sprites[b].y; y += dy * delta; if ( y < ballRadius ) { @@ -555,8 +559,8 @@ public final class SpriteShootoutMapped { dy = -dy; } - sprites[b].y = y; sprites[b].dy = dy; + sprites[b].y = y; spritesRender[r].y = y; } } @@ -654,7 +658,11 @@ public final class SpriteShootoutMapped { if ( animate ) { final ByteBuffer 
buffer = animVBO.map(batchSize * (2 * 4)); + long t0 = System.nanoTime(); animate(sprites, SpriteRender.map(buffer), ballSize, ballIndex, batchSize, delta); + long t1 = System.nanoTime(); + + animateTime += t1 - t0; animVBO.unmap(); } diff --git a/src/java/org/lwjgl/util/mapped/CacheLinePad.java b/src/java/org/lwjgl/util/mapped/CacheLinePad.java new file mode 100644 index 00000000..6ad2ab03 --- /dev/null +++ b/src/java/org/lwjgl/util/mapped/CacheLinePad.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2002-2011 LWJGL Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of 'LWJGL' nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.lwjgl.util.mapped; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * When this annotation is used on a field, automatic cache-line-sized padding + * will be inserted around the field. This is useful in multi-threaded algorithms + * to avoid cache line false sharing. The annotation defaults to padding after + * the field, but can be changed to before or both before and after. It can be + * applied to both mapped object fields and POJO primitive fields. + * + * @author Spasi + */ +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.FIELD) +public @interface CacheLinePad { + + /** + * When true, cache-line padding will be inserted before the field. + * + * @return + */ + boolean before() default false; + + /** + * When true, cache-line padding will be inserted after the field. 
+ * + * @return + */ + boolean after() default true; + +} \ No newline at end of file diff --git a/src/java/org/lwjgl/util/mapped/CacheLineSize.java b/src/java/org/lwjgl/util/mapped/CacheLineSize.java new file mode 100644 index 00000000..dab259bd --- /dev/null +++ b/src/java/org/lwjgl/util/mapped/CacheLineSize.java @@ -0,0 +1,141 @@ +package org.lwjgl.util.mapped; + +import org.lwjgl.LWJGLUtil; +import org.lwjgl.MemoryUtil; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.IntBuffer; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +import static org.lwjgl.util.mapped.MappedHelper.*; + +/** + * This micro-benchmark tries to detect the CPU's cache line size. This is + * done by exploiting cache line false sharing in multi-threaded code: + * When 2 threads simultaneously access the same cache line (and at least + * 1 access is a write), performance drops considerably. We detect this + * performance drop while decreasing the memory padding in every test step. + * + * @author Spasi + */ +final class CacheLineSize { + + private CacheLineSize() { + } + + static int getCacheLineSize() { + final int THREADS = 2; + final int REPEATS = 100000 * THREADS; + final int LOCAL_REPEATS = REPEATS / THREADS; + + // Detection will start from CacheLineMaxSize bytes. + final int MAX_SIZE = LWJGLUtil.getPrivilegedInteger("org.lwjgl.util.mapped.CacheLineMaxSize", 1024) / 4; // in # of integers + // Detection will stop when the execution time increases by more than CacheLineTimeThreshold %. 
+ final double TIME_THRESHOLD = 1.0 + LWJGLUtil.getPrivilegedInteger("org.lwjgl.util.mapped.CacheLineTimeThreshold", 50) / 100.0; + + final ExecutorService executorService = Executors.newFixedThreadPool(THREADS); + final ExecutorCompletionService completionService = new ExecutorCompletionService(executorService); + + try { + // We need to use a NIO buffer in order to guarantee memory alignment. + final IntBuffer memory = getMemory(MAX_SIZE); + + // -- WARMUP -- + + final int WARMUP = 10; + for ( int i = 0; i < WARMUP; i++ ) + doTest(THREADS, LOCAL_REPEATS, 0, memory, completionService); + + // -- CACHE LINE SIZE DETECTION -- + + long totalTime = 0; + int count = 0; + int cacheLineSize = 64; // fallback to the most common size these days + boolean found = false; + for ( int i = MAX_SIZE; i >= 1; i >>= 1 ) { + final long time = doTest(THREADS, LOCAL_REPEATS, i, memory, completionService); + if ( totalTime > 0 ) { // Ignore first run + final long avgTime = totalTime / count; + if ( (double)time / (double)avgTime > TIME_THRESHOLD ) { // Try to detect a noticeable jump in execution time + cacheLineSize = (i << 1) * 4; + found = true; + break; + } + } + totalTime += time; + count++; + } + + if ( LWJGLUtil.DEBUG ) { + if ( found ) + LWJGLUtil.log("Cache line size detected: " + cacheLineSize + " bytes"); + else + LWJGLUtil.log("Failed to detect cache line size, assuming " + cacheLineSize + " bytes"); + } + + return cacheLineSize; + } finally { + executorService.shutdown(); + } + } + + public static void main(String[] args) { + CacheUtil.getCacheLineSize(); + } + + static long memoryLoop(final int index, final int repeats, final IntBuffer memory, final int padding) { + final long address = MemoryUtil.getAddress(memory) + (index * padding * 4); + + final long time = System.nanoTime(); + for ( int i = 0; i < repeats; i++ ) { + // Use volatile access to avoid server VM optimizations. 
+ ivput(ivget(address) + 1, address); + } + + return System.nanoTime() - time; + } + + private static IntBuffer getMemory(final int START_SIZE) { + final int PAGE_SIZE = MappedObjectUnsafe.INSTANCE.pageSize(); + + final ByteBuffer buffer = ByteBuffer.allocateDirect((START_SIZE * 4) + PAGE_SIZE).order(ByteOrder.nativeOrder()); + + // Align to page and, consequently, to cache line. Otherwise results will be inconsistent. + if ( MemoryUtil.getAddress(buffer) % PAGE_SIZE != 0 ) { + // Round up to page boundary + buffer.position(PAGE_SIZE - (int)(MemoryUtil.getAddress(buffer) & (PAGE_SIZE - 1))); + } + + return buffer.asIntBuffer(); + } + + private static long doTest(final int threads, final int repeats, final int padding, final IntBuffer memory, final ExecutorCompletionService completionService) { + for ( int i = 0; i < threads; i++ ) + submitTest(completionService, i, repeats, memory, padding); + return waitForResults(threads, completionService); + } + + private static void submitTest(final ExecutorCompletionService completionService, final int index, final int repeats, final IntBuffer memory, final int padding) { + completionService.submit(new Callable() { + public Long call() throws Exception { + return memoryLoop(index, repeats, memory, padding); + } + }); + } + + private static long waitForResults(final int count, final ExecutorCompletionService completionService) { + try { + long totalTime = 0; + for ( int i = 0; i < count; i++ ) + totalTime += completionService.take().get(); + return totalTime; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + +} \ No newline at end of file diff --git a/src/java/org/lwjgl/util/mapped/CacheUtil.java b/src/java/org/lwjgl/util/mapped/CacheUtil.java new file mode 100644 index 00000000..c18f5790 --- /dev/null +++ b/src/java/org/lwjgl/util/mapped/CacheUtil.java @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2002-2011 LWJGL Project + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of 'LWJGL' nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.lwjgl.util.mapped; + +import org.lwjgl.LWJGLUtil; +import org.lwjgl.MemoryUtil; +import org.lwjgl.PointerBuffer; + +import java.nio.*; + +/** + * This class provides utility methods for allocating cache-line-aligned + * NIO buffers. The CPU cache line size is detected using a micro-benchmark + * that exploits the performance degradation that occurs when different + * threads write to different locations of the same cache line.
The detection + * should be reasonably robust on both the server and client VM, but there + * are a few system properties that can be used to tune it. + * + * @author Spasi + */ +public final class CacheUtil { + + private static final int CACHE_LINE_SIZE; + + static { + final Integer size = LWJGLUtil.getPrivilegedInteger("org.lwjgl.util.mapped.CacheLineSize"); // forces a specific cache line size + + if ( size != null ) { + if ( size < 1 ) + throw new IllegalStateException("Invalid CacheLineSize specified: " + size); + CACHE_LINE_SIZE = size; + } else if ( Runtime.getRuntime().availableProcessors() == 1 ) { // We cannot use false sharing to detect it + /* + Spasi: + + I have implemented a single-threaded benchmark for this, but it requires + lots of memory allocations and could not tune it for both the client and + server VM. It's not a big deal anyway, 64 bytes should be ok for any + single-core CPU. + */ + if ( LWJGLUtil.DEBUG ) + LWJGLUtil.log("Cannot detect cache line size on single-core CPUs, assuming 64 bytes."); + CACHE_LINE_SIZE = 64; + } else + CACHE_LINE_SIZE = CacheLineSize.getCacheLineSize(); + } + + private CacheUtil() { + } + + /** + * Returns the CPU cache line size, in number of bytes. + * + * @return the cache line size + */ + public static int getCacheLineSize() { + return CACHE_LINE_SIZE; + } + + /** + * Construct a direct, native-ordered and cache-line-aligned bytebuffer with the specified size. + * + * @param size The size, in bytes + * + * @return a ByteBuffer + */ + public static ByteBuffer createByteBuffer(int size) { + ByteBuffer buffer = ByteBuffer.allocateDirect(size + CACHE_LINE_SIZE); + + // Align to cache line. 
+ if ( MemoryUtil.getAddress(buffer) % CACHE_LINE_SIZE != 0 ) { + // Round up to cache line boundary + buffer.position(CACHE_LINE_SIZE - (int)(MemoryUtil.getAddress(buffer) & (CACHE_LINE_SIZE - 1))); + } + + buffer.limit(buffer.position() + size); + return buffer.slice().order(ByteOrder.nativeOrder()); + } + + /** + * Construct a direct, native-ordered and cache-line-aligned shortbuffer with the specified number + * of elements. + * + * @param size The size, in shorts + * + * @return a ShortBuffer + */ + public static ShortBuffer createShortBuffer(int size) { + return createByteBuffer(size << 1).asShortBuffer(); + } + + /** + * Construct a direct, native-ordered and cache-line-aligned charbuffer with the specified number + * of elements. + * + * @param size The size, in chars + * + * @return a CharBuffer + */ + public static CharBuffer createCharBuffer(int size) { + return createByteBuffer(size << 1).asCharBuffer(); + } + + /** + * Construct a direct, native-ordered and cache-line-aligned intbuffer with the specified number + * of elements. + * + * @param size The size, in ints + * + * @return an IntBuffer + */ + public static IntBuffer createIntBuffer(int size) { + return createByteBuffer(size << 2).asIntBuffer(); + } + + /** + * Construct a direct, native-ordered and cache-line-aligned longbuffer with the specified number + * of elements. + * + * @param size The size, in longs + * + * @return a LongBuffer + */ + public static LongBuffer createLongBuffer(int size) { + return createByteBuffer(size << 3).asLongBuffer(); + } + + /** + * Construct a direct, native-ordered and cache-line-aligned floatbuffer with the specified number + * of elements. + * + * @param size The size, in floats + * + * @return a FloatBuffer + */ + public static FloatBuffer createFloatBuffer(int size) { + return createByteBuffer(size << 2).asFloatBuffer(); + } + + /** + * Construct a direct, native-ordered and cache-line-aligned doublebuffer with the specified number + * of elements.
+ * + * @param size The size, in doubles + * + * @return a DoubleBuffer + */ + public static DoubleBuffer createDoubleBuffer(int size) { + return createByteBuffer(size << 3).asDoubleBuffer(); + } + + /** + * Construct a cache-line-aligned PointerBuffer with the specified number + * of elements. + * + * @param size The size, in memory addresses + * + * @return a PointerBuffer + */ + public static PointerBuffer createPointerBuffer(int size) { + return new PointerBuffer(createByteBuffer(size * PointerBuffer.getPointerSize())); + } + +} \ No newline at end of file diff --git a/src/java/org/lwjgl/util/mapped/MappedField.java b/src/java/org/lwjgl/util/mapped/MappedField.java index a39d63a6..f280e88c 100644 --- a/src/java/org/lwjgl/util/mapped/MappedField.java +++ b/src/java/org/lwjgl/util/mapped/MappedField.java @@ -53,7 +53,7 @@ public @interface MappedField { * * @return the field byte offset */ - long byteOffset(); + long byteOffset() default -1; /** * Specifies the field byte length. Required for {@link java.nio.ByteBuffer} fields.
diff --git a/src/java/org/lwjgl/util/mapped/MappedHelper.java b/src/java/org/lwjgl/util/mapped/MappedHelper.java index 2be2aa8c..9002d89e 100644 --- a/src/java/org/lwjgl/util/mapped/MappedHelper.java +++ b/src/java/org/lwjgl/util/mapped/MappedHelper.java @@ -68,7 +68,7 @@ public class MappedHelper { mo.baseAddress = mo.viewAddress = addr; } - public static void checkAddress(MappedObject mapped, long viewAddress) { + public static void checkAddress(long viewAddress, MappedObject mapped) { mapped.checkAddress(viewAddress); } @@ -317,7 +317,7 @@ public class MappedHelper { return INSTANCE.getLong(addr); } - public static long lget(MappedObject mapped, int fieldOffset) { + public static long jget(MappedObject mapped, int fieldOffset) { return INSTANCE.getLong(mapped.viewAddress + fieldOffset); } @@ -333,7 +333,7 @@ public class MappedHelper { return INSTANCE.getLongVolatile(null, addr); } - public static long lvget(MappedObject mapped, int fieldOffset) { + public static long jvget(MappedObject mapped, int fieldOffset) { return INSTANCE.getLongVolatile(null, mapped.viewAddress + fieldOffset); } diff --git a/src/java/org/lwjgl/util/mapped/MappedObjectClassLoader.java b/src/java/org/lwjgl/util/mapped/MappedObjectClassLoader.java index 4217646c..86d5a667 100644 --- a/src/java/org/lwjgl/util/mapped/MappedObjectClassLoader.java +++ b/src/java/org/lwjgl/util/mapped/MappedObjectClassLoader.java @@ -48,11 +48,7 @@ import java.util.Arrays; */ public class MappedObjectClassLoader extends URLClassLoader { - static final String MAPPEDOBJECT_PACKAGE_PREFIX; - - static { - MAPPEDOBJECT_PACKAGE_PREFIX = MappedObjectClassLoader.class.getPackage().getName() + "."; - } + static final String MAPPEDOBJECT_PACKAGE_PREFIX = MappedObjectClassLoader.class.getPackage().getName() + "."; static boolean FORKED; @@ -115,28 +111,28 @@ public class MappedObjectClassLoader extends URLClassLoader { @Override protected synchronized Class loadClass(String name, boolean resolve) throws 
ClassNotFoundException { - if ( name.startsWith("java.") ) - return super.loadClass(name, resolve); - if ( name.startsWith("javax.") ) + if ( name.startsWith("java.") + || name.startsWith("javax.") + || name.startsWith("sun.") + || name.startsWith("sunw.") + || name.startsWith("org.objectweb.asm.") + ) return super.loadClass(name, resolve); - if ( name.startsWith("sun.") ) - return super.loadClass(name, resolve); - if ( name.startsWith("sunw.") ) - return super.loadClass(name, resolve); + final String className = name.replace('.', '/'); + final boolean inThisPackage = name.startsWith(MAPPEDOBJECT_PACKAGE_PREFIX); - if ( name.startsWith("org.objectweb.asm.") ) + if ( inThisPackage && ( + name.equals(MappedObjectClassLoader.class.getName()) + || name.equals((MappedObjectTransformer.class.getName())) + || name.equals((CacheUtil.class.getName())) + ) ) return super.loadClass(name, resolve); - if ( name.equals(MappedObjectClassLoader.class.getName()) || name.equals((MappedObjectTransformer.class.getName())) ) - return super.loadClass(name, resolve); - - String className = name.replace('.', '/'); - byte[] bytecode = readStream(this.getResourceAsStream(className.concat(".class"))); // Classes in this package do not get transformed, but need to go through here because we have transformed MappedObject. 
- if ( !(name.startsWith(MAPPEDOBJECT_PACKAGE_PREFIX) && name.substring(MAPPEDOBJECT_PACKAGE_PREFIX.length()).indexOf('.') == -1) ) { + if ( !(inThisPackage && name.substring(MAPPEDOBJECT_PACKAGE_PREFIX.length()).indexOf('.') == -1) ) { long t0 = System.nanoTime(); final byte[] newBytecode = MappedObjectTransformer.transformMappedAPI(className, bytecode); long t1 = System.nanoTime(); diff --git a/src/java/org/lwjgl/util/mapped/MappedObjectTransformer.java b/src/java/org/lwjgl/util/mapped/MappedObjectTransformer.java index 253c308e..cf2aac88 100644 --- a/src/java/org/lwjgl/util/mapped/MappedObjectTransformer.java +++ b/src/java/org/lwjgl/util/mapped/MappedObjectTransformer.java @@ -31,6 +31,7 @@ */ package org.lwjgl.util.mapped; +import org.lwjgl.BufferUtils; import org.lwjgl.LWJGLUtil; import org.lwjgl.MemoryUtil; import org.objectweb.asm.*; @@ -79,6 +80,8 @@ public class MappedObjectTransformer { static final String MAPPED_SET3_JVM = jvmClassName(MappedSet3.class); static final String MAPPED_SET4_JVM = jvmClassName(MappedSet4.class); + static final String CACHE_LINE_PAD_JVM = "L" + jvmClassName(CacheLinePad.class) + ";"; + // Public methods static final String VIEWADDRESS_METHOD_NAME = "getViewAddress"; static final String NEXT_METHOD_NAME = "next"; @@ -115,7 +118,7 @@ public class MappedObjectTransformer { // => IADD // => PUTFIELD MyMappedType.view // - className_to_subtype.put(MAPPED_OBJECT_JVM, new MappedSubtypeInfo(MAPPED_OBJECT_JVM, null, -1, -1, -1)); + className_to_subtype.put(MAPPED_OBJECT_JVM, new MappedSubtypeInfo(MAPPED_OBJECT_JVM, null, -1, -1, -1, false)); } final String vmName = System.getProperty("java.vm.name"); @@ -145,30 +148,44 @@ public class MappedObjectTransformer { final String className = jvmClassName(type); final Map fields = new HashMap(); - int advancingOffset = 0; long sizeof = 0; for ( Field field : type.getDeclaredFields() ) { - FieldInfo fieldInfo = registerField(mapped == null || mapped.autoGenerateOffsets(), className, 
advancingOffset, field); + FieldInfo fieldInfo = registerField(mapped == null || mapped.autoGenerateOffsets(), className, sizeof, field); if ( fieldInfo == null ) continue; fields.put(field.getName(), fieldInfo); - advancingOffset += fieldInfo.length; - sizeof = Math.max(sizeof, fieldInfo.offset + fieldInfo.length); + sizeof = Math.max(sizeof, fieldInfo.offset + fieldInfo.lengthPadded); } - final int align = mapped == null ? 4 : mapped.align(); - final int padding = mapped == null ? 0 : mapped.padding(); + int align = 4; + int padding = 0; + boolean cacheLinePadded = false; + + if ( mapped != null ) { + align = mapped.align(); + if ( mapped.cacheLinePadding() ) { + if ( mapped.padding() != 0 ) + throw new ClassFormatError("Mapped type padding cannot be specified together with cacheLinePadding."); + + final int cacheLineMod = (int)(sizeof % CacheUtil.getCacheLineSize()); + if ( cacheLineMod != 0 ) + padding = CacheUtil.getCacheLineSize() - cacheLineMod; + + cacheLinePadded = true; + } else + padding = mapped.padding(); + } sizeof += padding; - final MappedSubtypeInfo mappedType = new MappedSubtypeInfo(className, fields, (int)sizeof, align, padding); + final MappedSubtypeInfo mappedType = new MappedSubtypeInfo(className, fields, (int)sizeof, align, padding, cacheLinePadded); if ( className_to_subtype.put(className, mappedType) != null ) throw new InternalError("duplicate mapped type: " + mappedType.className); } - private static FieldInfo registerField(final boolean autoGenerateOffsets, final String className, int advancingOffset, final Field field) { + private static FieldInfo registerField(final boolean autoGenerateOffsets, final String className, long advancingOffset, final Field field) { if ( Modifier.isStatic(field.getModifiers()) ) // static fields are never mapped return null; @@ -188,7 +205,6 @@ public class MappedObjectTransformer { throw new ClassFormatError("The volatile keyword is not supported for @Pointer or ByteBuffer fields. 
Volatile field found: " + className + "." + field.getName() + ": " + field.getType()); // quick hack - long byteOffset = meta == null ? advancingOffset : meta.byteOffset(); long byteLength; if ( field.getType() == long.class || field.getType() == double.class ) { if ( pointer == null ) @@ -213,10 +229,36 @@ public class MappedObjectTransformer { if ( field.getType() != ByteBuffer.class && (advancingOffset % byteLength) != 0 ) throw new IllegalStateException("misaligned mapped type: " + className + "." + field.getName()); + CacheLinePad pad = field.getAnnotation(CacheLinePad.class); + + long byteOffset = advancingOffset; + if ( meta != null && meta.byteOffset() != -1 ) { + if ( meta.byteOffset() < 0 ) + throw new ClassFormatError("Invalid field byte offset: " + className + "." + field.getName() + " [byteOffset=" + meta.byteOffset() + "]"); + if ( pad != null ) + throw new ClassFormatError("A field byte offset cannot be specified together with cache-line padding: " + className + "." + field.getName()); + + byteOffset = meta.byteOffset(); + } + + long byteLengthPadded = byteLength; + if ( pad != null ) { + // Pad before + if ( pad.before() && byteOffset % CacheUtil.getCacheLineSize() != 0 ) + byteOffset += CacheUtil.getCacheLineSize() - (byteOffset & (CacheUtil.getCacheLineSize() - 1)); + + // Pad after + if ( pad.after() && (byteOffset + byteLength) % CacheUtil.getCacheLineSize() != 0 ) + byteLengthPadded += CacheUtil.getCacheLineSize() - (byteOffset + byteLength) % CacheUtil.getCacheLineSize(); + + assert !pad.before() || (byteOffset % CacheUtil.getCacheLineSize() == 0); + assert !pad.after() || ((byteOffset + byteLengthPadded) % CacheUtil.getCacheLineSize() == 0); + } + if ( PRINT_ACTIVITY ) LWJGLUtil.log(MappedObjectTransformer.class.getSimpleName() + ": " + className + "." 
+ field.getName() + " [type=" + field.getType().getSimpleName() + ", offset=" + byteOffset + "]"); - return new FieldInfo(byteOffset, byteLength, Type.getType(field.getType()), Modifier.isVolatile(field.getModifiers()), pointer != null); + return new FieldInfo(byteOffset, byteLength, byteLengthPadded, Type.getType(field.getType()), Modifier.isVolatile(field.getModifiers()), pointer != null); } /** Removes final from methods that will be overriden by subclasses. */ @@ -318,17 +360,12 @@ public class MappedObjectTransformer { mv.visitInsn(I2L); mv.visitInsn(LADD); if ( MappedObject.CHECKS ) { - mv.visitVarInsn(LSTORE, 2); + mv.visitInsn(DUP2); mv.visitVarInsn(ALOAD, 0); - mv.visitVarInsn(LLOAD, 2); - mv.visitMethodInsn(INVOKESTATIC, MAPPED_HELPER_JVM, "checkAddress", "(L" + MAPPED_OBJECT_JVM + ";J)V"); - mv.visitVarInsn(LLOAD, 2); + mv.visitMethodInsn(INVOKESTATIC, MAPPED_HELPER_JVM, "checkAddress", "(JL" + MAPPED_OBJECT_JVM + ";)V"); } mv.visitInsn(LRETURN); - if ( MappedObject.CHECKS ) - mv.visitMaxs(3, 4); - else - mv.visitMaxs(3, 2); + mv.visitMaxs(3, 2); mv.visitEnd(); } @@ -477,7 +514,71 @@ public class MappedObjectTransformer { return null; } - return super.visitField(access, name, desc, signature, value); + if ( (access & ACC_STATIC) == 0 ) { + return new FieldNode(access, name, desc, signature, value) { + public void visitEnd() { + if ( visibleAnnotations == null ) { // early-out + accept(cv); + return; + } + + boolean before = false; + boolean after = false; + int byteLength = 0; + for ( AnnotationNode pad : visibleAnnotations ) { + if ( CACHE_LINE_PAD_JVM.equals(pad.desc) ) { + if ( "J".equals(desc) || "D".equals(desc) ) + byteLength = 8; + else if ( "I".equals(desc) || "F".equals(desc) ) + byteLength = 4; + else if ( "S".equals(desc) || "C".equals(desc) ) + byteLength = 2; + else if ( "B".equals(desc) || "Z".equals(desc) ) + byteLength = 1; + else + throw new ClassFormatError("The @CacheLinePad annotation cannot be used on non-primitive fields: " + 
className + "." + name); + + transformed = true; + + after = true; + if ( pad.values != null ) { + for ( int i = 0; i < pad.values.size(); i += 2 ) { + final boolean value = pad.values.get(i + 1).equals(Boolean.TRUE); + if ( "before".equals(pad.values.get(i)) ) + before = value; + else + after = value; + } + } + break; + } + } + + /* + We make the fields public to force the JVM to keep the fields in the object. + Instead of using only longs or integers, we use the same type as the original + field. That's because modern JVMs usually reorder fields by type: + longs, then doubles, then integers, then booleans, etc. This way it's more + likely that the padding will work as expected. + */ + + if ( before ) { + final int count = CacheUtil.getCacheLineSize() / byteLength - 1; + for ( int i = count; i >= 1; i-- ) + cv.visitField(access | ACC_PUBLIC | ACC_SYNTHETIC, name + "$PAD_" + i, desc, signature, null); + } + + accept(cv); + + if ( after ) { + final int count = CacheUtil.getCacheLineSize() / byteLength - 1; + for ( int i = 1; i <= count; i++ ) + cv.visitField(access | ACC_PUBLIC | ACC_SYNTHETIC, name + "$PAD" + i, desc, signature, null); + } + } + }; + } else + return super.visitField(access, name, desc, signature, value); } @Override @@ -762,7 +863,7 @@ public class MappedObjectTransformer { // stack: sizeof, count trg.add(new InsnNode(IMUL)); // stack: bytes - trg.add(new MethodInsnNode(INVOKESTATIC, jvmClassName(ByteBuffer.class), "allocateDirect", "(I)L" + jvmClassName(ByteBuffer.class) + ";")); + trg.add(new MethodInsnNode(INVOKESTATIC, mappedType.cacheLinePadded ? 
jvmClassName(CacheUtil.class) : jvmClassName(BufferUtils.class), "createByteBuffer", "(I)L" + jvmClassName(ByteBuffer.class) + ";")); // stack: buffer } else if ( mapDirectMethod ) { // stack: capacity, address @@ -1061,13 +1162,15 @@ public class MappedObjectTransformer { final long offset; final long length; + final long lengthPadded; final Type type; final boolean isVolatile; final boolean isPointer; - FieldInfo(final long offset, final long length, final Type type, final boolean isVolatile, final boolean isPointer) { + FieldInfo(final long offset, final long length, final long lengthPadded, final Type type, final boolean isVolatile, final boolean isPointer) { this.offset = offset; this.length = length; + this.lengthPadded = lengthPadded; this.type = type; this.isVolatile = isVolatile; this.isPointer = isPointer; @@ -1083,14 +1186,15 @@ public class MappedObjectTransformer { final String className; - final int sizeof; - final int sizeof_shift; - final int align; - final int padding; + final int sizeof; + final int sizeof_shift; + final int align; + final int padding; + final boolean cacheLinePadded; final Map fields; - MappedSubtypeInfo(String className, Map fields, int sizeof, int align, int padding) { + MappedSubtypeInfo(String className, Map fields, int sizeof, int align, int padding, final boolean cacheLinePadded) { this.className = className; this.sizeof = sizeof; @@ -1100,6 +1204,7 @@ public class MappedObjectTransformer { this.sizeof_shift = 0; this.align = align; this.padding = padding; + this.cacheLinePadded = cacheLinePadded; this.fields = fields; } diff --git a/src/java/org/lwjgl/util/mapped/MappedObjectUnsafe.java b/src/java/org/lwjgl/util/mapped/MappedObjectUnsafe.java index 7407c15b..2f25f488 100644 --- a/src/java/org/lwjgl/util/mapped/MappedObjectUnsafe.java +++ b/src/java/org/lwjgl/util/mapped/MappedObjectUnsafe.java @@ -34,6 +34,7 @@ package org.lwjgl.util.mapped; import java.lang.reflect.Field; import java.lang.reflect.Modifier; import 
java.nio.ByteBuffer; +import java.nio.ByteOrder; import sun.misc.Unsafe; @@ -55,7 +56,7 @@ final class MappedObjectUnsafe { if ( address <= 0L || capacity < 0 ) throw new IllegalStateException("you almost crashed the jvm"); - ByteBuffer buffer = global.duplicate(); + ByteBuffer buffer = global.duplicate().order(ByteOrder.nativeOrder()); INSTANCE.putLong(buffer, BUFFER_ADDRESS_OFFSET, address); INSTANCE.putInt(buffer, BUFFER_CAPACITY_OFFSET, capacity); buffer.position(0); diff --git a/src/java/org/lwjgl/util/mapped/MappedType.java b/src/java/org/lwjgl/util/mapped/MappedType.java index 9de6f8ef..a0590101 100644 --- a/src/java/org/lwjgl/util/mapped/MappedType.java +++ b/src/java/org/lwjgl/util/mapped/MappedType.java @@ -61,12 +61,27 @@ public @interface MappedType { /** * The number of bytes to add to the total byte size. - * SIZEOF will be calculated as SIZEOF = max(field_offset + field_length) + padding + * SIZEOF will be calculated as SIZEOF = max(field_offset + field_length) + padding. + *

<p/> + * Cannot be used with {@link #cacheLinePadding()}. * * @return the padding amount */ int padding() default 0; + /** + * When true, SIZEOF will be increased (if necessary) so that it's a multiple of the CPU cache line size. + * Additionally, {@link MappedObject#malloc(int)} on the mapped object type will automatically use + * {@link CacheUtil#createByteBuffer(int)} instead of the unaligned {@link org.lwjgl.BufferUtils#createByteBuffer(int)}. + *

<p/> + * Cannot be used with {@link #padding()}. + * + * @return if cache-line padding should be applied + * + * @see CacheUtil + */ + boolean cacheLinePadding() default false; + /** * The mapped data memory alignment, in bytes. *