From 6f375f16a44d140fff006b29d29eea6aff6add12 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Thu, 7 May 2026 16:08:55 -0400 Subject: [PATCH 1/2] feat(display): per-container Direct Composition path via Android SurfaceControl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an opt-in per-container "Direct Composition" toggle that, when active on API 29+, routes fullscreen direct-scanout drawables to a child ASurfaceControl layer bound to the XServerView's SurfaceView. SurfaceFlinger + HWC promote the layer to a hardware overlay plane (Snapdragon DPU scanout), bypassing in-process GLRenderer composition entirely for qualifying frames. Verified end-to-end on a OnePlus 15 (Android 16, Adreno X2-class DPU) running a vkd3d-proton title — SurfaceFlinger dumpsys shows composition type=DEVICE on the winnative-direct-composition layer with continuous buffer updates and HWC scaling the native game frame to display resolution. Phase 1 — toggle plumbing: * Container.EXTRA_DIRECT_COMPOSITION + isDirectCompositionEnabled / setDirectCompositionEnabled accessors backed by existing extraData JSON; default off, no migration needed. * ContainerSettingsComposeDialog reads/writes the setting; new container creation also sets it on the freshly-built Container. * GameSettingsStateHolder.directComposition + a SettingCheckbox in the container-edit UI (gated on isContainerEditMode so shortcut sheets don't show it). * Two new strings: session_display_direct_composition + session_display_direct_composition_summary. Phase 2 — native + lifecycle + push path: * cpp/winlator/surface_compositor.c — dlopen-resolved JNI wrappers around ASurfaceControl + ASurfaceTransaction (libandroid.so, API 29+). 
Probe (nativeIsAvailable), attach/detach (createFromWindow + reparent-to-null + release), setColor (proof-of-life), pushBuffer (setBuffer + modern setPosition/setScale/setCrop or fallback setGeometry, with framework-owned fence FD), allocateTestBuffer + releaseBuffer (256x256 magenta smoke test, RGBA_8888 + GPU_SAMPLED_IMAGE + CPU_WRITE_RARELY + COMPOSER_OVERLAY usage, with retry-without-OVERLAY for grallocs that reject the combo). * SurfaceCompositor.java — static isAvailable() probe with API-29 short-circuit + cached-availability + UnsatisfiedLinkError guard. * DirectCompositionLayer.java — instance class wrapping a single ASurfaceControl, all public methods synchronized so UI-thread release() and GLThread pushBuffer() can't race the native pointer. * Drawable.scanoutSource + directScanout fields made volatile for cross-thread visibility between the X-server worker and GLThread. * Drawable.acquireFenceFd as AtomicInteger with takeAcquireFenceFd (read-and-clear) + setAcquireFenceFd (closes prior FD on overwrite) — forward-compatible API for future X11 Present wait_fence plumbing; today always -1, documented as empirical observation rather than a Mesa contract. * GPUImage.getHardwareBufferPtr() — public accessor for the underlying AHardwareBuffer*, used by the renderer hook. * GLRenderer.directCompositionTarget (volatile) + setDirectCompositionTarget setter; per-frame maybePushDirectComposition under Drawable.renderLock with last-pushed cache to avoid per-frame transaction storm + a consecutive-failure counter that self-detaches the target after DC_FAIL_LIMIT to stop wasting JNI calls on permanent failure; maybeHideDirectComposition on direct->fallback transition, idempotent and cache-invalidating. 
* XServerDisplayActivity.installDirectCompositionLifecycle — SurfaceHolder.Callback registered against XServerView's holder; if the surface is already valid by the time we register (GLSurfaceView's GLThread can fire surfaceCreated before setupUI returns), synthesize the initial dispatch so we don't miss the first attach; on surfaceDestroyed clear the renderer pointer BEFORE releasing the layer; onDestroy removes the callback, clears the renderer pointer, releases layer + test buffer, in that order. * Magnifier-UI overlay forces fallback (and immediate hide) so the GL-rendered overlay isn't hidden under the SC layer at z=1. Phase 3 — flicker / fence / overlays: * Reverted Phase 2.5's GL-skip optimization. The actual perf win is HWC overlay-plane scanout (DPU instead of GPU), not skipping the GL render. Always-render keeps the GLSurfaceView backbuffer in lockstep with the SC layer, so a stale-frame reveal during direct->fallback transition is impossible — whatever SF reveals when SC hides is the same frame the user is already seeing. * Cursor handling: cursor is hidden by the activity for fullscreen contexts (renderer.setCursorVisible(false) at setupUI), and direct mode requires fullscreen, so cursor-under-SC is moot in practice. Containment: * Toggle off -> zero behavior change vs. pre-DC. Same code path as today. * NDK unavailable (API < 29 or stripped libandroid) -> silent fallback to GL path, no crashes. dlopen + dlsym means the shared library still loads on minSdk 26. * SurfaceCompositor.isAvailable() result is cached after the first probe. 
--- app/src/main/cpp/CMakeLists.txt | 1 + .../main/cpp/winlator/surface_compositor.c | 550 ++++++++++++++++++ app/src/main/feature/library/GameSettings.kt | 20 + .../ContainerSettingsComposeDialog.kt | 9 + app/src/main/res/values/strings.xml | 2 + app/src/main/runtime/container/Container.java | 26 + .../display/XServerDisplayActivity.java | 265 +++++++++ .../composition/DirectCompositionLayer.java | 228 ++++++++ .../composition/SurfaceCompositor.java | 88 +++ .../runtime/display/renderer/GLRenderer.java | 250 +++++++- .../runtime/display/renderer/GPUImage.java | 17 + .../runtime/display/xserver/Drawable.java | 80 ++- 12 files changed, 1533 insertions(+), 3 deletions(-) create mode 100644 app/src/main/cpp/winlator/surface_compositor.c create mode 100644 app/src/main/runtime/display/composition/DirectCompositionLayer.java create mode 100644 app/src/main/runtime/display/composition/SurfaceCompositor.java diff --git a/app/src/main/cpp/CMakeLists.txt b/app/src/main/cpp/CMakeLists.txt index c5db5dece..9d5f75846 100644 --- a/app/src/main/cpp/CMakeLists.txt +++ b/app/src/main/cpp/CMakeLists.txt @@ -11,6 +11,7 @@ add_subdirectory(adrenotools) add_library(winlator SHARED winlator/drawable.c winlator/gpu_image.c + winlator/surface_compositor.c winlator/sysvshared_memory.c winlator/xconnector_epoll.c winlator/alsa_client.c diff --git a/app/src/main/cpp/winlator/surface_compositor.c b/app/src/main/cpp/winlator/surface_compositor.c new file mode 100644 index 000000000..64b97e4e2 --- /dev/null +++ b/app/src/main/cpp/winlator/surface_compositor.c @@ -0,0 +1,550 @@ +// JNI wrapper around Android's ASurfaceControl / ASurfaceTransaction NDK API +// (libandroid.so, API 29+). Phase-by-phase scope: +// * Phase 1 — `nativeIsAvailable` probe; nothing else. +// * Phase 2.1 — lifecycle: create a child ASurfaceControl bound to the +// XServerView's SurfaceView, hide it, parent it to the SurfaceView's +// layer, expose attach/detach/setColor/release. 
+// * Phase 2.2+ — buffer push, sync fence, real game frames. +// +// Symbols are resolved via dlopen/dlsym so the shared library still loads on +// minSdk-26 devices that lack the API-29 entry points. Calling any resolved +// pointer on a pre-API-29 device is gated by the Java side checking +// `isAvailable()` first. +// +// Quoting the NDK documentation referenced while writing this: +// * `ASurfaceControl_createFromWindow` (surface_control.h:50-65) — caller +// owns the returned ASurfaceControl and must release it. +// * `ASurfaceTransaction_reparent` (surface_control.h:298-307) — passing +// a null new_parent removes the surface from the display. +// * `ASurfaceTransaction_setVisibility` (surface_control.h:323) — HIDE/SHOW. +// * `ASurfaceTransaction_setZOrder` (surface_control.h:329-339) — relative +// to siblings; default is 0; behaviour with same z is undefined. +// * `ASurfaceTransaction_setColor` (surface_control.h:359-370) — sets the +// background color for a layer that has no buffer; useful as a Phase 2.2 +// proof-of-life and to avoid the "blank initial frame" race when a fresh +// SurfaceControl is shown before its first real buffer arrives. +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define LOG_TAG "SurfaceCompositor" +#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__) +#define LOGW(...) __android_log_print(ANDROID_LOG_WARN, LOG_TAG, __VA_ARGS__) +#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__) + +// Opaque NDK types — we never dereference these, we just pass them around. +struct ASurfaceControl; +struct ASurfaceTransaction; + +// Mirror of `enum ASurfaceTransactionVisibility` (surface_control.h:312-315). +// Hard-coded so we don't need to include (which +// would fail to compile on minSdk-26 toolchains for direct symbol references). 
+#define DC_VISIBILITY_HIDE ((int8_t)0) +#define DC_VISIBILITY_SHOW ((int8_t)1) + +// Function-pointer typedefs for every libandroid.so symbol we use. Kept in +// the order they're documented in surface_control.h for easy cross-reference. +typedef struct ASurfaceControl* (*pfn_ASurfaceControl_createFromWindow)( + ANativeWindow* parent, const char* debug_name); +typedef void (*pfn_ASurfaceControl_release)(struct ASurfaceControl* sc); +typedef struct ASurfaceTransaction* (*pfn_ASurfaceTransaction_create)(void); +typedef void (*pfn_ASurfaceTransaction_delete)(struct ASurfaceTransaction* t); +typedef void (*pfn_ASurfaceTransaction_apply)(struct ASurfaceTransaction* t); +typedef void (*pfn_ASurfaceTransaction_reparent)(struct ASurfaceTransaction* t, + struct ASurfaceControl* sc, + struct ASurfaceControl* new_parent); +typedef void (*pfn_ASurfaceTransaction_setVisibility)(struct ASurfaceTransaction* t, + struct ASurfaceControl* sc, + int8_t visibility); +typedef void (*pfn_ASurfaceTransaction_setZOrder)(struct ASurfaceTransaction* t, + struct ASurfaceControl* sc, + int32_t z_order); +typedef void (*pfn_ASurfaceTransaction_setColor)(struct ASurfaceTransaction* t, + struct ASurfaceControl* sc, + float r, float g, float b, float alpha, + int dataspace /* ADataSpace */); +typedef void (*pfn_ASurfaceTransaction_setBuffer)(struct ASurfaceTransaction* t, + struct ASurfaceControl* sc, + AHardwareBuffer* buffer, + int acquire_fence_fd); +// API-29 geometry fallback (deprecated but always present on API 29-30). +typedef void (*pfn_ASurfaceTransaction_setGeometry)(struct ASurfaceTransaction* t, + struct ASurfaceControl* sc, + const ARect* source, + const ARect* destination, + int32_t transform); +// API-31+ preferred geometry. When all four are present we prefer this path +// per surface_control.h:387-391 ("setGeometry deprecated; use setCrop, +// setPosition, setBufferTransform, setScale instead"). 
+typedef void (*pfn_ASurfaceTransaction_setPosition)(struct ASurfaceTransaction* t, + struct ASurfaceControl* sc, + int32_t x, int32_t y); +typedef void (*pfn_ASurfaceTransaction_setScale)(struct ASurfaceTransaction* t, + struct ASurfaceControl* sc, + float xScale, float yScale); +typedef void (*pfn_ASurfaceTransaction_setCrop)(struct ASurfaceTransaction* t, + struct ASurfaceControl* sc, + const ARect* crop); +typedef void (*pfn_ASurfaceTransaction_setBufferTransform)(struct ASurfaceTransaction* t, + struct ASurfaceControl* sc, + int32_t transform); + +// One-shot init under mutex. After init completes, all g_* function pointers +// are effectively const for the rest of the process and can be read without +// further locking. +static pthread_mutex_t g_init_mutex = PTHREAD_MUTEX_INITIALIZER; +static bool g_initialised = false; +static bool g_available = false; +static void* g_libandroid = NULL; + +static pfn_ASurfaceControl_createFromWindow g_create_from_window = NULL; +static pfn_ASurfaceControl_release g_sc_release = NULL; +static pfn_ASurfaceTransaction_create g_tx_create = NULL; +static pfn_ASurfaceTransaction_delete g_tx_delete = NULL; +static pfn_ASurfaceTransaction_apply g_tx_apply = NULL; +static pfn_ASurfaceTransaction_reparent g_tx_reparent = NULL; +static pfn_ASurfaceTransaction_setVisibility g_tx_set_visibility = NULL; +static pfn_ASurfaceTransaction_setZOrder g_tx_set_zorder = NULL; +static pfn_ASurfaceTransaction_setColor g_tx_set_color = NULL; +static pfn_ASurfaceTransaction_setBuffer g_tx_set_buffer = NULL; +static pfn_ASurfaceTransaction_setGeometry g_tx_set_geometry = NULL; +static pfn_ASurfaceTransaction_setPosition g_tx_set_position = NULL; +static pfn_ASurfaceTransaction_setScale g_tx_set_scale = NULL; +static pfn_ASurfaceTransaction_setCrop g_tx_set_crop = NULL; +static pfn_ASurfaceTransaction_setBufferTransform g_tx_set_buffer_transform = NULL; + +// `__typeof__` is the documented-extension spelling that doesn't trip +// 
`-Wgnu-typeof-extension` under pedantic Clang flags. Equivalent to GCC/C23 +// `typeof` in every case we use it. +#define RESOLVE(target, name) do { \ + void* sym = dlsym(g_libandroid, (name)); \ + (target) = (__typeof__(target))sym; \ + } while (0) + +static void init_once_locked(void) { + if (g_initialised) return; + g_initialised = true; + + g_libandroid = dlopen("libandroid.so", RTLD_NOW); + if (g_libandroid == NULL) { + LOGW("dlopen(libandroid.so) failed: %s", dlerror()); + return; + } + + RESOLVE(g_create_from_window, "ASurfaceControl_createFromWindow"); + RESOLVE(g_sc_release, "ASurfaceControl_release"); + RESOLVE(g_tx_create, "ASurfaceTransaction_create"); + RESOLVE(g_tx_delete, "ASurfaceTransaction_delete"); + RESOLVE(g_tx_apply, "ASurfaceTransaction_apply"); + RESOLVE(g_tx_reparent, "ASurfaceTransaction_reparent"); + RESOLVE(g_tx_set_visibility, "ASurfaceTransaction_setVisibility"); + RESOLVE(g_tx_set_zorder, "ASurfaceTransaction_setZOrder"); + RESOLVE(g_tx_set_color, "ASurfaceTransaction_setColor"); + RESOLVE(g_tx_set_buffer, "ASurfaceTransaction_setBuffer"); + RESOLVE(g_tx_set_geometry, "ASurfaceTransaction_setGeometry"); + // Optional API-31+ symbols — null on API 29/30, in which case we fall back + // to setGeometry. Not part of the availability gate. + RESOLVE(g_tx_set_position, "ASurfaceTransaction_setPosition"); + RESOLVE(g_tx_set_scale, "ASurfaceTransaction_setScale"); + RESOLVE(g_tx_set_crop, "ASurfaceTransaction_setCrop"); + RESOLVE(g_tx_set_buffer_transform, "ASurfaceTransaction_setBufferTransform"); + + // Phase-1 lifecycle symbols + setBuffer + at least one COMPLETE geometry + // path are mandatory. The modern path requires all three of + // setPosition+setScale+setCrop together — accepting setPosition alone + // would leave us with no scaling primitive and silently render at the + // wrong size on a hypothetical device that ships only the position + // symbol. Fall back to setGeometry whenever any of the trio is missing. 
+ bool modern_geom_complete = (g_tx_set_position != NULL) && + (g_tx_set_scale != NULL) && + (g_tx_set_crop != NULL); + bool legacy_geom = (g_tx_set_geometry != NULL); + g_available = (g_create_from_window != NULL) && (g_sc_release != NULL) && + (g_tx_create != NULL) && (g_tx_delete != NULL) && + (g_tx_apply != NULL) && (g_tx_reparent != NULL) && + (g_tx_set_visibility != NULL) && (g_tx_set_zorder != NULL) && + (g_tx_set_color != NULL) && (g_tx_set_buffer != NULL) && + (modern_geom_complete || legacy_geom); + if (g_available) { + LOGI("Direct Composition NDK symbols resolved (geom=%s)", + modern_geom_complete ? "API31+" : "API29 setGeometry"); + } else { + LOGW("Direct Composition NDK symbols missing (API < 29 or stripped libandroid)"); + } +} + +static bool ensure_initialised(void) { + pthread_mutex_lock(&g_init_mutex); + init_once_locked(); + bool available = g_available; + pthread_mutex_unlock(&g_init_mutex); + return available; +} + +// --------------------------------------------------------------------------- +// JNI: nativeIsAvailable() — Phase 1 probe, unchanged in Phase 2. +// --------------------------------------------------------------------------- +JNIEXPORT jboolean JNICALL +Java_com_winlator_cmod_runtime_display_composition_SurfaceCompositor_nativeIsAvailable( + JNIEnv* env, jclass clazz) { + (void)env; + (void)clazz; + return ensure_initialised() ? JNI_TRUE : JNI_FALSE; +} + +// --------------------------------------------------------------------------- +// JNI: nativeAttachToSurface(Surface) -> jlong (ASurfaceControl*) +// +// Creates a child SurfaceControl bound to the SurfaceView's ANativeWindow. +// Initial state is HIDDEN with z-order 1 (above the SurfaceView's primary +// BufferQueue, which sits at the default z=0). Subsequent transactions +// (Phase 2.2+) flip visibility on and push buffers. +// +// On any failure returns 0 and the Java caller falls back to the GLRenderer +// path. 
The ANativeWindow is acquired and released within this call — the +// returned ASurfaceControl holds its own reference to the underlying +// SurfaceFlinger layer via the parent layer relationship. +// --------------------------------------------------------------------------- +JNIEXPORT jlong JNICALL +Java_com_winlator_cmod_runtime_display_composition_DirectCompositionLayer_nativeAttachToSurface( + JNIEnv* env, jclass clazz, jobject surface) { + (void)clazz; + if (!ensure_initialised()) { + LOGW("attachToSurface called but NDK is unavailable"); + return 0; + } + if (surface == NULL) { + LOGE("attachToSurface called with null Surface"); + return 0; + } + + ANativeWindow* window = ANativeWindow_fromSurface(env, surface); + if (window == NULL) { + LOGE("ANativeWindow_fromSurface returned null"); + return 0; + } + + struct ASurfaceControl* sc = g_create_from_window(window, "winnative-direct-composition"); + // ANativeWindow_fromSurface incremented the window's refcount; release our + // ref now — the SurfaceControl holds its own internal reference to the + // SurfaceFlinger layer that the window referenced. + ANativeWindow_release(window); + + if (sc == NULL) { + LOGE("ASurfaceControl_createFromWindow returned null"); + return 0; + } + + // Initial transaction: hidden and z=1 (above the SurfaceView's primary BQ + // which is z=0). Per surface_control.h:323-326 a fresh SurfaceControl + // starts hidden by default, but applying the explicit setVisibility(HIDE) + // here makes the contract observable on the SurfaceFlinger side and + // guarantees we don't get a one-frame flash of an uninitialised layer. 
+ struct ASurfaceTransaction* tx = g_tx_create(); + if (tx == NULL) { + LOGE("ASurfaceTransaction_create returned null; releasing SC"); + g_sc_release(sc); + return 0; + } + g_tx_set_visibility(tx, sc, DC_VISIBILITY_HIDE); + g_tx_set_zorder(tx, sc, 1); + g_tx_apply(tx); + g_tx_delete(tx); + + LOGI("Direct Composition layer attached (sc=%p)", (void*)sc); + return (jlong)(uintptr_t)sc; +} + +// --------------------------------------------------------------------------- +// JNI: nativeDetachAndRelease(jlong sc) -> void +// +// Reparents the SurfaceControl to null in a transaction, applies, then +// releases. Per the agent research and Chromium's +// android_surface_control_compat.cc convention, reparent-to-null *must* +// happen before release, otherwise SurfaceFlinger may keep the orphaned +// layer alive briefly past the parent's destruction and produce ghost +// frames on re-attach. +// --------------------------------------------------------------------------- +JNIEXPORT void JNICALL +Java_com_winlator_cmod_runtime_display_composition_DirectCompositionLayer_nativeDetachAndRelease( + JNIEnv* env, jclass clazz, jlong sc_ptr) { + (void)env; + (void)clazz; + if (sc_ptr == 0) return; + if (!ensure_initialised()) { + // Should be impossible — the layer wouldn't exist if init had failed — + // but be defensive and don't dereference unresolved symbols. 
+ LOGE("detachAndRelease called but NDK is unavailable; leaking SC=%p", + (void*)(uintptr_t)sc_ptr); + return; + } + struct ASurfaceControl* sc = (struct ASurfaceControl*)(uintptr_t)sc_ptr; + + struct ASurfaceTransaction* tx = g_tx_create(); + if (tx != NULL) { + g_tx_reparent(tx, sc, NULL); + g_tx_apply(tx); + g_tx_delete(tx); + } else { + LOGW("detachAndRelease: tx_create failed; releasing without reparent"); + } + g_sc_release(sc); + LOGI("Direct Composition layer released (sc=%p)", (void*)sc); +} + +// --------------------------------------------------------------------------- +// JNI: nativeSetColor(jlong sc, float r, float g, float b, float a) -> void +// +// Phase 2.1 proof-of-life. Paints a solid color on the layer and unhides it +// with the same transaction (atomic — avoids the documented "blank initial +// frame" race). Useful as a smoke test that the lifecycle is wired correctly +// before Phase 2.2 plumbs real AHardwareBuffer content. +// --------------------------------------------------------------------------- +JNIEXPORT void JNICALL +Java_com_winlator_cmod_runtime_display_composition_DirectCompositionLayer_nativeSetColor( + JNIEnv* env, jclass clazz, jlong sc_ptr, + jfloat r, jfloat g, jfloat b, jfloat a) { + (void)env; + (void)clazz; + if (sc_ptr == 0 || !ensure_initialised()) return; + struct ASurfaceControl* sc = (struct ASurfaceControl*)(uintptr_t)sc_ptr; + + struct ASurfaceTransaction* tx = g_tx_create(); + if (tx == NULL) { + LOGE("setColor: tx_create failed"); + return; + } + g_tx_set_color(tx, sc, r, g, b, a, ADATASPACE_SRGB); + g_tx_set_visibility(tx, sc, DC_VISIBILITY_SHOW); + g_tx_apply(tx); + g_tx_delete(tx); +} + +// --------------------------------------------------------------------------- +// JNI: nativeHide(jlong sc) -> void +// +// Hides the layer (used when falling back to the GLRenderer path on a frame +// where direct-scanout doesn't qualify, see Phase 2.5). 
+// --------------------------------------------------------------------------- +JNIEXPORT void JNICALL +Java_com_winlator_cmod_runtime_display_composition_DirectCompositionLayer_nativeHide( + JNIEnv* env, jclass clazz, jlong sc_ptr) { + (void)env; + (void)clazz; + if (sc_ptr == 0 || !ensure_initialised()) return; + struct ASurfaceControl* sc = (struct ASurfaceControl*)(uintptr_t)sc_ptr; + + struct ASurfaceTransaction* tx = g_tx_create(); + if (tx == NULL) return; + g_tx_set_visibility(tx, sc, DC_VISIBILITY_HIDE); + g_tx_apply(tx); + g_tx_delete(tx); +} + +// --------------------------------------------------------------------------- +// JNI: nativePushBuffer(sc, ahb, x, y, w, h, fence_fd) -> jboolean +// +// Phase 2.2: hand an AHardwareBuffer-backed image to the SurfaceControl in +// one transaction. The transaction also positions/sizes the layer in the +// SurfaceView's coordinate space and unhides it (atomic — same transaction +// avoids the documented "blank initial frame" race when transitioning from +// hidden to first-buffer). +// +// Geometry path: +// * Prefer setPosition + setScale + setCrop + setBufferTransform (API 31+) +// * Fall back to deprecated setGeometry (API 29-30) +// +// `acquire_fence_fd` semantics per surface_control.h:343-348: framework +// takes ownership and closes it. Pass -1 when no GPU writes are pending +// (e.g. the test buffer that was filled on the CPU before we got here). +// --------------------------------------------------------------------------- +JNIEXPORT jboolean JNICALL +Java_com_winlator_cmod_runtime_display_composition_DirectCompositionLayer_nativePushBuffer( + JNIEnv* env, jclass clazz, jlong sc_ptr, jlong ahb_ptr, + jint dst_x, jint dst_y, jint dst_w, jint dst_h, jint acquire_fence_fd) { + (void)env; + (void)clazz; + if (sc_ptr == 0 || ahb_ptr == 0) { + // We promised the framework that we'd close any fence FD we received, + // even on the failure path — otherwise we leak FDs. 
+ if (acquire_fence_fd >= 0) close(acquire_fence_fd); + return JNI_FALSE; + } + if (!ensure_initialised()) { + if (acquire_fence_fd >= 0) close(acquire_fence_fd); + return JNI_FALSE; + } + if (dst_w <= 0 || dst_h <= 0) { + LOGW("pushBuffer: invalid dst rect %dx%d", dst_w, dst_h); + if (acquire_fence_fd >= 0) close(acquire_fence_fd); + return JNI_FALSE; + } + + struct ASurfaceControl* sc = (struct ASurfaceControl*)(uintptr_t)sc_ptr; + AHardwareBuffer* ahb = (AHardwareBuffer*)(uintptr_t)ahb_ptr; + + // Source rect = the entire buffer extents — query AHB for its native dims. + AHardwareBuffer_Desc desc; + memset(&desc, 0, sizeof(desc)); + AHardwareBuffer_describe(ahb, &desc); + if (desc.width == 0 || desc.height == 0) { + LOGW("pushBuffer: AHB has zero extents (%ux%u)", desc.width, desc.height); + if (acquire_fence_fd >= 0) close(acquire_fence_fd); + return JNI_FALSE; + } + + struct ASurfaceTransaction* tx = g_tx_create(); + if (tx == NULL) { + LOGE("pushBuffer: tx_create failed"); + if (acquire_fence_fd >= 0) close(acquire_fence_fd); + return JNI_FALSE; + } + + // setBuffer takes ownership of acquire_fence_fd. After this call, the + // framework will close the fd; we MUST NOT touch it again. + g_tx_set_buffer(tx, sc, ahb, acquire_fence_fd); + + // Geometry. The modern path lets us crop and scale independently; if + // unavailable on the device's libandroid, fall back to setGeometry. 
+ if (g_tx_set_position != NULL && g_tx_set_scale != NULL && g_tx_set_crop != NULL) { + ARect crop = { 0, 0, (int32_t)desc.width, (int32_t)desc.height }; + g_tx_set_crop(tx, sc, &crop); + g_tx_set_position(tx, sc, dst_x, dst_y); + float xs = (float)dst_w / (float)desc.width; + float ys = (float)dst_h / (float)desc.height; + g_tx_set_scale(tx, sc, xs, ys); + if (g_tx_set_buffer_transform != NULL) { + g_tx_set_buffer_transform(tx, sc, 0); // no transform + } + } else if (g_tx_set_geometry != NULL) { + ARect src = { 0, 0, (int32_t)desc.width, (int32_t)desc.height }; + ARect dst = { dst_x, dst_y, dst_x + dst_w, dst_y + dst_h }; + g_tx_set_geometry(tx, sc, &src, &dst, 0); + } else { + LOGE("pushBuffer: no geometry function available — should be impossible past availability gate"); + } + + // Unhide in the same transaction so the very first frame is the buffer + // we just supplied, not a blank/uninit layer. + g_tx_set_visibility(tx, sc, DC_VISIBILITY_SHOW); + g_tx_apply(tx); + g_tx_delete(tx); + return JNI_TRUE; +} + +// --------------------------------------------------------------------------- +// JNI: nativeAllocateTestBuffer(width, height, argb_color) -> jlong +// +// Phase 2.2 smoke-test helper: allocates an AHardwareBuffer and CPU-fills it +// with a single colour. Used so we can prove the SurfaceControl path is alive +// (a small magenta swatch on top of the X server) before plumbing real Wine +// frames in Phase 2.3. +// +// Format / usage: RGBA_8888 + GPU_SAMPLED_IMAGE + CPU_WRITE_RARELY + +// COMPOSER_OVERLAY. Per surface_control.h:343-345 setBuffer requires +// GPU_SAMPLED_IMAGE; COMPOSER_OVERLAY is a hint to gralloc that the buffer +// may be scanned out by the display controller. 
+// --------------------------------------------------------------------------- +JNIEXPORT jlong JNICALL +Java_com_winlator_cmod_runtime_display_composition_DirectCompositionLayer_nativeAllocateTestBuffer( + JNIEnv* env, jclass clazz, jint width, jint height, jint argb_color) { + (void)env; + (void)clazz; + if (width <= 0 || height <= 0) return 0; + + // Try the ideal flag set first: GPU sampling, CPU write (so we can fill + // the buffer in software), and COMPOSER_OVERLAY (hint to gralloc that + // this buffer should be eligible for HWC overlay-plane scanout). Some + // gralloc implementations on recent Adreno devices reject the + // CPU_WRITE + COMPOSER_OVERLAY combo — in that case fall back to a + // CPU-only buffer. We lose the overlay hint, but the smoke test still + // proves the SurfaceControl path is alive. + AHardwareBuffer_Desc desc; + memset(&desc, 0, sizeof(desc)); + desc.width = (uint32_t)width; + desc.height = (uint32_t)height; + desc.layers = 1; + desc.format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM; + desc.usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE + | AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY + | AHARDWAREBUFFER_USAGE_COMPOSER_OVERLAY; + + AHardwareBuffer* ahb = NULL; + int rc = AHardwareBuffer_allocate(&desc, &ahb); + if (rc != 0 || ahb == NULL) { + LOGW("allocateTestBuffer: GPU+CPU+OVERLAY failed (rc=%d), retrying without OVERLAY", rc); + desc.usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE + | AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY; + rc = AHardwareBuffer_allocate(&desc, &ahb); + if (rc != 0 || ahb == NULL) { + LOGW("allocateTestBuffer: both flag combos failed (rc=%d) for %dx%d", + rc, width, height); + return 0; + } + } + + void* mapped = NULL; + if (AHardwareBuffer_lock( + ahb, AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY, + -1, NULL, &mapped) != 0 || mapped == NULL) { + LOGW("allocateTestBuffer: AHardwareBuffer_lock failed"); + AHardwareBuffer_release(ahb); + return 0; + } + + // After lock we need the actual stride from gralloc — re-describe. 
+ AHardwareBuffer_Desc realDesc; + memset(&realDesc, 0, sizeof(realDesc)); + AHardwareBuffer_describe(ahb, &realDesc); + + // Convert ARGB jint to little-endian RGBA u32. AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM + // is byte-packed: R, G, B, A. Java jint = 0xAARRGGBB. Layout the bytes as R,G,B,A. + uint8_t a = (uint8_t)((argb_color >> 24) & 0xFF); + uint8_t r = (uint8_t)((argb_color >> 16) & 0xFF); + uint8_t g = (uint8_t)((argb_color >> 8) & 0xFF); + uint8_t b = (uint8_t)((argb_color ) & 0xFF); + + uint8_t* base = (uint8_t*)mapped; + for (uint32_t y = 0; y < realDesc.height; ++y) { + uint8_t* row = base + (size_t)y * (size_t)realDesc.stride * 4u; + for (uint32_t x = 0; x < realDesc.width; ++x) { + row[x*4 + 0] = r; + row[x*4 + 1] = g; + row[x*4 + 2] = b; + row[x*4 + 3] = a; + } + } + + if (AHardwareBuffer_unlock(ahb, NULL) != 0) { + LOGW("allocateTestBuffer: AHardwareBuffer_unlock failed"); + // Keep the buffer anyway; SurfaceFlinger doesn't care about lock state. + } + return (jlong)(uintptr_t)ahb; +} + +// --------------------------------------------------------------------------- +// JNI: nativeReleaseBuffer(ahbPtr) -> void +// +// Drops our reference to a test AHardwareBuffer. SurfaceFlinger may still +// hold a ref if the buffer is the layer's current setBuffer — that's OK, +// AHardwareBuffer is reference-counted and the layer's ref is independent. 
+// --------------------------------------------------------------------------- +JNIEXPORT void JNICALL +Java_com_winlator_cmod_runtime_display_composition_DirectCompositionLayer_nativeReleaseBuffer( + JNIEnv* env, jclass clazz, jlong ahb_ptr) { + (void)env; + (void)clazz; + if (ahb_ptr == 0) return; + AHardwareBuffer_release((AHardwareBuffer*)(uintptr_t)ahb_ptr); +} diff --git a/app/src/main/feature/library/GameSettings.kt b/app/src/main/feature/library/GameSettings.kt index 754b9aa7b..902188f7f 100644 --- a/app/src/main/feature/library/GameSettings.kt +++ b/app/src/main/feature/library/GameSettings.kt @@ -217,6 +217,14 @@ class GameSettingsStateHolder { val dxWrapperEntries = mutableStateOf>(emptyList()) val selectedDxWrapper = mutableIntStateOf(0) + // Per-container "Direct Composition" toggle. When true and the device exposes + // SurfaceControl (API 29+, see SurfaceCompositor.isAvailable()), fullscreen + // direct-scanout drawables are routed to a sibling Android SurfaceControl + // layer (HWC/DPU overlay plane) instead of being composited by the in-process + // GLRenderer. The toggle is sampled at activity startup and held for the + // session — only takes effect on the next launch of the container. + val directComposition = mutableStateOf(false) + // Graphics Driver Configuration (inline card) val gfxConfigExpanded = mutableStateOf(false) val gfxVulkanVersionEntries = mutableStateOf>(emptyList()) @@ -3219,6 +3227,18 @@ private fun AdvancedSection( checked = state.fullscreenStretched.value, onCheckedChange = { state.fullscreenStretched.value = it } ) + + // Direct Composition is currently a container-level toggle (sampled at + // activity startup, held for the session). Hide from shortcut-edit + // sheets to avoid suggesting it can be flipped per-launch. 
+ if (state.isContainerEditMode.value) { + Spacer(Modifier.height(SettingItemGap)) + SettingCheckbox( + label = stringResource(R.string.session_display_direct_composition), + checked = state.directComposition.value, + onCheckedChange = { state.directComposition.value = it } + ) + } } Spacer(Modifier.height(SettingSectionGap)) diff --git a/app/src/main/feature/settings/containers/ContainerSettingsComposeDialog.kt b/app/src/main/feature/settings/containers/ContainerSettingsComposeDialog.kt index 6085bc1e2..7ffd9400d 100644 --- a/app/src/main/feature/settings/containers/ContainerSettingsComposeDialog.kt +++ b/app/src/main/feature/settings/containers/ContainerSettingsComposeDialog.kt @@ -378,6 +378,7 @@ class ContainerSettingsComposeDialog @JvmOverloads constructor( } state.fullscreenStretched.value = c?.isFullscreenStretched() ?: false + state.directComposition.value = c?.isDirectCompositionEnabled() ?: false // Steam fields are shortcut-only in the UI; leave any existing steam // state on the container untouched — saveSettings() skips them. @@ -753,6 +754,7 @@ class ContainerSettingsComposeDialog @JvmOverloads constructor( c.setWinComponents(wincomponents) c.setDrives(drivesString) c.setFullscreenStretched(state.fullscreenStretched.value) + c.setDirectCompositionEnabled(state.directComposition.value) c.setInputType(finalInputType) c.setStartupSelection(startupSelection) c.setBox64Version(box64Version) @@ -805,6 +807,13 @@ class ContainerSettingsComposeDialog @JvmOverloads constructor( manager.createContainerAsync(data, contentsManager) { newContainer -> if (newContainer != null) { saveMouseWarpOverride(newContainer) + // Persist Direct Composition on the new container — it's stored as + // an extraData entry, not a top-level JSON field, so it doesn't + // ride along in the `data` map handed to createContainerAsync. 
+ if (state.directComposition.value) { + newContainer.setDirectCompositionEnabled(true) + newContainer.saveData() + } } else { WinToast.show(context, R.string.setup_wizard_unable_to_install_system_files) } diff --git a/app/src/main/res/values/strings.xml b/app/src/main/res/values/strings.xml index 79f059d47..8a6cdddfb 100644 --- a/app/src/main/res/values/strings.xml +++ b/app/src/main/res/values/strings.xml @@ -724,6 +724,8 @@ E.g. META for META key, \n Frame Rate Display Enable Fullscreen (Stretched) + Direct Composition (experimental) + Route fullscreen game frames straight to an Android SurfaceControl layer (HWC/DPU overlay) instead of compositing them in-process. Lower latency and battery use when supported. Applies on next launch. FPS: Renderer: GPU: diff --git a/app/src/main/runtime/container/Container.java b/app/src/main/runtime/container/Container.java index 9f289015b..4752b92fd 100644 --- a/app/src/main/runtime/container/Container.java +++ b/app/src/main/runtime/container/Container.java @@ -82,6 +82,16 @@ public class Container { public static final String STEAM_TYPE_LIGHT = "light"; public static final String STEAM_TYPE_ULTRALIGHT = "ultralight"; + /** + * extraData JSON key for the per-container "Direct Composition" toggle. + * Stored as a string ("1"/"0") for symmetry with the rest of extraData. The + * setting is read at activity startup and applies for the whole session — it + * controls whether fullscreen drawables are pushed to a sibling Android + * SurfaceControl layer (zero-copy DPU scanout) instead of being composited + * by the in-process GLRenderer. Changing it mid-game is not supported. 
+ */ + public static final String EXTRA_DIRECT_COMPOSITION = "directComposition"; + private ContainerManager containerManager; @@ -328,6 +338,22 @@ public void putExtra(String name, Object value) { catch (JSONException e) {} } + /** + * Whether this container should route fullscreen direct-scanout drawables to a + * sibling Android `SurfaceControl` layer (HWC overlay plane / DPU scanout) + * instead of having `GLRenderer` composite them on the EGL surface. Default off. + * + * The setting is sampled once at activity startup and held for the session. Toggling + * it has no effect on a running game; the user must relaunch the container. + */ + public boolean isDirectCompositionEnabled() { + return "1".equals(getExtra(EXTRA_DIRECT_COMPOSITION, "0")); + } + + public void setDirectCompositionEnabled(boolean enabled) { + putExtra(EXTRA_DIRECT_COMPOSITION, enabled ? "1" : "0"); + } + public String getWineVersion() { return wineVersion; } diff --git a/app/src/main/runtime/display/XServerDisplayActivity.java b/app/src/main/runtime/display/XServerDisplayActivity.java index ea1aeb298..97f3569c0 100644 --- a/app/src/main/runtime/display/XServerDisplayActivity.java +++ b/app/src/main/runtime/display/XServerDisplayActivity.java @@ -30,6 +30,8 @@ import android.view.KeyEvent; import android.view.MotionEvent; import android.view.PointerIcon; +import android.view.Surface; +import android.view.SurfaceHolder; import android.view.View; import android.view.ViewGroup; import android.widget.AdapterView; @@ -241,6 +243,28 @@ public class XServerDisplayActivity extends FixedFontScaleAppCompatActivity { "cmd" )); private XServerView xServerView; + /** Per-activity Direct Composition layer (Phase 2.1 — lifecycle only). + * Allocated when the host SurfaceView reports surfaceCreated AND the per-container + * toggle + NDK availability check both pass. Released in surfaceDestroyed. + *

volatile because Phase 2.2+ will read this from the GLRenderer / GLThread + * while the SurfaceHolder.Callback writes it from the UI thread. The field's + * only writer remains the UI thread; volatile establishes the happens-before + * relationship for cross-thread reads. */ + private volatile com.winlator.cmod.runtime.display.composition.DirectCompositionLayer directCompositionLayer; + /** Anonymous {@link SurfaceHolder.Callback} retained so it can be removed in + * {@code onDestroy} — otherwise the SurfaceHolder retains the activity through + * the implicit outer-class reference held by the inner class. */ + private SurfaceHolder.Callback directCompositionCallback; + /** Phase 2.2 smoke-test AHardwareBuffer pointer. Allocated once after + * successful attach so we can verify the SurfaceControl path is alive + * end-to-end (visible magenta swatch in the bottom-right corner). Phase + * 2.3 replaces the test buffer with real Wine frames; this field and its + * lifecycle plumbing should be removed when the real path is wired up. */ + private long directCompositionTestBufferPtr = 0L; + /** Mirrors {@link Container#isDirectCompositionEnabled()} sampled once at activity setup. */ + private boolean directCompositionRequested; + /** Set by {@link com.winlator.cmod.runtime.display.composition.SurfaceCompositor#isAvailable()}. */ + private boolean directCompositionSupported; private InputControlsView inputControlsView; private TouchpadView touchpadView; private XEnvironment environment; @@ -2880,6 +2904,36 @@ protected void onDestroy() { if (preloaderDialog != null) { preloaderDialog.close(); } + // Tear down the Direct Composition layer before super.onDestroy() — this + // path covers process-death-style activity teardown where the SurfaceView's + // surfaceDestroyed callback may not fire (or fired already). Removing the + // SurfaceHolder.Callback also drops the implicit outer-class reference + // the anonymous callback holds on this activity. 
+ if (xServerView != null && directCompositionCallback != null) { + SurfaceHolder holder = xServerView.getHolder(); + if (holder != null) { + holder.removeCallback(directCompositionCallback); + } + directCompositionCallback = null; + } + // Clear the renderer's pointer first — same reason as in + // surfaceDestroyed: the GLThread must see null before the layer is + // released, otherwise a frame in flight could dereference a freed SC. + if (xServerView != null && xServerView.getRenderer() != null) { + xServerView.getRenderer().setDirectCompositionTarget(null); + } + com.winlator.cmod.runtime.display.composition.DirectCompositionLayer dcLayer = + directCompositionLayer; + directCompositionLayer = null; + if (dcLayer != null) { + dcLayer.release(); + } + // Phase 2.2 smoke-test buffer cleanup. + if (directCompositionTestBufferPtr != 0L) { + com.winlator.cmod.runtime.display.composition + .DirectCompositionLayer.releaseBuffer(directCompositionTestBufferPtr); + directCompositionTestBufferPtr = 0L; + } super.onDestroy(); // Schedule a deferred update check 10 s after game exit if (!switchLaunchInProgress.get()) { @@ -4440,6 +4494,23 @@ private void setupUI() { renderer.setCursorVisible(false); renderer.setNativeMode(isNativeRenderingEnabled); + // Sample the per-container "Direct Composition" toggle once at UI setup + // and stash the result on `this` so the SurfaceHolder.Callback below + // (and Phase 2.3+ render hooks) can read it without re-querying. 
+ directCompositionRequested = container != null && container.isDirectCompositionEnabled(); + directCompositionSupported = + com.winlator.cmod.runtime.display.composition.SurfaceCompositor.isAvailable(); + if (directCompositionRequested && !directCompositionSupported) { + Log.w("XServerDisplayActivity", + "Direct Composition requested but SurfaceControl NDK is unavailable on this device — " + + "falling back to GLRenderer composition"); + } else { + Log.i("XServerDisplayActivity", + "Direct Composition mode: requested=" + directCompositionRequested + + " available=" + directCompositionSupported + + " active=" + (directCompositionRequested && directCompositionSupported)); + } + if (shortcut != null) { renderer.setUnviewableWMClasses("explorer.exe"); String savedFpsLimit = shortcut.getExtra("fpsLimit", "0"); @@ -4455,6 +4526,8 @@ private void setupUI() { xServer.setRenderer(renderer); rootView.addView(xServerView); + installDirectCompositionLifecycle(); + globalCursorSpeed = preferences.getFloat("cursor_speed", 1.0f); touchpadView = new TouchpadView(this, xServer, timeoutHandler, hideControlsRunnable); touchpadView.setTapToClickEnabled(isTapToClickEnabled); @@ -4514,7 +4587,199 @@ private void setupUI() { AppUtils.observeSoftKeyboardVisibility(displayHostComposeView, renderer::setScreenOffsetYRelativeToCursor); } + /** + * Phase 2.1 lifecycle wiring for the per-container Direct Composition path. + * + * No-op when the toggle is off OR the device's libandroid.so doesn't expose + * the API-29 SurfaceControl symbols. Otherwise registers a SurfaceHolder + * callback against the XServerView's surface so the {@link + * com.winlator.cmod.runtime.display.composition.DirectCompositionLayer} + * is created the moment the host Surface becomes valid and torn down + * cleanly when the Surface is destroyed (e.g. activity backgrounded). 
+ * + * The lifecycle has to be tied to the SurfaceHolder rather than to + * onCreate/onDestroy because {@code ANativeWindow_fromSurface} returns + * null until the SurfaceView's surface is realised — which is async on + * GLSurfaceView. + */ + private void installDirectCompositionLifecycle() { + if (!directCompositionRequested || !directCompositionSupported) return; + if (xServerView == null) return; + + final SurfaceHolder holder = xServerView.getHolder(); + if (holder == null) { + Log.w("XServerDisplayActivity", + "installDirectCompositionLifecycle: XServerView has no SurfaceHolder; skipping"); + return; + } + Log.i("XServerDisplayActivity", + "installDirectCompositionLifecycle: registering SurfaceHolder.Callback for Direct Composition"); + directCompositionCallback = new SurfaceHolder.Callback() { + @Override + public void surfaceCreated(SurfaceHolder h) { + // Always release any prior layer AND any prior smoke-test + // buffer first — on activity recreation (or in the rare path + // where surfaceCreated fires twice without an intervening + // surfaceDestroyed) the old resources are bound to the + // destroyed parent. Failure to clean up here leaks both the + // SurfaceControl AND the AHardwareBuffer. 
+ com.winlator.cmod.runtime.display.composition.DirectCompositionLayer prior = + directCompositionLayer; + if (prior != null) { + prior.release(); + directCompositionLayer = null; + } + if (directCompositionTestBufferPtr != 0L) { + com.winlator.cmod.runtime.display.composition + .DirectCompositionLayer.releaseBuffer(directCompositionTestBufferPtr); + directCompositionTestBufferPtr = 0L; + } + directCompositionLayer = + com.winlator.cmod.runtime.display.composition + .DirectCompositionLayer.attach(xServerView); + if (directCompositionLayer == null) { + Log.w("XServerDisplayActivity", + "DirectCompositionLayer.attach returned null — falling back to GL path for this session"); + return; + } + + // Phase 2.3: hand the layer to the GLRenderer so per-frame + // push happens automatically when a fullscreen direct-scanout + // drawable is active. xServerView may not have its renderer + // exposed via a getter pre-this-phase; getRenderer() exists + // on XServerView (returns the GLRenderer constructed in its + // ctor). + if (xServerView != null && xServerView.getRenderer() != null) { + xServerView.getRenderer().setDirectCompositionTarget(directCompositionLayer); + } + + // Defer the smoke test until after the SurfaceView has been + // laid out. surfaceCreated can fire before the View tree's + // onLayout — running pushDirectCompositionSmokeTest now would + // see width=0/height=0 and place the swatch in the top-left + // corner over the X-server menu instead of the bottom-right. + // + // The smoke test stays in Phase 2.3 too: it provides + // visible confirmation that the SC path is alive even when + // no AHardwareBuffer-backed game frame is yet flowing + // (e.g. before Wine starts rendering, or while a non-DXVK + // app is up). Once a real fullscreen frame is pushed by the + // renderer, the magenta swatch is replaced — same SC, new + // setBuffer. 
+ if (xServerView != null) { + xServerView.post(XServerDisplayActivity.this::pushDirectCompositionSmokeTest); + } else { + pushDirectCompositionSmokeTest(); + } + } + + @Override + public void surfaceChanged(SurfaceHolder h, int format, int width, int height) { + // Phase 2.1: nothing to do — geometry handling lives in Phase 2.2's + // setBuffer transaction. We re-evaluate position/size when a buffer + // is pushed. + } + + @Override + public void surfaceDestroyed(SurfaceHolder h) { + // Clear the renderer's pointer FIRST so the GLThread doesn't + // dereference a layer that's about to be released. The + // setter is volatile-write; the GLThread's per-frame read + // happens-before the next frame and will see null. + if (xServerView != null && xServerView.getRenderer() != null) { + xServerView.getRenderer().setDirectCompositionTarget(null); + } + com.winlator.cmod.runtime.display.composition.DirectCompositionLayer layer = + directCompositionLayer; + directCompositionLayer = null; + if (layer != null) { + layer.release(); + } + // Also drop the smoke-test AHB here — we'll allocate a fresh + // one if surfaceCreated fires again. SurfaceFlinger may still + // hold its own ref to the buffer (released by the layer's + // reparent-to-null transaction above), so this is safe per + // the AHardwareBuffer ref semantics. + if (directCompositionTestBufferPtr != 0L) { + com.winlator.cmod.runtime.display.composition + .DirectCompositionLayer.releaseBuffer(directCompositionTestBufferPtr); + directCompositionTestBufferPtr = 0L; + } + } + }; + holder.addCallback(directCompositionCallback); + + // SurfaceHolder.addCallback only delivers FUTURE lifecycle callbacks. If + // the SurfaceView's surface is *already* valid by the time we register + // (common: GLSurfaceView's GLThread fires surfaceCreated before + // setupUI returns), the callback would silently miss the initial + // create and Direct Composition would never attach for this session. 
+ // Synthesize the first surfaceCreated when the surface is already live. + Surface existing = holder.getSurface(); + if (existing != null && existing.isValid()) { + Log.i("XServerDisplayActivity", + "installDirectCompositionLifecycle: surface already valid — dispatching synthetic surfaceCreated"); + directCompositionCallback.surfaceCreated(holder); + } + } + + /** + * Phase 2.2 smoke-test: push a small magenta AHardwareBuffer to the + * bottom-right corner of the SurfaceControl layer to visually confirm + * the Direct Composition path is alive and stacked on top of the + * GLRenderer's output. + * + *

Phase 2.3 will replace this with real Wine-frame routing. When that + * happens, this method, the {@code directCompositionTestBufferPtr} field, + * and the corresponding {@code releaseBuffer} call in {@code onDestroy} + * should all be removed. + */ + private void pushDirectCompositionSmokeTest() { + if (directCompositionLayer == null) return; + // Defensive: if we somehow re-enter (shouldn't, but guard against + // any future caller), release the prior test buffer before + // overwriting the field — otherwise the previous AHB leaks. + if (directCompositionTestBufferPtr != 0L) { + com.winlator.cmod.runtime.display.composition + .DirectCompositionLayer.releaseBuffer(directCompositionTestBufferPtr); + directCompositionTestBufferPtr = 0L; + } + // 0xFFFF00FF = opaque magenta. 256x256 swatch — picked so it's large + // enough to be eligible for an HWC overlay plane on Adreno (some + // chipsets reject planes < 100px tall, which would silently fall + // back to GPU client composition). Phase 2.3 will replace this with + // real Wine frames. + final int testWidth = 256; + final int testHeight = 256; + final int testColorArgb = 0xFFFF00FF; + + directCompositionTestBufferPtr = + com.winlator.cmod.runtime.display.composition + .DirectCompositionLayer.allocateTestBuffer(testWidth, testHeight, testColorArgb); + if (directCompositionTestBufferPtr == 0L) { + Log.w("XServerDisplayActivity", + "Direct Composition smoke test: allocateTestBuffer returned 0 — skipping smoke test"); + return; + } + + // Position the swatch in the bottom-right of the SurfaceView. We're + // posted via xServerView.post(), so the View tree's onLayout has + // already run and getWidth/getHeight return real pixel values. + int viewW = xServerView != null ? xServerView.getWidth() : 0; + int viewH = xServerView != null ? xServerView.getHeight() : 0; + int dstX = viewW > testWidth ? viewW - testWidth - 16 : 16; + int dstY = viewH > testHeight ? 
viewH - testHeight - 16 : 16; + + boolean ok = directCompositionLayer.pushBuffer( + directCompositionTestBufferPtr, + dstX, dstY, testWidth, testHeight, + /* fenceFd */ -1); + Log.i("XServerDisplayActivity", + "Direct Composition smoke test pushBuffer returned " + ok + + " (dst=" + dstX + "," + dstY + " " + testWidth + "x" + testHeight + + " viewW=" + viewW + " viewH=" + viewH + ")"); + } private ActivityResultLauncher controlsEditorActivityResultLauncher = registerForActivityResult( new ActivityResultContracts.StartActivityForResult(), diff --git a/app/src/main/runtime/display/composition/DirectCompositionLayer.java b/app/src/main/runtime/display/composition/DirectCompositionLayer.java new file mode 100644 index 000000000..00197bcf7 --- /dev/null +++ b/app/src/main/runtime/display/composition/DirectCompositionLayer.java @@ -0,0 +1,228 @@ +package com.winlator.cmod.runtime.display.composition; + +import android.util.Log; +import android.view.Surface; +import android.view.SurfaceHolder; +import android.view.SurfaceView; + +/** + * Per-activity wrapper around a single {@code ASurfaceControl} child layer + * that's bound to an {@link XServerView}'s underlying {@link SurfaceView}. + * + *

<h3>Lifecycle</h3>
+ * The layer is allocated when the host SurfaceView reports + * {@link SurfaceHolder.Callback#surfaceCreated} (or, if the surface is already + * valid when the callback is registered, via the synthetic surfaceCreated + * dispatched by {@code XServerDisplayActivity.installDirectCompositionLifecycle}) + * and freed in {@link SurfaceHolder.Callback#surfaceDestroyed}. Operating on a + * released layer is a no-op (defensive: the native side guards against + * {@code sc == 0}). + * + *

<h3>Capabilities</h3>
+ * Attach, set a solid color (proof-of-life), push an AHardwareBuffer with an + * optional fence FD ({@link #pushBuffer}), hide, release. {@code XServerDisplayActivity} only + * creates a layer when the per-container toggle is on and {@link SurfaceCompositor#isAvailable()} passes. + * + *

<h3>Threading</h3>

+ * All public methods serialise on {@code this} so concurrent calls from the + * UI thread (lifecycle) and the renderer thread (future buffer pushes) can't + * race the native pointer. The native pointer is read inside the lock and + * passed to JNI, where libandroid.so's own synchronisation takes over. + */ +public final class DirectCompositionLayer { + + static { + // Same pattern as SurfaceCompositor / GPUImage / etc. Idempotent. + System.loadLibrary("winlator"); + } + + private static final String TAG = "DirectCompositionLayer"; + + /** Native {@code ASurfaceControl*} reinterpreted as a {@code jlong}. 0 == released. */ + private long nativeSc; + + private DirectCompositionLayer(long nativeSc) { + this.nativeSc = nativeSc; + } + + /** + * Attach a hidden child SurfaceControl above the given SurfaceView's primary + * buffer queue. Caller must invoke {@link #release()} when the host surface + * is destroyed. + * + * @return a layer handle, or {@code null} if the underlying NDK call failed + * (caller should fall back to the GLRenderer composition path). + */ + public static DirectCompositionLayer attach(SurfaceView host) { + if (host == null) return null; + if (!SurfaceCompositor.isAvailable()) { + // Shouldn't normally happen — the activity is supposed to call + // SurfaceCompositor.isAvailable() before instantiating this class — + // but be defensive. 
+ Log.w(TAG, "attach() called but SurfaceCompositor is unavailable"); + return null; + } + SurfaceHolder holder = host.getHolder(); + if (holder == null) { + Log.w(TAG, "attach() — SurfaceView has no holder"); + return null; + } + Surface surface = holder.getSurface(); + if (surface == null || !surface.isValid()) { + Log.w(TAG, "attach() — SurfaceView's Surface is not valid yet (surfaceCreated not fired)"); + return null; + } + long sc; + try { + sc = nativeAttachToSurface(surface); + } catch (UnsatisfiedLinkError | RuntimeException e) { + Log.w(TAG, "nativeAttachToSurface threw", e); + return null; + } + if (sc == 0L) { + return null; + } + Log.i(TAG, "Direct Composition layer attached (sc=" + Long.toHexString(sc) + ")"); + return new DirectCompositionLayer(sc); + } + + /** + * Phase 2.1 proof-of-life: paint a solid color and unhide. Used by the + * {@code DirectCompositionTestPattern} smoke test in Phase 2.2 only — + * production code paths never call this. + */ + public synchronized void setColor(float r, float g, float b, float a) { + if (nativeSc == 0L) return; + try { + nativeSetColor(nativeSc, r, g, b, a); + } catch (UnsatisfiedLinkError | RuntimeException e) { + Log.w(TAG, "nativeSetColor threw", e); + } + } + + /** + * Phase 2.2: hand an AHardwareBuffer to the layer in one transaction. + * + * @param ahbPtr raw {@code AHardwareBuffer*} (typically obtained from + * {@code GPUImage.getHardwareBufferPtr()} in Phase 2.3 + * or from {@link #allocateTestBuffer} in Phase 2.2's + * smoke test). + * @param dstX/dstY layer position in the SurfaceView's coordinate space. + * @param dstW/dstH destination size; if it differs from the buffer's + * native extents the layer is scaled (modern API path) + * or stretched (deprecated setGeometry fallback). + * @param fenceFd POSIX sync_file FD that signals when GPU writes are + * complete; pass -1 if no fence is needed (the framework + * will read the buffer immediately). 
The framework + * takes ownership of this FD per + * surface_control.h:343-348 — the caller MUST NOT close + * it after this call. Phase 2.2 always passes -1; real + * fence threading lives in Phase 2.4. + * @return true if the transaction was queued; false on any failure (in + * which case the caller should fall back to the GL composition + * path for this frame). + */ + public synchronized boolean pushBuffer(long ahbPtr, + int dstX, int dstY, int dstW, int dstH, + int fenceFd) { + // Always enter JNI so the native side has a single, consistent place + // to consume / close the fence FD. The native code's first action is + // to validate sc / ahb / extents and close the fence on any error + // path before returning JNI_FALSE — so callers never have to worry + // about FD leaks regardless of which check trips. Phase 2.2 always + // passes -1, so this is a no-op today, but the invariant matters + // for Phase 2.4 when real fences arrive. + try { + return nativePushBuffer(nativeSc, ahbPtr, dstX, dstY, dstW, dstH, fenceFd); + } catch (UnsatisfiedLinkError | RuntimeException e) { + Log.w(TAG, "nativePushBuffer threw", e); + // FD ownership on a thrown JNI call is undefined — best-effort: + // leak rather than risk a double-close. Phase 2.4 callers should + // wrap pushBuffer in a try/finally that owns the FD lifecycle + // explicitly when this matters. + return false; + } + } + + /** + * Phase 2.2 smoke-test helper: allocate an AHardwareBuffer and CPU-fill it + * with a single ARGB color. Returns a raw {@code AHardwareBuffer*} (held + * by the JVM as a {@code long}) — caller is responsible for eventually + * calling {@link #releaseBuffer}. + * + * @param argb 0xAARRGGBB packed colour. + * @return native pointer, or 0 on failure. 
+ */ + public static long allocateTestBuffer(int width, int height, int argb) { + if (width <= 0 || height <= 0) return 0L; + if (!SurfaceCompositor.isAvailable()) return 0L; + try { + return nativeAllocateTestBuffer(width, height, argb); + } catch (UnsatisfiedLinkError | RuntimeException e) { + Log.w(TAG, "nativeAllocateTestBuffer threw", e); + return 0L; + } + } + + /** Drop our refcount on a test AHardwareBuffer allocated with {@link #allocateTestBuffer}. */ + public static void releaseBuffer(long ahbPtr) { + if (ahbPtr == 0L) return; + try { + nativeReleaseBuffer(ahbPtr); + } catch (UnsatisfiedLinkError | RuntimeException e) { + Log.w(TAG, "nativeReleaseBuffer threw", e); + } + } + + /** + * Hide the layer — used when the current frame doesn't qualify for the + * direct-scanout fast path and the GLRenderer is going to composite + * normally. Idempotent. + */ + public synchronized void hide() { + if (nativeSc == 0L) return; + try { + nativeHide(nativeSc); + } catch (UnsatisfiedLinkError | RuntimeException e) { + Log.w(TAG, "nativeHide threw", e); + } + } + + /** + * Reparent the layer to null and release the underlying ASurfaceControl. + * Safe to call multiple times. After this returns, every other method on + * the layer is a no-op. + */ + public synchronized void release() { + if (nativeSc == 0L) return; + long sc = nativeSc; + nativeSc = 0L; + try { + nativeDetachAndRelease(sc); + } catch (UnsatisfiedLinkError | RuntimeException e) { + Log.w(TAG, "nativeDetachAndRelease threw", e); + } + Log.i(TAG, "Direct Composition layer released (sc=" + Long.toHexString(sc) + ")"); + } + + /** True if this handle still references a live native ASurfaceControl. 
*/ + public synchronized boolean isAttached() { + return nativeSc != 0L; + } + + private static native long nativeAttachToSurface(Surface surface); + + private static native void nativeDetachAndRelease(long sc); + + private static native void nativeSetColor(long sc, float r, float g, float b, float a); + + private static native void nativeHide(long sc); + + private static native boolean nativePushBuffer(long sc, long ahbPtr, + int dstX, int dstY, int dstW, int dstH, + int fenceFd); + + private static native long nativeAllocateTestBuffer(int width, int height, int argb); + + private static native void nativeReleaseBuffer(long ahbPtr); +} diff --git a/app/src/main/runtime/display/composition/SurfaceCompositor.java b/app/src/main/runtime/display/composition/SurfaceCompositor.java new file mode 100644 index 000000000..158f92267 --- /dev/null +++ b/app/src/main/runtime/display/composition/SurfaceCompositor.java @@ -0,0 +1,88 @@ +package com.winlator.cmod.runtime.display.composition; + +import android.os.Build; + +/** + * Bridge to the native {@code surface_compositor.c} module — gives the rest of + * the app a stable Java entry point for the per-container "Direct Composition" + * path without any caller having to know whether the underlying NDK + * {@code ASurfaceControl} / {@code ASurfaceTransaction} symbols are actually + * resolvable on this device. + * + *

<h3>Phase 1 (current)</h3>
+ * Only {@link #isAvailable()} is wired up. The result is cached after the first + * call so subsequent checks are free. Code that wants to use the Direct + * Composition fast path should: + * <ol> + * <li>Read the per-container toggle ({@code Container#isDirectCompositionEnabled()}).</li> + * <li>Confirm runtime support with {@link #isAvailable()}.</li> + * <li>Otherwise fall back to the existing GLRenderer composition path.</li> + * </ol> + * + * <h3>Why dlopen rather than direct linking</h3>

+ * {@code minSdk} is 26 but {@code ASurfaceControl_*} arrived in API 29; linking + * statically would fail to resolve at library-load time on Android 8/9. The + * native side resolves symbols via {@code dlopen("libandroid.so")} + + * {@code dlsym} so the shared library still loads everywhere and we degrade to + * the GLRenderer path when the symbols are missing. + */ +public final class SurfaceCompositor { + + static { + // Same pattern used by SysVSharedMemory, GPUImage, ClientSocket, etc. + // — every class that calls into the `winlator` shared lib loads it in + // its static init. Repeated System.loadLibrary calls are no-ops once + // the library is already mapped into the process. + System.loadLibrary("winlator"); + } + + private static final String TAG = "SurfaceCompositor"; + + /** + * Cached probe result. {@code null} until {@link #isAvailable()} is first + * called; thereafter the boxed Boolean is final-state. Read-after-write is + * safe because {@code Boolean} writes are atomic on every supported VM and + * the cache is intentionally racy: if two threads probe simultaneously they + * will both call into the JNI layer once, which is itself idempotent and + * mutex-guarded. + */ + private static volatile Boolean cachedAvailability; + + private SurfaceCompositor() { + // Static-only utility. + } + + /** + * @return {@code true} when the device exposes the API 29+ SurfaceControl + * + SurfaceTransaction NDK symbols and the {@code winlator} native + * library was loaded successfully. {@code false} on any earlier + * Android version, on any device where libandroid.so is missing + * the symbol, or if the JNI lookup itself fails. + */ + public static boolean isAvailable() { + Boolean cached = cachedAvailability; + if (cached != null) { + return cached; + } + // Hard short-circuit on platforms where the native call would always + // resolve to the API-< 29 fallback. Keeps logcat noise off old devices. 
+ if (Build.VERSION.SDK_INT < Build.VERSION_CODES.Q) { + cachedAvailability = Boolean.FALSE; + return false; + } + boolean result; + try { + result = nativeIsAvailable(); + } catch (UnsatisfiedLinkError | RuntimeException e) { + // Bridge load failure (e.g. winlator native lib not yet loaded + // from this classloader) — treat as "not available" rather than + // crashing the activity. The caller falls back to the GL path. + android.util.Log.w(TAG, "nativeIsAvailable threw, treating as unavailable", e); + result = false; + } + cachedAvailability = result; + return result; + } + + private static native boolean nativeIsAvailable(); +} diff --git a/app/src/main/runtime/display/renderer/GLRenderer.java b/app/src/main/runtime/display/renderer/GLRenderer.java index 20d08c0a8..9f41c203f 100644 --- a/app/src/main/runtime/display/renderer/GLRenderer.java +++ b/app/src/main/runtime/display/renderer/GLRenderer.java @@ -66,6 +66,54 @@ public class GLRenderer private final EffectComposer effectComposer; + /** + * Phase 2.3: when non-null and the current frame qualifies as a fullscreen + * direct-scanout candidate, the AHardwareBuffer backing that drawable is + * pushed to this layer in addition to (Phase 2.3) or instead of (Phase + * 2.5+) the GL composition. Set/cleared by the activity from the UI thread + * via {@link #setDirectCompositionTarget(com.winlator.cmod.runtime.display.composition.DirectCompositionLayer)}; + * read here on the GLThread, hence volatile. + * + *

Cross-thread safety doesn't come from the volatile alone — the + * volatile only suppresses NEW frames from entering the SC push after the + * UI thread writes null. In-flight frames already past that read are + * protected by the layer's own {@code synchronized} methods: when the UI + * thread's {@code release()} executes, it serializes on the same monitor + * as {@code pushBuffer}, then zeroes the native pointer. A + * {@code pushBuffer} that won the race sees the live SC; a later one sees + * {@code nativeSc == 0} and short-circuits in JNI. + */ + private volatile com.winlator.cmod.runtime.display.composition.DirectCompositionLayer + directCompositionTarget; + + /** + * Last AHardwareBuffer pointer + geometry pushed to {@code directCompositionTarget}. + * Per-frame `pushBuffer` calls allocate a SurfaceFlinger transaction, which + * is wasted work when neither the buffer pointer nor the destination geometry + * has changed since the previous frame. DRI3 in this project allocates a + * fresh GPUImage per Present cycle, so AHB-pointer identity is a sufficient + * "dirty" check; if a future codepath recycles AHBs in place this cache + * misses harmlessly (correctness preserved, just reverts to per-frame push). + * GLThread-only — no synchronization needed. + */ + private long dcLastPushedAhb = 0L; + private int dcLastPushedW = 0; + private int dcLastPushedH = 0; + + /** Consecutive {@code pushBuffer == false} returns. After enough failures + * the renderer detaches itself from the SC layer to avoid wasting JNI + * calls every frame on a permanent failure (e.g. SC was reparented away). + * GLThread-only. */ + private int dcConsecutiveFailures = 0; + private static final int DC_FAIL_LIMIT = 8; + + /** Phase 2.5 — true when the most recent frame successfully pushed an AHB + * to the SurfaceControl, so the SC layer is currently visible and showing + * game content. 
Used to detect transitions to the windowed/multi-drawable case + * so we can hide the SC layer cleanly (the Phase 2.5 GL skip it once + * gated was replaced by Phase 3.3's always-render). GLThread-only. */ + private boolean dcLayerActive = false; + public GLRenderer(XServerView xServerView, XServer xServer) { this.xServerView = xServerView; this.xServer = xServer; @@ -593,6 +641,23 @@ && isDirectScanoutContent(rWin.content) } if (isDirect) { + // Phase 2.3 — Direct Composition push. (The Phase 2.5 GL skip was + // replaced by Phase 3.3; see the note before renderDrawable below.) + // + // When the activity has wired up a SurfaceControl target AND the + // candidate's scanoutSource is a GPUImage (AHardwareBuffer-backed), + // hand the buffer directly to the SC layer. If the push succeeded + // (or the cache hit), the SC layer at z=1 covers the SurfaceView's + // primary BufferQueue at z=0. The GL composition below still runs, + // so the GL backbuffer stays in step with what the SC layer shows. + // + // If the push failed (returned false), the GL composition is the + // fallback. Note maybePushDirectComposition hides the SC layer only + // when DC_FAIL_LIMIT is reached or the magnifier UI is active, not + // on every failed push. + boolean dcOwnsFrame = maybePushDirectComposition(directCandidate); + dcLayerActive = dcOwnsFrame; + if (viewportNeedsUpdate) { if (fullscreen) { GLES20.glViewport(0, 0, surfaceWidth, surfaceHeight); @@ -641,6 +706,29 @@ && isDirectScanoutContent(rWin.content) xServer.screenInfo.width, xServer.screenInfo.height); quadVertices.bind(windowMaterial.programId); + // Phase 3.3 (replaces 2.5's GL-skip): always render the GL + // composition, even when the SurfaceControl layer is showing the + // same content. Reasoning: + // + // * The actual Direct Composition perf win is HWC promoting the SC + // layer to a DPU overlay plane (zero GPU compositing cost) — + // NOT the GLThread skipping its draw.
The GL work was already + // happening pre-DC and the GPU handles it without measurable + // impact on frame time. + // * Skipping the GL render produced a black GLSurfaceView backing + // buffer underneath the SC layer. On any direct→fallback + // transition where SF applied the SC-hide before the next GL + // composition (one-frame async race), the user briefly saw the + // black backbuffer — visible flicker. + // * Always-render keeps the GL backbuffer in lockstep with the SC + // content, so a stale-frame reveal is never possible: whatever + // SF reveals when SC hides is the same frame the user is + // already seeing through SC. + // + // Cost: one extra SF transaction per frame plus the GL composition + // (which was already paid pre-DC). The transaction is microseconds; + // the GL composition is offloaded to Adreno and runs concurrently + // with HWC's DPU plane composition. renderDrawable( directCandidate.content, directCandidate.rootX, directCandidate.rootY, windowMaterial); @@ -656,7 +744,11 @@ && isDirectScanoutContent(rWin.content) GLES20.glBlendFunc(GLES20.GL_SRC_ALPHA, GLES20.GL_ONE_MINUS_SRC_ALPHA); quadVertices.disable(); } else { - // No fullscreen candidate — fall back to normal rendering + // No fullscreen direct-scanout candidate — fall back to normal GL + // composition AND hide the SC layer (it might still be showing a + // stale frame from when we WERE in direct mode). maybeHideDirectComposition + // is idempotent, so this is cheap on subsequent fallback frames. + maybeHideDirectComposition(); drawFrame(); } } @@ -666,10 +758,166 @@ private boolean isDirectScanoutContent(Drawable drawable) { return scanoutSource != null && scanoutSource.isDirectScanout(); } + /** + * Phase 2.3 hot path: extract the AHardwareBuffer for the candidate's + * scanoutSource and hand it to the per-activity {@code DirectCompositionLayer}. + * + *

Holds {@code candidate.content.renderLock} for the lookup so we can't + * race against {@code DRI3Extension.tryPixmapFromHardwareBuffer} replacing + * the texture or {@code GPUImage.destroy()} releasing the underlying AHB + * mid-read. The JNI {@code pushBuffer} runs INSIDE the lock too — short + * call, SurfaceFlinger takes its own ref on the AHB inside + * {@code ASurfaceTransaction_setBuffer apply}, so the buffer is safe to + * release on the X-server thread the moment we exit the lock. + * + *

Per-frame waste suppression: caches the last successfully-pushed + * (ahbPtr, dstW, dstH) and skips the JNI call when nothing has changed. + * DRI3 allocates a fresh GPUImage each Present, so AHB-pointer identity + * is a sufficient "buffer changed" signal. + * + *

Failure counter: after {@code DC_FAIL_LIMIT} consecutive {@code false} + * returns from pushBuffer (e.g. SurfaceFlinger reparented the layer for + * us, or libandroid is mis-resolved on this build), nulls + * {@code directCompositionTarget} so subsequent frames don't keep paying + * the JNI cost for a permanent failure. + */ + /** + * @return true if a fresh AHB was pushed to the SC layer this frame OR if + * the cache hit (SC is still showing a valid prior frame). The + * caller records this in {@code dcLayerActive}; since Phase 3.3 the + * GL composition runs regardless of the result. + */ + private boolean maybePushDirectComposition(RenderableWindow directCandidate) { + final com.winlator.cmod.runtime.display.composition.DirectCompositionLayer dcTarget = + directCompositionTarget; + if (dcTarget == null) return false; + if (surfaceWidth <= 0 || surfaceHeight <= 0) return false; + // Force fallback to GL composition when an in-process overlay needs to be + // visible on top of the game frame. The SC layer at z=1 covers the + // GLSurfaceView's primary BQ at z=0, so anything we render via GL + // (magnifier UI, debug HUDs, picker dialogs that draw into the GL + // surface) would otherwise be invisible. Hide the SC NOW (not later) + // so the next vsync shows the GL overlay instead of the stale buffer. + if (magnifierUIActive) { + if (dcLayerActive) { + dcTarget.hide(); + dcLayerActive = false; + dcLastPushedAhb = 0L; + dcLastPushedW = 0; + dcLastPushedH = 0; + } + return false; + } + + final Drawable content = directCandidate.content; + synchronized (content.renderLock) { + Drawable scanoutSource = content.getScanoutSource(); + if (scanoutSource == null) return false; + com.winlator.cmod.runtime.display.renderer.Texture tex = scanoutSource.getTexture(); + if (!(tex instanceof GPUImage)) return false; + long ahbPtr = ((GPUImage) tex).getHardwareBufferPtr(); + if (ahbPtr == 0L) return false; + // Skip JNI when nothing has changed since the last push.
SurfaceFlinger + // is still showing the layer; no point queueing a no-op transaction. + if (ahbPtr == dcLastPushedAhb + && surfaceWidth == dcLastPushedW + && surfaceHeight == dcLastPushedH) { + return true; + } + // Producer-acquire fence: TAKE the FD from the scanout source under + // the renderLock, atomically clearing it on the Drawable. We are now + // the single owner; if pushBuffer succeeds, the framework closes + // the FD via setBuffer; if pushBuffer fails, the JNI layer closes + // the FD on its own error paths. + // + // Today this is always -1 because the X server's PRESENT/DRI3 parser + // doesn't yet extract a `wait_fence` from the wire request. We + // empirically observe no tearing without it: the in-process Java + // X server processes PIXMAP_FROM_BUFFERS only after the Wine client + // has already submitted its Vulkan commands, and that CPU-side + // handoff latency exceeds GPU-write completion time on the SoCs + // we've tested. This is an observation about this specific pipeline, + // not a Vulkan or Mesa contract — when the parser gains real + // wait_fence support, it'll call Drawable.setAcquireFenceFd and the + // following takeAcquireFenceFd() will return a real FD. 
+ int fenceFd = scanoutSource.takeAcquireFenceFd(); + boolean ok = dcTarget.pushBuffer(ahbPtr, 0, 0, surfaceWidth, surfaceHeight, fenceFd); + if (ok) { + dcLastPushedAhb = ahbPtr; + dcLastPushedW = surfaceWidth; + dcLastPushedH = surfaceHeight; + dcConsecutiveFailures = 0; + return true; + } else { + dcConsecutiveFailures++; + if (dcConsecutiveFailures >= DC_FAIL_LIMIT) { + android.util.Log.w( + "GLRenderer", + "DirectComposition push failed " + dcConsecutiveFailures + + " frames in a row — disabling target for this session"); + // Hide the SC layer BEFORE nulling the field — once the field is + // null, maybeHideDirectComposition has nothing to call hide() on, + // and SurfaceFlinger would keep showing the last successfully-pushed + // buffer over the GL output forever (until activity teardown + // released the SC). Use the local dcTarget — still live in this + // scope even after we null the field. + dcTarget.hide(); + dcLayerActive = false; + directCompositionTarget = null; + dcLastPushedAhb = 0L; + dcLastPushedW = 0; + dcLastPushedH = 0; + dcConsecutiveFailures = 0; + } + return false; + } + } + } + + /** + * Phase 2.5 — hide the Direct Composition layer when the current frame + * doesn't qualify for the SC fast path (windowed app, multi-drawable, + * cursor visible over a non-fullscreen scene, etc.). Idempotent and cheap + * after the first call: tracks {@link #dcLayerActive} so we only queue + * a hide-transaction once per direct→fallback transition. + */ + private void maybeHideDirectComposition() { + if (!dcLayerActive) return; + com.winlator.cmod.runtime.display.composition.DirectCompositionLayer dcTarget = + directCompositionTarget; + if (dcTarget != null) { + dcTarget.hide(); + } + dcLayerActive = false; + // Invalidate the cache so the next pushBuffer re-shows with a fresh + // setBuffer + setVisibility(SHOW) transaction, even if the same AHB + // pointer happens to be active. 
+ dcLastPushedAhb = 0L; + dcLastPushedW = 0; + dcLastPushedH = 0; + } + public void setUnviewableWMClasses(String... unviewableWMNames) { this.unviewableWMClasses = unviewableWMNames; } + /** + * Hand the renderer the per-activity Direct Composition layer (or null to + * detach). Safe to call from the UI thread; the GLThread reads the field + * volatile-ly each frame inside {@link #drawFrameOptimized()}. + * + *

When the layer is set AND the frame is a fullscreen direct-scanout + * candidate, the drawable's underlying AHardwareBuffer is also pushed to + * the SurfaceControl. The GLRenderer still draws the same frame every + * time (Phase 3.3 replaced the planned Phase 2.5 GL skip), so the GL + * backbuffer matches the SC content and is what the user sees whenever + * the SurfaceControl path fails or the layer is hidden. + */ + public void setDirectCompositionTarget( + com.winlator.cmod.runtime.display.composition.DirectCompositionLayer layer) { + this.directCompositionTarget = layer; + } + @Override public void onFramePresented(com.winlator.cmod.runtime.display.xserver.Window window) { } diff --git a/app/src/main/runtime/display/renderer/GPUImage.java b/app/src/main/runtime/display/renderer/GPUImage.java index 4c3397ee0..9c928ed6d 100644 --- a/app/src/main/runtime/display/renderer/GPUImage.java +++ b/app/src/main/runtime/display/renderer/GPUImage.java @@ -97,6 +97,23 @@ public boolean isValid() { return hardwareBufferPtr != 0 && (!cpuAccessible || (virtualData != null && stride > 0)); } + /** + * Raw {@code AHardwareBuffer*} pointer (a JNI-returned {@code long}). Used by + * the Direct Composition path to hand this image directly to a child + * {@code ASurfaceControl} via {@code ASurfaceTransaction_setBuffer}, bypassing + * the in-process GLRenderer composition. Returns {@code 0} if the buffer + * was never imported / has been destroyed — callers MUST treat 0 as + * "no buffer available, fall back to GL path". + * + *

The pointer remains valid only for the lifetime of this {@code GPUImage}; + * do not dereference or pass it on after {@link #destroy()}. SurfaceFlinger + * takes its own reference when the AHB is set on a layer, so a buffer that + * was already pushed stays alive there — only this raw pointer goes stale. + */ + public long getHardwareBufferPtr() { + return hardwareBufferPtr; + } + public boolean hasSamplingFailed() { return samplingFailed; } diff --git a/app/src/main/runtime/display/xserver/Drawable.java b/app/src/main/runtime/display/xserver/Drawable.java index eab676c24..d89596eeb 100644 --- a/app/src/main/runtime/display/xserver/Drawable.java +++ b/app/src/main/runtime/display/xserver/Drawable.java @@ -14,8 +14,46 @@ public class Drawable extends XResource { public final Visual visual; private Texture texture = new Texture(); private ByteBuffer data; - private boolean directScanout = false; - private Drawable scanoutSource; + // volatile because the GLThread reads these in `isDirectScanoutContent` + // (called outside renderLock for the candidate-search pass) while the + // X-server worker thread mutates them via setDirectScanout/setScanoutSource + // from PresentExtension and DRI3Extension. Without volatile, the GLThread + // can observe stale values across frames. Long-form mutations of texture + // / scanoutSource still serialize on `renderLock` for atomicity with reads + // that take the lock (see `renderDrawable`, the Direct-Composition path). + private volatile boolean directScanout = false; + private volatile Drawable scanoutSource; + /** + * Producer-side acquire fence FD. -1 means "no fence; buffer is ready for + * immediate read." + * + *

Today in this fork the value is always -1: the in-process Java X server + * receives the {@code AHardwareBuffer} from the Wine client over the DRI3 + * {@code PIXMAP_FROM_BUFFERS} unix-socket path, which the X-server worker + * thread processes only after the client has already submitted its Vulkan + * command buffer. Empirically this CPU-side handoff latency exceeds the + * GPU-write completion time, so we observe no tearing without an explicit + * fence — but this is an observation about THIS pipeline, not a Vulkan or + * Mesa contract. If a future codepath drives presents at a higher rate or + * skips that handoff cost, an explicit acquire fence will become necessary. + * + *

Forward-compatible plumbing: when the X-server PRESENT/DRI3 parsing + * gains real {@code wait_fence} support, that code calls + * {@link #setAcquireFenceFd}. The Direct Composition push at + * {@code GLRenderer.maybePushDirectComposition} consumes the value under + * {@link #renderLock} via {@link #takeAcquireFenceFd} (atomic + * read-and-clear), and forwards it to + * {@code DirectCompositionLayer.pushBuffer} which transfers ownership to + * the framework via {@code ASurfaceTransaction_setBuffer}. + * + *

Stored as {@link java.util.concurrent.atomic.AtomicInteger} so the + * "consume" semantic is atomic with respect to concurrent + * setAcquireFenceFd writes — without it, a producer could overwrite a + * still-pending FD between the consumer's read and clear, leaking the + * old FD. + */ + private final java.util.concurrent.atomic.AtomicInteger acquireFenceFd = + new java.util.concurrent.atomic.AtomicInteger(-1); private Runnable onDrawListener; private Callback onDestroyListener; public final Object renderLock = new Object(); @@ -69,6 +107,44 @@ public void clearScanoutSource() { if (texture != null) texture.setNeedsUpdate(true); } + /** + * Atomic read-and-clear of the acquire fence FD: returns the current value + * (or -1 if none) AND resets the field to -1 in a single CAS. + * + *

This is single-consumer "take" semantics: the caller now owns the FD + * and is responsible for either closing it or transferring ownership + * elsewhere (e.g. by passing it to {@code ASurfaceTransaction_setBuffer}, + * which closes it via the framework). A second concurrent caller of + * {@code takeAcquireFenceFd} will get -1 — they did not own the original + * fence. + * + *

Returns -1 today because no producer code currently calls + * {@link #setAcquireFenceFd}. + */ + public int takeAcquireFenceFd() { + return acquireFenceFd.getAndSet(-1); + } + + /** + * Sets the producer acquire fence FD. Should be called by Present/DRI3 + * extension code immediately before the buffer is published to the GLRenderer + * (i.e. before {@link #setScanoutSource}). Ownership transfers to the + * Drawable; the next consumer of {@link #takeAcquireFenceFd} takes + * ownership in turn. If the previous fence is still set (consumer hasn't + * taken it yet), this method closes the previous fence to avoid a leak. + * Pass {@code -1} to clear without setting a new fence. + */ + public void setAcquireFenceFd(int fd) { + int prior = acquireFenceFd.getAndSet(fd); + if (prior >= 0 && prior != fd) { + try { + android.os.ParcelFileDescriptor.adoptFd(prior).close(); + } catch (java.io.IOException ignored) { + // best-effort close; the FD is still leaked but we did our part + } + } + } + public ByteBuffer getData() { return data; } From f7cccfb864dac4a12302dba0d002bb73a347ddd8 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Thu, 7 May 2026 16:57:24 -0400 Subject: [PATCH 2/2] fix(display): neutralise SDR-on-HDR brightness boost on Direct Composition layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some games rendered visibly brighter through the new SurfaceControl path than via the legacy GLRenderer composition. Root cause is Snapdragon DPU's mixed-SDR/HDR composition pipeline: SDR layers without explicit dataspace / opacity / extended-range metadata are routed through an HDR-aware path that applies a midtone boost via vendor-tuned LUTs. The GL composition path bypasses that pipeline; the SC path engages it. Fix: assert three previously-implicit metadata properties on every pushBuffer transaction so HWC takes the legacy SDR composition path. 
surface_compositor.c * Resolve three new optional symbols via dlopen/dlsym: ASurfaceTransaction_setBufferDataSpace (API 29) ASurfaceTransaction_setBufferTransparency (API 29) ASurfaceTransaction_setExtendedRangeBrightness (API 34) Each is independent of the mandatory availability gate — missing symbols produce a one-shot init log and skip the call rather than failing the whole DC path. * nativePushBuffer gains a `jboolean opaque` parameter and, after setBuffer, calls (when each symbol resolved): setBufferDataSpace(SRGB) — explicit sRGB tag, no speculative re-encode setBufferTransparency(OPAQUE/TRANSL) — opaque skips alpha-blend stage, OPAQUE only when caller asserts alpha=1.0 setExtendedRangeBrightness(1.0, 1.0)— assertively opts the layer out of the SDR-on-HDR boost path * One-shot diagnostic log at init prints which colour symbols resolved so a post-deploy logcat can distinguish "fix didn't apply" from "fix applied, vendor pipeline still boosting" without rebuild. DirectCompositionLayer.java * pushBuffer signature gains `boolean opaque`. Javadoc explains the Snapdragon DPU rationale and warns against passing true on translucent buffers. GLRenderer.maybePushDirectComposition * Passes opaque=true — DXVK / vkd3d-proton swap-chain frames are RGBA8888 with alpha=1.0 throughout by Vulkan WSI convention. XServerDisplayActivity * pushDirectCompositionSmokeTest: passes opaque=true (magenta swatch is fully opaque) and shrinks the swatch from 256x256 to 128x128 (user feedback — less obtrusive while DC is still a developer toggle). Sharpness side-effect: the same HDR-aware pipeline is responsible for some of the vendor's content-adaptive enhancement (perceived "too sharp" look). Neutralising it via the three calls above is expected to reduce both the brightness mismatch and the residual sharpness simultaneously. 
Containment: zero impact when the toggle is off; on devices missing any of the three new symbols the path degrades to the prior (brighter) behaviour with a single one-shot log line. JNI signatures match; reviewer pass cleared. --- .../main/cpp/winlator/surface_compositor.c | 85 ++++++++++++++++++- .../display/XServerDisplayActivity.java | 18 ++-- .../composition/DirectCompositionLayer.java | 15 +++- .../runtime/display/renderer/GLRenderer.java | 7 +- 4 files changed, 112 insertions(+), 13 deletions(-) diff --git a/app/src/main/cpp/winlator/surface_compositor.c b/app/src/main/cpp/winlator/surface_compositor.c index 64b97e4e2..910303897 100644 --- a/app/src/main/cpp/winlator/surface_compositor.c +++ b/app/src/main/cpp/winlator/surface_compositor.c @@ -53,6 +53,15 @@ struct ASurfaceTransaction; #define DC_VISIBILITY_HIDE ((int8_t)0) #define DC_VISIBILITY_SHOW ((int8_t)1) +// Mirror of `enum ASurfaceTransactionTransparency` (surface_control.h:447-451). +// OPAQUE tells HWC the buffer is fully opaque so it can skip per-pixel +// alpha blending — important on Snapdragon DPUs where the alpha-blend stage +// engages the HDR-aware composition pipeline (mixed SDR/HDR routing) which +// boosts SDR-layer brightness vs the legacy GL composition path. +#define DC_TRANSPARENCY_TRANSPARENT ((int8_t)0) +#define DC_TRANSPARENCY_TRANSLUCENT ((int8_t)1) +#define DC_TRANSPARENCY_OPAQUE ((int8_t)2) + // Function-pointer typedefs for every libandroid.so symbol we use. Kept in // the order they're documented in surface_control.h for easy cross-reference. typedef struct ASurfaceControl* (*pfn_ASurfaceControl_createFromWindow)( @@ -100,6 +109,35 @@ typedef void (*pfn_ASurfaceTransaction_setBufferTransform)(struct ASurfaceTransa struct ASurfaceControl* sc, int32_t transform); +// Phase 4 — colour / brightness control to neutralise the Snapdragon DPU's +// HDR-aware composition pipeline that boosts SDR layer brightness vs the +// legacy GL composition path. 
+// +// `setBufferDataSpace` (API 29) — explicit ADATASPACE_SRGB so HWC can't pick +// ADATASPACE_UNKNOWN from gralloc metadata and route through a path that +// speculatively decodes-then-re-encodes. +// `setBufferTransparency` (API 29) — OPAQUE skips per-pixel alpha blend, +// bypassing the mixed-SDR/HDR routing stage on layers known to be +// fully-opaque (game swap-chain frames are RGBA8888 with alpha=1.0). +// `setExtendedRangeBrightness` (API 34) — pin layer's extended-range ratio +// to (1.0, 1.0) so SurfaceFlinger's SDR-on-HDR-panel path doesn't apply +// a midtone boost. Default is (1.0, 1.0) but the AOSP pipeline only +// skips the boost when the call is explicit. +// +// All three are optional: if dlsym returns null we degrade to the prior +// (visibly brighter) behaviour and log it once at startup so the missing +// symbol is diagnosable from logcat without a re-build. +typedef void (*pfn_ASurfaceTransaction_setBufferDataSpace)(struct ASurfaceTransaction* t, + struct ASurfaceControl* sc, + int data_space /* ADataSpace */); +typedef void (*pfn_ASurfaceTransaction_setBufferTransparency)(struct ASurfaceTransaction* t, + struct ASurfaceControl* sc, + int8_t transparency); +typedef void (*pfn_ASurfaceTransaction_setExtendedRangeBrightness)(struct ASurfaceTransaction* t, + struct ASurfaceControl* sc, + float currentBufferRatio, + float desiredRatio); + // One-shot init under mutex. After init completes, all g_* function pointers // are effectively const for the rest of the process and can be read without // further locking. 
@@ -123,6 +161,9 @@ static pfn_ASurfaceTransaction_setPosition g_tx_set_position = NULL; static pfn_ASurfaceTransaction_setScale g_tx_set_scale = NULL; static pfn_ASurfaceTransaction_setCrop g_tx_set_crop = NULL; static pfn_ASurfaceTransaction_setBufferTransform g_tx_set_buffer_transform = NULL; +static pfn_ASurfaceTransaction_setBufferDataSpace g_tx_set_buffer_dataspace = NULL; +static pfn_ASurfaceTransaction_setBufferTransparency g_tx_set_buffer_transparency = NULL; +static pfn_ASurfaceTransaction_setExtendedRangeBrightness g_tx_set_extended_range_brightness = NULL; // `__typeof__` is the documented-extension spelling that doesn't trip // `-Wgnu-typeof-extension` under pedantic Clang flags. Equivalent to GCC/C23 @@ -159,6 +200,11 @@ static void init_once_locked(void) { RESOLVE(g_tx_set_scale, "ASurfaceTransaction_setScale"); RESOLVE(g_tx_set_crop, "ASurfaceTransaction_setCrop"); RESOLVE(g_tx_set_buffer_transform, "ASurfaceTransaction_setBufferTransform"); + // Phase 4 colour / brightness symbols. Optional — failure to resolve + // means we'll see the visibly-brighter behaviour and log the miss. + RESOLVE(g_tx_set_buffer_dataspace, "ASurfaceTransaction_setBufferDataSpace"); + RESOLVE(g_tx_set_buffer_transparency, "ASurfaceTransaction_setBufferTransparency"); + RESOLVE(g_tx_set_extended_range_brightness, "ASurfaceTransaction_setExtendedRangeBrightness"); // Phase-1 lifecycle symbols + setBuffer + at least one COMPLETE geometry // path are mandatory. The modern path requires all three of @@ -179,6 +225,14 @@ static void init_once_locked(void) { if (g_available) { LOGI("Direct Composition NDK symbols resolved (geom=%s)", modern_geom_complete ? "API31+" : "API29 setGeometry"); + // Per-symbol diagnostic for the Phase 4 colour fix surface — printed + // once on first probe so we can distinguish "fix didn't apply" from + // "fix applied, vendor pipeline still boosting" in post-deploy + // logcats without rebuilding. 
+ LOGI("Direct Composition colour symbols: setBufferDataSpace=%s setBufferTransparency=%s setExtendedRangeBrightness=%s", + g_tx_set_buffer_dataspace ? "yes" : "MISSING", + g_tx_set_buffer_transparency ? "yes" : "MISSING", + g_tx_set_extended_range_brightness ? "yes" : "MISSING (API < 34)"); } else { LOGW("Direct Composition NDK symbols missing (API < 29 or stripped libandroid)"); } @@ -372,7 +426,8 @@ Java_com_winlator_cmod_runtime_display_composition_DirectCompositionLayer_native JNIEXPORT jboolean JNICALL Java_com_winlator_cmod_runtime_display_composition_DirectCompositionLayer_nativePushBuffer( JNIEnv* env, jclass clazz, jlong sc_ptr, jlong ahb_ptr, - jint dst_x, jint dst_y, jint dst_w, jint dst_h, jint acquire_fence_fd) { + jint dst_x, jint dst_y, jint dst_w, jint dst_h, jint acquire_fence_fd, + jboolean opaque) { (void)env; (void)clazz; if (sc_ptr == 0 || ahb_ptr == 0) { @@ -415,6 +470,34 @@ Java_com_winlator_cmod_runtime_display_composition_DirectCompositionLayer_native // framework will close the fd; we MUST NOT touch it again. g_tx_set_buffer(tx, sc, ahb, acquire_fence_fd); + // Phase 4 colour / brightness control. Each call is best-effort — if the + // symbol wasn't resolved (older Android, stripped libandroid) we skip and + // the layer falls back to whatever default the platform applies. The + // missing-symbol case was logged once at init. + // + // Order within the transaction is irrelevant per surface_control.h: + // properties are committed atomically on apply(). + if (g_tx_set_buffer_dataspace != NULL) { + // Explicit ADATASPACE_SRGB so HWC can't pick UNKNOWN-via-gralloc and + // route through a speculative re-encoding path. + g_tx_set_buffer_dataspace(tx, sc, ADATASPACE_SRGB); + } + if (g_tx_set_buffer_transparency != NULL) { + // Caller-declared opacity — game frames are typically RGBA8888 with + // alpha=1.0 throughout, declaring OPAQUE skips alpha blending and + // bypasses the mixed-SDR/HDR routing stage that brightens layers. 
+ // Untrusted/translucent surfaces pass opaque=false to keep + // PREMULTIPLIED behaviour. + g_tx_set_buffer_transparency(tx, sc, + opaque ? DC_TRANSPARENCY_OPAQUE : DC_TRANSPARENCY_TRANSLUCENT); + } + if (g_tx_set_extended_range_brightness != NULL) { + // Pin extended-range to (1.0, 1.0) — explicit "no HDR headroom + // requested." Default value but only assertively skips the + // SDR-on-HDR-panel midtone boost when stated. + g_tx_set_extended_range_brightness(tx, sc, 1.0f, 1.0f); + } + // Geometry. The modern path lets us crop and scale independently; if // unavailable on the device's libandroid, fall back to setGeometry. if (g_tx_set_position != NULL && g_tx_set_scale != NULL && g_tx_set_crop != NULL) { diff --git a/app/src/main/runtime/display/XServerDisplayActivity.java b/app/src/main/runtime/display/XServerDisplayActivity.java index 97f3569c0..54452e7f4 100644 --- a/app/src/main/runtime/display/XServerDisplayActivity.java +++ b/app/src/main/runtime/display/XServerDisplayActivity.java @@ -4745,13 +4745,14 @@ private void pushDirectCompositionSmokeTest() { .DirectCompositionLayer.releaseBuffer(directCompositionTestBufferPtr); directCompositionTestBufferPtr = 0L; } - // 0xFFFF00FF = opaque magenta. 256x256 swatch — picked so it's large - // enough to be eligible for an HWC overlay plane on Adreno (some - // chipsets reject planes < 100px tall, which would silently fall - // back to GPU client composition). Phase 2.3 will replace this with - // real Wine frames. - final int testWidth = 256; - final int testHeight = 256; + // 0xFFFF00FF = opaque magenta. 128x128 swatch — small visual + // confirmation that the SC layer is alive without obscuring useful + // game UI. (Was 256x256; user wanted ~50% smaller.) Still well + // above any plausible HWC overlay-plane minimum-size threshold. + // Phase 2.3 replaces this with real Wine frames once a fullscreen + // direct-scanout candidate is detected. 
+ final int testWidth = 128; + final int testHeight = 128; final int testColorArgb = 0xFFFF00FF; directCompositionTestBufferPtr = @@ -4774,7 +4775,8 @@ private void pushDirectCompositionSmokeTest() { boolean ok = directCompositionLayer.pushBuffer( directCompositionTestBufferPtr, dstX, dstY, testWidth, testHeight, - /* fenceFd */ -1); + /* fenceFd */ -1, + /* opaque */ true /* magenta swatch is fully opaque */); Log.i("XServerDisplayActivity", "Direct Composition smoke test pushBuffer returned " + ok + " (dst=" + dstX + "," + dstY + " " + testWidth + "x" + testHeight diff --git a/app/src/main/runtime/display/composition/DirectCompositionLayer.java b/app/src/main/runtime/display/composition/DirectCompositionLayer.java index 00197bcf7..e7901f82e 100644 --- a/app/src/main/runtime/display/composition/DirectCompositionLayer.java +++ b/app/src/main/runtime/display/composition/DirectCompositionLayer.java @@ -118,13 +118,22 @@ public synchronized void setColor(float r, float g, float b, float a) { * surface_control.h:343-348 — the caller MUST NOT close * it after this call. Phase 2.2 always passes -1; real * fence threading lives in Phase 2.4. + * @param opaque Whether the buffer's pixels are fully opaque + * (alpha=1.0 throughout). Game swap-chain frames are + * by convention; pass {@code true} to let HWC mark the + * layer OPAQUE and skip per-pixel alpha blending, + * which on Snapdragon DPUs avoids the SDR-on-HDR + * panel routing that boosts layer brightness vs the + * legacy GL composition path. Pass {@code false} when + * the buffer may contain translucency (overlays, UI). * @return true if the transaction was queued; false on any failure (in * which case the caller should fall back to the GL composition * path for this frame). 
*/ public synchronized boolean pushBuffer(long ahbPtr, int dstX, int dstY, int dstW, int dstH, - int fenceFd) { + int fenceFd, + boolean opaque) { // Always enter JNI so the native side has a single, consistent place // to consume / close the fence FD. The native code's first action is // to validate sc / ahb / extents and close the fence on any error @@ -133,7 +142,7 @@ public synchronized boolean pushBuffer(long ahbPtr, // passes -1, so this is a no-op today, but the invariant matters // for Phase 2.4 when real fences arrive. try { - return nativePushBuffer(nativeSc, ahbPtr, dstX, dstY, dstW, dstH, fenceFd); + return nativePushBuffer(nativeSc, ahbPtr, dstX, dstY, dstW, dstH, fenceFd, opaque); } catch (UnsatisfiedLinkError | RuntimeException e) { Log.w(TAG, "nativePushBuffer threw", e); // FD ownership on a thrown JNI call is undefined — best-effort: @@ -220,7 +229,7 @@ public synchronized boolean isAttached() { private static native boolean nativePushBuffer(long sc, long ahbPtr, int dstX, int dstY, int dstW, int dstH, - int fenceFd); + int fenceFd, boolean opaque); private static native long nativeAllocateTestBuffer(int width, int height, int argb); diff --git a/app/src/main/runtime/display/renderer/GLRenderer.java b/app/src/main/runtime/display/renderer/GLRenderer.java index 9f41c203f..090411196 100644 --- a/app/src/main/runtime/display/renderer/GLRenderer.java +++ b/app/src/main/runtime/display/renderer/GLRenderer.java @@ -841,7 +841,12 @@ private boolean maybePushDirectComposition(RenderableWindow directCandidate) { // wait_fence support, it'll call Drawable.setAcquireFenceFd and the // following takeAcquireFenceFd() will return a real FD. 
int fenceFd = scanoutSource.takeAcquireFenceFd(); - boolean ok = dcTarget.pushBuffer(ahbPtr, 0, 0, surfaceWidth, surfaceHeight, fenceFd); + // Game swap-chain frames from DXVK / vkd3d-proton are RGBA8888 with + // alpha=1.0 throughout — declaring OPAQUE lets HWC skip the alpha-blend + // stage that engages the SDR-on-HDR-panel routing on Snapdragon DPUs + // (the visible-brightness mismatch vs the GL composition path). + boolean ok = dcTarget.pushBuffer(ahbPtr, 0, 0, surfaceWidth, surfaceHeight, fenceFd, + /* opaque */ true); if (ok) { dcLastPushedAhb = ahbPtr; dcLastPushedW = surfaceWidth;