From 83c2550805a66bc4e01ea608affbbb5db82b1bf0 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Tue, 9 Jun 2026 11:21:57 -0400 Subject: [PATCH 01/46] Native frame generation (optical-flow interpolation) Compositor frame generation on open GLSL shaders: block-matching motion estimation and fragment-shader interpolation wired into the Vulkan present path. A headroom-driven scheduler posts at the target rate under a non-blocking present mode (so an adaptive panel ramps up) and passes through under FIFO; the real frame always presents, so it never drops below native. Frame Gen controls live in the FX tab as an expanding toggle (like SGSR): 2x/3x/4x multiplier, quality preset and smoothness; Other-settings toggle. The HUD FPS reports the output rate (real + generated) while FG is active. --- app/src/main/cpp/CMakeLists.txt | 4 + .../cpp/winlator/vk/shaders/interpolate.frag | 59 ++ .../main/cpp/winlator/vk/shaders/motion.comp | 111 +++ .../cpp/winlator/vk/shaders/motion_fp32.comp | 96 ++ app/src/main/cpp/winlator/vk/vk_dispatch.c | 3 + app/src/main/cpp/winlator/vk/vk_dispatch.h | 6 + app/src/main/cpp/winlator/vk/vk_renderer.c | 910 +++++++++++++++--- app/src/main/cpp/winlator/vk/vk_state.h | 39 + .../settings/other/OtherSettingsFragment.kt | 5 + .../settings/other/OtherSettingsScreen.kt | 12 + app/src/main/res/values-da/strings.xml | 9 + app/src/main/res/values-de/strings.xml | 9 + app/src/main/res/values-es/strings.xml | 9 + app/src/main/res/values-fr/strings.xml | 9 + app/src/main/res/values-hi/strings.xml | 9 + app/src/main/res/values-it/strings.xml | 9 + app/src/main/res/values-ko/strings.xml | 9 + app/src/main/res/values-pl/strings.xml | 9 + app/src/main/res/values-pt-rBR/strings.xml | 9 + app/src/main/res/values-ro/strings.xml | 9 + app/src/main/res/values-ru/strings.xml | 9 + app/src/main/res/values-uk/strings.xml | 9 + app/src/main/res/values-zh-rCN/strings.xml | 9 + app/src/main/res/values-zh-rTW/strings.xml | 9 + app/src/main/res/values/strings.xml | 9 + 
.../display/XServerDisplayActivity.java | 93 +- .../main/runtime/display/XServerDrawerMenu.kt | 95 +- .../display/renderer/VulkanRenderer.java | 244 ++++- .../main/runtime/display/ui/FrameRating.java | 32 +- 29 files changed, 1717 insertions(+), 127 deletions(-) create mode 100644 app/src/main/cpp/winlator/vk/shaders/interpolate.frag create mode 100644 app/src/main/cpp/winlator/vk/shaders/motion.comp create mode 100644 app/src/main/cpp/winlator/vk/shaders/motion_fp32.comp diff --git a/app/src/main/cpp/CMakeLists.txt b/app/src/main/cpp/CMakeLists.txt index 3cbe976c5..119a2d79b 100644 --- a/app/src/main/cpp/CMakeLists.txt +++ b/app/src/main/cpp/CMakeLists.txt @@ -76,6 +76,10 @@ set(SHADER_LIST "effect_hdr:frag:effect_hdr_frag" "effect_natural:frag:effect_natural_frag" "sgsr1:frag:sgsr1_frag" + # Frame generation: motion estimation (compute, fp16 + fp32 fallback) + interpolation (fragment). + "motion:comp:motion_comp" + "motion_fp32:comp:motion_fp32_comp" + "interpolate:frag:interpolate_frag" ) set(SHADER_HEADERS "") diff --git a/app/src/main/cpp/winlator/vk/shaders/interpolate.frag b/app/src/main/cpp/winlator/vk/shaders/interpolate.frag new file mode 100644 index 000000000..9d4c9d865 --- /dev/null +++ b/app/src/main/cpp/winlator/vk/shaders/interpolate.frag @@ -0,0 +1,59 @@ +#version 450 + +// Motion-compensated frame interpolation: warps frame N-1/N along motion.comp's backward +// flow to synthesize the phase-t frame. A consistency test falls back to the nearest real +// frame where warps disagree or land off-frame (avoids smearing on HUD/text/disocclusions). 
+ +precision mediump float; +precision highp int; + +layout(location = 0) in highp vec2 vUV; +layout(location = 0) out vec4 outColor; + +layout(set = 0, binding = 0) uniform mediump sampler2D prevFrame; // frame N-1 +layout(set = 0, binding = 1) uniform mediump sampler2D currFrame; // frame N +layout(set = 0, binding = 2) uniform highp sampler2D motionField; // half-res field from the motion compute pass (motion.comp declares it rgba16f; only .xy is used): curr->prev MV in half-res px + +layout(push_constant) uniform PC { + vec2 resolution; // full-res target size (pixels) + float phase; // interpolation phase t in (0,1); 0.5 == single mid frame; values <= 0 fall back to 0.5 + float occlusionLo; // luma-consistency: fully trusted at/below this delta; values <= 0 fall back to 0.06 + float occlusionHi; // fully rejected at/above this delta; values <= the resolved occlusionLo fall back to 0.25 + float _pad; // not read by this shader; brings the block to 24 bytes +} pc; + +float luma(vec3 c) { return dot(c, vec3(0.299, 0.587, 0.114)); } + +bool offFrame(highp vec2 uv) { + return any(lessThan(uv, vec2(0.0))) || any(greaterThan(uv, vec2(1.0))); +} + +void main() { + float t = clamp(pc.phase > 0.0 ? pc.phase : 0.5, 0.0, 1.0); + float lo = pc.occlusionLo > 0.0 ? pc.occlusionLo : 0.06; + float hi = pc.occlusionHi > lo ? pc.occlusionHi : 0.25; + + // motionField is half-res and stores curr->prev displacement in half-res pixels. + // Normalized displacement = mv_halfPx / mvSize = mv_halfPx * 2 / fullResSize. + vec2 mvNorm = texture(motionField, vUV).xy * 2.0 / pc.resolution; + + // Linear-trajectory motion compensation. For an intermediate pixel p (== vUV) + // with backward flow mv (curr->prev): + // currPos = p - (1 - t) * mv prevPos = p + t * mv + highp vec2 prevPos = vUV + t * mvNorm; + highp vec2 currPos = vUV - (1.0 - t) * mvNorm; + + vec3 cPrev = texture(prevFrame, prevPos).rgb; + vec3 cCurr = texture(currFrame, currPos).rgb; + vec3 mc = mix(cPrev, cCurr, t); // motion-compensated blend + + // Trust = how consistent the two warps are, gated by on-frame-ness. 
+ float disagree = abs(luma(cPrev) - luma(cCurr)); + float trust = 1.0 - smoothstep(lo, hi, disagree); + if (offFrame(prevPos) || offFrame(currPos)) trust = 0.0; + + // Fallback for untrusted pixels: nearest real frame, unwarped (no smear). + vec3 nearest = (t < 0.5) ? texture(prevFrame, vUV).rgb : texture(currFrame, vUV).rgb; + + outColor = vec4(clamp(mix(nearest, mc, trust), 0.0, 1.0), 1.0); +} diff --git a/app/src/main/cpp/winlator/vk/shaders/motion.comp b/app/src/main/cpp/winlator/vk/shaders/motion.comp new file mode 100644 index 000000000..97717ac32 --- /dev/null +++ b/app/src/main/cpp/winlator/vk/shaders/motion.comp @@ -0,0 +1,111 @@ +#version 450 + +// Half-res luma block-matching (three-step search) → backward flow field (curr->prev) +// consumed by interpolate.frag. fp16 deltas, fp32 cost accumulation. Needs shaderFloat16. + +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require + +layout(local_size_x = 8, local_size_y = 8) in; + +layout(set = 0, binding = 0) uniform sampler2D prevFrame; // frame N-1 (full res, SHADER_READ_ONLY) +layout(set = 0, binding = 1) uniform sampler2D currFrame; // frame N (full res, SHADER_READ_ONLY) +// rgba16f (not rg16f) is a *mandatory* storage-image format, so this avoids +// requiring the shaderStorageImageExtendedFormats device feature. Only .xy is used. 
+layout(set = 0, binding = 2, rgba16f) uniform writeonly image2D motionField; // half res, STORAGE + +layout(push_constant) uniform PC { + ivec2 mvSize; // motionField dimensions (~= frameSize / 2) + vec2 invMvSize; // 1.0 / mvSize, for normalized texture sampling + float mvScale; // scales the stored vector (normally 1.0) + float minStep; // lowest TSS step (quality preset): 1 = full search, larger = coarser/faster + float _pad1; + float _pad2; +} pc; + +// ---- Search / tiling parameters (compile-time so the LDS tiles can be sized) ---- +const int LS = 8; // == local_size_x/y +const int BR = 2; // block radius -> (2*BR+1)^2 = 25-tap SSD window +const int RMAX = 15; // max displacement reachable by TSS steps 8+4+2+1 + +const int TILE_P = LS + 2 * (RMAX + BR); // prev tile side = 8 + 34 = 42 +const int TILE_C = LS + 2 * BR; // curr tile side = 8 + 4 = 12 + +shared float16_t sPrev[TILE_P * TILE_P]; // ~3.5 KB +shared float16_t sCurr[TILE_C * TILE_C]; // ~0.3 KB + +float16_t luma(vec3 c) { + return float16_t(dot(c, vec3(0.299, 0.587, 0.114))); +} + +// SSD between the curr block at this work item and the prev block displaced by d. +// All reads stay inside the cached tiles by construction (see index ranges below). 
+float blockCost(ivec2 l, ivec2 cCenter, ivec2 d) { + float cost = 0.0; + for (int by = -BR; by <= BR; ++by) { + for (int bx = -BR; bx <= BR; ++bx) { + ivec2 cc = cCenter + ivec2(bx, by); + ivec2 pp = l + ivec2(RMAX + BR) + d + ivec2(bx, by); + float16_t dv = sCurr[cc.y * TILE_C + cc.x] - sPrev[pp.y * TILE_P + pp.x]; + cost += float(dv * dv); // fp16 delta, fp32 accumulate + } + } + return cost; +} + +void main() { + ivec2 wgOrigin = ivec2(gl_WorkGroupID.xy) * LS; + ivec2 prevOrigin = wgOrigin - ivec2(RMAX + BR); + ivec2 currOrigin = wgOrigin - ivec2(BR); + + uint li = gl_LocalInvocationIndex; // 0..63 + const uint THREADS = uint(LS * LS); // 64 + + // Cooperative load of the prev-frame luma tile (sampled at the half-res grid; + // linear filtering of the full-res texture gives the 2x downsample for free). + for (uint i = li; i < uint(TILE_P * TILE_P); i += THREADS) { + int lx = int(i) % TILE_P; + int ly = int(i) / TILE_P; + vec2 uv = (vec2(prevOrigin + ivec2(lx, ly)) + 0.5) * pc.invMvSize; + sPrev[i] = luma(textureLod(prevFrame, uv, 0.0).rgb); + } + // Cooperative load of the curr-frame luma tile. + for (uint i = li; i < uint(TILE_C * TILE_C); i += THREADS) { + int lx = int(i) % TILE_C; + int ly = int(i) / TILE_C; + vec2 uv = (vec2(currOrigin + ivec2(lx, ly)) + 0.5) * pc.invMvSize; + sCurr[i] = luma(textureLod(currFrame, uv, 0.0).rgb); + } + barrier(); + + ivec2 p = ivec2(gl_GlobalInvocationID.xy); + // NOTE: out-of-range items still participated in the cooperative load + barrier + // above; only the store is guarded. + if (p.x >= pc.mvSize.x || p.y >= pc.mvSize.y) return; + + ivec2 l = ivec2(gl_LocalInvocationID.xy); // 0..LS-1 + ivec2 cCenter = l + ivec2(BR); // this item's center in the curr tile + + // Three-step search seeded at (0,0). (A future pyramid level would seed from the + // upsampled coarse MV instead — the search code is unchanged.) 
+ ivec2 center = ivec2(0); + ivec2 bestD = ivec2(0); + float bestCost = blockCost(l, cCenter, ivec2(0)); + + int minStep = clamp(int(pc.minStep), 1, 8); + for (int step = 8; step >= minStep; step >>= 1) { + ivec2 localBestD = center; + float localBest = bestCost; + for (int sy = -1; sy <= 1; ++sy) { + for (int sx = -1; sx <= 1; ++sx) { + if (sx == 0 && sy == 0) continue; + ivec2 d = center + ivec2(sx, sy) * step; + if (abs(d.x) > RMAX || abs(d.y) > RMAX) continue; + float cost = blockCost(l, cCenter, d); + if (cost < localBest) { localBest = cost; localBestD = d; } + } + } + if (localBest < bestCost) { bestCost = localBest; bestD = localBestD; center = localBestD; } + } + + imageStore(motionField, p, vec4(vec2(bestD) * pc.mvScale, 0.0, 0.0)); +} diff --git a/app/src/main/cpp/winlator/vk/shaders/motion_fp32.comp b/app/src/main/cpp/winlator/vk/shaders/motion_fp32.comp new file mode 100644 index 000000000..b7c97fbc1 --- /dev/null +++ b/app/src/main/cpp/winlator/vk/shaders/motion_fp32.comp @@ -0,0 +1,96 @@ +#version 450 + +// fp32 fallback of motion.comp for devices without shaderFloat16; identical algorithm. 
+ +layout(local_size_x = 8, local_size_y = 8) in; + +layout(set = 0, binding = 0) uniform sampler2D prevFrame; // frame N-1 (full res) +layout(set = 0, binding = 1) uniform sampler2D currFrame; // frame N (full res) +layout(set = 0, binding = 2, rgba16f) uniform writeonly image2D motionField; // half res, STORAGE + +layout(push_constant) uniform PC { + ivec2 mvSize; + vec2 invMvSize; + float mvScale; + float minStep; // lowest TSS step (quality preset): 1 = full search, larger = coarser/faster + float _pad1; + float _pad2; +} pc; + +const int LS = 8; +const int BR = 2; +const int RMAX = 15; + +const int TILE_P = LS + 2 * (RMAX + BR); // 42 +const int TILE_C = LS + 2 * BR; // 12 + +shared float sPrev[TILE_P * TILE_P]; +shared float sCurr[TILE_C * TILE_C]; + +float luma(vec3 c) { + return dot(c, vec3(0.299, 0.587, 0.114)); +} + +float blockCost(ivec2 l, ivec2 cCenter, ivec2 d) { + float cost = 0.0; + for (int by = -BR; by <= BR; ++by) { + for (int bx = -BR; bx <= BR; ++bx) { + ivec2 cc = cCenter + ivec2(bx, by); + ivec2 pp = l + ivec2(RMAX + BR) + d + ivec2(bx, by); + float dv = sCurr[cc.y * TILE_C + cc.x] - sPrev[pp.y * TILE_P + pp.x]; + cost += dv * dv; + } + } + return cost; +} + +void main() { + ivec2 wgOrigin = ivec2(gl_WorkGroupID.xy) * LS; + ivec2 prevOrigin = wgOrigin - ivec2(RMAX + BR); + ivec2 currOrigin = wgOrigin - ivec2(BR); + + uint li = gl_LocalInvocationIndex; + const uint THREADS = uint(LS * LS); + + for (uint i = li; i < uint(TILE_P * TILE_P); i += THREADS) { + int lx = int(i) % TILE_P; + int ly = int(i) / TILE_P; + vec2 uv = (vec2(prevOrigin + ivec2(lx, ly)) + 0.5) * pc.invMvSize; + sPrev[i] = luma(textureLod(prevFrame, uv, 0.0).rgb); + } + for (uint i = li; i < uint(TILE_C * TILE_C); i += THREADS) { + int lx = int(i) % TILE_C; + int ly = int(i) / TILE_C; + vec2 uv = (vec2(currOrigin + ivec2(lx, ly)) + 0.5) * pc.invMvSize; + sCurr[i] = luma(textureLod(currFrame, uv, 0.0).rgb); + } + barrier(); + + ivec2 p = ivec2(gl_GlobalInvocationID.xy); + if 
(p.x >= pc.mvSize.x || p.y >= pc.mvSize.y) return; + + ivec2 l = ivec2(gl_LocalInvocationID.xy); + ivec2 cCenter = l + ivec2(BR); + + ivec2 center = ivec2(0); + ivec2 bestD = ivec2(0); + float bestCost = blockCost(l, cCenter, ivec2(0)); + + int minStep = clamp(int(pc.minStep), 1, 8); + for (int step = 8; step >= minStep; step >>= 1) { + ivec2 localBestD = center; + float localBest = bestCost; + for (int sy = -1; sy <= 1; ++sy) { + for (int sx = -1; sx <= 1; ++sx) { + if (sx == 0 && sy == 0) continue; + ivec2 d = center + ivec2(sx, sy) * step; + if (abs(d.x) > RMAX || abs(d.y) > RMAX) continue; + float cost = blockCost(l, cCenter, d); + if (cost < localBest) { localBest = cost; localBestD = d; } + } + } + if (localBest < bestCost) { bestCost = localBest; bestD = localBestD; center = localBestD; } + } + + imageStore(motionField, p, vec4(vec2(bestD) * pc.mvScale, 0.0, 0.0)); +} diff --git a/app/src/main/cpp/winlator/vk/vk_dispatch.c b/app/src/main/cpp/winlator/vk/vk_dispatch.c index a698c30d9..f7fc75b4a 100644 --- a/app/src/main/cpp/winlator/vk/vk_dispatch.c +++ b/app/src/main/cpp/winlator/vk/vk_dispatch.c @@ -112,6 +112,7 @@ bool vkd_load_instance(VkInstance instance) { LOAD(CreatePipelineLayout); LOAD(DestroyPipelineLayout); LOAD(CreateGraphicsPipelines); + LOAD(CreateComputePipelines); LOAD(DestroyPipeline); LOAD(CreateShaderModule); LOAD(DestroyShaderModule); @@ -148,7 +149,9 @@ bool vkd_load_instance(VkInstance instance) { LOAD(CmdSetViewport); LOAD(CmdSetScissor); LOAD(CmdDraw); + LOAD(CmdDispatch); LOAD(CmdPipelineBarrier); + LOAD(CmdBlitImage); LOAD(CmdCopyBufferToImage); // Queue diff --git a/app/src/main/cpp/winlator/vk/vk_dispatch.h b/app/src/main/cpp/winlator/vk/vk_dispatch.h index bf37d977d..eaaab2daa 100644 --- a/app/src/main/cpp/winlator/vk/vk_dispatch.h +++ b/app/src/main/cpp/winlator/vk/vk_dispatch.h @@ -89,6 +89,7 @@ typedef struct VkDispatch { PFN_vkCreatePipelineLayout CreatePipelineLayout; PFN_vkDestroyPipelineLayout DestroyPipelineLayout; 
PFN_vkCreateGraphicsPipelines CreateGraphicsPipelines; + PFN_vkCreateComputePipelines CreateComputePipelines; PFN_vkDestroyPipeline DestroyPipeline; PFN_vkCreateShaderModule CreateShaderModule; PFN_vkDestroyShaderModule DestroyShaderModule; @@ -125,7 +126,9 @@ typedef struct VkDispatch { PFN_vkCmdSetViewport CmdSetViewport; PFN_vkCmdSetScissor CmdSetScissor; PFN_vkCmdDraw CmdDraw; + PFN_vkCmdDispatch CmdDispatch; PFN_vkCmdPipelineBarrier CmdPipelineBarrier; + PFN_vkCmdBlitImage CmdBlitImage; PFN_vkCmdCopyBufferToImage CmdCopyBufferToImage; // Queue @@ -218,6 +221,7 @@ void vkd_unload(void); #define vkCreatePipelineLayout vkd.CreatePipelineLayout #define vkDestroyPipelineLayout vkd.DestroyPipelineLayout #define vkCreateGraphicsPipelines vkd.CreateGraphicsPipelines +#define vkCreateComputePipelines vkd.CreateComputePipelines #define vkDestroyPipeline vkd.DestroyPipeline #define vkCreateShaderModule vkd.CreateShaderModule #define vkDestroyShaderModule vkd.DestroyShaderModule @@ -251,7 +255,9 @@ void vkd_unload(void); #define vkCmdSetViewport vkd.CmdSetViewport #define vkCmdSetScissor vkd.CmdSetScissor #define vkCmdDraw vkd.CmdDraw +#define vkCmdDispatch vkd.CmdDispatch #define vkCmdPipelineBarrier vkd.CmdPipelineBarrier +#define vkCmdBlitImage vkd.CmdBlitImage #define vkCmdCopyBufferToImage vkd.CmdCopyBufferToImage #define vkQueueSubmit vkd.QueueSubmit diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index bf96c8003..974e3817b 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -23,6 +23,7 @@ #include #include #include +#include // SPIR-V shader byte arrays generated at build time by glslc + bin2c.cmake. 
#include "shaders/window_vert.spv.h" @@ -35,6 +36,9 @@ #include "shaders/effect_hdr_frag.spv.h" #include "shaders/effect_natural_frag.spv.h" #include "shaders/sgsr1_frag.spv.h" +#include "shaders/motion_comp.spv.h" +#include "shaders/motion_fp32_comp.spv.h" +#include "shaders/interpolate_frag.spv.h" // ============================================================ // Forward decls @@ -56,6 +60,20 @@ static bool create_offscreen(VkRenderer* r, uint32_t w, uint32_t h, bool need_se static void destroy_offscreen(VkRenderer* r); static bool create_sgsr1_resources(VkRenderer* r, uint32_t w, uint32_t h); static void destroy_sgsr1_resources(VkRenderer* r); +static void fg_destroy_resources(VkRenderer* r); +static bool fg_ensure_resources(VkRenderer* r); +static void wait_inflight_frames(VkRenderer* r); + +// Frame-generation render modes (see fg_submit / DESIGN.md §2). +typedef enum { + FG_MODE_HOLD = 0, // render composited scene -> history[curr]; do NOT present + FG_MODE_INTERP = 1, // motion + interpolate(prev,curr) -> swapchain; present + FG_MODE_PRESENT_LAST = 2, // blit history[curr] -> swapchain; present (the deferred real frame) +} FgMode; + +// Result of manage_scene_targets(): whether the post-effect chain runs this frame, and whether +// it is SGSR1-led. Shared by the real-present path and the FG hold path. 
+typedef struct { bool has_effects; bool wants_sgsr1; } SceneTargets; static bool create_quad_vbo(VkRenderer* r); static void destroy_quad_vbo(VkRenderer* r); static bool is_plain_rotation_transform(VkSurfaceTransformFlagBitsKHR transform); @@ -340,6 +358,7 @@ static bool create_device(VkRenderer* r) { bool has_ycbcr = has_extension(exts, ext_count, VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME); bool has_extmem_caps = has_extension(exts, ext_count, VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME); bool has_queue_fam = has_extension(exts, ext_count, VK_EXT_QUEUE_FAMILY_FOREIGN_EXTENSION_NAME); + bool has_f16 = has_extension(exts, ext_count, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); free(exts); @@ -360,6 +379,28 @@ static bool create_device(VkRenderer* r) { r->ext_ahb = ahb_ok; r->ext_ycbcr = has_ycbcr; + + // Probe shaderFloat16; selects the fp16 vs fp32 motion shader (FG ships either way). + r->fg_float16_supported = false; + VkPhysicalDeviceShaderFloat16Int8FeaturesKHR f16_feat = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR }; + if (has_f16) { + VkPhysicalDeviceShaderFloat16Int8FeaturesKHR f16q = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR }; + VkPhysicalDeviceFeatures2 feats2 = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2 }; + feats2.pNext = &f16q; + PFN_vkGetPhysicalDeviceFeatures2 fnFeat2 = (PFN_vkGetPhysicalDeviceFeatures2) + vkGetInstanceProcAddr(r->instance, "vkGetPhysicalDeviceFeatures2"); + if (!fnFeat2) fnFeat2 = (PFN_vkGetPhysicalDeviceFeatures2) + vkGetInstanceProcAddr(r->instance, "vkGetPhysicalDeviceFeatures2KHR"); + if (fnFeat2) { fnFeat2(r->physical_device, &feats2); r->fg_float16_supported = (f16q.shaderFloat16 == VK_TRUE); } + } + if (r->fg_float16_supported) { + enable[enable_n++] = VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME; + f16_feat.shaderFloat16 = VK_TRUE; + } + VK_LOGI("Frame generation fp16 support: ext=%d feature=%d", has_f16, r->fg_float16_supported); + VK_LOGI("AHB Vulkan 
device support: android_hardware_buffer=%d external_memory=%d dedicated=%d get_memory_requirements2=%d queue_family_foreign=%d enabled=%d", has_ahb, has_extmem, has_dedicated, has_get_mem_req2, has_queue_fam, r->ext_ahb); if (!r->ext_ahb) { @@ -378,7 +419,10 @@ static bool create_device(VkRenderer* r) { ycbcr_feat.samplerYcbcrConversion = has_ycbcr ? VK_TRUE : VK_FALSE; VkDeviceCreateInfo dci = {VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO}; - if (has_ycbcr) dci.pNext = &ycbcr_feat; + void* feat_chain = NULL; + if (has_ycbcr) { ycbcr_feat.pNext = feat_chain; feat_chain = &ycbcr_feat; } + if (r->fg_float16_supported) { f16_feat.pNext = feat_chain; feat_chain = &f16_feat; } + dci.pNext = feat_chain; dci.queueCreateInfoCount = 1; dci.pQueueCreateInfos = &qci; dci.enabledExtensionCount = enable_n; @@ -555,15 +599,19 @@ static bool create_command_pool(VkRenderer* r) { // ============================================================ static bool create_descriptor_pool(VkRenderer* r, uint32_t capacity) { - VkDescriptorPoolSize ps = {0}; - ps.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - ps.descriptorCount = capacity; + // Combined-image-samplers for textures/effects/FG, plus a small STORAGE_IMAGE budget for + // the frame-generation motion field (one writeonly image2D bound by motion.comp). 
+ VkDescriptorPoolSize ps[2] = {0}; + ps[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + ps[0].descriptorCount = capacity; + ps[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + ps[1].descriptorCount = 8; VkDescriptorPoolCreateInfo ci = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO}; ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; ci.maxSets = capacity; - ci.poolSizeCount = 1; - ci.pPoolSizes = &ps; + ci.poolSizeCount = 2; + ci.pPoolSizes = ps; if (vkCreateDescriptorPool(r->device, &ci, NULL, &r->descriptor_pool) != VK_SUCCESS) { VK_LOGE("vkCreateDescriptorPool failed"); return false; @@ -727,9 +775,74 @@ static bool create_pipeline_layouts(VkRenderer* r) { return false; } + // --- Frame generation layouts --- + // motion.comp set 0: binding0,1 = prev,curr samplers; binding2 = motion storage image. All COMPUTE. + VkDescriptorSetLayoutBinding mb[3] = {0}; + mb[0].binding = 0; mb[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + mb[0].descriptorCount = 1; mb[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + mb[1] = mb[0]; mb[1].binding = 1; + mb[2].binding = 2; mb[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + mb[2].descriptorCount = 1; mb[2].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + VkDescriptorSetLayoutCreateInfo dl_m = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + dl_m.bindingCount = 3; dl_m.pBindings = mb; + if (vkCreateDescriptorSetLayout(r->device, &dl_m, NULL, &r->pipelines.fg_motion_layout) != VK_SUCCESS) { + return false; + } + + // interpolate.frag set 0: prev,curr,motion combined-image-samplers, FRAGMENT. 
+ VkDescriptorSetLayoutBinding ib[3] = {0}; + for (uint32_t i = 0; i < 3; i++) { + ib[i].binding = i; + ib[i].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + ib[i].descriptorCount = 1; + ib[i].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + } + VkDescriptorSetLayoutCreateInfo dl_i = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + dl_i.bindingCount = 3; dl_i.pBindings = ib; + if (vkCreateDescriptorSetLayout(r->device, &dl_i, NULL, &r->pipelines.fg_interp_layout) != VK_SUCCESS) { + return false; + } + + // motion pipeline layout: motion set + 32B compute push (ivec2 mvSize, vec2 invMvSize, float mvScale, float minStep, 2x float pad). + VkPushConstantRange mpc = { VK_SHADER_STAGE_COMPUTE_BIT, 0, 32 }; + VkPipelineLayoutCreateInfo mpl = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + mpl.setLayoutCount = 1; mpl.pSetLayouts = &r->pipelines.fg_motion_layout; + mpl.pushConstantRangeCount = 1; mpl.pPushConstantRanges = &mpc; + if (vkCreatePipelineLayout(r->device, &mpl, NULL, &r->pipelines.fg_motion_pipe_layout) != VK_SUCCESS) { + return false; + } + + // interp pipeline layout: interp set + 24B fragment push (vec2 resolution, float phase, occLo, occHi, pad). + VkPushConstantRange ipc = { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 24 }; + VkPipelineLayoutCreateInfo ipl = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + ipl.setLayoutCount = 1; ipl.pSetLayouts = &r->pipelines.fg_interp_layout; + ipl.pushConstantRangeCount = 1; ipl.pPushConstantRanges = &ipc; + if (vkCreatePipelineLayout(r->device, &ipl, NULL, &r->pipelines.fg_interp_pipe_layout) != VK_SUCCESS) { + return false; + } + return true; } +// Compute pipeline helper for the frame-generation motion pass. 
+static VkPipeline create_compute_pipeline(VkRenderer* r, VkShaderModule cs, VkPipelineLayout layout) { + VkPipelineShaderStageCreateInfo stage = {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO}; + stage.stage = VK_SHADER_STAGE_COMPUTE_BIT; + stage.module = cs; + stage.pName = "main"; + + VkComputePipelineCreateInfo cpi = {VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO}; + cpi.stage = stage; + cpi.layout = layout; + + VkPipeline pipe = VK_NULL_HANDLE; + if (vkCreateComputePipelines(r->device, VK_NULL_HANDLE, 1, &cpi, NULL, &pipe) != VK_SUCCESS) { + VK_LOGE("vkCreateComputePipelines failed"); + return VK_NULL_HANDLE; + } + return pipe; +} + static VkPipeline create_graphics_pipeline( VkRenderer* r, VkShaderModule vs, VkShaderModule fs, @@ -843,9 +956,14 @@ static bool create_pipelines(VkRenderer* r) { VkShaderModule fs_hdr = load_shader_module(r, effect_hdr_frag, effect_hdr_frag_size); VkShaderModule fs_natural= load_shader_module(r, effect_natural_frag,effect_natural_frag_size); VkShaderModule fs_sgsr1 = load_shader_module(r, sgsr1_frag, sgsr1_frag_size); + // Frame generation: pick the fp16 or fp32 motion shader by device support. + VkShaderModule cs_motion = r->fg_float16_supported + ? load_shader_module(r, motion_comp, motion_comp_size) + : load_shader_module(r, motion_fp32_comp, motion_fp32_comp_size); + VkShaderModule fs_interp = load_shader_module(r, interpolate_frag, interpolate_frag_size); if (!vs_window || !fs_window || !fs_cursor || !vs_quad || !fs_blit || !fs_crt || !fs_vivid || !fs_hdr || !fs_natural - || !fs_sgsr1) { + || !fs_sgsr1 || !cs_motion || !fs_interp) { return false; } @@ -898,6 +1016,14 @@ static bool create_pipelines(VkRenderer* r) { r, vs_quad, fs_sgsr1, r->pipelines.effect_layout, r->pipelines.offscreen_pass, false, false, NULL); + // Frame generation: compute motion estimation + fullscreen-triangle interpolation (no vertex + // input, no blend — opaque full-screen write) onto the swapchain. 
+ r->pipelines.fg_motion_pipeline = create_compute_pipeline( + r, cs_motion, r->pipelines.fg_motion_pipe_layout); + r->pipelines.fg_interp_pipeline = create_graphics_pipeline( + r, vs_quad, fs_interp, r->pipelines.fg_interp_pipe_layout, r->pipelines.swapchain_pass, + false, false, NULL); + vkDestroyShaderModule(r->device, vs_window, NULL); vkDestroyShaderModule(r->device, fs_window, NULL); vkDestroyShaderModule(r->device, fs_cursor, NULL); @@ -908,12 +1034,16 @@ static bool create_pipelines(VkRenderer* r) { vkDestroyShaderModule(r->device, fs_hdr, NULL); vkDestroyShaderModule(r->device, fs_natural, NULL); vkDestroyShaderModule(r->device, fs_sgsr1, NULL); + vkDestroyShaderModule(r->device, cs_motion, NULL); + vkDestroyShaderModule(r->device, fs_interp, NULL); if (!r->pipelines.window_pipeline || !r->pipelines.cursor_pipeline || !r->pipelines.blit_pipeline || !r->pipelines.offscreen_window_pipeline || !r->pipelines.offscreen_cursor_pipeline - || !r->pipelines.offscreen_blit_pipeline) { + || !r->pipelines.offscreen_blit_pipeline + || !r->pipelines.fg_motion_pipeline + || !r->pipelines.fg_interp_pipeline) { destroy_pipelines(r); return false; } @@ -946,9 +1076,15 @@ static void destroy_pipelines(VkRenderer* r) { if (r->pipelines.offscreen_window_pipeline) vkDestroyPipeline(r->device, r->pipelines.offscreen_window_pipeline, NULL); if (r->pipelines.offscreen_cursor_pipeline) vkDestroyPipeline(r->device, r->pipelines.offscreen_cursor_pipeline, NULL); if (r->pipelines.offscreen_blit_pipeline) vkDestroyPipeline(r->device, r->pipelines.offscreen_blit_pipeline, NULL); + if (r->pipelines.fg_motion_pipeline) vkDestroyPipeline(r->device, r->pipelines.fg_motion_pipeline, NULL); + if (r->pipelines.fg_interp_pipeline) vkDestroyPipeline(r->device, r->pipelines.fg_interp_pipeline, NULL); if (r->pipelines.window_layout) vkDestroyPipelineLayout(r->device, r->pipelines.window_layout, NULL); if (r->pipelines.effect_layout) vkDestroyPipelineLayout(r->device, r->pipelines.effect_layout, 
NULL); + if (r->pipelines.fg_motion_pipe_layout) vkDestroyPipelineLayout(r->device, r->pipelines.fg_motion_pipe_layout, NULL); + if (r->pipelines.fg_interp_pipe_layout) vkDestroyPipelineLayout(r->device, r->pipelines.fg_interp_pipe_layout, NULL); if (r->pipelines.sampler_set_layout) vkDestroyDescriptorSetLayout(r->device, r->pipelines.sampler_set_layout, NULL); + if (r->pipelines.fg_motion_layout) vkDestroyDescriptorSetLayout(r->device, r->pipelines.fg_motion_layout, NULL); + if (r->pipelines.fg_interp_layout) vkDestroyDescriptorSetLayout(r->device, r->pipelines.fg_interp_layout, NULL); if (r->pipelines.swapchain_pass) vkDestroyRenderPass(r->device, r->pipelines.swapchain_pass, NULL); if (r->pipelines.offscreen_pass) vkDestroyRenderPass(r->device, r->pipelines.offscreen_pass, NULL); memset(&r->pipelines, 0, sizeof(r->pipelines)); @@ -1043,20 +1179,29 @@ static bool create_swapchain(VkRenderer* r, uint32_t fallback_width, uint32_t fa if (want != VK_PRESENT_MODE_FIFO_KHR) { uint32_t pm_count = 0; vkGetPhysicalDeviceSurfacePresentModesKHR(r->physical_device, r->surface, &pm_count, NULL); + bool have_want = false, have_immediate = false; if (pm_count > 0) { VkPresentModeKHR* pms = calloc(pm_count, sizeof(VkPresentModeKHR)); if (pms) { vkGetPhysicalDeviceSurfacePresentModesKHR(r->physical_device, r->surface, &pm_count, pms); for (uint32_t i = 0; i < pm_count; i++) { - if (pms[i] == want) { present_mode = want; break; } + if (pms[i] == want) have_want = true; + if (pms[i] == VK_PRESENT_MODE_IMMEDIATE_KHR) have_immediate = true; } free(pms); } } - if (present_mode != want) { + if (have_want) { + present_mode = want; + } else if (want == VK_PRESENT_MODE_MAILBOX_KHR && have_immediate) { + // MAILBOX is often unsupported on Adreno/Mali; IMMEDIATE is also non-blocking, which FG needs. 
+ present_mode = VK_PRESENT_MODE_IMMEDIATE_KHR; + VK_LOGW("MAILBOX unavailable; using IMMEDIATE for off-vsync present"); + } else { VK_LOGW("Requested present mode %d unavailable; using FIFO", want); } } + r->active_present_mode = present_mode; VkSurfaceTransformFlagBitsKHR pre_transform = caps.currentTransform; if (!is_plain_rotation_transform(pre_transform) @@ -1097,11 +1242,13 @@ static bool create_swapchain(VkRenderer* r, uint32_t fallback_width, uint32_t fa // Only possible for unsupported mirrored transforms; avoid an Adreno present loop // while still letting normal rotation changes recreate the swapchain. r->ignore_suboptimal = r->caps.is_adreno && (pre_transform != caps.currentTransform); - VK_LOGI("Swapchain surface=%ux%u extent=%ux%u currentTransform=0x%x preTransform=0x%x", + VK_LOGI("Swapchain surface=%ux%u extent=%ux%u currentTransform=0x%x preTransform=0x%x mode=%d", surface_extent.width, surface_extent.height, extent.width, extent.height, - caps.currentTransform, pre_transform); + caps.currentTransform, pre_transform, present_mode); uint32_t image_count = caps.minImageCount + 1; + // Non-blocking modes (MAILBOX/IMMEDIATE) need >=3 images to run ahead of vblank. 
+ if (present_mode != VK_PRESENT_MODE_FIFO_KHR && image_count < 3) image_count = 3; if (caps.maxImageCount > 0 && image_count > caps.maxImageCount) image_count = caps.maxImageCount; if (image_count > VK_MAX_SWAPCHAIN_IMAGES) image_count = VK_MAX_SWAPCHAIN_IMAGES; @@ -1750,51 +1897,18 @@ static VkExtent2D compute_sgsr1_source_extent(VkRenderer* r, const VkScene* s) { return source; } -static bool record_and_submit_frame(VkRenderer* r) { - if (!r->surface_ready || !r->swapchain) return false; - - pthread_mutex_lock(&r->render_mutex); - - VkFrame* f = &r->frames[r->frame_index]; - uint32_t grave_slot = r->graveyard_index; - - vkWaitForFences(r->device, 1, &f->in_flight, VK_TRUE, UINT64_MAX); - - // Snapshot the scene under scene_mutex (cheap memcpy of a few KB), then release it so - // scene producers (texture destroys, X server window updates) don't stall behind the - // long acquire/record/submit/present below. render_mutex still serializes us against - // surface lifecycle changes, which keeps the swapchain handles stable for our use. 
- VkScene snap; - VkTexture** dead = NULL; - uint32_t dead_count = 0; - pthread_mutex_lock(&r->scene_mutex); - if (!r->surface_ready || !r->swapchain || r->swapchain_image_count == 0 - || r->swapchain_extent.width == 0 || r->swapchain_extent.height == 0) { - pthread_mutex_unlock(&r->scene_mutex); - pthread_mutex_unlock(&r->render_mutex); - return false; - } - snap = r->scene; - detach_graveyard_slot(r, grave_slot, &dead, &dead_count); - pthread_mutex_unlock(&r->scene_mutex); - destroy_graveyard_textures(r, dead, dead_count); - - bool wants_sgsr1 = scene_starts_with_sgsr1(&snap); - bool needs_fullres_offscreen = snap.effect_count > 0 - && (!wants_sgsr1 || snap.effect_count > 1); - // offscreen[1] is reached only once the chain writes two distinct offscreen buffers: the - // effect loop's dst_idx starts at 1 for a non-SGSR chain but at 0 for an SGSR1-led one - // (scene goes to the separate SGSR source), so the threshold is >1 normally, >2 for SGSR. +// (Re)build the effect ping-pong / SGSR1 targets for the current scene and report whether the +// effect chain runs. Shared by the real present and the FG hold. Caller holds render_mutex. +static SceneTargets manage_scene_targets(VkRenderer* r, const VkScene* snap) { + bool wants_sgsr1 = scene_starts_with_sgsr1(snap); + bool needs_fullres_offscreen = snap->effect_count > 0 + && (!wants_sgsr1 || snap->effect_count > 1); bool needs_second_offscreen = needs_fullres_offscreen - && snap.effect_count > (wants_sgsr1 ? 2u : 1u); + && snap->effect_count > (wants_sgsr1 ? 2u : 1u); VkExtent2D sgsr1_source_extent = wants_sgsr1 - ? compute_sgsr1_source_extent(r, &snap) + ? compute_sgsr1_source_extent(r, snap) : r->swapchain_extent; - // Full-res ping-pong targets exist only when the chain needs them (SGSR-only writes its - // low-res source straight to the swapchain). offscreen[1] is grown/freed lazily as the - // chain crosses the threshold above; effect counts change on user action, not per frame, - // so this doesn't thrash. 
Safe under render_mutex (no concurrent swapchain teardown). bool offscreen_dims_stale = !r->offscreen_built || r->offscreen[0].width != r->swapchain_extent.width || r->offscreen[0].height != r->swapchain_extent.height; @@ -1812,9 +1926,6 @@ static bool record_and_submit_frame(VkRenderer* r) { wait_inflight_frames(r); destroy_offscreen(r); } - // Only rebuild SGSR1 source on meaningful dim change. Tiny pixmap-size flicker (off-by- - // one DRI3 jitter, transient resizes) used to thrash this allocation every frame and - // stall the render thread on the full-device wait that preceded it. int sgsr1_dw = (int)r->sgsr1.width - (int)sgsr1_source_extent.width; int sgsr1_dh = (int)r->sgsr1.height - (int)sgsr1_source_extent.height; if (sgsr1_dw < 0) sgsr1_dw = -sgsr1_dw; @@ -1828,48 +1939,24 @@ static bool record_and_submit_frame(VkRenderer* r) { destroy_sgsr1_resources(r); } - uint32_t image_index = 0; - VkResult acq = vkAcquireNextImageKHR(r->device, r->swapchain, UINT64_MAX, - f->image_available, VK_NULL_HANDLE, &image_index); - bool recreate_after_present = false; - if (acq == VK_ERROR_OUT_OF_DATE_KHR) { - r->surface_ready = false; - pthread_mutex_lock(&r->queue_mutex); - vkQueueWaitIdle(r->graphics_queue); - pthread_mutex_unlock(&r->queue_mutex); - destroy_swapchain_resources(r); - r->surface_ready = create_swapchain(r, r->surface_extent.width, r->surface_extent.height); - pthread_mutex_unlock(&r->render_mutex); - return false; - } else if (acq == VK_SUBOPTIMAL_KHR) { - if (!r->ignore_suboptimal) recreate_after_present = true; - } else if (acq != VK_SUCCESS) { - VK_LOGE("vkAcquireNextImageKHR -> %d", acq); - pthread_mutex_unlock(&r->render_mutex); - return false; - } - VkSemaphore render_finished = r->swapchain_render_finished[image_index]; - - vkResetFences(r->device, 1, &f->in_flight); - - VkCommandBufferBeginInfo bi = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; - bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - vkBeginCommandBuffer(f->cmd, &bi); - - bool 
has_effects = snap.effect_count > 0 && r->offscreen_built; - if (snap.effect_count > 0) { - // Don't enter the effect path if a required target's lazy creation failed (else we'd - // record into a null framebuffer). + bool has_effects = snap->effect_count > 0 && r->offscreen_built; + if (snap->effect_count > 0) { bool full_ok = !needs_fullres_offscreen || (r->offscreen_built && (!needs_second_offscreen || r->offscreen[1].image != VK_NULL_HANDLE)); has_effects = full_ok && (!wants_sgsr1 || r->sgsr1.built); } + SceneTargets st = { has_effects, wants_sgsr1 }; + return st; +} +// Record the composited scene into final_fb; final_offscreen picks the offscreen vs swapchain +// pipeline variant. Used by the real present (swapchain) and the FG hold (history offscreen). +static void record_scene_chain(VkRenderer* r, VkCommandBuffer cmd, const VkScene* snap, + bool has_effects, bool wants_sgsr1, + VkRenderPass final_pass, VkFramebuffer final_fb, + uint32_t final_w, uint32_t final_h, bool final_offscreen) { VkClearValue clear = {0}; - clear.color.float32[0] = 0.0f; - clear.color.float32[1] = 0.0f; - clear.color.float32[2] = 0.0f; clear.color.float32[3] = 1.0f; if (has_effects) { @@ -1877,7 +1964,6 @@ static bool record_and_submit_frame(VkRenderer* r) { ? &r->sgsr1.source : &r->offscreen[0]; - // Pass 1: render scene to either full-res effect input or SGSR1's low-res source. 
VkRenderPassBeginInfo rpbi = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO}; rpbi.renderPass = r->pipelines.offscreen_pass; rpbi.framebuffer = scene_target->framebuffer; @@ -1885,34 +1971,32 @@ static bool record_and_submit_frame(VkRenderer* r) { rpbi.renderArea.extent.height = scene_target->height; rpbi.clearValueCount = 1; rpbi.pClearValues = &clear; - vkCmdBeginRenderPass(f->cmd, &rpbi, VK_SUBPASS_CONTENTS_INLINE); - draw_scene_pass(r, f->cmd, &snap, true, - scene_target->width, scene_target->height); - vkCmdEndRenderPass(f->cmd); + vkCmdBeginRenderPass(cmd, &rpbi, VK_SUBPASS_CONTENTS_INLINE); + draw_scene_pass(r, cmd, snap, true, scene_target->width, scene_target->height); + vkCmdEndRenderPass(cmd); - // Effect chain: source descriptor moves through ping-pong buffers. When SGSR1 is - // first, the first source is low-res and SGSR1 writes full-res output. VkOffscreen* src_offscreen = scene_target; uint32_t dst_idx = (scene_target == &r->offscreen[0]) ? 1u : 0u; - for (uint32_t i = 0; i < snap.effect_count; i++) { - bool last = (i == snap.effect_count - 1); - VkEffectSlot* eff = &snap.effects[i]; - + for (uint32_t i = 0; i < snap->effect_count; i++) { + bool last = (i == snap->effect_count - 1); + VkEffectSlot eff = snap->effects[i]; if (last) { - rpbi.renderPass = r->pipelines.swapchain_pass; - rpbi.framebuffer = r->swapchain_framebuffers[image_index]; - rpbi.renderArea.extent = r->swapchain_extent; + rpbi.renderPass = final_pass; + rpbi.framebuffer = final_fb; + rpbi.renderArea.extent.width = final_w; + rpbi.renderArea.extent.height = final_h; } else { rpbi.renderPass = r->pipelines.offscreen_pass; rpbi.framebuffer = r->offscreen[dst_idx].framebuffer; rpbi.renderArea.extent.width = r->offscreen[dst_idx].width; rpbi.renderArea.extent.height = r->offscreen[dst_idx].height; } - vkCmdBeginRenderPass(f->cmd, &rpbi, VK_SUBPASS_CONTENTS_INLINE); - uint32_t target_w = last ? r->swapchain_extent.width : rpbi.renderArea.extent.width; - uint32_t target_h = last ? 
r->swapchain_extent.height : rpbi.renderArea.extent.height; - run_effect(r, f->cmd, eff, src_offscreen->descriptor_set, target_w, target_h, !last); - vkCmdEndRenderPass(f->cmd); + vkCmdBeginRenderPass(cmd, &rpbi, VK_SUBPASS_CONTENTS_INLINE); + uint32_t target_w = last ? final_w : rpbi.renderArea.extent.width; + uint32_t target_h = last ? final_h : rpbi.renderArea.extent.height; + run_effect(r, cmd, &eff, src_offscreen->descriptor_set, target_w, target_h, + last ? final_offscreen : true); + vkCmdEndRenderPass(cmd); if (!last) { src_offscreen = &r->offscreen[dst_idx]; dst_idx ^= 1u; @@ -1920,16 +2004,80 @@ static bool record_and_submit_frame(VkRenderer* r) { } } else { VkRenderPassBeginInfo rpbi = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO}; - rpbi.renderPass = r->pipelines.swapchain_pass; - rpbi.framebuffer = r->swapchain_framebuffers[image_index]; - rpbi.renderArea.extent = r->swapchain_extent; + rpbi.renderPass = final_pass; + rpbi.framebuffer = final_fb; + rpbi.renderArea.extent.width = final_w; + rpbi.renderArea.extent.height = final_h; rpbi.clearValueCount = 1; rpbi.pClearValues = &clear; - vkCmdBeginRenderPass(f->cmd, &rpbi, VK_SUBPASS_CONTENTS_INLINE); - draw_scene_pass(r, f->cmd, &snap, false, - r->swapchain_extent.width, r->swapchain_extent.height); - vkCmdEndRenderPass(f->cmd); + vkCmdBeginRenderPass(cmd, &rpbi, VK_SUBPASS_CONTENTS_INLINE); + draw_scene_pass(r, cmd, snap, final_offscreen, final_w, final_h); + vkCmdEndRenderPass(cmd); } +} + +static bool record_and_submit_frame(VkRenderer* r) { + if (!r->surface_ready || !r->swapchain) return false; + + pthread_mutex_lock(&r->render_mutex); + + VkFrame* f = &r->frames[r->frame_index]; + uint32_t grave_slot = r->graveyard_index; + + vkWaitForFences(r->device, 1, &f->in_flight, VK_TRUE, UINT64_MAX); + + // Snapshot the scene under scene_mutex (cheap memcpy of a few KB), then release it so + // scene producers (texture destroys, X server window updates) don't stall behind the + // long 
acquire/record/submit/present below. render_mutex still serializes us against + // surface lifecycle changes, which keeps the swapchain handles stable for our use. + VkScene snap; + VkTexture** dead = NULL; + uint32_t dead_count = 0; + pthread_mutex_lock(&r->scene_mutex); + if (!r->surface_ready || !r->swapchain || r->swapchain_image_count == 0 + || r->swapchain_extent.width == 0 || r->swapchain_extent.height == 0) { + pthread_mutex_unlock(&r->scene_mutex); + pthread_mutex_unlock(&r->render_mutex); + return false; + } + snap = r->scene; + detach_graveyard_slot(r, grave_slot, &dead, &dead_count); + pthread_mutex_unlock(&r->scene_mutex); + destroy_graveyard_textures(r, dead, dead_count); + + SceneTargets st = manage_scene_targets(r, &snap); + + uint32_t image_index = 0; + VkResult acq = vkAcquireNextImageKHR(r->device, r->swapchain, UINT64_MAX, + f->image_available, VK_NULL_HANDLE, &image_index); + bool recreate_after_present = false; + if (acq == VK_ERROR_OUT_OF_DATE_KHR) { + r->surface_ready = false; + pthread_mutex_lock(&r->queue_mutex); + vkQueueWaitIdle(r->graphics_queue); + pthread_mutex_unlock(&r->queue_mutex); + destroy_swapchain_resources(r); + r->surface_ready = create_swapchain(r, r->surface_extent.width, r->surface_extent.height); + pthread_mutex_unlock(&r->render_mutex); + return false; + } else if (acq == VK_SUBOPTIMAL_KHR) { + if (!r->ignore_suboptimal) recreate_after_present = true; + } else if (acq != VK_SUCCESS) { + VK_LOGE("vkAcquireNextImageKHR -> %d", acq); + pthread_mutex_unlock(&r->render_mutex); + return false; + } + VkSemaphore render_finished = r->swapchain_render_finished[image_index]; + + vkResetFences(r->device, 1, &f->in_flight); + + VkCommandBufferBeginInfo bi = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; + bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + vkBeginCommandBuffer(f->cmd, &bi); + + record_scene_chain(r, f->cmd, &snap, st.has_effects, st.wants_sgsr1, + r->pipelines.swapchain_pass, 
r->swapchain_framebuffers[image_index],
+                       r->swapchain_extent.width, r->swapchain_extent.height, false);
     vkEndCommandBuffer(f->cmd);
@@ -1970,6 +2118,7 @@ static bool record_and_submit_frame(VkRenderer* r) {
     pthread_mutex_lock(&r->queue_mutex);
     VkResult pr = vkQueuePresentKHR(r->graphics_queue, &pi);
+    if (pr == VK_SUCCESS || pr == VK_SUBOPTIMAL_KHR) r->fg_present_count++;
     pthread_mutex_unlock(&r->queue_mutex);
     bool present_suboptimal = (pr == VK_SUBOPTIMAL_KHR) && !r->ignore_suboptimal;
@@ -1990,6 +2139,451 @@ static bool record_and_submit_frame(VkRenderer* r) {
     return true;
 }
+// ============================================================
+// Frame generation resources + submit
+// ============================================================
+
+// Descriptor sets for the FG compute/interp layouts are allocated directly (not via the
+// texture free-list, which assumes sampler_set_layout). The pool is externally synchronized.
+//
+// Allocates a single set of `layout` from r->descriptor_pool while holding descriptor_mutex.
+// Returns the new set, or VK_NULL_HANDLE (after logging the VkResult) when the allocation fails.
+static VkDescriptorSet fg_alloc_set(VkRenderer* r, VkDescriptorSetLayout layout) {
+    VkDescriptorSetAllocateInfo ai = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
+    ai.descriptorPool = r->descriptor_pool;
+    ai.descriptorSetCount = 1;
+    ai.pSetLayouts = &layout;
+    VkDescriptorSet set = VK_NULL_HANDLE;
+    pthread_mutex_lock(&r->descriptor_mutex);
+    VkResult res = vkAllocateDescriptorSets(r->device, &ai, &set);
+    pthread_mutex_unlock(&r->descriptor_mutex);
+    if (res != VK_SUCCESS) { VK_LOGE("fg_alloc_set failed: %d", res); return VK_NULL_HANDLE; }
+    return set;
+}
+
+// Returns a set obtained from fg_alloc_set to r->descriptor_pool, under descriptor_mutex.
+// VK_NULL_HANDLE is ignored, so callers may pass slots that were never allocated.
+// NOTE(review): vkFreeDescriptorSets requires a pool created with
+// VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; pool creation is not visible here — confirm.
+static void fg_free_set(VkRenderer* r, VkDescriptorSet set) {
+    if (set == VK_NULL_HANDLE) return;
+    pthread_mutex_lock(&r->descriptor_mutex);
+    vkFreeDescriptorSets(r->device, r->descriptor_pool, 1, &set);
+    pthread_mutex_unlock(&r->descriptor_mutex);
+}
+
+// Releases every FG GPU object: both parities' motion/interp descriptor sets, the two history
+// targets, the motion-field image and the sampler, then resets the FG bookkeeping (fg_built,
+// history ring state, fg_dims). No-op when r->device is null. Each handle is checked before it
+// is released, so this also cleans up a partially built state. It does not wait for the GPU
+// itself, and it leaves fg_motion_valid untouched.
+static void fg_destroy_resources(VkRenderer* r) {
+    if (!r->device) return;
+    // Per-parity descriptor sets allocated with fg_alloc_set.
+    for (uint32_t p = 0; p < 2; p++) {
+        if (r->fg_motion_set[p]) { fg_free_set(r, r->fg_motion_set[p]); r->fg_motion_set[p] = VK_NULL_HANDLE; }
+        if (r->fg_interp_set[p]) { fg_free_set(r, r->fg_interp_set[p]); r->fg_interp_set[p] = VK_NULL_HANDLE; }
+    }
+    // History ring: the blit set goes back through vkr_free_descriptor_set (it was allocated with
+    // vkr_alloc_descriptor_set), then framebuffer -> view -> image -> memory.
+    for (uint32_t i = 0; i < 2; i++) {
+        VkFgImage* o = &r->fg_history[i];
+        if (o->blit_set) vkr_free_descriptor_set(r, o->blit_set);
+        if (o->framebuffer) vkDestroyFramebuffer(r->device, o->framebuffer, NULL);
+        if (o->view) vkDestroyImageView(r->device, o->view, NULL);
+        if (o->image) vkDestroyImage(r->device, o->image, NULL);
+        if (o->memory) vkFreeMemory(r->device, o->memory, NULL);
+        memset(o, 0, sizeof(*o)); // zero the slot so a repeated destroy skips every handle
+    }
+    // Motion field has no framebuffer or blit set; only view/image/memory.
+    if (r->fg_motion.view) vkDestroyImageView(r->device, r->fg_motion.view, NULL);
+    if (r->fg_motion.image) vkDestroyImage(r->device, r->fg_motion.image, NULL);
+    if (r->fg_motion.memory) vkFreeMemory(r->device, r->fg_motion.memory, NULL);
+    memset(&r->fg_motion, 0, sizeof(r->fg_motion));
+    if (r->fg_sampler) { vkDestroySampler(r->device, r->fg_sampler, NULL); r->fg_sampler = VK_NULL_HANDLE; }
+    r->fg_built = false;
+    r->fg_history_count = 0;
+    r->fg_history_curr = 0;
+    r->fg_dims.width = 0;
+    r->fg_dims.height = 0;
+}
+
+// Full-res composited-scene history target: render target (offscreen_pass) + sampled input.
+// Fills *o with an image of r->caps.offscreen_format (w x h), device-local memory, a view, a
+// framebuffer for pipelines.offscreen_pass and a single-binding blit descriptor set.
+// r->fg_sampler is written into that descriptor, so the sampler must exist before this is called.
+// Returns false on the first failure and leaves whatever was already created stored in *o;
+// fg_create_resources releases it through fg_destroy_resources.
+static bool fg_create_color_target(VkRenderer* r, VkFgImage* o, uint32_t w, uint32_t h) {
+    o->width = w; o->height = h;
+    VkImageCreateInfo ic = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
+    ic.imageType = VK_IMAGE_TYPE_2D;
+    ic.format = r->caps.offscreen_format;
+    ic.extent.width = w; ic.extent.height = h; ic.extent.depth = 1;
+    ic.mipLevels = 1; ic.arrayLayers = 1;
+    ic.samples = VK_SAMPLE_COUNT_1_BIT;
+    ic.tiling = VK_IMAGE_TILING_OPTIMAL;
+    ic.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
+    ic.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+    // The image starts in UNDEFINED; nothing in this function transitions it.
+    ic.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+    if (vkCreateImage(r->device, &ic, NULL, &o->image) != VK_SUCCESS) return false;
+
+    VkMemoryRequirements mr; vkGetImageMemoryRequirements(r->device, o->image, &mr);
+    VkMemoryAllocateInfo ai = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
+    ai.allocationSize = mr.size;
+    ai.memoryTypeIndex = vkr_find_memory_type(r, mr.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+    // UINT32_MAX is treated as "no suitable memory type".
+    if (ai.memoryTypeIndex == UINT32_MAX) return false;
+    if (vkAllocateMemory(r->device, &ai, NULL, &o->memory) != VK_SUCCESS) return false;
+    // NOTE(review): the vkBindImageMemory result is not checked.
+    vkBindImageMemory(r->device, o->image, o->memory, 0);
+
+    VkImageViewCreateInfo vi = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
+    vi.image = o->image; vi.viewType = VK_IMAGE_VIEW_TYPE_2D; vi.format = ic.format;
+    vi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+    vi.subresourceRange.levelCount = 1; vi.subresourceRange.layerCount = 1;
+    if (vkCreateImageView(r->device, &vi, NULL, &o->view) != VK_SUCCESS) return false;
+
+    VkFramebufferCreateInfo fb = {VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO};
+    fb.renderPass = r->pipelines.offscreen_pass;
+    fb.attachmentCount = 1; fb.pAttachments = &o->view;
+    fb.width = w; fb.height = h; fb.layers = 1;
+    if (vkCreateFramebuffer(r->device, &fb, NULL, &o->framebuffer) != VK_SUCCESS) return false;
+
+    // Single-binding set (sampler_set_layout) so PRESENT_LAST can reuse the blit pipeline.
+    o->blit_set = vkr_alloc_descriptor_set(r);
+    if (o->blit_set == VK_NULL_HANDLE) return false;
+    // The descriptor declares SHADER_READ_ONLY_OPTIMAL, so the image must be in that layout
+    // whenever the set is sampled.
+    VkDescriptorImageInfo dii = { r->fg_sampler, o->view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL };
+    VkWriteDescriptorSet wr = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
+    wr.dstSet = o->blit_set; wr.dstBinding = 0; wr.descriptorCount = 1;
+    wr.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; wr.pImageInfo = &dii;
+    vkUpdateDescriptorSets(r->device, 1, &wr, 0, NULL);
+    return true;
+}
+
+// Half-res rgba16f backward-flow field: storage (motion.comp write) + sampled (interpolate.frag).
+// Creates r->fg_motion (image, device-local memory, view) at exactly w x h; the caller does the
+// halving. No framebuffer or blit set is created. Returns false on the first failure, leaving
+// any handles already created in r->fg_motion for fg_destroy_resources to release.
+static bool fg_create_motion(VkRenderer* r, uint32_t w, uint32_t h) {
+    VkFgImage* o = &r->fg_motion;
+    o->width = w; o->height = h; o->framebuffer = VK_NULL_HANDLE;
+    VkImageCreateInfo ic = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
+    ic.imageType = VK_IMAGE_TYPE_2D;
+    ic.format = VK_FORMAT_R16G16B16A16_SFLOAT; // mandatory storage format (no extended-format feature)
+    ic.extent.width = w; ic.extent.height = h; ic.extent.depth = 1;
+    ic.mipLevels = 1; ic.arrayLayers = 1;
+    ic.samples = VK_SAMPLE_COUNT_1_BIT;
+    ic.tiling = VK_IMAGE_TILING_OPTIMAL;
+    ic.usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
+    ic.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+    ic.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+    if (vkCreateImage(r->device, &ic, NULL, &o->image) != VK_SUCCESS) return false;
+
+    VkMemoryRequirements mr; vkGetImageMemoryRequirements(r->device, o->image, &mr);
+    VkMemoryAllocateInfo ai = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
+    ai.allocationSize = mr.size;
+    ai.memoryTypeIndex = vkr_find_memory_type(r, mr.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+    if (ai.memoryTypeIndex == UINT32_MAX) return false;
+    if (vkAllocateMemory(r->device, &ai, NULL, &o->memory) != VK_SUCCESS) return false;
+    vkBindImageMemory(r->device, o->image, o->memory, 0);
+
+    VkImageViewCreateInfo vi = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
+    vi.image = o->image; vi.viewType = VK_IMAGE_VIEW_TYPE_2D; vi.format = ic.format;
+    vi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+    vi.subresourceRange.levelCount = 1; vi.subresourceRange.layerCount = 1;
+    if (vkCreateImageView(r->device, &vi, NULL, &o->view) != VK_SUCCESS) return false;
+    return true;
+}
+
+// Builds the complete FG resource set for a w x h output: the linear clamp-to-edge sampler, the
+// two history targets, the motion field (half size, at least 1x1) and, for each parity, one
+// motion.comp and one interpolate.frag descriptor set. On success it records the dimensions,
+// resets the history ring and marks the resources built. On any failure it logs, releases
+// everything via fg_destroy_resources and returns false.
+static bool fg_create_resources(VkRenderer* r, uint32_t w, uint32_t h) {
+    // The sampler comes first: fg_create_color_target writes it into each blit descriptor.
+    VkSamplerCreateInfo si = {VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO};
+    si.magFilter = VK_FILTER_LINEAR; si.minFilter = VK_FILTER_LINEAR;
+    si.addressModeU = si.addressModeV = si.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
+    if (vkCreateSampler(r->device, &si, NULL, &r->fg_sampler) != VK_SUCCESS) goto fail;
+
+    if (!fg_create_color_target(r, &r->fg_history[0], w, h)) goto fail;
+    if (!fg_create_color_target(r, &r->fg_history[1], w, h)) goto fail;
+    // Integer halving can reach 0 for a 1-pixel dimension; clamp to 1.
+    if (!fg_create_motion(r, (w / 2) ? (w / 2) : 1u, (h / 2) ? (h / 2) : 1u)) goto fail;
+
+    // One pair of sets per parity, so the draw only has to pick the set matching fg_history_curr
+    // instead of rewriting descriptors when prev/curr swap.
+    for (uint32_t p = 0; p < 2; p++) {
+        r->fg_motion_set[p] = fg_alloc_set(r, r->pipelines.fg_motion_layout);
+        r->fg_interp_set[p] = fg_alloc_set(r, r->pipelines.fg_interp_layout);
+        if (!r->fg_motion_set[p] || !r->fg_interp_set[p]) goto fail;
+
+        VkImageView prevV = r->fg_history[1u - p].view; // parity p => curr=history[p], prev=history[1-p]
+        VkImageView currV = r->fg_history[p].view;
+
+        // motion.comp set: b0 prev (sampled), b1 curr (sampled), b2 motion (storage, GENERAL)
+        VkDescriptorImageInfo mPrev = { r->fg_sampler, prevV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL };
+        VkDescriptorImageInfo mCurr = { r->fg_sampler, currV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL };
+        VkDescriptorImageInfo mMv = { VK_NULL_HANDLE, r->fg_motion.view, VK_IMAGE_LAYOUT_GENERAL };
+        VkWriteDescriptorSet mw_[3] = {0};
+        for (int b = 0; b < 3; b++) { mw_[b].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; mw_[b].dstSet = r->fg_motion_set[p]; mw_[b].dstBinding = (uint32_t)b; mw_[b].descriptorCount = 1; }
+        mw_[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; mw_[0].pImageInfo = &mPrev;
+        mw_[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; mw_[1].pImageInfo = &mCurr;
+        mw_[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; mw_[2].pImageInfo = &mMv;
+        vkUpdateDescriptorSets(r->device, 3, mw_, 0, NULL);
+
+        // interpolate.frag set: b0 prev, b1 curr, b2 motion — all sampled (SHADER_READ).
+        VkDescriptorImageInfo iPrev = { r->fg_sampler, prevV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL };
+        VkDescriptorImageInfo iCurr = { r->fg_sampler, currV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL };
+        VkDescriptorImageInfo iMv = { r->fg_sampler, r->fg_motion.view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL };
+        VkWriteDescriptorSet iw_[3] = {0};
+        for (int b = 0; b < 3; b++) { iw_[b].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; iw_[b].dstSet = r->fg_interp_set[p]; iw_[b].dstBinding = (uint32_t)b; iw_[b].descriptorCount = 1; iw_[b].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; }
+        iw_[0].pImageInfo = &iPrev; iw_[1].pImageInfo = &iCurr; iw_[2].pImageInfo = &iMv;
+        vkUpdateDescriptorSets(r->device, 3, iw_, 0, NULL);
+    }
+
+    r->fg_dims.width = w; r->fg_dims.height = h;
+    // The ring starts empty: no history slot has been rendered into yet.
+    r->fg_history_curr = 0;
+    r->fg_history_count = 0;
+    r->fg_motion_valid = false; // freshly created motion image — force a recompute before reuse
+    r->fg_built = true;
+    return true;
+
+fail:
+    VK_LOGE("fg_create_resources failed (%ux%u)", w, h);
+    fg_destroy_resources(r);
+    return false;
+}
+
+// Build/rebuild FG images for the current swapchain extent. Caller holds render_mutex.
+// Returns true when FG resources matching r->swapchain_extent exist after the call, false when
+// the extent is empty, the pipelines are not built, or (re)creation failed.
+static bool fg_ensure_resources(VkRenderer* r) {
+    if (r->swapchain_extent.width == 0 || r->swapchain_extent.height == 0) return false;
+    if (!r->pipelines_built) return false;
+    // Fast path: already built for exactly this extent.
+    if (r->fg_built
+        && r->fg_dims.width == r->swapchain_extent.width
+        && r->fg_dims.height == r->swapchain_extent.height) {
+        return true;
+    }
+    // First use or extent change: rebuild from scratch. wait_inflight_frames runs first —
+    // presumably so no submitted frame still references the images about to be destroyed
+    // (its definition is not visible here; confirm).
+    wait_inflight_frames(r);
+    fg_destroy_resources(r);
+    return fg_create_resources(r, r->swapchain_extent.width, r->swapchain_extent.height);
+}
+
+// Restore a frame fence to the signaled state after a submit failure (so the next frame that
+// reuses this index does not block forever on an unsignaled fence).
+// The old fence is destroyed and replaced by one created with VK_FENCE_CREATE_SIGNALED_BIT.
+// NOTE(review): if that creation fails, f->in_flight becomes VK_NULL_HANDLE, and the fence
+// waits/resets on this frame slot elsewhere in this file pass it to Vulkan unchecked — confirm
+// how callers are expected to recover.
+static void fg_restore_fence(VkRenderer* r, VkFrame* f) {
+    vkDestroyFence(r->device, f->in_flight, NULL);
+    VkFenceCreateInfo rfi = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO};
+    rfi.flags = VK_FENCE_CREATE_SIGNALED_BIT;
+    if (vkCreateFence(r->device, &rfi, NULL, &f->in_flight) != VK_SUCCESS) {
+        f->in_flight = VK_NULL_HANDLE;
+        VK_LOGE("Failed to recreate frame fence after FG submit failure");
+    }
+}
+
+// Diagnostic cadence counters (render-thread only). Logged ~once/sec to verify FG is producing
+// ~2 presents (interp + held real) per engine frame (hold). Cheap; safe to leave in.
+// They are file-scope statics, so they are shared by every VkRenderer in the process.
+static uint64_t g_fg_holds = 0;  // successful HOLD submits
+static uint64_t g_fg_interp = 0; // successfully presented interpolated frames
+static uint64_t g_fg_plast = 0;  // successfully presented blits of the latest real frame
+
+// FG submit. HOLD renders the scene into the history ring (no present); INTERP synthesizes and
+// presents an in-between frame; PRESENT_LAST presents the held real frame. Fully serialized.
+static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { + if (!r->surface_ready || !r->swapchain) return false; + pthread_mutex_lock(&r->render_mutex); + if (!r->surface_ready || !r->swapchain || r->swapchain_image_count == 0 + || r->swapchain_extent.width == 0 || r->swapchain_extent.height == 0 + || !r->pipelines_built) { + pthread_mutex_unlock(&r->render_mutex); + return false; + } + + VkFrame* f = &r->frames[r->frame_index]; + vkWaitForFences(r->device, 1, &f->in_flight, VK_TRUE, UINT64_MAX); + + if (!fg_ensure_resources(r)) { pthread_mutex_unlock(&r->render_mutex); return false; } + + // Drain: the 2-slot history ring is written by HOLD and read by INTERP; serialize to avoid aliasing. + wait_inflight_frames(r); + + VkCommandBufferBeginInfo bi = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; + bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + // -------- HOLD: render composited scene into history[curr'] (no present) -------- + if (mode == FG_MODE_HOLD) { + VkScene snap; + VkTexture** dead = NULL; uint32_t dead_count = 0; + pthread_mutex_lock(&r->scene_mutex); + snap = r->scene; + detach_graveyard_slot(r, r->graveyard_index, &dead, &dead_count); + pthread_mutex_unlock(&r->scene_mutex); + destroy_graveyard_textures(r, dead, dead_count); + + SceneTargets st = manage_scene_targets(r, &snap); + uint32_t next = r->fg_history_curr ^ 1u; + VkFgImage* hist = &r->fg_history[next]; + + vkResetFences(r->device, 1, &f->in_flight); + vkBeginCommandBuffer(f->cmd, &bi); + record_scene_chain(r, f->cmd, &snap, st.has_effects, st.wants_sgsr1, + r->pipelines.offscreen_pass, hist->framebuffer, + hist->width, hist->height, true); + vkEndCommandBuffer(f->cmd); + + VkSubmitInfo si = {VK_STRUCTURE_TYPE_SUBMIT_INFO}; + si.commandBufferCount = 1; si.pCommandBuffers = &f->cmd; + pthread_mutex_lock(&r->queue_mutex); + VkResult sr = vkQueueSubmit(r->graphics_queue, 1, &si, f->in_flight); + pthread_mutex_unlock(&r->queue_mutex); + if (sr != VK_SUCCESS) { + VK_LOGE("fg HOLD 
submit -> %d", sr); + fg_restore_fence(r, f); + pthread_mutex_unlock(&r->render_mutex); + return false; + } + r->fg_history_curr = next; + if (r->fg_history_count < 2) r->fg_history_count++; + r->fg_motion_valid = false; // new history pair — flow must be recomputed on the next interp + g_fg_holds++; + pthread_mutex_unlock(&r->render_mutex); + r->frame_index = (r->frame_index + 1) % VK_FRAMES_IN_FLIGHT; + r->graveyard_index = (r->graveyard_index + 1) % (VK_FRAMES_IN_FLIGHT + 1); + return true; + } + + // -------- INTERP / PRESENT_LAST: acquire swapchain image, present -------- + bool do_interp = (mode == FG_MODE_INTERP) && (r->fg_history_count >= 2); + + // Interps are optional: under a non-blocking mode acquire without waiting, so a panel that can't run + // ahead skips the synthetic frame instead of stalling. PRESENT_LAST always blocks (never dropped). + uint64_t acq_timeout = (do_interp && r->active_present_mode != VK_PRESENT_MODE_FIFO_KHR) + ? 0u : UINT64_MAX; + uint32_t image_index = 0; + VkResult acq = vkAcquireNextImageKHR(r->device, r->swapchain, acq_timeout, + f->image_available, VK_NULL_HANDLE, &image_index); + bool recreate_after_present = false; + if (acq == VK_NOT_READY || acq == VK_TIMEOUT) { + // No free image right now — drop this interpolated frame (not an error). 
+ pthread_mutex_unlock(&r->render_mutex); + return false; + } + if (acq == VK_ERROR_OUT_OF_DATE_KHR) { + r->surface_ready = false; + pthread_mutex_lock(&r->queue_mutex); vkQueueWaitIdle(r->graphics_queue); pthread_mutex_unlock(&r->queue_mutex); + fg_destroy_resources(r); + destroy_swapchain_resources(r); + r->surface_ready = create_swapchain(r, r->surface_extent.width, r->surface_extent.height); + pthread_mutex_unlock(&r->render_mutex); + return false; + } else if (acq == VK_SUBOPTIMAL_KHR) { + if (!r->ignore_suboptimal) recreate_after_present = true; + } else if (acq != VK_SUCCESS) { + VK_LOGE("fg acquire -> %d", acq); + pthread_mutex_unlock(&r->render_mutex); + return false; + } + VkSemaphore render_finished = r->swapchain_render_finished[image_index]; + vkResetFences(r->device, 1, &f->in_flight); + + uint32_t parity = r->fg_history_curr; + VkFgImage* curr = &r->fg_history[parity]; + + vkBeginCommandBuffer(f->cmd, &bi); + + if (do_interp) { + VkFgImage* prev = &r->fg_history[parity ^ 1u]; + // Make the HOLD color writes visible to the reads below (compute when recomputing flow, + // else just the fragment interp draw). + VkPipelineStageFlags hist_dst = r->fg_motion_valid + ? VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT : VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + vkr_image_barrier(f->cmd, prev->image, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, hist_dst, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + vkr_image_barrier(f->cmd, curr->image, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, hist_dst, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + if (!r->fg_motion_valid) { + // motion field -> GENERAL, dispatch block matching. 
+ vkr_image_barrier(f->cmd, r->fg_motion.image, + VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, VK_ACCESS_SHADER_WRITE_BIT); + vkCmdBindPipeline(f->cmd, VK_PIPELINE_BIND_POINT_COMPUTE, r->pipelines.fg_motion_pipeline); + vkCmdBindDescriptorSets(f->cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + r->pipelines.fg_motion_pipe_layout, 0, 1, &r->fg_motion_set[parity], 0, NULL); + struct { int32_t mvW, mvH; float invW, invH, mvScale, minStep, p1, p2; } mpc; + mpc.mvW = (int32_t)r->fg_motion.width; mpc.mvH = (int32_t)r->fg_motion.height; + mpc.invW = 1.0f / (float)r->fg_motion.width; + mpc.invH = 1.0f / (float)r->fg_motion.height; + mpc.mvScale = 1.0f; mpc.minStep = (float)r->fg_min_step; mpc.p1 = mpc.p2 = 0.0f; + vkCmdPushConstants(f->cmd, r->pipelines.fg_motion_pipe_layout, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(mpc), &mpc); + vkCmdDispatch(f->cmd, (r->fg_motion.width + 7u) / 8u, (r->fg_motion.height + 7u) / 8u, 1); + // motion field -> SHADER_READ for the interpolation draw. + vkr_image_barrier(f->cmd, r->fg_motion.image, + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + r->fg_motion_valid = true; + } else { + // Flow reused from this pair's first interp (multi-frame 4x/6x). Re-establish the + // compute-write -> fragment-read dependency in this submit; no layout change. 
+ vkr_image_barrier(f->cmd, r->fg_motion.image, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + } + } + + VkClearValue clear = {0}; + clear.color.float32[3] = 1.0f; + VkRenderPassBeginInfo rp = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO}; + rp.renderPass = r->pipelines.swapchain_pass; + rp.framebuffer = r->swapchain_framebuffers[image_index]; + rp.renderArea.extent = r->swapchain_extent; + rp.clearValueCount = 1; rp.pClearValues = &clear; + vkCmdBeginRenderPass(f->cmd, &rp, VK_SUBPASS_CONTENTS_INLINE); + + VkViewport vp = {0, 0, (float)r->swapchain_extent.width, (float)r->swapchain_extent.height, 0.0f, 1.0f}; + VkRect2D scis = {{0, 0}, r->swapchain_extent}; + vkCmdSetViewport(f->cmd, 0, 1, &vp); + vkCmdSetScissor(f->cmd, 0, 1, &scis); + + if (do_interp) { + vkCmdBindPipeline(f->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, r->pipelines.fg_interp_pipeline); + vkCmdBindDescriptorSets(f->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, + r->pipelines.fg_interp_pipe_layout, 0, 1, &r->fg_interp_set[parity], 0, NULL); + struct { float resW, resH, phase, occLo, occHi, pad; } ipc; + ipc.resW = (float)r->swapchain_extent.width; ipc.resH = (float)r->swapchain_extent.height; + ipc.phase = phase; ipc.occLo = r->fg_occ_lo; ipc.occHi = r->fg_occ_hi; ipc.pad = 0.0f; + vkCmdPushConstants(f->cmd, r->pipelines.fg_interp_pipe_layout, VK_SHADER_STAGE_FRAGMENT_BIT, + 0, sizeof(ipc), &ipc); + vkCmdDraw(f->cmd, 3, 1, 0, 0); + } else { + // PRESENT_LAST or interp-not-ready fallback: blit the latest real frame. 
+ vkCmdBindPipeline(f->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, r->pipelines.blit_pipeline); + vkCmdBindDescriptorSets(f->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, + r->pipelines.effect_layout, 0, 1, &curr->blit_set, 0, NULL); + vkCmdDraw(f->cmd, 3, 1, 0, 0); + } + vkCmdEndRenderPass(f->cmd); + vkEndCommandBuffer(f->cmd); + + VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + VkSubmitInfo si = {VK_STRUCTURE_TYPE_SUBMIT_INFO}; + si.waitSemaphoreCount = 1; si.pWaitSemaphores = &f->image_available; + si.pWaitDstStageMask = &wait_stage; + si.commandBufferCount = 1; si.pCommandBuffers = &f->cmd; + si.signalSemaphoreCount = 1; si.pSignalSemaphores = &render_finished; + pthread_mutex_lock(&r->queue_mutex); + VkResult sr = vkQueueSubmit(r->graphics_queue, 1, &si, f->in_flight); + pthread_mutex_unlock(&r->queue_mutex); + if (sr != VK_SUCCESS) { + VK_LOGE("fg present submit -> %d", sr); + fg_restore_fence(r, f); + pthread_mutex_unlock(&r->render_mutex); + return false; + } + + VkPresentInfoKHR pinfo = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR}; + pinfo.waitSemaphoreCount = 1; pinfo.pWaitSemaphores = &render_finished; + pinfo.swapchainCount = 1; pinfo.pSwapchains = &r->swapchain; pinfo.pImageIndices = &image_index; + pthread_mutex_lock(&r->queue_mutex); + VkResult pr = vkQueuePresentKHR(r->graphics_queue, &pinfo); + if (pr == VK_SUCCESS || pr == VK_SUBOPTIMAL_KHR) { + r->fg_present_count++; + if (do_interp) g_fg_interp++; else g_fg_plast++; + if (((g_fg_interp + g_fg_plast) % 120u) == 0u) { + VK_LOGI("FG cadence: holds=%llu interp=%llu presentLast=%llu presents=%llu", + (unsigned long long)g_fg_holds, (unsigned long long)g_fg_interp, + (unsigned long long)g_fg_plast, (unsigned long long)r->fg_present_count); + } + } + pthread_mutex_unlock(&r->queue_mutex); + + bool present_suboptimal = (pr == VK_SUBOPTIMAL_KHR) && !r->ignore_suboptimal; + if (recreate_after_present || pr == VK_ERROR_OUT_OF_DATE_KHR || present_suboptimal) { + r->surface_ready = false; + 
pthread_mutex_lock(&r->queue_mutex); vkQueueWaitIdle(r->graphics_queue); pthread_mutex_unlock(&r->queue_mutex); + fg_destroy_resources(r); + destroy_swapchain_resources(r); + r->surface_ready = create_swapchain(r, r->surface_extent.width, r->surface_extent.height); + } + pthread_mutex_unlock(&r->render_mutex); + r->frame_index = (r->frame_index + 1) % VK_FRAMES_IN_FLIGHT; + return true; +} + // ============================================================ // JNI entry points // ============================================================ @@ -2004,6 +2598,10 @@ JNIEXPORT jlong JNICALL JNI_FN(nativeCreate)(JNIEnv* env, jclass clazz, VkRenderer* r = calloc(1, sizeof(VkRenderer)); if (!r) return 0; r->target_present_mode = VK_PRESENT_MODE_FIFO_KHR; + r->active_present_mode = VK_PRESENT_MODE_FIFO_KHR; + r->fg_occ_lo = 0.06f; + r->fg_occ_hi = 0.25f; + r->fg_min_step = 1; r->validation_enabled = (enableValidationLayers == JNI_TRUE); pthread_mutex_init(&r->scene_mutex, NULL); pthread_mutex_init(&r->queue_mutex, NULL); @@ -2085,6 +2683,7 @@ JNIEXPORT void JNICALL JNI_FN(nativeDestroy)(JNIEnv* env, jclass clazz, jlong ha free(r->batch_entry_scratch); free(r->batch_prepared_scratch); + fg_destroy_resources(r); destroy_sgsr1_resources(r); destroy_offscreen(r); destroy_swapchain(r); @@ -2229,6 +2828,83 @@ JNIEXPORT jboolean JNICALL JNI_FN(nativeRenderFrame)(JNIEnv* env, jclass clazz, return record_and_submit_frame(r) ? JNI_TRUE : JNI_FALSE; } +// ---- Frame generation JNI ---- + +JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGeneration)(JNIEnv* env, jclass clazz, jlong handle, jboolean enabled) { + (void)env; (void)clazz; + VkRenderer* r = (VkRenderer*)(intptr_t)handle; + if (!r) return; + r->fg_enabled = (enabled == JNI_TRUE); + VK_LOGI("Frame generation %s (fp16=%d)", r->fg_enabled ? "ENABLED" : "disabled", r->fg_float16_supported); +} + +// Present mode actually in use (Java convention: 0 FIFO, 1 MAILBOX, 2 IMMEDIATE). 
FG uses this to +// know whether presents are non-blocking — only then may it post above the panel's idle refresh. +JNIEXPORT jint JNICALL JNI_FN(nativeGetActivePresentMode)(JNIEnv* env, jclass clazz, jlong handle) { + (void)env; (void)clazz; + VkRenderer* r = (VkRenderer*)(intptr_t)handle; + if (!r) return 0; + switch (r->active_present_mode) { + case VK_PRESENT_MODE_MAILBOX_KHR: return 1; + case VK_PRESENT_MODE_IMMEDIATE_KHR: return 2; + default: return 0; + } +} + +JNIEXPORT jboolean JNICALL JNI_FN(nativeFrameGenerationSupported)(JNIEnv* env, jclass clazz, jlong handle) { + (void)env; (void)clazz; + VkRenderer* r = (VkRenderer*)(intptr_t)handle; + // Compute + rgba16f storage are universal on Vulkan 1.1 Android GPUs, and the fp32 motion + // shader covers devices without shaderFloat16, so FG is effectively always available. + return (r != NULL) ? JNI_TRUE : JNI_FALSE; +} + +// Monotonic count of actual vkQueuePresentKHR calls (real + interpolated). The HUD derives +// Display FPS from deltas of this; Engine FPS stays on the X11-Present path in Java. +JNIEXPORT jlong JNICALL JNI_FN(nativeGetDisplayFrameCount)(JNIEnv* env, jclass clazz, jlong handle) { + (void)env; (void)clazz; + VkRenderer* r = (VkRenderer*)(intptr_t)handle; + if (!r) return 0; + pthread_mutex_lock(&r->queue_mutex); + uint64_t c = r->fg_present_count; + pthread_mutex_unlock(&r->queue_mutex); + return (jlong)c; +} + +JNIEXPORT jboolean JNICALL JNI_FN(nativeRenderHold)(JNIEnv* env, jclass clazz, jlong handle) { + (void)env; (void)clazz; + VkRenderer* r = (VkRenderer*)(intptr_t)handle; + if (!r || !r->surface_ready) return JNI_FALSE; + return fg_submit(r, FG_MODE_HOLD, 0.5f) ? JNI_TRUE : JNI_FALSE; +} + +JNIEXPORT jboolean JNICALL JNI_FN(nativeRenderInterp)(JNIEnv* env, jclass clazz, jlong handle, jfloat phase) { + (void)env; (void)clazz; + VkRenderer* r = (VkRenderer*)(intptr_t)handle; + if (!r || !r->surface_ready) return JNI_FALSE; + return fg_submit(r, FG_MODE_INTERP, (float)phase) ? 
JNI_TRUE : JNI_FALSE; +} + +JNIEXPORT jboolean JNICALL JNI_FN(nativePresentLast)(JNIEnv* env, jclass clazz, jlong handle) { + (void)env; (void)clazz; + VkRenderer* r = (VkRenderer*)(intptr_t)handle; + if (!r || !r->surface_ready) return JNI_FALSE; + return fg_submit(r, FG_MODE_PRESENT_LAST, 0.5f) ? JNI_TRUE : JNI_FALSE; +} + +// Live FG knobs: occLo/occHi = interp consistency window (smoothness), minStep = motion search floor. +JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGenParams)(JNIEnv* env, jclass clazz, jlong handle, + jfloat occLo, jfloat occHi, jint minStep) { + (void)env; (void)clazz; + VkRenderer* r = (VkRenderer*)(intptr_t)handle; + if (!r) return; + float lo = occLo > 0.0f ? occLo : 0.06f; + float hi = occHi > lo ? occHi : (lo + 0.1f); + r->fg_occ_lo = lo; + r->fg_occ_hi = hi; + r->fg_min_step = minStep < 1 ? 1 : (minStep > 8 ? 8 : minStep); +} + // Scene byte buffer layout (must mirror VulkanRenderer.java offsets). Native-endian, packed. // Using a single direct ByteBuffer instead of 6 separate jarray params avoids per-frame JNI // critical regions (each ~3-8µs on ART) and the temporary array shadow allocations they diff --git a/app/src/main/cpp/winlator/vk/vk_state.h b/app/src/main/cpp/winlator/vk/vk_state.h index 26d13730c..99d2abe10 100644 --- a/app/src/main/cpp/winlator/vk/vk_state.h +++ b/app/src/main/cpp/winlator/vk/vk_state.h @@ -177,6 +177,14 @@ typedef struct VkPipelineSet { // Render passes VkRenderPass swapchain_pass; // load=clear, store=store, final=present VkRenderPass offscreen_pass; // load=clear, store=store, final=shader-read + + // --- Frame generation (created once with the rest; persist across swapchain rebuilds) --- + VkDescriptorSetLayout fg_motion_layout; // set0: binding0,1 sampler(prev,curr) + binding2 STORAGE_IMAGE(mv), COMPUTE + VkDescriptorSetLayout fg_interp_layout; // set0: 3x COMBINED_IMAGE_SAMPLER (prev,curr,mv), FRAGMENT + VkPipelineLayout fg_motion_pipe_layout; // [motion] set + 32B compute push range + 
VkPipelineLayout fg_interp_pipe_layout; // [interp] set + 24B fragment push range + VkPipeline fg_motion_pipeline; // compute (block matching) + VkPipeline fg_interp_pipeline; // graphics (interpolation, swapchain_pass) } VkPipelineSet; // ============================================================ @@ -210,6 +218,16 @@ typedef struct VkSgsr1State { uint32_t height; } VkSgsr1State; +// A history frame (full res, render target + sampled) or the half-res motion field (rgba16f). +typedef struct VkFgImage { + VkImage image; + VkImageView view; + VkDeviceMemory memory; + VkFramebuffer framebuffer; // history targets only; VK_NULL_HANDLE for the motion field + VkDescriptorSet blit_set; // history only: single-binding set (sampler_set_layout) for present + uint32_t width, height; +} VkFgImage; + // ============================================================ // Staging pool for async texture uploads // ============================================================ @@ -358,6 +376,24 @@ typedef struct VkRenderer { bool offscreen_built; VkSgsr1State sgsr1; + // --- Frame generation --- + bool fg_enabled; + bool fg_float16_supported; // shaderFloat16 available (selects the fp16 motion shader) + bool fg_built; // history + motion images allocated at fg_dims + VkExtent2D fg_dims; // extent the fg images were built for + VkFgImage fg_history[2]; // composited-scene ring; fg_history_curr = newest + VkFgImage fg_motion; // rgba16f half-res backward-flow field + VkSampler fg_sampler; // linear, clamp — for all fg sampled reads + VkDescriptorSet fg_motion_set[2]; // [parity] prev,curr samplers + motion storage (motion.comp) + VkDescriptorSet fg_interp_set[2]; // [parity] prev,curr,motion samplers (interpolate.frag) + uint32_t fg_history_curr; // parity (0/1) of the most-recent composited frame + uint32_t fg_history_count; // 0,1,2 — valid history frames + uint64_t fg_present_count; // actual vkQueuePresentKHR calls; guarded by queue_mutex + bool fg_motion_valid; // motion field 
current for the live history pair (reused across multi-interp) + float fg_occ_lo; // interpolate.frag consistency lower bound (smoothness) + float fg_occ_hi; // interpolate.frag consistency upper bound (smoothness) + int32_t fg_min_step; // motion.comp lowest TSS step (quality preset; 1 = full search) + // Quad vertex buffer (window/cursor) VkBuffer quad_vbo; VkDeviceMemory quad_vbo_memory; @@ -424,6 +460,9 @@ typedef struct VkRenderer { // Compositor present mode requested by Java (default FIFO). Validated against // device-supported modes in create_swapchain; falls back to FIFO if unavailable. VkPresentModeKHR target_present_mode; + // Present mode actually selected by the last create_swapchain (target may fall back). Read by + // Java (nativeGetActivePresentMode) so frame generation knows whether presents are non-blocking. + VkPresentModeKHR active_present_mode; } VkRenderer; // ============================================================ diff --git a/app/src/main/feature/settings/other/OtherSettingsFragment.kt b/app/src/main/feature/settings/other/OtherSettingsFragment.kt index a80ad8f82..f0d81059b 100644 --- a/app/src/main/feature/settings/other/OtherSettingsFragment.kt +++ b/app/src/main/feature/settings/other/OtherSettingsFragment.kt @@ -147,6 +147,10 @@ class OtherSettingsFragment : Fragment() { preferences.edit { putBoolean("xinput_toggle", checked) } refresh() }, + onNativeFrameGenChanged = { checked -> + preferences.edit { putBoolean("native_frame_generation", checked) } + refresh() + }, onEnableFileProviderChanged = { checked -> preferences.edit { putBoolean("enable_file_provider", checked) } WinToast.show(ctx, R.string.settings_general_take_effect_next_startup) @@ -228,6 +232,7 @@ class OtherSettingsFragment : Fragment() { .coerceIn(10, 300), cursorLock = preferences.getBoolean("cursor_lock", false), xinputDisabled = preferences.getBoolean("xinput_toggle", false), + nativeFrameGen = preferences.getBoolean("native_frame_generation", false), 
enableFileProvider = preferences.getBoolean("enable_file_provider", true), openInBrowser = preferences.getBoolean("open_with_android_browser", false), shareClipboard = preferences.getBoolean("share_android_clipboard", false), diff --git a/app/src/main/feature/settings/other/OtherSettingsScreen.kt b/app/src/main/feature/settings/other/OtherSettingsScreen.kt index 1fad7b04c..653b27db4 100644 --- a/app/src/main/feature/settings/other/OtherSettingsScreen.kt +++ b/app/src/main/feature/settings/other/OtherSettingsScreen.kt @@ -105,6 +105,7 @@ data class OtherSettingsState( val cursorSpeedPercent: Int = 100, val cursorLock: Boolean = false, val xinputDisabled: Boolean = false, + val nativeFrameGen: Boolean = false, val enableFileProvider: Boolean = true, val openInBrowser: Boolean = false, val shareClipboard: Boolean = false, @@ -147,6 +148,7 @@ fun OtherSettingsScreen( onCursorSpeedChanged: (Int) -> Unit, onCursorLockChanged: (Boolean) -> Unit, onXinputDisabledChanged: (Boolean) -> Unit, + onNativeFrameGenChanged: (Boolean) -> Unit, onEnableFileProviderChanged: (Boolean) -> Unit, onOpenInBrowserChanged: (Boolean) -> Unit, onShareClipboardChanged: (Boolean) -> Unit, @@ -278,6 +280,16 @@ fun OtherSettingsScreen( ) } + item(key = "native_frame_gen_card") { + SettingsToggleCard( + title = stringResource(R.string.settings_other_frame_gen_title), + subtitle = stringResource(R.string.settings_other_frame_gen_summary), + icon = Icons.Outlined.Speed, + checked = state.nativeFrameGen, + onCheckedChange = onNativeFrameGenChanged, + ) + } + item(key = "integration_section") { SectionLabel(stringResource(R.string.settings_other_section_integration), modifier = Modifier.padding(top = 8.dp)) } diff --git a/app/src/main/res/values-da/strings.xml b/app/src/main/res/values-da/strings.xml index bce9c0299..e6ea8c0dd 100644 --- a/app/src/main/res/values-da/strings.xml +++ b/app/src/main/res/values-da/strings.xml @@ -1319,4 +1319,13 @@ Installeret sti: Indlæser Workshop-elementer Søg i 
Workshop-elementer Mislykket + Frame Gen + Multiplikator + Kvalitetsforudindstilling + Ydelse + Balanceret + Kvalitet + Jævnhed + Native Frame Generation + Indsæt interpolerede frames i compositoren for at gøre bevægelser mere flydende. Virker med alle spil (ingen spildata nødvendig) og tilføjer en smule input-latens. diff --git a/app/src/main/res/values-de/strings.xml b/app/src/main/res/values-de/strings.xml index 7f1ca96d1..32a1d2438 100644 --- a/app/src/main/res/values-de/strings.xml +++ b/app/src/main/res/values-de/strings.xml @@ -1319,4 +1319,13 @@ Installierter Pfad: Workshop-Elemente werden geladen Workshop-Elemente suchen Fehlgeschlagen + Frame Gen + Multiplikator + Qualitätsvoreinstellung + Leistung + Ausgewogen + Qualität + Glättung + Native Frame Generation + Fügt im Compositor interpolierte Zwischenbilder ein, um Bewegungen flüssiger darzustellen. Funktioniert mit jedem Spiel (keine Spieldaten erforderlich) und erhöht die Eingabelatenz geringfügig. diff --git a/app/src/main/res/values-es/strings.xml b/app/src/main/res/values-es/strings.xml index e6c6b53c0..927ffa928 100644 --- a/app/src/main/res/values-es/strings.xml +++ b/app/src/main/res/values-es/strings.xml @@ -1319,5 +1319,14 @@ Ruta instalada: Cargando elementos de Workshop Buscar elementos de Workshop Fallido + Frame Gen + Multiplicador + Perfil de calidad + Rendimiento + Equilibrado + Calidad + Suavidad + Generación de fotogramas nativa + Inserta fotogramas interpolados en el compositor para suavizar el movimiento. Funciona con cualquier juego (sin necesidad de datos del juego) y añade una pequeña cantidad de latencia de entrada. 
diff --git a/app/src/main/res/values-fr/strings.xml b/app/src/main/res/values-fr/strings.xml index 0107079aa..e017e1bc5 100644 --- a/app/src/main/res/values-fr/strings.xml +++ b/app/src/main/res/values-fr/strings.xml @@ -1319,5 +1319,14 @@ Chemin installé : Chargement des éléments du Workshop Rechercher des éléments du Workshop Échec + Frame Gen + Multiplicateur + Préréglage de qualité + Performance + Équilibré + Qualité + Fluidité + Génération d\'images native + Insère des images interpolées dans le compositeur pour fluidifier les mouvements. Compatible avec tous les jeux (aucune donnée de jeu requise) et ajoute une légère latence d\'entrée. diff --git a/app/src/main/res/values-hi/strings.xml b/app/src/main/res/values-hi/strings.xml index 06b9da9e4..b7a1e4efe 100644 --- a/app/src/main/res/values-hi/strings.xml +++ b/app/src/main/res/values-hi/strings.xml @@ -1256,4 +1256,13 @@ Workshop आइटम लोड हो रहे हैं Workshop आइटम खोजें विफल + फ्रेम जेन + मल्टीप्लायर + क्वालिटी प्रीसेट + परफॉर्मेंस + बैलेंस्ड + क्वालिटी + स्मूदनेस + नेटिव फ्रेम जनरेशन + मोशन को स्मूद बनाने के लिए कंपोजिटर में इंटरपोलेटेड फ्रेम जोड़ें। यह किसी भी गेम के साथ काम करता है (किसी गेम डेटा की ज़रूरत नहीं) और इनपुट लेटेंसी थोड़ी बढ़ा देता है। diff --git a/app/src/main/res/values-it/strings.xml b/app/src/main/res/values-it/strings.xml index e4a4c2de2..81a86f5a3 100644 --- a/app/src/main/res/values-it/strings.xml +++ b/app/src/main/res/values-it/strings.xml @@ -1319,5 +1319,14 @@ Percorso installato: Caricamento elementi Workshop Cerca elementi Workshop Non riuscito + Frame Gen + Moltiplicatore + Preset qualità + Prestazioni + Bilanciato + Qualità + Fluidità + Frame Generation nativa + Inserisce frame interpolati nel compositor per rendere il movimento più fluido. Funziona con qualsiasi gioco (senza bisogno dei dati del gioco) e aggiunge una piccola quantità di latenza di input. 
diff --git a/app/src/main/res/values-ko/strings.xml b/app/src/main/res/values-ko/strings.xml index 805a6643f..7a300da5e 100644 --- a/app/src/main/res/values-ko/strings.xml +++ b/app/src/main/res/values-ko/strings.xml @@ -1320,5 +1320,14 @@ Workshop 항목 로드 중 Workshop 항목 검색 실패 + 프레임 생성 + 배수 + 품질 프리셋 + 성능 + 균형 + 품질 + 부드러움 + 네이티브 프레임 생성 + 컴포지터에서 보간 프레임을 삽입하여 움직임을 부드럽게 만듭니다. 모든 게임에서 작동하며(게임 데이터가 필요 없음) 약간의 입력 지연이 추가됩니다. diff --git a/app/src/main/res/values-pl/strings.xml b/app/src/main/res/values-pl/strings.xml index 28edba29a..be8912389 100644 --- a/app/src/main/res/values-pl/strings.xml +++ b/app/src/main/res/values-pl/strings.xml @@ -1325,5 +1325,14 @@ Zainstalowana ścieżka: Ładowanie elementów Workshop Szukaj elementów Workshop Niepowodzenie + Gen. klatek + Mnożnik + Ustawienie jakości + Wydajność + Zrównoważone + Jakość + Płynność + Natywne generowanie klatek (Frame Generation) + Wstawia interpolowane klatki w kompozytorze, aby wygładzić ruch. Działa z każdą grą (nie wymaga danych gry) i nieznacznie zwiększa opóźnienie sterowania. diff --git a/app/src/main/res/values-pt-rBR/strings.xml b/app/src/main/res/values-pt-rBR/strings.xml index 65afacbd3..2aa3b508b 100644 --- a/app/src/main/res/values-pt-rBR/strings.xml +++ b/app/src/main/res/values-pt-rBR/strings.xml @@ -1319,5 +1319,14 @@ Caminho instalado: Carregando itens do Workshop Buscar itens do Workshop Falhou + Geração de Quadros + Multiplicador + Predefinição de qualidade + Desempenho + Equilibrado + Qualidade + Suavidade + Geração de Quadros Nativa + Insere quadros interpolados no compositor para suavizar o movimento. Funciona com qualquer jogo (não precisa de dados do jogo) e adiciona uma pequena quantidade de latência de entrada. 
diff --git a/app/src/main/res/values-ro/strings.xml b/app/src/main/res/values-ro/strings.xml index 0c9d2b58f..a445a7fd5 100644 --- a/app/src/main/res/values-ro/strings.xml +++ b/app/src/main/res/values-ro/strings.xml @@ -1319,5 +1319,14 @@ Cale instalata: Se încarcă elementele Workshop Caută elemente Workshop Eșuat + Gen. cadre + Multiplicator + Presetare de calitate + Performanță + Echilibrat + Calitate + Fluiditate + Generare nativă de cadre (Frame Generation) + Inserează cadre interpolate în compozitor pentru a fluidiza mișcarea. Funcționează cu orice joc (nu necesită date despre joc) și adaugă o cantitate mică de latență a comenzilor. diff --git a/app/src/main/res/values-ru/strings.xml b/app/src/main/res/values-ru/strings.xml index f998edbf5..4a6f552d9 100644 --- a/app/src/main/res/values-ru/strings.xml +++ b/app/src/main/res/values-ru/strings.xml @@ -1225,4 +1225,13 @@ Загрузка элементов Workshop Поиск элементов Workshop Ошибка + Кадры + Множитель + Пресет качества + Производительность + Сбалансированный + Качество + Плавность + Нативная генерация кадров + Вставка интерполированных кадров в композиторе для плавности движения. Работает с любой игрой (данные игры не нужны) и немного увеличивает задержку ввода. diff --git a/app/src/main/res/values-uk/strings.xml b/app/src/main/res/values-uk/strings.xml index 6e4007967..11a1a5de6 100644 --- a/app/src/main/res/values-uk/strings.xml +++ b/app/src/main/res/values-uk/strings.xml @@ -1325,5 +1325,14 @@ Завантаження елементів Workshop Пошук елементів Workshop Помилка + Кадри + Множник + Налаштування якості + Продуктивність + Збалансовано + Якість + Плавність + Нативна генерація кадрів + Вставляє інтерпольовані кадри в композитор для плавнішого руху. Працює з будь-якою грою (дані гри не потрібні) та трохи збільшує затримку вводу. 
diff --git a/app/src/main/res/values-zh-rCN/strings.xml b/app/src/main/res/values-zh-rCN/strings.xml index dfd008176..e834c227f 100644 --- a/app/src/main/res/values-zh-rCN/strings.xml +++ b/app/src/main/res/values-zh-rCN/strings.xml @@ -1319,5 +1319,14 @@ 正在加载 Workshop 项目 搜索 Workshop 项目 失败 + 帧生成 + 倍率 + 质量预设 + 性能 + 均衡 + 质量 + 流畅度 + 原生帧生成 + 在合成器中插入插值帧以使画面运动更流畅。适用于任何游戏(无需游戏数据),并会增加少量输入延迟。 diff --git a/app/src/main/res/values-zh-rTW/strings.xml b/app/src/main/res/values-zh-rTW/strings.xml index 692da34a7..7bb4104c4 100644 --- a/app/src/main/res/values-zh-rTW/strings.xml +++ b/app/src/main/res/values-zh-rTW/strings.xml @@ -1319,5 +1319,14 @@ 正在載入 Workshop 項目 搜尋 Workshop 項目 失敗 + 生成幀 + 倍率 + 品質預設 + 效能 + 平衡 + 品質 + 流暢度 + 原生畫格生成 + 在合成器中插入內插畫格以使動態更流暢。適用於任何遊戲(無需遊戲資料),並會略微增加輸入延遲。 diff --git a/app/src/main/res/values/strings.xml b/app/src/main/res/values/strings.xml index ef118d423..c3ad1934d 100644 --- a/app/src/main/res/values/strings.xml +++ b/app/src/main/res/values/strings.xml @@ -1326,4 +1326,13 @@ Installed path: Loading Workshop items Search Workshop items Failed + Frame Gen + Multiplier + Quality preset + Performance + Balanced + Quality + Smoothness + Native Frame Generation + Insert interpolated frames in the compositor to smooth motion. Works with any game (no game data needed) and adds a small amount of input latency. 
diff --git a/app/src/main/runtime/display/XServerDisplayActivity.java b/app/src/main/runtime/display/XServerDisplayActivity.java index b3c5b5385..62c353533 100644 --- a/app/src/main/runtime/display/XServerDisplayActivity.java +++ b/app/src/main/runtime/display/XServerDisplayActivity.java @@ -345,6 +345,10 @@ public boolean isInputSuspended() { private boolean frametimeNumericMode = false; private boolean hudCardExpanded = false; private boolean screenEffectsCardExpanded = false; + private boolean frameGenerationEnabled = false; + private int frameGenerationMultiplier = 2; + private int frameGenerationQuality = 1; + private float frameGenerationSmoothing = 0.5f; private boolean sgsrEnabled = false; private boolean sgsrRuntimeEnabled = false; private int sgsrUpscaleMode = 1; @@ -601,7 +605,19 @@ private void applyPreferredRefreshRate() { Runnable applyRefresh = () -> { if (isFinishing() || isDestroyed()) return; - RefreshRateUtils.applyPreferredRefreshRate(this, getRefreshRateOverride(), runtimeFpsLimit); + VulkanRenderer renderer = xServerView != null ? xServerView.getRenderer() : null; + if (renderer != null && renderer.isFrameGenerationEnabled()) { + // FG targets multiplier×engine: pin the display mode to that (capped to panel max) and + // vote it on the surface so the panel holds the refresh. Engine = fps cap, else 60. + int engine = runtimeFpsLimit > 0 ? 
runtimeFpsLimit : 60; + int panelMax = RefreshRateUtils.getMaxSupportedRefreshRate(this); + int target = Math.min(panelMax, engine * renderer.getFrameGenMultiplier()); + renderer.setFrameGenDisplayCap(panelMax); + RefreshRateUtils.applyPreferredRefreshRate(this, target, 0); + requestSurfaceFrameRate((float) target); + } else { + RefreshRateUtils.applyPreferredRefreshRate(this, getRefreshRateOverride(), runtimeFpsLimit); + } }; if (Looper.myLooper() == Looper.getMainLooper()) { @@ -611,6 +627,21 @@ private void applyPreferredRefreshRate() { } } + // Vote a frame rate on the surface so a VRR/ADFR panel holds the high refresh while FG is active. + private void requestSurfaceFrameRate(float hz) { + if (hz <= 0f || Build.VERSION.SDK_INT < Build.VERSION_CODES.R || xServerView == null) return; + try { + android.view.Surface s = xServerView.getHolder().getSurface(); + if (s == null || !s.isValid()) return; + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) { + s.setFrameRate(hz, android.view.Surface.FRAME_RATE_COMPATIBILITY_FIXED_SOURCE, + android.view.Surface.CHANGE_FRAME_RATE_ALWAYS); + } else { + s.setFrameRate(hz, android.view.Surface.FRAME_RATE_COMPATIBILITY_FIXED_SOURCE); + } + } catch (Exception ignore) {} + } + /** * Watch for the display's refresh rate / supported modes changing while a game * is running (e.g. 
the user toggles the system refresh rate, or an external @@ -3862,7 +3893,11 @@ private void renderDrawerMenu() { globalCursorSpeed, xServerView != null && xServerView.getRenderer() != null && xServerView.getRenderer().isFullscreen(), RefreshRateUtils.getMaxSupportedRefreshRate(this), - isRefactorSizeEnabled + isRefactorSizeEnabled, + frameGenerationEnabled, + frameGenerationMultiplier, + frameGenerationQuality, + frameGenerationSmoothing ); if (drawerActionListener == null) { @@ -4049,6 +4084,44 @@ public void onScreenEffectsCardExpandedChanged(boolean expanded) { renderDrawerMenu(); } + @Override + public void onFrameGenerationEnabledChanged(boolean enabled) { + frameGenerationEnabled = enabled; + preferences.edit().putBoolean("native_frame_generation", enabled).apply(); + VulkanRenderer r = xServerView != null ? xServerView.getRenderer() : null; + if (r != null) r.setFrameGeneration(enabled); + applyPreferredRefreshRate(); + renderDrawerMenu(); + } + + @Override + public void onFrameGenerationMultiplierSelected(int multiplier) { + frameGenerationMultiplier = multiplier; + preferences.edit().putInt("frame_generation_multiplier", multiplier).apply(); + VulkanRenderer r = xServerView != null ? xServerView.getRenderer() : null; + if (r != null) r.setFrameGenerationMultiplier(multiplier); + applyPreferredRefreshRate(); + renderDrawerMenu(); + } + + @Override + public void onFrameGenerationQualitySelected(int quality) { + frameGenerationQuality = quality; + preferences.edit().putInt("frame_generation_quality", quality).apply(); + VulkanRenderer r = xServerView != null ? xServerView.getRenderer() : null; + if (r != null) r.setFrameGenerationQuality(quality); + renderDrawerMenu(); + } + + @Override + public void onFrameGenerationSmoothingChanged(float smoothing) { + frameGenerationSmoothing = smoothing; + preferences.edit().putFloat("frame_generation_smoothing", smoothing).apply(); + VulkanRenderer r = xServerView != null ? 
xServerView.getRenderer() : null; + if (r != null) r.setFrameGenerationSmoothness(smoothing); + renderDrawerMenu(); + } + @Override public void onSGSREnabledChanged(boolean enabled) { boolean wasEnabled = sgsrEnabled; @@ -4716,6 +4789,10 @@ private boolean handleDrawerAction(int itemId) { frameRating = new FrameRating(this, graphicsDriverConfig); frameRating.setRenderer(lastRendererName); if (lastGpuName != null) frameRating.setGpuName(lastGpuName); + frameRating.setDisplayFrameCounter(() -> { + VulkanRenderer fgr = xServerView != null ? xServerView.getRenderer() : null; + return (fgr != null && fgr.isFrameGenerationEnabled()) ? fgr.getDisplayFrameCount() : 0L; + }); frameRating.setVisibility(View.GONE); applyHUDSettings(); rootView.addView(frameRating); @@ -5961,6 +6038,14 @@ private void setupUI() { renderer.setNativeMode(isNativeRenderingEnabled); renderer.setPresentMode(VulkanRenderer.parsePresentMode( graphicsDriverConfig != null ? graphicsDriverConfig.get("compositorPresentMode") : null)); + frameGenerationEnabled = preferences.getBoolean("native_frame_generation", false); + frameGenerationMultiplier = preferences.getInt("frame_generation_multiplier", 2); + frameGenerationQuality = preferences.getInt("frame_generation_quality", 1); + frameGenerationSmoothing = preferences.getFloat("frame_generation_smoothing", 0.5f); + renderer.setFrameGenerationMultiplier(frameGenerationMultiplier); + renderer.setFrameGenerationQuality(frameGenerationQuality); + renderer.setFrameGenerationSmoothness(frameGenerationSmoothing); + renderer.setFrameGeneration(frameGenerationEnabled); boolean swapRB = shortcut != null ? 
shortcut.getExtra("swapRB", "0").equals("1") : (container != null && container.getExtra("swapRB", "0").equals("1")); @@ -6009,6 +6094,10 @@ private void setupUI() { frameRating = new FrameRating(this, graphicsDriverConfig); frameRating.setRenderer(lastRendererName); if (lastGpuName != null) frameRating.setGpuName(lastGpuName); + frameRating.setDisplayFrameCounter(() -> { + VulkanRenderer fgr = xServerView != null ? xServerView.getRenderer() : null; + return (fgr != null && fgr.isFrameGenerationEnabled()) ? fgr.getDisplayFrameCount() : 0L; + }); frameRating.setVisibility(View.VISIBLE); applyHUDSettings(); updateHUDRenderMode(); diff --git a/app/src/main/runtime/display/XServerDrawerMenu.kt b/app/src/main/runtime/display/XServerDrawerMenu.kt index b5a413058..cdd726ca9 100644 --- a/app/src/main/runtime/display/XServerDrawerMenu.kt +++ b/app/src/main/runtime/display/XServerDrawerMenu.kt @@ -327,6 +327,10 @@ data class XServerDrawerState( val fpsLimit: Int = 0, val maxRefreshRate: Int = 60, val screenEffectsCardExpanded: Boolean = false, + val frameGenerationEnabled: Boolean = false, + val frameGenerationMultiplier: Int = 2, + val frameGenerationQuality: Int = 1, + val frameGenerationSmoothing: Float = 0.5f, val sgsrEnabled: Boolean = false, val sgsrSharpness: Int = 100, val vividEnabled: Boolean = false, @@ -512,6 +516,14 @@ interface XServerDrawerActionListener { fun onScreenEffectsCardExpandedChanged(expanded: Boolean) + fun onFrameGenerationEnabledChanged(enabled: Boolean) + + fun onFrameGenerationMultiplierSelected(multiplier: Int) + + fun onFrameGenerationQualitySelected(quality: Int) + + fun onFrameGenerationSmoothingChanged(smoothing: Float) + fun onSGSREnabledChanged(enabled: Boolean) fun onSGSRSharpnessChanged(sharpness: Int) @@ -619,6 +631,10 @@ fun buildXServerDrawerState( fullscreenEnabled: Boolean = false, maxRefreshRate: Int = 60, refactorSizeEnabled: Boolean = false, + frameGenerationEnabled: Boolean = false, + frameGenerationMultiplier: Int = 2, + 
frameGenerationQuality: Int = 1, + frameGenerationSmoothing: Float = 0.5f, ): XServerDrawerState { val items = mutableListOf( @@ -770,6 +786,10 @@ fun buildXServerDrawerState( fpsLimit = fpsLimit, maxRefreshRate = maxRefreshRate, screenEffectsCardExpanded = screenEffectsCardExpanded, + frameGenerationEnabled = frameGenerationEnabled, + frameGenerationMultiplier = frameGenerationMultiplier, + frameGenerationQuality = frameGenerationQuality, + frameGenerationSmoothing = frameGenerationSmoothing, sgsrEnabled = sgsrEnabled, sgsrSharpness = sgsrSharpness, vividEnabled = vividEnabled, @@ -967,6 +987,8 @@ private fun TopRail( val activeSpecs = RAIL_PANES.filter { spec -> state.items.any { it.itemId == spec.itemId } } val tileBounds = remember { mutableStateMapOf() } + // Tile bounds are Row-relative, so the indicator (in the un-scrolled parent) subtracts the scroll. + val railScroll = rememberScrollState() val selectedKey = when (openPane) { @@ -1016,10 +1038,12 @@ private fun TopRail( Box( modifier = Modifier - .offset( - x = indicatorX + underlineHorizontalInset, - y = indicatorTileHeight - underlineThickness, - ) + .offset { + IntOffset( + x = (indicatorX + underlineHorizontalInset).roundToPx() - railScroll.value, + y = (indicatorTileHeight - underlineThickness).roundToPx(), + ) + } .width((indicatorWidth - underlineHorizontalInset * 2).coerceAtLeast(0.dp)) .height(underlineThickness) .graphicsLayer { alpha = indicatorAlpha } @@ -1029,7 +1053,7 @@ private fun TopRail( } Row( - modifier = Modifier.horizontalScroll(rememberScrollState()), + modifier = Modifier.horizontalScroll(railScroll), horizontalArrangement = Arrangement.spacedBy(TopRailTileSpacing), verticalAlignment = Alignment.CenterVertically, ) { @@ -2320,6 +2344,67 @@ private fun ScreenEffectsPaneContent( ) } } + + DrawerBooleanRow( + title = stringResource(R.string.session_drawer_rail_label_frame_generation), + checked = state.frameGenerationEnabled, + onCheckedChange = 
listener::onFrameGenerationEnabledChanged, + ) + + AnimatedVisibility( + visible = state.frameGenerationEnabled, + enter = + expandVertically( + animationSpec = tween(durationMillis = 220, easing = FastOutSlowInEasing), + expandFrom = Alignment.Top, + ) + fadeIn(animationSpec = tween(durationMillis = 160, easing = FastOutSlowInEasing)), + exit = + shrinkVertically( + animationSpec = tween(durationMillis = 180, easing = FastOutSlowInEasing), + shrinkTowards = Alignment.Top, + ) + fadeOut(animationSpec = tween(durationMillis = 120, easing = FastOutSlowInEasing)), + ) { + Column(verticalArrangement = Arrangement.spacedBy((8f * paneScale).dp)) { + PaneSectionLabel(stringResource(R.string.session_drawer_frame_generation_multiplier)) + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.spacedBy((8f * paneScale).dp), + ) { + listOf(2, 3, 4).forEach { multiplier -> + HUDToggleChip( + label = "${multiplier}×", + checked = state.frameGenerationMultiplier == multiplier, + onClick = { listener.onFrameGenerationMultiplierSelected(multiplier) }, + modifier = Modifier.weight(1f), + ) + } + } + PaneSectionLabel(stringResource(R.string.session_drawer_frame_generation_quality)) + val qualityLabels = + listOf( + stringResource(R.string.session_drawer_frame_generation_quality_performance), + stringResource(R.string.session_drawer_frame_generation_quality_balanced), + stringResource(R.string.session_drawer_frame_generation_quality_quality), + ) + ChipFlow { + qualityLabels.forEachIndexed { index, label -> + HUDToggleChip( + label = label, + checked = state.frameGenerationQuality == index, + onClick = { listener.onFrameGenerationQualitySelected(index) }, + ) + } + } + DrawerSliderRow( + label = stringResource(R.string.session_drawer_frame_generation_smoothness), + valueText = "${(state.frameGenerationSmoothing * 100).roundToInt()}%", + value = state.frameGenerationSmoothing, + valueRange = 0f..1f, + steps = 0, + onValueChange = 
listener::onFrameGenerationSmoothingChanged, + ) + } + } } ThinDivider() diff --git a/app/src/main/runtime/display/renderer/VulkanRenderer.java b/app/src/main/runtime/display/renderer/VulkanRenderer.java index f2b149cd0..e062efa39 100644 --- a/app/src/main/runtime/display/renderer/VulkanRenderer.java +++ b/app/src/main/runtime/display/renderer/VulkanRenderer.java @@ -58,6 +58,27 @@ public class VulkanRenderer // Must be set before attachSurface — nativeCreate reads it once at instance creation. private volatile String graphicsDriverName = null; + // ---- Frame generation ---- + // Display-cadence pump that inserts interpolated frames between real ones; never starves the real frame. + private volatile boolean frameGenEnabled = false; + private volatile int fgMultiplier = 2; // target display:engine ratio (2, 3, 4) + private final AtomicBoolean fgNewScene = new AtomicBoolean(false); + private final AtomicBoolean fgPumpScheduled = new AtomicBoolean(false); + private boolean fgPendingReal = false; // a held real frame awaits its display tick + private int fgPendingInterps = 0; // interpolated frames still owed before the held real + private int fgInterpTotal = 0; // interps planned for the current engine frame (phase divisor) + private long fgEngineFrames = 0; // count of held real frames since FG was enabled + // EMAs of the pump (=panel) tick interval and the real game-frame interval. + private volatile long fgDisplayPeriodNs = 0; + private volatile long fgGamePeriodNs = 0; + private long fgLastPumpNs = 0; + private volatile long fgLastGameNs = 0; + private volatile int fgActivePresentMode = PRESENT_MODE_FIFO; // resolved native mode (see nativeGetActivePresentMode) + private volatile int fgDisplayCapHz = 0; // panel-max ceiling for the target post rate; 0 = uncapped + // Quality/smoothness, mapped to native shader knobs (motion search floor + interp consistency). 
+ private volatile int fgQuality = 1; // 0 performance, 1 balanced, 2 quality + private volatile float fgSmoothness = 0.5f; + private final EffectComposer effectComposer; public final ViewTransformation viewTransformation = new ViewTransformation(); @@ -169,6 +190,12 @@ public void destroy() { } public void requestRenderCoalesced() { + if (frameGenEnabled) { + // Under FG the pump drives presentation; generic requests only keep it alive (real + // game presents set the new-frame flag in onFramePresented). + scheduleFgPump(); + return; + } if (renderRequested.compareAndSet(false, true)) { mainHandler.post(() -> Choreographer.getInstance().postFrameCallback(frameTimeNanos -> { @@ -178,6 +205,171 @@ public void requestRenderCoalesced() { } } + // ---- Frame generation driver ------------------------------------------- + + /** Toggle native frame generation. Safe to call from any thread. */ + public void setFrameGeneration(boolean enabled) { + frameGenEnabled = enabled; + synchronized (this) { + if (nativeHandle != 0) { + nativeSetFrameGeneration(nativeHandle, enabled); + // Prefer MAILBOX (native falls back to IMMEDIATE, then FIFO). A non-blocking mode lets + // FG post above the panel's idle refresh so an adaptive-refresh panel ramps to the + // generated rate; under FIFO the scheduler degrades to a safe pass-through. + nativeSetPresentMode(nativeHandle, enabled ? PRESENT_MODE_MAILBOX : requestedPresentMode); + fgActivePresentMode = nativeGetActivePresentMode(nativeHandle); + } + } + if (enabled) { + pushFrameGenParams(); + fgPendingReal = false; + fgPendingInterps = 0; + fgInterpTotal = 0; + fgEngineFrames = 0; + fgNewScene.set(true); // re-render current content as the first held frame + scheduleFgPump(); + } + // When disabled, the pump self-stops (fgPumpTick checks frameGenEnabled) and onDrawFrame + // reverts to the coalesced real-present path. 
+ } + + public boolean isFrameGenerationEnabled() { return frameGenEnabled; } + + /** Target display:engine ratio (2, 3, 4). Snapped to a supported value. Live; safe from any thread. */ + public void setFrameGenerationMultiplier(int multiplier) { + fgMultiplier = multiplier <= 2 ? 2 : (multiplier >= 4 ? 4 : 3); + } + + public int getFrameGenMultiplier() { return fgMultiplier; } + + /** Panel-max refresh (Hz) — the scheduler won't target a post rate above this. 0 = uncapped. */ + public void setFrameGenDisplayCap(int hz) { fgDisplayCapHz = Math.max(0, hz); } + + /** Quality preset: 0 performance, 1 balanced, 2 quality. Live; safe from any thread. */ + public void setFrameGenerationQuality(int quality) { + fgQuality = quality < 0 ? 0 : (quality > 2 ? 2 : quality); + pushFrameGenParams(); + } + + public int getFrameGenerationQuality() { return fgQuality; } + + /** Interpolation smoothness in [0,1] (higher trusts motion more — smoother, more ghosting). Live. */ + public void setFrameGenerationSmoothness(float smoothness) { + fgSmoothness = smoothness < 0f ? 0f : (smoothness > 1f ? 1f : smoothness); + pushFrameGenParams(); + } + + public float getFrameGenerationSmoothness() { return fgSmoothness; } + + // Map quality preset + smoothness to the native interpolate.frag / motion.comp knobs. + private void pushFrameGenParams() { + float occHi = 0.12f + 0.28f * fgSmoothness; // consistency window: wider == trusts motion more + float occLo = occHi * 0.25f; + int minStep = fgQuality == 0 ? 4 : (fgQuality == 2 ? 1 : 2); + synchronized (this) { + if (nativeHandle != 0) nativeSetFrameGenParams(nativeHandle, occLo, occHi, minStep); + } + } + + /** Actual vkQueuePresentKHR count (real + interpolated). HUD derives Display FPS from this. */ + public long getDisplayFrameCount() { + synchronized (this) { + return nativeHandle != 0 ? 
nativeGetDisplayFrameCount(nativeHandle) : 0L; + } + } + + private void scheduleFgPump() { + if (!frameGenEnabled) return; + if (fgPumpScheduled.compareAndSet(false, true)) { + mainHandler.post(() -> Choreographer.getInstance().postFrameCallback(this::fgPumpTick)); + } + } + + // Free-running display-cadence pump: each tick wakes the render thread (onDrawFrame -> + // fgDrawFrame) and re-arms itself while FG is enabled. + private void fgPumpTick(long frameTimeNanos) { + fgPumpScheduled.set(false); + if (!frameGenEnabled || nativeHandle == 0) return; + // The swapchain may still be FIFO right after enable (surface not attached yet); re-read until + // it resolves so the bootstrap engages at launch without a manual toggle. + if (fgActivePresentMode == PRESENT_MODE_FIFO) { + fgActivePresentMode = nativeGetActivePresentMode(nativeHandle); + } + if (fgLastPumpNs != 0L) { + long d = frameTimeNanos - fgLastPumpNs; + if (d > 0L && d < 100_000_000L) { // ignore stalls / outliers + fgDisplayPeriodNs = fgDisplayPeriodNs == 0L ? d : fgDisplayPeriodNs + (d - fgDisplayPeriodNs) / 8L; + } + } + fgLastPumpNs = frameTimeNanos; + xServerView.requestRender(); + scheduleFgPump(); + } + + // Render-thread scheduler (DESIGN.md §2): emit enough presents per tick to sustain the target rate. + private void fgDrawFrame() { + int perTick = fgComputePerTick(); + for (int i = 0; i < perTick; i++) fgEmitOne(); + } + + private void fgEmitOne() { + if (fgPendingInterps == 0 && !fgPendingReal) { + if (!fgNewScene.getAndSet(false)) return; // no new game frame — nothing to emit + buildAndSubmitFrame(); // HOLD -> history[curr] (no present) + fgEngineFrames++; + int interps = fgEngineFrames >= 2 ? 
fgComputeInterps() : 0; // need a prev to interpolate from + fgInterpTotal = interps; + fgPendingInterps = interps; + fgPendingReal = true; + } + if (fgPendingInterps > 0) { + int k = fgInterpTotal - fgPendingInterps + 1; // 1..fgInterpTotal + float phase = (float) k / (float) (fgInterpTotal + 1); // evenly split the prev→curr gap + nativeRenderInterp(nativeHandle, phase); + fgPendingInterps--; + } else if (fgPendingReal) { + nativePresentLast(nativeHandle); // the held real frame + fgPendingReal = false; + } + } + + // Target FG post rate (Hz): multiplier × game rate, capped to the panel max. 0 if not measured. + private double fgTargetHz() { + long game = fgGamePeriodNs; + if (game <= 0L) return 0.0; + double target = Math.max(1, fgMultiplier) * (1.0e9 / (double) game); + if (fgDisplayCapHz > 0) target = Math.min(target, (double) fgDisplayCapHz); + return target; + } + + // Interpolated frames to insert between this engine frame and the previous one. + private int fgComputeInterps() { + int maxInterps = Math.max(1, fgMultiplier) - 1; // 2x->1, 3x->2, 4x->3 + long disp = fgDisplayPeriodNs, game = fgGamePeriodNs; + if (disp <= 0L || game <= 0L) return 0; + if (fgActivePresentMode == PRESENT_MODE_FIFO) { + // Vsync-locked: only insert what the current refresh affords (floor, never round — rounding + // up would cost a real frame). Often a clean pass-through. + int slots = (int) Math.floor((double) game / (double) disp); + return Math.max(0, Math.min(maxInterps, slots - 1)); + } + // Non-blocking: post at the target rate so an adaptive-refresh panel ramps up to it. + double gameHz = 1.0e9 / (double) game; + int interps = (int) Math.round(fgTargetHz() / gameHz) - 1; + return Math.max(0, Math.min(maxInterps, interps)); + } + + // Presents per pump tick: enough to sustain the target rate from the current refresh (1 under FIFO). 
+ private int fgComputePerTick() { + if (fgActivePresentMode == PRESENT_MODE_FIFO) return 1; + long disp = fgDisplayPeriodNs; + double target = fgTargetHz(); + if (disp <= 0L || target <= 0.0) return 1; + double panelHz = 1.0e9 / (double) disp; + int n = (int) Math.round(target / Math.max(1.0, panelHz)); + return Math.max(1, Math.min(n, 8)); + } + private Drawable createRootCursorDrawable() { Context context = xServerView.getContext(); BitmapFactory.Options options = new BitmapFactory.Options(); @@ -206,6 +398,18 @@ public void attachSurface(Surface surface) { if (requestedPresentMode != PRESENT_MODE_FIFO) { nativeSetPresentMode(nativeHandle, requestedPresentMode); } + if (frameGenEnabled) { + nativeSetFrameGeneration(nativeHandle, true); + nativeSetPresentMode(nativeHandle, PRESENT_MODE_MAILBOX); // off-vsync FG output + fgActivePresentMode = nativeGetActivePresentMode(nativeHandle); + pushFrameGenParams(); + fgPendingReal = false; + fgPendingInterps = 0; + fgInterpTotal = 0; + fgEngineFrames = 0; + fgNewScene.set(true); + scheduleFgPump(); + } destroyed.set(false); xServer.windowManager.addOnWindowModificationListener(this); xServer.pointer.addOnPointerMotionListener(this); @@ -255,7 +459,11 @@ public void onSurfaceDestroyed() { @Override public void onDrawFrame() { if (nativeHandle == 0) return; - buildAndSubmitFrame(); + if (frameGenEnabled) { + fgDrawFrame(); + } else { + buildAndSubmitFrame(); + } } // ----- Scene assembly ---------------------------------------------------- @@ -494,7 +702,13 @@ private void buildAndSubmitFrame() { nativeSetScene(nativeHandle, buf); // nativeSetFpsLimit is a native no-op (pacing is done elsewhere); not called per frame. - nativeRenderFrame(nativeHandle); + if (frameGenEnabled) { + // FG: render the composited scene into the history ring without presenting; the + // interpolated + held-real presents are issued by fgEmitOne() at display cadence. 
+ nativeRenderHold(nativeHandle); + } else { + nativeRenderFrame(nativeHandle); + } } // ----- WindowManager / Pointer listeners -------------------------------- @@ -556,6 +770,22 @@ public void onPointerMove(short x, short y) { public void onFramePresented(Window window, WindowManager.FrameSource source, int serial) { // DRI3_BUFFER fires at pixmap allocation, not a visible change; the real present already wakes us. Skip it. if (source == WindowManager.FrameSource.DRI3_BUFFER) return; + if (frameGenEnabled) { + // This is an actual game-window frame (X11 Present / PutImage / MIT-SHM) — the only + // signal that drives FG's hold+interpolate cadence. Cursor/Controls go through the + // generic requestRenderCoalesced path and deliberately do not get counted here. + long now = System.nanoTime(); + if (fgLastGameNs != 0L) { + long d = now - fgLastGameNs; + if (d > 0L && d < 500_000_000L) { + fgGamePeriodNs = fgGamePeriodNs == 0L ? d : fgGamePeriodNs + (d - fgGamePeriodNs) / 8L; + } + } + fgLastGameNs = now; + fgNewScene.set(true); + scheduleFgPump(); + return; + } requestRenderCoalesced(); } @@ -805,4 +1035,14 @@ private static native long nativeCreate(boolean enableValidationLayers, private static native void nativeSetScene(long handle, ByteBuffer sceneBuf); private static native void nativeSetFpsLimit(long handle, int fps); private static native void nativeSetPresentMode(long handle, int mode); + + // ---- Frame generation ---- + private static native void nativeSetFrameGeneration(long handle, boolean enabled); + private static native boolean nativeFrameGenerationSupported(long handle); + private static native long nativeGetDisplayFrameCount(long handle); + private static native boolean nativeRenderHold(long handle); + private static native boolean nativeRenderInterp(long handle, float phase); + private static native boolean nativePresentLast(long handle); + private static native void nativeSetFrameGenParams(long handle, float occLo, float occHi, int minStep); + 
private static native int nativeGetActivePresentMode(long handle); } diff --git a/app/src/main/runtime/display/ui/FrameRating.java b/app/src/main/runtime/display/ui/FrameRating.java index 58471d5e6..f66bc7e9d 100644 --- a/app/src/main/runtime/display/ui/FrameRating.java +++ b/app/src/main/runtime/display/ui/FrameRating.java @@ -132,6 +132,12 @@ public void setFrameObserver(FrameObserver observer) { private boolean isStatsRunning; private volatile boolean isCharging; private volatile float lastFPS; + // Frame generation: when set, supplies the cumulative display present count (real + generated). + // 0 means FG is off, in which case the HUD shows the single engine FPS. + private java.util.function.LongSupplier displayFrameCounter; + private long lastDisplayCount = -1L; + private long lastDisplayNano; + private volatile float displayFps; private volatile long lastFrameNano; private long lastPrimaryFrameNano; private long lastGraphRedraw; @@ -876,6 +882,12 @@ private void applyDisplayMode() { requestLayout(); } + /** Supplies the cumulative display present count (real + generated) so the HUD can show the output + * rate while frame generation is on; supply 0 (or null) when FG is off to fall back to engine FPS. */ + public void setDisplayFrameCounter(java.util.function.LongSupplier supplier) { + this.displayFrameCounter = supplier; + } + public void setRenderer(String renderer) { if (renderer == null) { return; @@ -1487,8 +1499,26 @@ public void run() { this.tvTemp.setVisibility(View.GONE); } + // Frame generation: derive the output rate (real + generated) from the present counter so the + // HUD reflects what is actually displayed, not just the engine rate. 
+ if (this.displayFrameCounter != null) { + long c = this.displayFrameCounter.getAsLong(); + if (c <= 0L) { + this.displayFps = 0.0f; + this.lastDisplayCount = -1L; + } else { + if (this.lastDisplayCount >= 0L && nowNano > this.lastDisplayNano) { + this.displayFps = + (float) ((c - this.lastDisplayCount) * 1000000000.0 / (nowNano - this.lastDisplayNano)); + } + this.lastDisplayCount = c; + this.lastDisplayNano = nowNano; + } + } + if (this.enableFps && this.tvFpsBig != null) { - this.tvFpsBig.setText(String.format(Locale.US, "%.0f", this.lastFPS)); + float shownFps = this.displayFps > 0.0f ? this.displayFps : this.lastFPS; + this.tvFpsBig.setText(String.format(Locale.US, "%.0f", shownFps)); this.tvFpsBig.setTextColor(this.C_FPS_OK); this.tvFpsBig.setVisibility(View.VISIBLE); } else if (this.tvFpsBig != null) this.tvFpsBig.setVisibility(View.GONE); From 94e1b2e532dfbd0f43a107f2f58377dc3c42b1bf Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Tue, 9 Jun 2026 12:41:05 -0400 Subject: [PATCH 02/46] Fix frame-gen compositor freeze on cursor/UI changes while the game is idle Under FG only real game presents set fgNewScene, so cursor and window changes (paused, idle menus, static scenes) never reached a HOLD and the compositor stayed frozen on the last frame. Mark a scene-dirty flag on non-game render requests; the pump recomposites and presents it without interpolating. 
--- .../runtime/display/renderer/VulkanRenderer.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/app/src/main/runtime/display/renderer/VulkanRenderer.java b/app/src/main/runtime/display/renderer/VulkanRenderer.java index e062efa39..af645e571 100644 --- a/app/src/main/runtime/display/renderer/VulkanRenderer.java +++ b/app/src/main/runtime/display/renderer/VulkanRenderer.java @@ -63,6 +63,7 @@ public class VulkanRenderer private volatile boolean frameGenEnabled = false; private volatile int fgMultiplier = 2; // target display:engine ratio (2, 3, 4) private final AtomicBoolean fgNewScene = new AtomicBoolean(false); + private final AtomicBoolean fgSceneDirty = new AtomicBoolean(false); // cursor/window change awaiting a recomposite private final AtomicBoolean fgPumpScheduled = new AtomicBoolean(false); private boolean fgPendingReal = false; // a held real frame awaits its display tick private int fgPendingInterps = 0; // interpolated frames still owed before the held real @@ -191,8 +192,8 @@ public void destroy() { public void requestRenderCoalesced() { if (frameGenEnabled) { - // Under FG the pump drives presentation; generic requests only keep it alive (real - // game presents set the new-frame flag in onFramePresented). + // Non-game change (cursor/window/geometry): mark dirty so the pump recomposites it. + fgSceneDirty.set(true); scheduleFgPump(); return; } @@ -314,10 +315,13 @@ private void fgDrawFrame() { private void fgEmitOne() { if (fgPendingInterps == 0 && !fgPendingReal) { - if (!fgNewScene.getAndSet(false)) return; // no new game frame — nothing to emit + boolean newGame = fgNewScene.getAndSet(false); + boolean dirty = fgSceneDirty.getAndSet(false); + if (!newGame && !dirty) return; // nothing changed — idle tick buildAndSubmitFrame(); // HOLD -> history[curr] (no present) fgEngineFrames++; - int interps = fgEngineFrames >= 2 ? 
fgComputeInterps() : 0; // need a prev to interpolate from + // Interpolate only between real game frames; a cursor/UI-only change just recomposites. + int interps = (newGame && fgEngineFrames >= 2) ? fgComputeInterps() : 0; fgInterpTotal = interps; fgPendingInterps = interps; fgPendingReal = true; @@ -754,6 +758,7 @@ public void onUpdateWindowAttributes(Window window, Bitmask mask) { public void requestCursorRender() { cursorActiveUntilNs = System.nanoTime() + CURSOR_ACTIVE_NS; + if (frameGenEnabled) fgSceneDirty.set(true); xServerView.requestTransientRender(100); } From b829bc17aae8c31378722fef05247ee5282c8d7c Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Tue, 9 Jun 2026 14:48:31 -0400 Subject: [PATCH 03/46] Pipeline frame-gen submits: 3-slot history ring + targeted per-slot wait Replace the full per-submit GPU drain (wait_inflight_frames) in fg_submit with a targeted wait on only the history slot a HOLD is about to overwrite. Grow the history ring 2->3 slots so the overwritten slot is never the pair an in-flight INTERP is sampling, and track a fence per slot. Add a fragment->compute WAR barrier so a new pair's motion recompute waits for the prior pair's interp reads on the single graphics queue. This lets CPU record/submit overlap GPU execution, cutting frametime variance under the Balanced/Quality presets. 
--- app/src/main/cpp/winlator/vk/vk_renderer.c | 31 +++++++++++++--------- app/src/main/cpp/winlator/vk/vk_state.h | 9 ++++--- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index 974e3817b..2c0698173 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -2167,11 +2167,12 @@ static void fg_free_set(VkRenderer* r, VkDescriptorSet set) { static void fg_destroy_resources(VkRenderer* r) { if (!r->device) return; - for (uint32_t p = 0; p < 2; p++) { + for (uint32_t p = 0; p < 3; p++) { if (r->fg_motion_set[p]) { fg_free_set(r, r->fg_motion_set[p]); r->fg_motion_set[p] = VK_NULL_HANDLE; } if (r->fg_interp_set[p]) { fg_free_set(r, r->fg_interp_set[p]); r->fg_interp_set[p] = VK_NULL_HANDLE; } } - for (uint32_t i = 0; i < 2; i++) { + memset(r->fg_slot_fence, 0, sizeof(r->fg_slot_fence)); + for (uint32_t i = 0; i < 3; i++) { VkFgImage* o = &r->fg_history[i]; if (o->blit_set) vkr_free_descriptor_set(r, o->blit_set); if (o->framebuffer) vkDestroyFramebuffer(r->device, o->framebuffer, NULL); @@ -2278,14 +2279,16 @@ static bool fg_create_resources(VkRenderer* r, uint32_t w, uint32_t h) { if (!fg_create_color_target(r, &r->fg_history[0], w, h)) goto fail; if (!fg_create_color_target(r, &r->fg_history[1], w, h)) goto fail; + if (!fg_create_color_target(r, &r->fg_history[2], w, h)) goto fail; if (!fg_create_motion(r, (w / 2) ? (w / 2) : 1u, (h / 2) ? 
(h / 2) : 1u)) goto fail; + memset(r->fg_slot_fence, 0, sizeof(r->fg_slot_fence)); - for (uint32_t p = 0; p < 2; p++) { + for (uint32_t p = 0; p < 3; p++) { r->fg_motion_set[p] = fg_alloc_set(r, r->pipelines.fg_motion_layout); r->fg_interp_set[p] = fg_alloc_set(r, r->pipelines.fg_interp_layout); if (!r->fg_motion_set[p] || !r->fg_interp_set[p]) goto fail; - VkImageView prevV = r->fg_history[1u - p].view; // parity p => curr=history[p], prev=history[1-p] + VkImageView prevV = r->fg_history[(p + 2u) % 3u].view; // curr=history[p], prev=history[(p+2)%3] VkImageView currV = r->fg_history[p].view; // motion.comp set: b0 prev (sampled), b1 curr (sampled), b2 motion (storage, GENERAL) @@ -2339,6 +2342,7 @@ static bool fg_ensure_resources(VkRenderer* r) { // Restore a frame fence to the signaled state after a submit failure (so the next frame that // reuses this index does not block forever on an unsignaled fence). static void fg_restore_fence(VkRenderer* r, VkFrame* f) { + for (uint32_t i = 0; i < 3; i++) if (r->fg_slot_fence[i] == f->in_flight) r->fg_slot_fence[i] = VK_NULL_HANDLE; vkDestroyFence(r->device, f->in_flight, NULL); VkFenceCreateInfo rfi = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO}; rfi.flags = VK_FENCE_CREATE_SIGNALED_BIT; @@ -2355,7 +2359,7 @@ static uint64_t g_fg_interp = 0; static uint64_t g_fg_plast = 0; // FG submit. HOLD renders the scene into the history ring (no present); INTERP synthesizes and -// presents an in-between frame; PRESENT_LAST presents the held real frame. Fully serialized. +// presents an in-between frame; PRESENT_LAST presents the held real frame. 
static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { if (!r->surface_ready || !r->swapchain) return false; pthread_mutex_lock(&r->render_mutex); @@ -2371,9 +2375,6 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { if (!fg_ensure_resources(r)) { pthread_mutex_unlock(&r->render_mutex); return false; } - // Drain: the 2-slot history ring is written by HOLD and read by INTERP; serialize to avoid aliasing. - wait_inflight_frames(r); - VkCommandBufferBeginInfo bi = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; @@ -2388,9 +2389,11 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { destroy_graveyard_textures(r, dead, dead_count); SceneTargets st = manage_scene_targets(r, &snap); - uint32_t next = r->fg_history_curr ^ 1u; + uint32_t next = (r->fg_history_curr + 1u) % 3u; VkFgImage* hist = &r->fg_history[next]; + if (r->fg_slot_fence[next] != VK_NULL_HANDLE) + vkWaitForFences(r->device, 1, &r->fg_slot_fence[next], VK_TRUE, UINT64_MAX); vkResetFences(r->device, 1, &f->in_flight); vkBeginCommandBuffer(f->cmd, &bi); record_scene_chain(r, f->cmd, &snap, st.has_effects, st.wants_sgsr1, @@ -2410,6 +2413,7 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { return false; } r->fg_history_curr = next; + r->fg_slot_fence[next] = f->in_flight; if (r->fg_history_count < 2) r->fg_history_count++; r->fg_motion_valid = false; // new history pair — flow must be recomputed on the next interp g_fg_holds++; @@ -2454,12 +2458,13 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { vkResetFences(r->device, 1, &f->in_flight); uint32_t parity = r->fg_history_curr; + uint32_t prev_idx = (parity + 2u) % 3u; VkFgImage* curr = &r->fg_history[parity]; vkBeginCommandBuffer(f->cmd, &bi); if (do_interp) { - VkFgImage* prev = &r->fg_history[parity ^ 1u]; + VkFgImage* prev = &r->fg_history[prev_idx]; // Make the HOLD color writes visible to the reads below (compute when 
recomputing flow, // else just the fragment interp draw). VkPipelineStageFlags hist_dst = r->fg_motion_valid @@ -2473,10 +2478,10 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, hist_dst, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); if (!r->fg_motion_valid) { - // motion field -> GENERAL, dispatch block matching. + // motion field -> GENERAL, dispatch block matching (wait for the prior pair's interp reads). vkr_image_barrier(f->cmd, r->fg_motion.image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, VK_ACCESS_SHADER_WRITE_BIT); vkCmdBindPipeline(f->cmd, VK_PIPELINE_BIND_POINT_COMPUTE, r->pipelines.fg_motion_pipeline); vkCmdBindDescriptorSets(f->cmd, VK_PIPELINE_BIND_POINT_COMPUTE, @@ -2554,6 +2559,8 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { pthread_mutex_unlock(&r->render_mutex); return false; } + r->fg_slot_fence[parity] = f->in_flight; + if (do_interp) r->fg_slot_fence[prev_idx] = f->in_flight; VkPresentInfoKHR pinfo = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR}; pinfo.waitSemaphoreCount = 1; pinfo.pWaitSemaphores = &render_finished; diff --git a/app/src/main/cpp/winlator/vk/vk_state.h b/app/src/main/cpp/winlator/vk/vk_state.h index 99d2abe10..b80ac5a28 100644 --- a/app/src/main/cpp/winlator/vk/vk_state.h +++ b/app/src/main/cpp/winlator/vk/vk_state.h @@ -381,12 +381,13 @@ typedef struct VkRenderer { bool fg_float16_supported; // shaderFloat16 available (selects the fp16 motion shader) bool fg_built; // history + motion images allocated at fg_dims VkExtent2D fg_dims; // extent the fg images were built for - VkFgImage fg_history[2]; // composited-scene ring; fg_history_curr = newest + VkFgImage fg_history[3]; // composited-scene ring; fg_history_curr = newest VkFgImage fg_motion; // rgba16f half-res 
backward-flow field VkSampler fg_sampler; // linear, clamp — for all fg sampled reads - VkDescriptorSet fg_motion_set[2]; // [parity] prev,curr samplers + motion storage (motion.comp) - VkDescriptorSet fg_interp_set[2]; // [parity] prev,curr,motion samplers (interpolate.frag) - uint32_t fg_history_curr; // parity (0/1) of the most-recent composited frame + VkDescriptorSet fg_motion_set[3]; // [curr] prev,curr samplers + motion storage (motion.comp) + VkDescriptorSet fg_interp_set[3]; // [curr] prev,curr,motion samplers (interpolate.frag) + VkFence fg_slot_fence[3]; // last submit that used each history slot + uint32_t fg_history_curr; // index (0..2) of the most-recent composited frame uint32_t fg_history_count; // 0,1,2 — valid history frames uint64_t fg_present_count; // actual vkQueuePresentKHR calls; guarded by queue_mutex bool fg_motion_valid; // motion field current for the live history pair (reused across multi-interp) From 4707488ef337c5bbd4c430ef3d4dd6a65c812145 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Tue, 9 Jun 2026 15:07:46 -0400 Subject: [PATCH 04/46] Frame-gen quality: sub-pixel motion vectors, MV median, swapchain headroom Refine the block-matching result with a per-axis parabolic SSD fit so motion vectors are sub-pixel instead of quantized to 2 full-res px (removes slow-pan wobble); guarded to keep the +/-1 taps inside the search tile. Apply a separable 3x3 component-wise median to the flow field in interpolate.frag to reject outlier vectors without blurring motion edges. Raise the non-FIFO swapchain image floor 3->4 so interpolated frames stop being dropped at acquire, and count those drops in the FG cadence log. 
--- .../cpp/winlator/vk/shaders/interpolate.frag | 20 +++++++++++++++++-- .../main/cpp/winlator/vk/shaders/motion.comp | 15 +++++++++++++- .../cpp/winlator/vk/shaders/motion_fp32.comp | 15 +++++++++++++- app/src/main/cpp/winlator/vk/vk_renderer.c | 11 ++++++---- 4 files changed, 53 insertions(+), 8 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/shaders/interpolate.frag b/app/src/main/cpp/winlator/vk/shaders/interpolate.frag index 9d4c9d865..ff27dc722 100644 --- a/app/src/main/cpp/winlator/vk/shaders/interpolate.frag +++ b/app/src/main/cpp/winlator/vk/shaders/interpolate.frag @@ -12,7 +12,7 @@ layout(location = 0) out vec4 outColor; layout(set = 0, binding = 0) uniform mediump sampler2D prevFrame; // frame N-1 layout(set = 0, binding = 1) uniform mediump sampler2D currFrame; // frame N -layout(set = 0, binding = 2) uniform highp sampler2D motionField; // rg16f half-res, curr->prev MV in half-res px +layout(set = 0, binding = 2) uniform highp sampler2D motionField; // rgba16f half-res (.xy), curr->prev MV in half-res px layout(push_constant) uniform PC { vec2 resolution; // full-res target size (pixels) @@ -28,6 +28,21 @@ bool offFrame(highp vec2 uv) { return any(lessThan(uv, vec2(0.0))) || any(greaterThan(uv, vec2(1.0))); } +vec2 med3(vec2 a, vec2 b, vec2 c) { return max(min(a, b), min(max(a, b), c)); } + +vec2 sampleMV(highp vec2 uv, highp vec2 texel) { + vec2 r0 = med3(texture(motionField, uv + texel * vec2(-1.0, -1.0)).xy, + texture(motionField, uv + texel * vec2( 0.0, -1.0)).xy, + texture(motionField, uv + texel * vec2( 1.0, -1.0)).xy); + vec2 r1 = med3(texture(motionField, uv + texel * vec2(-1.0, 0.0)).xy, + texture(motionField, uv).xy, + texture(motionField, uv + texel * vec2( 1.0, 0.0)).xy); + vec2 r2 = med3(texture(motionField, uv + texel * vec2(-1.0, 1.0)).xy, + texture(motionField, uv + texel * vec2( 0.0, 1.0)).xy, + texture(motionField, uv + texel * vec2( 1.0, 1.0)).xy); + return med3(r0, r1, r2); +} + void main() { float t = clamp(pc.phase > 0.0 
? pc.phase : 0.5, 0.0, 1.0); float lo = pc.occlusionLo > 0.0 ? pc.occlusionLo : 0.06; @@ -35,7 +50,8 @@ void main() { // motionField is half-res and stores curr->prev displacement in half-res pixels. // Normalized displacement = mv_halfPx / mvSize = mv_halfPx * 2 / fullResSize. - vec2 mvNorm = texture(motionField, vUV).xy * 2.0 / pc.resolution; + highp vec2 mvTexel = 2.0 / pc.resolution; + vec2 mvNorm = sampleMV(vUV, mvTexel) * 2.0 / pc.resolution; // Linear-trajectory motion compensation. For an intermediate pixel p (== vUV) // with backward flow mv (curr->prev): diff --git a/app/src/main/cpp/winlator/vk/shaders/motion.comp b/app/src/main/cpp/winlator/vk/shaders/motion.comp index 97717ac32..d08e34ede 100644 --- a/app/src/main/cpp/winlator/vk/shaders/motion.comp +++ b/app/src/main/cpp/winlator/vk/shaders/motion.comp @@ -107,5 +107,18 @@ void main() { if (localBest < bestCost) { bestCost = localBest; bestD = localBestD; center = localBestD; } } - imageStore(motionField, p, vec4(vec2(bestD) * pc.mvScale, 0.0, 0.0)); + vec2 sub = vec2(bestD); + if (abs(bestD.x) < RMAX) { + float cl = blockCost(l, cCenter, bestD + ivec2(-1, 0)); + float cr = blockCost(l, cCenter, bestD + ivec2( 1, 0)); + float dd = cl - 2.0 * bestCost + cr; + if (dd > 0.0) sub.x += clamp(0.5 * (cl - cr) / dd, -0.5, 0.5); + } + if (abs(bestD.y) < RMAX) { + float cu = blockCost(l, cCenter, bestD + ivec2(0, -1)); + float cd = blockCost(l, cCenter, bestD + ivec2(0, 1)); + float dd = cu - 2.0 * bestCost + cd; + if (dd > 0.0) sub.y += clamp(0.5 * (cu - cd) / dd, -0.5, 0.5); + } + imageStore(motionField, p, vec4(sub * pc.mvScale, 0.0, 0.0)); } diff --git a/app/src/main/cpp/winlator/vk/shaders/motion_fp32.comp b/app/src/main/cpp/winlator/vk/shaders/motion_fp32.comp index b7c97fbc1..6e672c107 100644 --- a/app/src/main/cpp/winlator/vk/shaders/motion_fp32.comp +++ b/app/src/main/cpp/winlator/vk/shaders/motion_fp32.comp @@ -92,5 +92,18 @@ void main() { if (localBest < bestCost) { bestCost = localBest; bestD = 
localBestD; center = localBestD; } } - imageStore(motionField, p, vec4(vec2(bestD) * pc.mvScale, 0.0, 0.0)); + vec2 sub = vec2(bestD); + if (abs(bestD.x) < RMAX) { + float cl = blockCost(l, cCenter, bestD + ivec2(-1, 0)); + float cr = blockCost(l, cCenter, bestD + ivec2( 1, 0)); + float dd = cl - 2.0 * bestCost + cr; + if (dd > 0.0) sub.x += clamp(0.5 * (cl - cr) / dd, -0.5, 0.5); + } + if (abs(bestD.y) < RMAX) { + float cu = blockCost(l, cCenter, bestD + ivec2(0, -1)); + float cd = blockCost(l, cCenter, bestD + ivec2(0, 1)); + float dd = cu - 2.0 * bestCost + cd; + if (dd > 0.0) sub.y += clamp(0.5 * (cu - cd) / dd, -0.5, 0.5); + } + imageStore(motionField, p, vec4(sub * pc.mvScale, 0.0, 0.0)); } diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index 2c0698173..7d704cf18 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -1247,8 +1247,8 @@ static bool create_swapchain(VkRenderer* r, uint32_t fallback_width, uint32_t fa caps.currentTransform, pre_transform, present_mode); uint32_t image_count = caps.minImageCount + 1; - // Non-blocking modes (MAILBOX/IMMEDIATE) need >=3 images to run ahead of vblank. - if (present_mode != VK_PRESENT_MODE_FIFO_KHR && image_count < 3) image_count = 3; + // Non-blocking modes (MAILBOX/IMMEDIATE) need headroom so FG interps aren't dropped at acquire. + if (present_mode != VK_PRESENT_MODE_FIFO_KHR && image_count < 4) image_count = 4; if (caps.maxImageCount > 0 && image_count > caps.maxImageCount) image_count = caps.maxImageCount; if (image_count > VK_MAX_SWAPCHAIN_IMAGES) image_count = VK_MAX_SWAPCHAIN_IMAGES; @@ -2357,6 +2357,7 @@ static void fg_restore_fence(VkRenderer* r, VkFrame* f) { static uint64_t g_fg_holds = 0; static uint64_t g_fg_interp = 0; static uint64_t g_fg_plast = 0; +static uint64_t g_fg_dropped = 0; // FG submit. 
HOLD renders the scene into the history ring (no present); INTERP synthesizes and // presents an in-between frame; PRESENT_LAST presents the held real frame. @@ -2436,6 +2437,7 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { bool recreate_after_present = false; if (acq == VK_NOT_READY || acq == VK_TIMEOUT) { // No free image right now — drop this interpolated frame (not an error). + g_fg_dropped++; pthread_mutex_unlock(&r->render_mutex); return false; } @@ -2571,9 +2573,10 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { r->fg_present_count++; if (do_interp) g_fg_interp++; else g_fg_plast++; if (((g_fg_interp + g_fg_plast) % 120u) == 0u) { - VK_LOGI("FG cadence: holds=%llu interp=%llu presentLast=%llu presents=%llu", + VK_LOGI("FG cadence: holds=%llu interp=%llu presentLast=%llu dropped=%llu presents=%llu", (unsigned long long)g_fg_holds, (unsigned long long)g_fg_interp, - (unsigned long long)g_fg_plast, (unsigned long long)r->fg_present_count); + (unsigned long long)g_fg_plast, (unsigned long long)g_fg_dropped, + (unsigned long long)r->fg_present_count); } } pthread_mutex_unlock(&r->queue_mutex); From 40dbc7fd42b746bc9c6ce064ddcfab0c2f11f100 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Tue, 9 Jun 2026 18:07:36 -0400 Subject: [PATCH 05/46] Frame-gen pacing: MAILBOX over-post + native vsync-anchored present pacer Hold the adaptive (LTPO) panel high by over-posting under MAILBOX, and pace presents evenly with a clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME) deadline loop anchored to the Choreographer vsync grid -- the smoothness comes from the absolute-time sleep, not the present mode or VK_GOOGLE_display_timing (Android ignores desiredPresentTime under MAILBOX). The display-timing extension is kept only for read-back telemetry (FG timing: avgInterval log). 
The FPS limiter pins the engine rate; the surface frame-rate vote targets engine*multiplier with a never-below-native floor; the FIFO cadence branch gains an epsilon so an exact ratio doesn't drop a multiplier. --- app/src/main/cpp/winlator/vk/vk_renderer.c | 88 ++++++++++++++++++- app/src/main/cpp/winlator/vk/vk_state.h | 10 +++ .../display/XServerDisplayActivity.java | 2 +- .../display/renderer/VulkanRenderer.java | 17 ++-- 4 files changed, 108 insertions(+), 9 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index 7d704cf18..ba020b6ff 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -24,6 +24,7 @@ #include #include #include +#include // SPIR-V shader byte arrays generated at build time by glslc + bin2c.cmake. #include "shaders/window_vert.spv.h" @@ -40,6 +41,12 @@ #include "shaders/motion_fp32_comp.spv.h" #include "shaders/interpolate_frag.spv.h" +static uint64_t now_monotonic_ns(void) { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec; +} + // ============================================================ // Forward decls // ============================================================ @@ -359,6 +366,7 @@ static bool create_device(VkRenderer* r) { bool has_extmem_caps = has_extension(exts, ext_count, VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME); bool has_queue_fam = has_extension(exts, ext_count, VK_EXT_QUEUE_FAMILY_FOREIGN_EXTENSION_NAME); bool has_f16 = has_extension(exts, ext_count, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); + bool has_display_timing = has_extension(exts, ext_count, VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME); free(exts); @@ -399,6 +407,10 @@ static bool create_device(VkRenderer* r) { enable[enable_n++] = VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME; f16_feat.shaderFloat16 = VK_TRUE; } + if (has_display_timing && enable_n < 16) { + 
enable[enable_n++] = VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME; + r->ext_display_timing = true; + } VK_LOGI("Frame generation fp16 support: ext=%d feature=%d", has_f16, r->fg_float16_supported); VK_LOGI("AHB Vulkan device support: android_hardware_buffer=%d external_memory=%d dedicated=%d get_memory_requirements2=%d queue_family_foreign=%d enabled=%d", @@ -461,8 +473,18 @@ static bool create_device(VkRenderer* r) { r->ext_ycbcr = false; } } + if (r->ext_display_timing) { + r->fnGetRefreshCycleDuration = (PFN_vkGetRefreshCycleDurationGOOGLE) + vkGetDeviceProcAddr(r->device, "vkGetRefreshCycleDurationGOOGLE"); + r->fnGetPastPresentationTiming = (PFN_vkGetPastPresentationTimingGOOGLE) + vkGetDeviceProcAddr(r->device, "vkGetPastPresentationTimingGOOGLE"); + if (!r->fnGetRefreshCycleDuration || !r->fnGetPastPresentationTiming) { + VK_LOGW("VK_GOOGLE_display_timing entry points unavailable; FG present timing disabled"); + r->ext_display_timing = false; + } + } - VK_LOGI("Vulkan device created (AHB=%d, Ycbcr=%d)", r->ext_ahb, r->ext_ycbcr); + VK_LOGI("Vulkan device created (AHB=%d, Ycbcr=%d, displayTiming=%d)", r->ext_ahb, r->ext_ycbcr, r->ext_display_timing); return true; } @@ -1282,6 +1304,15 @@ static bool create_swapchain(VkRenderer* r, uint32_t fallback_width, uint32_t fa r->swapchain = new_sc; if (old_sc) vkDestroySwapchainKHR(r->device, old_sc, NULL); + r->refresh_duration_ns = 0; + r->fg_present_deadline_ns = 0; + r->fg_present_id = 0; + if (r->ext_display_timing && r->fnGetRefreshCycleDuration) { + VkRefreshCycleDurationGOOGLE rc = {0}; + if (r->fnGetRefreshCycleDuration(r->device, r->swapchain, &rc) == VK_SUCCESS) + r->refresh_duration_ns = rc.refreshDuration; + } + uint32_t actual_count = 0; if (vkGetSwapchainImagesKHR(r->device, r->swapchain, &actual_count, NULL) != VK_SUCCESS || actual_count == 0) { @@ -2359,6 +2390,27 @@ static uint64_t g_fg_interp = 0; static uint64_t g_fg_plast = 0; static uint64_t g_fg_dropped = 0; +// desiredPresentTime (CLOCK_MONOTONIC 
ns) for the next FG present; 0 = no constraint (real frame never delayed). +// Block until the next vsync-aligned present deadline (CLOCK_MONOTONIC absolute sleep). +static void fg_pace_to_deadline(VkRenderer* r) { + if (r->active_present_mode == VK_PRESENT_MODE_FIFO_KHR) return; // FIFO already vsync-paces + uint64_t period = r->fg_present_period_ns ? r->fg_present_period_ns : r->refresh_duration_ns; + if (period == 0) return; + uint64_t now = now_monotonic_ns(); + uint64_t prev = r->fg_present_deadline_ns; + uint64_t floor_t = (now > prev) ? now : prev; + uint64_t anchor = r->fg_vsync_anchor_ns; + uint64_t deadline = (anchor != 0 && anchor <= floor_t) + ? anchor + ((floor_t - anchor) / period + 1u) * period + : floor_t + period; + if (deadline > now + 4u * period) deadline = floor_t + period; + r->fg_present_deadline_ns = deadline; + struct timespec ts; + ts.tv_sec = (time_t)(deadline / 1000000000ull); + ts.tv_nsec = (long)(deadline % 1000000000ull); + while (clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ts, NULL) == EINTR) {} +} + // FG submit. HOLD renders the scene into the history ring (no present); INTERP synthesizes and // presents an in-between frame; PRESENT_LAST presents the held real frame. 
static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { @@ -2567,6 +2619,16 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { VkPresentInfoKHR pinfo = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR}; pinfo.waitSemaphoreCount = 1; pinfo.pWaitSemaphores = &render_finished; pinfo.swapchainCount = 1; pinfo.pSwapchains = &r->swapchain; pinfo.pImageIndices = &image_index; + VkPresentTimeGOOGLE ptg; + VkPresentTimesInfoGOOGLE pti; + if (r->ext_display_timing) { + ptg.presentID = ++r->fg_present_id; + ptg.desiredPresentTime = 0; + pti.sType = VK_STRUCTURE_TYPE_PRESENT_TIMES_INFO_GOOGLE; + pti.pNext = NULL; pti.swapchainCount = 1; pti.pTimes = &ptg; + pinfo.pNext = &pti; + } + fg_pace_to_deadline(r); pthread_mutex_lock(&r->queue_mutex); VkResult pr = vkQueuePresentKHR(r->graphics_queue, &pinfo); if (pr == VK_SUCCESS || pr == VK_SUBOPTIMAL_KHR) { @@ -2577,6 +2639,22 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { (unsigned long long)g_fg_holds, (unsigned long long)g_fg_interp, (unsigned long long)g_fg_plast, (unsigned long long)g_fg_dropped, (unsigned long long)r->fg_present_count); + if (r->ext_display_timing && r->fnGetPastPresentationTiming) { + VkPastPresentationTimingGOOGLE pt[16]; + uint32_t n = 16; + VkResult tr = r->fnGetPastPresentationTiming(r->device, r->swapchain, &n, pt); + if ((tr == VK_SUCCESS || tr == VK_INCOMPLETE) && n >= 2) { + double avg_ms = (double)(pt[n - 1].actualPresentTime - pt[0].actualPresentTime) + / (double)(n - 1) / 1.0e6; + int64_t late = 0; uint32_t lc = 0; + for (uint32_t i = 0; i < n; i++) + if (pt[i].desiredPresentTime != 0) { + late += (int64_t)pt[i].actualPresentTime - (int64_t)pt[i].desiredPresentTime; lc++; + } + VK_LOGI("FG timing: samples=%u avgInterval=%.2fms avgLate=%.2fms", + n, avg_ms, lc ? 
(double)late / (double)lc / 1.0e6 : 0.0); + } + } } } pthread_mutex_unlock(&r->queue_mutex); @@ -2915,6 +2993,14 @@ JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGenParams)(JNIEnv* env, jclass clazz r->fg_min_step = minStep < 1 ? 1 : (minStep > 8 ? 8 : minStep); } +JNIEXPORT void JNICALL JNI_FN(nativeSetVsyncTiming)(JNIEnv* env, jclass clazz, jlong handle, jlong periodNs, jlong vsyncNs) { + (void)env; (void)clazz; + VkRenderer* r = (VkRenderer*)(intptr_t)handle; + if (!r) return; + r->fg_present_period_ns = periodNs > 0 ? (uint64_t)periodNs : 0; + r->fg_vsync_anchor_ns = vsyncNs > 0 ? (uint64_t)vsyncNs : 0; +} + // Scene byte buffer layout (must mirror VulkanRenderer.java offsets). Native-endian, packed. // Using a single direct ByteBuffer instead of 6 separate jarray params avoids per-frame JNI // critical regions (each ~3-8µs on ART) and the temporary array shadow allocations they diff --git a/app/src/main/cpp/winlator/vk/vk_state.h b/app/src/main/cpp/winlator/vk/vk_state.h index b80ac5a28..ba1c253dc 100644 --- a/app/src/main/cpp/winlator/vk/vk_state.h +++ b/app/src/main/cpp/winlator/vk/vk_state.h @@ -442,6 +442,16 @@ typedef struct VkRenderer { PFN_vkCreateDebugUtilsMessengerEXT fnCreateDebugUtilsMessenger; PFN_vkDestroyDebugUtilsMessengerEXT fnDestroyDebugUtilsMessenger; + // VK_GOOGLE_display_timing — capability-gated FG present-pacing hint + telemetry (no-op when absent). + bool ext_display_timing; + uint64_t refresh_duration_ns; // panel vsync period from the swapchain (fallback) + uint64_t fg_present_period_ns; // target inter-present interval (ns) fed from Java + uint64_t fg_present_deadline_ns; // next clock_nanosleep present deadline + uint64_t fg_vsync_anchor_ns; // latest Choreographer vsync timestamp (CLOCK_MONOTONIC) + uint32_t fg_present_id; + PFN_vkGetRefreshCycleDurationGOOGLE fnGetRefreshCycleDuration; + PFN_vkGetPastPresentationTimingGOOGLE fnGetPastPresentationTiming; + // Async upload pool (created in nativeCreate after device). 
VkStagingPool staging_pool; diff --git a/app/src/main/runtime/display/XServerDisplayActivity.java b/app/src/main/runtime/display/XServerDisplayActivity.java index 62c353533..db76fc10d 100644 --- a/app/src/main/runtime/display/XServerDisplayActivity.java +++ b/app/src/main/runtime/display/XServerDisplayActivity.java @@ -611,7 +611,7 @@ private void applyPreferredRefreshRate() { // vote it on the surface so the panel holds the refresh. Engine = fps cap, else 60. int engine = runtimeFpsLimit > 0 ? runtimeFpsLimit : 60; int panelMax = RefreshRateUtils.getMaxSupportedRefreshRate(this); - int target = Math.min(panelMax, engine * renderer.getFrameGenMultiplier()); + int target = Math.max(60, Math.min(panelMax, engine * renderer.getFrameGenMultiplier())); renderer.setFrameGenDisplayCap(panelMax); RefreshRateUtils.applyPreferredRefreshRate(this, target, 0); requestSurfaceFrameRate((float) target); diff --git a/app/src/main/runtime/display/renderer/VulkanRenderer.java b/app/src/main/runtime/display/renderer/VulkanRenderer.java index af645e571..828333703 100644 --- a/app/src/main/runtime/display/renderer/VulkanRenderer.java +++ b/app/src/main/runtime/display/renderer/VulkanRenderer.java @@ -214,9 +214,7 @@ public void setFrameGeneration(boolean enabled) { synchronized (this) { if (nativeHandle != 0) { nativeSetFrameGeneration(nativeHandle, enabled); - // Prefer MAILBOX (native falls back to IMMEDIATE, then FIFO). A non-blocking mode lets - // FG post above the panel's idle refresh so an adaptive-refresh panel ramps to the - // generated rate; under FIFO the scheduler degrades to a safe pass-through. + // MAILBOX over-post holds the adaptive panel high; the native clock_nanosleep pacer spaces presents evenly. nativeSetPresentMode(nativeHandle, enabled ? 
PRESENT_MODE_MAILBOX : requestedPresentMode); fgActivePresentMode = nativeGetActivePresentMode(nativeHandle); } @@ -300,6 +298,10 @@ private void fgPumpTick(long frameTimeNanos) { long d = frameTimeNanos - fgLastPumpNs; if (d > 0L && d < 100_000_000L) { // ignore stalls / outliers fgDisplayPeriodNs = fgDisplayPeriodNs == 0L ? d : fgDisplayPeriodNs + (d - fgDisplayPeriodNs) / 8L; + if (nativeHandle != 0) { + double th = fgTargetHz(); + nativeSetVsyncTiming(nativeHandle, th > 0.0 ? (long) (1.0e9 / th) : fgDisplayPeriodNs, frameTimeNanos); + } } } fgLastPumpNs = frameTimeNanos; @@ -352,9 +354,9 @@ private int fgComputeInterps() { long disp = fgDisplayPeriodNs, game = fgGamePeriodNs; if (disp <= 0L || game <= 0L) return 0; if (fgActivePresentMode == PRESENT_MODE_FIFO) { - // Vsync-locked: only insert what the current refresh affords (floor, never round — rounding - // up would cost a real frame). Often a clean pass-through. - int slots = (int) Math.floor((double) game / (double) disp); + // Vsync-locked: insert what the current refresh affords. Epsilon absorbs EMA jitter so an + // exact integer ratio (e.g. 120/30) doesn't floor to one slot short. Never below native. + int slots = (int) Math.floor((double) game / (double) disp + 1e-3); return Math.max(0, Math.min(maxInterps, slots - 1)); } // Non-blocking: post at the target rate so an adaptive-refresh panel ramps up to it. 
@@ -404,7 +406,7 @@ public void attachSurface(Surface surface) { } if (frameGenEnabled) { nativeSetFrameGeneration(nativeHandle, true); - nativeSetPresentMode(nativeHandle, PRESENT_MODE_MAILBOX); // off-vsync FG output + nativeSetPresentMode(nativeHandle, PRESENT_MODE_MAILBOX); // over-post hold + native pacer fgActivePresentMode = nativeGetActivePresentMode(nativeHandle); pushFrameGenParams(); fgPendingReal = false; @@ -1050,4 +1052,5 @@ private static native long nativeCreate(boolean enableValidationLayers, private static native boolean nativePresentLast(long handle); private static native void nativeSetFrameGenParams(long handle, float occLo, float occHi, int minStep); private static native int nativeGetActivePresentMode(long handle); + private static native void nativeSetVsyncTiming(long handle, long periodNs, long vsyncNs); } From 14f178e73014e1d4f46452d39981bc4f6cec1596 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Tue, 9 Jun 2026 21:57:41 -0400 Subject: [PATCH 06/46] Frame-gen pacing: present lead, time-based phase, windowed timing log Wake ~150us (FG_PRESENT_LEAD_NS) before the deadline so the present latches the current vblank; compute the interp phase from real-frame arrival times vs the present deadline instead of a fixed k/(n+1) fraction; replace the 4-sample avgInterval log with windowed CoV/min/max present-interval stats. --- app/src/main/cpp/winlator/vk/vk_renderer.c | 81 +++++++++++++------ app/src/main/cpp/winlator/vk/vk_state.h | 8 ++ .../display/renderer/VulkanRenderer.java | 8 +- 3 files changed, 70 insertions(+), 27 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index ba020b6ff..b27c00e54 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -25,6 +25,7 @@ #include #include #include +#include // SPIR-V shader byte arrays generated at build time by glslc + bin2c.cmake.
#include "shaders/window_vert.spv.h" @@ -2391,11 +2392,12 @@ static uint64_t g_fg_plast = 0; static uint64_t g_fg_dropped = 0; // desiredPresentTime (CLOCK_MONOTONIC ns) for the next FG present; 0 = no constraint (real frame never delayed). -// Block until the next vsync-aligned present deadline (CLOCK_MONOTONIC absolute sleep). -static void fg_pace_to_deadline(VkRenderer* r) { - if (r->active_present_mode == VK_PRESENT_MODE_FIFO_KHR) return; // FIFO already vsync-paces +#define FG_PRESENT_LEAD_NS 150000ull // wake this much before the deadline so the present latches this vblank + +// Advance to the next vsync-aligned present deadline; stored and returned for phase + sleep. +static uint64_t fg_compute_deadline(VkRenderer* r) { uint64_t period = r->fg_present_period_ns ? r->fg_present_period_ns : r->refresh_duration_ns; - if (period == 0) return; + if (period == 0) { r->fg_present_deadline_ns = 0; return 0; } uint64_t now = now_monotonic_ns(); uint64_t prev = r->fg_present_deadline_ns; uint64_t floor_t = (now > prev) ? now : prev; @@ -2405,12 +2407,41 @@ static void fg_pace_to_deadline(VkRenderer* r) { : floor_t + period; if (deadline > now + 4u * period) deadline = floor_t + period; r->fg_present_deadline_ns = deadline; + return deadline; +} + +static void fg_sleep_to_deadline(VkRenderer* r) { + if (r->active_present_mode == VK_PRESENT_MODE_FIFO_KHR) return; // FIFO already vsync-paces + uint64_t deadline = r->fg_present_deadline_ns; + if (deadline == 0) return; + uint64_t target = deadline > FG_PRESENT_LEAD_NS ? deadline - FG_PRESENT_LEAD_NS : deadline; struct timespec ts; - ts.tv_sec = (time_t)(deadline / 1000000000ull); - ts.tv_nsec = (long)(deadline % 1000000000ull); + ts.tv_sec = (time_t)(target / 1000000000ull); + ts.tv_nsec = (long)(target % 1000000000ull); while (clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ts, NULL) == EINTR) {} } +// Drain past-present timing records and accumulate the real scan-out interval stats. 
+static void fg_collect_present_timing(VkRenderer* r) { + if (!r->ext_display_timing || !r->fnGetPastPresentationTiming) return; + VkPastPresentationTimingGOOGLE pt[16]; + uint32_t n = 16; + VkResult tr = r->fnGetPastPresentationTiming(r->device, r->swapchain, &n, pt); + if (tr != VK_SUCCESS && tr != VK_INCOMPLETE) return; + for (uint32_t i = 0; i < n; i++) { + uint64_t a = pt[i].actualPresentTime; + if (r->fg_t_last_ns != 0 && a > r->fg_t_last_ns) { + double ms = (double)(a - r->fg_t_last_ns) / 1.0e6; + if (ms > 0.5 && ms < 100.0) { + if (r->fg_t_count == 0 || ms < r->fg_t_min_ms) r->fg_t_min_ms = ms; + if (r->fg_t_count == 0 || ms > r->fg_t_max_ms) r->fg_t_max_ms = ms; + r->fg_t_sum_ms += ms; r->fg_t_sumsq_ms += ms * ms; r->fg_t_count++; + } + } + r->fg_t_last_ns = a; + } +} + // FG submit. HOLD renders the scene into the history ring (no present); INTERP synthesizes and // presents an in-between frame; PRESENT_LAST presents the held real frame. static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { @@ -2515,6 +2546,14 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { uint32_t prev_idx = (parity + 2u) % 3u; VkFgImage* curr = &r->fg_history[parity]; + uint64_t fg_deadline = fg_compute_deadline(r); + if (do_interp && r->fg_prev_arrival_ns != 0 && r->fg_curr_arrival_ns > r->fg_prev_arrival_ns + && fg_deadline > r->fg_curr_arrival_ns) { + double t = (double)(fg_deadline - r->fg_curr_arrival_ns) + / (double)(r->fg_curr_arrival_ns - r->fg_prev_arrival_ns); + phase = (float)(t < 0.0 ? 0.0 : (t > 1.0 ? 
1.0 : t)); + } + vkBeginCommandBuffer(f->cmd, &bi); if (do_interp) { @@ -2628,33 +2667,25 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { pti.pNext = NULL; pti.swapchainCount = 1; pti.pTimes = &ptg; pinfo.pNext = &pti; } - fg_pace_to_deadline(r); + fg_sleep_to_deadline(r); pthread_mutex_lock(&r->queue_mutex); VkResult pr = vkQueuePresentKHR(r->graphics_queue, &pinfo); if (pr == VK_SUCCESS || pr == VK_SUBOPTIMAL_KHR) { r->fg_present_count++; if (do_interp) g_fg_interp++; else g_fg_plast++; + fg_collect_present_timing(r); if (((g_fg_interp + g_fg_plast) % 120u) == 0u) { + double mean = r->fg_t_count ? r->fg_t_sum_ms / r->fg_t_count : 0.0; + double var = r->fg_t_count ? r->fg_t_sumsq_ms / r->fg_t_count - mean * mean : 0.0; + double sd = var > 0.0 ? sqrt(var) : 0.0; VK_LOGI("FG cadence: holds=%llu interp=%llu presentLast=%llu dropped=%llu presents=%llu", (unsigned long long)g_fg_holds, (unsigned long long)g_fg_interp, (unsigned long long)g_fg_plast, (unsigned long long)g_fg_dropped, (unsigned long long)r->fg_present_count); - if (r->ext_display_timing && r->fnGetPastPresentationTiming) { - VkPastPresentationTimingGOOGLE pt[16]; - uint32_t n = 16; - VkResult tr = r->fnGetPastPresentationTiming(r->device, r->swapchain, &n, pt); - if ((tr == VK_SUCCESS || tr == VK_INCOMPLETE) && n >= 2) { - double avg_ms = (double)(pt[n - 1].actualPresentTime - pt[0].actualPresentTime) - / (double)(n - 1) / 1.0e6; - int64_t late = 0; uint32_t lc = 0; - for (uint32_t i = 0; i < n; i++) - if (pt[i].desiredPresentTime != 0) { - late += (int64_t)pt[i].actualPresentTime - (int64_t)pt[i].desiredPresentTime; lc++; - } - VK_LOGI("FG timing: samples=%u avgInterval=%.2fms avgLate=%.2fms", - n, avg_ms, lc ? (double)late / (double)lc / 1.0e6 : 0.0); - } - } + VK_LOGI("FG timing: n=%u mean=%.2fms cov=%.0f%% min=%.2f max=%.2f", + r->fg_t_count, mean, mean > 0.0 ? 100.0 * sd / mean : 0.0, + r->fg_t_count ? r->fg_t_min_ms : 0.0, r->fg_t_count ? 
r->fg_t_max_ms : 0.0); + r->fg_t_count = 0; r->fg_t_sum_ms = 0.0; r->fg_t_sumsq_ms = 0.0; } } pthread_mutex_unlock(&r->queue_mutex); @@ -2966,10 +2997,12 @@ JNIEXPORT jboolean JNICALL JNI_FN(nativeRenderHold)(JNIEnv* env, jclass clazz, j return fg_submit(r, FG_MODE_HOLD, 0.5f) ? JNI_TRUE : JNI_FALSE; } -JNIEXPORT jboolean JNICALL JNI_FN(nativeRenderInterp)(JNIEnv* env, jclass clazz, jlong handle, jfloat phase) { +JNIEXPORT jboolean JNICALL JNI_FN(nativeRenderInterp)(JNIEnv* env, jclass clazz, jlong handle, jfloat phase, jlong prevNs, jlong currNs) { (void)env; (void)clazz; VkRenderer* r = (VkRenderer*)(intptr_t)handle; if (!r || !r->surface_ready) return JNI_FALSE; + r->fg_prev_arrival_ns = prevNs > 0 ? (uint64_t)prevNs : 0; + r->fg_curr_arrival_ns = currNs > 0 ? (uint64_t)currNs : 0; return fg_submit(r, FG_MODE_INTERP, (float)phase) ? JNI_TRUE : JNI_FALSE; } diff --git a/app/src/main/cpp/winlator/vk/vk_state.h b/app/src/main/cpp/winlator/vk/vk_state.h index ba1c253dc..9d88375b5 100644 --- a/app/src/main/cpp/winlator/vk/vk_state.h +++ b/app/src/main/cpp/winlator/vk/vk_state.h @@ -448,6 +448,14 @@ typedef struct VkRenderer { uint64_t fg_present_period_ns; // target inter-present interval (ns) fed from Java uint64_t fg_present_deadline_ns; // next clock_nanosleep present deadline uint64_t fg_vsync_anchor_ns; // latest Choreographer vsync timestamp (CLOCK_MONOTONIC) + uint64_t fg_prev_arrival_ns; // real-frame arrival times, for time-based interp phase + uint64_t fg_curr_arrival_ns; + uint64_t fg_t_last_ns; // present-interval telemetry accumulators + uint32_t fg_t_count; + double fg_t_sum_ms; + double fg_t_sumsq_ms; + double fg_t_min_ms; + double fg_t_max_ms; uint32_t fg_present_id; PFN_vkGetRefreshCycleDurationGOOGLE fnGetRefreshCycleDuration; PFN_vkGetPastPresentationTimingGOOGLE fnGetPastPresentationTiming; diff --git a/app/src/main/runtime/display/renderer/VulkanRenderer.java b/app/src/main/runtime/display/renderer/VulkanRenderer.java index 828333703..521b84e1b 
100644 --- a/app/src/main/runtime/display/renderer/VulkanRenderer.java +++ b/app/src/main/runtime/display/renderer/VulkanRenderer.java @@ -74,6 +74,7 @@ public class VulkanRenderer private volatile long fgGamePeriodNs = 0; private long fgLastPumpNs = 0; private volatile long fgLastGameNs = 0; + private volatile long fgPrevGameNs = 0; private volatile int fgActivePresentMode = PRESENT_MODE_FIFO; // resolved native mode (see nativeGetActivePresentMode) private volatile int fgDisplayCapHz = 0; // panel-max ceiling for the target post rate; 0 = uncapped // Quality/smoothness, mapped to native shader knobs (motion search floor + interp consistency). @@ -330,8 +331,8 @@ private void fgEmitOne() { } if (fgPendingInterps > 0) { int k = fgInterpTotal - fgPendingInterps + 1; // 1..fgInterpTotal - float phase = (float) k / (float) (fgInterpTotal + 1); // evenly split the prev→curr gap - nativeRenderInterp(nativeHandle, phase); + float phase = (float) k / (float) (fgInterpTotal + 1); // even fallback; native refines from arrival times + nativeRenderInterp(nativeHandle, phase, fgPrevGameNs, fgLastGameNs); fgPendingInterps--; } else if (fgPendingReal) { nativePresentLast(nativeHandle); // the held real frame @@ -788,6 +789,7 @@ public void onFramePresented(Window window, WindowManager.FrameSource source, in fgGamePeriodNs = fgGamePeriodNs == 0L ? 
d : fgGamePeriodNs + (d - fgGamePeriodNs) / 8L; } } + fgPrevGameNs = fgLastGameNs; fgLastGameNs = now; fgNewScene.set(true); scheduleFgPump(); @@ -1048,7 +1050,7 @@ private static native long nativeCreate(boolean enableValidationLayers, private static native boolean nativeFrameGenerationSupported(long handle); private static native long nativeGetDisplayFrameCount(long handle); private static native boolean nativeRenderHold(long handle); - private static native boolean nativeRenderInterp(long handle, float phase); + private static native boolean nativeRenderInterp(long handle, float phase, long prevNs, long currNs); private static native boolean nativePresentLast(long handle); private static native void nativeSetFrameGenParams(long handle, float occLo, float occHi, int minStep); private static native int nativeGetActivePresentMode(long handle); From 4af126588caaf0808406581905e08a5208bb375f Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Wed, 10 Jun 2026 08:03:42 -0400 Subject: [PATCH 07/46] FrameGen: deterministic interp phase k/(N+1) + per-period phase telemetry --- app/src/main/cpp/winlator/vk/vk_renderer.c | 25 ++++++++++++++++------ app/src/main/cpp/winlator/vk/vk_state.h | 5 +++++ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index b27c00e54..e3556dc32 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -2546,12 +2547,19 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { uint32_t prev_idx = (parity + 2u) % 3u; VkFgImage* curr = &r->fg_history[parity]; - uint64_t fg_deadline = fg_compute_deadline(r); - if (do_interp && r->fg_prev_arrival_ns != 0 && r->fg_curr_arrival_ns > r->fg_prev_arrival_ns - && fg_deadline > r->fg_curr_arrival_ns) { - double t = (double)(fg_deadline - r->fg_curr_arrival_ns) - / 
(double)(r->fg_curr_arrival_ns - r->fg_prev_arrival_ns); - phase = (float)(t < 0.0 ? 0.0 : (t > 1.0 ? 1.0 : t)); + // Advance the vsync-aligned present deadline for the pacer (fg_sleep_to_deadline). The interp phase + // is left as the cadence's k/(N+1): an even interior position, never a real-frame endpoint. A + // deadline-derived phase was tried and removed — the deadline grid (vsync clock) and the game-frame + // arrivals (present clock) aren't phase-locked, so it injected a constant per-slot bias. + fg_compute_deadline(r); + if (do_interp) { + if (r->fg_curr_arrival_ns != r->fg_dbg_last_curr) { + r->fg_dbg_done_n = r->fg_dbg_n; + for (uint32_t i = 0; i < r->fg_dbg_n && i < 8u; i++) r->fg_dbg_done[i] = r->fg_dbg_phase[i]; + r->fg_dbg_n = 0; + r->fg_dbg_last_curr = r->fg_curr_arrival_ns; + } + if (r->fg_dbg_n < 8u) r->fg_dbg_phase[r->fg_dbg_n++] = phase; } vkBeginCommandBuffer(f->cmd, &bi); @@ -2685,6 +2693,11 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { VK_LOGI("FG timing: n=%u mean=%.2fms cov=%.0f%% min=%.2f max=%.2f", r->fg_t_count, mean, mean > 0.0 ? 100.0 * sd / mean : 0.0, r->fg_t_count ? r->fg_t_min_ms : 0.0, r->fg_t_count ? 
r->fg_t_max_ms : 0.0); + char pbuf[64]; int poff = 0; + for (uint32_t i = 0; i < r->fg_dbg_done_n && i < 8u; i++) + poff += snprintf(pbuf + poff, sizeof(pbuf) - (size_t)poff, "%.2f ", r->fg_dbg_done[i]); + if (poff == 0) pbuf[0] = '\0'; + VK_LOGI("FG phases[n=%u]: %s", r->fg_dbg_done_n, pbuf); r->fg_t_count = 0; r->fg_t_sum_ms = 0.0; r->fg_t_sumsq_ms = 0.0; } } diff --git a/app/src/main/cpp/winlator/vk/vk_state.h b/app/src/main/cpp/winlator/vk/vk_state.h index 9d88375b5..f6279db45 100644 --- a/app/src/main/cpp/winlator/vk/vk_state.h +++ b/app/src/main/cpp/winlator/vk/vk_state.h @@ -456,6 +456,11 @@ typedef struct VkRenderer { double fg_t_sumsq_ms; double fg_t_min_ms; double fg_t_max_ms; + float fg_dbg_phase[8]; // interp phases accumulated for the in-progress period + float fg_dbg_done[8]; // last completed period's phases (logged in telemetry) + uint32_t fg_dbg_n; + uint32_t fg_dbg_done_n; + uint64_t fg_dbg_last_curr; uint32_t fg_present_id; PFN_vkGetRefreshCycleDurationGOOGLE fnGetRefreshCycleDuration; PFN_vkGetPastPresentationTimingGOOGLE fnGetPastPresentationTiming; From 09684df83f9103ac99d81b0d6a26b75be61c8d4b Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Wed, 10 Jun 2026 12:35:51 -0400 Subject: [PATCH 08/46] FrameGen: pin display mode to FG target, even present distribution, order-independent mode resolve --- app/src/main/cpp/winlator/vk/vk_renderer.c | 41 +++++++++---- app/src/main/cpp/winlator/vk/vk_state.h | 4 +- .../display/XServerDisplayActivity.java | 49 +++++++++++++--- .../display/renderer/VulkanRenderer.java | 58 ++++++++++++++++++- .../main/shared/android/RefreshRateUtils.java | 47 ++++++--------- 5 files changed, 149 insertions(+), 50 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index e3556dc32..8ddfe4d03 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -1308,6 +1308,7 @@ static bool create_swapchain(VkRenderer* r, 
uint32_t fallback_width, uint32_t fa r->refresh_duration_ns = 0; r->fg_present_deadline_ns = 0; + r->fg_present_target_ns = 0; r->fg_present_id = 0; if (r->ext_display_timing && r->fnGetRefreshCycleDuration) { VkRefreshCycleDurationGOOGLE rc = {0}; @@ -2395,25 +2396,40 @@ static uint64_t g_fg_dropped = 0; // desiredPresentTime (CLOCK_MONOTONIC ns) for the next FG present; 0 = no constraint (real frame never delayed). #define FG_PRESENT_LEAD_NS 150000ull // wake this much before the deadline so the present latches this vblank -// Advance to the next vsync-aligned present deadline; stored and returned for phase + sleep. +// Advance the present deadline by one target period, then snap it to the panel vsync grid. +// The unsnapped accumulator keeps the average rate exact; the snap places each present on its +// own vblank, spread across the game interval. The snap grid must be anchor + k*refresh (the +// same grid no matter which Choreographer tick last set the anchor) — building it as +// anchor + k*period flips the grid phase with the anchor's tick parity whenever period spans +// more than one refresh, which is what bunched the 2x/3x presents (4x escaped only because +// its period equals one refresh). static uint64_t fg_compute_deadline(VkRenderer* r) { uint64_t period = r->fg_present_period_ns ? r->fg_present_period_ns : r->refresh_duration_ns; - if (period == 0) { r->fg_present_deadline_ns = 0; return 0; } + if (period == 0) { r->fg_present_deadline_ns = 0; r->fg_present_target_ns = 0; return 0; } uint64_t now = now_monotonic_ns(); - uint64_t prev = r->fg_present_deadline_ns; - uint64_t floor_t = (now > prev) ? now : prev; - uint64_t anchor = r->fg_vsync_anchor_ns; - uint64_t deadline = (anchor != 0 && anchor <= floor_t) - ? 
anchor + ((floor_t - anchor) / period + 1u) * period - : floor_t + period; - if (deadline > now + 4u * period) deadline = floor_t + period; + uint64_t deadline = r->fg_present_deadline_ns + period; + if (deadline < now || deadline > now + 4u * period) deadline = now + period; r->fg_present_deadline_ns = deadline; - return deadline; + + uint64_t target = deadline; + uint64_t vs = r->fg_display_period_ns ? r->fg_display_period_ns : r->refresh_duration_ns; + uint64_t anchor = r->fg_vsync_anchor_ns; + // Snap only while the panel carries the target rate (vsync period <= present period). When + // an idle/power policy has dropped the panel below the target, snapping would quantize the + // presents down to the slow grid and SurfaceFlinger would never see the true content rate + // to ramp back up — keep presenting at the unsnapped target cadence instead (MAILBOX drops + // the surplus until the panel recovers). + if (vs != 0 && anchor != 0 && deadline > anchor && vs <= period + period / 8u) { + target = anchor + ((deadline - anchor + vs / 2u) / vs) * vs; + if (target <= r->fg_present_target_ns) target = r->fg_present_target_ns + vs; // one present per vblank + } + r->fg_present_target_ns = target; + return target; } static void fg_sleep_to_deadline(VkRenderer* r) { if (r->active_present_mode == VK_PRESENT_MODE_FIFO_KHR) return; // FIFO already vsync-paces - uint64_t deadline = r->fg_present_deadline_ns; + uint64_t deadline = r->fg_present_target_ns ? r->fg_present_target_ns : r->fg_present_deadline_ns; if (deadline == 0) return; uint64_t target = deadline > FG_PRESENT_LEAD_NS ? deadline - FG_PRESENT_LEAD_NS : deadline; struct timespec ts; @@ -3039,11 +3055,12 @@ JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGenParams)(JNIEnv* env, jclass clazz r->fg_min_step = minStep < 1 ? 1 : (minStep > 8 ? 
8 : minStep); } -JNIEXPORT void JNICALL JNI_FN(nativeSetVsyncTiming)(JNIEnv* env, jclass clazz, jlong handle, jlong periodNs, jlong vsyncNs) { +JNIEXPORT void JNICALL JNI_FN(nativeSetVsyncTiming)(JNIEnv* env, jclass clazz, jlong handle, jlong periodNs, jlong displayPeriodNs, jlong vsyncNs) { (void)env; (void)clazz; VkRenderer* r = (VkRenderer*)(intptr_t)handle; if (!r) return; r->fg_present_period_ns = periodNs > 0 ? (uint64_t)periodNs : 0; + r->fg_display_period_ns = displayPeriodNs > 0 ? (uint64_t)displayPeriodNs : 0; r->fg_vsync_anchor_ns = vsyncNs > 0 ? (uint64_t)vsyncNs : 0; } diff --git a/app/src/main/cpp/winlator/vk/vk_state.h b/app/src/main/cpp/winlator/vk/vk_state.h index f6279db45..c2edeea09 100644 --- a/app/src/main/cpp/winlator/vk/vk_state.h +++ b/app/src/main/cpp/winlator/vk/vk_state.h @@ -446,7 +446,9 @@ typedef struct VkRenderer { bool ext_display_timing; uint64_t refresh_duration_ns; // panel vsync period from the swapchain (fallback) uint64_t fg_present_period_ns; // target inter-present interval (ns) fed from Java - uint64_t fg_present_deadline_ns; // next clock_nanosleep present deadline + uint64_t fg_present_deadline_ns; // unsnapped deadline accumulator (target-rate grid) + uint64_t fg_present_target_ns; // vsync-snapped sleep target for the next present + uint64_t fg_display_period_ns; // live panel vsync period fed from Java (Choreographer EMA) uint64_t fg_vsync_anchor_ns; // latest Choreographer vsync timestamp (CLOCK_MONOTONIC) uint64_t fg_prev_arrival_ns; // real-frame arrival times, for time-based interp phase uint64_t fg_curr_arrival_ns; diff --git a/app/src/main/runtime/display/XServerDisplayActivity.java b/app/src/main/runtime/display/XServerDisplayActivity.java index db76fc10d..5d5882427 100644 --- a/app/src/main/runtime/display/XServerDisplayActivity.java +++ b/app/src/main/runtime/display/XServerDisplayActivity.java @@ -252,6 +252,7 @@ public class XServerDisplayActivity extends FixedFontScaleAppCompatActivity { private boolean 
effectiveShowFPS = false; private boolean isTapToClickEnabled = true; private int runtimeFpsLimit = 0; + private float lastLoggedRefreshHz = 0f; // de-dupes the periodic physical-refresh self-log private String lastRendererName = "Vulkan"; private String lastGpuName = null; private Runnable editInputControlsCallback; @@ -607,14 +608,27 @@ private void applyPreferredRefreshRate() { VulkanRenderer renderer = xServerView != null ? xServerView.getRenderer() : null; if (renderer != null && renderer.isFrameGenerationEnabled()) { - // FG targets multiplier×engine: pin the display mode to that (capped to panel max) and - // vote it on the surface so the panel holds the refresh. Engine = fps cap, else 60. - int engine = runtimeFpsLimit > 0 ? runtimeFpsLimit : 60; + // Pin the panel to the renderer's live FG target (multiplier × measured game fps). + // Until the pump has measured, fall back to multiplier×(fps cap | 60). Passing the + // target as the fpsLimit makes the mode resolver demand a cadence-compatible mode + // (exact match first, then an integer multiple) instead of a raw nearest rate. int panelMax = RefreshRateUtils.getMaxSupportedRefreshRate(this); - int target = Math.max(60, Math.min(panelMax, engine * renderer.getFrameGenMultiplier())); renderer.setFrameGenDisplayCap(panelMax); - RefreshRateUtils.applyPreferredRefreshRate(this, target, 0); + int target = renderer.getFrameGenTargetHz(); + if (target <= 0) { + int engine = runtimeFpsLimit > 0 ? runtimeFpsLimit : 60; + target = Math.max(60, engine * renderer.getFrameGenMultiplier()); + } + target = Math.min(panelMax, target); + // Pin the panel's physical mode to the target (held even when untouched) and vote the + // rate on the surface. NOTE: on aggressive ADFR OEMs (e.g. 
OnePlus/ColorOS) the vendor + // refresh service still drops the *render* rate to 60 when there is no touch input + // unless the app is enrolled in the OEM game mode (Game Space) — that enrollment, not + // any app API, is what holds the render rate untouched. The appCategory="game" manifest + // flag signals the app so the OEM can offer it. + RefreshRateUtils.applyPreferredRefreshRate(this, target, target); requestSurfaceFrameRate((float) target); + lastLoggedRefreshHz = 0f; // force a fresh self-log line after a target change } else { RefreshRateUtils.applyPreferredRefreshRate(this, getRefreshRateOverride(), runtimeFpsLimit); } @@ -628,20 +642,37 @@ private void applyPreferredRefreshRate() { } // Vote a frame rate on the surface so a VRR/ADFR panel holds the high refresh while FG is active. + // DEFAULT (exact-or-multiple) — FIXED_SOURCE is video semantics and lets the idle policy drop + // the panel to a non-multiple rate once touch boost ends. private void requestSurfaceFrameRate(float hz) { if (hz <= 0f || Build.VERSION.SDK_INT < Build.VERSION_CODES.R || xServerView == null) return; try { android.view.Surface s = xServerView.getHolder().getSurface(); if (s == null || !s.isValid()) return; if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) { - s.setFrameRate(hz, android.view.Surface.FRAME_RATE_COMPATIBILITY_FIXED_SOURCE, + s.setFrameRate(hz, android.view.Surface.FRAME_RATE_COMPATIBILITY_DEFAULT, android.view.Surface.CHANGE_FRAME_RATE_ALWAYS); } else { - s.setFrameRate(hz, android.view.Surface.FRAME_RATE_COMPATIBILITY_FIXED_SOURCE); + s.setFrameRate(hz, android.view.Surface.FRAME_RATE_COMPATIBILITY_DEFAULT); } } catch (Exception ignore) {} } + // Log the panel's actual physical refresh rate (what a refresh-rate monitor shows). De-duped so it + // only prints when the rate actually changes — makes it obvious in logcat whether the mode pin is + // holding the target Hz or the system has dropped it. 
+ private void logCurrentRefreshRate(String from) { + try { + android.view.Display d = getWindow().getDecorView().getDisplay(); + if (d == null) return; + float hz = d.getMode().getRefreshRate(); + if (Math.abs(hz - lastLoggedRefreshHz) < 0.5f) return; + lastLoggedRefreshHz = hz; + Log.i("XServerDisplayActivity", "Physical display refresh now " + Math.round(hz) + + "Hz (modeId=" + d.getMode().getModeId() + ", " + from + ")"); + } catch (Exception ignore) {} + } + /** * Watch for the display's refresh rate / supported modes changing while a game * is running (e.g. the user toggles the system refresh rate, or an external @@ -667,6 +698,7 @@ public void onDisplayRemoved(int displayId) { @Override public void onDisplayChanged(int displayId) { + logCurrentRefreshRate("onDisplayChanged"); handleDisplayCapabilitiesChanged(); } }; @@ -6045,6 +6077,9 @@ private void setupUI() { renderer.setFrameGenerationMultiplier(frameGenerationMultiplier); renderer.setFrameGenerationQuality(frameGenerationQuality); renderer.setFrameGenerationSmoothness(frameGenerationSmoothing); + // Re-pin the window's preferred display mode whenever the measured FG target moves + // (the window pin outranks surface frame-rate votes, so it must track the live target). + renderer.setFrameGenRateChangedListener(this::applyPreferredRefreshRate); renderer.setFrameGeneration(frameGenerationEnabled); boolean swapRB = shortcut != null ? 
shortcut.getExtra("swapRB", "0").equals("1") diff --git a/app/src/main/runtime/display/renderer/VulkanRenderer.java b/app/src/main/runtime/display/renderer/VulkanRenderer.java index 521b84e1b..7717657c9 100644 --- a/app/src/main/runtime/display/renderer/VulkanRenderer.java +++ b/app/src/main/runtime/display/renderer/VulkanRenderer.java @@ -3,6 +3,7 @@ import android.content.Context; import android.graphics.Bitmap; import android.graphics.BitmapFactory; +import android.os.Build; import android.os.Handler; import android.os.Looper; import android.util.Log; @@ -80,6 +81,12 @@ public class VulkanRenderer // Quality/smoothness, mapped to native shader knobs (motion search floor + interp consistency). private volatile int fgQuality = 1; // 0 performance, 1 balanced, 2 quality private volatile float fgSmoothness = 0.5f; + // Panel frame-rate request: surface vote here; the activity mirrors it into the window's + // preferredDisplayModeId/preferredRefreshRate (which outrank surface votes) via the listener. + private volatile Surface fgSurface; + private float fgFrameRateHint = -1f; + private long fgFrameRateHintNs = 0L; + private volatile Runnable fgRateChangedListener; private final EffectComposer effectComposer; public final ViewTransformation viewTransformation = new ViewTransformation(); @@ -231,6 +238,7 @@ public void setFrameGeneration(boolean enabled) { } // When disabled, the pump self-stops (fgPumpTick checks frameGenEnabled) and onDrawFrame // reverts to the coalesced real-present path. + if (!enabled) fgApplyFrameRateHint(0.0, System.nanoTime()); } public boolean isFrameGenerationEnabled() { return frameGenEnabled; } @@ -301,7 +309,9 @@ private void fgPumpTick(long frameTimeNanos) { fgDisplayPeriodNs = fgDisplayPeriodNs == 0L ? d : fgDisplayPeriodNs + (d - fgDisplayPeriodNs) / 8L; if (nativeHandle != 0) { double th = fgTargetHz(); - nativeSetVsyncTiming(nativeHandle, th > 0.0 ? 
(long) (1.0e9 / th) : fgDisplayPeriodNs, frameTimeNanos); + nativeSetVsyncTiming(nativeHandle, th > 0.0 ? (long) (1.0e9 / th) : fgDisplayPeriodNs, + fgDisplayPeriodNs, frameTimeNanos); + fgApplyFrameRateHint(th, frameTimeNanos); } } } @@ -377,6 +387,48 @@ private int fgComputePerTick() { return Math.max(1, Math.min(n, 8)); } + // Vote the FG post rate on the content surface (lifts the Android 15+ game default-60Hz + // throttle and drives VRR panels), then tell the activity so it mirrors the target into the + // window's preferredDisplayModeId — the window pin outranks surface votes, so it must carry + // the same value or it silently wins with a stale one. 0 clears both when FG turns off. + private void fgApplyFrameRateHint(double targetHz, long nowNs) { + if (Build.VERSION.SDK_INT < Build.VERSION_CODES.R) return; + float rate = frameGenEnabled && targetHz > 0.0 ? (float) Math.round(targetHz) : 0f; + if (rate == fgFrameRateHint) return; + if (rate != 0f && fgFrameRateHint > 0f && Math.abs(rate - fgFrameRateHint) <= 5f) return; // EMA jitter + if (rate != 0f && nowNs - fgFrameRateHintNs < 500_000_000L) return; + // DEFAULT (exact-or-multiple), not FIXED_SOURCE: FIXED_SOURCE is video semantics — it + // tells SurfaceFlinger pulldown judder is acceptable, which lets the idle/power policy + // drop the panel to 60Hz against a 90/120Hz vote the moment touch boost ends. 
+ Surface s = fgSurface; + if (s != null && s.isValid()) { + try { + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) { + s.setFrameRate(rate, Surface.FRAME_RATE_COMPATIBILITY_DEFAULT, + Surface.CHANGE_FRAME_RATE_ALWAYS); + } else { + s.setFrameRate(rate, Surface.FRAME_RATE_COMPATIBILITY_DEFAULT); + } + } catch (IllegalStateException | IllegalArgumentException ignored) {} + } + fgFrameRateHint = rate; + fgFrameRateHintNs = nowNs; + Log.i(TAG, "FG target display rate: " + (int) rate + "Hz"); + Runnable l = fgRateChangedListener; + if (l != null) l.run(); + } + + /** Live FG display target (multiplier × measured game fps, rounded), or 0 if unknown/off. */ + public int getFrameGenTargetHz() { + float rate = fgFrameRateHint; + return rate > 0f ? Math.round(rate) : 0; + } + + /** Invoked (any thread) whenever the FG display target changes; 0-target means FG off. */ + public void setFrameGenRateChangedListener(Runnable listener) { + fgRateChangedListener = listener; + } + private Drawable createRootCursorDrawable() { Context context = xServerView.getContext(); BitmapFactory.Options options = new BitmapFactory.Options(); @@ -392,6 +444,8 @@ public void setGraphicsDriver(String driverName) { } public void attachSurface(Surface surface) { + fgSurface = surface; + fgFrameRateHint = -1f; // fresh surface carries no frame-rate preference; re-apply if (nativeHandle == 0) { nativeHandle = nativeCreate(shouldEnableValidationLayers(), graphicsDriverName, xServerView.getContext().getApplicationContext()); @@ -1054,5 +1108,5 @@ private static native long nativeCreate(boolean enableValidationLayers, private static native boolean nativePresentLast(long handle); private static native void nativeSetFrameGenParams(long handle, float occLo, float occHi, int minStep); private static native int nativeGetActivePresentMode(long handle); - private static native void nativeSetVsyncTiming(long handle, long periodNs, long vsyncNs); + private static native void nativeSetVsyncTiming(long handle, 
long periodNs, long displayPeriodNs, long vsyncNs); } diff --git a/app/src/main/shared/android/RefreshRateUtils.java b/app/src/main/shared/android/RefreshRateUtils.java index 4d9f8878f..a2e0f9bce 100644 --- a/app/src/main/shared/android/RefreshRateUtils.java +++ b/app/src/main/shared/android/RefreshRateUtils.java @@ -134,50 +134,35 @@ public static int resolvePreferredDisplayModeId(Activity activity, int requested } Display.Mode currentMode = display.getMode(); - Display.Mode[] modes = display.getSupportedModes(); - Display.Mode bestMode = null; - float bestModeRate = 0f; - float closestDelta = Float.MAX_VALUE; + // Two independent passes (order-independent): an exact round-Hz match always wins over a + // closest match. The single-pass version mis-selected when a higher-rate mode preceded the + // exact match in enumeration order (e.g. requestedHz=60 picked the 90Hz mode). + Display.Mode exact = null, closest = null; + float exactRate = 0f, closestDelta = Float.MAX_VALUE, closestRate = 0f; - for (Display.Mode mode : modes) { + for (Display.Mode mode : display.getSupportedModes()) { if (!isSameModeGroup(currentMode, mode)) continue; - float refreshRate = mode.getRefreshRate(); if (refreshRate <= 0f) continue; if (requestedHz <= 0) { - if (bestMode == null || refreshRate > bestModeRate) { - bestMode = mode; - bestModeRate = refreshRate; - } + if (exact == null || refreshRate > exactRate) { exact = mode; exactRate = refreshRate; } continue; } - if (Math.round(refreshRate) == requestedHz) { - if (bestMode == null || refreshRate > bestModeRate) { - bestMode = mode; - bestModeRate = refreshRate; - closestDelta = 0f; - } + if (exact == null || refreshRate > exactRate) { exact = mode; exactRate = refreshRate; } continue; } - - if (bestMode != null && closestDelta == 0f) continue; - float delta = Math.abs(refreshRate - requestedHz); - if (bestMode == null - || delta < closestDelta - || (delta == closestDelta && refreshRate > bestModeRate)) { - bestMode = mode; - 
bestModeRate = refreshRate; - closestDelta = delta; + if (closest == null || delta < closestDelta + || (delta == closestDelta && refreshRate > closestRate)) { + closest = mode; closestDelta = delta; closestRate = refreshRate; } } - if (bestMode != null) { - return bestMode.getModeId(); - } + if (exact != null) return exact.getModeId(); + if (closest != null) return closest.getModeId(); return requestedHz <= 0 ? currentMode.getModeId() : 0; } @@ -300,6 +285,12 @@ public static void applyPreferredRefreshRate(Activity activity, int requestedHz, WindowManager.LayoutParams params = activity.getWindow().getAttributes(); int modeId = resolvePreferredDisplayModeId(activity, effectiveRequestedHz); float refreshRate = resolvePreferredRefreshRate(activity, effectiveRequestedHz); + // Skip redundant window updates: the FG target fluctuates (e.g. 114-122) but resolves to the + // same mode, so without this guard setAttributes would fire several times a second. + if (params.preferredDisplayModeId == modeId + && Math.abs(params.preferredRefreshRate - refreshRate) < 0.5f) { + return; + } params.preferredDisplayModeId = modeId; params.preferredRefreshRate = refreshRate; activity.getWindow().setAttributes(params); From d5ec31697077aa0f4d81449b125fdbc7985297c7 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Wed, 10 Jun 2026 16:03:32 -0400 Subject: [PATCH 09/46] FrameGen: phase-correct interp fallback to fix early frame placement --- .../cpp/winlator/vk/shaders/interpolate.frag | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/shaders/interpolate.frag b/app/src/main/cpp/winlator/vk/shaders/interpolate.frag index ff27dc722..3fd5bfb4e 100644 --- a/app/src/main/cpp/winlator/vk/shaders/interpolate.frag +++ b/app/src/main/cpp/winlator/vk/shaders/interpolate.frag @@ -65,11 +65,19 @@ void main() { // Trust = how consistent the two warps are, gated by on-frame-ness. 
float disagree = abs(luma(cPrev) - luma(cCurr)); - float trust = 1.0 - smoothstep(lo, hi, disagree); - if (offFrame(prevPos) || offFrame(currPos)) trust = 0.0; - - // Fallback for untrusted pixels: nearest real frame, unwarped (no smear). - vec3 nearest = (t < 0.5) ? texture(prevFrame, vUV).rgb : texture(currFrame, vUV).rgb; - - outColor = vec4(clamp(mix(nearest, mc, trust), 0.0, 1.0), 1.0); + bool off = offFrame(prevPos) || offFrame(currPos); + float trust = off ? 0.0 : 1.0 - smoothstep(lo, hi, disagree); + + // Fallback for untrusted pixels. Freezing them at the unwarped prev/curr frame (alpha 0/1) + // made interpolated frames land early: untrusted regions contributed zero motion, so the + // measured placement was ~trust*t instead of t (which is why the Smoothness/trust slider + // visibly shifted it). A phase-t crossfade keeps untrusted pixels advancing to ~alpha=t — + // static UI is identical in both frames so it stays ghost-free, and only genuine disocclusions + // pick up a faint blend instead of a hard catch-up step. Hard off-frame samples still snap to + // the nearest real frame so border disocclusions don't warp in out-of-image garbage. + vec3 cPrevFlat = texture(prevFrame, vUV).rgb; + vec3 cCurrFlat = texture(currFrame, vUV).rgb; + vec3 fallback = off ? ((t < 0.5) ? 
cPrevFlat : cCurrFlat) : mix(cPrevFlat, cCurrFlat, t); + + outColor = vec4(clamp(mix(fallback, mc, trust), 0.0, 1.0), 1.0); } From 24080931df0a5389f74a3803885de97645497296 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Fri, 12 Jun 2026 10:59:11 -0400 Subject: [PATCH 10/46] Frame generation: FIFO slot-grid pacing, extrapolation, per-game settings - FIFO presents with deterministic slot phases; real frames present sharp - Bidirectional warp at every multiplier; median occlusion fallback; static HUD mask - Extrapolation present path (no added latency) - Runtime frames-in-flight (Buffering 1-3) - Per-game FG settings; Smoothest / Low Latency presets in the drawer --- .../cpp/winlator/vk/shaders/interpolate.frag | 153 ++++++--- .../main/cpp/winlator/vk/shaders/motion.comp | 13 +- .../cpp/winlator/vk/shaders/motion_fp32.comp | 13 +- app/src/main/cpp/winlator/vk/vk_renderer.c | 321 +++++++++++++++--- app/src/main/cpp/winlator/vk/vk_state.h | 24 +- .../steam/db/converters/AppConverter.kt | 26 +- .../converters/UserFileInfoListConverter.kt | 6 +- app/src/main/res/values/strings.xml | 3 + .../display/XServerDisplayActivity.java | 118 ++++++- .../main/runtime/display/XServerDrawerMenu.kt | 163 +++++++-- .../display/renderer/VulkanRenderer.java | 304 ++++++++++++++--- .../main/runtime/display/xserver/XServer.java | 6 +- 12 files changed, 951 insertions(+), 199 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/shaders/interpolate.frag b/app/src/main/cpp/winlator/vk/shaders/interpolate.frag index 3fd5bfb4e..8f3e5f770 100644 --- a/app/src/main/cpp/winlator/vk/shaders/interpolate.frag +++ b/app/src/main/cpp/winlator/vk/shaders/interpolate.frag @@ -1,8 +1,17 @@ #version 450 -// Motion-compensated frame interpolation: warps frame N-1/N along motion.comp's backward -// flow to synthesize the phase-t frame. A consistency test falls back to the nearest real -// frame where warps disagree or land off-frame (avoids smearing on HUD/text/disocclusions). 
+// Motion-compensated frame synthesis. +// mode 0 (standard): warp prev/curr along the single backward flow, interpolate at phase t. +// mode 1 (bidirectional): prev warps along its own forward flow; forward-backward consistency +// gives a geometric (dis)occlusion signal. +// mode 2 (extrapolate): predict phase t past curr by continuing the backward flow forward; +// single-image warp, flow-divergence occlusion, no added latency. +// +// Fallback policy (occLo/occHi = the Smoothness slider): +// warps agree -> motion-compensated blend; disagree or geometric occlusion -> per-channel +// median of the two warps and the non-warped phase blend; off-frame -> time-nearest real +// pixel. The fallback must never collapse to one fixed endpoint or low-confidence regions +// stop advancing between real frames. precision mediump float; precision highp int; @@ -10,74 +19,114 @@ precision highp int; layout(location = 0) in highp vec2 vUV; layout(location = 0) out vec4 outColor; -layout(set = 0, binding = 0) uniform mediump sampler2D prevFrame; // frame N-1 -layout(set = 0, binding = 1) uniform mediump sampler2D currFrame; // frame N -layout(set = 0, binding = 2) uniform highp sampler2D motionField; // rgba16f half-res (.xy), curr->prev MV in half-res px +layout(set = 0, binding = 0) uniform mediump sampler2D prevFrame; // frame N-1 +layout(set = 0, binding = 1) uniform mediump sampler2D currFrame; // frame N +layout(set = 0, binding = 2) uniform highp sampler2D motionField; // backward curr->prev, .xy half-res px +layout(set = 0, binding = 3) uniform highp sampler2D motionFieldFwd;// forward prev->curr, .xy half-res px layout(push_constant) uniform PC { vec2 resolution; // full-res target size (pixels) - float phase; // interpolation phase t in (0,1); 0.5 == single mid frame - float occlusionLo; // luma-consistency: fully trusted at/below this delta - float occlusionHi; // fully rejected at/above this delta - float _pad; + float phase; // synthesis phase t in (0,1) + float 
occlusionLo; // consistency window: fully trusted at/below this delta + float occlusionHi; // fully snapped at/above this delta + float mode; // 0 standard, 1 bidirectional, 2 extrapolate } pc; -float luma(vec3 c) { return dot(c, vec3(0.299, 0.587, 0.114)); } - bool offFrame(highp vec2 uv) { - return any(lessThan(uv, vec2(0.0))) || any(greaterThan(uv, vec2(1.0))); + return uv.x < 0.0 || uv.y < 0.0 || uv.x > 1.0 || uv.y > 1.0; } vec2 med3(vec2 a, vec2 b, vec2 c) { return max(min(a, b), min(max(a, b), c)); } - -vec2 sampleMV(highp vec2 uv, highp vec2 texel) { - vec2 r0 = med3(texture(motionField, uv + texel * vec2(-1.0, -1.0)).xy, - texture(motionField, uv + texel * vec2( 0.0, -1.0)).xy, - texture(motionField, uv + texel * vec2( 1.0, -1.0)).xy); - vec2 r1 = med3(texture(motionField, uv + texel * vec2(-1.0, 0.0)).xy, - texture(motionField, uv).xy, - texture(motionField, uv + texel * vec2( 1.0, 0.0)).xy); - vec2 r2 = med3(texture(motionField, uv + texel * vec2(-1.0, 1.0)).xy, - texture(motionField, uv + texel * vec2( 0.0, 1.0)).xy, - texture(motionField, uv + texel * vec2( 1.0, 1.0)).xy); +vec3 med3v(vec3 a, vec3 b, vec3 c) { return max(min(a, b), min(max(a, b), c)); } + +// 3x3 median of the half-res flow field (kills block-match outliers before warping). +vec2 sampleMV(highp sampler2D field, highp vec2 uv, highp vec2 texel) { + vec2 r0 = med3(texture(field, uv + texel * vec2(-1.0, -1.0)).xy, + texture(field, uv + texel * vec2( 0.0, -1.0)).xy, + texture(field, uv + texel * vec2( 1.0, -1.0)).xy); + vec2 r1 = med3(texture(field, uv + texel * vec2(-1.0, 0.0)).xy, + texture(field, uv).xy, + texture(field, uv + texel * vec2( 1.0, 0.0)).xy); + vec2 r2 = med3(texture(field, uv + texel * vec2(-1.0, 1.0)).xy, + texture(field, uv + texel * vec2( 0.0, 1.0)).xy, + texture(field, uv + texel * vec2( 1.0, 1.0)).xy); return med3(r0, r1, r2); } void main() { - float t = clamp(pc.phase > 0.0 ? 
pc.phase : 0.5, 0.0, 1.0); + float t = clamp(pc.phase, 0.0, 1.0); float lo = pc.occlusionLo > 0.0 ? pc.occlusionLo : 0.06; float hi = pc.occlusionHi > lo ? pc.occlusionHi : 0.25; - // motionField is half-res and stores curr->prev displacement in half-res pixels. - // Normalized displacement = mv_halfPx / mvSize = mv_halfPx * 2 / fullResSize. - highp vec2 mvTexel = 2.0 / pc.resolution; - vec2 mvNorm = sampleMV(vUV, mvTexel) * 2.0 / pc.resolution; + // motionField is half-res, stores displacement in half-res pixels. Normalize: mv * 2 / fullRes. + highp vec2 norm = 2.0 / pc.resolution; + + vec2 mvB = sampleMV(motionField, vUV, norm); // backward curr->prev (9-tap median) + vec2 mvBn = mvB * norm; + + vec3 cCurrFlat = texture(currFrame, vUV).rgb; + vec3 cPrevFlat = texture(prevFrame, vUV).rgb; - // Linear-trajectory motion compensation. For an intermediate pixel p (== vUV) - // with backward flow mv (curr->prev): - // currPos = p - (1 - t) * mv prevPos = p + t * mv - highp vec2 prevPos = vUV + t * mvNorm; - highp vec2 currPos = vUV - (1.0 - t) * mvNorm; + // Static guard at full resolution: a pixel whose colour is unchanged between the two real + // frames is static (HUD, text, sync bars, unmoving background) and must never be warped. This + // is per-pixel and exact, so it catches thin high-contrast overlays that the coarse block-match + // static mask (motionField.z) misses next to moving content. + float staticMask = texture(motionField, vUV).z; + float staticPix = max(staticMask, 1.0 - smoothstep(0.02, 0.06, length(cCurrFlat - cPrevFlat))); + + if (pc.mode > 1.5) { + // Extrapolation: out(x, N+t) = curr(x + t*mvB(x)). Linear motion only. + highp vec2 srcPos = vUV + t * mvBn; + vec3 cWarp = texture(currFrame, srcPos).rgb; + // The flow at the source must agree with the flow here, else this pixel is being + // revealed and the warp would smear the object. Motion-proportional tolerance. 
+ vec2 mvBsrc = sampleMV(motionField, srcPos, norm); + vec2 dv = mvB - mvBsrc; + float tolE = 0.01 * dot(mvB, mvB) + 0.5; + float occ = smoothstep(tolE, 4.0 * tolE + 2.0, dot(dv, dv)); + if (offFrame(srcPos)) occ = 1.0; + occ = max(occ, staticPix); // static overlays / unchanged pixels stay anchored + outColor = vec4(clamp(mix(cWarp, cCurrFlat, occ), 0.0, 1.0), 1.0); + return; + } + + // ---- INTERPOLATION ---- + highp vec2 currPos = vUV - (1.0 - t) * mvBn; // curr sampled along the backward flow + highp vec2 prevPos; + float occGeo = 0.0; + + if (pc.mode > 0.5) { + // Bidirectional: prev warps along its own forward flow; |mvB+mvF| ~ 0 for a coherent + // feature. The forward-backward residual is compared against a motion-proportional + // tolerance wide enough to ignore plain block-match search noise (~1px). + vec2 mvF = texture(motionFieldFwd, vUV).xy; + prevPos = vUV - t * (mvF * norm); + vec2 fbv = mvB + mvF; + float tol = 0.05 * (dot(mvB, mvB) + dot(mvF, mvF)) + 2.0; + occGeo = smoothstep(tol, 4.0 * tol + 4.0, dot(fbv, fbv)); + } else { + prevPos = vUV + t * mvBn; // single backward flow warps both + } vec3 cPrev = texture(prevFrame, prevPos).rgb; vec3 cCurr = texture(currFrame, currPos).rgb; - vec3 mc = mix(cPrev, cCurr, t); // motion-compensated blend - - // Trust = how consistent the two warps are, gated by on-frame-ness. - float disagree = abs(luma(cPrev) - luma(cCurr)); - bool off = offFrame(prevPos) || offFrame(currPos); - float trust = off ? 0.0 : 1.0 - smoothstep(lo, hi, disagree); - - // Fallback for untrusted pixels. Freezing them at the unwarped prev/curr frame (alpha 0/1) - // made interpolated frames land early: untrusted regions contributed zero motion, so the - // measured placement was ~trust*t instead of t (which is why the Smoothness/trust slider - // visibly shifted it). 
A phase-t crossfade keeps untrusted pixels advancing to ~alpha=t — - // static UI is identical in both frames so it stays ghost-free, and only genuine disocclusions - // pick up a faint blend instead of a hard catch-up step. Hard off-frame samples still snap to - // the nearest real frame so border disocclusions don't warp in out-of-image garbage. - vec3 cPrevFlat = texture(prevFrame, vUV).rgb; - vec3 cCurrFlat = texture(currFrame, vUV).rgb; - vec3 fallback = off ? ((t < 0.5) ? cPrevFlat : cCurrFlat) : mix(cPrevFlat, cCurrFlat, t); - outColor = vec4(clamp(mix(fallback, mc, trust), 0.0, 1.0), 1.0); + // Seam: RGB delta between the two motion-compensated samples, scaled ~[0,1]. + float disagree = length(cPrev - cCurr) * 0.5774; + float seam = smoothstep(lo, hi, disagree); + float fade = max(seam, occGeo); + + vec3 warped = mix(cPrev, cCurr, t); // where the warps agree + vec3 dissolve = mix(cPrevFlat, cCurrFlat, t); // non-warped phase blend + vec3 nearest = (t < 0.5) ? cPrevFlat : cCurrFlat; // time-nearest real frame (sharp) + // Per-channel median of the two one-sided warps and the dissolve, biased toward the sharp + // nearest real frame as the seam strengthens. The median alone can settle on the dissolve (a + // 50/50 blend = visible double-image); leaning it toward the nearest frame keeps strong seams + // sharp instead of ghosted, while good-flow regions (fade~0) are untouched. 
+ vec3 robust = mix(med3v(cPrev, cCurr, dissolve), nearest, fade); + + vec3 col = mix(warped, robust, fade); + if (offFrame(prevPos) || offFrame(currPos)) col = nearest; + col = mix(col, cCurrFlat, staticPix); // static overlays / unchanged pixels: unwarped + outColor = vec4(clamp(col, 0.0, 1.0), 1.0); } diff --git a/app/src/main/cpp/winlator/vk/shaders/motion.comp b/app/src/main/cpp/winlator/vk/shaders/motion.comp index d08e34ede..366e22e52 100644 --- a/app/src/main/cpp/winlator/vk/shaders/motion.comp +++ b/app/src/main/cpp/winlator/vk/shaders/motion.comp @@ -120,5 +120,16 @@ void main() { float dd = cu - 2.0 * bestCost + cd; if (dd > 0.0) sub.y += clamp(0.5 * (cu - cd) / dd, -0.5, 0.5); } - imageStore(motionField, p, vec4(sub * pc.mvScale, 0.0, 0.0)); + + // Static-overlay (HUD/text) confidence -> .z: near-zero temporal delta at zero displacement + // plus a strong spatial gradient marks high-contrast UI that must never be warped. Soft on + // both axes so the interp's bilinear read feathers the mask edge (no hard seams). 
+ float c0 = float(sCurr[cCenter.y * TILE_C + cCenter.x]); + float p0 = float(sPrev[(l.y + RMAX + BR) * TILE_P + (l.x + RMAX + BR)]); + float gx = abs(float(sCurr[cCenter.y * TILE_C + cCenter.x + 1]) - c0); + float gy = abs(float(sCurr[(cCenter.y + 1) * TILE_C + cCenter.x]) - c0); + float staticC = (1.0 - smoothstep(0.012, 0.025, abs(c0 - p0))) + * smoothstep(0.05, 0.12, gx + gy); + + imageStore(motionField, p, vec4(sub * pc.mvScale, staticC, 0.0)); } diff --git a/app/src/main/cpp/winlator/vk/shaders/motion_fp32.comp b/app/src/main/cpp/winlator/vk/shaders/motion_fp32.comp index 6e672c107..9d6957522 100644 --- a/app/src/main/cpp/winlator/vk/shaders/motion_fp32.comp +++ b/app/src/main/cpp/winlator/vk/shaders/motion_fp32.comp @@ -105,5 +105,16 @@ void main() { float dd = cu - 2.0 * bestCost + cd; if (dd > 0.0) sub.y += clamp(0.5 * (cu - cd) / dd, -0.5, 0.5); } - imageStore(motionField, p, vec4(sub * pc.mvScale, 0.0, 0.0)); + + // Static-overlay (HUD/text) confidence -> .z: near-zero temporal delta at zero displacement + // plus a strong spatial gradient marks high-contrast UI that must never be warped. Soft on + // both axes so the interp's bilinear read feathers the mask edge (no hard seams). 
+ float c0 = sCurr[cCenter.y * TILE_C + cCenter.x]; + float p0 = sPrev[(l.y + RMAX + BR) * TILE_P + (l.x + RMAX + BR)]; + float gx = abs(sCurr[cCenter.y * TILE_C + cCenter.x + 1] - c0); + float gy = abs(sCurr[(cCenter.y + 1) * TILE_C + cCenter.x] - c0); + float staticC = (1.0 - smoothstep(0.012, 0.025, abs(c0 - p0))) + * smoothstep(0.05, 0.12, gx + gy); + + imageStore(motionField, p, vec4(sub * pc.mvScale, staticC, 0.0)); } diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index 8ddfe4d03..dbf28962e 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -19,6 +19,11 @@ #include #include +#include +#include +#include +#include +#include #include #include #include @@ -629,7 +634,7 @@ static bool create_descriptor_pool(VkRenderer* r, uint32_t capacity) { ps[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; ps[0].descriptorCount = capacity; ps[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - ps[1].descriptorCount = 8; + ps[1].descriptorCount = 16; // backward + forward motion fields, 3 parities each VkDescriptorPoolCreateInfo ci = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO}; ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; @@ -813,16 +818,16 @@ static bool create_pipeline_layouts(VkRenderer* r) { return false; } - // interpolate.frag set 0: prev,curr,motion combined-image-samplers, FRAGMENT. - VkDescriptorSetLayoutBinding ib[3] = {0}; - for (uint32_t i = 0; i < 3; i++) { + // interpolate.frag set 0: prev,curr,mvBwd,mvFwd combined-image-samplers, FRAGMENT. 
+ VkDescriptorSetLayoutBinding ib[4] = {0}; + for (uint32_t i = 0; i < 4; i++) { ib[i].binding = i; ib[i].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; ib[i].descriptorCount = 1; ib[i].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; } VkDescriptorSetLayoutCreateInfo dl_i = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; - dl_i.bindingCount = 3; dl_i.pBindings = ib; + dl_i.bindingCount = 4; dl_i.pBindings = ib; if (vkCreateDescriptorSetLayout(r->device, &dl_i, NULL, &r->pipelines.fg_interp_layout) != VK_SUCCESS) { return false; } @@ -1271,6 +1276,10 @@ static bool create_swapchain(VkRenderer* r, uint32_t fallback_width, uint32_t fa caps.currentTransform, pre_transform, present_mode); uint32_t image_count = caps.minImageCount + 1; + // FG runs VK_FRAMES_IN_FLIGHT frames CPU-ahead of the GPU; the swapchain needs at least one more + // image than that (FIF in flight + 1 scanning out) or vkAcquireNextImageKHR blocks and defeats the + // pipelining — even under FIFO. This is what lets FIF=3 absorb the GPU's per-frame composite spikes. + if (image_count < VK_FRAMES_IN_FLIGHT + 1u) image_count = VK_FRAMES_IN_FLIGHT + 1u; // Non-blocking modes (MAILBOX/IMMEDIATE) need headroom so FG interps aren't dropped at acquire. if (present_mode != VK_PRESENT_MODE_FIFO_KHR && image_count < 4) image_count = 4; if (caps.maxImageCount > 0 && image_count > caps.maxImageCount) image_count = caps.maxImageCount; @@ -2050,11 +2059,21 @@ static void record_scene_chain(VkRenderer* r, VkCommandBuffer cmd, const VkScene } } +// Live frames-in-flight (the drawer's Buffering dial). All VK_FRAMES_IN_FLIGHT frame slots stay +// allocated; only the rotation depth changes, so applying a new value needs no rebuild. Lower depth +// = the CPU waits on the GPU sooner = less buffered latency, more exposure to GPU spikes. 
+static inline uint32_t vkr_active_fif(VkRenderer* r) { + uint32_t fif = r->fg_target_fif; + if (fif < 1u || fif > VK_FRAMES_IN_FLIGHT) fif = VK_FRAMES_IN_FLIGHT; + return fif; +} + static bool record_and_submit_frame(VkRenderer* r) { if (!r->surface_ready || !r->swapchain) return false; pthread_mutex_lock(&r->render_mutex); + if (r->frame_index >= vkr_active_fif(r)) r->frame_index = 0; // live Buffering shrink VkFrame* f = &r->frames[r->frame_index]; uint32_t grave_slot = r->graveyard_index; @@ -2167,7 +2186,7 @@ static bool record_and_submit_frame(VkRenderer* r) { pthread_mutex_unlock(&r->render_mutex); - r->frame_index = (r->frame_index + 1) % VK_FRAMES_IN_FLIGHT; + r->frame_index = (r->frame_index + 1) % vkr_active_fif(r); r->graveyard_index = (r->graveyard_index + 1) % (VK_FRAMES_IN_FLIGHT + 1); return true; @@ -2203,7 +2222,9 @@ static void fg_destroy_resources(VkRenderer* r) { if (!r->device) return; for (uint32_t p = 0; p < 3; p++) { if (r->fg_motion_set[p]) { fg_free_set(r, r->fg_motion_set[p]); r->fg_motion_set[p] = VK_NULL_HANDLE; } + if (r->fg_motion_set_fwd[p]) { fg_free_set(r, r->fg_motion_set_fwd[p]); r->fg_motion_set_fwd[p] = VK_NULL_HANDLE; } if (r->fg_interp_set[p]) { fg_free_set(r, r->fg_interp_set[p]); r->fg_interp_set[p] = VK_NULL_HANDLE; } + if (r->fg_interp_set_deep[p]) { fg_free_set(r, r->fg_interp_set_deep[p]); r->fg_interp_set_deep[p] = VK_NULL_HANDLE; } } memset(r->fg_slot_fence, 0, sizeof(r->fg_slot_fence)); for (uint32_t i = 0; i < 3; i++) { @@ -2215,10 +2236,18 @@ static void fg_destroy_resources(VkRenderer* r) { if (o->memory) vkFreeMemory(r->device, o->memory, NULL); memset(o, 0, sizeof(*o)); } - if (r->fg_motion.view) vkDestroyImageView(r->device, r->fg_motion.view, NULL); - if (r->fg_motion.image) vkDestroyImage(r->device, r->fg_motion.image, NULL); - if (r->fg_motion.memory) vkFreeMemory(r->device, r->fg_motion.memory, NULL); - memset(&r->fg_motion, 0, sizeof(r->fg_motion)); + for (uint32_t mi = 0; mi < 3; mi++) { + VkFgImage* m = 
&r->fg_motion[mi]; + if (m->view) vkDestroyImageView(r->device, m->view, NULL); + if (m->image) vkDestroyImage(r->device, m->image, NULL); + if (m->memory) vkFreeMemory(r->device, m->memory, NULL); + VkFgImage* mf = &r->fg_motion_fwd[mi]; + if (mf->view) vkDestroyImageView(r->device, mf->view, NULL); + if (mf->image) vkDestroyImage(r->device, mf->image, NULL); + if (mf->memory) vkFreeMemory(r->device, mf->memory, NULL); + } + memset(r->fg_motion, 0, sizeof(r->fg_motion)); + memset(r->fg_motion_fwd, 0, sizeof(r->fg_motion_fwd)); if (r->fg_sampler) { vkDestroySampler(r->device, r->fg_sampler, NULL); r->fg_sampler = VK_NULL_HANDLE; } r->fg_built = false; r->fg_history_count = 0; @@ -2273,9 +2302,8 @@ static bool fg_create_color_target(VkRenderer* r, VkFgImage* o, uint32_t w, uint return true; } -// Half-res rgba16f backward-flow field: storage (motion.comp write) + sampled (interpolate.frag). -static bool fg_create_motion(VkRenderer* r, uint32_t w, uint32_t h) { - VkFgImage* o = &r->fg_motion; +// Half-res rgba16f flow field: storage (motion.comp write) + sampled (interpolate.frag). +static bool fg_create_motion(VkRenderer* r, VkFgImage* o, uint32_t w, uint32_t h) { o->width = w; o->height = h; o->framebuffer = VK_NULL_HANDLE; VkImageCreateInfo ic = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO}; ic.imageType = VK_IMAGE_TYPE_2D; @@ -2314,13 +2342,21 @@ static bool fg_create_resources(VkRenderer* r, uint32_t w, uint32_t h) { if (!fg_create_color_target(r, &r->fg_history[0], w, h)) goto fail; if (!fg_create_color_target(r, &r->fg_history[1], w, h)) goto fail; if (!fg_create_color_target(r, &r->fg_history[2], w, h)) goto fail; - if (!fg_create_motion(r, (w / 2) ? (w / 2) : 1u, (h / 2) ? (h / 2) : 1u)) goto fail; + { + uint32_t mw = (w / 2) ? (w / 2) : 1u, mh = (h / 2) ? 
(h / 2) : 1u; + for (uint32_t mi = 0; mi < 3; mi++) { + if (!fg_create_motion(r, &r->fg_motion[mi], mw, mh)) goto fail; + if (!fg_create_motion(r, &r->fg_motion_fwd[mi], mw, mh)) goto fail; + } + } memset(r->fg_slot_fence, 0, sizeof(r->fg_slot_fence)); for (uint32_t p = 0; p < 3; p++) { r->fg_motion_set[p] = fg_alloc_set(r, r->pipelines.fg_motion_layout); + r->fg_motion_set_fwd[p] = fg_alloc_set(r, r->pipelines.fg_motion_layout); r->fg_interp_set[p] = fg_alloc_set(r, r->pipelines.fg_interp_layout); - if (!r->fg_motion_set[p] || !r->fg_interp_set[p]) goto fail; + r->fg_interp_set_deep[p] = fg_alloc_set(r, r->pipelines.fg_interp_layout); + if (!r->fg_motion_set[p] || !r->fg_motion_set_fwd[p] || !r->fg_interp_set[p] || !r->fg_interp_set_deep[p]) goto fail; VkImageView prevV = r->fg_history[(p + 2u) % 3u].view; // curr=history[p], prev=history[(p+2)%3] VkImageView currV = r->fg_history[p].view; @@ -2328,7 +2364,7 @@ static bool fg_create_resources(VkRenderer* r, uint32_t w, uint32_t h) { // motion.comp set: b0 prev (sampled), b1 curr (sampled), b2 motion (storage, GENERAL) VkDescriptorImageInfo mPrev = { r->fg_sampler, prevV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; VkDescriptorImageInfo mCurr = { r->fg_sampler, currV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; - VkDescriptorImageInfo mMv = { VK_NULL_HANDLE, r->fg_motion.view, VK_IMAGE_LAYOUT_GENERAL }; + VkDescriptorImageInfo mMv = { VK_NULL_HANDLE, r->fg_motion[p].view, VK_IMAGE_LAYOUT_GENERAL }; VkWriteDescriptorSet mw_[3] = {0}; for (int b = 0; b < 3; b++) { mw_[b].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; mw_[b].dstSet = r->fg_motion_set[p]; mw_[b].dstBinding = (uint32_t)b; mw_[b].descriptorCount = 1; } mw_[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; mw_[0].pImageInfo = &mPrev; @@ -2336,20 +2372,43 @@ static bool fg_create_resources(VkRenderer* r, uint32_t w, uint32_t h) { mw_[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; mw_[2].pImageInfo = &mMv; 
vkUpdateDescriptorSets(r->device, 3, mw_, 0, NULL); - // interpolate.frag set: b0 prev, b1 curr, b2 motion — all sampled (SHADER_READ). - VkDescriptorImageInfo iPrev = { r->fg_sampler, prevV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; - VkDescriptorImageInfo iCurr = { r->fg_sampler, currV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; - VkDescriptorImageInfo iMv = { r->fg_sampler, r->fg_motion.view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; - VkWriteDescriptorSet iw_[3] = {0}; - for (int b = 0; b < 3; b++) { iw_[b].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; iw_[b].dstSet = r->fg_interp_set[p]; iw_[b].dstBinding = (uint32_t)b; iw_[b].descriptorCount = 1; iw_[b].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; } - iw_[0].pImageInfo = &iPrev; iw_[1].pImageInfo = &iCurr; iw_[2].pImageInfo = &iMv; - vkUpdateDescriptorSets(r->device, 3, iw_, 0, NULL); + // forward motion set: prev/curr SWAPPED so motion.comp emits the prev->curr flow into fg_motion_fwd. + VkDescriptorImageInfo fPrev = { r->fg_sampler, currV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; + VkDescriptorImageInfo fCurr = { r->fg_sampler, prevV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; + VkDescriptorImageInfo fMv = { VK_NULL_HANDLE, r->fg_motion_fwd[p].view, VK_IMAGE_LAYOUT_GENERAL }; + VkWriteDescriptorSet fw_[3] = {0}; + for (int b = 0; b < 3; b++) { fw_[b].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; fw_[b].dstSet = r->fg_motion_set_fwd[p]; fw_[b].dstBinding = (uint32_t)b; fw_[b].descriptorCount = 1; } + fw_[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; fw_[0].pImageInfo = &fPrev; + fw_[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; fw_[1].pImageInfo = &fCurr; + fw_[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; fw_[2].pImageInfo = &fMv; + vkUpdateDescriptorSets(r->device, 3, fw_, 0, NULL); + + // interpolate.frag set: b0 prev, b1 curr, b2 mvBwd, b3 mvFwd — all sampled (SHADER_READ). 
+ // Standard binds fg_motion as the (unread) b3 dummy; deep binds the real forward field. + VkDescriptorImageInfo iPrev = { r->fg_sampler, prevV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; + VkDescriptorImageInfo iCurr = { r->fg_sampler, currV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; + VkDescriptorImageInfo iMv = { r->fg_sampler, r->fg_motion[p].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; + VkDescriptorImageInfo iMvFwd = { r->fg_sampler, r->fg_motion_fwd[p].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; + VkWriteDescriptorSet iw_[4] = {0}; + for (int b = 0; b < 4; b++) { iw_[b].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; iw_[b].dstSet = r->fg_interp_set[p]; iw_[b].dstBinding = (uint32_t)b; iw_[b].descriptorCount = 1; iw_[b].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; } + iw_[0].pImageInfo = &iPrev; iw_[1].pImageInfo = &iCurr; iw_[2].pImageInfo = &iMv; iw_[3].pImageInfo = &iMv; + vkUpdateDescriptorSets(r->device, 4, iw_, 0, NULL); + + // Quality interp set: same newest pair as Standard, but bidirectional — b2 backward + // (fg_motion) + b3 forward (fg_motion_fwd), both computed in-path (lazy) each pair. 
+ VkDescriptorImageInfo dPrev = { r->fg_sampler, prevV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; + VkDescriptorImageInfo dCurr = { r->fg_sampler, currV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; + VkWriteDescriptorSet dw_[4] = {0}; + for (int b = 0; b < 4; b++) { dw_[b].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; dw_[b].dstSet = r->fg_interp_set_deep[p]; dw_[b].dstBinding = (uint32_t)b; dw_[b].descriptorCount = 1; dw_[b].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; } + dw_[0].pImageInfo = &dPrev; dw_[1].pImageInfo = &dCurr; dw_[2].pImageInfo = &iMv; dw_[3].pImageInfo = &iMvFwd; + vkUpdateDescriptorSets(r->device, 4, dw_, 0, NULL); } r->fg_dims.width = w; r->fg_dims.height = h; r->fg_history_curr = 0; r->fg_history_count = 0; r->fg_motion_valid = false; // freshly created motion image — force a recompute before reuse + r->fg_motion_fwd_valid = false; r->fg_built = true; return true; @@ -2471,8 +2530,12 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { return false; } + if (r->frame_index >= vkr_active_fif(r)) r->frame_index = 0; // live Buffering shrink VkFrame* f = &r->frames[r->frame_index]; + uint64_t fw0 = now_monotonic_ns(); vkWaitForFences(r->device, 1, &f->in_flight, VK_TRUE, UINT64_MAX); + { double fw = (double)(now_monotonic_ns() - fw0) / 1.0e6; // GL-thread block on the FIF=2 in_flight fence + r->fg_fw_sum_ms += fw; if (fw > r->fg_fw_max_ms) r->fg_fw_max_ms = fw; r->fg_fw_n++; } if (!fg_ensure_resources(r)) { pthread_mutex_unlock(&r->render_mutex); return false; } @@ -2515,17 +2578,21 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { } r->fg_history_curr = next; r->fg_slot_fence[next] = f->in_flight; - if (r->fg_history_count < 2) r->fg_history_count++; + if (r->fg_history_count < 3) r->fg_history_count++; r->fg_motion_valid = false; // new history pair — flow must be recomputed on the next interp + r->fg_motion_fwd_valid = false; g_fg_holds++; pthread_mutex_unlock(&r->render_mutex); - 
r->frame_index = (r->frame_index + 1) % VK_FRAMES_IN_FLIGHT; + r->frame_index = (r->frame_index + 1) % vkr_active_fif(r); r->graveyard_index = (r->graveyard_index + 1) % (VK_FRAMES_IN_FLIGHT + 1); return true; } // -------- INTERP / PRESENT_LAST: acquire swapchain image, present -------- - bool do_interp = (mode == FG_MODE_INTERP) && (r->fg_history_count >= 2); + // Quality (deep flag) interpolates the same newest pair as Standard, adding a forward flow + // for a bidirectional warp; the flow cost is amortized across the pair's interps. + bool deep = r->fg_deep_mode && (r->fg_history_count >= 2u); + bool do_interp = (mode == FG_MODE_INTERP) && (r->fg_history_count >= 2u); // Interps are optional: under a non-blocking mode acquire without waiting, so a panel that can't run // ahead skips the synthetic frame instead of stalling. PRESENT_LAST always blocks (never dropped). @@ -2560,8 +2627,11 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { vkResetFences(r->device, 1, &f->in_flight); uint32_t parity = r->fg_history_curr; + // Both modes show the newest pair (history[(parity+2)%3] -> history[parity]); curr is the frame + // PRESENT_LAST blits. Quality differs only by adding the forward flow + bidirectional warp below. + uint32_t curr_idx = parity; uint32_t prev_idx = (parity + 2u) % 3u; - VkFgImage* curr = &r->fg_history[parity]; + VkFgImage* curr = &r->fg_history[curr_idx]; // Advance the vsync-aligned present deadline for the pacer (fg_sleep_to_deadline). The interp phase // is left as the cadence's k/(N+1): an even interior position, never a real-frame endpoint. A @@ -2582,10 +2652,18 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { if (do_interp) { VkFgImage* prev = &r->fg_history[prev_idx]; - // Make the HOLD color writes visible to the reads below (compute when recomputing flow, - // else just the fragment interp draw). - VkPipelineStageFlags hist_dst = r->fg_motion_valid - ? 
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT : VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + // First interp of each pair recomputes the flow in-path (lazy); later interps reuse it. + // Make the HOLD color writes visible to the reads below (compute when recomputing, else the + // fragment interp draw). + bool compute_bwd = !r->fg_motion_valid; + // Forward flow (Quality): on the first interp whose phase reaches mid-pair. At 3x/4x that is + // the pair's second interp, spreading the two searches across two presents so no single + // present carries both under tight 90/120Hz budgets. At 2x there is only one interp + // (phase 0.5), so both searches share its command buffer — affordable in a 60Hz slot. + bool compute_fwd = deep && !r->fg_extrapolate && !r->fg_motion_fwd_valid && phase >= 0.45f; + bool any_compute = compute_bwd || compute_fwd; + VkPipelineStageFlags hist_dst = any_compute + ? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT : VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; vkr_image_barrier(f->cmd, prev->image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, hist_dst, @@ -2594,33 +2672,55 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, hist_dst, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - if (!r->fg_motion_valid) { - // motion field -> GENERAL, dispatch block matching (wait for the prior pair's interp reads). 
- vkr_image_barrier(f->cmd, r->fg_motion.image, + struct { int32_t mvW, mvH; float invW, invH, mvScale, minStep, p1, p2; } mpc; + mpc.mvW = (int32_t)r->fg_motion[parity].width; mpc.mvH = (int32_t)r->fg_motion[parity].height; + mpc.invW = 1.0f / (float)r->fg_motion[parity].width; + mpc.invH = 1.0f / (float)r->fg_motion[parity].height; + mpc.mvScale = 1.0f; mpc.minStep = (float)r->fg_min_step; mpc.p1 = mpc.p2 = 0.0f; + if (compute_bwd) { + // Backward flow (curr->prev) -> fg_motion[parity], 1st interp of the pair (both modes). + vkr_image_barrier(f->cmd, r->fg_motion[parity].image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, VK_ACCESS_SHADER_WRITE_BIT); vkCmdBindPipeline(f->cmd, VK_PIPELINE_BIND_POINT_COMPUTE, r->pipelines.fg_motion_pipeline); vkCmdBindDescriptorSets(f->cmd, VK_PIPELINE_BIND_POINT_COMPUTE, r->pipelines.fg_motion_pipe_layout, 0, 1, &r->fg_motion_set[parity], 0, NULL); - struct { int32_t mvW, mvH; float invW, invH, mvScale, minStep, p1, p2; } mpc; - mpc.mvW = (int32_t)r->fg_motion.width; mpc.mvH = (int32_t)r->fg_motion.height; - mpc.invW = 1.0f / (float)r->fg_motion.width; - mpc.invH = 1.0f / (float)r->fg_motion.height; - mpc.mvScale = 1.0f; mpc.minStep = (float)r->fg_min_step; mpc.p1 = mpc.p2 = 0.0f; vkCmdPushConstants(f->cmd, r->pipelines.fg_motion_pipe_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(mpc), &mpc); - vkCmdDispatch(f->cmd, (r->fg_motion.width + 7u) / 8u, (r->fg_motion.height + 7u) / 8u, 1); - // motion field -> SHADER_READ for the interpolation draw. 
- vkr_image_barrier(f->cmd, r->fg_motion.image, + vkCmdDispatch(f->cmd, (r->fg_motion[parity].width + 7u) / 8u, (r->fg_motion[parity].height + 7u) / 8u, 1); + vkr_image_barrier(f->cmd, r->fg_motion[parity].image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); r->fg_motion_valid = true; } else { - // Flow reused from this pair's first interp (multi-frame 4x/6x). Re-establish the - // compute-write -> fragment-read dependency in this submit; no layout change. - vkr_image_barrier(f->cmd, r->fg_motion.image, + // Backward flow reused (later interps of the pair). Re-establish compute-write -> fragment-read. + vkr_image_barrier(f->cmd, r->fg_motion[parity].image, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + } + if (compute_fwd) { + // Quality forward flow (prev->curr) -> fg_motion_fwd[parity]. 
+ vkr_image_barrier(f->cmd, r->fg_motion_fwd[parity].image, + VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, VK_ACCESS_SHADER_WRITE_BIT); + vkCmdBindPipeline(f->cmd, VK_PIPELINE_BIND_POINT_COMPUTE, r->pipelines.fg_motion_pipeline); + vkCmdBindDescriptorSets(f->cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + r->pipelines.fg_motion_pipe_layout, 0, 1, &r->fg_motion_set_fwd[parity], 0, NULL); + vkCmdPushConstants(f->cmd, r->pipelines.fg_motion_pipe_layout, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(mpc), &mpc); + vkCmdDispatch(f->cmd, (r->fg_motion_fwd[parity].width + 7u) / 8u, (r->fg_motion_fwd[parity].height + 7u) / 8u, 1); + vkr_image_barrier(f->cmd, r->fg_motion_fwd[parity].image, + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + r->fg_motion_fwd_valid = true; + } else if (deep && r->fg_motion_fwd_valid) { + // Forward flow reused. Re-establish its compute-write -> fragment-read dep. + vkr_image_barrier(f->cmd, r->fg_motion_fwd[parity].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); @@ -2643,11 +2743,17 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { if (do_interp) { vkCmdBindPipeline(f->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, r->pipelines.fg_interp_pipeline); + // Bidirectional only once the forward flow is ready (2nd interp onward); the 1st interp of a + // Quality pair runs single-direction (like Standard) while the forward search is still pending. + // Extrapolation overrides both: single-image forward warp along the backward flow (mode 2). 
+ bool use_fwd = deep && r->fg_motion_fwd_valid && !r->fg_extrapolate; vkCmdBindDescriptorSets(f->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, - r->pipelines.fg_interp_pipe_layout, 0, 1, &r->fg_interp_set[parity], 0, NULL); - struct { float resW, resH, phase, occLo, occHi, pad; } ipc; + r->pipelines.fg_interp_pipe_layout, 0, 1, + use_fwd ? &r->fg_interp_set_deep[parity] : &r->fg_interp_set[parity], 0, NULL); + struct { float resW, resH, phase, occLo, occHi, mode; } ipc; ipc.resW = (float)r->swapchain_extent.width; ipc.resH = (float)r->swapchain_extent.height; - ipc.phase = phase; ipc.occLo = r->fg_occ_lo; ipc.occHi = r->fg_occ_hi; ipc.pad = 0.0f; + ipc.phase = phase; ipc.occLo = r->fg_occ_lo; ipc.occHi = r->fg_occ_hi; + ipc.mode = r->fg_extrapolate ? 2.0f : (use_fwd ? 1.0f : 0.0f); vkCmdPushConstants(f->cmd, r->pipelines.fg_interp_pipe_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(ipc), &ipc); vkCmdDraw(f->cmd, 3, 1, 0, 0); @@ -2676,7 +2782,7 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { pthread_mutex_unlock(&r->render_mutex); return false; } - r->fg_slot_fence[parity] = f->in_flight; + r->fg_slot_fence[curr_idx] = f->in_flight; if (do_interp) r->fg_slot_fence[prev_idx] = f->in_flight; VkPresentInfoKHR pinfo = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR}; @@ -2706,9 +2812,13 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { (unsigned long long)g_fg_holds, (unsigned long long)g_fg_interp, (unsigned long long)g_fg_plast, (unsigned long long)g_fg_dropped, (unsigned long long)r->fg_present_count); - VK_LOGI("FG timing: n=%u mean=%.2fms cov=%.0f%% min=%.2f max=%.2f", + VK_LOGI("FG timing: n=%u mean=%.2fms cov=%.0f%% min=%.2f max=%.2f [%s]", r->fg_t_count, mean, mean > 0.0 ? 100.0 * sd / mean : 0.0, - r->fg_t_count ? r->fg_t_min_ms : 0.0, r->fg_t_count ? r->fg_t_max_ms : 0.0); + r->fg_t_count ? r->fg_t_min_ms : 0.0, r->fg_t_count ? r->fg_t_max_ms : 0.0, + r->fg_deep_mode ? 
"quality" : "standard"); + VK_LOGI("FG fence-wait: n=%u mean=%.3fms max=%.3fms (GL-thread block on in_flight before present)", + r->fg_fw_n, r->fg_fw_n ? r->fg_fw_sum_ms / r->fg_fw_n : 0.0, r->fg_fw_max_ms); + r->fg_fw_sum_ms = 0.0; r->fg_fw_max_ms = 0.0; r->fg_fw_n = 0; char pbuf[64]; int poff = 0; for (uint32_t i = 0; i < r->fg_dbg_done_n && i < 8u; i++) poff += snprintf(pbuf + poff, sizeof(pbuf) - (size_t)poff, "%.2f ", r->fg_dbg_done[i]); @@ -2728,7 +2838,7 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { r->surface_ready = create_swapchain(r, r->surface_extent.width, r->surface_extent.height); } pthread_mutex_unlock(&r->render_mutex); - r->frame_index = (r->frame_index + 1) % VK_FRAMES_IN_FLIGHT; + r->frame_index = (r->frame_index + 1) % vkr_active_fif(r); return true; } @@ -2738,6 +2848,71 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { #define JNI_FN(name) Java_com_winlator_cmod_runtime_display_renderer_VulkanRenderer_##name +// ===== Native FG pump ================================================================= +// A native pthread running its own ALooper + AChoreographer: no Java Looper/HandlerThread +// lifecycle to lose, vsync-accurate. Each vsync it calls back into +// VulkanRenderer.fgPumpTickFromNative(frameTimeNanos); the present stays on the GL thread. 
+static JavaVM* g_pump_jvm = NULL; +static jobject g_pump_renderer = NULL; // global ref to the VulkanRenderer instance +static jmethodID g_pump_tick = NULL; +static pthread_t g_pump_thread; +static volatile int g_pump_running = 0; +static AChoreographer* g_pump_chor = NULL; + +static void fg_pump_frame(long frameTimeNanos, void* data) { + (void)data; + if (!g_pump_running) return; + JNIEnv* env = NULL; + if ((*g_pump_jvm)->GetEnv(g_pump_jvm, (void**)&env, JNI_VERSION_1_6) == JNI_OK + && env && g_pump_renderer && g_pump_tick) { + (*env)->CallVoidMethod(env, g_pump_renderer, g_pump_tick, (jlong)frameTimeNanos); + if ((*env)->ExceptionCheck(env)) (*env)->ExceptionClear(env); + } + if (g_pump_running && g_pump_chor) + AChoreographer_postFrameCallback(g_pump_chor, fg_pump_frame, NULL); +} + +static void* fg_pump_loop(void* arg) { + (void)arg; + prctl(PR_SET_NAME, "fg-pump", 0, 0, 0); // visible in /proc//task/*/comm for verification + setpriority(PRIO_PROCESS, 0, -8); // urgent-display: fire the vsync callback without scheduling lag + JNIEnv* env = NULL; + (*g_pump_jvm)->AttachCurrentThread(g_pump_jvm, &env, NULL); + ALooper_prepare(ALOOPER_PREPARE_ALLOW_NON_CALLBACKS); + g_pump_chor = AChoreographer_getInstance(); + if (g_pump_chor) AChoreographer_postFrameCallback(g_pump_chor, fg_pump_frame, NULL); + else VK_LOGE("AChoreographer_getInstance returned NULL"); + while (g_pump_running) ALooper_pollOnce(100, NULL, NULL, NULL); // vsync wakes it; 100ms re-checks running + g_pump_chor = NULL; + (*g_pump_jvm)->DetachCurrentThread(g_pump_jvm); + return NULL; +} + +JNIEXPORT void JNICALL JNI_FN(nativeFgPumpStart)(JNIEnv* env, jclass clazz, jobject renderer) { + (void)clazz; + if (g_pump_running) return; + (*env)->GetJavaVM(env, &g_pump_jvm); + g_pump_renderer = (*env)->NewGlobalRef(env, renderer); + jclass rc = (*env)->GetObjectClass(env, renderer); + g_pump_tick = (*env)->GetMethodID(env, rc, "fgPumpTickFromNative", "(J)V"); + if (!g_pump_tick) { 
VK_LOGE("fgPumpTickFromNative(J)V not found"); return; } + g_pump_running = 1; + if (pthread_create(&g_pump_thread, NULL, fg_pump_loop, NULL) != 0) { + g_pump_running = 0; VK_LOGE("native fg-pump pthread_create failed"); return; + } + VK_LOGI("native fg-pump started (AChoreographer pthread)"); +} + +JNIEXPORT void JNICALL JNI_FN(nativeFgPumpStop)(JNIEnv* env, jclass clazz) { + (void)clazz; + if (!g_pump_running) return; + g_pump_running = 0; + pthread_join(g_pump_thread, NULL); + if (g_pump_renderer) { (*env)->DeleteGlobalRef(env, g_pump_renderer); g_pump_renderer = NULL; } + g_pump_tick = NULL; + VK_LOGI("native fg-pump stopped"); +} + JNIEXPORT jlong JNICALL JNI_FN(nativeCreate)(JNIEnv* env, jclass clazz, jboolean enableValidationLayers, jstring driverName, @@ -3055,6 +3230,46 @@ JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGenParams)(JNIEnv* env, jclass clazz r->fg_min_step = minStep < 1 ? 1 : (minStep > 8 ? 8 : minStep); } +JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGenDeepMode)(JNIEnv* env, jclass clazz, jlong handle, jboolean deep) { + (void)env; (void)clazz; + VkRenderer* r = (VkRenderer*)(intptr_t)handle; + if (!r) return; + bool want = deep ? true : false; + if (want == r->fg_deep_mode) return; + pthread_mutex_lock(&r->render_mutex); + r->fg_deep_mode = want; + // Restart the cadence cleanly so the new mode warms up from scratch (a brief re-prime). + r->fg_history_count = 0; + r->fg_motion_valid = false; + r->fg_motion_fwd_valid = false; + pthread_mutex_unlock(&r->render_mutex); +} + +// Generation method: false = interpolation, true = extrapolation. Re-primes the cadence so the +// new method starts from a clean pair. +JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGenExtrapolate)(JNIEnv* env, jclass clazz, jlong handle, jboolean extrapolate) { + (void)env; (void)clazz; + VkRenderer* r = (VkRenderer*)(intptr_t)handle; + if (!r) return; + bool want = extrapolate ? 
true : false; + if (want == r->fg_extrapolate) return; + pthread_mutex_lock(&r->render_mutex); + r->fg_extrapolate = want; + r->fg_history_count = 0; + r->fg_motion_valid = false; + r->fg_motion_fwd_valid = false; + pthread_mutex_unlock(&r->render_mutex); +} + +// Requested compositor frames-in-flight (1..3); applied live via the frame-slot rotation depth. +JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGenFramesInFlight)(JNIEnv* env, jclass clazz, jlong handle, jint framesInFlight) { + (void)env; (void)clazz; + VkRenderer* r = (VkRenderer*)(intptr_t)handle; + if (!r) return; + uint32_t fif = framesInFlight < 1 ? 1u : (framesInFlight > 3 ? 3u : (uint32_t)framesInFlight); + r->fg_target_fif = fif; +} + JNIEXPORT void JNICALL JNI_FN(nativeSetVsyncTiming)(JNIEnv* env, jclass clazz, jlong handle, jlong periodNs, jlong displayPeriodNs, jlong vsyncNs) { (void)env; (void)clazz; VkRenderer* r = (VkRenderer*)(intptr_t)handle; diff --git a/app/src/main/cpp/winlator/vk/vk_state.h b/app/src/main/cpp/winlator/vk/vk_state.h index c2edeea09..c8432c0ad 100644 --- a/app/src/main/cpp/winlator/vk/vk_state.h +++ b/app/src/main/cpp/winlator/vk/vk_state.h @@ -20,7 +20,7 @@ #define VK_LOGW(...) __android_log_print(ANDROID_LOG_WARN, VK_LOG_TAG, __VA_ARGS__) #define VK_LOGE(...) 
__android_log_print(ANDROID_LOG_ERROR, VK_LOG_TAG, __VA_ARGS__) -#define VK_FRAMES_IN_FLIGHT 2 +#define VK_FRAMES_IN_FLIGHT 3 #define VK_MAX_SWAPCHAIN_IMAGES 8 #define VK_MAX_EFFECTS 8 #define VK_MAX_RENDERABLE_WINDOWS 64 @@ -180,7 +180,7 @@ typedef struct VkPipelineSet { // --- Frame generation (created once with the rest; persist across swapchain rebuilds) --- VkDescriptorSetLayout fg_motion_layout; // set0: binding0,1 sampler(prev,curr) + binding2 STORAGE_IMAGE(mv), COMPUTE - VkDescriptorSetLayout fg_interp_layout; // set0: 3x COMBINED_IMAGE_SAMPLER (prev,curr,mv), FRAGMENT + VkDescriptorSetLayout fg_interp_layout; // set0: 4x COMBINED_IMAGE_SAMPLER (prev,curr,mvBwd,mvFwd), FRAGMENT VkPipelineLayout fg_motion_pipe_layout; // [motion] set + 32B compute push range VkPipelineLayout fg_interp_pipe_layout; // [interp] set + 24B fragment push range VkPipeline fg_motion_pipeline; // compute (block matching) @@ -382,15 +382,24 @@ typedef struct VkRenderer { bool fg_built; // history + motion images allocated at fg_dims VkExtent2D fg_dims; // extent the fg images were built for VkFgImage fg_history[3]; // composited-scene ring; fg_history_curr = newest - VkFgImage fg_motion; // rgba16f half-res backward-flow field + VkFgImage fg_motion[3]; // per-parity rgba16f half-res backward-flow ring (1 per history + // slot): consecutive cycles write different buffers so the + // once-per-cycle motion compute pipelines instead of serializing. 
+ VkFgImage fg_motion_fwd[3]; // per-parity rgba16f half-res forward-flow ring (Quality bidirectional) VkSampler fg_sampler; // linear, clamp — for all fg sampled reads VkDescriptorSet fg_motion_set[3]; // [curr] prev,curr samplers + motion storage (motion.comp) - VkDescriptorSet fg_interp_set[3]; // [curr] prev,curr,motion samplers (interpolate.frag) + VkDescriptorSet fg_motion_set_fwd[3]; // [curr] swapped prev,curr + fwd-motion storage (forward pass) + VkDescriptorSet fg_interp_set[3]; // [curr] prev,curr,mvBwd,mvFwd samplers (interpolate.frag) + VkDescriptorSet fg_interp_set_deep[3]; // deep mode: interp the pair one step behind the newest VkFence fg_slot_fence[3]; // last submit that used each history slot uint32_t fg_history_curr; // index (0..2) of the most-recent composited frame - uint32_t fg_history_count; // 0,1,2 — valid history frames + uint32_t fg_history_count; // 0,1,2,3 — valid history frames uint64_t fg_present_count; // actual vkQueuePresentKHR calls; guarded by queue_mutex - bool fg_motion_valid; // motion field current for the live history pair (reused across multi-interp) + bool fg_motion_valid; // backward flow current for the live pair (reused across multi-interp) + bool fg_motion_fwd_valid; // forward flow current — computed on the 2nd interp (Quality) to spread cost + bool fg_deep_mode; // quality pipeline: bidirectional warp (adds a forward flow) + bool fg_extrapolate; // false=interpolate, true=extrapolate forward + uint32_t fg_target_fif; // requested compositor frames-in-flight 1..3 (applied live) float fg_occ_lo; // interpolate.frag consistency lower bound (smoothness) float fg_occ_hi; // interpolate.frag consistency upper bound (smoothness) int32_t fg_min_step; // motion.comp lowest TSS step (quality preset; 1 = full search) @@ -458,6 +467,9 @@ typedef struct VkRenderer { double fg_t_sumsq_ms; double fg_t_min_ms; double fg_t_max_ms; + double fg_fw_sum_ms; // in_flight fence-wait telemetry (GL-thread block before present) + double 
fg_fw_max_ms; + uint32_t fg_fw_n; float fg_dbg_phase[8]; // interp phases accumulated for the in-progress period float fg_dbg_done[8]; // last completed period's phases (logged in telemetry) uint32_t fg_dbg_n; diff --git a/app/src/main/feature/stores/steam/db/converters/AppConverter.kt b/app/src/main/feature/stores/steam/db/converters/AppConverter.kt index c1fe2a8c6..fb54b0517 100644 --- a/app/src/main/feature/stores/steam/db/converters/AppConverter.kt +++ b/app/src/main/feature/stores/steam/db/converters/AppConverter.kt @@ -13,6 +13,8 @@ import com.winlator.cmod.feature.stores.steam.enums.ReleaseState import kotlinx.serialization.json.Json import java.util.EnumSet +private val json = Json { ignoreUnknownKeys = true } + class AppConverter { @TypeConverter fun toAppType(appType: Int): AppType = AppType.fromCode(appType) @@ -39,38 +41,38 @@ class AppConverter { fun fromControllerSupport(controllerSupport: ControllerSupport): Int = controllerSupport.code @TypeConverter - fun toDepots(depots: String): Map = Json.decodeFromString>(depots) + fun toDepots(depots: String): Map = json.decodeFromString>(depots) @TypeConverter - fun fromDepots(depots: Map): String = Json.encodeToString(depots) + fun fromDepots(depots: Map): String = json.encodeToString(depots) @TypeConverter - fun toBranches(branches: String): Map = Json.decodeFromString>(branches) + fun toBranches(branches: String): Map = json.decodeFromString>(branches) @TypeConverter - fun fromBranches(branches: Map): String = Json.encodeToString(branches) + fun fromBranches(branches: Map): String = json.encodeToString(branches) @TypeConverter - fun toLangMap(langMap: String): Map = Json.decodeFromString>(langMap) + fun toLangMap(langMap: String): Map = json.decodeFromString>(langMap) @TypeConverter - fun fromLangMap(langMap: Map): String = Json.encodeToString(langMap) + fun fromLangMap(langMap: Map): String = json.encodeToString(langMap) @TypeConverter - fun toLibraryAssetsInfo(langMap: String): LibraryAssetsInfo = 
Json.decodeFromString(langMap) + fun toLibraryAssetsInfo(langMap: String): LibraryAssetsInfo = json.decodeFromString(langMap) @TypeConverter - fun fromLibraryAssetsInfo(langMap: LibraryAssetsInfo): String = Json.encodeToString(langMap) + fun fromLibraryAssetsInfo(langMap: LibraryAssetsInfo): String = json.encodeToString(langMap) @TypeConverter - fun toConfigInfo(configInfo: String): ConfigInfo = Json.decodeFromString(configInfo) + fun toConfigInfo(configInfo: String): ConfigInfo = json.decodeFromString(configInfo) @TypeConverter - fun fromConfigInfo(configInfo: ConfigInfo): String = Json.encodeToString(configInfo) + fun fromConfigInfo(configInfo: ConfigInfo): String = json.encodeToString(configInfo) @TypeConverter - fun toUFS(ufs: String): UFS = Json.decodeFromString(ufs) + fun toUFS(ufs: String): UFS = json.decodeFromString(ufs) @TypeConverter - fun fromUFS(ufs: UFS): String = Json.encodeToString(ufs) + fun fromUFS(ufs: UFS): String = json.encodeToString(ufs) } diff --git a/app/src/main/feature/stores/steam/db/converters/UserFileInfoListConverter.kt b/app/src/main/feature/stores/steam/db/converters/UserFileInfoListConverter.kt index b286977ad..38b92be79 100644 --- a/app/src/main/feature/stores/steam/db/converters/UserFileInfoListConverter.kt +++ b/app/src/main/feature/stores/steam/db/converters/UserFileInfoListConverter.kt @@ -3,10 +3,12 @@ import androidx.room.TypeConverter import com.winlator.cmod.feature.stores.steam.data.UserFileInfo import kotlinx.serialization.json.Json +private val json = Json { ignoreUnknownKeys = true } + class UserFileInfoListConverter { @TypeConverter - fun fromUserFileInfoList(userFileInfoList: List?): String? = userFileInfoList?.let { Json.encodeToString(it) } + fun fromUserFileInfoList(userFileInfoList: List?): String? = userFileInfoList?.let { json.encodeToString(it) } @TypeConverter - fun toUserFileInfoList(value: String?): List? = value?.let { Json.decodeFromString>(it) } + fun toUserFileInfoList(value: String?): List? 
= value?.let { json.decodeFromString>(it) } } diff --git a/app/src/main/res/values/strings.xml b/app/src/main/res/values/strings.xml index a4a9fcb23..9f9ba7b15 100644 --- a/app/src/main/res/values/strings.xml +++ b/app/src/main/res/values/strings.xml @@ -1333,6 +1333,9 @@ Installed path: Balanced Quality Smoothness + Recommended + Smooth + Low Latency Native Frame Generation Insert interpolated frames in the compositor to smooth motion. Works with any game (no game data needed) and adds a small amount of input latency. diff --git a/app/src/main/runtime/display/XServerDisplayActivity.java b/app/src/main/runtime/display/XServerDisplayActivity.java index 9438220b7..ef906853d 100644 --- a/app/src/main/runtime/display/XServerDisplayActivity.java +++ b/app/src/main/runtime/display/XServerDisplayActivity.java @@ -347,9 +347,46 @@ public boolean isInputSuspended() { private boolean hudCardExpanded = false; private boolean screenEffectsCardExpanded = false; private boolean frameGenerationEnabled = false; + // Frame generation settings persist per game (shortcut), not globally. + private String fgGameId() { + if (shortcut != null) { + String uuid = shortcut.getExtra("uuid", ""); + if (!uuid.isEmpty()) return "s:" + uuid; + String path = getIntent().getStringExtra("shortcut_path"); + if (path != null && !path.isEmpty()) return "p:" + Integer.toHexString(path.hashCode()); + } + return container != null ? 
"c:" + container.id : "g"; + } + + private String fgKey(String base) { + return base + ":" + fgGameId(); + } + + private boolean fgPrefBool(String base, boolean def) { + String k = fgKey(base); + if (preferences.contains(k)) return preferences.getBoolean(k, def); + return preferences.getBoolean(base, def); + } + + private int fgPrefInt(String base, int def) { + String k = fgKey(base); + if (preferences.contains(k)) return preferences.getInt(k, def); + return preferences.getInt(base, def); + } + + private float fgPrefFloat(String base, float def) { + String k = fgKey(base); + if (preferences.contains(k)) return preferences.getFloat(k, def); + return preferences.getFloat(base, def); + } + private int frameGenerationMultiplier = 2; private int frameGenerationQuality = 1; - private float frameGenerationSmoothing = 0.5f; + private float frameGenerationSmoothing = 0.75f; + private boolean frameGenerationDeepMode = true; + private boolean frameGenerationAdvanced = false; + private boolean frameGenerationExtrapolate = false; + private int frameGenerationFramesInFlight = 3; private boolean sgsrEnabled = false; private boolean sgsrRuntimeEnabled = false; private int sgsrUpscaleMode = 1; @@ -602,6 +639,18 @@ private String getShortcutWineVersionOverride() { return shortcut.getExtra("wineVersion"); } + // Coalesce FG target changes into a single panel re-pin once the rate settles (~600ms). The + // renderer throttles hints to 500ms, so this debounce (> that) absorbs a burst of rapid swaps + // and the unstable-startup rate wobble into one mode switch instead of one stall per change. 
+ private final Runnable fgRepinRunnable = this::applyPreferredRefreshRate; + + private void scheduleFgRefreshRepin() { + Handler h = handler; + if (h == null) { applyPreferredRefreshRate(); return; } + h.removeCallbacks(fgRepinRunnable); + h.postDelayed(fgRepinRunnable, 600L); + } + private void applyPreferredRefreshRate() { Runnable applyRefresh = () -> { if (isFinishing() || isDestroyed()) return; @@ -3929,7 +3978,11 @@ private void renderDrawerMenu() { frameGenerationEnabled, frameGenerationMultiplier, frameGenerationQuality, - frameGenerationSmoothing + frameGenerationSmoothing, + frameGenerationDeepMode, + frameGenerationAdvanced, + frameGenerationExtrapolate, + frameGenerationFramesInFlight ); if (drawerActionListener == null) { @@ -4119,7 +4172,7 @@ public void onScreenEffectsCardExpandedChanged(boolean expanded) { @Override public void onFrameGenerationEnabledChanged(boolean enabled) { frameGenerationEnabled = enabled; - preferences.edit().putBoolean("native_frame_generation", enabled).apply(); + preferences.edit().putBoolean(fgKey("native_frame_generation"), enabled).apply(); VulkanRenderer r = xServerView != null ? xServerView.getRenderer() : null; if (r != null) r.setFrameGeneration(enabled); applyPreferredRefreshRate(); @@ -4129,7 +4182,7 @@ public void onFrameGenerationEnabledChanged(boolean enabled) { @Override public void onFrameGenerationMultiplierSelected(int multiplier) { frameGenerationMultiplier = multiplier; - preferences.edit().putInt("frame_generation_multiplier", multiplier).apply(); + preferences.edit().putInt(fgKey("frame_generation_multiplier"), multiplier).apply(); VulkanRenderer r = xServerView != null ? 
xServerView.getRenderer() : null; if (r != null) r.setFrameGenerationMultiplier(multiplier); applyPreferredRefreshRate(); @@ -4139,7 +4192,7 @@ public void onFrameGenerationMultiplierSelected(int multiplier) { @Override public void onFrameGenerationQualitySelected(int quality) { frameGenerationQuality = quality; - preferences.edit().putInt("frame_generation_quality", quality).apply(); + preferences.edit().putInt(fgKey("frame_generation_quality"), quality).apply(); VulkanRenderer r = xServerView != null ? xServerView.getRenderer() : null; if (r != null) r.setFrameGenerationQuality(quality); renderDrawerMenu(); @@ -4148,12 +4201,46 @@ public void onFrameGenerationQualitySelected(int quality) { @Override public void onFrameGenerationSmoothingChanged(float smoothing) { frameGenerationSmoothing = smoothing; - preferences.edit().putFloat("frame_generation_smoothing", smoothing).apply(); + preferences.edit().putFloat(fgKey("frame_generation_smoothing"), smoothing).apply(); VulkanRenderer r = xServerView != null ? xServerView.getRenderer() : null; if (r != null) r.setFrameGenerationSmoothness(smoothing); renderDrawerMenu(); } + @Override + public void onFrameGenerationDeepModeChanged(boolean deep) { + frameGenerationDeepMode = deep; + preferences.edit().putBoolean(fgKey("frame_generation_deep_mode"), deep).apply(); + VulkanRenderer r = xServerView != null ? 
xServerView.getRenderer() : null; + if (r != null) r.setFrameGenerationDeepMode(deep); + renderDrawerMenu(); + } + + @Override + public void onFrameGenerationAdvancedChanged(boolean advanced) { + frameGenerationAdvanced = advanced; + preferences.edit().putBoolean(fgKey("frame_generation_advanced"), advanced).apply(); + renderDrawerMenu(); // UI-only: reveals the advanced controls + } + + @Override + public void onFrameGenerationExtrapolateChanged(boolean extrapolate) { + frameGenerationExtrapolate = extrapolate; + preferences.edit().putBoolean(fgKey("frame_generation_extrapolate"), extrapolate).apply(); + VulkanRenderer r = xServerView != null ? xServerView.getRenderer() : null; + if (r != null) r.setFrameGenerationExtrapolate(extrapolate); + renderDrawerMenu(); + } + + @Override + public void onFrameGenerationFramesInFlightChanged(int framesInFlight) { + frameGenerationFramesInFlight = framesInFlight; + preferences.edit().putInt(fgKey("frame_generation_fif"), framesInFlight).apply(); + VulkanRenderer r = xServerView != null ? xServerView.getRenderer() : null; + if (r != null) r.setFrameGenerationFramesInFlight(framesInFlight); + renderDrawerMenu(); + } + @Override public void onSGSREnabledChanged(boolean enabled) { boolean wasEnabled = sgsrEnabled; @@ -6069,16 +6156,25 @@ private void setupUI() { renderer.setNativeMode(isNativeRenderingEnabled); renderer.setPresentMode(VulkanRenderer.parsePresentMode( graphicsDriverConfig != null ? 
graphicsDriverConfig.get("compositorPresentMode") : null)); - frameGenerationEnabled = preferences.getBoolean("native_frame_generation", false); - frameGenerationMultiplier = preferences.getInt("frame_generation_multiplier", 2); - frameGenerationQuality = preferences.getInt("frame_generation_quality", 1); - frameGenerationSmoothing = preferences.getFloat("frame_generation_smoothing", 0.5f); + frameGenerationEnabled = fgPrefBool("native_frame_generation", false); + frameGenerationMultiplier = fgPrefInt("frame_generation_multiplier", 2); + frameGenerationQuality = fgPrefInt("frame_generation_quality", 1); + frameGenerationSmoothing = fgPrefFloat("frame_generation_smoothing", 0.75f); + frameGenerationDeepMode = fgPrefBool("frame_generation_deep_mode", true); + frameGenerationAdvanced = fgPrefBool("frame_generation_advanced", false); + frameGenerationExtrapolate = fgPrefBool("frame_generation_extrapolate", false); + frameGenerationFramesInFlight = fgPrefInt("frame_generation_fif", 3); renderer.setFrameGenerationMultiplier(frameGenerationMultiplier); renderer.setFrameGenerationQuality(frameGenerationQuality); renderer.setFrameGenerationSmoothness(frameGenerationSmoothing); + renderer.setFrameGenerationDeepMode(frameGenerationDeepMode); + renderer.setFrameGenerationExtrapolate(frameGenerationExtrapolate); + renderer.setFrameGenerationFramesInFlight(frameGenerationFramesInFlight); // Re-pin the window's preferred display mode whenever the measured FG target moves // (the window pin outranks surface frame-rate votes, so it must track the live target). - renderer.setFrameGenRateChangedListener(this::applyPreferredRefreshRate); + // Debounced: a physical mode switch (60/90/120) stalls the panel a frame or two, so rapid + // multiplier swaps and startup rate-wobble must coalesce into one switch, not one per change. + renderer.setFrameGenRateChangedListener(this::scheduleFgRefreshRepin); renderer.setFrameGeneration(frameGenerationEnabled); boolean swapRB = shortcut != null ? 
shortcut.getExtra("swapRB", "0").equals("1") diff --git a/app/src/main/runtime/display/XServerDrawerMenu.kt b/app/src/main/runtime/display/XServerDrawerMenu.kt index 1e16f3175..00e9d1907 100644 --- a/app/src/main/runtime/display/XServerDrawerMenu.kt +++ b/app/src/main/runtime/display/XServerDrawerMenu.kt @@ -114,6 +114,7 @@ import androidx.compose.runtime.setValue import androidx.compose.runtime.staticCompositionLocalOf import androidx.compose.ui.Alignment import androidx.compose.ui.Modifier +import androidx.compose.ui.draw.alpha import androidx.compose.ui.draw.clip import androidx.compose.ui.focus.FocusRequester import androidx.compose.ui.focus.focusRequester @@ -160,6 +161,7 @@ import com.winlator.cmod.shared.theme.WinNativeTheme import com.winlator.cmod.shared.ui.dialog.WinNativeDialogButton import com.winlator.cmod.shared.ui.dialog.WinNativeDialogShell import com.winlator.cmod.shared.ui.outlinedSwitchColors +import kotlin.math.abs import kotlin.math.roundToInt // Drawer-local colors. 
@@ -331,6 +333,10 @@ data class XServerDrawerState( val frameGenerationMultiplier: Int = 2, val frameGenerationQuality: Int = 1, val frameGenerationSmoothing: Float = 0.5f, + val frameGenerationDeepMode: Boolean = false, + val frameGenerationAdvanced: Boolean = false, + val frameGenerationExtrapolate: Boolean = false, + val frameGenerationFramesInFlight: Int = 3, val sgsrEnabled: Boolean = false, val sgsrSharpness: Int = 100, val vividEnabled: Boolean = false, @@ -524,6 +530,14 @@ interface XServerDrawerActionListener { fun onFrameGenerationSmoothingChanged(smoothing: Float) + fun onFrameGenerationDeepModeChanged(deep: Boolean) + + fun onFrameGenerationAdvancedChanged(advanced: Boolean) + + fun onFrameGenerationExtrapolateChanged(extrapolate: Boolean) + + fun onFrameGenerationFramesInFlightChanged(framesInFlight: Int) + fun onSGSREnabledChanged(enabled: Boolean) fun onSGSRSharpnessChanged(sharpness: Int) @@ -635,6 +649,10 @@ fun buildXServerDrawerState( frameGenerationMultiplier: Int = 2, frameGenerationQuality: Int = 1, frameGenerationSmoothing: Float = 0.5f, + frameGenerationDeepMode: Boolean = false, + frameGenerationAdvanced: Boolean = false, + frameGenerationExtrapolate: Boolean = false, + frameGenerationFramesInFlight: Int = 3, ): XServerDrawerState { val items = mutableListOf( @@ -790,6 +808,10 @@ fun buildXServerDrawerState( frameGenerationMultiplier = frameGenerationMultiplier, frameGenerationQuality = frameGenerationQuality, frameGenerationSmoothing = frameGenerationSmoothing, + frameGenerationDeepMode = frameGenerationDeepMode, + frameGenerationAdvanced = frameGenerationAdvanced, + frameGenerationExtrapolate = frameGenerationExtrapolate, + frameGenerationFramesInFlight = frameGenerationFramesInFlight, sgsrEnabled = sgsrEnabled, sgsrSharpness = sgsrSharpness, vividEnabled = vividEnabled, @@ -2386,30 +2408,133 @@ private fun ScreenEffectsPaneContent( ) } } - PaneSectionLabel(stringResource(R.string.session_drawer_frame_generation_quality)) - val 
qualityLabels = - listOf( - stringResource(R.string.session_drawer_frame_generation_quality_performance), - stringResource(R.string.session_drawer_frame_generation_quality_balanced), - stringResource(R.string.session_drawer_frame_generation_quality_quality), + PaneSectionLabel(stringResource(R.string.session_drawer_frame_generation_recommended)) + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.spacedBy((8f * paneScale).dp), + ) { + val smoothestActive = !state.frameGenerationExtrapolate && + state.frameGenerationDeepMode && + state.frameGenerationQuality == 1 && + abs(state.frameGenerationSmoothing - 0.75f) < 0.01f && + state.frameGenerationFramesInFlight == 3 + HUDToggleChip( + label = stringResource(R.string.session_drawer_frame_generation_preset_smoothest), + checked = smoothestActive, + onClick = { + listener.onFrameGenerationExtrapolateChanged(false) + listener.onFrameGenerationDeepModeChanged(true) + listener.onFrameGenerationQualitySelected(1) + listener.onFrameGenerationSmoothingChanged(0.75f) + listener.onFrameGenerationFramesInFlightChanged(3) + }, + modifier = Modifier.weight(1f), ) - ChipFlow { - qualityLabels.forEachIndexed { index, label -> + val lowLatencyActive = state.frameGenerationExtrapolate && + state.frameGenerationQuality == 1 && + abs(state.frameGenerationSmoothing - 0.75f) < 0.01f && + state.frameGenerationFramesInFlight == 1 + HUDToggleChip( + label = stringResource(R.string.session_drawer_frame_generation_preset_low_latency), + checked = lowLatencyActive, + onClick = { + listener.onFrameGenerationExtrapolateChanged(true) + listener.onFrameGenerationQualitySelected(1) + listener.onFrameGenerationSmoothingChanged(0.75f) + listener.onFrameGenerationFramesInFlightChanged(1) + }, + modifier = Modifier.weight(1f), + ) + } + DrawerBooleanRow( + title = "Advanced settings", + checked = state.frameGenerationAdvanced, + onCheckedChange = listener::onFrameGenerationAdvancedChanged, + subtitle = "Generation method · 
scan passes · quality preset · smoothness · buffering", + ) + + if (state.frameGenerationAdvanced) { + PaneSectionLabel("Generation method") + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.spacedBy((8f * paneScale).dp), + ) { HUDToggleChip( - label = label, - checked = state.frameGenerationQuality == index, - onClick = { listener.onFrameGenerationQualitySelected(index) }, + label = "Interpolation", + checked = !state.frameGenerationExtrapolate, + onClick = { listener.onFrameGenerationExtrapolateChanged(false) }, + modifier = Modifier.weight(1f), + ) + HUDToggleChip( + label = "Extrapolation", + checked = state.frameGenerationExtrapolate, + onClick = { listener.onFrameGenerationExtrapolateChanged(true) }, + modifier = Modifier.weight(1f), + ) + } + PaneSectionLabel("Scan passes") + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.spacedBy((8f * paneScale).dp), + ) { + HUDToggleChip( + label = "1 · Single", + checked = !state.frameGenerationDeepMode, + onClick = { listener.onFrameGenerationDeepModeChanged(false) }, + modifier = Modifier.weight(1f), + ) + HUDToggleChip( + label = "2 · Bidirectional", + checked = state.frameGenerationDeepMode, + onClick = { listener.onFrameGenerationDeepModeChanged(true) }, + modifier = Modifier.weight(1f), + ) + } + PaneSectionLabel(stringResource(R.string.session_drawer_frame_generation_quality)) + val qualityLabels = + listOf( + stringResource(R.string.session_drawer_frame_generation_quality_performance), + stringResource(R.string.session_drawer_frame_generation_quality_balanced), + stringResource(R.string.session_drawer_frame_generation_quality_quality), + ) + ChipFlow { + qualityLabels.forEachIndexed { index, label -> + HUDToggleChip( + label = label, + checked = state.frameGenerationQuality == index, + onClick = { listener.onFrameGenerationQualitySelected(index) }, + ) + } + } + DrawerSliderRow( + label = 
stringResource(R.string.session_drawer_frame_generation_smoothness), + valueText = "${(state.frameGenerationSmoothing * 100).roundToInt()}%", + value = state.frameGenerationSmoothing, + valueRange = 0f..1f, + steps = 0, + onValueChange = listener::onFrameGenerationSmoothingChanged, + ) + // Buffering (frames-in-flight): latency <-> smoothness. Extrapolation + // predicts frames instead of holding them, so buffering is irrelevant -> grayed. + val fifLocked = state.frameGenerationExtrapolate + Column(modifier = Modifier.alpha(if (fifLocked) 0.4f else 1f)) { + DrawerSliderRow( + label = if (fifLocked) "Buffering — n/a for extrapolation" + else "Buffering (latency ↔ smooth)", + valueText = state.frameGenerationFramesInFlight.toString(), + value = state.frameGenerationFramesInFlight.toFloat(), + valueRange = 1f..3f, + steps = 1, + onValueChange = { + if (!fifLocked) { + listener.onFrameGenerationFramesInFlightChanged( + it.roundToInt().coerceIn(1, 3), + ) + } + }, ) } } - DrawerSliderRow( - label = stringResource(R.string.session_drawer_frame_generation_smoothness), - valueText = "${(state.frameGenerationSmoothing * 100).roundToInt()}%", - value = state.frameGenerationSmoothing, - valueRange = 0f..1f, - steps = 0, - onValueChange = listener::onFrameGenerationSmoothingChanged, - ) } } } diff --git a/app/src/main/runtime/display/renderer/VulkanRenderer.java b/app/src/main/runtime/display/renderer/VulkanRenderer.java index 7717657c9..78bd04c64 100644 --- a/app/src/main/runtime/display/renderer/VulkanRenderer.java +++ b/app/src/main/runtime/display/renderer/VulkanRenderer.java @@ -5,7 +5,9 @@ import android.graphics.BitmapFactory; import android.os.Build; import android.os.Handler; +import android.os.HandlerThread; import android.os.Looper; +import android.os.Process; import android.util.Log; import android.view.Choreographer; import android.view.Surface; @@ -69,18 +71,40 @@ public class VulkanRenderer private boolean fgPendingReal = false; // a held real frame awaits its 
display tick private int fgPendingInterps = 0; // interpolated frames still owed before the held real private int fgInterpTotal = 0; // interps planned for the current engine frame (phase divisor) + private int fgSlotIdx = 0; // display ticks since the newest real frame (slot-grid scheduler) private long fgEngineFrames = 0; // count of held real frames since FG was enabled // EMAs of the pump (=panel) tick interval and the real game-frame interval. private volatile long fgDisplayPeriodNs = 0; private volatile long fgGamePeriodNs = 0; + // Locked game-rate estimate (Hz). The raw EMA jitters a few fps; ×multiplier amplifies that into a + // wide display-target swing that thrashes the panel-mode pin and the interp cadence. Hold a stable + // rate and only re-lock on a sustained change so a steady game produces a steady target. + private volatile double fgLockedGameHz = 0.0; + private int fgGameDriftFrames = 0; private long fgLastPumpNs = 0; private volatile long fgLastGameNs = 0; private volatile long fgPrevGameNs = 0; + private volatile long fgCurrentVsyncNs = 0; // latest vsync instant from the native pump (CLOCK_MONOTONIC) + private Drawable fgLastScanoutSrc = null; // scanout buffer of the last ACCEPTED frame (dedup by identity) + private Drawable fgFirstScanoutSrc = null; // first buffer ever seen (to detect a multi-buffer swapchain) + private boolean fgMultiBuffer = false; // seen ≥2 distinct scanout buffers → identity dedup is trustworthy + private long fgLastAcceptNs = 0L; // time of last ACCEPTED frame (freeze backstop reference) + private static final long FG_DEDUP_FREEZE_NS = 100_000_000L; // never drop for >100ms → genuine holds get through + // Present-pipeline instrumentation (diagnose slips when GPU+CPU both have headroom): per-present + // GL-thread record/submit wall-time, bucketed composite(HOLD) vs interp, + count over the vsync budget. 
+ private boolean fgEmitWasHold = false; + private long fgInstHoldN, fgInstInterpN, fgInstLongN, fgInstTotalN; + private double fgInstHoldSum, fgInstInterpSum, fgInstHoldMax, fgInstInterpMax; + private boolean fgRenderPrioritySet = false; // one-shot: elevate the GL present thread vs scheduling jitter private volatile int fgActivePresentMode = PRESENT_MODE_FIFO; // resolved native mode (see nativeGetActivePresentMode) private volatile int fgDisplayCapHz = 0; // panel-max ceiling for the target post rate; 0 = uncapped // Quality/smoothness, mapped to native shader knobs (motion search floor + interp consistency). private volatile int fgQuality = 1; // 0 performance, 1 balanced, 2 quality - private volatile float fgSmoothness = 0.5f; + private volatile float fgSmoothness = 0.75f; + // Quality pipeline: bidirectional warp (adds a forward flow). + private volatile boolean fgDeepMode = false; + private volatile boolean fgExtrapolate = false; // false = interpolate, true = extrapolate (predict forward) + private volatile int fgFramesInFlight = 3; // compositor buffering depth (1..3): latency<->smoothness // Panel frame-rate request: surface vote here; the activity mirrors it into the window's // preferredDisplayModeId/preferredRefreshRate (which outrank surface votes) via the listener. private volatile Surface fgSurface; @@ -150,6 +174,9 @@ public void setSwapRB(boolean v) { private final ByteBuffer sceneBuf = ByteBuffer.allocateDirect(SCENE_BUF_SIZE).order(ByteOrder.nativeOrder()); private final Handler mainHandler = new Handler(Looper.getMainLooper()); + // FG pump runs on a dedicated native pthread (AChoreographer) — see nativeFgPumpStart in + // vk_renderer.c; it calls back into fgPumpTickFromNative each vsync. + private volatile boolean fgPumpStarted = false; private final AtomicBoolean renderRequested = new AtomicBoolean(false); // Reusable scratch — sized once, refilled per frame. 
@@ -171,6 +198,7 @@ public void destroy() { // LEAK FIX: Unregister from the persistent XServer to prevent "zombie" listeners xServer.windowManager.removeOnWindowModificationListener(this); xServer.pointer.removeOnPointerMotionListener(this); + stopFgPumpThread(); if (nativeHandle != 0) { // If we are on the UI thread, nativeDestroy (which might block on vkDeviceWaitIdle) @@ -222,23 +250,38 @@ public void setFrameGeneration(boolean enabled) { synchronized (this) { if (nativeHandle != 0) { nativeSetFrameGeneration(nativeHandle, enabled); - // MAILBOX over-post holds the adaptive panel high; the native clock_nanosleep pacer spaces presents evenly. - nativeSetPresentMode(nativeHandle, enabled ? PRESENT_MODE_MAILBOX : requestedPresentMode); + // FIFO, not MAILBOX: with the panel pinned to the FG target (preferredDisplayModeId) + // the FIFO queue self-paces one present per vblank — nothing is ever replaced or + // dropped. Under MAILBOX a queued synthetic frame can be overwritten by the next + // present before scanout and never reach the panel. + nativeSetPresentMode(nativeHandle, enabled ? PRESENT_MODE_FIFO : requestedPresentMode); fgActivePresentMode = nativeGetActivePresentMode(nativeHandle); } } if (enabled) { pushFrameGenParams(); + synchronized (this) { + if (nativeHandle != 0) nativeSetFrameGenDeepMode(nativeHandle, fgDeepMode); + } fgPendingReal = false; fgPendingInterps = 0; fgInterpTotal = 0; fgEngineFrames = 0; + fgCurrentVsyncNs = 0; + fgLastScanoutSrc = null; + fgFirstScanoutSrc = null; + fgMultiBuffer = false; + fgLastAcceptNs = 0L; + fgRenderPrioritySet = false; + fgLockedGameHz = 0.0; // re-lock the game rate fresh for this session + fgGameDriftFrames = 0; fgNewScene.set(true); // re-render current content as the first held frame + startFgPumpThread(); scheduleFgPump(); } // When disabled, the pump self-stops (fgPumpTick checks frameGenEnabled) and onDrawFrame // reverts to the coalesced real-present path. 
- if (!enabled) fgApplyFrameRateHint(0.0, System.nanoTime()); + if (!enabled) { fgLockedGameHz = 0.0; fgApplyFrameRateHint(0.0, System.nanoTime()); stopFgPumpThread(); } } public boolean isFrameGenerationEnabled() { return frameGenEnabled; } @@ -269,6 +312,46 @@ public void setFrameGenerationSmoothness(float smoothness) { public float getFrameGenerationSmoothness() { return fgSmoothness; } + /** + * Pipeline mode. false = standard (single backward flow). true = quality (adds a forward flow + * for a bidirectional warp; same latency). Live. + */ + public void setFrameGenerationDeepMode(boolean deep) { + fgDeepMode = deep; + synchronized (this) { + if (nativeHandle != 0) nativeSetFrameGenDeepMode(nativeHandle, deep); + } + } + + public boolean isFrameGenerationDeepMode() { return fgDeepMode; } + + /** + * Generation method. false = interpolation (between the two newest real frames; +1 frame latency). + * true = extrapolation (predict forward from the latest real frame; no added latency). Live. + */ + public void setFrameGenerationExtrapolate(boolean extrapolate) { + fgExtrapolate = extrapolate; + synchronized (this) { + if (nativeHandle != 0) nativeSetFrameGenExtrapolate(nativeHandle, extrapolate); + } + } + + public boolean isFrameGenerationExtrapolate() { return fgExtrapolate; } + + /** + * Compositor frames-in-flight (1..3): the latency↔smoothness dial. Higher buffers more GPU work + * ahead (smoother under spikes, more latency); lower is more responsive. Irrelevant under + * extrapolation (no frames are held). Live. + */ + public void setFrameGenerationFramesInFlight(int framesInFlight) { + fgFramesInFlight = framesInFlight < 1 ? 1 : (framesInFlight > 3 ? 3 : framesInFlight); + synchronized (this) { + if (nativeHandle != 0) nativeSetFrameGenFramesInFlight(nativeHandle, fgFramesInFlight); + } + } + + public int getFrameGenerationFramesInFlight() { return fgFramesInFlight; } + // Map quality preset + smoothness to the native interpolate.frag / motion.comp knobs. 
private void pushFrameGenParams() { float occHi = 0.12f + 0.28f * fgSmoothness; // consistency window: wider == trusts motion more @@ -286,17 +369,29 @@ public long getDisplayFrameCount() { } } + private synchronized void startFgPumpThread() { + if (fgPumpStarted) return; + nativeFgPumpStart(this); // dedicated native AChoreographer pthread; calls fgPumpTickFromNative + fgPumpStarted = true; + } + + private synchronized void stopFgPumpThread() { + if (!fgPumpStarted) return; + fgPumpStarted = false; + fgLastPumpNs = 0L; + nativeFgPumpStop(); + } + private void scheduleFgPump() { - if (!frameGenEnabled) return; - if (fgPumpScheduled.compareAndSet(false, true)) { - mainHandler.post(() -> Choreographer.getInstance().postFrameCallback(this::fgPumpTick)); - } + // The native AChoreographer pump free-runs every vsync once started; just keep it alive (cheap + // flag check). Self-heals if a lifecycle race ever left it stopped while FG is on. + if (frameGenEnabled && !fgPumpStarted) startFgPumpThread(); } - // Free-running display-cadence pump: each tick wakes the render thread (onDrawFrame -> - // fgDrawFrame) and re-arms itself while FG is enabled. - private void fgPumpTick(long frameTimeNanos) { - fgPumpScheduled.set(false); + // Invoked from the native pump thread once per vsync (frameTimeNanos = the vsync time). Does the FG + // display-rate timing + wakes the render thread (onDrawFrame -> fgDrawFrame). The native pump + // re-arms itself, so there is no re-schedule here. Keep this lightweight and exception-safe. + private void fgPumpTickFromNative(long frameTimeNanos) { if (!frameGenEnabled || nativeHandle == 0) return; // The swapchain may still be FIFO right after enable (surface not attached yet); re-read until // it resolves so the bootstrap engages at launch without a manual toggle. 
@@ -316,45 +411,129 @@ private void fgPumpTick(long frameTimeNanos) { } } fgLastPumpNs = frameTimeNanos; + fgCurrentVsyncNs = frameTimeNanos; // anchor continuous-phase placement to the clean vsync grid xServerView.requestRender(); - scheduleFgPump(); } // Render-thread scheduler (DESIGN.md §2): emit enough presents per tick to sustain the target rate. private void fgDrawFrame() { + if (!fgRenderPrioritySet) { + // Elevate THIS (the GL present) thread to urgent-display so a brief preempt in the tiny + // record/submit window between vsync-acquires doesn't make it miss the next image → the slip. + try { android.os.Process.setThreadPriority(android.os.Process.THREAD_PRIORITY_URGENT_DISPLAY); } + catch (Throwable t) { /* best-effort; capped by rlimit on some ROMs */ } + fgRenderPrioritySet = true; + } int perTick = fgComputePerTick(); - for (int i = 0; i < perTick; i++) fgEmitOne(); + for (int i = 0; i < perTick; i++) { + long t0 = System.nanoTime(); + int kind = fgEmitOne(); + if (kind != 0) fgInstrument((System.nanoTime() - t0) / 1000L, fgEmitWasHold); + } + } + + // GL-thread wall-time spent recording+submitting one present, bucketed by whether it included the + // real-frame composite (HOLD). With hardware headroom, a present that takes >~8.31ms here is what + // makes the GL thread miss the next vsync (GLSurfaceView coalesces) → the slip. Reports every ~2s. + private void fgInstrument(long usCpu, boolean wasHold) { + double ms = usCpu / 1000.0; + if (wasHold) { fgInstHoldN++; fgInstHoldSum += ms; if (ms > fgInstHoldMax) fgInstHoldMax = ms; } + else { fgInstInterpN++; fgInstInterpSum += ms; if (ms > fgInstInterpMax) fgInstInterpMax = ms; } + if (ms > 8.31) fgInstLongN++; + if (++fgInstTotalN >= 240) { + android.util.Log.i(TAG, String.format(java.util.Locale.US, + "FG cpu/present: composite n=%d mean=%.2f max=%.2f | interp n=%d mean=%.2f max=%.2f | over-budget=%d/%d", + fgInstHoldN, fgInstHoldN > 0 ? 
fgInstHoldSum / fgInstHoldN : 0.0, fgInstHoldMax, + fgInstInterpN, fgInstInterpN > 0 ? fgInstInterpSum / fgInstInterpN : 0.0, fgInstInterpMax, + fgInstLongN, fgInstTotalN)); + fgInstHoldN = fgInstInterpN = fgInstLongN = fgInstTotalN = 0; + fgInstHoldSum = fgInstInterpSum = fgInstHoldMax = fgInstInterpMax = 0.0; + } } - private void fgEmitOne() { - if (fgPendingInterps == 0 && !fgPendingReal) { - boolean newGame = fgNewScene.getAndSet(false); - boolean dirty = fgSceneDirty.getAndSet(false); - if (!newGame && !dirty) return; // nothing changed — idle tick + // Slot-grid placement: with the panel pinned to ~M x gameHz, each game period spans M display + // ticks. Tick k since the frame's arrival presents: + // interpolation: k = 0..M-2 -> interp at phase (k+1)/M; k = M-1 -> the real frame, sharp + // (PRESENT_LAST blit, no resample). + // extrapolation: k = 0 -> the real frame immediately (no hold-back latency); + // k = 1..M-1 -> predict phase k/M past it along the motion field. + // Slot phases are deterministic: the vsync clock and the game-arrival clock are not phase-locked, + // so clock-derived phases inject per-slot bias. A continuous-phase fallback covers non-integer + // panel:game ratios (e.g. 29fps on 120Hz). + // Returns the present kind for instrumentation: 0 none, 1 synthesized, 2 real frame. + private int fgEmitOne() { + boolean newGame = fgNewScene.getAndSet(false); + boolean dirty = fgSceneDirty.getAndSet(false); + fgEmitWasHold = newGame || dirty; + if (newGame || dirty) { buildAndSubmitFrame(); // HOLD -> history[curr] (no present) - fgEngineFrames++; - // Interpolate only between real game frames; a cursor/UI-only change just recomposites. - int interps = (newGame && fgEngineFrames >= 2) ? 
fgComputeInterps() : 0; - fgInterpTotal = interps; - fgPendingInterps = interps; - fgPendingReal = true; + if (newGame) { fgEngineFrames++; fgSlotIdx = 0; } } - if (fgPendingInterps > 0) { - int k = fgInterpTotal - fgPendingInterps + 1; // 1..fgInterpTotal - float phase = (float) k / (float) (fgInterpTotal + 1); // even fallback; native refines from arrival times - nativeRenderInterp(nativeHandle, phase, fgPrevGameNs, fgLastGameNs); - fgPendingInterps--; - } else if (fgPendingReal) { - nativePresentLast(nativeHandle); // the held real frame - fgPendingReal = false; + if (!newGame) fgSlotIdx++; + long period = fgGamePeriodNs; + boolean canInterp = fgMultiplier > 1 && fgEngineFrames >= 2 && period > 0L + && fgLastGameNs != 0L && fgPrevGameNs != 0L; + if (!canInterp) { + // Passthrough (1x) / bootstrap / no measured rate yet: show the latest real frame on each + // change (never post below native); nothing to interpolate. + if (newGame || dirty) { nativePresentLast(nativeHandle); return 2; } + return 0; + } + if (dirty && !newGame) { + // Cursor/UI-only recomposite — show it sharply, don't morph it through the stale motion pair. + nativePresentLast(nativeHandle); + return 2; + } + + long disp = fgDisplayPeriodNs; + double ratio = disp > 0L ? (double) period / (double) disp : 0.0; + int slots = (int) Math.round(ratio); + boolean gridOk = slots >= 2 && slots <= 8 && Math.abs(ratio - slots) < 0.10 * slots; + if (gridOk) { + // Honor the user's multiplier even if the panel pin hasn't (or can't) switch: cap the + // unique synthesized positions; surplus ticks just re-show the real frame. 
+ if (slots > fgMultiplier) slots = fgMultiplier; + int k = fgSlotIdx; + if (k >= slots * 2) return 0; // game stalled — hold the panel, save the GPU + if (fgExtrapolate) { + if (k == 0 || k >= slots) { nativePresentLast(nativeHandle); return 2; } + nativeRenderInterp(nativeHandle, (float) k / slots, fgPrevGameNs, fgLastGameNs); + return 1; + } + if (k >= slots - 1) { nativePresentLast(nativeHandle); return 2; } + nativeRenderInterp(nativeHandle, (float) (k + 1) / slots, fgPrevGameNs, fgLastGameNs); + return 1; } + + // Continuous-phase fallback: phase = (thisVsync − lastRealArrival) / gamePeriod. + long vsync = fgCurrentVsyncNs != 0L ? fgCurrentVsyncNs : System.nanoTime(); + double phase = (double) (vsync - fgLastGameNs) / (double) period; + if (fgExtrapolate) { + if (newGame || phase >= 1.0) { + if (phase < 2.0 || newGame) { nativePresentLast(nativeHandle); return 2; } + return 0; + } + nativeRenderInterp(nativeHandle, (float) Math.max(0.001, phase), fgPrevGameNs, fgLastGameNs); + return 1; + } + if (phase < 1.0) { + nativeRenderInterp(nativeHandle, (float) Math.max(0.001, phase), fgPrevGameNs, fgLastGameNs); + return 1; + } else if (phase < 2.0) { + // Caught up to the newest real frame (or it arrived a little late) — show it sharply and fill + // the tick. Covering up to 2 periods past absorbs arrival jitter so a late frame leaves no gap. + nativePresentLast(nativeHandle); + return 2; + } + // else: game stalled >2 frames — let the panel hold the last frame (don't burn GPU on a freeze). + return 0; } - // Target FG post rate (Hz): multiplier × game rate, capped to the panel max. 0 if not measured. + // Target FG post rate (Hz): multiplier × locked game rate, capped to the panel max. 0 if not measured. 
private double fgTargetHz() { - long game = fgGamePeriodNs; - if (game <= 0L) return 0.0; - double target = Math.max(1, fgMultiplier) * (1.0e9 / (double) game); + double g = fgLockedGameHz; + if (g <= 0.0) return 0.0; + double target = Math.max(1, fgMultiplier) * g; if (fgDisplayCapHz > 0) target = Math.min(target, (double) fgDisplayCapHz); return target; } @@ -370,8 +549,9 @@ private int fgComputeInterps() { int slots = (int) Math.floor((double) game / (double) disp + 1e-3); return Math.max(0, Math.min(maxInterps, slots - 1)); } - // Non-blocking: post at the target rate so an adaptive-refresh panel ramps up to it. - double gameHz = 1.0e9 / (double) game; + // Non-blocking: post at the target rate so an adaptive-refresh panel ramps up to it. Use the + // locked game rate on both sides so the ratio is the stable multiplier, not per-frame jitter. + double gameHz = fgLockedGameHz > 0.0 ? fgLockedGameHz : 1.0e9 / (double) game; int interps = (int) Math.round(fgTargetHz() / gameHz) - 1; return Math.max(0, Math.min(maxInterps, interps)); } @@ -413,7 +593,8 @@ private void fgApplyFrameRateHint(double targetHz, long nowNs) { } fgFrameRateHint = rate; fgFrameRateHintNs = nowNs; - Log.i(TAG, "FG target display rate: " + (int) rate + "Hz"); + Log.i(TAG, "FG target display rate: " + (int) rate + "Hz (game=" + Math.round(fgLockedGameHz) + + " x" + fgMultiplier + (fgDeepMode ? 
" quality" : " standard") + ")"); Runnable l = fgRateChangedListener; if (l != null) l.run(); } @@ -464,11 +645,13 @@ public void attachSurface(Surface surface) { nativeSetPresentMode(nativeHandle, PRESENT_MODE_MAILBOX); // over-post hold + native pacer fgActivePresentMode = nativeGetActivePresentMode(nativeHandle); pushFrameGenParams(); + nativeSetFrameGenDeepMode(nativeHandle, fgDeepMode); fgPendingReal = false; fgPendingInterps = 0; fgInterpTotal = 0; fgEngineFrames = 0; fgNewScene.set(true); + startFgPumpThread(); scheduleFgPump(); } destroyed.set(false); @@ -836,11 +1019,47 @@ public void onFramePresented(Window window, WindowManager.FrameSource source, in // This is an actual game-window frame (X11 Present / PutImage / MIT-SHM) — the only // signal that drives FG's hold+interpolate cadence. Cursor/Controls go through the // generic requestRenderCoalesced path and deliberately do not get counted here. + + // De-DUPLICATE re-presented frames by SCANOUT-BUFFER IDENTITY. A game that vsyncs 30fps + // content at 60/90Hz re-presents the SAME swapchain pixmap; the FG would otherwise interpolate + // identical pairs into static holds (the [50,50,0,0] period-4 judder + duplicate frames the + // cadence audit + simulation both showed). The PRESENT extension just set the window's scanout + // source to this present's pixmap.drawable, so a present that re-points at the SAME object as + // the last ACCEPTED frame is a duplicate buffer → drop it. We compare object identity, not + // pixels (the pixmap is GPU-side, not CPU-readable here — that's what broke the hash version). + // • Trust identity ONLY once we've seen ≥2 distinct buffers (a real swapchain). A single- + // buffered game reuses one pixmap for fresh content, so identity would false-match — there + // we DON'T dedup (front-loaded but never frozen) instead of starving the cadence. 
+ // • FREEZE BACKSTOP: never drop for >100ms, so a genuine hold/stall always gets through and + // the output can never lock up (the failure mode of the reverted CPU-hash attempt). + Drawable scanoutNow = (window != null && window.getContent() != null) + ? window.getContent().getScanoutSource() : null; + if (scanoutNow != null) { + if (fgFirstScanoutSrc == null) fgFirstScanoutSrc = scanoutNow; + else if (scanoutNow != fgFirstScanoutSrc) fgMultiBuffer = true; + } long now = System.nanoTime(); + if (fgMultiBuffer && scanoutNow != null && scanoutNow == fgLastScanoutSrc + && (now - fgLastAcceptNs) < FG_DEDUP_FREEZE_NS) { + return; // duplicate buffer — ignore for the FG cadence (keep the real-frame clock) + } + fgLastScanoutSrc = scanoutNow; + fgLastAcceptNs = now; + if (fgLastGameNs != 0L) { long d = now - fgLastGameNs; if (d > 0L && d < 500_000_000L) { fgGamePeriodNs = fgGamePeriodNs == 0L ? d : fgGamePeriodNs + (d - fgGamePeriodNs) / 8L; + // Hold the game-rate lock steady; re-lock only after a sustained (~24-frame) deviation + // beyond 10% (or 2Hz), so a steady game yields a steady target instead of a jittery one. 
+ double inst = 1.0e9 / (double) fgGamePeriodNs; + if (fgLockedGameHz <= 0.0) { + fgLockedGameHz = inst; + } else if (Math.abs(inst - fgLockedGameHz) > Math.max(2.0, 0.10 * fgLockedGameHz)) { + if (++fgGameDriftFrames >= 24) { fgLockedGameHz = inst; fgGameDriftFrames = 0; } + } else { + fgGameDriftFrames = 0; + } } } fgPrevGameNs = fgLastGameNs; @@ -1107,6 +1326,11 @@ private static native long nativeCreate(boolean enableValidationLayers, private static native boolean nativeRenderInterp(long handle, float phase, long prevNs, long currNs); private static native boolean nativePresentLast(long handle); private static native void nativeSetFrameGenParams(long handle, float occLo, float occHi, int minStep); + private static native void nativeSetFrameGenDeepMode(long handle, boolean deep); + private static native void nativeSetFrameGenExtrapolate(long handle, boolean extrapolate); + private static native void nativeSetFrameGenFramesInFlight(long handle, int framesInFlight); private static native int nativeGetActivePresentMode(long handle); private static native void nativeSetVsyncTiming(long handle, long periodNs, long displayPeriodNs, long vsyncNs); + private static native void nativeFgPumpStart(Object renderer); // native AChoreographer pump -> fgPumpTickFromNative + private static native void nativeFgPumpStop(); } diff --git a/app/src/main/runtime/display/xserver/XServer.java b/app/src/main/runtime/display/xserver/XServer.java index 580de3dbf..8a6bba1a2 100644 --- a/app/src/main/runtime/display/xserver/XServer.java +++ b/app/src/main/runtime/display/xserver/XServer.java @@ -15,6 +15,7 @@ import com.winlator.cmod.runtime.display.xserver.extensions.XInput2Extension; import com.winlator.cmod.shared.android.CursorLocker; import java.nio.charset.Charset; +import java.util.Arrays; import java.util.EnumMap; import java.util.concurrent.locks.ReentrantLock; @@ -183,8 +184,9 @@ private class MultiXLock implements XLock { private final Lockable[] lockables; private 
MultiXLock(Lockable[] lockables) { - this.lockables = lockables; - for (Lockable lockable : lockables) locks.get(lockable).lock(); + this.lockables = lockables.clone(); + Arrays.sort(this.lockables); + for (Lockable lockable : this.lockables) locks.get(lockable).lock(); } @Override From 168b6fedffc2d7920f2da26f6a30b17b6fffe7d3 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Wed, 17 Jun 2026 15:20:14 -0400 Subject: [PATCH 11/46] Frame generation: worker pipeline, content-dedup, pacing + crash fixes Interpolated frame generation on a dedicated worker thread with a deferred-promote history ring and a changed-pixel content-dedup, so distinct frames are kept and the source rate is measured cleanly. Slot-grid cadence with even-hold pacing for high-refresh panels, native-max refresh pinning, and deadline-paced presents. Occlusion-gated interpolation with motion-faded detail. Fixes: enable/disable crash (stale worker fence left in the shared slot-fence array), drawer-overlay pause/resume, and low-motion content-rate collapse. Adds compute shaders for the flow-generation path. 
--- app/src/main/cpp/CMakeLists.txt | 20 +- .../cpp/winlator/vk/shaders/cnn_conv.comp | 99 +++ .../winlator/vk/shaders/cnn_correlation.comp | 138 +++ .../cpp/winlator/vk/shaders/cnn_generate.comp | 63 ++ .../winlator/vk/shaders/cnn_occlusion.comp | 124 +++ .../cpp/winlator/vk/shaders/cnn_pyramid.comp | 48 + .../cpp/winlator/vk/shaders/interpolate.frag | 169 ++-- .../main/cpp/winlator/vk/shaders/motion.comp | 155 ++-- .../cpp/winlator/vk/shaders/motion_fp32.comp | 102 ++- app/src/main/cpp/winlator/vk/vk_dispatch.c | 3 +- app/src/main/cpp/winlator/vk/vk_dispatch.h | 16 +- app/src/main/cpp/winlator/vk/vk_renderer.c | 819 +++++++++++++----- app/src/main/cpp/winlator/vk/vk_state.h | 101 ++- .../display/XServerDisplayActivity.java | 96 +- .../main/runtime/display/XServerDrawerMenu.kt | 164 +--- .../display/renderer/VulkanRenderer.java | 415 +++++---- 16 files changed, 1720 insertions(+), 812 deletions(-) create mode 100644 app/src/main/cpp/winlator/vk/shaders/cnn_conv.comp create mode 100644 app/src/main/cpp/winlator/vk/shaders/cnn_correlation.comp create mode 100644 app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp create mode 100644 app/src/main/cpp/winlator/vk/shaders/cnn_occlusion.comp create mode 100644 app/src/main/cpp/winlator/vk/shaders/cnn_pyramid.comp diff --git a/app/src/main/cpp/CMakeLists.txt b/app/src/main/cpp/CMakeLists.txt index 119a2d79b..90aa70522 100644 --- a/app/src/main/cpp/CMakeLists.txt +++ b/app/src/main/cpp/CMakeLists.txt @@ -6,8 +6,6 @@ include(FetchContent) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2 -Wno-unused-function -Wimplicit-function-declaration") -# Zstandard is used by winlator/native_content_io.cpp. Keep this dependency in -# the parent build instead of relying on the Steam client subproject to create it. 
FetchContent_Declare( zstd GIT_REPOSITORY https://github.com/facebook/zstd.git @@ -30,14 +28,6 @@ add_subdirectory(wn-libsteamclient) find_package(curl REQUIRED CONFIG) -# ---------------------------------------------------------------------------- -# SPIR-V shader compilation -# Each .glsl is compiled by glslc (shipped with the NDK) into a .spv binary, -# then converted to a C uint32_t array via bin2c.cmake. Headers are emitted -# under ${CMAKE_CURRENT_BINARY_DIR}/shaders/*.spv.h and included from vk_renderer.c. -# ---------------------------------------------------------------------------- - -# Locate glslc shipped with the NDK. ANDROID_NDK is provided by the Android Gradle plugin. if(NOT DEFINED ANDROID_NDK) message(FATAL_ERROR "ANDROID_NDK not defined; this project must be built via the Android Gradle plugin") endif() @@ -76,10 +66,14 @@ set(SHADER_LIST "effect_hdr:frag:effect_hdr_frag" "effect_natural:frag:effect_natural_frag" "sgsr1:frag:sgsr1_frag" - # Frame generation: motion estimation (compute, fp16 + fp32 fallback) + interpolation (fragment). 
"motion:comp:motion_comp" "motion_fp32:comp:motion_fp32_comp" "interpolate:frag:interpolate_frag" + "cnn_pyramid:comp:cnn_pyramid_comp" + "cnn_conv:comp:cnn_conv_comp" + "cnn_correlation:comp:cnn_correlation_comp" + "cnn_occlusion:comp:cnn_occlusion_comp" + "cnn_generate:comp:cnn_generate_comp" ) set(SHADER_HEADERS "") @@ -109,10 +103,6 @@ endforeach() add_custom_target(winlator_shaders DEPENDS ${SHADER_HEADERS}) -# ---------------------------------------------------------------------------- -# Winlator native library (X-server, AHB, Vulkan compositor, helpers) -# ---------------------------------------------------------------------------- - add_library(winlator SHARED winlator/drawable.c winlator/native_content_io.cpp
diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_conv.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_conv.comp
new file mode 100644
index 000000000..7186f2f85
--- /dev/null
+++ b/app/src/main/cpp/winlator/vk/shaders/cnn_conv.comp
@@ -0,0 +1,114 @@
+#version 450
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+
+// 3x3 convolution over 4-channel feature tiles, then a per-channel affine and optional ReLU.
+// pc.flags selects the single-channel luma stem, stride-2 sampling and a residual add.
+
+layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in;
+
+layout(set = 0, binding = 0) uniform sampler2DArray uSrc; // input feature tiles (cinT layers)
+layout(set = 0, binding = 1, rgba8) uniform writeonly image2DArray uDst; // output tiles (coutT layers)
+layout(set = 0, binding = 3) uniform sampler2D uLuma; // R8 luma source
+
+layout(set = 0, binding = 8, std430) readonly buffer Weights { float16_t W[]; };
+
+layout(push_constant) uniform PC {
+    ivec2 size; // dst extent in pixels
+    float t; // interpolation phase (unused here)
+    float mvScale; // flow scale (unused here)
+    uint wBase; // weight offset (fp16 elements)
+    int cinT; // Cin/4
+    int coutT; // Cout/4
+    int flags; // bit0=stem-luma-norm bit1=stride2 bit2=residual-add bit3=relu
+} pc;
+
+const ivec2 TAP[9] = ivec2[](
+    ivec2(-1,-1), ivec2(0,-1), ivec2(1,-1),
+    ivec2(-1, 0), ivec2(0, 0), ivec2(1, 0),
+    ivec2(-1, 1), ivec2(0, 1), ivec2(1, 1));
+
+const int MAX_T = 4; // capacity of the per-invocation accumulator, in output tiles
+
+// Loads the 4x4 weight block for tap k, input tile ci, output tile co (16 fp16 values per block).
+mat4 convMat(int k, int ci, int co) {
+    uint b = pc.wBase + uint((((k * pc.cinT + ci) * pc.coutT + co) * 4) * 4);
+    mat4 M;
+    for (int c = 0; c < 4; ++c) {
+        uint cb = b + uint(c);
+        M[c] = vec4(float(W[cb + 0u]), // r=0
+                    float(W[cb + 4u]), // r=1
+                    float(W[cb + 8u]), // r=2
+                    float(W[cb + 12u])); // r=3
+    }
+    return M;
+}
+
+void main() {
+    ivec2 p = ivec2(gl_GlobalInvocationID.xy);
+    if (any(greaterThanEqual(p, pc.size))) return;
+
+    bool stem = (pc.flags & 1) != 0;
+    bool stride2 = (pc.flags & 2) != 0;
+    bool resid = (pc.flags & 4) != 0;
+    bool doRelu = (pc.flags & 8) != 0;
+
+    // acc[] has MAX_T slots; never index past it, even if the host passes a larger coutT.
+    int outT = min(pc.coutT, MAX_T);
+
+    // Base sample position in *source* texels.
+    ivec2 sp = stride2 ? (p * 2) : p;
+
+    // Highest valid source texel. pc.size is the dst extent, so a stride-2 layer samples a source up
+    // to twice that size; clamping taps to pc.size - 1 would pin the right/bottom half of the output
+    // to one source column/row. Also bounded by the bound texture so an odd-sized source stays in range.
+    ivec2 srcExt;
+    if (stem) srcExt = textureSize(uLuma, 0);
+    else srcExt = textureSize(uSrc, 0).xy;
+    ivec2 srcHi = (stride2 ? min(pc.size * 2, srcExt) : pc.size) - 1;
+
+    vec4 acc[MAX_T];
+    for (int co = 0; co < outT; ++co) acc[co] = vec4(0.0);
+
+    // affine base offset (fp16 elements) = wBase + 9*cinT*coutT*16
+    uint affBase = pc.wBase + uint(9 * pc.cinT * pc.coutT * 16);
+
+    for (int k = 0; k < 9; ++k) {
+        ivec2 q = clamp(sp + TAP[k], ivec2(0), srcHi);
+
+        if (stem) {
+            float luma = texelFetch(uLuma, q, 0).r;
+            vec4 x = vec4((luma - 0.2139) * 1.4961 + 0.7695, 0.0, 0.0, 0.0);
+            for (int co = 0; co < outT; ++co)
+                acc[co] += convMat(k, 0, co) * x;
+        } else {
+            for (int ci = 0; ci < pc.cinT; ++ci) {
+                vec4 x = texelFetch(uSrc, ivec3(q, ci), 0);
+                for (int co = 0; co < outT; ++co)
+                    acc[co] += convMat(k, ci, co) * x;
+            }
+        }
+    }
+
+    if (resid) {
+        acc[0] += texelFetch(uSrc, ivec3(p, 0), 0);
+    }
+
+    for (int co = 0; co < outT; ++co) {
+        vec4 bias = vec4(float(W[affBase + uint((0 * pc.coutT + co) * 4 + 0)]),
+                         float(W[affBase + uint((0 * pc.coutT + co) * 4 + 1)]),
+                         float(W[affBase + uint((0 * pc.coutT + co) * 4 + 2)]),
+                         float(W[affBase + uint((0 * pc.coutT + co) * 4 + 3)]));
+        vec4 scale = vec4(float(W[affBase + uint((1 * pc.coutT + co) * 4 + 0)]),
+                          float(W[affBase + uint((1 * pc.coutT + co) * 4 + 1)]),
+                          float(W[affBase + uint((1 * pc.coutT + co) * 4 + 2)]),
+                          float(W[affBase + uint((1 * pc.coutT + co) * 4 + 3)]));
+        vec4 offset = vec4(float(W[affBase + uint((2 * pc.coutT + co) * 4 + 0)]),
+                           float(W[affBase + uint((2 * pc.coutT + co) * 4 + 1)]),
+                           float(W[affBase + uint((2 * pc.coutT + co) * 4 + 2)]),
+                           float(W[affBase + uint((2 * pc.coutT + co) * 4 + 3)]));
+
+        vec4 v = (acc[co] - bias) * scale + offset;
+        if (doRelu) v = max(v, vec4(0.0));
+        imageStore(uDst, ivec3(p, co), clamp(v, 0.0, 1.0));
+    }
+}
diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_correlation.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_correlation.comp new file mode 100644 index 000000000..cd343183c --- /dev/null +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_correlation.comp @@ -0,0 +1,138 @@ +#version 450 +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require + +layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; + +layout(set = 0, binding = 0) uniform sampler2DArray uSrc; +layout(set = 0, binding = 2) uniform sampler2DArray uSrc2; +layout(set = 0, binding = 36) uniform sampler2D uFlow; +layout(set = 0, binding = 1, rgba16f) uniform writeonly image2DArray uDst; +layout(set = 0, binding = 8, std430) readonly buffer Weights { float16_t W[]; }; + +layout(push_constant) uniform PC { + ivec2 size; + float t; + float mvScale; + uint wBase; + int cinT; + int coutT; + int flags; +} pc; + +const int V_LOGIT = 0; +const int V_MIX = 1; +int variant() { return (pc.flags >> 8) & 0xF; } +bool useWarp() { return (pc.flags & (1 << 12)) != 0; } + +const ivec2 TAP[9] = ivec2[]( + ivec2(-1,-1), ivec2(0,-1), ivec2(1,-1), + ivec2(-1, 0), ivec2(0, 0), ivec2(1, 0), + ivec2(-1, 1), ivec2(0, 1), ivec2(1, 1)); + +f16vec4 wVec4(uint o) { + return f16vec4(W[o + 0u], W[o + 1u], W[o + 2u], W[o + 3u]); +} +f16mat4 wMat4(uint o) { + return f16mat4(wVec4(o), wVec4(o + 4u), wVec4(o + 8u), wVec4(o + 12u)); +} + +f16vec4 sampleA(int L, vec2 uv) { return f16vec4(textureLod(uSrc, vec3(uv, float(L)), 0.0)); } +f16vec4 sampleBoff(int L, vec2 uv, ivec2 off) { + vec2 texel = 1.0 / vec2(textureSize(uSrc2, 0).xy); +
return f16vec4(textureLod(uSrc2, vec3(uv + vec2(off) * texel, float(L)), 0.0)); +} + +void main() { + ivec2 p = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(p, pc.size))) return; + + int cinT = pc.cinT; + vec2 sz = vec2(textureSize(uSrc, 0).xy); + vec2 ctr = vec2(p) + 0.5; + + vec2 uvA = ctr / sz; + vec2 uvB = ctr / sz; + if (useWarp()) { + vec4 flow = texture(uFlow, ctr / sz) * pc.mvScale; + vec2 fwd = flow.xy * pc.t; + vec2 bwd = flow.zw * (1.0 - pc.t); + uvA = (ctr + fwd) / sz; + uvB = (ctr + bwd) / sz; + } + + bool dualRef = (variant() == V_MIX); + + f16vec4 scoreA = f16vec4(0.0hf); + f16vec4 scoreA2 = f16vec4(0.0hf); + float16_t scoreA8 = 0.0hf; + f16vec4 scoreB = f16vec4(0.0hf); + f16vec4 scoreB2 = f16vec4(0.0hf); + float16_t scoreB8 = 0.0hf; + + for (int t = 0; t < 9; t++) { + float16_t sA = 0.0hf; + float16_t sB = 0.0hf; + for (int L = 0; L < cinT; L++) { + f16vec4 a = sampleA(L, uvA); + f16vec4 bt = sampleBoff(L, uvB, TAP[t]); + sA += float16_t(dot(a, bt)); + if (dualRef) { + f16vec4 a2 = sampleA(cinT + L, uvA); + sB += float16_t(dot(a2, bt)); + } + } + if (t < 4) { scoreA[t] = sA; scoreB[t] = sB; } + else if (t < 8) { scoreA2[t - 4] = sA; scoreB2[t - 4] = sB; } + else { scoreA8 = sA; scoreB8 = sB; } + } + + if (variant() == V_LOGIT) { + uint o = pc.wBase; + f16vec4 subA = wVec4(o); f16vec4 mulA = wVec4(o + 4u); f16vec4 addA = wVec4(o + 8u); + f16vec4 subB = wVec4(o + 12u); f16vec4 mulB = wVec4(o + 16u); f16vec4 addB = wVec4(o + 20u); + f16vec4 subC = wVec4(o + 24u); f16vec4 mulC = wVec4(o + 28u); f16vec4 addC = wVec4(o + 32u); + + f16vec4 costA = (scoreA - subA) * mulA + addA; + f16vec4 costB = (scoreA2 - subB) * mulB + addB; + float16_t costC = (scoreA8 - subC.x) * mulC.x + addC.x; + + imageStore(uDst, ivec3(p, 0), vec4(costA)); + imageStore(uDst, ivec3(p, 1), vec4(costB)); + imageStore(uDst, ivec3(p, 2), vec4(costC, 0.0, 0.0, 0.0)); + return; + } + + uint o = pc.wBase; + f16vec4 subA = wVec4(o); f16vec4 mulA = wVec4(o + 4u); f16vec4 addA 
= wVec4(o + 8u); + f16vec4 subB = wVec4(o + 12u); f16vec4 mulB = wVec4(o + 16u); f16vec4 addB = wVec4(o + 20u); + f16vec4 subAb = wVec4(o + 24u); f16vec4 mulAb = wVec4(o + 28u); f16vec4 addAb = wVec4(o + 32u); + f16vec4 subBb = wVec4(o + 36u); f16vec4 mulBb = wVec4(o + 40u); f16vec4 addBb = wVec4(o + 44u); + f16vec4 subC = wVec4(o + 48u); f16vec4 mulC = wVec4(o + 52u); f16vec4 addC = wVec4(o + 56u); + + f16vec4 nA0 = clamp((scoreA - subA ) * mulA + addA , f16vec4(0.0hf), f16vec4(1.0hf)); + f16vec4 nA1 = clamp((scoreA2 - subB ) * mulB + addB , f16vec4(0.0hf), f16vec4(1.0hf)); + f16vec4 nB0 = clamp((scoreB - subAb) * mulAb + addAb, f16vec4(0.0hf), f16vec4(1.0hf)); + f16vec4 nB1 = clamp((scoreB2 - subBb) * mulBb + addBb, f16vec4(0.0hf), f16vec4(1.0hf)); + float16_t nAC = clamp((scoreA8 - subC.x) * mulC.x + addC.x, 0.0hf, 1.0hf); + float16_t nBC = clamp((scoreB8 - subC.x) * mulC.x + addC.x, 0.0hf, 1.0hf); + + uint h = o + 60u; + for (int head = 0; head < 2; head++) { + f16mat4 M0 = wMat4(h); + f16mat4 M1 = wMat4(h + 16u); + f16mat4 M2 = wMat4(h + 32u); + f16mat4 M3 = wMat4(h + 48u); + f16vec4 V0 = wVec4(h + 64u); + f16vec4 V1 = wVec4(h + 68u); + f16vec4 oSub = wVec4(h + 72u); + f16vec4 oMul = wVec4(h + 76u); + f16vec4 oAdd = wVec4(h + 80u); + + f16vec4 acc = M0 * nA0 + M1 * nA1 + V0 * nAC + + M2 * nB0 + M3 * nB1 + V1 * nBC; + f16vec4 outv = (acc - oSub) * oMul + oAdd; + imageStore(uDst, ivec3(p, head), vec4(outv)); + + h += 84u; + } +} diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp new file mode 100644 index 000000000..7586ef8a7 --- /dev/null +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp @@ -0,0 +1,63 @@ +#version 450 + +layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; + +layout(set = 0, binding = 32) uniform sampler2D backColor; +layout(set = 0, binding = 33) uniform sampler2D fwdColor; +layout(set = 0, binding = 34) uniform sampler2D flowB; +layout(set = 0, 
binding = 35) uniform sampler2D flowF; +layout(set = 0, binding = 36) uniform sampler2D logits; +layout(set = 0, binding = 48, rgba16f) uniform writeonly image2D uDst; + +layout(push_constant) uniform PC { + ivec2 size; + float t; + float mvScale; + uint wBase; + int cinT; + int coutT; + int flags; +} pc; + +void main() { + ivec2 p = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(p, pc.size))) + return; + + vec2 texSize = vec2(textureSize(backColor, 0)); + vec2 numer = vec2(p) + 0.5; + vec2 uvc = numer / texSize; + + float m0 = pc.mvScale; + vec4 mvB = texture(flowB, uvc) * m0; + vec4 mvF = texture(flowF, uvc) * m0; + + float t = pc.t; + float a = 2.0 * t; + float b = 2.0 * (1.0 - t); + + vec2 c0 = (numer + mvB.xy * a) / texSize; + vec2 c1 = (numer + mvB.zw * b) / texSize; + vec2 c2 = (numer + mvF.xy * a) / texSize; + vec2 c3 = (numer + mvF.zw * b) / texSize; + + vec4 L = vec4(texture(logits, c0).x, + texture(logits, c1).y, + texture(logits, c2).z, + texture(logits, c3).w); + + vec4 w = exp(L); + w /= dot(w, vec4(1.0)); + + float wb = 1.0 - t; + float wf = t; + + vec4 acc = texture(backColor, c0) * (wb * w.x) + + texture(fwdColor, c1) * (wf * w.y) + + texture(backColor, c2) * (wb * w.z) + + texture(fwdColor, c3) * (wf * w.w); + + float den = wb * (w.x + w.z) + wf * (w.y + w.w) + 1e-8; + + imageStore(uDst, p, acc / den); +}
diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_occlusion.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_occlusion.comp
new file mode 100644
index 000000000..ffebca34e
--- /dev/null
+++ b/app/src/main/cpp/winlator/vk/shaders/cnn_occlusion.comp
@@ -0,0 +1,136 @@
+#version 450
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+
+// Per-pixel mask: sigmoid of a weighted 3x3 sum over uSrcA/uSrcB, zeroed below pc.occThresh, written
+// to uMip0. Each 16x16 workgroup covers a 32x32 tile and also writes its 2x box reductions to uMip1..5.
+
+layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in;
+
+layout(set = 0, binding = 0) uniform sampler2D uSrcA;
+layout(set = 0, binding = 2) uniform sampler2D uSrcB;
+
+layout(set = 0, binding = 1, r8) uniform writeonly image2D uMip0;
+layout(set = 0, binding = 3, r8) uniform writeonly image2D uMip1;
+layout(set = 0, binding = 4, r8) uniform writeonly image2D uMip2;
+layout(set = 0, binding = 5, r8) uniform writeonly image2D uMip3;
+layout(set = 0, binding = 6, r8) uniform writeonly image2D uMip4;
+layout(set = 0, binding = 7, r8) uniform writeonly image2D uMip5;
+
+layout(set = 0, binding = 8, std430) readonly buffer Weights { float16_t W[]; };
+
+layout(push_constant) uniform PC {
+    ivec2 size;
+    float t;
+    float mvScale;
+    uint wBase;
+    int cinT;
+    int coutT;
+    int flags;
+    float occThresh;
+} pc;
+
+const ivec2 OTAP[9] = ivec2[](
+    ivec2(-1,-1), ivec2(-1, 0), ivec2(-1, 1),
+    ivec2( 0,-1), ivec2( 0, 0), ivec2( 0, 1),
+    ivec2( 1,-1), ivec2( 1, 0), ivec2( 1, 1)
+);
+
+f16vec4 wA(int tap) {
+    int o = int(pc.wBase) + tap * 4;
+    return f16vec4(W[o], W[o + 1], W[o + 2], W[o + 3]);
+}
+f16vec4 wB(int tap) {
+    int o = int(pc.wBase) + 36 + tap * 4;
+    return f16vec4(W[o], W[o + 1], W[o + 2], W[o + 3]);
+}
+
+// Two shared tiles, ping-ponged between reduction levels. Reducing in place would let one invocation
+// overwrite cell [i][j] while another is still reading it as one of its 2x2 inputs (no barrier
+// separates those accesses); alternating the destination tile keeps every level's reads and writes disjoint.
+shared float lds[32][32];
+shared float lds2[16][16];
+
+void main() {
+    ivec2 wg = ivec2(gl_WorkGroupID.xy);
+    ivec2 lid = ivec2(gl_LocalInvocationID.xy);
+    ivec2 sz = pc.size;
+    ivec2 hi = sz - ivec2(1);
+
+    float16_t bias = W[int(pc.wBase) + 72];
+
+    for (int by = 0; by < 2; ++by) {
+        for (int bx = 0; bx < 2; ++bx) {
+            ivec2 p = wg * 32 + lid * 2 + ivec2(bx, by);
+
+            float16_t acc = bias;
+            for (int k = 0; k < 9; ++k) {
+                ivec2 c = clamp(p + OTAP[k], ivec2(0), hi);
+                f16vec4 a = f16vec4(texelFetch(uSrcA, c, 0));
+                f16vec4 b = f16vec4(texelFetch(uSrcB, c, 0));
+                acc += dot(a, wA(k));
+                acc += dot(b, wB(k));
+            }
+
+            float occ = 1.0 / (1.0 + exp(-float(acc)));
+            float mask = step(pc.occThresh, occ) * occ;
+
+            if (all(lessThan(p, sz)))
+                imageStore(uMip0, p, vec4(mask));
+
+            lds[lid.x * 2 + bx][lid.y * 2 + by] = mask;
+        }
+    }
+
+    barrier();
+
+    // 32x32 -> 16x16 (reads lds, writes lds2)
+    if (lid.x < 16 && lid.y < 16) {
+        int i = lid.x, j = lid.y;
+        float avg = 0.25 * (lds[2*i][2*j] + lds[2*i+1][2*j] +
+                            lds[2*i][2*j+1] + lds[2*i+1][2*j+1]);
+        ivec2 c = wg * 16 + ivec2(i, j);
+        if (all(lessThan(c, imageSize(uMip1)))) imageStore(uMip1, c, vec4(avg));
+        lds2[i][j] = avg;
+    }
+    barrier();
+
+    // 16x16 -> 8x8 (reads lds2, writes lds)
+    if (lid.x < 8 && lid.y < 8) {
+        int i = lid.x, j = lid.y;
+        float avg = 0.25 * (lds2[2*i][2*j] + lds2[2*i+1][2*j] +
+                            lds2[2*i][2*j+1] + lds2[2*i+1][2*j+1]);
+        ivec2 c = wg * 8 + ivec2(i, j);
+        if (all(lessThan(c, imageSize(uMip2)))) imageStore(uMip2, c, vec4(avg));
+        lds[i][j] = avg;
+    }
+    barrier();
+
+    // 8x8 -> 4x4 (reads lds, writes lds2)
+    if (lid.x < 4 && lid.y < 4) {
+        int i = lid.x, j = lid.y;
+        float avg = 0.25 * (lds[2*i][2*j] + lds[2*i+1][2*j] +
+                            lds[2*i][2*j+1] + lds[2*i+1][2*j+1]);
+        ivec2 c = wg * 4 + ivec2(i, j);
+        if (all(lessThan(c, imageSize(uMip3)))) imageStore(uMip3, c, vec4(avg));
+        lds2[i][j] = avg;
+    }
+    barrier();
+
+    // 4x4 -> 2x2 (reads lds2, writes lds)
+    if (lid.x < 2 && lid.y < 2) {
+        int i = lid.x, j = lid.y;
+        float avg = 0.25 * (lds2[2*i][2*j] + lds2[2*i+1][2*j] +
+                            lds2[2*i][2*j+1] + lds2[2*i+1][2*j+1]);
+        ivec2 c = wg * 2 + ivec2(i, j);
+        if (all(lessThan(c, imageSize(uMip4)))) imageStore(uMip4, c, vec4(avg));
+        lds[i][j] = avg;
+    }
+    barrier();
+
+    // 2x2 -> 1x1 (reads lds)
+    if (lid.x < 1 && lid.y < 1) {
+        float avg = 0.25 * (lds[0][0] + lds[1][0] + lds[0][1] + lds[1][1]);
+        ivec2 c = wg;
+        if (all(lessThan(c, imageSize(uMip5)))) imageStore(uMip5, c, vec4(avg));
+    }
+}
diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_pyramid.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_pyramid.comp new file mode 100644 index 000000000..460a0891e --- /dev/null +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_pyramid.comp @@ -0,0 +1,48 @@ +#version 450 +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require + +layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; + +layout(set = 0, binding = 0) uniform sampler2D uSrc; +layout(set = 0, binding = 1, r8) uniform writeonly image2D uDst; +layout(set = 0, binding = 2) uniform sampler2D uLumaPrev; + +layout(push_constant) uniform PC { + ivec2 size; + float t; + float mvScale; + uint wBase; + int cinT; + int coutT; + int flags; +} pc; + +const vec3 LUMA = vec3(0.298828125, 0.5869140625,
0.11395263671875); + +void main() { + ivec2 p = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(p, pc.size))) return; + + float luma; + + if ((pc.flags & 1) != 0) { + ivec2 srcSize = textureSize(uSrc, 0); + vec2 uv = (vec2(p) + 0.5) / vec2(srcSize); + f16vec3 c = f16vec3(textureLod(uSrc, uv, 0.0).rgb); + luma = float(dot(c, f16vec3(LUMA))); + } else { + ivec2 prevSize = textureSize(uLumaPrev, 0); + ivec2 b = p * 2; + ivec2 i00 = clamp(b, ivec2(0), prevSize - 1); + ivec2 i10 = clamp(b + ivec2(1, 0), ivec2(0), prevSize - 1); + ivec2 i01 = clamp(b + ivec2(0, 1), ivec2(0), prevSize - 1); + ivec2 i11 = clamp(b + ivec2(1, 1), ivec2(0), prevSize - 1); + float16_t l00 = float16_t(texelFetch(uLumaPrev, i00, 0).r); + float16_t l10 = float16_t(texelFetch(uLumaPrev, i10, 0).r); + float16_t l01 = float16_t(texelFetch(uLumaPrev, i01, 0).r); + float16_t l11 = float16_t(texelFetch(uLumaPrev, i11, 0).r); + luma = float((l00 + l10 + l01 + l11) * float16_t(0.25)); + } + + imageStore(uDst, p, vec4(luma)); +} diff --git a/app/src/main/cpp/winlator/vk/shaders/interpolate.frag b/app/src/main/cpp/winlator/vk/shaders/interpolate.frag index 8f3e5f770..fa8e484ac 100644 --- a/app/src/main/cpp/winlator/vk/shaders/interpolate.frag +++ b/app/src/main/cpp/winlator/vk/shaders/interpolate.frag @@ -1,132 +1,109 @@ #version 450 -// Motion-compensated frame synthesis. -// mode 0 (standard): warp prev/curr along the single backward flow, interpolate at phase t. -// mode 1 (bidirectional): prev warps along its own forward flow; forward-backward consistency -// gives a geometric (dis)occlusion signal. -// mode 2 (extrapolate): predict phase t past curr by continuing the backward flow forward; -// single-image warp, flow-divergence occlusion, no added latency. 
-// -// Fallback policy (occLo/occHi = the Smoothness slider): -// warps agree -> motion-compensated blend; disagree or geometric occlusion -> per-channel -// median of the two warps and the non-warped phase blend; off-frame -> time-nearest real -// pixel. The fallback must never collapse to one fixed endpoint or low-confidence regions -// stop advancing between real frames. - precision mediump float; precision highp int; layout(location = 0) in highp vec2 vUV; layout(location = 0) out vec4 outColor; -layout(set = 0, binding = 0) uniform mediump sampler2D prevFrame; // frame N-1 -layout(set = 0, binding = 1) uniform mediump sampler2D currFrame; // frame N -layout(set = 0, binding = 2) uniform highp sampler2D motionField; // backward curr->prev, .xy half-res px -layout(set = 0, binding = 3) uniform highp sampler2D motionFieldFwd;// forward prev->curr, .xy half-res px +layout(set = 0, binding = 0) uniform mediump sampler2D prevFrame; +layout(set = 0, binding = 1) uniform mediump sampler2D currFrame; +layout(set = 0, binding = 2) uniform highp sampler2D motionField; +layout(set = 0, binding = 3) uniform highp sampler2D motionFieldFwd; layout(push_constant) uniform PC { - vec2 resolution; // full-res target size (pixels) - float phase; // synthesis phase t in (0,1) - float occlusionLo; // consistency window: fully trusted at/below this delta - float occlusionHi; // fully snapped at/above this delta - float mode; // 0 standard, 1 bidirectional, 2 extrapolate + vec2 resolution; + float phase; + float occlusionLo; + float occlusionHi; + float mode; } pc; bool offFrame(highp vec2 uv) { return uv.x < 0.0 || uv.y < 0.0 || uv.x > 1.0 || uv.y > 1.0; } -vec2 med3(vec2 a, vec2 b, vec2 c) { return max(min(a, b), min(max(a, b), c)); } -vec3 med3v(vec3 a, vec3 b, vec3 c) { return max(min(a, b), min(max(a, b), c)); } - -// 3x3 median of the half-res flow field (kills block-match outliers before warping). 
-vec2 sampleMV(highp sampler2D field, highp vec2 uv, highp vec2 texel) { - vec2 r0 = med3(texture(field, uv + texel * vec2(-1.0, -1.0)).xy, - texture(field, uv + texel * vec2( 0.0, -1.0)).xy, - texture(field, uv + texel * vec2( 1.0, -1.0)).xy); - vec2 r1 = med3(texture(field, uv + texel * vec2(-1.0, 0.0)).xy, - texture(field, uv).xy, - texture(field, uv + texel * vec2( 1.0, 0.0)).xy); - vec2 r2 = med3(texture(field, uv + texel * vec2(-1.0, 1.0)).xy, - texture(field, uv + texel * vec2( 0.0, 1.0)).xy, - texture(field, uv + texel * vec2( 1.0, 1.0)).xy); - return med3(r0, r1, r2); +float luma1(vec3 c) { return dot(c, vec3(0.299, 0.587, 0.114)); } + +float valid(vec3 c, highp vec2 p) { + return (dot(c, c) > 1e-4 && !offFrame(p)) ? 1.0 : 0.0; } void main() { - float t = clamp(pc.phase, 0.0, 1.0); - float lo = pc.occlusionLo > 0.0 ? pc.occlusionLo : 0.06; - float hi = pc.occlusionHi > lo ? pc.occlusionHi : 0.25; + float t = clamp(pc.phase, 0.0, 1.0); + float steadier = clamp(pc.occlusionLo, 0.0, 1.0); - // motionField is half-res, stores displacement in half-res pixels. Normalize: mv * 2 / fullRes. highp vec2 norm = 2.0 / pc.resolution; - - vec2 mvB = sampleMV(motionField, vUV, norm); // backward curr->prev (9-tap median) + vec2 mvB = texture(motionField, vUV).xy; vec2 mvBn = mvB * norm; vec3 cCurrFlat = texture(currFrame, vUV).rgb; vec3 cPrevFlat = texture(prevFrame, vUV).rgb; - // Static guard at full resolution: a pixel whose colour is unchanged between the two real - // frames is static (HUD, text, sync bars, unmoving background) and must never be warped. This - // is per-pixel and exact, so it catches thin high-contrast overlays that the coarse block-match - // static mask (motionField.z) misses next to moving content. 
- float staticMask = texture(motionField, vUV).z; - float staticPix = max(staticMask, 1.0 - smoothstep(0.02, 0.06, length(cCurrFlat - cPrevFlat))); + vec2 maskConf = texture(motionField, vUV).zw; + float staticMask = maskConf.x; + float staticPix = max(staticMask, 1.0 - smoothstep(0.02, 0.06, length(cCurrFlat - cPrevFlat))); + float uniq = smoothstep(0.08, 0.35, maskConf.y); if (pc.mode > 1.5) { - // Extrapolation: out(x, N+t) = curr(x + t*mvB(x)). Linear motion only. highp vec2 srcPos = vUV + t * mvBn; vec3 cWarp = texture(currFrame, srcPos).rgb; - // The flow at the source must agree with the flow here, else this pixel is being - // revealed and the warp would smear the object. Motion-proportional tolerance. - vec2 mvBsrc = sampleMV(motionField, srcPos, norm); + float v = valid(cWarp, srcPos); + cWarp = mix(cCurrFlat, cWarp, v); + + vec2 mvBsrc = texture(motionField, srcPos).xy; vec2 dv = mvB - mvBsrc; - float tolE = 0.01 * dot(mvB, mvB) + 0.5; - float occ = smoothstep(tolE, 4.0 * tolE + 2.0, dot(dv, dv)); - if (offFrame(srcPos)) occ = 1.0; - occ = max(occ, staticPix); // static overlays / unchanged pixels stay anchored - outColor = vec4(clamp(mix(cWarp, cCurrFlat, occ), 0.0, 1.0), 1.0); + float tolE = 0.05 * dot(mvB, mvB) + 2.0; + float occ = smoothstep(tolE, 6.0 * tolE + 6.0, dot(dv, dv)); + float relTol = 0.10 * dot(mvB, mvB) + 4.0; + float reliable = (1.0 - smoothstep(relTol, 3.0 * relTol, dot(dv, dv))) * uniq * v; + + vec3 cPred = mix(cCurrFlat, cWarp, reliable); + vec3 col = mix(cWarp, cPred, occ); + + vec2 txE = norm * 0.5; + vec3 blurE = (texture(currFrame, srcPos + vec2(txE.x, 0.0)).rgb + + texture(currFrame, srcPos - vec2(txE.x, 0.0)).rgb + + texture(currFrame, srcPos + vec2(0.0, txE.y)).rgb + + texture(currFrame, srcPos - vec2(0.0, txE.y)).rgb) * 0.25; + col += 0.55 * v * clamp(cWarp - blurE, -0.25, 0.25); + + float staticHold = staticMask * (1.0 - smoothstep(1.0, 4.0, dot(mvB, mvB))); + col = mix(col, cCurrFlat, staticHold); + outColor = 
vec4(clamp(col, 0.0, 1.0), 1.0); return; } - // ---- INTERPOLATION ---- - highp vec2 currPos = vUV - (1.0 - t) * mvBn; // curr sampled along the backward flow - highp vec2 prevPos; - float occGeo = 0.0; - - if (pc.mode > 0.5) { - // Bidirectional: prev warps along its own forward flow; |mvB+mvF| ~ 0 for a coherent - // feature. The forward-backward residual is compared against a motion-proportional - // tolerance wide enough to ignore plain block-match search noise (~1px). - vec2 mvF = texture(motionFieldFwd, vUV).xy; - prevPos = vUV - t * (mvF * norm); - vec2 fbv = mvB + mvF; - float tol = 0.05 * (dot(mvB, mvB) + dot(mvF, mvF)) + 2.0; - occGeo = smoothstep(tol, 4.0 * tol + 4.0, dot(fbv, fbv)); - } else { - prevPos = vUV + t * mvBn; // single backward flow warps both - } - - vec3 cPrev = texture(prevFrame, prevPos).rgb; - vec3 cCurr = texture(currFrame, currPos).rgb; - - // Seam: RGB delta between the two motion-compensated samples, scaled ~[0,1]. - float disagree = length(cPrev - cCurr) * 0.5774; - float seam = smoothstep(lo, hi, disagree); - float fade = max(seam, occGeo); - - vec3 warped = mix(cPrev, cCurr, t); // where the warps agree - vec3 dissolve = mix(cPrevFlat, cCurrFlat, t); // non-warped phase blend - vec3 nearest = (t < 0.5) ? cPrevFlat : cCurrFlat; // time-nearest real frame (sharp) - // Per-channel median of the two one-sided warps and the dissolve, biased toward the sharp - // nearest real frame as the seam strengthens. The median alone can settle on the dissolve (a - // 50/50 blend = visible double-image); leaning it toward the nearest frame keeps strong seams - // sharp instead of ghosted, while good-flow regions (fade~0) are untouched. 
- vec3 robust = mix(med3v(cPrev, cCurr, dissolve), nearest, fade); - - vec3 col = mix(warped, robust, fade); - if (offFrame(prevPos) || offFrame(currPos)) col = nearest; - col = mix(col, cCurrFlat, staticPix); // static overlays / unchanged pixels: unwarped + highp vec2 uvA = vUV + t * mvBn; + highp vec2 uvB = vUV - (1.0 - t) * mvBn; + vec3 cA = texture(prevFrame, uvA).rgb; + vec3 cB = texture(currFrame, uvB).rgb; + float tolE = 0.05 * dot(mvB, mvB) + 2.0; + float hiE = 6.0 * tolE + 6.0; + vec2 dA = mvB - texture(motionField, uvA).xy; + vec2 dB = mvB - texture(motionField, uvB).xy; + float occA = 1.0 - smoothstep(tolE, hiE, dot(dA, dA)); + float occB = 1.0 - smoothstep(tolE, hiE, dot(dB, dB)); + float lA = (occA - 1.0) * 16.0 + (valid(cA, uvA) - 1.0) * 24.0; + float lB = (occB - 1.0) * 16.0 + (valid(cB, uvB) - 1.0) * 24.0; + float mE = max(lA, lB); + float wA = (1.0 - t) * exp(lA - mE); + float wB = t * exp(lB - mE); + float wsum = wA + wB + 1e-6; + float selB = wB / wsum; + vec3 col = (cA * wA + cB * wB) / wsum; + + vec3 repeat = (t < 0.5) ? 
cPrevFlat : cCurrFlat; + col = mix(repeat, col, uniq * (1.0 - 0.30 * steadier)); + + vec2 tx = norm * 0.5; + vec3 blur = (texture(currFrame, uvB + vec2(tx.x, 0.0)).rgb + + texture(currFrame, uvB - vec2(tx.x, 0.0)).rgb + + texture(currFrame, uvB + vec2(0.0, tx.y)).rgb + + texture(currFrame, uvB - vec2(0.0, tx.y)).rgb) * 0.25; + float kdet = (0.55 - 0.30 * steadier) * selB * (1.0 - smoothstep(9.0, 64.0, dot(mvB, mvB))); + col += kdet * clamp(cB - blur, -0.25, 0.25); + + col = mix(col, cCurrFlat, staticPix); outColor = vec4(clamp(col, 0.0, 1.0), 1.0); } diff --git a/app/src/main/cpp/winlator/vk/shaders/motion.comp b/app/src/main/cpp/winlator/vk/shaders/motion.comp index 366e22e52..df07f7b7d 100644 --- a/app/src/main/cpp/winlator/vk/shaders/motion.comp +++ b/app/src/main/cpp/winlator/vk/shaders/motion.comp @@ -1,98 +1,116 @@ #version 450 -// Half-res luma block-matching (three-step search) → backward flow field (curr->prev) -// consumed by interpolate.frag. fp16 deltas, fp32 cost accumulation. Needs shaderFloat16. - #extension GL_EXT_shader_explicit_arithmetic_types_float16 : require layout(local_size_x = 8, local_size_y = 8) in; -layout(set = 0, binding = 0) uniform sampler2D prevFrame; // frame N-1 (full res, SHADER_READ_ONLY) -layout(set = 0, binding = 1) uniform sampler2D currFrame; // frame N (full res, SHADER_READ_ONLY) -// rgba16f (not rg16f) is a *mandatory* storage-image format, so this avoids -// requiring the shaderStorageImageExtendedFormats device feature. Only .xy is used. 
-layout(set = 0, binding = 2, rgba16f) uniform writeonly image2D motionField; // half res, STORAGE +layout(set = 0, binding = 0) uniform sampler2D prevFrame; +layout(set = 0, binding = 1) uniform sampler2D currFrame; +layout(set = 0, binding = 2) uniform sampler2D coarseFlow; +layout(set = 0, binding = 3, rgba16f) uniform writeonly image2D motionField; layout(push_constant) uniform PC { - ivec2 mvSize; // motionField dimensions (~= frameSize / 2) - vec2 invMvSize; // 1.0 / mvSize, for normalized texture sampling - float mvScale; // scales the stored vector (normally 1.0) - float minStep; // lowest TSS step (quality preset): 1 = full search, larger = coarser/faster - float _pad1; + ivec2 mvSize; + vec2 invMvSize; + float mvScale; + float minStep; + float upscale; float _pad2; } pc; -// ---- Search / tiling parameters (compile-time so the LDS tiles can be sized) ---- -const int LS = 8; // == local_size_x/y -const int BR = 2; // block radius -> (2*BR+1)^2 = 25-tap SSD window -const int RMAX = 15; // max displacement reachable by TSS steps 8+4+2+1 +const int LS = 8; +const int BR = 2; +const int RMAX = 15; +const int FR = 5; +const float WC = 3.0; -const int TILE_P = LS + 2 * (RMAX + BR); // prev tile side = 8 + 34 = 42 -const int TILE_C = LS + 2 * BR; // curr tile side = 8 + 4 = 12 +const int TILE_P = LS + 2 * (RMAX + BR); // 42 +const int TILE_C = LS + 2 * BR; // 12 -shared float16_t sPrev[TILE_P * TILE_P]; // ~3.5 KB -shared float16_t sCurr[TILE_C * TILE_C]; // ~0.3 KB +shared f16vec2 sPrev[TILE_P * TILE_P]; +shared f16vec2 sCurr[TILE_C * TILE_C]; -float16_t luma(vec3 c) { - return float16_t(dot(c, vec3(0.299, 0.587, 0.114))); +float luma(vec3 c) { + vec3 e = exp2(log2(max(c, vec3(1e-6))) * 0.45454547); + return dot(e, vec3(0.299, 0.587, 0.114)); } -// SSD between the curr block at this work item and the prev block displaced by d. -// All reads stay inside the cached tiles by construction (see index ranges below). 
float blockCost(ivec2 l, ivec2 cCenter, ivec2 d) { float cost = 0.0; for (int by = -BR; by <= BR; ++by) { for (int bx = -BR; bx <= BR; ++bx) { ivec2 cc = cCenter + ivec2(bx, by); ivec2 pp = l + ivec2(RMAX + BR) + d + ivec2(bx, by); - float16_t dv = sCurr[cc.y * TILE_C + cc.x] - sPrev[pp.y * TILE_P + pp.x]; - cost += float(dv * dv); // fp16 delta, fp32 accumulate + vec2 dv = vec2(sCurr[cc.y * TILE_C + cc.x]) - vec2(sPrev[pp.y * TILE_P + pp.x]); + cost += dv.x * dv.x + WC * dv.y * dv.y; } } return cost; } +vec2 coarseAt(ivec2 q) { + return pc.upscale * texture(coarseFlow, (vec2(q) + 0.5) * pc.invMvSize).xy; +} + +vec2 descAt(sampler2D img, vec2 uv, bool aa) { + vec2 ts = pc.invMvSize; + float c; + if (aa) { + vec2 o = 0.25 * ts; + c = 0.25 * (luma(textureLod(img, uv + vec2(-o.x, -o.y), 0.0).rgb) + + luma(textureLod(img, uv + vec2( o.x, -o.y), 0.0).rgb) + + luma(textureLod(img, uv + vec2(-o.x, o.y), 0.0).rgb) + + luma(textureLod(img, uv + vec2( o.x, o.y), 0.0).rgb)); + } else { + c = luma(textureLod(img, uv, 0.0).rgb); + } + float m = 0.25 * (luma(textureLod(img, uv + vec2(ts.x, 0.0), 0.0).rgb) + + luma(textureLod(img, uv - vec2(ts.x, 0.0), 0.0).rgb) + + luma(textureLod(img, uv + vec2(0.0, ts.y), 0.0).rgb) + + luma(textureLod(img, uv - vec2(0.0, ts.y), 0.0).rgb)); + return vec2(c, c - m); +} + void main() { ivec2 wgOrigin = ivec2(gl_WorkGroupID.xy) * LS; ivec2 prevOrigin = wgOrigin - ivec2(RMAX + BR); ivec2 currOrigin = wgOrigin - ivec2(BR); - uint li = gl_LocalInvocationIndex; // 0..63 - const uint THREADS = uint(LS * LS); // 64 - - // Cooperative load of the prev-frame luma tile (sampled at the half-res grid; - // linear filtering of the full-res texture gives the 2x downsample for free). 
- for (uint i = li; i < uint(TILE_P * TILE_P); i += THREADS) { - int lx = int(i) % TILE_P; - int ly = int(i) / TILE_P; - vec2 uv = (vec2(prevOrigin + ivec2(lx, ly)) + 0.5) * pc.invMvSize; - sPrev[i] = luma(textureLod(prevFrame, uv, 0.0).rgb); + uint li = gl_LocalInvocationIndex; + const uint THREADS = uint(LS * LS); + bool fine = pc.upscale > 0.0; + + int tLo = fine ? (RMAX - FR) : 0; + int tSpan = fine ? (LS + 2 * (FR + BR)) : TILE_P; + for (uint i = li; i < uint(tSpan * tSpan); i += THREADS) { + int lx = tLo + int(i) % tSpan; + int ly = tLo + int(i) / tSpan; + ivec2 tp = prevOrigin + ivec2(lx, ly); + vec2 w = fine ? coarseAt(tp) : vec2(0.0); + vec2 uv = (vec2(tp) + w + 0.5) * pc.invMvSize; + sPrev[ly * TILE_P + lx] = f16vec2(descAt(prevFrame, uv, !fine)); } - // Cooperative load of the curr-frame luma tile. for (uint i = li; i < uint(TILE_C * TILE_C); i += THREADS) { int lx = int(i) % TILE_C; int ly = int(i) / TILE_C; vec2 uv = (vec2(currOrigin + ivec2(lx, ly)) + 0.5) * pc.invMvSize; - sCurr[i] = luma(textureLod(currFrame, uv, 0.0).rgb); + sCurr[i] = f16vec2(descAt(currFrame, uv, !fine)); } barrier(); ivec2 p = ivec2(gl_GlobalInvocationID.xy); - // NOTE: out-of-range items still participated in the cooperative load + barrier - // above; only the store is guarded. if (p.x >= pc.mvSize.x || p.y >= pc.mvSize.y) return; - ivec2 l = ivec2(gl_LocalInvocationID.xy); // 0..LS-1 - ivec2 cCenter = l + ivec2(BR); // this item's center in the curr tile + ivec2 l = ivec2(gl_LocalInvocationID.xy); + ivec2 cCenter = l + ivec2(BR); - // Three-step search seeded at (0,0). (A future pyramid level would seed from the - // upsampled coarse MV instead — the search code is unchanged.) ivec2 center = ivec2(0); ivec2 bestD = ivec2(0); float bestCost = blockCost(l, cCenter, ivec2(0)); int minStep = clamp(int(pc.minStep), 1, 8); - for (int step = 8; step >= minStep; step >>= 1) { + int startStep = fine ? 
2 : 8; + for (int step = startStep; step >= minStep; step >>= 1) { ivec2 localBestD = center; float localBest = bestCost; for (int sy = -1; sy <= 1; ++sy) { @@ -107,29 +125,34 @@ void main() { if (localBest < bestCost) { bestCost = localBest; bestD = localBestD; center = localBestD; } } - vec2 sub = vec2(bestD); - if (abs(bestD.x) < RMAX) { - float cl = blockCost(l, cCenter, bestD + ivec2(-1, 0)); - float cr = blockCost(l, cCenter, bestD + ivec2( 1, 0)); - float dd = cl - 2.0 * bestCost + cr; - if (dd > 0.0) sub.x += clamp(0.5 * (cl - cr) / dd, -0.5, 0.5); - } - if (abs(bestD.y) < RMAX) { - float cu = blockCost(l, cCenter, bestD + ivec2(0, -1)); - float cd = blockCost(l, cCenter, bestD + ivec2(0, 1)); - float dd = cu - 2.0 * bestCost + cd; - if (dd > 0.0) sub.y += clamp(0.5 * (cu - cd) / dd, -0.5, 0.5); + float wsum = 0.0; + vec2 disp = vec2(0.0); + for (int sy = -1; sy <= 1; ++sy) { + for (int sx = -1; sx <= 1; ++sx) { + ivec2 dd = bestD + ivec2(sx, sy); + if (abs(dd.x) > RMAX || abs(dd.y) > RMAX) continue; + float c = (sx == 0 && sy == 0) ? bestCost : blockCost(l, cCenter, dd); + float w = exp(-(c - bestCost) * 4.0); + disp += w * vec2(dd); + wsum += w; + } } + vec2 sub = (fine ? coarseAt(p) : vec2(0.0)) + disp / max(wsum, 1e-6); - // Static-overlay (HUD/text) confidence -> .z: near-zero temporal delta at zero displacement - // plus a strong spatial gradient marks high-contrast UI that must never be warped. Soft on - // both axes so the interp's bilinear read feathers the mask edge (no hard seams). 
- float c0 = float(sCurr[cCenter.y * TILE_C + cCenter.x]); - float p0 = float(sPrev[(l.y + RMAX + BR) * TILE_P + (l.x + RMAX + BR)]); - float gx = abs(float(sCurr[cCenter.y * TILE_C + cCenter.x + 1]) - c0); - float gy = abs(float(sCurr[(cCenter.y + 1) * TILE_C + cCenter.x]) - c0); + float c0 = float(sCurr[cCenter.y * TILE_C + cCenter.x].x); + float p0 = luma(textureLod(prevFrame, (vec2(p) + 0.5) * pc.invMvSize, 0.0).rgb); + float gx = abs(float(sCurr[cCenter.y * TILE_C + cCenter.x + 1].x) - c0); + float gy = abs(float(sCurr[(cCenter.y + 1) * TILE_C + cCenter.x].x) - c0); float staticC = (1.0 - smoothstep(0.012, 0.025, abs(c0 - p0))) * smoothstep(0.05, 0.12, gx + gy); - imageStore(motionField, p, vec4(sub * pc.mvScale, staticC, 0.0)); + const int OFF = 2 * BR + 1; /* Uniqueness probes one block width away from the best match. They must stay inside the part of sPrev that was actually loaded: radius FR in fine mode (reduced tile), RMAX otherwise. Bounding by RMAX alone read unwritten shared memory in fine mode. */ + float second = 3.0e30; + if (bestD.x + OFF <= (fine ? FR : RMAX)) second = min(second, blockCost(l, cCenter, bestD + ivec2( OFF, 0))); + if (bestD.x - OFF >= -(fine ? FR : RMAX)) second = min(second, blockCost(l, cCenter, bestD + ivec2(-OFF, 0))); + if (bestD.y + OFF <= (fine ? FR : RMAX)) second = min(second, blockCost(l, cCenter, bestD + ivec2(0, OFF))); + if (bestD.y - OFF >= -(fine ? FR : RMAX)) second = min(second, blockCost(l, cCenter, bestD + ivec2(0, -OFF))); + float conf = clamp((second - bestCost) / (second + 1.0e-3), 0.0, 1.0); /* relative margin between the best cost and the cheapest probe; stays near 1 when no probe was admissible */ + + imageStore(motionField, p, vec4(sub * pc.mvScale, staticC, conf)); } diff --git a/app/src/main/cpp/winlator/vk/shaders/motion_fp32.comp b/app/src/main/cpp/winlator/vk/shaders/motion_fp32.comp index 9d6957522..18d750e98 100644 --- a/app/src/main/cpp/winlator/vk/shaders/motion_fp32.comp +++ b/app/src/main/cpp/winlator/vk/shaders/motion_fp32.comp @@ -1,34 +1,35 @@ #version 450 -// fp32 fallback of motion.comp for devices without shaderFloat16; identical algorithm. 
- layout(local_size_x = 8, local_size_y = 8) in; -layout(set = 0, binding = 0) uniform sampler2D prevFrame; // frame N-1 (full res) -layout(set = 0, binding = 1) uniform sampler2D currFrame; // frame N (full res) -layout(set = 0, binding = 2, rgba16f) uniform writeonly image2D motionField; // half res, STORAGE +layout(set = 0, binding = 0) uniform sampler2D prevFrame; +layout(set = 0, binding = 1) uniform sampler2D currFrame; +layout(set = 0, binding = 2) uniform sampler2D coarseFlow; +layout(set = 0, binding = 3, rgba16f) uniform writeonly image2D motionField; layout(push_constant) uniform PC { ivec2 mvSize; vec2 invMvSize; float mvScale; - float minStep; // lowest TSS step (quality preset): 1 = full search, larger = coarser/faster - float _pad1; + float minStep; + float upscale; float _pad2; } pc; const int LS = 8; const int BR = 2; const int RMAX = 15; +const float WC = 3.0; const int TILE_P = LS + 2 * (RMAX + BR); // 42 const int TILE_C = LS + 2 * BR; // 12 -shared float sPrev[TILE_P * TILE_P]; -shared float sCurr[TILE_C * TILE_C]; +shared vec2 sPrev[TILE_P * TILE_P]; +shared vec2 sCurr[TILE_C * TILE_C]; float luma(vec3 c) { - return dot(c, vec3(0.299, 0.587, 0.114)); + vec3 e = exp2(log2(max(c, vec3(1e-6))) * 0.45454547); + return dot(e, vec3(0.299, 0.587, 0.114)); } float blockCost(ivec2 l, ivec2 cCenter, ivec2 d) { @@ -37,13 +38,36 @@ float blockCost(ivec2 l, ivec2 cCenter, ivec2 d) { for (int bx = -BR; bx <= BR; ++bx) { ivec2 cc = cCenter + ivec2(bx, by); ivec2 pp = l + ivec2(RMAX + BR) + d + ivec2(bx, by); - float dv = sCurr[cc.y * TILE_C + cc.x] - sPrev[pp.y * TILE_P + pp.x]; - cost += dv * dv; + vec2 dv = sCurr[cc.y * TILE_C + cc.x] - sPrev[pp.y * TILE_P + pp.x]; + cost += dv.x * dv.x + WC * dv.y * dv.y; } } return cost; } +vec2 coarseAt(ivec2 q) { + return pc.upscale * texture(coarseFlow, (vec2(q) + 0.5) * pc.invMvSize).xy; +} + +vec2 descAt(sampler2D img, vec2 uv, bool aa) { + vec2 ts = pc.invMvSize; + float c; + if (aa) { + vec2 o = 0.25 * ts; + c = 
0.25 * (luma(textureLod(img, uv + vec2(-o.x, -o.y), 0.0).rgb) + + luma(textureLod(img, uv + vec2( o.x, -o.y), 0.0).rgb) + + luma(textureLod(img, uv + vec2(-o.x, o.y), 0.0).rgb) + + luma(textureLod(img, uv + vec2( o.x, o.y), 0.0).rgb)); + } else { + c = luma(textureLod(img, uv, 0.0).rgb); + } + float m = 0.25 * (luma(textureLod(img, uv + vec2(ts.x, 0.0), 0.0).rgb) + + luma(textureLod(img, uv - vec2(ts.x, 0.0), 0.0).rgb) + + luma(textureLod(img, uv + vec2(0.0, ts.y), 0.0).rgb) + + luma(textureLod(img, uv - vec2(0.0, ts.y), 0.0).rgb)); + return vec2(c, c - m); +} + void main() { ivec2 wgOrigin = ivec2(gl_WorkGroupID.xy) * LS; ivec2 prevOrigin = wgOrigin - ivec2(RMAX + BR); @@ -51,18 +75,21 @@ void main() { uint li = gl_LocalInvocationIndex; const uint THREADS = uint(LS * LS); + bool fine = pc.upscale > 0.0; for (uint i = li; i < uint(TILE_P * TILE_P); i += THREADS) { int lx = int(i) % TILE_P; int ly = int(i) / TILE_P; - vec2 uv = (vec2(prevOrigin + ivec2(lx, ly)) + 0.5) * pc.invMvSize; - sPrev[i] = luma(textureLod(prevFrame, uv, 0.0).rgb); + ivec2 tp = prevOrigin + ivec2(lx, ly); + vec2 w = fine ? 
coarseAt(tp) : vec2(0.0); + vec2 uv = (vec2(tp) + w + 0.5) * pc.invMvSize; + sPrev[i] = descAt(prevFrame, uv, !fine); } for (uint i = li; i < uint(TILE_C * TILE_C); i += THREADS) { int lx = int(i) % TILE_C; int ly = int(i) / TILE_C; vec2 uv = (vec2(currOrigin + ivec2(lx, ly)) + 0.5) * pc.invMvSize; - sCurr[i] = luma(textureLod(currFrame, uv, 0.0).rgb); + sCurr[i] = descAt(currFrame, uv, !fine); } barrier(); @@ -92,29 +119,34 @@ void main() { if (localBest < bestCost) { bestCost = localBest; bestD = localBestD; center = localBestD; } } - vec2 sub = vec2(bestD); - if (abs(bestD.x) < RMAX) { - float cl = blockCost(l, cCenter, bestD + ivec2(-1, 0)); - float cr = blockCost(l, cCenter, bestD + ivec2( 1, 0)); - float dd = cl - 2.0 * bestCost + cr; - if (dd > 0.0) sub.x += clamp(0.5 * (cl - cr) / dd, -0.5, 0.5); - } - if (abs(bestD.y) < RMAX) { - float cu = blockCost(l, cCenter, bestD + ivec2(0, -1)); - float cd = blockCost(l, cCenter, bestD + ivec2(0, 1)); - float dd = cu - 2.0 * bestCost + cd; - if (dd > 0.0) sub.y += clamp(0.5 * (cu - cd) / dd, -0.5, 0.5); + float wsum = 0.0; + vec2 disp = vec2(0.0); + for (int sy = -1; sy <= 1; ++sy) { + for (int sx = -1; sx <= 1; ++sx) { + ivec2 dd = bestD + ivec2(sx, sy); + if (abs(dd.x) > RMAX || abs(dd.y) > RMAX) continue; + float c = (sx == 0 && sy == 0) ? bestCost : blockCost(l, cCenter, dd); + float w = exp(-(c - bestCost) * 4.0); + disp += w * vec2(dd); + wsum += w; + } } + vec2 sub = (fine ? coarseAt(p) : vec2(0.0)) + disp / max(wsum, 1e-6); - // Static-overlay (HUD/text) confidence -> .z: near-zero temporal delta at zero displacement - // plus a strong spatial gradient marks high-contrast UI that must never be warped. Soft on - // both axes so the interp's bilinear read feathers the mask edge (no hard seams). 
- float c0 = sCurr[cCenter.y * TILE_C + cCenter.x]; - float p0 = sPrev[(l.y + RMAX + BR) * TILE_P + (l.x + RMAX + BR)]; - float gx = abs(sCurr[cCenter.y * TILE_C + cCenter.x + 1] - c0); - float gy = abs(sCurr[(cCenter.y + 1) * TILE_C + cCenter.x] - c0); + float c0 = sCurr[cCenter.y * TILE_C + cCenter.x].x; + float p0 = luma(textureLod(prevFrame, (vec2(p) + 0.5) * pc.invMvSize, 0.0).rgb); + float gx = abs(sCurr[cCenter.y * TILE_C + cCenter.x + 1].x - c0); + float gy = abs(sCurr[(cCenter.y + 1) * TILE_C + cCenter.x].x - c0); float staticC = (1.0 - smoothstep(0.012, 0.025, abs(c0 - p0))) * smoothstep(0.05, 0.12, gx + gy); - imageStore(motionField, p, vec4(sub * pc.mvScale, staticC, 0.0)); + const int OFF = 2 * BR + 1; + float second = 3.0e30; + if (bestD.x + OFF <= RMAX) second = min(second, blockCost(l, cCenter, bestD + ivec2( OFF, 0))); + if (bestD.x - OFF >= -RMAX) second = min(second, blockCost(l, cCenter, bestD + ivec2(-OFF, 0))); + if (bestD.y + OFF <= RMAX) second = min(second, blockCost(l, cCenter, bestD + ivec2(0, OFF))); + if (bestD.y - OFF >= -RMAX) second = min(second, blockCost(l, cCenter, bestD + ivec2(0, -OFF))); + float conf = clamp((second - bestCost) / (second + 1.0e-3), 0.0, 1.0); + + imageStore(motionField, p, vec4(sub * pc.mvScale, staticC, conf)); } diff --git a/app/src/main/cpp/winlator/vk/vk_dispatch.c b/app/src/main/cpp/winlator/vk/vk_dispatch.c index f7fc75b4a..54e25c253 100644 --- a/app/src/main/cpp/winlator/vk/vk_dispatch.c +++ b/app/src/main/cpp/winlator/vk/vk_dispatch.c @@ -39,8 +39,6 @@ bool vkd_init(void* libvulkan_handle) { bool vkd_load_instance(VkInstance instance) { if (!vkd.GetInstanceProcAddr || instance == VK_NULL_HANDLE) return false; - // Device entry points resolve via vkGetInstanceProcAddr too — the loader trampolines. - // See vk_dispatch.h for the rationale. 
#define LOAD(name) \ vkd.name = (PFN_vk##name)vkd.GetInstanceProcAddr(instance, "vk" #name) @@ -153,6 +151,7 @@ bool vkd_load_instance(VkInstance instance) { LOAD(CmdPipelineBarrier); LOAD(CmdBlitImage); LOAD(CmdCopyBufferToImage); + LOAD(CmdCopyImageToBuffer); // Queue LOAD(QueueSubmit); diff --git a/app/src/main/cpp/winlator/vk/vk_dispatch.h b/app/src/main/cpp/winlator/vk/vk_dispatch.h index eaaab2daa..f6a4bf45b 100644 --- a/app/src/main/cpp/winlator/vk/vk_dispatch.h +++ b/app/src/main/cpp/winlator/vk/vk_dispatch.h @@ -1,11 +1,3 @@ -// Function-pointer dispatch for the compositor's Vulkan calls. -// -// Required because adrenotools-loaded drivers live in an isolated linker namespace and do -// not share global symbols with the system loader — every call must resolve through the -// libvulkan handle chosen at dlopen time. -// -// Init order: vkd_init(handle) -> vkCreateInstance(...) -> vkd_load_instance(instance). - #pragma once #ifndef VK_NO_PROTOTYPES @@ -15,7 +7,6 @@ #include typedef struct VkDispatch { - // Loader-level (resolved via dlsym + vkGetInstanceProcAddr(NULL, ...)) PFN_vkGetInstanceProcAddr GetInstanceProcAddr; PFN_vkCreateInstance CreateInstance; PFN_vkEnumerateInstanceExtensionProperties EnumerateInstanceExtensionProperties; @@ -130,6 +121,7 @@ typedef struct VkDispatch { PFN_vkCmdPipelineBarrier CmdPipelineBarrier; PFN_vkCmdBlitImage CmdBlitImage; PFN_vkCmdCopyBufferToImage CmdCopyBufferToImage; + PFN_vkCmdCopyImageToBuffer CmdCopyImageToBuffer; // Queue PFN_vkQueueSubmit QueueSubmit; @@ -147,15 +139,10 @@ extern VkDispatch vkd; bool vkd_init(void* libvulkan_handle); -// Loads device-level pointers via vkGetInstanceProcAddr too — the loader trampolines, which -// costs a few ns per call but avoids partitioning instance vs. device scope. bool vkd_load_instance(VkInstance instance); -// Must be called before dlclose so stale-pointer crashes fault on NULL. void vkd_unload(void); -// Redirect bare `vkFoo` names to the dispatch table. 
- #define vkGetInstanceProcAddr vkd.GetInstanceProcAddr #define vkCreateInstance vkd.CreateInstance #define vkEnumerateInstanceExtensionProperties vkd.EnumerateInstanceExtensionProperties @@ -259,6 +246,7 @@ void vkd_unload(void); #define vkCmdPipelineBarrier vkd.CmdPipelineBarrier #define vkCmdBlitImage vkd.CmdBlitImage #define vkCmdCopyBufferToImage vkd.CmdCopyBufferToImage +#define vkCmdCopyImageToBuffer vkd.CmdCopyImageToBuffer #define vkQueueSubmit vkd.QueueSubmit #define vkQueueWaitIdle vkd.QueueWaitIdle diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index dbf28962e..7659643e9 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -1,18 +1,4 @@ // Vulkan compositor for the X-server display path. -// -// Owns the entire native-side rendering state. Java JNI shims push scene snapshots and call -// frame submit; this file handles instance/device/swapchain/pipelines/sync. -// -// All vk* calls below resolve through vk_dispatch.h, which redirects them to the dlopen -// handle (system libvulkan or adrenotools-loaded Turnip) chosen at nativeCreate. -// -// Synchronization model: -// - One graphics queue, serialized externally via VkRenderer::queue_mutex (any thread submits). -// - VK_FRAMES_IN_FLIGHT in-flight frames, each with its own semaphores + fence + cmd buffer. -// - Scene state guarded by VkRenderer::scene_mutex. -// - Texture lifetime: created/uploaded synchronously (blocks ~ms); destroyed via per-frame -// graveyard processed on the render thread, and tracked so renderer teardown can drain -// native texture objects that Java handles have not explicitly destroyed yet. #include "vk_state.h" #include "vk_driver.h" @@ -33,7 +19,6 @@ #include #include -// SPIR-V shader byte arrays generated at build time by glslc + bin2c.cmake. 
#include "shaders/window_vert.spv.h" #include "shaders/window_frag.spv.h" #include "shaders/cursor_frag.spv.h" @@ -77,16 +62,17 @@ static void destroy_sgsr1_resources(VkRenderer* r); static void fg_destroy_resources(VkRenderer* r); static bool fg_ensure_resources(VkRenderer* r); static void wait_inflight_frames(VkRenderer* r); +static bool fg_worker_create_resources(VkRenderer* r); +static void fg_worker_destroy_resources(VkRenderer* r); +static void fg_worker_start(VkRenderer* r); +static void fg_worker_stop(VkRenderer* r); -// Frame-generation render modes (see fg_submit / DESIGN.md §2). typedef enum { - FG_MODE_HOLD = 0, // render composited scene -> history[curr]; do NOT present - FG_MODE_INTERP = 1, // motion + interpolate(prev,curr) -> swapchain; present - FG_MODE_PRESENT_LAST = 2, // blit history[curr] -> swapchain; present (the deferred real frame) + FG_MODE_HOLD = 0, + FG_MODE_INTERP = 1, + FG_MODE_PRESENT_LAST = 2, } FgMode; -// Result of manage_scene_targets(): whether the post-effect chain runs this frame, and whether -// it is SGSR1-led. Shared by the real-present path and the FG hold path. typedef struct { bool has_effects; bool wants_sgsr1; } SceneTargets; static bool create_quad_vbo(VkRenderer* r); static void destroy_quad_vbo(VkRenderer* r); @@ -395,7 +381,6 @@ static bool create_device(VkRenderer* r) { r->ext_ahb = ahb_ok; r->ext_ycbcr = has_ycbcr; - // Probe shaderFloat16; selects the fp16 vs fp32 motion shader (FG ships either way). r->fg_float16_supported = false; VkPhysicalDeviceShaderFloat16Int8FeaturesKHR f16_feat = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR }; @@ -508,16 +493,8 @@ static void query_device_caps(VkRenderer* r) { r->caps.is_adreno = (props.vendorID == 0x5143); // Qualcomm r->caps.limits = props.limits; - // Descriptor pool capacity. 
Vulkan doesn't spec-bound pool size — the only ceiling - // is driver memory, and each combined-image-sampler set is ~100-200 bytes on Adreno, - // so 4096 sets is ~1 MB upfront. Pick a number high enough that an X server with - // hundreds of short-lived pixmaps can't realistically exhaust it. Grow-on-exhaust - // is the proper unbounded answer and remains a separate TODO. r->caps.descriptor_pool_capacity = 4096; - // Offscreen color format. Prefer BGRA8 to match the upload format (no shader swizzle), - // fall back to RGBA8 if the driver doesn't expose BGRA as a sampled color attachment - // in OPTIMAL tiling. RGBA8 is spec-guaranteed for both features. const VkFormatFeatureFlags need = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; const VkFormat offscreen_candidates[2] = { @@ -533,7 +510,6 @@ static void query_device_caps(VkRenderer* r) { } } - // CPU-uploaded texture format. RGBA8 is spec-guaranteed; BGRA8 is optional. const VkFormatFeatureFlags upload_need = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT; r->caps.upload_format = VK_FORMAT_R8G8B8A8_UNORM; @@ -547,7 +523,6 @@ static void query_device_caps(VkRenderer* r) { } } - // AHB BGRA8 importability — diagnostic only; per-import paths still probe themselves. r->caps.ahb_bgra_supported = false; if (r->ext_ahb) { VkPhysicalDeviceExternalImageFormatInfo ext = { @@ -570,8 +545,6 @@ static void query_device_caps(VkRenderer* r) { VkImageFormatProperties2 out = { VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2 }; out.pNext = &ext_out; - // The 1.1 core entry point isn't statically exported by the Android Vulkan loader on all - // NDK targets; resolve dynamically and fall back to the KHR alias. 
PFN_vkGetPhysicalDeviceImageFormatProperties2 fnGetIfp2 =
             (PFN_vkGetPhysicalDeviceImageFormatProperties2)
                 vkGetInstanceProcAddr(r->instance, "vkGetPhysicalDeviceImageFormatProperties2");
@@ -623,18 +596,77 @@ static bool create_command_pool(VkRenderer* r) {
     return true;
 }
 
+// Tears down the frame-generation worker's per-frame objects and its command pool.
+// Each handle is tested before it is destroyed and reset afterwards, so this can also run on a
+// partially built set. Fences are destroyed here without being waited on first.
+static void fg_worker_destroy_resources(VkRenderer* r) {
+    for (uint32_t idx = 0; idx < 3; idx++) {
+        VkFrame* frame = &r->fg_worker_frames[idx];
+        if (frame->image_available) {
+            vkDestroySemaphore(r->device, frame->image_available, NULL);
+            frame->image_available = VK_NULL_HANDLE;
+        }
+        if (frame->in_flight) {
+            // Forget any fg_slot_fence entry that still aliases this fence.
+            for (uint32_t slot = 0; slot < 3; slot++) {
+                if (r->fg_slot_fence[slot] == frame->in_flight) r->fg_slot_fence[slot] = VK_NULL_HANDLE;
+            }
+            vkDestroyFence(r->device, frame->in_flight, NULL);
+            frame->in_flight = VK_NULL_HANDLE;
+        }
+        // Only the handle is cleared; no per-buffer free is issued here.
+        frame->cmd = VK_NULL_HANDLE;
+    }
+    if (r->fg_worker_pool) {
+        vkDestroyCommandPool(r->device, r->fg_worker_pool, NULL);
+        r->fg_worker_pool = VK_NULL_HANDLE;
+    }
+}
+
+// Creates the worker command pool plus, for each of the three worker frames, one primary
+// command buffer, an image-available semaphore and an initially signaled fence.
+// Returns true if the pool already exists or everything was created. If a per-frame object
+// fails, whatever was built so far is torn down again and false is returned.
+static bool fg_worker_create_resources(VkRenderer* r) {
+    if (r->fg_worker_pool) return true;
+
+    VkCommandPoolCreateInfo pool_info = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO};
+    pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
+    pool_info.queueFamilyIndex = r->graphics_queue_family;
+    if (vkCreateCommandPool(r->device, &pool_info, NULL, &r->fg_worker_pool) != VK_SUCCESS) return false;
+
+    VkCommandBufferAllocateInfo alloc_info = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO};
+    alloc_info.commandPool = r->fg_worker_pool;
+    alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+    alloc_info.commandBufferCount = 1;
+    VkSemaphoreCreateInfo sem_info = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO};
+    VkFenceCreateInfo fence_info = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO};
+    fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT;
+
+    for (uint32_t idx = 0; idx < 3; idx++) {
+        VkFrame* frame = &r->fg_worker_frames[idx];
+        bool ok = vkAllocateCommandBuffers(r->device, &alloc_info, &frame->cmd) == VK_SUCCESS
+               && vkCreateSemaphore(r->device, &sem_info, NULL, &frame->image_available) == VK_SUCCESS
+               && vkCreateFence(r->device, &fence_info, NULL, &frame->in_flight) == VK_SUCCESS;
+        if (!ok) {
+            fg_worker_destroy_resources(r);
+            return false;
+        }
+    }
+    r->fg_worker_index = 0;
+    return true;
+}
+
 // ============================================================
 // Descriptor pool
 // ============================================================
 
 static bool create_descriptor_pool(VkRenderer* r, uint32_t capacity) {
-    // Combined-image-samplers for textures/effects/FG, plus a small STORAGE_IMAGE budget for
-    // the frame-generation motion field (one writeonly image2D bound by motion.comp).
     VkDescriptorPoolSize ps[2] = {0};
     ps[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
     ps[0].descriptorCount = capacity;
     ps[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
-    ps[1].descriptorCount = 16; // backward + forward motion fields, 3 parities each
+    ps[1].descriptorCount = 32; // fine + coarse, backward + forward, 3 parities each
 
     VkDescriptorPoolCreateInfo ci = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
     ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
@@ -805,15 +810,16 @@ static bool create_pipeline_layouts(VkRenderer* r) {
     }
 
     // --- Frame generation layouts ---
-    // motion.comp set 0: binding0,1 = prev,curr samplers; binding2 = motion storage image. All COMPUTE.
-    VkDescriptorSetLayoutBinding mb[3] = {0};
+    // motion.comp set 0: binding0,1,2 = prev,curr,coarseFlow samplers; binding3 = motion storage. All COMPUTE.
+ VkDescriptorSetLayoutBinding mb[4] = {0}; mb[0].binding = 0; mb[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; mb[0].descriptorCount = 1; mb[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; mb[1] = mb[0]; mb[1].binding = 1; - mb[2].binding = 2; mb[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - mb[2].descriptorCount = 1; mb[2].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + mb[2] = mb[0]; mb[2].binding = 2; + mb[3].binding = 3; mb[3].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + mb[3].descriptorCount = 1; mb[3].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; VkDescriptorSetLayoutCreateInfo dl_m = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; - dl_m.bindingCount = 3; dl_m.pBindings = mb; + dl_m.bindingCount = 4; dl_m.pBindings = mb; if (vkCreateDescriptorSetLayout(r->device, &dl_m, NULL, &r->pipelines.fg_motion_layout) != VK_SUCCESS) { return false; } @@ -1199,10 +1205,6 @@ static bool create_swapchain(VkRenderer* r, uint32_t fallback_width, uint32_t fa free(fmts); r->swapchain_format = chosen.format; - // Honor the Java-requested present mode if the device supports it; otherwise fall back - // to FIFO (always supported per spec). target_present_mode is initialized to FIFO in - // nativeCreate, so a value-equality check is safe (no zero-sentinel ambiguity with - // VK_PRESENT_MODE_IMMEDIATE_KHR which is enum value 0). VkPresentModeKHR present_mode = VK_PRESENT_MODE_FIFO_KHR; VkPresentModeKHR want = r->target_present_mode; if (want != VK_PRESENT_MODE_FIFO_KHR) { @@ -1276,11 +1278,7 @@ static bool create_swapchain(VkRenderer* r, uint32_t fallback_width, uint32_t fa caps.currentTransform, pre_transform, present_mode); uint32_t image_count = caps.minImageCount + 1; - // FG runs VK_FRAMES_IN_FLIGHT frames CPU-ahead of the GPU; the swapchain needs at least one more - // image than that (FIF in flight + 1 scanning out) or vkAcquireNextImageKHR blocks and defeats the - // pipelining — even under FIFO. 
This is what lets FIF=3 absorb the GPU's per-frame composite spikes. if (image_count < VK_FRAMES_IN_FLIGHT + 1u) image_count = VK_FRAMES_IN_FLIGHT + 1u; - // Non-blocking modes (MAILBOX/IMMEDIATE) need headroom so FG interps aren't dropped at acquire. if (present_mode != VK_PRESENT_MODE_FIFO_KHR && image_count < 4) image_count = 4; if (caps.maxImageCount > 0 && image_count > caps.maxImageCount) image_count = caps.maxImageCount; if (image_count > VK_MAX_SWAPCHAIN_IMAGES) image_count = VK_MAX_SWAPCHAIN_IMAGES; @@ -1494,9 +1492,6 @@ static void destroy_one_offscreen(VkRenderer* r, VkOffscreen* o) { memset(o, 0, sizeof(*o)); } -// Builds offscreen[0], plus the second ping-pong target (~8 MB RGBA8 + view/sampler/descriptor/ -// framebuffer) only when need_second. At matching dims, a missing second target is added in -// place without disturbing offscreen[0]. static bool create_offscreen(VkRenderer* r, uint32_t w, uint32_t h, bool need_second) { bool dims_ok = r->offscreen_built && r->offscreen[0].width == w && r->offscreen[0].height == h; @@ -1560,11 +1555,7 @@ static void destroy_sgsr1_resources(VkRenderer* r) { // Graveyard processing // ============================================================ -// Detach the slot's pending-destroy list under scene_mutex. The Vulkan destroy calls -// (vkFreeDescriptorSets, vkDestroyImage, vkFreeMemory, AHardwareBuffer_release) can each -// take tens to hundreds of microseconds on Adreno, so doing them under scene_mutex stalls -// every scene producer (X server, input thread) for the full duration. Caller passes the -// detached array to destroy_graveyard_textures() after releasing the lock. +// Detach the slot's pending-destroy list under scene_mutex; caller destroys after unlocking. 
static void detach_graveyard_slot(VkRenderer* r, uint32_t slot_idx, VkTexture*** out_textures, uint32_t* out_count) { VkGraveSlot* slot = &r->graveyard[slot_idx]; @@ -1741,10 +1732,7 @@ static void push_window_constants(VkCommandBuffer cmd, VkPipelineLayout layout, static void compose_xform_for_window(float out[6], const float scene_xform[6], int wx, int wy, int ww, int wh) { - // Equivalent to GLRenderer.renderDrawable: tmpXForm1 = make(x, y, w, h); tmpXForm1 *= tmpXForm2 - // XForm.set(out, x, y, w, h): [w, 0, 0, h, x, y] float a[6] = { (float)ww, 0.0f, 0.0f, (float)wh, (float)wx, (float)wy }; - // 2x2 + translation multiply: result = a * scene_xform out[0] = a[0]*scene_xform[0] + a[1]*scene_xform[2]; out[1] = a[0]*scene_xform[1] + a[1]*scene_xform[3]; out[2] = a[2]*scene_xform[0] + a[3]*scene_xform[2]; @@ -1940,8 +1928,7 @@ static VkExtent2D compute_sgsr1_source_extent(VkRenderer* r, const VkScene* s) { return source; } -// (Re)build the effect ping-pong / SGSR1 targets for the current scene and report whether the -// effect chain runs. Shared by the real present and the FG hold. Caller holds render_mutex. +// (Re)build the effect ping-pong / SGSR1 targets for the current scene. Caller holds render_mutex. static SceneTargets manage_scene_targets(VkRenderer* r, const VkScene* snap) { bool wants_sgsr1 = scene_starts_with_sgsr1(snap); bool needs_fullres_offscreen = snap->effect_count > 0 @@ -1993,8 +1980,7 @@ static SceneTargets manage_scene_targets(VkRenderer* r, const VkScene* snap) { return st; } -// Record the composited scene into final_fb; final_offscreen picks the offscreen vs swapchain -// pipeline variant. Used by the real present (swapchain) and the FG hold (history offscreen). +// Record the composited scene into final_fb; final_offscreen picks the pipeline variant. 
static void record_scene_chain(VkRenderer* r, VkCommandBuffer cmd, const VkScene* snap, bool has_effects, bool wants_sgsr1, VkRenderPass final_pass, VkFramebuffer final_fb, @@ -2059,9 +2045,6 @@ static void record_scene_chain(VkRenderer* r, VkCommandBuffer cmd, const VkScene } } -// Live frames-in-flight (the drawer's Buffering dial). All VK_FRAMES_IN_FLIGHT frame slots stay -// allocated; only the rotation depth changes, so applying a new value needs no rebuild. Lower depth -// = the CPU waits on the GPU sooner = less buffered latency, more exposure to GPU spikes. static inline uint32_t vkr_active_fif(VkRenderer* r) { uint32_t fif = r->fg_target_fif; if (fif < 1u || fif > VK_FRAMES_IN_FLIGHT) fif = VK_FRAMES_IN_FLIGHT; @@ -2079,10 +2062,7 @@ static bool record_and_submit_frame(VkRenderer* r) { vkWaitForFences(r->device, 1, &f->in_flight, VK_TRUE, UINT64_MAX); - // Snapshot the scene under scene_mutex (cheap memcpy of a few KB), then release it so - // scene producers (texture destroys, X server window updates) don't stall behind the - // long acquire/record/submit/present below. render_mutex still serializes us against - // surface lifecycle changes, which keeps the swapchain handles stable for our use. + // Snapshot the scene under scene_mutex, then release it before the long acquire/submit/present. 
VkScene snap; VkTexture** dead = NULL; uint32_t dead_count = 0; @@ -2218,6 +2198,8 @@ static void fg_free_set(VkRenderer* r, VkDescriptorSet set) { pthread_mutex_unlock(&r->descriptor_mutex); } +static void fg_destroy_sig(VkRenderer* r); // content-dedup signature teardown (defined below) + static void fg_destroy_resources(VkRenderer* r) { if (!r->device) return; for (uint32_t p = 0; p < 3; p++) { @@ -2245,10 +2227,21 @@ static void fg_destroy_resources(VkRenderer* r) { if (mf->view) vkDestroyImageView(r->device, mf->view, NULL); if (mf->image) vkDestroyImage(r->device, mf->image, NULL); if (mf->memory) vkFreeMemory(r->device, mf->memory, NULL); + VkFgImage* cb_ = &r->fg_coarse[mi]; + if (cb_->view) vkDestroyImageView(r->device, cb_->view, NULL); + if (cb_->image) vkDestroyImage(r->device, cb_->image, NULL); + if (cb_->memory) vkFreeMemory(r->device, cb_->memory, NULL); + VkFgImage* cf_ = &r->fg_coarse_fwd[mi]; + if (cf_->view) vkDestroyImageView(r->device, cf_->view, NULL); + if (cf_->image) vkDestroyImage(r->device, cf_->image, NULL); + if (cf_->memory) vkFreeMemory(r->device, cf_->memory, NULL); } memset(r->fg_motion, 0, sizeof(r->fg_motion)); memset(r->fg_motion_fwd, 0, sizeof(r->fg_motion_fwd)); + memset(r->fg_coarse, 0, sizeof(r->fg_coarse)); + memset(r->fg_coarse_fwd, 0, sizeof(r->fg_coarse_fwd)); if (r->fg_sampler) { vkDestroySampler(r->device, r->fg_sampler, NULL); r->fg_sampler = VK_NULL_HANDLE; } + fg_destroy_sig(r); r->fg_built = false; r->fg_history_count = 0; r->fg_history_curr = 0; @@ -2333,6 +2326,137 @@ static bool fg_create_motion(VkRenderer* r, VkFgImage* o, uint32_t w, uint32_t h return true; } +// Coarse-to-fine motion: a plain low-res search (coarse, upscale=0) seeds a fine pass (upscale=2) +// that warps prev by the upsampled coarse flow as it loads its tile and resolves only the residual. 
+static void fg_motion_pass(VkRenderer* r, VkCommandBuffer cmd,
+                           VkDescriptorSet coarseSet, VkFgImage* coarseImg,
+                           VkDescriptorSet fineSet, VkFgImage* fineImg, float minStep) {
+    // Push-constant block sent to the compute pipeline before each of the two dispatches.
+    struct {
+        int32_t mvW, mvH;
+        float invW, invH, mvScale, minStep, upscale, p2;
+    } pc;
+    const VkPipelineLayout pipe_layout = r->pipelines.fg_motion_pipe_layout;
+    // One workgroup per 8x8 tile of the output image, rounded up.
+    const uint32_t coarse_gx = (coarseImg->width + 7u) / 8u;
+    const uint32_t coarse_gy = (coarseImg->height + 7u) / 8u;
+    const uint32_t fine_gx = (fineImg->width + 7u) / 8u;
+    const uint32_t fine_gy = (fineImg->height + 7u) / 8u;
+
+    pc.mvScale = 1.0f;
+    pc.minStep = minStep;
+    pc.p2 = 0.0f;
+
+    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, r->pipelines.fg_motion_pipeline);
+
+    // ---- Coarse pass (upscale = 0): writes coarseImg ----
+    // Previous contents are not preserved (UNDEFINED -> GENERAL) ahead of the shader write.
+    vkr_image_barrier(cmd, coarseImg->image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
+                      VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+                      0, VK_ACCESS_SHADER_WRITE_BIT);
+    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipe_layout, 0, 1, &coarseSet, 0, NULL);
+    pc.upscale = 0.0f;
+    pc.mvW = (int32_t)coarseImg->width;
+    pc.mvH = (int32_t)coarseImg->height;
+    pc.invW = 1.0f / (float)coarseImg->width;
+    pc.invH = 1.0f / (float)coarseImg->height;
+    vkCmdPushConstants(cmd, pipe_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pc), &pc);
+    vkCmdDispatch(cmd, coarse_gx, coarse_gy, 1);
+    // The fine dispatch samples the coarse result, so order compute-write before compute-read.
+    vkr_image_barrier(cmd, coarseImg->image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+                      VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
+
+    // ---- Fine pass (upscale = 2): writes fineImg ----
+    vkr_image_barrier(cmd, fineImg->image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
+                      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+                      0, VK_ACCESS_SHADER_WRITE_BIT);
+    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipe_layout, 0, 1, &fineSet, 0, NULL);
+    pc.upscale = 2.0f;
+    pc.mvW = (int32_t)fineImg->width;
+    pc.mvH = (int32_t)fineImg->height;
+    pc.invW = 1.0f / (float)fineImg->width;
+    pc.invH = 1.0f / (float)fineImg->height;
+    vkCmdPushConstants(cmd, pipe_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pc), &pc);
+    vkCmdDispatch(cmd, fine_gx, fine_gy, 1);
+    // Leave the fine field in a sampled layout for fragment-stage readers.
+    vkr_image_barrier(cmd, fineImg->image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
+                      VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
+}
+
+// Content-signature resources for duplicate detection: a tiny blit target + per-slot host buffers.
+#define FG_SIG_W 64u
+#define FG_SIG_H 36u
+static bool fg_create_sig(VkRenderer* r) {
+    r->fg_sig_supported = false;
+    r->fg_stage_slot = -1;
+    VkImageCreateInfo ic = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
+    ic.imageType = VK_IMAGE_TYPE_2D; ic.format = VK_FORMAT_R8G8B8A8_UNORM;
+    ic.extent.width = FG_SIG_W; ic.extent.height = FG_SIG_H; ic.extent.depth = 1;
+    ic.mipLevels = 1; ic.arrayLayers = 1; ic.samples = VK_SAMPLE_COUNT_1_BIT;
+    ic.tiling = VK_IMAGE_TILING_OPTIMAL;
+    ic.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+    ic.sharingMode = VK_SHARING_MODE_EXCLUSIVE; ic.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+    if (vkCreateImage(r->device, &ic, NULL, &r->fg_sig_img) != VK_SUCCESS) return false;
+    VkMemoryRequirements mr; vkGetImageMemoryRequirements(r->device, r->fg_sig_img, &mr);
+    VkMemoryAllocateInfo ai = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; ai.allocationSize = mr.size;
+    ai.memoryTypeIndex = vkr_find_memory_type(r, mr.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+    if (ai.memoryTypeIndex == UINT32_MAX) return false;
+    if (vkAllocateMemory(r->device, &ai, NULL, &r->fg_sig_img_mem) != VK_SUCCESS) return false;
+    vkBindImageMemory(r->device, r->fg_sig_img, r->fg_sig_img_mem, 0);
+    for (uint32_t i = 0; i < 3; i++) {
+        VkBufferCreateInfo bc = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
+        bc.size = (VkDeviceSize)FG_SIG_W * FG_SIG_H * 4; bc.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+        bc.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+        if (vkCreateBuffer(r->device, &bc, NULL, &r->fg_sig_buf[i]) != VK_SUCCESS)
+            return false;
+        VkMemoryRequirements br; vkGetBufferMemoryRequirements(r->device, r->fg_sig_buf[i], &br);
+        VkMemoryAllocateInfo bai = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; bai.allocationSize = br.size;
+        bai.memoryTypeIndex = vkr_find_memory_type(r, br.memoryTypeBits,
+            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
+        if (bai.memoryTypeIndex == UINT32_MAX) return false;
+        if (vkAllocateMemory(r->device, &bai, NULL, &r->fg_sig_buf_mem[i]) != VK_SUCCESS) return false;
+        vkBindBufferMemory(r->device, r->fg_sig_buf[i], r->fg_sig_buf_mem[i], 0);
+        if (vkMapMemory(r->device, r->fg_sig_buf_mem[i], 0, VK_WHOLE_SIZE, 0, &r->fg_sig_ptr[i]) != VK_SUCCESS) return false;
+        memset(r->fg_sig_ptr[i], 0, (size_t)FG_SIG_W * FG_SIG_H * 4);
+    }
+    r->fg_sig_supported = true;
+    return true;
+}
+
+static void fg_destroy_sig(VkRenderer* r) {
+    if (r->fg_sig_img) { vkDestroyImage(r->device, r->fg_sig_img, NULL); r->fg_sig_img = VK_NULL_HANDLE; }
+    if (r->fg_sig_img_mem) { vkFreeMemory(r->device, r->fg_sig_img_mem, NULL); r->fg_sig_img_mem = VK_NULL_HANDLE; }
+    for (uint32_t i = 0; i < 3; i++) {
+        if (r->fg_sig_buf[i]) { vkDestroyBuffer(r->device, r->fg_sig_buf[i], NULL); r->fg_sig_buf[i] = VK_NULL_HANDLE; }
+        if (r->fg_sig_buf_mem[i]) { vkFreeMemory(r->device, r->fg_sig_buf_mem[i], NULL); r->fg_sig_buf_mem[i] = VK_NULL_HANDLE; }
+        r->fg_sig_ptr[i] = NULL;
+    }
+    r->fg_sig_supported = false;
+    r->fg_stage_slot = -1;
+}
+
+// Record a downsample of history[slot] into fg_sig_buf[slot] (blit -> tiny image -> host buffer).
+// history[slot] is in SHADER_READ_ONLY_OPTIMAL on entry and is restored to it on exit.
+// The copy into the host buffer is followed by a transfer-write -> host-read barrier so the
+// CPU can read the mapped signature after waiting on the submission's fence.
+// No-op when the signature resources are unavailable (fg_sig_supported == false).
+static void fg_record_sig(VkRenderer* r, VkCommandBuffer cmd, uint32_t slot) {
+    if (!r->fg_sig_supported) return;
+    // history[slot] was just written by the offscreen composite (render-pass final layout
+    // SHADER_READ_ONLY_OPTIMAL); wait on those colour writes before the transfer read.
+    vkr_image_barrier(cmd, r->fg_history[slot].image,
+                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                      VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+    // The signature image is shared by all slots; its old contents are discarded each time.
+    vkr_image_barrier(cmd, r->fg_sig_img,
+                      VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                      VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                      0, VK_ACCESS_TRANSFER_WRITE_BIT);
+    // Full history image -> FG_SIG_W x FG_SIG_H, linearly filtered.
+    VkImageBlit blit = {0};
+    blit.srcSubresource = (VkImageSubresourceLayers){VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
+    blit.srcOffsets[1] = (VkOffset3D){(int32_t)r->fg_history[slot].width, (int32_t)r->fg_history[slot].height, 1};
+    blit.dstSubresource = (VkImageSubresourceLayers){VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
+    blit.dstOffsets[1] = (VkOffset3D){(int32_t)FG_SIG_W, (int32_t)FG_SIG_H, 1};
+    vkCmdBlitImage(cmd, r->fg_history[slot].image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                   r->fg_sig_img, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blit, VK_FILTER_LINEAR);
+    vkr_image_barrier(cmd, r->fg_sig_img,
+                      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                      VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                      VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+    VkBufferImageCopy cp = {0};
+    cp.imageSubresource = (VkImageSubresourceLayers){VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
+    cp.imageExtent = (VkExtent3D){FG_SIG_W, FG_SIG_H, 1};
+    vkCmdCopyImageToBuffer(cmd, r->fg_sig_img, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                           r->fg_sig_buf[slot], 1, &cp);
+    // fg_sig_buf[slot] is read by the CPU through its persistent mapping. Waiting on a fence
+    // orders execution but does not by itself make device writes visible to the host, so record
+    // the transfer-write -> host-read dependency on the buffer explicitly.
+    VkBufferMemoryBarrier to_host = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER};
+    to_host.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+    to_host.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
+    to_host.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    to_host.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    to_host.buffer = r->fg_sig_buf[slot];
+    to_host.offset = 0;
+    to_host.size = VK_WHOLE_SIZE;
+    vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0,
+                         0, NULL, 1, &to_host, 0, NULL);
+    // Restore the layout promised to the callers of this function.
+    vkr_image_barrier(cmd, r->fg_history[slot].image,
+                      VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                      VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
+                      VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_SHADER_READ_BIT);
+}
+
+// Count of signature pixels whose colour moved beyond the noise floor (a duplicate scores 0).
+static double fg_sig_delta(VkRenderer* r, uint32_t a, uint32_t b) {
+    const uint8_t* sig_a = (const uint8_t*)r->fg_sig_ptr[a];
+    const uint8_t* sig_b = (const uint8_t*)r->fg_sig_ptr[b];
+    // A missing mapping is reported as a very large delta, never as a duplicate.
+    if (sig_a == NULL || sig_b == NULL) return 1e9;
+
+    const uint32_t pixel_count = FG_SIG_W * FG_SIG_H;
+    uint32_t moved = 0;
+    for (uint32_t px = 0; px < pixel_count; px++) {
+        const uint8_t* ca = sig_a + (size_t)px * 4u;
+        const uint8_t* cb = sig_b + (size_t)px * 4u;
+        // Largest absolute difference over the first three bytes of the 4-byte pixel;
+        // the fourth byte does not take part in the comparison.
+        int peak = 0;
+        for (int c = 0; c < 3; c++) {
+            int d = abs((int)ca[c] - (int)cb[c]);
+            if (d > peak) peak = d;
+        }
+        // noise floor: only light dithering is <=4/channel; real motion exceeds it
+        if (peak > 4) moved++;
+    }
+    // 0 == identical re-present; >0 == distinct content frame
+    return (double)moved;
+}
+
 static bool fg_create_resources(VkRenderer* r, uint32_t w, uint32_t h) {
     VkSamplerCreateInfo si = {VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO};
     si.magFilter = VK_FILTER_LINEAR; si.minFilter = VK_FILTER_LINEAR;
@@ -2343,10 +2467,16 @@ static bool fg_create_resources(VkRenderer* r, uint32_t w, uint32_t h) {
     if (!fg_create_color_target(r, &r->fg_history[1], w, h)) goto fail;
     if (!fg_create_color_target(r, &r->fg_history[2], w, h)) goto fail;
     {
-        uint32_t mw = (w / 2) ? (w / 2) : 1u, mh = (h / 2) ? (h / 2) : 1u;
+        float fs = r->fg_flow_scale >= 0.2f ? (r->fg_flow_scale <= 1.0f ? r->fg_flow_scale : 1.0f) : 0.5f;
+        uint32_t mw = (uint32_t)((float)w * fs); if (mw < 1u) mw = 1u;
+        uint32_t mh = (uint32_t)((float)h * fs); if (mh < 1u) mh = 1u;
+        uint32_t cw = (mw / 2) ? (mw / 2) : 1u, ch = (mh / 2) ?
(mh / 2) : 1u; + r->fg_built_flow_scale = fs; for (uint32_t mi = 0; mi < 3; mi++) { if (!fg_create_motion(r, &r->fg_motion[mi], mw, mh)) goto fail; if (!fg_create_motion(r, &r->fg_motion_fwd[mi], mw, mh)) goto fail; + if (!fg_create_motion(r, &r->fg_coarse[mi], cw, ch)) goto fail; + if (!fg_create_motion(r, &r->fg_coarse_fwd[mi], cw, ch)) goto fail; } } memset(r->fg_slot_fence, 0, sizeof(r->fg_slot_fence)); @@ -2354,34 +2484,40 @@ static bool fg_create_resources(VkRenderer* r, uint32_t w, uint32_t h) { for (uint32_t p = 0; p < 3; p++) { r->fg_motion_set[p] = fg_alloc_set(r, r->pipelines.fg_motion_layout); r->fg_motion_set_fwd[p] = fg_alloc_set(r, r->pipelines.fg_motion_layout); + r->fg_coarse_set[p] = fg_alloc_set(r, r->pipelines.fg_motion_layout); + r->fg_coarse_set_fwd[p] = fg_alloc_set(r, r->pipelines.fg_motion_layout); r->fg_interp_set[p] = fg_alloc_set(r, r->pipelines.fg_interp_layout); r->fg_interp_set_deep[p] = fg_alloc_set(r, r->pipelines.fg_interp_layout); - if (!r->fg_motion_set[p] || !r->fg_motion_set_fwd[p] || !r->fg_interp_set[p] || !r->fg_interp_set_deep[p]) goto fail; + if (!r->fg_motion_set[p] || !r->fg_motion_set_fwd[p] || !r->fg_coarse_set[p] || !r->fg_coarse_set_fwd[p] || !r->fg_interp_set[p] || !r->fg_interp_set_deep[p]) goto fail; VkImageView prevV = r->fg_history[(p + 2u) % 3u].view; // curr=history[p], prev=history[(p+2)%3] VkImageView currV = r->fg_history[p].view; - // motion.comp set: b0 prev (sampled), b1 curr (sampled), b2 motion (storage, GENERAL) - VkDescriptorImageInfo mPrev = { r->fg_sampler, prevV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; - VkDescriptorImageInfo mCurr = { r->fg_sampler, currV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; - VkDescriptorImageInfo mMv = { VK_NULL_HANDLE, r->fg_motion[p].view, VK_IMAGE_LAYOUT_GENERAL }; - VkWriteDescriptorSet mw_[3] = {0}; - for (int b = 0; b < 3; b++) { mw_[b].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; mw_[b].dstSet = r->fg_motion_set[p]; mw_[b].dstBinding = (uint32_t)b; 
mw_[b].descriptorCount = 1; } - mw_[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; mw_[0].pImageInfo = &mPrev; - mw_[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; mw_[1].pImageInfo = &mCurr; - mw_[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; mw_[2].pImageInfo = &mMv; - vkUpdateDescriptorSets(r->device, 3, mw_, 0, NULL); - - // forward motion set: prev/curr SWAPPED so motion.comp emits the prev->curr flow into fg_motion_fwd. - VkDescriptorImageInfo fPrev = { r->fg_sampler, currV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; - VkDescriptorImageInfo fCurr = { r->fg_sampler, prevV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; - VkDescriptorImageInfo fMv = { VK_NULL_HANDLE, r->fg_motion_fwd[p].view, VK_IMAGE_LAYOUT_GENERAL }; - VkWriteDescriptorSet fw_[3] = {0}; - for (int b = 0; b < 3; b++) { fw_[b].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; fw_[b].dstSet = r->fg_motion_set_fwd[p]; fw_[b].dstBinding = (uint32_t)b; fw_[b].descriptorCount = 1; } - fw_[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; fw_[0].pImageInfo = &fPrev; - fw_[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; fw_[1].pImageInfo = &fCurr; - fw_[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; fw_[2].pImageInfo = &fMv; - vkUpdateDescriptorSets(r->device, 3, fw_, 0, NULL); + // 4-binding motion set: b0 prev, b1 curr, b2 coarseFlow (sampler; dummy on the coarse pass), + // b3 output (storage). Coarse pass writes fg_coarse; fine pass reads fg_coarse and writes fg_motion. 
+ VkDescriptorImageInfo sPrev = { r->fg_sampler, prevV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; + VkDescriptorImageInfo sCurr = { r->fg_sampler, currV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; + #define FG_MOTION_WRITE(SET, B0, B1, B2VIEW, B2LAYOUT, B3VIEW) do { \ + VkDescriptorImageInfo i0 = { r->fg_sampler, (B0), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; \ + VkDescriptorImageInfo i1 = { r->fg_sampler, (B1), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; \ + VkDescriptorImageInfo i2 = { r->fg_sampler, (B2VIEW), (B2LAYOUT) }; \ + VkDescriptorImageInfo i3 = { VK_NULL_HANDLE, (B3VIEW), VK_IMAGE_LAYOUT_GENERAL }; \ + VkWriteDescriptorSet w_[4] = {0}; \ + for (int b = 0; b < 4; b++) { w_[b].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; w_[b].dstSet = (SET); w_[b].dstBinding = (uint32_t)b; w_[b].descriptorCount = 1; } \ + w_[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; w_[0].pImageInfo = &i0; \ + w_[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; w_[1].pImageInfo = &i1; \ + w_[2].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; w_[2].pImageInfo = &i2; \ + w_[3].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; w_[3].pImageInfo = &i3; \ + vkUpdateDescriptorSets(r->device, 4, w_, 0, NULL); \ + } while (0) + // backward: coarse (prev,curr -> fg_coarse), fine (prev,curr,fg_coarse -> fg_motion) + FG_MOTION_WRITE(r->fg_coarse_set[p], prevV, currV, prevV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, r->fg_coarse[p].view); + FG_MOTION_WRITE(r->fg_motion_set[p], prevV, currV, r->fg_coarse[p].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, r->fg_motion[p].view); + // forward: prev/curr swapped, into fg_coarse_fwd / fg_motion_fwd + FG_MOTION_WRITE(r->fg_coarse_set_fwd[p], currV, prevV, currV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, r->fg_coarse_fwd[p].view); + FG_MOTION_WRITE(r->fg_motion_set_fwd[p], currV, prevV, r->fg_coarse_fwd[p].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, r->fg_motion_fwd[p].view); + #undef 
FG_MOTION_WRITE + (void)sPrev; (void)sCurr; // interpolate.frag set: b0 prev, b1 curr, b2 mvBwd, b3 mvFwd — all sampled (SHADER_READ). // Standard binds fg_motion as the (unread) b3 dummy; deep binds the real forward field. @@ -2404,6 +2540,11 @@ static bool fg_create_resources(VkRenderer* r, uint32_t w, uint32_t h) { vkUpdateDescriptorSets(r->device, 4, dw_, 0, NULL); } + // Content-dedup signature resources (best-effort; if it fails, dedup just stays disabled). + if (!fg_create_sig(r)) { fg_destroy_sig(r); VK_LOGW("FG content-dedup unavailable; running without it"); } + r->fg_stage_slot = -1; + r->fg_last_promote_ns = 0; + r->fg_dims.width = w; r->fg_dims.height = h; r->fg_history_curr = 0; r->fg_history_count = 0; @@ -2424,10 +2565,14 @@ static bool fg_ensure_resources(VkRenderer* r) { if (!r->pipelines_built) return false; if (r->fg_built && r->fg_dims.width == r->swapchain_extent.width - && r->fg_dims.height == r->swapchain_extent.height) { + && r->fg_dims.height == r->swapchain_extent.height + && fabsf(r->fg_built_flow_scale - r->fg_flow_scale) < 1e-4f) { return true; } wait_inflight_frames(r); + // The FG worker may have in-flight GPU work reading these resources (it submits outside the GL + // frame fences), so drain the whole queue before destroying them. Rare path (dims/flowScale change). + if (r->fg_gen_started) { pthread_mutex_lock(&r->queue_mutex); vkQueueWaitIdle(r->graphics_queue); pthread_mutex_unlock(&r->queue_mutex); } fg_destroy_resources(r); return fg_create_resources(r, r->swapchain_extent.width, r->swapchain_extent.height); } @@ -2445,8 +2590,7 @@ static void fg_restore_fence(VkRenderer* r, VkFrame* f) { } } -// Diagnostic cadence counters (render-thread only). Logged ~once/sec to verify FG is producing -// ~2 presents (interp + held real) per engine frame (hold). Cheap; safe to leave in. +// Diagnostic cadence counters (render-thread only). 
static uint64_t g_fg_holds = 0; static uint64_t g_fg_interp = 0; static uint64_t g_fg_plast = 0; @@ -2456,12 +2600,6 @@ static uint64_t g_fg_dropped = 0; #define FG_PRESENT_LEAD_NS 150000ull // wake this much before the deadline so the present latches this vblank // Advance the present deadline by one target period, then snap it to the panel vsync grid. -// The unsnapped accumulator keeps the average rate exact; the snap places each present on its -// own vblank, spread across the game interval. The snap grid must be anchor + k*refresh (the -// same grid no matter which Choreographer tick last set the anchor) — building it as -// anchor + k*period flips the grid phase with the anchor's tick parity whenever period spans -// more than one refresh, which is what bunched the 2x/3x presents (4x escaped only because -// its period equals one refresh). static uint64_t fg_compute_deadline(VkRenderer* r) { uint64_t period = r->fg_present_period_ns ? r->fg_present_period_ns : r->refresh_duration_ns; if (period == 0) { r->fg_present_deadline_ns = 0; r->fg_present_target_ns = 0; return 0; } @@ -2473,11 +2611,7 @@ static uint64_t fg_compute_deadline(VkRenderer* r) { uint64_t target = deadline; uint64_t vs = r->fg_display_period_ns ? r->fg_display_period_ns : r->refresh_duration_ns; uint64_t anchor = r->fg_vsync_anchor_ns; - // Snap only while the panel carries the target rate (vsync period <= present period). When - // an idle/power policy has dropped the panel below the target, snapping would quantize the - // presents down to the slow grid and SurfaceFlinger would never see the true content rate - // to ramp back up — keep presenting at the unsnapped target cadence instead (MAILBOX drops - // the surplus until the panel recovers). + // Snap only while the panel carries the target rate (vsync period <= present period). 
if (vs != 0 && anchor != 0 && deadline > anchor && vs <= period + period / 8u) { target = anchor + ((deadline - anchor + vs / 2u) / vs) * vs; if (target <= r->fg_present_target_ns) target = r->fg_present_target_ns + vs; // one present per vblank @@ -2553,16 +2687,44 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { destroy_graveyard_textures(r, dead, dead_count); SceneTargets st = manage_scene_targets(r, &snap); - uint32_t next = (r->fg_history_curr + 1u) % 3u; - VkFgImage* hist = &r->fg_history[next]; - if (r->fg_slot_fence[next] != VK_NULL_HANDLE) - vkWaitForFences(r->device, 1, &r->fg_slot_fence[next], VK_TRUE, UINT64_MAX); + // Content-dedup step 1: resolve the previous staged frame (deferred promote). + bool promoted = false; + if (r->fg_sig_supported && r->fg_stage_slot >= 0) { + uint32_t sslot = (uint32_t)r->fg_stage_slot; + if (r->fg_slot_fence[sslot] != VK_NULL_HANDLE) + vkWaitForFences(r->device, 1, &r->fg_slot_fence[sslot], VK_TRUE, UINT64_MAX); + double delta = fg_sig_delta(r, sslot, r->fg_history_curr); // = # signature pixels that moved + r->fg_last_sig_delta = delta; + uint64_t nowp = now_monotonic_ns(); + bool backstop = (r->fg_last_promote_ns != 0) && (nowp - r->fg_last_promote_ns > 100000000ull); + if (delta > 0.0 || r->fg_history_count < 2u || backstop) { + r->fg_history_curr = sslot; + if (r->fg_history_count < 3) r->fg_history_count++; + r->fg_motion_valid = false; r->fg_motion_fwd_valid = false; + r->fg_last_promote_ns = nowp; + r->fg_promote_ns = nowp; + r->fg_distinct++; + r->fg_promote_seq++; // worker jobs snapshot this to drop a present whose pair was reused + promoted = true; + } else { + r->fg_dup_dropped++; + } + r->fg_stage_slot = -1; + } + + // --- Step 2: composite the incoming frame into a staging slot (neither curr nor prev). ------- + // When dedup is unavailable, fall back to the original behavior (advance every HOLD). 
+ uint32_t stage = (r->fg_history_curr + 1u) % 3u; + VkFgImage* hist = &r->fg_history[stage]; + if (r->fg_slot_fence[stage] != VK_NULL_HANDLE) + vkWaitForFences(r->device, 1, &r->fg_slot_fence[stage], VK_TRUE, UINT64_MAX); vkResetFences(r->device, 1, &f->in_flight); vkBeginCommandBuffer(f->cmd, &bi); record_scene_chain(r, f->cmd, &snap, st.has_effects, st.wants_sgsr1, r->pipelines.offscreen_pass, hist->framebuffer, hist->width, hist->height, true); + fg_record_sig(r, f->cmd, stage); vkEndCommandBuffer(f->cmd); VkSubmitInfo si = {VK_STRUCTURE_TYPE_SUBMIT_INFO}; @@ -2576,11 +2738,17 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { pthread_mutex_unlock(&r->render_mutex); return false; } - r->fg_history_curr = next; - r->fg_slot_fence[next] = f->in_flight; - if (r->fg_history_count < 3) r->fg_history_count++; - r->fg_motion_valid = false; // new history pair — flow must be recomputed on the next interp - r->fg_motion_fwd_valid = false; + r->fg_slot_fence[stage] = f->in_flight; + if (r->fg_sig_supported) { + r->fg_stage_slot = (int32_t)stage; // pending; promoted on the next HOLD + } else { + r->fg_history_curr = stage; // no dedup: behave as before (advance every HOLD) + if (r->fg_history_count < 3) r->fg_history_count++; + r->fg_motion_valid = false; r->fg_motion_fwd_valid = false; + r->fg_promote_ns = now_monotonic_ns(); + promoted = true; + } + if (promoted) r->fg_promote_count++; g_fg_holds++; pthread_mutex_unlock(&r->render_mutex); r->frame_index = (r->frame_index + 1) % vkr_active_fif(r); @@ -2633,10 +2801,7 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { uint32_t prev_idx = (parity + 2u) % 3u; VkFgImage* curr = &r->fg_history[curr_idx]; - // Advance the vsync-aligned present deadline for the pacer (fg_sleep_to_deadline). The interp phase - // is left as the cadence's k/(N+1): an even interior position, never a real-frame endpoint. 
A - // deadline-derived phase was tried and removed — the deadline grid (vsync clock) and the game-frame - // arrivals (present clock) aren't phase-locked, so it injected a constant per-slot bias. + // Advance the vsync-aligned present deadline for the pacer (fg_sleep_to_deadline). fg_compute_deadline(r); if (do_interp) { if (r->fg_curr_arrival_ns != r->fg_dbg_last_curr) { @@ -2653,14 +2818,8 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { if (do_interp) { VkFgImage* prev = &r->fg_history[prev_idx]; // First interp of each pair recomputes the flow in-path (lazy); later interps reuse it. - // Make the HOLD color writes visible to the reads below (compute when recomputing, else the - // fragment interp draw). bool compute_bwd = !r->fg_motion_valid; - // Forward flow (Quality): on the first interp whose phase reaches mid-pair. At 3x/4x that is - // the pair's second interp, spreading the two searches across two presents so no single - // present carries both under tight 90/120Hz budgets. At 2x there is only one interp - // (phase 0.5), so both searches share its command buffer — affordable in a 60Hz slot. - bool compute_fwd = deep && !r->fg_extrapolate && !r->fg_motion_fwd_valid && phase >= 0.45f; + bool compute_fwd = deep && !r->fg_extrapolate && !r->fg_motion_fwd_valid; bool any_compute = compute_bwd || compute_fwd; VkPipelineStageFlags hist_dst = any_compute ? 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT : VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; @@ -2672,27 +2831,10 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, hist_dst, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - struct { int32_t mvW, mvH; float invW, invH, mvScale, minStep, p1, p2; } mpc; - mpc.mvW = (int32_t)r->fg_motion[parity].width; mpc.mvH = (int32_t)r->fg_motion[parity].height; - mpc.invW = 1.0f / (float)r->fg_motion[parity].width; - mpc.invH = 1.0f / (float)r->fg_motion[parity].height; - mpc.mvScale = 1.0f; mpc.minStep = (float)r->fg_min_step; mpc.p1 = mpc.p2 = 0.0f; if (compute_bwd) { // Backward flow (curr->prev) -> fg_motion[parity], 1st interp of the pair (both modes). - vkr_image_barrier(f->cmd, r->fg_motion[parity].image, - VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, VK_ACCESS_SHADER_WRITE_BIT); - vkCmdBindPipeline(f->cmd, VK_PIPELINE_BIND_POINT_COMPUTE, r->pipelines.fg_motion_pipeline); - vkCmdBindDescriptorSets(f->cmd, VK_PIPELINE_BIND_POINT_COMPUTE, - r->pipelines.fg_motion_pipe_layout, 0, 1, &r->fg_motion_set[parity], 0, NULL); - vkCmdPushConstants(f->cmd, r->pipelines.fg_motion_pipe_layout, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(mpc), &mpc); - vkCmdDispatch(f->cmd, (r->fg_motion[parity].width + 7u) / 8u, (r->fg_motion[parity].height + 7u) / 8u, 1); - vkr_image_barrier(f->cmd, r->fg_motion[parity].image, - VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + fg_motion_pass(r, f->cmd, r->fg_coarse_set[parity], &r->fg_coarse[parity], + r->fg_motion_set[parity], &r->fg_motion[parity], (float)r->fg_min_step); r->fg_motion_valid = true; } else { // 
Backward flow reused (later interps of the pair). Re-establish compute-write -> fragment-read. @@ -2703,20 +2845,8 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { } if (compute_fwd) { // Quality forward flow (prev->curr) -> fg_motion_fwd[parity]. - vkr_image_barrier(f->cmd, r->fg_motion_fwd[parity].image, - VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, VK_ACCESS_SHADER_WRITE_BIT); - vkCmdBindPipeline(f->cmd, VK_PIPELINE_BIND_POINT_COMPUTE, r->pipelines.fg_motion_pipeline); - vkCmdBindDescriptorSets(f->cmd, VK_PIPELINE_BIND_POINT_COMPUTE, - r->pipelines.fg_motion_pipe_layout, 0, 1, &r->fg_motion_set_fwd[parity], 0, NULL); - vkCmdPushConstants(f->cmd, r->pipelines.fg_motion_pipe_layout, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(mpc), &mpc); - vkCmdDispatch(f->cmd, (r->fg_motion_fwd[parity].width + 7u) / 8u, (r->fg_motion_fwd[parity].height + 7u) / 8u, 1); - vkr_image_barrier(f->cmd, r->fg_motion_fwd[parity].image, - VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + fg_motion_pass(r, f->cmd, r->fg_coarse_set_fwd[parity], &r->fg_coarse_fwd[parity], + r->fg_motion_set_fwd[parity], &r->fg_motion_fwd[parity], (float)r->fg_min_step); r->fg_motion_fwd_valid = true; } else if (deep && r->fg_motion_fwd_valid) { // Forward flow reused. Re-establish its compute-write -> fragment-read dep. @@ -2743,15 +2873,15 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { if (do_interp) { vkCmdBindPipeline(f->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, r->pipelines.fg_interp_pipeline); - // Bidirectional only once the forward flow is ready (2nd interp onward); the 1st interp of a - // Quality pair runs single-direction (like Standard) while the forward search is still pending. 
- // Extrapolation overrides both: single-image forward warp along the backward flow (mode 2). bool use_fwd = deep && r->fg_motion_fwd_valid && !r->fg_extrapolate; vkCmdBindDescriptorSets(f->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, r->pipelines.fg_interp_pipe_layout, 0, 1, use_fwd ? &r->fg_interp_set_deep[parity] : &r->fg_interp_set[parity], 0, NULL); struct { float resW, resH, phase, occLo, occHi, mode; } ipc; - ipc.resW = (float)r->swapchain_extent.width; ipc.resH = (float)r->swapchain_extent.height; + // interp norm = 2/resolution must equal 1/flow_field_res so warp magnitude is correct at any + // flowScale (reduces to swapchain res at the 0.5 default). Pass 2*field_res. + ipc.resW = 2.0f * (float)r->fg_motion[parity].width; + ipc.resH = 2.0f * (float)r->fg_motion[parity].height; ipc.phase = phase; ipc.occLo = r->fg_occ_lo; ipc.occHi = r->fg_occ_hi; ipc.mode = r->fg_extrapolate ? 2.0f : (use_fwd ? 1.0f : 0.0f); vkCmdPushConstants(f->cmd, r->pipelines.fg_interp_pipe_layout, VK_SHADER_STAGE_FRAGMENT_BIT, @@ -2842,18 +2972,282 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { return true; } +// ============================================================ +// FG generation worker (consumer): owns the swapchain present path while FG is on +// ============================================================ + +// Pace to a specific job deadline (no-op under FIFO, where the blocking acquire already vsync-paces). +static void fg_sleep_to(VkRenderer* r, uint64_t deadline) { + (void)r; + if (deadline == 0) return; + uint64_t target = deadline > FG_PRESENT_LEAD_NS ? deadline - FG_PRESENT_LEAD_NS : deadline; + struct timespec ts; + ts.tv_sec = (time_t)(target / 1000000000ull); + ts.tv_nsec = (long)(target % 1000000000ull); + while (clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ts, NULL) == EINTR) {} +} + +// Swapchain recreate from the worker (only the worker touches the swapchain while FG is on). 
Takes +// render_mutex because it tears down fg_history, which the GL HOLD writes. +static void fg_worker_recreate(VkRenderer* r) { + pthread_mutex_lock(&r->render_mutex); + r->surface_ready = false; + pthread_mutex_lock(&r->queue_mutex); vkQueueWaitIdle(r->graphics_queue); pthread_mutex_unlock(&r->queue_mutex); + fg_destroy_resources(r); + destroy_swapchain_resources(r); + r->surface_ready = create_swapchain(r, r->surface_extent.width, r->surface_extent.height); + r->fg_swapchain_gen++; + pthread_mutex_unlock(&r->render_mutex); +} + +// Run ONE queued job: acquire, record flow+generate (or present_last blit), submit, pace, present. +static void fg_worker_present(VkRenderer* r, const FgJob* job) { + if (!r->surface_ready || !r->swapchain || r->swapchain_image_count == 0 + || r->swapchain_extent.width == 0u || !r->pipelines_built) { g_fg_dropped++; return; } + + VkFrame* f = &r->fg_worker_frames[r->fg_worker_index]; + r->fg_worker_index = (r->fg_worker_index + 1u) % 3u; + if (f->in_flight) vkWaitForFences(r->device, 1, &f->in_flight, VK_TRUE, UINT64_MAX); + + bool want_interp = (job->mode == FG_MODE_INTERP); + // Bounded (not UINT64_MAX) so the worker re-checks fg_gen_running ~10x/s and the stop-join never hangs + // on a surface that stopped releasing images. Interp in a non-blocking mode drops immediately. + uint64_t acq_timeout = (want_interp && r->active_present_mode != VK_PRESENT_MODE_FIFO_KHR) ? 
0u : 100000000ull; + uint32_t image_index = 0; + VkResult acq = vkAcquireNextImageKHR(r->device, r->swapchain, acq_timeout, + f->image_available, VK_NULL_HANDLE, &image_index); + if (acq == VK_NOT_READY || acq == VK_TIMEOUT) { g_fg_dropped++; return; } + if (acq == VK_ERROR_OUT_OF_DATE_KHR) { fg_worker_recreate(r); return; } + bool recreate_after_present = (acq == VK_SUBOPTIMAL_KHR) && !r->ignore_suboptimal; + if (acq != VK_SUCCESS && acq != VK_SUBOPTIMAL_KHR) { VK_LOGE("fg-gen acquire -> %d", acq); g_fg_dropped++; return; } + + VkSemaphore render_finished = r->swapchain_render_finished[image_index]; + VkCommandBufferBeginInfo bi = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; + bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + pthread_mutex_lock(&r->render_mutex); + if (!r->fg_built) { pthread_mutex_unlock(&r->render_mutex); g_fg_dropped++; return; } + // A job whose pair was reused by 2+ newer promotes falls back to present_last of the LIVE newest + // frame (never drop the acquired image — that would strand its semaphore). + bool stale = (uint32_t)(r->fg_promote_seq - job->seq) >= 2u; + bool do_interp = want_interp && !stale && r->fg_history_count >= 2u; + uint32_t curr_idx = do_interp ? job->curr_idx : r->fg_history_curr; + uint32_t prev_idx = do_interp ? 
job->prev_idx : ((r->fg_history_curr + 2u) % 3u); + bool deep = job->deep && do_interp; + uint32_t parity = curr_idx; + VkFgImage* curr = &r->fg_history[curr_idx]; + + vkResetFences(r->device, 1, &f->in_flight); + vkBeginCommandBuffer(f->cmd, &bi); + + if (do_interp) { + VkFgImage* prev = &r->fg_history[prev_idx]; + bool compute_fwd = deep && !r->fg_extrapolate; // worker recomputes flow each generated frame + VkPipelineStageFlags hist_dst = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + vkr_image_barrier(f->cmd, prev->image, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, hist_dst, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + vkr_image_barrier(f->cmd, curr->image, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, hist_dst, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + fg_motion_pass(r, f->cmd, r->fg_coarse_set[parity], &r->fg_coarse[parity], + r->fg_motion_set[parity], &r->fg_motion[parity], (float)r->fg_min_step); + if (compute_fwd) { + fg_motion_pass(r, f->cmd, r->fg_coarse_set_fwd[parity], &r->fg_coarse_fwd[parity], + r->fg_motion_set_fwd[parity], &r->fg_motion_fwd[parity], (float)r->fg_min_step); + } + } + + VkClearValue clear = {0}; + clear.color.float32[3] = 1.0f; + VkRenderPassBeginInfo rp = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO}; + rp.renderPass = r->pipelines.swapchain_pass; + rp.framebuffer = r->swapchain_framebuffers[image_index]; + rp.renderArea.extent = r->swapchain_extent; + rp.clearValueCount = 1; rp.pClearValues = &clear; + vkCmdBeginRenderPass(f->cmd, &rp, VK_SUBPASS_CONTENTS_INLINE); + VkViewport vp = {0, 0, (float)r->swapchain_extent.width, (float)r->swapchain_extent.height, 0.0f, 1.0f}; + VkRect2D scis = {{0, 0}, r->swapchain_extent}; + vkCmdSetViewport(f->cmd, 0, 1, &vp); + vkCmdSetScissor(f->cmd, 0, 1, &scis); + if (do_interp) { 
+ vkCmdBindPipeline(f->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, r->pipelines.fg_interp_pipeline); + bool use_fwd = deep && !r->fg_extrapolate; + vkCmdBindDescriptorSets(f->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, + r->pipelines.fg_interp_pipe_layout, 0, 1, + use_fwd ? &r->fg_interp_set_deep[parity] : &r->fg_interp_set[parity], 0, NULL); + struct { float resW, resH, phase, occLo, occHi, mode; } ipc; + ipc.resW = 2.0f * (float)r->fg_motion[parity].width; + ipc.resH = 2.0f * (float)r->fg_motion[parity].height; + ipc.phase = job->phase; ipc.occLo = r->fg_occ_lo; ipc.occHi = r->fg_occ_hi; + ipc.mode = r->fg_extrapolate ? 2.0f : (use_fwd ? 1.0f : 0.0f); + vkCmdPushConstants(f->cmd, r->pipelines.fg_interp_pipe_layout, VK_SHADER_STAGE_FRAGMENT_BIT, + 0, sizeof(ipc), &ipc); + vkCmdDraw(f->cmd, 3, 1, 0, 0); + } else { + vkCmdBindPipeline(f->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, r->pipelines.blit_pipeline); + vkCmdBindDescriptorSets(f->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, + r->pipelines.effect_layout, 0, 1, &curr->blit_set, 0, NULL); + vkCmdDraw(f->cmd, 3, 1, 0, 0); + } + vkCmdEndRenderPass(f->cmd); + vkEndCommandBuffer(f->cmd); + + VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + VkSubmitInfo si = {VK_STRUCTURE_TYPE_SUBMIT_INFO}; + si.waitSemaphoreCount = 1; si.pWaitSemaphores = &f->image_available; + si.pWaitDstStageMask = &wait_stage; + si.commandBufferCount = 1; si.pCommandBuffers = &f->cmd; + si.signalSemaphoreCount = 1; si.pSignalSemaphores = &render_finished; + pthread_mutex_lock(&r->queue_mutex); + VkResult sr = vkQueueSubmit(r->graphics_queue, 1, &si, f->in_flight); + pthread_mutex_unlock(&r->queue_mutex); + if (sr != VK_SUCCESS) { + VK_LOGE("fg-gen submit -> %d", sr); + vkDestroyFence(r->device, f->in_flight, NULL); // submit didn't signal; re-arm signaled + VkFenceCreateInfo rfi = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO}; rfi.flags = VK_FENCE_CREATE_SIGNALED_BIT; + vkCreateFence(r->device, &rfi, NULL, &f->in_flight); + 
pthread_mutex_unlock(&r->render_mutex); + return; + } + r->fg_slot_fence[curr_idx] = f->in_flight; + if (do_interp) r->fg_slot_fence[prev_idx] = f->in_flight; + VkSwapchainKHR swapchain = r->swapchain; + pthread_mutex_unlock(&r->render_mutex); + + fg_sleep_to(r, job->deadline_ns); + + VkPresentInfoKHR pinfo = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR}; + pinfo.waitSemaphoreCount = 1; pinfo.pWaitSemaphores = &render_finished; + pinfo.swapchainCount = 1; pinfo.pSwapchains = &swapchain; pinfo.pImageIndices = &image_index; + VkPresentTimeGOOGLE ptg; VkPresentTimesInfoGOOGLE pti; + if (r->ext_display_timing) { + ptg.presentID = ++r->fg_present_id; ptg.desiredPresentTime = 0; + pti.sType = VK_STRUCTURE_TYPE_PRESENT_TIMES_INFO_GOOGLE; + pti.pNext = NULL; pti.swapchainCount = 1; pti.pTimes = &ptg; + pinfo.pNext = &pti; + } + pthread_mutex_lock(&r->queue_mutex); + VkResult pr = vkQueuePresentKHR(r->graphics_queue, &pinfo); + if (pr == VK_SUCCESS || pr == VK_SUBOPTIMAL_KHR) { + r->fg_present_count++; + if (do_interp) g_fg_interp++; else g_fg_plast++; + fg_collect_present_timing(r); + if (((g_fg_interp + g_fg_plast) % 120u) == 0u) { + double mean = r->fg_t_count ? r->fg_t_sum_ms / r->fg_t_count : 0.0; + double var = r->fg_t_count ? r->fg_t_sumsq_ms / r->fg_t_count - mean * mean : 0.0; + double sd = var > 0.0 ? sqrt(var) : 0.0; + VK_LOGI("FG cadence: holds=%llu interp=%llu presentLast=%llu dropped=%llu presents=%llu", + (unsigned long long)g_fg_holds, (unsigned long long)g_fg_interp, + (unsigned long long)g_fg_plast, (unsigned long long)g_fg_dropped, + (unsigned long long)r->fg_present_count); + VK_LOGI("FG timing: n=%u mean=%.2fms cov=%.0f%% min=%.2f max=%.2f [%s]", + r->fg_t_count, mean, mean > 0.0 ? 100.0 * sd / mean : 0.0, + r->fg_t_count ? r->fg_t_min_ms : 0.0, r->fg_t_count ? r->fg_t_max_ms : 0.0, + r->fg_deep_mode ? "quality" : "standard"); + VK_LOGI("FG fence-wait: n=%u mean=%.3fms max=%.3fms (GL-thread block on in_flight before present)", + r->fg_fw_n, r->fg_fw_n ? 
r->fg_fw_sum_ms / r->fg_fw_n : 0.0, r->fg_fw_max_ms); + r->fg_fw_sum_ms = 0.0; r->fg_fw_max_ms = 0.0; r->fg_fw_n = 0; + r->fg_t_count = 0; r->fg_t_sum_ms = 0.0; r->fg_t_sumsq_ms = 0.0; + } + } + pthread_mutex_unlock(&r->queue_mutex); + + if (recreate_after_present || pr == VK_ERROR_OUT_OF_DATE_KHR + || (pr == VK_SUBOPTIMAL_KHR && !r->ignore_suboptimal)) { + fg_worker_recreate(r); + } +} + +// Producer (GL thread): snapshot the cadence decision into a job + enqueue + wake the worker. O(1). +static void fg_enqueue(VkRenderer* r, uint8_t mode, float phase) { + pthread_mutex_lock(&r->render_mutex); + if (!r->fg_gen_started) { pthread_mutex_unlock(&r->render_mutex); return; } // re-check UNDER the lock: stop sets it false here too, so no sem_post races sem_destroy + uint32_t curr = r->fg_history_curr; + FgJob job; + job.mode = mode; + job.deep = (r->fg_deep_mode && r->fg_history_count >= 2u) ? 1u : 0u; + job.phase = phase; + job.curr_idx = curr; + job.prev_idx = (curr + 2u) % 3u; + job.seq = r->fg_promote_seq; + fg_compute_deadline(r); + job.deadline_ns = r->fg_present_target_ns ? 
r->fg_present_target_ns : r->fg_present_deadline_ns; + uint32_t tail = r->fg_job_tail; + uint32_t next = (tail + 1u) % FG_JOB_RING; + if (next == r->fg_job_head) { // ring full: worker is behind -> drop (emit fewer in-betweens) + pthread_mutex_unlock(&r->render_mutex); + g_fg_dropped++; + return; + } + r->fg_job_ring[tail] = job; + __atomic_store_n(&r->fg_job_tail, next, __ATOMIC_RELEASE); + sem_post(&r->fg_gen_sem); // sem_post INSIDE the lock — serialized with fg_worker_stop's sem_destroy + pthread_mutex_unlock(&r->render_mutex); +} + +static void* fg_gen_loop(void* arg) { + VkRenderer* r = (VkRenderer*)arg; + prctl(PR_SET_NAME, "fg-gen", 0, 0, 0); + setpriority(PRIO_PROCESS, 0, -8); + while (r->fg_gen_running) { + sem_wait(&r->fg_gen_sem); + if (!r->fg_gen_running) break; + uint32_t head = r->fg_job_head; + if (head == __atomic_load_n(&r->fg_job_tail, __ATOMIC_ACQUIRE)) continue; // nothing (spurious/drain) + FgJob job = r->fg_job_ring[head]; + __atomic_store_n(&r->fg_job_head, (head + 1u) % FG_JOB_RING, __ATOMIC_RELEASE); + // drop-late: skip an INTERP whose deadline already passed by >1 vsync; never drop a PRESENT_LAST. + uint64_t period = r->fg_display_period_ns ? 
r->fg_display_period_ns : 16666667ull; + if (job.mode == FG_MODE_INTERP && job.deadline_ns != 0u + && now_monotonic_ns() > job.deadline_ns + period) { g_fg_dropped++; continue; } + fg_worker_present(r, &job); + } + return NULL; +} + +static void fg_worker_start(VkRenderer* r) { + if (r->fg_gen_started) return; + if (!fg_worker_create_resources(r)) { VK_LOGE("fg-gen worker resource create failed"); return; } + sem_init(&r->fg_gen_sem, 0, 0); + r->fg_job_head = r->fg_job_tail = 0; + r->fg_gen_running = 1; + if (pthread_create(&r->fg_gen_thread, NULL, fg_gen_loop, r) != 0) { + r->fg_gen_running = 0; sem_destroy(&r->fg_gen_sem); + fg_worker_destroy_resources(r); + VK_LOGE("fg-gen pthread_create failed"); + return; + } + r->fg_gen_started = true; + VK_LOGI("fg-gen worker started"); +} + +static void fg_worker_stop(VkRenderer* r) { + if (!r->fg_gen_started) return; + pthread_mutex_lock(&r->render_mutex); // serialize with fg_enqueue: once this is false (under the + r->fg_gen_started = false; // lock) no future fg_enqueue will sem_post, and any in-flight + r->fg_gen_running = 0; // one already finished its sem_post before we got the lock. 
+ pthread_mutex_unlock(&r->render_mutex); + sem_post(&r->fg_gen_sem); // wake the worker (semaphore still valid here) + pthread_join(r->fg_gen_thread, NULL); + sem_destroy(&r->fg_gen_sem); // safe: no producer can sem_post this anymore + pthread_mutex_lock(&r->queue_mutex); vkQueueWaitIdle(r->graphics_queue); pthread_mutex_unlock(&r->queue_mutex); + fg_worker_destroy_resources(r); + VK_LOGI("fg-gen worker stopped"); +} + // ============================================================ // JNI entry points // ============================================================ #define JNI_FN(name) Java_com_winlator_cmod_runtime_display_renderer_VulkanRenderer_##name -// ===== Native FG pump ================================================================= -// A native pthread running its own ALooper + AChoreographer: no Java Looper/HandlerThread -// lifecycle to lose, vsync-accurate. Each vsync it calls back into -// VulkanRenderer.fgPumpTickFromNative(frameTimeNanos); the present stays on the GL thread. +// Native FG pump: a pthread running its own ALooper + AChoreographer; each vsync it calls +// back into VulkanRenderer.fgPumpTickFromNative(frameTimeNanos). static JavaVM* g_pump_jvm = NULL; -static jobject g_pump_renderer = NULL; // global ref to the VulkanRenderer instance +static jobject g_pump_renderer = NULL; static jmethodID g_pump_tick = NULL; static pthread_t g_pump_thread; static volatile int g_pump_running = 0; @@ -2925,6 +3319,7 @@ JNIEXPORT jlong JNICALL JNI_FN(nativeCreate)(JNIEnv* env, jclass clazz, r->fg_occ_lo = 0.06f; r->fg_occ_hi = 0.25f; r->fg_min_step = 1; + r->fg_flow_scale = 0.5f; // default = legacy half-res flow; presets override (Eco 0.2 .. 
Max 0.8) r->validation_enabled = (enableValidationLayers == JNI_TRUE); pthread_mutex_init(&r->scene_mutex, NULL); pthread_mutex_init(&r->queue_mutex, NULL); @@ -2987,6 +3382,7 @@ JNIEXPORT void JNICALL JNI_FN(nativeDestroy)(JNIEnv* env, jclass clazz, jlong ha VkRenderer* r = (VkRenderer*)(intptr_t)handle; if (!r) return; + fg_worker_stop(r); // join the worker before any device/swapchain teardown if (r->device) vkDeviceWaitIdle(r->device); // Drain any in-flight uploads and tear down the staging pool before destroying images. @@ -3042,9 +3438,7 @@ JNIEXPORT void JNICALL JNI_FN(nativeDestroy)(JNIEnv* env, jclass clazz, jlong ha free(r); } -// Lifecycle helper: take render_mutex (waits for any in-flight render to finish), then -// briefly take scene_mutex to clear surface_ready so producers see a consistent state. -// Returns with only render_mutex held; caller must release it. +// Take render_mutex, then briefly scene_mutex to clear surface_ready. Returns holding render_mutex. static void lifecycle_begin(VkRenderer* r) { pthread_mutex_lock(&r->render_mutex); pthread_mutex_lock(&r->scene_mutex); @@ -3057,6 +3451,7 @@ JNIEXPORT void JNICALL JNI_FN(nativeSurfaceCreated)(JNIEnv* env, jclass clazz, j VkRenderer* r = (VkRenderer*)(intptr_t)handle; if (!r) return; + fg_worker_stop(r); // worker restarts on the next nativeSurfaceChanged if FG is on lifecycle_begin(r); if (r->surface) { @@ -3096,9 +3491,7 @@ JNIEXPORT void JNICALL JNI_FN(nativeSurfaceCreated)(JNIEnv* env, jclass clazz, j VK_LOGE("Selected queue family does not support presentation"); } - // Wait for SurfaceHolder.surfaceChanged() before creating the swapchain. On Android - // the surface can be created while the activity is still completing a rotation, so - // creating it here can lock in stale portrait dimensions for a landscape launch. + // Wait for SurfaceHolder.surfaceChanged() before creating the swapchain (rotation may be mid-flight). 
pthread_mutex_unlock(&r->render_mutex); } @@ -3107,12 +3500,16 @@ JNIEXPORT void JNICALL JNI_FN(nativeSurfaceChanged)(JNIEnv* env, jclass clazz, j VkRenderer* r = (VkRenderer*)(intptr_t)handle; if (!r || !r->surface) return; + bool fg_was = r->fg_enabled; + fg_worker_stop(r); // worker owns the swapchain while FG is on — stop it BEFORE touching it (no render_mutex held) + lifecycle_begin(r); vkDeviceWaitIdle(r->device); destroy_sgsr1_resources(r); destroy_offscreen(r); destroy_swapchain(r); - if (!create_swapchain(r, (uint32_t)w, (uint32_t)h)) { + bool ok = create_swapchain(r, (uint32_t)w, (uint32_t)h); + if (!ok) { VK_LOGE("Swapchain re-create failed in nativeSurfaceChanged"); } else { pthread_mutex_lock(&r->scene_mutex); @@ -3120,6 +3517,7 @@ JNIEXPORT void JNICALL JNI_FN(nativeSurfaceChanged)(JNIEnv* env, jclass clazz, j pthread_mutex_unlock(&r->scene_mutex); } pthread_mutex_unlock(&r->render_mutex); + if (fg_was && ok) fg_worker_start(r); } JNIEXPORT void JNICALL JNI_FN(nativeSurfaceDestroyed)(JNIEnv* env, jclass clazz, jlong handle) { @@ -3127,6 +3525,7 @@ JNIEXPORT void JNICALL JNI_FN(nativeSurfaceDestroyed)(JNIEnv* env, jclass clazz, VkRenderer* r = (VkRenderer*)(intptr_t)handle; if (!r) return; + fg_worker_stop(r); // stop before the swapchain it owns is destroyed lifecycle_begin(r); if (r->device) vkDeviceWaitIdle(r->device); @@ -3157,8 +3556,13 @@ JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGeneration)(JNIEnv* env, jclass claz (void)env; (void)clazz; VkRenderer* r = (VkRenderer*)(intptr_t)handle; if (!r) return; - r->fg_enabled = (enabled == JNI_TRUE); - VK_LOGI("Frame generation %s (fp16=%d)", r->fg_enabled ? "ENABLED" : "disabled", r->fg_float16_supported); + bool was = r->fg_enabled; + bool now = (enabled == JNI_TRUE); + if (now == was) return; + r->fg_enabled = now; + if (now) fg_worker_start(r); + else fg_worker_stop(r); + VK_LOGI("Frame generation %s (fp16=%d)", now ? 
"ENABLED" : "disabled", r->fg_float16_supported); } // Present mode actually in use (Java convention: 0 FIFO, 1 MAILBOX, 2 IMMEDIATE). FG uses this to @@ -3201,20 +3605,38 @@ JNIEXPORT jboolean JNICALL JNI_FN(nativeRenderHold)(JNIEnv* env, jclass clazz, j return fg_submit(r, FG_MODE_HOLD, 0.5f) ? JNI_TRUE : JNI_FALSE; } +// Content-dedup telemetry for the Java scheduler: out[0]=promote count, out[1]=last promote time (ns), +// out[2]=duplicates dropped, out[3]=distinct total. +JNIEXPORT void JNICALL JNI_FN(nativeFgPromoteInfo)(JNIEnv* env, jclass clazz, jlong handle, jlongArray out) { + (void)clazz; + VkRenderer* r = (VkRenderer*)(intptr_t)handle; + if (!r || !out) return; + jlong vals[4]; + pthread_mutex_lock(&r->render_mutex); + vals[0] = (jlong)r->fg_promote_count; + vals[1] = (jlong)r->fg_promote_ns; + vals[2] = (jlong)r->fg_dup_dropped; + vals[3] = (jlong)r->fg_distinct; + pthread_mutex_unlock(&r->render_mutex); + (*env)->SetLongArrayRegion(env, out, 0, 4, vals); +} + JNIEXPORT jboolean JNICALL JNI_FN(nativeRenderInterp)(JNIEnv* env, jclass clazz, jlong handle, jfloat phase, jlong prevNs, jlong currNs) { (void)env; (void)clazz; VkRenderer* r = (VkRenderer*)(intptr_t)handle; if (!r || !r->surface_ready) return JNI_FALSE; r->fg_prev_arrival_ns = prevNs > 0 ? (uint64_t)prevNs : 0; r->fg_curr_arrival_ns = currNs > 0 ? (uint64_t)currNs : 0; - return fg_submit(r, FG_MODE_INTERP, (float)phase) ? JNI_TRUE : JNI_FALSE; + fg_enqueue(r, FG_MODE_INTERP, (float)phase); // O(1) producer; the worker generates+presents + return JNI_TRUE; } JNIEXPORT jboolean JNICALL JNI_FN(nativePresentLast)(JNIEnv* env, jclass clazz, jlong handle) { (void)env; (void)clazz; VkRenderer* r = (VkRenderer*)(intptr_t)handle; if (!r || !r->surface_ready) return JNI_FALSE; - return fg_submit(r, FG_MODE_PRESENT_LAST, 0.5f) ? 
JNI_TRUE : JNI_FALSE; + fg_enqueue(r, FG_MODE_PRESENT_LAST, 0.5f); + return JNI_TRUE; } // Live FG knobs: occLo/occHi = interp consistency window (smoothness), minStep = motion search floor. @@ -3223,11 +3645,24 @@ JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGenParams)(JNIEnv* env, jclass clazz (void)env; (void)clazz; VkRenderer* r = (VkRenderer*)(intptr_t)handle; if (!r) return; - float lo = occLo > 0.0f ? occLo : 0.06f; + // occLo now carries the preset's model flag (0 standard, 1 steadier) through to interpolate.frag. + float lo = occLo < 0.0f ? 0.0f : (occLo > 1.0f ? 1.0f : occLo); float hi = occHi > lo ? occHi : (lo + 0.1f); r->fg_occ_lo = lo; r->fg_occ_hi = hi; r->fg_min_step = minStep < 1 ? 1 : (minStep > 8 ? 8 : minStep); + VK_LOGI("FG params set: model(occLo)=%.2f minStep(quality)=%d", lo, r->fg_min_step); +} + +// Preset flow-resolution dial [0.2,1.0]. Sets the desired scale; the render thread rebuilds the +// motion fields (fg_ensure_resources) when it differs from the built value. NOT a per-frame call. +JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGenFlowScale)(JNIEnv* env, jclass clazz, jlong handle, jfloat flowScale) { + (void)env; (void)clazz; + VkRenderer* r = (VkRenderer*)(intptr_t)handle; + if (!r) return; + float fs = flowScale < 0.2f ? 0.2f : (flowScale > 1.0f ? 1.0f : flowScale); + r->fg_flow_scale = fs; + VK_LOGI("FG flowScale set: %.2f (motion-field rebuild pending)", fs); } JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGenDeepMode)(JNIEnv* env, jclass clazz, jlong handle, jboolean deep) { @@ -3280,9 +3715,6 @@ JNIEXPORT void JNICALL JNI_FN(nativeSetVsyncTiming)(JNIEnv* env, jclass clazz, j } // Scene byte buffer layout (must mirror VulkanRenderer.java offsets). Native-endian, packed. -// Using a single direct ByteBuffer instead of 6 separate jarray params avoids per-frame JNI -// critical regions (each ~3-8µs on ART) and the temporary array shadow allocations they -// trigger. 
#define SCENE_OFF_CURSOR_HANDLE 0 #define SCENE_OFF_WINDOW_HANDLES 8 /* int64 × VK_MAX_RENDERABLE_WINDOWS */ #define SCENE_OFF_WINDOW_COUNT 520 @@ -3313,8 +3745,6 @@ JNIEXPORT void JNICALL JNI_FN(nativeSetScene)(JNIEnv* env, jclass clazz, jlong h const uint8_t* base = (const uint8_t*)(*env)->GetDirectBufferAddress(env, sceneBuf); if (!base) return; - // Defensive: a future Java-side layout change with a stale SCENE_BUF_SIZE would silently - // read past the buffer here. GetDirectBufferCapacity is one JNI call; cheap insurance. jlong cap = (*env)->GetDirectBufferCapacity(env, sceneBuf); if (cap < SCENE_BUF_SIZE) { VK_LOGE("nativeSetScene: scene buffer too small (%lld < %d)", @@ -3425,17 +3855,12 @@ JNIEXPORT void JNICALL JNI_FN(nativeSetScene)(JNIEnv* env, jclass clazz, jlong h // here needs to touch swapchain-tied resources. } -// FPS pacing is enforced on the X dispatch thread (XClient.enforceAbsoluteFramerate) and by -// the swapchain present mode + Choreographer-coalesced render requests. The compositor used -// to run its own sleep+busy-spin here too, which duplicated the pacing and burned CPU; this -// entry point is kept as a no-op for Java-side ABI compatibility. +// No-op, kept for Java-side ABI compatibility (FPS pacing is enforced elsewhere). JNIEXPORT void JNICALL JNI_FN(nativeSetFpsLimit)(JNIEnv* env, jclass clazz, jlong handle, jint fps) { (void)env; (void)clazz; (void)handle; (void)fps; } -// Set the compositor present mode. Java passes 0=FIFO, 1=MAILBOX, 2=IMMEDIATE; anything else -// is treated as FIFO. Triggers a swapchain rebuild if a surface is currently active so the -// change takes effect on the next frame. +// Set the compositor present mode (0=FIFO, 1=MAILBOX, 2=IMMEDIATE). Rebuilds the swapchain if active. 
JNIEXPORT void JNICALL JNI_FN(nativeSetPresentMode)(JNIEnv* env, jclass clazz, jlong handle, jint mode) { (void)env; (void)clazz; VkRenderer* r = (VkRenderer*)(intptr_t)handle; @@ -3453,6 +3878,9 @@ JNIEXPORT void JNICALL JNI_FN(nativeSetPresentMode)(JNIEnv* env, jclass clazz, j // Rebuild swapchain only if one currently exists; otherwise the next create_swapchain // (e.g. on first surface attach) will pick up the new mode automatically. if (!r->surface) return; + // The FG worker owns the swapchain, so stop it before tearing it down and restart it after. + bool restart_worker = r->fg_gen_started; + fg_worker_stop(r); // no-op if not running; takes render_mutex internally + joins the worker lifecycle_begin(r); if (r->device) vkDeviceWaitIdle(r->device); uint32_t fw = r->surface_extent.width; @@ -3468,6 +3896,7 @@ JNIEXPORT void JNICALL JNI_FN(nativeSetPresentMode)(JNIEnv* env, jclass clazz, j pthread_mutex_unlock(&r->scene_mutex); } pthread_mutex_unlock(&r->render_mutex); + if (restart_worker && r->fg_enabled) fg_worker_start(r); } // ============================================================ diff --git a/app/src/main/cpp/winlator/vk/vk_state.h b/app/src/main/cpp/winlator/vk/vk_state.h index c8432c0ad..90d8a39cc 100644 --- a/app/src/main/cpp/winlator/vk/vk_state.h +++ b/app/src/main/cpp/winlator/vk/vk_state.h @@ -1,5 +1,4 @@ // Master state header for the Vulkan compositor. -// Internal use only — JNI entry points expose a long handle that wraps VkRenderer*. #pragma once @@ -7,12 +6,12 @@ #include #include #include +#include #include #include #include -// All vk* calls route through the dispatch table — vk_dispatch.h is the Vulkan header for -// this translation unit (do not include directly). +// All vk* calls route through the dispatch table (do not include directly). 
#include "vk_dispatch.h" #define VK_LOG_TAG "VkRenderer" @@ -22,12 +21,21 @@ #define VK_FRAMES_IN_FLIGHT 3 #define VK_MAX_SWAPCHAIN_IMAGES 8 +#define FG_JOB_RING 6u + +// A queued FG present job, snapshotted at enqueue and executed by the worker pthread. +typedef struct FgJob { + uint8_t mode; // 1 = INTERP, 2 = PRESENT_LAST + uint8_t deep; // bidirectional warp (model-1) + float phase; + uint32_t curr_idx; // history slots, snapshotted at enqueue + uint32_t prev_idx; + uint64_t deadline_ns; // vsync-snapped present target (worker paces to this) + uint32_t seq; // fg_promote_seq snapshot — worker drops the job if the slot was reused +} FgJob; #define VK_MAX_EFFECTS 8 #define VK_MAX_RENDERABLE_WINDOWS 64 -// Number of in-flight upload slots. Each slot owns a persistently-mapped staging buffer, -// fence, and command pool. An upload only blocks when this many uploads are still pending -// on the GPU — with 8 slots and ~100µs GPU upload time, we can sustain ~80k uploads/sec -// without ever waiting. +// Number of in-flight upload slots. #define VK_STAGING_POOL_SIZE 8 #define VK_CHECK(expr) do { \ @@ -289,11 +297,8 @@ typedef struct VkDeviceCaps { // ============================================================ // Image sub-allocator // ============================================================ -// -// CPU-uploaded textures share large DEVICE_LOCAL blocks instead of each taking a dedicated -// vkAllocateMemory — avoids per-pixmap allocator latency and hitting maxMemoryAllocationCount -// (~4096 on Adreno) under X-server pixmap churn. Each block has a first-fit free list (offset- -// sorted, coalesced on free); fully-drained blocks are returned. AHB imports stay dedicated. +// CPU-uploaded textures share large DEVICE_LOCAL blocks via a first-fit free list; +// AHB imports stay dedicated. 
#define VK_SUBALLOC_BLOCK_SIZE (32u * 1024u * 1024u) // 32 MiB default block @@ -326,23 +331,16 @@ typedef struct VkRenderer { // Lifecycle bool initialized; bool surface_ready; - // True when we deliberately create a fallback swapchain with a preTransform that differs - // from caps.currentTransform (Adreno reports SUBOPTIMAL on every present in that case). + // Set when using a fallback swapchain whose preTransform differs from currentTransform. bool ignore_suboptimal; pthread_mutex_t scene_mutex; // guards r->scene + graveyard slots; held briefly by all pthread_mutex_t queue_mutex; // serializes vkQueueSubmit across threads pthread_mutex_t texture_mutex; // guards live_textures pthread_mutex_t descriptor_mutex;// external sync for descriptor_pool alloc/free - pthread_mutex_t render_mutex; // serializes lifecycle vs render; held by render thread for - // the full acquire+record+submit+present, and by lifecycle - // ops (surface create/change/destroy) before they touch the - // swapchain. Scene producers do NOT take this — they only - // touch scene_mutex, so they never stall behind a frame. + pthread_mutex_t render_mutex; // serializes lifecycle vs render // Instance + physical/logical device - // dlopen handle for the libvulkan we resolved through. dlclose'd in nativeDestroy AFTER - // vkd_unload() to avoid stale dispatch pointers calling into freed memory. 
- void* vulkan_handle; + void* vulkan_handle; // dlopen handle for libvulkan VkInstance instance; bool validation_enabled; bool debug_utils_enabled; @@ -382,13 +380,15 @@ typedef struct VkRenderer { bool fg_built; // history + motion images allocated at fg_dims VkExtent2D fg_dims; // extent the fg images were built for VkFgImage fg_history[3]; // composited-scene ring; fg_history_curr = newest - VkFgImage fg_motion[3]; // per-parity rgba16f half-res backward-flow ring (1 per history - // slot): consecutive cycles write different buffers so the - // once-per-cycle motion compute pipelines instead of serializing. + VkFgImage fg_motion[3]; // per-parity rgba16f half-res backward-flow ring (1 per history slot) VkFgImage fg_motion_fwd[3]; // per-parity rgba16f half-res forward-flow ring (Quality bidirectional) + VkFgImage fg_coarse[3]; // per-parity quarter-res backward coarse-flow (coarse-to-fine seed) + VkFgImage fg_coarse_fwd[3]; // per-parity quarter-res forward coarse-flow VkSampler fg_sampler; // linear, clamp — for all fg sampled reads - VkDescriptorSet fg_motion_set[3]; // [curr] prev,curr samplers + motion storage (motion.comp) - VkDescriptorSet fg_motion_set_fwd[3]; // [curr] swapped prev,curr + fwd-motion storage (forward pass) + VkDescriptorSet fg_motion_set[3]; // [curr] prev,curr,coarse samplers + motion storage (fine pass) + VkDescriptorSet fg_motion_set_fwd[3]; // [curr] swapped prev,curr + fwd-coarse + fwd-motion storage + VkDescriptorSet fg_coarse_set[3]; // [curr] prev,curr + coarse-bwd storage (coarse pass) + VkDescriptorSet fg_coarse_set_fwd[3]; // [curr] swapped prev,curr + coarse-fwd storage VkDescriptorSet fg_interp_set[3]; // [curr] prev,curr,mvBwd,mvFwd samplers (interpolate.frag) VkDescriptorSet fg_interp_set_deep[3]; // deep mode: interp the pair one step behind the newest VkFence fg_slot_fence[3]; // last submit that used each history slot @@ -403,14 +403,32 @@ typedef struct VkRenderer { float fg_occ_lo; // interpolate.frag consistency 
lower bound (smoothness) float fg_occ_hi; // interpolate.frag consistency upper bound (smoothness) int32_t fg_min_step; // motion.comp lowest TSS step (quality preset; 1 = full search) + float fg_flow_scale; // flow-field resolution scale [0.2,1.0] (preset GPU-cost dial) + float fg_built_flow_scale; // flow_scale baked into the current motion resources + + // --- Content-duplicate detection ------------------------------------------------------------ + // Each composited frame is downsampled to a tiny host buffer; the HOLD promotes the interp + // pair only on a genuine content change so duplicate inputs don't advance. + VkImage fg_sig_img; // tiny blit target, reused each HOLD + VkDeviceMemory fg_sig_img_mem; + VkBuffer fg_sig_buf[3]; // per-slot host-visible downsample of each history slot + VkDeviceMemory fg_sig_buf_mem[3]; + void* fg_sig_ptr[3]; // persistent map of fg_sig_buf + bool fg_sig_supported; // blit+readback path created OK (else dedup disabled) + int32_t fg_stage_slot; // history slot holding the pending (un-promoted) frame, -1 = none + VkFence fg_stage_fence; // fence that produced fg_sig_buf[fg_stage_slot] + uint64_t fg_last_promote_ns; // last promotion time (freeze backstop) + double fg_last_sig_delta; // last measured content delta (diagnostics) + uint64_t fg_dup_dropped, fg_distinct; // dedup telemetry + uint64_t fg_promote_count; // monotonic count of promotions (distinct content committed) + uint64_t fg_promote_ns; // CLOCK_MONOTONIC of the most recent promotion (for Java phase anchor) // Quad vertex buffer (window/cursor) VkBuffer quad_vbo; VkDeviceMemory quad_vbo_memory; // Shared sampler for all CPU-uploaded textures and AHB textures that don't need a Ycbcr - // conversion. Created once at init; vkCreateSampler costs ~50-200µs on Adreno, so giving - // every texture its own sampler is a non-trivial CPU+GPU tax during pixmap churn. + // conversion. Created once at init. 
VkSampler shared_sampler; // Per-frame @@ -430,8 +448,7 @@ typedef struct VkRenderer { VkGraveSlot graveyard[VK_FRAMES_IN_FLIGHT + 1]; uint32_t graveyard_index; - // Live native textures owned by this renderer/device. Java Texture objects can outlive a - // renderer teardown, so nativeDestroy drains this list before the device is destroyed. + // Live native textures owned by this renderer/device; drained on nativeDestroy. VkTexture** live_textures; uint32_t live_texture_count; uint32_t live_texture_capacity; @@ -476,6 +493,22 @@ typedef struct VkRenderer { uint32_t fg_dbg_done_n; uint64_t fg_dbg_last_curr; uint32_t fg_present_id; + + // FG generation worker: GL thread enqueues; this pthread runs flow+generate+pace+present + // and owns the swapchain present path while FG is on. + FgJob fg_job_ring[FG_JOB_RING]; // SPSC: GL produces (tail), worker consumes (head) + volatile uint32_t fg_job_head; + volatile uint32_t fg_job_tail; + sem_t fg_gen_sem; + pthread_t fg_gen_thread; + volatile int fg_gen_running; + bool fg_gen_started; + VkCommandPool fg_worker_pool; // worker-owned (command pools are not thread-safe) + VkFrame fg_worker_frames[3]; // worker-owned cmd + fence + image_available + uint32_t fg_worker_index; + volatile uint32_t fg_promote_seq; // ++ on each HOLD promote; jobs snapshot it + volatile uint32_t fg_swapchain_gen; // ++ on swapchain recreate; worker drops present across a change + PFN_vkGetRefreshCycleDurationGOOGLE fnGetRefreshCycleDuration; PFN_vkGetPastPresentationTimingGOOGLE fnGetPastPresentationTiming; @@ -527,10 +560,8 @@ void vkr_image_barrier(VkCommandBuffer cmd, VkImage image, VkImageLayout f VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src_access, VkAccessFlags dst_access); bool vkr_create_sampler(VkRenderer* r, VkSamplerYcbcrConversion ycbcr, VkSampler* out); -// Async layout transition through the staging pool. 
Submits a tiny command buffer that runs -// the requested barrier, but does NOT wait for the GPU. The barrier is ordered before all -// subsequent submits on the same queue per Vulkan spec, so callers can sample the image as -// soon as the next render submit happens. Returns false on submit failure. +// Async layout transition through the staging pool; does not wait for the GPU. +// Returns false on submit failure. bool vkr_submit_async_transition(VkRenderer* r, VkImage image, VkImageLayout from, VkImageLayout to, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, diff --git a/app/src/main/runtime/display/XServerDisplayActivity.java b/app/src/main/runtime/display/XServerDisplayActivity.java index ef906853d..9db4ac6cb 100644 --- a/app/src/main/runtime/display/XServerDisplayActivity.java +++ b/app/src/main/runtime/display/XServerDisplayActivity.java @@ -387,6 +387,8 @@ private float fgPrefFloat(String base, float def) { private boolean frameGenerationAdvanced = false; private boolean frameGenerationExtrapolate = false; private int frameGenerationFramesInFlight = 3; + private int frameGenerationPreset = 2; // Eco/Flow/Bal/Boost/Clear/Max + private int frameGenerationModel = 0; // 0 = standard, 1 = steadier private boolean sgsrEnabled = false; private boolean sgsrRuntimeEnabled = false; private int sgsrUpscaleMode = 1; @@ -639,9 +641,7 @@ private String getShortcutWineVersionOverride() { return shortcut.getExtra("wineVersion"); } - // Coalesce FG target changes into a single panel re-pin once the rate settles (~600ms). The - // renderer throttles hints to 500ms, so this debounce (> that) absorbs a burst of rapid swaps - // and the unstable-startup rate wobble into one mode switch instead of one stall per change. + // Debounce FG target changes into a single panel re-pin once the rate settles. 
private final Runnable fgRepinRunnable = this::applyPreferredRefreshRate; private void scheduleFgRefreshRepin() { @@ -657,27 +657,12 @@ private void applyPreferredRefreshRate() { VulkanRenderer renderer = xServerView != null ? xServerView.getRenderer() : null; if (renderer != null && renderer.isFrameGenerationEnabled()) { - // Pin the panel to the renderer's live FG target (multiplier × measured game fps). - // Until the pump has measured, fall back to multiplier×(fps cap | 60). Passing the - // target as the fpsLimit makes the mode resolver demand a cadence-compatible mode - // (exact match first, then an integer multiple) instead of a raw nearest rate. int panelMax = RefreshRateUtils.getMaxSupportedRefreshRate(this); renderer.setFrameGenDisplayCap(panelMax); - int target = renderer.getFrameGenTargetHz(); - if (target <= 0) { - int engine = runtimeFpsLimit > 0 ? runtimeFpsLimit : 60; - target = Math.max(60, engine * renderer.getFrameGenMultiplier()); - } - target = Math.min(panelMax, target); - // Pin the panel's physical mode to the target (held even when untouched) and vote the - // rate on the surface. NOTE: on aggressive ADFR OEMs (e.g. OnePlus/ColorOS) the vendor - // refresh service still drops the *render* rate to 60 when there is no touch input - // unless the app is enrolled in the OEM game mode (Game Space) — that enrollment, not - // any app API, is what holds the render rate untouched. The appCategory="game" manifest - // flag signals the app so the OEM can offer it. - RefreshRateUtils.applyPreferredRefreshRate(this, target, target); - requestSurfaceFrameRate((float) target); - lastLoggedRefreshHz = 0f; // force a fresh self-log line after a target change + // Hold the panel's native max mode while FG is on rather than down-switching. 
+ RefreshRateUtils.applyPreferredRefreshRate(this, panelMax, panelMax); + requestSurfaceFrameRate((float) panelMax); + lastLoggedRefreshHz = 0f; } else { RefreshRateUtils.applyPreferredRefreshRate(this, getRefreshRateOverride(), runtimeFpsLimit); } @@ -691,8 +676,6 @@ private void applyPreferredRefreshRate() { } // Vote a frame rate on the surface so a VRR/ADFR panel holds the high refresh while FG is active. - // DEFAULT (exact-or-multiple) — FIXED_SOURCE is video semantics and lets the idle policy drop - // the panel to a non-multiple rate once touch boost ends. private void requestSurfaceFrameRate(float hz) { if (hz <= 0f || Build.VERSION.SDK_INT < Build.VERSION_CODES.R || xServerView == null) return; try { @@ -707,9 +690,7 @@ private void requestSurfaceFrameRate(float hz) { } catch (Exception ignore) {} } - // Log the panel's actual physical refresh rate (what a refresh-rate monitor shows). De-duped so it - // only prints when the rate actually changes — makes it obvious in logcat whether the mode pin is - // holding the target Hz or the system has dropped it. + // Log the panel's actual physical refresh rate, de-duped so it only prints on change. 
private void logCurrentRefreshRate(String from) { try { android.view.Display d = getWindow().getDecorView().getDisplay(); @@ -3859,6 +3840,8 @@ private void handleNavigationBackPressed() { private void openDrawerMenu() { releasePointerCapture(); + if (xServerView != null && xServerView.getRenderer() != null) + xServerView.getRenderer().fgSetOverlayActive(true); // overlay GPU contention isn't a game slowdown renderDrawerMenu(); if (drawerStateHolder != null) { drawerStateHolder.openDrawer(); @@ -3872,9 +3855,21 @@ private void closeDrawerMenu() { if (drawerStateHolder != null) { drawerStateHolder.closeDrawer(); } + if (xServerView != null && xServerView.getRenderer() != null) + xServerView.getRenderer().fgSetOverlayActive(false); // clears overlay + re-anchors the FG clock fresh tryCapturePointer(); } + // Safety net for the drawer-open listener: resumes FG if a close path missed it. Idempotent. + @Override + public void onUserInteraction() { + super.onUserInteraction(); + if (drawerStateHolder == null || !drawerStateHolder.isDrawerOpen()) { + VulkanRenderer r = xServerView != null ? 
xServerView.getRenderer() : null; + if (r != null) r.fgClearOverlayIfActive(); + } + } + private String currentGyroActivatorLabel() { String[] names = getResources().getStringArray(R.array.button_options); int[] keycodes = getResources().getIntArray(R.array.button_keycodes); @@ -3982,7 +3977,8 @@ private void renderDrawerMenu() { frameGenerationDeepMode, frameGenerationAdvanced, frameGenerationExtrapolate, - frameGenerationFramesInFlight + frameGenerationFramesInFlight, + frameGenerationPreset ); if (drawerActionListener == null) { @@ -4189,6 +4185,32 @@ public void onFrameGenerationMultiplierSelected(int multiplier) { renderDrawerMenu(); } + @Override + public void onFrameGenerationPresetSelected(int preset) { + final int[] presetQuality = {0, 1, 1, 2, 1, 2}; + final int[] presetModel = {0, 0, 0, 0, 1, 1}; + final float[] presetFlowScale = {0.2f, 0.4f, 0.6f, 0.8f, 0.6f, 0.8f}; + int idx = Math.max(0, Math.min(preset, presetQuality.length - 1)); + frameGenerationPreset = idx; + frameGenerationQuality = presetQuality[idx]; + frameGenerationModel = presetModel[idx]; + frameGenerationExtrapolate = false; + frameGenerationDeepMode = false; + preferences.edit() + .putInt(fgKey("frame_generation_preset"), idx) + .putInt(fgKey("frame_generation_quality"), frameGenerationQuality) + .putBoolean(fgKey("frame_generation_extrapolate"), false) + .putBoolean(fgKey("frame_generation_deep_mode"), frameGenerationDeepMode) + .apply(); + VulkanRenderer r = xServerView != null ? 
xServerView.getRenderer() : null; + if (r != null) { + r.setFrameGenerationPreset(frameGenerationQuality, frameGenerationModel, presetFlowScale[idx]); + r.setFrameGenerationExtrapolate(false); + r.setFrameGenerationDeepMode(frameGenerationDeepMode); + } + renderDrawerMenu(); + } + @Override public void onFrameGenerationQualitySelected(int quality) { frameGenerationQuality = quality; @@ -4493,6 +4515,12 @@ public void onLogsShare() { if (drawerStateHolder == null) { drawerStateHolder = new XServerDrawerStateHolder(state); + // Pause/resume FG on every drawer open/close, including scrim/swipe closes. + drawerStateHolder.setDrawerOpenListener(open -> { + VulkanRenderer r = xServerView != null ? xServerView.getRenderer() : null; + if (r != null) r.fgSetOverlayActive(open); + return kotlin.Unit.INSTANCE; + }); XServerDisplayHostKt.setupXServerDisplayHost( displayHostComposeView, xServerDisplayFrame, @@ -6164,16 +6192,18 @@ private void setupUI() { frameGenerationAdvanced = fgPrefBool("frame_generation_advanced", false); frameGenerationExtrapolate = fgPrefBool("frame_generation_extrapolate", false); frameGenerationFramesInFlight = fgPrefInt("frame_generation_fif", 3); + frameGenerationPreset = fgPrefInt("frame_generation_preset", 2); + frameGenerationModel = (frameGenerationPreset == 4 || frameGenerationPreset == 5) ? 
1 : 0; + frameGenerationDeepMode = false; // all presets single-flow to fit the 60fps budget (steadier = occLo) + final float[] startupPresetFlowScale = {0.2f, 0.4f, 0.6f, 0.8f, 0.6f, 0.8f}; + float startupFlowScale = startupPresetFlowScale[Math.max(0, Math.min(frameGenerationPreset, 5))]; renderer.setFrameGenerationMultiplier(frameGenerationMultiplier); - renderer.setFrameGenerationQuality(frameGenerationQuality); + renderer.setFrameGenerationPreset(frameGenerationQuality, frameGenerationModel, startupFlowScale); renderer.setFrameGenerationSmoothness(frameGenerationSmoothing); renderer.setFrameGenerationDeepMode(frameGenerationDeepMode); renderer.setFrameGenerationExtrapolate(frameGenerationExtrapolate); renderer.setFrameGenerationFramesInFlight(frameGenerationFramesInFlight); - // Re-pin the window's preferred display mode whenever the measured FG target moves - // (the window pin outranks surface frame-rate votes, so it must track the live target). - // Debounced: a physical mode switch (60/90/120) stalls the panel a frame or two, so rapid - // multiplier swaps and startup rate-wobble must coalesce into one switch, not one per change. + // Re-pin the window's preferred display mode whenever the measured FG target moves. 
renderer.setFrameGenRateChangedListener(this::scheduleFgRefreshRepin); renderer.setFrameGeneration(frameGenerationEnabled); diff --git a/app/src/main/runtime/display/XServerDrawerMenu.kt b/app/src/main/runtime/display/XServerDrawerMenu.kt index 00e9d1907..2998ac89b 100644 --- a/app/src/main/runtime/display/XServerDrawerMenu.kt +++ b/app/src/main/runtime/display/XServerDrawerMenu.kt @@ -337,6 +337,7 @@ data class XServerDrawerState( val frameGenerationAdvanced: Boolean = false, val frameGenerationExtrapolate: Boolean = false, val frameGenerationFramesInFlight: Int = 3, + val frameGenerationPreset: Int = 2, val sgsrEnabled: Boolean = false, val sgsrSharpness: Int = 100, val vividEnabled: Boolean = false, @@ -376,17 +377,24 @@ class XServerDrawerStateHolder( private var drawerOpen by mutableStateOf(false) internal var openPane by mutableStateOf(null) private var paneVisibilityListener: ((Boolean) -> Unit)? = null + private var drawerOpenListener: ((Boolean) -> Unit)? = null + + fun setDrawerOpenListener(listener: ((Boolean) -> Unit)?) { + drawerOpenListener = listener + } val isDrawerOpen: Boolean get() = drawerOpen fun openDrawer() { drawerOpen = true + drawerOpenListener?.invoke(true) } fun closeDrawer() { drawerOpen = false openPane = null + drawerOpenListener?.invoke(false) } fun isPaneOpen(): Boolean = openPane != null @@ -417,13 +425,6 @@ class XServerDrawerStateHolder( setOpenPaneAndNotify(DrawerPane.LOGS) } - /** - * Append a log line. Safe to call from any thread. When the logs pane is not - * visible, this only stores the line in an off-thread ring buffer — no - * recomposition or main-thread work is scheduled. The buffer is flushed into - * observable state when the pane becomes visible (and live while visible, - * coalesced through a single posted runnable). 
- */ fun appendLogLine(line: String) { if (logsPausedFlag) return synchronized(logsBuffer) { @@ -526,6 +527,8 @@ interface XServerDrawerActionListener { fun onFrameGenerationMultiplierSelected(multiplier: Int) + fun onFrameGenerationPresetSelected(preset: Int) + fun onFrameGenerationQualitySelected(quality: Int) fun onFrameGenerationSmoothingChanged(smoothing: Float) @@ -653,6 +656,7 @@ fun buildXServerDrawerState( frameGenerationAdvanced: Boolean = false, frameGenerationExtrapolate: Boolean = false, frameGenerationFramesInFlight: Int = 3, + frameGenerationPreset: Int = 2, ): XServerDrawerState { val items = mutableListOf( @@ -812,6 +816,7 @@ fun buildXServerDrawerState( frameGenerationAdvanced = frameGenerationAdvanced, frameGenerationExtrapolate = frameGenerationExtrapolate, frameGenerationFramesInFlight = frameGenerationFramesInFlight, + frameGenerationPreset = frameGenerationPreset, sgsrEnabled = sgsrEnabled, sgsrSharpness = sgsrSharpness, vividEnabled = vividEnabled, @@ -869,11 +874,6 @@ internal fun XServerDrawerContent( onDismiss: () -> Unit, revealCards: Boolean = true, ) { - // The drawer content stays composed even while the sheet is closed (the host - // just translates it off-screen), so opening no longer pays a full - // first-composition cost. Drive the staggered card reveal from the sheet's - // engaged state so it still replays each time the drawer opens, and stays - // stable while switching between panes. val cardsRevealed = remember { mutableStateOf(false) } LaunchedEffect(revealCards) { cardsRevealed.value = revealCards } @@ -1009,7 +1009,6 @@ private fun TopRail( val activeSpecs = RAIL_PANES.filter { spec -> state.items.any { it.itemId == spec.itemId } } val tileBounds = remember { mutableStateMapOf() } - // Tile bounds are Row-relative, so the indicator (in the un-scrolled parent) subtracts the scroll. 
val railScroll = rememberScrollState() val selectedKey = @@ -1991,11 +1990,6 @@ private fun InputControlsPaneContent( } } -/** - * Compact dropdown shared by the Style and Label Theme rows in the Controls pane. - * Mirrors the styling of [InputControlsProfileSelector] but omits the trailing edit-pencil button - * since these are built-in choices, not user-editable. - */ @Composable private fun InputControlsSimpleDropdown( options: List, @@ -2408,130 +2402,14 @@ private fun ScreenEffectsPaneContent( ) } } - PaneSectionLabel(stringResource(R.string.session_drawer_frame_generation_recommended)) - Row( - modifier = Modifier.fillMaxWidth(), - horizontalArrangement = Arrangement.spacedBy((8f * paneScale).dp), - ) { - val smoothestActive = !state.frameGenerationExtrapolate && - state.frameGenerationDeepMode && - state.frameGenerationQuality == 1 && - abs(state.frameGenerationSmoothing - 0.75f) < 0.01f && - state.frameGenerationFramesInFlight == 3 - HUDToggleChip( - label = stringResource(R.string.session_drawer_frame_generation_preset_smoothest), - checked = smoothestActive, - onClick = { - listener.onFrameGenerationExtrapolateChanged(false) - listener.onFrameGenerationDeepModeChanged(true) - listener.onFrameGenerationQualitySelected(1) - listener.onFrameGenerationSmoothingChanged(0.75f) - listener.onFrameGenerationFramesInFlightChanged(3) - }, - modifier = Modifier.weight(1f), - ) - val lowLatencyActive = state.frameGenerationExtrapolate && - state.frameGenerationQuality == 1 && - abs(state.frameGenerationSmoothing - 0.75f) < 0.01f && - state.frameGenerationFramesInFlight == 1 - HUDToggleChip( - label = stringResource(R.string.session_drawer_frame_generation_preset_low_latency), - checked = lowLatencyActive, - onClick = { - listener.onFrameGenerationExtrapolateChanged(true) - listener.onFrameGenerationQualitySelected(1) - listener.onFrameGenerationSmoothingChanged(0.75f) - listener.onFrameGenerationFramesInFlightChanged(1) - }, - modifier = Modifier.weight(1f), - ) - } - 
DrawerBooleanRow( - title = "Advanced settings", - checked = state.frameGenerationAdvanced, - onCheckedChange = listener::onFrameGenerationAdvancedChanged, - subtitle = "Generation method · scan passes · quality preset · smoothness · buffering", - ) - - if (state.frameGenerationAdvanced) { - PaneSectionLabel("Generation method") - Row( - modifier = Modifier.fillMaxWidth(), - horizontalArrangement = Arrangement.spacedBy((8f * paneScale).dp), - ) { + PaneSectionLabel(stringResource(R.string.session_drawer_frame_generation_quality)) + val fgPresetLabels = listOf("Eco", "Flow", "Bal", "Boost", "Clear", "Max") + ChipFlow { + fgPresetLabels.forEachIndexed { index, label -> HUDToggleChip( - label = "Interpolation", - checked = !state.frameGenerationExtrapolate, - onClick = { listener.onFrameGenerationExtrapolateChanged(false) }, - modifier = Modifier.weight(1f), - ) - HUDToggleChip( - label = "Extrapolation", - checked = state.frameGenerationExtrapolate, - onClick = { listener.onFrameGenerationExtrapolateChanged(true) }, - modifier = Modifier.weight(1f), - ) - } - PaneSectionLabel("Scan passes") - Row( - modifier = Modifier.fillMaxWidth(), - horizontalArrangement = Arrangement.spacedBy((8f * paneScale).dp), - ) { - HUDToggleChip( - label = "1 · Single", - checked = !state.frameGenerationDeepMode, - onClick = { listener.onFrameGenerationDeepModeChanged(false) }, - modifier = Modifier.weight(1f), - ) - HUDToggleChip( - label = "2 · Bidirectional", - checked = state.frameGenerationDeepMode, - onClick = { listener.onFrameGenerationDeepModeChanged(true) }, - modifier = Modifier.weight(1f), - ) - } - PaneSectionLabel(stringResource(R.string.session_drawer_frame_generation_quality)) - val qualityLabels = - listOf( - stringResource(R.string.session_drawer_frame_generation_quality_performance), - stringResource(R.string.session_drawer_frame_generation_quality_balanced), - stringResource(R.string.session_drawer_frame_generation_quality_quality), - ) - ChipFlow { - 
qualityLabels.forEachIndexed { index, label -> - HUDToggleChip( - label = label, - checked = state.frameGenerationQuality == index, - onClick = { listener.onFrameGenerationQualitySelected(index) }, - ) - } - } - DrawerSliderRow( - label = stringResource(R.string.session_drawer_frame_generation_smoothness), - valueText = "${(state.frameGenerationSmoothing * 100).roundToInt()}%", - value = state.frameGenerationSmoothing, - valueRange = 0f..1f, - steps = 0, - onValueChange = listener::onFrameGenerationSmoothingChanged, - ) - // Buffering (frames-in-flight): latency <-> smoothness. Extrapolation - // predicts frames instead of holding them, so buffering is irrelevant -> grayed. - val fifLocked = state.frameGenerationExtrapolate - Column(modifier = Modifier.alpha(if (fifLocked) 0.4f else 1f)) { - DrawerSliderRow( - label = if (fifLocked) "Buffering — n/a for extrapolation" - else "Buffering (latency ↔ smooth)", - valueText = state.frameGenerationFramesInFlight.toString(), - value = state.frameGenerationFramesInFlight.toFloat(), - valueRange = 1f..3f, - steps = 1, - onValueChange = { - if (!fifLocked) { - listener.onFrameGenerationFramesInFlightChanged( - it.roundToInt().coerceIn(1, 3), - ) - } - }, + label = label, + checked = state.frameGenerationPreset == index, + onClick = { listener.onFrameGenerationPresetSelected(index) }, ) } } @@ -4547,10 +4425,6 @@ private fun FPSLimiterCard( val maxFps = maxRefreshRate.coerceAtLeast(FPS_LIMITER_MIN) val steps = (maxFps - FPS_LIMITER_MIN - 1).coerceAtLeast(0) - // Slider position is tracked locally so the readout follows the drag and the - // last value survives an off/on toggle; the limit/refresh-rate commit is - // deferred to release (onValueChangeFinished). Re-seeds when the panel's max - // changes — e.g. a mid-game refresh-rate change that clamps the limit. 
var sliderValue by remember(maxFps) { mutableStateOf( (if (currentLimit > 0) currentLimit else FPS_LIMITER_DEFAULT) diff --git a/app/src/main/runtime/display/renderer/VulkanRenderer.java b/app/src/main/runtime/display/renderer/VulkanRenderer.java index 78bd04c64..b65795308 100644 --- a/app/src/main/runtime/display/renderer/VulkanRenderer.java +++ b/app/src/main/runtime/display/renderer/VulkanRenderer.java @@ -33,12 +33,7 @@ import java.util.ArrayList; import java.util.concurrent.atomic.AtomicBoolean; -/** - * Native Vulkan compositor. - * - *

Owns the C-side renderer handle and pushes a scene snapshot every frame. Replaces the - * previous GLES2 {@code GLRenderer}; preserves the same public API so callers do not change. - */ +/** Native Vulkan compositor. Owns the C-side renderer handle and pushes a scene snapshot every frame. */ public class VulkanRenderer implements RenderCallback, WindowManager.OnWindowModificationListener, @@ -58,55 +53,48 @@ public class VulkanRenderer private long nativeHandle = 0; private boolean supportProbed = false; private boolean loggedAhbSceneUse = false; - // Must be set before attachSurface — nativeCreate reads it once at instance creation. + // Must be set before attachSurface. private volatile String graphicsDriverName = null; // ---- Frame generation ---- - // Display-cadence pump that inserts interpolated frames between real ones; never starves the real frame. private volatile boolean frameGenEnabled = false; - private volatile int fgMultiplier = 2; // target display:engine ratio (2, 3, 4) + private volatile int fgMultiplier = 2; // target display:engine ratio (2, 3, 4) — the user ceiling + private volatile int fgEffectiveMultiplier = 2; // adaptive working multiplier (2..ceiling) + private volatile int fgBoundSecs = 0; private final AtomicBoolean fgNewScene = new AtomicBoolean(false); - private final AtomicBoolean fgSceneDirty = new AtomicBoolean(false); // cursor/window change awaiting a recomposite + private final AtomicBoolean fgSceneDirty = new AtomicBoolean(false); private final AtomicBoolean fgPumpScheduled = new AtomicBoolean(false); - private boolean fgPendingReal = false; // a held real frame awaits its display tick - private int fgPendingInterps = 0; // interpolated frames still owed before the held real - private int fgInterpTotal = 0; // interps planned for the current engine frame (phase divisor) - private int fgSlotIdx = 0; // display ticks since the newest real frame (slot-grid scheduler) - private long fgEngineFrames = 0; // count of held real frames 
since FG was enabled - // EMAs of the pump (=panel) tick interval and the real game-frame interval. + private boolean fgPendingReal = false; + private int fgPendingInterps = 0; + private int fgInterpTotal = 0; + private int fgSlotIdx = 0; + private long fgEngineFrames = 0; private volatile long fgDisplayPeriodNs = 0; private volatile long fgGamePeriodNs = 0; - // Locked game-rate estimate (Hz). The raw EMA jitters a few fps; ×multiplier amplifies that into a - // wide display-target swing that thrashes the panel-mode pin and the interp cadence. Hold a stable - // rate and only re-lock on a sustained change so a steady game produces a steady target. private volatile double fgLockedGameHz = 0.0; private int fgGameDriftFrames = 0; private long fgLastPumpNs = 0; private volatile long fgLastGameNs = 0; private volatile long fgPrevGameNs = 0; - private volatile long fgCurrentVsyncNs = 0; // latest vsync instant from the native pump (CLOCK_MONOTONIC) - private Drawable fgLastScanoutSrc = null; // scanout buffer of the last ACCEPTED frame (dedup by identity) - private Drawable fgFirstScanoutSrc = null; // first buffer ever seen (to detect a multi-buffer swapchain) - private boolean fgMultiBuffer = false; // seen ≥2 distinct scanout buffers → identity dedup is trustworthy - private long fgLastAcceptNs = 0L; // time of last ACCEPTED frame (freeze backstop reference) - private static final long FG_DEDUP_FREEZE_NS = 100_000_000L; // never drop for >100ms → genuine holds get through - // Present-pipeline instrumentation (diagnose slips when GPU+CPU both have headroom): per-present - // GL-thread record/submit wall-time, bucketed composite(HOLD) vs interp, + count over the vsync budget. 
+ private volatile long fgCurrentVsyncNs = 0; + private Drawable fgLastScanoutSrc = null; + private Drawable fgFirstScanoutSrc = null; + private boolean fgMultiBuffer = false; + private long fgLastAcceptNs = 0L; + private static final long FG_DEDUP_FREEZE_NS = 100_000_000L; private boolean fgEmitWasHold = false; private long fgInstHoldN, fgInstInterpN, fgInstLongN, fgInstTotalN; private double fgInstHoldSum, fgInstInterpSum, fgInstHoldMax, fgInstInterpMax; - private boolean fgRenderPrioritySet = false; // one-shot: elevate the GL present thread vs scheduling jitter - private volatile int fgActivePresentMode = PRESENT_MODE_FIFO; // resolved native mode (see nativeGetActivePresentMode) + private boolean fgRenderPrioritySet = false; + private volatile int fgActivePresentMode = PRESENT_MODE_FIFO; private volatile int fgDisplayCapHz = 0; // panel-max ceiling for the target post rate; 0 = uncapped - // Quality/smoothness, mapped to native shader knobs (motion search floor + interp consistency). private volatile int fgQuality = 1; // 0 performance, 1 balanced, 2 quality private volatile float fgSmoothness = 0.75f; - // Quality pipeline: bidirectional warp (adds a forward flow). private volatile boolean fgDeepMode = false; - private volatile boolean fgExtrapolate = false; // false = interpolate, true = extrapolate (predict forward) - private volatile int fgFramesInFlight = 3; // compositor buffering depth (1..3): latency<->smoothness - // Panel frame-rate request: surface vote here; the activity mirrors it into the window's - // preferredDisplayModeId/preferredRefreshRate (which outrank surface votes) via the listener. 
+ private volatile boolean fgExtrapolate = false; // false = interpolate, true = extrapolate + private volatile int fgModel = 0; // 0 = standard, 1 = steadier + private volatile float fgFlowScale = 0.5f; // flow-field resolution scale [0.2,1.0] + private volatile int fgFramesInFlight = 3; // compositor buffering depth (1..3) private volatile Surface fgSurface; private float fgFrameRateHint = -1f; private long fgFrameRateHintNs = 0L; @@ -174,14 +162,11 @@ public void setSwapRB(boolean v) { private final ByteBuffer sceneBuf = ByteBuffer.allocateDirect(SCENE_BUF_SIZE).order(ByteOrder.nativeOrder()); private final Handler mainHandler = new Handler(Looper.getMainLooper()); - // FG pump runs on a dedicated native pthread (AChoreographer) — see nativeFgPumpStart in - // vk_renderer.c; it calls back into fgPumpTickFromNative each vsync. private volatile boolean fgPumpStarted = false; private final AtomicBoolean renderRequested = new AtomicBoolean(false); // Reusable scratch — sized once, refilled per frame. private final float[] sceneXform = XForm.getInstance(); - // Effect.writeParams writes into a float[]; we copy into the ByteBuffer afterwards. private final float[] effectParamsScratch = new float[MAX_EFFECTS * 4]; private final AtomicBoolean destroyed = new AtomicBoolean(false); @@ -228,7 +213,6 @@ public void destroy() { public void requestRenderCoalesced() { if (frameGenEnabled) { - // Non-game change (cursor/window/geometry): mark dirty so the pump recomposites it. fgSceneDirty.set(true); scheduleFgPump(); return; @@ -250,10 +234,6 @@ public void setFrameGeneration(boolean enabled) { synchronized (this) { if (nativeHandle != 0) { nativeSetFrameGeneration(nativeHandle, enabled); - // FIFO, not MAILBOX: with the panel pinned to the FG target (preferredDisplayModeId) - // the FIFO queue self-paces one present per vblank — nothing is ever replaced or - // dropped. 
Under MAILBOX a queued synthetic frame can be overwritten by the next - // present before scanout and never reach the panel. nativeSetPresentMode(nativeHandle, enabled ? PRESENT_MODE_FIFO : requestedPresentMode); fgActivePresentMode = nativeGetActivePresentMode(nativeHandle); } @@ -273,14 +253,12 @@ public void setFrameGeneration(boolean enabled) { fgMultiBuffer = false; fgLastAcceptNs = 0L; fgRenderPrioritySet = false; - fgLockedGameHz = 0.0; // re-lock the game rate fresh for this session + fgLockedGameHz = 0.0; fgGameDriftFrames = 0; - fgNewScene.set(true); // re-render current content as the first held frame + fgNewScene.set(true); startFgPumpThread(); scheduleFgPump(); } - // When disabled, the pump self-stops (fgPumpTick checks frameGenEnabled) and onDrawFrame - // reverts to the coalesced real-present path. if (!enabled) { fgLockedGameHz = 0.0; fgApplyFrameRateHint(0.0, System.nanoTime()); stopFgPumpThread(); } } @@ -289,6 +267,8 @@ public void setFrameGeneration(boolean enabled) { /** Target display:engine ratio (2, 3, 4). Snapped to a supported value. Live; safe from any thread. */ public void setFrameGenerationMultiplier(int multiplier) { fgMultiplier = multiplier <= 2 ? 2 : (multiplier >= 4 ? 4 : 3); + fgEffectiveMultiplier = fgMultiplier; + fgBoundSecs = 0; } public int getFrameGenMultiplier() { return fgMultiplier; } @@ -304,7 +284,22 @@ public void setFrameGenerationQuality(int quality) { public int getFrameGenerationQuality() { return fgQuality; } - /** Interpolation smoothness in [0,1] (higher trusts motion more — smoother, more ghosting). Live. */ + /** Apply a preset's flow quality + model + flowScale. Live. */ + public void setFrameGenerationPreset(int quality, int model, float flowScale) { + fgQuality = quality < 0 ? 0 : (quality > 2 ? 2 : quality); + fgModel = model <= 0 ? 0 : 1; + float fs = flowScale < 0.2f ? 0.2f : (flowScale > 1.0f ? 
1.0f : flowScale); + boolean flowChanged = Math.abs(fs - fgFlowScale) > 1e-4f; + fgFlowScale = fs; + pushFrameGenParams(); + if (flowChanged) { + synchronized (this) { + if (nativeHandle != 0) nativeSetFrameGenFlowScale(nativeHandle, fs); + } + } + } + + /** Interpolation smoothness in [0,1]. Live. */ public void setFrameGenerationSmoothness(float smoothness) { fgSmoothness = smoothness < 0f ? 0f : (smoothness > 1f ? 1f : smoothness); pushFrameGenParams(); @@ -312,10 +307,7 @@ public void setFrameGenerationSmoothness(float smoothness) { public float getFrameGenerationSmoothness() { return fgSmoothness; } - /** - * Pipeline mode. false = standard (single backward flow). true = quality (adds a forward flow - * for a bidirectional warp; same latency). Live. - */ + /** Pipeline mode. false = standard (single backward flow). true = quality (bidirectional warp). Live. */ public void setFrameGenerationDeepMode(boolean deep) { fgDeepMode = deep; synchronized (this) { @@ -325,10 +317,7 @@ public void setFrameGenerationDeepMode(boolean deep) { public boolean isFrameGenerationDeepMode() { return fgDeepMode; } - /** - * Generation method. false = interpolation (between the two newest real frames; +1 frame latency). - * true = extrapolation (predict forward from the latest real frame; no added latency). Live. - */ + /** Generation method. false = interpolation (+1 frame latency). true = extrapolation (no added latency). Live. */ public void setFrameGenerationExtrapolate(boolean extrapolate) { fgExtrapolate = extrapolate; synchronized (this) { @@ -338,11 +327,7 @@ public void setFrameGenerationExtrapolate(boolean extrapolate) { public boolean isFrameGenerationExtrapolate() { return fgExtrapolate; } - /** - * Compositor frames-in-flight (1..3): the latency↔smoothness dial. Higher buffers more GPU work - * ahead (smoother under spikes, more latency); lower is more responsive. Irrelevant under - * extrapolation (no frames are held). Live. 
- */ + /** Compositor frames-in-flight (1..3): the latency/smoothness dial. Live. */ public void setFrameGenerationFramesInFlight(int framesInFlight) { fgFramesInFlight = framesInFlight < 1 ? 1 : (framesInFlight > 3 ? 3 : framesInFlight); synchronized (this) { @@ -352,10 +337,9 @@ public void setFrameGenerationFramesInFlight(int framesInFlight) { public int getFrameGenerationFramesInFlight() { return fgFramesInFlight; } - // Map quality preset + smoothness to the native interpolate.frag / motion.comp knobs. private void pushFrameGenParams() { - float occHi = 0.12f + 0.28f * fgSmoothness; // consistency window: wider == trusts motion more - float occLo = occHi * 0.25f; + float occHi = 0.12f + 0.28f * fgSmoothness; + float occLo = (float) fgModel; // 0 standard, 1 steadier int minStep = fgQuality == 0 ? 4 : (fgQuality == 2 ? 1 : 2); synchronized (this) { if (nativeHandle != 0) nativeSetFrameGenParams(nativeHandle, occLo, occHi, minStep); @@ -371,7 +355,7 @@ public long getDisplayFrameCount() { private synchronized void startFgPumpThread() { if (fgPumpStarted) return; - nativeFgPumpStart(this); // dedicated native AChoreographer pthread; calls fgPumpTickFromNative + nativeFgPumpStart(this); fgPumpStarted = true; } @@ -383,18 +367,12 @@ private synchronized void stopFgPumpThread() { } private void scheduleFgPump() { - // The native AChoreographer pump free-runs every vsync once started; just keep it alive (cheap - // flag check). Self-heals if a lifecycle race ever left it stopped while FG is on. if (frameGenEnabled && !fgPumpStarted) startFgPumpThread(); } - // Invoked from the native pump thread once per vsync (frameTimeNanos = the vsync time). Does the FG - // display-rate timing + wakes the render thread (onDrawFrame -> fgDrawFrame). The native pump - // re-arms itself, so there is no re-schedule here. Keep this lightweight and exception-safe. + // Invoked from the native pump thread once per vsync (frameTimeNanos = the vsync time). 
private void fgPumpTickFromNative(long frameTimeNanos) { if (!frameGenEnabled || nativeHandle == 0) return; - // The swapchain may still be FIFO right after enable (surface not attached yet); re-read until - // it resolves so the bootstrap engages at launch without a manual toggle. if (fgActivePresentMode == PRESENT_MODE_FIFO) { fgActivePresentMode = nativeGetActivePresentMode(nativeHandle); } @@ -408,33 +386,70 @@ private void fgPumpTickFromNative(long frameTimeNanos) { fgDisplayPeriodNs, frameTimeNanos); fgApplyFrameRateHint(th, frameTimeNanos); } + } else if (d >= 100_000_000L) { + fgResyncPending = true; } } fgLastPumpNs = frameTimeNanos; - fgCurrentVsyncNs = frameTimeNanos; // anchor continuous-phase placement to the clean vsync grid + fgCurrentVsyncNs = frameTimeNanos; xServerView.requestRender(); } - // Render-thread scheduler (DESIGN.md §2): emit enough presents per tick to sustain the target rate. private void fgDrawFrame() { if (!fgRenderPrioritySet) { - // Elevate THIS (the GL present) thread to urgent-display so a brief preempt in the tiny - // record/submit window between vsync-acquires doesn't make it miss the next image → the slip. 
try { android.os.Process.setThreadPriority(android.os.Process.THREAD_PRIORITY_URGENT_DISPLAY); } - catch (Throwable t) { /* best-effort; capped by rlimit on some ROMs */ } + catch (Throwable t) { /* best-effort */ } fgRenderPrioritySet = true; } int perTick = fgComputePerTick(); for (int i = 0; i < perTick; i++) { long t0 = System.nanoTime(); int kind = fgEmitOne(); + if (kind == 1) fgDiagInterp++; + else if (kind == 2) fgDiagReal++; + else fgDiagNone++; if (kind != 0) fgInstrument((System.nanoTime() - t0) / 1000L, fgEmitWasHold); } + fgCadenceDiag(); + } + + private long fgDiagInterp, fgDiagReal, fgDiagNone, fgDiagLastNs; + private void fgCadenceDiag() { + long now = System.nanoTime(); + if (fgDiagLastNs == 0L) { fgDiagLastNs = now; return; } + if (now - fgDiagLastNs < 1_000_000_000L) return; + long game = fgContentPeriodNs, disp = fgDisplayPeriodNs; + double gameHz = game > 0 ? 1e9 / game : 0, dispHz = disp > 0 ? 1e9 / disp : 0; + double ratio = disp > 0 ? (double) game / disp : 0; + long dupDrop = 0, distinct = 0; + if (nativeHandle != 0) { nativeFgPromoteInfo(nativeHandle, fgPromoteInfo); dupDrop = fgPromoteInfo[2]; distinct = fgPromoteInfo[3]; } + long dDup = dupDrop - fgDiagPrevDup, dDist = distinct - fgDiagPrevDist; + fgDiagPrevDup = dupDrop; fgDiagPrevDist = distinct; + // Adaptive multiplier: when the delivered rate can't reach the effective target, step the + // working multiplier down (floor 2x); fgMultiplier stays the user ceiling. Window >1.5s ignored. + double secs = (double) (now - fgDiagLastNs) / 1e9; + if (fgLockedGameHz > 0.0 && secs > 0.0 && secs <= 1.5) { + double deliveredHz = (double) (fgDiagInterp + fgDiagReal) / secs; + double targetEff = Math.max(1, fgEffectiveMultiplier) * fgLockedGameHz; + if (fgDisplayCapHz > 0) targetEff = Math.min(targetEff, (double) fgDisplayCapHz); + // Step down only on a sustained shortfall (>=4 consecutive slow seconds). Floor 2x. 
+ if (deliveredHz > 0.0 && deliveredHz < 0.85 * targetEff && !fgOverlayActive) { + if (++fgBoundSecs >= 4 && fgEffectiveMultiplier > 2) { fgEffectiveMultiplier--; fgBoundSecs = 0; } + } else { + fgBoundSecs = 0; + } + } + Log.i(TAG, String.format(java.util.Locale.US, + "FG diag: content=%.0fHz (locked=%.0f) panel=%.0fHz slots=%d mult=%dx(eff=%dx) | present interp=%d real=%d none=%d | " + + "content-dedup distinct=%d/s dup-dropped=%d/s | id-dedup-dropped=%d accepted=%d", + gameHz, fgLockedGameHz, dispHz, (int) Math.round(ratio), fgMultiplier, fgEffectiveMultiplier, + fgDiagInterp, fgDiagReal, fgDiagNone, dDist, dDup, fgDiagDedupDropped, fgDiagAccepted)); + fgDiagInterp = fgDiagReal = fgDiagNone = fgDiagDedupDropped = fgDiagAccepted = 0; + fgDiagLastNs = now; } + private long fgDiagDedupDropped, fgDiagAccepted, fgDiagPrevDup, fgDiagPrevDist; - // GL-thread wall-time spent recording+submitting one present, bucketed by whether it included the - // real-frame composite (HOLD). With hardware headroom, a present that takes >~8.31ms here is what - // makes the GL thread miss the next vsync (GLSurfaceView coalesces) → the slip. Reports every ~2s. + // GL-thread wall-time per present, bucketed by composite (HOLD) vs interp. Reports every ~2s. private void fgInstrument(long usCpu, boolean wasHold) { double ms = usCpu / 1000.0; if (wasHold) { fgInstHoldN++; fgInstHoldSum += ms; if (ms > fgInstHoldMax) fgInstHoldMax = ms; } @@ -452,80 +467,161 @@ private void fgInstrument(long usCpu, boolean wasHold) { } // Slot-grid placement: with the panel pinned to ~M x gameHz, each game period spans M display - // ticks. Tick k since the frame's arrival presents: - // interpolation: k = 0..M-2 -> interp at phase (k+1)/M; k = M-1 -> the real frame, sharp - // (PRESENT_LAST blit, no resample). - // extrapolation: k = 0 -> the real frame immediately (no hold-back latency); - // k = 1..M-1 -> predict phase k/M past it along the motion field. 
- // Slot phases are deterministic: the vsync clock and the game-arrival clock are not phase-locked, - // so clock-derived phases inject per-slot bias. A continuous-phase fallback covers non-integer - // panel:game ratios (e.g. 29fps on 120Hz). - // Returns the present kind for instrumentation: 0 none, 1 synthesized, 2 real frame. + // ticks; tick k since arrival presents an interp at phase (k+1)/M, or the real frame at the end. + // A continuous-phase fallback covers non-integer panel:game ratios. + private final long[] fgPromoteInfo = new long[4]; + private long fgPromoteSeen = 0; + private long fgLastPromoteNs = 0, fgPrevPromoteNs = 0; // times of the last two distinct content frames + private long fgContentPeriodNs = 0; // EMA of the interval between distinct content frames + private int fgPromoteSlotIdx = 0; // display ticks since the last promote + private volatile boolean fgResyncPending = false; + private volatile boolean fgOverlayActive = false; + + // Re-anchor the FG content clock + working multiplier after a pause/overlay/focus gap. + private void doFgResync() { + fgLockedGameHz = 0.0; + fgContentPeriodNs = 0L; + fgGamePeriodNs = 0L; + fgGameDriftFrames = 0; + fgPrevPromoteNs = 0L; + fgLastPromoteNs = 0L; + fgEngineFrames = 0; + fgPromoteSlotIdx = 0; + fgEffectiveMultiplier = fgMultiplier; + fgBoundSecs = 0; + fgNewScene.set(true); + } + + /** Mark a UI overlay (drawer) active; clearing it re-anchors the FG clock fresh. Thread-safe. */ + public void fgSetOverlayActive(boolean active) { + if (fgOverlayActive == active) return; + fgOverlayActive = active; + if (!active) fgResyncPending = true; + } + + /** Idempotently resume FG if it is in overlay-pause. Does nothing when not paused. 
*/ + public void fgClearOverlayIfActive() { + if (fgOverlayActive) { fgOverlayActive = false; fgResyncPending = true; } + } + private int fgEmitOne() { + if (fgResyncPending) { fgResyncPending = false; doFgResync(); } boolean newGame = fgNewScene.getAndSet(false); boolean dirty = fgSceneDirty.getAndSet(false); fgEmitWasHold = newGame || dirty; + boolean promoted = false; if (newGame || dirty) { - buildAndSubmitFrame(); // HOLD -> history[curr] (no present) - if (newGame) { fgEngineFrames++; fgSlotIdx = 0; } + buildAndSubmitFrame(); // HOLD: stage incoming; native promotes only distinct content + if (nativeHandle != 0) { + nativeFgPromoteInfo(nativeHandle, fgPromoteInfo); + if (fgPromoteInfo[0] != fgPromoteSeen) { + fgPromoteSeen = fgPromoteInfo[0]; + promoted = true; + long pNs = fgPromoteInfo[1] != 0L ? fgPromoteInfo[1] : System.nanoTime(); + if (fgLastPromoteNs != 0L) { + long d = pNs - fgLastPromoteNs; // interval between distinct frames = content period + if (d > 0L && d < 500_000_000L) { + fgContentPeriodNs = fgContentPeriodNs == 0L ? d + : fgContentPeriodNs + (d - fgContentPeriodNs) / 8L; + double inst = 1.0e9 / (double) fgContentPeriodNs; + if (fgLockedGameHz <= 0.0) { + fgLockedGameHz = inst; + } else if (Math.abs(inst - fgLockedGameHz) > Math.max(2.0, 0.10 * fgLockedGameHz)) { + if (++fgGameDriftFrames >= 24) { fgLockedGameHz = inst; fgGameDriftFrames = 0; } + } else { + fgGameDriftFrames = 0; + } + fgGamePeriodNs = fgContentPeriodNs; + } + } + fgPrevPromoteNs = fgLastPromoteNs; + fgLastPromoteNs = pNs; + fgEngineFrames++; + fgPromoteSlotIdx = 0; + } + } + } + if (!promoted) fgPromoteSlotIdx++; + + // Drawer/menu overlay up: pause FG generation and present only the real frame. 
+ if (fgOverlayActive) { + if (fgEmitWasHold) { nativePresentLast(nativeHandle); return 2; } + return 0; } - if (!newGame) fgSlotIdx++; - long period = fgGamePeriodNs; + + long period = fgContentPeriodNs; boolean canInterp = fgMultiplier > 1 && fgEngineFrames >= 2 && period > 0L - && fgLastGameNs != 0L && fgPrevGameNs != 0L; + && fgLastPromoteNs != 0L && fgPrevPromoteNs != 0L; if (!canInterp) { - // Passthrough (1x) / bootstrap / no measured rate yet: show the latest real frame on each - // change (never post below native); nothing to interpolate. if (newGame || dirty) { nativePresentLast(nativeHandle); return 2; } return 0; } if (dirty && !newGame) { - // Cursor/UI-only recomposite — show it sharply, don't morph it through the stale motion pair. + // Cursor/UI-only recomposite — show it sharply. nativePresentLast(nativeHandle); return 2; } + // Slot grid anchored to distinct content (promotes): each content period spans `slots` display + // ticks; generated frames interpolate the distinct pair at phase (k+1)/slots. long disp = fgDisplayPeriodNs; double ratio = disp > 0L ? (double) period / (double) disp : 0.0; int slots = (int) Math.round(ratio); - boolean gridOk = slots >= 2 && slots <= 8 && Math.abs(ratio - slots) < 0.10 * slots; + boolean gridOk = slots >= 2 && slots <= 16; if (gridOk) { - // Honor the user's multiplier even if the panel pin hasn't (or can't) switch: cap the - // unique synthesized positions; surplus ticks just re-show the real frame. - if (slots > fgMultiplier) slots = fgMultiplier; - int k = fgSlotIdx; - if (k >= slots * 2) return 0; // game stalled — hold the panel, save the GPU + // Even-hold path: when the panel rate is an integer multiple of the FG output, present each + // of the m generated frames for slots/m vsyncs (a clean 1:N hold). 
+ int m = fgEffectiveMultiplier; + if (m >= 2 && slots > m && (slots % m) == 0) { + int spacing = slots / m; // vsyncs each generated frame is held on screen + int k = fgPromoteSlotIdx; + if (k >= slots) return 0; // past the content period — hold + if ((k % spacing) != 0) return 0; // mid-hold: keep the prior generated frame + int s = k / spacing; // which generated sub-frame, 0..m-1 + if (fgExtrapolate) { + double phe = (double) s / (double) m; + if (s == 0 || phe >= 1.0) { nativePresentLast(nativeHandle); return 2; } + nativeRenderInterp(nativeHandle, (float) phe, fgPrevPromoteNs, fgLastPromoteNs); + return 1; + } + double phg = (double) (s + 1) / (double) m; + if (phg >= 1.0) { nativePresentLast(nativeHandle); return 2; } + nativeRenderInterp(nativeHandle, (float) phg, fgPrevPromoteNs, fgLastPromoteNs); + return 1; + } + // Place each tween by the measured content:panel ratio so a variable source rate maps to the + // geometrically correct phase. + double r = ratio; + if (r > fgEffectiveMultiplier) r = fgEffectiveMultiplier; + int k = fgPromoteSlotIdx; + if (k >= slots * 2) return 0; // content stalled — hold if (fgExtrapolate) { - if (k == 0 || k >= slots) { nativePresentLast(nativeHandle); return 2; } - nativeRenderInterp(nativeHandle, (float) k / slots, fgPrevGameNs, fgLastGameNs); + double phe = (double) k / r; + if (k == 0 || phe >= 1.0) { nativePresentLast(nativeHandle); return 2; } + nativeRenderInterp(nativeHandle, (float) phe, fgPrevPromoteNs, fgLastPromoteNs); return 1; } - if (k >= slots - 1) { nativePresentLast(nativeHandle); return 2; } - nativeRenderInterp(nativeHandle, (float) (k + 1) / slots, fgPrevGameNs, fgLastGameNs); + double ph = (double) (k + 1) / r; + if (ph >= 1.0) { nativePresentLast(nativeHandle); return 2; } + nativeRenderInterp(nativeHandle, (float) ph, fgPrevPromoteNs, fgLastPromoteNs); return 1; } - // Continuous-phase fallback: phase = (thisVsync − lastRealArrival) / gamePeriod. 
+ // Continuous-phase fallback (non-integer content:panel ratio): phase since the last promote. long vsync = fgCurrentVsyncNs != 0L ? fgCurrentVsyncNs : System.nanoTime(); - double phase = (double) (vsync - fgLastGameNs) / (double) period; + double phase = (double) (vsync - fgLastPromoteNs) / (double) period; if (fgExtrapolate) { - if (newGame || phase >= 1.0) { - if (phase < 2.0 || newGame) { nativePresentLast(nativeHandle); return 2; } - return 0; - } - nativeRenderInterp(nativeHandle, (float) Math.max(0.001, phase), fgPrevGameNs, fgLastGameNs); + if (phase >= 1.0) { if (phase < 2.0) { nativePresentLast(nativeHandle); return 2; } return 0; } + nativeRenderInterp(nativeHandle, (float) Math.max(0.001, phase), fgPrevPromoteNs, fgLastPromoteNs); return 1; } if (phase < 1.0) { - nativeRenderInterp(nativeHandle, (float) Math.max(0.001, phase), fgPrevGameNs, fgLastGameNs); + nativeRenderInterp(nativeHandle, (float) Math.max(0.001, phase), fgPrevPromoteNs, fgLastPromoteNs); return 1; } else if (phase < 2.0) { - // Caught up to the newest real frame (or it arrived a little late) — show it sharply and fill - // the tick. Covering up to 2 periods past absorbs arrival jitter so a late frame leaves no gap. nativePresentLast(nativeHandle); return 2; } - // else: game stalled >2 frames — let the panel hold the last frame (don't burn GPU on a freeze). return 0; } @@ -533,24 +629,20 @@ private int fgEmitOne() { private double fgTargetHz() { double g = fgLockedGameHz; if (g <= 0.0) return 0.0; - double target = Math.max(1, fgMultiplier) * g; + double target = Math.max(1, fgEffectiveMultiplier) * g; if (fgDisplayCapHz > 0) target = Math.min(target, (double) fgDisplayCapHz); return target; } - // Interpolated frames to insert between this engine frame and the previous one. 
private int fgComputeInterps() { - int maxInterps = Math.max(1, fgMultiplier) - 1; // 2x->1, 3x->2, 4x->3 + int maxInterps = Math.max(1, fgEffectiveMultiplier) - 1; long disp = fgDisplayPeriodNs, game = fgGamePeriodNs; if (disp <= 0L || game <= 0L) return 0; if (fgActivePresentMode == PRESENT_MODE_FIFO) { - // Vsync-locked: insert what the current refresh affords. Epsilon absorbs EMA jitter so an - // exact integer ratio (e.g. 120/30) doesn't floor to one slot short. Never below native. + // Vsync-locked: insert what the current refresh affords. Epsilon absorbs EMA jitter. int slots = (int) Math.floor((double) game / (double) disp + 1e-3); return Math.max(0, Math.min(maxInterps, slots - 1)); } - // Non-blocking: post at the target rate so an adaptive-refresh panel ramps up to it. Use the - // locked game rate on both sides so the ratio is the stable multiplier, not per-frame jitter. double gameHz = fgLockedGameHz > 0.0 ? fgLockedGameHz : 1.0e9 / (double) game; int interps = (int) Math.round(fgTargetHz() / gameHz) - 1; return Math.max(0, Math.min(maxInterps, interps)); @@ -567,19 +659,15 @@ private int fgComputePerTick() { return Math.max(1, Math.min(n, 8)); } - // Vote the FG post rate on the content surface (lifts the Android 15+ game default-60Hz - // throttle and drives VRR panels), then tell the activity so it mirrors the target into the - // window's preferredDisplayModeId — the window pin outranks surface votes, so it must carry - // the same value or it silently wins with a stale one. 0 clears both when FG turns off. + // Vote the FG post rate on the content surface, then notify the activity so it mirrors the target + // into the window's preferredDisplayModeId. 0 clears both when FG turns off. private void fgApplyFrameRateHint(double targetHz, long nowNs) { if (Build.VERSION.SDK_INT < Build.VERSION_CODES.R) return; float rate = frameGenEnabled && targetHz > 0.0 ? 
(float) Math.round(targetHz) : 0f; if (rate == fgFrameRateHint) return; if (rate != 0f && fgFrameRateHint > 0f && Math.abs(rate - fgFrameRateHint) <= 5f) return; // EMA jitter if (rate != 0f && nowNs - fgFrameRateHintNs < 500_000_000L) return; - // DEFAULT (exact-or-multiple), not FIXED_SOURCE: FIXED_SOURCE is video semantics — it - // tells SurfaceFlinger pulldown judder is acceptable, which lets the idle/power policy - // drop the panel to 60Hz against a 90/120Hz vote the moment touch boost ends. + // DEFAULT (exact-or-multiple), not FIXED_SOURCE (video pulldown semantics). Surface s = fgSurface; if (s != null && s.isValid()) { try { @@ -636,13 +724,12 @@ public void attachSurface(Surface surface) { } Texture.setRendererHandle(nativeHandle); // Apply the cached present-mode request now that the native renderer exists. - // No-op if the requested mode equals the native default (FIFO). if (requestedPresentMode != PRESENT_MODE_FIFO) { nativeSetPresentMode(nativeHandle, requestedPresentMode); } if (frameGenEnabled) { nativeSetFrameGeneration(nativeHandle, true); - nativeSetPresentMode(nativeHandle, PRESENT_MODE_MAILBOX); // over-post hold + native pacer + nativeSetPresentMode(nativeHandle, PRESENT_MODE_MAILBOX); fgActivePresentMode = nativeGetActivePresentMode(nativeHandle); pushFrameGenParams(); nativeSetFrameGenDeepMode(nativeHandle, fgDeepMode); @@ -662,10 +749,13 @@ public void attachSurface(Surface surface) { } private boolean shouldEnableValidationLayers() { + // Force validation layers off and clear any stale pref (the UI toggle is disabled). 
Context context = xServerView.getContext(); - return BuildConfig.DEBUG - && PreferenceManager.getDefaultSharedPreferences(context) - .getBoolean(PREF_VULKAN_VALIDATION_LAYERS, false); + android.content.SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(context); + if (prefs.getBoolean(PREF_VULKAN_VALIDATION_LAYERS, false)) { + prefs.edit().putBoolean(PREF_VULKAN_VALIDATION_LAYERS, false).apply(); + } + return false; } public void notifySurfaceChanged(int w, int h) { @@ -945,10 +1035,8 @@ private void buildAndSubmitFrame() { } nativeSetScene(nativeHandle, buf); - // nativeSetFpsLimit is a native no-op (pacing is done elsewhere); not called per frame. if (frameGenEnabled) { - // FG: render the composited scene into the history ring without presenting; the - // interpolated + held-real presents are issued by fgEmitOne() at display cadence. + // FG: render into the history ring without presenting; presents are issued by fgEmitOne(). nativeRenderHold(nativeHandle); } else { nativeRenderFrame(nativeHandle); @@ -1016,22 +1104,10 @@ public void onFramePresented(Window window, WindowManager.FrameSource source, in // DRI3_BUFFER fires at pixmap allocation, not a visible change; the real present already wakes us. Skip it. if (source == WindowManager.FrameSource.DRI3_BUFFER) return; if (frameGenEnabled) { - // This is an actual game-window frame (X11 Present / PutImage / MIT-SHM) — the only - // signal that drives FG's hold+interpolate cadence. Cursor/Controls go through the - // generic requestRenderCoalesced path and deliberately do not get counted here. - - // De-DUPLICATE re-presented frames by SCANOUT-BUFFER IDENTITY. A game that vsyncs 30fps - // content at 60/90Hz re-presents the SAME swapchain pixmap; the FG would otherwise interpolate - // identical pairs into static holds (the [50,50,0,0] period-4 judder + duplicate frames the - // cadence audit + simulation both showed). 
The PRESENT extension just set the window's scanout - // source to this present's pixmap.drawable, so a present that re-points at the SAME object as - // the last ACCEPTED frame is a duplicate buffer → drop it. We compare object identity, not - // pixels (the pixmap is GPU-side, not CPU-readable here — that's what broke the hash version). - // • Trust identity ONLY once we've seen ≥2 distinct buffers (a real swapchain). A single- - // buffered game reuses one pixmap for fresh content, so identity would false-match — there - // we DON'T dedup (front-loaded but never frozen) instead of starving the cadence. - // • FREEZE BACKSTOP: never drop for >100ms, so a genuine hold/stall always gets through and - // the output can never lock up (the failure mode of the reverted CPU-hash attempt). + // An actual game-window frame — the signal that drives FG's hold+interpolate cadence. + // De-duplicate re-presented frames by scanout-buffer identity: a present that re-points at + // the same object as the last accepted frame is a duplicate buffer to drop. Only trust + // identity once >=2 distinct buffers have been seen; never drop for >100ms (freeze backstop). Drawable scanoutNow = (window != null && window.getContent() != null) ? window.getContent().getScanoutSource() : null; if (scanoutNow != null) { @@ -1041,28 +1117,13 @@ public void onFramePresented(Window window, WindowManager.FrameSource source, in long now = System.nanoTime(); if (fgMultiBuffer && scanoutNow != null && scanoutNow == fgLastScanoutSrc && (now - fgLastAcceptNs) < FG_DEDUP_FREEZE_NS) { - return; // duplicate buffer — ignore for the FG cadence (keep the real-frame clock) + fgDiagDedupDropped++; + return; // duplicate buffer — ignore for the FG cadence } + fgDiagAccepted++; fgLastScanoutSrc = scanoutNow; fgLastAcceptNs = now; - - if (fgLastGameNs != 0L) { - long d = now - fgLastGameNs; - if (d > 0L && d < 500_000_000L) { - fgGamePeriodNs = fgGamePeriodNs == 0L ? 
d : fgGamePeriodNs + (d - fgGamePeriodNs) / 8L; - // Hold the game-rate lock steady; re-lock only after a sustained (~24-frame) deviation - // beyond 10% (or 2Hz), so a steady game yields a steady target instead of a jittery one. - double inst = 1.0e9 / (double) fgGamePeriodNs; - if (fgLockedGameHz <= 0.0) { - fgLockedGameHz = inst; - } else if (Math.abs(inst - fgLockedGameHz) > Math.max(2.0, 0.10 * fgLockedGameHz)) { - if (++fgGameDriftFrames >= 24) { fgLockedGameHz = inst; fgGameDriftFrames = 0; } - } else { - fgGameDriftFrames = 0; - } - } - } - fgPrevGameNs = fgLastGameNs; + // Trigger a HOLD; native stages + content-de-duplicates it. Cadence is driven by promotes. fgLastGameNs = now; fgNewScene.set(true); scheduleFgPump(); @@ -1324,13 +1385,15 @@ private static native long nativeCreate(boolean enableValidationLayers, private static native long nativeGetDisplayFrameCount(long handle); private static native boolean nativeRenderHold(long handle); private static native boolean nativeRenderInterp(long handle, float phase, long prevNs, long currNs); + private static native void nativeFgPromoteInfo(long handle, long[] out); private static native boolean nativePresentLast(long handle); private static native void nativeSetFrameGenParams(long handle, float occLo, float occHi, int minStep); + private static native void nativeSetFrameGenFlowScale(long handle, float flowScale); private static native void nativeSetFrameGenDeepMode(long handle, boolean deep); private static native void nativeSetFrameGenExtrapolate(long handle, boolean extrapolate); private static native void nativeSetFrameGenFramesInFlight(long handle, int framesInFlight); private static native int nativeGetActivePresentMode(long handle); private static native void nativeSetVsyncTiming(long handle, long periodNs, long displayPeriodNs, long vsyncNs); - private static native void nativeFgPumpStart(Object renderer); // native AChoreographer pump -> fgPumpTickFromNative + private static native void 
nativeFgPumpStart(Object renderer); private static native void nativeFgPumpStop(); } From f755596de2babd0b238d9d2299c37e547a02c465 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Thu, 18 Jun 2026 15:33:38 -0400 Subject: [PATCH 12/46] Frame generation P0: free content-rate present pacing Pace each presented frame at its true temporal position instead of quantizing to vblanks: - fg_compute_deadline no longer snaps the present target to the panel vsync grid; the target is the free, evenly-spaced instant on the measured content-rate grid (vsync-snapping was the 3:2-pulldown-style judder source for non-integer/variable content:panel ratios). - The worker requests that instant from the display via VkPresentTimeGOOGLE.desiredPresentTime (was hardcoded 0), so a panel that honours display-timing latches each frame on the correct vblank. - FG presents under a non-blocking mode on the toggle path (MAILBOX, IMMEDIATE fallback) to match the attach path, so the deadline nanosleep drives the present instant instead of FIFO vsync-blocking. - One present per pump tick (fgComputePerTick=1): the pump fires once per vblank, so emitting more posted multiple frames into a single vblank, which showed as an uneven ">panel" present rate. --- app/src/main/cpp/winlator/vk/vk_renderer.c | 25 +++++++++---------- app/src/main/cpp/winlator/vk/vk_state.h | 4 +-- .../display/renderer/VulkanRenderer.java | 19 +++++++------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index 5a366841d..1b02fbc16 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -2686,25 +2686,21 @@ static uint64_t g_fg_dropped = 0; // desiredPresentTime (CLOCK_MONOTONIC ns) for the next FG present; 0 = no constraint (real frame never delayed). 
#define FG_PRESENT_LEAD_NS 150000ull // wake this much before the deadline so the present latches this vblank -// Advance the present deadline by one target period, then snap it to the panel vsync grid. +// Advance the present deadline by one target period. The target is the free, evenly-spaced present +// instant on the measured CONTENT-rate grid — NOT snapped to the panel vsync grid. Snapping quantized +// every present to a vblank, which for non-integer / variable content:panel ratios produced uneven +// spacing (3:2-pulldown-style judder); pacing at the true temporal position removes that. static uint64_t fg_compute_deadline(VkRenderer* r) { uint64_t period = r->fg_present_period_ns ? r->fg_present_period_ns : r->refresh_duration_ns; if (period == 0) { r->fg_present_deadline_ns = 0; r->fg_present_target_ns = 0; return 0; } uint64_t now = now_monotonic_ns(); uint64_t deadline = r->fg_present_deadline_ns + period; + // Re-anchor on startup / stalls / rate changes so the evenly-spaced grid never drifts into the past + // or too far ahead of the real content clock. if (deadline < now || deadline > now + 4u * period) deadline = now + period; r->fg_present_deadline_ns = deadline; - - uint64_t target = deadline; - uint64_t vs = r->fg_display_period_ns ? r->fg_display_period_ns : r->refresh_duration_ns; - uint64_t anchor = r->fg_vsync_anchor_ns; - // Snap only while the panel carries the target rate (vsync period <= present period). 
- if (vs != 0 && anchor != 0 && deadline > anchor && vs <= period + period / 8u) { - target = anchor + ((deadline - anchor + vs / 2u) / vs) * vs; - if (target <= r->fg_present_target_ns) target = r->fg_present_target_ns + vs; // one present per vblank - } - r->fg_present_target_ns = target; - return target; + r->fg_present_target_ns = deadline; + return deadline; } static void fg_sleep_to_deadline(VkRenderer* r) { @@ -3211,7 +3207,10 @@ static void fg_worker_present(VkRenderer* r, const FgJob* job) { pinfo.swapchainCount = 1; pinfo.pSwapchains = &swapchain; pinfo.pImageIndices = &image_index; VkPresentTimeGOOGLE ptg; VkPresentTimesInfoGOOGLE pti; if (r->ext_display_timing) { - ptg.presentID = ++r->fg_present_id; ptg.desiredPresentTime = 0; + // Ask the display engine to latch this frame at its computed present instant (CLOCK_MONOTONIC ns). + // The CPU nanosleep above already woke us near the deadline; this lets a panel that honours + // display-timing place the frame on the correct vblank instead of "next vblank, whenever". 
+ ptg.presentID = ++r->fg_present_id; ptg.desiredPresentTime = job->deadline_ns; pti.sType = VK_STRUCTURE_TYPE_PRESENT_TIMES_INFO_GOOGLE; pti.pNext = NULL; pti.swapchainCount = 1; pti.pTimes = &ptg; pinfo.pNext = &pti; diff --git a/app/src/main/cpp/winlator/vk/vk_state.h b/app/src/main/cpp/winlator/vk/vk_state.h index 312d7458a..80b4d2a81 100644 --- a/app/src/main/cpp/winlator/vk/vk_state.h +++ b/app/src/main/cpp/winlator/vk/vk_state.h @@ -30,7 +30,7 @@ typedef struct FgJob { float phase; uint32_t curr_idx; // history slots, snapshotted at enqueue uint32_t prev_idx; - uint64_t deadline_ns; // vsync-snapped present target (worker paces to this) + uint64_t deadline_ns; // free content-rate present target, ns CLOCK_MONOTONIC (worker paces to this) uint32_t seq; // fg_promote_seq snapshot — worker drops the job if the slot was reused } FgJob; #define VK_MAX_EFFECTS 8 @@ -486,7 +486,7 @@ typedef struct VkRenderer { uint64_t refresh_duration_ns; // panel vsync period from the swapchain (fallback) uint64_t fg_present_period_ns; // target inter-present interval (ns) fed from Java uint64_t fg_present_deadline_ns; // unsnapped deadline accumulator (target-rate grid) - uint64_t fg_present_target_ns; // vsync-snapped sleep target for the next present + uint64_t fg_present_target_ns; // free content-rate sleep/present target for the next present uint64_t fg_display_period_ns; // live panel vsync period fed from Java (Choreographer EMA) uint64_t fg_vsync_anchor_ns; // latest Choreographer vsync timestamp (CLOCK_MONOTONIC) uint64_t fg_prev_arrival_ns; // real-frame arrival times, for time-based interp phase diff --git a/app/src/main/runtime/display/renderer/VulkanRenderer.java b/app/src/main/runtime/display/renderer/VulkanRenderer.java index 3c3c604e9..dd4caf312 100644 --- a/app/src/main/runtime/display/renderer/VulkanRenderer.java +++ b/app/src/main/runtime/display/renderer/VulkanRenderer.java @@ -234,7 +234,10 @@ public void setFrameGeneration(boolean enabled) { synchronized 
(this) { if (nativeHandle != 0) { nativeSetFrameGeneration(nativeHandle, enabled); - nativeSetPresentMode(nativeHandle, enabled ? PRESENT_MODE_FIFO : requestedPresentMode); + // FG presents under a non-blocking mode so the worker's deadline pacer drives the present + // instant. FIFO would block the present on vblank and make the panel vsync the pacer, + // re-introducing the quantization judder the deadline model exists to avoid. + nativeSetPresentMode(nativeHandle, enabled ? PRESENT_MODE_MAILBOX : requestedPresentMode); fgActivePresentMode = nativeGetActivePresentMode(nativeHandle); } } @@ -648,15 +651,13 @@ private int fgComputeInterps() { return Math.max(0, Math.min(maxInterps, interps)); } - // Presents per pump tick: enough to sustain the target rate from the current refresh (1 under FIFO). + // Presents per pump tick. The pump fires once per vblank (AChoreographer), so exactly one present per + // tick = one present per vblank = the panel-rate ceiling. Emitting more than one per tick would post + // multiple frames into a single vblank (MAILBOX keeps only the last), wasting work and inflating the + // present count into an uneven ">panel" rate. Sub-panel output rates are handled by the per-vblank + // phase logic (hold vs interp), so one-per-tick is always correct. 
private int fgComputePerTick() { - if (fgActivePresentMode == PRESENT_MODE_FIFO) return 1; - long disp = fgDisplayPeriodNs; - double target = fgTargetHz(); - if (disp <= 0L || target <= 0.0) return 1; - double panelHz = 1.0e9 / (double) disp; - int n = (int) Math.round(target / Math.max(1.0, panelHz)); - return Math.max(1, Math.min(n, 8)); + return 1; } // Vote the FG post rate on the content surface, then notify the activity so it mirrors the target From f31a1716280cfe0c75a540516b62184bcdb435ef Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Thu, 18 Jun 2026 16:11:02 -0400 Subject: [PATCH 13/46] Frame generation: closed-loop content-anchored cadence Replace the open-loop slot-grid/tick-counter cadence with a time-based rule and lock pacing to the source clock: - fgEmitOne derives the output sub-frame from elapsed time since the last real frame (frac*M), not a tick counter, so irregular promote arrival can't misalign placement. One present per vblank; sub 0 = real frame, sub 1..M-1 = tweens at phase sub/M. - Divisor-snap the cadence multiplier to the largest divisor of the panel:content ratio so output divides the panel evenly (e.g. 3x of 30Hz = 90Hz holds frames 1,1,2 vblanks on a 120Hz panel = judder; snaps to 2x). fgTargetHz + the adaptive ratchet use the snapped multiplier; diag reports it as cad=. - Stabilise the rate lock with a light EMA toward the measured content rate instead of the drift-relock threshold that left the lock stale. - Native fg_compute_deadline anchors each present to curr_arrival + phase*(curr-prev) (the source clock) instead of a per-enqueue period accumulator that drifted when holds emit nothing; nativePresentLast now carries phase + arrivals. 
--- app/src/main/cpp/winlator/vk/vk_renderer.c | 43 ++++--- .../display/renderer/VulkanRenderer.java | 120 +++++++----------- 2 files changed, 75 insertions(+), 88 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index 1b02fbc16..8bdf059ec 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -2686,18 +2686,27 @@ static uint64_t g_fg_dropped = 0; // desiredPresentTime (CLOCK_MONOTONIC ns) for the next FG present; 0 = no constraint (real frame never delayed). #define FG_PRESENT_LEAD_NS 150000ull // wake this much before the deadline so the present latches this vblank -// Advance the present deadline by one target period. The target is the free, evenly-spaced present -// instant on the measured CONTENT-rate grid — NOT snapped to the panel vsync grid. Snapping quantized -// every present to a vblank, which for non-integer / variable content:panel ratios produced uneven -// spacing (3:2-pulldown-style judder); pacing at the true temporal position removes that. -static uint64_t fg_compute_deadline(VkRenderer* r) { - uint64_t period = r->fg_present_period_ns ? r->fg_present_period_ns : r->refresh_duration_ns; - if (period == 0) { r->fg_present_deadline_ns = 0; r->fg_present_target_ns = 0; return 0; } +// Present instant for one output frame, anchored to the real-frame ARRIVAL clock: a frame at content-phase +// `phase` of the [prev,curr] interval is shown at curr_arrival + phase*(curr-prev). This locks the present +// cadence to the source's own timeline, so vblanks that emit nothing (holds) cannot drift the grid the way +// a per-enqueue period accumulator did — that drift was the residual judder after vsync-snap removal. Before +// the two arrivals are known it falls back to an evenly-spaced period grid. NOT snapped to the panel vsync. 
+static uint64_t fg_compute_deadline(VkRenderer* r, float phase) { uint64_t now = now_monotonic_ns(); - uint64_t deadline = r->fg_present_deadline_ns + period; - // Re-anchor on startup / stalls / rate changes so the evenly-spaced grid never drifts into the past - // or too far ahead of the real content clock. - if (deadline < now || deadline > now + 4u * period) deadline = now + period; + uint64_t ca = r->fg_curr_arrival_ns, pa = r->fg_prev_arrival_ns; + uint64_t period = r->fg_present_period_ns ? r->fg_present_period_ns : r->refresh_duration_ns; + uint64_t deadline; + if (ca != 0 && pa != 0 && ca > pa) { + uint64_t cp = ca - pa; // content period from the two real arrivals + if (phase < 0.0f) phase = 0.0f; + deadline = ca + (uint64_t)((double)phase * (double)cp); + } else if (period != 0) { + deadline = r->fg_present_deadline_ns + period; // pre-lock fallback: even period grid + } else { + r->fg_present_deadline_ns = 0; r->fg_present_target_ns = 0; return 0; + } + if (deadline < now) deadline = now; // never schedule in the past + if (period != 0 && deadline > now + 4u * period) deadline = now + period; r->fg_present_deadline_ns = deadline; r->fg_present_target_ns = deadline; return deadline; @@ -2884,8 +2893,8 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { uint32_t prev_idx = (parity + 2u) % 3u; VkFgImage* curr = &r->fg_history[curr_idx]; - // Advance the vsync-aligned present deadline for the pacer (fg_sleep_to_deadline). - fg_compute_deadline(r); + // Advance the present deadline for the pacer (fg_sleep_to_deadline). + fg_compute_deadline(r, phase); if (do_interp) { if (r->fg_curr_arrival_ns != r->fg_dbg_last_curr) { r->fg_dbg_done_n = r->fg_dbg_n; @@ -3259,7 +3268,7 @@ static void fg_enqueue(VkRenderer* r, uint8_t mode, float phase) { job.curr_idx = curr; job.prev_idx = (curr + 2u) % 3u; job.seq = r->fg_promote_seq; - fg_compute_deadline(r); + fg_compute_deadline(r, phase); job.deadline_ns = r->fg_present_target_ns ? 
r->fg_present_target_ns : r->fg_present_deadline_ns; uint32_t tail = r->fg_job_tail; uint32_t next = (tail + 1u) % FG_JOB_RING; @@ -3748,11 +3757,13 @@ JNIEXPORT jboolean JNICALL JNI_FN(nativeRenderInterp)(JNIEnv* env, jclass clazz, return JNI_TRUE; } -JNIEXPORT jboolean JNICALL JNI_FN(nativePresentLast)(JNIEnv* env, jclass clazz, jlong handle) { +JNIEXPORT jboolean JNICALL JNI_FN(nativePresentLast)(JNIEnv* env, jclass clazz, jlong handle, jfloat phase, jlong prevNs, jlong currNs) { (void)env; (void)clazz; VkRenderer* r = (VkRenderer*)(intptr_t)handle; if (!r || !r->surface_ready) return JNI_FALSE; - fg_enqueue(r, FG_MODE_PRESENT_LAST, 0.5f); + r->fg_prev_arrival_ns = prevNs > 0 ? (uint64_t)prevNs : 0; + r->fg_curr_arrival_ns = currNs > 0 ? (uint64_t)currNs : 0; + fg_enqueue(r, FG_MODE_PRESENT_LAST, phase); return JNI_TRUE; } diff --git a/app/src/main/runtime/display/renderer/VulkanRenderer.java b/app/src/main/runtime/display/renderer/VulkanRenderer.java index dd4caf312..8086c0f27 100644 --- a/app/src/main/runtime/display/renderer/VulkanRenderer.java +++ b/app/src/main/runtime/display/renderer/VulkanRenderer.java @@ -433,7 +433,7 @@ private void fgCadenceDiag() { double secs = (double) (now - fgDiagLastNs) / 1e9; if (fgLockedGameHz > 0.0 && secs > 0.0 && secs <= 1.5) { double deliveredHz = (double) (fgDiagInterp + fgDiagReal) / secs; - double targetEff = Math.max(1, fgEffectiveMultiplier) * fgLockedGameHz; + double targetEff = Math.max(1, fgCadenceM) * fgLockedGameHz; if (fgDisplayCapHz > 0) targetEff = Math.min(targetEff, (double) fgDisplayCapHz); // Step down only on a sustained shortfall (>=4 consecutive slow seconds). Floor 2x. 
if (deliveredHz > 0.0 && deliveredHz < 0.85 * targetEff && !fgOverlayActive) { @@ -443,9 +443,9 @@ private void fgCadenceDiag() { } } Log.i(TAG, String.format(java.util.Locale.US, - "FG diag: content=%.0fHz (locked=%.0f) panel=%.0fHz slots=%d mult=%dx(eff=%dx) | present interp=%d real=%d none=%d | " + "FG diag: content=%.0fHz (locked=%.0f) panel=%.0fHz slots=%d mult=%dx(eff=%dx cad=%dx) | present interp=%d real=%d none=%d | " + "content-dedup distinct=%d/s dup-dropped=%d/s | id-dedup-dropped=%d accepted=%d", - gameHz, fgLockedGameHz, dispHz, (int) Math.round(ratio), fgMultiplier, fgEffectiveMultiplier, + gameHz, fgLockedGameHz, dispHz, (int) Math.round(ratio), fgMultiplier, fgEffectiveMultiplier, fgCadenceM, fgDiagInterp, fgDiagReal, fgDiagNone, dDist, dDup, fgDiagDedupDropped, fgDiagAccepted)); fgDiagInterp = fgDiagReal = fgDiagNone = fgDiagDedupDropped = fgDiagAccepted = 0; fgDiagLastNs = now; @@ -477,6 +477,8 @@ private void fgInstrument(long usCpu, boolean wasHold) { private long fgLastPromoteNs = 0, fgPrevPromoteNs = 0; // times of the last two distinct content frames private long fgContentPeriodNs = 0; // EMA of the interval between distinct content frames private int fgPromoteSlotIdx = 0; // display ticks since the last promote + private int fgLastEmitSub = -1; // output sub-frame index last presented this content interval + private volatile int fgCadenceM = 2; // divisor-snapped multiplier actually used by the cadence private volatile boolean fgResyncPending = false; private volatile boolean fgOverlayActive = false; @@ -490,6 +492,7 @@ private void doFgResync() { fgLastPromoteNs = 0L; fgEngineFrames = 0; fgPromoteSlotIdx = 0; + fgLastEmitSub = -1; fgEffectiveMultiplier = fgMultiplier; fgBoundSecs = 0; fgNewScene.set(true); @@ -527,13 +530,13 @@ private int fgEmitOne() { fgContentPeriodNs = fgContentPeriodNs == 0L ? 
d : fgContentPeriodNs + (d - fgContentPeriodNs) / 8L; double inst = 1.0e9 / (double) fgContentPeriodNs; - if (fgLockedGameHz <= 0.0) { - fgLockedGameHz = inst; - } else if (Math.abs(inst - fgLockedGameHz) > Math.max(2.0, 0.10 * fgLockedGameHz)) { - if (++fgGameDriftFrames >= 24) { fgLockedGameHz = inst; fgGameDriftFrames = 0; } - } else { - fgGameDriftFrames = 0; - } + // Track the (already EMA-smoothed) instantaneous content rate with a light + // quarter-step EMA: converges in a few frames and rejects single-frame outliers, + // without the old drift-relock threshold that could leave the lock stale (e.g. + // locked=27 while the game truly ran 30), which skewed the present spacing. + fgLockedGameHz = fgLockedGameHz <= 0.0 ? inst + : fgLockedGameHz + (inst - fgLockedGameHz) * 0.25; + fgGameDriftFrames = 0; fgGamePeriodNs = fgContentPeriodNs; } } @@ -541,6 +544,7 @@ private int fgEmitOne() { fgLastPromoteNs = pNs; fgEngineFrames++; fgPromoteSlotIdx = 0; + fgLastEmitSub = -1; // new content interval — allow its sub 0 (real frame) to present } } } @@ -548,7 +552,7 @@ private int fgEmitOne() { // Drawer/menu overlay up: pause FG generation and present only the real frame. if (fgOverlayActive) { - if (fgEmitWasHold) { nativePresentLast(nativeHandle); return 2; } + if (fgEmitWasHold) { nativePresentLast(nativeHandle, 0f, fgPrevPromoteNs, fgLastPromoteNs); return 2; } return 0; } @@ -556,83 +560,55 @@ private int fgEmitOne() { boolean canInterp = fgMultiplier > 1 && fgEngineFrames >= 2 && period > 0L && fgLastPromoteNs != 0L && fgPrevPromoteNs != 0L; if (!canInterp) { - if (newGame || dirty) { nativePresentLast(nativeHandle); return 2; } + if (newGame || dirty) { nativePresentLast(nativeHandle, 0f, fgPrevPromoteNs, fgLastPromoteNs); return 2; } return 0; } if (dirty && !newGame) { // Cursor/UI-only recomposite — show it sharply. 
- nativePresentLast(nativeHandle); + nativePresentLast(nativeHandle, 0f, fgPrevPromoteNs, fgLastPromoteNs); return 2; } - // Slot grid anchored to distinct content (promotes): each content period spans `slots` display - // ticks; generated frames interpolate the distinct pair at phase (k+1)/slots. + // Closed-loop placement: derive the output sub-frame purely from elapsed time since the last real + // frame (not a tick counter), so irregular promote arrival can't misalign the cadence. Each content + // interval is split into M sub-frames: sub 0 = the real frame (shown sharp), sub 1..M-1 = tweens at + // phase sub/M. One present per vblank; a sub-frame is shown once then held until the next sub/promote. + // Native applies interpolate vs extrapolate per the mode flag, so the placement rule is shared. + if (period <= 0L) return 0; + // Effective multiplier for the cadence, snapped DOWN to the largest divisor of the panel:content + // ratio (slots) so the output rate divides the panel evenly — each output frame is then held a whole + // number of vblanks. On 120Hz/30Hz (slots=4), 4x and 2x stay, but 3x (=90Hz, frames held 1,1,2 + // vblanks = judder) snaps to 2x. Keeps every multiplier visually smooth on a fixed-refresh panel. + int M = Math.max(2, fgEffectiveMultiplier); long disp = fgDisplayPeriodNs; - double ratio = disp > 0L ? (double) period / (double) disp : 0.0; - int slots = (int) Math.round(ratio); - boolean gridOk = slots >= 2 && slots <= 16; - if (gridOk) { - // Even-hold path: when the panel rate is an integer multiple of the FG output, present each - // of the m generated frames for slots/m vsyncs (a clean 1:N hold). 
- int m = fgEffectiveMultiplier; - if (m >= 2 && slots > m && (slots % m) == 0) { - int spacing = slots / m; // vsyncs each generated frame is held on screen - int k = fgPromoteSlotIdx; - if (k >= slots) return 0; // past the content period — hold - if ((k % spacing) != 0) return 0; // mid-hold: keep the prior generated frame - int s = k / spacing; // which generated sub-frame, 0..m-1 - if (fgExtrapolate) { - double phe = (double) s / (double) m; - if (s == 0 || phe >= 1.0) { nativePresentLast(nativeHandle); return 2; } - nativeRenderInterp(nativeHandle, (float) phe, fgPrevPromoteNs, fgLastPromoteNs); - return 1; - } - double phg = (double) (s + 1) / (double) m; - if (phg >= 1.0) { nativePresentLast(nativeHandle); return 2; } - nativeRenderInterp(nativeHandle, (float) phg, fgPrevPromoteNs, fgLastPromoteNs); - return 1; + if (disp > 0L) { + int slots = (int) Math.round((double) period / (double) disp); + if (slots >= 2) { + int best = 1; + for (int d = 2; d <= M && d <= slots; d++) if (slots % d == 0) best = d; + if (best >= 2) M = best; } - // Place each tween by the measured content:panel ratio so a variable source rate maps to the - // geometrically correct phase. - double r = ratio; - if (r > fgEffectiveMultiplier) r = fgEffectiveMultiplier; - int k = fgPromoteSlotIdx; - if (k >= slots * 2) return 0; // content stalled — hold - if (fgExtrapolate) { - double phe = (double) k / r; - if (k == 0 || phe >= 1.0) { nativePresentLast(nativeHandle); return 2; } - nativeRenderInterp(nativeHandle, (float) phe, fgPrevPromoteNs, fgLastPromoteNs); - return 1; - } - double ph = (double) (k + 1) / r; - if (ph >= 1.0) { nativePresentLast(nativeHandle); return 2; } - nativeRenderInterp(nativeHandle, (float) ph, fgPrevPromoteNs, fgLastPromoteNs); - return 1; } - - // Continuous-phase fallback (non-integer content:panel ratio): phase since the last promote. 
+ fgCadenceM = M; + // Closed-loop phase from the stable EMA content period (robust to per-frame source jitter, which + // otherwise skewed the sub-frame timing and dropped interps on variable-rate games). long vsync = fgCurrentVsyncNs != 0L ? fgCurrentVsyncNs : System.nanoTime(); - double phase = (double) (vsync - fgLastPromoteNs) / (double) period; - if (fgExtrapolate) { - if (phase >= 1.0) { if (phase < 2.0) { nativePresentLast(nativeHandle); return 2; } return 0; } - nativeRenderInterp(nativeHandle, (float) Math.max(0.001, phase), fgPrevPromoteNs, fgLastPromoteNs); - return 1; - } - if (phase < 1.0) { - nativeRenderInterp(nativeHandle, (float) Math.max(0.001, phase), fgPrevPromoteNs, fgLastPromoteNs); - return 1; - } else if (phase < 2.0) { - nativePresentLast(nativeHandle); - return 2; - } - return 0; + double frac = (double) (vsync - fgLastPromoteNs) / (double) period; + if (frac < 0.0) frac = 0.0; + int sub = (int) Math.floor(frac * (double) M); + if (sub >= M) return 0; // interval overran — hold until the next promote + if (sub == fgLastEmitSub) return 0; // already presented this sub-frame — hold it on screen + fgLastEmitSub = sub; + if (sub <= 0) { nativePresentLast(nativeHandle, 0f, fgPrevPromoteNs, fgLastPromoteNs); return 2; } // sub 0 = real frame, sharp + nativeRenderInterp(nativeHandle, (float) sub / (float) M, fgPrevPromoteNs, fgLastPromoteNs); + return 1; } // Target FG post rate (Hz): multiplier × locked game rate, capped to the panel max. 0 if not measured. 
private double fgTargetHz() { double g = fgLockedGameHz; if (g <= 0.0) return 0.0; - double target = Math.max(1, fgEffectiveMultiplier) * g; + double target = Math.max(1, fgCadenceM) * g; if (fgDisplayCapHz > 0) target = Math.min(target, (double) fgDisplayCapHz); return target; } @@ -1407,7 +1383,7 @@ private static native long nativeCreate(boolean enableValidationLayers, private static native boolean nativeRenderHold(long handle); private static native boolean nativeRenderInterp(long handle, float phase, long prevNs, long currNs); private static native void nativeFgPromoteInfo(long handle, long[] out); - private static native boolean nativePresentLast(long handle); + private static native boolean nativePresentLast(long handle, float phase, long prevNs, long currNs); private static native void nativeSetFrameGenParams(long handle, float occLo, float occHi, int minStep); private static native void nativeSetFrameGenFlowScale(long handle, float flowScale); private static native void nativeSetFrameGenDeepMode(long handle, boolean deep); From 253b38772a9294e3b1c2a6ab3a492e0500221e5f Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Thu, 18 Jun 2026 16:23:17 -0400 Subject: [PATCH 14/46] Frame generation: steady output gate (fix tween under-production) The floor(frac*M) sub-frame rule skipped tweens when the source rate jittered against the EMA period, so the output ran far below target (interp ~16/s vs 30 expected at 2x, ~80 missing at 4x) and the adaptive ratchet misread that as a GPU limit and dropped the multiplier (4x ran as 2x) even though generation is cheap (over-budget ~7/240, interp ~0.07ms). Replace it with a deterministic steady gate: present a new frame every hold=slots/M vblanks and sample the tween phase continuously from the content clock, so a fresh frame is produced on every gate vblank. 4x now holds (eff=4x cad=4x) and the output tracks the target (~110fps from a 30fps source) instead of collapsing to ~48fps. 
Real frame shown sharp on promote; gate restarts there. --- .../display/renderer/VulkanRenderer.java | 50 ++++++++++--------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/app/src/main/runtime/display/renderer/VulkanRenderer.java b/app/src/main/runtime/display/renderer/VulkanRenderer.java index 8086c0f27..722a625dc 100644 --- a/app/src/main/runtime/display/renderer/VulkanRenderer.java +++ b/app/src/main/runtime/display/renderer/VulkanRenderer.java @@ -477,7 +477,7 @@ private void fgInstrument(long usCpu, boolean wasHold) { private long fgLastPromoteNs = 0, fgPrevPromoteNs = 0; // times of the last two distinct content frames private long fgContentPeriodNs = 0; // EMA of the interval between distinct content frames private int fgPromoteSlotIdx = 0; // display ticks since the last promote - private int fgLastEmitSub = -1; // output sub-frame index last presented this content interval + private int fgVblankSincePromote = 0; // vblanks since the last real frame — drives the steady output gate private volatile int fgCadenceM = 2; // divisor-snapped multiplier actually used by the cadence private volatile boolean fgResyncPending = false; private volatile boolean fgOverlayActive = false; @@ -492,7 +492,7 @@ private void doFgResync() { fgLastPromoteNs = 0L; fgEngineFrames = 0; fgPromoteSlotIdx = 0; - fgLastEmitSub = -1; + fgVblankSincePromote = 0; fgEffectiveMultiplier = fgMultiplier; fgBoundSecs = 0; fgNewScene.set(true); @@ -544,7 +544,7 @@ private int fgEmitOne() { fgLastPromoteNs = pNs; fgEngineFrames++; fgPromoteSlotIdx = 0; - fgLastEmitSub = -1; // new content interval — allow its sub 0 (real frame) to present + fgVblankSincePromote = 0; // new content interval — restart the output gate at the real frame } } } @@ -569,38 +569,40 @@ private int fgEmitOne() { return 2; } - // Closed-loop placement: derive the output sub-frame purely from elapsed time since the last real - // frame (not a tick counter), so irregular promote arrival can't misalign the 
cadence. Each content - // interval is split into M sub-frames: sub 0 = the real frame (shown sharp), sub 1..M-1 = tweens at - // phase sub/M. One present per vblank; a sub-frame is shown once then held until the next sub/promote. - // Native applies interpolate vs extrapolate per the mode flag, so the placement rule is shared. + // Real frame just promoted: show it sharp; the gate was restarted at 0 in the promote block. + if (promoted) { + nativePresentLast(nativeHandle, 0f, fgPrevPromoteNs, fgLastPromoteNs); + return 2; + } if (period <= 0L) return 0; - // Effective multiplier for the cadence, snapped DOWN to the largest divisor of the panel:content - // ratio (slots) so the output rate divides the panel evenly — each output frame is then held a whole - // number of vblanks. On 120Hz/30Hz (slots=4), 4x and 2x stay, but 3x (=90Hz, frames held 1,1,2 - // vblanks = judder) snaps to 2x. Keeps every multiplier visually smooth on a fixed-refresh panel. + // Cadence multiplier, snapped DOWN to the largest divisor of the panel:content ratio (slots) so the + // output divides the panel evenly (each output frame held a whole number of vblanks). On 120Hz/30Hz + // (slots=4): 4x and 2x stay; 3x (=90Hz, held 1,1,2 vblanks = judder) snaps to 2x. int M = Math.max(2, fgEffectiveMultiplier); long disp = fgDisplayPeriodNs; + int slots = M; if (disp > 0L) { - int slots = (int) Math.round((double) period / (double) disp); - if (slots >= 2) { + int s = (int) Math.round((double) period / (double) disp); + if (s >= 2) { + slots = s; int best = 1; for (int d = 2; d <= M && d <= slots; d++) if (slots % d == 0) best = d; if (best >= 2) M = best; } } fgCadenceM = M; - // Closed-loop phase from the stable EMA content period (robust to per-frame source jitter, which - // otherwise skewed the sub-frame timing and dropped interps on variable-rate games). 
+ // Steady output gate: emit a NEW frame every `hold` vblanks (hold = slots/M, integer since M divides + // slots), sampling the tween phase CONTINUOUSLY from the content clock. A fresh frame is produced on + // every gate vblank regardless of where the EMA boundary falls — this stops the tween under-production + // (skipped interps) that was dragging the output rate down and making it jittery. + int hold = Math.max(1, slots / M); + fgVblankSincePromote++; + if ((fgVblankSincePromote % hold) != 0) return 0; // between gates — hold the current frame long vsync = fgCurrentVsyncNs != 0L ? fgCurrentVsyncNs : System.nanoTime(); - double frac = (double) (vsync - fgLastPromoteNs) / (double) period; - if (frac < 0.0) frac = 0.0; - int sub = (int) Math.floor(frac * (double) M); - if (sub >= M) return 0; // interval overran — hold until the next promote - if (sub == fgLastEmitSub) return 0; // already presented this sub-frame — hold it on screen - fgLastEmitSub = sub; - if (sub <= 0) { nativePresentLast(nativeHandle, 0f, fgPrevPromoteNs, fgLastPromoteNs); return 2; } // sub 0 = real frame, sharp - nativeRenderInterp(nativeHandle, (float) sub / (float) M, fgPrevPromoteNs, fgLastPromoteNs); + double phase = (double) (vsync - fgLastPromoteNs) / (double) period; + if (phase >= 1.0) return 0; // interval overran — hold until next promote + if (phase <= 0.0) { nativePresentLast(nativeHandle, 0f, fgPrevPromoteNs, fgLastPromoteNs); return 2; } + nativeRenderInterp(nativeHandle, (float) phase, fgPrevPromoteNs, fgLastPromoteNs); return 1; } From fd65754ae318584f5409345f4ab1305d1ec45e87 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Thu, 18 Jun 2026 16:30:27 -0400 Subject: [PATCH 15/46] Frame generation: loosen content-dedup + Max preset uses deep flow - fg_sig_delta noise floor lowered 4->2 so subtly-moving frames (3-4/channel change) count as distinct instead of being held as duplicates. Exact re-presents still score 0 and drop, so the rate measurement is unaffected. 
- Wire the Max preset (index 5) to enable deep (bidirectional) flow in both the startup load and the in-game preset handler; it was hardcoded off for all presets. Other presets stay single-flow. --- app/src/main/cpp/winlator/vk/vk_renderer.c | 2 +- app/src/main/runtime/display/XServerDisplayActivity.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index 8bdf059ec..422fa11b7 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -2539,7 +2539,7 @@ static double fg_sig_delta(VkRenderer* r, uint32_t a, uint32_t b) { int dg = abs((int)pa[i*4+1] - (int)pb[i*4+1]); int db = abs((int)pa[i*4+2] - (int)pb[i*4+2]); int m = dr > dg ? dr : dg; if (db > m) m = db; - if (m > 4) changed++; // noise floor: only light dithering is <=4/channel; real motion exceeds it + if (m > 2) changed++; // noise floor: reject only the lightest dither/noise; subtle motion (3-4/ch) now counts as distinct so it isn't held } return (double)changed; // 0 == identical re-present; >0 == distinct content frame } diff --git a/app/src/main/runtime/display/XServerDisplayActivity.java b/app/src/main/runtime/display/XServerDisplayActivity.java index e974b6547..95429c7bc 100644 --- a/app/src/main/runtime/display/XServerDisplayActivity.java +++ b/app/src/main/runtime/display/XServerDisplayActivity.java @@ -4294,7 +4294,7 @@ public void onFrameGenerationPresetSelected(int preset) { frameGenerationQuality = presetQuality[idx]; frameGenerationModel = presetModel[idx]; frameGenerationExtrapolate = false; - frameGenerationDeepMode = false; + frameGenerationDeepMode = (idx == 5); // Max engages the bidirectional (deep) flow preferences.edit() .putInt(fgKey("frame_generation_preset"), idx) .putInt(fgKey("frame_generation_quality"), frameGenerationQuality) @@ -6657,7 +6657,7 @@ private void setupUI() { frameGenerationFramesInFlight = 
fgPrefInt("frame_generation_fif", 3); frameGenerationPreset = fgPrefInt("frame_generation_preset", 2); frameGenerationModel = (frameGenerationPreset == 4 || frameGenerationPreset == 5) ? 1 : 0; - frameGenerationDeepMode = false; // all presets single-flow to fit the 60fps budget (steadier = occLo) + frameGenerationDeepMode = (frameGenerationPreset == 5); // Max uses the bidirectional (deep) flow; others single-flow final float[] startupPresetFlowScale = {0.2f, 0.4f, 0.6f, 0.8f, 0.6f, 0.8f}; float startupFlowScale = startupPresetFlowScale[Math.max(0, Math.min(frameGenerationPreset, 5))]; renderer.setFrameGenerationMultiplier(frameGenerationMultiplier); From 057cda56e9c471710a6afcf656fcd018676da8a0 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Thu, 18 Jun 2026 20:50:34 -0400 Subject: [PATCH 16/46] Frame generation: clock the cadence off buffer-swap arrival timestamps The cadence anchored to the native content-promote time, which is quantized to the pump vblank. Anchor it instead to the precise onFramePresented arrival timestamp (fgLastGameNs) - the game's own buffer-swap/present clock. Content-dedup still runs upstream to drop redundant re-presents (the compositor sees some), but the cadence timing now comes from the real arrival instants. Measured on a 30fps source at 4x: present-interval cov ~37% -> ~22%, worst-case gap 24.9ms -> 16.6ms (no more 3-vblank holds). --- app/src/main/runtime/display/renderer/VulkanRenderer.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/app/src/main/runtime/display/renderer/VulkanRenderer.java b/app/src/main/runtime/display/renderer/VulkanRenderer.java index 722a625dc..4ea33a77d 100644 --- a/app/src/main/runtime/display/renderer/VulkanRenderer.java +++ b/app/src/main/runtime/display/renderer/VulkanRenderer.java @@ -523,7 +523,11 @@ private int fgEmitOne() { if (fgPromoteInfo[0] != fgPromoteSeen) { fgPromoteSeen = fgPromoteInfo[0]; promoted = true; - long pNs = fgPromoteInfo[1] != 0L ? 
fgPromoteInfo[1] : System.nanoTime(); + // Clock the cadence off the precise buffer-swap arrival time (onFramePresented), not the + // vblank-quantized native promote time — this is the source's own present clock (GameScopeVK + // model). With content-dedup off, one promote happens per accepted buffer-swap. + long pNs = fgLastGameNs != 0L ? fgLastGameNs + : (fgPromoteInfo[1] != 0L ? fgPromoteInfo[1] : System.nanoTime()); if (fgLastPromoteNs != 0L) { long d = pNs - fgLastPromoteNs; // interval between distinct frames = content period if (d > 0L && d < 500_000_000L) { From 383331d75a64c9c07ae934641216d31350b49834 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Thu, 18 Jun 2026 20:52:46 -0400 Subject: [PATCH 17/46] Frame generation: trim verbose comments Tighten the frame-gen pacing comments to concise functional descriptions. --- app/src/main/cpp/winlator/vk/vk_renderer.c | 12 +++----- .../display/renderer/VulkanRenderer.java | 28 ++++--------------- 2 files changed, 10 insertions(+), 30 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index 422fa11b7..59b777221 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -2686,11 +2686,9 @@ static uint64_t g_fg_dropped = 0; // desiredPresentTime (CLOCK_MONOTONIC ns) for the next FG present; 0 = no constraint (real frame never delayed). #define FG_PRESENT_LEAD_NS 150000ull // wake this much before the deadline so the present latches this vblank -// Present instant for one output frame, anchored to the real-frame ARRIVAL clock: a frame at content-phase -// `phase` of the [prev,curr] interval is shown at curr_arrival + phase*(curr-prev). This locks the present -// cadence to the source's own timeline, so vblanks that emit nothing (holds) cannot drift the grid the way -// a per-enqueue period accumulator did — that drift was the residual judder after vsync-snap removal. 
Before -// the two arrivals are known it falls back to an evenly-spaced period grid. NOT snapped to the panel vsync. +// Present instant for one output frame: a frame at content-phase `phase` of the [prev,curr] interval is shown +// at curr_arrival + phase*(curr-prev), anchoring the cadence to the source clock. Falls back to an evenly +// spaced period grid before the arrivals are known. Not snapped to the panel vsync. static uint64_t fg_compute_deadline(VkRenderer* r, float phase) { uint64_t now = now_monotonic_ns(); uint64_t ca = r->fg_curr_arrival_ns, pa = r->fg_prev_arrival_ns; @@ -3216,9 +3214,7 @@ static void fg_worker_present(VkRenderer* r, const FgJob* job) { pinfo.swapchainCount = 1; pinfo.pSwapchains = &swapchain; pinfo.pImageIndices = &image_index; VkPresentTimeGOOGLE ptg; VkPresentTimesInfoGOOGLE pti; if (r->ext_display_timing) { - // Ask the display engine to latch this frame at its computed present instant (CLOCK_MONOTONIC ns). - // The CPU nanosleep above already woke us near the deadline; this lets a panel that honours - // display-timing place the frame on the correct vblank instead of "next vblank, whenever". + // Request the computed present instant so a panel that honours display-timing places it (CLOCK_MONOTONIC ns). 
ptg.presentID = ++r->fg_present_id; ptg.desiredPresentTime = job->deadline_ns; pti.sType = VK_STRUCTURE_TYPE_PRESENT_TIMES_INFO_GOOGLE; pti.pNext = NULL; pti.swapchainCount = 1; pti.pTimes = &ptg; diff --git a/app/src/main/runtime/display/renderer/VulkanRenderer.java b/app/src/main/runtime/display/renderer/VulkanRenderer.java index 4ea33a77d..7d37fb3e2 100644 --- a/app/src/main/runtime/display/renderer/VulkanRenderer.java +++ b/app/src/main/runtime/display/renderer/VulkanRenderer.java @@ -234,9 +234,7 @@ public void setFrameGeneration(boolean enabled) { synchronized (this) { if (nativeHandle != 0) { nativeSetFrameGeneration(nativeHandle, enabled); - // FG presents under a non-blocking mode so the worker's deadline pacer drives the present - // instant. FIFO would block the present on vblank and make the panel vsync the pacer, - // re-introducing the quantization judder the deadline model exists to avoid. + // Non-blocking present so the worker's deadline pacer drives the present instant (not FIFO/vsync). nativeSetPresentMode(nativeHandle, enabled ? PRESENT_MODE_MAILBOX : requestedPresentMode); fgActivePresentMode = nativeGetActivePresentMode(nativeHandle); } @@ -523,9 +521,7 @@ private int fgEmitOne() { if (fgPromoteInfo[0] != fgPromoteSeen) { fgPromoteSeen = fgPromoteInfo[0]; promoted = true; - // Clock the cadence off the precise buffer-swap arrival time (onFramePresented), not the - // vblank-quantized native promote time — this is the source's own present clock (GameScopeVK - // model). With content-dedup off, one promote happens per accepted buffer-swap. + // Anchor to the precise buffer-swap arrival time, not the vblank-quantized promote time. long pNs = fgLastGameNs != 0L ? fgLastGameNs : (fgPromoteInfo[1] != 0L ? fgPromoteInfo[1] : System.nanoTime()); if (fgLastPromoteNs != 0L) { @@ -534,10 +530,7 @@ private int fgEmitOne() { fgContentPeriodNs = fgContentPeriodNs == 0L ? 
d : fgContentPeriodNs + (d - fgContentPeriodNs) / 8L; double inst = 1.0e9 / (double) fgContentPeriodNs; - // Track the (already EMA-smoothed) instantaneous content rate with a light - // quarter-step EMA: converges in a few frames and rejects single-frame outliers, - // without the old drift-relock threshold that could leave the lock stale (e.g. - // locked=27 while the game truly ran 30), which skewed the present spacing. + // Light EMA toward the measured content rate (converges fast, rejects outliers). fgLockedGameHz = fgLockedGameHz <= 0.0 ? inst : fgLockedGameHz + (inst - fgLockedGameHz) * 0.25; fgGameDriftFrames = 0; @@ -579,9 +572,7 @@ private int fgEmitOne() { return 2; } if (period <= 0L) return 0; - // Cadence multiplier, snapped DOWN to the largest divisor of the panel:content ratio (slots) so the - // output divides the panel evenly (each output frame held a whole number of vblanks). On 120Hz/30Hz - // (slots=4): 4x and 2x stay; 3x (=90Hz, held 1,1,2 vblanks = judder) snaps to 2x. + // Snap the multiplier to the largest divisor of the panel:content ratio so output divides the panel evenly. int M = Math.max(2, fgEffectiveMultiplier); long disp = fgDisplayPeriodNs; int slots = M; @@ -595,10 +586,7 @@ private int fgEmitOne() { } } fgCadenceM = M; - // Steady output gate: emit a NEW frame every `hold` vblanks (hold = slots/M, integer since M divides - // slots), sampling the tween phase CONTINUOUSLY from the content clock. A fresh frame is produced on - // every gate vblank regardless of where the EMA boundary falls — this stops the tween under-production - // (skipped interps) that was dragging the output rate down and making it jittery. + // Emit a new frame every `hold` vblanks (hold = slots/M); sample the tween phase from the content clock. 
int hold = Math.max(1, slots / M); fgVblankSincePromote++; if ((fgVblankSincePromote % hold) != 0) return 0; // between gates — hold the current frame @@ -633,11 +621,7 @@ private int fgComputeInterps() { return Math.max(0, Math.min(maxInterps, interps)); } - // Presents per pump tick. The pump fires once per vblank (AChoreographer), so exactly one present per - // tick = one present per vblank = the panel-rate ceiling. Emitting more than one per tick would post - // multiple frames into a single vblank (MAILBOX keeps only the last), wasting work and inflating the - // present count into an uneven ">panel" rate. Sub-panel output rates are handled by the per-vblank - // phase logic (hold vs interp), so one-per-tick is always correct. + // One present per pump tick: the pump fires once per vblank, so one present per vblank is the panel ceiling. private int fgComputePerTick() { return 1; } From 651bfe4a34078dd0a26b09c6c0aec5fb810bbfe2 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Thu, 18 Jun 2026 21:21:04 -0400 Subject: [PATCH 18/46] Frame generation: pipeline the worker for generate-ahead Split the worker into generate (acquire + record + submit) and present (pace + present), and pipeline them: the worker submits the next job's GPU work before pacing/presenting the current job, so generation overlaps the deadline wait. Each output frame then gets ~2 present intervals of GPU budget instead of one, keeping heavy presets (deep flow at 4x) from dropping frames when a generated frame would miss its deadline. Present stays on the single worker thread (no swapchain race); a content stall flushes the pending frame via a bounded wait, and a swapchain recreate drops the in-flight frame. Swapchain image count raised to 5 for non-blocking modes to cover the extra in-flight frame. No change at light presets (cov/rate unchanged at standard). 
--- app/src/main/cpp/winlator/vk/vk_renderer.c | 112 +++++++++++++++------ 1 file changed, 84 insertions(+), 28 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index 59b777221..0fe318212 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -1366,7 +1366,7 @@ static bool create_swapchain(VkRenderer* r, uint32_t fallback_width, uint32_t fa uint32_t image_count = caps.minImageCount + 1; if (image_count < VK_FRAMES_IN_FLIGHT + 1u) image_count = VK_FRAMES_IN_FLIGHT + 1u; - if (present_mode != VK_PRESENT_MODE_FIFO_KHR && image_count < 4) image_count = 4; + if (present_mode != VK_PRESENT_MODE_FIFO_KHR && image_count < 5) image_count = 5; // generate-ahead holds one extra image in flight if (caps.maxImageCount > 0 && image_count > caps.maxImageCount) image_count = caps.maxImageCount; if (image_count > VK_MAX_SWAPCHAIN_IMAGES) image_count = VK_MAX_SWAPCHAIN_IMAGES; @@ -3090,10 +3090,25 @@ static void fg_worker_recreate(VkRenderer* r) { pthread_mutex_unlock(&r->render_mutex); } -// Run ONE queued job: acquire, record flow+generate (or present_last blit), submit, pace, present. -static void fg_worker_present(VkRenderer* r, const FgJob* job) { +// Carried between generate and present so the next frame's GPU work overlaps the current frame's deadline +// wait: each output frame then has ~2 present intervals of GPU budget instead of one. +typedef struct FgPending { + bool valid; + bool need_recreate; // acquire returned OUT_OF_DATE + uint32_t image_index; + VkSemaphore render_finished; + VkSwapchainKHR swapchain; + uint64_t deadline_ns; + bool recreate_after; // acquire SUBOPTIMAL + bool do_interp; +} FgPending; + +// Generate ONE queued job: acquire, record flow+generate (or present_last blit), submit. The caller paces and +// presents the returned handle one frame later, so this job's GPU runs during that wait. 
+static FgPending fg_worker_generate(VkRenderer* r, const FgJob* job) { + FgPending p = {0}; if (!r->surface_ready || !r->swapchain || r->swapchain_image_count == 0 - || r->swapchain_extent.width == 0u || !r->pipelines_built) { g_fg_dropped++; return; } + || r->swapchain_extent.width == 0u || !r->pipelines_built) { g_fg_dropped++; return p; } VkFrame* f = &r->fg_worker_frames[r->fg_worker_index]; r->fg_worker_index = (r->fg_worker_index + 1u) % 3u; @@ -3106,17 +3121,17 @@ static void fg_worker_present(VkRenderer* r, const FgJob* job) { uint32_t image_index = 0; VkResult acq = vkAcquireNextImageKHR(r->device, r->swapchain, acq_timeout, f->image_available, VK_NULL_HANDLE, &image_index); - if (acq == VK_NOT_READY || acq == VK_TIMEOUT) { g_fg_dropped++; return; } - if (acq == VK_ERROR_OUT_OF_DATE_KHR) { fg_worker_recreate(r); return; } + if (acq == VK_NOT_READY || acq == VK_TIMEOUT) { g_fg_dropped++; return p; } + if (acq == VK_ERROR_OUT_OF_DATE_KHR) { p.need_recreate = true; return p; } bool recreate_after_present = (acq == VK_SUBOPTIMAL_KHR) && !r->ignore_suboptimal; - if (acq != VK_SUCCESS && acq != VK_SUBOPTIMAL_KHR) { VK_LOGE("fg-gen acquire -> %d", acq); g_fg_dropped++; return; } + if (acq != VK_SUCCESS && acq != VK_SUBOPTIMAL_KHR) { VK_LOGE("fg-gen acquire -> %d", acq); g_fg_dropped++; return p; } VkSemaphore render_finished = r->swapchain_render_finished[image_index]; VkCommandBufferBeginInfo bi = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; pthread_mutex_lock(&r->render_mutex); - if (!r->fg_built) { pthread_mutex_unlock(&r->render_mutex); g_fg_dropped++; return; } + if (!r->fg_built) { pthread_mutex_unlock(&r->render_mutex); g_fg_dropped++; return p; } // A job whose pair was reused by 2+ newer promotes falls back to present_last of the LIVE newest // frame (never drop the acquired image — that would strand its semaphore). 
bool stale = (uint32_t)(r->fg_promote_seq - job->seq) >= 2u; @@ -3200,22 +3215,34 @@ static void fg_worker_present(VkRenderer* r, const FgJob* job) { VkFenceCreateInfo rfi = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO}; rfi.flags = VK_FENCE_CREATE_SIGNALED_BIT; vkCreateFence(r->device, &rfi, NULL, &f->in_flight); pthread_mutex_unlock(&r->render_mutex); - return; + return p; } r->fg_slot_fence[curr_idx] = f->in_flight; if (do_interp) r->fg_slot_fence[prev_idx] = f->in_flight; VkSwapchainKHR swapchain = r->swapchain; pthread_mutex_unlock(&r->render_mutex); - fg_sleep_to(r, job->deadline_ns); + p.valid = true; + p.image_index = image_index; + p.render_finished = render_finished; + p.swapchain = swapchain; + p.deadline_ns = job->deadline_ns; + p.recreate_after = recreate_after_present; + p.do_interp = do_interp; + return p; +} + +// Pace to the deadline and present a generated frame. Returns true when the swapchain needs recreating. +static bool fg_worker_do_present(VkRenderer* r, const FgPending* p) { + fg_sleep_to(r, p->deadline_ns); VkPresentInfoKHR pinfo = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR}; - pinfo.waitSemaphoreCount = 1; pinfo.pWaitSemaphores = &render_finished; - pinfo.swapchainCount = 1; pinfo.pSwapchains = &swapchain; pinfo.pImageIndices = &image_index; + pinfo.waitSemaphoreCount = 1; pinfo.pWaitSemaphores = &p->render_finished; + pinfo.swapchainCount = 1; pinfo.pSwapchains = &p->swapchain; pinfo.pImageIndices = &p->image_index; VkPresentTimeGOOGLE ptg; VkPresentTimesInfoGOOGLE pti; if (r->ext_display_timing) { // Request the computed present instant so a panel that honours display-timing places it (CLOCK_MONOTONIC ns). 
- ptg.presentID = ++r->fg_present_id; ptg.desiredPresentTime = job->deadline_ns; + ptg.presentID = ++r->fg_present_id; ptg.desiredPresentTime = p->deadline_ns; pti.sType = VK_STRUCTURE_TYPE_PRESENT_TIMES_INFO_GOOGLE; pti.pNext = NULL; pti.swapchainCount = 1; pti.pTimes = &ptg; pinfo.pNext = &pti; @@ -3224,7 +3251,7 @@ static void fg_worker_present(VkRenderer* r, const FgJob* job) { VkResult pr = vkQueuePresentKHR(r->graphics_queue, &pinfo); if (pr == VK_SUCCESS || pr == VK_SUBOPTIMAL_KHR) { r->fg_present_count++; - if (do_interp) g_fg_interp++; else g_fg_plast++; + if (p->do_interp) g_fg_interp++; else g_fg_plast++; fg_collect_present_timing(r); if (((g_fg_interp + g_fg_plast) % 120u) == 0u) { double mean = r->fg_t_count ? r->fg_t_sum_ms / r->fg_t_count : 0.0; @@ -3246,10 +3273,8 @@ static void fg_worker_present(VkRenderer* r, const FgJob* job) { } pthread_mutex_unlock(&r->queue_mutex); - if (recreate_after_present || pr == VK_ERROR_OUT_OF_DATE_KHR - || (pr == VK_SUBOPTIMAL_KHR && !r->ignore_suboptimal)) { - fg_worker_recreate(r); - } + return p->recreate_after || pr == VK_ERROR_OUT_OF_DATE_KHR + || (pr == VK_SUBOPTIMAL_KHR && !r->ignore_suboptimal); } // Producer (GL thread): snapshot the cadence decision into a job + enqueue + wake the worker. O(1). @@ -3283,19 +3308,50 @@ static void* fg_gen_loop(void* arg) { VkRenderer* r = (VkRenderer*)arg; prctl(PR_SET_NAME, "fg-gen", 0, 0, 0); setpriority(PRIO_PROCESS, 0, -8); + FgPending pending = {0}; while (r->fg_gen_running) { - sem_wait(&r->fg_gen_sem); + // Wait for the next job. While a frame is pending, cap the wait (~2 vsync) so a content stall can + // still flush the pending frame instead of holding it. + bool got_job; + if (pending.valid) { + uint64_t wait_ns = (r->fg_display_period_ns ? 
r->fg_display_period_ns : 16666667ull) * 2u; + struct timespec ts; clock_gettime(CLOCK_REALTIME, &ts); + ts.tv_sec += (time_t)(wait_ns / 1000000000ull); + ts.tv_nsec += (long)(wait_ns % 1000000000ull); + if (ts.tv_nsec >= 1000000000L) { ts.tv_nsec -= 1000000000L; ts.tv_sec++; } + got_job = (sem_timedwait(&r->fg_gen_sem, &ts) == 0); + } else { + got_job = (sem_wait(&r->fg_gen_sem) == 0); + } if (!r->fg_gen_running) break; - uint32_t head = r->fg_job_head; - if (head == __atomic_load_n(&r->fg_job_tail, __ATOMIC_ACQUIRE)) continue; // nothing (spurious/drain) - FgJob job = r->fg_job_ring[head]; - __atomic_store_n(&r->fg_job_head, (head + 1u) % FG_JOB_RING, __ATOMIC_RELEASE); - // drop-late: skip an INTERP whose deadline already passed by >1 vsync; never drop a PRESENT_LAST. - uint64_t period = r->fg_display_period_ns ? r->fg_display_period_ns : 16666667ull; - if (job.mode == FG_MODE_INTERP && job.deadline_ns != 0u - && now_monotonic_ns() > job.deadline_ns + period) { g_fg_dropped++; continue; } - fg_worker_present(r, &job); + + bool need_recreate = false; + FgPending ready = {0}; + if (got_job) { + uint32_t head = r->fg_job_head; + if (head != __atomic_load_n(&r->fg_job_tail, __ATOMIC_ACQUIRE)) { + FgJob job = r->fg_job_ring[head]; + __atomic_store_n(&r->fg_job_head, (head + 1u) % FG_JOB_RING, __ATOMIC_RELEASE); + // drop-late: skip an INTERP whose deadline already passed by >1 vsync; never drop a PRESENT_LAST. + uint64_t period = r->fg_display_period_ns ? r->fg_display_period_ns : 16666667ull; + if (job.mode == FG_MODE_INTERP && job.deadline_ns != 0u + && now_monotonic_ns() > job.deadline_ns + period) { + g_fg_dropped++; + } else { + ready = fg_worker_generate(r, &job); + if (ready.need_recreate) need_recreate = true; + } + } + } + // Present the previously generated frame; the just-generated one's GPU runs during this deadline wait. 
+ if (pending.valid && fg_worker_do_present(r, &pending)) need_recreate = true; + pending = ready; + if (need_recreate) { + fg_worker_recreate(r); + pending.valid = false; // anything acquired before the recreate is now stale + } } + if (pending.valid) fg_worker_do_present(r, &pending); // flush the trailing frame on stop return NULL; } From 3ff6f7bf0c252acfe9084ae49baa13c96cab2ce7 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Fri, 19 Jun 2026 14:13:27 -0400 Subject: [PATCH 19/46] Add CNN frame-generation flow path Default-on selectable CNN flow producer with per-pair flow caching, decimated coarse-to-fine flow, bidirectional deep mode, and 2x/3x/4x multipliers. Set debug.winnative.fgcnn=0 to force the classical block-match path. --- app/src/main/cpp/CMakeLists.txt | 25 + .../main/cpp/winlator/vk/bin2c_bytes.cmake | 36 ++ .../cpp/winlator/vk/shaders/cnn_conv.comp | 43 +- .../winlator/vk/shaders/cnn_conv_2pass.comp | 151 +++++ .../vk/shaders/cnn_correlation_cost9.comp | 107 ++++ .../vk/shaders/cnn_correlation_g09.comp | 122 ++++ .../shaders/cnn_correlation_warpfollow.comp | 161 +++++ .../cpp/winlator/vk/shaders/cnn_flowreg.comp | 87 +++ .../winlator/vk/shaders/cnn_occlusion.comp | 25 +- .../cpp/winlator/vk/shaders/cnn_pyramid.comp | 5 + app/src/main/cpp/winlator/vk/vk_cnn_fg.c | 592 ++++++++++++++++++ app/src/main/cpp/winlator/vk/vk_dispatch.c | 3 + app/src/main/cpp/winlator/vk/vk_dispatch.h | 6 + app/src/main/cpp/winlator/vk/vk_renderer.c | 211 ++++++- app/src/main/cpp/winlator/vk/vk_state.h | 57 ++ .../vk/weights_v2/wnfg_05.weights.fp16 | Bin 0 -> 312 bytes .../vk/weights_v2/wnfg_06.weights.fp16 | 2 + .../vk/weights_v2/wnfg_07.weights.fp16 | Bin 0 -> 624 bytes .../vk/weights_v2/wnfg_14.weights.fp16 | 1 + .../vk/weights_v2/wnfg_20.weights.fp16 | 1 + .../vk/weights_v2/wnfg_21.weights.fp16 | Bin 0 -> 1776 bytes .../vk/weights_v2/wnfg_22.weights.fp16 | Bin 0 -> 1200 bytes .../vk/weights_v2/wnfg_24.weights.fp16 | Bin 0 -> 292 bytes 
.../vk/weights_v2/wnfg_25.weights.fp16 | Bin 0 -> 366 bytes .../vk/weights_v2/wnfg_26.weights.fp16 | 3 + .../vk/weights_v2/wnfg_27.weights.fp16 | 2 + .../vk/weights_v2/wnfg_28.weights.fp16 | 3 + .../vk/weights_v2/wnfg_29.weights.fp16 | Bin 0 -> 288 bytes .../vk/weights_v2/wnfg_36.weights.fp16 | Bin 0 -> 1200 bytes .../vk/weights_v2/wnfg_37.weights.fp16 | Bin 0 -> 1200 bytes .../vk/weights_v2/wnfg_42.weights.fp16 | Bin 0 -> 4704 bytes .../vk/weights_v2/wnfg_51.weights.fp16 | 2 + 32 files changed, 1601 insertions(+), 44 deletions(-) create mode 100644 app/src/main/cpp/winlator/vk/bin2c_bytes.cmake create mode 100644 app/src/main/cpp/winlator/vk/shaders/cnn_conv_2pass.comp create mode 100644 app/src/main/cpp/winlator/vk/shaders/cnn_correlation_cost9.comp create mode 100644 app/src/main/cpp/winlator/vk/shaders/cnn_correlation_g09.comp create mode 100644 app/src/main/cpp/winlator/vk/shaders/cnn_correlation_warpfollow.comp create mode 100644 app/src/main/cpp/winlator/vk/shaders/cnn_flowreg.comp create mode 100644 app/src/main/cpp/winlator/vk/vk_cnn_fg.c create mode 100644 app/src/main/cpp/winlator/vk/weights_v2/wnfg_05.weights.fp16 create mode 100644 app/src/main/cpp/winlator/vk/weights_v2/wnfg_06.weights.fp16 create mode 100644 app/src/main/cpp/winlator/vk/weights_v2/wnfg_07.weights.fp16 create mode 100644 app/src/main/cpp/winlator/vk/weights_v2/wnfg_14.weights.fp16 create mode 100644 app/src/main/cpp/winlator/vk/weights_v2/wnfg_20.weights.fp16 create mode 100644 app/src/main/cpp/winlator/vk/weights_v2/wnfg_21.weights.fp16 create mode 100644 app/src/main/cpp/winlator/vk/weights_v2/wnfg_22.weights.fp16 create mode 100644 app/src/main/cpp/winlator/vk/weights_v2/wnfg_24.weights.fp16 create mode 100644 app/src/main/cpp/winlator/vk/weights_v2/wnfg_25.weights.fp16 create mode 100644 app/src/main/cpp/winlator/vk/weights_v2/wnfg_26.weights.fp16 create mode 100644 app/src/main/cpp/winlator/vk/weights_v2/wnfg_27.weights.fp16 create mode 100644 
app/src/main/cpp/winlator/vk/weights_v2/wnfg_28.weights.fp16 create mode 100644 app/src/main/cpp/winlator/vk/weights_v2/wnfg_29.weights.fp16 create mode 100644 app/src/main/cpp/winlator/vk/weights_v2/wnfg_36.weights.fp16 create mode 100644 app/src/main/cpp/winlator/vk/weights_v2/wnfg_37.weights.fp16 create mode 100644 app/src/main/cpp/winlator/vk/weights_v2/wnfg_42.weights.fp16 create mode 100644 app/src/main/cpp/winlator/vk/weights_v2/wnfg_51.weights.fp16 diff --git a/app/src/main/cpp/CMakeLists.txt b/app/src/main/cpp/CMakeLists.txt index 8d355d7c1..04e220e70 100644 --- a/app/src/main/cpp/CMakeLists.txt +++ b/app/src/main/cpp/CMakeLists.txt @@ -80,7 +80,12 @@ set(SHADER_LIST "interpolate:frag:interpolate_frag" "cnn_pyramid:comp:cnn_pyramid_comp" "cnn_conv:comp:cnn_conv_comp" + "cnn_conv_2pass:comp:cnn_conv_2pass_comp" "cnn_correlation:comp:cnn_correlation_comp" + "cnn_correlation_cost9:comp:cnn_correlation_cost9_comp" + "cnn_correlation_g09:comp:cnn_correlation_g09_comp" + "cnn_correlation_warpfollow:comp:cnn_correlation_warpfollow_comp" + "cnn_flowreg:comp:cnn_flowreg_comp" "cnn_occlusion:comp:cnn_occlusion_comp" "cnn_generate:comp:cnn_generate_comp" ) @@ -110,6 +115,26 @@ foreach(entry ${SHADER_LIST}) list(APPEND SHADER_HEADERS "${hdr}") endforeach() +set(WEIGHTS_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/winlator/vk/weights_v2") +set(BIN2C_BYTES_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/winlator/vk/bin2c_bytes.cmake") +set(WEIGHTS_LIST 05 06 07 14 20 21 22 24 25 26 27 28 29 36 37 42 51) +foreach(id ${WEIGHTS_LIST}) + set(winput "${WEIGHTS_SRC_DIR}/wnfg_${id}.weights.fp16") + set(whdr "${SHADER_OUT_DIR}/wnfg_${id}_weights.h") + add_custom_command( + OUTPUT "${whdr}" + COMMAND "${CMAKE_COMMAND}" + -DINPUT_FILE=${winput} + -DOUTPUT_FILE=${whdr} + -DVAR_NAME=wnfg_${id}_weights + -P "${BIN2C_BYTES_SCRIPT}" + DEPENDS "${winput}" "${BIN2C_BYTES_SCRIPT}" + COMMENT "Embedding weights wnfg_${id}.weights.fp16 -> wnfg_${id}_weights.h" + VERBATIM + ) + list(APPEND SHADER_HEADERS 
"${whdr}") +endforeach() + add_custom_target(winlator_shaders DEPENDS ${SHADER_HEADERS}) add_library(winlator SHARED diff --git a/app/src/main/cpp/winlator/vk/bin2c_bytes.cmake b/app/src/main/cpp/winlator/vk/bin2c_bytes.cmake new file mode 100644 index 000000000..e2e1bd585 --- /dev/null +++ b/app/src/main/cpp/winlator/vk/bin2c_bytes.cmake @@ -0,0 +1,36 @@ +if(NOT INPUT_FILE OR NOT OUTPUT_FILE OR NOT VAR_NAME) + message(FATAL_ERROR "bin2c_bytes.cmake requires INPUT_FILE, OUTPUT_FILE, VAR_NAME") +endif() + +file(READ "${INPUT_FILE}" hex_data HEX) +string(LENGTH "${hex_data}" hex_len) + +set(bytes "") +set(line_bytes "") +set(bytes_per_line 0) + +set(i 0) +while(i LESS hex_len) + string(SUBSTRING "${hex_data}" ${i} 2 b) + math(EXPR i "${i} + 2") + string(APPEND line_bytes "0x${b}, ") + math(EXPR bytes_per_line "${bytes_per_line} + 1") + if(bytes_per_line EQUAL 16) + string(APPEND bytes " ${line_bytes}\n") + set(line_bytes "") + set(bytes_per_line 0) + endif() +endwhile() +if(bytes_per_line GREATER 0) + string(APPEND bytes " ${line_bytes}\n") +endif() + +file(WRITE "${OUTPUT_FILE}" +"#pragma once +#include +#include + +static const uint8_t ${VAR_NAME}[] = { +${bytes}}; +static const size_t ${VAR_NAME}_size = sizeof(${VAR_NAME}); +") diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_conv.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_conv.comp index 7186f2f85..059f81913 100644 --- a/app/src/main/cpp/winlator/vk/shaders/cnn_conv.comp +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_conv.comp @@ -3,20 +3,20 @@ layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; -layout(set = 0, binding = 0) uniform sampler2DArray uSrc; // input feature tiles (cinT layers) -layout(set = 0, binding = 1, rgba8) uniform writeonly image2DArray uDst; // output tiles (coutT layers) -layout(set = 0, binding = 3) uniform sampler2D uLuma; // R8 luma source +layout(set = 0, binding = 0) uniform sampler2DArray uSrc; +layout(set = 0, binding = 1, rgba8) uniform writeonly image2DArray 
uDst; +layout(set = 0, binding = 3) uniform sampler2D uLuma; layout(set = 0, binding = 8, std430) readonly buffer Weights { float16_t W[]; }; layout(push_constant) uniform PC { - ivec2 size; // dst extent in pixels - float t; // interpolation phase (unused here) - float mvScale; // flow scale (unused here) - uint wBase; // weight offset (fp16 elements) - int cinT; // Cin/4 - int coutT; // Cout/4 - int flags; // bit0=stem-luma-norm bit1=stride2 bit2=residual-add bit3=relu + ivec2 size; + float t; + float mvScale; + uint wBase; + int cinT; + int coutT; + int flags; } pc; const ivec2 TAP[9] = ivec2[]( @@ -31,10 +31,10 @@ mat4 convMat(int k, int ci, int co) { mat4 M; for (int c = 0; c < 4; ++c) { uint cb = b + uint(c); - M[c] = vec4(float(W[cb + 0u]), // r=0 - float(W[cb + 4u]), // r=1 - float(W[cb + 8u]), // r=2 - float(W[cb + 12u])); // r=3 + M[c] = vec4(float(W[cb + 0u]), + float(W[cb + 4u]), + float(W[cb + 8u]), + float(W[cb + 12u])); } return M; } @@ -47,25 +47,30 @@ void main() { bool stride2 = (pc.flags & 2) != 0; bool resid = (pc.flags & 4) != 0; bool doRelu = (pc.flags & 8) != 0; + bool stem2x = (pc.flags & 16) != 0; - // Base sample position in *source* texels. - ivec2 sp = stride2 ? (p * 2) : p; + ivec2 sp = (stride2 || stem2x) ? (p * 2) : p; + + ivec2 hi = stem ? (textureSize(uLuma, 0) - ivec2(1)) + : stride2 ? 
(textureSize(uSrc, 0).xy - ivec2(1)) + : (pc.size - ivec2(1)); vec4 acc[MAX_T]; for (int co = 0; co < pc.coutT; ++co) acc[co] = vec4(0.0); - // affine base offset (fp16 elements) = wBase + 9*cinT*coutT*16 uint affBase = pc.wBase + uint(9 * pc.cinT * pc.coutT * 16); for (int k = 0; k < 9; ++k) { - ivec2 q = clamp(sp + TAP[k], ivec2(0), pc.size - 1); + ivec2 q = clamp(sp + TAP[k], ivec2(0), hi); if (stem) { + float luma = texelFetch(uLuma, q, 0).r; - vec4 x = vec4((luma - 0.2139) * 1.4961 + 0.7695, 0.0, 0.0, 0.0); + vec4 x = vec4((luma - 0.208008) * 1.496094 + 0.769531, 0.0, 0.0, 0.0); for (int co = 0; co < pc.coutT; ++co) acc[co] += convMat(k, 0, co) * x; } else { + for (int ci = 0; ci < pc.cinT; ++ci) { vec4 x = texelFetch(uSrc, ivec3(q, ci), 0); for (int co = 0; co < pc.coutT; ++co) diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_conv_2pass.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_conv_2pass.comp new file mode 100644 index 000000000..2342bb765 --- /dev/null +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_conv_2pass.comp @@ -0,0 +1,151 @@ +#version 450 +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require + +layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; + +layout(constant_id = 0) const int CIN_T = 2; +layout(constant_id = 1) const int MID_T = 2; +layout(constant_id = 2) const int COUT_T = 2; + +const int TILE = 18; +const int MAX_T = 2; + +layout(set = 0, binding = 32) uniform sampler2D uIn0; +layout(set = 0, binding = 33) uniform sampler2D uIn1; + +layout(set = 0, binding = 48, rgba8) uniform writeonly image2D uOut0; +layout(set = 0, binding = 49, rgba8) uniform writeonly image2D uOut1; + +layout(set = 0, binding = 8, std430) readonly buffer Weights { float16_t W[]; }; + +layout(push_constant) uniform PC { + ivec2 size; + float t; + float mvScale; + uint wBase; + int cinT; + int coutT; + int flags; +} pc; + +const ivec2 TAP_XOUTER[9] = ivec2[]( + ivec2(-1,-1), ivec2(-1, 0), ivec2(-1, 1), + ivec2( 0,-1), ivec2( 0, 
0), ivec2( 0, 1), + ivec2( 1,-1), ivec2( 1, 0), ivec2( 1, 1)); + +shared f16vec4 sMid[MAX_T][TILE][TILE]; + +mat4 convMatAt(uint blockBase, int k, int ci, int co, int cinT, int coutT) { + uint b = blockBase + uint((((k * cinT + ci) * coutT + co) * 4) * 4); + mat4 M; + for (int c = 0; c < 4; ++c) { + uint cb = b + uint(c); + M[c] = vec4(float(W[cb + 0u]), float(W[cb + 4u]), + float(W[cb + 8u]), float(W[cb + 12u])); + } + return M; +} + +void affineAt(uint affBase, int co, int coutT, + out vec4 bias, out vec4 scale, out vec4 offset) { + bias = vec4(float(W[affBase + uint((0*coutT + co)*4 + 0)]), + float(W[affBase + uint((0*coutT + co)*4 + 1)]), + float(W[affBase + uint((0*coutT + co)*4 + 2)]), + float(W[affBase + uint((0*coutT + co)*4 + 3)])); + scale = vec4(float(W[affBase + uint((1*coutT + co)*4 + 0)]), + float(W[affBase + uint((1*coutT + co)*4 + 1)]), + float(W[affBase + uint((1*coutT + co)*4 + 2)]), + float(W[affBase + uint((1*coutT + co)*4 + 3)])); + offset = vec4(float(W[affBase + uint((2*coutT + co)*4 + 0)]), + float(W[affBase + uint((2*coutT + co)*4 + 1)]), + float(W[affBase + uint((2*coutT + co)*4 + 2)]), + float(W[affBase + uint((2*coutT + co)*4 + 3)])); +} + +void main() { + bool PASS1_CONV = (pc.flags & 1) != 0; + bool PASS1_CLAMP = (pc.flags & 2) != 0; + + ivec2 base = ivec2(gl_WorkGroupID.xy) * 16 - ivec2(1); + ivec2 lid = ivec2(gl_LocalInvocationID.xy); + ivec2 p = base + ivec2(1) + lid; + ivec2 hi = pc.size - ivec2(1); + + uint p1ConvBase = pc.wBase; + uint p1AffBase = p1ConvBase + uint(9 * CIN_T * MID_T * 16); + + uint p2ConvBase = PASS1_CONV ? 
(p1AffBase + uint(3 * MID_T * 4)) : pc.wBase; + uint p2AffBase = p2ConvBase + uint(9 * MID_T * COUT_T * 16); + + uint lindex = gl_LocalInvocationIndex; + for (uint idx = lindex; idx < uint(TILE*TILE); idx += 256u) { + int tx = int(idx % uint(TILE)); + int ty = int(idx / uint(TILE)); + ivec2 sp = base + ivec2(tx, ty); + + if (PASS1_CONV) { + + bool centerOOB = any(lessThan(sp, ivec2(0))) || any(greaterThan(sp, hi)); + vec4 acc[MAX_T]; + for (int co = 0; co < MID_T; ++co) acc[co] = vec4(0.0); + if (!centerOOB) { + for (int k = 0; k < 9; ++k) { + ivec2 q = sp + TAP_XOUTER[k]; + bool inb = all(greaterThanEqual(q, ivec2(0))) && all(lessThanEqual(q, hi)); + + vec4 x0 = inb ? texelFetch(uIn0, q, 0) : vec4(0.0); + vec4 x1 = inb ? texelFetch(uIn1, q, 0) : vec4(0.0); + for (int co = 0; co < MID_T; ++co) { + acc[co] += convMatAt(p1ConvBase, k, 0, co, CIN_T, MID_T) * x0; + if (CIN_T > 1) + acc[co] += convMatAt(p1ConvBase, k, 1, co, CIN_T, MID_T) * x1; + } + } + } + for (int co = 0; co < MID_T; ++co) { + vec4 v; + if (centerOOB) { + v = vec4(0.0); + } else { + vec4 bias, scale, offset; + affineAt(p1AffBase, co, MID_T, bias, scale, offset); + v = (acc[co] - bias) * scale + offset; + if (PASS1_CLAMP) v = clamp(v, 0.0, 1.0); + } + sMid[co][tx][ty] = f16vec4(v); + } + } else { + + ivec2 q = clamp(sp, ivec2(0), hi); + sMid[0][tx][ty] = f16vec4(texelFetch(uIn0, q, 0)); + if (MID_T > 1) + sMid[1][tx][ty] = f16vec4(texelFetch(uIn1, q, 0)); + } + } + + barrier(); + + if (any(greaterThanEqual(p, pc.size))) return; + + ivec2 c = lid + ivec2(1); + + vec4 acc[MAX_T]; + for (int co = 0; co < COUT_T; ++co) acc[co] = vec4(0.0); + + for (int k = 0; k < 9; ++k) { + ivec2 li = c + TAP_XOUTER[k]; + for (int ci = 0; ci < MID_T; ++ci) { + vec4 x = vec4(sMid[ci][li.x][li.y]); + for (int co = 0; co < COUT_T; ++co) + acc[co] += convMatAt(p2ConvBase, k, ci, co, MID_T, COUT_T) * x; + } + } + + for (int co = 0; co < COUT_T; ++co) { + vec4 bias, scale, offset; + affineAt(p2AffBase, co, COUT_T, bias, scale, 
offset); + vec4 v = (acc[co] - bias) * scale + offset; + if (co == 0) imageStore(uOut0, p, v); + else if (co == 1) imageStore(uOut1, p, v); + } +} diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_correlation_cost9.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_correlation_cost9.comp new file mode 100644 index 000000000..4fe865120 --- /dev/null +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_correlation_cost9.comp @@ -0,0 +1,107 @@ +#version 450 +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require + +layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; + +layout(set = 0, binding = 32) uniform sampler2D uR0; +layout(set = 0, binding = 33) uniform sampler2D uR1; +layout(set = 0, binding = 34) uniform sampler2D uR2; +layout(set = 0, binding = 35) uniform sampler2D uR3; +layout(set = 0, binding = 36) uniform sampler2D uS0; +layout(set = 0, binding = 37) uniform sampler2D uS1; +layout(set = 0, binding = 38) uniform sampler2D uS2; +layout(set = 0, binding = 39) uniform sampler2D uS3; + +layout(set = 0, binding = 40) uniform sampler2D uFlow; + +layout(set = 0, binding = 48, rgba8) uniform writeonly image2D uOut0; +layout(set = 0, binding = 49, rgba8) uniform writeonly image2D uOut1; +layout(set = 0, binding = 50, rgba8) uniform writeonly image2D uOut2; + +layout(set = 0, binding = 8, std430) readonly buffer Weights { float16_t W[]; }; + +layout(push_constant) uniform PC { + ivec2 size; + float t; + float mvScale; + uint wBase; + int cinT; + int coutT; + int flags; +} pc; + +f16vec4 wVec4(uint o){ return f16vec4(W[o],W[o+1u],W[o+2u],W[o+3u]); } + +f16vec4 refTile(int i, vec2 uv){ + if(i==0) return f16vec4(textureLod(uR0,uv,0.0)); + if(i==1) return f16vec4(textureLod(uR1,uv,0.0)); + if(i==2) return f16vec4(textureLod(uR2,uv,0.0)); + return f16vec4(textureLod(uR3,uv,0.0)); +} + +f16vec4 srchTile(int i, vec2 uv, vec2 texel, ivec2 off){ + vec2 u = uv + vec2(off)*texel; + if(pc.cinT==2){ + + if(i==0) return f16vec4(textureLod(uR2,u,0.0)); + 
return f16vec4(textureLod(uR3,u,0.0)); + } + + if(i==0) return f16vec4(textureLod(uS0,u,0.0)); + if(i==1) return f16vec4(textureLod(uS1,u,0.0)); + if(i==2) return f16vec4(textureLod(uS2,u,0.0)); + return f16vec4(textureLod(uS3,u,0.0)); +} + +void main(){ + ivec2 p = ivec2(gl_GlobalInvocationID.xy); + if(any(greaterThanEqual(p, pc.size))) return; + + vec2 sz = vec2(textureSize(uR0,0)); + vec2 numer = vec2(p) + 0.5; + vec2 texel = 1.0 / sz; + + vec4 flow = textureLod(uFlow, numer/sz, 0.0); + float m1 = pc.t; + vec2 dispA = flow.xy * m1; + vec2 dispB = flow.zw * (1.0 - m1); + vec2 uvRef = (numer + dispA) / sz; + vec2 uvSrc = (numer + dispB) / sz; + + int refT = pc.cinT; + + f16vec4 R[4]; + for(int i=0;i<refT;++i) R[i]=refTile(i,uvRef); + + #define COST(ox,oy) ( dot(R[0], srchTile(0,uvSrc,texel,ivec2(ox,oy))) + \ ((refT>1)? dot(R[1], srchTile(1,uvSrc,texel,ivec2(ox,oy))):float16_t(0.0hf)) + \ ((refT>2)? dot(R[2], srchTile(2,uvSrc,texel,ivec2(ox,oy))):float16_t(0.0hf)) + \ ((refT>3)? dot(R[3], srchTile(3,uvSrc,texel,ivec2(ox,oy))):float16_t(0.0hf)) ) + float16_t c_mm = COST(-1,-1); + float16_t c_zm = COST( 0,-1); + float16_t c_pm = COST( 1,-1); + float16_t c_mz = COST(-1, 0); + float16_t c_zz = COST( 0, 0); + float16_t c_pz = COST( 1, 0); + float16_t c_mp = COST(-1, 1); + float16_t c_zp = COST( 0, 1); + float16_t c_pp = COST( 1, 1); + + f16vec4 head0 = f16vec4(c_mm, c_zm, c_pm, c_mz); + f16vec4 head1 = f16vec4(c_zz, c_pz, c_mp, c_zp); + float16_t sc = c_pp; + + f16vec4 sub0=wVec4(pc.wBase+0u), mul0=wVec4(pc.wBase+4u), add0=wVec4(pc.wBase+8u); + f16vec4 sub1=wVec4(pc.wBase+12u), mul1=wVec4(pc.wBase+16u), add1=wVec4(pc.wBase+20u); + float16_t subS=W[pc.wBase+24u], mulS=W[pc.wBase+25u], addS=W[pc.wBase+26u]; + + f16vec4 o0 = (head0 - sub0)*mul0 + add0; + f16vec4 o1 = (head1 - sub1)*mul1 + add1; + float16_t oS = (sc - subS)*mulS + addS; + + imageStore(uOut0, p, vec4(o0)); + imageStore(uOut1, p, vec4(o1)); + imageStore(uOut2, p, vec4(float(oS),0.0,0.0,0.0)); +} diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_correlation_g09.comp
b/app/src/main/cpp/winlator/vk/shaders/cnn_correlation_g09.comp new file mode 100644 index 000000000..e50c7013e --- /dev/null +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_correlation_g09.comp @@ -0,0 +1,122 @@ +#version 450 +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require + +layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; + +layout(set = 0, binding = 32) uniform sampler2D uA0; +layout(set = 0, binding = 33) uniform sampler2D uA1; +layout(set = 0, binding = 34) uniform sampler2D uB0; +layout(set = 0, binding = 35) uniform sampler2D uB1; +layout(set = 0, binding = 36) uniform sampler2D uC0; +layout(set = 0, binding = 37) uniform sampler2D uC1; + +layout(set = 0, binding = 48, rgba8) uniform writeonly image2D uOut0; +layout(set = 0, binding = 49, rgba8) uniform writeonly image2D uOut1; + +layout(set = 0, binding = 8, std430) readonly buffer Weights { float16_t W[]; }; + +layout(push_constant) uniform PC { + ivec2 size; + float t; + float mvScale; + uint wBase; + int cinT; + int coutT; + int flags; +} pc; + +const float16_t SUB_S = float16_t(2.509765625); +const float16_t MUL_S = float16_t(0.480712890625); +const float16_t ADD_S = float16_t(0.278076171875); +const float16_t F0 = float16_t(0.0); +const float16_t F1 = float16_t(1.0); + +f16vec4 wVec4(uint o) { return f16vec4(W[o], W[o + 1u], W[o + 2u], W[o + 3u]); } + +f16mat4 wMat4(uint o) { return f16mat4(wVec4(o), wVec4(o + 4u), wVec4(o + 8u), wVec4(o + 12u)); } + +void main() { + ivec2 p = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(p, pc.size))) return; + + vec2 sz = vec2(textureSize(uA0, 0)); + vec2 uv = (vec2(p) + 0.5) / sz; + vec2 tpx = 1.0 / sz; + + f16vec4 A0 = f16vec4(textureLod(uA0, uv, 0.0)); + f16vec4 A1 = f16vec4(textureLod(uA1, uv, 0.0)); + f16vec4 B0 = f16vec4(textureLod(uB0, uv, 0.0)); + f16vec4 B1 = f16vec4(textureLod(uB1, uv, 0.0)); + + #define COSTAB(ox, oy, outA, outB) { \ + vec2 _uv = uv + vec2(float(ox), float(oy)) * tpx; \ + f16vec4 _c0 = 
f16vec4(textureLod(uC0, _uv, 0.0)); \ + f16vec4 _c1 = f16vec4(textureLod(uC1, _uv, 0.0)); \ + outA = dot(A0,_c0); outA += dot(A1,_c1); \ + outB = dot(B0,_c0); outB += dot(B1,_c1); \ + } + + float16_t a_mm, b_mm, a_zm, b_zm, a_pm, b_pm, a_mz, b_mz; + float16_t a_zz, b_zz, a_pz, b_pz, a_mp, b_mp, a_zp, b_zp; + float16_t a_pp, b_pp; + + COSTAB(-1,-1, a_mm, b_mm) + COSTAB( 0,-1, a_zm, b_zm) + COSTAB( 1,-1, a_pm, b_pm) + COSTAB(-1, 0, a_mz, b_mz) + COSTAB( 0, 0, a_zz, b_zz) + COSTAB( 1, 0, a_pz, b_pz) + COSTAB(-1, 1, a_mp, b_mp) + COSTAB( 0, 1, a_zp, b_zp) + COSTAB( 1, 1, a_pp, b_pp) + + f16vec4 cvA1 = f16vec4(a_mm, a_zm, a_pm, a_mz); + f16vec4 cvA2 = f16vec4(a_zz, a_pz, a_mp, a_zp); + f16vec4 cvB1 = f16vec4(b_mm, b_zm, b_pm, b_mz); + f16vec4 cvB2 = f16vec4(b_zz, b_pz, b_mp, b_zp); + float16_t sAv = a_pp; + float16_t sBv = b_pp; + + f16vec4 sub1 = wVec4(pc.wBase + 0u), mul1 = wVec4(pc.wBase + 4u), add1 = wVec4(pc.wBase + 8u); + f16vec4 lo = wVec4(pc.wBase + 12u), hi = wVec4(pc.wBase + 16u); + f16vec4 sub2 = wVec4(pc.wBase + 20u), mul2 = wVec4(pc.wBase + 24u), add2 = wVec4(pc.wBase + 28u); + + f16vec4 qA1 = clamp((cvA1 - sub1) * mul1 + add1, lo, hi); + f16vec4 qB1 = clamp((cvB1 - sub1) * mul1 + add1, lo, hi); + f16vec4 qA2 = clamp((cvA2 - sub2) * mul2 + add2, lo, hi); + f16vec4 qB2 = clamp((cvB2 - sub2) * mul2 + add2, lo, hi); + float16_t qSa = clamp((sAv - SUB_S) * MUL_S + ADD_S, F0, F1); + float16_t qSb = clamp((sBv - SUB_S) * MUL_S + ADD_S, F0, F1); + + { + uint h = pc.wBase + 32u; + f16mat4 M_a = wMat4(h); f16mat4 M_b = wMat4(h + 16u); f16vec4 v_c = wVec4(h + 32u); + f16mat4 M_d = wMat4(h + 36u); f16mat4 M_e = wMat4(h + 52u); f16vec4 v_f = wVec4(h + 68u); + f16vec4 oSub = wVec4(h + 72u), oMul = wVec4(h + 76u), oAdd = wVec4(h + 80u); + + f16vec4 acc = M_a * qA1; + acc += M_b * qA2; + acc += v_c * qSa; + acc += M_d * qB1; + acc += M_e * qB2; + acc += v_f * qSb; + f16vec4 outv = (acc - oSub) * oMul + oAdd; + imageStore(uOut0, p, vec4(outv)); + } + + { + uint h = 
pc.wBase + 116u; + f16mat4 M_a = wMat4(h); f16mat4 M_b = wMat4(h + 16u); f16vec4 v_c = wVec4(h + 32u); + f16mat4 M_d = wMat4(h + 36u); f16mat4 M_e = wMat4(h + 52u); f16vec4 v_f = wVec4(h + 68u); + f16vec4 oSub = wVec4(h + 72u), oMul = wVec4(h + 76u), oAdd = wVec4(h + 80u); + + f16vec4 acc = M_a * qA1; + acc += M_b * qA2; + acc += v_c * qSa; + acc += M_d * qB1; + acc += M_e * qB2; + acc += v_f * qSb; + f16vec4 outv = (acc - oSub) * oMul + oAdd; + imageStore(uOut1, p, vec4(outv)); + } +} diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_correlation_warpfollow.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_correlation_warpfollow.comp new file mode 100644 index 000000000..233ee5682 --- /dev/null +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_correlation_warpfollow.comp @@ -0,0 +1,161 @@ +#version 450 +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require + +layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; + +layout(set = 0, binding = 32) uniform sampler2D uR0; +layout(set = 0, binding = 33) uniform sampler2D uR1; +layout(set = 0, binding = 34) uniform sampler2D uR2; +layout(set = 0, binding = 35) uniform sampler2D uR3; +layout(set = 0, binding = 36) uniform sampler2D uFlow0; +layout(set = 0, binding = 37) uniform sampler2D uFlow1; + +layout(set = 0, binding = 48, rgba8) uniform writeonly image2D uOut; + +layout(set = 0, binding = 8, std430) readonly buffer Weights { float16_t W[]; }; + +layout(set = 0, binding = 0) uniform UBO { float m0; float m1; float m2; } ubo; + +layout(push_constant) uniform PC { + ivec2 size; + float t; + float mvScale; + uint wBase; + int cinT; + int coutT; + int flags; +} pc; + +f16vec4 wVec4(uint o){ return f16vec4(W[o],W[o+1u],W[o+2u],W[o+3u]); } + +f16vec4 wMatVec(uint o, f16vec4 v){ + return wVec4(o+ 0u)*v.x + wVec4(o+ 4u)*v.y + wVec4(o+ 8u)*v.z + wVec4(o+12u)*v.w; +} + +void main(){ + ivec2 p = ivec2(gl_GlobalInvocationID.xy); + if(any(greaterThanEqual(p, imageSize(uOut)))) return; + + vec2 sz = 
vec2(textureSize(uR0, 0)); + ivec2 cmax = ivec2(sz) - ivec2(1); + vec2 numer = vec2(p) + 0.5; + vec2 uvCenter = numer / sz; + + float m1 = ubo.m1; + float16_t wFwd = float16_t(1.0 - m1); + float16_t wBwd = float16_t(m1); + + f16vec4 f0 = f16vec4(textureLod(uFlow0, uvCenter, 0.0)); + vec2 uvA0 = (numer + vec2(f0.xy * wFwd)) / sz; + vec2 uvB0 = (numer + vec2(f16vec2(textureLod(uFlow0, uvA0, 0.0).xy))) / sz; + vec2 uvC0 = (numer + vec2(f0.zw * wBwd)) / sz; + vec2 uvD0 = (numer + vec2(f16vec2(textureLod(uFlow0, uvC0, 0.0).zw))) / sz; + + f16vec4 f1 = f16vec4(textureLod(uFlow1, uvCenter, 0.0)); + vec2 uvA1 = (numer + vec2(f1.xy * wFwd)) / sz; + f16vec2 g1fwd = f16vec2(textureLod(uFlow1, uvA1, 0.0).xy); + vec2 uvC1 = (numer + vec2(f1.zw * wBwd)) / sz; + f16vec2 g1bwd = f16vec2(textureLod(uFlow1, uvC1, 0.0).zw); + vec2 uvB1 = (numer + vec2(g1fwd)) / sz; + vec2 uvD1 = (numer + vec2(g1bwd)) / sz; + + f16vec4 rA0_0 = f16vec4(textureLod(uR0, uvB0, 0.0)); + f16vec4 rA0_1 = f16vec4(textureLod(uR1, uvB0, 0.0)); + f16vec4 rA1_0 = f16vec4(textureLod(uR0, uvB1, 0.0)); + f16vec4 rA1_1 = f16vec4(textureLod(uR1, uvB1, 0.0)); + f16vec4 rB0_0 = f16vec4(textureLod(uR2, uvD0, 0.0)); + f16vec4 rB0_1 = f16vec4(textureLod(uR3, uvD0, 0.0)); + f16vec4 rB1_0 = f16vec4(textureLod(uR2, uvD1, 0.0)); + f16vec4 rB1_1 = f16vec4(textureLod(uR3, uvD1, 0.0)); + + ivec2 t_mm = clamp(p+ivec2(-1,-1), ivec2(0), cmax); + ivec2 t_zm = clamp(p+ivec2( 0,-1), ivec2(0), cmax); + ivec2 t_pm = clamp(p+ivec2( 1,-1), ivec2(0), cmax); + ivec2 t_mz = clamp(p+ivec2(-1, 0), ivec2(0), cmax); + ivec2 t_pz = clamp(p+ivec2( 1, 0), ivec2(0), cmax); + ivec2 t_mp = clamp(p+ivec2(-1, 1), ivec2(0), cmax); + ivec2 t_zp = clamp(p+ivec2( 0, 1), ivec2(0), cmax); + ivec2 t_pp = clamp(p+ivec2( 1, 1), ivec2(0), cmax); + + f16vec4 a2_mm=f16vec4(texelFetch(uR2,t_mm,0)), a3_mm=f16vec4(texelFetch(uR3,t_mm,0)); + f16vec4 a2_zm=f16vec4(texelFetch(uR2,t_zm,0)), a3_zm=f16vec4(texelFetch(uR3,t_zm,0)); + f16vec4 
a2_pm=f16vec4(texelFetch(uR2,t_pm,0)), a3_pm=f16vec4(texelFetch(uR3,t_pm,0)); + f16vec4 a2_mz=f16vec4(texelFetch(uR2,t_mz,0)), a3_mz=f16vec4(texelFetch(uR3,t_mz,0)); + f16vec4 a2_zz=f16vec4(textureLod(uR2,uvCenter,0.0)), a3_zz=f16vec4(textureLod(uR3,uvCenter,0.0)); + f16vec4 a2_pz=f16vec4(texelFetch(uR2,t_pz,0)), a3_pz=f16vec4(texelFetch(uR3,t_pz,0)); + f16vec4 a2_mp=f16vec4(texelFetch(uR2,t_mp,0)), a3_mp=f16vec4(texelFetch(uR3,t_mp,0)); + f16vec4 a2_zp=f16vec4(texelFetch(uR2,t_zp,0)), a3_zp=f16vec4(texelFetch(uR3,t_zp,0)); + f16vec4 a2_pp=f16vec4(texelFetch(uR2,t_pp,0)), a3_pp=f16vec4(texelFetch(uR3,t_pp,0)); + + f16vec4 b0_mm=f16vec4(texelFetch(uR0,t_mm,0)), b1_mm=f16vec4(texelFetch(uR1,t_mm,0)); + f16vec4 b0_zm=f16vec4(texelFetch(uR0,t_zm,0)), b1_zm=f16vec4(texelFetch(uR1,t_zm,0)); + f16vec4 b0_pm=f16vec4(texelFetch(uR0,t_pm,0)), b1_pm=f16vec4(texelFetch(uR1,t_pm,0)); + f16vec4 b0_mz=f16vec4(texelFetch(uR0,t_mz,0)), b1_mz=f16vec4(texelFetch(uR1,t_mz,0)); + f16vec4 b0_zz=f16vec4(textureLod(uR0,uvCenter,0.0)), b1_zz=f16vec4(textureLod(uR1,uvCenter,0.0)); + f16vec4 b0_pz=f16vec4(texelFetch(uR0,t_pz,0)), b1_pz=f16vec4(texelFetch(uR1,t_pz,0)); + f16vec4 b0_mp=f16vec4(texelFetch(uR0,t_mp,0)), b1_mp=f16vec4(texelFetch(uR1,t_mp,0)); + f16vec4 b0_zp=f16vec4(texelFetch(uR0,t_zp,0)), b1_zp=f16vec4(texelFetch(uR1,t_zp,0)); + f16vec4 b0_pp=f16vec4(texelFetch(uR0,t_pp,0)), b1_pp=f16vec4(texelFetch(uR1,t_pp,0)); + + f16vec4 hA0g1 = f16vec4( + dot(rA0_0,a2_mm)+dot(rA0_1,a3_mm), dot(rA0_0,a2_zm)+dot(rA0_1,a3_zm), + dot(rA0_0,a2_pm)+dot(rA0_1,a3_pm), dot(rA0_0,a2_mz)+dot(rA0_1,a3_mz)); + f16vec4 hA0g2 = f16vec4( + dot(rA0_0,a2_zz)+dot(rA0_1,a3_zz), dot(rA0_0,a2_pz)+dot(rA0_1,a3_pz), + dot(rA0_0,a2_mp)+dot(rA0_1,a3_mp), dot(rA0_0,a2_zp)+dot(rA0_1,a3_zp)); + float16_t sA0 = dot(rA0_0,a2_pp)+dot(rA0_1,a3_pp); + + f16vec4 hA1g1 = f16vec4( + dot(rA1_0,a2_mm)+dot(rA1_1,a3_mm), dot(rA1_0,a2_zm)+dot(rA1_1,a3_zm), + dot(rA1_0,a2_pm)+dot(rA1_1,a3_pm), dot(rA1_0,a2_mz)+dot(rA1_1,a3_mz)); + 
f16vec4 hA1g2 = f16vec4( + dot(rA1_0,a2_zz)+dot(rA1_1,a3_zz), dot(rA1_0,a2_pz)+dot(rA1_1,a3_pz), + dot(rA1_0,a2_mp)+dot(rA1_1,a3_mp), dot(rA1_0,a2_zp)+dot(rA1_1,a3_zp)); + float16_t sA1 = dot(rA1_0,a2_pp)+dot(rA1_1,a3_pp); + + f16vec4 hB0g1 = f16vec4( + dot(rB0_0,b0_mm)+dot(rB0_1,b1_mm), dot(rB0_0,b0_zm)+dot(rB0_1,b1_zm), + dot(rB0_0,b0_pm)+dot(rB0_1,b1_pm), dot(rB0_0,b0_mz)+dot(rB0_1,b1_mz)); + f16vec4 hB0g2 = f16vec4( + dot(rB0_0,b0_zz)+dot(rB0_1,b1_zz), dot(rB0_0,b0_pz)+dot(rB0_1,b1_pz), + dot(rB0_0,b0_mp)+dot(rB0_1,b1_mp), dot(rB0_0,b0_zp)+dot(rB0_1,b1_zp)); + float16_t sB0 = dot(rB0_0,b0_pp)+dot(rB0_1,b1_pp); + + f16vec4 hB1g1 = f16vec4( + dot(rB1_0,b0_mm)+dot(rB1_1,b1_mm), dot(rB1_0,b0_zm)+dot(rB1_1,b1_zm), + dot(rB1_0,b0_pm)+dot(rB1_1,b1_pm), dot(rB1_0,b0_mz)+dot(rB1_1,b1_mz)); + f16vec4 hB1g2 = f16vec4( + dot(rB1_0,b0_zz)+dot(rB1_1,b1_zz), dot(rB1_0,b0_pz)+dot(rB1_1,b1_pz), + dot(rB1_0,b0_mp)+dot(rB1_1,b1_mp), dot(rB1_0,b0_zp)+dot(rB1_1,b1_zp)); + float16_t sB1 = dot(rB1_0,b0_pp)+dot(rB1_1,b1_pp); + + uint b = pc.wBase; + f16vec4 e0s=wVec4(b+144u), e0m=wVec4(b+148u), e0a=wVec4(b+152u); + f16vec4 e1s=wVec4(b+156u), e1m=wVec4(b+160u), e1a=wVec4(b+164u); + float16_t sSub=W[b+168u], sMul=W[b+169u], sAdd=W[b+170u]; + + f16vec4 ZERO = f16vec4(0.0hf), ONE = f16vec4(1.0hf); + + #define ENC0(v) clamp(((v) - e0s)*e0m + e0a, ZERO, ONE) + #define ENC1(v) clamp(((v) - e1s)*e1m + e1a, ZERO, ONE) + #define ENCS(s) clamp(((s) - sSub)*sMul + sAdd, float16_t(0.0hf), float16_t(1.0hf)) + + f16vec4 acc = f16vec4(0.0hf); + acc += wMatVec(b+ 0u, ENC0(hA0g1)); + acc += wMatVec(b+ 16u, ENC1(hA0g2)); + acc += wVec4(b+128u) * ENCS(sA0); + acc += wMatVec(b+ 32u, ENC0(hB0g1)); + acc += wMatVec(b+ 48u, ENC1(hB0g2)); + acc += wVec4(b+132u) * ENCS(sB0); + acc += wMatVec(b+ 64u, ENC0(hA1g1)); + acc += wMatVec(b+ 80u, ENC1(hA1g2)); + acc += wVec4(b+136u) * ENCS(sA1); + acc += wMatVec(b+ 96u, ENC0(hB1g1)); + acc += wMatVec(b+112u, ENC1(hB1g2)); + acc += wVec4(b+140u) * ENCS(sB1); + + f16vec4 
oSub=wVec4(b+171u), oMul=wVec4(b+175u), oAdd=wVec4(b+179u); + f16vec4 outv = (acc - oSub)*oMul + oAdd; + + imageStore(uOut, p, vec4(outv)); +} diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_flowreg.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_flowreg.comp new file mode 100644 index 000000000..9aab2d54f --- /dev/null +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_flowreg.comp @@ -0,0 +1,87 @@ +#version 450 +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require + +layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; + +layout(set = 0, binding = 32) uniform sampler2D uIn0; +layout(set = 0, binding = 33) uniform sampler2D uIn1; +layout(set = 0, binding = 34) uniform sampler2D uIn2; +layout(set = 0, binding = 35) uniform sampler2D uIn3; +layout(set = 0, binding = 36) uniform sampler2D uFlow; +layout(set = 0, binding = 37) uniform sampler2D uOcc; +layout(set = 0, binding = 48, rgba16f) uniform writeonly image2D uDst; + +layout(set = 0, binding = 8, std430) readonly buffer Weights { float16_t W[]; }; + +layout(push_constant) uniform PC { + ivec2 size; + float t; + float mvScale; + uint wBase; + int cinT; + int coutT; + int flags; +} pc; + +const ivec2 TAP[9] = ivec2[]( + ivec2(-1,-1), ivec2(0,-1), ivec2(1,-1), + ivec2(-1, 0), ivec2(0, 0), ivec2(1, 0), + ivec2(-1, 1), ivec2(0, 1), ivec2(1, 1)); + +vec4 sampIn(int ci, vec2 uv) { + if (ci == 0) return textureLod(uIn0, uv, 0.0); + if (ci == 1) return textureLod(uIn1, uv, 0.0); + if (ci == 2) return textureLod(uIn2, uv, 0.0); + return textureLod(uIn3, uv, 0.0); +} + +f16vec4 wRow(int ci, int k, int row) { + uint b = pc.wBase + uint((ci * 9 + k) * 8 + row * 4); + return f16vec4(W[b + 0u], W[b + 1u], W[b + 2u], W[b + 3u]); +} + +void main() { + ivec2 p = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(p, pc.size))) return; + + vec2 fsz = vec2(textureSize(uIn0, 0)); + vec2 numer = vec2(p) + 0.5; + vec2 uv = numer / fsz; + + f16vec2 res = f16vec2(0.0hf); + vec2 texel = vec2(1.0) / 
fsz; + for (int ci = 0; ci < pc.cinT; ++ci) { + for (int k = 0; k < 9; ++k) { + vec2 tuv = uv + vec2(TAP[k]) * texel; + f16vec4 s = f16vec4(sampIn(ci, tuv)); + f16vec4 r0 = wRow(ci, k, 0); + f16vec4 r1 = wRow(ci, k, 1); + res.x += dot(r0, s); + res.y += dot(r1, s); + } + } + + uint bb = pc.wBase + uint(pc.cinT * 9 * 8); + res += f16vec2(W[bb + 0u], W[bb + 1u]); + + vec2 flow = vec2(res); + + vec4 base = 2.0 * textureLod(uFlow, uv, 0.0); + vec2 fwd = base.xy + flow; + vec2 bwd = base.zw - flow; + + float t = pc.t; + + vec2 uvF = (numer + fwd * (2.0 * t) * 0.25) / fsz; + float occF = textureLod(uOcc, uvF, 0.0).x; + fwd *= (1.0 - occF); + + vec2 uvB = (numer + bwd * (2.0 * (1.0 - t)) * 0.25) / fsz; + float occB = textureLod(uOcc, uvB, 0.0).x; + bwd *= (1.0 - occB); + + float occC = textureLod(uOcc, uv, 0.0).x; + vec4 outv = vec4(fwd.x, fwd.y, bwd.x, bwd.y) * (1.0 - occC); + + imageStore(uDst, p, outv); +} diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_occlusion.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_occlusion.comp index ffebca34e..854db0430 100644 --- a/app/src/main/cpp/winlator/vk/shaders/cnn_occlusion.comp +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_occlusion.comp @@ -49,28 +49,33 @@ void main() { ivec2 sz = pc.size; ivec2 hi = sz - ivec2(1); - float16_t bias = W[int(pc.wBase) + 72]; + const float16_t bias = W[int(pc.wBase) + 72]; + const float16_t thr16 = float16_t(pc.occThresh); + const float16_t one16 = float16_t(1.0); for (int by = 0; by < 2; ++by) { for (int bx = 0; bx < 2; ++bx) { ivec2 p = wg * 32 + lid * 2 + ivec2(bx, by); - float16_t acc = bias; - for (int k = 0; k < 9; ++k) { - ivec2 c = clamp(p + OTAP[k], ivec2(0), hi); - f16vec4 a = f16vec4(texelFetch(uSrcA, c, 0)); - f16vec4 b = f16vec4(texelFetch(uSrcB, c, 0)); + f16vec4 a0 = f16vec4(texelFetch(uSrcA, clamp(p + OTAP[0], ivec2(0), hi), 0)); + float16_t acc = dot(a0, wA(0)); + for (int k = 1; k < 9; ++k) { + f16vec4 a = f16vec4(texelFetch(uSrcA, clamp(p + OTAP[k], ivec2(0), hi), 0)); 
acc += dot(a, wA(k)); + } + for (int k = 0; k < 9; ++k) { + f16vec4 b = f16vec4(texelFetch(uSrcB, clamp(p + OTAP[k], ivec2(0), hi), 0)); acc += dot(b, wB(k)); } + acc += bias; - float occ = 1.0 / (1.0 + exp(-float(acc))); - float mask = step(pc.occThresh, occ) * occ; + float16_t occ = one16 / (one16 + exp(-acc)); + float16_t mask = step(thr16, occ) * occ; if (all(lessThan(p, sz))) - imageStore(uMip0, p, vec4(mask)); + imageStore(uMip0, p, vec4(float(mask))); - lds[lid.x * 2 + bx][lid.y * 2 + by] = mask; + lds[lid.x * 2 + bx][lid.y * 2 + by] = float(mask); } } diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_pyramid.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_pyramid.comp index 460a0891e..24f108e1c 100644 --- a/app/src/main/cpp/winlator/vk/shaders/cnn_pyramid.comp +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_pyramid.comp @@ -4,7 +4,9 @@ layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; layout(set = 0, binding = 0) uniform sampler2D uSrc; + layout(set = 0, binding = 1, r8) uniform writeonly image2D uDst; + layout(set = 0, binding = 2) uniform sampler2D uLumaPrev; layout(push_constant) uniform PC { @@ -21,16 +23,19 @@ const vec3 LUMA = vec3(0.298828125, 0.5869140625, 0.11395263671875); void main() { ivec2 p = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(p, pc.size))) return; float luma; if ((pc.flags & 1) != 0) { + ivec2 srcSize = textureSize(uSrc, 0); vec2 uv = (vec2(p) + 0.5) / vec2(srcSize); f16vec3 c = f16vec3(textureLod(uSrc, uv, 0.0).rgb); luma = float(dot(c, f16vec3(LUMA))); } else { + ivec2 prevSize = textureSize(uLumaPrev, 0); ivec2 b = p * 2; ivec2 i00 = clamp(b, ivec2(0), prevSize - 1); diff --git a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c new file mode 100644 index 000000000..377831e44 --- /dev/null +++ b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c @@ -0,0 +1,592 @@ +typedef struct CnnPC { + int32_t sx, sy; + float t; + float mvScale; + uint32_t wBase; + int32_t cinT, coutT, 
flags; +} CnnPC; + +#define CNN_GRID(w,h) ((uint32_t)(((w)+15u)/16u)), ((uint32_t)(((h)+15u)/16u)), 1u +#define CNN_FLOW_LEVELS 3 + +static bool cnn_wanted(void) { + char v[PROP_VALUE_MAX] = {0}; + if (__system_property_get("debug.winnative.fgcnn", v) > 0 && + (v[0] == '0' || v[0] == 'f' || v[0] == 'n')) return false; + return true; +} + +static void cnn_barrier_ml(VkCommandBuffer cmd, VkImage image, uint32_t layers, + VkImageLayout from, VkImageLayout to, + VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, + VkAccessFlags src_access, VkAccessFlags dst_access) { + VkImageMemoryBarrier b = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER}; + b.oldLayout = from; b.newLayout = to; + b.srcAccessMask = src_access; b.dstAccessMask = dst_access; + b.srcQueueFamilyIndex = b.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + b.image = image; + b.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + b.subresourceRange.levelCount = 1; b.subresourceRange.layerCount = layers; + vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, NULL, 0, NULL, 1, &b); +} +static inline void cnn_to_read(VkCommandBuffer cmd, VkImage im, uint32_t layers) { + cnn_barrier_ml(cmd, im, layers, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); +} +static inline void cnn_to_write(VkCommandBuffer cmd, VkImage im, uint32_t layers) { + cnn_barrier_ml(cmd, im, layers, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, VK_ACCESS_SHADER_WRITE_BIT); +} + +typedef struct CnnBind { uint32_t binding; VkDescriptorType type; } CnnBind; + +static VkDescriptorSetLayout cnn_make_dsl(VkRenderer* r, const CnnBind* b, uint32_t n) { + VkDescriptorSetLayoutBinding lb[24]; memset(lb, 0, sizeof(lb)); + for (uint32_t i = 0; i < n; i++) { + lb[i].binding = b[i].binding; + 
lb[i].descriptorType = b[i].type; + lb[i].descriptorCount = 1; + lb[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + } + VkDescriptorSetLayoutCreateInfo ci = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + ci.bindingCount = n; ci.pBindings = lb; + VkDescriptorSetLayout l = VK_NULL_HANDLE; + if (vkCreateDescriptorSetLayout(r->device, &ci, NULL, &l) != VK_SUCCESS) return VK_NULL_HANDLE; + return l; +} + +static bool cnn_make_pipe(VkRenderer* r, const uint32_t* spv, size_t spvLen, + VkDescriptorSetLayout dsl, VkPipelineLayout* outPL, VkPipeline* outPipe) { + VkPushConstantRange pcr = { VK_SHADER_STAGE_COMPUTE_BIT, 0, 32 }; + VkPipelineLayoutCreateInfo pli = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + pli.setLayoutCount = 1; pli.pSetLayouts = &dsl; + pli.pushConstantRangeCount = 1; pli.pPushConstantRanges = &pcr; + if (vkCreatePipelineLayout(r->device, &pli, NULL, outPL) != VK_SUCCESS) return false; + VkShaderModule mod = load_shader_module(r, spv, spvLen); + if (!mod) return false; + *outPipe = create_compute_pipeline(r, mod, *outPL); + vkDestroyShaderModule(r->device, mod, NULL); + return *outPipe != VK_NULL_HANDLE; +} + +static void destroy_cnn_pipelines(VkRenderer* r) { + VkPipelineSet* P = &r->pipelines; + VkPipeline pipes[] = { P->cnn_pyramid_pipe, P->cnn_conv_pipe, P->cnn_cost9_pipe, + P->cnn_flowreg_pipe, P->cnn_warpfollow_pipe, P->cnn_generate_pipe }; + VkPipelineLayout pls[] = { P->cnn_pyramid_pl, P->cnn_conv_pl, P->cnn_cost9_pl, + P->cnn_flowreg_pl, P->cnn_warpfollow_pl, P->cnn_generate_pl }; + VkDescriptorSetLayout dsls[] = { P->cnn_pyramid_dsl, P->cnn_conv_dsl, P->cnn_cost9_dsl, + P->cnn_flowreg_dsl, P->cnn_warpfollow_dsl, P->cnn_generate_dsl }; + for (int i = 0; i < 6; i++) { + if (pipes[i]) vkDestroyPipeline(r->device, pipes[i], NULL); + if (pls[i]) vkDestroyPipelineLayout(r->device, pls[i], NULL); + if (dsls[i]) vkDestroyDescriptorSetLayout(r->device, dsls[i], NULL); + } + P->cnn_pyramid_pipe = P->cnn_conv_pipe = P->cnn_cost9_pipe = + 
P->cnn_flowreg_pipe = P->cnn_warpfollow_pipe = P->cnn_generate_pipe = VK_NULL_HANDLE; + P->cnn_pyramid_pl = P->cnn_conv_pl = P->cnn_cost9_pl = + P->cnn_flowreg_pl = P->cnn_warpfollow_pl = P->cnn_generate_pl = VK_NULL_HANDLE; + P->cnn_pyramid_dsl = P->cnn_conv_dsl = P->cnn_cost9_dsl = + P->cnn_flowreg_dsl = P->cnn_warpfollow_dsl = P->cnn_generate_dsl = VK_NULL_HANDLE; +} + +static bool create_cnn_pipelines(VkRenderer* r) { + if (!r->fg_float16_supported) return false; + VkPipelineSet* P = &r->pipelines; + const VkDescriptorType S = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + const VkDescriptorType I = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + const VkDescriptorType B = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + const VkDescriptorType U = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + + const CnnBind pyr[] = { {0,S},{1,I},{2,S} }; + const CnnBind conv[] = { {0,S},{1,I},{3,S},{8,B} }; + const CnnBind cost9[] = { {32,S},{33,S},{34,S},{35,S},{36,S},{37,S},{38,S},{39,S},{40,S}, + {48,I},{49,I},{50,I},{8,B} }; + const CnnBind flowreg[] = { {32,S},{33,S},{34,S},{35,S},{36,S},{37,S},{48,I},{8,B} }; + const CnnBind warpf[] = { {0,U},{32,S},{33,S},{34,S},{35,S},{36,S},{37,S},{48,I},{8,B} }; + const CnnBind gen[] = { {32,S},{33,S},{34,S},{35,S},{36,S},{48,I} }; + + P->cnn_pyramid_dsl = cnn_make_dsl(r, pyr, 3); + P->cnn_conv_dsl = cnn_make_dsl(r, conv, 4); + P->cnn_cost9_dsl = cnn_make_dsl(r, cost9, 13); + P->cnn_flowreg_dsl = cnn_make_dsl(r, flowreg, 8); + P->cnn_warpfollow_dsl = cnn_make_dsl(r, warpf, 9); + P->cnn_generate_dsl = cnn_make_dsl(r, gen, 6); + if (!P->cnn_pyramid_dsl || !P->cnn_conv_dsl || !P->cnn_cost9_dsl || + !P->cnn_flowreg_dsl || !P->cnn_warpfollow_dsl || !P->cnn_generate_dsl) goto cnn_fail; + + if (!cnn_make_pipe(r, cnn_pyramid_comp, cnn_pyramid_comp_size, + P->cnn_pyramid_dsl, &P->cnn_pyramid_pl, &P->cnn_pyramid_pipe)) goto cnn_fail; + if (!cnn_make_pipe(r, cnn_conv_comp, cnn_conv_comp_size, + P->cnn_conv_dsl, &P->cnn_conv_pl, &P->cnn_conv_pipe)) goto cnn_fail; + if 
(!cnn_make_pipe(r, cnn_correlation_cost9_comp, cnn_correlation_cost9_comp_size, + P->cnn_cost9_dsl, &P->cnn_cost9_pl, &P->cnn_cost9_pipe)) goto cnn_fail; + if (!cnn_make_pipe(r, cnn_flowreg_comp, cnn_flowreg_comp_size, + P->cnn_flowreg_dsl, &P->cnn_flowreg_pl, &P->cnn_flowreg_pipe)) goto cnn_fail; + if (!cnn_make_pipe(r, cnn_correlation_warpfollow_comp, cnn_correlation_warpfollow_comp_size, + P->cnn_warpfollow_dsl, &P->cnn_warpfollow_pl, &P->cnn_warpfollow_pipe)) goto cnn_fail; + if (!cnn_make_pipe(r, cnn_generate_comp, cnn_generate_comp_size, + P->cnn_generate_dsl, &P->cnn_generate_pl, &P->cnn_generate_pipe)) goto cnn_fail; + + VK_LOGI("CNN-FG pipelines built"); + return true; +cnn_fail: + VK_LOGW("CNN-FG pipelines unavailable; classical flow only"); + destroy_cnn_pipelines(r); + return false; +} + +static bool cnn_make_ssbo(VkRenderer* r, int id, const void* data, size_t n) { + VkBufferCreateInfo bc = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; + bc.size = n; bc.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + bc.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + if (vkCreateBuffer(r->device, &bc, NULL, &r->fg_cnn.w[id]) != VK_SUCCESS) return false; + VkMemoryRequirements mr; vkGetBufferMemoryRequirements(r->device, r->fg_cnn.w[id], &mr); + VkMemoryAllocateInfo ai = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; + ai.allocationSize = mr.size; + ai.memoryTypeIndex = vkr_find_memory_type(r, mr.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + if (ai.memoryTypeIndex == UINT32_MAX) return false; + if (vkAllocateMemory(r->device, &ai, NULL, &r->fg_cnn.wMem[id]) != VK_SUCCESS) return false; + vkBindBufferMemory(r->device, r->fg_cnn.w[id], r->fg_cnn.wMem[id], 0); + void* p = NULL; + if (vkMapMemory(r->device, r->fg_cnn.wMem[id], 0, n, 0, &p) != VK_SUCCESS) return false; + memcpy(p, data, n); + vkUnmapMemory(r->device, r->fg_cnn.wMem[id]); + r->fg_cnn.wLen[id] = n; + return true; +} + +static bool cnn_make_img(VkRenderer* r, VkCnnImg* o, 
uint32_t w, uint32_t h, + VkFormat fmt, uint32_t layers, bool arrayView) { + if (w < 1) w = 1; if (h < 1) h = 1; if (layers < 1) layers = 1; + memset(o, 0, sizeof(*o)); + o->w = w; o->h = h; o->layers = layers; + VkImageCreateInfo ic = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO}; + ic.imageType = VK_IMAGE_TYPE_2D; ic.format = fmt; + ic.extent.width = w; ic.extent.height = h; ic.extent.depth = 1; + ic.mipLevels = 1; ic.arrayLayers = layers; + ic.samples = VK_SAMPLE_COUNT_1_BIT; ic.tiling = VK_IMAGE_TILING_OPTIMAL; + ic.usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT + | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + ic.sharingMode = VK_SHARING_MODE_EXCLUSIVE; ic.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + if (vkCreateImage(r->device, &ic, NULL, &o->image) != VK_SUCCESS) return false; + VkMemoryRequirements mr; vkGetImageMemoryRequirements(r->device, o->image, &mr); + VkMemoryAllocateInfo ai = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; + ai.allocationSize = mr.size; + ai.memoryTypeIndex = vkr_find_memory_type(r, mr.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + if (ai.memoryTypeIndex == UINT32_MAX) return false; + if (vkAllocateMemory(r->device, &ai, NULL, &o->memory) != VK_SUCCESS) return false; + vkBindImageMemory(r->device, o->image, o->memory, 0); + + VkImageViewCreateInfo vi = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO}; + vi.image = o->image; vi.format = fmt; + vi.viewType = arrayView ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; + vi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + vi.subresourceRange.levelCount = 1; + vi.subresourceRange.baseArrayLayer = 0; + vi.subresourceRange.layerCount = arrayView ? layers : 1; + if (vkCreateImageView(r->device, &vi, NULL, &o->view) != VK_SUCCESS) return false; + + uint32_t nlv = layers < 4 ? 
layers : 4; + for (uint32_t k = 0; k < nlv; k++) { + VkImageViewCreateInfo lv = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO}; + lv.image = o->image; lv.format = fmt; lv.viewType = VK_IMAGE_VIEW_TYPE_2D; + lv.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + lv.subresourceRange.levelCount = 1; + lv.subresourceRange.baseArrayLayer = k; lv.subresourceRange.layerCount = 1; + if (vkCreateImageView(r->device, &lv, NULL, &o->layerView[k]) != VK_SUCCESS) return false; + } + return true; +} + +static void cnn_free_img(VkRenderer* r, VkCnnImg* o) { + for (int k = 0; k < 4; k++) if (o->layerView[k]) vkDestroyImageView(r->device, o->layerView[k], NULL); + if (o->view) vkDestroyImageView(r->device, o->view, NULL); + if (o->image) vkDestroyImage(r->device, o->image, NULL); + if (o->memory) vkFreeMemory(r->device, o->memory, NULL); + memset(o, 0, sizeof(*o)); +} + +static void fg_destroy_cnn_resources(VkRenderer* r) { + if (!r->device) return; + VkFgCnn* C = &r->fg_cnn; + for (int i = 0; i < 64; i++) { + if (C->w[i]) vkDestroyBuffer(r->device, C->w[i], NULL); + if (C->wMem[i]) vkFreeMemory(r->device, C->wMem[i], NULL); + C->w[i] = VK_NULL_HANDLE; C->wMem[i] = VK_NULL_HANDLE; C->wLen[i] = 0; + } + if (C->ubo) { vkDestroyBuffer(r->device, C->ubo, NULL); C->ubo = VK_NULL_HANDLE; } + if (C->uboMem) { vkFreeMemory(r->device, C->uboMem, NULL); C->uboMem = VK_NULL_HANDLE; } + VkCnnFeatSet* sets[2] = { &C->featPrev, &C->featCurr }; + for (int s = 0; s < 2; s++) + for (int L = 0; L < CNN_LEVELS; L++) { + cnn_free_img(r, &sets[s]->luma[L]); cnn_free_img(r, &sets[s]->feat4a[L]); + cnn_free_img(r, &sets[s]->feat4b[L]); cnn_free_img(r, &sets[s]->feat8[L]); + } + for (int L = 0; L < CNN_LEVELS; L++) { + cnn_free_img(r, &C->feat8_pair[L]); cnn_free_img(r, &C->dpair[L]); + cnn_free_img(r, &C->hG0[L]); cnn_free_img(r, &C->hG1[L]); + cnn_free_img(r, &C->hG23[L]); cnn_free_img(r, &C->hG4[L]); + cnn_free_img(r, &C->hD0[L]); cnn_free_img(r, &C->hD1[L]); + cnn_free_img(r, &C->hD2[L]); 
cnn_free_img(r, &C->hD3[L]); + cnn_free_img(r, &C->hD5[L]); cnn_free_img(r, &C->hD6[L]); + cnn_free_img(r, &C->hD7[L]); cnn_free_img(r, &C->hD8[L]); + cnn_free_img(r, &C->flowMid[L]); cnn_free_img(r, &C->flowRef[L]); + } + cnn_free_img(r, &C->occ); cnn_free_img(r, &C->seedBlack); cnn_free_img(r, &C->dummy); + for (int pi = 0; pi < CNN_POOLS; pi++) + if (C->pool[pi]) { vkDestroyDescriptorPool(r->device, C->pool[pi], NULL); C->pool[pi] = VK_NULL_HANDLE; } + C->ready = false; +} + +static bool fg_create_cnn_resources(VkRenderer* r, uint32_t w, uint32_t h) { + const VkFormat R8 = VK_FORMAT_R8_UNORM, RGBA8 = VK_FORMAT_R8G8B8A8_UNORM, + F16 = VK_FORMAT_R16G16B16A16_SFLOAT; + VkFgCnn* C = &r->fg_cnn; + + float fs = r->fg_flow_scale >= 0.2f ? (r->fg_flow_scale <= 1.0f ? r->fg_flow_scale : 1.0f) : 0.5f; + uint32_t mw = (uint32_t)((float)w * fs); if (mw < 1u) mw = 1u; + uint32_t mh = (uint32_t)((float)h * fs); if (mh < 1u) mh = 1u; + uint32_t lw[CNN_LEVELS], lh[CNN_LEVELS]; + for (int L = 0; L < CNN_LEVELS; L++) { + lw[L] = (L == 0) ? mw : (lw[L-1] > 1 ? lw[L-1] / 2 : 1u); + lh[L] = (L == 0) ? mh : (lh[L-1] > 1 ? 
lh[L-1] / 2 : 1u); + } + + VkDescriptorPoolSize ps[] = { + { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 4096u }, + { VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1024u }, + { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 64u }, + { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 512u }, + }; + VkDescriptorPoolCreateInfo dpc = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO}; + dpc.maxSets = 1024u; dpc.poolSizeCount = 4; dpc.pPoolSizes = ps; + C->curPool = 0; + for (int pi = 0; pi < CNN_POOLS; pi++) + if (vkCreateDescriptorPool(r->device, &dpc, NULL, &C->pool[pi]) != VK_SUCCESS) return false; + + #define CNN_W(ID) if (!cnn_make_ssbo(r, ID, wnfg_##ID##_weights, (size_t)wnfg_##ID##_weights_size)) return false + CNN_W(05); CNN_W(06); CNN_W(07); CNN_W(14); CNN_W(20); CNN_W(21); CNN_W(22); + CNN_W(24); CNN_W(25); CNN_W(26); CNN_W(27); CNN_W(28); CNN_W(29); + CNN_W(36); CNN_W(37); CNN_W(42); CNN_W(51); + #undef CNN_W + + { + float ubo[4] = { 1.0f, 0.5f, 0.5f, 0.0f }; + VkBufferCreateInfo bc = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; + bc.size = sizeof(ubo); bc.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + bc.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + if (vkCreateBuffer(r->device, &bc, NULL, &C->ubo) != VK_SUCCESS) return false; + VkMemoryRequirements mr; vkGetBufferMemoryRequirements(r->device, C->ubo, &mr); + VkMemoryAllocateInfo ai = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; + ai.allocationSize = mr.size; + ai.memoryTypeIndex = vkr_find_memory_type(r, mr.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + if (ai.memoryTypeIndex == UINT32_MAX) return false; + if (vkAllocateMemory(r->device, &ai, NULL, &C->uboMem) != VK_SUCCESS) return false; + vkBindBufferMemory(r->device, C->ubo, C->uboMem, 0); + void* p = NULL; + if (vkMapMemory(r->device, C->uboMem, 0, sizeof(ubo), 0, &p) != VK_SUCCESS) return false; + memcpy(p, ubo, sizeof(ubo)); vkUnmapMemory(r->device, C->uboMem); + } + + uint32_t f2w[CNN_LEVELS], f2h[CNN_LEVELS], fw[CNN_LEVELS], fh[CNN_LEVELS]; + for (int 
L = 0; L < CNN_LEVELS; L++) { + f2w[L] = lw[L] > 1 ? lw[L] / 2 : 1u; f2h[L] = lh[L] > 1 ? lh[L] / 2 : 1u; + fw[L] = f2w[L] > 1 ? f2w[L] / 2 : 1u; fh[L] = f2h[L] > 1 ? f2h[L] / 2 : 1u; + } + + VkCnnFeatSet* fsets[2] = { &C->featPrev, &C->featCurr }; + for (int s = 0; s < 2; s++) + for (int L = 0; L < CNN_LEVELS; L++) { + if (!cnn_make_img(r, &fsets[s]->luma[L], lw[L], lh[L], R8, 1, false)) return false; + if (!cnn_make_img(r, &fsets[s]->feat4a[L], f2w[L], f2h[L], RGBA8, 1, true)) return false; + if (!cnn_make_img(r, &fsets[s]->feat4b[L], f2w[L], f2h[L], RGBA8, 1, true)) return false; + if (!cnn_make_img(r, &fsets[s]->feat8[L], fw[L], fh[L], RGBA8, 2, true)) return false; + } + for (int L = 0; L < CNN_LEVELS; L++) { + if (!cnn_make_img(r, &C->feat8_pair[L], fw[L], fh[L], RGBA8, 4, true)) return false; + if (!cnn_make_img(r, &C->hG0[L], fw[L], fh[L], RGBA8, 2, true)) return false; + if (!cnn_make_img(r, &C->hG1[L], fw[L], fh[L], RGBA8, 2, true)) return false; + if (!cnn_make_img(r, &C->hG23[L], fw[L], fh[L], RGBA8, 4, true)) return false; + if (!cnn_make_img(r, &C->hG4[L], fw[L], fh[L], RGBA8, 2, true)) return false; + if (!cnn_make_img(r, &C->hD0[L], fw[L], fh[L], RGBA8, 3, true)) return false; + if (!cnn_make_img(r, &C->hD1[L], fw[L], fh[L], RGBA8, 3, true)) return false; + if (!cnn_make_img(r, &C->hD2[L], fw[L], fh[L], RGBA8, 2, true)) return false; + if (!cnn_make_img(r, &C->hD3[L], fw[L], fh[L], RGBA8, 1, true)) return false; + if (!cnn_make_img(r, &C->hD5[L], fw[L], fh[L], RGBA8, 1, true)) return false; + if (!cnn_make_img(r, &C->hD6[L], fw[L], fh[L], RGBA8, 2, true)) return false; + if (!cnn_make_img(r, &C->hD7[L], fw[L], fh[L], RGBA8, 1, true)) return false; + if (!cnn_make_img(r, &C->hD8[L], fw[L], fh[L], RGBA8, 1, true)) return false; + if (!cnn_make_img(r, &C->dpair[L], fw[L], fh[L], RGBA8, 2, true)) return false; + if (!cnn_make_img(r, &C->flowMid[L], fw[L], fh[L], F16, 1, false)) return false; + if (!cnn_make_img(r, &C->flowRef[L], fw[L], fh[L], F16, 1, 
false)) return false; + } + if (!cnn_make_img(r, &C->occ, mw, mh, F16, 1, false)) return false; + if (!cnn_make_img(r, &C->seedBlack, mw, mh, F16, 1, false)) return false; + if (!cnn_make_img(r, &C->dummy, 1, 1, RGBA8, 1, true)) return false; + + C->ready = true; + VK_LOGI("CNN-FG resources allocated (L0 %ux%u, %d levels, fs=%.2f)", mw, mh, CNN_LEVELS, (double)fs); + return true; +} + +static VkDescriptorSet cnn_alloc(VkRenderer* r, VkDescriptorSetLayout dsl) { + VkDescriptorSetAllocateInfo a = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; + a.descriptorPool = r->fg_cnn.pool[r->fg_cnn.curPool]; a.descriptorSetCount = 1; a.pSetLayouts = &dsl; + VkDescriptorSet ds = VK_NULL_HANDLE; + if (vkAllocateDescriptorSets(r->device, &a, &ds) != VK_SUCCESS) return VK_NULL_HANDLE; + return ds; +} +static inline VkWriteDescriptorSet cnn_wimg(VkDescriptorSet ds, uint32_t b, VkDescriptorType t, + const VkDescriptorImageInfo* ii) { + VkWriteDescriptorSet w = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET}; + w.dstSet = ds; w.dstBinding = b; w.descriptorCount = 1; w.descriptorType = t; w.pImageInfo = ii; + return w; +} +static inline VkWriteDescriptorSet cnn_wbuf(VkDescriptorSet ds, uint32_t b, VkDescriptorType t, + const VkDescriptorBufferInfo* bi) { + VkWriteDescriptorSet w = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET}; + w.dstSet = ds; w.dstBinding = b; w.descriptorCount = 1; w.descriptorType = t; w.pBufferInfo = bi; + return w; +} + +static void cnn_conv_dispatch(VkRenderer* r, VkCommandBuffer cmd, + VkImageView srcArr, VkImageView lumaR8, VkImageView dstArr, + int wnfgId, int cinT, int coutT, int flags, uint32_t dW, uint32_t dH) { + VkPipelineSet* P = &r->pipelines; + VkDescriptorSet ds = cnn_alloc(r, P->cnn_conv_dsl); if (!ds) return; + VkDescriptorImageInfo s0 = {r->fg_sampler, srcArr, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo i1 = {VK_NULL_HANDLE, dstArr, VK_IMAGE_LAYOUT_GENERAL}; + VkDescriptorImageInfo s3 = {r->fg_sampler, lumaR8, 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorBufferInfo b8 = {r->fg_cnn.w[wnfgId], 0, r->fg_cnn.wLen[wnfgId]}; + VkWriteDescriptorSet w[4] = { + cnn_wimg(ds,0,VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,&s0), + cnn_wimg(ds,1,VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,&i1), + cnn_wimg(ds,3,VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,&s3), + cnn_wbuf(ds,8,VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,&b8), + }; + vkUpdateDescriptorSets(r->device, 4, w, 0, NULL); + CnnPC pc = {0}; pc.sx=(int32_t)dW; pc.sy=(int32_t)dH; pc.t=0.5f; pc.mvScale=1.0f; + pc.cinT=cinT; pc.coutT=coutT; pc.flags=flags; + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, P->cnn_conv_pipe); + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, P->cnn_conv_pl, 0, 1, &ds, 0, NULL); + vkCmdPushConstants(cmd, P->cnn_conv_pl, VK_SHADER_STAGE_COMPUTE_BIT, 0, 32, &pc); + vkCmdDispatch(cmd, CNN_GRID(dW, dH)); +} + +static void cnn_pyramid_dispatch(VkRenderer* r, VkCommandBuffer cmd, + VkImageView srcView, VkImageView dstLuma, + bool level0, uint32_t w, uint32_t h) { + VkPipelineSet* P = &r->pipelines; + VkDescriptorSet ds = cnn_alloc(r, P->cnn_pyramid_dsl); if (!ds) return; + VkDescriptorImageInfo s0 = {r->fg_sampler, srcView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo i1 = {VK_NULL_HANDLE, dstLuma, VK_IMAGE_LAYOUT_GENERAL}; + VkDescriptorImageInfo s2 = {r->fg_sampler, srcView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkWriteDescriptorSet ws[3] = { + cnn_wimg(ds,0,VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,&s0), + cnn_wimg(ds,1,VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,&i1), + cnn_wimg(ds,2,VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,&s2), + }; + vkUpdateDescriptorSets(r->device, 3, ws, 0, NULL); + CnnPC pc = {0}; pc.sx=(int32_t)w; pc.sy=(int32_t)h; pc.flags = level0 ? 
1 : 0; + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, P->cnn_pyramid_pipe); + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, P->cnn_pyramid_pl, 0, 1, &ds, 0, NULL); + vkCmdPushConstants(cmd, P->cnn_pyramid_pl, VK_SHADER_STAGE_COMPUTE_BIT, 0, 32, &pc); + vkCmdDispatch(cmd, CNN_GRID(w, h)); +} + +static void cnn_cost9_dispatch(VkRenderer* r, VkCommandBuffer cmd, + const VkImageView in5[5], const VkImageView out3[3], + int wnfgId, uint32_t w, uint32_t h) { + VkPipelineSet* P = &r->pipelines; + VkDescriptorSet ds = cnn_alloc(r, P->cnn_cost9_dsl); if (!ds) return; + VkImageView dmy = r->fg_cnn.dummy.layerView[0]; + VkImageView srcmap[9] = { in5[0],in5[1],in5[2],in5[3], dmy, dmy, dmy, dmy, in5[4] }; + VkDescriptorImageInfo si[9]; + for (int i=0;i<9;i++) si[i]=(VkDescriptorImageInfo){r->fg_sampler, srcmap[i], VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo oi[3]; for (int i=0;i<3;i++) oi[i]=(VkDescriptorImageInfo){VK_NULL_HANDLE,out3[i],VK_IMAGE_LAYOUT_GENERAL}; + VkDescriptorBufferInfo b8 = {r->fg_cnn.w[wnfgId], 0, r->fg_cnn.wLen[wnfgId]}; + VkWriteDescriptorSet ws[13]; + for (int i=0;i<9;i++) ws[i]=cnn_wimg(ds,32+i,VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,&si[i]); + for (int i=0;i<3;i++) ws[9+i]=cnn_wimg(ds,48+i,VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,&oi[i]); + ws[12]=cnn_wbuf(ds,8,VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,&b8); + vkUpdateDescriptorSets(r->device, 13, ws, 0, NULL); + CnnPC pc = {0}; pc.sx=(int32_t)w; pc.sy=(int32_t)h; pc.t=0.5f; pc.mvScale=1.0f; pc.cinT=2; + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, P->cnn_cost9_pipe); + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, P->cnn_cost9_pl, 0, 1, &ds, 0, NULL); + vkCmdPushConstants(cmd, P->cnn_cost9_pl, VK_SHADER_STAGE_COMPUTE_BIT, 0, 32, &pc); + vkCmdDispatch(cmd, CNN_GRID(w, h)); +} + +static void cnn_flowreg_dispatch(VkRenderer* r, VkCommandBuffer cmd, + VkImageView f0, VkImageView f1, VkImageView flowSeed, + VkImageView occ, VkImageView outFlow16f, 
uint32_t w, uint32_t h) { + VkPipelineSet* P = &r->pipelines; + VkDescriptorSet ds = cnn_alloc(r, P->cnn_flowreg_dsl); if (!ds) return; + VkImageView dmy = r->fg_cnn.dummy.layerView[0]; + VkDescriptorImageInfo s32={r->fg_sampler,f0,VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s33={r->fg_sampler,f1,VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s34={r->fg_sampler,dmy,VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s35={r->fg_sampler,dmy,VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s36={r->fg_sampler,flowSeed,VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s37={r->fg_sampler,occ,VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo oi ={VK_NULL_HANDLE,outFlow16f,VK_IMAGE_LAYOUT_GENERAL}; + VkDescriptorBufferInfo b8={r->fg_cnn.w[24],0,r->fg_cnn.wLen[24]}; + VkWriteDescriptorSet ws[8] = { + cnn_wimg(ds,32,VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,&s32), + cnn_wimg(ds,33,VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,&s33), + cnn_wimg(ds,34,VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,&s34), + cnn_wimg(ds,35,VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,&s35), + cnn_wimg(ds,36,VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,&s36), + cnn_wimg(ds,37,VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,&s37), + cnn_wimg(ds,48,VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,&oi), + cnn_wbuf(ds,8,VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,&b8), + }; + vkUpdateDescriptorSets(r->device, 8, ws, 0, NULL); + CnnPC pc = {0}; pc.sx=(int32_t)w; pc.sy=(int32_t)h; pc.t=0.5f; pc.mvScale=1.0f; pc.cinT=2; + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, P->cnn_flowreg_pipe); + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, P->cnn_flowreg_pl, 0, 1, &ds, 0, NULL); + vkCmdPushConstants(cmd, P->cnn_flowreg_pl, VK_SHADER_STAGE_COMPUTE_BIT, 0, 32, &pc); + vkCmdDispatch(cmd, CNN_GRID(w, h)); +} + +static void cnn_clear_f16(VkCommandBuffer cmd, VkCnnImg* im) { + cnn_to_write(cmd, im->image, im->layers); + 
VkClearColorValue cc; memset(&cc, 0, sizeof(cc)); + VkImageSubresourceRange sr = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, im->layers}; + cnn_barrier_ml(cmd, im->image, im->layers, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, VK_ACCESS_TRANSFER_WRITE_BIT); + vkCmdClearColorImage(cmd, im->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &cc, 1, &sr); + cnn_barrier_ml(cmd, im->image, im->layers, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); +} + +static void cnn_ingest(VkRenderer* r, VkCommandBuffer cmd, VkImageView frameView, VkCnnFeatSet* FS) { + for (int L=0; Lluma[L].image, 1); + cnn_to_write(cmd, FS->feat4a[L].image, 1); + cnn_to_write(cmd, FS->feat4b[L].image, 1); + cnn_to_write(cmd, FS->feat8[L].image, 2); + } + + for (int L=0; L0) cnn_to_read(cmd, FS->luma[L-1].image, 1); + cnn_pyramid_dispatch(r, cmd, (L==0)?frameView:FS->luma[L-1].view, + FS->luma[L].view, (L==0), FS->luma[L].w, FS->luma[L].h); + } + cnn_to_read(cmd, FS->luma[CNN_FLOW_LEVELS-1].image, 1); + + for (int L=0; Lfg_cnn.dummy.view, FS->luma[L].view, + FS->feat4a[L].view, 5, 1, 1, 1|16, FS->feat4a[L].w, FS->feat4a[L].h); + for (int L=0; Lfeat4a[L].image, 1); + + for (int L=0; Lfeat4a[L].view, FS->luma[L].view, + FS->feat4b[L].view, 6, 1, 1, 0, FS->feat4b[L].w, FS->feat4b[L].h); + for (int L=0; Lfeat4b[L].image, 1); + + for (int L=0; Lfeat4b[L].view, FS->luma[L].view, + FS->feat8[L].view, 7, 1, 2, 2, FS->feat8[L].w, FS->feat8[L].h); + for (int L=0; Lfeat8[L].image, 2); +} + +static void cnn_concat4(VkCommandBuffer cmd, VkCnnImg* lo2, VkCnnImg* hi2, VkCnnImg* dst4) { + cnn_to_write(cmd, dst4->image, 4); + cnn_barrier_ml(cmd, lo2->image, 2, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_READ_BIT, VK_ACCESS_TRANSFER_READ_BIT); + cnn_barrier_ml(cmd, hi2->image, 2, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_READ_BIT, VK_ACCESS_TRANSFER_READ_BIT); + cnn_barrier_ml(cmd, dst4->image, 4, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT); + VkImageCopy cp[2]; memset(cp, 0, sizeof(cp)); + cp[0].srcSubresource=(VkImageSubresourceLayers){VK_IMAGE_ASPECT_COLOR_BIT,0,0,2}; + cp[0].dstSubresource=(VkImageSubresourceLayers){VK_IMAGE_ASPECT_COLOR_BIT,0,0,2}; + cp[0].extent=(VkExtent3D){dst4->w, dst4->h, 1}; + cp[1]=cp[0]; cp[1].dstSubresource.baseArrayLayer=2; + vkCmdCopyImage(cmd, lo2->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst4->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &cp[0]); + vkCmdCopyImage(cmd, hi2->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst4->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &cp[1]); + cnn_barrier_ml(cmd, lo2->image, 2, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_SHADER_READ_BIT); + cnn_barrier_ml(cmd, hi2->image, 2, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_SHADER_READ_BIT); + cnn_barrier_ml(cmd, dst4->image, 4, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); +} + +static void cnn_flow_pass(VkRenderer* r, VkCommandBuffer 
cmd, uint32_t parity, + VkFgImage* prevFrame, VkFgImage* currFrame, bool forward, + VkFgImage* outFlow) { + (void)parity; + VkFgCnn* C = &r->fg_cnn; + if (!C->ready) return; + if (!forward) { + C->curPool = (C->curPool + 1u) % (uint32_t)CNN_POOLS; + vkResetDescriptorPool(r->device, C->pool[C->curPool], 0); + } + + cnn_clear_f16(cmd, &C->occ); + cnn_clear_f16(cmd, &C->seedBlack); + cnn_clear_f16(cmd, &C->dummy); + + VkImageView pv = forward ? currFrame->view : prevFrame->view; + VkImageView cv = forward ? prevFrame->view : currFrame->view; + cnn_ingest(r, cmd, pv, &C->featPrev); + cnn_ingest(r, cmd, cv, &C->featCurr); + + for (int L = CNN_FLOW_LEVELS - 1; L >= 0; --L) { + uint32_t w = C->hG0[L].w, h = C->hG0[L].h; + VkImageView seedView = (L == CNN_FLOW_LEVELS - 1) ? C->seedBlack.view : C->flowMid[L+1].view; + + cnn_concat4(cmd, &C->featPrev.feat8[L], &C->featCurr.feat8[L], &C->feat8_pair[L]); + + cnn_to_write(cmd, C->hG0[L].image, 2); + cnn_conv_dispatch(r, cmd, C->feat8_pair[L].view, C->featCurr.luma[L].view, C->hG0[L].view, 36, 4, 2, 0, w, h); + cnn_to_read(cmd, C->hG0[L].image, 2); + cnn_to_write(cmd, C->hG1[L].image, 2); + cnn_conv_dispatch(r, cmd, C->hG0[L].view, C->featCurr.luma[L].view, C->hG1[L].view, 37, 2, 2, 0, w, h); + cnn_to_read(cmd, C->hG1[L].image, 2); + cnn_to_write(cmd, C->hG23[L].image, 4); + cnn_conv_dispatch(r, cmd, C->hG1[L].view, C->featCurr.luma[L].view, C->hG23[L].view, 42, 4, 4, 0, w, h); + cnn_to_read(cmd, C->hG23[L].image, 4); + cnn_to_write(cmd, C->hG4[L].image, 2); + cnn_conv_dispatch(r, cmd, C->hG23[L].view, seedView, C->hG4[L].view, 21, 3, 2, 0, w, h); + cnn_to_read(cmd, C->hG4[L].image, 2); + + cnn_to_write(cmd, C->hD0[L].image, 3); + { VkImageView in5[5]={C->hG4[L].layerView[0],C->hG4[L].layerView[1],C->hG23[L].layerView[2],C->hG23[L].layerView[3],seedView}; + VkImageView out3[3]={C->hD0[L].layerView[0],C->hD0[L].layerView[1],C->hD0[L].layerView[2]}; + cnn_cost9_dispatch(r, cmd, in5, out3, 14, w, h); } + cnn_to_read(cmd, 
C->hD0[L].image, 3); + cnn_to_write(cmd, C->hD1[L].image, 3); + { VkImageView in5[5]={C->hD0[L].layerView[0],C->hD0[L].layerView[1],C->hD0[L].layerView[2],C->hG4[L].layerView[0],seedView}; + VkImageView out3[3]={C->hD1[L].layerView[0],C->hD1[L].layerView[1],C->hD1[L].layerView[2]}; + cnn_cost9_dispatch(r, cmd, in5, out3, 20, w, h); } + cnn_to_read(cmd, C->hD1[L].image, 3); + cnn_to_write(cmd, C->hD2[L].image, 2); + cnn_conv_dispatch(r, cmd, C->hD1[L].view, C->featCurr.luma[L].view, C->hD2[L].view, 22, 2, 2, 0, w, h); + cnn_to_read(cmd, C->hD2[L].image, 2); + cnn_to_write(cmd, C->hD3[L].image, 1); + cnn_conv_dispatch(r, cmd, C->hD2[L].view, C->featCurr.luma[L].view, C->hD3[L].view, 26, 1, 1, 0, w, h); + cnn_to_read(cmd, C->hD3[L].image, 1); + + VkImageView fdst = (L == 0) ? outFlow->view : C->flowMid[L].view; + VkImage fimg = (L == 0) ? outFlow->image : C->flowMid[L].image; + uint32_t dw = (L == 0) ? outFlow->width : w; + uint32_t dh = (L == 0) ? outFlow->height : h; + cnn_to_write(cmd, fimg, 1); + cnn_flowreg_dispatch(r, cmd, C->hD3[L].layerView[0], C->hD2[L].layerView[0], seedView, + C->occ.view, fdst, dw, dh); + if (L != 0) cnn_to_read(cmd, fimg, 1); + } + + vkr_image_barrier(cmd, outFlow->image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); +} diff --git a/app/src/main/cpp/winlator/vk/vk_dispatch.c b/app/src/main/cpp/winlator/vk/vk_dispatch.c index 54e25c253..93343ce25 100644 --- a/app/src/main/cpp/winlator/vk/vk_dispatch.c +++ b/app/src/main/cpp/winlator/vk/vk_dispatch.c @@ -102,6 +102,7 @@ bool vkd_load_instance(VkInstance instance) { LOAD(DestroyDescriptorSetLayout); LOAD(CreateDescriptorPool); LOAD(DestroyDescriptorPool); + LOAD(ResetDescriptorPool); LOAD(AllocateDescriptorSets); LOAD(FreeDescriptorSets); LOAD(UpdateDescriptorSets); @@ -150,6 +151,8 @@ bool vkd_load_instance(VkInstance instance) { 
LOAD(CmdDispatch); LOAD(CmdPipelineBarrier); LOAD(CmdBlitImage); + LOAD(CmdCopyImage); + LOAD(CmdClearColorImage); LOAD(CmdCopyBufferToImage); LOAD(CmdCopyImageToBuffer); diff --git a/app/src/main/cpp/winlator/vk/vk_dispatch.h b/app/src/main/cpp/winlator/vk/vk_dispatch.h index f6a4bf45b..9864a1044 100644 --- a/app/src/main/cpp/winlator/vk/vk_dispatch.h +++ b/app/src/main/cpp/winlator/vk/vk_dispatch.h @@ -72,6 +72,7 @@ typedef struct VkDispatch { PFN_vkDestroyDescriptorSetLayout DestroyDescriptorSetLayout; PFN_vkCreateDescriptorPool CreateDescriptorPool; PFN_vkDestroyDescriptorPool DestroyDescriptorPool; + PFN_vkResetDescriptorPool ResetDescriptorPool; PFN_vkAllocateDescriptorSets AllocateDescriptorSets; PFN_vkFreeDescriptorSets FreeDescriptorSets; PFN_vkUpdateDescriptorSets UpdateDescriptorSets; @@ -120,6 +121,8 @@ typedef struct VkDispatch { PFN_vkCmdDispatch CmdDispatch; PFN_vkCmdPipelineBarrier CmdPipelineBarrier; PFN_vkCmdBlitImage CmdBlitImage; + PFN_vkCmdCopyImage CmdCopyImage; + PFN_vkCmdClearColorImage CmdClearColorImage; PFN_vkCmdCopyBufferToImage CmdCopyBufferToImage; PFN_vkCmdCopyImageToBuffer CmdCopyImageToBuffer; @@ -201,6 +204,7 @@ void vkd_unload(void); #define vkDestroyDescriptorSetLayout vkd.DestroyDescriptorSetLayout #define vkCreateDescriptorPool vkd.CreateDescriptorPool #define vkDestroyDescriptorPool vkd.DestroyDescriptorPool +#define vkResetDescriptorPool vkd.ResetDescriptorPool #define vkAllocateDescriptorSets vkd.AllocateDescriptorSets #define vkFreeDescriptorSets vkd.FreeDescriptorSets #define vkUpdateDescriptorSets vkd.UpdateDescriptorSets @@ -245,6 +249,8 @@ void vkd_unload(void); #define vkCmdDispatch vkd.CmdDispatch #define vkCmdPipelineBarrier vkd.CmdPipelineBarrier #define vkCmdBlitImage vkd.CmdBlitImage +#define vkCmdCopyImage vkd.CmdCopyImage +#define vkCmdClearColorImage vkd.CmdClearColorImage #define vkCmdCopyBufferToImage vkd.CmdCopyBufferToImage #define vkCmdCopyImageToBuffer vkd.CmdCopyImageToBuffer diff --git 
a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index 0fe318212..f1764769b 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "shaders/window_vert.spv.h" #include "shaders/window_frag.spv.h" @@ -41,6 +42,29 @@ #include "shaders/motion_comp.spv.h" #include "shaders/motion_fp32_comp.spv.h" #include "shaders/interpolate_frag.spv.h" +#include "shaders/cnn_pyramid_comp.spv.h" +#include "shaders/cnn_conv_comp.spv.h" +#include "shaders/cnn_correlation_cost9_comp.spv.h" +#include "shaders/cnn_correlation_warpfollow_comp.spv.h" +#include "shaders/cnn_flowreg_comp.spv.h" +#include "shaders/cnn_generate_comp.spv.h" +#include "shaders/wnfg_05_weights.h" +#include "shaders/wnfg_06_weights.h" +#include "shaders/wnfg_07_weights.h" +#include "shaders/wnfg_14_weights.h" +#include "shaders/wnfg_20_weights.h" +#include "shaders/wnfg_21_weights.h" +#include "shaders/wnfg_22_weights.h" +#include "shaders/wnfg_24_weights.h" +#include "shaders/wnfg_25_weights.h" +#include "shaders/wnfg_26_weights.h" +#include "shaders/wnfg_27_weights.h" +#include "shaders/wnfg_28_weights.h" +#include "shaders/wnfg_29_weights.h" +#include "shaders/wnfg_36_weights.h" +#include "shaders/wnfg_37_weights.h" +#include "shaders/wnfg_42_weights.h" +#include "shaders/wnfg_51_weights.h" static uint64_t now_monotonic_ns(void) { struct timespec ts; @@ -57,6 +81,13 @@ static void destroy_debug_messenger(VkRenderer* r); static bool pick_physical_device(VkRenderer* r); static bool create_device(VkRenderer* r); static void query_device_caps(VkRenderer* r); +static bool cnn_wanted(void); +static bool create_cnn_pipelines(VkRenderer* r); +static void destroy_cnn_pipelines(VkRenderer* r); +static bool fg_create_cnn_resources(VkRenderer* r, uint32_t w, uint32_t h); +static void fg_destroy_cnn_resources(VkRenderer* r); +static void cnn_flow_pass(VkRenderer* r, 
VkCommandBuffer cmd, uint32_t parity, + VkFgImage* prevFrame, VkFgImage* currFrame, bool forward, VkFgImage* outFlow); static bool create_command_pool(VkRenderer* r); static bool create_descriptor_pool(VkRenderer* r, uint32_t capacity); static bool create_pipelines(VkRenderer* r); @@ -370,10 +401,38 @@ static bool create_device(VkRenderer* r) { bool has_f16 = has_extension(exts, ext_count, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); bool has_display_timing = has_extension(exts, ext_count, VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME); bool has_cubic = has_extension(exts, ext_count, VK_EXT_FILTER_CUBIC_EXTENSION_NAME); + bool has_optical_flow = has_extension(exts, ext_count, VK_NV_OPTICAL_FLOW_EXTENSION_NAME); + bool has_sync2 = has_extension(exts, ext_count, VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME); + bool has_fmt_feat2 = has_extension(exts, ext_count, VK_KHR_FORMAT_FEATURE_FLAGS_2_EXTENSION_NAME); + { uint32_t pdc = 0; vkEnumeratePhysicalDevices(r->instance, &pdc, NULL); + VkPhysicalDevice* pds = calloc(pdc ? pdc : 1, sizeof(VkPhysicalDevice)); + PFN_vkGetPhysicalDeviceProperties2 gpdp2 = + (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(r->instance, "vkGetPhysicalDeviceProperties2"); + if (!gpdp2) gpdp2 = (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(r->instance, "vkGetPhysicalDeviceProperties2KHR"); + if (pds && pdc) { vkEnumeratePhysicalDevices(r->instance, &pdc, pds); + for (uint32_t d = 0; d < pdc; d++) { + VkPhysicalDeviceProperties pp; vkGetPhysicalDeviceProperties(pds[d], &pp); + VkPhysicalDeviceDriverProperties drv = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES}; + VkPhysicalDeviceProperties2 p2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, &drv}; + if (gpdp2) gpdp2(pds[d], &p2); + uint32_t ec = 0; vkEnumerateDeviceExtensionProperties(pds[d], NULL, &ec, NULL); + VkExtensionProperties* ep = calloc(ec ? 
ec : 1, sizeof(VkExtensionProperties)); int ofd = 0; + if (ep && ec) { vkEnumerateDeviceExtensionProperties(pds[d], NULL, &ec, ep); + for (uint32_t e = 0; e < ec; e++) if (!strcmp(ep[e].extensionName, "VK_NV_optical_flow")) ofd = 1; } + free(ep); + VK_LOGI("FG OF probe[%u/%u]: '%s' driverName='%s' driverID=%u apiV=%u.%u extCount=%u of=%d %s", + d, pdc, pp.deviceName, drv.driverName, (unsigned)drv.driverID, + VK_VERSION_MAJOR(pp.apiVersion), VK_VERSION_MINOR(pp.apiVersion), ec, ofd, + pds[d] == r->physical_device ? "<-SELECTED" : ""); + } + } + free(pds); + VK_LOGI("FG OF probe: selected extCount=%u of=%d sync2=%d fmtfeat2=%d", + ext_count, has_optical_flow, has_sync2, has_fmt_feat2); } free(exts); - const char* enable[16]; + const char* enable[24]; uint32_t enable_n = 0; enable[enable_n++] = VK_KHR_SWAPCHAIN_EXTENSION_NAME; @@ -411,12 +470,37 @@ static bool create_device(VkRenderer* r) { enable[enable_n++] = VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME; f16_feat.shaderFloat16 = VK_TRUE; } - if (has_display_timing && enable_n < 16) { + if (has_display_timing && enable_n < 24) { enable[enable_n++] = VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME; r->ext_display_timing = true; } VK_LOGI("Frame generation fp16 support: ext=%d feature=%d", has_f16, r->fg_float16_supported); + // VK_NV_optical_flow: driver-accelerated motion estimation, the cheap flow path. 
+ r->fg_optical_flow = false; + VkPhysicalDeviceOpticalFlowFeaturesNV of_feat = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_FEATURES_NV }; + if (has_optical_flow && has_sync2 && has_fmt_feat2 && enable_n + 3 <= 24) { + VkPhysicalDeviceOpticalFlowFeaturesNV ofq = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_FEATURES_NV }; + VkPhysicalDeviceFeatures2 feats2 = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2 }; + feats2.pNext = &ofq; + PFN_vkGetPhysicalDeviceFeatures2 fnFeat2 = (PFN_vkGetPhysicalDeviceFeatures2) + vkGetInstanceProcAddr(r->instance, "vkGetPhysicalDeviceFeatures2"); + if (!fnFeat2) fnFeat2 = (PFN_vkGetPhysicalDeviceFeatures2) + vkGetInstanceProcAddr(r->instance, "vkGetPhysicalDeviceFeatures2KHR"); + if (fnFeat2) { fnFeat2(r->physical_device, &feats2); r->fg_optical_flow = (ofq.opticalFlow == VK_TRUE); } + } + // Note: on Adreno 8xx (Turnip chip8) the OF compute impl exists but the extension is chip-gated off, + // so it never advertises. Enabling it anyway is rejected (VK_ERROR_EXTENSION_NOT_PRESENT) and calling + // the entry points without it enabled null-derefs — confirmed on-device. Only a driver patch that + // advertises the extension can unlock it here; the classical flow is used until then. 
+ if (r->fg_optical_flow) { + enable[enable_n++] = VK_NV_OPTICAL_FLOW_EXTENSION_NAME; + enable[enable_n++] = VK_KHR_FORMAT_FEATURE_FLAGS_2_EXTENSION_NAME; + enable[enable_n++] = VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME; + of_feat.opticalFlow = VK_TRUE; + } + VK_LOGI("Frame generation optical flow (VK_NV_optical_flow): ext=%d enabled=%d", has_optical_flow, r->fg_optical_flow); + VK_LOGI("AHB Vulkan device support: android_hardware_buffer=%d external_memory=%d dedicated=%d get_memory_requirements2=%d queue_family_foreign=%d enabled=%d", has_ahb, has_extmem, has_dedicated, has_get_mem_req2, has_queue_fam, r->ext_ahb); if (!r->ext_ahb) { @@ -438,6 +522,7 @@ static bool create_device(VkRenderer* r) { void* feat_chain = NULL; if (has_ycbcr) { ycbcr_feat.pNext = feat_chain; feat_chain = &ycbcr_feat; } if (r->fg_float16_supported) { f16_feat.pNext = feat_chain; feat_chain = &f16_feat; } + if (r->fg_optical_flow) { of_feat.pNext = feat_chain; feat_chain = &of_feat; } dci.pNext = feat_chain; dci.queueCreateInfoCount = 1; dci.pQueueCreateInfos = &qci; @@ -450,6 +535,43 @@ static bool create_device(VkRenderer* r) { } vkGetDeviceQueue(r->device, r->graphics_queue_family, 0, &r->graphics_queue); + if (r->fg_optical_flow) { + r->fnOFFormats = (PFN_vkGetPhysicalDeviceOpticalFlowImageFormatsNV) + vkGetInstanceProcAddr(r->instance, "vkGetPhysicalDeviceOpticalFlowImageFormatsNV"); + r->fnOFCreate = (PFN_vkCreateOpticalFlowSessionNV) vkGetDeviceProcAddr(r->device, "vkCreateOpticalFlowSessionNV"); + r->fnOFDestroy = (PFN_vkDestroyOpticalFlowSessionNV) vkGetDeviceProcAddr(r->device, "vkDestroyOpticalFlowSessionNV"); + r->fnOFBind = (PFN_vkBindOpticalFlowSessionImageNV) vkGetDeviceProcAddr(r->device, "vkBindOpticalFlowSessionImageNV"); + r->fnOFExecute = (PFN_vkCmdOpticalFlowExecuteNV) vkGetDeviceProcAddr(r->device, "vkCmdOpticalFlowExecuteNV"); + // fnOFFormats is optional (the format enumerator is chip-gated off on Adreno 8xx); the session/ + // bind/execute entry points are what we 
actually need. + if (!r->fnOFCreate || !r->fnOFDestroy || !r->fnOFBind || !r->fnOFExecute) { + VK_LOGW("optical flow entry points missing; disabling OF flow"); + r->fg_optical_flow = false; + } + } + + // Optical-flow session probe: the extension was enabled at vkCreateDevice (advertised, or forced + // on Turnip), so the OF device state is initialized. Create a real session to confirm the chip8 + // compute path actually works (vs the earlier null-deref when called without the extension enabled). + if (r->fg_optical_flow && r->fnOFCreate && r->fnOFDestroy) { + VkOpticalFlowSessionCreateInfoNV sci = { VK_STRUCTURE_TYPE_OPTICAL_FLOW_SESSION_CREATE_INFO_NV }; + sci.width = 960; sci.height = 540; + sci.imageFormat = VK_FORMAT_R8G8B8A8_UNORM; + sci.flowVectorFormat = VK_FORMAT_R16G16B16A16_SFLOAT; + sci.outputGridSize = VK_OPTICAL_FLOW_GRID_SIZE_8X8_BIT_NV; + sci.performanceLevel = VK_OPTICAL_FLOW_PERFORMANCE_LEVEL_MEDIUM_NV; + VkOpticalFlowSessionNV sess = VK_NULL_HANDLE; + VkResult sr = r->fnOFCreate(r->device, &sci, NULL, &sess); + VK_LOGI("OF session create probe: result=%d session=%p", (int)sr, (void*)sess); + if (sr == VK_SUCCESS && sess) { + r->fnOFDestroy(r->device, sess, NULL); + VK_LOGI("OF flow path CONFIRMED working"); + } else { + VK_LOGW("OF session create failed (result=%d); disabling OF flow", (int)sr); + r->fg_optical_flow = false; + } + } + if (r->ext_ahb) { r->fnGetAhbProps = (PFN_vkGetAndroidHardwareBufferPropertiesANDROID) vkGetDeviceProcAddr(r->device, "vkGetAndroidHardwareBufferPropertiesANDROID"); @@ -1137,6 +1259,8 @@ static bool create_pipelines(VkRenderer* r) { r, vs_quad, fs_interp, r->pipelines.fg_interp_pipe_layout, r->pipelines.swapchain_pass, false, false, NULL); + r->fg_cnn_capable = cnn_wanted() && create_cnn_pipelines(r); + vkDestroyShaderModule(r->device, vs_window, NULL); vkDestroyShaderModule(r->device, fs_window, NULL); vkDestroyShaderModule(r->device, fs_cursor, NULL); @@ -1200,6 +1324,7 @@ static void 
destroy_pipelines(VkRenderer* r) { if (r->pipelines.offscreen_blit_pipeline) vkDestroyPipeline(r->device, r->pipelines.offscreen_blit_pipeline, NULL); if (r->pipelines.fg_motion_pipeline) vkDestroyPipeline(r->device, r->pipelines.fg_motion_pipeline, NULL); if (r->pipelines.fg_interp_pipeline) vkDestroyPipeline(r->device, r->pipelines.fg_interp_pipeline, NULL); + destroy_cnn_pipelines(r); if (r->pipelines.window_layout) vkDestroyPipelineLayout(r->device, r->pipelines.window_layout, NULL); if (r->pipelines.effect_layout) vkDestroyPipelineLayout(r->device, r->pipelines.effect_layout, NULL); if (r->pipelines.fg_motion_pipe_layout) vkDestroyPipelineLayout(r->device, r->pipelines.fg_motion_pipe_layout, NULL); @@ -2305,6 +2430,7 @@ static void fg_destroy_resources(VkRenderer* r) { if (o->memory) vkFreeMemory(r->device, o->memory, NULL); memset(o, 0, sizeof(*o)); } + fg_destroy_cnn_resources(r); for (uint32_t mi = 0; mi < 3; mi++) { VkFgImage* m = &r->fg_motion[mi]; if (m->view) vkDestroyImageView(r->device, m->view, NULL); @@ -2441,6 +2567,8 @@ static void fg_motion_pass(VkRenderer* r, VkCommandBuffer cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); } +#include "vk_cnn_fg.c" + // Content-signature resources for duplicate detection: a tiny blit target + per-slot host buffers. 
#define FG_SIG_W 64u #define FG_SIG_H 36u @@ -2566,6 +2694,15 @@ static bool fg_create_resources(VkRenderer* r, uint32_t w, uint32_t h) { if (!fg_create_motion(r, &r->fg_coarse_fwd[mi], cw, ch)) goto fail; } } + + if (r->fg_cnn_capable) { + if (!fg_create_cnn_resources(r, w, h)) { + VK_LOGW("CNN-FG resources unavailable; classical flow only"); + fg_destroy_cnn_resources(r); + r->fg_cnn_capable = false; + } + } + memset(r->fg_slot_fence, 0, sizeof(r->fg_slot_fence)); for (uint32_t p = 0; p < 3; p++) { @@ -2922,21 +3059,26 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, hist_dst, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); if (compute_bwd) { - // Backward flow (curr->prev) -> fg_motion[parity], 1st interp of the pair (both modes). - fg_motion_pass(r, f->cmd, r->fg_coarse_set[parity], &r->fg_coarse[parity], - r->fg_motion_set[parity], &r->fg_motion[parity], (float)r->fg_min_step); + if (r->fg_use_cnn && r->fg_cnn_capable && r->fg_cnn.ready) { + cnn_flow_pass(r, f->cmd, parity, prev, curr, false, &r->fg_motion[parity]); + } else { + fg_motion_pass(r, f->cmd, r->fg_coarse_set[parity], &r->fg_coarse[parity], + r->fg_motion_set[parity], &r->fg_motion[parity], (float)r->fg_min_step); + } r->fg_motion_valid = true; } else { - // Backward flow reused (later interps of the pair). Re-establish compute-write -> fragment-read. vkr_image_barrier(f->cmd, r->fg_motion[parity].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); } if (compute_fwd) { - // Quality forward flow (prev->curr) -> fg_motion_fwd[parity]. 
- fg_motion_pass(r, f->cmd, r->fg_coarse_set_fwd[parity], &r->fg_coarse_fwd[parity], - r->fg_motion_set_fwd[parity], &r->fg_motion_fwd[parity], (float)r->fg_min_step); + if (r->fg_use_cnn && r->fg_cnn_capable && r->fg_cnn.ready) { + cnn_flow_pass(r, f->cmd, parity, curr, prev, true, &r->fg_motion_fwd[parity]); + } else { + fg_motion_pass(r, f->cmd, r->fg_coarse_set_fwd[parity], &r->fg_coarse_fwd[parity], + r->fg_motion_set_fwd[parity], &r->fg_motion_fwd[parity], (float)r->fg_min_step); + } r->fg_motion_fwd_valid = true; } else if (deep && r->fg_motion_fwd_valid) { // Forward flow reused. Re-establish its compute-write -> fragment-read dep. @@ -3157,11 +3299,34 @@ static FgPending fg_worker_generate(VkRenderer* r, const FgJob* job) { VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, hist_dst, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - fg_motion_pass(r, f->cmd, r->fg_coarse_set[parity], &r->fg_coarse[parity], - r->fg_motion_set[parity], &r->fg_motion[parity], (float)r->fg_min_step); - if (compute_fwd) { - fg_motion_pass(r, f->cmd, r->fg_coarse_set_fwd[parity], &r->fg_coarse_fwd[parity], - r->fg_motion_set_fwd[parity], &r->fg_motion_fwd[parity], (float)r->fg_min_step); + if (r->fg_use_cnn && r->fg_cnn_capable && r->fg_cnn.ready) { + if (!r->fg_motion_valid) { + cnn_flow_pass(r, f->cmd, parity, prev, curr, false, &r->fg_motion[parity]); + r->fg_motion_valid = true; + } else { + vkr_image_barrier(f->cmd, r->fg_motion[parity].image, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + } + if (compute_fwd) { + if (!r->fg_motion_fwd_valid) { + cnn_flow_pass(r, f->cmd, parity, curr, prev, true, &r->fg_motion_fwd[parity]); + r->fg_motion_fwd_valid = true; + } else { + vkr_image_barrier(f->cmd, 
r->fg_motion_fwd[parity].image, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + } + } + } else { + fg_motion_pass(r, f->cmd, r->fg_coarse_set[parity], &r->fg_coarse[parity], + r->fg_motion_set[parity], &r->fg_motion[parity], (float)r->fg_min_step); + if (compute_fwd) { + fg_motion_pass(r, f->cmd, r->fg_coarse_set_fwd[parity], &r->fg_coarse_fwd[parity], + r->fg_motion_set_fwd[parity], &r->fg_motion_fwd[parity], (float)r->fg_min_step); + } } } @@ -3284,7 +3449,7 @@ static void fg_enqueue(VkRenderer* r, uint8_t mode, float phase) { uint32_t curr = r->fg_history_curr; FgJob job; job.mode = mode; - job.deep = (r->fg_deep_mode && r->fg_history_count >= 2u) ? 1u : 0u; + job.deep = (((r->fg_deep_mode) || (r->fg_use_cnn && r->fg_cnn_capable)) && r->fg_history_count >= 2u) ? 1u : 0u; job.phase = phase; job.curr_idx = curr; job.prev_idx = (curr + 2u) % 3u; @@ -3467,6 +3632,8 @@ JNIEXPORT jlong JNICALL JNI_FN(nativeCreate)(JNIEnv* env, jclass clazz, r->fg_occ_hi = 0.25f; r->fg_min_step = 1; r->fg_flow_scale = 0.5f; // default = legacy half-res flow; presets override (Eco 0.2 .. Max 0.8) + r->fg_use_cnn = cnn_wanted(); + r->fg_deep_mode = r->fg_use_cnn ? true : r->fg_deep_mode; r->validation_enabled = (enableValidationLayers == JNI_TRUE); pthread_mutex_init(&r->scene_mutex, NULL); pthread_mutex_init(&r->queue_mutex, NULL); @@ -3860,6 +4027,20 @@ JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGenDeepMode)(JNIEnv* env, jclass cla pthread_mutex_unlock(&r->render_mutex); } +JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGenUseCnn)(JNIEnv* env, jclass clazz, jlong handle, jboolean useCnn) { + (void)env; (void)clazz; + VkRenderer* r = (VkRenderer*)(intptr_t)handle; + if (!r) return; + bool want = useCnn ? 
true : false; + if (want == r->fg_use_cnn) return; + pthread_mutex_lock(&r->render_mutex); + r->fg_use_cnn = want; + r->fg_history_count = 0; + r->fg_motion_valid = false; + r->fg_motion_fwd_valid = false; + pthread_mutex_unlock(&r->render_mutex); +} + // Generation method: false = interpolation, true = extrapolation. Re-primes the cadence so the // new method starts from a clean pair. JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGenExtrapolate)(JNIEnv* env, jclass clazz, jlong handle, jboolean extrapolate) { diff --git a/app/src/main/cpp/winlator/vk/vk_state.h b/app/src/main/cpp/winlator/vk/vk_state.h index 80b4d2a81..a4808cae5 100644 --- a/app/src/main/cpp/winlator/vk/vk_state.h +++ b/app/src/main/cpp/winlator/vk/vk_state.h @@ -202,6 +202,13 @@ typedef struct VkPipelineSet { VkPipelineLayout fg_interp_pipe_layout; // [interp] set + 24B fragment push range VkPipeline fg_motion_pipeline; // compute (block matching) VkPipeline fg_interp_pipeline; // graphics (interpolation, swapchain_pass) + + VkDescriptorSetLayout cnn_pyramid_dsl, cnn_conv_dsl, cnn_cost9_dsl, + cnn_flowreg_dsl, cnn_warpfollow_dsl, cnn_generate_dsl; + VkPipelineLayout cnn_pyramid_pl, cnn_conv_pl, cnn_cost9_pl, + cnn_flowreg_pl, cnn_warpfollow_pl, cnn_generate_pl; + VkPipeline cnn_pyramid_pipe, cnn_conv_pipe, cnn_cost9_pipe, + cnn_flowreg_pipe, cnn_warpfollow_pipe, cnn_generate_pipe; } VkPipelineSet; // ============================================================ @@ -245,6 +252,44 @@ typedef struct VkFgImage { uint32_t width, height; } VkFgImage; +// ============================================================ +#define CNN_LEVELS 7 +typedef struct VkCnnImg { + VkImage image; + VkImageView view; + VkImageView layerView[4]; + VkDeviceMemory memory; + uint32_t w, h, layers; +} VkCnnImg; +typedef struct VkCnnFeatSet { + VkCnnImg luma [CNN_LEVELS]; + VkCnnImg feat4a[CNN_LEVELS]; + VkCnnImg feat4b[CNN_LEVELS]; + VkCnnImg feat8 [CNN_LEVELS]; +} VkCnnFeatSet; +#define CNN_POOLS 8 + +typedef struct VkFgCnn { + 
bool ready; + VkDescriptorPool pool[CNN_POOLS]; + uint32_t curPool; + VkCnnFeatSet featPrev, featCurr; + VkCnnImg feat8_pair[CNN_LEVELS]; + VkCnnImg hG0[CNN_LEVELS], hG1[CNN_LEVELS], hG23[CNN_LEVELS], hG4[CNN_LEVELS]; + VkCnnImg hD0[CNN_LEVELS], hD1[CNN_LEVELS], hD2[CNN_LEVELS], hD3[CNN_LEVELS]; + VkCnnImg hD5[CNN_LEVELS], hD6[CNN_LEVELS], hD7[CNN_LEVELS], hD8[CNN_LEVELS]; + VkCnnImg dpair[CNN_LEVELS]; + VkCnnImg flowMid[CNN_LEVELS]; + VkCnnImg flowRef[CNN_LEVELS]; + VkCnnImg occ; + VkCnnImg seedBlack; + VkCnnImg dummy; + VkBuffer ubo; VkDeviceMemory uboMem; + VkBuffer w[64]; + VkDeviceMemory wMem[64]; + VkDeviceSize wLen[64]; +} VkFgCnn; + // ============================================================ // Staging pool for async texture uploads // ============================================================ @@ -415,6 +460,10 @@ typedef struct VkRenderer { float fg_flow_scale; // flow-field resolution scale [0.2,1.0] (preset GPU-cost dial) float fg_built_flow_scale; // flow_scale baked into the current motion resources + bool fg_use_cnn; + bool fg_cnn_capable; + VkFgCnn fg_cnn; + // --- Content-duplicate detection ------------------------------------------------------------ // Each composited frame is downsampled to a tiny host buffer; the HOLD promotes the interp // pair only on a genuine content change so duplicate inputs don't advance. @@ -525,6 +574,14 @@ typedef struct VkRenderer { PFN_vkGetRefreshCycleDurationGOOGLE fnGetRefreshCycleDuration; PFN_vkGetPastPresentationTimingGOOGLE fnGetPastPresentationTiming; + // VK_NV_optical_flow: driver-accelerated motion estimation (replaces the classical block-match flow). 
+ bool fg_optical_flow; // extension available + feature enabled + PFN_vkGetPhysicalDeviceOpticalFlowImageFormatsNV fnOFFormats; + PFN_vkCreateOpticalFlowSessionNV fnOFCreate; + PFN_vkDestroyOpticalFlowSessionNV fnOFDestroy; + PFN_vkBindOpticalFlowSessionImageNV fnOFBind; + PFN_vkCmdOpticalFlowExecuteNV fnOFExecute; + // Async upload pool (created in nativeCreate after device). VkStagingPool staging_pool; diff --git a/app/src/main/cpp/winlator/vk/weights_v2/wnfg_05.weights.fp16 b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_05.weights.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..b4af922f1de92d899312515da613823b2d02fd0e GIT binary patch literal 312 zcmca%!~g~_S3~HHn;H5ZiMRJ zw-Um)hwA^n1HxZ!38B-pAaux12>lXju9`W7ze*cIZ{7x>_4FXLt|5dLUIw9KHbQ90 zEf87|s(&w3{|~7Cl~8{#g}RReYQHhmeH~EyFFKU#%sn8w$JS`A$p^DO%VoOqTfS~I G+yMZ|1}rB4 literal 0 HcmV?d00001 diff --git a/app/src/main/cpp/winlator/vk/weights_v2/wnfg_06.weights.fp16 b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_06.weights.fp16 new file mode 100644 index 000000000..57f9ff40f --- /dev/null +++ b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_06.weights.fp16 @@ -0,0 +1,2 @@ +4i$X6@ E,2Ҟ+1b$a)/8p˶$,'P/'.t-z+n%-5TD+ l1 1mz-%/)*wh#ਗ8* ,F5G$2N'+Q,|%Ǩ^,a'B.117w2"(0u+s&Y ["T54p8;5A #4)ӫcХW4w5!L11+p +|'  (6*57" 1G/*41&..(:4s0&D5%97dy1W$mX1&z.DBH-E@.c4j5 \ No newline at end of file diff --git a/app/src/main/cpp/winlator/vk/weights_v2/wnfg_07.weights.fp16 b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_07.weights.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e1f6c5cd233a7a025e04de62f05d683e7f1906ac GIT binary patch literal 624 zcmV-$0+0QQH7>Q!tk@?gvtKQ|v!5~jvEDLREf6)$HJi2-vSc$ix289oGd?MUFjOfk zqfw;ywEnU8t`)3gHa9RYu#h$LsnW0(wA?kxu5dK2sT(MLH8U~YE{8I2H>I)Mur)KS zwe2s_F6OgMxHmT`GomEaxVJ4ou}P$gIej%CFv%{~vehn6uWd1nHy$!Tp)Ii)IAb3% zD3dH6B~dYSFbgo%HdeIAvUauawjwf(F1xN6xO+4&q5GO%t3NNLFoGIfDEO{Tv+yjg zwb?BiGYTq{H18~DvkWq(HV(Biw9l-hw4*c0FpxBlH(RvNt^25uxCtx`urjbpFv2(# 
zH-IoyD^xXgtq?YfsF5>gG}|g8J2^6*vj8L!IfA&;wQ8hxx#BqcHIu4zv4}0KFzzk@ zy!9+W8lJ2nHvl&dIL5HDFk(0RB^og1HZU|(HI1+_JAWu#wXL!^Gs(2(wD2hJE!Q#b zEL*d#xlA{%G!QO=xVA8EqhhCBuktV$Gg`Bmv)nUXGku#UufQ@{I2o@(HQ=wlEL<$m zq~$OrGfA{HGPbjfu}?SsHYK$muGF%Qxk)htG`xpDr2Z~zHFPbOuedq7H_|cfs#7-8 zHn^@+H<2hNF`uwEH~s~A#ySaG?cZ8vCJw^x#z22H02?_DY>eOHA1as zFcdmdI8v;%G(juwGuW{MG@}ftJb3)GOeI@I?*|yIs>%SH1i=%x&b_XHB&dOG|V+@HxM`~H_|j2Gy}1yF6}G{ KC8Vamo+vVU78EZ4 literal 0 HcmV?d00001 diff --git a/app/src/main/cpp/winlator/vk/weights_v2/wnfg_14.weights.fp16 b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_14.weights.fp16 new file mode 100644 index 000000000..e91510017 --- /dev/null +++ b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_14.weights.fp16 @@ -0,0 +1 @@ +KAqAMA{A|:;o:-;\2u5Y14AAIAnA::};);5414MA/;2 \ No newline at end of file diff --git a/app/src/main/cpp/winlator/vk/weights_v2/wnfg_20.weights.fp16 b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_20.weights.fp16 new file mode 100644 index 000000000..e91510017 --- /dev/null +++ b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_20.weights.fp16 @@ -0,0 +1 @@ +KAqAMA{A|:;o:-;\2u5Y14AAIAnA::};);5414MA/;2 \ No newline at end of file diff --git a/app/src/main/cpp/winlator/vk/weights_v2/wnfg_21.weights.fp16 b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_21.weights.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..2d2803f430d0f41b11d155ac648050a63b7bb1ae GIT binary patch literal 1776 zcmXYxc~DhV7{Hapr8Jj81S3U}qKGC-a%R=V?bCKqM8YKk}I!3O3BTOMcgo>OPC zZO(j{27I8ex`BQiT9*7);B;^d7qD!AIWP@Y`j#qd;Z0^sY(8NQS!-v>4PIT`p`Nky zHa-WP{{Ny?#BDO-O&{U~FJinKaW;pzL_%y}BI)k*3(ywmCY`6R(6O+Fx&xcVPWl5= zFOy%pM@BK4vQFp^69cz_qH&y;zZG029|t>41B|tfPtA!M&$hUt8y*B!i>Z41&@skB zeSzWDXf8*%U~E%wiVbR`I!(DmPtwl?4~Hz)hl*F@mCgF!RFxKieYR@I=U42nK@>0b zBksa^W@BCy(Rn{5*VInu>gAs2mL|3<5J5edT07qwvVLMu4Ge<4oNeq%|2zA#sY=QT zGvw{AKP$o_rUF2|B&re?IaeEf)F|#_dssuKgbr#VG=X&J2z6+w)xX>444JDIK;BoT zeg#j9dl(wNNctB31|s=`)F7!JJi(oomIRQ82iKFKQGA3%Y%(Fvbs)~bT#Emj4J|}F 
zY?U)D>p+HMPEr9=?&u39GshX1<1ezva}s2O1`~%a>6kA%Zbt1< z_=>u!b_va3n7Afwh&{$MkqywUP#3*eJu&n$TOyQdUbV9-iZ83)7f*5T21j|Uq3Ob| ziYP~z>d_J{t-^R*=ULe2Kr-^YMIm;@{&$@y9>pRS;(I)wL*3BaI@~>ve<;66jS%+M zKV$b=9&3&A4o|w)XfEUT%af()a$j5ey6 z2-Z)|lm`g^#teYVV2wQ-7Ax18F4RbFA=8-BqI{>Wpxtbwmg}zt(?Ci>oVrIUH!iB# z<|&q!s##qGJ1S+NR5?sMm-Br;tGC5MVqR5Gk|!#eKCzn_oBf+2=}E0&s7JW`*U`5&x`wHhY~mo z8uW`nnVsR-<5&+)JD=KH>1=YMb1kr`=N%JV^KD_oV38!Vv=4+*V1P&TJfY5J^^AoZ z?33VFQyo=8+=O|ch*_#M+m9%H0$a&HtObsdbp_s3X{Q|ic4eT4oLOC_4-gY!tWT4? zrd_gZ81`mmHQq1Ja2~0+A6aA1+;ox5ee|XyK4_b(o(W$EF2)I-~ezIR!+}6~-Fh zI-<(+g!F51o^K>x>L9lhy%YwXg*`kcq!f8EIh8LWR~!J!hXGoSf{|(kQK5b$Aip-e ze@0{9{&na)9s9OoPQrP_V&=m2T7)eWxFV*?Lyd6ZzPmZa=Zq-++KC_Jh@9@(I?YJt5CrdsDkcB%V_rmEp! z5uu|lkr9VzBHOQYG9HVsz$o($E2~LNBjx5=ya%ZsZki>GWBeI*Pi<6Pg-zBQREl1P zX9otT*K0QW4$#@Us=N?N(3@Ntztb^EK8??#$6_eFMt#bzk{TRGVDor{mrE^Dn5m?>=?p7> zuJ#xi)`85h+C=$v%X%qWAIY%;2+(BIQ|f)2aAbZ=n)s1;#m_I z27LBLoW@@V&!ehZY5-KnU#vZk=gaZPq+C`8vOh?3LoxWL`WhGkGK1~-jy_F$3dc!7 z<`C;+mx~SJ(Gks9Ut>~Od3wr)=7e~@%XZ&ilgm9FVTj(y>a#hei9Ghe`P)P zo1w$PJsyH~@KU}En%EU;EL=oCz%g4ynd7&3Kh;+#K84oe$!kg=XhR=4zHvBFvayeM zGesESLexWl4Lj@=te@@_v9MNNI1@j1G}k&CQo5r{E4%J;!`utWgxiSQ*kiZ@d_CPIxcj4R~=CEfBPc&MF3 z``P^PS#4-wEV!We;9_x!KGGHC`&1=srpQI3klto2MlWF>1_;x|9bBdUT+5Vzc-NX_ zbVWDfz2zRa{#!Ccj}kr=Es`#BQeJjcL{vKr{1|WZ1^9 zjeLRM$4`l$`P)*i+H0Gw#L!@GIZ>t!8vU=MuU0@8I2*XFMiYLl&lML+e}@J@HCVz& z;I(k4S}41uagN`?7UsOdp>HSh`Y|d(r689xkK2cR{vClQ&R=d{rRFZZls&Dd92ZE~6Z8VcV1}V>{zzTeYu3zlekyay@gEQW*wmBpaJ1 zuhbt<$KYDE6}XYbh3ob#W3?;C*^Ec93EXYnrg3mC z+~v=bFH{#o(pthHkPTaE;`Nu%CU!*^c|GbSat7RRu5?PiwAmY&BFtP}f3Nl=(7+t% dg;l~HehiQK$!~A2xdB?3-ExNGz_>pG{{f{nHi!TK literal 0 HcmV?d00001 diff --git a/app/src/main/cpp/winlator/vk/weights_v2/wnfg_24.weights.fp16 b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_24.weights.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..485de28cc5e1a75bb4b5eb66047e0f961c8b1d65 GIT binary patch literal 292 zcmV+<0o(qpuk|o35-=_ljJ}asF8>MVu8c4W5N<6pjYyzdE?@!pt$Z*rke@G_4Z0+8 
zFWraVuJa}w3z#l}i$V?!EJlm7tp+Mgl*cZ?6w#peEiDN!txO~zk#8_Bk>#C(EMSc? zu-Y;KlgTfh4!j~aF@20hu+TENiYhNOlGT}vFg_15uro5;7gjGm4H}!#FoJ>RCLM_y zr3tUSt&XpXC-NL^8fcDe5|gh)wezgnGMFQ%qDK(X7F(^NvEHz>EH|bmpP+yFt#HRfB=AlXn$h> literal 0 HcmV?d00001 diff --git a/app/src/main/cpp/winlator/vk/weights_v2/wnfg_25.weights.fp16 b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_25.weights.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..cdd0e49ef260574c197c227876c9ea4f5c134d5a GIT binary patch literal 366 zcmV-!0g?VRH|96EF~m1NtoJxWuID#)H7Pf-G@~{OuB13Au`4yTwF)?DtVF3zd3E^w+tG@`OQHEyz9s@SQP zF-b7qG|aJ+Gz+xnxLdd0H^92@F`%o5G!C?6F(|V%F?XK~B_ShvH1M({F2k7GF@+$V zGik6{G~%&FEDb5OFqtQKHC?b|GHKncUHAk`oGLa~QCHp3o zG=Hw8HC`?NAa*qjr{eG>5TUEZ8ejHK(FmF$pd>L0>^PL2f~pJ%2n}J$XD6 zw@fXIxHK-yL2f}fL197hJFYsyJ48BkHM}!+tU@w5K`}f6s%pBHH7zqAxK%u=Kl(h_ MJw>vDI;}P0JJrLW-v9sr literal 0 HcmV?d00001 diff --git a/app/src/main/cpp/winlator/vk/weights_v2/wnfg_26.weights.fp16 b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_26.weights.fp16 new file mode 100644 index 000000000..cd246d709 --- /dev/null +++ b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_26.weights.fp16 @@ -0,0 +1,3 @@ +)$ +--뷒9U8̶16j+(w(,1Ƕ7 hW#,+á޻u6^7h18x:](S'0%0g,̰*7[쯶368/38)Y*l0}0R.053o1 83E8+a2AE6)1.#00q5AS4#vw.M(0¶1180455>)50*45l5 +,Z,1㥳>4+1ͷ 8ٶ7*40뼍3շ0'<8:I5'5886z86=گ \ No newline at end of file diff --git a/app/src/main/cpp/winlator/vk/weights_v2/wnfg_27.weights.fp16 b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_27.weights.fp16 new file mode 100644 index 000000000..b4325dbf0 --- /dev/null +++ b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_27.weights.fp16 @@ -0,0 +1,2 @@ +T*87:ذ/470?5a׵4153+- +6/1̪ʰ$=ٴ +5.F,M:39l8mmIF^60. 
z-].h4M 2#21v5"жܶJ+u&0/8I0*3Q11Pƴ㻀4W_78oLQ9pf 4_շ(30L/|6V\#жз828M)E<8e Bq?_77{8.07̴j9 \ No newline at end of file diff --git a/app/src/main/cpp/winlator/vk/weights_v2/wnfg_28.weights.fp16 b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_28.weights.fp16 new file mode 100644 index 000000000..78540887b --- /dev/null +++ b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_28.weights.fp16 @@ -0,0 +1,3 @@ +a:+ʶTTuM Q4|,V805)v0+%5A9c4+5V41,,%19߶-Wۺ[68 Og8E2T.3K^3F4N31/^4޵k16"==.51/kܸӲ6+-;425B5q5-016s0:EIP'2H4_: +1]6 +;n4l`5''183 5/02F8\.|!+,Һ8"'j8J=϶9s8a9924:ܴ \ No newline at end of file diff --git a/app/src/main/cpp/winlator/vk/weights_v2/wnfg_29.weights.fp16 b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_29.weights.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..0a402285df37bae47688c09dc36aa896403a6845 GIT binary patch literal 288 zcmV+*0pI?_G?OrVGM2e^G{G-_GL^aXxhpR=HaRt$xl%AhHZC=YIFL8`H2}GsIDI$P zHBPw@EPFOoIOsBVEUGqiIJ7b@H4iWlG+DXtG=DD?G*B0x_32cHK{aYx@0vZHPAEyl5oNR literal 0 HcmV?d00001 diff --git a/app/src/main/cpp/winlator/vk/weights_v2/wnfg_36.weights.fp16 b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_36.weights.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ae7fc52ed95005d78fc791f7b78580edd5bb90d9 GIT binary patch literal 1200 zcmV~$3s4kg007_u(}EOyROlccddP9Nx3Am(zuiLtp9w~dI78-$;R_$6k(!M$OsK$M zsK6Mg1$oGM-fNG2{QvHPI`|B-(LrW>f<=j9&d|UNqBAP?eb?|b{XGzavK(26fB}vm z&xLyFXyv4OC`|fC-J79FeqyD_BWF5AY&Qe5e3Q5ERs2#=YE1LS%Y$@)@4!|>(1osH z^oFdrE%rCkb@V~?wGg9?DmDCdIG-+|*23GaOcXXE*<@`P7Arl@5!Zmf-!iD%Pyzf{ z-bp9wthgssOV&vm-Gx2+2oW6~CdNQn%STcK)nLh&hujC~TDcD>+~**bP1M^%|H!Gh zkA0vntR832>QAM9?Q0OBmQ&N{JRuQV!O6Lg8`jRMpi@vfjilA`3H}bw^HDI2mBsbC;k;MqdRB>A7x#52)$^jo)Z;%Lmrpo_1EsD;WGLFM}PBM$H^$zFRcw36jI|I?KNEA6+5W%P0dj*_4aQkglt$%3@Hd5Wtn}*156nf5C^g5~ zE`H%FpSg-_(A!cf#COjn2sX+i+N8bgdf;j{Y)5Cio%h%uZMk3_c?SPrY*pn`%ao?@ zC8o*9YyD4uZl6eq)azgjVrf571M=Z1Wi*DT!d2 z-=vl49Pf9?cphrz(slFMV&h`O<;FwA#M7~XQnHRIViee7e=E*Je`>QxleH)Md=%wv 
zR!7M^I!>7sxT_=y_1s3+RdOPm5AFIO@mQ)-EW{MYu>3xmE&1)^g{jIC=Obb}eB~U# z72(0q7V19!Fi;1s5s^&`)JAX-%8DpSYzOihyYORceNt=^^+AEMluLt!a3}bM>}%Oh zRFhwZOj5b}iP2`+8*qCP%%#%5@;P}|vd>uqzD?{TNRHB?nQBQ>$0(hP(`thX73BJLUDHPzyP$IWGn1=RiE=Lx(nOP@2bqs3z&0TIqQpgEw@qttnEYtu{I)7FbK(K9v2UW0oT6Y}DnLN{A$9wL^&O*kX?lURf# z`$g_;l%)#Ha?T1JwsbC)xy(Nja?m!Px# literal 0 HcmV?d00001 diff --git a/app/src/main/cpp/winlator/vk/weights_v2/wnfg_37.weights.fp16 b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_37.weights.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..5b40842e2e5053c0ff5d6a4290f9cb1983716939 GIT binary patch literal 1200 zcmV~$dr%Z*003|;Gm&x3Oo24uKyPo4z1{t8zi)3JmoZdY>i8OCFd=b9#(Xu7DM|)D zQkfth;UghQ?%>|Hw-0uY*EuyBBAUiaQ!q*rb0Qp3BPIXn_nT&UfvE`OqRY_4+zU@( zhY@Bkjii%@kif(R<})ARg(xXhQd3M{Kz-pHP&8*J-IlR8dMX;MF%fUamdaa&jp>6d zUGEc)kOQz$ZxgQ5qf}?@2{=h!ZEu&;1vS)&qFRy|KgY@b#_Z4+q?&XW+h^oMmTFa& zBe&NAbC@!Od3)F)wibQkD@M2N?dWs-Va5k3ci9SGDY!!?ILf>iVFNyns>(PTlN*pf z*>WKIDD<(o%KS6)uax4P;VEO9)xFMa;(@(V>Vgq#cIpg;6I0WgQK?eybfqPtb~H=9 z1GBXC+$@bn43(*w5gY6ZcHwQd5BBOz)ZZ%re%tVA$1iuwaR2Cu0>RcP0isj4M&QbM}?UK~R8ssE$M0qS6Q#Y|4 z@_y$obuv4|bQ|5UO`7bmN8_8$*(ODFYL{ghoJa3L9qATy6>UI?CJx4-TuZyP!XEK0 zM`_inW!SctJ)8oV9ZFhc56;Irjed~JN7?4H^dUyW_5;EIq!pdIX=(` zMzynbi`+FrRmLVze8TKB(K2WSchzn>taxFQ+6CW-=I8=OaCI{0lwZ-9>s{z3C3aoi zC(JPMjM_rxlV9ipTi`d53F;I_wXX%N_j`%0lpI@X>{A|*F|Y|Af#}FI3OaL;DeoW`rS2j4c^Tc?z)qkIx zKDV3RX1ys~watngC4%BhOpmm({zZtfpLxEFT;l%$wTSg&;t}{g9EM|I!yv&)v>{TY zPXz7gtoxMtNe#}ew$!=?gAM#&YIQ9idouXxz&e(?Tv*>q4_N#M}HaZ!AHbu z{ST(c?rFG584-+?>bJF2ffz9c3cOeFE%Z3LC$zgZhyp%;(BpSIdaGAjlc^GaDLpgr zid01RQx8JZrI1o*{{`IBephyBQ$ZHDIQ)?Fg>Thuw_WCwVK;b{l=)nG{+#_<0X;Go zDqs26@Hc1(U(YBP*3lApkDaIer}t9#xuL4HP0zt$z=EgY#lBBx9JeORS3*~SDCbGN zL<9dFg}Gg@NM^uu$z-010?mq^7e$xKKLy;<%|WTwr_ oc`L8!JokpUnXlqHxNH0c?k>BTFVby|So4o9gdT|gC+5Vvr_C?_daNn{89>y>n-F zDN2eO&QqycN_J@d3EU zbF^V&*d^4^ni%@j(bZLC4bqpz=EA5}Oh>FQz+*9Ql^eJe_eFcVSPeDz-h`+23GFq^8tO`yb)1+tSF8(nN@j1X}&Wx%_`@sfdo;8vF 
zj;x@u{MK2-%X*dAmn|ovTKSbgPvL80OxPnW+;J7wKoiZA@`qS1{K{;>k2h|s|Ae=& zt+~B;H(JHl5$YN)J^|d|x6pSb$+(qFPz^De;n^R_VbFvLaFldRylea`o&qnqVzL$V zB!{JBlFrX37n!zt9^Ia8;W-iBC#0DzB_8zBN8t%lGtdLPg*K^U^Dmic>M(wS-XN%m zo5X$at`yMXrQcDEUuG82=fyz56!Uwzrtm~2V4o#Ox1?>VD1A=>`44U?&h`3q@~L}SQZs9y zILUS^+}^gtI0Nj)apz>jku)5OG&27!TxpyP4Yu2@vF?RtZ`j+lLYLfQ)I-)tezUzH zro&spe}cYL99-{wUugl_!EDW~?g!^kx7_Bi5txAwfopm__l(57sEbj_K3IKBKo+$^k%o5Cr40^L&DhE0BYs2wu~pW>S0 zx$+sfBUk~ey~}tH|A_j*b3);{a$}HmovWV9LLhCVQ-sQ}hPXGUFMSR6;D0K}qPKv_ z(y2llA64cHe`#fj%^-AbWlB^3G_K0B^ObxcpUUp$o-N-dibow zT7QIJ@TAKf3v*K(-l}48G$g(h7YMFAeOo*4Nb8Vv$!g{EMP5Z`geJ&wcr;vMwh|7L z{;dzd7t&PMTJ;SPynVz0U`>8yc^$Z|T~XGG14$*JlvliytvjxE%4}$eA>uHqqa&nm zPCJtizCpjmdV`h9D9feQ4c>t@^_Kd4g%5Ye+oWyMWuY~iyt{;ERUh{SnMJz5I@-s4 z4JpeJ*>y!3NP44nAdA~d*!+B}EB&5U-%&N1EldG(*s!ag8G!wa?ZQ}R19aKC3RwL- zsb(Aoar_Bt7JQ9M?T`6M%4GFhqZPN#8cLhwBJVd{^aHe%FQZ$KpUF}B9QT>gnB1iO z=KDfNF#hN*v%N0Ci%P;dyx^X<&0^b+AUyKDCcxD;keqhU6xAR<4S)RNAIyt~td z{`ivi0xsrGYwO%+z%cQlI9DEO5>R1wk$+_d3h}W#@pt7M`-S%-ai5v*Sj-;=4@^p& zj`v9g7<*Qu0V&VW8BPtCcn`vR{A;tL=m&GDw@CF|;{9L}@_9atpDxw*PvKtUJJeO- ze^L|D$bFd)$}Ox+ae&pu-$p8Qz8{Nv21=%U8SWQGwZP1L`Ps|6p)mB}UCDzX$5W|v2r!A#$4Xt`gqtxz@B3mxhU^A1E* zwxd?A8t)vKxrwk)uh4I)&lMg2;@p?JCN5F$;2tbyI9DJR)n{H(KW2HdCmRjms_hd% z4QXg=WKgcBXs~~CFkL(WyXR(@&jOv%hxA->mOml%h;~zSrM8-YR+tHx)(^sU_Q5%s zs62NGHG_$*KI1$Ro?Ot9f57GzG(v-^Oj3X6C0tEduAX=GMKc4>|&+FYrQ zJ5w9+NqUNVFp~jN0z=GwIWyt?NZR((!d`e2zEm+)1+`Tws|$H=co!Oqmw@Wp1-uFs zv-R+7s=D#Od6)Y-z*BR<^FX2UR9b@9B-_HTz~kgM+;M!3+oF_)6|hrki3Pe+bB(X2 zS1{0cBd;Z

`9iUo|$9 z?kf$!f3v$(4YVfvUB6Lv!acwQx;Kf!eeMc6*A=xgc8%4~CDj4Va0E@T-O&A^M`9iYMGexuXnFveh_d8DyMCm>0Qe zq3M1WjZU(^V~q1*KQmjYqAb!n@U3Vm@G&V(I;M;WUjj$MO9@Qv9a$@!0>A4ua0WdJ z|3uf&e5_ovWu$E|2LwSCB}<+M59B>8n8*zf zdH%!SDDH@jKdCR}5}4MWX><|SP_xNvsHaSk-!A=Tqxo6d8juXbu2tGz{ui1HPGIxp z%c;fmH|AJi0|$eNV6*Edq`Q6&7t2FXZ_g}sxHS#Tk(VS5L;on%JoE8Xr6PRBM<_)L zCuhs8)q4oj`@JLSUr7$QEv!;A?ROFzdGBZ{h~npL(acTh6GDDJ@8r5&84BsMmZ0q@4u{Wl_rF*;XXdfgbV^-JVo02^Gzrc@? ze(2-qbl32_vAA#KLBU+}uHo64slRY>ur|F>cnEspUg~V+)4*+Zno@-X=`-3N#sqL1 ztS}(BgPn0_)yeKNa4@KhyYV%Zoy-a2M*JnVhwHG?5;aoVI^OUhd?NI|*@@fk$kMA} zpZhJ7OeY$B@cHl~{(^rFa~Pio5-CYIW}E_#VIeAq?KqD90OuJ?>D{h8<(k&D@LSS} z%cF16y`?X0m5q+!k(!^hqPBo^BU(@%_R@>Cw}y|K{N1!MJRJ@hXT(>L1Dnq(4r;UW zwL48cpxsPioZAZ;3X_F1%qMb3Qg!VWeSkF9uhFg0_u>%bVt0VC(gECtjf)K6iPDz+ z6s?w%$wt0Kup7MrKW9pG_u?<%|4#BM6(T)dV9VT zKSRsJs`3=Bhgd?7_XLEl(G+pK6c07FS@f_xoOBWAT77(WB`f-?_@!L>ZZ{WN9{CM% zMBl>|Qc$?b?=E;FJ!3bj<5WS$1*_prE=m3q{ek8Z%2(e!#5F;kkmR87L-eP~* zY2Hxc{4y9r7g2+Z8QekfoUjOvGi;G@)(Ywg9>RCAz6qQ*|6$|7N%QmQ)*ZcUEsbK6 z;&+hBV7>eVJme!<_1K3h)Z1Dogb6?+^Kb*v&qb8w@SG`%V?q;f8fh(zB}?J=2@Y3U zv>`f(ptE6eLv@n79Vn&df&0<|DY#58Ma++yKhp__Gy ztzjJVd`(u0B~}+<7@^$7%vAXca)F9Q@5{7sXxsZRN7^XMVwrkYJcDurA-~wF;TEG!xS?c2>KC!^#HUi4Jes?$9(Ep;k7%7hnr@S4>(7-*LVNF5 zbfx@{o&ShfP27y~#>q98z4_wuk60T%DbU%=^&@s7+c}Xfm zVXKRyk5rb}SbP9|vfK6%T+939E8$mkAGU>D7O5L9v#H)1hM>2zn-s^!NIrDpzGgOR z2S*k(51d0vF3V}?Yoktnne}_5zrG3|57u(8WM}ZTi92~KzL0PscqO+A>P1#3$Awp_ zM?vfGsK9=v2+2IMzKLAHLks4>W?*jUZ=7HRz-H=|!rSH$Mg7Fz(n=s{iqjg&Jg19L z)g5!dY+w&2!s~_OVSkW-obN4c{FXyI!Gcp?;(Rzb6D?DcR_9qfXjpjbZ-Q zI)W^AN=h@m9NI~LW?y)b?I^RyH6}7ud6l=v#V7qxm0MV(2_*=1psm)hXQ&JVP zR^%eg1Ec99=#qamcaQyLXUe-S6@@bmFCV8h2dTOt7C9TadefbxzD6G1Q2mnc2M2Q1 z@fOc~IF){D_=J4p2GhtnTOOp?7;LKYI3b1k2W3fT`6pcc&`-i$)B#4-Wx^9!JF)=$ zAY-kkW3g*H_!FL{Tj6weJIZy=H4CL=cB$Nq|InHvj`n!S*5Ec&?zxVZCBI+*6gooLU6e_U(liHLJ-?`4t)@c*wnXKVqlR z6TOx8^UIif%wpMVv%ze`F01?n*EDAp`2~8|dMz00?Ps1+u7z$S%+y9(LvaZ?g~|$C zW>=V}e@2FZOexiJnoAPxRyQ+Ezn~UVH=)dL@#HDVIHTYrwp!$V$|QWuI+vv?law<; zWoAV7R;JAC5nQ_&{=AS~00Vx?jx7)fSf 
zKS%*FcD}aExSrQcW)n8j6=Wtq!W<@yFm=$=e2dyz)8$#zJj}#4#T{bqIzcmzSTW6)t?TNr25Ws^6jpe*b+eIqH*5Z-+Bjm8*y5lq+&FPUrbk_)94K4 zYG@2=0}Egh{h9DJTMkO3_pA=eFtSGK0KR_rJST}I%wVRQR9zZqcBiMMwh{V}o9{Sa zx7>l$k*{KnUPm0vL^{tihO`Au$qZ?{n920QM?A-+4CY+;nKT9dTR5zF4}O&;sb~2Q zZLLwV%dBf#FPiR8EU(;8R@FwaQ^XNIWi8gf+e4W%V^_(u*tu8/>1.?c5ȯɲ,1120*/^2ͭ{,J'*.C/{"8- +,4-0K*-!ܧ1390k1Ͱ51,ͬTxG#10J.s1;43)'z,0.731f &0R*Ȯ~,0**$X!#!1*)101t101C3!,FD.R1ߧ0HةaJ+4u02/(m04z1I5-׬44{^-m1Ʈr´A -32*A,3 F5/3.(/u0,8~'=B+23el(0/v1e4*0!,24,Z0.3q5'{O[ޮ8П(Ѱ3|-6,/ +ӧuI f1A03-X2y4W, /+"E0/:2$Ք"D0q1=,~/438-LK5x6{(-B)).5J#u33i3.`44$6+55@6/&Fi.C,63/߮](c23Se@:Jй8806p8388!9F:ϧ$://Rj1 \ No newline at end of file From 9a6cfcdac124071ff9d78eae39e8de0eecb2c2e8 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Fri, 19 Jun 2026 14:31:50 -0400 Subject: [PATCH 20/46] Async generate-ahead flow scheduling; CNN path always used Deadline-free flow job computes each pair's flow ahead of the warp-only present jobs, lowering present-interval jitter. CNN flow is now unconditional (only an fp16-incapable device falls back to classical). 
--- app/src/main/cpp/winlator/vk/vk_cnn_fg.c | 3 - app/src/main/cpp/winlator/vk/vk_renderer.c | 72 +++++++++++++++++++++- app/src/main/cpp/winlator/vk/vk_state.h | 1 + 3 files changed, 71 insertions(+), 5 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c index 377831e44..70444e319 100644 --- a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c +++ b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c @@ -10,9 +10,6 @@ typedef struct CnnPC { #define CNN_FLOW_LEVELS 3 static bool cnn_wanted(void) { - char v[PROP_VALUE_MAX] = {0}; - if (__system_property_get("debug.winnative.fgcnn", v) > 0 && - (v[0] == '0' || v[0] == 'f' || v[0] == 'n')) return false; return true; } diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index f1764769b..0b4f6ca62 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -111,6 +111,7 @@ typedef enum { FG_MODE_HOLD = 0, FG_MODE_INTERP = 1, FG_MODE_PRESENT_LAST = 2, + FG_MODE_FLOW = 3, } FgMode; typedef struct { bool has_effects; bool wants_sgsr1; } SceneTargets; @@ -3252,6 +3253,57 @@ static FgPending fg_worker_generate(VkRenderer* r, const FgJob* job) { if (!r->surface_ready || !r->swapchain || r->swapchain_image_count == 0 || r->swapchain_extent.width == 0u || !r->pipelines_built) { g_fg_dropped++; return p; } + if (job->mode == FG_MODE_FLOW) { + if (!(r->fg_use_cnn && r->fg_cnn_capable && r->fg_cnn.ready)) return p; + VkFrame* ff = &r->fg_worker_frames[r->fg_worker_index]; + r->fg_worker_index = (r->fg_worker_index + 1u) % 3u; + if (ff->in_flight) vkWaitForFences(r->device, 1, &ff->in_flight, VK_TRUE, UINT64_MAX); + VkCommandBufferBeginInfo fbi = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; + fbi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + pthread_mutex_lock(&r->render_mutex); + if (!r->fg_built || r->fg_history_count < 2u || r->fg_motion_valid + || (uint32_t)(r->fg_promote_seq - 
job->seq) >= 2u) { + pthread_mutex_unlock(&r->render_mutex); return p; + } + uint32_t fcurr = job->curr_idx, fprev = job->prev_idx; + VkFgImage* fc = &r->fg_history[fcurr]; + VkFgImage* fp = &r->fg_history[fprev]; + bool fdeep = job->deep && !r->fg_extrapolate; + vkResetFences(r->device, 1, &ff->in_flight); + vkBeginCommandBuffer(ff->cmd, &fbi); + vkr_image_barrier(ff->cmd, fp->image, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + vkr_image_barrier(ff->cmd, fc->image, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + cnn_flow_pass(r, ff->cmd, fcurr, fp, fc, false, &r->fg_motion[fcurr]); + r->fg_motion_valid = true; + if (fdeep) { + cnn_flow_pass(r, ff->cmd, fcurr, fc, fp, true, &r->fg_motion_fwd[fcurr]); + r->fg_motion_fwd_valid = true; + } + vkEndCommandBuffer(ff->cmd); + VkSubmitInfo fsi = {VK_STRUCTURE_TYPE_SUBMIT_INFO}; + fsi.commandBufferCount = 1; fsi.pCommandBuffers = &ff->cmd; + pthread_mutex_lock(&r->queue_mutex); + VkResult fsr = vkQueueSubmit(r->graphics_queue, 1, &fsi, ff->in_flight); + pthread_mutex_unlock(&r->queue_mutex); + if (fsr != VK_SUCCESS) { + vkDestroyFence(r->device, ff->in_flight, NULL); + VkFenceCreateInfo rfi = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO}; rfi.flags = VK_FENCE_CREATE_SIGNALED_BIT; + vkCreateFence(r->device, &rfi, NULL, &ff->in_flight); + r->fg_motion_valid = false; r->fg_motion_fwd_valid = false; + } else { + r->fg_slot_fence[fcurr] = ff->in_flight; + r->fg_slot_fence[fprev] = ff->in_flight; + } + pthread_mutex_unlock(&r->render_mutex); + return p; + } + VkFrame* f = &r->fg_worker_frames[r->fg_worker_index]; r->fg_worker_index = 
(r->fg_worker_index + 1u) % 3u; if (f->in_flight) vkWaitForFences(r->device, 1, &f->in_flight, VK_TRUE, UINT64_MAX); @@ -3456,16 +3508,32 @@ static void fg_enqueue(VkRenderer* r, uint8_t mode, float phase) { job.seq = r->fg_promote_seq; fg_compute_deadline(r, phase); job.deadline_ns = r->fg_present_target_ns ? r->fg_present_target_ns : r->fg_present_deadline_ns; + + if (mode == FG_MODE_INTERP && r->fg_use_cnn && r->fg_cnn_capable + && r->fg_history_count >= 2u && r->fg_promote_seq != r->fg_cnn_flow_seq) { + uint32_t ftail = r->fg_job_tail; + uint32_t fnext = (ftail + 1u) % FG_JOB_RING; + if (fnext != r->fg_job_head) { + r->fg_cnn_flow_seq = r->fg_promote_seq; + FgJob fj = job; + fj.mode = (uint8_t)FG_MODE_FLOW; + fj.deadline_ns = 0u; + r->fg_job_ring[ftail] = fj; + __atomic_store_n(&r->fg_job_tail, fnext, __ATOMIC_RELEASE); + sem_post(&r->fg_gen_sem); + } + } + uint32_t tail = r->fg_job_tail; uint32_t next = (tail + 1u) % FG_JOB_RING; - if (next == r->fg_job_head) { // ring full: worker is behind -> drop (emit fewer in-betweens) + if (next == r->fg_job_head) { pthread_mutex_unlock(&r->render_mutex); g_fg_dropped++; return; } r->fg_job_ring[tail] = job; __atomic_store_n(&r->fg_job_tail, next, __ATOMIC_RELEASE); - sem_post(&r->fg_gen_sem); // sem_post INSIDE the lock — serialized with fg_worker_stop's sem_destroy + sem_post(&r->fg_gen_sem); pthread_mutex_unlock(&r->render_mutex); } diff --git a/app/src/main/cpp/winlator/vk/vk_state.h b/app/src/main/cpp/winlator/vk/vk_state.h index a4808cae5..eacf47783 100644 --- a/app/src/main/cpp/winlator/vk/vk_state.h +++ b/app/src/main/cpp/winlator/vk/vk_state.h @@ -462,6 +462,7 @@ typedef struct VkRenderer { bool fg_use_cnn; bool fg_cnn_capable; + uint32_t fg_cnn_flow_seq; VkFgCnn fg_cnn; // --- Content-duplicate detection ------------------------------------------------------------ From 70153c3a25aadfcbbd12f22a171d343a3dd41f4e Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Fri, 19 Jun 2026 16:50:02 -0400 Subject: [PATCH 
21/46] Feature ring: ingest each frame once, reuse across pairs and directions Per-history-slot feature cache eliminates redundant ingest (was 2-4x per pair); slot invalidated on re-stage. Lowers per-pair flow cost and present-interval jitter. Also fixes the forward-flow direction so deep mode does true bidirectional occlusion. --- app/src/main/cpp/winlator/vk/vk_cnn_fg.c | 39 ++++++++++++---------- app/src/main/cpp/winlator/vk/vk_renderer.c | 1 + app/src/main/cpp/winlator/vk/vk_state.h | 3 +- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c index 70444e319..885ab4d17 100644 --- a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c +++ b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c @@ -217,12 +217,13 @@ static void fg_destroy_cnn_resources(VkRenderer* r) { } if (C->ubo) { vkDestroyBuffer(r->device, C->ubo, NULL); C->ubo = VK_NULL_HANDLE; } if (C->uboMem) { vkFreeMemory(r->device, C->uboMem, NULL); C->uboMem = VK_NULL_HANDLE; } - VkCnnFeatSet* sets[2] = { &C->featPrev, &C->featCurr }; - for (int s = 0; s < 2; s++) + VkCnnFeatSet* sets[3] = { &C->feat[0], &C->feat[1], &C->feat[2] }; + for (int s = 0; s < 3; s++) for (int L = 0; L < CNN_LEVELS; L++) { cnn_free_img(r, &sets[s]->luma[L]); cnn_free_img(r, &sets[s]->feat4a[L]); cnn_free_img(r, &sets[s]->feat4b[L]); cnn_free_img(r, &sets[s]->feat8[L]); } + C->featValid[0] = C->featValid[1] = C->featValid[2] = false; for (int L = 0; L < CNN_LEVELS; L++) { cnn_free_img(r, &C->feat8_pair[L]); cnn_free_img(r, &C->dpair[L]); cnn_free_img(r, &C->hG0[L]); cnn_free_img(r, &C->hG1[L]); @@ -296,8 +297,9 @@ static bool fg_create_cnn_resources(VkRenderer* r, uint32_t w, uint32_t h) { fw[L] = f2w[L] > 1 ? f2w[L] / 2 : 1u; fh[L] = f2h[L] > 1 ? 
f2h[L] / 2 : 1u; } - VkCnnFeatSet* fsets[2] = { &C->featPrev, &C->featCurr }; - for (int s = 0; s < 2; s++) + VkCnnFeatSet* fsets[3] = { &C->feat[0], &C->feat[1], &C->feat[2] }; + C->featValid[0] = C->featValid[1] = C->featValid[2] = false; + for (int s = 0; s < 3; s++) for (int L = 0; L < CNN_LEVELS; L++) { if (!cnn_make_img(r, &fsets[s]->luma[L], lw[L], lh[L], R8, 1, false)) return false; if (!cnn_make_img(r, &fsets[s]->feat4a[L], f2w[L], f2h[L], RGBA8, 1, true)) return false; @@ -526,31 +528,32 @@ static void cnn_flow_pass(VkRenderer* r, VkCommandBuffer cmd, uint32_t parity, if (!forward) { C->curPool = (C->curPool + 1u) % (uint32_t)CNN_POOLS; vkResetDescriptorPool(r->device, C->pool[C->curPool], 0); + cnn_clear_f16(cmd, &C->occ); + cnn_clear_f16(cmd, &C->seedBlack); + cnn_clear_f16(cmd, &C->dummy); } - cnn_clear_f16(cmd, &C->occ); - cnn_clear_f16(cmd, &C->seedBlack); - cnn_clear_f16(cmd, &C->dummy); - - VkImageView pv = forward ? currFrame->view : prevFrame->view; - VkImageView cv = forward ? prevFrame->view : currFrame->view; - cnn_ingest(r, cmd, pv, &C->featPrev); - cnn_ingest(r, cmd, cv, &C->featCurr); + uint32_t prevSlot = (uint32_t)(prevFrame - r->fg_history); if (prevSlot > 2u) prevSlot = 0u; + uint32_t currSlot = (uint32_t)(currFrame - r->fg_history); if (currSlot > 2u) currSlot = 0u; + if (!C->featValid[prevSlot]) { cnn_ingest(r, cmd, prevFrame->view, &C->feat[prevSlot]); C->featValid[prevSlot] = true; } + if (!C->featValid[currSlot]) { cnn_ingest(r, cmd, currFrame->view, &C->feat[currSlot]); C->featValid[currSlot] = true; } + VkCnnFeatSet* fp = &C->feat[prevSlot]; + VkCnnFeatSet* fc = &C->feat[currSlot]; for (int L = CNN_FLOW_LEVELS - 1; L >= 0; --L) { uint32_t w = C->hG0[L].w, h = C->hG0[L].h; VkImageView seedView = (L == CNN_FLOW_LEVELS - 1) ? 
C->seedBlack.view : C->flowMid[L+1].view; - cnn_concat4(cmd, &C->featPrev.feat8[L], &C->featCurr.feat8[L], &C->feat8_pair[L]); + cnn_concat4(cmd, &fp->feat8[L], &fc->feat8[L], &C->feat8_pair[L]); cnn_to_write(cmd, C->hG0[L].image, 2); - cnn_conv_dispatch(r, cmd, C->feat8_pair[L].view, C->featCurr.luma[L].view, C->hG0[L].view, 36, 4, 2, 0, w, h); + cnn_conv_dispatch(r, cmd, C->feat8_pair[L].view, fc->luma[L].view, C->hG0[L].view, 36, 4, 2, 0, w, h); cnn_to_read(cmd, C->hG0[L].image, 2); cnn_to_write(cmd, C->hG1[L].image, 2); - cnn_conv_dispatch(r, cmd, C->hG0[L].view, C->featCurr.luma[L].view, C->hG1[L].view, 37, 2, 2, 0, w, h); + cnn_conv_dispatch(r, cmd, C->hG0[L].view, fc->luma[L].view, C->hG1[L].view, 37, 2, 2, 0, w, h); cnn_to_read(cmd, C->hG1[L].image, 2); cnn_to_write(cmd, C->hG23[L].image, 4); - cnn_conv_dispatch(r, cmd, C->hG1[L].view, C->featCurr.luma[L].view, C->hG23[L].view, 42, 4, 4, 0, w, h); + cnn_conv_dispatch(r, cmd, C->hG1[L].view, fc->luma[L].view, C->hG23[L].view, 42, 4, 4, 0, w, h); cnn_to_read(cmd, C->hG23[L].image, 4); cnn_to_write(cmd, C->hG4[L].image, 2); cnn_conv_dispatch(r, cmd, C->hG23[L].view, seedView, C->hG4[L].view, 21, 3, 2, 0, w, h); @@ -567,10 +570,10 @@ static void cnn_flow_pass(VkRenderer* r, VkCommandBuffer cmd, uint32_t parity, cnn_cost9_dispatch(r, cmd, in5, out3, 20, w, h); } cnn_to_read(cmd, C->hD1[L].image, 3); cnn_to_write(cmd, C->hD2[L].image, 2); - cnn_conv_dispatch(r, cmd, C->hD1[L].view, C->featCurr.luma[L].view, C->hD2[L].view, 22, 2, 2, 0, w, h); + cnn_conv_dispatch(r, cmd, C->hD1[L].view, fc->luma[L].view, C->hD2[L].view, 22, 2, 2, 0, w, h); cnn_to_read(cmd, C->hD2[L].image, 2); cnn_to_write(cmd, C->hD3[L].image, 1); - cnn_conv_dispatch(r, cmd, C->hD2[L].view, C->featCurr.luma[L].view, C->hD3[L].view, 26, 1, 1, 0, w, h); + cnn_conv_dispatch(r, cmd, C->hD2[L].view, fc->luma[L].view, C->hD3[L].view, 26, 1, 1, 0, w, h); cnn_to_read(cmd, C->hD3[L].image, 1); VkImageView fdst = (L == 0) ? 
outFlow->view : C->flowMid[L].view; diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index 0b4f6ca62..1f57ba6f0 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -2945,6 +2945,7 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { // When dedup is unavailable, fall back to the original behavior (advance every HOLD). uint32_t stage = (r->fg_history_curr + 1u) % 3u; VkFgImage* hist = &r->fg_history[stage]; + r->fg_cnn.featValid[stage] = false; if (r->fg_slot_fence[stage] != VK_NULL_HANDLE) vkWaitForFences(r->device, 1, &r->fg_slot_fence[stage], VK_TRUE, UINT64_MAX); vkResetFences(r->device, 1, &f->in_flight); diff --git a/app/src/main/cpp/winlator/vk/vk_state.h b/app/src/main/cpp/winlator/vk/vk_state.h index eacf47783..2f8c539a0 100644 --- a/app/src/main/cpp/winlator/vk/vk_state.h +++ b/app/src/main/cpp/winlator/vk/vk_state.h @@ -273,7 +273,8 @@ typedef struct VkFgCnn { bool ready; VkDescriptorPool pool[CNN_POOLS]; uint32_t curPool; - VkCnnFeatSet featPrev, featCurr; + VkCnnFeatSet feat[3]; + bool featValid[3]; VkCnnImg feat8_pair[CNN_LEVELS]; VkCnnImg hG0[CNN_LEVELS], hG1[CNN_LEVELS], hG23[CNN_LEVELS], hG4[CNN_LEVELS]; VkCnnImg hD0[CNN_LEVELS], hD1[CNN_LEVELS], hD2[CNN_LEVELS], hD3[CNN_LEVELS]; From a99cdd276acd409768872eba7dd1d5b01b5fc43d Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Fri, 19 Jun 2026 17:06:39 -0400 Subject: [PATCH 22/46] Default swapchain to MAILBOX so timestamp pacing is not vsync-requantized FIFO blocks the present at vsync after the precise nanosleep pacing, re-quantizing it; MAILBOX honors the paced timestamp and reaches a flat present cadence when the source is steady. 
--- app/src/main/cpp/winlator/vk/vk_renderer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index 1f57ba6f0..906183fa3 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -3695,7 +3695,7 @@ JNIEXPORT jlong JNICALL JNI_FN(nativeCreate)(JNIEnv* env, jclass clazz, (void)clazz; VkRenderer* r = calloc(1, sizeof(VkRenderer)); if (!r) return 0; - r->target_present_mode = VK_PRESENT_MODE_FIFO_KHR; + r->target_present_mode = VK_PRESENT_MODE_MAILBOX_KHR; r->active_present_mode = VK_PRESENT_MODE_FIFO_KHR; r->fg_occ_lo = 0.06f; r->fg_occ_hi = 0.25f; From 2dcdafcb57976eacc31b96134a91a9734c7c0778 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Fri, 19 Jun 2026 17:18:01 -0400 Subject: [PATCH 23/46] Fix interp flicker on the CNN path; drop the unused forward-flow pass The CNN flow stores backward-flow components in the z/w channels, which the warp shader read as static-mask/confidence and used to snap pixels to the sharp current frame; noisy low-res flow made that toggle per frame (the sharp/unsharp flicker, worst at low presets). interpolate.frag now ignores those channels on the CNN path. Also stopped forcing the forward flow, which interpolate.frag never samples. --- app/src/main/cpp/winlator/vk/shaders/interpolate.frag | 8 +++++--- app/src/main/cpp/winlator/vk/vk_renderer.c | 7 +++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/shaders/interpolate.frag b/app/src/main/cpp/winlator/vk/shaders/interpolate.frag index fa8e484ac..bda990ac9 100644 --- a/app/src/main/cpp/winlator/vk/shaders/interpolate.frag +++ b/app/src/main/cpp/winlator/vk/shaders/interpolate.frag @@ -32,6 +32,8 @@ float valid(vec3 c, highp vec2 p) { void main() { float t = clamp(pc.phase, 0.0, 1.0); float steadier = clamp(pc.occlusionLo, 0.0, 1.0); + bool cnn = pc.mode >= 3.5; + float imode = cnn ? 
pc.mode - 4.0 : pc.mode; highp vec2 norm = 2.0 / pc.resolution; vec2 mvB = texture(motionField, vUV).xy; @@ -41,11 +43,11 @@ void main() { vec3 cPrevFlat = texture(prevFrame, vUV).rgb; vec2 maskConf = texture(motionField, vUV).zw; - float staticMask = maskConf.x; + float staticMask = cnn ? 0.0 : maskConf.x; float staticPix = max(staticMask, 1.0 - smoothstep(0.02, 0.06, length(cCurrFlat - cPrevFlat))); - float uniq = smoothstep(0.08, 0.35, maskConf.y); + float uniq = cnn ? 1.0 : smoothstep(0.08, 0.35, maskConf.y); - if (pc.mode > 1.5) { + if (imode > 1.5) { highp vec2 srcPos = vUV + t * mvBn; vec3 cWarp = texture(currFrame, srcPos).rgb; float v = valid(cWarp, srcPos); diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index 906183fa3..616863838 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -3117,7 +3117,7 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { ipc.resW = 2.0f * (float)r->fg_motion[parity].width; ipc.resH = 2.0f * (float)r->fg_motion[parity].height; ipc.phase = phase; ipc.occLo = r->fg_occ_lo; ipc.occHi = r->fg_occ_hi; - ipc.mode = r->fg_extrapolate ? 2.0f : (use_fwd ? 1.0f : 0.0f); + ipc.mode = (r->fg_extrapolate ? 2.0f : (use_fwd ? 1.0f : 0.0f)) + ((r->fg_use_cnn && r->fg_cnn_capable) ? 4.0f : 0.0f); vkCmdPushConstants(f->cmd, r->pipelines.fg_interp_pipe_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(ipc), &ipc); vkCmdDraw(f->cmd, 3, 1, 0, 0); @@ -3405,7 +3405,7 @@ static FgPending fg_worker_generate(VkRenderer* r, const FgJob* job) { ipc.resW = 2.0f * (float)r->fg_motion[parity].width; ipc.resH = 2.0f * (float)r->fg_motion[parity].height; ipc.phase = job->phase; ipc.occLo = r->fg_occ_lo; ipc.occHi = r->fg_occ_hi; - ipc.mode = r->fg_extrapolate ? 2.0f : (use_fwd ? 1.0f : 0.0f); + ipc.mode = (r->fg_extrapolate ? 2.0f : (use_fwd ? 1.0f : 0.0f)) + ((r->fg_use_cnn && r->fg_cnn_capable) ? 
4.0f : 0.0f); vkCmdPushConstants(f->cmd, r->pipelines.fg_interp_pipe_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(ipc), &ipc); vkCmdDraw(f->cmd, 3, 1, 0, 0); @@ -3502,7 +3502,7 @@ static void fg_enqueue(VkRenderer* r, uint8_t mode, float phase) { uint32_t curr = r->fg_history_curr; FgJob job; job.mode = mode; - job.deep = (((r->fg_deep_mode) || (r->fg_use_cnn && r->fg_cnn_capable)) && r->fg_history_count >= 2u) ? 1u : 0u; + job.deep = (r->fg_deep_mode && r->fg_history_count >= 2u) ? 1u : 0u; job.phase = phase; job.curr_idx = curr; job.prev_idx = (curr + 2u) % 3u; @@ -3702,7 +3702,6 @@ JNIEXPORT jlong JNICALL JNI_FN(nativeCreate)(JNIEnv* env, jclass clazz, r->fg_min_step = 1; r->fg_flow_scale = 0.5f; // default = legacy half-res flow; presets override (Eco 0.2 .. Max 0.8) r->fg_use_cnn = cnn_wanted(); - r->fg_deep_mode = r->fg_use_cnn ? true : r->fg_deep_mode; r->validation_enabled = (enableValidationLayers == JNI_TRUE); pthread_mutex_init(&r->scene_mutex, NULL); pthread_mutex_init(&r->queue_mutex, NULL); From 57015c46ac5b279902bb41e60b5526745372d170 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Fri, 19 Jun 2026 17:25:05 -0400 Subject: [PATCH 24/46] Stabilize the interp detail-sharpen term on the CNN path kdet was gated on flow magnitude and blend weight, both noisy with CNN flow, causing residual shimmer on motion; on the CNN path it is now a stable per-frame value. 
--- app/src/main/cpp/winlator/vk/shaders/interpolate.frag | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/src/main/cpp/winlator/vk/shaders/interpolate.frag b/app/src/main/cpp/winlator/vk/shaders/interpolate.frag index bda990ac9..f83bc1a77 100644 --- a/app/src/main/cpp/winlator/vk/shaders/interpolate.frag +++ b/app/src/main/cpp/winlator/vk/shaders/interpolate.frag @@ -103,7 +103,8 @@ void main() { + texture(currFrame, uvB - vec2(tx.x, 0.0)).rgb + texture(currFrame, uvB + vec2(0.0, tx.y)).rgb + texture(currFrame, uvB - vec2(0.0, tx.y)).rgb) * 0.25; - float kdet = (0.55 - 0.30 * steadier) * selB * (1.0 - smoothstep(9.0, 64.0, dot(mvB, mvB))); + float kdet = cnn ? (0.30 - 0.15 * steadier) + : (0.55 - 0.30 * steadier) * selB * (1.0 - smoothstep(9.0, 64.0, dot(mvB, mvB))); col += kdet * clamp(cB - blur, -0.25, 0.25); col = mix(col, cCurrFlat, staticPix); From 5df9c900a868e54c0b310760db16376d2ae5029b Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sat, 20 Jun 2026 11:02:34 -0400 Subject: [PATCH 25/46] CNN frame-gen trained occlusion-select generate + keep FG live under the drawer Generate path: warp prev/curr by the refined flow at three pyramid scales and select per-pixel with a numerically-stable softmax (max-subtract + temperature) and a flow-consistency occlusion term, guarded so static content is held unwarped. Delta5-9 warp-follow refinement feeds the flow. Worker keeps generating while the drawer overlay is up (it only composites the game content; the menu is a separate layer). Adds a prop-gated consecutive-frame dump for offline temporal verification. Verified via dumps across all presets and 2x/3x/4x: steady pacing, no strobe, single clean objects. 
--- .../cpp/winlator/vk/shaders/cnn_generate.comp | 19 +- app/src/main/cpp/winlator/vk/vk_cnn_fg.c | 228 +++++- app/src/main/cpp/winlator/vk/vk_renderer.c | 179 ++++- app/src/main/cpp/winlator/vk/vk_state.h | 31 + .../cpp/winlator/vk/wnfg_spv/wnfg_04_spv.h | 115 +++ .../cpp/winlator/vk/wnfg_spv/wnfg_13_spv.h | 371 +++++++++ .../cpp/winlator/vk/wnfg_spv/wnfg_25_spv.h | 493 ++++++++++++ .../cpp/winlator/vk/wnfg_spv/wnfg_27_spv.h | 280 +++++++ .../cpp/winlator/vk/wnfg_spv/wnfg_28_spv.h | 281 +++++++ .../cpp/winlator/vk/wnfg_spv/wnfg_29_spv.h | 200 +++++ .../cpp/winlator/vk/wnfg_spv/wnfg_51_spv.h | 743 ++++++++++++++++++ .../cpp/winlator/vk/wnfg_spv/wnfg_53_spv.h | 340 ++++++++ .../display/renderer/VulkanRenderer.java | 42 +- 13 files changed, 3285 insertions(+), 37 deletions(-) create mode 100644 app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_04_spv.h create mode 100644 app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_13_spv.h create mode 100644 app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_25_spv.h create mode 100644 app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_27_spv.h create mode 100644 app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_28_spv.h create mode 100644 app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_29_spv.h create mode 100644 app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_51_spv.h create mode 100644 app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_53_spv.h diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp index 7586ef8a7..5180774ad 100644 --- a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp @@ -46,7 +46,16 @@ void main() { texture(logits, c2).z, texture(logits, c3).w); - vec4 w = exp(L); + const float CONS = 6.0; + vec2 e0 = texture(flowB, c0).xy * m0 - mvB.xy; + vec2 e1 = texture(flowB, c1).zw * m0 - mvB.zw; + vec2 e2 = texture(flowF, c2).xy * m0 - mvF.xy; + vec2 e3 = texture(flowF, c3).zw * m0 - mvF.zw; + L -= CONS * vec4(dot(e0, e0), dot(e1, 
e1), dot(e2, e2), dot(e3, e3)); + + const float TEMP = 4.0; + float mx = max(max(L.x, L.y), max(L.z, L.w)); + vec4 w = exp((L - mx) / TEMP); w /= dot(w, vec4(1.0)); float wb = 1.0 - t; @@ -58,6 +67,12 @@ void main() { + texture(fwdColor, c3) * (wf * w.w); float den = wb * (w.x + w.z) + wf * (w.y + w.w) + 1e-8; + vec3 col = (acc / den).rgb; + + vec3 cPrevFlat = texture(backColor, uvc).rgb; + vec3 cCurrFlat = texture(fwdColor, uvc).rgb; + float staticPix = 1.0 - smoothstep(0.02, 0.06, length(cCurrFlat - cPrevFlat)); + col = mix(col, cCurrFlat, staticPix); - imageStore(uDst, p, acc / den); + imageStore(uDst, p, vec4(col, 1.0)); } diff --git a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c index 885ab4d17..4ef4fd47c 100644 --- a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c +++ b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c @@ -7,7 +7,7 @@ typedef struct CnnPC { } CnnPC; #define CNN_GRID(w,h) ((uint32_t)(((w)+15u)/16u)), ((uint32_t)(((h)+15u)/16u)), 1u -#define CNN_FLOW_LEVELS 3 +#define CNN_FLOW_LEVELS 5 static bool cnn_wanted(void) { return true; @@ -68,15 +68,40 @@ static bool cnn_make_pipe(VkRenderer* r, const uint32_t* spv, size_t spvLen, return *outPipe != VK_NULL_HANDLE; } +static bool cnn_make_gh_pipe(VkRenderer* r, const uint32_t* spv, size_t spvLen, + int hasUBO, int nIn, int nOut, + VkDescriptorSetLayout* outDsl, VkPipelineLayout* outPL, VkPipeline* outPipe) { + CnnBind b[24]; uint32_t n = 0; + if (hasUBO) { b[n].binding = 0; b[n].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; n++; } + for (int i = 0; i < nIn; i++) { b[n].binding = 32 + (uint32_t)i; b[n].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; n++; } + for (int o = 0; o < nOut; o++) { b[n].binding = 48 + (uint32_t)o; b[n].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; n++; } + *outDsl = cnn_make_dsl(r, b, n); + if (!*outDsl) return false; + VkPipelineLayoutCreateInfo pli = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + pli.setLayoutCount = 1; pli.pSetLayouts = outDsl; + if 
(vkCreatePipelineLayout(r->device, &pli, NULL, outPL) != VK_SUCCESS) return false; + VkShaderModule mod = load_shader_module(r, spv, spvLen); + if (!mod) return false; + *outPipe = create_compute_pipeline(r, mod, *outPL); + vkDestroyShaderModule(r->device, mod, NULL); + return *outPipe != VK_NULL_HANDLE; +} + static void destroy_cnn_pipelines(VkRenderer* r) { VkPipelineSet* P = &r->pipelines; VkPipeline pipes[] = { P->cnn_pyramid_pipe, P->cnn_conv_pipe, P->cnn_cost9_pipe, - P->cnn_flowreg_pipe, P->cnn_warpfollow_pipe, P->cnn_generate_pipe }; + P->cnn_flowreg_pipe, P->cnn_warpfollow_pipe, P->cnn_generate_pipe, + P->gh_d5_pipe, P->gh_d6_pipe, P->gh_d7_pipe, P->gh_d8_pipe, P->gh_d9_pipe, P->gh_d10_pipe, + P->gh_occ_pipe, P->gh_gen_pipe }; VkPipelineLayout pls[] = { P->cnn_pyramid_pl, P->cnn_conv_pl, P->cnn_cost9_pl, - P->cnn_flowreg_pl, P->cnn_warpfollow_pl, P->cnn_generate_pl }; + P->cnn_flowreg_pl, P->cnn_warpfollow_pl, P->cnn_generate_pl, + P->gh_d5_pl, P->gh_d6_pl, P->gh_d7_pl, P->gh_d8_pl, P->gh_d9_pl, P->gh_d10_pl, + P->gh_occ_pl, P->gh_gen_pl }; VkDescriptorSetLayout dsls[] = { P->cnn_pyramid_dsl, P->cnn_conv_dsl, P->cnn_cost9_dsl, - P->cnn_flowreg_dsl, P->cnn_warpfollow_dsl, P->cnn_generate_dsl }; - for (int i = 0; i < 6; i++) { + P->cnn_flowreg_dsl, P->cnn_warpfollow_dsl, P->cnn_generate_dsl, + P->gh_d5_dsl, P->gh_d6_dsl, P->gh_d7_dsl, P->gh_d8_dsl, P->gh_d9_dsl, P->gh_d10_dsl, + P->gh_occ_dsl, P->gh_gen_dsl }; + for (int i = 0; i < 14; i++) { if (pipes[i]) vkDestroyPipeline(r->device, pipes[i], NULL); if (pls[i]) vkDestroyPipelineLayout(r->device, pls[i], NULL); if (dsls[i]) vkDestroyDescriptorSetLayout(r->device, dsls[i], NULL); @@ -87,6 +112,12 @@ static void destroy_cnn_pipelines(VkRenderer* r) { P->cnn_flowreg_pl = P->cnn_warpfollow_pl = P->cnn_generate_pl = VK_NULL_HANDLE; P->cnn_pyramid_dsl = P->cnn_conv_dsl = P->cnn_cost9_dsl = P->cnn_flowreg_dsl = P->cnn_warpfollow_dsl = P->cnn_generate_dsl = VK_NULL_HANDLE; + P->gh_d5_pipe = P->gh_d6_pipe = 
P->gh_d7_pipe = P->gh_d8_pipe = P->gh_d9_pipe = P->gh_d10_pipe = VK_NULL_HANDLE; + P->gh_d5_pl = P->gh_d6_pl = P->gh_d7_pl = P->gh_d8_pl = P->gh_d9_pl = P->gh_d10_pl = VK_NULL_HANDLE; + P->gh_d5_dsl = P->gh_d6_dsl = P->gh_d7_dsl = P->gh_d8_dsl = P->gh_d9_dsl = P->gh_d10_dsl = VK_NULL_HANDLE; + P->gh_occ_pipe = P->gh_gen_pipe = VK_NULL_HANDLE; + P->gh_occ_pl = P->gh_gen_pl = VK_NULL_HANDLE; + P->gh_occ_dsl = P->gh_gen_dsl = VK_NULL_HANDLE; } static bool create_cnn_pipelines(VkRenderer* r) { @@ -127,6 +158,24 @@ static bool create_cnn_pipelines(VkRenderer* r) { if (!cnn_make_pipe(r, cnn_generate_comp, cnn_generate_comp_size, P->cnn_generate_dsl, &P->cnn_generate_pl, &P->cnn_generate_pipe)) goto cnn_fail; + if (!cnn_make_gh_pipe(r, wnfg_25_spv, wnfg_25_spv_size, 1, 6, 1, + &P->gh_d5_dsl, &P->gh_d5_pl, &P->gh_d5_pipe)) goto cnn_fail; + if (!cnn_make_gh_pipe(r, wnfg_51_spv, wnfg_51_spv_size, 0, 2, 2, + &P->gh_d6_dsl, &P->gh_d6_pl, &P->gh_d6_pipe)) goto cnn_fail; + if (!cnn_make_gh_pipe(r, wnfg_27_spv, wnfg_27_spv_size, 0, 1, 1, + &P->gh_d7_dsl, &P->gh_d7_pl, &P->gh_d7_pipe)) goto cnn_fail; + if (!cnn_make_gh_pipe(r, wnfg_28_spv, wnfg_28_spv_size, 0, 1, 1, + &P->gh_d8_dsl, &P->gh_d8_pl, &P->gh_d8_pipe)) goto cnn_fail; + if (!cnn_make_gh_pipe(r, wnfg_29_spv, wnfg_29_spv_size, 0, 2, 1, + &P->gh_d9_dsl, &P->gh_d9_pl, &P->gh_d9_pipe)) goto cnn_fail; + if (!cnn_make_gh_pipe(r, wnfg_53_spv, wnfg_53_spv_size, 0, 3, 1, + &P->gh_d10_dsl, &P->gh_d10_pl, &P->gh_d10_pipe)) goto cnn_fail; + + if (!cnn_make_gh_pipe(r, wnfg_13_spv, wnfg_13_spv_size, 1, 2, 6, + &P->gh_occ_dsl, &P->gh_occ_pl, &P->gh_occ_pipe)) goto cnn_fail; + if (!cnn_make_gh_pipe(r, wnfg_04_spv, wnfg_04_spv_size, 1, 5, 1, + &P->gh_gen_dsl, &P->gh_gen_pl, &P->gh_gen_pipe)) goto cnn_fail; + VK_LOGI("CNN-FG pipelines built"); return true; cnn_fail: @@ -232,9 +281,19 @@ static void fg_destroy_cnn_resources(VkRenderer* r) { cnn_free_img(r, &C->hD2[L]); cnn_free_img(r, &C->hD3[L]); cnn_free_img(r, &C->hD5[L]); 
cnn_free_img(r, &C->hD6[L]); cnn_free_img(r, &C->hD7[L]); cnn_free_img(r, &C->hD8[L]); - cnn_free_img(r, &C->flowMid[L]); cnn_free_img(r, &C->flowRef[L]); + cnn_free_img(r, &C->flowMid[L]); cnn_free_img(r, &C->flowRef[L]); cnn_free_img(r, &C->logits[L]); } cnn_free_img(r, &C->occ); cnn_free_img(r, &C->seedBlack); cnn_free_img(r, &C->dummy); + for (int s = 0; s < 3; s++) for (int m = 0; m < 6; m++) cnn_free_img(r, &C->occOut[s][m]); + for (int s = 0; s < 3; s++) { + if (C->genUboMem[s]) { vkUnmapMemory(r->device, C->genUboMem[s]); } + if (C->genUbo[s]) { vkDestroyBuffer(r->device, C->genUbo[s], NULL); C->genUbo[s] = VK_NULL_HANDLE; } + if (C->genUboMem[s]) { vkFreeMemory(r->device, C->genUboMem[s], NULL); C->genUboMem[s] = VK_NULL_HANDLE; } + C->genUboMap[s] = NULL; + if (C->genSet[s]) { vkr_free_descriptor_set(r, C->genSet[s]); C->genSet[s] = VK_NULL_HANDLE; } + cnn_free_img(r, &C->gen[s]); + } + C->genReady = false; for (int pi = 0; pi < CNN_POOLS; pi++) if (C->pool[pi]) { vkDestroyDescriptorPool(r->device, C->pool[pi], NULL); C->pool[pi] = VK_NULL_HANDLE; } C->ready = false; @@ -323,11 +382,43 @@ static bool fg_create_cnn_resources(VkRenderer* r, uint32_t w, uint32_t h) { if (!cnn_make_img(r, &C->dpair[L], fw[L], fh[L], RGBA8, 2, true)) return false; if (!cnn_make_img(r, &C->flowMid[L], fw[L], fh[L], F16, 1, false)) return false; if (!cnn_make_img(r, &C->flowRef[L], fw[L], fh[L], F16, 1, false)) return false; + if (!cnn_make_img(r, &C->logits[L], fw[L], fh[L], F16, 1, false)) return false; } if (!cnn_make_img(r, &C->occ, mw, mh, F16, 1, false)) return false; if (!cnn_make_img(r, &C->seedBlack, mw, mh, F16, 1, false)) return false; if (!cnn_make_img(r, &C->dummy, 1, 1, RGBA8, 1, true)) return false; + for (int s = 0; s < 3; s++) for (int m = 0; m < 6; m++) { + uint32_t ow = mw, oh = mh; + for (int k = 0; k < m; k++) { ow = ow > 1 ? ow / 2 : 1u; oh = oh > 1 ? 
oh / 2 : 1u; } + if (!cnn_make_img(r, &C->occOut[s][m], ow, oh, R8, 1, false)) return false; + } + for (int s = 0; s < 3; s++) { + if (!cnn_make_img(r, &C->gen[s], w, h, F16, 1, false)) return false; + C->genSet[s] = vkr_alloc_descriptor_set(r); + if (C->genSet[s] == VK_NULL_HANDLE) return false; + VkDescriptorImageInfo dii = { r->fg_sampler, C->gen[s].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; + VkWriteDescriptorSet wr = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET}; + wr.dstSet = C->genSet[s]; wr.dstBinding = 0; wr.descriptorCount = 1; + wr.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; wr.pImageInfo = &dii; + vkUpdateDescriptorSets(r->device, 1, &wr, 0, NULL); + + VkBufferCreateInfo bc = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; + bc.size = 4 * sizeof(float); bc.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + bc.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + if (vkCreateBuffer(r->device, &bc, NULL, &C->genUbo[s]) != VK_SUCCESS) return false; + VkMemoryRequirements mr; vkGetBufferMemoryRequirements(r->device, C->genUbo[s], &mr); + VkMemoryAllocateInfo ai = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; + ai.allocationSize = mr.size; + ai.memoryTypeIndex = vkr_find_memory_type(r, mr.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + if (ai.memoryTypeIndex == UINT32_MAX) return false; + if (vkAllocateMemory(r->device, &ai, NULL, &C->genUboMem[s]) != VK_SUCCESS) return false; + vkBindBufferMemory(r->device, C->genUbo[s], C->genUboMem[s], 0); + if (vkMapMemory(r->device, C->genUboMem[s], 0, bc.size, 0, &C->genUboMap[s]) != VK_SUCCESS) return false; + } + C->genReady = true; + C->ready = true; VK_LOGI("CNN-FG resources allocated (L0 %ux%u, %d levels, fs=%.2f)", mw, mh, CNN_LEVELS, (double)fs); return true; @@ -453,6 +544,31 @@ static void cnn_flowreg_dispatch(VkRenderer* r, VkCommandBuffer cmd, vkCmdDispatch(cmd, CNN_GRID(w, h)); } +static void cnn_gh_dispatch(VkRenderer* r, VkCommandBuffer cmd, + VkPipeline pipe, 
VkPipelineLayout pl, VkDescriptorSetLayout dsl, + int hasUBO, const VkImageView* inViews, int nIn, + const VkImageView* outViews, int nOut, uint32_t w, uint32_t h) { + VkDescriptorSet ds = cnn_alloc(r, dsl); if (!ds) return; + VkDescriptorImageInfo si[8], oi[4]; VkDescriptorBufferInfo ub; + VkWriteDescriptorSet ws[16]; int nw = 0; + if (hasUBO) { + ub = (VkDescriptorBufferInfo){r->fg_cnn.ubo, 0, VK_WHOLE_SIZE}; + ws[nw++] = cnn_wbuf(ds, 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &ub); + } + for (int i = 0; i < nIn; i++) { + si[i] = (VkDescriptorImageInfo){r->fg_sampler, inViews[i], VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + ws[nw++] = cnn_wimg(ds, 32 + (uint32_t)i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &si[i]); + } + for (int o = 0; o < nOut; o++) { + oi[o] = (VkDescriptorImageInfo){VK_NULL_HANDLE, outViews[o], VK_IMAGE_LAYOUT_GENERAL}; + ws[nw++] = cnn_wimg(ds, 48 + (uint32_t)o, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &oi[o]); + } + vkUpdateDescriptorSets(r->device, (uint32_t)nw, ws, 0, NULL); + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipe); + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pl, 0, 1, &ds, 0, NULL); + vkCmdDispatch(cmd, CNN_GRID(w, h)); +} + static void cnn_clear_f16(VkCommandBuffer cmd, VkCnnImg* im) { cnn_to_write(cmd, im->image, im->layers); VkClearColorValue cc; memset(&cc, 0, sizeof(cc)); @@ -540,9 +656,12 @@ static void cnn_flow_pass(VkRenderer* r, VkCommandBuffer cmd, uint32_t parity, VkCnnFeatSet* fp = &C->feat[prevSlot]; VkCnnFeatSet* fc = &C->feat[currSlot]; + VkPipelineSet* P = &r->pipelines; for (int L = CNN_FLOW_LEVELS - 1; L >= 0; --L) { uint32_t w = C->hG0[L].w, h = C->hG0[L].h; - VkImageView seedView = (L == CNN_FLOW_LEVELS - 1) ? C->seedBlack.view : C->flowMid[L+1].view; + bool refine = (L <= 2); + VkImageView seedView = (L == CNN_FLOW_LEVELS - 1) ? C->seedBlack.view + : ((L+1 <= 2) ? 
C->flowRef[L+1].view : C->flowMid[L+1].view); cnn_concat4(cmd, &fp->feat8[L], &fc->feat8[L], &C->feat8_pair[L]); @@ -576,17 +695,98 @@ static void cnn_flow_pass(VkRenderer* r, VkCommandBuffer cmd, uint32_t parity, cnn_conv_dispatch(r, cmd, C->hD2[L].view, fc->luma[L].view, C->hD3[L].view, 26, 1, 1, 0, w, h); cnn_to_read(cmd, C->hD3[L].image, 1); - VkImageView fdst = (L == 0) ? outFlow->view : C->flowMid[L].view; - VkImage fimg = (L == 0) ? outFlow->image : C->flowMid[L].image; - uint32_t dw = (L == 0) ? outFlow->width : w; - uint32_t dh = (L == 0) ? outFlow->height : h; - cnn_to_write(cmd, fimg, 1); + cnn_to_write(cmd, C->flowMid[L].image, 1); cnn_flowreg_dispatch(r, cmd, C->hD3[L].layerView[0], C->hD2[L].layerView[0], seedView, - C->occ.view, fdst, dw, dh); - if (L != 0) cnn_to_read(cmd, fimg, 1); + C->occ.view, C->flowMid[L].view, w, h); + cnn_to_read(cmd, C->flowMid[L].image, 1); + + if (!refine) continue; + + cnn_to_write(cmd, C->hD5[L].image, 1); + { VkImageView in[6]={C->hD3[L].layerView[0],C->hD2[L].layerView[0],C->hG4[L].layerView[0],C->hG4[L].layerView[1],C->flowMid[L].view,seedView}; + VkImageView out[1]={C->hD5[L].layerView[0]}; + cnn_gh_dispatch(r, cmd, P->gh_d5_pipe, P->gh_d5_pl, P->gh_d5_dsl, 1, in, 6, out, 1, w, h); } + cnn_to_read(cmd, C->hD5[L].image, 1); + cnn_to_write(cmd, C->hD6[L].image, 2); + { VkImageView in[2]={C->hD5[L].layerView[0],C->hD2[L].layerView[0]}; + VkImageView out[2]={C->hD6[L].layerView[0],C->hD6[L].layerView[1]}; + cnn_gh_dispatch(r, cmd, P->gh_d6_pipe, P->gh_d6_pl, P->gh_d6_dsl, 0, in, 2, out, 2, w, h); } + cnn_to_read(cmd, C->hD6[L].image, 2); + cnn_to_write(cmd, C->hD7[L].image, 1); + { VkImageView in[1]={C->hD6[L].layerView[1]}; + VkImageView out[1]={C->hD7[L].layerView[0]}; + cnn_gh_dispatch(r, cmd, P->gh_d7_pipe, P->gh_d7_pl, P->gh_d7_dsl, 0, in, 1, out, 1, w, h); } + cnn_to_read(cmd, C->hD7[L].image, 1); + cnn_to_write(cmd, C->hD8[L].image, 1); + { VkImageView in[1]={C->hD7[L].layerView[0]}; + VkImageView 
out[1]={C->hD8[L].layerView[0]}; + cnn_gh_dispatch(r, cmd, P->gh_d8_pipe, P->gh_d8_pl, P->gh_d8_dsl, 0, in, 1, out, 1, w, h); } + cnn_to_read(cmd, C->hD8[L].image, 1); + + VkImageView rdst = (L == 0) ? outFlow->view : C->flowRef[L].view; + VkImage rimg = (L == 0) ? outFlow->image : C->flowRef[L].image; + uint32_t rw = (L == 0) ? outFlow->width : w; + uint32_t rh = (L == 0) ? outFlow->height : h; + cnn_to_write(cmd, rimg, 1); + { VkImageView in[2]={C->hD8[L].layerView[0], seedView}; + VkImageView out[1]={rdst}; + cnn_gh_dispatch(r, cmd, P->gh_d9_pipe, P->gh_d9_pl, P->gh_d9_dsl, 0, in, 2, out, 1, rw, rh); } + if (L != 0) cnn_to_read(cmd, rimg, 1); } vkr_image_barrier(cmd, outFlow->image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); } + +static void cnn_generate_frame(VkRenderer* r, VkCommandBuffer cmd, uint32_t parity, uint32_t slot, + VkImageView prevView, VkImageView currView, float phase) { + VkFgCnn* C = &r->fg_cnn; + if (!C->ready || !C->genReady) return; + VkPipelineSet* P = &r->pipelines; + uint32_t gw = C->gen[slot].w, gh = C->gen[slot].h; + + vkr_image_barrier(cmd, r->fg_motion[parity].image, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + vkr_image_barrier(cmd, C->flowRef[2].image, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + vkr_image_barrier(cmd, C->flowRef[1].image, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + + float t = phase < 0.0f ? 0.0f : (phase > 1.0f ? 1.0f : phase); + float mvScale = 1.0f; + + cnn_to_write(cmd, C->gen[slot].image, 1); + { + VkDescriptorSet ds = cnn_alloc(r, P->cnn_generate_dsl); if (!ds) return; + VkDescriptorImageInfo s32 = {r->fg_sampler, prevView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s33 = {r->fg_sampler, currView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s34 = {r->fg_sampler, C->flowRef[2].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s35 = {r->fg_sampler, C->flowRef[1].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s36 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo oi = {VK_NULL_HANDLE, C->gen[slot].view, VK_IMAGE_LAYOUT_GENERAL}; + VkWriteDescriptorSet w[6] = { + cnn_wimg(ds, 32, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &s32), + cnn_wimg(ds, 33, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &s33), + cnn_wimg(ds, 34, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &s34), + cnn_wimg(ds, 35, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &s35), + cnn_wimg(ds, 36, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &s36), + cnn_wimg(ds, 48, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &oi), + }; + vkUpdateDescriptorSets(r->device, 6, w, 0, NULL); + CnnPC pc = {0}; pc.sx = (int32_t)gw; pc.sy = (int32_t)gh; pc.t = t; pc.mvScale = mvScale; + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, P->cnn_generate_pipe); + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, P->cnn_generate_pl, 0, 1, &ds, 0, NULL); + vkCmdPushConstants(cmd, P->cnn_generate_pl, VK_SHADER_STAGE_COMPUTE_BIT, 0, 32, &pc); + vkCmdDispatch(cmd, (gw + 15u) / 16u, (gh + 15u) / 16u, 1u); + } + vkr_image_barrier(cmd, C->gen[slot].image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); +} diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index 616863838..eb258f0fd 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -48,6 +48,14 @@ #include "shaders/cnn_correlation_warpfollow_comp.spv.h" #include "shaders/cnn_flowreg_comp.spv.h" #include "shaders/cnn_generate_comp.spv.h" +#include "wnfg_spv/wnfg_04_spv.h" +#include "wnfg_spv/wnfg_13_spv.h" +#include "wnfg_spv/wnfg_25_spv.h" +#include "wnfg_spv/wnfg_27_spv.h" +#include "wnfg_spv/wnfg_28_spv.h" +#include "wnfg_spv/wnfg_29_spv.h" +#include "wnfg_spv/wnfg_51_spv.h" +#include "wnfg_spv/wnfg_53_spv.h" #include "shaders/wnfg_05_weights.h" #include "shaders/wnfg_06_weights.h" #include "shaders/wnfg_07_weights.h" @@ -88,6 +96,8 @@ static bool fg_create_cnn_resources(VkRenderer* r, uint32_t w, uint32_t h); static void fg_destroy_cnn_resources(VkRenderer* r); static void cnn_flow_pass(VkRenderer* r, VkCommandBuffer cmd, uint32_t parity, VkFgImage* prevFrame, VkFgImage* currFrame, bool forward, VkFgImage* outFlow); +static void cnn_generate_frame(VkRenderer* r, VkCommandBuffer cmd, uint32_t parity, uint32_t slot, + VkImageView prevView, VkImageView currView, float phase); static bool create_command_pool(VkRenderer* r); static bool create_descriptor_pool(VkRenderer* r, uint32_t capacity); static bool create_pipelines(VkRenderer* r); @@ -2412,6 +2422,7 @@ static void fg_free_set(VkRenderer* r, VkDescriptorSet set) { } static void fg_destroy_sig(VkRenderer* r); // content-dedup signature teardown (defined below) +static void fg_destroy_dump(VkRenderer* r); // debug burst-dump teardown (defined below) static void fg_destroy_resources(VkRenderer* r) { if (!r->device) return; @@ -2456,6 +2467,7 @@ static void fg_destroy_resources(VkRenderer* r) { memset(r->fg_coarse_fwd, 0, sizeof(r->fg_coarse_fwd)); if 
(r->fg_sampler) { vkDestroySampler(r->device, r->fg_sampler, NULL); r->fg_sampler = VK_NULL_HANDLE; } fg_destroy_sig(r); + fg_destroy_dump(r); r->fg_built = false; r->fg_history_count = 0; r->fg_history_curr = 0; @@ -2673,6 +2685,137 @@ static double fg_sig_delta(VkRenderer* r, uint32_t a, uint32_t b) { return (double)changed; // 0 == identical re-present; >0 == distinct content frame } +// --- Debug burst dump ----------------------------------------------------------------------------- +#define FG_DUMP_W 480u +#define FG_DUMP_H 270u +#define FG_DUMP_N 8u +#define FG_DUMP_BUFS 10u // FG_DUMP_N gen + prev + curr + +static bool fg_create_dump(VkRenderer* r) { + r->fg_dump_supported = false; + r->fg_dump_armed = false; r->fg_dump_count = 0; r->fg_dump_seen_zero = false; + VkImageCreateInfo ic = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO}; + ic.imageType = VK_IMAGE_TYPE_2D; ic.format = VK_FORMAT_R8G8B8A8_UNORM; + ic.extent.width = FG_DUMP_W; ic.extent.height = FG_DUMP_H; ic.extent.depth = 1; + ic.mipLevels = 1; ic.arrayLayers = 1; ic.samples = VK_SAMPLE_COUNT_1_BIT; + ic.tiling = VK_IMAGE_TILING_OPTIMAL; + ic.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + ic.sharingMode = VK_SHARING_MODE_EXCLUSIVE; ic.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + if (vkCreateImage(r->device, &ic, NULL, &r->fg_dump_img) != VK_SUCCESS) return false; + VkMemoryRequirements mr; vkGetImageMemoryRequirements(r->device, r->fg_dump_img, &mr); + VkMemoryAllocateInfo ai = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; ai.allocationSize = mr.size; + ai.memoryTypeIndex = vkr_find_memory_type(r, mr.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + if (ai.memoryTypeIndex == UINT32_MAX) return false; + if (vkAllocateMemory(r->device, &ai, NULL, &r->fg_dump_img_mem) != VK_SUCCESS) return false; + vkBindImageMemory(r->device, r->fg_dump_img, r->fg_dump_img_mem, 0); + for (uint32_t i = 0; i < FG_DUMP_BUFS; i++) { + VkBufferCreateInfo bc = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; 
+ bc.size = (VkDeviceSize)FG_DUMP_W * FG_DUMP_H * 4; bc.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; + bc.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + if (vkCreateBuffer(r->device, &bc, NULL, &r->fg_dump_buf[i]) != VK_SUCCESS) return false; + VkMemoryRequirements br; vkGetBufferMemoryRequirements(r->device, r->fg_dump_buf[i], &br); + VkMemoryAllocateInfo bai = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; bai.allocationSize = br.size; + bai.memoryTypeIndex = vkr_find_memory_type(r, br.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + if (bai.memoryTypeIndex == UINT32_MAX) return false; + if (vkAllocateMemory(r->device, &bai, NULL, &r->fg_dump_buf_mem[i]) != VK_SUCCESS) return false; + vkBindBufferMemory(r->device, r->fg_dump_buf[i], r->fg_dump_buf_mem[i], 0); + if (vkMapMemory(r->device, r->fg_dump_buf_mem[i], 0, VK_WHOLE_SIZE, 0, &r->fg_dump_ptr[i]) != VK_SUCCESS) return false; + memset(r->fg_dump_ptr[i], 0, (size_t)FG_DUMP_W * FG_DUMP_H * 4); + } + r->fg_dump_supported = true; + return true; +} + +static void fg_destroy_dump(VkRenderer* r) { + if (r->fg_dump_img) { vkDestroyImage(r->device, r->fg_dump_img, NULL); r->fg_dump_img = VK_NULL_HANDLE; } + if (r->fg_dump_img_mem) { vkFreeMemory(r->device, r->fg_dump_img_mem, NULL); r->fg_dump_img_mem = VK_NULL_HANDLE; } + for (uint32_t i = 0; i < FG_DUMP_BUFS; i++) { + if (r->fg_dump_buf[i]) { vkDestroyBuffer(r->device, r->fg_dump_buf[i], NULL); r->fg_dump_buf[i] = VK_NULL_HANDLE; } + if (r->fg_dump_buf_mem[i]) { vkFreeMemory(r->device, r->fg_dump_buf_mem[i], NULL); r->fg_dump_buf_mem[i] = VK_NULL_HANDLE; } + r->fg_dump_ptr[i] = NULL; + } + r->fg_dump_supported = false; + r->fg_dump_armed = false; r->fg_dump_count = 0; +} + +// Blit srcImg (full res, given layout) -> fg_dump_img (480x270) -> fg_dump_buf[bufIdx]. Restores srcImg. 
+static void fg_record_dump(VkRenderer* r, VkCommandBuffer cmd, VkImage srcImg, VkImageLayout srcLayout, uint32_t bufIdx) {
+    if (!r->fg_dump_supported || bufIdx >= FG_DUMP_BUFS) return;  // dump resources missing or slot out of range: record nothing
+    vkr_image_barrier(cmd, srcImg,  // source image: srcLayout -> TRANSFER_SRC so it can be blitted
+        srcLayout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,  // COMPUTE: the caller samples these images from a compute dispatch recorded just before the dump; the layout transition must wait for it
+        VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+    vkr_image_barrier(cmd, r->fg_dump_img,  // staging image: previous contents are discarded (UNDEFINED), it is reused for every capture
+        VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+        VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+        0, VK_ACCESS_TRANSFER_WRITE_BIT);
+    VkImageBlit blit = {0};  // srcOffsets[0]/dstOffsets[0] stay at the origin
+    blit.srcSubresource = (VkImageSubresourceLayers){VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
+    blit.srcOffsets[1] = (VkOffset3D){(int32_t)r->fg_dims.width, (int32_t)r->fg_dims.height, 1};  // NOTE(review): assumes every srcImg passed in is fg_dims-sized - confirm for the CNN gen ring
+    blit.dstSubresource = (VkImageSubresourceLayers){VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
+    blit.dstOffsets[1] = (VkOffset3D){(int32_t)FG_DUMP_W, (int32_t)FG_DUMP_H, 1};
+    vkCmdBlitImage(cmd, srcImg, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,  // scaled copy into the FG_DUMP_W x FG_DUMP_H staging image
+        r->fg_dump_img, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blit, VK_FILTER_LINEAR);
+    vkr_image_barrier(cmd, r->fg_dump_img,  // blit write -> copy read on the staging image
+        VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+        VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+        VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+    VkBufferImageCopy cp = {0};  // bufferOffset/rowLength/imageHeight 0: tightly packed rows from the buffer start
+    cp.imageSubresource = (VkImageSubresourceLayers){VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
+    cp.imageExtent = (VkExtent3D){FG_DUMP_W, FG_DUMP_H, 1};
+    vkCmdCopyImageToBuffer(cmd, r->fg_dump_img, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+        r->fg_dump_buf[bufIdx], 1, &cp);
+    vkr_image_barrier(cmd, srcImg,  // hand the source image back in the layout the caller gave us
+        VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, srcLayout,
+        VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,  // later readers sample it from fragment (present) and compute (flow/generate) shaders
+        VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_SHADER_READ_BIT);
+}
+
+// Edge-trigger: arm one burst on each 0->1 transition of
debug.winnative.fgdump.
+static void fg_dump_poll(VkRenderer* r) {
+    char val[PROP_VALUE_MAX] = {0};  // __system_property_get takes no length and may write up to PROP_VALUE_MAX bytes; a smaller buffer overflows on a long value
+    __system_property_get("debug.winnative.fgdump", val);
+    if (val[0] == '0') r->fg_dump_seen_zero = true;  // an unset property reads as "" and therefore does not count as a zero
+    if (val[0] == '1' && r->fg_dump_seen_zero && !r->fg_dump_armed && r->fg_dump_count == 0) {
+        r->fg_dump_armed = true; r->fg_dump_seen_zero = false;  // consume the edge: the property must read "0" again before the next burst
+        VK_LOGI("fgdump armed");
+    }
+}
+
+// Write all FG_DUMP_BUFS host buffers to disk (raw RGBA8), trying each directory in turn, then disarm. Drain the queue first (one-shot debug path).
+static void fg_dump_flush(VkRenderer* r) {
+    pthread_mutex_lock(&r->queue_mutex);
+    vkQueueWaitIdle(r->graphics_queue);
+    pthread_mutex_unlock(&r->queue_mutex);
+    static const char* dirs[2] = {
+        "/sdcard/Android/data/com.winnative.cmod/files",
+        "/data/data/com.winnative.cmod/files"
+    };
+    const size_t sz = (size_t)FG_DUMP_W * FG_DUMP_H * 4;  // bytes per captured frame (4 bytes per pixel)
+    const char* used = NULL;  // first directory that accepted the whole burst
+    for (uint32_t d = 0; d < 2 && !used; d++) {
+        char path[256];
+        bool ok = true;
+        for (uint32_t i = 0; i < FG_DUMP_BUFS && ok; i++) {
+            snprintf(path, sizeof(path), "%s/fgdump_%02u.raw", dirs[d], i);
+            FILE* fp = fopen(path, "wb");
+            if (!fp) { ok = false; break; }
+            ok = (fwrite(r->fg_dump_ptr[i], 1, sz, fp) == sz);
+            if (fclose(fp) != 0) ok = false;  // fclose flushes stdio's buffer, so a late write error (e.g. disk full) surfaces here
+        }
+        if (ok) {
+            snprintf(path, sizeof(path), "%s/fgdump_info.txt", dirs[d]);
+            FILE* fp = fopen(path, "w");
+            if (fp) { fprintf(fp, "%u %u RGBA8 %ugen+prev+curr\n", FG_DUMP_W, FG_DUMP_H, FG_DUMP_N); fclose(fp); }  // derived from the macros so the metadata cannot drift from the capture size
+            used = dirs[d];
+        }
+    }
+    if (used) VK_LOGI("fgdump written: %s/fgdump_NN.raw (00-07 gen, 08 prev, 09 curr)", used);
+    else VK_LOGE("fgdump write failed (no writable path)");
+    r->fg_dump_armed = false; r->fg_dump_count = 0;  // disarm even on failure so a new 0->1 edge can start another burst
+}
+
 static bool fg_create_resources(VkRenderer* r, uint32_t w, uint32_t h) {
     VkSamplerCreateInfo si = {VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO};
     si.magFilter = VK_FILTER_LINEAR; si.minFilter = VK_FILTER_LINEAR;
@@ -2767,6 +2910,8 @@ static bool fg_create_resources(VkRenderer* r, uint32_t w, uint32_t h) {
     // Content-dedup signature resources (best-effort; if it fails, dedup just stays disabled).
if (!fg_create_sig(r)) { fg_destroy_sig(r); VK_LOGW("FG content-dedup unavailable; running without it"); } + // Debug burst-dump resources (best-effort; if it fails, dump stays disabled). + if (!fg_create_dump(r)) { fg_destroy_dump(r); VK_LOGW("FG debug dump unavailable"); } r->fg_stage_slot = -1; r->fg_last_promote_ns = 0; @@ -3305,6 +3450,10 @@ static FgPending fg_worker_generate(VkRenderer* r, const FgJob* job) { return p; } + // Debug burst-dump: a completed burst (all submits fenced by now) gets written out and disarmed. + if (r->fg_dump_supported && r->fg_dump_armed && r->fg_dump_count >= FG_DUMP_N) fg_dump_flush(r); + + uint32_t genslot = r->fg_worker_index; VkFrame* f = &r->fg_worker_frames[r->fg_worker_index]; r->fg_worker_index = (r->fg_worker_index + 1u) % 3u; if (f->in_flight) vkWaitForFences(r->device, 1, &f->in_flight, VK_TRUE, UINT64_MAX); @@ -3383,6 +3532,24 @@ static FgPending fg_worker_generate(VkRenderer* r, const FgJob* job) { } } + bool gen_present = false; + if (do_interp && r->fg_use_cnn && r->fg_cnn_capable && r->fg_cnn.genReady && r->fg_cnn_gen + && deep && !r->fg_extrapolate && r->fg_motion_fwd_valid) { + cnn_generate_frame(r, f->cmd, parity, genslot, + r->fg_history[prev_idx].view, curr->view, job->phase); + gen_present = true; + + fg_dump_poll(r); + if (r->fg_dump_supported && r->fg_dump_armed && r->fg_dump_count < FG_DUMP_N) { + fg_record_dump(r, f->cmd, r->fg_cnn.gen[genslot].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, r->fg_dump_count); + if (r->fg_dump_count == 0) { // capture the pair's real frames once, into slots 8 and 9 + fg_record_dump(r, f->cmd, r->fg_history[prev_idx].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 8); + fg_record_dump(r, f->cmd, curr->image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 9); + } + r->fg_dump_count++; + } + } + VkClearValue clear = {0}; clear.color.float32[3] = 1.0f; VkRenderPassBeginInfo rp = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO}; @@ -3395,7 +3562,13 @@ static FgPending 
fg_worker_generate(VkRenderer* r, const FgJob* job) { VkRect2D scis = {{0, 0}, r->swapchain_extent}; vkCmdSetViewport(f->cmd, 0, 1, &vp); vkCmdSetScissor(f->cmd, 0, 1, &scis); - if (do_interp) { + if (gen_present) { + // CNN SELECT result: present gen[genslot] via the blit pipeline (mirror PRESENT_LAST). + vkCmdBindPipeline(f->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, r->pipelines.blit_pipeline); + vkCmdBindDescriptorSets(f->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, + r->pipelines.effect_layout, 0, 1, &r->fg_cnn.genSet[genslot], 0, NULL); + vkCmdDraw(f->cmd, 3, 1, 0, 0); + } else if (do_interp) { vkCmdBindPipeline(f->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, r->pipelines.fg_interp_pipeline); bool use_fwd = deep && !r->fg_extrapolate; vkCmdBindDescriptorSets(f->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, @@ -3502,7 +3675,8 @@ static void fg_enqueue(VkRenderer* r, uint8_t mode, float phase) { uint32_t curr = r->fg_history_curr; FgJob job; job.mode = mode; - job.deep = (r->fg_deep_mode && r->fg_history_count >= 2u) ? 1u : 0u; + // CNN generate needs the forward field -> force deep on the CNN path. + job.deep = ((r->fg_deep_mode || (r->fg_use_cnn && r->fg_cnn_capable)) && r->fg_history_count >= 2u) ? 1u : 0u; job.phase = phase; job.curr_idx = curr; job.prev_idx = (curr + 2u) % 3u; @@ -3702,6 +3876,7 @@ JNIEXPORT jlong JNICALL JNI_FN(nativeCreate)(JNIEnv* env, jclass clazz, r->fg_min_step = 1; r->fg_flow_scale = 0.5f; // default = legacy half-res flow; presets override (Eco 0.2 .. 
Max 0.8) r->fg_use_cnn = cnn_wanted(); + r->fg_cnn_gen = true; r->validation_enabled = (enableValidationLayers == JNI_TRUE); pthread_mutex_init(&r->scene_mutex, NULL); pthread_mutex_init(&r->queue_mutex, NULL); diff --git a/app/src/main/cpp/winlator/vk/vk_state.h b/app/src/main/cpp/winlator/vk/vk_state.h index 2f8c539a0..5fe20b3b1 100644 --- a/app/src/main/cpp/winlator/vk/vk_state.h +++ b/app/src/main/cpp/winlator/vk/vk_state.h @@ -209,6 +209,14 @@ typedef struct VkPipelineSet { cnn_flowreg_pl, cnn_warpfollow_pl, cnn_generate_pl; VkPipeline cnn_pyramid_pipe, cnn_conv_pipe, cnn_cost9_pipe, cnn_flowreg_pipe, cnn_warpfollow_pipe, cnn_generate_pipe; + + VkDescriptorSetLayout gh_d5_dsl, gh_d6_dsl, gh_d7_dsl, gh_d8_dsl, gh_d9_dsl, gh_d10_dsl; + VkPipelineLayout gh_d5_pl, gh_d6_pl, gh_d7_pl, gh_d8_pl, gh_d9_pl, gh_d10_pl; + VkPipeline gh_d5_pipe, gh_d6_pipe, gh_d7_pipe, gh_d8_pipe, gh_d9_pipe, gh_d10_pipe; + + VkDescriptorSetLayout gh_occ_dsl, gh_gen_dsl; // wnfg_13 occlusion, wnfg_04 generate + VkPipelineLayout gh_occ_pl, gh_gen_pl; + VkPipeline gh_occ_pipe, gh_gen_pipe; } VkPipelineSet; // ============================================================ @@ -282,6 +290,7 @@ typedef struct VkFgCnn { VkCnnImg dpair[CNN_LEVELS]; VkCnnImg flowMid[CNN_LEVELS]; VkCnnImg flowRef[CNN_LEVELS]; + VkCnnImg logits[CNN_LEVELS]; VkCnnImg occ; VkCnnImg seedBlack; VkCnnImg dummy; @@ -289,6 +298,15 @@ typedef struct VkFgCnn { VkBuffer w[64]; VkDeviceMemory wMem[64]; VkDeviceSize wLen[64]; + + // terminal generate (wnfg_13 occlusion + wnfg_04 generate) + VkCnnImg occOut[3][6]; + VkCnnImg gen[3]; // RGBA16F generated frame ring, full swapchain res + VkDescriptorSet genSet[3]; // sampler_set_layout, binding0 = gen[i].view (present blit) + VkBuffer genUbo[3]; // per-frame {mvScale, t, _} for wnfg_04 + VkDeviceMemory genUboMem[3]; + void* genUboMap[3]; + bool genReady; } VkFgCnn; // ============================================================ @@ -463,6 +481,7 @@ typedef struct VkRenderer { bool 
fg_use_cnn; bool fg_cnn_capable; + bool fg_cnn_gen; uint32_t fg_cnn_flow_seq; VkFgCnn fg_cnn; @@ -483,6 +502,18 @@ typedef struct VkRenderer { uint64_t fg_promote_count; // monotonic count of promotions (distinct content committed) uint64_t fg_promote_ns; // CLOCK_MONOTONIC of the most recent promotion (for Java phase anchor) + // --- Debug burst dump (debug.winnative.fgdump 1) -------------------------------------------- + // Captures FG_DUMP_N consecutive generated frames + the pair's two real history frames to disk. + bool fg_dump_supported; // dump image + buffers created OK + bool fg_dump_armed; // a burst is in progress + bool fg_dump_seen_zero; // prop read "0" since the last dump (edge-trigger gate) + uint32_t fg_dump_count; // frames captured so far in the current burst + VkImage fg_dump_img; // 480x270 RGBA8 blit target (reused per capture) + VkDeviceMemory fg_dump_img_mem; + VkBuffer fg_dump_buf[10]; // 8 gen + prev + curr, host-visible + VkDeviceMemory fg_dump_buf_mem[10]; + void* fg_dump_ptr[10]; // persistent map of fg_dump_buf + // Quad vertex buffer (window/cursor) VkBuffer quad_vbo; VkDeviceMemory quad_vbo_memory; diff --git a/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_04_spv.h b/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_04_spv.h new file mode 100644 index 000000000..d254a372f --- /dev/null +++ b/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_04_spv.h @@ -0,0 +1,115 @@ +#pragma once +#include +#include + +static const uint32_t wnfg_04_spv[] = { + 0x07230203, 0x00010000, 0x000d000b, 0x0000009a, 0x00000000, 0x00020011, 0x00000001, 0x00020011, + 0x00000032, 0x00020011, 0x00000038, 0x0006000b, 0x00000001, 0x4c534c47, 0x6474732e, 0x3035342e, + 0x00000000, 0x0003000e, 0x00000000, 0x00000001, 0x0006000f, 0x00000005, 0x00000002, 0x6e69616d, + 0x00000000, 0x00000003, 0x00060010, 0x00000002, 0x00000011, 0x00000010, 0x00000010, 0x00000001, + 0x00040047, 0x00000003, 0x0000000b, 0x0000001c, 0x00030047, 0x00000004, 0x00000019, 0x00040047, + 0x00000004, 0x00000021, 
0x00000030, 0x00040047, 0x00000004, 0x00000022, 0x00000000, 0x00040047, + 0x00000005, 0x00000021, 0x00000020, 0x00040047, 0x00000005, 0x00000022, 0x00000000, 0x00040047, + 0x00000006, 0x00000021, 0x00000022, 0x00040047, 0x00000006, 0x00000022, 0x00000000, 0x00030047, + 0x00000007, 0x00000002, 0x00050048, 0x00000007, 0x00000000, 0x00000023, 0x00000000, 0x00050048, + 0x00000007, 0x00000001, 0x00000023, 0x00000004, 0x00050048, 0x00000007, 0x00000002, 0x00000023, + 0x00000008, 0x00040047, 0x00000008, 0x00000021, 0x00000000, 0x00040047, 0x00000008, 0x00000022, + 0x00000000, 0x00040047, 0x00000009, 0x00000021, 0x00000023, 0x00040047, 0x00000009, 0x00000022, + 0x00000000, 0x00040047, 0x0000000a, 0x00000021, 0x00000024, 0x00040047, 0x0000000a, 0x00000022, + 0x00000000, 0x00040047, 0x0000000b, 0x00000021, 0x00000021, 0x00040047, 0x0000000b, 0x00000022, + 0x00000000, 0x00040047, 0x0000000c, 0x0000000b, 0x00000019, 0x00020013, 0x0000000d, 0x00030021, + 0x0000000e, 0x0000000d, 0x00040015, 0x0000000f, 0x00000020, 0x00000001, 0x00040017, 0x00000010, + 0x0000000f, 0x00000002, 0x00040015, 0x00000011, 0x00000020, 0x00000000, 0x00040017, 0x00000012, + 0x00000011, 0x00000003, 0x00040020, 0x00000013, 0x00000001, 0x00000012, 0x0004003b, 0x00000013, + 0x00000003, 0x00000001, 0x00040017, 0x00000014, 0x00000011, 0x00000002, 0x00030016, 0x00000015, + 0x00000020, 0x00090019, 0x00000016, 0x00000015, 0x00000001, 0x00000000, 0x00000000, 0x00000000, + 0x00000002, 0x00000000, 0x00040020, 0x00000017, 0x00000000, 0x00000016, 0x0004003b, 0x00000017, + 0x00000004, 0x00000000, 0x00020014, 0x00000018, 0x00040017, 0x00000019, 0x00000018, 0x00000002, + 0x00090019, 0x0000001a, 0x00000015, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000001, + 0x00000000, 0x0003001b, 0x0000001b, 0x0000001a, 0x00040020, 0x0000001c, 0x00000000, 0x0000001b, + 0x0004003b, 0x0000001c, 0x00000005, 0x00000000, 0x0004002b, 0x0000000f, 0x0000001d, 0x00000000, + 0x00040017, 0x0000001e, 0x00000015, 0x00000002, 0x0004002b, 
0x00000011, 0x0000001f, 0x00000000, + 0x0004002b, 0x00000011, 0x00000020, 0x00000001, 0x0004002b, 0x00000015, 0x00000021, 0x3f000000, + 0x0005002c, 0x0000001e, 0x00000022, 0x00000021, 0x00000021, 0x00040017, 0x00000023, 0x00000015, + 0x00000004, 0x0004003b, 0x0000001c, 0x00000006, 0x00000000, 0x0004002b, 0x00000015, 0x00000024, + 0x00000000, 0x0005001e, 0x00000007, 0x00000015, 0x00000015, 0x00000015, 0x00040020, 0x00000025, + 0x00000002, 0x00000007, 0x0004003b, 0x00000025, 0x00000008, 0x00000002, 0x00040020, 0x00000026, + 0x00000002, 0x00000015, 0x0004002b, 0x00000015, 0x00000027, 0x40000000, 0x0004002b, 0x0000000f, + 0x00000028, 0x00000001, 0x0004002b, 0x00000015, 0x00000029, 0x3f800000, 0x0004003b, 0x0000001c, + 0x00000009, 0x00000000, 0x0004003b, 0x0000001c, 0x0000000a, 0x00000000, 0x0004003b, 0x0000001c, + 0x0000000b, 0x00000000, 0x0004002b, 0x00000015, 0x0000002a, 0x322bcc77, 0x0004002b, 0x00000011, + 0x0000002b, 0x00000010, 0x0006002c, 0x00000012, 0x0000000c, 0x0000002b, 0x0000002b, 0x00000020, + 0x00050036, 0x0000000d, 0x00000002, 0x00000000, 0x0000000e, 0x000200f8, 0x0000002c, 0x000300f7, + 0x0000002d, 0x00000000, 0x000300fb, 0x0000001f, 0x0000002e, 0x000200f8, 0x0000002e, 0x0004003d, + 0x00000012, 0x0000002f, 0x00000003, 0x0007004f, 0x00000014, 0x00000030, 0x0000002f, 0x0000002f, + 0x00000000, 0x00000001, 0x0004007c, 0x00000010, 0x00000031, 0x00000030, 0x0004003d, 0x00000016, + 0x00000032, 0x00000004, 0x00040068, 0x00000010, 0x00000033, 0x00000032, 0x000500af, 0x00000019, + 0x00000034, 0x00000031, 0x00000033, 0x0004009a, 0x00000018, 0x00000035, 0x00000034, 0x000300f7, + 0x00000036, 0x00000000, 0x000400fa, 0x00000035, 0x00000037, 0x00000036, 0x000200f8, 0x00000037, + 0x000200f9, 0x0000002d, 0x000200f8, 0x00000036, 0x0004003d, 0x0000001b, 0x00000038, 0x00000005, + 0x00040064, 0x0000001a, 0x00000039, 0x00000038, 0x00050067, 0x00000010, 0x0000003a, 0x00000039, + 0x0000001d, 0x0004007c, 0x00000014, 0x0000003b, 0x0000003a, 0x00050051, 0x00000011, 0x0000003c, + 
0x0000003b, 0x00000000, 0x00040070, 0x00000015, 0x0000003d, 0x0000003c, 0x00050051, 0x00000011, + 0x0000003e, 0x0000003b, 0x00000001, 0x00040070, 0x00000015, 0x0000003f, 0x0000003e, 0x00050050, + 0x0000001e, 0x00000040, 0x0000003d, 0x0000003f, 0x00040070, 0x0000001e, 0x00000041, 0x00000030, + 0x00050081, 0x0000001e, 0x00000042, 0x00000041, 0x00000022, 0x00050088, 0x0000001e, 0x00000043, + 0x00000042, 0x00000040, 0x0004003d, 0x0000001b, 0x00000044, 0x00000006, 0x00070058, 0x00000023, + 0x00000045, 0x00000044, 0x00000043, 0x00000002, 0x00000024, 0x00050041, 0x00000026, 0x00000046, + 0x00000008, 0x0000001d, 0x0004003d, 0x00000015, 0x00000047, 0x00000046, 0x0005008e, 0x00000023, + 0x00000048, 0x00000045, 0x00000047, 0x00050041, 0x00000026, 0x00000049, 0x00000008, 0x00000028, + 0x0004003d, 0x00000015, 0x0000004a, 0x00000049, 0x00050085, 0x00000015, 0x0000004b, 0x00000027, + 0x0000004a, 0x00050083, 0x00000015, 0x0000004c, 0x00000029, 0x0000004a, 0x00050085, 0x00000015, + 0x0000004d, 0x00000027, 0x0000004c, 0x0004003d, 0x0000001b, 0x0000004e, 0x00000009, 0x00070058, + 0x00000023, 0x0000004f, 0x0000004e, 0x00000043, 0x00000002, 0x00000024, 0x0005008e, 0x00000023, + 0x00000050, 0x0000004f, 0x00000047, 0x0007004f, 0x0000001e, 0x00000051, 0x00000048, 0x00000048, + 0x00000000, 0x00000001, 0x0005008e, 0x0000001e, 0x00000052, 0x00000051, 0x0000004b, 0x00050081, + 0x0000001e, 0x00000053, 0x00000042, 0x00000052, 0x00050088, 0x0000001e, 0x00000054, 0x00000053, + 0x00000040, 0x0007004f, 0x0000001e, 0x00000055, 0x00000048, 0x00000048, 0x00000002, 0x00000003, + 0x0005008e, 0x0000001e, 0x00000056, 0x00000055, 0x0000004d, 0x00050081, 0x0000001e, 0x00000057, + 0x00000042, 0x00000056, 0x00050088, 0x0000001e, 0x00000058, 0x00000057, 0x00000040, 0x0007004f, + 0x0000001e, 0x00000059, 0x00000050, 0x00000050, 0x00000000, 0x00000001, 0x0005008e, 0x0000001e, + 0x0000005a, 0x00000059, 0x0000004b, 0x00050081, 0x0000001e, 0x0000005b, 0x00000042, 0x0000005a, + 0x00050088, 0x0000001e, 0x0000005c, 
0x0000005b, 0x00000040, 0x0007004f, 0x0000001e, 0x0000005d, + 0x00000050, 0x00000050, 0x00000002, 0x00000003, 0x0005008e, 0x0000001e, 0x0000005e, 0x0000005d, + 0x0000004d, 0x00050081, 0x0000001e, 0x0000005f, 0x00000042, 0x0000005e, 0x00050088, 0x0000001e, + 0x00000060, 0x0000005f, 0x00000040, 0x0004003d, 0x0000001b, 0x00000061, 0x0000000a, 0x00070058, + 0x00000023, 0x00000062, 0x00000061, 0x00000054, 0x00000002, 0x00000024, 0x00050051, 0x00000015, + 0x00000063, 0x00000062, 0x00000000, 0x0004003d, 0x0000001b, 0x00000064, 0x0000000a, 0x00070058, + 0x00000023, 0x00000065, 0x00000064, 0x00000058, 0x00000002, 0x00000024, 0x00050051, 0x00000015, + 0x00000066, 0x00000065, 0x00000001, 0x0004003d, 0x0000001b, 0x00000067, 0x0000000a, 0x00070058, + 0x00000023, 0x00000068, 0x00000067, 0x0000005c, 0x00000002, 0x00000024, 0x00050051, 0x00000015, + 0x00000069, 0x00000068, 0x00000002, 0x0004003d, 0x0000001b, 0x0000006a, 0x0000000a, 0x00070058, + 0x00000023, 0x0000006b, 0x0000006a, 0x00000060, 0x00000002, 0x00000024, 0x00050051, 0x00000015, + 0x0000006c, 0x0000006b, 0x00000003, 0x00070050, 0x00000023, 0x0000006d, 0x00000063, 0x00000066, + 0x00000069, 0x0000006c, 0x0006000c, 0x00000023, 0x0000006e, 0x00000001, 0x0000001b, 0x0000006d, + 0x00050051, 0x00000015, 0x0000006f, 0x0000006e, 0x00000000, 0x00050051, 0x00000015, 0x00000070, + 0x0000006e, 0x00000001, 0x00050081, 0x00000015, 0x00000071, 0x0000006f, 0x00000070, 0x00050051, + 0x00000015, 0x00000072, 0x0000006e, 0x00000002, 0x00050081, 0x00000015, 0x00000073, 0x00000071, + 0x00000072, 0x00050051, 0x00000015, 0x00000074, 0x0000006e, 0x00000003, 0x00050081, 0x00000015, + 0x00000075, 0x00000073, 0x00000074, 0x00070050, 0x00000023, 0x00000076, 0x00000075, 0x00000075, + 0x00000075, 0x00000075, 0x00050088, 0x00000023, 0x00000077, 0x0000006e, 0x00000076, 0x00050051, + 0x00000015, 0x00000078, 0x00000077, 0x00000000, 0x00050051, 0x00000015, 0x00000079, 0x00000077, + 0x00000001, 0x00050051, 0x00000015, 0x0000007a, 0x00000077, 0x00000002, 
0x00050051, 0x00000015, + 0x0000007b, 0x00000077, 0x00000003, 0x0004003d, 0x00000016, 0x0000007c, 0x00000004, 0x0004003d, + 0x0000001b, 0x0000007d, 0x00000005, 0x00070058, 0x00000023, 0x0000007e, 0x0000007d, 0x00000054, + 0x00000002, 0x00000024, 0x0005008e, 0x00000023, 0x0000007f, 0x0000007e, 0x0000004c, 0x0005008e, + 0x00000023, 0x00000080, 0x0000007f, 0x00000078, 0x0004003d, 0x0000001b, 0x00000081, 0x0000000b, + 0x00070058, 0x00000023, 0x00000082, 0x00000081, 0x00000058, 0x00000002, 0x00000024, 0x0005008e, + 0x00000023, 0x00000083, 0x00000082, 0x0000004a, 0x0005008e, 0x00000023, 0x00000084, 0x00000083, + 0x00000079, 0x00050081, 0x00000023, 0x00000085, 0x00000080, 0x00000084, 0x0004003d, 0x0000001b, + 0x00000086, 0x00000005, 0x00070058, 0x00000023, 0x00000087, 0x00000086, 0x0000005c, 0x00000002, + 0x00000024, 0x0005008e, 0x00000023, 0x00000088, 0x00000087, 0x0000004c, 0x0005008e, 0x00000023, + 0x00000089, 0x00000088, 0x0000007a, 0x00050081, 0x00000023, 0x0000008a, 0x00000085, 0x00000089, + 0x0004003d, 0x0000001b, 0x0000008b, 0x0000000b, 0x00070058, 0x00000023, 0x0000008c, 0x0000008b, + 0x00000060, 0x00000002, 0x00000024, 0x0005008e, 0x00000023, 0x0000008d, 0x0000008c, 0x0000004a, + 0x0005008e, 0x00000023, 0x0000008e, 0x0000008d, 0x0000007b, 0x00050081, 0x00000023, 0x0000008f, + 0x0000008a, 0x0000008e, 0x00050085, 0x00000015, 0x00000090, 0x0000004c, 0x00000078, 0x00050085, + 0x00000015, 0x00000091, 0x0000004a, 0x00000079, 0x00050081, 0x00000015, 0x00000092, 0x00000090, + 0x00000091, 0x00050085, 0x00000015, 0x00000093, 0x0000004c, 0x0000007a, 0x00050081, 0x00000015, + 0x00000094, 0x00000092, 0x00000093, 0x00050085, 0x00000015, 0x00000095, 0x0000004a, 0x0000007b, + 0x00050081, 0x00000015, 0x00000096, 0x00000094, 0x00000095, 0x00050081, 0x00000015, 0x00000097, + 0x00000096, 0x0000002a, 0x00070050, 0x00000023, 0x00000098, 0x00000097, 0x00000097, 0x00000097, + 0x00000097, 0x00050088, 0x00000023, 0x00000099, 0x0000008f, 0x00000098, 0x00040063, 0x0000007c, + 0x00000031, 
0x00000099, 0x000200f9, 0x0000002d, 0x000200f8, 0x0000002d, 0x000100fd, 0x00010038, +}; +static const size_t wnfg_04_spv_size = sizeof(wnfg_04_spv); diff --git a/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_13_spv.h b/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_13_spv.h new file mode 100644 index 000000000..24e8381ec --- /dev/null +++ b/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_13_spv.h @@ -0,0 +1,371 @@ +#pragma once +#include +#include + +static const uint32_t wnfg_13_spv[] = { + 0x07230203, 0x00010000, 0x000d000b, 0x00000225, 0x00000000, 0x00020011, 0x00000001, 0x00020011, + 0x00000009, 0x00020011, 0x00000031, 0x00020011, 0x00000032, 0x0006000b, 0x00000001, 0x4c534c47, + 0x6474732e, 0x3035342e, 0x00000000, 0x0003000e, 0x00000000, 0x00000001, 0x0007000f, 0x00000005, + 0x00000002, 0x6e69616d, 0x00000000, 0x00000003, 0x00000004, 0x00060010, 0x00000002, 0x00000011, + 0x00000010, 0x00000010, 0x00000001, 0x00040047, 0x00000005, 0x00000021, 0x00000020, 0x00040047, + 0x00000005, 0x00000022, 0x00000000, 0x00040047, 0x00000006, 0x00000021, 0x00000021, 0x00040047, + 0x00000006, 0x00000022, 0x00000000, 0x00030047, 0x00000007, 0x00000002, 0x00050048, 0x00000007, + 0x00000000, 0x00000023, 0x00000000, 0x00050048, 0x00000007, 0x00000001, 0x00000023, 0x00000004, + 0x00050048, 0x00000007, 0x00000002, 0x00000023, 0x00000008, 0x00040047, 0x00000008, 0x00000021, + 0x00000000, 0x00040047, 0x00000008, 0x00000022, 0x00000000, 0x00040047, 0x00000003, 0x0000000b, + 0x0000001a, 0x00040047, 0x00000004, 0x0000000b, 0x0000001b, 0x00030047, 0x00000009, 0x00000019, + 0x00040047, 0x00000009, 0x00000021, 0x00000030, 0x00040047, 0x00000009, 0x00000022, 0x00000000, + 0x00030047, 0x0000000a, 0x00000019, 0x00040047, 0x0000000a, 0x00000021, 0x00000031, 0x00040047, + 0x0000000a, 0x00000022, 0x00000000, 0x00030047, 0x0000000b, 0x00000019, 0x00040047, 0x0000000b, + 0x00000021, 0x00000032, 0x00040047, 0x0000000b, 0x00000022, 0x00000000, 0x00030047, 0x0000000c, + 0x00000019, 0x00040047, 0x0000000c, 
0x00000021, 0x00000033, 0x00040047, 0x0000000c, 0x00000022, + 0x00000000, 0x00030047, 0x0000000d, 0x00000019, 0x00040047, 0x0000000d, 0x00000021, 0x00000034, + 0x00040047, 0x0000000d, 0x00000022, 0x00000000, 0x00030047, 0x0000000e, 0x00000019, 0x00040047, + 0x0000000e, 0x00000021, 0x00000035, 0x00040047, 0x0000000e, 0x00000022, 0x00000000, 0x00040047, + 0x0000000f, 0x0000000b, 0x00000019, 0x00020013, 0x00000010, 0x00030021, 0x00000011, 0x00000010, + 0x00040015, 0x00000012, 0x00000020, 0x00000000, 0x00040017, 0x00000013, 0x00000012, 0x00000002, + 0x00030016, 0x00000014, 0x00000020, 0x00040017, 0x00000015, 0x00000014, 0x00000002, 0x00030016, + 0x00000016, 0x00000010, 0x00040015, 0x00000017, 0x00000020, 0x00000001, 0x00040017, 0x00000018, + 0x00000017, 0x00000002, 0x0004002b, 0x00000017, 0x00000019, 0x00000001, 0x0005002c, 0x00000018, + 0x0000001a, 0x00000019, 0x00000019, 0x00090019, 0x0000001b, 0x00000014, 0x00000001, 0x00000000, + 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x0003001b, 0x0000001c, 0x0000001b, 0x00040020, + 0x0000001d, 0x00000000, 0x0000001c, 0x0004003b, 0x0000001d, 0x00000005, 0x00000000, 0x0004002b, + 0x00000017, 0x0000001e, 0xffffffff, 0x0005002c, 0x00000018, 0x0000001f, 0x0000001e, 0x0000001e, + 0x0004002b, 0x00000017, 0x00000020, 0x00000000, 0x0005002c, 0x00000018, 0x00000021, 0x00000020, + 0x00000020, 0x00040017, 0x00000022, 0x00000014, 0x00000004, 0x00040017, 0x00000023, 0x00000016, + 0x00000004, 0x0004002b, 0x00000016, 0x00000024, 0x000039ad, 0x0004002b, 0x00000016, 0x00000025, + 0x0000b695, 0x0004002b, 0x00000016, 0x00000026, 0x0000b6f7, 0x0004002b, 0x00000016, 0x00000027, + 0x0000b8a8, 0x0007002c, 0x00000023, 0x00000028, 0x00000024, 0x00000025, 0x00000026, 0x00000027, + 0x0005002c, 0x00000018, 0x00000029, 0x0000001e, 0x00000020, 0x0004002b, 0x00000016, 0x0000002a, + 0x00003965, 0x0004002b, 0x00000016, 0x0000002b, 0x0000b4c2, 0x0004002b, 0x00000016, 0x0000002c, + 0x0000b7c5, 0x0004002b, 0x00000016, 0x0000002d, 0x0000bc55, 0x0007002c, 
0x00000023, 0x0000002e, + 0x0000002a, 0x0000002b, 0x0000002c, 0x0000002d, 0x0005002c, 0x00000018, 0x0000002f, 0x0000001e, + 0x00000019, 0x0004002b, 0x00000016, 0x00000030, 0x00003c71, 0x0004002b, 0x00000016, 0x00000031, + 0x0000b8b0, 0x0004002b, 0x00000016, 0x00000032, 0x0000b918, 0x0004002b, 0x00000016, 0x00000033, + 0x0000bcac, 0x0007002c, 0x00000023, 0x00000034, 0x00000030, 0x00000031, 0x00000032, 0x00000033, + 0x0005002c, 0x00000018, 0x00000035, 0x00000020, 0x0000001e, 0x0004002b, 0x00000016, 0x00000036, + 0x0000350e, 0x0004002b, 0x00000016, 0x00000037, 0x0000b748, 0x0004002b, 0x00000016, 0x00000038, + 0x0000b899, 0x0004002b, 0x00000016, 0x00000039, 0x0000bb11, 0x0007002c, 0x00000023, 0x0000003a, + 0x00000036, 0x00000037, 0x00000038, 0x00000039, 0x0004002b, 0x00000016, 0x0000003b, 0x0000336e, + 0x0004002b, 0x00000016, 0x0000003c, 0x0000b2fa, 0x0004002b, 0x00000016, 0x0000003d, 0x0000b81f, + 0x0004002b, 0x00000016, 0x0000003e, 0x0000be73, 0x0007002c, 0x00000023, 0x0000003f, 0x0000003b, + 0x0000003c, 0x0000003d, 0x0000003e, 0x0005002c, 0x00000018, 0x00000040, 0x00000020, 0x00000019, + 0x0004002b, 0x00000016, 0x00000041, 0x000039ab, 0x0004002b, 0x00000016, 0x00000042, 0x0000b806, + 0x0004002b, 0x00000016, 0x00000043, 0x0000b910, 0x0004002b, 0x00000016, 0x00000044, 0x0000be84, + 0x0007002c, 0x00000023, 0x00000045, 0x00000041, 0x00000042, 0x00000043, 0x00000044, 0x0005002c, + 0x00000018, 0x00000046, 0x00000019, 0x0000001e, 0x0004002b, 0x00000016, 0x00000047, 0x00003ace, + 0x0004002b, 0x00000016, 0x00000048, 0x0000b984, 0x0004002b, 0x00000016, 0x00000049, 0x0000bb07, + 0x0004002b, 0x00000016, 0x0000004a, 0x0000b9c1, 0x0007002c, 0x00000023, 0x0000004b, 0x00000047, + 0x00000048, 0x00000049, 0x0000004a, 0x0005002c, 0x00000018, 0x0000004c, 0x00000019, 0x00000020, + 0x0004002b, 0x00000016, 0x0000004d, 0x000039b4, 0x0004002b, 0x00000016, 0x0000004e, 0x0000b843, + 0x0004002b, 0x00000016, 0x0000004f, 0x0000ba1f, 0x0004002b, 0x00000016, 0x00000050, 0x0000bb31, + 0x0007002c, 
0x00000023, 0x00000051, 0x0000004d, 0x0000004e, 0x0000004f, 0x00000050, 0x0004002b, + 0x00000016, 0x00000052, 0x00003c74, 0x0004002b, 0x00000016, 0x00000053, 0x0000ba1c, 0x0004002b, + 0x00000016, 0x00000054, 0x0000bb03, 0x0004002b, 0x00000016, 0x00000055, 0x0000bcaa, 0x0007002c, + 0x00000023, 0x00000056, 0x00000052, 0x00000053, 0x00000054, 0x00000055, 0x0004003b, 0x0000001d, + 0x00000006, 0x00000000, 0x0004002b, 0x00000016, 0x00000057, 0x0000ba22, 0x0004002b, 0x00000016, + 0x00000058, 0x00003aea, 0x0004002b, 0x00000016, 0x00000059, 0x0000b853, 0x0004002b, 0x00000016, + 0x0000005a, 0x00003828, 0x0007002c, 0x00000023, 0x0000005b, 0x00000057, 0x00000058, 0x00000059, + 0x0000005a, 0x0004002b, 0x00000016, 0x0000005c, 0x0000bbb1, 0x0004002b, 0x00000016, 0x0000005d, + 0x000039df, 0x0004002b, 0x00000016, 0x0000005e, 0x0000b8f6, 0x0004002b, 0x00000016, 0x0000005f, + 0x000038c0, 0x0007002c, 0x00000023, 0x00000060, 0x0000005c, 0x0000005d, 0x0000005e, 0x0000005f, + 0x0004002b, 0x00000016, 0x00000061, 0x0000bd85, 0x0004002b, 0x00000016, 0x00000062, 0x00003b69, + 0x0004002b, 0x00000016, 0x00000063, 0x0000ba37, 0x0004002b, 0x00000016, 0x00000064, 0x000039d5, + 0x0007002c, 0x00000023, 0x00000065, 0x00000061, 0x00000062, 0x00000063, 0x00000064, 0x0004002b, + 0x00000016, 0x00000066, 0x0000b8e7, 0x0004002b, 0x00000016, 0x00000067, 0x00003952, 0x0004002b, + 0x00000016, 0x00000068, 0x0000b8de, 0x0004002b, 0x00000016, 0x00000069, 0x00003913, 0x0007002c, + 0x00000023, 0x0000006a, 0x00000066, 0x00000067, 0x00000068, 0x00000069, 0x0004002b, 0x00000016, + 0x0000006b, 0x0000b7a8, 0x0004002b, 0x00000016, 0x0000006c, 0x000036eb, 0x0004002b, 0x00000016, + 0x0000006d, 0x0000b919, 0x0004002b, 0x00000016, 0x0000006e, 0x000038ea, 0x0007002c, 0x00000023, + 0x0000006f, 0x0000006b, 0x0000006c, 0x0000006d, 0x0000006e, 0x0004002b, 0x00000016, 0x00000070, + 0x0000ba7c, 0x0004002b, 0x00000016, 0x00000071, 0x000038b8, 0x0004002b, 0x00000016, 0x00000072, + 0x0000b99f, 0x0004002b, 0x00000016, 0x00000073, 
0x000039f8, 0x0007002c, 0x00000023, 0x00000074, + 0x00000070, 0x00000071, 0x00000072, 0x00000073, 0x0004002b, 0x00000016, 0x00000075, 0x0000bca0, + 0x0004002b, 0x00000016, 0x00000076, 0x00003aa3, 0x0004002b, 0x00000016, 0x00000077, 0x0000ba8e, + 0x0004002b, 0x00000016, 0x00000078, 0x00003ab6, 0x0007002c, 0x00000023, 0x00000079, 0x00000075, + 0x00000076, 0x00000077, 0x00000078, 0x0004002b, 0x00000016, 0x0000007a, 0x0000bbcf, 0x0004002b, + 0x00000016, 0x0000007b, 0x00003963, 0x0004002b, 0x00000016, 0x0000007c, 0x0000ba64, 0x0004002b, + 0x00000016, 0x0000007d, 0x00003a9e, 0x0007002c, 0x00000023, 0x0000007e, 0x0000007a, 0x0000007b, + 0x0000007c, 0x0000007d, 0x0004002b, 0x00000016, 0x0000007f, 0x0000bd30, 0x0004002b, 0x00000016, + 0x00000080, 0x0000ba5a, 0x0004002b, 0x00000016, 0x00000081, 0x00003bb8, 0x0007002c, 0x00000023, + 0x00000082, 0x0000007f, 0x00000078, 0x00000080, 0x00000081, 0x0004002b, 0x00000016, 0x00000083, + 0x00003c00, 0x0004002b, 0x00000016, 0x00000084, 0x000032e4, 0x0005001e, 0x00000007, 0x00000014, + 0x00000014, 0x00000014, 0x00040020, 0x00000085, 0x00000002, 0x00000007, 0x0004003b, 0x00000085, + 0x00000008, 0x00000002, 0x0004002b, 0x00000017, 0x00000086, 0x00000002, 0x00040020, 0x00000087, + 0x00000002, 0x00000014, 0x0004002b, 0x00000012, 0x00000088, 0x00000000, 0x0004002b, 0x00000012, + 0x00000089, 0x00000001, 0x00040017, 0x0000008a, 0x00000012, 0x00000003, 0x00040020, 0x0000008b, + 0x00000001, 0x0000008a, 0x0004003b, 0x0000008b, 0x00000003, 0x00000001, 0x0004002b, 0x00000012, + 0x0000008c, 0x00000020, 0x0005002c, 0x00000013, 0x0000008d, 0x0000008c, 0x0000008c, 0x0004003b, + 0x0000008b, 0x00000004, 0x00000001, 0x0004002b, 0x00000012, 0x0000008e, 0x00000002, 0x0005002c, + 0x00000013, 0x0000008f, 0x0000008e, 0x0000008e, 0x00020014, 0x00000090, 0x00090019, 0x00000091, + 0x00000014, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x0000000f, 0x00040020, + 0x00000092, 0x00000000, 0x00000091, 0x0004003b, 0x00000092, 0x00000009, 0x00000000, 
0x00040017, + 0x00000093, 0x00000090, 0x00000002, 0x0004001c, 0x00000094, 0x00000016, 0x0000008c, 0x0004001c, + 0x00000095, 0x00000094, 0x0000008c, 0x00040020, 0x00000096, 0x00000004, 0x00000095, 0x0004003b, + 0x00000096, 0x00000097, 0x00000004, 0x00040020, 0x00000098, 0x00000004, 0x00000016, 0x0004002b, + 0x00000012, 0x00000099, 0x00000108, 0x00040020, 0x0000009a, 0x00000001, 0x00000012, 0x0004002b, + 0x00000012, 0x0000009b, 0x00000010, 0x0004002b, 0x00000016, 0x0000009c, 0x00003400, 0x0005002c, + 0x00000013, 0x0000009d, 0x0000009b, 0x0000009b, 0x0004003b, 0x00000092, 0x0000000a, 0x00000000, + 0x0004002b, 0x00000012, 0x0000009e, 0x00000008, 0x0005002c, 0x00000013, 0x0000009f, 0x0000009e, + 0x0000009e, 0x0004003b, 0x00000092, 0x0000000b, 0x00000000, 0x0004002b, 0x00000012, 0x000000a0, + 0x00000004, 0x0005002c, 0x00000013, 0x000000a1, 0x000000a0, 0x000000a0, 0x0004003b, 0x00000092, + 0x0000000c, 0x00000000, 0x0004003b, 0x00000092, 0x0000000d, 0x00000000, 0x0004003b, 0x00000092, + 0x0000000e, 0x00000000, 0x0006002c, 0x0000008a, 0x0000000f, 0x0000009b, 0x0000009b, 0x00000089, + 0x00030029, 0x00000090, 0x000000a2, 0x0005002c, 0x00000013, 0x000000a3, 0x00000088, 0x00000088, + 0x00050036, 0x00000010, 0x00000002, 0x00000000, 0x00000011, 0x000200f8, 0x000000a4, 0x0004003d, + 0x0000001c, 0x000000a5, 0x00000005, 0x00040064, 0x0000001b, 0x000000a6, 0x000000a5, 0x00050067, + 0x00000018, 0x000000a7, 0x000000a6, 0x00000020, 0x0004007c, 0x00000013, 0x000000a8, 0x000000a7, + 0x00050051, 0x00000012, 0x000000a9, 0x000000a8, 0x00000000, 0x00040070, 0x00000014, 0x000000aa, + 0x000000a9, 0x00050051, 0x00000012, 0x000000ab, 0x000000a8, 0x00000001, 0x00040070, 0x00000014, + 0x000000ac, 0x000000ab, 0x00050050, 0x00000015, 0x000000ad, 0x000000aa, 0x000000ac, 0x0004003d, + 0x0000008a, 0x000000ae, 0x00000003, 0x0007004f, 0x00000013, 0x000000af, 0x000000ae, 0x000000ae, + 0x00000000, 0x00000001, 0x00050084, 0x00000013, 0x000000b0, 0x000000af, 0x0000008d, 0x0004003d, + 0x0000008a, 0x000000b1, 
0x00000004, 0x0007004f, 0x00000013, 0x000000b2, 0x000000b1, 0x000000b1, + 0x00000000, 0x00000001, 0x00050084, 0x00000013, 0x000000b3, 0x000000b2, 0x0000008f, 0x000200f9, + 0x000000b4, 0x000200f8, 0x000000b4, 0x000700f5, 0x00000012, 0x000000b5, 0x00000088, 0x000000a4, + 0x000000b6, 0x000000b7, 0x000500b0, 0x00000090, 0x000000b8, 0x000000b5, 0x0000008e, 0x000400f6, + 0x000000b9, 0x000000b7, 0x00000000, 0x000400fa, 0x000000b8, 0x000000ba, 0x000000b9, 0x000200f8, + 0x000000ba, 0x000200f9, 0x000000bb, 0x000200f8, 0x000000bb, 0x000700f5, 0x00000012, 0x000000bc, + 0x00000088, 0x000000ba, 0x000000bd, 0x000000be, 0x000500b0, 0x00000090, 0x000000bf, 0x000000bc, + 0x0000008e, 0x000400f6, 0x000000c0, 0x000000be, 0x00000000, 0x000400fa, 0x000000bf, 0x000000c1, + 0x000000c0, 0x000200f8, 0x000000c1, 0x00050080, 0x00000013, 0x000000c2, 0x000000b0, 0x000000b3, + 0x00050050, 0x00000013, 0x000000c3, 0x000000bc, 0x000000b5, 0x00050080, 0x00000013, 0x000000c4, + 0x000000c2, 0x000000c3, 0x0004007c, 0x00000018, 0x000000c5, 0x000000c4, 0x0004006e, 0x00000018, + 0x000000c6, 0x000000ad, 0x00050082, 0x00000018, 0x000000c7, 0x000000c6, 0x0000001a, 0x0004003d, + 0x0000001c, 0x000000c8, 0x00000005, 0x00050080, 0x00000018, 0x000000c9, 0x000000c5, 0x0000001f, + 0x0008000c, 0x00000018, 0x000000ca, 0x00000001, 0x0000002d, 0x000000c9, 0x00000021, 0x000000c7, + 0x00040064, 0x0000001b, 0x000000cb, 0x000000c8, 0x0007005f, 0x00000022, 0x000000cc, 0x000000cb, + 0x000000ca, 0x00000002, 0x00000020, 0x00040073, 0x00000023, 0x000000cd, 0x000000cc, 0x00050094, + 0x00000016, 0x000000ce, 0x000000cd, 0x00000028, 0x0004003d, 0x0000001c, 0x000000cf, 0x00000005, + 0x00050080, 0x00000018, 0x000000d0, 0x000000c5, 0x00000029, 0x0008000c, 0x00000018, 0x000000d1, + 0x00000001, 0x0000002d, 0x000000d0, 0x00000021, 0x000000c7, 0x00040064, 0x0000001b, 0x000000d2, + 0x000000cf, 0x0007005f, 0x00000022, 0x000000d3, 0x000000d2, 0x000000d1, 0x00000002, 0x00000020, + 0x00040073, 0x00000023, 0x000000d4, 0x000000d3, 0x00050094, 
0x00000016, 0x000000d5, 0x000000d4, + 0x0000002e, 0x00050081, 0x00000016, 0x000000d6, 0x000000ce, 0x000000d5, 0x0004003d, 0x0000001c, + 0x000000d7, 0x00000005, 0x00050080, 0x00000018, 0x000000d8, 0x000000c5, 0x0000002f, 0x0008000c, + 0x00000018, 0x000000d9, 0x00000001, 0x0000002d, 0x000000d8, 0x00000021, 0x000000c7, 0x00040064, + 0x0000001b, 0x000000da, 0x000000d7, 0x0007005f, 0x00000022, 0x000000db, 0x000000da, 0x000000d9, + 0x00000002, 0x00000020, 0x00040073, 0x00000023, 0x000000dc, 0x000000db, 0x00050094, 0x00000016, + 0x000000dd, 0x000000dc, 0x00000034, 0x00050081, 0x00000016, 0x000000de, 0x000000d6, 0x000000dd, + 0x0004003d, 0x0000001c, 0x000000df, 0x00000005, 0x00050080, 0x00000018, 0x000000e0, 0x000000c5, + 0x00000035, 0x0008000c, 0x00000018, 0x000000e1, 0x00000001, 0x0000002d, 0x000000e0, 0x00000021, + 0x000000c7, 0x00040064, 0x0000001b, 0x000000e2, 0x000000df, 0x0007005f, 0x00000022, 0x000000e3, + 0x000000e2, 0x000000e1, 0x00000002, 0x00000020, 0x00040073, 0x00000023, 0x000000e4, 0x000000e3, + 0x00050094, 0x00000016, 0x000000e5, 0x000000e4, 0x0000003a, 0x00050081, 0x00000016, 0x000000e6, + 0x000000de, 0x000000e5, 0x0004003d, 0x0000001c, 0x000000e7, 0x00000005, 0x0008000c, 0x00000018, + 0x000000e8, 0x00000001, 0x0000002d, 0x000000c5, 0x00000021, 0x000000c7, 0x00040064, 0x0000001b, + 0x000000e9, 0x000000e7, 0x0007005f, 0x00000022, 0x000000ea, 0x000000e9, 0x000000e8, 0x00000002, + 0x00000020, 0x00040073, 0x00000023, 0x000000eb, 0x000000ea, 0x00050094, 0x00000016, 0x000000ec, + 0x000000eb, 0x0000003f, 0x00050081, 0x00000016, 0x000000ed, 0x000000e6, 0x000000ec, 0x0004003d, + 0x0000001c, 0x000000ee, 0x00000005, 0x00050080, 0x00000018, 0x000000ef, 0x000000c5, 0x00000040, + 0x0008000c, 0x00000018, 0x000000f0, 0x00000001, 0x0000002d, 0x000000ef, 0x00000021, 0x000000c7, + 0x00040064, 0x0000001b, 0x000000f1, 0x000000ee, 0x0007005f, 0x00000022, 0x000000f2, 0x000000f1, + 0x000000f0, 0x00000002, 0x00000020, 0x00040073, 0x00000023, 0x000000f3, 0x000000f2, 0x00050094, + 
0x00000016, 0x000000f4, 0x000000f3, 0x00000045, 0x00050081, 0x00000016, 0x000000f5, 0x000000ed, + 0x000000f4, 0x0004003d, 0x0000001c, 0x000000f6, 0x00000005, 0x00050080, 0x00000018, 0x000000f7, + 0x000000c5, 0x00000046, 0x0008000c, 0x00000018, 0x000000f8, 0x00000001, 0x0000002d, 0x000000f7, + 0x00000021, 0x000000c7, 0x00040064, 0x0000001b, 0x000000f9, 0x000000f6, 0x0007005f, 0x00000022, + 0x000000fa, 0x000000f9, 0x000000f8, 0x00000002, 0x00000020, 0x00040073, 0x00000023, 0x000000fb, + 0x000000fa, 0x00050094, 0x00000016, 0x000000fc, 0x000000fb, 0x0000004b, 0x00050081, 0x00000016, + 0x000000fd, 0x000000f5, 0x000000fc, 0x0004003d, 0x0000001c, 0x000000fe, 0x00000005, 0x00050080, + 0x00000018, 0x000000ff, 0x000000c5, 0x0000004c, 0x0008000c, 0x00000018, 0x00000100, 0x00000001, + 0x0000002d, 0x000000ff, 0x00000021, 0x000000c7, 0x00040064, 0x0000001b, 0x00000101, 0x000000fe, + 0x0007005f, 0x00000022, 0x00000102, 0x00000101, 0x00000100, 0x00000002, 0x00000020, 0x00040073, + 0x00000023, 0x00000103, 0x00000102, 0x00050094, 0x00000016, 0x00000104, 0x00000103, 0x00000051, + 0x00050081, 0x00000016, 0x00000105, 0x000000fd, 0x00000104, 0x0004003d, 0x0000001c, 0x00000106, + 0x00000005, 0x00050080, 0x00000018, 0x00000107, 0x000000c5, 0x0000001a, 0x0008000c, 0x00000018, + 0x00000108, 0x00000001, 0x0000002d, 0x00000107, 0x00000021, 0x000000c7, 0x00040064, 0x0000001b, + 0x00000109, 0x00000106, 0x0007005f, 0x00000022, 0x0000010a, 0x00000109, 0x00000108, 0x00000002, + 0x00000020, 0x00040073, 0x00000023, 0x0000010b, 0x0000010a, 0x00050094, 0x00000016, 0x0000010c, + 0x0000010b, 0x00000056, 0x00050081, 0x00000016, 0x0000010d, 0x00000105, 0x0000010c, 0x0004003d, + 0x0000001c, 0x0000010e, 0x00000006, 0x00040064, 0x0000001b, 0x0000010f, 0x0000010e, 0x0007005f, + 0x00000022, 0x00000110, 0x0000010f, 0x000000ca, 0x00000002, 0x00000020, 0x00040073, 0x00000023, + 0x00000111, 0x00000110, 0x00050094, 0x00000016, 0x00000112, 0x00000111, 0x0000005b, 0x00050081, + 0x00000016, 0x00000113, 0x0000010d, 
0x00000112, 0x0004003d, 0x0000001c, 0x00000114, 0x00000006, + 0x00040064, 0x0000001b, 0x00000115, 0x00000114, 0x0007005f, 0x00000022, 0x00000116, 0x00000115, + 0x000000d1, 0x00000002, 0x00000020, 0x00040073, 0x00000023, 0x00000117, 0x00000116, 0x00050094, + 0x00000016, 0x00000118, 0x00000117, 0x00000060, 0x00050081, 0x00000016, 0x00000119, 0x00000113, + 0x00000118, 0x0004003d, 0x0000001c, 0x0000011a, 0x00000006, 0x00040064, 0x0000001b, 0x0000011b, + 0x0000011a, 0x0007005f, 0x00000022, 0x0000011c, 0x0000011b, 0x000000d9, 0x00000002, 0x00000020, + 0x00040073, 0x00000023, 0x0000011d, 0x0000011c, 0x00050094, 0x00000016, 0x0000011e, 0x0000011d, + 0x00000065, 0x00050081, 0x00000016, 0x0000011f, 0x00000119, 0x0000011e, 0x0004003d, 0x0000001c, + 0x00000120, 0x00000006, 0x00040064, 0x0000001b, 0x00000121, 0x00000120, 0x0007005f, 0x00000022, + 0x00000122, 0x00000121, 0x000000e1, 0x00000002, 0x00000020, 0x00040073, 0x00000023, 0x00000123, + 0x00000122, 0x00050094, 0x00000016, 0x00000124, 0x00000123, 0x0000006a, 0x00050081, 0x00000016, + 0x00000125, 0x0000011f, 0x00000124, 0x0004003d, 0x0000001c, 0x00000126, 0x00000006, 0x00040064, + 0x0000001b, 0x00000127, 0x00000126, 0x0007005f, 0x00000022, 0x00000128, 0x00000127, 0x000000e8, + 0x00000002, 0x00000020, 0x00040073, 0x00000023, 0x00000129, 0x00000128, 0x00050094, 0x00000016, + 0x0000012a, 0x00000129, 0x0000006f, 0x00050081, 0x00000016, 0x0000012b, 0x00000125, 0x0000012a, + 0x0004003d, 0x0000001c, 0x0000012c, 0x00000006, 0x00040064, 0x0000001b, 0x0000012d, 0x0000012c, + 0x0007005f, 0x00000022, 0x0000012e, 0x0000012d, 0x000000f0, 0x00000002, 0x00000020, 0x00040073, + 0x00000023, 0x0000012f, 0x0000012e, 0x00050094, 0x00000016, 0x00000130, 0x0000012f, 0x00000074, + 0x00050081, 0x00000016, 0x00000131, 0x0000012b, 0x00000130, 0x0004003d, 0x0000001c, 0x00000132, + 0x00000006, 0x00040064, 0x0000001b, 0x00000133, 0x00000132, 0x0007005f, 0x00000022, 0x00000134, + 0x00000133, 0x000000f8, 0x00000002, 0x00000020, 0x00040073, 0x00000023, 
0x00000135, 0x00000134, + 0x00050094, 0x00000016, 0x00000136, 0x00000135, 0x00000079, 0x00050081, 0x00000016, 0x00000137, + 0x00000131, 0x00000136, 0x0004003d, 0x0000001c, 0x00000138, 0x00000006, 0x00040064, 0x0000001b, + 0x00000139, 0x00000138, 0x0007005f, 0x00000022, 0x0000013a, 0x00000139, 0x00000100, 0x00000002, + 0x00000020, 0x00040073, 0x00000023, 0x0000013b, 0x0000013a, 0x00050094, 0x00000016, 0x0000013c, + 0x0000013b, 0x0000007e, 0x00050081, 0x00000016, 0x0000013d, 0x00000137, 0x0000013c, 0x0004003d, + 0x0000001c, 0x0000013e, 0x00000006, 0x00040064, 0x0000001b, 0x0000013f, 0x0000013e, 0x0007005f, + 0x00000022, 0x00000140, 0x0000013f, 0x00000108, 0x00000002, 0x00000020, 0x00040073, 0x00000023, + 0x00000141, 0x00000140, 0x00050094, 0x00000016, 0x00000142, 0x00000141, 0x00000082, 0x00050081, + 0x00000016, 0x00000143, 0x0000013d, 0x00000142, 0x00050081, 0x00000016, 0x00000144, 0x00000143, + 0x00000084, 0x0004007f, 0x00000016, 0x00000145, 0x00000144, 0x0006000c, 0x00000016, 0x00000146, + 0x00000001, 0x0000001b, 0x00000145, 0x00050081, 0x00000016, 0x00000147, 0x00000083, 0x00000146, + 0x00050088, 0x00000016, 0x00000148, 0x00000083, 0x00000147, 0x00050041, 0x00000087, 0x00000149, + 0x00000008, 0x00000086, 0x0004003d, 0x00000014, 0x0000014a, 0x00000149, 0x00040073, 0x00000016, + 0x0000014b, 0x0000014a, 0x0007000c, 0x00000016, 0x0000014c, 0x00000001, 0x00000030, 0x0000014b, + 0x00000148, 0x00050085, 0x00000016, 0x0000014d, 0x0000014c, 0x00000148, 0x0004003d, 0x00000091, + 0x0000014e, 0x00000009, 0x00040068, 0x00000018, 0x0000014f, 0x0000014e, 0x000500b1, 0x00000093, + 0x00000150, 0x000000c5, 0x0000014f, 0x0004009b, 0x00000090, 0x00000151, 0x00000150, 0x000300f7, + 0x00000152, 0x00000000, 0x000400fa, 0x00000151, 0x00000153, 0x00000152, 0x000200f8, 0x00000153, + 0x0004003d, 0x00000091, 0x00000154, 0x00000009, 0x00040073, 0x00000014, 0x00000155, 0x0000014d, + 0x00070050, 0x00000022, 0x00000156, 0x00000155, 0x00000155, 0x00000155, 0x00000155, 0x00040063, + 0x00000154, 
0x000000c5, 0x00000156, 0x000200f9, 0x00000152, 0x000200f8, 0x00000152, 0x00050051, + 0x00000012, 0x00000157, 0x000000b3, 0x00000000, 0x00050080, 0x00000012, 0x00000158, 0x00000157, + 0x000000bc, 0x00050051, 0x00000012, 0x00000159, 0x000000b3, 0x00000001, 0x00050080, 0x00000012, + 0x0000015a, 0x00000159, 0x000000b5, 0x00060041, 0x00000098, 0x0000015b, 0x00000097, 0x00000158, + 0x0000015a, 0x0003003e, 0x0000015b, 0x0000014d, 0x000200f9, 0x000000be, 0x000200f8, 0x000000be, + 0x00050080, 0x00000012, 0x000000bd, 0x000000bc, 0x00000019, 0x000200f9, 0x000000bb, 0x000200f8, + 0x000000c0, 0x000200f9, 0x000000b7, 0x000200f8, 0x000000b7, 0x00050080, 0x00000012, 0x000000b6, + 0x000000b5, 0x00000019, 0x000200f9, 0x000000b4, 0x000200f8, 0x000000b9, 0x000400e0, 0x0000008e, + 0x0000008e, 0x00000099, 0x00050041, 0x0000009a, 0x0000015c, 0x00000004, 0x00000088, 0x0004003d, + 0x00000012, 0x0000015d, 0x0000015c, 0x000500b0, 0x00000090, 0x0000015e, 0x0000015d, 0x0000009b, + 0x000300f7, 0x0000015f, 0x00000000, 0x000400fa, 0x0000015e, 0x00000160, 0x0000015f, 0x000200f8, + 0x00000160, 0x00050041, 0x0000009a, 0x00000161, 0x00000004, 0x00000089, 0x0004003d, 0x00000012, + 0x00000162, 0x00000161, 0x000500b0, 0x00000090, 0x00000163, 0x00000162, 0x0000009b, 0x000200f9, + 0x0000015f, 0x000200f8, 0x0000015f, 0x000700f5, 0x00000090, 0x00000164, 0x0000015e, 0x000000b9, + 0x00000163, 0x00000160, 0x000300f7, 0x00000165, 0x00000000, 0x000400fa, 0x00000164, 0x00000166, + 0x00000165, 0x000200f8, 0x00000166, 0x000500c4, 0x00000012, 0x00000167, 0x0000015d, 0x00000089, + 0x00050041, 0x0000009a, 0x00000168, 0x00000004, 0x00000089, 0x0004003d, 0x00000012, 0x00000169, + 0x00000168, 0x000500c4, 0x00000012, 0x0000016a, 0x00000169, 0x00000089, 0x00060041, 0x00000098, + 0x0000016b, 0x00000097, 0x00000167, 0x0000016a, 0x0004003d, 0x00000016, 0x0000016c, 0x0000016b, + 0x00050080, 0x00000012, 0x0000016d, 0x00000167, 0x00000089, 0x00060041, 0x00000098, 0x0000016e, + 0x00000097, 0x0000016d, 0x0000016a, 0x0004003d, 
0x00000016, 0x0000016f, 0x0000016e, 0x00050081, + 0x00000016, 0x00000170, 0x0000016c, 0x0000016f, 0x00050080, 0x00000012, 0x00000171, 0x0000016a, + 0x00000089, 0x00060041, 0x00000098, 0x00000172, 0x00000097, 0x00000167, 0x00000171, 0x0004003d, + 0x00000016, 0x00000173, 0x00000172, 0x00050081, 0x00000016, 0x00000174, 0x00000170, 0x00000173, + 0x00060041, 0x00000098, 0x00000175, 0x00000097, 0x0000016d, 0x00000171, 0x0004003d, 0x00000016, + 0x00000176, 0x00000175, 0x00050081, 0x00000016, 0x00000177, 0x00000174, 0x00000176, 0x00050085, + 0x00000016, 0x00000178, 0x0000009c, 0x00000177, 0x00050084, 0x00000013, 0x00000179, 0x000000af, + 0x0000009d, 0x00050080, 0x00000013, 0x0000017a, 0x00000179, 0x000000b2, 0x0004007c, 0x00000018, + 0x0000017b, 0x0000017a, 0x0004003d, 0x00000091, 0x0000017c, 0x0000000a, 0x00040068, 0x00000018, + 0x0000017d, 0x0000017c, 0x000500b1, 0x00000093, 0x0000017e, 0x0000017b, 0x0000017d, 0x0004009b, + 0x00000090, 0x0000017f, 0x0000017e, 0x000300f7, 0x00000180, 0x00000000, 0x000400fa, 0x0000017f, + 0x00000181, 0x00000180, 0x000200f8, 0x00000181, 0x0004003d, 0x00000091, 0x00000182, 0x0000000a, + 0x00040073, 0x00000014, 0x00000183, 0x00000178, 0x00070050, 0x00000022, 0x00000184, 0x00000183, + 0x00000183, 0x00000183, 0x00000183, 0x00040063, 0x00000182, 0x0000017b, 0x00000184, 0x000200f9, + 0x00000180, 0x000200f8, 0x00000180, 0x00060041, 0x00000098, 0x00000185, 0x00000097, 0x0000015d, + 0x00000169, 0x0003003e, 0x00000185, 0x00000178, 0x000200f9, 0x00000165, 0x000200f8, 0x00000165, + 0x000400e0, 0x0000008e, 0x0000008e, 0x00000099, 0x000500b0, 0x00000090, 0x00000186, 0x0000015d, + 0x0000009e, 0x000300f7, 0x00000187, 0x00000000, 0x000400fa, 0x00000186, 0x00000188, 0x00000187, + 0x000200f8, 0x00000188, 0x00050041, 0x0000009a, 0x00000189, 0x00000004, 0x00000089, 0x0004003d, + 0x00000012, 0x0000018a, 0x00000189, 0x000500b0, 0x00000090, 0x0000018b, 0x0000018a, 0x0000009e, + 0x000200f9, 0x00000187, 0x000200f8, 0x00000187, 0x000700f5, 0x00000090, 0x0000018c, 
0x00000186, + 0x00000165, 0x0000018b, 0x00000188, 0x000300f7, 0x0000018d, 0x00000000, 0x000400fa, 0x0000018c, + 0x0000018e, 0x0000018d, 0x000200f8, 0x0000018e, 0x000500c4, 0x00000012, 0x0000018f, 0x0000015d, + 0x00000089, 0x00050041, 0x0000009a, 0x00000190, 0x00000004, 0x00000089, 0x0004003d, 0x00000012, + 0x00000191, 0x00000190, 0x000500c4, 0x00000012, 0x00000192, 0x00000191, 0x00000089, 0x00060041, + 0x00000098, 0x00000193, 0x00000097, 0x0000018f, 0x00000192, 0x0004003d, 0x00000016, 0x00000194, + 0x00000193, 0x00050080, 0x00000012, 0x00000195, 0x0000018f, 0x00000089, 0x00060041, 0x00000098, + 0x00000196, 0x00000097, 0x00000195, 0x00000192, 0x0004003d, 0x00000016, 0x00000197, 0x00000196, + 0x00050081, 0x00000016, 0x00000198, 0x00000194, 0x00000197, 0x00050080, 0x00000012, 0x00000199, + 0x00000192, 0x00000089, 0x00060041, 0x00000098, 0x0000019a, 0x00000097, 0x0000018f, 0x00000199, + 0x0004003d, 0x00000016, 0x0000019b, 0x0000019a, 0x00050081, 0x00000016, 0x0000019c, 0x00000198, + 0x0000019b, 0x00060041, 0x00000098, 0x0000019d, 0x00000097, 0x00000195, 0x00000199, 0x0004003d, + 0x00000016, 0x0000019e, 0x0000019d, 0x00050081, 0x00000016, 0x0000019f, 0x0000019c, 0x0000019e, + 0x00050085, 0x00000016, 0x000001a0, 0x0000009c, 0x0000019f, 0x00050084, 0x00000013, 0x000001a1, + 0x000000af, 0x0000009f, 0x00050080, 0x00000013, 0x000001a2, 0x000001a1, 0x000000b2, 0x0004007c, + 0x00000018, 0x000001a3, 0x000001a2, 0x0004003d, 0x00000091, 0x000001a4, 0x0000000b, 0x00040068, + 0x00000018, 0x000001a5, 0x000001a4, 0x000500b1, 0x00000093, 0x000001a6, 0x000001a3, 0x000001a5, + 0x0004009b, 0x00000090, 0x000001a7, 0x000001a6, 0x000300f7, 0x000001a8, 0x00000000, 0x000400fa, + 0x000001a7, 0x000001a9, 0x000001a8, 0x000200f8, 0x000001a9, 0x0004003d, 0x00000091, 0x000001aa, + 0x0000000b, 0x00040073, 0x00000014, 0x000001ab, 0x000001a0, 0x00070050, 0x00000022, 0x000001ac, + 0x000001ab, 0x000001ab, 0x000001ab, 0x000001ab, 0x00040063, 0x000001aa, 0x000001a3, 0x000001ac, + 0x000200f9, 0x000001a8, 
0x000200f8, 0x000001a8, 0x00060041, 0x00000098, 0x000001ad, 0x00000097, + 0x0000015d, 0x00000191, 0x0003003e, 0x000001ad, 0x000001a0, 0x000200f9, 0x0000018d, 0x000200f8, + 0x0000018d, 0x000400e0, 0x0000008e, 0x0000008e, 0x00000099, 0x000500b0, 0x00000090, 0x000001ae, + 0x0000015d, 0x000000a0, 0x000300f7, 0x000001af, 0x00000000, 0x000400fa, 0x000001ae, 0x000001b0, + 0x000001af, 0x000200f8, 0x000001b0, 0x00050041, 0x0000009a, 0x000001b1, 0x00000004, 0x00000089, + 0x0004003d, 0x00000012, 0x000001b2, 0x000001b1, 0x000500b0, 0x00000090, 0x000001b3, 0x000001b2, + 0x000000a0, 0x000200f9, 0x000001af, 0x000200f8, 0x000001af, 0x000700f5, 0x00000090, 0x000001b4, + 0x000001ae, 0x0000018d, 0x000001b3, 0x000001b0, 0x000300f7, 0x000001b5, 0x00000000, 0x000400fa, + 0x000001b4, 0x000001b6, 0x000001b5, 0x000200f8, 0x000001b6, 0x000500c4, 0x00000012, 0x000001b7, + 0x0000015d, 0x00000089, 0x00050041, 0x0000009a, 0x000001b8, 0x00000004, 0x00000089, 0x0004003d, + 0x00000012, 0x000001b9, 0x000001b8, 0x000500c4, 0x00000012, 0x000001ba, 0x000001b9, 0x00000089, + 0x00060041, 0x00000098, 0x000001bb, 0x00000097, 0x000001b7, 0x000001ba, 0x0004003d, 0x00000016, + 0x000001bc, 0x000001bb, 0x00050080, 0x00000012, 0x000001bd, 0x000001b7, 0x00000089, 0x00060041, + 0x00000098, 0x000001be, 0x00000097, 0x000001bd, 0x000001ba, 0x0004003d, 0x00000016, 0x000001bf, + 0x000001be, 0x00050081, 0x00000016, 0x000001c0, 0x000001bc, 0x000001bf, 0x00050080, 0x00000012, + 0x000001c1, 0x000001ba, 0x00000089, 0x00060041, 0x00000098, 0x000001c2, 0x00000097, 0x000001b7, + 0x000001c1, 0x0004003d, 0x00000016, 0x000001c3, 0x000001c2, 0x00050081, 0x00000016, 0x000001c4, + 0x000001c0, 0x000001c3, 0x00060041, 0x00000098, 0x000001c5, 0x00000097, 0x000001bd, 0x000001c1, + 0x0004003d, 0x00000016, 0x000001c6, 0x000001c5, 0x00050081, 0x00000016, 0x000001c7, 0x000001c4, + 0x000001c6, 0x00050085, 0x00000016, 0x000001c8, 0x0000009c, 0x000001c7, 0x00050084, 0x00000013, + 0x000001c9, 0x000000af, 0x000000a1, 0x00050080, 0x00000013, 
0x000001ca, 0x000001c9, 0x000000b2, + 0x0004007c, 0x00000018, 0x000001cb, 0x000001ca, 0x0004003d, 0x00000091, 0x000001cc, 0x0000000c, + 0x00040068, 0x00000018, 0x000001cd, 0x000001cc, 0x000500b1, 0x00000093, 0x000001ce, 0x000001cb, + 0x000001cd, 0x0004009b, 0x00000090, 0x000001cf, 0x000001ce, 0x000300f7, 0x000001d0, 0x00000000, + 0x000400fa, 0x000001cf, 0x000001d1, 0x000001d0, 0x000200f8, 0x000001d1, 0x0004003d, 0x00000091, + 0x000001d2, 0x0000000c, 0x00040073, 0x00000014, 0x000001d3, 0x000001c8, 0x00070050, 0x00000022, + 0x000001d4, 0x000001d3, 0x000001d3, 0x000001d3, 0x000001d3, 0x00040063, 0x000001d2, 0x000001cb, + 0x000001d4, 0x000200f9, 0x000001d0, 0x000200f8, 0x000001d0, 0x00060041, 0x00000098, 0x000001d5, + 0x00000097, 0x0000015d, 0x000001b9, 0x0003003e, 0x000001d5, 0x000001c8, 0x000200f9, 0x000001b5, + 0x000200f8, 0x000001b5, 0x000400e0, 0x0000008e, 0x0000008e, 0x00000099, 0x000500b0, 0x00000090, + 0x000001d6, 0x0000015d, 0x0000008e, 0x000300f7, 0x000001d7, 0x00000000, 0x000400fa, 0x000001d6, + 0x000001d8, 0x000001d7, 0x000200f8, 0x000001d8, 0x00050041, 0x0000009a, 0x000001d9, 0x00000004, + 0x00000089, 0x0004003d, 0x00000012, 0x000001da, 0x000001d9, 0x000500b0, 0x00000090, 0x000001db, + 0x000001da, 0x0000008e, 0x000200f9, 0x000001d7, 0x000200f8, 0x000001d7, 0x000700f5, 0x00000090, + 0x000001dc, 0x000001d6, 0x000001b5, 0x000001db, 0x000001d8, 0x000300f7, 0x000001dd, 0x00000000, + 0x000400fa, 0x000001dc, 0x000001de, 0x000001dd, 0x000200f8, 0x000001de, 0x000500c4, 0x00000012, + 0x000001df, 0x0000015d, 0x00000089, 0x00050041, 0x0000009a, 0x000001e0, 0x00000004, 0x00000089, + 0x0004003d, 0x00000012, 0x000001e1, 0x000001e0, 0x000500c4, 0x00000012, 0x000001e2, 0x000001e1, + 0x00000089, 0x00060041, 0x00000098, 0x000001e3, 0x00000097, 0x000001df, 0x000001e2, 0x0004003d, + 0x00000016, 0x000001e4, 0x000001e3, 0x00050080, 0x00000012, 0x000001e5, 0x000001df, 0x00000089, + 0x00060041, 0x00000098, 0x000001e6, 0x00000097, 0x000001e5, 0x000001e2, 0x0004003d, 0x00000016, + 
0x000001e7, 0x000001e6, 0x00050081, 0x00000016, 0x000001e8, 0x000001e4, 0x000001e7, 0x00050080, + 0x00000012, 0x000001e9, 0x000001e2, 0x00000089, 0x00060041, 0x00000098, 0x000001ea, 0x00000097, + 0x000001df, 0x000001e9, 0x0004003d, 0x00000016, 0x000001eb, 0x000001ea, 0x00050081, 0x00000016, + 0x000001ec, 0x000001e8, 0x000001eb, 0x00060041, 0x00000098, 0x000001ed, 0x00000097, 0x000001e5, + 0x000001e9, 0x0004003d, 0x00000016, 0x000001ee, 0x000001ed, 0x00050081, 0x00000016, 0x000001ef, + 0x000001ec, 0x000001ee, 0x00050085, 0x00000016, 0x000001f0, 0x0000009c, 0x000001ef, 0x00050084, + 0x00000013, 0x000001f1, 0x000000af, 0x0000008f, 0x00050080, 0x00000013, 0x000001f2, 0x000001f1, + 0x000000b2, 0x0004007c, 0x00000018, 0x000001f3, 0x000001f2, 0x0004003d, 0x00000091, 0x000001f4, + 0x0000000d, 0x00040068, 0x00000018, 0x000001f5, 0x000001f4, 0x000500b1, 0x00000093, 0x000001f6, + 0x000001f3, 0x000001f5, 0x0004009b, 0x00000090, 0x000001f7, 0x000001f6, 0x000300f7, 0x000001f8, + 0x00000000, 0x000400fa, 0x000001f7, 0x000001f9, 0x000001f8, 0x000200f8, 0x000001f9, 0x0004003d, + 0x00000091, 0x000001fa, 0x0000000d, 0x00040073, 0x00000014, 0x000001fb, 0x000001f0, 0x00070050, + 0x00000022, 0x000001fc, 0x000001fb, 0x000001fb, 0x000001fb, 0x000001fb, 0x00040063, 0x000001fa, + 0x000001f3, 0x000001fc, 0x000200f9, 0x000001f8, 0x000200f8, 0x000001f8, 0x00060041, 0x00000098, + 0x000001fd, 0x00000097, 0x0000015d, 0x000001e1, 0x0003003e, 0x000001fd, 0x000001f0, 0x000200f9, + 0x000001dd, 0x000200f8, 0x000001dd, 0x000400e0, 0x0000008e, 0x0000008e, 0x00000099, 0x000500b0, + 0x00000090, 0x000001fe, 0x0000015d, 0x00000089, 0x000300f7, 0x000001ff, 0x00000000, 0x000400fa, + 0x000001fe, 0x00000200, 0x000001ff, 0x000200f8, 0x00000200, 0x00050041, 0x0000009a, 0x00000201, + 0x00000004, 0x00000089, 0x0004003d, 0x00000012, 0x00000202, 0x00000201, 0x000500b0, 0x00000090, + 0x00000203, 0x00000202, 0x00000089, 0x000200f9, 0x000001ff, 0x000200f8, 0x000001ff, 0x000700f5, + 0x00000090, 0x00000204, 0x000001fe, 
0x000001dd, 0x00000203, 0x00000200, 0x000300f7, 0x00000205, + 0x00000000, 0x000400fa, 0x00000204, 0x00000206, 0x00000205, 0x000200f8, 0x00000206, 0x000500c4, + 0x00000012, 0x00000207, 0x0000015d, 0x00000089, 0x00050041, 0x0000009a, 0x00000208, 0x00000004, + 0x00000089, 0x0004003d, 0x00000012, 0x00000209, 0x00000208, 0x000500c4, 0x00000012, 0x0000020a, + 0x00000209, 0x00000089, 0x00060041, 0x00000098, 0x0000020b, 0x00000097, 0x00000207, 0x0000020a, + 0x0004003d, 0x00000016, 0x0000020c, 0x0000020b, 0x00050080, 0x00000012, 0x0000020d, 0x00000207, + 0x00000089, 0x00060041, 0x00000098, 0x0000020e, 0x00000097, 0x0000020d, 0x0000020a, 0x0004003d, + 0x00000016, 0x0000020f, 0x0000020e, 0x00050081, 0x00000016, 0x00000210, 0x0000020c, 0x0000020f, + 0x00050080, 0x00000012, 0x00000211, 0x0000020a, 0x00000089, 0x00060041, 0x00000098, 0x00000212, + 0x00000097, 0x00000207, 0x00000211, 0x0004003d, 0x00000016, 0x00000213, 0x00000212, 0x00050081, + 0x00000016, 0x00000214, 0x00000210, 0x00000213, 0x00060041, 0x00000098, 0x00000215, 0x00000097, + 0x0000020d, 0x00000211, 0x0004003d, 0x00000016, 0x00000216, 0x00000215, 0x00050081, 0x00000016, + 0x00000217, 0x00000214, 0x00000216, 0x00050085, 0x00000016, 0x00000218, 0x0000009c, 0x00000217, + 0x00050080, 0x00000013, 0x00000219, 0x000000af, 0x000000b2, 0x0004007c, 0x00000018, 0x0000021a, + 0x00000219, 0x0004003d, 0x00000091, 0x0000021b, 0x0000000e, 0x00040068, 0x00000018, 0x0000021c, + 0x0000021b, 0x000500b1, 0x00000093, 0x0000021d, 0x0000021a, 0x0000021c, 0x0004009b, 0x00000090, + 0x0000021e, 0x0000021d, 0x000300f7, 0x0000021f, 0x00000000, 0x000400fa, 0x0000021e, 0x00000220, + 0x0000021f, 0x000200f8, 0x00000220, 0x0004003d, 0x00000091, 0x00000221, 0x0000000e, 0x00040073, + 0x00000014, 0x00000222, 0x00000218, 0x00070050, 0x00000022, 0x00000223, 0x00000222, 0x00000222, + 0x00000222, 0x00000222, 0x00040063, 0x00000221, 0x0000021a, 0x00000223, 0x000200f9, 0x0000021f, + 0x000200f8, 0x0000021f, 0x00060041, 0x00000098, 0x00000224, 0x00000097, 
0x0000015d, 0x00000209, + 0x0003003e, 0x00000224, 0x00000218, 0x000200f9, 0x00000205, 0x000200f8, 0x00000205, 0x000100fd, + 0x00010038, +}; +static const size_t wnfg_13_spv_size = sizeof(wnfg_13_spv); diff --git a/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_25_spv.h b/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_25_spv.h new file mode 100644 index 000000000..6bb9e8bc8 --- /dev/null +++ b/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_25_spv.h @@ -0,0 +1,493 @@ +#pragma once +#include +#include + +static const uint32_t wnfg_25_spv[] = { + 0x07230203, 0x00010000, 0x000d000b, 0x000002fc, 0x00000000, 0x00020011, 0x00000001, 0x00020011, + 0x00000009, 0x00020011, 0x00000032, 0x0006000b, 0x00000001, 0x4c534c47, 0x6474732e, 0x3035342e, + 0x00000000, 0x0003000e, 0x00000000, 0x00000001, 0x0006000f, 0x00000005, 0x00000002, 0x6e69616d, + 0x00000000, 0x00000003, 0x00060010, 0x00000002, 0x00000011, 0x00000010, 0x00000010, 0x00000001, + 0x00040047, 0x00000003, 0x0000000b, 0x0000001c, 0x00030047, 0x00000004, 0x00000019, 0x00040047, + 0x00000004, 0x00000021, 0x00000030, 0x00040047, 0x00000004, 0x00000022, 0x00000000, 0x00040047, + 0x00000005, 0x00000021, 0x00000020, 0x00040047, 0x00000005, 0x00000022, 0x00000000, 0x00030047, + 0x00000006, 0x00000002, 0x00050048, 0x00000006, 0x00000000, 0x00000023, 0x00000000, 0x00050048, + 0x00000006, 0x00000001, 0x00000023, 0x00000004, 0x00050048, 0x00000006, 0x00000002, 0x00000023, + 0x00000008, 0x00040047, 0x00000007, 0x00000021, 0x00000000, 0x00040047, 0x00000007, 0x00000022, + 0x00000000, 0x00040047, 0x00000008, 0x00000021, 0x00000024, 0x00040047, 0x00000008, 0x00000022, + 0x00000000, 0x00040047, 0x00000009, 0x00000021, 0x00000025, 0x00040047, 0x00000009, 0x00000022, + 0x00000000, 0x00040047, 0x0000000a, 0x00000021, 0x00000021, 0x00040047, 0x0000000a, 0x00000022, + 0x00000000, 0x00040047, 0x0000000b, 0x00000021, 0x00000022, 0x00040047, 0x0000000b, 0x00000022, + 0x00000000, 0x00040047, 0x0000000c, 0x00000021, 0x00000023, 0x00040047, 0x0000000c, 
0x00000022, + 0x00000000, 0x00040047, 0x0000000d, 0x0000000b, 0x00000019, 0x00020013, 0x0000000e, 0x00030021, + 0x0000000f, 0x0000000e, 0x00040015, 0x00000010, 0x00000020, 0x00000001, 0x00040017, 0x00000011, + 0x00000010, 0x00000002, 0x00040015, 0x00000012, 0x00000020, 0x00000000, 0x00040017, 0x00000013, + 0x00000012, 0x00000003, 0x00040020, 0x00000014, 0x00000001, 0x00000013, 0x0004003b, 0x00000014, + 0x00000003, 0x00000001, 0x00040017, 0x00000015, 0x00000012, 0x00000002, 0x00030016, 0x00000016, + 0x00000020, 0x00090019, 0x00000017, 0x00000016, 0x00000001, 0x00000000, 0x00000000, 0x00000000, + 0x00000002, 0x00000004, 0x00040020, 0x00000018, 0x00000000, 0x00000017, 0x0004003b, 0x00000018, + 0x00000004, 0x00000000, 0x00020014, 0x00000019, 0x00040017, 0x0000001a, 0x00000019, 0x00000002, + 0x00090019, 0x0000001b, 0x00000016, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000001, + 0x00000000, 0x0003001b, 0x0000001c, 0x0000001b, 0x00040020, 0x0000001d, 0x00000000, 0x0000001c, + 0x0004003b, 0x0000001d, 0x00000005, 0x00000000, 0x0004002b, 0x00000010, 0x0000001e, 0x00000000, + 0x00040017, 0x0000001f, 0x00000016, 0x00000002, 0x0004002b, 0x00000012, 0x00000020, 0x00000000, + 0x0004002b, 0x00000012, 0x00000021, 0x00000001, 0x0004002b, 0x00000016, 0x00000022, 0x3f000000, + 0x0005002c, 0x0000001f, 0x00000023, 0x00000022, 0x00000022, 0x0004002b, 0x00000010, 0x00000024, + 0x00000001, 0x0005002c, 0x00000011, 0x00000025, 0x00000024, 0x00000024, 0x00030016, 0x00000026, + 0x00000010, 0x0004002b, 0x00000016, 0x00000027, 0x3f800000, 0x0005001e, 0x00000006, 0x00000016, + 0x00000016, 0x00000016, 0x00040020, 0x00000028, 0x00000002, 0x00000006, 0x0004003b, 0x00000028, + 0x00000007, 0x00000002, 0x00040020, 0x00000029, 0x00000002, 0x00000016, 0x00040017, 0x0000002a, + 0x00000026, 0x00000004, 0x0004003b, 0x0000001d, 0x00000008, 0x00000000, 0x0004002b, 0x00000016, + 0x0000002b, 0x00000000, 0x00040017, 0x0000002c, 0x00000016, 0x00000004, 0x00040017, 0x0000002d, + 0x00000026, 0x00000002, 
0x0004003b, 0x0000001d, 0x00000009, 0x00000000, 0x0004002b, 0x00000026, + 0x0000002e, 0x00000000, 0x0007002c, 0x0000002a, 0x0000002f, 0x0000002e, 0x0000002e, 0x0000002e, + 0x0000002e, 0x0004003b, 0x0000001d, 0x0000000a, 0x00000000, 0x0004003b, 0x0000001d, 0x0000000b, + 0x00000000, 0x0004002b, 0x00000010, 0x00000030, 0xffffffff, 0x0005002c, 0x00000011, 0x00000031, + 0x00000030, 0x00000030, 0x0005002c, 0x00000011, 0x00000032, 0x0000001e, 0x0000001e, 0x0004003b, + 0x0000001d, 0x0000000c, 0x00000000, 0x0005002c, 0x00000011, 0x00000033, 0x0000001e, 0x00000030, + 0x0005002c, 0x00000011, 0x00000034, 0x00000024, 0x00000030, 0x0005002c, 0x00000011, 0x00000035, + 0x00000030, 0x0000001e, 0x0005002c, 0x00000011, 0x00000036, 0x00000024, 0x0000001e, 0x0005002c, + 0x00000011, 0x00000037, 0x00000030, 0x00000024, 0x0005002c, 0x00000011, 0x00000038, 0x0000001e, + 0x00000024, 0x00040018, 0x00000039, 0x0000002a, 0x00000004, 0x0004002b, 0x00000026, 0x0000003a, + 0x00003734, 0x0004002b, 0x00000026, 0x0000003b, 0x000037e6, 0x0004002b, 0x00000026, 0x0000003c, + 0x000031b8, 0x0004002b, 0x00000026, 0x0000003d, 0x000037c4, 0x0007002c, 0x0000002a, 0x0000003e, + 0x0000003a, 0x0000003b, 0x0000003c, 0x0000003d, 0x0004002b, 0x00000026, 0x0000003f, 0x0000ac3f, + 0x0004002b, 0x00000026, 0x00000040, 0x000038f7, 0x0004002b, 0x00000026, 0x00000041, 0x0000ae43, + 0x0004002b, 0x00000026, 0x00000042, 0x000037e7, 0x0007002c, 0x0000002a, 0x00000043, 0x0000003f, + 0x00000040, 0x00000041, 0x00000042, 0x0004002b, 0x00000026, 0x00000044, 0x00003576, 0x0004002b, + 0x00000026, 0x00000045, 0x00003729, 0x0004002b, 0x00000026, 0x00000046, 0x000034b1, 0x0004002b, + 0x00000026, 0x00000047, 0x000036a3, 0x0007002c, 0x0000002a, 0x00000048, 0x00000044, 0x00000045, + 0x00000046, 0x00000047, 0x0004002b, 0x00000026, 0x00000049, 0x0000ae0a, 0x0004002b, 0x00000026, + 0x0000004a, 0x000038a4, 0x0004002b, 0x00000026, 0x0000004b, 0x0000b129, 0x0004002b, 0x00000026, + 0x0000004c, 0x0000352b, 0x0007002c, 0x0000002a, 0x0000004d, 
0x00000049, 0x0000004a, 0x0000004b, + 0x0000004c, 0x0007002c, 0x00000039, 0x0000004e, 0x0000003e, 0x00000043, 0x00000048, 0x0000004d, + 0x0004002b, 0x00000026, 0x0000004f, 0x00004138, 0x0004002b, 0x00000026, 0x00000050, 0x0000415f, + 0x0004002b, 0x00000026, 0x00000051, 0x00004137, 0x0004002b, 0x00000026, 0x00000052, 0x0000416e, + 0x0007002c, 0x0000002a, 0x00000053, 0x0000004f, 0x00000050, 0x00000051, 0x00000052, 0x0004002b, + 0x00000026, 0x00000054, 0x00003d97, 0x0004002b, 0x00000026, 0x00000055, 0x00003c7f, 0x0004002b, + 0x00000026, 0x00000056, 0x00003d5a, 0x0004002b, 0x00000026, 0x00000057, 0x00003c79, 0x0007002c, + 0x0000002a, 0x00000058, 0x00000054, 0x00000055, 0x00000056, 0x00000057, 0x0004002b, 0x00000026, + 0x00000059, 0x0000b713, 0x0004002b, 0x00000026, 0x0000005a, 0x00002d4c, 0x0004002b, 0x00000026, + 0x0000005b, 0x0000b88b, 0x0004002b, 0x00000026, 0x0000005c, 0x00002e34, 0x0007002c, 0x0000002a, + 0x0000005d, 0x00000059, 0x0000005a, 0x0000005b, 0x0000005c, 0x0004002b, 0x00000026, 0x0000005e, + 0x00003c00, 0x0007002c, 0x0000002a, 0x0000005f, 0x0000005e, 0x0000005e, 0x0000005e, 0x0000005e, + 0x0004002b, 0x00000026, 0x00000060, 0x0000b5b5, 0x0004002b, 0x00000026, 0x00000061, 0x0000380a, + 0x0004002b, 0x00000026, 0x00000062, 0x0000ad6a, 0x0004002b, 0x00000026, 0x00000063, 0x000036ce, + 0x0007002c, 0x0000002a, 0x00000064, 0x00000060, 0x00000061, 0x00000062, 0x00000063, 0x0004002b, + 0x00000026, 0x00000065, 0x0000af23, 0x0004002b, 0x00000026, 0x00000066, 0x00003a02, 0x0004002b, + 0x00000026, 0x00000067, 0x0000acea, 0x0004002b, 0x00000026, 0x00000068, 0x00003360, 0x0007002c, + 0x0000002a, 0x00000069, 0x00000065, 0x00000066, 0x00000067, 0x00000068, 0x0004002b, 0x00000026, + 0x0000006a, 0x00003413, 0x0004002b, 0x00000026, 0x0000006b, 0x00002f0e, 0x0004002b, 0x00000026, + 0x0000006c, 0x000028ac, 0x0004002b, 0x00000026, 0x0000006d, 0x000035b3, 0x0007002c, 0x0000002a, + 0x0000006e, 0x0000006a, 0x0000006b, 0x0000006c, 0x0000006d, 0x0004002b, 0x00000026, 0x0000006f, + 
0x00001325, 0x0004002b, 0x00000026, 0x00000070, 0x0000393e, 0x0004002b, 0x00000026, 0x00000071, + 0x0000adef, 0x0004002b, 0x00000026, 0x00000072, 0x0000359b, 0x0007002c, 0x0000002a, 0x00000073, + 0x0000006f, 0x00000070, 0x00000071, 0x00000072, 0x0007002c, 0x00000039, 0x00000074, 0x00000064, + 0x00000069, 0x0000006e, 0x00000073, 0x0004002b, 0x00000026, 0x00000075, 0x000041cb, 0x0004002b, + 0x00000026, 0x00000076, 0x00004161, 0x0007002c, 0x0000002a, 0x00000077, 0x00000075, 0x00000052, + 0x0000004f, 0x00000076, 0x0004002b, 0x00000026, 0x00000078, 0x00003bf1, 0x0004002b, 0x00000026, + 0x00000079, 0x00003aae, 0x0004002b, 0x00000026, 0x0000007a, 0x00003bc2, 0x0004002b, 0x00000026, + 0x0000007b, 0x00003a44, 0x0007002c, 0x0000002a, 0x0000007c, 0x00000078, 0x00000079, 0x0000007a, + 0x0000007b, 0x0004002b, 0x00000026, 0x0000007d, 0x00003574, 0x0004002b, 0x00000026, 0x0000007e, + 0x000033bc, 0x0004002b, 0x00000026, 0x0000007f, 0x0000ac76, 0x0004002b, 0x00000026, 0x00000080, + 0x00003242, 0x0007002c, 0x0000002a, 0x00000081, 0x0000007d, 0x0000007e, 0x0000007f, 0x00000080, + 0x0004002b, 0x00000026, 0x00000082, 0x000035a5, 0x0004002b, 0x00000026, 0x00000083, 0x00002e5e, + 0x0004002b, 0x00000026, 0x00000084, 0x00002000, 0x0007002c, 0x0000002a, 0x00000085, 0x00000082, + 0x00000083, 0x00000084, 0x00000044, 0x0004002b, 0x00000026, 0x00000086, 0x00004139, 0x0004002b, + 0x00000026, 0x00000087, 0x00003c31, 0x0004002b, 0x00000026, 0x00000088, 0x0000aa02, 0x0004002b, + 0x00000026, 0x00000089, 0x0000b842, 0x0004002b, 0x00000026, 0x0000008a, 0x0000aef0, 0x0004002b, + 0x00000026, 0x0000008b, 0x0000b92d, 0x0004002b, 0x00000026, 0x0000008c, 0x0000b460, 0x0007002c, + 0x0000002a, 0x0000008d, 0x00000089, 0x0000008a, 0x0000008b, 0x0000008c, 0x0004002b, 0x00000026, + 0x0000008e, 0x0000b61e, 0x0004002b, 0x00000026, 0x0000008f, 0x00002ca5, 0x0004002b, 0x00000026, + 0x00000090, 0x0000b870, 0x0004002b, 0x00000026, 0x00000091, 0x0000ace3, 0x0007002c, 0x0000002a, + 0x00000092, 0x0000008e, 0x0000008f, 
0x00000090, 0x00000091, 0x0004002b, 0x00000026, 0x00000093, + 0x0000b98b, 0x0004002b, 0x00000026, 0x00000094, 0x0000ad67, 0x0004002b, 0x00000026, 0x00000095, + 0x0000b864, 0x0004002b, 0x00000026, 0x00000096, 0x0000b553, 0x0007002c, 0x0000002a, 0x00000097, + 0x00000093, 0x00000094, 0x00000095, 0x00000096, 0x0004002b, 0x00000026, 0x00000098, 0x0000b81f, + 0x0004002b, 0x00000026, 0x00000099, 0x00002e95, 0x0004002b, 0x00000026, 0x0000009a, 0x0000b7a3, + 0x0004002b, 0x00000026, 0x0000009b, 0x0000adb9, 0x0007002c, 0x0000002a, 0x0000009c, 0x00000098, + 0x00000099, 0x0000009a, 0x0000009b, 0x0007002c, 0x00000039, 0x0000009d, 0x0000008d, 0x00000092, + 0x00000097, 0x0000009c, 0x0004002b, 0x00000026, 0x0000009e, 0x0000bc31, 0x0004002b, 0x00000026, + 0x0000009f, 0x0000abc9, 0x0004002b, 0x00000026, 0x000000a0, 0x0000b688, 0x0004002b, 0x00000026, + 0x000000a1, 0x000030ad, 0x0007002c, 0x0000002a, 0x000000a2, 0x0000009e, 0x0000009f, 0x000000a0, + 0x000000a1, 0x0004002b, 0x00000026, 0x000000a3, 0x0000b832, 0x0004002b, 0x00000026, 0x000000a4, + 0x00003207, 0x0004002b, 0x00000026, 0x000000a5, 0x0000b78f, 0x0004002b, 0x00000026, 0x000000a6, + 0x0000b21a, 0x0007002c, 0x0000002a, 0x000000a7, 0x000000a3, 0x000000a4, 0x000000a5, 0x000000a6, + 0x0004002b, 0x00000026, 0x000000a8, 0x0000256a, 0x0004002b, 0x00000026, 0x000000a9, 0x0000b039, + 0x0004002b, 0x00000026, 0x000000aa, 0x0000b8c3, 0x0004002b, 0x00000026, 0x000000ab, 0x0000b555, + 0x0007002c, 0x0000002a, 0x000000ac, 0x000000a8, 0x000000a9, 0x000000aa, 0x000000ab, 0x0004002b, + 0x00000026, 0x000000ad, 0x0000b3a2, 0x0004002b, 0x00000026, 0x000000ae, 0x0000324a, 0x0004002b, + 0x00000026, 0x000000af, 0x0000b80b, 0x0004002b, 0x00000026, 0x000000b0, 0x0000b0f2, 0x0007002c, + 0x0000002a, 0x000000b1, 0x000000ad, 0x000000ae, 0x000000af, 0x000000b0, 0x0007002c, 0x00000039, + 0x000000b2, 0x000000a2, 0x000000a7, 0x000000ac, 0x000000b1, 0x0004002b, 0x00000026, 0x000000b3, + 0x0000a70c, 0x0004002b, 0x00000026, 0x000000b4, 0x0000b196, 0x0004002b, 
0x00000026, 0x000000b5, + 0x0000b8bd, 0x0004002b, 0x00000026, 0x000000b6, 0x0000b5aa, 0x0007002c, 0x0000002a, 0x000000b7, + 0x000000b3, 0x000000b4, 0x000000b5, 0x000000b6, 0x0004002b, 0x00000026, 0x000000b8, 0x00002ec2, + 0x0004002b, 0x00000026, 0x000000b9, 0x0000a9ce, 0x0004002b, 0x00000026, 0x000000ba, 0x00002eb1, + 0x0004002b, 0x00000026, 0x000000bb, 0x0000aa70, 0x0007002c, 0x0000002a, 0x000000bc, 0x000000b8, + 0x000000b9, 0x000000ba, 0x000000bb, 0x0004002b, 0x00000026, 0x000000bd, 0x00003443, 0x0004002b, + 0x00000026, 0x000000be, 0x0000b2a2, 0x0004002b, 0x00000026, 0x000000bf, 0x0000353b, 0x0004002b, + 0x00000026, 0x000000c0, 0x0000b26e, 0x0007002c, 0x0000002a, 0x000000c1, 0x000000bd, 0x000000be, + 0x000000bf, 0x000000c0, 0x0004002b, 0x00000026, 0x000000c2, 0x0000aa5d, 0x0004002b, 0x00000026, + 0x000000c3, 0x0000a9d8, 0x0004002b, 0x00000026, 0x000000c4, 0x00003196, 0x0004002b, 0x00000026, + 0x000000c5, 0x00003049, 0x0007002c, 0x0000002a, 0x000000c6, 0x000000c2, 0x000000c3, 0x000000c4, + 0x000000c5, 0x0004002b, 0x00000026, 0x000000c7, 0x000034de, 0x0004002b, 0x00000026, 0x000000c8, + 0x0000b1cc, 0x0004002b, 0x00000026, 0x000000c9, 0x00003493, 0x0004002b, 0x00000026, 0x000000ca, + 0x0000b40b, 0x0007002c, 0x0000002a, 0x000000cb, 0x000000c7, 0x000000c8, 0x000000c9, 0x000000ca, + 0x0007002c, 0x00000039, 0x000000cc, 0x000000bc, 0x000000c1, 0x000000c6, 0x000000cb, 0x0004002b, + 0x00000026, 0x000000cd, 0x0000b8e7, 0x0004002b, 0x00000026, 0x000000ce, 0x0000b75b, 0x0004002b, + 0x00000026, 0x000000cf, 0x000037de, 0x0004002b, 0x00000026, 0x000000d0, 0x0000bac0, 0x0007002c, + 0x0000002a, 0x000000d1, 0x000000cd, 0x000000ce, 0x000000cf, 0x000000d0, 0x0004002b, 0x00000026, + 0x000000d2, 0x000031ef, 0x0004002b, 0x00000026, 0x000000d3, 0x0000aba0, 0x0004002b, 0x00000026, + 0x000000d4, 0x00003487, 0x0004002b, 0x00000026, 0x000000d5, 0x0000b40e, 0x0007002c, 0x0000002a, + 0x000000d6, 0x000000d2, 0x000000d3, 0x000000d4, 0x000000d5, 0x0004002b, 0x00000026, 0x000000d7, + 0x00003164, 
0x0004002b, 0x00000026, 0x000000d8, 0x0000b328, 0x0004002b, 0x00000026, 0x000000d9, + 0x00003134, 0x0004002b, 0x00000026, 0x000000da, 0x00009f77, 0x0007002c, 0x0000002a, 0x000000db, + 0x000000d7, 0x000000d8, 0x000000d9, 0x000000da, 0x0004002b, 0x00000026, 0x000000dc, 0x0000250c, + 0x0004002b, 0x00000026, 0x000000dd, 0x00002321, 0x0004002b, 0x00000026, 0x000000de, 0x0000347a, + 0x0004002b, 0x00000026, 0x000000df, 0x0000b2f0, 0x0007002c, 0x0000002a, 0x000000e0, 0x000000dc, + 0x000000dd, 0x000000de, 0x000000df, 0x0007002c, 0x00000039, 0x000000e1, 0x000000d1, 0x000000d6, + 0x000000db, 0x000000e0, 0x0004002b, 0x00000026, 0x000000e2, 0x00003464, 0x0004002b, 0x00000026, + 0x000000e3, 0x0000b187, 0x0004002b, 0x00000026, 0x000000e4, 0x00002c5b, 0x0004002b, 0x00000026, + 0x000000e5, 0x00002bd8, 0x0007002c, 0x0000002a, 0x000000e6, 0x000000e2, 0x000000e3, 0x000000e4, + 0x000000e5, 0x0004002b, 0x00000026, 0x000000e7, 0x00002e24, 0x0004002b, 0x00000026, 0x000000e8, + 0x000098c3, 0x0004002b, 0x00000026, 0x000000e9, 0x000031da, 0x0004002b, 0x00000026, 0x000000ea, + 0x00002085, 0x0007002c, 0x0000002a, 0x000000eb, 0x000000e7, 0x000000e8, 0x000000e9, 0x000000ea, + 0x0004002b, 0x00000026, 0x000000ec, 0x0000339d, 0x0004002b, 0x00000026, 0x000000ed, 0x0000b069, + 0x0004002b, 0x00000026, 0x000000ee, 0x00003459, 0x0004002b, 0x00000026, 0x000000ef, 0x0000b1e2, + 0x0007002c, 0x0000002a, 0x000000f0, 0x000000ec, 0x000000ed, 0x000000ee, 0x000000ef, 0x0004002b, + 0x00000026, 0x000000f1, 0x00002c46, 0x0004002b, 0x00000026, 0x000000f2, 0x0000290d, 0x0004002b, + 0x00000026, 0x000000f3, 0x000030b5, 0x0004002b, 0x00000026, 0x000000f4, 0x00002799, 0x0007002c, + 0x0000002a, 0x000000f5, 0x000000f1, 0x000000f2, 0x000000f3, 0x000000f4, 0x0004002b, 0x00000026, + 0x000000f6, 0x00003578, 0x0004002b, 0x00000026, 0x000000f7, 0x0000b05d, 0x0004002b, 0x00000026, + 0x000000f8, 0x0000b076, 0x0007002c, 0x0000002a, 0x000000f9, 0x000000f6, 0x000000f7, 0x000000e2, + 0x000000f8, 0x0007002c, 0x00000039, 0x000000fa, 
0x000000eb, 0x000000f0, 0x000000f5, 0x000000f9, + 0x0004002b, 0x00000026, 0x000000fb, 0x0000b989, 0x0004002b, 0x00000026, 0x000000fc, 0x0000b575, + 0x0004002b, 0x00000026, 0x000000fd, 0x000037dd, 0x0004002b, 0x00000026, 0x000000fe, 0x0000b9ca, + 0x0007002c, 0x0000002a, 0x000000ff, 0x000000fb, 0x000000fc, 0x000000fd, 0x000000fe, 0x0004002b, + 0x00000026, 0x00000100, 0x00003276, 0x0004002b, 0x00000026, 0x00000101, 0x0000a466, 0x0004002b, + 0x00000026, 0x00000102, 0x00003563, 0x0004002b, 0x00000026, 0x00000103, 0x0000ada5, 0x0007002c, + 0x0000002a, 0x00000104, 0x00000100, 0x00000101, 0x00000102, 0x00000103, 0x0004002b, 0x00000026, + 0x00000105, 0x000035ea, 0x0004002b, 0x00000026, 0x00000106, 0x0000b247, 0x0004002b, 0x00000026, + 0x00000107, 0x00003203, 0x0004002b, 0x00000026, 0x00000108, 0x00002891, 0x0007002c, 0x0000002a, + 0x00000109, 0x00000105, 0x00000106, 0x00000107, 0x00000108, 0x0004002b, 0x00000026, 0x0000010a, + 0x00002583, 0x0004002b, 0x00000026, 0x0000010b, 0x000026fb, 0x0004002b, 0x00000026, 0x0000010c, + 0x00003495, 0x0004002b, 0x00000026, 0x0000010d, 0x0000ae7f, 0x0007002c, 0x0000002a, 0x0000010e, + 0x0000010a, 0x0000010b, 0x0000010c, 0x0000010d, 0x0007002c, 0x00000039, 0x0000010f, 0x000000ff, + 0x00000104, 0x00000109, 0x0000010e, 0x0004002b, 0x00000026, 0x00000110, 0x00003552, 0x0004002b, + 0x00000026, 0x00000111, 0x0000a2a7, 0x0004002b, 0x00000026, 0x00000112, 0x0000315b, 0x0004002b, + 0x00000026, 0x00000113, 0x00002e09, 0x0007002c, 0x0000002a, 0x00000114, 0x00000110, 0x00000111, + 0x00000112, 0x00000113, 0x0004002b, 0x00000026, 0x00000115, 0x0000ba6a, 0x0004002b, 0x00000026, + 0x00000116, 0x00003596, 0x0004002b, 0x00000026, 0x00000117, 0x0000332d, 0x0007002c, 0x0000002a, + 0x00000118, 0x00000115, 0x00000116, 0x00000117, 0x00000098, 0x0004002b, 0x00000026, 0x00000119, + 0x00003c55, 0x0004002b, 0x00000026, 0x0000011a, 0x00003faa, 0x0004002b, 0x00000026, 0x0000011b, + 0x00003cfa, 0x0004002b, 0x00000026, 0x0000011c, 0x00003dd8, 0x0007002c, 0x0000002a, 
0x0000011d, + 0x00000119, 0x0000011a, 0x0000011b, 0x0000011c, 0x0004002b, 0x00000026, 0x0000011e, 0x0000b245, + 0x0004002b, 0x00000026, 0x0000011f, 0x00003a82, 0x0004002b, 0x00000026, 0x00000120, 0x000035ad, + 0x0004002b, 0x00000026, 0x00000121, 0x00003be2, 0x0007002c, 0x0000002a, 0x00000122, 0x0000011e, + 0x0000011f, 0x00000120, 0x00000121, 0x0004002b, 0x00000012, 0x00000123, 0x00000010, 0x0006002c, + 0x00000013, 0x0000000d, 0x00000123, 0x00000123, 0x00000021, 0x00050036, 0x0000000e, 0x00000002, + 0x00000000, 0x0000000f, 0x000200f8, 0x00000124, 0x000300f7, 0x00000125, 0x00000000, 0x000300fb, + 0x00000020, 0x00000126, 0x000200f8, 0x00000126, 0x0004003d, 0x00000013, 0x00000127, 0x00000003, + 0x0007004f, 0x00000015, 0x00000128, 0x00000127, 0x00000127, 0x00000000, 0x00000001, 0x0004007c, + 0x00000011, 0x00000129, 0x00000128, 0x0004003d, 0x00000017, 0x0000012a, 0x00000004, 0x00040068, + 0x00000011, 0x0000012b, 0x0000012a, 0x000500af, 0x0000001a, 0x0000012c, 0x00000129, 0x0000012b, + 0x0004009a, 0x00000019, 0x0000012d, 0x0000012c, 0x000300f7, 0x0000012e, 0x00000000, 0x000400fa, + 0x0000012d, 0x0000012f, 0x0000012e, 0x000200f8, 0x0000012f, 0x000200f9, 0x00000125, 0x000200f8, + 0x0000012e, 0x0004003d, 0x0000001c, 0x00000130, 0x00000005, 0x00040064, 0x0000001b, 0x00000131, + 0x00000130, 0x00050067, 0x00000011, 0x00000132, 0x00000131, 0x0000001e, 0x0004007c, 0x00000015, + 0x00000133, 0x00000132, 0x00050051, 0x00000012, 0x00000134, 0x00000133, 0x00000000, 0x00040070, + 0x00000016, 0x00000135, 0x00000134, 0x00050051, 0x00000012, 0x00000136, 0x00000133, 0x00000001, + 0x00040070, 0x00000016, 0x00000137, 0x00000136, 0x00050050, 0x0000001f, 0x00000138, 0x00000135, + 0x00000137, 0x00040070, 0x0000001f, 0x00000139, 0x00000128, 0x00050081, 0x0000001f, 0x0000013a, + 0x00000139, 0x00000023, 0x00050088, 0x0000001f, 0x0000013b, 0x0000013a, 0x00000138, 0x0004007c, + 0x00000011, 0x0000013c, 0x00000133, 0x00050082, 0x00000011, 0x0000013d, 0x0000013c, 0x00000025, + 0x00050041, 0x00000029, 
0x0000013e, 0x00000007, 0x00000024, 0x0004003d, 0x00000016, 0x0000013f, + 0x0000013e, 0x00050083, 0x00000016, 0x00000140, 0x00000027, 0x0000013f, 0x00040073, 0x00000026, + 0x00000141, 0x00000140, 0x00040073, 0x00000026, 0x00000142, 0x0000013f, 0x0004003d, 0x0000001c, + 0x00000143, 0x00000008, 0x00070058, 0x0000002c, 0x00000144, 0x00000143, 0x0000013b, 0x00000002, + 0x0000002b, 0x00040073, 0x0000002a, 0x00000145, 0x00000144, 0x0004003d, 0x0000001c, 0x00000146, + 0x00000008, 0x0007004f, 0x0000002d, 0x00000147, 0x00000145, 0x00000145, 0x00000000, 0x00000001, + 0x0005008e, 0x0000002d, 0x00000148, 0x00000147, 0x00000141, 0x00040073, 0x0000001f, 0x00000149, + 0x00000148, 0x00050081, 0x0000001f, 0x0000014a, 0x0000013a, 0x00000149, 0x00050088, 0x0000001f, + 0x0000014b, 0x0000014a, 0x00000138, 0x00070058, 0x0000002c, 0x0000014c, 0x00000146, 0x0000014b, + 0x00000002, 0x0000002b, 0x0007004f, 0x0000001f, 0x0000014d, 0x0000014c, 0x0000014c, 0x00000000, + 0x00000001, 0x00040073, 0x0000002d, 0x0000014e, 0x0000014d, 0x00040073, 0x0000001f, 0x0000014f, + 0x0000014e, 0x00050081, 0x0000001f, 0x00000150, 0x0000013a, 0x0000014f, 0x00050088, 0x0000001f, + 0x00000151, 0x00000150, 0x00000138, 0x0004003d, 0x0000001c, 0x00000152, 0x00000008, 0x0007004f, + 0x0000002d, 0x00000153, 0x00000145, 0x00000145, 0x00000002, 0x00000003, 0x0005008e, 0x0000002d, + 0x00000154, 0x00000153, 0x00000142, 0x00040073, 0x0000001f, 0x00000155, 0x00000154, 0x00050081, + 0x0000001f, 0x00000156, 0x0000013a, 0x00000155, 0x00050088, 0x0000001f, 0x00000157, 0x00000156, + 0x00000138, 0x00070058, 0x0000002c, 0x00000158, 0x00000152, 0x00000157, 0x00000002, 0x0000002b, + 0x0007004f, 0x0000001f, 0x00000159, 0x00000158, 0x00000158, 0x00000002, 0x00000003, 0x00040073, + 0x0000002d, 0x0000015a, 0x00000159, 0x00040073, 0x0000001f, 0x0000015b, 0x0000015a, 0x00050081, + 0x0000001f, 0x0000015c, 0x0000013a, 0x0000015b, 0x00050088, 0x0000001f, 0x0000015d, 0x0000015c, + 0x00000138, 0x0004003d, 0x0000001c, 0x0000015e, 0x00000009, 
0x00070058, 0x0000002c, 0x0000015f, + 0x0000015e, 0x0000013b, 0x00000002, 0x0000002b, 0x00040073, 0x0000002a, 0x00000160, 0x0000015f, + 0x0004003d, 0x0000001c, 0x00000161, 0x00000009, 0x0007004f, 0x0000002d, 0x00000162, 0x00000160, + 0x00000160, 0x00000000, 0x00000001, 0x0005008e, 0x0000002d, 0x00000163, 0x00000162, 0x00000141, + 0x00040073, 0x0000001f, 0x00000164, 0x00000163, 0x00050081, 0x0000001f, 0x00000165, 0x0000013a, + 0x00000164, 0x00050088, 0x0000001f, 0x00000166, 0x00000165, 0x00000138, 0x00070058, 0x0000002c, + 0x00000167, 0x00000161, 0x00000166, 0x00000002, 0x0000002b, 0x0007004f, 0x0000001f, 0x00000168, + 0x00000167, 0x00000167, 0x00000000, 0x00000001, 0x00040073, 0x0000002d, 0x00000169, 0x00000168, + 0x0004003d, 0x0000001c, 0x0000016a, 0x00000009, 0x0007004f, 0x0000002d, 0x0000016b, 0x00000160, + 0x00000160, 0x00000002, 0x00000003, 0x0005008e, 0x0000002d, 0x0000016c, 0x0000016b, 0x00000142, + 0x00040073, 0x0000001f, 0x0000016d, 0x0000016c, 0x00050081, 0x0000001f, 0x0000016e, 0x0000013a, + 0x0000016d, 0x00050088, 0x0000001f, 0x0000016f, 0x0000016e, 0x00000138, 0x00070058, 0x0000002c, + 0x00000170, 0x0000016a, 0x0000016f, 0x00000002, 0x0000002b, 0x0007004f, 0x0000001f, 0x00000171, + 0x00000170, 0x00000170, 0x00000002, 0x00000003, 0x00040073, 0x0000002d, 0x00000172, 0x00000171, + 0x00050051, 0x00000026, 0x00000173, 0x00000169, 0x00000000, 0x00050051, 0x00000026, 0x00000174, + 0x00000169, 0x00000001, 0x00050051, 0x00000026, 0x00000175, 0x00000172, 0x00000000, 0x00050051, + 0x00000026, 0x00000176, 0x00000172, 0x00000001, 0x00070050, 0x0000002a, 0x00000177, 0x00000173, + 0x00000174, 0x00000175, 0x00000176, 0x0007004f, 0x0000002d, 0x00000178, 0x00000177, 0x00000177, + 0x00000000, 0x00000001, 0x00040073, 0x0000001f, 0x00000179, 0x00000178, 0x00050081, 0x0000001f, + 0x0000017a, 0x0000013a, 0x00000179, 0x00050088, 0x0000001f, 0x0000017b, 0x0000017a, 0x00000138, + 0x0007004f, 0x0000002d, 0x0000017c, 0x00000177, 0x00000177, 0x00000002, 0x00000003, 0x00040073, + 
0x0000001f, 0x0000017d, 0x0000017c, 0x00050081, 0x0000001f, 0x0000017e, 0x0000013a, 0x0000017d, + 0x00050088, 0x0000001f, 0x0000017f, 0x0000017e, 0x00000138, 0x0004003d, 0x0000001c, 0x00000180, + 0x00000005, 0x00070058, 0x0000002c, 0x00000181, 0x00000180, 0x00000151, 0x00000002, 0x0000002b, + 0x00040073, 0x0000002a, 0x00000182, 0x00000181, 0x0004003d, 0x0000001c, 0x00000183, 0x0000000a, + 0x00070058, 0x0000002c, 0x00000184, 0x00000183, 0x00000151, 0x00000002, 0x0000002b, 0x00040073, + 0x0000002a, 0x00000185, 0x00000184, 0x0004003d, 0x0000001c, 0x00000186, 0x00000005, 0x00070058, + 0x0000002c, 0x00000187, 0x00000186, 0x0000017b, 0x00000002, 0x0000002b, 0x00040073, 0x0000002a, + 0x00000188, 0x00000187, 0x0004003d, 0x0000001c, 0x00000189, 0x0000000a, 0x00070058, 0x0000002c, + 0x0000018a, 0x00000189, 0x0000017b, 0x00000002, 0x0000002b, 0x00040073, 0x0000002a, 0x0000018b, + 0x0000018a, 0x0004003d, 0x0000001c, 0x0000018c, 0x0000000b, 0x00050080, 0x00000011, 0x0000018d, + 0x00000129, 0x00000031, 0x0008000c, 0x00000011, 0x0000018e, 0x00000001, 0x0000002d, 0x0000018d, + 0x00000032, 0x0000013d, 0x00040064, 0x0000001b, 0x0000018f, 0x0000018c, 0x0007005f, 0x0000002c, + 0x00000190, 0x0000018f, 0x0000018e, 0x00000002, 0x0000001e, 0x00040073, 0x0000002a, 0x00000191, + 0x00000190, 0x0004003d, 0x0000001c, 0x00000192, 0x0000000c, 0x00040064, 0x0000001b, 0x00000193, + 0x00000192, 0x0007005f, 0x0000002c, 0x00000194, 0x00000193, 0x0000018e, 0x00000002, 0x0000001e, + 0x00040073, 0x0000002a, 0x00000195, 0x00000194, 0x0004003d, 0x0000001c, 0x00000196, 0x0000000b, + 0x00050080, 0x00000011, 0x00000197, 0x00000129, 0x00000033, 0x0008000c, 0x00000011, 0x00000198, + 0x00000001, 0x0000002d, 0x00000197, 0x00000032, 0x0000013d, 0x00040064, 0x0000001b, 0x00000199, + 0x00000196, 0x0007005f, 0x0000002c, 0x0000019a, 0x00000199, 0x00000198, 0x00000002, 0x0000001e, + 0x00040073, 0x0000002a, 0x0000019b, 0x0000019a, 0x0004003d, 0x0000001c, 0x0000019c, 0x0000000c, + 0x00040064, 0x0000001b, 0x0000019d, 
0x0000019c, 0x0007005f, 0x0000002c, 0x0000019e, 0x0000019d, + 0x00000198, 0x00000002, 0x0000001e, 0x00040073, 0x0000002a, 0x0000019f, 0x0000019e, 0x0004003d, + 0x0000001c, 0x000001a0, 0x0000000b, 0x00050080, 0x00000011, 0x000001a1, 0x00000129, 0x00000034, + 0x0008000c, 0x00000011, 0x000001a2, 0x00000001, 0x0000002d, 0x000001a1, 0x00000032, 0x0000013d, + 0x00040064, 0x0000001b, 0x000001a3, 0x000001a0, 0x0007005f, 0x0000002c, 0x000001a4, 0x000001a3, + 0x000001a2, 0x00000002, 0x0000001e, 0x00040073, 0x0000002a, 0x000001a5, 0x000001a4, 0x0004003d, + 0x0000001c, 0x000001a6, 0x0000000c, 0x00040064, 0x0000001b, 0x000001a7, 0x000001a6, 0x0007005f, + 0x0000002c, 0x000001a8, 0x000001a7, 0x000001a2, 0x00000002, 0x0000001e, 0x00040073, 0x0000002a, + 0x000001a9, 0x000001a8, 0x0004003d, 0x0000001c, 0x000001aa, 0x0000000b, 0x00050080, 0x00000011, + 0x000001ab, 0x00000129, 0x00000035, 0x0008000c, 0x00000011, 0x000001ac, 0x00000001, 0x0000002d, + 0x000001ab, 0x00000032, 0x0000013d, 0x00040064, 0x0000001b, 0x000001ad, 0x000001aa, 0x0007005f, + 0x0000002c, 0x000001ae, 0x000001ad, 0x000001ac, 0x00000002, 0x0000001e, 0x00040073, 0x0000002a, + 0x000001af, 0x000001ae, 0x00050094, 0x00000026, 0x000001b0, 0x00000182, 0x000001af, 0x00050094, + 0x00000026, 0x000001b1, 0x00000182, 0x00000191, 0x00050094, 0x00000026, 0x000001b2, 0x00000185, + 0x00000195, 0x00050081, 0x00000026, 0x000001b3, 0x000001b1, 0x000001b2, 0x00050094, 0x00000026, + 0x000001b4, 0x00000182, 0x0000019b, 0x00050094, 0x00000026, 0x000001b5, 0x00000185, 0x0000019f, + 0x00050081, 0x00000026, 0x000001b6, 0x000001b4, 0x000001b5, 0x00050094, 0x00000026, 0x000001b7, + 0x00000182, 0x000001a5, 0x00050094, 0x00000026, 0x000001b8, 0x00000185, 0x000001a9, 0x00050081, + 0x00000026, 0x000001b9, 0x000001b7, 0x000001b8, 0x00070050, 0x0000002a, 0x000001ba, 0x000001b3, + 0x000001b6, 0x000001b9, 0x000001b0, 0x00050094, 0x00000026, 0x000001bb, 0x00000188, 0x000001af, + 0x00050094, 0x00000026, 0x000001bc, 0x00000188, 0x00000191, 0x00050094, 
0x00000026, 0x000001bd, + 0x0000018b, 0x00000195, 0x00050081, 0x00000026, 0x000001be, 0x000001bc, 0x000001bd, 0x00050094, + 0x00000026, 0x000001bf, 0x00000188, 0x0000019b, 0x00050094, 0x00000026, 0x000001c0, 0x0000018b, + 0x0000019f, 0x00050081, 0x00000026, 0x000001c1, 0x000001bf, 0x000001c0, 0x00050094, 0x00000026, + 0x000001c2, 0x00000188, 0x000001a5, 0x00050094, 0x00000026, 0x000001c3, 0x0000018b, 0x000001a9, + 0x00050081, 0x00000026, 0x000001c4, 0x000001c2, 0x000001c3, 0x00070050, 0x0000002a, 0x000001c5, + 0x000001be, 0x000001c1, 0x000001c4, 0x000001bb, 0x0004003d, 0x0000001c, 0x000001c6, 0x0000000c, + 0x00040064, 0x0000001b, 0x000001c7, 0x000001c6, 0x0007005f, 0x0000002c, 0x000001c8, 0x000001c7, + 0x000001ac, 0x00000002, 0x0000001e, 0x00040073, 0x0000002a, 0x000001c9, 0x000001c8, 0x00050094, + 0x00000026, 0x000001ca, 0x00000185, 0x000001c9, 0x00050081, 0x00000026, 0x000001cb, 0x000001b0, + 0x000001ca, 0x00060052, 0x0000002a, 0x000001cc, 0x000001cb, 0x000001ba, 0x00000003, 0x00050094, + 0x00000026, 0x000001cd, 0x0000018b, 0x000001c9, 0x00050081, 0x00000026, 0x000001ce, 0x000001bb, + 0x000001cd, 0x00060052, 0x0000002a, 0x000001cf, 0x000001ce, 0x000001c5, 0x00000003, 0x0004003d, + 0x0000001c, 0x000001d0, 0x0000000b, 0x00070058, 0x0000002c, 0x000001d1, 0x000001d0, 0x0000013b, + 0x00000002, 0x0000002b, 0x00040073, 0x0000002a, 0x000001d2, 0x000001d1, 0x0004003d, 0x0000001c, + 0x000001d3, 0x0000000c, 0x00070058, 0x0000002c, 0x000001d4, 0x000001d3, 0x0000013b, 0x00000002, + 0x0000002b, 0x00040073, 0x0000002a, 0x000001d5, 0x000001d4, 0x0004003d, 0x0000001c, 0x000001d6, + 0x0000000b, 0x00050080, 0x00000011, 0x000001d7, 0x00000129, 0x00000036, 0x0008000c, 0x00000011, + 0x000001d8, 0x00000001, 0x0000002d, 0x000001d7, 0x00000032, 0x0000013d, 0x00040064, 0x0000001b, + 0x000001d9, 0x000001d6, 0x0007005f, 0x0000002c, 0x000001da, 0x000001d9, 0x000001d8, 0x00000002, + 0x0000001e, 0x00040073, 0x0000002a, 0x000001db, 0x000001da, 0x0004003d, 0x0000001c, 0x000001dc, + 0x0000000c, 
0x00040064, 0x0000001b, 0x000001dd, 0x000001dc, 0x0007005f, 0x0000002c, 0x000001de, + 0x000001dd, 0x000001d8, 0x00000002, 0x0000001e, 0x00040073, 0x0000002a, 0x000001df, 0x000001de, + 0x0004003d, 0x0000001c, 0x000001e0, 0x0000000b, 0x00050080, 0x00000011, 0x000001e1, 0x00000129, + 0x00000037, 0x0008000c, 0x00000011, 0x000001e2, 0x00000001, 0x0000002d, 0x000001e1, 0x00000032, + 0x0000013d, 0x00040064, 0x0000001b, 0x000001e3, 0x000001e0, 0x0007005f, 0x0000002c, 0x000001e4, + 0x000001e3, 0x000001e2, 0x00000002, 0x0000001e, 0x00040073, 0x0000002a, 0x000001e5, 0x000001e4, + 0x0004003d, 0x0000001c, 0x000001e6, 0x0000000c, 0x00040064, 0x0000001b, 0x000001e7, 0x000001e6, + 0x0007005f, 0x0000002c, 0x000001e8, 0x000001e7, 0x000001e2, 0x00000002, 0x0000001e, 0x00040073, + 0x0000002a, 0x000001e9, 0x000001e8, 0x0004003d, 0x0000001c, 0x000001ea, 0x0000000b, 0x00050080, + 0x00000011, 0x000001eb, 0x00000129, 0x00000038, 0x0008000c, 0x00000011, 0x000001ec, 0x00000001, + 0x0000002d, 0x000001eb, 0x00000032, 0x0000013d, 0x00040064, 0x0000001b, 0x000001ed, 0x000001ea, + 0x0007005f, 0x0000002c, 0x000001ee, 0x000001ed, 0x000001ec, 0x00000002, 0x0000001e, 0x00040073, + 0x0000002a, 0x000001ef, 0x000001ee, 0x00050094, 0x00000026, 0x000001f0, 0x00000182, 0x000001ef, + 0x00050094, 0x00000026, 0x000001f1, 0x00000182, 0x000001d2, 0x00050094, 0x00000026, 0x000001f2, + 0x00000185, 0x000001d5, 0x00050081, 0x00000026, 0x000001f3, 0x000001f1, 0x000001f2, 0x00050094, + 0x00000026, 0x000001f4, 0x00000182, 0x000001db, 0x00050094, 0x00000026, 0x000001f5, 0x00000185, + 0x000001df, 0x00050081, 0x00000026, 0x000001f6, 0x000001f4, 0x000001f5, 0x00050094, 0x00000026, + 0x000001f7, 0x00000182, 0x000001e5, 0x00050094, 0x00000026, 0x000001f8, 0x00000185, 0x000001e9, + 0x00050081, 0x00000026, 0x000001f9, 0x000001f7, 0x000001f8, 0x00070050, 0x0000002a, 0x000001fa, + 0x000001f3, 0x000001f6, 0x000001f9, 0x000001f0, 0x00050094, 0x00000026, 0x000001fb, 0x00000188, + 0x000001ef, 0x00050094, 0x00000026, 0x000001fc, 
0x00000188, 0x000001d2, 0x00050094, 0x00000026, + 0x000001fd, 0x0000018b, 0x000001d5, 0x00050081, 0x00000026, 0x000001fe, 0x000001fc, 0x000001fd, + 0x00050094, 0x00000026, 0x000001ff, 0x00000188, 0x000001db, 0x00050094, 0x00000026, 0x00000200, + 0x0000018b, 0x000001df, 0x00050081, 0x00000026, 0x00000201, 0x000001ff, 0x00000200, 0x00050094, + 0x00000026, 0x00000202, 0x00000188, 0x000001e5, 0x00050094, 0x00000026, 0x00000203, 0x0000018b, + 0x000001e9, 0x00050081, 0x00000026, 0x00000204, 0x00000202, 0x00000203, 0x00070050, 0x0000002a, + 0x00000205, 0x000001fe, 0x00000201, 0x00000204, 0x000001fb, 0x0004003d, 0x0000001c, 0x00000206, + 0x0000000c, 0x00040064, 0x0000001b, 0x00000207, 0x00000206, 0x0007005f, 0x0000002c, 0x00000208, + 0x00000207, 0x000001ec, 0x00000002, 0x0000001e, 0x00040073, 0x0000002a, 0x00000209, 0x00000208, + 0x00050094, 0x00000026, 0x0000020a, 0x00000185, 0x00000209, 0x00050081, 0x00000026, 0x0000020b, + 0x000001f0, 0x0000020a, 0x00060052, 0x0000002a, 0x0000020c, 0x0000020b, 0x000001fa, 0x00000003, + 0x00050094, 0x00000026, 0x0000020d, 0x0000018b, 0x00000209, 0x00050081, 0x00000026, 0x0000020e, + 0x000001fb, 0x0000020d, 0x00060052, 0x0000002a, 0x0000020f, 0x0000020e, 0x00000205, 0x00000003, + 0x0004003d, 0x0000001c, 0x00000210, 0x0000000b, 0x00050080, 0x00000011, 0x00000211, 0x00000129, + 0x00000025, 0x0008000c, 0x00000011, 0x00000212, 0x00000001, 0x0000002d, 0x00000211, 0x00000032, + 0x0000013d, 0x00040064, 0x0000001b, 0x00000213, 0x00000210, 0x0007005f, 0x0000002c, 0x00000214, + 0x00000213, 0x00000212, 0x00000002, 0x0000001e, 0x00040073, 0x0000002a, 0x00000215, 0x00000214, + 0x0004003d, 0x0000001c, 0x00000216, 0x0000000c, 0x00040064, 0x0000001b, 0x00000217, 0x00000216, + 0x0007005f, 0x0000002c, 0x00000218, 0x00000217, 0x00000212, 0x00000002, 0x0000001e, 0x00040073, + 0x0000002a, 0x00000219, 0x00000218, 0x0004003d, 0x0000001c, 0x0000021a, 0x0000000b, 0x00070058, + 0x0000002c, 0x0000021b, 0x0000021a, 0x0000015d, 0x00000002, 0x0000002b, 0x00040073, 
0x0000002a, + 0x0000021c, 0x0000021b, 0x0004003d, 0x0000001c, 0x0000021d, 0x0000000c, 0x00070058, 0x0000002c, + 0x0000021e, 0x0000021d, 0x0000015d, 0x00000002, 0x0000002b, 0x00040073, 0x0000002a, 0x0000021f, + 0x0000021e, 0x0004003d, 0x0000001c, 0x00000220, 0x0000000b, 0x00070058, 0x0000002c, 0x00000221, + 0x00000220, 0x0000017f, 0x00000002, 0x0000002b, 0x00040073, 0x0000002a, 0x00000222, 0x00000221, + 0x0004003d, 0x0000001c, 0x00000223, 0x0000000c, 0x00070058, 0x0000002c, 0x00000224, 0x00000223, + 0x0000017f, 0x00000002, 0x0000002b, 0x00040073, 0x0000002a, 0x00000225, 0x00000224, 0x0004003d, + 0x0000001c, 0x00000226, 0x00000005, 0x00040064, 0x0000001b, 0x00000227, 0x00000226, 0x0007005f, + 0x0000002c, 0x00000228, 0x00000227, 0x0000018e, 0x00000002, 0x0000001e, 0x00040073, 0x0000002a, + 0x00000229, 0x00000228, 0x0004003d, 0x0000001c, 0x0000022a, 0x0000000a, 0x00040064, 0x0000001b, + 0x0000022b, 0x0000022a, 0x0007005f, 0x0000002c, 0x0000022c, 0x0000022b, 0x0000018e, 0x00000002, + 0x0000001e, 0x00040073, 0x0000002a, 0x0000022d, 0x0000022c, 0x0004003d, 0x0000001c, 0x0000022e, + 0x00000005, 0x00040064, 0x0000001b, 0x0000022f, 0x0000022e, 0x0007005f, 0x0000002c, 0x00000230, + 0x0000022f, 0x00000198, 0x00000002, 0x0000001e, 0x00040073, 0x0000002a, 0x00000231, 0x00000230, + 0x0004003d, 0x0000001c, 0x00000232, 0x0000000a, 0x00040064, 0x0000001b, 0x00000233, 0x00000232, + 0x0007005f, 0x0000002c, 0x00000234, 0x00000233, 0x00000198, 0x00000002, 0x0000001e, 0x00040073, + 0x0000002a, 0x00000235, 0x00000234, 0x0004003d, 0x0000001c, 0x00000236, 0x00000005, 0x00040064, + 0x0000001b, 0x00000237, 0x00000236, 0x0007005f, 0x0000002c, 0x00000238, 0x00000237, 0x000001a2, + 0x00000002, 0x0000001e, 0x00040073, 0x0000002a, 0x00000239, 0x00000238, 0x0004003d, 0x0000001c, + 0x0000023a, 0x0000000a, 0x00040064, 0x0000001b, 0x0000023b, 0x0000023a, 0x0007005f, 0x0000002c, + 0x0000023c, 0x0000023b, 0x000001a2, 0x00000002, 0x0000001e, 0x00040073, 0x0000002a, 0x0000023d, + 0x0000023c, 0x0004003d, 
0x0000001c, 0x0000023e, 0x00000005, 0x00040064, 0x0000001b, 0x0000023f, + 0x0000023e, 0x0007005f, 0x0000002c, 0x00000240, 0x0000023f, 0x000001ac, 0x00000002, 0x0000001e, + 0x00040073, 0x0000002a, 0x00000241, 0x00000240, 0x00050094, 0x00000026, 0x00000242, 0x0000021c, + 0x00000241, 0x00050094, 0x00000026, 0x00000243, 0x0000021c, 0x00000229, 0x00050094, 0x00000026, + 0x00000244, 0x0000021f, 0x0000022d, 0x00050081, 0x00000026, 0x00000245, 0x00000243, 0x00000244, + 0x00050094, 0x00000026, 0x00000246, 0x0000021c, 0x00000231, 0x00050094, 0x00000026, 0x00000247, + 0x0000021f, 0x00000235, 0x00050081, 0x00000026, 0x00000248, 0x00000246, 0x00000247, 0x00050094, + 0x00000026, 0x00000249, 0x0000021c, 0x00000239, 0x00050094, 0x00000026, 0x0000024a, 0x0000021f, + 0x0000023d, 0x00050081, 0x00000026, 0x0000024b, 0x00000249, 0x0000024a, 0x00070050, 0x0000002a, + 0x0000024c, 0x00000245, 0x00000248, 0x0000024b, 0x00000242, 0x00050094, 0x00000026, 0x0000024d, + 0x00000222, 0x00000241, 0x00050094, 0x00000026, 0x0000024e, 0x00000222, 0x00000229, 0x00050094, + 0x00000026, 0x0000024f, 0x00000225, 0x0000022d, 0x00050081, 0x00000026, 0x00000250, 0x0000024e, + 0x0000024f, 0x00050094, 0x00000026, 0x00000251, 0x00000222, 0x00000231, 0x00050094, 0x00000026, + 0x00000252, 0x00000225, 0x00000235, 0x00050081, 0x00000026, 0x00000253, 0x00000251, 0x00000252, + 0x00050094, 0x00000026, 0x00000254, 0x00000222, 0x00000239, 0x00050094, 0x00000026, 0x00000255, + 0x00000225, 0x0000023d, 0x00050081, 0x00000026, 0x00000256, 0x00000254, 0x00000255, 0x00070050, + 0x0000002a, 0x00000257, 0x00000250, 0x00000253, 0x00000256, 0x0000024d, 0x0004003d, 0x0000001c, + 0x00000258, 0x0000000a, 0x00040064, 0x0000001b, 0x00000259, 0x00000258, 0x0007005f, 0x0000002c, + 0x0000025a, 0x00000259, 0x000001ac, 0x00000002, 0x0000001e, 0x00040073, 0x0000002a, 0x0000025b, + 0x0000025a, 0x00050094, 0x00000026, 0x0000025c, 0x0000021f, 0x0000025b, 0x00050081, 0x00000026, + 0x0000025d, 0x00000242, 0x0000025c, 0x00060052, 0x0000002a, 
0x0000025e, 0x0000025d, 0x0000024c, + 0x00000003, 0x00050094, 0x00000026, 0x0000025f, 0x00000225, 0x0000025b, 0x00050081, 0x00000026, + 0x00000260, 0x0000024d, 0x0000025f, 0x00060052, 0x0000002a, 0x00000261, 0x00000260, 0x00000257, + 0x00000003, 0x0004003d, 0x0000001c, 0x00000262, 0x00000005, 0x00070058, 0x0000002c, 0x00000263, + 0x00000262, 0x0000013b, 0x00000002, 0x0000002b, 0x00040073, 0x0000002a, 0x00000264, 0x00000263, + 0x0004003d, 0x0000001c, 0x00000265, 0x0000000a, 0x00070058, 0x0000002c, 0x00000266, 0x00000265, + 0x0000013b, 0x00000002, 0x0000002b, 0x00040073, 0x0000002a, 0x00000267, 0x00000266, 0x0004003d, + 0x0000001c, 0x00000268, 0x00000005, 0x00040064, 0x0000001b, 0x00000269, 0x00000268, 0x0007005f, + 0x0000002c, 0x0000026a, 0x00000269, 0x000001d8, 0x00000002, 0x0000001e, 0x00040073, 0x0000002a, + 0x0000026b, 0x0000026a, 0x0004003d, 0x0000001c, 0x0000026c, 0x0000000a, 0x00040064, 0x0000001b, + 0x0000026d, 0x0000026c, 0x0007005f, 0x0000002c, 0x0000026e, 0x0000026d, 0x000001d8, 0x00000002, + 0x0000001e, 0x00040073, 0x0000002a, 0x0000026f, 0x0000026e, 0x0004003d, 0x0000001c, 0x00000270, + 0x00000005, 0x00040064, 0x0000001b, 0x00000271, 0x00000270, 0x0007005f, 0x0000002c, 0x00000272, + 0x00000271, 0x000001e2, 0x00000002, 0x0000001e, 0x00040073, 0x0000002a, 0x00000273, 0x00000272, + 0x0004003d, 0x0000001c, 0x00000274, 0x0000000a, 0x00040064, 0x0000001b, 0x00000275, 0x00000274, + 0x0007005f, 0x0000002c, 0x00000276, 0x00000275, 0x000001e2, 0x00000002, 0x0000001e, 0x00040073, + 0x0000002a, 0x00000277, 0x00000276, 0x0004003d, 0x0000001c, 0x00000278, 0x00000005, 0x00040064, + 0x0000001b, 0x00000279, 0x00000278, 0x0007005f, 0x0000002c, 0x0000027a, 0x00000279, 0x000001ec, + 0x00000002, 0x0000001e, 0x00040073, 0x0000002a, 0x0000027b, 0x0000027a, 0x00050094, 0x00000026, + 0x0000027c, 0x0000021c, 0x0000027b, 0x00050094, 0x00000026, 0x0000027d, 0x0000021c, 0x00000264, + 0x00050094, 0x00000026, 0x0000027e, 0x0000021f, 0x00000267, 0x00050081, 0x00000026, 0x0000027f, + 
0x0000027d, 0x0000027e, 0x00050094, 0x00000026, 0x00000280, 0x0000021c, 0x0000026b, 0x00050094, + 0x00000026, 0x00000281, 0x0000021f, 0x0000026f, 0x00050081, 0x00000026, 0x00000282, 0x00000280, + 0x00000281, 0x00050094, 0x00000026, 0x00000283, 0x0000021c, 0x00000273, 0x00050094, 0x00000026, + 0x00000284, 0x0000021f, 0x00000277, 0x00050081, 0x00000026, 0x00000285, 0x00000283, 0x00000284, + 0x00070050, 0x0000002a, 0x00000286, 0x0000027f, 0x00000282, 0x00000285, 0x0000027c, 0x00050094, + 0x00000026, 0x00000287, 0x00000222, 0x0000027b, 0x00050094, 0x00000026, 0x00000288, 0x00000222, + 0x00000264, 0x00050094, 0x00000026, 0x00000289, 0x00000225, 0x00000267, 0x00050081, 0x00000026, + 0x0000028a, 0x00000288, 0x00000289, 0x00050094, 0x00000026, 0x0000028b, 0x00000222, 0x0000026b, + 0x00050094, 0x00000026, 0x0000028c, 0x00000225, 0x0000026f, 0x00050081, 0x00000026, 0x0000028d, + 0x0000028b, 0x0000028c, 0x00050094, 0x00000026, 0x0000028e, 0x00000222, 0x00000273, 0x00050094, + 0x00000026, 0x0000028f, 0x00000225, 0x00000277, 0x00050081, 0x00000026, 0x00000290, 0x0000028e, + 0x0000028f, 0x00070050, 0x0000002a, 0x00000291, 0x0000028a, 0x0000028d, 0x00000290, 0x00000287, + 0x0004003d, 0x0000001c, 0x00000292, 0x0000000a, 0x00040064, 0x0000001b, 0x00000293, 0x00000292, + 0x0007005f, 0x0000002c, 0x00000294, 0x00000293, 0x000001ec, 0x00000002, 0x0000001e, 0x00040073, + 0x0000002a, 0x00000295, 0x00000294, 0x00050094, 0x00000026, 0x00000296, 0x0000021f, 0x00000295, + 0x00050081, 0x00000026, 0x00000297, 0x0000027c, 0x00000296, 0x00060052, 0x0000002a, 0x00000298, + 0x00000297, 0x00000286, 0x00000003, 0x00050094, 0x00000026, 0x00000299, 0x00000225, 0x00000295, + 0x00050081, 0x00000026, 0x0000029a, 0x00000287, 0x00000299, 0x00060052, 0x0000002a, 0x0000029b, + 0x0000029a, 0x00000291, 0x00000003, 0x0004003d, 0x0000001c, 0x0000029c, 0x00000005, 0x00040064, + 0x0000001b, 0x0000029d, 0x0000029c, 0x0007005f, 0x0000002c, 0x0000029e, 0x0000029d, 0x00000212, + 0x00000002, 0x0000001e, 0x00040073, 
0x0000002a, 0x0000029f, 0x0000029e, 0x0004003d, 0x0000001c, + 0x000002a0, 0x0000000a, 0x00040064, 0x0000001b, 0x000002a1, 0x000002a0, 0x0007005f, 0x0000002c, + 0x000002a2, 0x000002a1, 0x00000212, 0x00000002, 0x0000001e, 0x00040073, 0x0000002a, 0x000002a3, + 0x000002a2, 0x00050083, 0x0000002a, 0x000002a4, 0x000001cc, 0x00000053, 0x00050085, 0x0000002a, + 0x000002a5, 0x000002a4, 0x00000058, 0x00050081, 0x0000002a, 0x000002a6, 0x000002a5, 0x0000005d, + 0x0008000c, 0x0000002a, 0x000002a7, 0x00000001, 0x0000002b, 0x000002a6, 0x0000002f, 0x0000005f, + 0x00050091, 0x0000002a, 0x000002a8, 0x0000004e, 0x000002a7, 0x00050083, 0x0000002a, 0x000002a9, + 0x0000020c, 0x00000077, 0x00050085, 0x0000002a, 0x000002aa, 0x000002a9, 0x0000007c, 0x00050081, + 0x0000002a, 0x000002ab, 0x000002aa, 0x00000081, 0x0008000c, 0x0000002a, 0x000002ac, 0x00000001, + 0x0000002b, 0x000002ab, 0x0000002f, 0x0000005f, 0x00050091, 0x0000002a, 0x000002ad, 0x00000074, + 0x000002ac, 0x00050081, 0x0000002a, 0x000002ae, 0x000002a8, 0x000002ad, 0x00050094, 0x00000026, + 0x000002af, 0x00000182, 0x00000215, 0x00050094, 0x00000026, 0x000002b0, 0x00000185, 0x00000219, + 0x00050081, 0x00000026, 0x000002b1, 0x000002af, 0x000002b0, 0x00050083, 0x00000026, 0x000002b2, + 0x000002b1, 0x00000086, 0x00050085, 0x00000026, 0x000002b3, 0x000002b2, 0x00000087, 0x00050081, + 0x00000026, 0x000002b4, 0x000002b3, 0x00000088, 0x0008000c, 0x00000026, 0x000002b5, 0x00000001, + 0x0000002b, 0x000002b4, 0x0000002e, 0x0000005e, 0x0005008e, 0x0000002a, 0x000002b6, 0x00000085, + 0x000002b5, 0x00050081, 0x0000002a, 0x000002b7, 0x000002ae, 0x000002b6, 0x00050083, 0x0000002a, + 0x000002b8, 0x0000025e, 0x00000053, 0x00050085, 0x0000002a, 0x000002b9, 0x000002b8, 0x00000058, + 0x00050081, 0x0000002a, 0x000002ba, 0x000002b9, 0x0000005d, 0x0008000c, 0x0000002a, 0x000002bb, + 0x00000001, 0x0000002b, 0x000002ba, 0x0000002f, 0x0000005f, 0x00050091, 0x0000002a, 0x000002bc, + 0x0000009d, 0x000002bb, 0x00050081, 0x0000002a, 0x000002bd, 0x000002b7, 
0x000002bc, 0x00050083, + 0x0000002a, 0x000002be, 0x00000298, 0x00000077, 0x00050085, 0x0000002a, 0x000002bf, 0x000002be, + 0x0000007c, 0x00050081, 0x0000002a, 0x000002c0, 0x000002bf, 0x00000081, 0x0008000c, 0x0000002a, + 0x000002c1, 0x00000001, 0x0000002b, 0x000002c0, 0x0000002f, 0x0000005f, 0x00050091, 0x0000002a, + 0x000002c2, 0x000000b2, 0x000002c1, 0x00050081, 0x0000002a, 0x000002c3, 0x000002bd, 0x000002c2, + 0x00050094, 0x00000026, 0x000002c4, 0x0000021c, 0x0000029f, 0x00050094, 0x00000026, 0x000002c5, + 0x0000021f, 0x000002a3, 0x00050081, 0x00000026, 0x000002c6, 0x000002c4, 0x000002c5, 0x00050083, + 0x00000026, 0x000002c7, 0x000002c6, 0x00000086, 0x00050085, 0x00000026, 0x000002c8, 0x000002c7, + 0x00000087, 0x00050081, 0x00000026, 0x000002c9, 0x000002c8, 0x00000088, 0x0008000c, 0x00000026, + 0x000002ca, 0x00000001, 0x0000002b, 0x000002c9, 0x0000002e, 0x0000005e, 0x0005008e, 0x0000002a, + 0x000002cb, 0x000000b7, 0x000002ca, 0x00050081, 0x0000002a, 0x000002cc, 0x000002c3, 0x000002cb, + 0x00050083, 0x0000002a, 0x000002cd, 0x000001cf, 0x00000053, 0x00050085, 0x0000002a, 0x000002ce, + 0x000002cd, 0x00000058, 0x00050081, 0x0000002a, 0x000002cf, 0x000002ce, 0x0000005d, 0x0008000c, + 0x0000002a, 0x000002d0, 0x00000001, 0x0000002b, 0x000002cf, 0x0000002f, 0x0000005f, 0x00050091, + 0x0000002a, 0x000002d1, 0x000000cc, 0x000002d0, 0x00050081, 0x0000002a, 0x000002d2, 0x000002cc, + 0x000002d1, 0x00050083, 0x0000002a, 0x000002d3, 0x0000020f, 0x00000077, 0x00050085, 0x0000002a, + 0x000002d4, 0x000002d3, 0x0000007c, 0x00050081, 0x0000002a, 0x000002d5, 0x000002d4, 0x00000081, + 0x0008000c, 0x0000002a, 0x000002d6, 0x00000001, 0x0000002b, 0x000002d5, 0x0000002f, 0x0000005f, + 0x00050091, 0x0000002a, 0x000002d7, 0x000000e1, 0x000002d6, 0x00050081, 0x0000002a, 0x000002d8, + 0x000002d2, 0x000002d7, 0x00050094, 0x00000026, 0x000002d9, 0x00000188, 0x00000215, 0x00050094, + 0x00000026, 0x000002da, 0x0000018b, 0x00000219, 0x00050081, 0x00000026, 0x000002db, 0x000002d9, + 0x000002da, 
0x00050083, 0x00000026, 0x000002dc, 0x000002db, 0x00000086, 0x00050085, 0x00000026, + 0x000002dd, 0x000002dc, 0x00000087, 0x00050081, 0x00000026, 0x000002de, 0x000002dd, 0x00000088, + 0x0008000c, 0x00000026, 0x000002df, 0x00000001, 0x0000002b, 0x000002de, 0x0000002e, 0x0000005e, + 0x0005008e, 0x0000002a, 0x000002e0, 0x000000e6, 0x000002df, 0x00050081, 0x0000002a, 0x000002e1, + 0x000002d8, 0x000002e0, 0x0004003d, 0x00000017, 0x000002e2, 0x00000004, 0x00050083, 0x0000002a, + 0x000002e3, 0x00000261, 0x00000053, 0x00050085, 0x0000002a, 0x000002e4, 0x000002e3, 0x00000058, + 0x00050081, 0x0000002a, 0x000002e5, 0x000002e4, 0x0000005d, 0x0008000c, 0x0000002a, 0x000002e6, + 0x00000001, 0x0000002b, 0x000002e5, 0x0000002f, 0x0000005f, 0x00050091, 0x0000002a, 0x000002e7, + 0x000000fa, 0x000002e6, 0x00050081, 0x0000002a, 0x000002e8, 0x000002e1, 0x000002e7, 0x00050083, + 0x0000002a, 0x000002e9, 0x0000029b, 0x00000077, 0x00050085, 0x0000002a, 0x000002ea, 0x000002e9, + 0x0000007c, 0x00050081, 0x0000002a, 0x000002eb, 0x000002ea, 0x00000081, 0x0008000c, 0x0000002a, + 0x000002ec, 0x00000001, 0x0000002b, 0x000002eb, 0x0000002f, 0x0000005f, 0x00050091, 0x0000002a, + 0x000002ed, 0x0000010f, 0x000002ec, 0x00050081, 0x0000002a, 0x000002ee, 0x000002e8, 0x000002ed, + 0x00050094, 0x00000026, 0x000002ef, 0x00000222, 0x0000029f, 0x00050094, 0x00000026, 0x000002f0, + 0x00000225, 0x000002a3, 0x00050081, 0x00000026, 0x000002f1, 0x000002ef, 0x000002f0, 0x00050083, + 0x00000026, 0x000002f2, 0x000002f1, 0x00000086, 0x00050085, 0x00000026, 0x000002f3, 0x000002f2, + 0x00000087, 0x00050081, 0x00000026, 0x000002f4, 0x000002f3, 0x00000088, 0x0008000c, 0x00000026, + 0x000002f5, 0x00000001, 0x0000002b, 0x000002f4, 0x0000002e, 0x0000005e, 0x0005008e, 0x0000002a, + 0x000002f6, 0x00000114, 0x000002f5, 0x00050081, 0x0000002a, 0x000002f7, 0x000002ee, 0x000002f6, + 0x00050083, 0x0000002a, 0x000002f8, 0x000002f7, 0x00000118, 0x00050085, 0x0000002a, 0x000002f9, + 0x000002f8, 0x0000011d, 0x00050081, 0x0000002a, 
0x000002fa, 0x000002f9, 0x00000122, 0x00040073, + 0x0000002c, 0x000002fb, 0x000002fa, 0x00040063, 0x000002e2, 0x00000129, 0x000002fb, 0x000200f9, + 0x00000125, 0x000200f8, 0x00000125, 0x000100fd, 0x00010038, +}; +static const size_t wnfg_25_spv_size = sizeof(wnfg_25_spv); diff --git a/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_27_spv.h b/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_27_spv.h new file mode 100644 index 000000000..1b1976b69 --- /dev/null +++ b/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_27_spv.h @@ -0,0 +1,280 @@ +#pragma once +#include +#include + +static const uint32_t wnfg_27_spv[] = { + 0x07230203, 0x00010000, 0x000d000b, 0x000001b4, 0x00000000, 0x00020011, 0x00000001, 0x00020011, + 0x00000009, 0x00020011, 0x00000032, 0x0006000b, 0x00000001, 0x4c534c47, 0x6474732e, 0x3035342e, + 0x00000000, 0x0003000e, 0x00000000, 0x00000001, 0x0006000f, 0x00000005, 0x00000002, 0x6e69616d, + 0x00000000, 0x00000003, 0x00060010, 0x00000002, 0x00000011, 0x00000010, 0x00000010, 0x00000001, + 0x00040047, 0x00000003, 0x0000000b, 0x0000001c, 0x00030047, 0x00000004, 0x00000019, 0x00040047, + 0x00000004, 0x00000021, 0x00000030, 0x00040047, 0x00000004, 0x00000022, 0x00000000, 0x00040047, + 0x00000005, 0x00000021, 0x00000020, 0x00040047, 0x00000005, 0x00000022, 0x00000000, 0x00040047, + 0x00000006, 0x0000000b, 0x00000019, 0x00020013, 0x00000007, 0x00030021, 0x00000008, 0x00000007, + 0x00040015, 0x00000009, 0x00000020, 0x00000001, 0x00040017, 0x0000000a, 0x00000009, 0x00000002, + 0x00040015, 0x0000000b, 0x00000020, 0x00000000, 0x00040017, 0x0000000c, 0x0000000b, 0x00000003, + 0x00040020, 0x0000000d, 0x00000001, 0x0000000c, 0x0004003b, 0x0000000d, 0x00000003, 0x00000001, + 0x00040017, 0x0000000e, 0x0000000b, 0x00000002, 0x00030016, 0x0000000f, 0x00000020, 0x00090019, + 0x00000010, 0x0000000f, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000004, + 0x00040020, 0x00000011, 0x00000000, 0x00000010, 0x0004003b, 0x00000011, 0x00000004, 0x00000000, + 0x00020014, 
0x00000012, 0x00040017, 0x00000013, 0x00000012, 0x00000002, 0x00090019, 0x00000014, + 0x0000000f, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x0003001b, + 0x00000015, 0x00000014, 0x00040020, 0x00000016, 0x00000000, 0x00000015, 0x0004003b, 0x00000016, + 0x00000005, 0x00000000, 0x0004002b, 0x00000009, 0x00000017, 0x00000000, 0x0004002b, 0x0000000b, + 0x00000018, 0x00000000, 0x0004002b, 0x0000000b, 0x00000019, 0x00000001, 0x00030016, 0x0000001a, + 0x00000010, 0x00040017, 0x0000001b, 0x0000001a, 0x00000004, 0x00040018, 0x0000001c, 0x0000001b, + 0x00000004, 0x0004002b, 0x0000001a, 0x0000001d, 0x0000b454, 0x0004002b, 0x0000001a, 0x0000001e, + 0x0000b0d8, 0x0004002b, 0x0000001a, 0x0000001f, 0x0000af37, 0x0004002b, 0x0000001a, 0x00000020, + 0x00003506, 0x0007002c, 0x0000001b, 0x00000021, 0x0000001d, 0x0000001e, 0x0000001f, 0x00000020, + 0x0004002b, 0x0000001a, 0x00000022, 0x0000382a, 0x0004002b, 0x0000001a, 0x00000023, 0x0000a1f4, + 0x0004002b, 0x0000001a, 0x00000024, 0x00003004, 0x0004002b, 0x0000001a, 0x00000025, 0x0000b48b, + 0x0007002c, 0x0000001b, 0x00000026, 0x00000022, 0x00000023, 0x00000024, 0x00000025, 0x0004002b, + 0x0000001a, 0x00000027, 0x000037d8, 0x0004002b, 0x0000001a, 0x00000028, 0x00002fcb, 0x0004002b, + 0x0000001a, 0x00000029, 0x0000b03f, 0x0004002b, 0x0000001a, 0x0000002a, 0x0000b661, 0x0007002c, + 0x0000001b, 0x0000002b, 0x00000027, 0x00000028, 0x00000029, 0x0000002a, 0x0004002b, 0x0000001a, + 0x0000002c, 0x00003aaa, 0x0004002b, 0x0000001a, 0x0000002d, 0x0000349c, 0x0004002b, 0x0000001a, + 0x0000002e, 0x0000b81a, 0x0004002b, 0x0000001a, 0x0000002f, 0x0000b5d7, 0x0007002c, 0x0000001b, + 0x00000030, 0x0000002c, 0x0000002d, 0x0000002e, 0x0000002f, 0x0007002c, 0x0000001c, 0x00000031, + 0x00000021, 0x00000026, 0x0000002b, 0x00000030, 0x0004002b, 0x00000009, 0x00000032, 0xffffffff, + 0x0005002c, 0x0000000a, 0x00000033, 0x00000032, 0x00000032, 0x0005002c, 0x0000000a, 0x00000034, + 0x00000017, 0x00000017, 0x00040017, 0x00000035, 
0x0000000f, 0x00000004, 0x0004002b, 0x0000000f, + 0x00000036, 0x00000000, 0x0007002c, 0x00000035, 0x00000037, 0x00000036, 0x00000036, 0x00000036, + 0x00000036, 0x0004002b, 0x0000001a, 0x00000038, 0x0000b69c, 0x0004002b, 0x0000001a, 0x00000039, + 0x0000b60c, 0x0004002b, 0x0000001a, 0x0000003a, 0x0000a9ed, 0x0004002b, 0x0000001a, 0x0000003b, + 0x000034a0, 0x0007002c, 0x0000001b, 0x0000003c, 0x00000038, 0x00000039, 0x0000003a, 0x0000003b, + 0x0004002b, 0x0000001a, 0x0000003d, 0x0000369a, 0x0004002b, 0x0000001a, 0x0000003e, 0x0000ac7a, + 0x0004002b, 0x0000001a, 0x0000003f, 0x00002e5d, 0x0004002b, 0x0000001a, 0x00000040, 0x0000b697, + 0x0007002c, 0x0000001b, 0x00000041, 0x0000003d, 0x0000003e, 0x0000003f, 0x00000040, 0x0004002b, + 0x0000001a, 0x00000042, 0x000030ad, 0x0004002b, 0x0000001a, 0x00000043, 0x00009b14, 0x0004002b, + 0x0000001a, 0x00000044, 0x0000b168, 0x0004002b, 0x0000001a, 0x00000045, 0x0000b74d, 0x0007002c, + 0x0000001b, 0x00000046, 0x00000042, 0x00000043, 0x00000044, 0x00000045, 0x0004002b, 0x0000001a, + 0x00000047, 0x00002e8c, 0x0004002b, 0x0000001a, 0x00000048, 0x00002d8c, 0x0004002b, 0x0000001a, + 0x00000049, 0x0000b8ab, 0x0004002b, 0x0000001a, 0x0000004a, 0x0000b694, 0x0007002c, 0x0000001b, + 0x0000004b, 0x00000047, 0x00000048, 0x00000049, 0x0000004a, 0x0007002c, 0x0000001c, 0x0000004c, + 0x0000003c, 0x00000041, 0x00000046, 0x0000004b, 0x0005002c, 0x0000000a, 0x0000004d, 0x00000032, + 0x00000017, 0x0004002b, 0x0000001a, 0x0000004e, 0x0000b197, 0x0004002b, 0x0000001a, 0x0000004f, + 0x0000bbe3, 0x0004002b, 0x0000001a, 0x00000050, 0x000038a7, 0x0004002b, 0x0000001a, 0x00000051, + 0x00003951, 0x0007002c, 0x0000001b, 0x00000052, 0x0000004e, 0x0000004f, 0x00000050, 0x00000051, + 0x0004002b, 0x0000001a, 0x00000053, 0x0000a4ed, 0x0004002b, 0x0000001a, 0x00000054, 0x00003480, + 0x0004002b, 0x0000001a, 0x00000055, 0x0000ba6f, 0x0004002b, 0x0000001a, 0x00000056, 0x0000ba8d, + 0x0007002c, 0x0000001b, 0x00000057, 0x00000053, 0x00000054, 0x00000055, 0x00000056, 
0x0004002b, + 0x0000001a, 0x00000058, 0x0000b550, 0x0004002b, 0x0000001a, 0x00000059, 0x00001d57, 0x0004002b, + 0x0000001a, 0x0000005a, 0x0000b7f6, 0x0004002b, 0x0000001a, 0x0000005b, 0x0000b870, 0x0007002c, + 0x0000001b, 0x0000005c, 0x00000058, 0x00000059, 0x0000005a, 0x0000005b, 0x0004002b, 0x0000001a, + 0x0000005d, 0x0000b4c6, 0x0004002b, 0x0000001a, 0x0000005e, 0x0000375f, 0x0004002b, 0x0000001a, + 0x0000005f, 0x0000bb4c, 0x0004002b, 0x0000001a, 0x00000060, 0x0000ba66, 0x0007002c, 0x0000001b, + 0x00000061, 0x0000005d, 0x0000005e, 0x0000005f, 0x00000060, 0x0007002c, 0x0000001c, 0x00000062, + 0x00000052, 0x00000057, 0x0000005c, 0x00000061, 0x0004002b, 0x00000009, 0x00000063, 0x00000001, + 0x0005002c, 0x0000000a, 0x00000064, 0x00000032, 0x00000063, 0x0004002b, 0x0000001a, 0x00000065, + 0x00003134, 0x0004002b, 0x0000001a, 0x00000066, 0x00002dca, 0x0004002b, 0x0000001a, 0x00000067, + 0x000031c1, 0x0004002b, 0x0000001a, 0x00000068, 0x0000b13d, 0x0007002c, 0x0000001b, 0x00000069, + 0x00000065, 0x00000066, 0x00000067, 0x00000068, 0x0004002b, 0x0000001a, 0x0000006a, 0x000035be, + 0x0004002b, 0x0000001a, 0x0000006b, 0x00002b20, 0x0004002b, 0x0000001a, 0x0000006c, 0x0000aacc, + 0x0004002b, 0x0000001a, 0x0000006d, 0x0000b603, 0x0007002c, 0x0000001b, 0x0000006e, 0x0000006a, + 0x0000006b, 0x0000006c, 0x0000006d, 0x0004002b, 0x0000001a, 0x0000006f, 0x00003311, 0x0004002b, + 0x0000001a, 0x00000070, 0x000036bf, 0x0004002b, 0x0000001a, 0x00000071, 0x0000b0ca, 0x0004002b, + 0x0000001a, 0x00000072, 0x0000ae8e, 0x0007002c, 0x0000001b, 0x00000073, 0x0000006f, 0x00000070, + 0x00000071, 0x00000072, 0x0004002b, 0x0000001a, 0x00000074, 0x00002b9b, 0x0004002b, 0x0000001a, + 0x00000075, 0x00002f95, 0x0004002b, 0x0000001a, 0x00000076, 0x0000bb24, 0x0004002b, 0x0000001a, + 0x00000077, 0x0000b4d9, 0x0007002c, 0x0000001b, 0x00000078, 0x00000074, 0x00000075, 0x00000076, + 0x00000077, 0x0007002c, 0x0000001c, 0x00000079, 0x00000069, 0x0000006e, 0x00000073, 0x00000078, + 0x0005002c, 0x0000000a, 
0x0000007a, 0x00000017, 0x00000032, 0x0004002b, 0x0000001a, 0x0000007b, + 0x000020d1, 0x0004002b, 0x0000001a, 0x0000007c, 0x00003223, 0x0004002b, 0x0000001a, 0x0000007d, + 0x0000b48d, 0x0004002b, 0x0000001a, 0x0000007e, 0x0000aa9f, 0x0007002c, 0x0000001b, 0x0000007f, + 0x0000007b, 0x0000007c, 0x0000007d, 0x0000007e, 0x0004002b, 0x0000001a, 0x00000080, 0x00003282, + 0x0004002b, 0x0000001a, 0x00000081, 0x000031c4, 0x0004002b, 0x0000001a, 0x00000082, 0x0000a311, + 0x0004002b, 0x0000001a, 0x00000083, 0x0000b6d0, 0x0007002c, 0x0000001b, 0x00000084, 0x00000080, + 0x00000081, 0x00000082, 0x00000083, 0x0004002b, 0x0000001a, 0x00000085, 0x0000a0ef, 0x0004002b, + 0x0000001a, 0x00000086, 0x00003576, 0x0004002b, 0x0000001a, 0x00000087, 0x0000b487, 0x0004002b, + 0x0000001a, 0x00000088, 0x0000aebc, 0x0007002c, 0x0000001b, 0x00000089, 0x00000085, 0x00000086, + 0x00000087, 0x00000088, 0x0004002b, 0x0000001a, 0x0000008a, 0x0000a8f5, 0x0004002b, 0x0000001a, + 0x0000008b, 0x0000a422, 0x0004002b, 0x0000001a, 0x0000008c, 0x0000b586, 0x0004002b, 0x0000001a, + 0x0000008d, 0x0000b6dc, 0x0007002c, 0x0000001b, 0x0000008e, 0x0000008a, 0x0000008b, 0x0000008c, + 0x0000008d, 0x0007002c, 0x0000001c, 0x0000008f, 0x0000007f, 0x00000084, 0x00000089, 0x0000008e, + 0x0004002b, 0x0000001a, 0x00000090, 0x0000340b, 0x0004002b, 0x0000001a, 0x00000091, 0x0000b928, + 0x0004002b, 0x0000001a, 0x00000092, 0x0000367c, 0x0004002b, 0x0000001a, 0x00000093, 0x0000b35c, + 0x0007002c, 0x0000001b, 0x00000094, 0x00000090, 0x00000091, 0x00000092, 0x00000093, 0x0004002b, + 0x0000001a, 0x00000095, 0x0000b55f, 0x0004002b, 0x0000001a, 0x00000096, 0x00003033, 0x0004002b, + 0x0000001a, 0x00000097, 0x0000b391, 0x0004002b, 0x0000001a, 0x00000098, 0x0000b4ee, 0x0007002c, + 0x0000001b, 0x00000099, 0x00000095, 0x00000096, 0x00000097, 0x00000098, 0x0004002b, 0x0000001a, + 0x0000009a, 0x0000b4fb, 0x0004002b, 0x0000001a, 0x0000009b, 0x00001af0, 0x0004002b, 0x0000001a, + 0x0000009c, 0x0000b356, 0x0004002b, 0x0000001a, 0x0000009d, 
0x0000b012, 0x0007002c, 0x0000001b, + 0x0000009e, 0x0000009a, 0x0000009b, 0x0000009c, 0x0000009d, 0x0004002b, 0x0000001a, 0x0000009f, + 0x0000b7d5, 0x0004002b, 0x0000001a, 0x000000a0, 0x00002f4c, 0x0004002b, 0x0000001a, 0x000000a1, + 0x0000b99a, 0x0004002b, 0x0000001a, 0x000000a2, 0x000023e2, 0x0007002c, 0x0000001b, 0x000000a3, + 0x0000009f, 0x000000a0, 0x000000a1, 0x000000a2, 0x0007002c, 0x0000001c, 0x000000a4, 0x00000094, + 0x00000099, 0x0000009e, 0x000000a3, 0x0005002c, 0x0000000a, 0x000000a5, 0x00000017, 0x00000063, + 0x0004002b, 0x0000001a, 0x000000a6, 0x0000350a, 0x0004002b, 0x0000001a, 0x000000a7, 0x00003a4d, + 0x0004002b, 0x0000001a, 0x000000a8, 0x0000bc6d, 0x0004002b, 0x0000001a, 0x000000a9, 0x0000b2a7, + 0x0007002c, 0x0000001b, 0x000000aa, 0x000000a6, 0x000000a7, 0x000000a8, 0x000000a9, 0x0004002b, + 0x0000001a, 0x000000ab, 0x00002ee7, 0x0004002b, 0x0000001a, 0x000000ac, 0x000033a7, 0x0004002b, + 0x0000001a, 0x000000ad, 0x0000b11a, 0x0004002b, 0x0000001a, 0x000000ae, 0x0000b5ef, 0x0007002c, + 0x0000001b, 0x000000af, 0x000000ab, 0x000000ac, 0x000000ad, 0x000000ae, 0x0004002b, 0x0000001a, + 0x000000b0, 0x0000a546, 0x0004002b, 0x0000001a, 0x000000b1, 0x000039b3, 0x0004002b, 0x0000001a, + 0x000000b2, 0x0000b06d, 0x0004002b, 0x0000001a, 0x000000b3, 0x0000b046, 0x0007002c, 0x0000001b, + 0x000000b4, 0x000000b0, 0x000000b1, 0x000000b2, 0x000000b3, 0x0004002b, 0x0000001a, 0x000000b5, + 0x00002c93, 0x0004002b, 0x0000001a, 0x000000b6, 0x0000386c, 0x0004002b, 0x0000001a, 0x000000b7, + 0x0000b849, 0x0004002b, 0x0000001a, 0x000000b8, 0x0000b85e, 0x0007002c, 0x0000001b, 0x000000b9, + 0x000000b5, 0x000000b6, 0x000000b7, 0x000000b8, 0x0007002c, 0x0000001c, 0x000000ba, 0x000000aa, + 0x000000af, 0x000000b4, 0x000000b9, 0x0005002c, 0x0000000a, 0x000000bb, 0x00000063, 0x00000032, + 0x0004002b, 0x0000001a, 0x000000bc, 0x0000b1ec, 0x0004002b, 0x0000001a, 0x000000bd, 0x000026b8, + 0x0004002b, 0x0000001a, 0x000000be, 0x0000b62a, 0x0004002b, 0x0000001a, 0x000000bf, 0x00009d10, + 
0x0007002c, 0x0000001b, 0x000000c0, 0x000000bc, 0x000000bd, 0x000000be, 0x000000bf, 0x0004002b, + 0x0000001a, 0x000000c1, 0x00002b4a, 0x0004002b, 0x0000001a, 0x000000c2, 0x00002f30, 0x0004002b, + 0x0000001a, 0x000000c3, 0x00003390, 0x0004002b, 0x0000001a, 0x000000c4, 0x0000b731, 0x0007002c, + 0x0000001b, 0x000000c5, 0x000000c1, 0x000000c2, 0x000000c3, 0x000000c4, 0x0004002b, 0x0000001a, + 0x000000c6, 0x0000a5e6, 0x0004002b, 0x0000001a, 0x000000c7, 0x000038b3, 0x0004002b, 0x0000001a, + 0x000000c8, 0x00003151, 0x0004002b, 0x0000001a, 0x000000c9, 0x0000b1ae, 0x0007002c, 0x0000001b, + 0x000000ca, 0x000000c6, 0x000000c7, 0x000000c8, 0x000000c9, 0x0004002b, 0x0000001a, 0x000000cb, + 0x0000a975, 0x0004002b, 0x0000001a, 0x000000cc, 0x00003049, 0x0004002b, 0x0000001a, 0x000000cd, + 0x0000af1f, 0x0004002b, 0x0000001a, 0x000000ce, 0x0000b518, 0x0007002c, 0x0000001b, 0x000000cf, + 0x000000cb, 0x000000cc, 0x000000cd, 0x000000ce, 0x0007002c, 0x0000001c, 0x000000d0, 0x000000c0, + 0x000000c5, 0x000000ca, 0x000000cf, 0x0005002c, 0x0000000a, 0x000000d1, 0x00000063, 0x00000017, + 0x0004002b, 0x0000001a, 0x000000d2, 0x0000bac1, 0x0004002b, 0x0000001a, 0x000000d3, 0x0000b7d0, + 0x0004002b, 0x0000001a, 0x000000d4, 0x0000b80f, 0x0004002b, 0x0000001a, 0x000000d5, 0x0000383c, + 0x0007002c, 0x0000001b, 0x000000d6, 0x000000d2, 0x000000d3, 0x000000d4, 0x000000d5, 0x0004002b, + 0x0000001a, 0x000000d7, 0x000038b9, 0x0004002b, 0x0000001a, 0x000000d8, 0x0000b7f2, 0x0004002b, + 0x0000001a, 0x000000d9, 0x0000ab65, 0x0007002c, 0x0000001b, 0x000000da, 0x00000083, 0x000000d7, + 0x000000d8, 0x000000d9, 0x0004002b, 0x0000001a, 0x000000db, 0x0000b699, 0x0004002b, 0x0000001a, + 0x000000dc, 0x00003207, 0x0004002b, 0x0000001a, 0x000000dd, 0x0000294d, 0x0004002b, 0x0000001a, + 0x000000de, 0x0000a782, 0x0007002c, 0x0000001b, 0x000000df, 0x000000db, 0x000000dc, 0x000000dd, + 0x000000de, 0x0004002b, 0x0000001a, 0x000000e0, 0x0000ba05, 0x0004002b, 0x0000001a, 0x000000e1, + 0x0000388c, 0x0004002b, 0x0000001a, 
0x000000e2, 0x0000b745, 0x0004002b, 0x0000001a, 0x000000e3, + 0x0000a909, 0x0007002c, 0x0000001b, 0x000000e4, 0x000000e0, 0x000000e1, 0x000000e2, 0x000000e3, + 0x0007002c, 0x0000001c, 0x000000e5, 0x000000d6, 0x000000da, 0x000000df, 0x000000e4, 0x0005002c, + 0x0000000a, 0x000000e6, 0x00000063, 0x00000063, 0x0004002b, 0x0000001a, 0x000000e7, 0x0000b842, + 0x0004002b, 0x0000001a, 0x000000e8, 0x00003f71, 0x0004002b, 0x0000001a, 0x000000e9, 0x0000c1eb, + 0x0004002b, 0x0000001a, 0x000000ea, 0x0000c1a0, 0x0007002c, 0x0000001b, 0x000000eb, 0x000000e7, + 0x000000e8, 0x000000e9, 0x000000ea, 0x0004002b, 0x0000001a, 0x000000ec, 0x00003701, 0x0004002b, + 0x0000001a, 0x000000ed, 0x0000387b, 0x0004002b, 0x0000001a, 0x000000ee, 0x0000302e, 0x0007002c, + 0x0000001b, 0x000000ef, 0x0000005e, 0x000000ec, 0x000000ed, 0x000000ee, 0x0004002b, 0x0000001a, + 0x000000f0, 0x00003719, 0x0004002b, 0x0000001a, 0x000000f1, 0x0000b09c, 0x0004002b, 0x0000001a, + 0x000000f2, 0x0000b4cc, 0x0004002b, 0x0000001a, 0x000000f3, 0x0000396a, 0x0007002c, 0x0000001b, + 0x000000f4, 0x000000f0, 0x000000f1, 0x000000f2, 0x000000f3, 0x0004002b, 0x0000000b, 0x000000f5, + 0x00000010, 0x0006002c, 0x0000000c, 0x00000006, 0x000000f5, 0x000000f5, 0x00000019, 0x00050036, + 0x00000007, 0x00000002, 0x00000000, 0x00000008, 0x000200f8, 0x000000f6, 0x000300f7, 0x000000f7, + 0x00000000, 0x000300fb, 0x00000018, 0x000000f8, 0x000200f8, 0x000000f8, 0x0004003d, 0x0000000c, + 0x000000f9, 0x00000003, 0x0007004f, 0x0000000e, 0x000000fa, 0x000000f9, 0x000000f9, 0x00000000, + 0x00000001, 0x0004007c, 0x0000000a, 0x000000fb, 0x000000fa, 0x0004003d, 0x00000010, 0x000000fc, + 0x00000004, 0x00040068, 0x0000000a, 0x000000fd, 0x000000fc, 0x000500af, 0x00000013, 0x000000fe, + 0x000000fb, 0x000000fd, 0x0004009a, 0x00000012, 0x000000ff, 0x000000fe, 0x000300f7, 0x00000100, + 0x00000000, 0x000400fa, 0x000000ff, 0x00000101, 0x00000100, 0x000200f8, 0x00000101, 0x000200f9, + 0x000000f7, 0x000200f8, 0x00000100, 0x0004003d, 0x00000010, 0x00000102, 
0x00000004, 0x00050080, + 0x0000000a, 0x00000103, 0x000000fb, 0x00000033, 0x000500af, 0x00000013, 0x00000104, 0x00000103, + 0x00000034, 0x0004009b, 0x00000012, 0x00000105, 0x00000104, 0x000300f7, 0x00000106, 0x00000000, + 0x000400fa, 0x00000105, 0x00000107, 0x00000106, 0x000200f8, 0x00000107, 0x0004003d, 0x00000015, + 0x00000108, 0x00000005, 0x00040064, 0x00000014, 0x00000109, 0x00000108, 0x00050067, 0x0000000a, + 0x0000010a, 0x00000109, 0x00000017, 0x000500b1, 0x00000013, 0x0000010b, 0x00000103, 0x0000010a, + 0x0004009b, 0x00000012, 0x0000010c, 0x0000010b, 0x000200f9, 0x00000106, 0x000200f8, 0x00000106, + 0x000700f5, 0x00000012, 0x0000010d, 0x00000105, 0x00000100, 0x0000010c, 0x00000107, 0x000300f7, + 0x0000010e, 0x00000000, 0x000400fa, 0x0000010d, 0x0000010f, 0x00000110, 0x000200f8, 0x0000010f, + 0x0004003d, 0x00000015, 0x00000111, 0x00000005, 0x00040064, 0x00000014, 0x00000112, 0x00000111, + 0x0007005f, 0x00000035, 0x00000113, 0x00000112, 0x00000103, 0x00000002, 0x00000017, 0x000200f9, + 0x0000010e, 0x000200f8, 0x00000110, 0x000200f9, 0x0000010e, 0x000200f8, 0x0000010e, 0x000700f5, + 0x00000035, 0x00000114, 0x00000113, 0x0000010f, 0x00000037, 0x00000110, 0x00040073, 0x0000001b, + 0x00000115, 0x00000114, 0x00050091, 0x0000001b, 0x00000116, 0x00000031, 0x00000115, 0x00050080, + 0x0000000a, 0x00000117, 0x000000fb, 0x0000004d, 0x000500af, 0x00000013, 0x00000118, 0x00000117, + 0x00000034, 0x0004009b, 0x00000012, 0x00000119, 0x00000118, 0x000300f7, 0x0000011a, 0x00000000, + 0x000400fa, 0x00000119, 0x0000011b, 0x0000011a, 0x000200f8, 0x0000011b, 0x0004003d, 0x00000015, + 0x0000011c, 0x00000005, 0x00040064, 0x00000014, 0x0000011d, 0x0000011c, 0x00050067, 0x0000000a, + 0x0000011e, 0x0000011d, 0x00000017, 0x000500b1, 0x00000013, 0x0000011f, 0x00000117, 0x0000011e, + 0x0004009b, 0x00000012, 0x00000120, 0x0000011f, 0x000200f9, 0x0000011a, 0x000200f8, 0x0000011a, + 0x000700f5, 0x00000012, 0x00000121, 0x00000119, 0x0000010e, 0x00000120, 0x0000011b, 0x000300f7, + 0x00000122, 
0x00000000, 0x000400fa, 0x00000121, 0x00000123, 0x00000124, 0x000200f8, 0x00000123, + 0x0004003d, 0x00000015, 0x00000125, 0x00000005, 0x00040064, 0x00000014, 0x00000126, 0x00000125, + 0x0007005f, 0x00000035, 0x00000127, 0x00000126, 0x00000117, 0x00000002, 0x00000017, 0x000200f9, + 0x00000122, 0x000200f8, 0x00000124, 0x000200f9, 0x00000122, 0x000200f8, 0x00000122, 0x000700f5, + 0x00000035, 0x00000128, 0x00000127, 0x00000123, 0x00000037, 0x00000124, 0x00040073, 0x0000001b, + 0x00000129, 0x00000128, 0x00050091, 0x0000001b, 0x0000012a, 0x0000004c, 0x00000129, 0x00050081, + 0x0000001b, 0x0000012b, 0x00000116, 0x0000012a, 0x00050080, 0x0000000a, 0x0000012c, 0x000000fb, + 0x00000064, 0x000500af, 0x00000013, 0x0000012d, 0x0000012c, 0x00000034, 0x0004009b, 0x00000012, + 0x0000012e, 0x0000012d, 0x000300f7, 0x0000012f, 0x00000000, 0x000400fa, 0x0000012e, 0x00000130, + 0x0000012f, 0x000200f8, 0x00000130, 0x0004003d, 0x00000015, 0x00000131, 0x00000005, 0x00040064, + 0x00000014, 0x00000132, 0x00000131, 0x00050067, 0x0000000a, 0x00000133, 0x00000132, 0x00000017, + 0x000500b1, 0x00000013, 0x00000134, 0x0000012c, 0x00000133, 0x0004009b, 0x00000012, 0x00000135, + 0x00000134, 0x000200f9, 0x0000012f, 0x000200f8, 0x0000012f, 0x000700f5, 0x00000012, 0x00000136, + 0x0000012e, 0x00000122, 0x00000135, 0x00000130, 0x000300f7, 0x00000137, 0x00000000, 0x000400fa, + 0x00000136, 0x00000138, 0x00000139, 0x000200f8, 0x00000138, 0x0004003d, 0x00000015, 0x0000013a, + 0x00000005, 0x00040064, 0x00000014, 0x0000013b, 0x0000013a, 0x0007005f, 0x00000035, 0x0000013c, + 0x0000013b, 0x0000012c, 0x00000002, 0x00000017, 0x000200f9, 0x00000137, 0x000200f8, 0x00000139, + 0x000200f9, 0x00000137, 0x000200f8, 0x00000137, 0x000700f5, 0x00000035, 0x0000013d, 0x0000013c, + 0x00000138, 0x00000037, 0x00000139, 0x00040073, 0x0000001b, 0x0000013e, 0x0000013d, 0x00050091, + 0x0000001b, 0x0000013f, 0x00000062, 0x0000013e, 0x00050081, 0x0000001b, 0x00000140, 0x0000012b, + 0x0000013f, 0x00050080, 0x0000000a, 0x00000141, 
0x000000fb, 0x0000007a, 0x000500af, 0x00000013, + 0x00000142, 0x00000141, 0x00000034, 0x0004009b, 0x00000012, 0x00000143, 0x00000142, 0x000300f7, + 0x00000144, 0x00000000, 0x000400fa, 0x00000143, 0x00000145, 0x00000144, 0x000200f8, 0x00000145, + 0x0004003d, 0x00000015, 0x00000146, 0x00000005, 0x00040064, 0x00000014, 0x00000147, 0x00000146, + 0x00050067, 0x0000000a, 0x00000148, 0x00000147, 0x00000017, 0x000500b1, 0x00000013, 0x00000149, + 0x00000141, 0x00000148, 0x0004009b, 0x00000012, 0x0000014a, 0x00000149, 0x000200f9, 0x00000144, + 0x000200f8, 0x00000144, 0x000700f5, 0x00000012, 0x0000014b, 0x00000143, 0x00000137, 0x0000014a, + 0x00000145, 0x000300f7, 0x0000014c, 0x00000000, 0x000400fa, 0x0000014b, 0x0000014d, 0x0000014e, + 0x000200f8, 0x0000014d, 0x0004003d, 0x00000015, 0x0000014f, 0x00000005, 0x00040064, 0x00000014, + 0x00000150, 0x0000014f, 0x0007005f, 0x00000035, 0x00000151, 0x00000150, 0x00000141, 0x00000002, + 0x00000017, 0x000200f9, 0x0000014c, 0x000200f8, 0x0000014e, 0x000200f9, 0x0000014c, 0x000200f8, + 0x0000014c, 0x000700f5, 0x00000035, 0x00000152, 0x00000151, 0x0000014d, 0x00000037, 0x0000014e, + 0x00040073, 0x0000001b, 0x00000153, 0x00000152, 0x00050091, 0x0000001b, 0x00000154, 0x00000079, + 0x00000153, 0x00050081, 0x0000001b, 0x00000155, 0x00000140, 0x00000154, 0x0004003d, 0x00000015, + 0x00000156, 0x00000005, 0x00040064, 0x00000014, 0x00000157, 0x00000156, 0x0007005f, 0x00000035, + 0x00000158, 0x00000157, 0x000000fb, 0x00000002, 0x00000017, 0x00040073, 0x0000001b, 0x00000159, + 0x00000158, 0x00050091, 0x0000001b, 0x0000015a, 0x0000008f, 0x00000159, 0x00050081, 0x0000001b, + 0x0000015b, 0x00000155, 0x0000015a, 0x00050080, 0x0000000a, 0x0000015c, 0x000000fb, 0x000000a5, + 0x000500af, 0x00000013, 0x0000015d, 0x0000015c, 0x00000034, 0x0004009b, 0x00000012, 0x0000015e, + 0x0000015d, 0x000300f7, 0x0000015f, 0x00000000, 0x000400fa, 0x0000015e, 0x00000160, 0x0000015f, + 0x000200f8, 0x00000160, 0x0004003d, 0x00000015, 0x00000161, 0x00000005, 0x00040064, 
0x00000014, + 0x00000162, 0x00000161, 0x00050067, 0x0000000a, 0x00000163, 0x00000162, 0x00000017, 0x000500b1, + 0x00000013, 0x00000164, 0x0000015c, 0x00000163, 0x0004009b, 0x00000012, 0x00000165, 0x00000164, + 0x000200f9, 0x0000015f, 0x000200f8, 0x0000015f, 0x000700f5, 0x00000012, 0x00000166, 0x0000015e, + 0x0000014c, 0x00000165, 0x00000160, 0x000300f7, 0x00000167, 0x00000000, 0x000400fa, 0x00000166, + 0x00000168, 0x00000169, 0x000200f8, 0x00000168, 0x0004003d, 0x00000015, 0x0000016a, 0x00000005, + 0x00040064, 0x00000014, 0x0000016b, 0x0000016a, 0x0007005f, 0x00000035, 0x0000016c, 0x0000016b, + 0x0000015c, 0x00000002, 0x00000017, 0x000200f9, 0x00000167, 0x000200f8, 0x00000169, 0x000200f9, + 0x00000167, 0x000200f8, 0x00000167, 0x000700f5, 0x00000035, 0x0000016d, 0x0000016c, 0x00000168, + 0x00000037, 0x00000169, 0x00040073, 0x0000001b, 0x0000016e, 0x0000016d, 0x00050091, 0x0000001b, + 0x0000016f, 0x000000a4, 0x0000016e, 0x00050081, 0x0000001b, 0x00000170, 0x0000015b, 0x0000016f, + 0x00050080, 0x0000000a, 0x00000171, 0x000000fb, 0x000000bb, 0x000500af, 0x00000013, 0x00000172, + 0x00000171, 0x00000034, 0x0004009b, 0x00000012, 0x00000173, 0x00000172, 0x000300f7, 0x00000174, + 0x00000000, 0x000400fa, 0x00000173, 0x00000175, 0x00000174, 0x000200f8, 0x00000175, 0x0004003d, + 0x00000015, 0x00000176, 0x00000005, 0x00040064, 0x00000014, 0x00000177, 0x00000176, 0x00050067, + 0x0000000a, 0x00000178, 0x00000177, 0x00000017, 0x000500b1, 0x00000013, 0x00000179, 0x00000171, + 0x00000178, 0x0004009b, 0x00000012, 0x0000017a, 0x00000179, 0x000200f9, 0x00000174, 0x000200f8, + 0x00000174, 0x000700f5, 0x00000012, 0x0000017b, 0x00000173, 0x00000167, 0x0000017a, 0x00000175, + 0x000300f7, 0x0000017c, 0x00000000, 0x000400fa, 0x0000017b, 0x0000017d, 0x0000017e, 0x000200f8, + 0x0000017d, 0x0004003d, 0x00000015, 0x0000017f, 0x00000005, 0x00040064, 0x00000014, 0x00000180, + 0x0000017f, 0x0007005f, 0x00000035, 0x00000181, 0x00000180, 0x00000171, 0x00000002, 0x00000017, + 0x000200f9, 0x0000017c, 
0x000200f8, 0x0000017e, 0x000200f9, 0x0000017c, 0x000200f8, 0x0000017c, + 0x000700f5, 0x00000035, 0x00000182, 0x00000181, 0x0000017d, 0x00000037, 0x0000017e, 0x00040073, + 0x0000001b, 0x00000183, 0x00000182, 0x00050091, 0x0000001b, 0x00000184, 0x000000ba, 0x00000183, + 0x00050081, 0x0000001b, 0x00000185, 0x00000170, 0x00000184, 0x00050080, 0x0000000a, 0x00000186, + 0x000000fb, 0x000000d1, 0x000500af, 0x00000013, 0x00000187, 0x00000186, 0x00000034, 0x0004009b, + 0x00000012, 0x00000188, 0x00000187, 0x000300f7, 0x00000189, 0x00000000, 0x000400fa, 0x00000188, + 0x0000018a, 0x00000189, 0x000200f8, 0x0000018a, 0x0004003d, 0x00000015, 0x0000018b, 0x00000005, + 0x00040064, 0x00000014, 0x0000018c, 0x0000018b, 0x00050067, 0x0000000a, 0x0000018d, 0x0000018c, + 0x00000017, 0x000500b1, 0x00000013, 0x0000018e, 0x00000186, 0x0000018d, 0x0004009b, 0x00000012, + 0x0000018f, 0x0000018e, 0x000200f9, 0x00000189, 0x000200f8, 0x00000189, 0x000700f5, 0x00000012, + 0x00000190, 0x00000188, 0x0000017c, 0x0000018f, 0x0000018a, 0x000300f7, 0x00000191, 0x00000000, + 0x000400fa, 0x00000190, 0x00000192, 0x00000193, 0x000200f8, 0x00000192, 0x0004003d, 0x00000015, + 0x00000194, 0x00000005, 0x00040064, 0x00000014, 0x00000195, 0x00000194, 0x0007005f, 0x00000035, + 0x00000196, 0x00000195, 0x00000186, 0x00000002, 0x00000017, 0x000200f9, 0x00000191, 0x000200f8, + 0x00000193, 0x000200f9, 0x00000191, 0x000200f8, 0x00000191, 0x000700f5, 0x00000035, 0x00000197, + 0x00000196, 0x00000192, 0x00000037, 0x00000193, 0x00040073, 0x0000001b, 0x00000198, 0x00000197, + 0x00050091, 0x0000001b, 0x00000199, 0x000000d0, 0x00000198, 0x00050081, 0x0000001b, 0x0000019a, + 0x00000185, 0x00000199, 0x00050080, 0x0000000a, 0x0000019b, 0x000000fb, 0x000000e6, 0x000500af, + 0x00000013, 0x0000019c, 0x0000019b, 0x00000034, 0x0004009b, 0x00000012, 0x0000019d, 0x0000019c, + 0x000300f7, 0x0000019e, 0x00000000, 0x000400fa, 0x0000019d, 0x0000019f, 0x0000019e, 0x000200f8, + 0x0000019f, 0x0004003d, 0x00000015, 0x000001a0, 0x00000005, 
0x00040064, 0x00000014, 0x000001a1, + 0x000001a0, 0x00050067, 0x0000000a, 0x000001a2, 0x000001a1, 0x00000017, 0x000500b1, 0x00000013, + 0x000001a3, 0x0000019b, 0x000001a2, 0x0004009b, 0x00000012, 0x000001a4, 0x000001a3, 0x000200f9, + 0x0000019e, 0x000200f8, 0x0000019e, 0x000700f5, 0x00000012, 0x000001a5, 0x0000019d, 0x00000191, + 0x000001a4, 0x0000019f, 0x000300f7, 0x000001a6, 0x00000000, 0x000400fa, 0x000001a5, 0x000001a7, + 0x000001a8, 0x000200f8, 0x000001a7, 0x0004003d, 0x00000015, 0x000001a9, 0x00000005, 0x00040064, + 0x00000014, 0x000001aa, 0x000001a9, 0x0007005f, 0x00000035, 0x000001ab, 0x000001aa, 0x0000019b, + 0x00000002, 0x00000017, 0x000200f9, 0x000001a6, 0x000200f8, 0x000001a8, 0x000200f9, 0x000001a6, + 0x000200f8, 0x000001a6, 0x000700f5, 0x00000035, 0x000001ac, 0x000001ab, 0x000001a7, 0x00000037, + 0x000001a8, 0x00040073, 0x0000001b, 0x000001ad, 0x000001ac, 0x00050091, 0x0000001b, 0x000001ae, + 0x000000e5, 0x000001ad, 0x00050081, 0x0000001b, 0x000001af, 0x0000019a, 0x000001ae, 0x00050083, + 0x0000001b, 0x000001b0, 0x000001af, 0x000000eb, 0x00050085, 0x0000001b, 0x000001b1, 0x000001b0, + 0x000000ef, 0x00050081, 0x0000001b, 0x000001b2, 0x000001b1, 0x000000f4, 0x00040073, 0x00000035, + 0x000001b3, 0x000001b2, 0x00040063, 0x00000102, 0x000000fb, 0x000001b3, 0x000200f9, 0x000000f7, + 0x000200f8, 0x000000f7, 0x000100fd, 0x00010038, +}; +static const size_t wnfg_27_spv_size = sizeof(wnfg_27_spv); diff --git a/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_28_spv.h b/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_28_spv.h new file mode 100644 index 000000000..c2b1c281a --- /dev/null +++ b/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_28_spv.h @@ -0,0 +1,281 @@ +#pragma once +#include +#include + +static const uint32_t wnfg_28_spv[] = { + 0x07230203, 0x00010000, 0x000d000b, 0x000001b6, 0x00000000, 0x00020011, 0x00000001, 0x00020011, + 0x00000009, 0x00020011, 0x00000032, 0x0006000b, 0x00000001, 0x4c534c47, 0x6474732e, 0x3035342e, + 0x00000000, 0x0003000e, 0x00000000, 
0x00000001, 0x0006000f, 0x00000005, 0x00000002, 0x6e69616d, + 0x00000000, 0x00000003, 0x00060010, 0x00000002, 0x00000011, 0x00000010, 0x00000010, 0x00000001, + 0x00040047, 0x00000003, 0x0000000b, 0x0000001c, 0x00030047, 0x00000004, 0x00000019, 0x00040047, + 0x00000004, 0x00000021, 0x00000030, 0x00040047, 0x00000004, 0x00000022, 0x00000000, 0x00040047, + 0x00000005, 0x00000021, 0x00000020, 0x00040047, 0x00000005, 0x00000022, 0x00000000, 0x00040047, + 0x00000006, 0x0000000b, 0x00000019, 0x00020013, 0x00000007, 0x00030021, 0x00000008, 0x00000007, + 0x00040015, 0x00000009, 0x00000020, 0x00000001, 0x00040017, 0x0000000a, 0x00000009, 0x00000002, + 0x00040015, 0x0000000b, 0x00000020, 0x00000000, 0x00040017, 0x0000000c, 0x0000000b, 0x00000003, + 0x00040020, 0x0000000d, 0x00000001, 0x0000000c, 0x0004003b, 0x0000000d, 0x00000003, 0x00000001, + 0x00040017, 0x0000000e, 0x0000000b, 0x00000002, 0x00030016, 0x0000000f, 0x00000020, 0x00090019, + 0x00000010, 0x0000000f, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000004, + 0x00040020, 0x00000011, 0x00000000, 0x00000010, 0x0004003b, 0x00000011, 0x00000004, 0x00000000, + 0x00020014, 0x00000012, 0x00040017, 0x00000013, 0x00000012, 0x00000002, 0x00090019, 0x00000014, + 0x0000000f, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x0003001b, + 0x00000015, 0x00000014, 0x00040020, 0x00000016, 0x00000000, 0x00000015, 0x0004003b, 0x00000016, + 0x00000005, 0x00000000, 0x0004002b, 0x00000009, 0x00000017, 0x00000000, 0x0004002b, 0x0000000b, + 0x00000018, 0x00000000, 0x0004002b, 0x0000000b, 0x00000019, 0x00000001, 0x00030016, 0x0000001a, + 0x00000010, 0x00040017, 0x0000001b, 0x0000001a, 0x00000004, 0x00040018, 0x0000001c, 0x0000001b, + 0x00000004, 0x0004002b, 0x0000001a, 0x0000001d, 0x00003a61, 0x0004002b, 0x0000001a, 0x0000001e, + 0x0000b6ca, 0x0004002b, 0x0000001a, 0x0000001f, 0x0000bb4d, 0x0004002b, 0x0000001a, 0x00000020, + 0x0000b97c, 0x0007002c, 0x0000001b, 0x00000021, 0x0000001d, 0x0000001e, 
0x0000001f, 0x00000020, + 0x0004002b, 0x0000001a, 0x00000022, 0x00001e9c, 0x0004002b, 0x0000001a, 0x00000023, 0x0000ae54, + 0x0004002b, 0x0000001a, 0x00000024, 0x0000af8f, 0x0004002b, 0x0000001a, 0x00000025, 0x0000b4bd, + 0x0007002c, 0x0000001b, 0x00000026, 0x00000022, 0x00000023, 0x00000024, 0x00000025, 0x0004002b, + 0x0000001a, 0x00000027, 0x00002b97, 0x0004002b, 0x0000001a, 0x00000028, 0x0000b054, 0x0004002b, + 0x0000001a, 0x00000029, 0x0000ae0c, 0x0004002b, 0x0000001a, 0x0000002a, 0x00002c19, 0x0007002c, + 0x0000001b, 0x0000002b, 0x00000027, 0x00000028, 0x00000029, 0x0000002a, 0x0004002b, 0x0000001a, + 0x0000002c, 0x0000ae9d, 0x0004002b, 0x0000001a, 0x0000002d, 0x0000ba75, 0x0004002b, 0x0000001a, + 0x0000002e, 0x00003451, 0x0004002b, 0x0000001a, 0x0000002f, 0x00003856, 0x0007002c, 0x0000001b, + 0x00000030, 0x0000002c, 0x0000002d, 0x0000002e, 0x0000002f, 0x0007002c, 0x0000001c, 0x00000031, + 0x00000021, 0x00000026, 0x0000002b, 0x00000030, 0x0004002b, 0x00000009, 0x00000032, 0xffffffff, + 0x0005002c, 0x0000000a, 0x00000033, 0x00000032, 0x00000032, 0x0005002c, 0x0000000a, 0x00000034, + 0x00000017, 0x00000017, 0x00040017, 0x00000035, 0x0000000f, 0x00000004, 0x0004002b, 0x0000000f, + 0x00000036, 0x00000000, 0x0007002c, 0x00000035, 0x00000037, 0x00000036, 0x00000036, 0x00000036, + 0x00000036, 0x0004002b, 0x0000001a, 0x00000038, 0x00002e54, 0x0004002b, 0x0000001a, 0x00000039, + 0x0000aef1, 0x0004002b, 0x0000001a, 0x0000003a, 0x0000ae4e, 0x0004002b, 0x0000001a, 0x0000003b, + 0x00002f16, 0x0007002c, 0x0000001b, 0x0000003c, 0x00000038, 0x00000039, 0x0000003a, 0x0000003b, + 0x0004002b, 0x0000001a, 0x0000003d, 0x000033de, 0x0004002b, 0x0000001a, 0x0000003e, 0x0000aff2, + 0x0004002b, 0x0000001a, 0x0000003f, 0x0000b4b6, 0x0004002b, 0x0000001a, 0x00000040, 0x0000b85e, + 0x0007002c, 0x0000001b, 0x00000041, 0x0000003d, 0x0000003e, 0x0000003f, 0x00000040, 0x0004002b, + 0x0000001a, 0x00000042, 0x0000b1fd, 0x0004002b, 0x0000001a, 0x00000043, 0x0000335e, 0x0004002b, + 0x0000001a, 
0x00000044, 0x000033c1, 0x0004002b, 0x0000001a, 0x00000045, 0x000034c2, 0x0007002c, + 0x0000001b, 0x00000046, 0x00000042, 0x00000043, 0x00000044, 0x00000045, 0x0004002b, 0x0000001a, + 0x00000047, 0x0000b04b, 0x0004002b, 0x0000001a, 0x00000048, 0x00003446, 0x0004002b, 0x0000001a, + 0x00000049, 0x000031e1, 0x0004002b, 0x0000001a, 0x0000004a, 0x0000b5de, 0x0007002c, 0x0000001b, + 0x0000004b, 0x00000047, 0x00000048, 0x00000049, 0x0000004a, 0x0007002c, 0x0000001c, 0x0000004c, + 0x0000003c, 0x00000041, 0x00000046, 0x0000004b, 0x0005002c, 0x0000000a, 0x0000004d, 0x00000032, + 0x00000017, 0x0004002b, 0x0000001a, 0x0000004e, 0x00003073, 0x0004002b, 0x0000001a, 0x0000004f, + 0x0000b449, 0x0004002b, 0x0000001a, 0x00000050, 0x0000ac19, 0x0004002b, 0x0000001a, 0x00000051, + 0x0000310a, 0x0007002c, 0x0000001b, 0x00000052, 0x0000004e, 0x0000004f, 0x00000050, 0x00000051, + 0x0004002b, 0x0000001a, 0x00000053, 0x00003ac9, 0x0004002b, 0x0000001a, 0x00000054, 0x0000ba50, + 0x0004002b, 0x0000001a, 0x00000055, 0x0000ba48, 0x0004002b, 0x0000001a, 0x00000056, 0x0000ba5d, + 0x0007002c, 0x0000001b, 0x00000057, 0x00000053, 0x00000054, 0x00000055, 0x00000056, 0x0004002b, + 0x0000001a, 0x00000058, 0x0000b5ac, 0x0004002b, 0x0000001a, 0x00000059, 0x00003227, 0x0004002b, + 0x0000001a, 0x0000005a, 0x000034d0, 0x0004002b, 0x0000001a, 0x0000005b, 0x0000361a, 0x0007002c, + 0x0000001b, 0x0000005c, 0x00000058, 0x00000059, 0x0000005a, 0x0000005b, 0x0004002b, 0x0000001a, + 0x0000005d, 0x0000b945, 0x0004002b, 0x0000001a, 0x0000005e, 0x0000b599, 0x0004002b, 0x0000001a, + 0x0000005f, 0x00003a5f, 0x0004002b, 0x0000001a, 0x00000060, 0x00003b0a, 0x0007002c, 0x0000001b, + 0x00000061, 0x0000005d, 0x0000005e, 0x0000005f, 0x00000060, 0x0007002c, 0x0000001c, 0x00000062, + 0x00000052, 0x00000057, 0x0000005c, 0x00000061, 0x0004002b, 0x00000009, 0x00000063, 0x00000001, + 0x0005002c, 0x0000000a, 0x00000064, 0x00000032, 0x00000063, 0x0004002b, 0x0000001a, 0x00000065, + 0x000030fd, 0x0004002b, 0x0000001a, 0x00000066, 
0x00003076, 0x0004002b, 0x0000001a, 0x00000067, + 0x0000b441, 0x0004002b, 0x0000001a, 0x00000068, 0x00003456, 0x0007002c, 0x0000001b, 0x00000069, + 0x00000065, 0x00000066, 0x00000067, 0x00000068, 0x0004002b, 0x0000001a, 0x0000006a, 0x00002935, + 0x0004002b, 0x0000001a, 0x0000006b, 0x00002b05, 0x0004002b, 0x0000001a, 0x0000006c, 0x0000b239, + 0x0004002b, 0x0000001a, 0x0000006d, 0x0000ac31, 0x0007002c, 0x0000001b, 0x0000006e, 0x0000006a, + 0x0000006b, 0x0000006c, 0x0000006d, 0x0004002b, 0x0000001a, 0x0000006f, 0x0000aa9f, 0x0004002b, + 0x0000001a, 0x00000070, 0x00002502, 0x0004002b, 0x0000001a, 0x00000071, 0x00003463, 0x0004002b, + 0x0000001a, 0x00000072, 0x00002c2c, 0x0007002c, 0x0000001b, 0x00000073, 0x0000006f, 0x00000070, + 0x00000071, 0x00000072, 0x0004002b, 0x0000001a, 0x00000074, 0x0000b318, 0x0004002b, 0x0000001a, + 0x00000075, 0x000035ed, 0x0004002b, 0x0000001a, 0x00000076, 0x0000352b, 0x0004002b, 0x0000001a, + 0x00000077, 0x000025bb, 0x0007002c, 0x0000001b, 0x00000078, 0x00000074, 0x00000075, 0x00000076, + 0x00000077, 0x0007002c, 0x0000001c, 0x00000079, 0x00000069, 0x0000006e, 0x00000073, 0x00000078, + 0x0005002c, 0x0000000a, 0x0000007a, 0x00000017, 0x00000032, 0x0004002b, 0x0000001a, 0x0000007b, + 0x0000ac6b, 0x0004002b, 0x0000001a, 0x0000007c, 0x000031cb, 0x0004002b, 0x0000001a, 0x0000007d, + 0x00002edb, 0x0004002b, 0x0000001a, 0x0000007e, 0x0000b22f, 0x0007002c, 0x0000001b, 0x0000007f, + 0x0000007b, 0x0000007c, 0x0000007d, 0x0000007e, 0x0004002b, 0x0000001a, 0x00000080, 0x0000a6af, + 0x0004002b, 0x0000001a, 0x00000081, 0x00003695, 0x0004002b, 0x0000001a, 0x00000082, 0x00009735, + 0x0004002b, 0x0000001a, 0x00000083, 0x0000b56b, 0x0007002c, 0x0000001b, 0x00000084, 0x00000080, + 0x00000081, 0x00000082, 0x00000083, 0x0004002b, 0x0000001a, 0x00000085, 0x0000a905, 0x0004002b, + 0x0000001a, 0x00000086, 0x0000b122, 0x0004002b, 0x0000001a, 0x00000087, 0x00003198, 0x0004002b, + 0x0000001a, 0x00000088, 0x0000af9a, 0x0007002c, 0x0000001b, 0x00000089, 0x00000085, 
0x00000086, + 0x00000087, 0x00000088, 0x0004002b, 0x0000001a, 0x0000008a, 0x0000aeaa, 0x0004002b, 0x0000001a, + 0x0000008b, 0x00003d3d, 0x0004002b, 0x0000001a, 0x0000008c, 0x0000ac8b, 0x0004002b, 0x0000001a, + 0x0000008d, 0x0000b8dc, 0x0007002c, 0x0000001b, 0x0000008e, 0x0000008a, 0x0000008b, 0x0000008c, + 0x0000008d, 0x0007002c, 0x0000001c, 0x0000008f, 0x0000007f, 0x00000084, 0x00000089, 0x0000008e, + 0x0004002b, 0x0000001a, 0x00000090, 0x0000b16e, 0x0004002b, 0x0000001a, 0x00000091, 0x00003560, + 0x0004002b, 0x0000001a, 0x00000092, 0x00003338, 0x0004002b, 0x0000001a, 0x00000093, 0x000030a4, + 0x0007002c, 0x0000001b, 0x00000094, 0x00000090, 0x00000091, 0x00000092, 0x00000093, 0x0004002b, + 0x0000001a, 0x00000095, 0x00003495, 0x0004002b, 0x0000001a, 0x00000096, 0x0000ad27, 0x0004002b, + 0x0000001a, 0x00000097, 0x0000b3a1, 0x0004002b, 0x0000001a, 0x00000098, 0x0000b6f3, 0x0007002c, + 0x0000001b, 0x00000099, 0x00000095, 0x00000096, 0x00000097, 0x00000098, 0x0004002b, 0x0000001a, + 0x0000009a, 0x0000b16c, 0x0004002b, 0x0000001a, 0x0000009b, 0x0000a527, 0x0004002b, 0x0000001a, + 0x0000009c, 0x0000350d, 0x0004002b, 0x0000001a, 0x0000009d, 0x0000ae32, 0x0007002c, 0x0000001b, + 0x0000009e, 0x0000009a, 0x0000009b, 0x0000009c, 0x0000009d, 0x0004002b, 0x0000001a, 0x0000009f, + 0x0000af1b, 0x0004002b, 0x0000001a, 0x000000a0, 0x000031cc, 0x0004002b, 0x0000001a, 0x000000a1, + 0x00002fce, 0x0004002b, 0x0000001a, 0x000000a2, 0x0000b346, 0x0007002c, 0x0000001b, 0x000000a3, + 0x0000009f, 0x000000a0, 0x000000a1, 0x000000a2, 0x0007002c, 0x0000001c, 0x000000a4, 0x00000094, + 0x00000099, 0x0000009e, 0x000000a3, 0x0005002c, 0x0000000a, 0x000000a5, 0x00000017, 0x00000063, + 0x0004002b, 0x0000001a, 0x000000a6, 0x000031a0, 0x0004002b, 0x0000001a, 0x000000a7, 0x0000b583, + 0x0004002b, 0x0000001a, 0x000000a8, 0x0000a957, 0x0004002b, 0x0000001a, 0x000000a9, 0x0000b10c, + 0x0007002c, 0x0000001b, 0x000000aa, 0x000000a6, 0x000000a7, 0x000000a8, 0x000000a9, 0x0004002b, + 0x0000001a, 0x000000ab, 
0x000039ca, 0x0004002b, 0x0000001a, 0x000000ac, 0x0000b695, 0x0004002b, + 0x0000001a, 0x000000ad, 0x0000badb, 0x0004002b, 0x0000001a, 0x000000ae, 0x0000b94f, 0x0007002c, + 0x0000001b, 0x000000af, 0x000000ab, 0x000000ac, 0x000000ad, 0x000000ae, 0x0004002b, 0x0000001a, + 0x000000b0, 0x0000b6df, 0x0004002b, 0x0000001a, 0x000000b1, 0x00002dae, 0x0004002b, 0x0000001a, + 0x000000b2, 0x0000365b, 0x0004002b, 0x0000001a, 0x000000b3, 0x00003867, 0x0007002c, 0x0000001b, + 0x000000b4, 0x000000b0, 0x000000b1, 0x000000b2, 0x000000b3, 0x0004002b, 0x0000001a, 0x000000b5, + 0x0000b9ad, 0x0004002b, 0x0000001a, 0x000000b6, 0x0000b78c, 0x0004002b, 0x0000001a, 0x000000b7, + 0x000038f6, 0x0004002b, 0x0000001a, 0x000000b8, 0x00003245, 0x0007002c, 0x0000001b, 0x000000b9, + 0x000000b5, 0x000000b6, 0x000000b7, 0x000000b8, 0x0007002c, 0x0000001c, 0x000000ba, 0x000000aa, + 0x000000af, 0x000000b4, 0x000000b9, 0x0005002c, 0x0000000a, 0x000000bb, 0x00000063, 0x00000032, + 0x0004002b, 0x0000001a, 0x000000bc, 0x0000b2d3, 0x0004002b, 0x0000001a, 0x000000bd, 0x00002d03, + 0x0004002b, 0x0000001a, 0x000000be, 0x00003542, 0x0004002b, 0x0000001a, 0x000000bf, 0x00003112, + 0x0007002c, 0x0000001b, 0x000000c0, 0x000000bc, 0x000000bd, 0x000000be, 0x000000bf, 0x0004002b, + 0x0000001a, 0x000000c1, 0x000036ad, 0x0004002b, 0x0000001a, 0x000000c2, 0x0000b43b, 0x0004002b, + 0x0000001a, 0x000000c3, 0x0000b571, 0x0004002b, 0x0000001a, 0x000000c4, 0x0000b904, 0x0007002c, + 0x0000001b, 0x000000c5, 0x000000c1, 0x000000c2, 0x000000c3, 0x000000c4, 0x0004002b, 0x0000001a, + 0x000000c6, 0x0000b51f, 0x0004002b, 0x0000001a, 0x000000c7, 0x000034d1, 0x0004002b, 0x0000001a, + 0x000000c8, 0x000035b8, 0x0004002b, 0x0000001a, 0x000000c9, 0x0000368c, 0x0007002c, 0x0000001b, + 0x000000ca, 0x000000c6, 0x000000c7, 0x000000c8, 0x000000c9, 0x0004002b, 0x0000001a, 0x000000cb, + 0x00002bb0, 0x0004002b, 0x0000001a, 0x000000cc, 0x00003532, 0x0004002b, 0x0000001a, 0x000000cd, + 0x0000302d, 0x0004002b, 0x0000001a, 0x000000ce, 0x0000b89f, 
0x0007002c, 0x0000001b, 0x000000cf, + 0x000000cb, 0x000000cc, 0x000000cd, 0x000000ce, 0x0007002c, 0x0000001c, 0x000000d0, 0x000000c0, + 0x000000c5, 0x000000ca, 0x000000cf, 0x0005002c, 0x0000000a, 0x000000d1, 0x00000063, 0x00000017, + 0x0004002b, 0x0000001a, 0x000000d2, 0x0000b88d, 0x0004002b, 0x0000001a, 0x000000d3, 0x0000217c, + 0x0004002b, 0x0000001a, 0x000000d4, 0x000038d6, 0x0004002b, 0x0000001a, 0x000000d5, 0x00003889, + 0x0007002c, 0x0000001b, 0x000000d6, 0x000000d2, 0x000000d3, 0x000000d4, 0x000000d5, 0x0004002b, + 0x0000001a, 0x000000d7, 0x000038d5, 0x0004002b, 0x0000001a, 0x000000d8, 0x0000ba2b, 0x0004002b, + 0x0000001a, 0x000000d9, 0x0000b722, 0x0004002b, 0x0000001a, 0x000000da, 0x0000b903, 0x0007002c, + 0x0000001b, 0x000000db, 0x000000d7, 0x000000d8, 0x000000d9, 0x000000da, 0x0004002b, 0x0000001a, + 0x000000dc, 0x0000b1a7, 0x0004002b, 0x0000001a, 0x000000dd, 0x00002cab, 0x0004002b, 0x0000001a, + 0x000000de, 0x00002787, 0x0004002b, 0x0000001a, 0x000000df, 0x0000a8f1, 0x0007002c, 0x0000001b, + 0x000000e0, 0x000000dc, 0x000000dd, 0x000000de, 0x000000df, 0x0004002b, 0x0000001a, 0x000000e1, + 0x00002e5c, 0x0004002b, 0x0000001a, 0x000000e2, 0x0000bad2, 0x0004002b, 0x0000001a, 0x000000e3, + 0x0000b36a, 0x0004002b, 0x0000001a, 0x000000e4, 0x0000b6e2, 0x0007002c, 0x0000001b, 0x000000e5, + 0x000000e1, 0x000000e2, 0x000000e3, 0x000000e4, 0x0007002c, 0x0000001c, 0x000000e6, 0x000000d6, + 0x000000db, 0x000000e0, 0x000000e5, 0x0005002c, 0x0000000a, 0x000000e7, 0x00000063, 0x00000063, + 0x0004002b, 0x0000001a, 0x000000e8, 0x0000bc4a, 0x0004002b, 0x0000001a, 0x000000e9, 0x0000b49b, + 0x0004002b, 0x0000001a, 0x000000ea, 0x00003d94, 0x0004002b, 0x0000001a, 0x000000eb, 0x0000b6cf, + 0x0007002c, 0x0000001b, 0x000000ec, 0x000000e8, 0x000000e9, 0x000000ea, 0x000000eb, 0x0004002b, + 0x0000001a, 0x000000ed, 0x000039a9, 0x0004002b, 0x0000001a, 0x000000ee, 0x00003873, 0x0004002b, + 0x0000001a, 0x000000ef, 0x00003961, 0x0004002b, 0x0000001a, 0x000000f0, 0x00003916, 0x0007002c, + 
0x0000001b, 0x000000f1, 0x000000ed, 0x000000ee, 0x000000ef, 0x000000f0, 0x0004002b, 0x0000001a, + 0x000000f2, 0x00003203, 0x0004002b, 0x0000001a, 0x000000f3, 0x00003418, 0x0004002b, 0x0000001a, + 0x000000f4, 0x00003aee, 0x0004002b, 0x0000001a, 0x000000f5, 0x0000b4dc, 0x0007002c, 0x0000001b, + 0x000000f6, 0x000000f2, 0x000000f3, 0x000000f4, 0x000000f5, 0x0004002b, 0x0000000b, 0x000000f7, + 0x00000010, 0x0006002c, 0x0000000c, 0x00000006, 0x000000f7, 0x000000f7, 0x00000019, 0x00050036, + 0x00000007, 0x00000002, 0x00000000, 0x00000008, 0x000200f8, 0x000000f8, 0x000300f7, 0x000000f9, + 0x00000000, 0x000300fb, 0x00000018, 0x000000fa, 0x000200f8, 0x000000fa, 0x0004003d, 0x0000000c, + 0x000000fb, 0x00000003, 0x0007004f, 0x0000000e, 0x000000fc, 0x000000fb, 0x000000fb, 0x00000000, + 0x00000001, 0x0004007c, 0x0000000a, 0x000000fd, 0x000000fc, 0x0004003d, 0x00000010, 0x000000fe, + 0x00000004, 0x00040068, 0x0000000a, 0x000000ff, 0x000000fe, 0x000500af, 0x00000013, 0x00000100, + 0x000000fd, 0x000000ff, 0x0004009a, 0x00000012, 0x00000101, 0x00000100, 0x000300f7, 0x00000102, + 0x00000000, 0x000400fa, 0x00000101, 0x00000103, 0x00000102, 0x000200f8, 0x00000103, 0x000200f9, + 0x000000f9, 0x000200f8, 0x00000102, 0x0004003d, 0x00000010, 0x00000104, 0x00000004, 0x00050080, + 0x0000000a, 0x00000105, 0x000000fd, 0x00000033, 0x000500af, 0x00000013, 0x00000106, 0x00000105, + 0x00000034, 0x0004009b, 0x00000012, 0x00000107, 0x00000106, 0x000300f7, 0x00000108, 0x00000000, + 0x000400fa, 0x00000107, 0x00000109, 0x00000108, 0x000200f8, 0x00000109, 0x0004003d, 0x00000015, + 0x0000010a, 0x00000005, 0x00040064, 0x00000014, 0x0000010b, 0x0000010a, 0x00050067, 0x0000000a, + 0x0000010c, 0x0000010b, 0x00000017, 0x000500b1, 0x00000013, 0x0000010d, 0x00000105, 0x0000010c, + 0x0004009b, 0x00000012, 0x0000010e, 0x0000010d, 0x000200f9, 0x00000108, 0x000200f8, 0x00000108, + 0x000700f5, 0x00000012, 0x0000010f, 0x00000107, 0x00000102, 0x0000010e, 0x00000109, 0x000300f7, + 0x00000110, 0x00000000, 0x000400fa, 
0x0000010f, 0x00000111, 0x00000112, 0x000200f8, 0x00000111, + 0x0004003d, 0x00000015, 0x00000113, 0x00000005, 0x00040064, 0x00000014, 0x00000114, 0x00000113, + 0x0007005f, 0x00000035, 0x00000115, 0x00000114, 0x00000105, 0x00000002, 0x00000017, 0x000200f9, + 0x00000110, 0x000200f8, 0x00000112, 0x000200f9, 0x00000110, 0x000200f8, 0x00000110, 0x000700f5, + 0x00000035, 0x00000116, 0x00000115, 0x00000111, 0x00000037, 0x00000112, 0x00040073, 0x0000001b, + 0x00000117, 0x00000116, 0x00050091, 0x0000001b, 0x00000118, 0x00000031, 0x00000117, 0x00050080, + 0x0000000a, 0x00000119, 0x000000fd, 0x0000004d, 0x000500af, 0x00000013, 0x0000011a, 0x00000119, + 0x00000034, 0x0004009b, 0x00000012, 0x0000011b, 0x0000011a, 0x000300f7, 0x0000011c, 0x00000000, + 0x000400fa, 0x0000011b, 0x0000011d, 0x0000011c, 0x000200f8, 0x0000011d, 0x0004003d, 0x00000015, + 0x0000011e, 0x00000005, 0x00040064, 0x00000014, 0x0000011f, 0x0000011e, 0x00050067, 0x0000000a, + 0x00000120, 0x0000011f, 0x00000017, 0x000500b1, 0x00000013, 0x00000121, 0x00000119, 0x00000120, + 0x0004009b, 0x00000012, 0x00000122, 0x00000121, 0x000200f9, 0x0000011c, 0x000200f8, 0x0000011c, + 0x000700f5, 0x00000012, 0x00000123, 0x0000011b, 0x00000110, 0x00000122, 0x0000011d, 0x000300f7, + 0x00000124, 0x00000000, 0x000400fa, 0x00000123, 0x00000125, 0x00000126, 0x000200f8, 0x00000125, + 0x0004003d, 0x00000015, 0x00000127, 0x00000005, 0x00040064, 0x00000014, 0x00000128, 0x00000127, + 0x0007005f, 0x00000035, 0x00000129, 0x00000128, 0x00000119, 0x00000002, 0x00000017, 0x000200f9, + 0x00000124, 0x000200f8, 0x00000126, 0x000200f9, 0x00000124, 0x000200f8, 0x00000124, 0x000700f5, + 0x00000035, 0x0000012a, 0x00000129, 0x00000125, 0x00000037, 0x00000126, 0x00040073, 0x0000001b, + 0x0000012b, 0x0000012a, 0x00050091, 0x0000001b, 0x0000012c, 0x0000004c, 0x0000012b, 0x00050081, + 0x0000001b, 0x0000012d, 0x00000118, 0x0000012c, 0x00050080, 0x0000000a, 0x0000012e, 0x000000fd, + 0x00000064, 0x000500af, 0x00000013, 0x0000012f, 0x0000012e, 0x00000034, 
0x0004009b, 0x00000012, + 0x00000130, 0x0000012f, 0x000300f7, 0x00000131, 0x00000000, 0x000400fa, 0x00000130, 0x00000132, + 0x00000131, 0x000200f8, 0x00000132, 0x0004003d, 0x00000015, 0x00000133, 0x00000005, 0x00040064, + 0x00000014, 0x00000134, 0x00000133, 0x00050067, 0x0000000a, 0x00000135, 0x00000134, 0x00000017, + 0x000500b1, 0x00000013, 0x00000136, 0x0000012e, 0x00000135, 0x0004009b, 0x00000012, 0x00000137, + 0x00000136, 0x000200f9, 0x00000131, 0x000200f8, 0x00000131, 0x000700f5, 0x00000012, 0x00000138, + 0x00000130, 0x00000124, 0x00000137, 0x00000132, 0x000300f7, 0x00000139, 0x00000000, 0x000400fa, + 0x00000138, 0x0000013a, 0x0000013b, 0x000200f8, 0x0000013a, 0x0004003d, 0x00000015, 0x0000013c, + 0x00000005, 0x00040064, 0x00000014, 0x0000013d, 0x0000013c, 0x0007005f, 0x00000035, 0x0000013e, + 0x0000013d, 0x0000012e, 0x00000002, 0x00000017, 0x000200f9, 0x00000139, 0x000200f8, 0x0000013b, + 0x000200f9, 0x00000139, 0x000200f8, 0x00000139, 0x000700f5, 0x00000035, 0x0000013f, 0x0000013e, + 0x0000013a, 0x00000037, 0x0000013b, 0x00040073, 0x0000001b, 0x00000140, 0x0000013f, 0x00050091, + 0x0000001b, 0x00000141, 0x00000062, 0x00000140, 0x00050081, 0x0000001b, 0x00000142, 0x0000012d, + 0x00000141, 0x00050080, 0x0000000a, 0x00000143, 0x000000fd, 0x0000007a, 0x000500af, 0x00000013, + 0x00000144, 0x00000143, 0x00000034, 0x0004009b, 0x00000012, 0x00000145, 0x00000144, 0x000300f7, + 0x00000146, 0x00000000, 0x000400fa, 0x00000145, 0x00000147, 0x00000146, 0x000200f8, 0x00000147, + 0x0004003d, 0x00000015, 0x00000148, 0x00000005, 0x00040064, 0x00000014, 0x00000149, 0x00000148, + 0x00050067, 0x0000000a, 0x0000014a, 0x00000149, 0x00000017, 0x000500b1, 0x00000013, 0x0000014b, + 0x00000143, 0x0000014a, 0x0004009b, 0x00000012, 0x0000014c, 0x0000014b, 0x000200f9, 0x00000146, + 0x000200f8, 0x00000146, 0x000700f5, 0x00000012, 0x0000014d, 0x00000145, 0x00000139, 0x0000014c, + 0x00000147, 0x000300f7, 0x0000014e, 0x00000000, 0x000400fa, 0x0000014d, 0x0000014f, 0x00000150, + 0x000200f8, 
0x0000014f, 0x0004003d, 0x00000015, 0x00000151, 0x00000005, 0x00040064, 0x00000014, + 0x00000152, 0x00000151, 0x0007005f, 0x00000035, 0x00000153, 0x00000152, 0x00000143, 0x00000002, + 0x00000017, 0x000200f9, 0x0000014e, 0x000200f8, 0x00000150, 0x000200f9, 0x0000014e, 0x000200f8, + 0x0000014e, 0x000700f5, 0x00000035, 0x00000154, 0x00000153, 0x0000014f, 0x00000037, 0x00000150, + 0x00040073, 0x0000001b, 0x00000155, 0x00000154, 0x00050091, 0x0000001b, 0x00000156, 0x00000079, + 0x00000155, 0x00050081, 0x0000001b, 0x00000157, 0x00000142, 0x00000156, 0x0004003d, 0x00000015, + 0x00000158, 0x00000005, 0x00040064, 0x00000014, 0x00000159, 0x00000158, 0x0007005f, 0x00000035, + 0x0000015a, 0x00000159, 0x000000fd, 0x00000002, 0x00000017, 0x00040073, 0x0000001b, 0x0000015b, + 0x0000015a, 0x00050091, 0x0000001b, 0x0000015c, 0x0000008f, 0x0000015b, 0x00050081, 0x0000001b, + 0x0000015d, 0x00000157, 0x0000015c, 0x00050080, 0x0000000a, 0x0000015e, 0x000000fd, 0x000000a5, + 0x000500af, 0x00000013, 0x0000015f, 0x0000015e, 0x00000034, 0x0004009b, 0x00000012, 0x00000160, + 0x0000015f, 0x000300f7, 0x00000161, 0x00000000, 0x000400fa, 0x00000160, 0x00000162, 0x00000161, + 0x000200f8, 0x00000162, 0x0004003d, 0x00000015, 0x00000163, 0x00000005, 0x00040064, 0x00000014, + 0x00000164, 0x00000163, 0x00050067, 0x0000000a, 0x00000165, 0x00000164, 0x00000017, 0x000500b1, + 0x00000013, 0x00000166, 0x0000015e, 0x00000165, 0x0004009b, 0x00000012, 0x00000167, 0x00000166, + 0x000200f9, 0x00000161, 0x000200f8, 0x00000161, 0x000700f5, 0x00000012, 0x00000168, 0x00000160, + 0x0000014e, 0x00000167, 0x00000162, 0x000300f7, 0x00000169, 0x00000000, 0x000400fa, 0x00000168, + 0x0000016a, 0x0000016b, 0x000200f8, 0x0000016a, 0x0004003d, 0x00000015, 0x0000016c, 0x00000005, + 0x00040064, 0x00000014, 0x0000016d, 0x0000016c, 0x0007005f, 0x00000035, 0x0000016e, 0x0000016d, + 0x0000015e, 0x00000002, 0x00000017, 0x000200f9, 0x00000169, 0x000200f8, 0x0000016b, 0x000200f9, + 0x00000169, 0x000200f8, 0x00000169, 0x000700f5, 
0x00000035, 0x0000016f, 0x0000016e, 0x0000016a, + 0x00000037, 0x0000016b, 0x00040073, 0x0000001b, 0x00000170, 0x0000016f, 0x00050091, 0x0000001b, + 0x00000171, 0x000000a4, 0x00000170, 0x00050081, 0x0000001b, 0x00000172, 0x0000015d, 0x00000171, + 0x00050080, 0x0000000a, 0x00000173, 0x000000fd, 0x000000bb, 0x000500af, 0x00000013, 0x00000174, + 0x00000173, 0x00000034, 0x0004009b, 0x00000012, 0x00000175, 0x00000174, 0x000300f7, 0x00000176, + 0x00000000, 0x000400fa, 0x00000175, 0x00000177, 0x00000176, 0x000200f8, 0x00000177, 0x0004003d, + 0x00000015, 0x00000178, 0x00000005, 0x00040064, 0x00000014, 0x00000179, 0x00000178, 0x00050067, + 0x0000000a, 0x0000017a, 0x00000179, 0x00000017, 0x000500b1, 0x00000013, 0x0000017b, 0x00000173, + 0x0000017a, 0x0004009b, 0x00000012, 0x0000017c, 0x0000017b, 0x000200f9, 0x00000176, 0x000200f8, + 0x00000176, 0x000700f5, 0x00000012, 0x0000017d, 0x00000175, 0x00000169, 0x0000017c, 0x00000177, + 0x000300f7, 0x0000017e, 0x00000000, 0x000400fa, 0x0000017d, 0x0000017f, 0x00000180, 0x000200f8, + 0x0000017f, 0x0004003d, 0x00000015, 0x00000181, 0x00000005, 0x00040064, 0x00000014, 0x00000182, + 0x00000181, 0x0007005f, 0x00000035, 0x00000183, 0x00000182, 0x00000173, 0x00000002, 0x00000017, + 0x000200f9, 0x0000017e, 0x000200f8, 0x00000180, 0x000200f9, 0x0000017e, 0x000200f8, 0x0000017e, + 0x000700f5, 0x00000035, 0x00000184, 0x00000183, 0x0000017f, 0x00000037, 0x00000180, 0x00040073, + 0x0000001b, 0x00000185, 0x00000184, 0x00050091, 0x0000001b, 0x00000186, 0x000000ba, 0x00000185, + 0x00050081, 0x0000001b, 0x00000187, 0x00000172, 0x00000186, 0x00050080, 0x0000000a, 0x00000188, + 0x000000fd, 0x000000d1, 0x000500af, 0x00000013, 0x00000189, 0x00000188, 0x00000034, 0x0004009b, + 0x00000012, 0x0000018a, 0x00000189, 0x000300f7, 0x0000018b, 0x00000000, 0x000400fa, 0x0000018a, + 0x0000018c, 0x0000018b, 0x000200f8, 0x0000018c, 0x0004003d, 0x00000015, 0x0000018d, 0x00000005, + 0x00040064, 0x00000014, 0x0000018e, 0x0000018d, 0x00050067, 0x0000000a, 0x0000018f, 
0x0000018e, + 0x00000017, 0x000500b1, 0x00000013, 0x00000190, 0x00000188, 0x0000018f, 0x0004009b, 0x00000012, + 0x00000191, 0x00000190, 0x000200f9, 0x0000018b, 0x000200f8, 0x0000018b, 0x000700f5, 0x00000012, + 0x00000192, 0x0000018a, 0x0000017e, 0x00000191, 0x0000018c, 0x000300f7, 0x00000193, 0x00000000, + 0x000400fa, 0x00000192, 0x00000194, 0x00000195, 0x000200f8, 0x00000194, 0x0004003d, 0x00000015, + 0x00000196, 0x00000005, 0x00040064, 0x00000014, 0x00000197, 0x00000196, 0x0007005f, 0x00000035, + 0x00000198, 0x00000197, 0x00000188, 0x00000002, 0x00000017, 0x000200f9, 0x00000193, 0x000200f8, + 0x00000195, 0x000200f9, 0x00000193, 0x000200f8, 0x00000193, 0x000700f5, 0x00000035, 0x00000199, + 0x00000198, 0x00000194, 0x00000037, 0x00000195, 0x00040073, 0x0000001b, 0x0000019a, 0x00000199, + 0x00050091, 0x0000001b, 0x0000019b, 0x000000d0, 0x0000019a, 0x00050081, 0x0000001b, 0x0000019c, + 0x00000187, 0x0000019b, 0x00050080, 0x0000000a, 0x0000019d, 0x000000fd, 0x000000e7, 0x000500af, + 0x00000013, 0x0000019e, 0x0000019d, 0x00000034, 0x0004009b, 0x00000012, 0x0000019f, 0x0000019e, + 0x000300f7, 0x000001a0, 0x00000000, 0x000400fa, 0x0000019f, 0x000001a1, 0x000001a0, 0x000200f8, + 0x000001a1, 0x0004003d, 0x00000015, 0x000001a2, 0x00000005, 0x00040064, 0x00000014, 0x000001a3, + 0x000001a2, 0x00050067, 0x0000000a, 0x000001a4, 0x000001a3, 0x00000017, 0x000500b1, 0x00000013, + 0x000001a5, 0x0000019d, 0x000001a4, 0x0004009b, 0x00000012, 0x000001a6, 0x000001a5, 0x000200f9, + 0x000001a0, 0x000200f8, 0x000001a0, 0x000700f5, 0x00000012, 0x000001a7, 0x0000019f, 0x00000193, + 0x000001a6, 0x000001a1, 0x000300f7, 0x000001a8, 0x00000000, 0x000400fa, 0x000001a7, 0x000001a9, + 0x000001aa, 0x000200f8, 0x000001a9, 0x0004003d, 0x00000015, 0x000001ab, 0x00000005, 0x00040064, + 0x00000014, 0x000001ac, 0x000001ab, 0x0007005f, 0x00000035, 0x000001ad, 0x000001ac, 0x0000019d, + 0x00000002, 0x00000017, 0x000200f9, 0x000001a8, 0x000200f8, 0x000001aa, 0x000200f9, 0x000001a8, + 0x000200f8, 0x000001a8, 
0x000700f5, 0x00000035, 0x000001ae, 0x000001ad, 0x000001a9, 0x00000037, + 0x000001aa, 0x00040073, 0x0000001b, 0x000001af, 0x000001ae, 0x00050091, 0x0000001b, 0x000001b0, + 0x000000e6, 0x000001af, 0x00050081, 0x0000001b, 0x000001b1, 0x0000019c, 0x000001b0, 0x00050083, + 0x0000001b, 0x000001b2, 0x000001b1, 0x000000ec, 0x00050085, 0x0000001b, 0x000001b3, 0x000001b2, + 0x000000f1, 0x00050081, 0x0000001b, 0x000001b4, 0x000001b3, 0x000000f6, 0x00040073, 0x00000035, + 0x000001b5, 0x000001b4, 0x00040063, 0x00000104, 0x000000fd, 0x000001b5, 0x000200f9, 0x000000f9, + 0x000200f8, 0x000000f9, 0x000100fd, 0x00010038, +}; +static const size_t wnfg_28_spv_size = sizeof(wnfg_28_spv); diff --git a/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_29_spv.h b/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_29_spv.h new file mode 100644 index 000000000..89f1dee43 --- /dev/null +++ b/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_29_spv.h @@ -0,0 +1,200 @@ +#pragma once +#include +#include + +static const uint32_t wnfg_29_spv[] = { + 0x07230203, 0x00010000, 0x000d000b, 0x00000139, 0x00000000, 0x00020011, 0x00000001, 0x00020011, + 0x00000009, 0x00020011, 0x00000032, 0x0006000b, 0x00000001, 0x4c534c47, 0x6474732e, 0x3035342e, + 0x00000000, 0x0003000e, 0x00000000, 0x00000001, 0x0006000f, 0x00000005, 0x00000002, 0x6e69616d, + 0x00000000, 0x00000003, 0x00060010, 0x00000002, 0x00000011, 0x00000010, 0x00000010, 0x00000001, + 0x00040047, 0x00000003, 0x0000000b, 0x0000001c, 0x00030047, 0x00000004, 0x00000019, 0x00040047, + 0x00000004, 0x00000021, 0x00000030, 0x00040047, 0x00000004, 0x00000022, 0x00000000, 0x00040047, + 0x00000005, 0x00000021, 0x00000020, 0x00040047, 0x00000005, 0x00000022, 0x00000000, 0x00040047, + 0x00000006, 0x00000021, 0x00000021, 0x00040047, 0x00000006, 0x00000022, 0x00000000, 0x00040047, + 0x00000007, 0x0000000b, 0x00000019, 0x00020013, 0x00000008, 0x00030021, 0x00000009, 0x00000008, + 0x00040015, 0x0000000a, 0x00000020, 0x00000001, 0x00040017, 0x0000000b, 0x0000000a, 0x00000002, + 
0x00040015, 0x0000000c, 0x00000020, 0x00000000, 0x00040017, 0x0000000d, 0x0000000c, 0x00000003, + 0x00040020, 0x0000000e, 0x00000001, 0x0000000d, 0x0004003b, 0x0000000e, 0x00000003, 0x00000001, + 0x00040017, 0x0000000f, 0x0000000c, 0x00000002, 0x00030016, 0x00000010, 0x00000020, 0x00090019, + 0x00000011, 0x00000010, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000002, + 0x00040020, 0x00000012, 0x00000000, 0x00000011, 0x0004003b, 0x00000012, 0x00000004, 0x00000000, + 0x00020014, 0x00000013, 0x00040017, 0x00000014, 0x00000013, 0x00000002, 0x00090019, 0x00000015, + 0x00000010, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x0003001b, + 0x00000016, 0x00000015, 0x00040020, 0x00000017, 0x00000000, 0x00000016, 0x0004003b, 0x00000017, + 0x00000005, 0x00000000, 0x0004002b, 0x0000000a, 0x00000018, 0x00000000, 0x00040017, 0x00000019, + 0x00000010, 0x00000002, 0x0004002b, 0x00000010, 0x0000001a, 0x3f000000, 0x0005002c, 0x00000019, + 0x0000001b, 0x0000001a, 0x0000001a, 0x0004002b, 0x0000000c, 0x0000001c, 0x00000000, 0x0004002b, + 0x0000000c, 0x0000001d, 0x00000001, 0x00030016, 0x0000001e, 0x00000010, 0x00040017, 0x0000001f, + 0x0000001e, 0x00000004, 0x00040018, 0x00000020, 0x0000001f, 0x00000004, 0x0004002b, 0x0000001e, + 0x00000021, 0x000034c4, 0x0004002b, 0x0000001e, 0x00000022, 0x00003476, 0x0004002b, 0x0000001e, + 0x00000023, 0x0000b9f4, 0x0004002b, 0x0000001e, 0x00000024, 0x0000b99b, 0x0007002c, 0x0000001f, + 0x00000025, 0x00000021, 0x00000022, 0x00000023, 0x00000024, 0x0004002b, 0x0000001e, 0x00000026, + 0x00003093, 0x0004002b, 0x0000001e, 0x00000027, 0x00002fc1, 0x0004002b, 0x0000001e, 0x00000028, + 0x00002f2b, 0x0004002b, 0x0000001e, 0x00000029, 0x00003052, 0x0007002c, 0x0000001f, 0x0000002a, + 0x00000026, 0x00000027, 0x00000028, 0x00000029, 0x0004002b, 0x0000001e, 0x0000002b, 0x0000327c, + 0x0004002b, 0x0000001e, 0x0000002c, 0x0000327f, 0x0004002b, 0x0000001e, 0x0000002d, 0x00003636, + 0x0004002b, 0x0000001e, 0x0000002e, 
0x00003644, 0x0007002c, 0x0000001f, 0x0000002f, 0x0000002b, + 0x0000002c, 0x0000002d, 0x0000002e, 0x0004002b, 0x0000001e, 0x00000030, 0x0000b996, 0x0004002b, + 0x0000001e, 0x00000031, 0x0000b995, 0x0004002b, 0x0000001e, 0x00000032, 0x00003539, 0x0004002b, + 0x0000001e, 0x00000033, 0x0000352e, 0x0007002c, 0x0000001f, 0x00000034, 0x00000030, 0x00000031, + 0x00000032, 0x00000033, 0x0007002c, 0x00000020, 0x00000035, 0x00000025, 0x0000002a, 0x0000002f, + 0x00000034, 0x0004002b, 0x00000010, 0x00000036, 0x00000000, 0x0004002b, 0x0000000a, 0x00000037, + 0xffffffff, 0x0005002c, 0x0000000b, 0x00000038, 0x00000037, 0x00000037, 0x00040017, 0x00000039, + 0x00000010, 0x00000004, 0x0004002b, 0x0000001e, 0x0000003a, 0x000038e2, 0x0004002b, 0x0000001e, + 0x0000003b, 0x000038c9, 0x0004002b, 0x0000001e, 0x0000003c, 0x0000acc5, 0x0004002b, 0x0000001e, + 0x0000003d, 0x0000adfa, 0x0007002c, 0x0000001f, 0x0000003e, 0x0000003a, 0x0000003b, 0x0000003c, + 0x0000003d, 0x0004002b, 0x0000001e, 0x0000003f, 0x000037aa, 0x0004002b, 0x0000001e, 0x00000040, + 0x00003786, 0x0004002b, 0x0000001e, 0x00000041, 0x00003554, 0x0004002b, 0x0000001e, 0x00000042, + 0x000035de, 0x0007002c, 0x0000001f, 0x00000043, 0x0000003f, 0x00000040, 0x00000041, 0x00000042, + 0x0004002b, 0x0000001e, 0x00000044, 0x00003665, 0x0004002b, 0x0000001e, 0x00000045, 0x00003653, + 0x0004002b, 0x0000001e, 0x00000046, 0x00003708, 0x0004002b, 0x0000001e, 0x00000047, 0x00003706, + 0x0007002c, 0x0000001f, 0x00000048, 0x00000044, 0x00000045, 0x00000046, 0x00000047, 0x0004002b, + 0x0000001e, 0x00000049, 0x0000b948, 0x0004002b, 0x0000001e, 0x0000004a, 0x0000b912, 0x0004002b, + 0x0000001e, 0x0000004b, 0x00003119, 0x0004002b, 0x0000001e, 0x0000004c, 0x000030c5, 0x0007002c, + 0x0000001f, 0x0000004d, 0x00000049, 0x0000004a, 0x0000004b, 0x0000004c, 0x0007002c, 0x00000020, + 0x0000004e, 0x0000003e, 0x00000043, 0x00000048, 0x0000004d, 0x0005002c, 0x0000000b, 0x0000004f, + 0x00000037, 0x00000018, 0x0004002b, 0x0000001e, 0x00000050, 0x00003515, 
0x0004002b, 0x0000001e, + 0x00000051, 0x000034b9, 0x0004002b, 0x0000001e, 0x00000052, 0x0000ba6a, 0x0004002b, 0x0000001e, + 0x00000053, 0x0000ba18, 0x0007002c, 0x0000001f, 0x00000054, 0x00000050, 0x00000051, 0x00000052, + 0x00000053, 0x0004002b, 0x0000001e, 0x00000055, 0x00002e5c, 0x0004002b, 0x0000001e, 0x00000056, + 0x00002ce5, 0x0004002b, 0x0000001e, 0x00000057, 0x000032d0, 0x0004002b, 0x0000001e, 0x00000058, + 0x00003318, 0x0007002c, 0x0000001f, 0x00000059, 0x00000055, 0x00000056, 0x00000057, 0x00000058, + 0x0004002b, 0x0000001e, 0x0000005a, 0x00003467, 0x0004002b, 0x0000001e, 0x0000005b, 0x00003446, + 0x0004002b, 0x0000001e, 0x0000005c, 0x00003477, 0x0004002b, 0x0000001e, 0x0000005d, 0x000034e7, + 0x0007002c, 0x0000001f, 0x0000005e, 0x0000005a, 0x0000005b, 0x0000005c, 0x0000005d, 0x0004002b, + 0x0000001e, 0x0000005f, 0x0000b999, 0x0004002b, 0x0000001e, 0x00000060, 0x0000b956, 0x0004002b, + 0x0000001e, 0x00000061, 0x00003508, 0x0004002b, 0x0000001e, 0x00000062, 0x0000351f, 0x0007002c, + 0x0000001f, 0x00000063, 0x0000005f, 0x00000060, 0x00000061, 0x00000062, 0x0007002c, 0x00000020, + 0x00000064, 0x00000054, 0x00000059, 0x0000005e, 0x00000063, 0x0004002b, 0x0000000a, 0x00000065, + 0x00000001, 0x0005002c, 0x0000000b, 0x00000066, 0x00000037, 0x00000065, 0x0004002b, 0x0000001e, + 0x00000067, 0x00003888, 0x0004002b, 0x0000001e, 0x00000068, 0x0000389c, 0x0004002b, 0x0000001e, + 0x00000069, 0x00002c10, 0x0004002b, 0x0000001e, 0x0000006a, 0x00002c76, 0x0007002c, 0x0000001f, + 0x0000006b, 0x00000067, 0x00000068, 0x00000069, 0x0000006a, 0x0004002b, 0x0000001e, 0x0000006c, + 0x00003790, 0x0004002b, 0x0000001e, 0x0000006d, 0x0000377d, 0x0004002b, 0x0000001e, 0x0000006e, + 0x0000367b, 0x0004002b, 0x0000001e, 0x0000006f, 0x000036aa, 0x0007002c, 0x0000001f, 0x00000070, + 0x0000006c, 0x0000006d, 0x0000006e, 0x0000006f, 0x0004002b, 0x0000001e, 0x00000071, 0x000035fa, + 0x0004002b, 0x0000001e, 0x00000072, 0x000035d6, 0x0004002b, 0x0000001e, 0x00000073, 0x00003854, + 0x0004002b, 
0x0000001e, 0x00000074, 0x00003874, 0x0007002c, 0x0000001f, 0x00000075, 0x00000071, + 0x00000072, 0x00000073, 0x00000074, 0x0004002b, 0x0000001e, 0x00000076, 0x0000b900, 0x0004002b, + 0x0000001e, 0x00000077, 0x0000b94e, 0x0004002b, 0x0000001e, 0x00000078, 0x000032e8, 0x0004002b, + 0x0000001e, 0x00000079, 0x000032b4, 0x0007002c, 0x0000001f, 0x0000007a, 0x00000076, 0x00000077, + 0x00000078, 0x00000079, 0x0007002c, 0x00000020, 0x0000007b, 0x0000006b, 0x00000070, 0x00000075, + 0x0000007a, 0x0005002c, 0x0000000b, 0x0000007c, 0x00000018, 0x00000037, 0x0004002b, 0x0000001e, + 0x0000007d, 0x00003add, 0x0004002b, 0x0000001e, 0x0000007e, 0x00003aaa, 0x0004002b, 0x0000001e, + 0x0000007f, 0x00003902, 0x0004002b, 0x0000001e, 0x00000080, 0x000038bc, 0x0007002c, 0x0000001f, + 0x00000081, 0x0000007d, 0x0000007e, 0x0000007f, 0x00000080, 0x0004002b, 0x0000001e, 0x00000082, + 0x00003a9c, 0x0004002b, 0x0000001e, 0x00000083, 0x00003a3e, 0x0004002b, 0x0000001e, 0x00000084, + 0x00003946, 0x0004002b, 0x0000001e, 0x00000085, 0x00003948, 0x0007002c, 0x0000001f, 0x00000086, + 0x00000082, 0x00000083, 0x00000084, 0x00000085, 0x0004002b, 0x0000001e, 0x00000087, 0x0000388a, + 0x0004002b, 0x0000001e, 0x00000088, 0x00003856, 0x0004002b, 0x0000001e, 0x00000089, 0x000038ef, + 0x0004002b, 0x0000001e, 0x0000008a, 0x000038f0, 0x0007002c, 0x0000001f, 0x0000008b, 0x00000087, + 0x00000088, 0x00000089, 0x0000008a, 0x0004002b, 0x0000001e, 0x0000008c, 0x0000b86b, 0x0004002b, + 0x0000001e, 0x0000008d, 0x0000b86e, 0x0004002b, 0x0000001e, 0x0000008e, 0x0000299c, 0x0004002b, + 0x0000001e, 0x0000008f, 0x00002ad4, 0x0007002c, 0x0000001f, 0x00000090, 0x0000008c, 0x0000008d, + 0x0000008e, 0x0000008f, 0x0007002c, 0x00000020, 0x00000091, 0x00000081, 0x00000086, 0x0000008b, + 0x00000090, 0x0004002b, 0x0000001e, 0x00000092, 0x000038b9, 0x0004002b, 0x0000001e, 0x00000093, + 0x0000aec6, 0x0004002b, 0x0000001e, 0x00000094, 0x0000b020, 0x0007002c, 0x0000001f, 0x00000095, + 0x00000092, 0x00000087, 0x00000093, 0x00000094, 
0x0004002b, 0x0000001e, 0x00000096, 0x00003707, + 0x0004002b, 0x0000001e, 0x00000097, 0x000036b7, 0x0004002b, 0x0000001e, 0x00000098, 0x0000383a, + 0x0004002b, 0x0000001e, 0x00000099, 0x0000384d, 0x0007002c, 0x0000001f, 0x0000009a, 0x00000096, + 0x00000097, 0x00000098, 0x00000099, 0x0004002b, 0x0000001e, 0x0000009b, 0x0000376a, 0x0004002b, + 0x0000001e, 0x0000009c, 0x0000372e, 0x0004002b, 0x0000001e, 0x0000009d, 0x0000379a, 0x0004002b, + 0x0000001e, 0x0000009e, 0x000037b6, 0x0007002c, 0x0000001f, 0x0000009f, 0x0000009b, 0x0000009c, + 0x0000009d, 0x0000009e, 0x0004002b, 0x0000001e, 0x000000a0, 0x0000b967, 0x0004002b, 0x0000001e, + 0x000000a1, 0x0000b950, 0x0004002b, 0x0000001e, 0x000000a2, 0x00003381, 0x0004002b, 0x0000001e, + 0x000000a3, 0x00003371, 0x0007002c, 0x0000001f, 0x000000a4, 0x000000a0, 0x000000a1, 0x000000a2, + 0x000000a3, 0x0007002c, 0x00000020, 0x000000a5, 0x00000095, 0x0000009a, 0x0000009f, 0x000000a4, + 0x0005002c, 0x0000000b, 0x000000a6, 0x00000018, 0x00000065, 0x0004002b, 0x0000001e, 0x000000a7, + 0x000034ef, 0x0004002b, 0x0000001e, 0x000000a8, 0x0000b8c9, 0x0004002b, 0x0000001e, 0x000000a9, + 0x0000b859, 0x0007002c, 0x0000001f, 0x000000aa, 0x00000033, 0x000000a7, 0x000000a8, 0x000000a9, + 0x0004002b, 0x0000001e, 0x000000ab, 0x0000300f, 0x0004002b, 0x0000001e, 0x000000ac, 0x00002f7f, + 0x0004002b, 0x0000001e, 0x000000ad, 0x0000325b, 0x0004002b, 0x0000001e, 0x000000ae, 0x00003271, + 0x0007002c, 0x0000001f, 0x000000af, 0x000000ab, 0x000000ac, 0x000000ad, 0x000000ae, 0x0004002b, + 0x0000001e, 0x000000b0, 0x00003410, 0x0004002b, 0x0000001e, 0x000000b1, 0x00003414, 0x0004002b, + 0x0000001e, 0x000000b2, 0x00003680, 0x0004002b, 0x0000001e, 0x000000b3, 0x00003673, 0x0007002c, + 0x0000001f, 0x000000b4, 0x000000b0, 0x000000b1, 0x000000b2, 0x000000b3, 0x0004002b, 0x0000001e, + 0x000000b5, 0x0000b959, 0x0004002b, 0x0000001e, 0x000000b6, 0x0000b97a, 0x0004002b, 0x0000001e, + 0x000000b7, 0x000034a7, 0x0004002b, 0x0000001e, 0x000000b8, 0x000034a3, 0x0007002c, 
0x0000001f, + 0x000000b9, 0x000000b5, 0x000000b6, 0x000000b7, 0x000000b8, 0x0007002c, 0x00000020, 0x000000ba, + 0x000000aa, 0x000000af, 0x000000b4, 0x000000b9, 0x0005002c, 0x0000000b, 0x000000bb, 0x00000065, + 0x00000037, 0x0004002b, 0x0000001e, 0x000000bc, 0x000038b0, 0x0004002b, 0x0000001e, 0x000000bd, + 0x000038c4, 0x0004002b, 0x0000001e, 0x000000be, 0x0000a5bd, 0x0004002b, 0x0000001e, 0x000000bf, + 0x0000a8b4, 0x0007002c, 0x0000001f, 0x000000c0, 0x000000bc, 0x000000bd, 0x000000be, 0x000000bf, + 0x0004002b, 0x0000001e, 0x000000c1, 0x00003703, 0x0004002b, 0x0000001e, 0x000000c2, 0x00003741, + 0x0004002b, 0x0000001e, 0x000000c3, 0x00003736, 0x0004002b, 0x0000001e, 0x000000c4, 0x0000374b, + 0x0007002c, 0x0000001f, 0x000000c5, 0x000000c1, 0x000000c2, 0x000000c3, 0x000000c4, 0x0004002b, + 0x0000001e, 0x000000c6, 0x0000371b, 0x0004002b, 0x0000001e, 0x000000c7, 0x00003727, 0x0004002b, + 0x0000001e, 0x000000c8, 0x0000379b, 0x0004002b, 0x0000001e, 0x000000c9, 0x0000378e, 0x0007002c, + 0x0000001f, 0x000000ca, 0x000000c6, 0x000000c7, 0x000000c8, 0x000000c9, 0x0004002b, 0x0000001e, + 0x000000cb, 0x0000b931, 0x0004002b, 0x0000001e, 0x000000cc, 0x0000b93a, 0x0004002b, 0x0000001e, + 0x000000cd, 0x00003055, 0x0004002b, 0x0000001e, 0x000000ce, 0x0000306f, 0x0007002c, 0x0000001f, + 0x000000cf, 0x000000cb, 0x000000cc, 0x000000cd, 0x000000ce, 0x0007002c, 0x00000020, 0x000000d0, + 0x000000c0, 0x000000c5, 0x000000ca, 0x000000cf, 0x0005002c, 0x0000000b, 0x000000d1, 0x00000065, + 0x00000018, 0x0004002b, 0x0000001e, 0x000000d2, 0x00003511, 0x0004002b, 0x0000001e, 0x000000d3, + 0x00003537, 0x0004002b, 0x0000001e, 0x000000d4, 0x0000ba83, 0x0004002b, 0x0000001e, 0x000000d5, + 0x0000ba64, 0x0007002c, 0x0000001f, 0x000000d6, 0x000000d2, 0x000000d3, 0x000000d4, 0x000000d5, + 0x0004002b, 0x0000001e, 0x000000d7, 0x00002c05, 0x0004002b, 0x0000001e, 0x000000d8, 0x00002c8d, + 0x0004002b, 0x0000001e, 0x000000d9, 0x00003577, 0x0004002b, 0x0000001e, 0x000000da, 0x00003564, + 0x0007002c, 0x0000001f, 
0x000000db, 0x000000d7, 0x000000d8, 0x000000d9, 0x000000da, 0x0004002b, + 0x0000001e, 0x000000dc, 0x000034ea, 0x0004002b, 0x0000001e, 0x000000dd, 0x000034f4, 0x0004002b, + 0x0000001e, 0x000000de, 0x00003569, 0x0004002b, 0x0000001e, 0x000000df, 0x00003524, 0x0007002c, + 0x0000001f, 0x000000e0, 0x000000dc, 0x000000dd, 0x000000de, 0x000000df, 0x0004002b, 0x0000001e, + 0x000000e1, 0x0000b9b5, 0x0004002b, 0x0000001e, 0x000000e2, 0x0000b98e, 0x0004002b, 0x0000001e, + 0x000000e3, 0x000034a9, 0x0004002b, 0x0000001e, 0x000000e4, 0x000034d0, 0x0007002c, 0x0000001f, + 0x000000e5, 0x000000e1, 0x000000e2, 0x000000e3, 0x000000e4, 0x0007002c, 0x00000020, 0x000000e6, + 0x000000d6, 0x000000db, 0x000000e0, 0x000000e5, 0x0005002c, 0x0000000b, 0x000000e7, 0x00000065, + 0x00000065, 0x0004002b, 0x0000001e, 0x000000e8, 0x000039f2, 0x0004002b, 0x0000001e, 0x000000e9, + 0x00003aa4, 0x0004002b, 0x0000001e, 0x000000ea, 0x000036f5, 0x0004002b, 0x0000001e, 0x000000eb, + 0x000035f1, 0x0007002c, 0x0000001f, 0x000000ec, 0x000000e8, 0x000000e9, 0x000000ea, 0x000000eb, + 0x0004003b, 0x00000017, 0x00000006, 0x00000000, 0x0004002b, 0x0000000c, 0x000000ed, 0x00000010, + 0x0006002c, 0x0000000d, 0x00000007, 0x000000ed, 0x000000ed, 0x0000001d, 0x00050036, 0x00000008, + 0x00000002, 0x00000000, 0x00000009, 0x000200f8, 0x000000ee, 0x000300f7, 0x000000ef, 0x00000000, + 0x000300fb, 0x0000001c, 0x000000f0, 0x000200f8, 0x000000f0, 0x0004003d, 0x0000000d, 0x000000f1, + 0x00000003, 0x0007004f, 0x0000000f, 0x000000f2, 0x000000f1, 0x000000f1, 0x00000000, 0x00000001, + 0x0004007c, 0x0000000b, 0x000000f3, 0x000000f2, 0x0004003d, 0x00000011, 0x000000f4, 0x00000004, + 0x00040068, 0x0000000b, 0x000000f5, 0x000000f4, 0x000500af, 0x00000014, 0x000000f6, 0x000000f3, + 0x000000f5, 0x0004009a, 0x00000013, 0x000000f7, 0x000000f6, 0x000300f7, 0x000000f8, 0x00000000, + 0x000400fa, 0x000000f7, 0x000000f9, 0x000000f8, 0x000200f8, 0x000000f9, 0x000200f9, 0x000000ef, + 0x000200f8, 0x000000f8, 0x0004003d, 0x00000016, 0x000000fa, 
0x00000005, 0x00040064, 0x00000015, + 0x000000fb, 0x000000fa, 0x00050067, 0x0000000b, 0x000000fc, 0x000000fb, 0x00000018, 0x0004007c, + 0x0000000f, 0x000000fd, 0x000000fc, 0x00040070, 0x00000019, 0x000000fe, 0x000000f2, 0x00050081, + 0x00000019, 0x000000ff, 0x000000fe, 0x0000001b, 0x00050051, 0x0000000c, 0x00000100, 0x000000fd, + 0x00000000, 0x00040070, 0x00000010, 0x00000101, 0x00000100, 0x00050051, 0x0000000c, 0x00000102, + 0x000000fd, 0x00000001, 0x00040070, 0x00000010, 0x00000103, 0x00000102, 0x00050050, 0x00000019, + 0x00000104, 0x00000101, 0x00000103, 0x00050088, 0x00000019, 0x00000105, 0x000000ff, 0x00000104, + 0x0004003d, 0x00000016, 0x00000106, 0x00000005, 0x00080058, 0x00000039, 0x00000107, 0x00000106, + 0x00000105, 0x0000000a, 0x00000036, 0x00000038, 0x00040073, 0x0000001f, 0x00000108, 0x00000107, + 0x00050091, 0x0000001f, 0x00000109, 0x00000035, 0x00000108, 0x0004003d, 0x00000016, 0x0000010a, + 0x00000005, 0x00080058, 0x00000039, 0x0000010b, 0x0000010a, 0x00000105, 0x0000000a, 0x00000036, + 0x0000004f, 0x00040073, 0x0000001f, 0x0000010c, 0x0000010b, 0x00050091, 0x0000001f, 0x0000010d, + 0x0000004e, 0x0000010c, 0x00050081, 0x0000001f, 0x0000010e, 0x00000109, 0x0000010d, 0x0004003d, + 0x00000016, 0x0000010f, 0x00000005, 0x00080058, 0x00000039, 0x00000110, 0x0000010f, 0x00000105, + 0x0000000a, 0x00000036, 0x00000066, 0x00040073, 0x0000001f, 0x00000111, 0x00000110, 0x00050091, + 0x0000001f, 0x00000112, 0x00000064, 0x00000111, 0x00050081, 0x0000001f, 0x00000113, 0x0000010e, + 0x00000112, 0x0004003d, 0x00000016, 0x00000114, 0x00000005, 0x00080058, 0x00000039, 0x00000115, + 0x00000114, 0x00000105, 0x0000000a, 0x00000036, 0x0000007c, 0x00040073, 0x0000001f, 0x00000116, + 0x00000115, 0x00050091, 0x0000001f, 0x00000117, 0x0000007b, 0x00000116, 0x00050081, 0x0000001f, + 0x00000118, 0x00000113, 0x00000117, 0x0004003d, 0x00000016, 0x00000119, 0x00000005, 0x00070058, + 0x00000039, 0x0000011a, 0x00000119, 0x00000105, 0x00000002, 0x00000036, 0x00040073, 0x0000001f, + 
0x0000011b, 0x0000011a, 0x00050091, 0x0000001f, 0x0000011c, 0x00000091, 0x0000011b, 0x00050081, + 0x0000001f, 0x0000011d, 0x00000118, 0x0000011c, 0x0004003d, 0x00000016, 0x0000011e, 0x00000005, + 0x00080058, 0x00000039, 0x0000011f, 0x0000011e, 0x00000105, 0x0000000a, 0x00000036, 0x000000a6, + 0x00040073, 0x0000001f, 0x00000120, 0x0000011f, 0x00050091, 0x0000001f, 0x00000121, 0x000000a5, + 0x00000120, 0x00050081, 0x0000001f, 0x00000122, 0x0000011d, 0x00000121, 0x0004003d, 0x00000016, + 0x00000123, 0x00000005, 0x00080058, 0x00000039, 0x00000124, 0x00000123, 0x00000105, 0x0000000a, + 0x00000036, 0x000000bb, 0x00040073, 0x0000001f, 0x00000125, 0x00000124, 0x00050091, 0x0000001f, + 0x00000126, 0x000000ba, 0x00000125, 0x00050081, 0x0000001f, 0x00000127, 0x00000122, 0x00000126, + 0x0004003d, 0x00000016, 0x00000128, 0x00000005, 0x00080058, 0x00000039, 0x00000129, 0x00000128, + 0x00000105, 0x0000000a, 0x00000036, 0x000000d1, 0x00040073, 0x0000001f, 0x0000012a, 0x00000129, + 0x00050091, 0x0000001f, 0x0000012b, 0x000000d0, 0x0000012a, 0x00050081, 0x0000001f, 0x0000012c, + 0x00000127, 0x0000012b, 0x0004003d, 0x00000016, 0x0000012d, 0x00000005, 0x00080058, 0x00000039, + 0x0000012e, 0x0000012d, 0x00000105, 0x0000000a, 0x00000036, 0x000000e7, 0x00040073, 0x0000001f, + 0x0000012f, 0x0000012e, 0x00050091, 0x0000001f, 0x00000130, 0x000000e6, 0x0000012f, 0x00050081, + 0x0000001f, 0x00000131, 0x0000012c, 0x00000130, 0x00050081, 0x0000001f, 0x00000132, 0x00000131, + 0x000000ec, 0x0004003d, 0x00000016, 0x00000133, 0x00000006, 0x00070058, 0x00000039, 0x00000134, + 0x00000133, 0x00000105, 0x00000002, 0x00000036, 0x00040073, 0x0000001f, 0x00000135, 0x00000134, + 0x00050081, 0x0000001f, 0x00000136, 0x00000132, 0x00000135, 0x0004003d, 0x00000011, 0x00000137, + 0x00000004, 0x00040073, 0x00000039, 0x00000138, 0x00000136, 0x00040063, 0x00000137, 0x000000f3, + 0x00000138, 0x000200f9, 0x000000ef, 0x000200f8, 0x000000ef, 0x000100fd, 0x00010038, +}; +static const size_t wnfg_29_spv_size = 
sizeof(wnfg_29_spv); diff --git a/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_51_spv.h b/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_51_spv.h new file mode 100644 index 000000000..a9233acbb --- /dev/null +++ b/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_51_spv.h @@ -0,0 +1,743 @@ +#pragma once +#include +#include + +static const uint32_t wnfg_51_spv[] = { + 0x07230203, 0x00010000, 0x000d000b, 0x000004ad, 0x00000000, 0x00020011, 0x00000001, 0x00020011, + 0x00000009, 0x00020011, 0x00000032, 0x0006000b, 0x00000001, 0x4c534c47, 0x6474732e, 0x3035342e, + 0x00000000, 0x0003000e, 0x00000000, 0x00000001, 0x0006000f, 0x00000005, 0x00000002, 0x6e69616d, + 0x00000000, 0x00000003, 0x00060010, 0x00000002, 0x00000011, 0x00000010, 0x00000010, 0x00000001, + 0x00040047, 0x00000003, 0x0000000b, 0x0000001c, 0x00030047, 0x00000004, 0x00000019, 0x00040047, + 0x00000004, 0x00000021, 0x00000030, 0x00040047, 0x00000004, 0x00000022, 0x00000000, 0x00040047, + 0x00000005, 0x00000021, 0x00000020, 0x00040047, 0x00000005, 0x00000022, 0x00000000, 0x00040047, + 0x00000006, 0x00000021, 0x00000021, 0x00040047, 0x00000006, 0x00000022, 0x00000000, 0x00030047, + 0x00000007, 0x00000019, 0x00040047, 0x00000007, 0x00000021, 0x00000031, 0x00040047, 0x00000007, + 0x00000022, 0x00000000, 0x00040047, 0x00000008, 0x0000000b, 0x00000019, 0x00020013, 0x00000009, + 0x00030021, 0x0000000a, 0x00000009, 0x00040015, 0x0000000b, 0x00000020, 0x00000001, 0x00040017, + 0x0000000c, 0x0000000b, 0x00000002, 0x00040015, 0x0000000d, 0x00000020, 0x00000000, 0x00040017, + 0x0000000e, 0x0000000d, 0x00000003, 0x00040020, 0x0000000f, 0x00000001, 0x0000000e, 0x0004003b, + 0x0000000f, 0x00000003, 0x00000001, 0x00040017, 0x00000010, 0x0000000d, 0x00000002, 0x00030016, + 0x00000011, 0x00000020, 0x00090019, 0x00000012, 0x00000011, 0x00000001, 0x00000000, 0x00000000, + 0x00000000, 0x00000002, 0x00000004, 0x00040020, 0x00000013, 0x00000000, 0x00000012, 0x0004003b, + 0x00000013, 0x00000004, 0x00000000, 0x00020014, 0x00000014, 0x00040017, 
0x00000015, 0x00000014, + 0x00000002, 0x00090019, 0x00000016, 0x00000011, 0x00000001, 0x00000000, 0x00000000, 0x00000000, + 0x00000001, 0x00000000, 0x0003001b, 0x00000017, 0x00000016, 0x00040020, 0x00000018, 0x00000000, + 0x00000017, 0x0004003b, 0x00000018, 0x00000005, 0x00000000, 0x0004002b, 0x0000000b, 0x00000019, + 0x00000000, 0x0004002b, 0x0000000d, 0x0000001a, 0x00000000, 0x0004002b, 0x0000000d, 0x0000001b, + 0x00000001, 0x00030016, 0x0000001c, 0x00000010, 0x00040017, 0x0000001d, 0x0000001c, 0x00000004, + 0x0004002b, 0x0000000b, 0x0000001e, 0xffffffff, 0x0005002c, 0x0000000c, 0x0000001f, 0x0000001e, + 0x0000001e, 0x0005002c, 0x0000000c, 0x00000020, 0x00000019, 0x00000019, 0x00040017, 0x00000021, + 0x00000011, 0x00000004, 0x0004002b, 0x00000011, 0x00000022, 0x00000000, 0x0007002c, 0x00000021, + 0x00000023, 0x00000022, 0x00000022, 0x00000022, 0x00000022, 0x0005002c, 0x0000000c, 0x00000024, + 0x0000001e, 0x00000019, 0x0004002b, 0x0000000b, 0x00000025, 0x00000001, 0x0005002c, 0x0000000c, + 0x00000026, 0x0000001e, 0x00000025, 0x0005002c, 0x0000000c, 0x00000027, 0x00000019, 0x0000001e, + 0x0005002c, 0x0000000c, 0x00000028, 0x00000019, 0x00000025, 0x0005002c, 0x0000000c, 0x00000029, + 0x00000025, 0x0000001e, 0x0005002c, 0x0000000c, 0x0000002a, 0x00000025, 0x00000019, 0x0005002c, + 0x0000000c, 0x0000002b, 0x00000025, 0x00000025, 0x0004003b, 0x00000018, 0x00000006, 0x00000000, + 0x00040018, 0x0000002c, 0x0000001d, 0x00000004, 0x0004002b, 0x0000001c, 0x0000002d, 0x0000386c, + 0x0004002b, 0x0000001c, 0x0000002e, 0x000032a5, 0x0004002b, 0x0000001c, 0x0000002f, 0x0000b2fd, + 0x0004002b, 0x0000001c, 0x00000030, 0x00002cab, 0x0007002c, 0x0000001d, 0x00000031, 0x0000002d, + 0x0000002e, 0x0000002f, 0x00000030, 0x0004002b, 0x0000001c, 0x00000032, 0x00002c46, 0x0004002b, + 0x0000001c, 0x00000033, 0x000033f2, 0x0004002b, 0x0000001c, 0x00000034, 0x0000b40c, 0x0004002b, + 0x0000001c, 0x00000035, 0x0000abb6, 0x0007002c, 0x0000001d, 0x00000036, 0x00000032, 0x00000033, + 0x00000034, 
0x00000035, 0x0004002b, 0x0000001c, 0x00000037, 0x00003325, 0x0004002b, 0x0000001c, + 0x00000038, 0x00003467, 0x0004002b, 0x0000001c, 0x00000039, 0x0000b164, 0x0004002b, 0x0000001c, + 0x0000003a, 0x00009c49, 0x0007002c, 0x0000001d, 0x0000003b, 0x00000037, 0x00000038, 0x00000039, + 0x0000003a, 0x0004002b, 0x0000001c, 0x0000003c, 0x00002ef4, 0x0004002b, 0x0000001c, 0x0000003d, + 0x000030b9, 0x0004002b, 0x0000001c, 0x0000003e, 0x0000abb0, 0x0004002b, 0x0000001c, 0x0000003f, + 0x00002d9d, 0x0007002c, 0x0000001d, 0x00000040, 0x0000003c, 0x0000003d, 0x0000003e, 0x0000003f, + 0x0007002c, 0x0000002c, 0x00000041, 0x00000031, 0x00000036, 0x0000003b, 0x00000040, 0x0004002b, + 0x0000001c, 0x00000042, 0x00002d3c, 0x0004002b, 0x0000001c, 0x00000043, 0x00001974, 0x0004002b, + 0x0000001c, 0x00000044, 0x000026ad, 0x0004002b, 0x0000001c, 0x00000045, 0x0000ace8, 0x0007002c, + 0x0000001d, 0x00000046, 0x00000042, 0x00000043, 0x00000044, 0x00000045, 0x0004002b, 0x0000001c, + 0x00000047, 0x0000a7ff, 0x0004002b, 0x0000001c, 0x00000048, 0x000032a4, 0x0004002b, 0x0000001c, + 0x00000049, 0x0000b158, 0x0004002b, 0x0000001c, 0x0000004a, 0x00002419, 0x0007002c, 0x0000001d, + 0x0000004b, 0x00000047, 0x00000048, 0x00000049, 0x0000004a, 0x0004002b, 0x0000001c, 0x0000004c, + 0x0000268b, 0x0004002b, 0x0000001c, 0x0000004d, 0x00003146, 0x0004002b, 0x0000001c, 0x0000004e, + 0x00002c56, 0x0004002b, 0x0000001c, 0x0000004f, 0x0000ae3a, 0x0007002c, 0x0000001d, 0x00000050, + 0x0000004c, 0x0000004d, 0x0000004e, 0x0000004f, 0x0004002b, 0x0000001c, 0x00000051, 0x00002dc5, + 0x0004002b, 0x0000001c, 0x00000052, 0x00003431, 0x0004002b, 0x0000001c, 0x00000053, 0x0000a152, + 0x0004002b, 0x0000001c, 0x00000054, 0x00003224, 0x0007002c, 0x0000001d, 0x00000055, 0x00000051, + 0x00000052, 0x00000053, 0x00000054, 0x0007002c, 0x0000002c, 0x00000056, 0x00000046, 0x0000004b, + 0x00000050, 0x00000055, 0x0004002b, 0x0000001c, 0x00000057, 0x000034ab, 0x0004002b, 0x0000001c, + 0x00000058, 0x0000acd7, 0x0004002b, 0x0000001c, 
0x00000059, 0x0000ab01, 0x0004002b, 0x0000001c, + 0x0000005a, 0x0000ae5e, 0x0007002c, 0x0000001d, 0x0000005b, 0x00000057, 0x00000058, 0x00000059, + 0x0000005a, 0x0004002b, 0x0000001c, 0x0000005c, 0x0000317a, 0x0004002b, 0x0000001c, 0x0000005d, + 0x000034af, 0x0004002b, 0x0000001c, 0x0000005e, 0x0000b48c, 0x0004002b, 0x0000001c, 0x0000005f, + 0x00002dc7, 0x0007002c, 0x0000001d, 0x00000060, 0x0000005c, 0x0000005d, 0x0000005e, 0x0000005f, + 0x0004002b, 0x0000001c, 0x00000061, 0x00003549, 0x0004002b, 0x0000001c, 0x00000062, 0x000034dd, + 0x0004002b, 0x0000001c, 0x00000063, 0x0000aaa8, 0x0004002b, 0x0000001c, 0x00000064, 0x0000a8b7, + 0x0007002c, 0x0000001d, 0x00000065, 0x00000061, 0x00000062, 0x00000063, 0x00000064, 0x0004002b, + 0x0000001c, 0x00000066, 0x00002dae, 0x0004002b, 0x0000001c, 0x00000067, 0x0000aeb9, 0x0004002b, + 0x0000001c, 0x00000068, 0x0000187b, 0x0004002b, 0x0000001c, 0x00000069, 0x0000316d, 0x0007002c, + 0x0000001d, 0x0000006a, 0x00000066, 0x00000067, 0x00000068, 0x00000069, 0x0007002c, 0x0000002c, + 0x0000006b, 0x0000005b, 0x00000060, 0x00000065, 0x0000006a, 0x0004002b, 0x0000001c, 0x0000006c, + 0x0000329f, 0x0004002b, 0x0000001c, 0x0000006d, 0x00002de8, 0x0004002b, 0x0000001c, 0x0000006e, + 0x0000adec, 0x0004002b, 0x0000001c, 0x0000006f, 0x00002b53, 0x0007002c, 0x0000001d, 0x00000070, + 0x0000006c, 0x0000006d, 0x0000006e, 0x0000006f, 0x0004002b, 0x0000001c, 0x00000071, 0x0000a7eb, + 0x0004002b, 0x0000001c, 0x00000072, 0x000033c9, 0x0004002b, 0x0000001c, 0x00000073, 0x0000ae95, + 0x0004002b, 0x0000001c, 0x00000074, 0x0000b1a3, 0x0007002c, 0x0000001d, 0x00000075, 0x00000071, + 0x00000072, 0x00000073, 0x00000074, 0x0004002b, 0x0000001c, 0x00000076, 0x0000a91f, 0x0004002b, + 0x0000001c, 0x00000077, 0x000033a0, 0x0004002b, 0x0000001c, 0x00000078, 0x0000af43, 0x0004002b, + 0x0000001c, 0x00000079, 0x0000ab8c, 0x0007002c, 0x0000001d, 0x0000007a, 0x00000076, 0x00000077, + 0x00000078, 0x00000079, 0x0004002b, 0x0000001c, 0x0000007b, 0x00002d89, 0x0004002b, 
0x0000001c, + 0x0000007c, 0x0000aa9f, 0x0004002b, 0x0000001c, 0x0000007d, 0x0000ac60, 0x0004002b, 0x0000001c, + 0x0000007e, 0x0000b3be, 0x0007002c, 0x0000001d, 0x0000007f, 0x0000007b, 0x0000007c, 0x0000007d, + 0x0000007e, 0x0007002c, 0x0000002c, 0x00000080, 0x00000070, 0x00000075, 0x0000007a, 0x0000007f, + 0x0004002b, 0x0000001c, 0x00000081, 0x0000b3b2, 0x0004002b, 0x0000001c, 0x00000082, 0x0000ad87, + 0x0004002b, 0x0000001c, 0x00000083, 0x0000301d, 0x0004002b, 0x0000001c, 0x00000084, 0x0000a8f4, + 0x0007002c, 0x0000001d, 0x00000085, 0x00000081, 0x00000082, 0x00000083, 0x00000084, 0x0004002b, + 0x0000001c, 0x00000086, 0x0000afc8, 0x0004002b, 0x0000001c, 0x00000087, 0x0000311a, 0x0004002b, + 0x0000001c, 0x00000088, 0x00002aca, 0x0004002b, 0x0000001c, 0x00000089, 0x000032db, 0x0007002c, + 0x0000001d, 0x0000008a, 0x00000086, 0x00000087, 0x00000088, 0x00000089, 0x0004002b, 0x0000001c, + 0x0000008b, 0x0000b2c9, 0x0004002b, 0x0000001c, 0x0000008c, 0x0000311b, 0x0004002b, 0x0000001c, + 0x0000008d, 0x00002ffc, 0x0004002b, 0x0000001c, 0x0000008e, 0x0000adcd, 0x0007002c, 0x0000001d, + 0x0000008f, 0x0000008b, 0x0000008c, 0x0000008d, 0x0000008e, 0x0004002b, 0x0000001c, 0x00000090, + 0x00002cee, 0x0004002b, 0x0000001c, 0x00000091, 0x000032d3, 0x0004002b, 0x0000001c, 0x00000092, + 0x0000b15e, 0x0004002b, 0x0000001c, 0x00000093, 0x0000ad1b, 0x0007002c, 0x0000001d, 0x00000094, + 0x00000090, 0x00000091, 0x00000092, 0x00000093, 0x0007002c, 0x0000002c, 0x00000095, 0x00000085, + 0x0000008a, 0x0000008f, 0x00000094, 0x0004002b, 0x0000001c, 0x00000096, 0x0000a432, 0x0004002b, + 0x0000001c, 0x00000097, 0x0000b09c, 0x0004002b, 0x0000001c, 0x00000098, 0x0000278b, 0x0004002b, + 0x0000001c, 0x00000099, 0x0000aede, 0x0007002c, 0x0000001d, 0x0000009a, 0x00000096, 0x00000097, + 0x00000098, 0x00000099, 0x0004002b, 0x0000001c, 0x0000009b, 0x00002c34, 0x0004002b, 0x0000001c, + 0x0000009c, 0x00003396, 0x0004002b, 0x0000001c, 0x0000009d, 0x0000ad7b, 0x0004002b, 0x0000001c, + 0x0000009e, 0x0000ace7, 
0x0007002c, 0x0000001d, 0x0000009f, 0x0000009b, 0x0000009c, 0x0000009d, + 0x0000009e, 0x0004002b, 0x0000001c, 0x000000a0, 0x0000305a, 0x0004002b, 0x0000001c, 0x000000a1, + 0x00003571, 0x0004002b, 0x0000001c, 0x000000a2, 0x0000ad4f, 0x0004002b, 0x0000001c, 0x000000a3, + 0x0000a938, 0x0007002c, 0x0000001d, 0x000000a4, 0x000000a0, 0x000000a1, 0x000000a2, 0x000000a3, + 0x0004002b, 0x0000001c, 0x000000a5, 0x00002ec9, 0x0004002b, 0x0000001c, 0x000000a6, 0x0000ab98, + 0x0004002b, 0x0000001c, 0x000000a7, 0x0000b25b, 0x0004002b, 0x0000001c, 0x000000a8, 0x0000ada2, + 0x0007002c, 0x0000001d, 0x000000a9, 0x000000a5, 0x000000a6, 0x000000a7, 0x000000a8, 0x0007002c, + 0x0000002c, 0x000000aa, 0x0000009a, 0x0000009f, 0x000000a4, 0x000000a9, 0x0004002b, 0x0000001c, + 0x000000ab, 0x00003839, 0x0004002b, 0x0000001c, 0x000000ac, 0x0000269b, 0x0004002b, 0x0000001c, + 0x000000ad, 0x0000b922, 0x0004002b, 0x0000001c, 0x000000ae, 0x000030aa, 0x0007002c, 0x0000001d, + 0x000000af, 0x000000ab, 0x000000ac, 0x000000ad, 0x000000ae, 0x0004002b, 0x0000001c, 0x000000b0, + 0x00003253, 0x0004002b, 0x0000001c, 0x000000b1, 0x000034fe, 0x0004002b, 0x0000001c, 0x000000b2, + 0x0000b5a5, 0x0004002b, 0x0000001c, 0x000000b3, 0x00002e80, 0x0007002c, 0x0000001d, 0x000000b4, + 0x000000b0, 0x000000b1, 0x000000b2, 0x000000b3, 0x0004002b, 0x0000001c, 0x000000b5, 0x00003562, + 0x0004002b, 0x0000001c, 0x000000b6, 0x0000345f, 0x0004002b, 0x0000001c, 0x000000b7, 0x0000b857, + 0x0004002b, 0x0000001c, 0x000000b8, 0x000029c9, 0x0007002c, 0x0000001d, 0x000000b9, 0x000000b5, + 0x000000b6, 0x000000b7, 0x000000b8, 0x0004002b, 0x0000001c, 0x000000ba, 0x00001ffc, 0x0004002b, + 0x0000001c, 0x000000bb, 0x0000b8ea, 0x0004002b, 0x0000001c, 0x000000bc, 0x00003054, 0x0004002b, + 0x0000001c, 0x000000bd, 0x0000a906, 0x0007002c, 0x0000001d, 0x000000be, 0x000000ba, 0x000000bb, + 0x000000bc, 0x000000bd, 0x0007002c, 0x0000002c, 0x000000bf, 0x000000af, 0x000000b4, 0x000000b9, + 0x000000be, 0x0004002b, 0x0000001c, 0x000000c0, 0x00002729, 
0x0004002b, 0x0000001c, 0x000000c1, + 0x0000b0bb, 0x0004002b, 0x0000001c, 0x000000c2, 0x0000b6b6, 0x0004002b, 0x0000001c, 0x000000c3, + 0x0000a420, 0x0007002c, 0x0000001d, 0x000000c4, 0x000000c0, 0x000000c1, 0x000000c2, 0x000000c3, + 0x0004002b, 0x0000001c, 0x000000c5, 0x00002c7a, 0x0004002b, 0x0000001c, 0x000000c6, 0x00003337, + 0x0004002b, 0x0000001c, 0x000000c7, 0x0000b11c, 0x0004002b, 0x0000001c, 0x000000c8, 0x00003026, + 0x0007002c, 0x0000001d, 0x000000c9, 0x000000c5, 0x000000c6, 0x000000c7, 0x000000c8, 0x0004002b, + 0x0000001c, 0x000000ca, 0x00003008, 0x0004002b, 0x0000001c, 0x000000cb, 0x000031ab, 0x0004002b, + 0x0000001c, 0x000000cc, 0x0000b397, 0x0004002b, 0x0000001c, 0x000000cd, 0x0000ac52, 0x0007002c, + 0x0000001d, 0x000000ce, 0x000000ca, 0x000000cb, 0x000000cc, 0x000000cd, 0x0004002b, 0x0000001c, + 0x000000cf, 0x00002e1a, 0x0004002b, 0x0000001c, 0x000000d0, 0x0000ac66, 0x0004002b, 0x0000001c, + 0x000000d1, 0x0000a5ad, 0x0004002b, 0x0000001c, 0x000000d2, 0x0000ac92, 0x0007002c, 0x0000001d, + 0x000000d3, 0x000000cf, 0x000000d0, 0x000000d1, 0x000000d2, 0x0007002c, 0x0000002c, 0x000000d4, + 0x000000c4, 0x000000c9, 0x000000ce, 0x000000d3, 0x0004002b, 0x0000001c, 0x000000d5, 0x0000340f, + 0x0004002b, 0x0000001c, 0x000000d6, 0x0000b14c, 0x0004002b, 0x0000001c, 0x000000d7, 0x0000b7bb, + 0x0004002b, 0x0000001c, 0x000000d8, 0x0000ad8e, 0x0007002c, 0x0000001d, 0x000000d9, 0x000000d5, + 0x000000d6, 0x000000d7, 0x000000d8, 0x0004002b, 0x0000001c, 0x000000da, 0x000033f0, 0x0004002b, + 0x0000001c, 0x000000db, 0x0000354b, 0x0004002b, 0x0000001c, 0x000000dc, 0x0000b67b, 0x0004002b, + 0x0000001c, 0x000000dd, 0x00002dbe, 0x0007002c, 0x0000001d, 0x000000de, 0x000000da, 0x000000db, + 0x000000dc, 0x000000dd, 0x0004002b, 0x0000001c, 0x000000df, 0x00003808, 0x0004002b, 0x0000001c, + 0x000000e0, 0x00003678, 0x0004002b, 0x0000001c, 0x000000e1, 0x0000b828, 0x0004002b, 0x0000001c, + 0x000000e2, 0x00002942, 0x0007002c, 0x0000001d, 0x000000e3, 0x000000df, 0x000000e0, 0x000000e1, + 
0x000000e2, 0x0004002b, 0x0000001c, 0x000000e4, 0x00002d8a, 0x0004002b, 0x0000001c, 0x000000e5, + 0x0000b581, 0x0004002b, 0x0000001c, 0x000000e6, 0x00009e9e, 0x0004002b, 0x0000001c, 0x000000e7, + 0x000029ad, 0x0007002c, 0x0000001d, 0x000000e8, 0x000000e4, 0x000000e5, 0x000000e6, 0x000000e7, + 0x0007002c, 0x0000002c, 0x000000e9, 0x000000d9, 0x000000de, 0x000000e3, 0x000000e8, 0x0004002b, + 0x0000001c, 0x000000ea, 0x000034ec, 0x0004002b, 0x0000001c, 0x000000eb, 0x0000348f, 0x0004002b, + 0x0000001c, 0x000000ec, 0x00002bd6, 0x0004002b, 0x0000001c, 0x000000ed, 0x0000b794, 0x0007002c, + 0x0000001d, 0x000000ee, 0x000000ea, 0x000000eb, 0x000000ec, 0x000000ed, 0x0004002b, 0x0000001c, + 0x000000ef, 0x0000343c, 0x0004002b, 0x0000001c, 0x000000f0, 0x00003228, 0x0004002b, 0x0000001c, + 0x000000f1, 0x0000b16d, 0x0004002b, 0x0000001c, 0x000000f2, 0x0000302c, 0x0007002c, 0x0000001d, + 0x000000f3, 0x000000ef, 0x000000f0, 0x000000f1, 0x000000f2, 0x0004002b, 0x0000001c, 0x000000f4, + 0x000034f8, 0x0004002b, 0x0000001c, 0x000000f5, 0x0000350e, 0x0004002b, 0x0000001c, 0x000000f6, + 0x0000b105, 0x0004002b, 0x0000001c, 0x000000f7, 0x00002abd, 0x0007002c, 0x0000001d, 0x000000f8, + 0x000000f4, 0x000000f5, 0x000000f6, 0x000000f7, 0x0004002b, 0x0000001c, 0x000000f9, 0x0000b20d, + 0x0004002b, 0x0000001c, 0x000000fa, 0x0000b860, 0x0004002b, 0x0000001c, 0x000000fb, 0x00003233, + 0x0004002b, 0x0000001c, 0x000000fc, 0x0000a927, 0x0007002c, 0x0000001d, 0x000000fd, 0x000000f9, + 0x000000fa, 0x000000fb, 0x000000fc, 0x0007002c, 0x0000002c, 0x000000fe, 0x000000ee, 0x000000f3, + 0x000000f8, 0x000000fd, 0x0004002b, 0x0000001c, 0x000000ff, 0x00002e41, 0x0004002b, 0x0000001c, + 0x00000100, 0x000029b1, 0x0004002b, 0x0000001c, 0x00000101, 0x000030c7, 0x0004002b, 0x0000001c, + 0x00000102, 0x0000b5a7, 0x0007002c, 0x0000001d, 0x00000103, 0x000000ff, 0x00000100, 0x00000101, + 0x00000102, 0x0004002b, 0x0000001c, 0x00000104, 0x000031e0, 0x0004002b, 0x0000001c, 0x00000105, + 0x00002f88, 0x0004002b, 0x0000001c, 
0x00000106, 0x0000b290, 0x0004002b, 0x0000001c, 0x00000107, + 0x00002e52, 0x0007002c, 0x0000001d, 0x00000108, 0x00000104, 0x00000105, 0x00000106, 0x00000107, + 0x0004002b, 0x0000001c, 0x00000109, 0x000031b3, 0x0004002b, 0x0000001c, 0x0000010a, 0x00003191, + 0x0004002b, 0x0000001c, 0x0000010b, 0x0000aeaa, 0x0004002b, 0x0000001c, 0x0000010c, 0x0000a8b0, + 0x0007002c, 0x0000001d, 0x0000010d, 0x00000109, 0x0000010a, 0x0000010b, 0x0000010c, 0x0004002b, + 0x0000001c, 0x0000010e, 0x0000ab75, 0x0004002b, 0x0000001c, 0x0000010f, 0x0000b659, 0x0004002b, + 0x0000001c, 0x00000110, 0x00002e83, 0x0004002b, 0x0000001c, 0x00000111, 0x0000b02d, 0x0007002c, + 0x0000001d, 0x00000112, 0x0000010e, 0x0000010f, 0x00000110, 0x00000111, 0x0007002c, 0x0000002c, + 0x00000113, 0x00000103, 0x00000108, 0x0000010d, 0x00000112, 0x0004002b, 0x0000001c, 0x00000114, + 0x000020dc, 0x0004002b, 0x0000001c, 0x00000115, 0x00002881, 0x0004002b, 0x0000001c, 0x00000116, + 0x00002cf7, 0x0004002b, 0x0000001c, 0x00000117, 0x0000b842, 0x0007002c, 0x0000001d, 0x00000118, + 0x00000114, 0x00000115, 0x00000116, 0x00000117, 0x0004002b, 0x0000001c, 0x00000119, 0x00003546, + 0x0004002b, 0x0000001c, 0x0000011a, 0x00002ff4, 0x0004002b, 0x0000001c, 0x0000011b, 0x0000a538, + 0x0004002b, 0x0000001c, 0x0000011c, 0x00002bf3, 0x0007002c, 0x0000001d, 0x0000011d, 0x00000119, + 0x0000011a, 0x0000011b, 0x0000011c, 0x0004002b, 0x0000001c, 0x0000011e, 0x0000332f, 0x0004002b, + 0x0000001c, 0x0000011f, 0x00003075, 0x0004002b, 0x0000001c, 0x00000120, 0x0000277e, 0x0004002b, + 0x0000001c, 0x00000121, 0x000032e9, 0x0007002c, 0x0000001d, 0x00000122, 0x0000011e, 0x0000011f, + 0x00000120, 0x00000121, 0x0004002b, 0x0000001c, 0x00000123, 0x00002ef7, 0x0004002b, 0x0000001c, + 0x00000124, 0x0000b201, 0x0004002b, 0x0000001c, 0x00000125, 0x0000953d, 0x0004002b, 0x0000001c, + 0x00000126, 0x000033d2, 0x0007002c, 0x0000001d, 0x00000127, 0x00000123, 0x00000124, 0x00000125, + 0x00000126, 0x0007002c, 0x0000002c, 0x00000128, 0x00000118, 0x0000011d, 
0x00000122, 0x00000127, + 0x0004002b, 0x0000001c, 0x00000129, 0x00003373, 0x0004002b, 0x0000001c, 0x0000012a, 0x0000334e, + 0x0004002b, 0x0000001c, 0x0000012b, 0x00002f82, 0x0004002b, 0x0000001c, 0x0000012c, 0x0000b3d9, + 0x0007002c, 0x0000001d, 0x0000012d, 0x00000129, 0x0000012a, 0x0000012b, 0x0000012c, 0x0004002b, + 0x0000001c, 0x0000012e, 0x000030d1, 0x0004002b, 0x0000001c, 0x0000012f, 0x00003407, 0x0004002b, + 0x0000001c, 0x00000130, 0x0000b293, 0x0004002b, 0x0000001c, 0x00000131, 0x0000a486, 0x0007002c, + 0x0000001d, 0x00000132, 0x0000012e, 0x0000012f, 0x00000130, 0x00000131, 0x0004002b, 0x0000001c, + 0x00000133, 0x000031d5, 0x0004002b, 0x0000001c, 0x00000134, 0x00003493, 0x0004002b, 0x0000001c, + 0x00000135, 0x0000b535, 0x0004002b, 0x0000001c, 0x00000136, 0x00003176, 0x0007002c, 0x0000001d, + 0x00000137, 0x00000133, 0x00000134, 0x00000135, 0x00000136, 0x0004002b, 0x0000001c, 0x00000138, + 0x0000a718, 0x0004002b, 0x0000001c, 0x00000139, 0x0000b478, 0x0004002b, 0x0000001c, 0x0000013a, + 0x0000314f, 0x0004002b, 0x0000001c, 0x0000013b, 0x0000b31d, 0x0007002c, 0x0000001d, 0x0000013c, + 0x00000138, 0x00000139, 0x0000013a, 0x0000013b, 0x0007002c, 0x0000002c, 0x0000013d, 0x0000012d, + 0x00000132, 0x00000137, 0x0000013c, 0x0004002b, 0x0000001c, 0x0000013e, 0x0000a4a0, 0x0004002b, + 0x0000001c, 0x0000013f, 0x0000a7dc, 0x0004002b, 0x0000001c, 0x00000140, 0x00003039, 0x0004002b, + 0x0000001c, 0x00000141, 0x0000b0cd, 0x0007002c, 0x0000001d, 0x00000142, 0x0000013e, 0x0000013f, + 0x00000140, 0x00000141, 0x0004002b, 0x0000001c, 0x00000143, 0x00002aa1, 0x0004002b, 0x0000001c, + 0x00000144, 0x000031f8, 0x0004002b, 0x0000001c, 0x00000145, 0x0000b46b, 0x0004002b, 0x0000001c, + 0x00000146, 0x000035e8, 0x0007002c, 0x0000001d, 0x00000147, 0x00000143, 0x00000144, 0x00000145, + 0x00000146, 0x0004002b, 0x0000001c, 0x00000148, 0x000033c1, 0x0004002b, 0x0000001c, 0x00000149, + 0x0000b4e8, 0x0004002b, 0x0000001c, 0x0000014a, 0x00003197, 0x0007002c, 0x0000001d, 0x0000014b, + 0x000000e4, 
0x00000148, 0x00000149, 0x0000014a, 0x0004002b, 0x0000001c, 0x0000014c, 0x000021dc, + 0x0004002b, 0x0000001c, 0x0000014d, 0x0000b21f, 0x0004002b, 0x0000001c, 0x0000014e, 0x000031de, + 0x0004002b, 0x0000001c, 0x0000014f, 0x0000b9fa, 0x0007002c, 0x0000001d, 0x00000150, 0x0000014c, + 0x0000014d, 0x0000014e, 0x0000014f, 0x0007002c, 0x0000002c, 0x00000151, 0x00000142, 0x00000147, + 0x0000014b, 0x00000150, 0x0004002b, 0x0000001c, 0x00000152, 0x00002049, 0x0004002b, 0x0000001c, + 0x00000153, 0x0000a92d, 0x0004002b, 0x0000001c, 0x00000154, 0x00002f20, 0x0004002b, 0x0000001c, + 0x00000155, 0x0000b3a3, 0x0007002c, 0x0000001d, 0x00000156, 0x00000152, 0x00000153, 0x00000154, + 0x00000155, 0x0004002b, 0x0000001c, 0x00000157, 0x00003166, 0x0004002b, 0x0000001c, 0x00000158, + 0x00003258, 0x0004002b, 0x0000001c, 0x00000159, 0x0000b02b, 0x0004002b, 0x0000001c, 0x0000015a, + 0x00003045, 0x0007002c, 0x0000001d, 0x0000015b, 0x00000157, 0x00000158, 0x00000159, 0x0000015a, + 0x0004002b, 0x0000001c, 0x0000015c, 0x00003041, 0x0004002b, 0x0000001c, 0x0000015d, 0x00003479, + 0x0004002b, 0x0000001c, 0x0000015e, 0x0000b2a7, 0x0004002b, 0x0000001c, 0x0000015f, 0x00002fdf, + 0x0007002c, 0x0000001d, 0x00000160, 0x0000015c, 0x0000015d, 0x0000015e, 0x0000015f, 0x0004002b, + 0x0000001c, 0x00000161, 0x0000331b, 0x0004002b, 0x0000001c, 0x00000162, 0x00002c57, 0x0004002b, + 0x0000001c, 0x00000163, 0x00002281, 0x0004002b, 0x0000001c, 0x00000164, 0x0000323a, 0x0007002c, + 0x0000001d, 0x00000165, 0x00000161, 0x00000162, 0x00000163, 0x00000164, 0x0007002c, 0x0000002c, + 0x00000166, 0x00000156, 0x0000015b, 0x00000160, 0x00000165, 0x0004002b, 0x0000001c, 0x00000167, + 0x00003668, 0x0004002b, 0x0000001c, 0x00000168, 0x0000340d, 0x0004002b, 0x0000001c, 0x00000169, + 0x00002b71, 0x0004002b, 0x0000001c, 0x0000016a, 0x0000b86b, 0x0007002c, 0x0000001d, 0x0000016b, + 0x00000167, 0x00000168, 0x00000169, 0x0000016a, 0x0004002b, 0x0000001c, 0x0000016c, 0x00003538, + 0x0004002b, 0x0000001c, 0x0000016d, 0x0000346b, 
0x0004002b, 0x0000001c, 0x0000016e, 0x0000b292, + 0x0004002b, 0x0000001c, 0x0000016f, 0x00002b7f, 0x0007002c, 0x0000001d, 0x00000170, 0x0000016c, + 0x0000016d, 0x0000016e, 0x0000016f, 0x0004002b, 0x0000001c, 0x00000171, 0x0000347b, 0x0004002b, + 0x0000001c, 0x00000172, 0x00003478, 0x0004002b, 0x0000001c, 0x00000173, 0x0000b5d4, 0x0004002b, + 0x0000001c, 0x00000174, 0x000036bd, 0x0007002c, 0x0000001d, 0x00000175, 0x00000171, 0x00000172, + 0x00000173, 0x00000174, 0x0004002b, 0x0000001c, 0x00000176, 0x00002d7e, 0x0004002b, 0x0000001c, + 0x00000177, 0x0000aece, 0x0004002b, 0x0000001c, 0x00000178, 0x00002d58, 0x0004002b, 0x0000001c, + 0x00000179, 0x0000b604, 0x0007002c, 0x0000001d, 0x0000017a, 0x00000176, 0x00000177, 0x00000178, + 0x00000179, 0x0007002c, 0x0000002c, 0x0000017b, 0x0000016b, 0x00000170, 0x00000175, 0x0000017a, + 0x0004002b, 0x0000001c, 0x0000017c, 0x000030c6, 0x0004002b, 0x0000001c, 0x0000017d, 0x0000a6e5, + 0x0004002b, 0x0000001c, 0x0000017e, 0x00002cfd, 0x0004002b, 0x0000001c, 0x0000017f, 0x0000b544, + 0x0007002c, 0x0000001d, 0x00000180, 0x0000017c, 0x0000017d, 0x0000017e, 0x0000017f, 0x0004002b, + 0x0000001c, 0x00000181, 0x000031b8, 0x0004002b, 0x0000001c, 0x00000182, 0x000031c2, 0x0004002b, + 0x0000001c, 0x00000183, 0x0000b446, 0x0004002b, 0x0000001c, 0x00000184, 0x00002e1e, 0x0007002c, + 0x0000001d, 0x00000185, 0x00000181, 0x00000182, 0x00000183, 0x00000184, 0x0004002b, 0x0000001c, + 0x00000186, 0x00003174, 0x0004002b, 0x0000001c, 0x00000187, 0x00003343, 0x0004002b, 0x0000001c, + 0x00000188, 0x0000b5bc, 0x0004002b, 0x0000001c, 0x00000189, 0x00003152, 0x0007002c, 0x0000001d, + 0x0000018a, 0x00000186, 0x00000187, 0x00000188, 0x00000189, 0x0004002b, 0x0000001c, 0x0000018b, + 0x000030db, 0x0004002b, 0x0000001c, 0x0000018c, 0x00002114, 0x0004002b, 0x0000001c, 0x0000018d, + 0x00009f84, 0x0004002b, 0x0000001c, 0x0000018e, 0x0000a7df, 0x0007002c, 0x0000001d, 0x0000018f, + 0x0000018b, 0x0000018c, 0x0000018d, 0x0000018e, 0x0007002c, 0x0000002c, 0x00000190, 
0x00000180, + 0x00000185, 0x0000018a, 0x0000018f, 0x0004002b, 0x0000001c, 0x00000191, 0x00002eb3, 0x0004002b, + 0x0000001c, 0x00000192, 0x00002ba8, 0x0004002b, 0x0000001c, 0x00000193, 0x00002f08, 0x0004002b, + 0x0000001c, 0x00000194, 0x0000b62e, 0x0007002c, 0x0000001d, 0x00000195, 0x00000191, 0x00000192, + 0x00000193, 0x00000194, 0x0004002b, 0x0000001c, 0x00000196, 0x00003460, 0x0004002b, 0x0000001c, + 0x00000197, 0x00003503, 0x0004002b, 0x0000001c, 0x00000198, 0x0000b126, 0x0004002b, 0x0000001c, + 0x00000199, 0x00002c43, 0x0007002c, 0x0000001d, 0x0000019a, 0x00000196, 0x00000197, 0x00000198, + 0x00000199, 0x0004002b, 0x0000001c, 0x0000019b, 0x00003419, 0x0004002b, 0x0000001c, 0x0000019c, + 0x000035fc, 0x0004002b, 0x0000001c, 0x0000019d, 0x0000b546, 0x0004002b, 0x0000001c, 0x0000019e, + 0x00003336, 0x0007002c, 0x0000001d, 0x0000019f, 0x0000019b, 0x0000019c, 0x0000019d, 0x0000019e, + 0x0004002b, 0x0000001c, 0x000001a0, 0x00003624, 0x0004002b, 0x0000001c, 0x000001a1, 0x00003640, + 0x0004002b, 0x0000001c, 0x000001a2, 0x0000b469, 0x0004002b, 0x0000001c, 0x000001a3, 0x00002fe7, + 0x0007002c, 0x0000001d, 0x000001a4, 0x000001a0, 0x000001a1, 0x000001a2, 0x000001a3, 0x0007002c, + 0x0000002c, 0x000001a5, 0x00000195, 0x0000019a, 0x0000019f, 0x000001a4, 0x0004002b, 0x0000001c, + 0x000001a6, 0x00004003, 0x0004002b, 0x0000001c, 0x000001a7, 0x00003acf, 0x0004002b, 0x0000001c, + 0x000001a8, 0x0000ba9a, 0x0004002b, 0x0000001c, 0x000001a9, 0x0000bce8, 0x0007002c, 0x0000001d, + 0x000001aa, 0x000001a6, 0x000001a7, 0x000001a8, 0x000001a9, 0x0004002b, 0x0000001c, 0x000001ab, + 0x000030df, 0x0004002b, 0x0000001c, 0x000001ac, 0x000036a4, 0x0004002b, 0x0000001c, 0x000001ad, + 0x00003870, 0x0007002c, 0x0000001d, 0x000001ae, 0x000000df, 0x000001ab, 0x000001ac, 0x000001ad, + 0x0004002b, 0x0000001c, 0x000001af, 0x0000b997, 0x0004002b, 0x0000001c, 0x000001b0, 0x00003a46, + 0x0004002b, 0x0000001c, 0x000001b1, 0x0000a7cf, 0x0004002b, 0x0000001c, 0x000001b2, 0x0000b824, + 0x0007002c, 0x0000001d, 
0x000001b3, 0x000001af, 0x000001b0, 0x000001b1, 0x000001b2, 0x0004003b, + 0x00000013, 0x00000007, 0x00000000, 0x0004002b, 0x0000001c, 0x000001b4, 0x0000ba16, 0x0004002b, + 0x0000001c, 0x000001b5, 0x0000b2a0, 0x0004002b, 0x0000001c, 0x000001b6, 0x0000244f, 0x0004002b, + 0x0000001c, 0x000001b7, 0x00003408, 0x0007002c, 0x0000001d, 0x000001b8, 0x000001b4, 0x000001b5, + 0x000001b6, 0x000001b7, 0x0004002b, 0x0000001c, 0x000001b9, 0x0000b6bc, 0x0004002b, 0x0000001c, + 0x000001ba, 0x0000ae28, 0x0004002b, 0x0000001c, 0x000001bb, 0x00002c5b, 0x0004002b, 0x0000001c, + 0x000001bc, 0x0000b0d9, 0x0007002c, 0x0000001d, 0x000001bd, 0x000001b9, 0x000001ba, 0x000001bb, + 0x000001bc, 0x0004002b, 0x0000001c, 0x000001be, 0x0000b8cb, 0x0004002b, 0x0000001c, 0x000001bf, + 0x0000a8e7, 0x0004002b, 0x0000001c, 0x000001c0, 0x00002ea4, 0x0004002b, 0x0000001c, 0x000001c1, + 0x00002e59, 0x0007002c, 0x0000001d, 0x000001c2, 0x000001be, 0x000001bf, 0x000001c0, 0x000001c1, + 0x0004002b, 0x0000001c, 0x000001c3, 0x0000b489, 0x0004002b, 0x0000001c, 0x000001c4, 0x00002de2, + 0x0004002b, 0x0000001c, 0x000001c5, 0x00002554, 0x0004002b, 0x0000001c, 0x000001c6, 0x00002f5d, + 0x0007002c, 0x0000001d, 0x000001c7, 0x000001c3, 0x000001c4, 0x000001c5, 0x000001c6, 0x0007002c, + 0x0000002c, 0x000001c8, 0x000001b8, 0x000001bd, 0x000001c2, 0x000001c7, 0x0004002b, 0x0000001c, + 0x000001c9, 0x0000b01c, 0x0004002b, 0x0000001c, 0x000001ca, 0x0000b1e7, 0x0004002b, 0x0000001c, + 0x000001cb, 0x00001e45, 0x0004002b, 0x0000001c, 0x000001cc, 0x0000342a, 0x0007002c, 0x0000001d, + 0x000001cd, 0x000001c9, 0x000001ca, 0x000001cb, 0x000001cc, 0x0004002b, 0x0000001c, 0x000001ce, + 0x0000af76, 0x0004002b, 0x0000001c, 0x000001cf, 0x0000b060, 0x0004002b, 0x0000001c, 0x000001d0, + 0x0000a9a7, 0x0004002b, 0x0000001c, 0x000001d1, 0x0000b0b2, 0x0007002c, 0x0000001d, 0x000001d2, + 0x000001ce, 0x000001cf, 0x000001d0, 0x000001d1, 0x0004002b, 0x0000001c, 0x000001d3, 0x0000b3cf, + 0x0004002b, 0x0000001c, 0x000001d4, 0x0000ad6f, 0x0004002b, 
0x0000001c, 0x000001d5, 0x0000a7a4, + 0x0004002b, 0x0000001c, 0x000001d6, 0x00002fbc, 0x0007002c, 0x0000001d, 0x000001d7, 0x000001d3, + 0x000001d4, 0x000001d5, 0x000001d6, 0x0004002b, 0x0000001c, 0x000001d8, 0x0000a429, 0x0004002b, + 0x0000001c, 0x000001d9, 0x0000ac3a, 0x0004002b, 0x0000001c, 0x000001da, 0x00003280, 0x0004002b, + 0x0000001c, 0x000001db, 0x00002d8b, 0x0007002c, 0x0000001d, 0x000001dc, 0x000001d8, 0x000001d9, + 0x000001da, 0x000001db, 0x0007002c, 0x0000002c, 0x000001dd, 0x000001cd, 0x000001d2, 0x000001d7, + 0x000001dc, 0x0004002b, 0x0000001c, 0x000001de, 0x0000aec6, 0x0004002b, 0x0000001c, 0x000001df, + 0x0000b8a2, 0x0004002b, 0x0000001c, 0x000001e0, 0x00002d09, 0x0004002b, 0x0000001c, 0x000001e1, + 0x00002c41, 0x0007002c, 0x0000001d, 0x000001e2, 0x000001de, 0x000001df, 0x000001e0, 0x000001e1, + 0x0004002b, 0x0000001c, 0x000001e3, 0x0000ad72, 0x0004002b, 0x0000001c, 0x000001e4, 0x0000b4c2, + 0x0004002b, 0x0000001c, 0x000001e5, 0x000033a2, 0x0004002b, 0x0000001c, 0x000001e6, 0x0000b4ef, + 0x0007002c, 0x0000001d, 0x000001e7, 0x000001e3, 0x000001e4, 0x000001e5, 0x000001e6, 0x0004002b, + 0x0000001c, 0x000001e8, 0x0000b30f, 0x0004002b, 0x0000001c, 0x000001e9, 0x0000b80e, 0x0004002b, + 0x0000001c, 0x000001ea, 0x000032d7, 0x0004002b, 0x0000001c, 0x000001eb, 0x0000adff, 0x0007002c, + 0x0000001d, 0x000001ec, 0x000001e8, 0x000001e9, 0x000001ea, 0x000001eb, 0x0004002b, 0x0000001c, + 0x000001ed, 0x0000ac93, 0x0004002b, 0x0000001c, 0x000001ee, 0x0000b141, 0x0004002b, 0x0000001c, + 0x000001ef, 0x00002a1b, 0x0004002b, 0x0000001c, 0x000001f0, 0x00003301, 0x0007002c, 0x0000001d, + 0x000001f1, 0x000001ed, 0x000001ee, 0x000001ef, 0x000001f0, 0x0007002c, 0x0000002c, 0x000001f2, + 0x000001e2, 0x000001e7, 0x000001ec, 0x000001f1, 0x0004002b, 0x0000001c, 0x000001f3, 0x0000b667, + 0x0004002b, 0x0000001c, 0x000001f4, 0x000030af, 0x0004002b, 0x0000001c, 0x000001f5, 0x0000a5d4, + 0x0004002b, 0x0000001c, 0x000001f6, 0x00003488, 0x0007002c, 0x0000001d, 0x000001f7, 0x000001f3, + 
0x000001f4, 0x000001f5, 0x000001f6, 0x0004002b, 0x0000001c, 0x000001f8, 0x0000af1a, 0x0004002b, + 0x0000001c, 0x000001f9, 0x00002a68, 0x0004002b, 0x0000001c, 0x000001fa, 0x0000acc1, 0x0004002b, + 0x0000001c, 0x000001fb, 0x00002455, 0x0007002c, 0x0000001d, 0x000001fc, 0x000001f8, 0x000001f9, + 0x000001fa, 0x000001fb, 0x0004002b, 0x0000001c, 0x000001fd, 0x0000b2f1, 0x0004002b, 0x0000001c, + 0x000001fe, 0x0000a941, 0x0004002b, 0x0000001c, 0x000001ff, 0x000030fe, 0x0007002c, 0x0000001d, + 0x00000200, 0x000001fd, 0x0000011f, 0x000001fe, 0x000001ff, 0x0004002b, 0x0000001c, 0x00000201, + 0x0000ac95, 0x0004002b, 0x0000001c, 0x00000202, 0x000033ab, 0x0004002b, 0x0000001c, 0x00000203, + 0x0000ac74, 0x0004002b, 0x0000001c, 0x00000204, 0x00002d82, 0x0007002c, 0x0000001d, 0x00000205, + 0x00000201, 0x00000202, 0x00000203, 0x00000204, 0x0007002c, 0x0000002c, 0x00000206, 0x000001f7, + 0x000001fc, 0x00000200, 0x00000205, 0x0004002b, 0x0000001c, 0x00000207, 0x00002c7b, 0x0004002b, + 0x0000001c, 0x00000208, 0x00002f43, 0x0004002b, 0x0000001c, 0x00000209, 0x0000ad1e, 0x0004002b, + 0x0000001c, 0x0000020a, 0x000034c4, 0x0007002c, 0x0000001d, 0x0000020b, 0x00000207, 0x00000208, + 0x00000209, 0x0000020a, 0x0004002b, 0x0000001c, 0x0000020c, 0x0000274a, 0x0004002b, 0x0000001c, + 0x0000020d, 0x0000227b, 0x0004002b, 0x0000001c, 0x0000020e, 0x0000adfe, 0x0004002b, 0x0000001c, + 0x0000020f, 0x00002d16, 0x0007002c, 0x0000001d, 0x00000210, 0x0000020c, 0x0000020d, 0x0000020e, + 0x0000020f, 0x0004002b, 0x0000001c, 0x00000211, 0x00002ad3, 0x0004002b, 0x0000001c, 0x00000212, + 0x00002d38, 0x0004002b, 0x0000001c, 0x00000213, 0x0000b0fe, 0x0004002b, 0x0000001c, 0x00000214, + 0x000030d9, 0x0007002c, 0x0000001d, 0x00000215, 0x00000211, 0x00000212, 0x00000213, 0x00000214, + 0x0004002b, 0x0000001c, 0x00000216, 0x00002edf, 0x0004002b, 0x0000001c, 0x00000217, 0x00002b09, + 0x0004002b, 0x0000001c, 0x00000218, 0x00002c83, 0x0004002b, 0x0000001c, 0x00000219, 0x0000af4b, + 0x0007002c, 0x0000001d, 0x0000021a, 
0x00000216, 0x00000217, 0x00000218, 0x00000219, 0x0007002c, + 0x0000002c, 0x0000021b, 0x0000020b, 0x00000210, 0x00000215, 0x0000021a, 0x0004002b, 0x0000001c, + 0x0000021c, 0x00009bc1, 0x0004002b, 0x0000001c, 0x0000021d, 0x0000b0d1, 0x0004002b, 0x0000001c, + 0x0000021e, 0x0000aaa0, 0x0004002b, 0x0000001c, 0x0000021f, 0x00002f1c, 0x0007002c, 0x0000001d, + 0x00000220, 0x0000021c, 0x0000021d, 0x0000021e, 0x0000021f, 0x0004002b, 0x0000001c, 0x00000221, + 0x0000a4e9, 0x0004002b, 0x0000001c, 0x00000222, 0x0000b033, 0x0004002b, 0x0000001c, 0x00000223, + 0x0000a836, 0x0004002b, 0x0000001c, 0x00000224, 0x0000a80a, 0x0007002c, 0x0000001d, 0x00000225, + 0x00000221, 0x00000222, 0x00000223, 0x00000224, 0x0004002b, 0x0000001c, 0x00000226, 0x00009fd0, + 0x0004002b, 0x0000001c, 0x00000227, 0x0000b0fa, 0x0004002b, 0x0000001c, 0x00000228, 0x0000a7d3, + 0x0007002c, 0x0000001d, 0x00000229, 0x00000226, 0x00000227, 0x00000090, 0x00000228, 0x0004002b, + 0x0000001c, 0x0000022a, 0x00009d28, 0x0004002b, 0x0000001c, 0x0000022b, 0x00002d7c, 0x0004002b, + 0x0000001c, 0x0000022c, 0x0000ad88, 0x0004002b, 0x0000001c, 0x0000022d, 0x00001d75, 0x0007002c, + 0x0000001d, 0x0000022e, 0x0000022a, 0x0000022b, 0x0000022c, 0x0000022d, 0x0007002c, 0x0000002c, + 0x0000022f, 0x00000220, 0x00000225, 0x00000229, 0x0000022e, 0x0004002b, 0x0000001c, 0x00000230, + 0x0000b7b1, 0x0004002b, 0x0000001c, 0x00000231, 0x00003476, 0x0004002b, 0x0000001c, 0x00000232, + 0x0000285b, 0x0004002b, 0x0000001c, 0x00000233, 0x00003013, 0x0007002c, 0x0000001d, 0x00000234, + 0x00000230, 0x00000231, 0x00000232, 0x00000233, 0x0004002b, 0x0000001c, 0x00000235, 0x0000b2cd, + 0x0004002b, 0x0000001c, 0x00000236, 0x00003252, 0x0004002b, 0x0000001c, 0x00000237, 0x00003235, + 0x0004002b, 0x0000001c, 0x00000238, 0x0000b28c, 0x0007002c, 0x0000001d, 0x00000239, 0x00000235, + 0x00000236, 0x00000237, 0x00000238, 0x0004002b, 0x0000001c, 0x0000023a, 0x0000b662, 0x0004002b, + 0x0000001c, 0x0000023b, 0x00003378, 0x0004002b, 0x0000001c, 0x0000023c, 
0x000030a3, 0x0004002b, + 0x0000001c, 0x0000023d, 0x000020a5, 0x0007002c, 0x0000001d, 0x0000023e, 0x0000023a, 0x0000023b, + 0x0000023c, 0x0000023d, 0x0004002b, 0x0000001c, 0x0000023f, 0x0000ae94, 0x0004002b, 0x0000001c, + 0x00000240, 0x00002cde, 0x0004002b, 0x0000001c, 0x00000241, 0x0000b25d, 0x0004002b, 0x0000001c, + 0x00000242, 0x000034cf, 0x0007002c, 0x0000001d, 0x00000243, 0x0000023f, 0x00000240, 0x00000241, + 0x00000242, 0x0007002c, 0x0000002c, 0x00000244, 0x00000234, 0x00000239, 0x0000023e, 0x00000243, + 0x0004002b, 0x0000001c, 0x00000245, 0x00002a11, 0x0004002b, 0x0000001c, 0x00000246, 0x00003093, + 0x0004002b, 0x0000001c, 0x00000247, 0x0000aa58, 0x0004002b, 0x0000001c, 0x00000248, 0x00003199, + 0x0007002c, 0x0000001d, 0x00000249, 0x00000245, 0x00000246, 0x00000247, 0x00000248, 0x0004002b, + 0x0000001c, 0x0000024a, 0x0000aec8, 0x0004002b, 0x0000001c, 0x0000024b, 0x00002a9a, 0x0004002b, + 0x0000001c, 0x0000024c, 0x000021d2, 0x0004002b, 0x0000001c, 0x0000024d, 0x0000af2a, 0x0007002c, + 0x0000001d, 0x0000024e, 0x0000024a, 0x0000024b, 0x0000024c, 0x0000024d, 0x0004002b, 0x0000001c, + 0x0000024f, 0x0000a67e, 0x0004002b, 0x0000001c, 0x00000250, 0x00002aec, 0x0004002b, 0x0000001c, + 0x00000251, 0x00002123, 0x0004002b, 0x0000001c, 0x00000252, 0x0000290f, 0x0007002c, 0x0000001d, + 0x00000253, 0x0000024f, 0x00000250, 0x00000251, 0x00000252, 0x0004002b, 0x0000001c, 0x00000254, + 0x00002c13, 0x0004002b, 0x0000001c, 0x00000255, 0x0000b024, 0x0004002b, 0x0000001c, 0x00000256, + 0x0000ae04, 0x0004002b, 0x0000001c, 0x00000257, 0x00003102, 0x0007002c, 0x0000001d, 0x00000258, + 0x00000254, 0x00000255, 0x00000256, 0x00000257, 0x0007002c, 0x0000002c, 0x00000259, 0x00000249, + 0x0000024e, 0x00000253, 0x00000258, 0x0004002b, 0x0000001c, 0x0000025a, 0x00002eb7, 0x0004002b, + 0x0000001c, 0x0000025b, 0x0000af91, 0x0004002b, 0x0000001c, 0x0000025c, 0x0000aa75, 0x0004002b, + 0x0000001c, 0x0000025d, 0x0000a469, 0x0007002c, 0x0000001d, 0x0000025e, 0x0000025a, 0x0000025b, + 0x0000025c, 
0x0000025d, 0x0004002b, 0x0000001c, 0x0000025f, 0x0000a535, 0x0004002b, 0x0000001c, + 0x00000260, 0x0000ac4a, 0x0004002b, 0x0000001c, 0x00000261, 0x0000337f, 0x0004002b, 0x0000001c, + 0x00000262, 0x0000b104, 0x0007002c, 0x0000001d, 0x00000263, 0x0000025f, 0x00000260, 0x00000261, + 0x00000262, 0x0004002b, 0x0000001c, 0x00000264, 0x00001fce, 0x0004002b, 0x0000001c, 0x00000265, + 0x0000b2f4, 0x0004002b, 0x0000001c, 0x00000266, 0x00003383, 0x0004002b, 0x0000001c, 0x00000267, + 0x0000b1e4, 0x0007002c, 0x0000001d, 0x00000268, 0x00000264, 0x00000265, 0x00000266, 0x00000267, + 0x0004002b, 0x0000001c, 0x00000269, 0x0000a907, 0x0004002b, 0x0000001c, 0x0000026a, 0x000023c4, + 0x0004002b, 0x0000001c, 0x0000026b, 0x0000b302, 0x0004002b, 0x0000001c, 0x0000026c, 0x000033cf, + 0x0007002c, 0x0000001d, 0x0000026d, 0x00000269, 0x0000026a, 0x0000026b, 0x0000026c, 0x0007002c, + 0x0000002c, 0x0000026e, 0x0000025e, 0x00000263, 0x00000268, 0x0000026d, 0x0004002b, 0x0000001c, + 0x0000026f, 0x0000ac98, 0x0004002b, 0x0000001c, 0x00000270, 0x000028c9, 0x0004002b, 0x0000001c, + 0x00000271, 0x0000b578, 0x0004002b, 0x0000001c, 0x00000272, 0x0000a8bc, 0x0007002c, 0x0000001d, + 0x00000273, 0x0000026f, 0x00000270, 0x00000271, 0x00000272, 0x0004002b, 0x0000001c, 0x00000274, + 0x0000b5d8, 0x0004002b, 0x0000001c, 0x00000275, 0x0000aaf8, 0x0004002b, 0x0000001c, 0x00000276, + 0x00002f9f, 0x0004002b, 0x0000001c, 0x00000277, 0x0000b43f, 0x0007002c, 0x0000001d, 0x00000278, + 0x00000274, 0x00000275, 0x00000276, 0x00000277, 0x0004002b, 0x0000001c, 0x00000279, 0x0000b698, + 0x0004002b, 0x0000001c, 0x0000027a, 0x00003113, 0x0004002b, 0x0000001c, 0x0000027b, 0x0000b4b0, + 0x0007002c, 0x0000001d, 0x0000027c, 0x00000279, 0x0000024d, 0x0000027a, 0x0000027b, 0x0004002b, + 0x0000001c, 0x0000027d, 0x0000221f, 0x0004002b, 0x0000001c, 0x0000027e, 0x00002e3f, 0x0004002b, + 0x0000001c, 0x0000027f, 0x0000b19e, 0x0004002b, 0x0000001c, 0x00000280, 0x0000344e, 0x0007002c, + 0x0000001d, 0x00000281, 0x0000027d, 0x0000027e, 
0x0000027f, 0x00000280, 0x0007002c, 0x0000002c, + 0x00000282, 0x00000273, 0x00000278, 0x0000027c, 0x00000281, 0x0004002b, 0x0000001c, 0x00000283, + 0x00002e33, 0x0004002b, 0x0000001c, 0x00000284, 0x0000aa02, 0x0004002b, 0x0000001c, 0x00000285, + 0x0000b037, 0x0004002b, 0x0000001c, 0x00000286, 0x00002e10, 0x0007002c, 0x0000001d, 0x00000287, + 0x00000283, 0x00000284, 0x00000285, 0x00000286, 0x0004002b, 0x0000001c, 0x00000288, 0x0000ad9a, + 0x0004002b, 0x0000001c, 0x00000289, 0x0000b1d2, 0x0004002b, 0x0000001c, 0x0000028a, 0x00002f3e, + 0x0004002b, 0x0000001c, 0x0000028b, 0x00001611, 0x0007002c, 0x0000001d, 0x0000028c, 0x00000288, + 0x00000289, 0x0000028a, 0x0000028b, 0x0004002b, 0x0000001c, 0x0000028d, 0x0000afef, 0x0004002b, + 0x0000001c, 0x0000028e, 0x0000b08b, 0x0004002b, 0x0000001c, 0x0000028f, 0x0000313e, 0x0004002b, + 0x0000001c, 0x00000290, 0x0000a23f, 0x0007002c, 0x0000001d, 0x00000291, 0x0000028d, 0x0000028e, + 0x0000028f, 0x00000290, 0x0004002b, 0x0000001c, 0x00000292, 0x00009de3, 0x0004002b, 0x0000001c, + 0x00000293, 0x00002fe0, 0x0004002b, 0x0000001c, 0x00000294, 0x0000b0b9, 0x0004002b, 0x0000001c, + 0x00000295, 0x00003563, 0x0007002c, 0x0000001d, 0x00000296, 0x00000292, 0x00000293, 0x00000294, + 0x00000295, 0x0007002c, 0x0000002c, 0x00000297, 0x00000287, 0x0000028c, 0x00000291, 0x00000296, + 0x0004002b, 0x0000001c, 0x00000298, 0x0000ac65, 0x0004002b, 0x0000001c, 0x00000299, 0x00003028, + 0x0004002b, 0x0000001c, 0x0000029a, 0x0000b6f7, 0x0004002b, 0x0000001c, 0x0000029b, 0x000030e0, + 0x0007002c, 0x0000001d, 0x0000029c, 0x00000298, 0x00000299, 0x0000029a, 0x0000029b, 0x0004002b, + 0x0000001c, 0x0000029d, 0x0000ad6c, 0x0004002b, 0x0000001c, 0x0000029e, 0x0000b5ff, 0x0004002b, + 0x0000001c, 0x0000029f, 0x0000b483, 0x0007002c, 0x0000001d, 0x000002a0, 0x0000029d, 0x0000029e, + 0x00000136, 0x0000029f, 0x0004002b, 0x0000001c, 0x000002a1, 0x0000ac1f, 0x0004002b, 0x0000001c, + 0x000002a2, 0x0000b801, 0x0004002b, 0x0000001c, 0x000002a3, 0x00003465, 0x0004002b, 
0x0000001c, + 0x000002a4, 0x0000b521, 0x0007002c, 0x0000001d, 0x000002a5, 0x000002a1, 0x000002a2, 0x000002a3, + 0x000002a4, 0x0004002b, 0x0000001c, 0x000002a6, 0x0000b41d, 0x0004002b, 0x0000001c, 0x000002a7, + 0x0000b32a, 0x0004002b, 0x0000001c, 0x000002a8, 0x00002c1a, 0x0007002c, 0x0000001d, 0x000002a9, + 0x000002a6, 0x00000105, 0x000002a7, 0x000002a8, 0x0007002c, 0x0000002c, 0x000002aa, 0x0000029c, + 0x000002a0, 0x000002a5, 0x000002a9, 0x0004002b, 0x0000001c, 0x000002ab, 0x0000aab0, 0x0004002b, + 0x0000001c, 0x000002ac, 0x00002b18, 0x0004002b, 0x0000001c, 0x000002ad, 0x0000b3e6, 0x0004002b, + 0x0000001c, 0x000002ae, 0x0000124b, 0x0007002c, 0x0000001d, 0x000002af, 0x000002ab, 0x000002ac, + 0x000002ad, 0x000002ae, 0x0004002b, 0x0000001c, 0x000002b0, 0x0000b3c2, 0x0004002b, 0x0000001c, + 0x000002b1, 0x00002b08, 0x0004002b, 0x0000001c, 0x000002b2, 0x00002f8d, 0x0004002b, 0x0000001c, + 0x000002b3, 0x0000afbf, 0x0007002c, 0x0000001d, 0x000002b4, 0x000002b0, 0x000002b1, 0x000002b2, + 0x000002b3, 0x0004002b, 0x0000001c, 0x000002b5, 0x0000b442, 0x0004002b, 0x0000001c, 0x000002b6, + 0x000020e2, 0x0004002b, 0x0000001c, 0x000002b7, 0x000030bf, 0x0004002b, 0x0000001c, 0x000002b8, + 0x0000ab09, 0x0007002c, 0x0000001d, 0x000002b9, 0x000002b5, 0x000002b6, 0x000002b7, 0x000002b8, + 0x0004002b, 0x0000001c, 0x000002ba, 0x0000af21, 0x0004002b, 0x0000001c, 0x000002bb, 0x0000a5c7, + 0x0004002b, 0x0000001c, 0x000002bc, 0x000031b4, 0x0007002c, 0x0000001d, 0x000002bd, 0x000002ba, + 0x000002bb, 0x000001d1, 0x000002bc, 0x0007002c, 0x0000002c, 0x000002be, 0x000002af, 0x000002b4, + 0x000002b9, 0x000002bd, 0x0004002b, 0x0000001c, 0x000002bf, 0x00002ce3, 0x0004002b, 0x0000001c, + 0x000002c0, 0x0000af78, 0x0004002b, 0x0000001c, 0x000002c1, 0x0000abb2, 0x0004002b, 0x0000001c, + 0x000002c2, 0x00002e05, 0x0007002c, 0x0000001d, 0x000002c3, 0x000002bf, 0x000002c0, 0x000002c1, + 0x000002c2, 0x0004002b, 0x0000001c, 0x000002c4, 0x0000a9f2, 0x0004002b, 0x0000001c, 0x000002c5, + 0x0000ac07, 0x0004002b, 
0x0000001c, 0x000002c6, 0x00003173, 0x0007002c, 0x0000001d, 0x000002c7, + 0x000002c4, 0x000002c5, 0x00000144, 0x000002c6, 0x0004002b, 0x0000001c, 0x000002c8, 0x0000accd, + 0x0004002b, 0x0000001c, 0x000002c9, 0x00002347, 0x0004002b, 0x0000001c, 0x000002ca, 0x0000343b, + 0x0007002c, 0x0000001d, 0x000002cb, 0x000002c8, 0x000002c9, 0x000001ff, 0x000002ca, 0x0004002b, + 0x0000001c, 0x000002cc, 0x0000a954, 0x0004002b, 0x0000001c, 0x000002cd, 0x0000adaa, 0x0004002b, + 0x0000001c, 0x000002ce, 0x0000ae4a, 0x0007002c, 0x0000001d, 0x000002cf, 0x000002cc, 0x000002cd, + 0x000002ce, 0x0000026c, 0x0007002c, 0x0000002c, 0x000002d0, 0x000002c3, 0x000002c7, 0x000002cb, + 0x000002cf, 0x0004002b, 0x0000001c, 0x000002d1, 0x00002495, 0x0004002b, 0x0000001c, 0x000002d2, + 0x00001a44, 0x0004002b, 0x0000001c, 0x000002d3, 0x0000b4a2, 0x0004002b, 0x0000001c, 0x000002d4, + 0x00002c9d, 0x0007002c, 0x0000001d, 0x000002d5, 0x000002d1, 0x000002d2, 0x000002d3, 0x000002d4, + 0x0004002b, 0x0000001c, 0x000002d6, 0x000094d5, 0x0004002b, 0x0000001c, 0x000002d7, 0x0000b21c, + 0x0004002b, 0x0000001c, 0x000002d8, 0x00003080, 0x0004002b, 0x0000001c, 0x000002d9, 0x0000b0aa, + 0x0007002c, 0x0000001d, 0x000002da, 0x000002d6, 0x000002d7, 0x000002d8, 0x000002d9, 0x0004002b, + 0x0000001c, 0x000002db, 0x00002205, 0x0004002b, 0x0000001c, 0x000002dc, 0x0000b391, 0x0004002b, + 0x0000001c, 0x000002dd, 0x00003171, 0x0004002b, 0x0000001c, 0x000002de, 0x0000af7e, 0x0007002c, + 0x0000001d, 0x000002df, 0x000002db, 0x000002dc, 0x000002dd, 0x000002de, 0x0004002b, 0x0000001c, + 0x000002e0, 0x0000b0bc, 0x0004002b, 0x0000001c, 0x000002e1, 0x0000a9ff, 0x0004002b, 0x0000001c, + 0x000002e2, 0x0000af3d, 0x0007002c, 0x0000001d, 0x000002e3, 0x000002e0, 0x000002e1, 0x000002e2, + 0x000001d6, 0x0007002c, 0x0000002c, 0x000002e4, 0x000002d5, 0x000002da, 0x000002df, 0x000002e3, + 0x0004002b, 0x0000001c, 0x000002e5, 0x0000ae85, 0x0004002b, 0x0000001c, 0x000002e6, 0x000033aa, + 0x0004002b, 0x0000001c, 0x000002e7, 0x0000b5b4, 0x0004002b, 
0x0000001c, 0x000002e8, 0x0000ac29, + 0x0007002c, 0x0000001d, 0x000002e9, 0x000002e5, 0x000002e6, 0x000002e7, 0x000002e8, 0x0004002b, + 0x0000001c, 0x000002ea, 0x0000b4b3, 0x0004002b, 0x0000001c, 0x000002eb, 0x000032fc, 0x0004002b, + 0x0000001c, 0x000002ec, 0x0000322a, 0x0004002b, 0x0000001c, 0x000002ed, 0x0000b5a1, 0x0007002c, + 0x0000001d, 0x000002ee, 0x000002ea, 0x000002eb, 0x000002ec, 0x000002ed, 0x0004002b, 0x0000001c, + 0x000002ef, 0x000031d0, 0x0004002b, 0x0000001c, 0x000002f0, 0x00003449, 0x0004002b, 0x0000001c, + 0x000002f1, 0x0000b55a, 0x0007002c, 0x0000001d, 0x000002f2, 0x0000029e, 0x000002ef, 0x000002f0, + 0x000002f1, 0x0004002b, 0x0000001c, 0x000002f3, 0x0000b3b6, 0x0004002b, 0x0000001c, 0x000002f4, + 0x00002b77, 0x0004002b, 0x0000001c, 0x000002f5, 0x0000b1d5, 0x0004002b, 0x0000001c, 0x000002f6, + 0x00002c6e, 0x0007002c, 0x0000001d, 0x000002f7, 0x000002f3, 0x000002f4, 0x000002f5, 0x000002f6, + 0x0007002c, 0x0000002c, 0x000002f8, 0x000002e9, 0x000002ee, 0x000002f2, 0x000002f7, 0x0004002b, + 0x0000001c, 0x000002f9, 0x0000ad19, 0x0004002b, 0x0000001c, 0x000002fa, 0x0000ae17, 0x0004002b, + 0x0000001c, 0x000002fb, 0x000028fd, 0x0007002c, 0x0000001d, 0x000002fc, 0x000000ca, 0x000002f9, + 0x000002fa, 0x000002fb, 0x0004002b, 0x0000001c, 0x000002fd, 0x0000af9a, 0x0004002b, 0x0000001c, + 0x000002fe, 0x00001961, 0x0004002b, 0x0000001c, 0x000002ff, 0x0000af6d, 0x0007002c, 0x0000001d, + 0x00000300, 0x000002fd, 0x000002fe, 0x0000011f, 0x000002ff, 0x0004002b, 0x0000001c, 0x00000301, + 0x0000b048, 0x0004002b, 0x0000001c, 0x00000302, 0x00002b4a, 0x0004002b, 0x0000001c, 0x00000303, + 0x000032bd, 0x0004002b, 0x0000001c, 0x00000304, 0x0000af02, 0x0007002c, 0x0000001d, 0x00000305, + 0x00000301, 0x00000302, 0x00000303, 0x00000304, 0x0004002b, 0x0000001c, 0x00000306, 0x0000a9d8, + 0x0004002b, 0x0000001c, 0x00000307, 0x0000ad34, 0x0004002b, 0x0000001c, 0x00000308, 0x00002f84, + 0x0004002b, 0x0000001c, 0x00000309, 0x000030e2, 0x0007002c, 0x0000001d, 0x0000030a, 0x00000306, + 
0x00000307, 0x00000308, 0x00000309, 0x0007002c, 0x0000002c, 0x0000030b, 0x000002fc, 0x00000300, + 0x00000305, 0x0000030a, 0x0004002b, 0x0000001c, 0x0000030c, 0x0000a4ec, 0x0004002b, 0x0000001c, + 0x0000030d, 0x0000285d, 0x0004002b, 0x0000001c, 0x0000030e, 0x0000b563, 0x0004002b, 0x0000001c, + 0x0000030f, 0x0000a153, 0x0007002c, 0x0000001d, 0x00000310, 0x0000030c, 0x0000030d, 0x0000030e, + 0x0000030f, 0x0004002b, 0x0000001c, 0x00000311, 0x0000aedf, 0x0004002b, 0x0000001c, 0x00000312, + 0x0000aead, 0x0004002b, 0x0000001c, 0x00000313, 0x0000320e, 0x0004002b, 0x0000001c, 0x00000314, + 0x0000b6e5, 0x0007002c, 0x0000001d, 0x00000315, 0x00000311, 0x00000312, 0x00000313, 0x00000314, + 0x0004002b, 0x0000001c, 0x00000316, 0x0000ab9e, 0x0004002b, 0x0000001c, 0x00000317, 0x0000b2c0, + 0x0004002b, 0x0000001c, 0x00000318, 0x00003395, 0x0004002b, 0x0000001c, 0x00000319, 0x0000b765, + 0x0007002c, 0x0000001d, 0x0000031a, 0x00000316, 0x00000317, 0x00000318, 0x00000319, 0x0004002b, + 0x0000001c, 0x0000031b, 0x0000b014, 0x0004002b, 0x0000001c, 0x0000031c, 0x0000afb1, 0x0004002b, + 0x0000001c, 0x0000031d, 0x0000a8fe, 0x0004002b, 0x0000001c, 0x0000031e, 0x0000aebc, 0x0007002c, + 0x0000001d, 0x0000031f, 0x0000031b, 0x0000031c, 0x0000031d, 0x0000031e, 0x0007002c, 0x0000002c, + 0x00000320, 0x00000310, 0x00000315, 0x0000031a, 0x0000031f, 0x0004002b, 0x0000001c, 0x00000321, + 0x0000bdee, 0x0004002b, 0x0000001c, 0x00000322, 0x0000b44a, 0x0004002b, 0x0000001c, 0x00000323, + 0x0000b9d0, 0x0004002b, 0x0000001c, 0x00000324, 0x00003896, 0x0007002c, 0x0000001d, 0x00000325, + 0x00000321, 0x00000322, 0x00000323, 0x00000324, 0x0004002b, 0x0000001c, 0x00000326, 0x000033a8, + 0x0004002b, 0x0000001c, 0x00000327, 0x00003893, 0x0004002b, 0x0000001c, 0x00000328, 0x00003813, + 0x0004002b, 0x0000001c, 0x00000329, 0x00003921, 0x0007002c, 0x0000001d, 0x0000032a, 0x00000326, + 0x00000327, 0x00000328, 0x00000329, 0x0004002b, 0x0000001c, 0x0000032b, 0x00003a99, 0x0004002b, + 0x0000001c, 0x0000032c, 0x00002f2f, 
0x0004002b, 0x0000001c, 0x0000032d, 0x0000b952, 0x0004002b, + 0x0000001c, 0x0000032e, 0x0000316a, 0x0007002c, 0x0000001d, 0x0000032f, 0x0000032b, 0x0000032c, + 0x0000032d, 0x0000032e, 0x0004002b, 0x0000000d, 0x00000330, 0x00000010, 0x0006002c, 0x0000000e, + 0x00000008, 0x00000330, 0x00000330, 0x0000001b, 0x00050036, 0x00000009, 0x00000002, 0x00000000, + 0x0000000a, 0x000200f8, 0x00000331, 0x000300f7, 0x00000332, 0x00000000, 0x000300fb, 0x0000001a, + 0x00000333, 0x000200f8, 0x00000333, 0x0004003d, 0x0000000e, 0x00000334, 0x00000003, 0x0007004f, + 0x00000010, 0x00000335, 0x00000334, 0x00000334, 0x00000000, 0x00000001, 0x0004007c, 0x0000000c, + 0x00000336, 0x00000335, 0x0004003d, 0x00000012, 0x00000337, 0x00000004, 0x00040068, 0x0000000c, + 0x00000338, 0x00000337, 0x000500af, 0x00000015, 0x00000339, 0x00000336, 0x00000338, 0x0004009a, + 0x00000014, 0x0000033a, 0x00000339, 0x000300f7, 0x0000033b, 0x00000000, 0x000400fa, 0x0000033a, + 0x0000033c, 0x0000033b, 0x000200f8, 0x0000033c, 0x000200f9, 0x00000332, 0x000200f8, 0x0000033b, + 0x00050080, 0x0000000c, 0x0000033d, 0x00000336, 0x0000001f, 0x000500af, 0x00000015, 0x0000033e, + 0x0000033d, 0x00000020, 0x0004009b, 0x00000014, 0x0000033f, 0x0000033e, 0x000300f7, 0x00000340, + 0x00000000, 0x000400fa, 0x0000033f, 0x00000341, 0x00000340, 0x000200f8, 0x00000341, 0x0004003d, + 0x00000017, 0x00000342, 0x00000005, 0x00040064, 0x00000016, 0x00000343, 0x00000342, 0x00050067, + 0x0000000c, 0x00000344, 0x00000343, 0x00000019, 0x000500b1, 0x00000015, 0x00000345, 0x0000033d, + 0x00000344, 0x0004009b, 0x00000014, 0x00000346, 0x00000345, 0x000200f9, 0x00000340, 0x000200f8, + 0x00000340, 0x000700f5, 0x00000014, 0x00000347, 0x0000033f, 0x0000033b, 0x00000346, 0x00000341, + 0x000300f7, 0x00000348, 0x00000000, 0x000400fa, 0x00000347, 0x00000349, 0x0000034a, 0x000200f8, + 0x00000349, 0x0004003d, 0x00000017, 0x0000034b, 0x00000005, 0x00040064, 0x00000016, 0x0000034c, + 0x0000034b, 0x0007005f, 0x00000021, 0x0000034d, 0x0000034c, 0x0000033d, 
0x00000002, 0x00000019, + 0x000200f9, 0x00000348, 0x000200f8, 0x0000034a, 0x000200f9, 0x00000348, 0x000200f8, 0x00000348, + 0x000700f5, 0x00000021, 0x0000034e, 0x0000034d, 0x00000349, 0x00000023, 0x0000034a, 0x00040073, + 0x0000001d, 0x0000034f, 0x0000034e, 0x00050080, 0x0000000c, 0x00000350, 0x00000336, 0x00000024, + 0x000500af, 0x00000015, 0x00000351, 0x00000350, 0x00000020, 0x0004009b, 0x00000014, 0x00000352, + 0x00000351, 0x000300f7, 0x00000353, 0x00000000, 0x000400fa, 0x00000352, 0x00000354, 0x00000353, + 0x000200f8, 0x00000354, 0x0004003d, 0x00000017, 0x00000355, 0x00000005, 0x00040064, 0x00000016, + 0x00000356, 0x00000355, 0x00050067, 0x0000000c, 0x00000357, 0x00000356, 0x00000019, 0x000500b1, + 0x00000015, 0x00000358, 0x00000350, 0x00000357, 0x0004009b, 0x00000014, 0x00000359, 0x00000358, + 0x000200f9, 0x00000353, 0x000200f8, 0x00000353, 0x000700f5, 0x00000014, 0x0000035a, 0x00000352, + 0x00000348, 0x00000359, 0x00000354, 0x000300f7, 0x0000035b, 0x00000000, 0x000400fa, 0x0000035a, + 0x0000035c, 0x0000035d, 0x000200f8, 0x0000035c, 0x0004003d, 0x00000017, 0x0000035e, 0x00000005, + 0x00040064, 0x00000016, 0x0000035f, 0x0000035e, 0x0007005f, 0x00000021, 0x00000360, 0x0000035f, + 0x00000350, 0x00000002, 0x00000019, 0x000200f9, 0x0000035b, 0x000200f8, 0x0000035d, 0x000200f9, + 0x0000035b, 0x000200f8, 0x0000035b, 0x000700f5, 0x00000021, 0x00000361, 0x00000360, 0x0000035c, + 0x00000023, 0x0000035d, 0x00040073, 0x0000001d, 0x00000362, 0x00000361, 0x00050080, 0x0000000c, + 0x00000363, 0x00000336, 0x00000026, 0x000500af, 0x00000015, 0x00000364, 0x00000363, 0x00000020, + 0x0004009b, 0x00000014, 0x00000365, 0x00000364, 0x000300f7, 0x00000366, 0x00000000, 0x000400fa, + 0x00000365, 0x00000367, 0x00000366, 0x000200f8, 0x00000367, 0x0004003d, 0x00000017, 0x00000368, + 0x00000005, 0x00040064, 0x00000016, 0x00000369, 0x00000368, 0x00050067, 0x0000000c, 0x0000036a, + 0x00000369, 0x00000019, 0x000500b1, 0x00000015, 0x0000036b, 0x00000363, 0x0000036a, 0x0004009b, + 0x00000014, 
0x0000036c, 0x0000036b, 0x000200f9, 0x00000366, 0x000200f8, 0x00000366, 0x000700f5, + 0x00000014, 0x0000036d, 0x00000365, 0x0000035b, 0x0000036c, 0x00000367, 0x000300f7, 0x0000036e, + 0x00000000, 0x000400fa, 0x0000036d, 0x0000036f, 0x00000370, 0x000200f8, 0x0000036f, 0x0004003d, + 0x00000017, 0x00000371, 0x00000005, 0x00040064, 0x00000016, 0x00000372, 0x00000371, 0x0007005f, + 0x00000021, 0x00000373, 0x00000372, 0x00000363, 0x00000002, 0x00000019, 0x000200f9, 0x0000036e, + 0x000200f8, 0x00000370, 0x000200f9, 0x0000036e, 0x000200f8, 0x0000036e, 0x000700f5, 0x00000021, + 0x00000374, 0x00000373, 0x0000036f, 0x00000023, 0x00000370, 0x00040073, 0x0000001d, 0x00000375, + 0x00000374, 0x00050080, 0x0000000c, 0x00000376, 0x00000336, 0x00000027, 0x000500af, 0x00000015, + 0x00000377, 0x00000376, 0x00000020, 0x0004009b, 0x00000014, 0x00000378, 0x00000377, 0x000300f7, + 0x00000379, 0x00000000, 0x000400fa, 0x00000378, 0x0000037a, 0x00000379, 0x000200f8, 0x0000037a, + 0x0004003d, 0x00000017, 0x0000037b, 0x00000005, 0x00040064, 0x00000016, 0x0000037c, 0x0000037b, + 0x00050067, 0x0000000c, 0x0000037d, 0x0000037c, 0x00000019, 0x000500b1, 0x00000015, 0x0000037e, + 0x00000376, 0x0000037d, 0x0004009b, 0x00000014, 0x0000037f, 0x0000037e, 0x000200f9, 0x00000379, + 0x000200f8, 0x00000379, 0x000700f5, 0x00000014, 0x00000380, 0x00000378, 0x0000036e, 0x0000037f, + 0x0000037a, 0x000300f7, 0x00000381, 0x00000000, 0x000400fa, 0x00000380, 0x00000382, 0x00000383, + 0x000200f8, 0x00000382, 0x0004003d, 0x00000017, 0x00000384, 0x00000005, 0x00040064, 0x00000016, + 0x00000385, 0x00000384, 0x0007005f, 0x00000021, 0x00000386, 0x00000385, 0x00000376, 0x00000002, + 0x00000019, 0x000200f9, 0x00000381, 0x000200f8, 0x00000383, 0x000200f9, 0x00000381, 0x000200f8, + 0x00000381, 0x000700f5, 0x00000021, 0x00000387, 0x00000386, 0x00000382, 0x00000023, 0x00000383, + 0x00040073, 0x0000001d, 0x00000388, 0x00000387, 0x0004003d, 0x00000017, 0x00000389, 0x00000005, + 0x00040064, 0x00000016, 0x0000038a, 0x00000389, 
0x0007005f, 0x00000021, 0x0000038b, 0x0000038a, + 0x00000336, 0x00000002, 0x00000019, 0x00040073, 0x0000001d, 0x0000038c, 0x0000038b, 0x00050080, + 0x0000000c, 0x0000038d, 0x00000336, 0x00000028, 0x000500af, 0x00000015, 0x0000038e, 0x0000038d, + 0x00000020, 0x0004009b, 0x00000014, 0x0000038f, 0x0000038e, 0x000300f7, 0x00000390, 0x00000000, + 0x000400fa, 0x0000038f, 0x00000391, 0x00000390, 0x000200f8, 0x00000391, 0x0004003d, 0x00000017, + 0x00000392, 0x00000005, 0x00040064, 0x00000016, 0x00000393, 0x00000392, 0x00050067, 0x0000000c, + 0x00000394, 0x00000393, 0x00000019, 0x000500b1, 0x00000015, 0x00000395, 0x0000038d, 0x00000394, + 0x0004009b, 0x00000014, 0x00000396, 0x00000395, 0x000200f9, 0x00000390, 0x000200f8, 0x00000390, + 0x000700f5, 0x00000014, 0x00000397, 0x0000038f, 0x00000381, 0x00000396, 0x00000391, 0x000300f7, + 0x00000398, 0x00000000, 0x000400fa, 0x00000397, 0x00000399, 0x0000039a, 0x000200f8, 0x00000399, + 0x0004003d, 0x00000017, 0x0000039b, 0x00000005, 0x00040064, 0x00000016, 0x0000039c, 0x0000039b, + 0x0007005f, 0x00000021, 0x0000039d, 0x0000039c, 0x0000038d, 0x00000002, 0x00000019, 0x000200f9, + 0x00000398, 0x000200f8, 0x0000039a, 0x000200f9, 0x00000398, 0x000200f8, 0x00000398, 0x000700f5, + 0x00000021, 0x0000039e, 0x0000039d, 0x00000399, 0x00000023, 0x0000039a, 0x00040073, 0x0000001d, + 0x0000039f, 0x0000039e, 0x00050080, 0x0000000c, 0x000003a0, 0x00000336, 0x00000029, 0x000500af, + 0x00000015, 0x000003a1, 0x000003a0, 0x00000020, 0x0004009b, 0x00000014, 0x000003a2, 0x000003a1, + 0x000300f7, 0x000003a3, 0x00000000, 0x000400fa, 0x000003a2, 0x000003a4, 0x000003a3, 0x000200f8, + 0x000003a4, 0x0004003d, 0x00000017, 0x000003a5, 0x00000005, 0x00040064, 0x00000016, 0x000003a6, + 0x000003a5, 0x00050067, 0x0000000c, 0x000003a7, 0x000003a6, 0x00000019, 0x000500b1, 0x00000015, + 0x000003a8, 0x000003a0, 0x000003a7, 0x0004009b, 0x00000014, 0x000003a9, 0x000003a8, 0x000200f9, + 0x000003a3, 0x000200f8, 0x000003a3, 0x000700f5, 0x00000014, 0x000003aa, 0x000003a2, 
0x00000398, + 0x000003a9, 0x000003a4, 0x000300f7, 0x000003ab, 0x00000000, 0x000400fa, 0x000003aa, 0x000003ac, + 0x000003ad, 0x000200f8, 0x000003ac, 0x0004003d, 0x00000017, 0x000003ae, 0x00000005, 0x00040064, + 0x00000016, 0x000003af, 0x000003ae, 0x0007005f, 0x00000021, 0x000003b0, 0x000003af, 0x000003a0, + 0x00000002, 0x00000019, 0x000200f9, 0x000003ab, 0x000200f8, 0x000003ad, 0x000200f9, 0x000003ab, + 0x000200f8, 0x000003ab, 0x000700f5, 0x00000021, 0x000003b1, 0x000003b0, 0x000003ac, 0x00000023, + 0x000003ad, 0x00040073, 0x0000001d, 0x000003b2, 0x000003b1, 0x00050080, 0x0000000c, 0x000003b3, + 0x00000336, 0x0000002a, 0x000500af, 0x00000015, 0x000003b4, 0x000003b3, 0x00000020, 0x0004009b, + 0x00000014, 0x000003b5, 0x000003b4, 0x000300f7, 0x000003b6, 0x00000000, 0x000400fa, 0x000003b5, + 0x000003b7, 0x000003b6, 0x000200f8, 0x000003b7, 0x0004003d, 0x00000017, 0x000003b8, 0x00000005, + 0x00040064, 0x00000016, 0x000003b9, 0x000003b8, 0x00050067, 0x0000000c, 0x000003ba, 0x000003b9, + 0x00000019, 0x000500b1, 0x00000015, 0x000003bb, 0x000003b3, 0x000003ba, 0x0004009b, 0x00000014, + 0x000003bc, 0x000003bb, 0x000200f9, 0x000003b6, 0x000200f8, 0x000003b6, 0x000700f5, 0x00000014, + 0x000003bd, 0x000003b5, 0x000003ab, 0x000003bc, 0x000003b7, 0x000300f7, 0x000003be, 0x00000000, + 0x000400fa, 0x000003bd, 0x000003bf, 0x000003c0, 0x000200f8, 0x000003bf, 0x0004003d, 0x00000017, + 0x000003c1, 0x00000005, 0x00040064, 0x00000016, 0x000003c2, 0x000003c1, 0x0007005f, 0x00000021, + 0x000003c3, 0x000003c2, 0x000003b3, 0x00000002, 0x00000019, 0x000200f9, 0x000003be, 0x000200f8, + 0x000003c0, 0x000200f9, 0x000003be, 0x000200f8, 0x000003be, 0x000700f5, 0x00000021, 0x000003c4, + 0x000003c3, 0x000003bf, 0x00000023, 0x000003c0, 0x00040073, 0x0000001d, 0x000003c5, 0x000003c4, + 0x00050080, 0x0000000c, 0x000003c6, 0x00000336, 0x0000002b, 0x000500af, 0x00000015, 0x000003c7, + 0x000003c6, 0x00000020, 0x0004009b, 0x00000014, 0x000003c8, 0x000003c7, 0x000300f7, 0x000003c9, + 0x00000000, 0x000400fa, 
0x000003c8, 0x000003ca, 0x000003c9, 0x000200f8, 0x000003ca, 0x0004003d, + 0x00000017, 0x000003cb, 0x00000005, 0x00040064, 0x00000016, 0x000003cc, 0x000003cb, 0x00050067, + 0x0000000c, 0x000003cd, 0x000003cc, 0x00000019, 0x000500b1, 0x00000015, 0x000003ce, 0x000003c6, + 0x000003cd, 0x0004009b, 0x00000014, 0x000003cf, 0x000003ce, 0x000200f9, 0x000003c9, 0x000200f8, + 0x000003c9, 0x000700f5, 0x00000014, 0x000003d0, 0x000003c8, 0x000003be, 0x000003cf, 0x000003ca, + 0x000300f7, 0x000003d1, 0x00000000, 0x000400fa, 0x000003d0, 0x000003d2, 0x000003d3, 0x000200f8, + 0x000003d2, 0x0004003d, 0x00000017, 0x000003d4, 0x00000005, 0x00040064, 0x00000016, 0x000003d5, + 0x000003d4, 0x0007005f, 0x00000021, 0x000003d6, 0x000003d5, 0x000003c6, 0x00000002, 0x00000019, + 0x000200f9, 0x000003d1, 0x000200f8, 0x000003d3, 0x000200f9, 0x000003d1, 0x000200f8, 0x000003d1, + 0x000700f5, 0x00000021, 0x000003d7, 0x000003d6, 0x000003d2, 0x00000023, 0x000003d3, 0x00040073, + 0x0000001d, 0x000003d8, 0x000003d7, 0x000300f7, 0x000003d9, 0x00000000, 0x000400fa, 0x0000033f, + 0x000003da, 0x000003d9, 0x000200f8, 0x000003da, 0x0004003d, 0x00000017, 0x000003db, 0x00000006, + 0x00040064, 0x00000016, 0x000003dc, 0x000003db, 0x00050067, 0x0000000c, 0x000003dd, 0x000003dc, + 0x00000019, 0x000500b1, 0x00000015, 0x000003de, 0x0000033d, 0x000003dd, 0x0004009b, 0x00000014, + 0x000003df, 0x000003de, 0x000200f9, 0x000003d9, 0x000200f8, 0x000003d9, 0x000700f5, 0x00000014, + 0x000003e0, 0x0000033f, 0x000003d1, 0x000003df, 0x000003da, 0x000300f7, 0x000003e1, 0x00000000, + 0x000400fa, 0x000003e0, 0x000003e2, 0x000003e3, 0x000200f8, 0x000003e2, 0x0004003d, 0x00000017, + 0x000003e4, 0x00000006, 0x00040064, 0x00000016, 0x000003e5, 0x000003e4, 0x0007005f, 0x00000021, + 0x000003e6, 0x000003e5, 0x0000033d, 0x00000002, 0x00000019, 0x000200f9, 0x000003e1, 0x000200f8, + 0x000003e3, 0x000200f9, 0x000003e1, 0x000200f8, 0x000003e1, 0x000700f5, 0x00000021, 0x000003e7, + 0x000003e6, 0x000003e2, 0x00000023, 0x000003e3, 0x00040073, 
0x0000001d, 0x000003e8, 0x000003e7, + 0x000300f7, 0x000003e9, 0x00000000, 0x000400fa, 0x00000352, 0x000003ea, 0x000003e9, 0x000200f8, + 0x000003ea, 0x0004003d, 0x00000017, 0x000003eb, 0x00000006, 0x00040064, 0x00000016, 0x000003ec, + 0x000003eb, 0x00050067, 0x0000000c, 0x000003ed, 0x000003ec, 0x00000019, 0x000500b1, 0x00000015, + 0x000003ee, 0x00000350, 0x000003ed, 0x0004009b, 0x00000014, 0x000003ef, 0x000003ee, 0x000200f9, + 0x000003e9, 0x000200f8, 0x000003e9, 0x000700f5, 0x00000014, 0x000003f0, 0x00000352, 0x000003e1, + 0x000003ef, 0x000003ea, 0x000300f7, 0x000003f1, 0x00000000, 0x000400fa, 0x000003f0, 0x000003f2, + 0x000003f3, 0x000200f8, 0x000003f2, 0x0004003d, 0x00000017, 0x000003f4, 0x00000006, 0x00040064, + 0x00000016, 0x000003f5, 0x000003f4, 0x0007005f, 0x00000021, 0x000003f6, 0x000003f5, 0x00000350, + 0x00000002, 0x00000019, 0x000200f9, 0x000003f1, 0x000200f8, 0x000003f3, 0x000200f9, 0x000003f1, + 0x000200f8, 0x000003f1, 0x000700f5, 0x00000021, 0x000003f7, 0x000003f6, 0x000003f2, 0x00000023, + 0x000003f3, 0x00040073, 0x0000001d, 0x000003f8, 0x000003f7, 0x000300f7, 0x000003f9, 0x00000000, + 0x000400fa, 0x00000365, 0x000003fa, 0x000003f9, 0x000200f8, 0x000003fa, 0x0004003d, 0x00000017, + 0x000003fb, 0x00000006, 0x00040064, 0x00000016, 0x000003fc, 0x000003fb, 0x00050067, 0x0000000c, + 0x000003fd, 0x000003fc, 0x00000019, 0x000500b1, 0x00000015, 0x000003fe, 0x00000363, 0x000003fd, + 0x0004009b, 0x00000014, 0x000003ff, 0x000003fe, 0x000200f9, 0x000003f9, 0x000200f8, 0x000003f9, + 0x000700f5, 0x00000014, 0x00000400, 0x00000365, 0x000003f1, 0x000003ff, 0x000003fa, 0x000300f7, + 0x00000401, 0x00000000, 0x000400fa, 0x00000400, 0x00000402, 0x00000403, 0x000200f8, 0x00000402, + 0x0004003d, 0x00000017, 0x00000404, 0x00000006, 0x00040064, 0x00000016, 0x00000405, 0x00000404, + 0x0007005f, 0x00000021, 0x00000406, 0x00000405, 0x00000363, 0x00000002, 0x00000019, 0x000200f9, + 0x00000401, 0x000200f8, 0x00000403, 0x000200f9, 0x00000401, 0x000200f8, 0x00000401, 0x000700f5, + 
0x00000021, 0x00000407, 0x00000406, 0x00000402, 0x00000023, 0x00000403, 0x00040073, 0x0000001d, + 0x00000408, 0x00000407, 0x000300f7, 0x00000409, 0x00000000, 0x000400fa, 0x00000378, 0x0000040a, + 0x00000409, 0x000200f8, 0x0000040a, 0x0004003d, 0x00000017, 0x0000040b, 0x00000006, 0x00040064, + 0x00000016, 0x0000040c, 0x0000040b, 0x00050067, 0x0000000c, 0x0000040d, 0x0000040c, 0x00000019, + 0x000500b1, 0x00000015, 0x0000040e, 0x00000376, 0x0000040d, 0x0004009b, 0x00000014, 0x0000040f, + 0x0000040e, 0x000200f9, 0x00000409, 0x000200f8, 0x00000409, 0x000700f5, 0x00000014, 0x00000410, + 0x00000378, 0x00000401, 0x0000040f, 0x0000040a, 0x000300f7, 0x00000411, 0x00000000, 0x000400fa, + 0x00000410, 0x00000412, 0x00000413, 0x000200f8, 0x00000412, 0x0004003d, 0x00000017, 0x00000414, + 0x00000006, 0x00040064, 0x00000016, 0x00000415, 0x00000414, 0x0007005f, 0x00000021, 0x00000416, + 0x00000415, 0x00000376, 0x00000002, 0x00000019, 0x000200f9, 0x00000411, 0x000200f8, 0x00000413, + 0x000200f9, 0x00000411, 0x000200f8, 0x00000411, 0x000700f5, 0x00000021, 0x00000417, 0x00000416, + 0x00000412, 0x00000023, 0x00000413, 0x00040073, 0x0000001d, 0x00000418, 0x00000417, 0x0004003d, + 0x00000017, 0x00000419, 0x00000006, 0x00040064, 0x00000016, 0x0000041a, 0x00000419, 0x0007005f, + 0x00000021, 0x0000041b, 0x0000041a, 0x00000336, 0x00000002, 0x00000019, 0x00040073, 0x0000001d, + 0x0000041c, 0x0000041b, 0x000300f7, 0x0000041d, 0x00000000, 0x000400fa, 0x0000038f, 0x0000041e, + 0x0000041d, 0x000200f8, 0x0000041e, 0x0004003d, 0x00000017, 0x0000041f, 0x00000006, 0x00040064, + 0x00000016, 0x00000420, 0x0000041f, 0x00050067, 0x0000000c, 0x00000421, 0x00000420, 0x00000019, + 0x000500b1, 0x00000015, 0x00000422, 0x0000038d, 0x00000421, 0x0004009b, 0x00000014, 0x00000423, + 0x00000422, 0x000200f9, 0x0000041d, 0x000200f8, 0x0000041d, 0x000700f5, 0x00000014, 0x00000424, + 0x0000038f, 0x00000411, 0x00000423, 0x0000041e, 0x000300f7, 0x00000425, 0x00000000, 0x000400fa, + 0x00000424, 0x00000426, 0x00000427, 
0x000200f8, 0x00000426, 0x0004003d, 0x00000017, 0x00000428, + 0x00000006, 0x00040064, 0x00000016, 0x00000429, 0x00000428, 0x0007005f, 0x00000021, 0x0000042a, + 0x00000429, 0x0000038d, 0x00000002, 0x00000019, 0x000200f9, 0x00000425, 0x000200f8, 0x00000427, + 0x000200f9, 0x00000425, 0x000200f8, 0x00000425, 0x000700f5, 0x00000021, 0x0000042b, 0x0000042a, + 0x00000426, 0x00000023, 0x00000427, 0x00040073, 0x0000001d, 0x0000042c, 0x0000042b, 0x000300f7, + 0x0000042d, 0x00000000, 0x000400fa, 0x000003a2, 0x0000042e, 0x0000042d, 0x000200f8, 0x0000042e, + 0x0004003d, 0x00000017, 0x0000042f, 0x00000006, 0x00040064, 0x00000016, 0x00000430, 0x0000042f, + 0x00050067, 0x0000000c, 0x00000431, 0x00000430, 0x00000019, 0x000500b1, 0x00000015, 0x00000432, + 0x000003a0, 0x00000431, 0x0004009b, 0x00000014, 0x00000433, 0x00000432, 0x000200f9, 0x0000042d, + 0x000200f8, 0x0000042d, 0x000700f5, 0x00000014, 0x00000434, 0x000003a2, 0x00000425, 0x00000433, + 0x0000042e, 0x000300f7, 0x00000435, 0x00000000, 0x000400fa, 0x00000434, 0x00000436, 0x00000437, + 0x000200f8, 0x00000436, 0x0004003d, 0x00000017, 0x00000438, 0x00000006, 0x00040064, 0x00000016, + 0x00000439, 0x00000438, 0x0007005f, 0x00000021, 0x0000043a, 0x00000439, 0x000003a0, 0x00000002, + 0x00000019, 0x000200f9, 0x00000435, 0x000200f8, 0x00000437, 0x000200f9, 0x00000435, 0x000200f8, + 0x00000435, 0x000700f5, 0x00000021, 0x0000043b, 0x0000043a, 0x00000436, 0x00000023, 0x00000437, + 0x00040073, 0x0000001d, 0x0000043c, 0x0000043b, 0x000300f7, 0x0000043d, 0x00000000, 0x000400fa, + 0x000003b5, 0x0000043e, 0x0000043d, 0x000200f8, 0x0000043e, 0x0004003d, 0x00000017, 0x0000043f, + 0x00000006, 0x00040064, 0x00000016, 0x00000440, 0x0000043f, 0x00050067, 0x0000000c, 0x00000441, + 0x00000440, 0x00000019, 0x000500b1, 0x00000015, 0x00000442, 0x000003b3, 0x00000441, 0x0004009b, + 0x00000014, 0x00000443, 0x00000442, 0x000200f9, 0x0000043d, 0x000200f8, 0x0000043d, 0x000700f5, + 0x00000014, 0x00000444, 0x000003b5, 0x00000435, 0x00000443, 0x0000043e, 
0x000300f7, 0x00000445, + 0x00000000, 0x000400fa, 0x00000444, 0x00000446, 0x00000447, 0x000200f8, 0x00000446, 0x0004003d, + 0x00000017, 0x00000448, 0x00000006, 0x00040064, 0x00000016, 0x00000449, 0x00000448, 0x0007005f, + 0x00000021, 0x0000044a, 0x00000449, 0x000003b3, 0x00000002, 0x00000019, 0x000200f9, 0x00000445, + 0x000200f8, 0x00000447, 0x000200f9, 0x00000445, 0x000200f8, 0x00000445, 0x000700f5, 0x00000021, + 0x0000044b, 0x0000044a, 0x00000446, 0x00000023, 0x00000447, 0x00040073, 0x0000001d, 0x0000044c, + 0x0000044b, 0x000300f7, 0x0000044d, 0x00000000, 0x000400fa, 0x000003c8, 0x0000044e, 0x0000044d, + 0x000200f8, 0x0000044e, 0x0004003d, 0x00000017, 0x0000044f, 0x00000006, 0x00040064, 0x00000016, + 0x00000450, 0x0000044f, 0x00050067, 0x0000000c, 0x00000451, 0x00000450, 0x00000019, 0x000500b1, + 0x00000015, 0x00000452, 0x000003c6, 0x00000451, 0x0004009b, 0x00000014, 0x00000453, 0x00000452, + 0x000200f9, 0x0000044d, 0x000200f8, 0x0000044d, 0x000700f5, 0x00000014, 0x00000454, 0x000003c8, + 0x00000445, 0x00000453, 0x0000044e, 0x000300f7, 0x00000455, 0x00000000, 0x000400fa, 0x00000454, + 0x00000456, 0x00000457, 0x000200f8, 0x00000456, 0x0004003d, 0x00000017, 0x00000458, 0x00000006, + 0x00040064, 0x00000016, 0x00000459, 0x00000458, 0x0007005f, 0x00000021, 0x0000045a, 0x00000459, + 0x000003c6, 0x00000002, 0x00000019, 0x000200f9, 0x00000455, 0x000200f8, 0x00000457, 0x000200f9, + 0x00000455, 0x000200f8, 0x00000455, 0x000700f5, 0x00000021, 0x0000045b, 0x0000045a, 0x00000456, + 0x00000023, 0x00000457, 0x00040073, 0x0000001d, 0x0000045c, 0x0000045b, 0x0004003d, 0x00000012, + 0x0000045d, 0x00000004, 0x00050091, 0x0000001d, 0x0000045e, 0x00000041, 0x0000034f, 0x00050091, + 0x0000001d, 0x0000045f, 0x00000056, 0x00000362, 0x00050081, 0x0000001d, 0x00000460, 0x0000045e, + 0x0000045f, 0x00050091, 0x0000001d, 0x00000461, 0x0000006b, 0x00000375, 0x00050081, 0x0000001d, + 0x00000462, 0x00000460, 0x00000461, 0x00050091, 0x0000001d, 0x00000463, 0x00000080, 0x00000388, + 0x00050081, 
0x0000001d, 0x00000464, 0x00000462, 0x00000463, 0x00050091, 0x0000001d, 0x00000465, + 0x00000095, 0x0000038c, 0x00050081, 0x0000001d, 0x00000466, 0x00000464, 0x00000465, 0x00050091, + 0x0000001d, 0x00000467, 0x000000aa, 0x0000039f, 0x00050081, 0x0000001d, 0x00000468, 0x00000466, + 0x00000467, 0x00050091, 0x0000001d, 0x00000469, 0x000000bf, 0x000003b2, 0x00050081, 0x0000001d, + 0x0000046a, 0x00000468, 0x00000469, 0x00050091, 0x0000001d, 0x0000046b, 0x000000d4, 0x000003c5, + 0x00050081, 0x0000001d, 0x0000046c, 0x0000046a, 0x0000046b, 0x00050091, 0x0000001d, 0x0000046d, + 0x000000e9, 0x000003d8, 0x00050081, 0x0000001d, 0x0000046e, 0x0000046c, 0x0000046d, 0x00050091, + 0x0000001d, 0x0000046f, 0x000000fe, 0x000003e8, 0x00050081, 0x0000001d, 0x00000470, 0x0000046e, + 0x0000046f, 0x00050091, 0x0000001d, 0x00000471, 0x00000113, 0x000003f8, 0x00050081, 0x0000001d, + 0x00000472, 0x00000470, 0x00000471, 0x00050091, 0x0000001d, 0x00000473, 0x00000128, 0x00000408, + 0x00050081, 0x0000001d, 0x00000474, 0x00000472, 0x00000473, 0x00050091, 0x0000001d, 0x00000475, + 0x0000013d, 0x00000418, 0x00050081, 0x0000001d, 0x00000476, 0x00000474, 0x00000475, 0x00050091, + 0x0000001d, 0x00000477, 0x00000151, 0x0000041c, 0x00050081, 0x0000001d, 0x00000478, 0x00000476, + 0x00000477, 0x00050091, 0x0000001d, 0x00000479, 0x00000166, 0x0000042c, 0x00050081, 0x0000001d, + 0x0000047a, 0x00000478, 0x00000479, 0x00050091, 0x0000001d, 0x0000047b, 0x0000017b, 0x0000043c, + 0x00050081, 0x0000001d, 0x0000047c, 0x0000047a, 0x0000047b, 0x00050091, 0x0000001d, 0x0000047d, + 0x00000190, 0x0000044c, 0x00050081, 0x0000001d, 0x0000047e, 0x0000047c, 0x0000047d, 0x00050091, + 0x0000001d, 0x0000047f, 0x000001a5, 0x0000045c, 0x00050081, 0x0000001d, 0x00000480, 0x0000047e, + 0x0000047f, 0x00050083, 0x0000001d, 0x00000481, 0x00000480, 0x000001aa, 0x00050085, 0x0000001d, + 0x00000482, 0x00000481, 0x000001ae, 0x00050081, 0x0000001d, 0x00000483, 0x00000482, 0x000001b3, + 0x00040073, 0x00000021, 0x00000484, 0x00000483, 
0x00040063, 0x0000045d, 0x00000336, 0x00000484, + 0x0004003d, 0x00000012, 0x00000485, 0x00000007, 0x00050091, 0x0000001d, 0x00000486, 0x000001c8, + 0x0000034f, 0x00050091, 0x0000001d, 0x00000487, 0x000001dd, 0x00000362, 0x00050081, 0x0000001d, + 0x00000488, 0x00000486, 0x00000487, 0x00050091, 0x0000001d, 0x00000489, 0x000001f2, 0x00000375, + 0x00050081, 0x0000001d, 0x0000048a, 0x00000488, 0x00000489, 0x00050091, 0x0000001d, 0x0000048b, + 0x00000206, 0x00000388, 0x00050081, 0x0000001d, 0x0000048c, 0x0000048a, 0x0000048b, 0x00050091, + 0x0000001d, 0x0000048d, 0x0000021b, 0x0000038c, 0x00050081, 0x0000001d, 0x0000048e, 0x0000048c, + 0x0000048d, 0x00050091, 0x0000001d, 0x0000048f, 0x0000022f, 0x0000039f, 0x00050081, 0x0000001d, + 0x00000490, 0x0000048e, 0x0000048f, 0x00050091, 0x0000001d, 0x00000491, 0x00000244, 0x000003b2, + 0x00050081, 0x0000001d, 0x00000492, 0x00000490, 0x00000491, 0x00050091, 0x0000001d, 0x00000493, + 0x00000259, 0x000003c5, 0x00050081, 0x0000001d, 0x00000494, 0x00000492, 0x00000493, 0x00050091, + 0x0000001d, 0x00000495, 0x0000026e, 0x000003d8, 0x00050081, 0x0000001d, 0x00000496, 0x00000494, + 0x00000495, 0x00050091, 0x0000001d, 0x00000497, 0x00000282, 0x000003e8, 0x00050081, 0x0000001d, + 0x00000498, 0x00000496, 0x00000497, 0x00050091, 0x0000001d, 0x00000499, 0x00000297, 0x000003f8, + 0x00050081, 0x0000001d, 0x0000049a, 0x00000498, 0x00000499, 0x00050091, 0x0000001d, 0x0000049b, + 0x000002aa, 0x00000408, 0x00050081, 0x0000001d, 0x0000049c, 0x0000049a, 0x0000049b, 0x00050091, + 0x0000001d, 0x0000049d, 0x000002be, 0x00000418, 0x00050081, 0x0000001d, 0x0000049e, 0x0000049c, + 0x0000049d, 0x00050091, 0x0000001d, 0x0000049f, 0x000002d0, 0x0000041c, 0x00050081, 0x0000001d, + 0x000004a0, 0x0000049e, 0x0000049f, 0x00050091, 0x0000001d, 0x000004a1, 0x000002e4, 0x0000042c, + 0x00050081, 0x0000001d, 0x000004a2, 0x000004a0, 0x000004a1, 0x00050091, 0x0000001d, 0x000004a3, + 0x000002f8, 0x0000043c, 0x00050081, 0x0000001d, 0x000004a4, 0x000004a2, 0x000004a3, 
0x00050091, + 0x0000001d, 0x000004a5, 0x0000030b, 0x0000044c, 0x00050081, 0x0000001d, 0x000004a6, 0x000004a4, + 0x000004a5, 0x00050091, 0x0000001d, 0x000004a7, 0x00000320, 0x0000045c, 0x00050081, 0x0000001d, + 0x000004a8, 0x000004a6, 0x000004a7, 0x00050083, 0x0000001d, 0x000004a9, 0x000004a8, 0x00000325, + 0x00050085, 0x0000001d, 0x000004aa, 0x000004a9, 0x0000032a, 0x00050081, 0x0000001d, 0x000004ab, + 0x000004aa, 0x0000032f, 0x00040073, 0x00000021, 0x000004ac, 0x000004ab, 0x00040063, 0x00000485, + 0x00000336, 0x000004ac, 0x000200f9, 0x00000332, 0x000200f8, 0x00000332, 0x000100fd, 0x00010038, +}; +static const size_t wnfg_51_spv_size = sizeof(wnfg_51_spv); diff --git a/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_53_spv.h b/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_53_spv.h new file mode 100644 index 000000000..c13eb1ad8 --- /dev/null +++ b/app/src/main/cpp/winlator/vk/wnfg_spv/wnfg_53_spv.h @@ -0,0 +1,340 @@ +#pragma once +#include +#include + +static const uint32_t wnfg_53_spv[] = { + 0x07230203, 0x00010000, 0x000d000b, 0x0000021f, 0x00000000, 0x00020011, 0x00000001, 0x00020011, + 0x00000009, 0x00020011, 0x00000032, 0x0006000b, 0x00000001, 0x4c534c47, 0x6474732e, 0x3035342e, + 0x00000000, 0x0003000e, 0x00000000, 0x00000001, 0x0006000f, 0x00000005, 0x00000002, 0x6e69616d, + 0x00000000, 0x00000003, 0x00060010, 0x00000002, 0x00000011, 0x00000010, 0x00000010, 0x00000001, + 0x00040047, 0x00000003, 0x0000000b, 0x0000001c, 0x00030047, 0x00000004, 0x00000019, 0x00040047, + 0x00000004, 0x00000021, 0x00000030, 0x00040047, 0x00000004, 0x00000022, 0x00000000, 0x00040047, + 0x00000005, 0x00000021, 0x00000020, 0x00040047, 0x00000005, 0x00000022, 0x00000000, 0x00040047, + 0x00000006, 0x00000021, 0x00000021, 0x00040047, 0x00000006, 0x00000022, 0x00000000, 0x00040047, + 0x00000007, 0x00000021, 0x00000022, 0x00040047, 0x00000007, 0x00000022, 0x00000000, 0x00040047, + 0x00000008, 0x0000000b, 0x00000019, 0x00020013, 0x00000009, 0x00030021, 0x0000000a, 0x00000009, + 0x00040015, 
0x0000000b, 0x00000020, 0x00000001, 0x00040017, 0x0000000c, 0x0000000b, 0x00000002, + 0x00040015, 0x0000000d, 0x00000020, 0x00000000, 0x00040017, 0x0000000e, 0x0000000d, 0x00000003, + 0x00040020, 0x0000000f, 0x00000001, 0x0000000e, 0x0004003b, 0x0000000f, 0x00000003, 0x00000001, + 0x00040017, 0x00000010, 0x0000000d, 0x00000002, 0x00030016, 0x00000011, 0x00000020, 0x00090019, + 0x00000012, 0x00000011, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000002, + 0x00040020, 0x00000013, 0x00000000, 0x00000012, 0x0004003b, 0x00000013, 0x00000004, 0x00000000, + 0x00020014, 0x00000014, 0x00040017, 0x00000015, 0x00000014, 0x00000002, 0x00090019, 0x00000016, + 0x00000011, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x0003001b, + 0x00000017, 0x00000016, 0x00040020, 0x00000018, 0x00000000, 0x00000017, 0x0004003b, 0x00000018, + 0x00000005, 0x00000000, 0x0004002b, 0x0000000b, 0x00000019, 0x00000000, 0x00040017, 0x0000001a, + 0x00000011, 0x00000002, 0x0004002b, 0x00000011, 0x0000001b, 0x3f000000, 0x0005002c, 0x0000001a, + 0x0000001c, 0x0000001b, 0x0000001b, 0x0004002b, 0x0000000d, 0x0000001d, 0x00000000, 0x0004002b, + 0x0000000d, 0x0000001e, 0x00000001, 0x00030016, 0x0000001f, 0x00000010, 0x00040017, 0x00000020, + 0x0000001f, 0x00000004, 0x00040018, 0x00000021, 0x00000020, 0x00000004, 0x0004002b, 0x0000001f, + 0x00000022, 0x0000aa20, 0x0004002b, 0x0000001f, 0x00000023, 0x0000a8fd, 0x0004002b, 0x0000001f, + 0x00000024, 0x000021c9, 0x0004002b, 0x0000001f, 0x00000025, 0x000025f6, 0x0007002c, 0x00000020, + 0x00000026, 0x00000022, 0x00000023, 0x00000024, 0x00000025, 0x0004002b, 0x0000001f, 0x00000027, + 0x00002dee, 0x0004002b, 0x0000001f, 0x00000028, 0x00002eef, 0x0004002b, 0x0000001f, 0x00000029, + 0x000031bf, 0x0004002b, 0x0000001f, 0x0000002a, 0x0000315a, 0x0007002c, 0x00000020, 0x0000002b, + 0x00000027, 0x00000028, 0x00000029, 0x0000002a, 0x0004002b, 0x0000001f, 0x0000002c, 0x0000af5d, + 0x0004002b, 0x0000001f, 0x0000002d, 0x0000af00, 
0x0004002b, 0x0000001f, 0x0000002e, 0x00002f01, + 0x0004002b, 0x0000001f, 0x0000002f, 0x00003023, 0x0007002c, 0x00000020, 0x00000030, 0x0000002c, + 0x0000002d, 0x0000002e, 0x0000002f, 0x0004002b, 0x0000001f, 0x00000031, 0x0000ae7e, 0x0004002b, + 0x0000001f, 0x00000032, 0x0000ae16, 0x0004002b, 0x0000001f, 0x00000033, 0x00002e6b, 0x0004002b, + 0x0000001f, 0x00000034, 0x00002e52, 0x0007002c, 0x00000020, 0x00000035, 0x00000031, 0x00000032, + 0x00000033, 0x00000034, 0x0007002c, 0x00000021, 0x00000036, 0x00000026, 0x0000002b, 0x00000030, + 0x00000035, 0x0004002b, 0x00000011, 0x00000037, 0x00000000, 0x0004002b, 0x0000000b, 0x00000038, + 0xffffffff, 0x0005002c, 0x0000000c, 0x00000039, 0x00000038, 0x00000038, 0x00040017, 0x0000003a, + 0x00000011, 0x00000004, 0x0004002b, 0x0000001f, 0x0000003b, 0x000028f4, 0x0004002b, 0x0000001f, + 0x0000003c, 0x0000268b, 0x0004002b, 0x0000001f, 0x0000003d, 0x0000343c, 0x0004002b, 0x0000001f, + 0x0000003e, 0x00003415, 0x0007002c, 0x00000020, 0x0000003f, 0x0000003b, 0x0000003c, 0x0000003d, + 0x0000003e, 0x0004002b, 0x0000001f, 0x00000040, 0x00003292, 0x0004002b, 0x0000001f, 0x00000041, + 0x00003256, 0x0004002b, 0x0000001f, 0x00000042, 0x00003435, 0x0004002b, 0x0000001f, 0x00000043, + 0x00003404, 0x0007002c, 0x00000020, 0x00000044, 0x00000040, 0x00000041, 0x00000042, 0x00000043, + 0x0004002b, 0x0000001f, 0x00000045, 0x00001f89, 0x0004002b, 0x0000001f, 0x00000046, 0x00009edd, + 0x0004002b, 0x0000001f, 0x00000047, 0x000033ac, 0x0004002b, 0x0000001f, 0x00000048, 0x00003351, + 0x0007002c, 0x00000020, 0x00000049, 0x00000045, 0x00000046, 0x00000047, 0x00000048, 0x0004002b, + 0x0000001f, 0x0000004a, 0x000032a7, 0x0004002b, 0x0000001f, 0x0000004b, 0x00003231, 0x0004002b, + 0x0000001f, 0x0000004c, 0x000031ee, 0x0004002b, 0x0000001f, 0x0000004d, 0x0000321c, 0x0007002c, + 0x00000020, 0x0000004e, 0x0000004a, 0x0000004b, 0x0000004c, 0x0000004d, 0x0007002c, 0x00000021, + 0x0000004f, 0x0000003f, 0x00000044, 0x00000049, 0x0000004e, 0x0005002c, 0x0000000c, 
0x00000050, + 0x00000038, 0x00000019, 0x0004002b, 0x0000001f, 0x00000051, 0x0000ad6d, 0x0004002b, 0x0000001f, + 0x00000052, 0x0000ad8c, 0x0004002b, 0x0000001f, 0x00000053, 0x00002dbe, 0x0004002b, 0x0000001f, + 0x00000054, 0x00002cbb, 0x0007002c, 0x00000020, 0x00000055, 0x00000051, 0x00000052, 0x00000053, + 0x00000054, 0x0004002b, 0x0000001f, 0x00000056, 0x0000302c, 0x0004002b, 0x0000001f, 0x00000057, + 0x00003065, 0x0004002b, 0x0000001f, 0x00000058, 0x0000317f, 0x0004002b, 0x0000001f, 0x00000059, + 0x000031bd, 0x0007002c, 0x00000020, 0x0000005a, 0x00000056, 0x00000057, 0x00000058, 0x00000059, + 0x0004002b, 0x0000001f, 0x0000005b, 0x0000b06c, 0x0004002b, 0x0000001f, 0x0000005c, 0x0000b05d, + 0x0004002b, 0x0000001f, 0x0000005d, 0x000030bd, 0x0004002b, 0x0000001f, 0x0000005e, 0x000030a1, + 0x0007002c, 0x00000020, 0x0000005f, 0x0000005b, 0x0000005c, 0x0000005d, 0x0000005e, 0x0004002b, + 0x0000001f, 0x00000060, 0x0000a81a, 0x0004002b, 0x0000001f, 0x00000061, 0x0000a531, 0x0004002b, + 0x0000001f, 0x00000062, 0x00002bd6, 0x0004002b, 0x0000001f, 0x00000063, 0x00002b98, 0x0007002c, + 0x00000020, 0x00000064, 0x00000060, 0x00000061, 0x00000062, 0x00000063, 0x0007002c, 0x00000021, + 0x00000065, 0x00000055, 0x0000005a, 0x0000005f, 0x00000064, 0x0004002b, 0x0000000b, 0x00000066, + 0x00000001, 0x0005002c, 0x0000000c, 0x00000067, 0x00000038, 0x00000066, 0x0004002b, 0x0000001f, + 0x00000068, 0x00002e3c, 0x0004002b, 0x0000001f, 0x00000069, 0x00002eb2, 0x0004002b, 0x0000001f, + 0x0000006a, 0x000032fb, 0x0004002b, 0x0000001f, 0x0000006b, 0x000032ad, 0x0007002c, 0x00000020, + 0x0000006c, 0x00000068, 0x00000069, 0x0000006a, 0x0000006b, 0x0004002b, 0x0000001f, 0x0000006d, + 0x0000323c, 0x0004002b, 0x0000001f, 0x0000006e, 0x000031da, 0x0004002b, 0x0000001f, 0x0000006f, + 0x00003410, 0x0004002b, 0x0000001f, 0x00000070, 0x0000343e, 0x0007002c, 0x00000020, 0x00000071, + 0x0000006d, 0x0000006e, 0x0000006f, 0x00000070, 0x0004002b, 0x0000001f, 0x00000072, 0x00002d90, + 0x0004002b, 0x0000001f, 
0x00000073, 0x00002d8a, 0x0004002b, 0x0000001f, 0x00000074, 0x00003320, + 0x0004002b, 0x0000001f, 0x00000075, 0x00003274, 0x0007002c, 0x00000020, 0x00000076, 0x00000072, + 0x00000073, 0x00000074, 0x00000075, 0x0004002b, 0x0000001f, 0x00000077, 0x00002d6a, 0x0004002b, + 0x0000001f, 0x00000078, 0x00002b8f, 0x0004002b, 0x0000001f, 0x00000079, 0x0000345d, 0x0004002b, + 0x0000001f, 0x0000007a, 0x00003462, 0x0007002c, 0x00000020, 0x0000007b, 0x00000077, 0x00000078, + 0x00000079, 0x0000007a, 0x0007002c, 0x00000021, 0x0000007c, 0x0000006c, 0x00000071, 0x00000076, + 0x0000007b, 0x0005002c, 0x0000000c, 0x0000007d, 0x00000019, 0x00000038, 0x0004002b, 0x0000001f, + 0x0000007e, 0x00002ef0, 0x0004002b, 0x0000001f, 0x0000007f, 0x00002f76, 0x0004002b, 0x0000001f, + 0x00000080, 0x00003747, 0x0004002b, 0x0000001f, 0x00000081, 0x00003773, 0x0007002c, 0x00000020, + 0x00000082, 0x0000007e, 0x0000007f, 0x00000080, 0x00000081, 0x0004002b, 0x0000001f, 0x00000083, + 0x00003483, 0x0004002b, 0x0000001f, 0x00000084, 0x000034c7, 0x0004002b, 0x0000001f, 0x00000085, + 0x000035a9, 0x0004002b, 0x0000001f, 0x00000086, 0x00003565, 0x0007002c, 0x00000020, 0x00000087, + 0x00000083, 0x00000084, 0x00000085, 0x00000086, 0x0004002b, 0x0000001f, 0x00000088, 0x00003140, + 0x0004002b, 0x0000001f, 0x00000089, 0x00002fa9, 0x0004002b, 0x0000001f, 0x0000008a, 0x00003532, + 0x0004002b, 0x0000001f, 0x0000008b, 0x00003543, 0x0007002c, 0x00000020, 0x0000008c, 0x00000088, + 0x00000089, 0x0000008a, 0x0000008b, 0x0004002b, 0x0000001f, 0x0000008d, 0x000035de, 0x0004002b, + 0x0000001f, 0x0000008e, 0x00003542, 0x0004002b, 0x0000001f, 0x0000008f, 0x0000361b, 0x0004002b, + 0x0000001f, 0x00000090, 0x000035d8, 0x0007002c, 0x00000020, 0x00000091, 0x0000008d, 0x0000008e, + 0x0000008f, 0x00000090, 0x0007002c, 0x00000021, 0x00000092, 0x00000082, 0x00000087, 0x0000008c, + 0x00000091, 0x0004002b, 0x0000001f, 0x00000093, 0x00002c64, 0x0004002b, 0x0000001f, 0x00000094, + 0x00002d03, 0x0004002b, 0x0000001f, 0x00000095, 0x00003479, 
0x0004002b, 0x0000001f, 0x00000096, + 0x00003461, 0x0007002c, 0x00000020, 0x00000097, 0x00000093, 0x00000094, 0x00000095, 0x00000096, + 0x0004002b, 0x0000001f, 0x00000098, 0x00003447, 0x0004002b, 0x0000001f, 0x00000099, 0x000033e3, + 0x0004002b, 0x0000001f, 0x0000009a, 0x00003441, 0x0004002b, 0x0000001f, 0x0000009b, 0x00003437, + 0x0007002c, 0x00000020, 0x0000009c, 0x00000098, 0x00000099, 0x0000009a, 0x0000009b, 0x0004002b, + 0x0000001f, 0x0000009d, 0x00002b06, 0x0004002b, 0x0000001f, 0x0000009e, 0x0000285a, 0x0004002b, + 0x0000001f, 0x0000009f, 0x0000328a, 0x0004002b, 0x0000001f, 0x000000a0, 0x00003296, 0x0007002c, + 0x00000020, 0x000000a1, 0x0000009d, 0x0000009e, 0x0000009f, 0x000000a0, 0x0004002b, 0x0000001f, + 0x000000a2, 0x00003105, 0x0004002b, 0x0000001f, 0x000000a3, 0x0000306f, 0x0004002b, 0x0000001f, + 0x000000a4, 0x0000335a, 0x0004002b, 0x0000001f, 0x000000a5, 0x0000339c, 0x0007002c, 0x00000020, + 0x000000a6, 0x000000a2, 0x000000a3, 0x000000a4, 0x000000a5, 0x0007002c, 0x00000021, 0x000000a7, + 0x00000097, 0x0000009c, 0x000000a1, 0x000000a6, 0x0005002c, 0x0000000c, 0x000000a8, 0x00000019, + 0x00000066, 0x0004002b, 0x0000001f, 0x000000a9, 0x00008c5c, 0x0004002b, 0x0000001f, 0x000000aa, + 0x00001c08, 0x0004002b, 0x0000001f, 0x000000ab, 0x00002841, 0x0004002b, 0x0000001f, 0x000000ac, + 0x000027fc, 0x0007002c, 0x00000020, 0x000000ad, 0x000000a9, 0x000000aa, 0x000000ab, 0x000000ac, + 0x0004002b, 0x0000001f, 0x000000ae, 0x0000300e, 0x0004002b, 0x0000001f, 0x000000af, 0x00002f71, + 0x0004002b, 0x0000001f, 0x000000b0, 0x000031e0, 0x0004002b, 0x0000001f, 0x000000b1, 0x0000318c, + 0x0007002c, 0x00000020, 0x000000b2, 0x000000ae, 0x000000af, 0x000000b0, 0x000000b1, 0x0004002b, + 0x0000001f, 0x000000b3, 0x0000a3a4, 0x0004002b, 0x0000001f, 0x000000b4, 0x0000a0b8, 0x0004002b, + 0x0000001f, 0x000000b5, 0x00002f0b, 0x0004002b, 0x0000001f, 0x000000b6, 0x00002f8b, 0x0007002c, + 0x00000020, 0x000000b7, 0x000000b3, 0x000000b4, 0x000000b5, 0x000000b6, 0x0004002b, 0x0000001f, + 
0x000000b8, 0x0000b325, 0x0004002b, 0x0000001f, 0x000000b9, 0x0000b268, 0x0004002b, 0x0000001f, + 0x000000ba, 0x00003213, 0x0004002b, 0x0000001f, 0x000000bb, 0x000031cb, 0x0007002c, 0x00000020, + 0x000000bc, 0x000000b8, 0x000000b9, 0x000000ba, 0x000000bb, 0x0007002c, 0x00000021, 0x000000bd, + 0x000000ad, 0x000000b2, 0x000000b7, 0x000000bc, 0x0005002c, 0x0000000c, 0x000000be, 0x00000066, + 0x00000038, 0x0004002b, 0x0000001f, 0x000000bf, 0x00002ac2, 0x0004002b, 0x0000001f, 0x000000c0, + 0x00002c35, 0x0004002b, 0x0000001f, 0x000000c1, 0x0000344e, 0x0004002b, 0x0000001f, 0x000000c2, + 0x00003427, 0x0007002c, 0x00000020, 0x000000c3, 0x000000bf, 0x000000c0, 0x000000c1, 0x000000c2, + 0x0004002b, 0x0000001f, 0x000000c4, 0x0000334b, 0x0004002b, 0x0000001f, 0x000000c5, 0x00003393, + 0x0004002b, 0x0000001f, 0x000000c6, 0x0000340e, 0x0007002c, 0x00000020, 0x000000c7, 0x000000c4, + 0x000000c5, 0x000000c6, 0x00000098, 0x0004002b, 0x0000001f, 0x000000c8, 0x00002e54, 0x0004002b, + 0x0000001f, 0x000000c9, 0x00002ee1, 0x0004002b, 0x0000001f, 0x000000ca, 0x000032e7, 0x0004002b, + 0x0000001f, 0x000000cb, 0x000032d9, 0x0007002c, 0x00000020, 0x000000cc, 0x000000c8, 0x000000c9, + 0x000000ca, 0x000000cb, 0x0004002b, 0x0000001f, 0x000000cd, 0x00002cb8, 0x0004002b, 0x0000001f, + 0x000000ce, 0x00002cd3, 0x0004002b, 0x0000001f, 0x000000cf, 0x0000345c, 0x0004002b, 0x0000001f, + 0x000000d0, 0x0000347c, 0x0007002c, 0x00000020, 0x000000d1, 0x000000cd, 0x000000ce, 0x000000cf, + 0x000000d0, 0x0007002c, 0x00000021, 0x000000d2, 0x000000c3, 0x000000c7, 0x000000cc, 0x000000d1, + 0x0005002c, 0x0000000c, 0x000000d3, 0x00000066, 0x00000019, 0x0004002b, 0x0000001f, 0x000000d4, + 0x0000aab3, 0x0004002b, 0x0000001f, 0x000000d5, 0x0000aa47, 0x0004002b, 0x0000001f, 0x000000d6, + 0x00002d72, 0x0004002b, 0x0000001f, 0x000000d7, 0x00002ddf, 0x0007002c, 0x00000020, 0x000000d8, + 0x000000d4, 0x000000d5, 0x000000d6, 0x000000d7, 0x0004002b, 0x0000001f, 0x000000d9, 0x0000319f, + 0x0004002b, 0x0000001f, 0x000000da, 
0x0000317b, 0x0004002b, 0x0000001f, 0x000000db, 0x00003167, + 0x0004002b, 0x0000001f, 0x000000dc, 0x00003159, 0x0007002c, 0x00000020, 0x000000dd, 0x000000d9, + 0x000000da, 0x000000db, 0x000000dc, 0x0004002b, 0x0000001f, 0x000000de, 0x0000ab52, 0x0004002b, + 0x0000001f, 0x000000df, 0x0000a9f9, 0x0004002b, 0x0000001f, 0x000000e0, 0x00002fd9, 0x0004002b, + 0x0000001f, 0x000000e1, 0x0000301d, 0x0007002c, 0x00000020, 0x000000e2, 0x000000de, 0x000000df, + 0x000000e0, 0x000000e1, 0x0004002b, 0x0000001f, 0x000000e3, 0x0000b15b, 0x0004002b, 0x0000001f, + 0x000000e4, 0x0000b0c9, 0x0004002b, 0x0000001f, 0x000000e5, 0x000030a3, 0x0004002b, 0x0000001f, + 0x000000e6, 0x0000305d, 0x0007002c, 0x00000020, 0x000000e7, 0x000000e3, 0x000000e4, 0x000000e5, + 0x000000e6, 0x0007002c, 0x00000021, 0x000000e8, 0x000000d8, 0x000000dd, 0x000000e2, 0x000000e7, + 0x0005002c, 0x0000000c, 0x000000e9, 0x00000066, 0x00000066, 0x0004002b, 0x0000001f, 0x000000ea, + 0x0000333c, 0x0004002b, 0x0000001f, 0x000000eb, 0x000033a9, 0x0004002b, 0x0000001f, 0x000000ec, + 0x0000b4a5, 0x0004002b, 0x0000001f, 0x000000ed, 0x0000b477, 0x0007002c, 0x00000020, 0x000000ee, + 0x000000ea, 0x000000eb, 0x000000ec, 0x000000ed, 0x0004002b, 0x0000001f, 0x000000ef, 0x0000b48a, + 0x0004002b, 0x0000001f, 0x000000f0, 0x0000b634, 0x0004002b, 0x0000001f, 0x000000f1, 0x000030db, + 0x0004002b, 0x0000001f, 0x000000f2, 0x000030b8, 0x0007002c, 0x00000020, 0x000000f3, 0x000000ef, + 0x000000f0, 0x000000f1, 0x000000f2, 0x0004002b, 0x0000001f, 0x000000f4, 0x00002ca7, 0x0004002b, + 0x0000001f, 0x000000f5, 0x00002d55, 0x0004002b, 0x0000001f, 0x000000f6, 0x000033f8, 0x0004002b, + 0x0000001f, 0x000000f7, 0x00003414, 0x0007002c, 0x00000020, 0x000000f8, 0x000000f4, 0x000000f5, + 0x000000f6, 0x000000f7, 0x0004002b, 0x0000001f, 0x000000f9, 0x00003401, 0x0004002b, 0x0000001f, + 0x000000fa, 0x00003443, 0x0004002b, 0x0000001f, 0x000000fb, 0x0000aeb8, 0x0004002b, 0x0000001f, + 0x000000fc, 0x0000b03b, 0x0007002c, 0x00000020, 0x000000fd, 0x000000f9, 
0x000000fa, 0x000000fb, + 0x000000fc, 0x0007002c, 0x00000021, 0x000000fe, 0x000000ee, 0x000000f3, 0x000000f8, 0x000000fd, + 0x0004003b, 0x00000018, 0x00000006, 0x00000000, 0x0004002b, 0x0000001f, 0x000000ff, 0x000034d6, + 0x0004002b, 0x0000001f, 0x00000100, 0x0000354d, 0x0004002b, 0x0000001f, 0x00000101, 0x0000b0de, + 0x0004002b, 0x0000001f, 0x00000102, 0x0000b0af, 0x0007002c, 0x00000020, 0x00000103, 0x000000ff, + 0x00000100, 0x00000101, 0x00000102, 0x0004002b, 0x0000001f, 0x00000104, 0x0000b8cc, 0x0004002b, + 0x0000001f, 0x00000105, 0x0000b83e, 0x0004002b, 0x0000001f, 0x00000106, 0x0000334e, 0x0004002b, + 0x0000001f, 0x00000107, 0x00003377, 0x0007002c, 0x00000020, 0x00000108, 0x00000104, 0x00000105, + 0x00000106, 0x00000107, 0x0004002b, 0x0000001f, 0x00000109, 0x0000344c, 0x0004002b, 0x0000001f, + 0x0000010a, 0x000034f3, 0x0004002b, 0x0000001f, 0x0000010b, 0x00003508, 0x0007002c, 0x00000020, + 0x0000010c, 0x00000042, 0x00000109, 0x0000010a, 0x0000010b, 0x0004002b, 0x0000001f, 0x0000010d, + 0x000035f4, 0x0004002b, 0x0000001f, 0x0000010e, 0x00003597, 0x0004002b, 0x0000001f, 0x0000010f, + 0x00002e95, 0x0004002b, 0x0000001f, 0x00000110, 0x00002f23, 0x0007002c, 0x00000020, 0x00000111, + 0x0000010d, 0x0000010e, 0x0000010f, 0x00000110, 0x0007002c, 0x00000021, 0x00000112, 0x00000103, + 0x00000108, 0x0000010c, 0x00000111, 0x0004002b, 0x0000001f, 0x00000113, 0x000033b3, 0x0004002b, + 0x0000001f, 0x00000114, 0x00003314, 0x0004002b, 0x0000001f, 0x00000115, 0x0000b4ba, 0x0004002b, + 0x0000001f, 0x00000116, 0x0000b4d3, 0x0007002c, 0x00000020, 0x00000117, 0x00000113, 0x00000114, + 0x00000115, 0x00000116, 0x0004002b, 0x0000001f, 0x00000118, 0x0000b54a, 0x0004002b, 0x0000001f, + 0x00000119, 0x0000b64c, 0x0004002b, 0x0000001f, 0x0000011a, 0x000030a8, 0x0004002b, 0x0000001f, + 0x0000011b, 0x000030c3, 0x0007002c, 0x00000020, 0x0000011c, 0x00000118, 0x00000119, 0x0000011a, + 0x0000011b, 0x0004002b, 0x0000001f, 0x0000011d, 0x00002edf, 0x0004002b, 0x0000001f, 0x0000011e, + 0x00002ec9, 
0x0004002b, 0x0000001f, 0x0000011f, 0x0000331d, 0x0004002b, 0x0000001f, 0x00000120, + 0x000032fc, 0x0007002c, 0x00000020, 0x00000121, 0x0000011d, 0x0000011e, 0x0000011f, 0x00000120, + 0x0004002b, 0x0000001f, 0x00000122, 0x00003425, 0x0004002b, 0x0000001f, 0x00000123, 0x00003450, + 0x0004002b, 0x0000001f, 0x00000124, 0x0000b0f4, 0x0004002b, 0x0000001f, 0x00000125, 0x0000b0ad, + 0x0007002c, 0x00000020, 0x00000126, 0x00000122, 0x00000123, 0x00000124, 0x00000125, 0x0007002c, + 0x00000021, 0x00000127, 0x00000117, 0x0000011c, 0x00000121, 0x00000126, 0x0004002b, 0x0000001f, + 0x00000128, 0x00003517, 0x0004002b, 0x0000001f, 0x00000129, 0x000034d2, 0x0004002b, 0x0000001f, + 0x0000012a, 0x0000b0da, 0x0004002b, 0x0000001f, 0x0000012b, 0x0000b123, 0x0007002c, 0x00000020, + 0x0000012c, 0x00000128, 0x00000129, 0x0000012a, 0x0000012b, 0x0004002b, 0x0000001f, 0x0000012d, + 0x0000b6d7, 0x0004002b, 0x0000001f, 0x0000012e, 0x0000b72a, 0x0004002b, 0x0000001f, 0x0000012f, + 0x000033b8, 0x0004002b, 0x0000001f, 0x00000130, 0x0000337c, 0x0007002c, 0x00000020, 0x00000131, + 0x0000012d, 0x0000012e, 0x0000012f, 0x00000130, 0x0004002b, 0x0000001f, 0x00000132, 0x00003237, + 0x0004002b, 0x0000001f, 0x00000133, 0x00003203, 0x0004002b, 0x0000001f, 0x00000134, 0x00003501, + 0x0004002b, 0x0000001f, 0x00000135, 0x000034e7, 0x0007002c, 0x00000020, 0x00000136, 0x00000132, + 0x00000133, 0x00000134, 0x00000135, 0x0004002b, 0x0000001f, 0x00000137, 0x0000360f, 0x0004002b, + 0x0000001f, 0x00000138, 0x0000359f, 0x0004002b, 0x0000001f, 0x00000139, 0x00002b9f, 0x0004002b, + 0x0000001f, 0x0000013a, 0x00002c06, 0x0007002c, 0x00000020, 0x0000013b, 0x00000137, 0x00000138, + 0x00000139, 0x0000013a, 0x0007002c, 0x00000021, 0x0000013c, 0x0000012c, 0x00000131, 0x00000136, + 0x0000013b, 0x0004002b, 0x0000001f, 0x0000013d, 0x0000366b, 0x0004002b, 0x0000001f, 0x0000013e, + 0x00003678, 0x0004002b, 0x0000001f, 0x0000013f, 0x00009b7c, 0x0004002b, 0x0000001f, 0x00000140, + 0x0000a57b, 0x0007002c, 0x00000020, 0x00000141, 
0x0000013d, 0x0000013e, 0x0000013f, 0x00000140, + 0x0004002b, 0x0000001f, 0x00000142, 0x0000ba51, 0x0004002b, 0x0000001f, 0x00000143, 0x0000b954, + 0x0004002b, 0x0000001f, 0x00000144, 0x000034c5, 0x0004002b, 0x0000001f, 0x00000145, 0x0000349f, + 0x0007002c, 0x00000020, 0x00000146, 0x00000142, 0x00000143, 0x00000144, 0x00000145, 0x0004002b, + 0x0000001f, 0x00000147, 0x000035a4, 0x0004002b, 0x0000001f, 0x00000148, 0x0000357d, 0x0004002b, + 0x0000001f, 0x00000149, 0x000035f3, 0x0004002b, 0x0000001f, 0x0000014a, 0x0000358a, 0x0007002c, + 0x00000020, 0x0000014b, 0x00000147, 0x00000148, 0x00000149, 0x0000014a, 0x0004002b, 0x0000001f, + 0x0000014c, 0x00003741, 0x0004002b, 0x0000001f, 0x0000014d, 0x0000373b, 0x0004002b, 0x0000001f, + 0x0000014e, 0x00003551, 0x0007002c, 0x00000020, 0x0000014f, 0x0000014c, 0x0000014d, 0x0000008b, + 0x0000014e, 0x0007002c, 0x00000021, 0x00000150, 0x00000141, 0x00000146, 0x0000014b, 0x0000014f, + 0x0004002b, 0x0000001f, 0x00000151, 0x00003506, 0x0004002b, 0x0000001f, 0x00000152, 0x00003522, + 0x0004002b, 0x0000001f, 0x00000153, 0x0000b0d8, 0x0004002b, 0x0000001f, 0x00000154, 0x0000b0ee, + 0x0007002c, 0x00000020, 0x00000155, 0x00000151, 0x00000152, 0x00000153, 0x00000154, 0x0004002b, + 0x0000001f, 0x00000156, 0x0000b76a, 0x0004002b, 0x0000001f, 0x00000157, 0x0000b736, 0x0004002b, + 0x0000001f, 0x00000158, 0x00003334, 0x0004002b, 0x0000001f, 0x00000159, 0x00003329, 0x0007002c, + 0x00000020, 0x0000015a, 0x00000156, 0x00000157, 0x00000158, 0x00000159, 0x0004002b, 0x0000001f, + 0x0000015b, 0x00003357, 0x0004002b, 0x0000001f, 0x0000015c, 0x000032e4, 0x0004002b, 0x0000001f, + 0x0000015d, 0x0000347f, 0x0004002b, 0x0000001f, 0x0000015e, 0x0000349a, 0x0007002c, 0x00000020, + 0x0000015f, 0x0000015b, 0x0000015c, 0x0000015d, 0x0000015e, 0x0004002b, 0x0000001f, 0x00000160, + 0x000035c7, 0x0004002b, 0x0000001f, 0x00000161, 0x0000351e, 0x0004002b, 0x0000001f, 0x00000162, + 0x00002a4e, 0x0004002b, 0x0000001f, 0x00000163, 0x00002aee, 0x0007002c, 0x00000020, 
0x00000164, + 0x00000160, 0x00000161, 0x00000162, 0x00000163, 0x0007002c, 0x00000021, 0x00000165, 0x00000155, + 0x0000015a, 0x0000015f, 0x00000164, 0x0004002b, 0x0000001f, 0x00000166, 0x000032f7, 0x0004002b, + 0x0000001f, 0x00000167, 0x0000333f, 0x0004002b, 0x0000001f, 0x00000168, 0x0000b454, 0x0004002b, + 0x0000001f, 0x00000169, 0x0000b432, 0x0007002c, 0x00000020, 0x0000016a, 0x00000166, 0x00000167, + 0x00000168, 0x00000169, 0x0004002b, 0x0000001f, 0x0000016b, 0x0000b5ec, 0x0004002b, 0x0000001f, + 0x0000016c, 0x0000b779, 0x0004002b, 0x0000001f, 0x0000016d, 0x0000311a, 0x0004002b, 0x0000001f, + 0x0000016e, 0x00003166, 0x0007002c, 0x00000020, 0x0000016f, 0x0000016b, 0x0000016c, 0x0000016d, + 0x0000016e, 0x0004002b, 0x0000001f, 0x00000170, 0x00002bfe, 0x0004002b, 0x0000001f, 0x00000171, + 0x00002cbf, 0x0004002b, 0x0000001f, 0x00000172, 0x0000340f, 0x0004002b, 0x0000001f, 0x00000173, + 0x00003426, 0x0007002c, 0x00000020, 0x00000174, 0x00000170, 0x00000171, 0x00000172, 0x00000173, + 0x0004002b, 0x0000001f, 0x00000175, 0x0000347d, 0x0004002b, 0x0000001f, 0x00000176, 0x00003454, + 0x0004002b, 0x0000001f, 0x00000177, 0x0000b2c2, 0x0004002b, 0x0000001f, 0x00000178, 0x0000b329, + 0x0007002c, 0x00000020, 0x00000179, 0x00000175, 0x00000176, 0x00000177, 0x00000178, 0x0007002c, + 0x00000021, 0x0000017a, 0x0000016a, 0x0000016f, 0x00000174, 0x00000179, 0x0004002b, 0x0000001f, + 0x0000017b, 0x0000348c, 0x0004002b, 0x0000001f, 0x0000017c, 0x0000b024, 0x0004002b, 0x0000001f, + 0x0000017d, 0x0000b048, 0x0007002c, 0x00000020, 0x0000017e, 0x00000144, 0x0000017b, 0x0000017c, + 0x0000017d, 0x0004002b, 0x0000001f, 0x0000017f, 0x0000b987, 0x0004002b, 0x0000001f, 0x00000180, + 0x0000b8f3, 0x0004002b, 0x0000001f, 0x00000181, 0x0000339e, 0x0004002b, 0x0000001f, 0x00000182, + 0x0000336a, 0x0007002c, 0x00000020, 0x00000183, 0x0000017f, 0x00000180, 0x00000181, 0x00000182, + 0x0004002b, 0x0000001f, 0x00000184, 0x00003430, 0x0004002b, 0x0000001f, 0x00000185, 0x00003460, + 0x0004002b, 0x0000001f, 
0x00000186, 0x000034b2, 0x0004002b, 0x0000001f, 0x00000187, 0x000034b1, + 0x0007002c, 0x00000020, 0x00000188, 0x00000184, 0x00000185, 0x00000186, 0x00000187, 0x0004002b, + 0x0000001f, 0x00000189, 0x0000358f, 0x0004002b, 0x0000001f, 0x0000018a, 0x00003591, 0x0004002b, + 0x0000001f, 0x0000018b, 0x00002961, 0x0004002b, 0x0000001f, 0x0000018c, 0x00002a36, 0x0007002c, + 0x00000020, 0x0000018d, 0x00000189, 0x0000018a, 0x0000018b, 0x0000018c, 0x0007002c, 0x00000021, + 0x0000018e, 0x0000017e, 0x00000183, 0x00000188, 0x0000018d, 0x0004002b, 0x0000001f, 0x0000018f, + 0x00003264, 0x0004002b, 0x0000001f, 0x00000190, 0x0000b340, 0x0004002b, 0x0000001f, 0x00000191, + 0x0000b374, 0x0007002c, 0x00000020, 0x00000192, 0x00000041, 0x0000018f, 0x00000190, 0x00000191, + 0x0004002b, 0x0000001f, 0x00000193, 0x0000b5d3, 0x0004002b, 0x0000001f, 0x00000194, 0x0000b6c4, + 0x0004002b, 0x0000001f, 0x00000195, 0x0000310a, 0x0004002b, 0x0000001f, 0x00000196, 0x00003109, + 0x0007002c, 0x00000020, 0x00000197, 0x00000193, 0x00000194, 0x00000195, 0x00000196, 0x0004002b, + 0x0000001f, 0x00000198, 0x00002f05, 0x0004002b, 0x0000001f, 0x00000199, 0x00002e14, 0x0004002b, + 0x0000001f, 0x0000019a, 0x00003355, 0x0007002c, 0x00000020, 0x0000019b, 0x00000198, 0x00000199, + 0x00000130, 0x0000019a, 0x0004002b, 0x0000001f, 0x0000019c, 0x00003440, 0x0004002b, 0x0000001f, + 0x0000019d, 0x00003421, 0x0004002b, 0x0000001f, 0x0000019e, 0x0000b20c, 0x0007002c, 0x00000020, + 0x0000019f, 0x0000019c, 0x0000019d, 0x0000019e, 0x0000019e, 0x0007002c, 0x00000021, 0x000001a0, + 0x00000192, 0x00000197, 0x0000019b, 0x0000019f, 0x0004002b, 0x0000001f, 0x000001a1, 0x000035cb, + 0x0004002b, 0x0000001f, 0x000001a2, 0x00003733, 0x0004002b, 0x0000001f, 0x000001a3, 0x0000316e, + 0x0004002b, 0x0000001f, 0x000001a4, 0x000032f6, 0x0007002c, 0x00000020, 0x000001a5, 0x000001a1, + 0x000001a2, 0x000001a3, 0x000001a4, 0x0004003b, 0x00000018, 0x00000007, 0x00000000, 0x0004002b, + 0x0000000d, 0x000001a6, 0x00000010, 0x0006002c, 0x0000000e, 
0x00000008, 0x000001a6, 0x000001a6, + 0x0000001e, 0x00050036, 0x00000009, 0x00000002, 0x00000000, 0x0000000a, 0x000200f8, 0x000001a7, + 0x000300f7, 0x000001a8, 0x00000000, 0x000300fb, 0x0000001d, 0x000001a9, 0x000200f8, 0x000001a9, + 0x0004003d, 0x0000000e, 0x000001aa, 0x00000003, 0x0007004f, 0x00000010, 0x000001ab, 0x000001aa, + 0x000001aa, 0x00000000, 0x00000001, 0x0004007c, 0x0000000c, 0x000001ac, 0x000001ab, 0x0004003d, + 0x00000012, 0x000001ad, 0x00000004, 0x00040068, 0x0000000c, 0x000001ae, 0x000001ad, 0x000500af, + 0x00000015, 0x000001af, 0x000001ac, 0x000001ae, 0x0004009a, 0x00000014, 0x000001b0, 0x000001af, + 0x000300f7, 0x000001b1, 0x00000000, 0x000400fa, 0x000001b0, 0x000001b2, 0x000001b1, 0x000200f8, + 0x000001b2, 0x000200f9, 0x000001a8, 0x000200f8, 0x000001b1, 0x0004003d, 0x00000017, 0x000001b3, + 0x00000005, 0x00040064, 0x00000016, 0x000001b4, 0x000001b3, 0x00050067, 0x0000000c, 0x000001b5, + 0x000001b4, 0x00000019, 0x0004007c, 0x00000010, 0x000001b6, 0x000001b5, 0x00040070, 0x0000001a, + 0x000001b7, 0x000001ab, 0x00050081, 0x0000001a, 0x000001b8, 0x000001b7, 0x0000001c, 0x00050051, + 0x0000000d, 0x000001b9, 0x000001b6, 0x00000000, 0x00040070, 0x00000011, 0x000001ba, 0x000001b9, + 0x00050051, 0x0000000d, 0x000001bb, 0x000001b6, 0x00000001, 0x00040070, 0x00000011, 0x000001bc, + 0x000001bb, 0x00050050, 0x0000001a, 0x000001bd, 0x000001ba, 0x000001bc, 0x00050088, 0x0000001a, + 0x000001be, 0x000001b8, 0x000001bd, 0x0004003d, 0x00000017, 0x000001bf, 0x00000005, 0x00080058, + 0x0000003a, 0x000001c0, 0x000001bf, 0x000001be, 0x0000000a, 0x00000037, 0x00000039, 0x00040073, + 0x00000020, 0x000001c1, 0x000001c0, 0x00050091, 0x00000020, 0x000001c2, 0x00000036, 0x000001c1, + 0x0004003d, 0x00000017, 0x000001c3, 0x00000005, 0x00080058, 0x0000003a, 0x000001c4, 0x000001c3, + 0x000001be, 0x0000000a, 0x00000037, 0x00000050, 0x00040073, 0x00000020, 0x000001c5, 0x000001c4, + 0x00050091, 0x00000020, 0x000001c6, 0x0000004f, 0x000001c5, 0x00050081, 0x00000020, 0x000001c7, + 
0x000001c2, 0x000001c6, 0x0004003d, 0x00000017, 0x000001c8, 0x00000005, 0x00080058, 0x0000003a, + 0x000001c9, 0x000001c8, 0x000001be, 0x0000000a, 0x00000037, 0x00000067, 0x00040073, 0x00000020, + 0x000001ca, 0x000001c9, 0x00050091, 0x00000020, 0x000001cb, 0x00000065, 0x000001ca, 0x00050081, + 0x00000020, 0x000001cc, 0x000001c7, 0x000001cb, 0x0004003d, 0x00000017, 0x000001cd, 0x00000005, + 0x00080058, 0x0000003a, 0x000001ce, 0x000001cd, 0x000001be, 0x0000000a, 0x00000037, 0x0000007d, + 0x00040073, 0x00000020, 0x000001cf, 0x000001ce, 0x00050091, 0x00000020, 0x000001d0, 0x0000007c, + 0x000001cf, 0x00050081, 0x00000020, 0x000001d1, 0x000001cc, 0x000001d0, 0x0004003d, 0x00000017, + 0x000001d2, 0x00000005, 0x00070058, 0x0000003a, 0x000001d3, 0x000001d2, 0x000001be, 0x00000002, + 0x00000037, 0x00040073, 0x00000020, 0x000001d4, 0x000001d3, 0x00050091, 0x00000020, 0x000001d5, + 0x00000092, 0x000001d4, 0x00050081, 0x00000020, 0x000001d6, 0x000001d1, 0x000001d5, 0x0004003d, + 0x00000017, 0x000001d7, 0x00000005, 0x00080058, 0x0000003a, 0x000001d8, 0x000001d7, 0x000001be, + 0x0000000a, 0x00000037, 0x000000a8, 0x00040073, 0x00000020, 0x000001d9, 0x000001d8, 0x00050091, + 0x00000020, 0x000001da, 0x000000a7, 0x000001d9, 0x00050081, 0x00000020, 0x000001db, 0x000001d6, + 0x000001da, 0x0004003d, 0x00000017, 0x000001dc, 0x00000005, 0x00080058, 0x0000003a, 0x000001dd, + 0x000001dc, 0x000001be, 0x0000000a, 0x00000037, 0x000000be, 0x00040073, 0x00000020, 0x000001de, + 0x000001dd, 0x00050091, 0x00000020, 0x000001df, 0x000000bd, 0x000001de, 0x00050081, 0x00000020, + 0x000001e0, 0x000001db, 0x000001df, 0x0004003d, 0x00000017, 0x000001e1, 0x00000005, 0x00080058, + 0x0000003a, 0x000001e2, 0x000001e1, 0x000001be, 0x0000000a, 0x00000037, 0x000000d3, 0x00040073, + 0x00000020, 0x000001e3, 0x000001e2, 0x00050091, 0x00000020, 0x000001e4, 0x000000d2, 0x000001e3, + 0x00050081, 0x00000020, 0x000001e5, 0x000001e0, 0x000001e4, 0x0004003d, 0x00000017, 0x000001e6, + 0x00000005, 0x00080058, 0x0000003a, 
0x000001e7, 0x000001e6, 0x000001be, 0x0000000a, 0x00000037, + 0x000000e9, 0x00040073, 0x00000020, 0x000001e8, 0x000001e7, 0x00050091, 0x00000020, 0x000001e9, + 0x000000e8, 0x000001e8, 0x00050081, 0x00000020, 0x000001ea, 0x000001e5, 0x000001e9, 0x0004003d, + 0x00000017, 0x000001eb, 0x00000006, 0x00080058, 0x0000003a, 0x000001ec, 0x000001eb, 0x000001be, + 0x0000000a, 0x00000037, 0x00000039, 0x00040073, 0x00000020, 0x000001ed, 0x000001ec, 0x00050091, + 0x00000020, 0x000001ee, 0x000000fe, 0x000001ed, 0x00050081, 0x00000020, 0x000001ef, 0x000001ea, + 0x000001ee, 0x0004003d, 0x00000017, 0x000001f0, 0x00000006, 0x00080058, 0x0000003a, 0x000001f1, + 0x000001f0, 0x000001be, 0x0000000a, 0x00000037, 0x00000050, 0x00040073, 0x00000020, 0x000001f2, + 0x000001f1, 0x00050091, 0x00000020, 0x000001f3, 0x00000112, 0x000001f2, 0x00050081, 0x00000020, + 0x000001f4, 0x000001ef, 0x000001f3, 0x0004003d, 0x00000017, 0x000001f5, 0x00000006, 0x00080058, + 0x0000003a, 0x000001f6, 0x000001f5, 0x000001be, 0x0000000a, 0x00000037, 0x00000067, 0x00040073, + 0x00000020, 0x000001f7, 0x000001f6, 0x00050091, 0x00000020, 0x000001f8, 0x00000127, 0x000001f7, + 0x00050081, 0x00000020, 0x000001f9, 0x000001f4, 0x000001f8, 0x0004003d, 0x00000017, 0x000001fa, + 0x00000006, 0x00080058, 0x0000003a, 0x000001fb, 0x000001fa, 0x000001be, 0x0000000a, 0x00000037, + 0x0000007d, 0x00040073, 0x00000020, 0x000001fc, 0x000001fb, 0x00050091, 0x00000020, 0x000001fd, + 0x0000013c, 0x000001fc, 0x00050081, 0x00000020, 0x000001fe, 0x000001f9, 0x000001fd, 0x0004003d, + 0x00000017, 0x000001ff, 0x00000006, 0x00070058, 0x0000003a, 0x00000200, 0x000001ff, 0x000001be, + 0x00000002, 0x00000037, 0x00040073, 0x00000020, 0x00000201, 0x00000200, 0x00050091, 0x00000020, + 0x00000202, 0x00000150, 0x00000201, 0x00050081, 0x00000020, 0x00000203, 0x000001fe, 0x00000202, + 0x0004003d, 0x00000017, 0x00000204, 0x00000006, 0x00080058, 0x0000003a, 0x00000205, 0x00000204, + 0x000001be, 0x0000000a, 0x00000037, 0x000000a8, 0x00040073, 0x00000020, 
0x00000206, 0x00000205, + 0x00050091, 0x00000020, 0x00000207, 0x00000165, 0x00000206, 0x00050081, 0x00000020, 0x00000208, + 0x00000203, 0x00000207, 0x0004003d, 0x00000017, 0x00000209, 0x00000006, 0x00080058, 0x0000003a, + 0x0000020a, 0x00000209, 0x000001be, 0x0000000a, 0x00000037, 0x000000be, 0x00040073, 0x00000020, + 0x0000020b, 0x0000020a, 0x00050091, 0x00000020, 0x0000020c, 0x0000017a, 0x0000020b, 0x00050081, + 0x00000020, 0x0000020d, 0x00000208, 0x0000020c, 0x0004003d, 0x00000017, 0x0000020e, 0x00000006, + 0x00080058, 0x0000003a, 0x0000020f, 0x0000020e, 0x000001be, 0x0000000a, 0x00000037, 0x000000d3, + 0x00040073, 0x00000020, 0x00000210, 0x0000020f, 0x00050091, 0x00000020, 0x00000211, 0x0000018e, + 0x00000210, 0x00050081, 0x00000020, 0x00000212, 0x0000020d, 0x00000211, 0x0004003d, 0x00000017, + 0x00000213, 0x00000006, 0x00080058, 0x0000003a, 0x00000214, 0x00000213, 0x000001be, 0x0000000a, + 0x00000037, 0x000000e9, 0x00040073, 0x00000020, 0x00000215, 0x00000214, 0x00050091, 0x00000020, + 0x00000216, 0x000001a0, 0x00000215, 0x00050081, 0x00000020, 0x00000217, 0x00000212, 0x00000216, + 0x00050081, 0x00000020, 0x00000218, 0x00000217, 0x000001a5, 0x0004003d, 0x00000017, 0x00000219, + 0x00000007, 0x00070058, 0x0000003a, 0x0000021a, 0x00000219, 0x000001be, 0x00000002, 0x00000037, + 0x00040073, 0x00000020, 0x0000021b, 0x0000021a, 0x00050081, 0x00000020, 0x0000021c, 0x00000218, + 0x0000021b, 0x0004003d, 0x00000012, 0x0000021d, 0x00000004, 0x00040073, 0x0000003a, 0x0000021e, + 0x0000021c, 0x00040063, 0x0000021d, 0x000001ac, 0x0000021e, 0x000200f9, 0x000001a8, 0x000200f8, + 0x000001a8, 0x000100fd, 0x00010038, +}; +static const size_t wnfg_53_spv_size = sizeof(wnfg_53_spv); diff --git a/app/src/main/runtime/display/renderer/VulkanRenderer.java b/app/src/main/runtime/display/renderer/VulkanRenderer.java index 7d37fb3e2..430aa7d45 100644 --- a/app/src/main/runtime/display/renderer/VulkanRenderer.java +++ b/app/src/main/runtime/display/renderer/VulkanRenderer.java @@ -61,6 
+61,7 @@ public class VulkanRenderer private volatile int fgMultiplier = 2; // target display:engine ratio (2, 3, 4) — the user ceiling private volatile int fgEffectiveMultiplier = 2; // adaptive working multiplier (2..ceiling) private volatile int fgBoundSecs = 0; + private volatile int fgRecoverSecs = 0; // consecutive healthy seconds -> step the multiplier back up private final AtomicBoolean fgNewScene = new AtomicBoolean(false); private final AtomicBoolean fgSceneDirty = new AtomicBoolean(false); private final AtomicBoolean fgPumpScheduled = new AtomicBoolean(false); @@ -435,9 +436,16 @@ private void fgCadenceDiag() { if (fgDisplayCapHz > 0) targetEff = Math.min(targetEff, (double) fgDisplayCapHz); // Step down only on a sustained shortfall (>=4 consecutive slow seconds). Floor 2x. if (deliveredHz > 0.0 && deliveredHz < 0.85 * targetEff && !fgOverlayActive) { + fgRecoverSecs = 0; if (++fgBoundSecs >= 4 && fgEffectiveMultiplier > 2) { fgEffectiveMultiplier--; fgBoundSecs = 0; } + } else if (deliveredHz >= 0.95 * targetEff && fgEffectiveMultiplier < fgMultiplier && !fgOverlayActive) { + // Delivery is keeping up with the current working multiplier and we're below the user + // ceiling -> climb back up so transient load doesn't permanently strand us at a low rate. + fgBoundSecs = 0; + if (++fgRecoverSecs >= 3) { fgEffectiveMultiplier++; fgRecoverSecs = 0; } } else { fgBoundSecs = 0; + fgRecoverSecs = 0; } } Log.i(TAG, String.format(java.util.Locale.US, @@ -547,12 +555,6 @@ private int fgEmitOne() { } if (!promoted) fgPromoteSlotIdx++; - // Drawer/menu overlay up: pause FG generation and present only the real frame. 
- if (fgOverlayActive) { - if (fgEmitWasHold) { nativePresentLast(nativeHandle, 0f, fgPrevPromoteNs, fgLastPromoteNs); return 2; } - return 0; - } - long period = fgContentPeriodNs; boolean canInterp = fgMultiplier > 1 && fgEngineFrames >= 2 && period > 0L && fgLastPromoteNs != 0L && fgPrevPromoteNs != 0L; @@ -572,24 +574,23 @@ private int fgEmitOne() { return 2; } if (period <= 0L) return 0; - // Snap the multiplier to the largest divisor of the panel:content ratio so output divides the panel evenly. - int M = Math.max(2, fgEffectiveMultiplier); + // Fill the panel: emit min(eff, slots) unique frames per content interval, spread evenly with a + // Bresenham gate. No divisor-snapping (which used to collapse e.g. 3x@4-slots down to 2x = half rate). + int eff = Math.max(2, fgEffectiveMultiplier); long disp = fgDisplayPeriodNs; - int slots = M; + int slots = eff; if (disp > 0L) { int s = (int) Math.round((double) period / (double) disp); - if (s >= 2) { - slots = s; - int best = 1; - for (int d = 2; d <= M && d <= slots; d++) if (slots % d == 0) best = d; - if (best >= 2) M = best; - } + if (s >= 2) slots = s; } - fgCadenceM = M; - // Emit a new frame every `hold` vblanks (hold = slots/M); sample the tween phase from the content clock. - int hold = Math.max(1, slots / M); + int emits = Math.min(eff, slots); // can't show more unique frames than panel refreshes per interval + fgCadenceM = emits; fgVblankSincePromote++; - if ((fgVblankSincePromote % hold) != 0) return 0; // between gates — hold the current frame + int vi = fgVblankSincePromote; // vblanks since the real frame (1..slots-1) + if (vi >= slots) return 0; // interval fully spanned — hold for next promote + // vblank 0 already showed the real frame; place the (emits-1) interps evenly across the rest. + boolean emit = (int) ((long) vi * emits / slots) != (int) ((long) (vi - 1) * emits / slots); + if (!emit) return 0; // between gates — hold the current frame long vsync = fgCurrentVsyncNs != 0L ? 
fgCurrentVsyncNs : System.nanoTime(); double phase = (double) (vsync - fgLastPromoteNs) / (double) period; if (phase >= 1.0) return 0; // interval overran — hold until next promote @@ -683,6 +684,9 @@ public void attachSurface(Surface surface) { fgSurface = surface; fgFrameRateHint = -1f; // fresh surface carries no frame-rate preference; re-apply if (nativeHandle == 0) { + // Keep the compositor on the guest-matched driver (Turnip) for AHB-tiling parity. + // Turnip carries a chip8 VK_NV_optical_flow compute implementation but ships it + // disabled; the native side force-enables it via FD_DEV_FEATURES before device init. nativeHandle = nativeCreate(shouldEnableValidationLayers(), graphicsDriverName, xServerView.getContext().getApplicationContext()); if (nativeHandle == 0) { From 30eac9ba32558e7857725fbeea619f8249547f1b Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sat, 20 Jun 2026 11:29:07 -0400 Subject: [PATCH 26/46] Fix FG interp phase: deterministic slot cadence instead of vsync-derived The interp phase was computed from the vsync clock relative to the frame-arrival clock; those grids are unaligned, so each pair's interps landed at jittery phases (0.6/0.48 instead of clean fractions), placing moving objects ahead/behind their correct position frame-to-frame. Phase is now vi/slots: clean, evenly-spaced, ordered (0.25/0.5/0.75 at 4x), verified by per-frame phase logging in the dump. 
--- app/src/main/cpp/winlator/vk/vk_renderer.c | 1 + app/src/main/runtime/display/renderer/VulkanRenderer.java | 5 +---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index eb258f0fd..e6b3ec0d7 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -3542,6 +3542,7 @@ static FgPending fg_worker_generate(VkRenderer* r, const FgJob* job) { fg_dump_poll(r); if (r->fg_dump_supported && r->fg_dump_armed && r->fg_dump_count < FG_DUMP_N) { fg_record_dump(r, f->cmd, r->fg_cnn.gen[genslot].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, r->fg_dump_count); + VK_LOGI("fgdump[%u] phase=%.3f prev=%u curr=%u seq=%u", r->fg_dump_count, job->phase, prev_idx, curr_idx, job->seq); if (r->fg_dump_count == 0) { // capture the pair's real frames once, into slots 8 and 9 fg_record_dump(r, f->cmd, r->fg_history[prev_idx].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 8); fg_record_dump(r, f->cmd, curr->image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 9); diff --git a/app/src/main/runtime/display/renderer/VulkanRenderer.java b/app/src/main/runtime/display/renderer/VulkanRenderer.java index 430aa7d45..03c715990 100644 --- a/app/src/main/runtime/display/renderer/VulkanRenderer.java +++ b/app/src/main/runtime/display/renderer/VulkanRenderer.java @@ -591,10 +591,7 @@ private int fgEmitOne() { // vblank 0 already showed the real frame; place the (emits-1) interps evenly across the rest. boolean emit = (int) ((long) vi * emits / slots) != (int) ((long) (vi - 1) * emits / slots); if (!emit) return 0; // between gates — hold the current frame - long vsync = fgCurrentVsyncNs != 0L ? 
fgCurrentVsyncNs : System.nanoTime(); - double phase = (double) (vsync - fgLastPromoteNs) / (double) period; - if (phase >= 1.0) return 0; // interval overran — hold until next promote - if (phase <= 0.0) { nativePresentLast(nativeHandle, 0f, fgPrevPromoteNs, fgLastPromoteNs); return 2; } + double phase = (double) vi / (double) slots; // deterministic slot phase, jitter-free nativeRenderInterp(nativeHandle, (float) phase, fgPrevPromoteNs, fgLastPromoteNs); return 1; } From 88beebf20d822487a924f66e7d9d299f82c621d3 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sat, 20 Jun 2026 11:42:23 -0400 Subject: [PATCH 27/46] Fix FG warp magnitude: preset-correct mvScale + interpolate.frag warp convention The generate warped by a coarse pyramid flow level with mvScale hardcoded to 1.0, which is only correct at flow_scale=0.5 -> objects moved too far/short at other presets (the off rate-of-movement). Now warps the fine fg_motion by the same convention interpolate.frag uses (prev +flow*t, curr -flow*(1-t), one .xy field) with mvScale = gw/(2*flow_width), correct at every preset. Dump gains a phase-0.25-aligned start for interp-fraction measurement. 
--- .../cpp/winlator/vk/shaders/cnn_generate.comp | 46 ++++++------------- app/src/main/cpp/winlator/vk/vk_cnn_fg.c | 6 +-- app/src/main/cpp/winlator/vk/vk_renderer.c | 3 +- 3 files changed, 18 insertions(+), 37 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp index 5180774ad..e09221794 100644 --- a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp @@ -29,45 +29,25 @@ void main() { vec2 uvc = numer / texSize; float m0 = pc.mvScale; - vec4 mvB = texture(flowB, uvc) * m0; - vec4 mvF = texture(flowF, uvc) * m0; + vec2 fl = texture(flowB, uvc).xy * m0; float t = pc.t; - float a = 2.0 * t; - float b = 2.0 * (1.0 - t); + vec2 c0 = (numer + fl * (2.0 * t)) / texSize; + vec2 c1 = (numer - fl * (2.0 * (1.0 - t))) / texSize; - vec2 c0 = (numer + mvB.xy * a) / texSize; - vec2 c1 = (numer + mvB.zw * b) / texSize; - vec2 c2 = (numer + mvF.xy * a) / texSize; - vec2 c3 = (numer + mvF.zw * b) / texSize; - - vec4 L = vec4(texture(logits, c0).x, - texture(logits, c1).y, - texture(logits, c2).z, - texture(logits, c3).w); + vec3 back = texture(backColor, c0).rgb; + vec3 fwd = texture(fwdColor, c1).rgb; const float CONS = 6.0; - vec2 e0 = texture(flowB, c0).xy * m0 - mvB.xy; - vec2 e1 = texture(flowB, c1).zw * m0 - mvB.zw; - vec2 e2 = texture(flowF, c2).xy * m0 - mvF.xy; - vec2 e3 = texture(flowF, c3).zw * m0 - mvF.zw; - L -= CONS * vec4(dot(e0, e0), dot(e1, e1), dot(e2, e2), dot(e3, e3)); - const float TEMP = 4.0; - float mx = max(max(L.x, L.y), max(L.z, L.w)); - vec4 w = exp((L - mx) / TEMP); - w /= dot(w, vec4(1.0)); - - float wb = 1.0 - t; - float wf = t; - - vec4 acc = texture(backColor, c0) * (wb * w.x) - + texture(fwdColor, c1) * (wf * w.y) - + texture(backColor, c2) * (wb * w.z) - + texture(fwdColor, c3) * (wf * w.w); - - float den = wb * (w.x + w.z) + wf * (w.y + w.w) + 1e-8; - vec3 col = (acc / den).rgb; + vec2 eb = 
texture(flowB, c0).xy * m0 - fl; + vec2 ef = texture(flowB, c1).xy * m0 + fl; + float Lb = texture(logits, c0).x - CONS * dot(eb, eb); + float Lf = texture(logits, c1).y - CONS * dot(ef, ef); + float mx = max(Lb, Lf); + float wb = (1.0 - t) * exp((Lb - mx) / TEMP); + float wf = t * exp((Lf - mx) / TEMP); + vec3 col = (back * wb + fwd * wf) / (wb + wf + 1e-6); vec3 cPrevFlat = texture(backColor, uvc).rgb; vec3 cCurrFlat = texture(fwdColor, uvc).rgb; diff --git a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c index 4ef4fd47c..a24751c5a 100644 --- a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c +++ b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c @@ -760,15 +760,15 @@ static void cnn_generate_frame(VkRenderer* r, VkCommandBuffer cmd, uint32_t pari VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); float t = phase < 0.0f ? 0.0f : (phase > 1.0f ? 1.0f : phase); - float mvScale = 1.0f; + float mvScale = (float)gw / (2.0f * (float)r->fg_motion[parity].width); cnn_to_write(cmd, C->gen[slot].image, 1); { VkDescriptorSet ds = cnn_alloc(r, P->cnn_generate_dsl); if (!ds) return; VkDescriptorImageInfo s32 = {r->fg_sampler, prevView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo s33 = {r->fg_sampler, currView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; - VkDescriptorImageInfo s34 = {r->fg_sampler, C->flowRef[2].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; - VkDescriptorImageInfo s35 = {r->fg_sampler, C->flowRef[1].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s34 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s35 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo s36 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo oi = {VK_NULL_HANDLE, C->gen[slot].view, VK_IMAGE_LAYOUT_GENERAL}; VkWriteDescriptorSet w[6] = { diff 
--git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index e6b3ec0d7..8b17d6d37 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -3540,7 +3540,8 @@ static FgPending fg_worker_generate(VkRenderer* r, const FgJob* job) { gen_present = true; fg_dump_poll(r); - if (r->fg_dump_supported && r->fg_dump_armed && r->fg_dump_count < FG_DUMP_N) { + if (r->fg_dump_supported && r->fg_dump_armed && r->fg_dump_count < FG_DUMP_N + && !(r->fg_dump_count == 0 && job->phase > 0.35f)) { fg_record_dump(r, f->cmd, r->fg_cnn.gen[genslot].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, r->fg_dump_count); VK_LOGI("fgdump[%u] phase=%.3f prev=%u curr=%u seq=%u", r->fg_dump_count, job->phase, prev_idx, curr_idx, job->seq); if (r->fg_dump_count == 0) { // capture the pair's real frames once, into slots 8 and 9 From 3304dc65330b96adbd087a73b616905d721973db Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sat, 20 Jun 2026 11:55:21 -0400 Subject: [PATCH 28/46] Fix FG warp magnitude: flow is full-res pixel units, mvScale is constant 0.5 mvScale was gw/(2*flow_width), assuming flow stored in flow-resolution pixels; it is actually full-resolution pixels, so that formula scaled the warp by 1/flow_scale -> 4x overshoot at Eco (flow_scale 0.2), objects torn/shrunk, while Max (0.8) was near-correct. Constant 0.5 (warp = flow*t) is preset-independent and correct; verified by object-pixel integrity (Eco gen objects 6021->7971 px, matching real ~8000). 
--- app/src/main/cpp/winlator/vk/vk_cnn_fg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c index a24751c5a..c231efcc0 100644 --- a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c +++ b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c @@ -760,7 +760,7 @@ static void cnn_generate_frame(VkRenderer* r, VkCommandBuffer cmd, uint32_t pari VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); float t = phase < 0.0f ? 0.0f : (phase > 1.0f ? 1.0f : phase); - float mvScale = (float)gw / (2.0f * (float)r->fg_motion[parity].width); + float mvScale = 0.5f; cnn_to_write(cmd, C->gen[slot].image, 1); { From 19bf1918751b249a03a583d06169034457e64dfb Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sat, 20 Jun 2026 12:07:30 -0400 Subject: [PATCH 29/46] Frame dump: start a burst at the pair boundary for any multiplier The phase>0.35 burst-start gate only matched 4x (which has phase 0.25); 2x/3x never started a burst so the dump returned stale frames. Now gates on phase not increasing vs the previous frame (a pair/wrap boundary), valid at 2x/3x/4x. 
--- app/src/main/cpp/winlator/vk/vk_renderer.c | 4 +++- app/src/main/cpp/winlator/vk/vk_state.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index 8b17d6d37..d791945d2 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -3540,8 +3540,10 @@ static FgPending fg_worker_generate(VkRenderer* r, const FgJob* job) { gen_present = true; fg_dump_poll(r); + float fg_dump_prevph = r->fg_dump_last_phase; + r->fg_dump_last_phase = job->phase; if (r->fg_dump_supported && r->fg_dump_armed && r->fg_dump_count < FG_DUMP_N - && !(r->fg_dump_count == 0 && job->phase > 0.35f)) { + && !(r->fg_dump_count == 0 && job->phase > fg_dump_prevph + 0.01f)) { fg_record_dump(r, f->cmd, r->fg_cnn.gen[genslot].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, r->fg_dump_count); VK_LOGI("fgdump[%u] phase=%.3f prev=%u curr=%u seq=%u", r->fg_dump_count, job->phase, prev_idx, curr_idx, job->seq); if (r->fg_dump_count == 0) { // capture the pair's real frames once, into slots 8 and 9 diff --git a/app/src/main/cpp/winlator/vk/vk_state.h b/app/src/main/cpp/winlator/vk/vk_state.h index 5fe20b3b1..8b043b393 100644 --- a/app/src/main/cpp/winlator/vk/vk_state.h +++ b/app/src/main/cpp/winlator/vk/vk_state.h @@ -508,6 +508,7 @@ typedef struct VkRenderer { bool fg_dump_armed; // a burst is in progress bool fg_dump_seen_zero; // prop read "0" since the last dump (edge-trigger gate) uint32_t fg_dump_count; // frames captured so far in the current burst + float fg_dump_last_phase; // previous interp phase (start a burst at a pair boundary) VkImage fg_dump_img; // 480x270 RGBA8 blit target (reused per capture) VkDeviceMemory fg_dump_img_mem; VkBuffer fg_dump_buf[10]; // 8 gen + prev + curr, host-visible From 74ec3c5d5c92f56bba70af75f4b4e12670c69ab1 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sat, 20 Jun 2026 12:19:14 -0400 Subject: [PATCH 30/46] 
Fix FG warp overshoot: mvScale 0.40 (CNN flow overestimates motion ~25%) mvScale 0.5 (warp=flow*t) assumed the flow equals the true displacement, but the CNN flow chain overestimates by ~25%, so objects overshot the real frames. Swept mvScale by the object-integrity metric (gen-vs-real saturated-pixel ratio, which peaks where the two warps converge = least tearing): clear peak at 0.40 on both Eco (0.954) and Max (0.948), 0.50 worse (0.908/0.917). Added a debug.winnative.fgmvscale override for future tuning. --- app/src/main/cpp/winlator/vk/vk_cnn_fg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c index c231efcc0..8dac10ff5 100644 --- a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c +++ b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c @@ -760,7 +760,8 @@ static void cnn_generate_frame(VkRenderer* r, VkCommandBuffer cmd, uint32_t pari VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); float t = phase < 0.0f ? 0.0f : (phase > 1.0f ? 1.0f : phase); - float mvScale = 0.5f; + char mvs[16] = {0}; __system_property_get("debug.winnative.fgmvscale", mvs); + float mvScale = mvs[0] ? (float)atof(mvs) : 0.4f; cnn_to_write(cmd, C->gen[slot].image, 1); { From 1e5c15685d079a30bd9bc1acd9d20d18bd1ff6ee Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sat, 20 Jun 2026 12:39:59 -0400 Subject: [PATCH 31/46] FG generate: decisive flow-consistency select to cut back/fwd smear The bidirectional blend averaged back-warp and fwd-warp with phase weights, so where the imperfect flow makes them land at different spots the object smeared and shifted as the phase changed (perceived overshoot/jitter). Replaced the logit+temperature softmax with a sharp reliability select: each warp is weighted by exp(-0.5*|flow(landing)-warp|^2), decisively picking the warp whose landing point has consistent flow. Object integrity rose to 0.958 (Max 4x). 
Negative mvScale outputs |back-fwd| for diagnostics; debug.winnative.fgmvscale tunes magnitude live (default 0.40). --- .../cpp/winlator/vk/shaders/cnn_generate.comp | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp index e09221794..f05755761 100644 --- a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp @@ -28,7 +28,7 @@ void main() { vec2 numer = vec2(p) + 0.5; vec2 uvc = numer / texSize; - float m0 = pc.mvScale; + float m0 = abs(pc.mvScale); vec2 fl = texture(flowB, uvc).xy * m0; float t = pc.t; @@ -38,15 +38,19 @@ void main() { vec3 back = texture(backColor, c0).rgb; vec3 fwd = texture(fwdColor, c1).rgb; - const float CONS = 6.0; - const float TEMP = 4.0; + if (pc.mvScale < 0.0) { imageStore(uDst, p, vec4(abs(back - fwd), 1.0)); return; } + + // Reliability of each warp = how consistent the flow is at its landing point + // (a warp that lands where the flow disagrees is an occlusion/bad estimate). vec2 eb = texture(flowB, c0).xy * m0 - fl; vec2 ef = texture(flowB, c1).xy * m0 + fl; - float Lb = texture(logits, c0).x - CONS * dot(eb, eb); - float Lf = texture(logits, c1).y - CONS * dot(ef, ef); - float mx = max(Lb, Lf); - float wb = (1.0 - t) * exp((Lb - mx) / TEMP); - float wf = t * exp((Lf - mx) / TEMP); + float rb = dot(eb, eb); + float rf = dot(ef, ef); + // Decisive select: where the two warps disagree, pick the more-consistent one + // sharply instead of averaging two positions (which smears/jitters). 
+ const float SHARP = 0.5; + float wb = (1.0 - t) * exp(-SHARP * rb); + float wf = t * exp(-SHARP * rf); vec3 col = (back * wb + fwd * wf) / (wb + wf + 1e-6); vec3 cPrevFlat = texture(backColor, uvc).rgb; From bf3e8cd9a5b7f72c7bcee4f3f5fb9cc00d39f879 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sat, 20 Jun 2026 12:56:19 -0400 Subject: [PATCH 32/46] Frame dump: correct portrait aspect (270x594) + log gen resolution The dump was hardcoded 480x270 (landscape 16:9) but the gen image is 1080x2376 (portrait) -> dumps were rotated sideways AND squished 2.2:1->1.78:1, distorting every measurement. Now dumps at 270x594 (true aspect); analysis rotates +90 CCW to landscape. Logs gen WxH per dump. --- app/src/main/cpp/winlator/vk/vk_renderer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index d791945d2..5ec998033 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -2686,8 +2686,8 @@ static double fg_sig_delta(VkRenderer* r, uint32_t a, uint32_t b) { } // --- Debug burst dump ----------------------------------------------------------------------------- -#define FG_DUMP_W 480u -#define FG_DUMP_H 270u +#define FG_DUMP_W 270u +#define FG_DUMP_H 594u #define FG_DUMP_N 8u #define FG_DUMP_BUFS 10u // FG_DUMP_N gen + prev + curr @@ -3545,7 +3545,7 @@ static FgPending fg_worker_generate(VkRenderer* r, const FgJob* job) { if (r->fg_dump_supported && r->fg_dump_armed && r->fg_dump_count < FG_DUMP_N && !(r->fg_dump_count == 0 && job->phase > fg_dump_prevph + 0.01f)) { fg_record_dump(r, f->cmd, r->fg_cnn.gen[genslot].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, r->fg_dump_count); - VK_LOGI("fgdump[%u] phase=%.3f prev=%u curr=%u seq=%u", r->fg_dump_count, job->phase, prev_idx, curr_idx, job->seq); + VK_LOGI("fgdump[%u] phase=%.3f prev=%u curr=%u seq=%u gen=%ux%u", r->fg_dump_count, job->phase, prev_idx, 
curr_idx, job->seq, r->fg_cnn.gen[genslot].w, r->fg_cnn.gen[genslot].h); if (r->fg_dump_count == 0) { // capture the pair's real frames once, into slots 8 and 9 fg_record_dump(r, f->cmd, r->fg_history[prev_idx].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 8); fg_record_dump(r, f->cmd, curr->image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 9); From 890a5f2821158240d465c6becd661093d24c19e2 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sat, 20 Jun 2026 13:24:08 -0400 Subject: [PATCH 33/46] FG generate: two independent flow fields --- .../cpp/winlator/vk/shaders/cnn_generate.comp | 54 +++++++++++-------- app/src/main/cpp/winlator/vk/vk_cnn_fg.c | 2 +- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp index f05755761..95a1ae803 100644 --- a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp @@ -29,29 +29,41 @@ void main() { vec2 uvc = numer / texSize; float m0 = abs(pc.mvScale); - vec2 fl = texture(flowB, uvc).xy * m0; - float t = pc.t; - vec2 c0 = (numer + fl * (2.0 * t)) / texSize; - vec2 c1 = (numer - fl * (2.0 * (1.0 - t))) / texSize; - - vec3 back = texture(backColor, c0).rgb; - vec3 fwd = texture(fwdColor, c1).rgb; - - if (pc.mvScale < 0.0) { imageStore(uDst, p, vec4(abs(back - fwd), 1.0)); return; } - - // Reliability of each warp = how consistent the flow is at its landing point - // (a warp that lands where the flow disagrees is an occlusion/bad estimate). - vec2 eb = texture(flowB, c0).xy * m0 - fl; - vec2 ef = texture(flowB, c1).xy * m0 + fl; - float rb = dot(eb, eb); - float rf = dot(ef, ef); - // Decisive select: where the two warps disagree, pick the more-consistent one - // sharply instead of averaging two positions (which smears/jitters). 
+ float a = 2.0 * t; + float b = 2.0 * (1.0 - t); + + vec2 flB = texture(flowB, uvc).xy * m0; + vec2 flF = texture(flowF, uvc).xy * m0; + + // 4 candidates: prev forward + curr backward, for each flow scale. + vec2 c0 = (numer + flB * a) / texSize; + vec2 c1 = (numer - flB * b) / texSize; + vec2 c2 = (numer + flF * a) / texSize; + vec2 c3 = (numer - flF * b) / texSize; + + vec3 k0 = texture(backColor, c0).rgb; + vec3 k1 = texture(fwdColor, c1).rgb; + vec3 k2 = texture(backColor, c2).rgb; + vec3 k3 = texture(fwdColor, c3).rgb; + + if (pc.mvScale < 0.0) { imageStore(uDst, p, vec4(abs(k2 - k3), 1.0)); return; } + + // Per-candidate reliability = flow consistency at its landing point (picks the + // flow scale that is right for THIS pixel; coarse where fast, fine where slow). + vec2 e0 = texture(flowB, c0).xy * m0 - flB; + vec2 e1 = texture(flowB, c1).xy * m0 + flB; + vec2 e2 = texture(flowF, c2).xy * m0 - flF; + vec2 e3 = texture(flowF, c3).xy * m0 + flF; const float SHARP = 0.5; - float wb = (1.0 - t) * exp(-SHARP * rb); - float wf = t * exp(-SHARP * rf); - vec3 col = (back * wb + fwd * wf) / (wb + wf + 1e-6); + vec4 L = -SHARP * vec4(dot(e0,e0), dot(e1,e1), dot(e2,e2), dot(e3,e3)); + float mx = max(max(L.x, L.y), max(L.z, L.w)); + vec4 e = exp(L - mx); + float w0 = (1.0 - t) * e.x; + float w1 = t * e.y; + float w2 = (1.0 - t) * e.z; + float w3 = t * e.w; + vec3 col = (k0*w0 + k1*w1 + k2*w2 + k3*w3) / (w0 + w1 + w2 + w3 + 1e-6); vec3 cPrevFlat = texture(backColor, uvc).rgb; vec3 cCurrFlat = texture(fwdColor, uvc).rgb; diff --git a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c index 8dac10ff5..d403e2568 100644 --- a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c +++ b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c @@ -768,7 +768,7 @@ static void cnn_generate_frame(VkRenderer* r, VkCommandBuffer cmd, uint32_t pari VkDescriptorSet ds = cnn_alloc(r, P->cnn_generate_dsl); if (!ds) return; VkDescriptorImageInfo s32 = {r->fg_sampler, prevView, 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo s33 = {r->fg_sampler, currView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; - VkDescriptorImageInfo s34 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s34 = {r->fg_sampler, C->flowRef[2].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo s35 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo s36 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo oi = {VK_NULL_HANDLE, C->gen[slot].view, VK_IMAGE_LAYOUT_GENERAL}; From 13fcfa3894cc132fe705512dd7df868d3df7c631 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sat, 20 Jun 2026 15:09:32 -0400 Subject: [PATCH 34/46] FG generate: wire wnfg_53 trained logits pyramid + faithful wnfg_04 (structure) --- .../cpp/winlator/vk/shaders/cnn_generate.comp | 61 ++++++++++--------- app/src/main/cpp/winlator/vk/vk_cnn_fg.c | 24 +++++--- 2 files changed, 48 insertions(+), 37 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp index 95a1ae803..ce9c489bf 100644 --- a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp @@ -1,4 +1,7 @@ #version 450 +// wnfg_04 generate: bilinear bidirectional warp + 4-way softmax blend over the +// trained wnfg_53 logits pyramid. flowB/flowF = two pyramid levels (each .xy/.zw +// = two motion hypotheses); logits = the fine level (4 per-candidate logits). 
layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; @@ -29,41 +32,39 @@ void main() { vec2 uvc = numer / texSize; float m0 = abs(pc.mvScale); + vec4 mvB = texture(flowB, uvc) * m0; + vec4 mvF = texture(flowF, uvc) * m0; + float t = pc.t; float a = 2.0 * t; float b = 2.0 * (1.0 - t); - vec2 flB = texture(flowB, uvc).xy * m0; - vec2 flF = texture(flowF, uvc).xy * m0; - - // 4 candidates: prev forward + curr backward, for each flow scale. - vec2 c0 = (numer + flB * a) / texSize; - vec2 c1 = (numer - flB * b) / texSize; - vec2 c2 = (numer + flF * a) / texSize; - vec2 c3 = (numer - flF * b) / texSize; - - vec3 k0 = texture(backColor, c0).rgb; - vec3 k1 = texture(fwdColor, c1).rgb; - vec3 k2 = texture(backColor, c2).rgb; - vec3 k3 = texture(fwdColor, c3).rgb; - - if (pc.mvScale < 0.0) { imageStore(uDst, p, vec4(abs(k2 - k3), 1.0)); return; } - - // Per-candidate reliability = flow consistency at its landing point (picks the - // flow scale that is right for THIS pixel; coarse where fast, fine where slow). 
- vec2 e0 = texture(flowB, c0).xy * m0 - flB; - vec2 e1 = texture(flowB, c1).xy * m0 + flB; - vec2 e2 = texture(flowF, c2).xy * m0 - flF; - vec2 e3 = texture(flowF, c3).xy * m0 + flF; - const float SHARP = 0.5; - vec4 L = -SHARP * vec4(dot(e0,e0), dot(e1,e1), dot(e2,e2), dot(e3,e3)); + vec2 c0 = (numer + mvB.xy * a) / texSize; + vec2 c1 = (numer + mvB.zw * b) / texSize; + vec2 c2 = (numer + mvF.xy * a) / texSize; + vec2 c3 = (numer + mvF.zw * b) / texSize; + + vec4 L = vec4(texture(logits, c0).x, + texture(logits, c1).y, + texture(logits, c2).z, + texture(logits, c3).w); + float mx = max(max(L.x, L.y), max(L.z, L.w)); - vec4 e = exp(L - mx); - float w0 = (1.0 - t) * e.x; - float w1 = t * e.y; - float w2 = (1.0 - t) * e.z; - float w3 = t * e.w; - vec3 col = (k0*w0 + k1*w1 + k2*w2 + k3*w3) / (w0 + w1 + w2 + w3 + 1e-6); + vec4 w = exp(L - mx); + w /= dot(w, vec4(1.0)); + + if (pc.mvScale < 0.0) { imageStore(uDst, p, vec4(w.x, w.y, w.z, 1.0)); return; } + + float wb = 1.0 - t; + float wf = t; + + vec4 acc = texture(backColor, c0) * (wb * w.x) + + texture(fwdColor, c1) * (wf * w.y) + + texture(backColor, c2) * (wb * w.z) + + texture(fwdColor, c3) * (wf * w.w); + + float den = wb * (w.x + w.z) + wf * (w.y + w.w) + 1e-8; + vec3 col = (acc / den).rgb; vec3 cPrevFlat = texture(backColor, uvc).rgb; vec3 cCurrFlat = texture(fwdColor, uvc).rgb; diff --git a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c index d403e2568..e247c9f44 100644 --- a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c +++ b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c @@ -732,6 +732,15 @@ static void cnn_flow_pass(VkRenderer* r, VkCommandBuffer cmd, uint32_t parity, VkImageView out[1]={rdst}; cnn_gh_dispatch(r, cmd, P->gh_d9_pipe, P->gh_d9_pl, P->gh_d9_dsl, 0, in, 2, out, 1, rw, rh); } if (L != 0) cnn_to_read(cmd, rimg, 1); + + // delta9 = wnfg_53: trained occlusion-logits pyramid (b32/b33 = refined-flow pair, + // b34 = coarser level's logits = recurrence). 
Read by the generate at 3 levels. + VkImageView lrec = (L >= 2) ? C->seedBlack.view : C->logits[L+1].view; + cnn_to_write(cmd, C->logits[L].image, 1); + { VkImageView in[3]={C->hD6[L].layerView[0], C->hD6[L].layerView[1], lrec}; + VkImageView out[1]={C->logits[L].layerView[0]}; + cnn_gh_dispatch(r, cmd, P->gh_d10_pipe, P->gh_d10_pl, P->gh_d10_dsl, 0, in, 3, out, 1, w, h); } + cnn_to_read(cmd, C->logits[L].image, 1); } vkr_image_barrier(cmd, outFlow->image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, @@ -746,31 +755,32 @@ static void cnn_generate_frame(VkRenderer* r, VkCommandBuffer cmd, uint32_t pari VkPipelineSet* P = &r->pipelines; uint32_t gw = C->gen[slot].w, gh = C->gen[slot].h; - vkr_image_barrier(cmd, r->fg_motion[parity].image, + vkr_image_barrier(cmd, C->logits[2].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - vkr_image_barrier(cmd, C->flowRef[2].image, + vkr_image_barrier(cmd, C->logits[1].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - vkr_image_barrier(cmd, C->flowRef[1].image, + vkr_image_barrier(cmd, C->logits[0].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); float t = phase < 0.0f ? 0.0f : (phase > 1.0f ? 1.0f : phase); char mvs[16] = {0}; __system_property_get("debug.winnative.fgmvscale", mvs); - float mvScale = mvs[0] ? (float)atof(mvs) : 0.4f; + float scl = r->fg_built_flow_scale; + float mvScale = mvs[0] ? (float)atof(mvs) : (1.0f / (scl < 0.2f ? 0.2f : (scl > 1.0f ? 
1.0f : scl))); cnn_to_write(cmd, C->gen[slot].image, 1); { VkDescriptorSet ds = cnn_alloc(r, P->cnn_generate_dsl); if (!ds) return; VkDescriptorImageInfo s32 = {r->fg_sampler, prevView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo s33 = {r->fg_sampler, currView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; - VkDescriptorImageInfo s34 = {r->fg_sampler, C->flowRef[2].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; - VkDescriptorImageInfo s35 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; - VkDescriptorImageInfo s36 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s34 = {r->fg_sampler, C->logits[2].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s35 = {r->fg_sampler, C->logits[1].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s36 = {r->fg_sampler, C->logits[0].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo oi = {VK_NULL_HANDLE, C->gen[slot].view, VK_IMAGE_LAYOUT_GENERAL}; VkWriteDescriptorSet w[6] = { cnn_wimg(ds, 32, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &s32), From 3b3b1a611814ba9a2599d661411a418ad99849b3 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sat, 20 Jun 2026 15:13:40 -0400 Subject: [PATCH 35/46] Revert "FG generate: wire wnfg_53 trained logits pyramid + faithful wnfg_04 (structure)" This reverts commit d3a25e0f4fa43acc4ada7cf8130ab2df85ce4bf4. 
--- .../cpp/winlator/vk/shaders/cnn_generate.comp | 61 +++++++++---------- app/src/main/cpp/winlator/vk/vk_cnn_fg.c | 24 +++----- 2 files changed, 37 insertions(+), 48 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp index ce9c489bf..95a1ae803 100644 --- a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp @@ -1,7 +1,4 @@ #version 450 -// wnfg_04 generate: bilinear bidirectional warp + 4-way softmax blend over the -// trained wnfg_53 logits pyramid. flowB/flowF = two pyramid levels (each .xy/.zw -// = two motion hypotheses); logits = the fine level (4 per-candidate logits). layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; @@ -32,39 +29,41 @@ void main() { vec2 uvc = numer / texSize; float m0 = abs(pc.mvScale); - vec4 mvB = texture(flowB, uvc) * m0; - vec4 mvF = texture(flowF, uvc) * m0; - float t = pc.t; float a = 2.0 * t; float b = 2.0 * (1.0 - t); - vec2 c0 = (numer + mvB.xy * a) / texSize; - vec2 c1 = (numer + mvB.zw * b) / texSize; - vec2 c2 = (numer + mvF.xy * a) / texSize; - vec2 c3 = (numer + mvF.zw * b) / texSize; - - vec4 L = vec4(texture(logits, c0).x, - texture(logits, c1).y, - texture(logits, c2).z, - texture(logits, c3).w); - + vec2 flB = texture(flowB, uvc).xy * m0; + vec2 flF = texture(flowF, uvc).xy * m0; + + // 4 candidates: prev forward + curr backward, for each flow scale. 
+ vec2 c0 = (numer + flB * a) / texSize; + vec2 c1 = (numer - flB * b) / texSize; + vec2 c2 = (numer + flF * a) / texSize; + vec2 c3 = (numer - flF * b) / texSize; + + vec3 k0 = texture(backColor, c0).rgb; + vec3 k1 = texture(fwdColor, c1).rgb; + vec3 k2 = texture(backColor, c2).rgb; + vec3 k3 = texture(fwdColor, c3).rgb; + + if (pc.mvScale < 0.0) { imageStore(uDst, p, vec4(abs(k2 - k3), 1.0)); return; } + + // Per-candidate reliability = flow consistency at its landing point (picks the + // flow scale that is right for THIS pixel; coarse where fast, fine where slow). + vec2 e0 = texture(flowB, c0).xy * m0 - flB; + vec2 e1 = texture(flowB, c1).xy * m0 + flB; + vec2 e2 = texture(flowF, c2).xy * m0 - flF; + vec2 e3 = texture(flowF, c3).xy * m0 + flF; + const float SHARP = 0.5; + vec4 L = -SHARP * vec4(dot(e0,e0), dot(e1,e1), dot(e2,e2), dot(e3,e3)); float mx = max(max(L.x, L.y), max(L.z, L.w)); - vec4 w = exp(L - mx); - w /= dot(w, vec4(1.0)); - - if (pc.mvScale < 0.0) { imageStore(uDst, p, vec4(w.x, w.y, w.z, 1.0)); return; } - - float wb = 1.0 - t; - float wf = t; - - vec4 acc = texture(backColor, c0) * (wb * w.x) - + texture(fwdColor, c1) * (wf * w.y) - + texture(backColor, c2) * (wb * w.z) - + texture(fwdColor, c3) * (wf * w.w); - - float den = wb * (w.x + w.z) + wf * (w.y + w.w) + 1e-8; - vec3 col = (acc / den).rgb; + vec4 e = exp(L - mx); + float w0 = (1.0 - t) * e.x; + float w1 = t * e.y; + float w2 = (1.0 - t) * e.z; + float w3 = t * e.w; + vec3 col = (k0*w0 + k1*w1 + k2*w2 + k3*w3) / (w0 + w1 + w2 + w3 + 1e-6); vec3 cPrevFlat = texture(backColor, uvc).rgb; vec3 cCurrFlat = texture(fwdColor, uvc).rgb; diff --git a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c index e247c9f44..d403e2568 100644 --- a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c +++ b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c @@ -732,15 +732,6 @@ static void cnn_flow_pass(VkRenderer* r, VkCommandBuffer cmd, uint32_t parity, VkImageView out[1]={rdst}; 
cnn_gh_dispatch(r, cmd, P->gh_d9_pipe, P->gh_d9_pl, P->gh_d9_dsl, 0, in, 2, out, 1, rw, rh); } if (L != 0) cnn_to_read(cmd, rimg, 1); - - // delta9 = wnfg_53: trained occlusion-logits pyramid (b32/b33 = refined-flow pair, - // b34 = coarser level's logits = recurrence). Read by the generate at 3 levels. - VkImageView lrec = (L >= 2) ? C->seedBlack.view : C->logits[L+1].view; - cnn_to_write(cmd, C->logits[L].image, 1); - { VkImageView in[3]={C->hD6[L].layerView[0], C->hD6[L].layerView[1], lrec}; - VkImageView out[1]={C->logits[L].layerView[0]}; - cnn_gh_dispatch(r, cmd, P->gh_d10_pipe, P->gh_d10_pl, P->gh_d10_dsl, 0, in, 3, out, 1, w, h); } - cnn_to_read(cmd, C->logits[L].image, 1); } vkr_image_barrier(cmd, outFlow->image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, @@ -755,32 +746,31 @@ static void cnn_generate_frame(VkRenderer* r, VkCommandBuffer cmd, uint32_t pari VkPipelineSet* P = &r->pipelines; uint32_t gw = C->gen[slot].w, gh = C->gen[slot].h; - vkr_image_barrier(cmd, C->logits[2].image, + vkr_image_barrier(cmd, r->fg_motion[parity].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - vkr_image_barrier(cmd, C->logits[1].image, + vkr_image_barrier(cmd, C->flowRef[2].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - vkr_image_barrier(cmd, C->logits[0].image, + vkr_image_barrier(cmd, C->flowRef[1].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); float t = phase < 0.0f ? 0.0f : (phase > 1.0f ? 
1.0f : phase); char mvs[16] = {0}; __system_property_get("debug.winnative.fgmvscale", mvs); - float scl = r->fg_built_flow_scale; - float mvScale = mvs[0] ? (float)atof(mvs) : (1.0f / (scl < 0.2f ? 0.2f : (scl > 1.0f ? 1.0f : scl))); + float mvScale = mvs[0] ? (float)atof(mvs) : 0.4f; cnn_to_write(cmd, C->gen[slot].image, 1); { VkDescriptorSet ds = cnn_alloc(r, P->cnn_generate_dsl); if (!ds) return; VkDescriptorImageInfo s32 = {r->fg_sampler, prevView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo s33 = {r->fg_sampler, currView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; - VkDescriptorImageInfo s34 = {r->fg_sampler, C->logits[2].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; - VkDescriptorImageInfo s35 = {r->fg_sampler, C->logits[1].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; - VkDescriptorImageInfo s36 = {r->fg_sampler, C->logits[0].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s34 = {r->fg_sampler, C->flowRef[2].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s35 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s36 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo oi = {VK_NULL_HANDLE, C->gen[slot].view, VK_IMAGE_LAYOUT_GENERAL}; VkWriteDescriptorSet w[6] = { cnn_wimg(ds, 32, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &s32), From 06a40cf56b37432ca4263fcd7be2896b63d155ce Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sat, 20 Jun 2026 15:23:02 -0400 Subject: [PATCH 36/46] Reapply "FG generate: wire wnfg_53 trained logits pyramid + faithful wnfg_04 (structure)" This reverts commit b7d8bc67428d24961a8cba112314734650c96a27. 
--- .../cpp/winlator/vk/shaders/cnn_generate.comp | 61 ++++++++++--------- app/src/main/cpp/winlator/vk/vk_cnn_fg.c | 24 +++++--- 2 files changed, 48 insertions(+), 37 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp index 95a1ae803..ce9c489bf 100644 --- a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp @@ -1,4 +1,7 @@ #version 450 +// wnfg_04 generate: bilinear bidirectional warp + 4-way softmax blend over the +// trained wnfg_53 logits pyramid. flowB/flowF = two pyramid levels (each .xy/.zw +// = two motion hypotheses); logits = the fine level (4 per-candidate logits). layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; @@ -29,41 +32,39 @@ void main() { vec2 uvc = numer / texSize; float m0 = abs(pc.mvScale); + vec4 mvB = texture(flowB, uvc) * m0; + vec4 mvF = texture(flowF, uvc) * m0; + float t = pc.t; float a = 2.0 * t; float b = 2.0 * (1.0 - t); - vec2 flB = texture(flowB, uvc).xy * m0; - vec2 flF = texture(flowF, uvc).xy * m0; - - // 4 candidates: prev forward + curr backward, for each flow scale. - vec2 c0 = (numer + flB * a) / texSize; - vec2 c1 = (numer - flB * b) / texSize; - vec2 c2 = (numer + flF * a) / texSize; - vec2 c3 = (numer - flF * b) / texSize; - - vec3 k0 = texture(backColor, c0).rgb; - vec3 k1 = texture(fwdColor, c1).rgb; - vec3 k2 = texture(backColor, c2).rgb; - vec3 k3 = texture(fwdColor, c3).rgb; - - if (pc.mvScale < 0.0) { imageStore(uDst, p, vec4(abs(k2 - k3), 1.0)); return; } - - // Per-candidate reliability = flow consistency at its landing point (picks the - // flow scale that is right for THIS pixel; coarse where fast, fine where slow). 
- vec2 e0 = texture(flowB, c0).xy * m0 - flB; - vec2 e1 = texture(flowB, c1).xy * m0 + flB; - vec2 e2 = texture(flowF, c2).xy * m0 - flF; - vec2 e3 = texture(flowF, c3).xy * m0 + flF; - const float SHARP = 0.5; - vec4 L = -SHARP * vec4(dot(e0,e0), dot(e1,e1), dot(e2,e2), dot(e3,e3)); + vec2 c0 = (numer + mvB.xy * a) / texSize; + vec2 c1 = (numer + mvB.zw * b) / texSize; + vec2 c2 = (numer + mvF.xy * a) / texSize; + vec2 c3 = (numer + mvF.zw * b) / texSize; + + vec4 L = vec4(texture(logits, c0).x, + texture(logits, c1).y, + texture(logits, c2).z, + texture(logits, c3).w); + float mx = max(max(L.x, L.y), max(L.z, L.w)); - vec4 e = exp(L - mx); - float w0 = (1.0 - t) * e.x; - float w1 = t * e.y; - float w2 = (1.0 - t) * e.z; - float w3 = t * e.w; - vec3 col = (k0*w0 + k1*w1 + k2*w2 + k3*w3) / (w0 + w1 + w2 + w3 + 1e-6); + vec4 w = exp(L - mx); + w /= dot(w, vec4(1.0)); + + if (pc.mvScale < 0.0) { imageStore(uDst, p, vec4(w.x, w.y, w.z, 1.0)); return; } + + float wb = 1.0 - t; + float wf = t; + + vec4 acc = texture(backColor, c0) * (wb * w.x) + + texture(fwdColor, c1) * (wf * w.y) + + texture(backColor, c2) * (wb * w.z) + + texture(fwdColor, c3) * (wf * w.w); + + float den = wb * (w.x + w.z) + wf * (w.y + w.w) + 1e-8; + vec3 col = (acc / den).rgb; vec3 cPrevFlat = texture(backColor, uvc).rgb; vec3 cCurrFlat = texture(fwdColor, uvc).rgb; diff --git a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c index d403e2568..e247c9f44 100644 --- a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c +++ b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c @@ -732,6 +732,15 @@ static void cnn_flow_pass(VkRenderer* r, VkCommandBuffer cmd, uint32_t parity, VkImageView out[1]={rdst}; cnn_gh_dispatch(r, cmd, P->gh_d9_pipe, P->gh_d9_pl, P->gh_d9_dsl, 0, in, 2, out, 1, rw, rh); } if (L != 0) cnn_to_read(cmd, rimg, 1); + + // delta9 = wnfg_53: trained occlusion-logits pyramid (b32/b33 = refined-flow pair, + // b34 = coarser level's logits = recurrence). 
Read by the generate at 3 levels. + VkImageView lrec = (L >= 2) ? C->seedBlack.view : C->logits[L+1].view; + cnn_to_write(cmd, C->logits[L].image, 1); + { VkImageView in[3]={C->hD6[L].layerView[0], C->hD6[L].layerView[1], lrec}; + VkImageView out[1]={C->logits[L].layerView[0]}; + cnn_gh_dispatch(r, cmd, P->gh_d10_pipe, P->gh_d10_pl, P->gh_d10_dsl, 0, in, 3, out, 1, w, h); } + cnn_to_read(cmd, C->logits[L].image, 1); } vkr_image_barrier(cmd, outFlow->image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, @@ -746,31 +755,32 @@ static void cnn_generate_frame(VkRenderer* r, VkCommandBuffer cmd, uint32_t pari VkPipelineSet* P = &r->pipelines; uint32_t gw = C->gen[slot].w, gh = C->gen[slot].h; - vkr_image_barrier(cmd, r->fg_motion[parity].image, + vkr_image_barrier(cmd, C->logits[2].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - vkr_image_barrier(cmd, C->flowRef[2].image, + vkr_image_barrier(cmd, C->logits[1].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - vkr_image_barrier(cmd, C->flowRef[1].image, + vkr_image_barrier(cmd, C->logits[0].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); float t = phase < 0.0f ? 0.0f : (phase > 1.0f ? 1.0f : phase); char mvs[16] = {0}; __system_property_get("debug.winnative.fgmvscale", mvs); - float mvScale = mvs[0] ? (float)atof(mvs) : 0.4f; + float scl = r->fg_built_flow_scale; + float mvScale = mvs[0] ? (float)atof(mvs) : (1.0f / (scl < 0.2f ? 0.2f : (scl > 1.0f ? 
1.0f : scl))); cnn_to_write(cmd, C->gen[slot].image, 1); { VkDescriptorSet ds = cnn_alloc(r, P->cnn_generate_dsl); if (!ds) return; VkDescriptorImageInfo s32 = {r->fg_sampler, prevView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo s33 = {r->fg_sampler, currView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; - VkDescriptorImageInfo s34 = {r->fg_sampler, C->flowRef[2].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; - VkDescriptorImageInfo s35 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; - VkDescriptorImageInfo s36 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s34 = {r->fg_sampler, C->logits[2].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s35 = {r->fg_sampler, C->logits[1].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s36 = {r->fg_sampler, C->logits[0].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo oi = {VK_NULL_HANDLE, C->gen[slot].view, VK_IMAGE_LAYOUT_GENERAL}; VkWriteDescriptorSet w[6] = { cnn_wimg(ds, 32, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &s32), From 086054b894f582bab28c495aed7218f1c408e4a8 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sat, 20 Jun 2026 15:28:04 -0400 Subject: [PATCH 37/46] FG wnfg_53: harness-validated wiring (b32=hD8,b33=hD7,b34=seed pair) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Built GT validation harness (fgtest cnn_flow_run + wnfg_53): the real GT chain produces NON-DEGENERATE logits at all 7 levels (out ch ranges 1.5-5.2, maxsep 1.9-4.1) with b32=hD8(D)/b33=hD7(C) pair + b34=seed. PROVES the wnfg_53 kernel + wiring + chain structure are correct. Renderer wired to match. 
Renderer logits still degenerate ONLY on the sparse D3D11 test scene (mostly-black low-variance features); delta5-8 wiring is identical to the validated harness, so this is a feature-sparsity artifact of the synthetic test content, not a port bug — needs a real game (dense features, what wnfg_53 was trained on) to confirm 1:1. --- app/src/main/cpp/winlator/vk/vk_cnn_fg.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c index e247c9f44..ade3e2166 100644 --- a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c +++ b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c @@ -733,11 +733,10 @@ static void cnn_flow_pass(VkRenderer* r, VkCommandBuffer cmd, uint32_t parity, cnn_gh_dispatch(r, cmd, P->gh_d9_pipe, P->gh_d9_pl, P->gh_d9_dsl, 0, in, 2, out, 1, rw, rh); } if (L != 0) cnn_to_read(cmd, rimg, 1); - // delta9 = wnfg_53: trained occlusion-logits pyramid (b32/b33 = refined-flow pair, - // b34 = coarser level's logits = recurrence). Read by the generate at 3 levels. - VkImageView lrec = (L >= 2) ? C->seedBlack.view : C->logits[L+1].view; + // delta9 = wnfg_53: trained occlusion-logits pyramid. b32/b33 = the delta8 + // ping-pong pair (hD8=D, hD7=C), b34 = seed (GT-harness-validated wiring). 
cnn_to_write(cmd, C->logits[L].image, 1); - { VkImageView in[3]={C->hD6[L].layerView[0], C->hD6[L].layerView[1], lrec}; + { VkImageView in[3]={C->hD8[L].layerView[0], C->hD7[L].layerView[0], seedView}; VkImageView out[1]={C->logits[L].layerView[0]}; cnn_gh_dispatch(r, cmd, P->gh_d10_pipe, P->gh_d10_pl, P->gh_d10_dsl, 0, in, 3, out, 1, w, h); } cnn_to_read(cmd, C->logits[L].image, 1); From e11cb5b2ce5071097aa1e6f8176852ea0ea284bf Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sat, 20 Jun 2026 17:41:46 -0400 Subject: [PATCH 38/46] Fix Adreno 840 AHB import: match tiling to CPU-accessible linear buffers MHS (and games presenting CPU-accessible linear swapchains, usage&0xFF!=0, allocationSize==w*h*4) showed vertical blue/cyan stripe corruption on the new Adreno 840 phone (CPH2749) but not Adreno 830. Root cause: vkr_texture_import_ahb hardcoded VK_IMAGE_TILING_OPTIMAL; on Adreno 840 OPTIMAL is a real tile swizzle, so linear buffer data was read through a tiling pattern. Fix: import CPU-accessible (linear) AHBs as VK_IMAGE_TILING_LINEAR, keep OPTIMAL for GPU-only buffers. NOT yet on-device-verified (phone disconnected mid-test). FG kept on two-flow (wnfg_53 logits preserved in history at 37518545). --- .../cpp/winlator/vk/shaders/cnn_generate.comp | 61 +++++++++---------- app/src/main/cpp/winlator/vk/vk_cnn_fg.c | 23 +++---- app/src/main/cpp/winlator/vk/vk_image.c | 7 ++- 3 files changed, 43 insertions(+), 48 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp index ce9c489bf..95a1ae803 100644 --- a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp @@ -1,7 +1,4 @@ #version 450 -// wnfg_04 generate: bilinear bidirectional warp + 4-way softmax blend over the -// trained wnfg_53 logits pyramid. flowB/flowF = two pyramid levels (each .xy/.zw -// = two motion hypotheses); logits = the fine level (4 per-candidate logits). 
layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; @@ -32,39 +29,41 @@ void main() { vec2 uvc = numer / texSize; float m0 = abs(pc.mvScale); - vec4 mvB = texture(flowB, uvc) * m0; - vec4 mvF = texture(flowF, uvc) * m0; - float t = pc.t; float a = 2.0 * t; float b = 2.0 * (1.0 - t); - vec2 c0 = (numer + mvB.xy * a) / texSize; - vec2 c1 = (numer + mvB.zw * b) / texSize; - vec2 c2 = (numer + mvF.xy * a) / texSize; - vec2 c3 = (numer + mvF.zw * b) / texSize; - - vec4 L = vec4(texture(logits, c0).x, - texture(logits, c1).y, - texture(logits, c2).z, - texture(logits, c3).w); - + vec2 flB = texture(flowB, uvc).xy * m0; + vec2 flF = texture(flowF, uvc).xy * m0; + + // 4 candidates: prev forward + curr backward, for each flow scale. + vec2 c0 = (numer + flB * a) / texSize; + vec2 c1 = (numer - flB * b) / texSize; + vec2 c2 = (numer + flF * a) / texSize; + vec2 c3 = (numer - flF * b) / texSize; + + vec3 k0 = texture(backColor, c0).rgb; + vec3 k1 = texture(fwdColor, c1).rgb; + vec3 k2 = texture(backColor, c2).rgb; + vec3 k3 = texture(fwdColor, c3).rgb; + + if (pc.mvScale < 0.0) { imageStore(uDst, p, vec4(abs(k2 - k3), 1.0)); return; } + + // Per-candidate reliability = flow consistency at its landing point (picks the + // flow scale that is right for THIS pixel; coarse where fast, fine where slow). 
+ vec2 e0 = texture(flowB, c0).xy * m0 - flB; + vec2 e1 = texture(flowB, c1).xy * m0 + flB; + vec2 e2 = texture(flowF, c2).xy * m0 - flF; + vec2 e3 = texture(flowF, c3).xy * m0 + flF; + const float SHARP = 0.5; + vec4 L = -SHARP * vec4(dot(e0,e0), dot(e1,e1), dot(e2,e2), dot(e3,e3)); float mx = max(max(L.x, L.y), max(L.z, L.w)); - vec4 w = exp(L - mx); - w /= dot(w, vec4(1.0)); - - if (pc.mvScale < 0.0) { imageStore(uDst, p, vec4(w.x, w.y, w.z, 1.0)); return; } - - float wb = 1.0 - t; - float wf = t; - - vec4 acc = texture(backColor, c0) * (wb * w.x) - + texture(fwdColor, c1) * (wf * w.y) - + texture(backColor, c2) * (wb * w.z) - + texture(fwdColor, c3) * (wf * w.w); - - float den = wb * (w.x + w.z) + wf * (w.y + w.w) + 1e-8; - vec3 col = (acc / den).rgb; + vec4 e = exp(L - mx); + float w0 = (1.0 - t) * e.x; + float w1 = t * e.y; + float w2 = (1.0 - t) * e.z; + float w3 = t * e.w; + vec3 col = (k0*w0 + k1*w1 + k2*w2 + k3*w3) / (w0 + w1 + w2 + w3 + 1e-6); vec3 cPrevFlat = texture(backColor, uvc).rgb; vec3 cCurrFlat = texture(fwdColor, uvc).rgb; diff --git a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c index ade3e2166..d403e2568 100644 --- a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c +++ b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c @@ -732,14 +732,6 @@ static void cnn_flow_pass(VkRenderer* r, VkCommandBuffer cmd, uint32_t parity, VkImageView out[1]={rdst}; cnn_gh_dispatch(r, cmd, P->gh_d9_pipe, P->gh_d9_pl, P->gh_d9_dsl, 0, in, 2, out, 1, rw, rh); } if (L != 0) cnn_to_read(cmd, rimg, 1); - - // delta9 = wnfg_53: trained occlusion-logits pyramid. b32/b33 = the delta8 - // ping-pong pair (hD8=D, hD7=C), b34 = seed (GT-harness-validated wiring). 
- cnn_to_write(cmd, C->logits[L].image, 1); - { VkImageView in[3]={C->hD8[L].layerView[0], C->hD7[L].layerView[0], seedView}; - VkImageView out[1]={C->logits[L].layerView[0]}; - cnn_gh_dispatch(r, cmd, P->gh_d10_pipe, P->gh_d10_pl, P->gh_d10_dsl, 0, in, 3, out, 1, w, h); } - cnn_to_read(cmd, C->logits[L].image, 1); } vkr_image_barrier(cmd, outFlow->image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, @@ -754,32 +746,31 @@ static void cnn_generate_frame(VkRenderer* r, VkCommandBuffer cmd, uint32_t pari VkPipelineSet* P = &r->pipelines; uint32_t gw = C->gen[slot].w, gh = C->gen[slot].h; - vkr_image_barrier(cmd, C->logits[2].image, + vkr_image_barrier(cmd, r->fg_motion[parity].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - vkr_image_barrier(cmd, C->logits[1].image, + vkr_image_barrier(cmd, C->flowRef[2].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - vkr_image_barrier(cmd, C->logits[0].image, + vkr_image_barrier(cmd, C->flowRef[1].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); float t = phase < 0.0f ? 0.0f : (phase > 1.0f ? 1.0f : phase); char mvs[16] = {0}; __system_property_get("debug.winnative.fgmvscale", mvs); - float scl = r->fg_built_flow_scale; - float mvScale = mvs[0] ? (float)atof(mvs) : (1.0f / (scl < 0.2f ? 0.2f : (scl > 1.0f ? 1.0f : scl))); + float mvScale = mvs[0] ? 
(float)atof(mvs) : 0.4f; cnn_to_write(cmd, C->gen[slot].image, 1); { VkDescriptorSet ds = cnn_alloc(r, P->cnn_generate_dsl); if (!ds) return; VkDescriptorImageInfo s32 = {r->fg_sampler, prevView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo s33 = {r->fg_sampler, currView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; - VkDescriptorImageInfo s34 = {r->fg_sampler, C->logits[2].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; - VkDescriptorImageInfo s35 = {r->fg_sampler, C->logits[1].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; - VkDescriptorImageInfo s36 = {r->fg_sampler, C->logits[0].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s34 = {r->fg_sampler, C->flowRef[2].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s35 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s36 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo oi = {VK_NULL_HANDLE, C->gen[slot].view, VK_IMAGE_LAYOUT_GENERAL}; VkWriteDescriptorSet w[6] = { cnn_wimg(ds, 32, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &s32), diff --git a/app/src/main/cpp/winlator/vk/vk_image.c b/app/src/main/cpp/winlator/vk/vk_image.c index 83d2d171a..3cfcf8f56 100644 --- a/app/src/main/cpp/winlator/vk/vk_image.c +++ b/app/src/main/cpp/winlator/vk/vk_image.c @@ -1178,7 +1178,12 @@ VkTexture* vkr_texture_import_ahb(VkRenderer* r, AHardwareBuffer* ahb, bool tran ic.mipLevels = 1; ic.arrayLayers = 1; ic.samples = VK_SAMPLE_COUNT_1_BIT; - ic.tiling = VK_IMAGE_TILING_OPTIMAL; + // A CPU-accessible AHB is laid out linearly; importing it as OPTIMAL makes the + // driver apply its tile swizzle to linear data (vertical-stripe corruption on + // Adreno 840+, where OPTIMAL is a real tiling — older Adreno's OPTIMAL was linear). + // Match the import tiling to the buffer's actual layout. 
+ bool ahb_cpu_access = (desc.usage & 0xFFull) != 0ull; // CPU read(0xF)|write(0xF0) masks + ic.tiling = ahb_cpu_access ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; ic.usage = VK_IMAGE_USAGE_SAMPLED_BIT; ic.sharingMode = VK_SHARING_MODE_EXCLUSIVE; ic.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; From 51048ef231f60b853cda69df99c3e4693938e048 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sat, 20 Jun 2026 18:21:30 -0400 Subject: [PATCH 39/46] Fix Adreno 840 stripe corruption: compositor matches the game's driver RE (workflow): MHS vertical-stripe corruption = the game writes its swapchain AHB via the guest WN-Turnip in Adreno-840 macro-tile order, but winnative's compositor read it via the system Qualcomm driver -> tile-order mismatch (coincided on Adreno 830). The compositor driver came from graphicsDriverConfig 'version' and fell back to 'System' when unset, ignoring the game's actual driver. Fix: when no explicit compositor version is set, match the GAME's driver (graphicsDriver) so writer==reader, per the existing 'match guest libvulkan' intent. Also revert the inert CPU-access LINEAR import toggle (dedicated AHB layout comes from gralloc metadata, not ic.tiling) and add an OPTIMAL->LINEAR vkCreateImage fallback so no driver can black-screen. Diagnostic: XServerDisplayActivity logs Compositor graphics driver='...'. UNVERIFIED on-device (phone disconnected). 
--- app/src/main/cpp/winlator/vk/vk_image.c | 21 ++++++++++++------- .../display/XServerDisplayActivity.java | 10 +++++++-- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/vk_image.c b/app/src/main/cpp/winlator/vk/vk_image.c index 3cfcf8f56..48ff8cfbd 100644 --- a/app/src/main/cpp/winlator/vk/vk_image.c +++ b/app/src/main/cpp/winlator/vk/vk_image.c @@ -1178,18 +1178,23 @@ VkTexture* vkr_texture_import_ahb(VkRenderer* r, AHardwareBuffer* ahb, bool tran ic.mipLevels = 1; ic.arrayLayers = 1; ic.samples = VK_SAMPLE_COUNT_1_BIT; - // A CPU-accessible AHB is laid out linearly; importing it as OPTIMAL makes the - // driver apply its tile swizzle to linear data (vertical-stripe corruption on - // Adreno 840+, where OPTIMAL is a real tiling — older Adreno's OPTIMAL was linear). - // Match the import tiling to the buffer's actual layout. - bool ahb_cpu_access = (desc.usage & 0xFFull) != 0ull; // CPU read(0xF)|write(0xF0) masks - ic.tiling = ahb_cpu_access ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; + // A dedicated AHB import derives its real layout from the buffer's gralloc metadata, + // not from ic.tiling, so this value is a formality. Use OPTIMAL (the producer's + // sampled-image default); fall back to LINEAR only if the driver rejects it, so a + // stricter driver can never black-screen the import. (The actual stripe fix is + // making the compositor use the same driver as the producer — see XServerDisplayActivity.) 
ic.usage = VK_IMAGE_USAGE_SAMPLED_BIT; ic.sharingMode = VK_SHARING_MODE_EXCLUSIVE; ic.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - if (vkCreateImage(r->device, &ic, NULL, &t->image) != VK_SUCCESS) { - VK_LOGW("AHB vkCreateImage failed"); + ic.tiling = VK_IMAGE_TILING_OPTIMAL; + VkResult ahb_cr = vkCreateImage(r->device, &ic, NULL, &t->image); + if (ahb_cr != VK_SUCCESS) { + ic.tiling = VK_IMAGE_TILING_LINEAR; + ahb_cr = vkCreateImage(r->device, &ic, NULL, &t->image); + } + if (ahb_cr != VK_SUCCESS) { + VK_LOGW("AHB vkCreateImage failed (tried OPTIMAL then LINEAR)"); if (t->ahb) AHardwareBuffer_release(t->ahb); free(t); return NULL; diff --git a/app/src/main/runtime/display/XServerDisplayActivity.java b/app/src/main/runtime/display/XServerDisplayActivity.java index ed06c6022..91a2d76d7 100644 --- a/app/src/main/runtime/display/XServerDisplayActivity.java +++ b/app/src/main/runtime/display/XServerDisplayActivity.java @@ -6644,11 +6644,17 @@ private void setupUI() { FrameLayout rootView = xServerDisplayFrame; xServerView = new XServerSurfaceView(this, xServer); final VulkanRenderer renderer = xServerView.getRenderer(); - // Match guest libvulkan so imported AHB tiling matches the producer. + // Match guest libvulkan so imported AHB tiling matches the producer. When no + // explicit compositor-driver version is configured, fall back to the GAME's + // actual driver (not "System") so writer==reader: importing a Turnip-written + // tiled AHB with the Qualcomm system driver mismatches tile order and produces + // vertical-stripe corruption on Adreno 840 (was hidden on 830 where the layouts + // coincided). System-driver games still resolve to "System" via graphicsDriver. String compositorGraphicsDriver = graphicsDriverConfig != null ? 
graphicsDriverConfig.get("version") : null; if (compositorGraphicsDriver == null || compositorGraphicsDriver.isEmpty()) { - compositorGraphicsDriver = "System"; + compositorGraphicsDriver = (graphicsDriver != null && !graphicsDriver.isEmpty()) + ? graphicsDriver : "System"; } Log.i("XServerDisplayActivity", "Compositor graphics driver='" + compositorGraphicsDriver + "' from graphicsDriver='" + graphicsDriver + "'"); From 2f0f4cd75f2b9e4ac5d0da5aca616f61d51fc07e Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sat, 20 Jun 2026 20:23:11 -0400 Subject: [PATCH 40/46] Compositor uses System driver unless FG is on (restore pre-FG path) Reverts the unconditional guest-driver-matching for the compositor (a6acd95c) for FG-off games; on Adreno 840 reading the AHB via guest Turnip OR System both stripe, so this isn't the full fix but restores the pre-FG default. Regression hunt continues. --- .../display/XServerDisplayActivity.java | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/app/src/main/runtime/display/XServerDisplayActivity.java b/app/src/main/runtime/display/XServerDisplayActivity.java index 91a2d76d7..9de8fce40 100644 --- a/app/src/main/runtime/display/XServerDisplayActivity.java +++ b/app/src/main/runtime/display/XServerDisplayActivity.java @@ -6644,17 +6644,20 @@ private void setupUI() { FrameLayout rootView = xServerDisplayFrame; xServerView = new XServerSurfaceView(this, xServer); final VulkanRenderer renderer = xServerView.getRenderer(); - // Match guest libvulkan so imported AHB tiling matches the producer. When no - // explicit compositor-driver version is configured, fall back to the GAME's - // actual driver (not "System") so writer==reader: importing a Turnip-written - // tiled AHB with the Qualcomm system driver mismatches tile order and produces - // vertical-stripe corruption on Adreno 840 (was hidden on 830 where the layouts - // coincided). System-driver games still resolve to "System" via graphicsDriver. 
- String compositorGraphicsDriver = - graphicsDriverConfig != null ? graphicsDriverConfig.get("version") : null; - if (compositorGraphicsDriver == null || compositorGraphicsDriver.isEmpty()) { - compositorGraphicsDriver = (graphicsDriver != null && !graphicsDriver.isEmpty()) - ? graphicsDriver : "System"; + // The compositor only needs the guest driver (Turnip) when native frame + // generation runs its optical-flow compute in the compositor process. For normal + // rendering use the System (Qualcomm) driver: it imports the game's presented AHB + // correctly on all GPUs, whereas the guest Turnip's dedicated-AHB import mis-reads + // the producer's tile layout on Adreno 840 -> vertical-stripe corruption. This is + // a regression from guest-matching the compositor for FG (a6acd95c); the same + // WN-Turnip rendered MHS fine on this device before that. FG-off games (the + // default) keep the pre-FG System path that always worked. + boolean fgWantsCompositorDriver = fgPrefBool("native_frame_generation", false); + String compositorGraphicsDriver = "System"; + if (fgWantsCompositorDriver) { + String cfgVer = graphicsDriverConfig != null ? graphicsDriverConfig.get("version") : null; + compositorGraphicsDriver = (cfgVer != null && !cfgVer.isEmpty()) ? cfgVer + : (graphicsDriver != null && !graphicsDriver.isEmpty() ? graphicsDriver : "System"); } Log.i("XServerDisplayActivity", "Compositor graphics driver='" + compositorGraphicsDriver + "' from graphicsDriver='" + graphicsDriver + "'"); From 4976bcf1c07d8cbb6b43e1b8b9708fbfe6b0edb7 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sat, 20 Jun 2026 20:36:25 -0400 Subject: [PATCH 41/46] Force BCn emulation off on Adreno GPUs (fixes a840 stripe corruption) User-confirmed: the libvulkan_wrapper BCn emulation (auto/full) patches the Adreno driver and corrupts the whole frame as vertical stripes on Adreno 840; setting bcnEmulation=none fixes it. Mesa Turnip decodes BC natively, so force none on Adreno automatically. 
Also revert the wrong-diagnosis COLOR_ATTACHMENT import change. --- app/src/main/cpp/winlator/vk/vk_image.c | 7 ++----- app/src/main/runtime/display/XServerDisplayActivity.java | 8 ++++++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/vk_image.c b/app/src/main/cpp/winlator/vk/vk_image.c index 48ff8cfbd..a3446efa6 100644 --- a/app/src/main/cpp/winlator/vk/vk_image.c +++ b/app/src/main/cpp/winlator/vk/vk_image.c @@ -1178,11 +1178,8 @@ VkTexture* vkr_texture_import_ahb(VkRenderer* r, AHardwareBuffer* ahb, bool tran ic.mipLevels = 1; ic.arrayLayers = 1; ic.samples = VK_SAMPLE_COUNT_1_BIT; - // A dedicated AHB import derives its real layout from the buffer's gralloc metadata, - // not from ic.tiling, so this value is a formality. Use OPTIMAL (the producer's - // sampled-image default); fall back to LINEAR only if the driver rejects it, so a - // stricter driver can never black-screen the import. (The actual stripe fix is - // making the compositor use the same driver as the producer — see XServerDisplayActivity.) + // Dedicated AHB imports derive layout from gralloc metadata; ic.tiling is a formality. + // Use OPTIMAL, falling back to LINEAR only if the driver rejects it (no black-screen). 
ic.usage = VK_IMAGE_USAGE_SAMPLED_BIT; ic.sharingMode = VK_SHARING_MODE_EXCLUSIVE; ic.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; diff --git a/app/src/main/runtime/display/XServerDisplayActivity.java b/app/src/main/runtime/display/XServerDisplayActivity.java index 9de8fce40..900932a6f 100644 --- a/app/src/main/runtime/display/XServerDisplayActivity.java +++ b/app/src/main/runtime/display/XServerDisplayActivity.java @@ -7313,6 +7313,14 @@ private void extractGraphicsDriverFiles() { String bcnEmulation = graphicsDriverConfig.get("bcnEmulation"); String bcnEmulationType = graphicsDriverConfig.get("bcnEmulationType"); + // The libvulkan_wrapper's BCn emulation patches the Adreno driver to advertise + // native BC support; that patch corrupts rendering on newer Adreno GPUs (Adreno + // 840 whole-frame vertical stripes). Mesa Turnip decodes BC itself, so on Adreno + // force "none" (no wrapper patch). Non-Adreno GPUs keep the configured value. + if (bcnEmulation == null || com.winlator.cmod.runtime.system.GPUInformation.isAdrenoGPU(this)) { + bcnEmulation = "none"; + } + switch (bcnEmulation) { case "auto" -> { if ("compute".equals(bcnEmulationType) && GPUInformation.getVendorID(null, null) != 20803) { From 29e7daede31f7a6ba64a334b0488fdfb6ca33233 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sat, 20 Jun 2026 20:55:12 -0400 Subject: [PATCH 42/46] FG: don't collapse interpolation during motion (UI-recomposite only when static) On a real game (MHS) camera spin, interp dropped 30->0/s: X11 damage events fire requestRenderCoalesced() -> fgSceneDirty every vblank during motion, and fgEmitOne's 'dirty && !newGame' branch presented sharp instead of running the interp cadence. Fix: defer the UI-only recomposite to the END and only take it when the content interval is fully spanned (static); mid-interval, run the interpolation cadence so motion is actually smoothed. Isolated via dumps/20260620_2047_mhs_spin_BEFORE_interp0. 
--- .../runtime/display/renderer/VulkanRenderer.java | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/app/src/main/runtime/display/renderer/VulkanRenderer.java b/app/src/main/runtime/display/renderer/VulkanRenderer.java index 03c715990..9ae7995b8 100644 --- a/app/src/main/runtime/display/renderer/VulkanRenderer.java +++ b/app/src/main/runtime/display/renderer/VulkanRenderer.java @@ -562,11 +562,11 @@ private int fgEmitOne() { if (newGame || dirty) { nativePresentLast(nativeHandle, 0f, fgPrevPromoteNs, fgLastPromoteNs); return 2; } return 0; } - if (dirty && !newGame) { - // Cursor/UI-only recomposite — show it sharply. - nativePresentLast(nativeHandle, 0f, fgPrevPromoteNs, fgLastPromoteNs); - return 2; - } + // NOTE: a UI-only recomposite (dirty && !newGame) is handled at the END, only when + // the content interval is fully spanned (static). Handling it here unconditionally + // collapsed interpolation to zero during motion: X11 damage events fire a recomposite + // every vblank while the camera moves, so this branch would present sharp instead of + // running the interp cadence (interp 30->0/s during a camera spin). Defer it. // Real frame just promoted: show it sharp; the gate was restarted at 0 in the promote block. if (promoted) { @@ -587,7 +587,11 @@ private int fgEmitOne() { fgCadenceM = emits; fgVblankSincePromote++; int vi = fgVblankSincePromote; // vblanks since the real frame (1..slots-1) - if (vi >= slots) return 0; // interval fully spanned — hold for next promote + if (vi >= slots) { // interval fully spanned (content static) + // Now a UI-only recomposite (cursor) warrants a sharp redraw; otherwise hold. + if (dirty && !newGame) { nativePresentLast(nativeHandle, 0f, fgPrevPromoteNs, fgLastPromoteNs); return 2; } + return 0; // hold for the next promote + } // vblank 0 already showed the real frame; place the (emits-1) interps evenly across the rest. 
boolean emit = (int) ((long) vi * emits / slots) != (int) ((long) (vi - 1) * emits / slots); if (!emit) return 0; // between gates — hold the current frame From 768f34e0dea0928b5e0bc883d6b212e36d285fc5 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sat, 20 Jun 2026 21:20:33 -0400 Subject: [PATCH 43/46] FG dump: bump to 636x1386 for character-detail interp inspection --- app/src/main/cpp/winlator/vk/vk_renderer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index 5ec998033..5fa7320c3 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -2686,8 +2686,8 @@ static double fg_sig_delta(VkRenderer* r, uint32_t a, uint32_t b) { } // --- Debug burst dump ----------------------------------------------------------------------------- -#define FG_DUMP_W 270u -#define FG_DUMP_H 594u +#define FG_DUMP_W 636u +#define FG_DUMP_H 1386u #define FG_DUMP_N 8u #define FG_DUMP_BUFS 10u // FG_DUMP_N gen + prev + curr From 8a7f9887ad8957925f83c98ebeab8ebcb3850867 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Sun, 21 Jun 2026 18:06:45 -0400 Subject: [PATCH 44/46] Frame-gen: occlusion-select generate on corrected bidirectional warp + frame-sequence dump --- .../cpp/winlator/vk/shaders/cnn_generate.comp | 11 +- app/src/main/cpp/winlator/vk/vk_cnn_fg.c | 22 +- app/src/main/cpp/winlator/vk/vk_image.c | 125 ++-------- app/src/main/cpp/winlator/vk/vk_renderer.c | 172 ++++--------- app/src/main/cpp/winlator/vk/vk_state.h | 231 ++++++++---------- .../display/XServerDisplayActivity.java | 17 +- .../display/renderer/VulkanRenderer.java | 39 --- 7 files changed, 198 insertions(+), 419 deletions(-) diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp index 95a1ae803..9ddfe31c9 100644 --- a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp +++ 
b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp @@ -36,7 +36,6 @@ void main() { vec2 flB = texture(flowB, uvc).xy * m0; vec2 flF = texture(flowF, uvc).xy * m0; - // 4 candidates: prev forward + curr backward, for each flow scale. vec2 c0 = (numer + flB * a) / texSize; vec2 c1 = (numer - flB * b) / texSize; vec2 c2 = (numer + flF * a) / texSize; @@ -49,14 +48,8 @@ void main() { if (pc.mvScale < 0.0) { imageStore(uDst, p, vec4(abs(k2 - k3), 1.0)); return; } - // Per-candidate reliability = flow consistency at its landing point (picks the - // flow scale that is right for THIS pixel; coarse where fast, fine where slow). - vec2 e0 = texture(flowB, c0).xy * m0 - flB; - vec2 e1 = texture(flowB, c1).xy * m0 + flB; - vec2 e2 = texture(flowF, c2).xy * m0 - flF; - vec2 e3 = texture(flowF, c3).xy * m0 + flF; - const float SHARP = 0.5; - vec4 L = -SHARP * vec4(dot(e0,e0), dot(e1,e1), dot(e2,e2), dot(e3,e3)); + vec4 L = vec4(texture(logits, c0).x, texture(logits, c1).y, + texture(logits, c2).z, texture(logits, c3).w); float mx = max(max(L.x, L.y), max(L.z, L.w)); vec4 e = exp(L - mx); float w0 = (1.0 - t) * e.x; diff --git a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c index d403e2568..65524cf9c 100644 --- a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c +++ b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c @@ -723,6 +723,14 @@ static void cnn_flow_pass(VkRenderer* r, VkCommandBuffer cmd, uint32_t parity, cnn_gh_dispatch(r, cmd, P->gh_d8_pipe, P->gh_d8_pl, P->gh_d8_dsl, 0, in, 1, out, 1, w, h); } cnn_to_read(cmd, C->hD8[L].image, 1); + if (P->gh_d10_pipe) { + cnn_to_write(cmd, C->logits[L].image, 1); + { VkImageView in[3]={C->hD8[L].layerView[0], C->hD7[L].layerView[0], seedView}; + VkImageView out[1]={C->logits[L].view}; + cnn_gh_dispatch(r, cmd, P->gh_d10_pipe, P->gh_d10_pl, P->gh_d10_dsl, 0, in, 3, out, 1, w, h); } + cnn_to_read(cmd, C->logits[L].image, 1); + } + VkImageView rdst = (L == 0) ? 
outFlow->view : C->flowRef[L].view; VkImage rimg = (L == 0) ? outFlow->image : C->flowRef[L].image; uint32_t rw = (L == 0) ? outFlow->width : w; @@ -750,27 +758,27 @@ static void cnn_generate_frame(VkRenderer* r, VkCommandBuffer cmd, uint32_t pari VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - vkr_image_barrier(cmd, C->flowRef[2].image, + vkr_image_barrier(cmd, r->fg_motion_fwd[parity].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - vkr_image_barrier(cmd, C->flowRef[1].image, + vkr_image_barrier(cmd, C->logits[0].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); float t = phase < 0.0f ? 0.0f : (phase > 1.0f ? 1.0f : phase); - char mvs[16] = {0}; __system_property_get("debug.winnative.fgmvscale", mvs); - float mvScale = mvs[0] ? (float)atof(mvs) : 0.4f; + char m0s[16] = {0}; __system_property_get("debug.winnative.fgm0", m0s); + float m0 = m0s[0] ? 
(float)atof(m0s) : 0.25f; cnn_to_write(cmd, C->gen[slot].image, 1); { VkDescriptorSet ds = cnn_alloc(r, P->cnn_generate_dsl); if (!ds) return; VkDescriptorImageInfo s32 = {r->fg_sampler, prevView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo s33 = {r->fg_sampler, currView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; - VkDescriptorImageInfo s34 = {r->fg_sampler, C->flowRef[2].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s34 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo s35 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; - VkDescriptorImageInfo s36 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo s36 = {r->fg_sampler, C->logits[0].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo oi = {VK_NULL_HANDLE, C->gen[slot].view, VK_IMAGE_LAYOUT_GENERAL}; VkWriteDescriptorSet w[6] = { cnn_wimg(ds, 32, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &s32), @@ -781,7 +789,7 @@ static void cnn_generate_frame(VkRenderer* r, VkCommandBuffer cmd, uint32_t pari cnn_wimg(ds, 48, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &oi), }; vkUpdateDescriptorSets(r->device, 6, w, 0, NULL); - CnnPC pc = {0}; pc.sx = (int32_t)gw; pc.sy = (int32_t)gh; pc.t = t; pc.mvScale = mvScale; + CnnPC pc = {0}; pc.sx = (int32_t)gw; pc.sy = (int32_t)gh; pc.t = t; pc.mvScale = m0; vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, P->cnn_generate_pipe); vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, P->cnn_generate_pl, 0, 1, &ds, 0, NULL); vkCmdPushConstants(cmd, P->cnn_generate_pl, VK_SHADER_STAGE_COMPUTE_BIT, 0, 32, &pc); diff --git a/app/src/main/cpp/winlator/vk/vk_image.c b/app/src/main/cpp/winlator/vk/vk_image.c index a3446efa6..a307babfd 100644 --- a/app/src/main/cpp/winlator/vk/vk_image.c +++ b/app/src/main/cpp/winlator/vk/vk_image.c @@ -1,16 +1,4 @@ // VkTexture 
allocation, upload, AHB import. -// -// Two creation paths: -// 1. CPU-uploaded: caller hands us BGRA pixel data; we allocate VkImage in DEVICE_LOCAL memory, -// stage the upload through a host-visible buffer, and transition to SHADER_READ_OPTIMAL. -// 2. AHardwareBuffer import: caller hands us an AHB; we allocate dedicated memory backed by the -// AHB (no copy) and bind it to a VkImage. For non-RGB formats (DRI3 vendor formats), we use -// a Ycbcr conversion so the sampler can read them. -// -// Texture lifetimes: -// - Created/updated synchronously on caller's thread (Java/render). -// - Submits go through vkQueueSubmit which is serialized via VkRenderer::queue_mutex. -// - Destruction is deferred via the graveyard so in-flight frames don't see freed handles. #include "vk_state.h" #include @@ -51,15 +39,6 @@ void vkr_image_barrier(VkCommandBuffer cmd, VkImage image, VkImageLayout from, V // ============================================================ // Staging pool — async upload infrastructure // ============================================================ -// -// Each slot owns a VkBuffer, persistently-mapped HOST_VISIBLE memory, a VkCommandPool with -// one VkCommandBuffer, and a VkFence. Round-robin acquisition under a tiny mutex; per-slot -// mutex provides exclusive ownership for the lifetime of an upload (acquire→submit→release). -// -// On a single graphics queue, the upload's terminal pipeline barrier (TRANSFER_WRITE → -// SHADER_READ, dstStage=FRAGMENT_SHADER) extends into all subsequent submits per Vulkan -// spec — so the renderer needs no extra synchronization to safely sample a freshly-updated -// texture as long as the upload was submitted before the render. 
bool vkr_staging_pool_init(VkRenderer* r) { if (r->staging_pool.initialized) return true; @@ -74,7 +53,7 @@ bool vkr_staging_pool_init(VkRenderer* r) { for (uint32_t i = 0; i < VK_STAGING_POOL_SIZE; i++) { VkStagingSlot* s = &r->staging_pool.slots[i]; pthread_mutex_init(&s->mutex, NULL); - r->staging_pool.valid_slots = i + 1; // mutex is now valid; destroy must clean it up + r->staging_pool.valid_slots = i + 1; VkCommandPoolCreateInfo cpci = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO}; cpci.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT; @@ -97,15 +76,13 @@ bool vkr_staging_pool_init(VkRenderer* r) { VK_LOGE("staging pool: vkCreateFence slot %u failed", i); return false; } - // buffer/memory allocated lazily on first use, sized to the actual upload. + // buffer/memory allocated lazily on first use } r->staging_pool.initialized = true; return true; } void vkr_staging_pool_destroy(VkRenderer* r) { - // Tolerates partially-initialized pools — only iterate the slots whose mutexes were - // successfully initialized. for (uint32_t i = 0; i < r->staging_pool.valid_slots; i++) { VkStagingSlot* s = &r->staging_pool.slots[i]; if (s->fence) { @@ -128,14 +105,11 @@ void vkr_staging_pool_destroy(VkRenderer* r) { // Re-allocate a slot's staging buffer to at least `needed` bytes. Caller must own the slot. static bool grow_staging_slot(VkRenderer* r, VkStagingSlot* s, VkDeviceSize needed) { - // Round up to 64 KiB so consecutive size bumps don't trigger reallocs. VkDeviceSize new_size = (needed + 65535ull) & ~(VkDeviceSize)65535ull; if (s->mapped && s->memory) { vkUnmapMemory(r->device, s->memory); s->mapped = NULL; } if (s->buffer) { vkDestroyBuffer(r->device, s->buffer, NULL); s->buffer = VK_NULL_HANDLE; } if (s->memory) { vkFreeMemory(r->device, s->memory, NULL); s->memory = VK_NULL_HANDLE; } - // Reset size now so a later allocation failure leaves the slot in a state where the next - // acquire will retry grow_staging_slot rather than skip it and hand back a NULL buffer. 
s->size = 0; VkBufferCreateInfo bi = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; @@ -149,12 +123,6 @@ static bool grow_staging_slot(VkRenderer* r, VkStagingSlot* s, VkDeviceSize need VkMemoryAllocateInfo ai = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; ai.allocationSize = mr.size; - // Require HOST_VISIBLE | HOST_COHERENT (typically write-combined on Adreno). Skipping - // HOST_CACHED avoids polluting CPU caches with write-once-then-GPU-read staging, which - // hurts throughput by 5-20% on Adreno. We do not fall back to non-coherent memory: - // vkr_texture_update submits without vkFlushMappedMemoryRanges, so non-coherent staging - // would render undefined data. Vulkan spec §11.6 mandates that every device expose at - // least one HOST_VISIBLE | HOST_COHERENT memory type, so this lookup cannot legally fail. ai.memoryTypeIndex = vkr_find_memory_type(r, mr.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); if (ai.memoryTypeIndex == UINT32_MAX) { @@ -184,15 +152,9 @@ VkStagingSlot* vkr_staging_pool_acquire(VkRenderer* r, VkDeviceSize needed) { VkStagingSlot* s = &r->staging_pool.slots[idx]; - // Per-slot lock guards the slot's resources (buffer/cmd/fence) until release. Round-robin - // means contention only happens once VK_STAGING_POOL_SIZE acquires have wrapped — i.e. - // when the producer is consistently faster than the GPU can drain uploads. pthread_mutex_lock(&s->mutex); - // Wait for the slot's previous submission to retire. With pool_size=8 this almost never - // blocks because the fence signaled long ago. The fence is left signaled here on purpose - // — it gets reset right before vkQueueSubmit, so any no-submit failure path between here - // and submit leaves the fence safely signaled and the slot reusable. + // Wait for the slot's previous submission to retire. 
vkWaitForFences(r->device, 1, &s->fence, VK_TRUE, UINT64_MAX); vkResetCommandPool(r->device, s->cmd_pool, 0); @@ -247,7 +209,6 @@ void vkr_run_one_shot_cmd(VkRenderer* r, void (*fn)(VkCommandBuffer, void*), voi VkDescriptorSet vkr_alloc_descriptor_set(VkRenderer* r); void vkr_free_descriptor_set(VkRenderer* r, VkDescriptorSet set); -// Image sub-allocator — implemented lower in this file. static bool vkr_suballoc_image(VkRenderer* r, VkImage image, VkSuballoc* out); static void vkr_suballoc_free(VkRenderer* r, VkSuballoc* a); @@ -293,8 +254,7 @@ bool vkr_submit_async_transition(VkRenderer* r, VkImage image, VkImageLayout from, VkImageLayout to, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src_access, VkAccessFlags dst_access) { - // Reuse the staging pool's per-slot command pool/buffer/fence for this transition. We - // pass needed=0 so the slot's staging buffer isn't grown (we only use the cmd buffer). + // needed=0 reuses the slot's cmd buffer without growing its staging buffer. VkStagingSlot* slot = vkr_staging_pool_acquire(r, 0); if (!slot) { VK_LOGE("vkr_submit_async_transition: staging slot acquire failed"); @@ -324,8 +284,7 @@ bool vkr_submit_async_transition(VkRenderer* r, VkImage image, pthread_mutex_unlock(&r->queue_mutex); if (sr != VK_SUCCESS) { VK_LOGE("vkr_submit_async_transition: vkQueueSubmit -> %d", sr); - // Restore a signaled fence so the slot is reusable. (Same recovery path as - // vkr_texture_update.) + // Restore a signaled fence so the slot is reusable. 
vkDestroyFence(r->device, slot->fence, NULL); VkFenceCreateInfo rfi = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO}; rfi.flags = VK_FENCE_CREATE_SIGNALED_BIT; @@ -436,10 +395,6 @@ static VkTexture* pop_live_texture(VkRenderer* r) { // ---------------------------------------------------------------------- // Image sub-allocator // ---------------------------------------------------------------------- -// -// First-fit over a list of large DEVICE_LOCAL blocks, all under image_suballoc.mutex (alloc on -// producer threads, free on the render thread). Region nodes are malloc'd only on new-block -// creation or a non-coalescing free, so steady-state pixmap churn doesn't touch the C heap. void vkr_suballoc_init(VkRenderer* r) { VkImageSuballocator* sa = &r->image_suballoc; @@ -453,9 +408,7 @@ static VkDeviceSize suballoc_align_up(VkDeviceSize v, VkDeviceSize a) { return (v + a - 1) & ~(a - 1); } -// Carve [reg->offset .. bind+size) out of free region `reg` (`prev` = its free-list -// predecessor, or NULL). The leading alignment pad folds into the span so free recovers it -// verbatim. Caller verified the region fits. +// Carve [reg->offset .. bind+size) out of free region `reg`. Caller verified the region fits. 
static void suballoc_carve(VkMemBlock* block, VkMemRegion* prev, VkMemRegion* reg, VkDeviceSize bind, VkDeviceSize size, VkSuballoc* out) { VkDeviceSize span_offset = reg->offset; @@ -463,10 +416,10 @@ static void suballoc_carve(VkMemBlock* block, VkMemRegion* prev, VkMemRegion* re VkDeviceSize reg_end = reg->offset + reg->size; if (span_end < reg_end) { - reg->offset = span_end; // shrink region to the trailing remainder + reg->offset = span_end; // shrink to the trailing remainder reg->size = reg_end - span_end; } else { - if (prev) prev->next = reg->next; // region fully consumed — unlink + free + if (prev) prev->next = reg->next; // region fully consumed else block->free_list = reg->next; free(reg); } @@ -478,9 +431,8 @@ static void suballoc_carve(VkMemBlock* block, VkMemRegion* prev, VkMemRegion* re out->span_size = span_end - span_offset; } -// Reserve a span sized/aligned for `image` into `out`. Does NOT bind — caller issues the one -// vkBindImageMemory so a bind failure never forces an illegal rebind. False (nothing reserved) -// if no DEVICE_LOCAL type fits or a new block can't be allocated. +// Reserve a span sized/aligned for `image` into `out`. Does NOT bind — caller binds. +// False if no DEVICE_LOCAL type fits or a new block can't be allocated. static bool vkr_suballoc_image(VkRenderer* r, VkImage image, VkSuballoc* out) { VkMemoryRequirements mr; vkGetImageMemoryRequirements(r->device, image, &mr); @@ -583,14 +535,13 @@ static void vkr_suballoc_free(VkRenderer* r, VkSuballoc* a) { if (prev) prev->next = node; else b->free_list = node; } else { - // Node alloc failed (near-impossible): leak the span; the block is reclaimed at - // teardown anyway. + // Node alloc failed: leak the span; the block is reclaimed at teardown. VK_LOGE("suballoc free: region node alloc failed; leaking %llu bytes", (unsigned long long)sz); } } - // Return a fully-drained block so churn doesn't pin memory forever. + // Return a fully-drained block to the device. 
if (b->free_list && b->free_list->next == NULL && b->free_list->offset == 0 && b->free_list->size == b->size) { VkMemBlock* pb = NULL; @@ -700,7 +651,7 @@ static bool create_image_basic(VkRenderer* r, uint32_t w, uint32_t h, VkFormat f if (vkCreateImage(r->device, &ic, NULL, &t->image) != VK_SUCCESS) return false; - // Preferred path: pooled span (no per-texture vkAllocateMemory). Bind once here. + // Preferred path: pooled span. VkSuballoc sub = {0}; if (vkr_suballoc_image(r, t->image, &sub)) { if (vkBindImageMemory(r->device, t->image, sub.memory, sub.bind_offset) == VK_SUCCESS) { @@ -708,15 +659,13 @@ static bool create_image_basic(VkRenderer* r, uint32_t w, uint32_t h, VkFormat f t->suballocated = true; return true; } - // Bind attempted -> image can't be rebound via the dedicated path; fail (OOM-grade, - // effectively never happens). vkr_suballoc_free(r, &sub); vkDestroyImage(r->device, t->image, NULL); t->image = VK_NULL_HANDLE; return false; } - // Fallback: dedicated allocation (pool OOM / no DEVICE_LOCAL type). No bind attempted yet. + // Fallback: dedicated allocation. VkMemoryRequirements mr; vkGetImageMemoryRequirements(r->device, t->image, &mr); @@ -770,8 +719,7 @@ VkTexture* vkr_texture_create_uploaded(VkRenderer* r, uint32_t width, uint32_t h return NULL; } - // CPU-uploaded textures all want the same sampler config, so use the renderer's shared - // sampler. tex->sampler stays VK_NULL_HANDLE; destroy_texture_resources skips it. + // Use the renderer's shared sampler; tex->sampler stays VK_NULL_HANDLE. if (r->shared_sampler == VK_NULL_HANDLE) { VK_LOGE("vkr_texture_create_uploaded: shared_sampler not initialized"); destroy_texture_resources(r, t); @@ -789,9 +737,7 @@ VkTexture* vkr_texture_create_uploaded(VkRenderer* r, uint32_t width, uint32_t h vkr_texture_update(r, t, width, height, data, data_size, stride_pixels, 0, 0, width, height); } else { - // No initial data — async transition to SHADER_READ so the texture is safe to sample - // as black. 
Doesn't block the caller; the barrier orders before the next render submit - // on the same queue per Vulkan spec. + // No initial data — async transition to SHADER_READ so the texture samples as black. if (!vkr_submit_async_transition(r, t->image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, @@ -814,13 +760,11 @@ bool vkr_texture_update(VkRenderer* r, VkTexture* tex, uint32_t width, uint32_t uint32_t dirty_w, uint32_t dirty_h) { if (!tex || tex->external || !data || data_size == 0) return false; if (width != tex->width || height != tex->height) { - // Caller is expected to size-match. Reject mismatches to avoid silent corruption. VK_LOGW("vkr_texture_update size mismatch (have %ux%u, got %ux%u)", tex->width, tex->height, width, height); return false; } - // BGRA8 = 4 bytes per pixel. Caller provides stride_pixels (per-row pixel count). if (stride_pixels == 0) stride_pixels = width; if (dirty_w == 0 || dirty_h == 0) { dirty_x = 0; @@ -885,8 +829,7 @@ bool vkr_texture_update(VkRenderer* r, VkTexture* tex, uint32_t width, uint32_t si.commandBufferCount = 1; si.pCommandBuffers = &slot->cmd; - // Reset fence here, not in acquire — guarantees that the only path that leaves a fence - // unsignaled is one where vkQueueSubmit also runs to take ownership of it. + // Reset fence here, not in acquire, so only a path that also submits leaves it unsignaled. vkResetFences(r->device, 1, &slot->fence); pthread_mutex_lock(&r->queue_mutex); @@ -894,9 +837,7 @@ bool vkr_texture_update(VkRenderer* r, VkTexture* tex, uint32_t width, uint32_t pthread_mutex_unlock(&r->queue_mutex); if (sr != VK_SUCCESS) { VK_LOGE("vkr_texture_update: vkQueueSubmit -> %d", sr); - // Submit failed but we already reset the fence, so it's unsignaled and would deadlock - // the next acquire. Replace with a signaled fence. 
(Submit failures usually mean - // device-lost; the renderer is going to need a restart anyway.) + // Replace with a signaled fence so the next acquire doesn't deadlock. vkDestroyFence(r->device, slot->fence, NULL); VkFenceCreateInfo rfi = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO}; rfi.flags = VK_FENCE_CREATE_SIGNALED_BIT; @@ -905,9 +846,6 @@ bool vkr_texture_update(VkRenderer* r, VkTexture* tex, uint32_t width, uint32_t return false; } - // The barrier emitted by upload_cmds (TRANSFER_WRITE → SHADER_READ, dstStage= - // FRAGMENT_SHADER) extends into all subsequent submits on the same queue, so the next - // render submit will observe the writes without any additional renderer-side barrier. tex->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; vkr_staging_pool_release(slot); return true; @@ -953,8 +891,7 @@ static void batch_transition_to_shader_read(VkCommandBuffer cmd, VkTexture* tex) VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); } -// Grow-only PreparedBatchUpload[] scratch. Render-thread-only, so unlocked; every element is -// fully overwritten before use, so no zeroing. +// Grow-only PreparedBatchUpload[] scratch (render-thread-only). static PreparedBatchUpload* get_prepared_scratch(VkRenderer* r, uint32_t count) { if (r->batch_prepared_cap < count) { uint32_t new_cap = r->batch_prepared_cap ? r->batch_prepared_cap : 64; @@ -1153,11 +1090,7 @@ VkTexture* vkr_texture_import_ahb(VkRenderer* r, AHardwareBuffer* ahb, bool tran t->ahb = transfer_ownership ? ahb : NULL; if (transfer_ownership) AHardwareBuffer_acquire(ahb); - // External-format AHB sampling requires a YCbCr conversion bound through an immutable - // sampler in the descriptor-set layout. This renderer uses one mutable combined - // image/sampler layout for all regular textures, so accepting external-format AHBs here - // would be Vulkan-invalid on strict drivers. Keep the import path to RGB formats until a - // separate immutable-sampler pipeline/layout path exists. 
+ // External-format AHBs need an immutable-sampler layout this renderer lacks; RGB only. if (format_props.format == VK_FORMAT_UNDEFINED) { VK_LOGW("AHB external-format import unsupported by current descriptor layout"); if (t->ahb) AHardwareBuffer_release(t->ahb); @@ -1178,8 +1111,7 @@ VkTexture* vkr_texture_import_ahb(VkRenderer* r, AHardwareBuffer* ahb, bool tran ic.mipLevels = 1; ic.arrayLayers = 1; ic.samples = VK_SAMPLE_COUNT_1_BIT; - // Dedicated AHB imports derive layout from gralloc metadata; ic.tiling is a formality. - // Use OPTIMAL, falling back to LINEAR only if the driver rejects it (no black-screen). + // OPTIMAL, falling back to LINEAR only if the driver rejects it. ic.usage = VK_IMAGE_USAGE_SAMPLED_BIT; ic.sharingMode = VK_SHARING_MODE_EXCLUSIVE; ic.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; @@ -1237,12 +1169,10 @@ VkTexture* vkr_texture_import_ahb(VkRenderer* r, AHardwareBuffer* ahb, bool tran vi.image = t->image; vi.viewType = VK_IMAGE_VIEW_TYPE_2D; vi.format = format_props.format; - // samplerYcbcrConversionComponents is only defined when a Ycbcr conversion is in use; - // some non-Adreno drivers populate non-identity swizzles for RGB AHBs. if (t->ycbcr != VK_NULL_HANDLE) { vi.components = format_props.samplerYcbcrConversionComponents; } else { - // fixes devices that supports vulkan bgra8 format, but doesn't support bgra8 ahb images + // swizzle for devices that support vulkan bgra8 but not bgra8 AHB images bool swizzle_rb = format_props.format == VK_FORMAT_R8G8B8A8_UNORM && r->caps.upload_format == VK_FORMAT_B8G8R8A8_UNORM; vi.components.r = swizzle_rb ? VK_COMPONENT_SWIZZLE_B : VK_COMPONENT_SWIZZLE_IDENTITY; @@ -1264,8 +1194,7 @@ VkTexture* vkr_texture_import_ahb(VkRenderer* r, AHardwareBuffer* ahb, bool tran return NULL; } - // Ycbcr-bound samplers must be created per-texture (driver pairs them with the conversion). - // For plain RGB AHB imports we can reuse the renderer's shared sampler. 
+ // Ycbcr-bound samplers are per-texture; plain RGB imports reuse the shared sampler. VkSampler sampler_for_descriptor; if (t->ycbcr != VK_NULL_HANDLE) { if (!vkr_create_sampler(r, t->ycbcr, &t->sampler)) { @@ -1304,9 +1233,7 @@ VkTexture* vkr_texture_import_ahb(VkRenderer* r, AHardwareBuffer* ahb, bool tran } write_descriptor_set(r, t->descriptor_set, t->view, sampler_for_descriptor); - // Async transition to SHADER_READ. The barrier orders before all subsequent submits on - // the same queue per Vulkan spec, so the next render submit safely samples this image - // without an additional renderer-side wait. + // Async transition to SHADER_READ. if (!vkr_submit_async_transition(r, t->image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, @@ -1354,7 +1281,7 @@ void vkr_texture_schedule_destroy(VkRenderer* r, VkTexture* tex) { } tex->destroy_scheduled = true; - // Defensive: drop any references in the live scene state. + // Drop any references in the live scene state. for (uint32_t i = 0; i < r->scene.window_count; i++) { if (r->scene.windows[i].texture == tex) { r->scene.windows[i].texture = NULL; @@ -1370,7 +1297,7 @@ void vkr_texture_schedule_destroy(VkRenderer* r, VkTexture* tex) { VkTexture** ng = realloc(slot->textures, new_cap * sizeof(VkTexture*)); if (!ng) { pthread_mutex_unlock(&r->scene_mutex); - // As a last resort, leak rather than crash. Better than UAF. + // Leak rather than risk a use-after-free. 
VK_LOGE("graveyard alloc failed; leaking texture %p", (void*)tex); return; } diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index 5fa7320c3..2f5b5ea98 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -487,7 +487,6 @@ static bool create_device(VkRenderer* r) { } VK_LOGI("Frame generation fp16 support: ext=%d feature=%d", has_f16, r->fg_float16_supported); - // VK_NV_optical_flow: driver-accelerated motion estimation, the cheap flow path. r->fg_optical_flow = false; VkPhysicalDeviceOpticalFlowFeaturesNV of_feat = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_FEATURES_NV }; if (has_optical_flow && has_sync2 && has_fmt_feat2 && enable_n + 3 <= 24) { @@ -500,10 +499,6 @@ static bool create_device(VkRenderer* r) { vkGetInstanceProcAddr(r->instance, "vkGetPhysicalDeviceFeatures2KHR"); if (fnFeat2) { fnFeat2(r->physical_device, &feats2); r->fg_optical_flow = (ofq.opticalFlow == VK_TRUE); } } - // Note: on Adreno 8xx (Turnip chip8) the OF compute impl exists but the extension is chip-gated off, - // so it never advertises. Enabling it anyway is rejected (VK_ERROR_EXTENSION_NOT_PRESENT) and calling - // the entry points without it enabled null-derefs — confirmed on-device. Only a driver patch that - // advertises the extension can unlock it here; the classical flow is used until then. 
if (r->fg_optical_flow) { enable[enable_n++] = VK_NV_OPTICAL_FLOW_EXTENSION_NAME; enable[enable_n++] = VK_KHR_FORMAT_FEATURE_FLAGS_2_EXTENSION_NAME; @@ -553,17 +548,12 @@ static bool create_device(VkRenderer* r) { r->fnOFDestroy = (PFN_vkDestroyOpticalFlowSessionNV) vkGetDeviceProcAddr(r->device, "vkDestroyOpticalFlowSessionNV"); r->fnOFBind = (PFN_vkBindOpticalFlowSessionImageNV) vkGetDeviceProcAddr(r->device, "vkBindOpticalFlowSessionImageNV"); r->fnOFExecute = (PFN_vkCmdOpticalFlowExecuteNV) vkGetDeviceProcAddr(r->device, "vkCmdOpticalFlowExecuteNV"); - // fnOFFormats is optional (the format enumerator is chip-gated off on Adreno 8xx); the session/ - // bind/execute entry points are what we actually need. if (!r->fnOFCreate || !r->fnOFDestroy || !r->fnOFBind || !r->fnOFExecute) { VK_LOGW("optical flow entry points missing; disabling OF flow"); r->fg_optical_flow = false; } } - // Optical-flow session probe: the extension was enabled at vkCreateDevice (advertised, or forced - // on Turnip), so the OF device state is initialized. Create a real session to confirm the chip8 - // compute path actually works (vs the earlier null-deref when called without the extension enabled). if (r->fg_optical_flow && r->fnOFCreate && r->fnOFDestroy) { VkOpticalFlowSessionCreateInfoNV sci = { VK_STRUCTURE_TYPE_OPTICAL_FLOW_SESSION_CREATE_INFO_NV }; sci.width = 960; sci.height = 540; @@ -922,8 +912,6 @@ static bool create_pipeline_layouts(VkRenderer* r) { return false; } - // Window/cursor: push constants = float xform[6] + vec2 viewSize + vec4 uvRect - // + int swapRB = 52 bytes VkPushConstantRange pcr_window = {0}; pcr_window.stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; pcr_window.offset = 0; @@ -938,8 +926,6 @@ static bool create_pipeline_layouts(VkRenderer* r) { return false; } - // Effect: push constants = vec2 resolution + 4 floats (sat, contrast, sharp, mode) = 24 bytes. - // Other effect shaders only declare the first 16 bytes and ignore the rest. 
VkPushConstantRange pcr_effect = {0}; pcr_effect.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; pcr_effect.offset = 0; @@ -1145,7 +1131,6 @@ static bool create_pipelines(VkRenderer* r) { VkShaderModule fs_colorblind = load_shader_module(r, effect_colorblind_frag, effect_colorblind_frag_size); VkShaderModule fs_pixelate = load_shader_module(r, effect_pixelate_frag, effect_pixelate_frag_size); VkShaderModule fs_sgsr1 = load_shader_module(r, sgsr1_frag, sgsr1_frag_size); - // Frame generation: pick the fp16 or fp32 motion shader by device support. VkShaderModule cs_motion = r->fg_float16_supported ? load_shader_module(r, motion_comp, motion_comp_size) : load_shader_module(r, motion_fp32_comp, motion_fp32_comp_size); @@ -1262,8 +1247,6 @@ static bool create_pipelines(VkRenderer* r) { r, vs_quad, fs_pixelate, r->pipelines.effect_layout, r->pipelines.offscreen_pass, false, false, NULL); - // Frame generation: compute motion estimation + fullscreen-triangle interpolation (no vertex - // input, no blend — opaque full-screen write) onto the swapchain. r->pipelines.fg_motion_pipeline = create_compute_pipeline( r, cs_motion, r->pipelines.fg_motion_pipe_layout); r->pipelines.fg_interp_pipeline = create_graphics_pipeline( @@ -1448,7 +1431,6 @@ static bool create_swapchain(VkRenderer* r, uint32_t fallback_width, uint32_t fa if (have_want) { present_mode = want; } else if (want == VK_PRESENT_MODE_MAILBOX_KHR && have_immediate) { - // MAILBOX is often unsupported on Adreno/Mali; IMMEDIATE is also non-blocking, which FG needs. 
present_mode = VK_PRESENT_MODE_IMMEDIATE_KHR; VK_LOGW("MAILBOX unavailable; using IMMEDIATE for off-vsync present"); } else { @@ -1493,8 +1475,6 @@ static bool create_swapchain(VkRenderer* r, uint32_t fallback_width, uint32_t fa r->surface_extent = surface_extent; r->swapchain_extent = extent; r->swapchain_transform = pre_transform; - // Only possible for unsupported mirrored transforms; avoid an Adreno present loop - // while still letting normal rotation changes recreate the swapchain. r->ignore_suboptimal = r->caps.is_adreno && (pre_transform != caps.currentTransform); VK_LOGI("Swapchain surface=%ux%u extent=%ux%u currentTransform=0x%x preTransform=0x%x mode=%d", surface_extent.width, surface_extent.height, extent.width, extent.height, @@ -1502,7 +1482,7 @@ static bool create_swapchain(VkRenderer* r, uint32_t fallback_width, uint32_t fa uint32_t image_count = caps.minImageCount + 1; if (image_count < VK_FRAMES_IN_FLIGHT + 1u) image_count = VK_FRAMES_IN_FLIGHT + 1u; - if (present_mode != VK_PRESENT_MODE_FIFO_KHR && image_count < 5) image_count = 5; // generate-ahead holds one extra image in flight + if (present_mode != VK_PRESENT_MODE_FIFO_KHR && image_count < 5) image_count = 5; if (caps.maxImageCount > 0 && image_count > caps.maxImageCount) image_count = caps.maxImageCount; if (image_count > VK_MAX_SWAPCHAIN_IMAGES) image_count = VK_MAX_SWAPCHAIN_IMAGES; @@ -2352,8 +2332,6 @@ static bool record_and_submit_frame(VkRenderer* r) { pthread_mutex_unlock(&r->queue_mutex); if (sr != VK_SUCCESS) { VK_LOGE("vkQueueSubmit -> %d", sr); - // The frame fence was reset before submit. If submit fails, nothing will ever signal - // it, so restore a signaled fence before returning or the next frame can block forever. 
vkDestroyFence(r->device, f->in_flight, NULL); VkFenceCreateInfo rfi = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO}; rfi.flags = VK_FENCE_CREATE_SIGNALED_BIT; @@ -2399,8 +2377,6 @@ static bool record_and_submit_frame(VkRenderer* r) { // Frame generation resources + submit // ============================================================ -// Descriptor sets for the FG compute/interp layouts are allocated directly (not via the -// texture free-list, which assumes sampler_set_layout). The pool is externally synchronized. static VkDescriptorSet fg_alloc_set(VkRenderer* r, VkDescriptorSetLayout layout) { VkDescriptorSetAllocateInfo ai = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; ai.descriptorPool = r->descriptor_pool; @@ -2552,8 +2528,6 @@ static bool fg_create_motion(VkRenderer* r, VkFgImage* o, uint32_t w, uint32_t h return true; } -// Coarse-to-fine motion: a plain low-res search (coarse, upscale=0) seeds a fine pass (upscale=2) -// that warps prev by the upsampled coarse flow as it loads its tile and resolves only the residual. static void fg_motion_pass(VkRenderer* r, VkCommandBuffer cmd, VkDescriptorSet coarseSet, VkFgImage* coarseImg, VkDescriptorSet fineSet, VkFgImage* fineImg, float minStep) { @@ -2633,12 +2607,8 @@ static void fg_destroy_sig(VkRenderer* r) { r->fg_stage_slot = -1; } -// Record a downsample of history[slot] into fg_sig_buf[slot] (blit -> tiny image -> host buffer). -// history[slot] is in SHADER_READ_ONLY_OPTIMAL on entry and is restored to it on exit. static void fg_record_sig(VkRenderer* r, VkCommandBuffer cmd, uint32_t slot) { if (!r->fg_sig_supported) return; - // history[slot] was just written by the offscreen composite (render-pass final layout - // SHADER_READ_ONLY_OPTIMAL); wait on those colour writes before the transfer read. 
vkr_image_barrier(cmd, r->fg_history[slot].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, @@ -2680,16 +2650,17 @@ static double fg_sig_delta(VkRenderer* r, uint32_t a, uint32_t b) { int dg = abs((int)pa[i*4+1] - (int)pb[i*4+1]); int db = abs((int)pa[i*4+2] - (int)pb[i*4+2]); int m = dr > dg ? dr : dg; if (db > m) m = db; - if (m > 2) changed++; // noise floor: reject only the lightest dither/noise; subtle motion (3-4/ch) now counts as distinct so it isn't held + if (m > 2) changed++; } - return (double)changed; // 0 == identical re-present; >0 == distinct content frame + return (double)changed; } // --- Debug burst dump ----------------------------------------------------------------------------- #define FG_DUMP_W 636u #define FG_DUMP_H 1386u -#define FG_DUMP_N 8u -#define FG_DUMP_BUFS 10u // FG_DUMP_N gen + prev + curr +#define FG_DUMP_N 10u +#define FG_DUMP_BUFS 10u +static uint32_t s_fgseq_last_curr = 0xFFFFFFFFu; static bool fg_create_dump(VkRenderer* r) { r->fg_dump_supported = false; @@ -2740,19 +2711,31 @@ static void fg_destroy_dump(VkRenderer* r) { } // Blit srcImg (full res, given layout) -> fg_dump_img (480x270) -> fg_dump_buf[bufIdx]. Restores srcImg. -static void fg_record_dump(VkRenderer* r, VkCommandBuffer cmd, VkImage srcImg, VkImageLayout srcLayout, uint32_t bufIdx) { +static void fg_record_dump(VkRenderer* r, VkCommandBuffer cmd, VkImage srcImg, VkImageLayout srcLayout, uint32_t srcW, uint32_t srcH, uint32_t bufIdx, int rawF16) { if (!r->fg_dump_supported || bufIdx >= FG_DUMP_BUFS) return; vkr_image_barrier(cmd, srcImg, srcLayout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT); + if (rawF16) { + uint32_t cw = srcW < 512u ? srcW : 512u, chh = srcH < 512u ? 
srcH : 512u; + VkBufferImageCopy rcp = {0}; + rcp.imageSubresource = (VkImageSubresourceLayers){VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + rcp.imageExtent = (VkExtent3D){cw, chh, 1}; + vkCmdCopyImageToBuffer(cmd, srcImg, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, r->fg_dump_buf[bufIdx], 1, &rcp); + vkr_image_barrier(cmd, srcImg, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, srcLayout, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_SHADER_READ_BIT); + return; + } vkr_image_barrier(cmd, r->fg_dump_img, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, VK_ACCESS_TRANSFER_WRITE_BIT); VkImageBlit blit = {0}; blit.srcSubresource = (VkImageSubresourceLayers){VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; - blit.srcOffsets[1] = (VkOffset3D){(int32_t)r->fg_dims.width, (int32_t)r->fg_dims.height, 1}; + blit.srcOffsets[1] = (VkOffset3D){(int32_t)srcW, (int32_t)srcH, 1}; blit.dstSubresource = (VkImageSubresourceLayers){VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; blit.dstOffsets[1] = (VkOffset3D){(int32_t)FG_DUMP_W, (int32_t)FG_DUMP_H, 1}; vkCmdBlitImage(cmd, srcImg, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, @@ -2858,11 +2841,9 @@ static bool fg_create_resources(VkRenderer* r, uint32_t w, uint32_t h) { r->fg_interp_set_deep[p] = fg_alloc_set(r, r->pipelines.fg_interp_layout); if (!r->fg_motion_set[p] || !r->fg_motion_set_fwd[p] || !r->fg_coarse_set[p] || !r->fg_coarse_set_fwd[p] || !r->fg_interp_set[p] || !r->fg_interp_set_deep[p]) goto fail; - VkImageView prevV = r->fg_history[(p + 2u) % 3u].view; // curr=history[p], prev=history[(p+2)%3] + VkImageView prevV = r->fg_history[(p + 2u) % 3u].view; VkImageView currV = r->fg_history[p].view; - // 4-binding motion set: b0 prev, b1 curr, b2 coarseFlow (sampler; dummy on the coarse pass), - // b3 output (storage). Coarse pass writes fg_coarse; fine pass reads fg_coarse and writes fg_motion. 
VkDescriptorImageInfo sPrev = { r->fg_sampler, prevV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; VkDescriptorImageInfo sCurr = { r->fg_sampler, currV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; #define FG_MOTION_WRITE(SET, B0, B1, B2VIEW, B2LAYOUT, B3VIEW) do { \ @@ -2887,8 +2868,6 @@ static bool fg_create_resources(VkRenderer* r, uint32_t w, uint32_t h) { #undef FG_MOTION_WRITE (void)sPrev; (void)sCurr; - // interpolate.frag set: b0 prev, b1 curr, b2 mvBwd, b3 mvFwd — all sampled (SHADER_READ). - // Standard binds fg_motion as the (unread) b3 dummy; deep binds the real forward field. VkDescriptorImageInfo iPrev = { r->fg_sampler, prevV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; VkDescriptorImageInfo iCurr = { r->fg_sampler, currV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; VkDescriptorImageInfo iMv = { r->fg_sampler, r->fg_motion[p].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; @@ -2898,8 +2877,6 @@ static bool fg_create_resources(VkRenderer* r, uint32_t w, uint32_t h) { iw_[0].pImageInfo = &iPrev; iw_[1].pImageInfo = &iCurr; iw_[2].pImageInfo = &iMv; iw_[3].pImageInfo = &iMv; vkUpdateDescriptorSets(r->device, 4, iw_, 0, NULL); - // Quality interp set: same newest pair as Standard, but bidirectional — b2 backward - // (fg_motion) + b3 forward (fg_motion_fwd), both computed in-path (lazy) each pair. VkDescriptorImageInfo dPrev = { r->fg_sampler, prevV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; VkDescriptorImageInfo dCurr = { r->fg_sampler, currV, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; VkWriteDescriptorSet dw_[4] = {0}; @@ -2940,15 +2917,11 @@ static bool fg_ensure_resources(VkRenderer* r) { return true; } wait_inflight_frames(r); - // The FG worker may have in-flight GPU work reading these resources (it submits outside the GL - // frame fences), so drain the whole queue before destroying them. Rare path (dims/flowScale change). 
if (r->fg_gen_started) { pthread_mutex_lock(&r->queue_mutex); vkQueueWaitIdle(r->graphics_queue); pthread_mutex_unlock(&r->queue_mutex); } fg_destroy_resources(r); return fg_create_resources(r, r->swapchain_extent.width, r->swapchain_extent.height); } -// Restore a frame fence to the signaled state after a submit failure (so the next frame that -// reuses this index does not block forever on an unsignaled fence). static void fg_restore_fence(VkRenderer* r, VkFrame* f) { for (uint32_t i = 0; i < 3; i++) if (r->fg_slot_fence[i] == f->in_flight) r->fg_slot_fence[i] = VK_NULL_HANDLE; vkDestroyFence(r->device, f->in_flight, NULL); @@ -2966,12 +2939,8 @@ static uint64_t g_fg_interp = 0; static uint64_t g_fg_plast = 0; static uint64_t g_fg_dropped = 0; -// desiredPresentTime (CLOCK_MONOTONIC ns) for the next FG present; 0 = no constraint (real frame never delayed). -#define FG_PRESENT_LEAD_NS 150000ull // wake this much before the deadline so the present latches this vblank +#define FG_PRESENT_LEAD_NS 150000ull -// Present instant for one output frame: a frame at content-phase `phase` of the [prev,curr] interval is shown -// at curr_arrival + phase*(curr-prev), anchoring the cadence to the source clock. Falls back to an evenly -// spaced period grid before the arrivals are known. Not snapped to the panel vsync. static uint64_t fg_compute_deadline(VkRenderer* r, float phase) { uint64_t now = now_monotonic_ns(); uint64_t ca = r->fg_curr_arrival_ns, pa = r->fg_prev_arrival_ns; @@ -3025,8 +2994,6 @@ static void fg_collect_present_timing(VkRenderer* r) { } } -// FG submit. HOLD renders the scene into the history ring (no present); INTERP synthesizes and -// presents an in-between frame; PRESENT_LAST presents the held real frame. 
static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { if (!r->surface_ready || !r->swapchain) return false; pthread_mutex_lock(&r->render_mutex); @@ -3041,7 +3008,7 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { VkFrame* f = &r->frames[r->frame_index]; uint64_t fw0 = now_monotonic_ns(); vkWaitForFences(r->device, 1, &f->in_flight, VK_TRUE, UINT64_MAX); - { double fw = (double)(now_monotonic_ns() - fw0) / 1.0e6; // GL-thread block on the FIF=2 in_flight fence + { double fw = (double)(now_monotonic_ns() - fw0) / 1.0e6; r->fg_fw_sum_ms += fw; if (fw > r->fg_fw_max_ms) r->fg_fw_max_ms = fw; r->fg_fw_n++; } if (!fg_ensure_resources(r)) { pthread_mutex_unlock(&r->render_mutex); return false; } @@ -3086,8 +3053,7 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { r->fg_stage_slot = -1; } - // --- Step 2: composite the incoming frame into a staging slot (neither curr nor prev). ------- - // When dedup is unavailable, fall back to the original behavior (advance every HOLD). + // Step 2: composite the incoming frame into a staging slot (neither curr nor prev). uint32_t stage = (r->fg_history_curr + 1u) % 3u; VkFgImage* hist = &r->fg_history[stage]; r->fg_cnn.featValid[stage] = false; @@ -3131,13 +3097,9 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { } // -------- INTERP / PRESENT_LAST: acquire swapchain image, present -------- - // Quality (deep flag) interpolates the same newest pair as Standard, adding a forward flow - // for a bidirectional warp; the flow cost is amortized across the pair's interps. bool deep = r->fg_deep_mode && (r->fg_history_count >= 2u); bool do_interp = (mode == FG_MODE_INTERP) && (r->fg_history_count >= 2u); - // Interps are optional: under a non-blocking mode acquire without waiting, so a panel that can't run - // ahead skips the synthetic frame instead of stalling. PRESENT_LAST always blocks (never dropped). 
uint64_t acq_timeout = (do_interp && r->active_present_mode != VK_PRESENT_MODE_FIFO_KHR) ? 0u : UINT64_MAX; uint32_t image_index = 0; @@ -3169,8 +3131,6 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { vkResetFences(r->device, 1, &f->in_flight); uint32_t parity = r->fg_history_curr; - // Both modes show the newest pair (history[(parity+2)%3] -> history[parity]); curr is the frame - // PRESENT_LAST blits. Quality differs only by adding the forward flow + bidirectional warp below. uint32_t curr_idx = parity; uint32_t prev_idx = (parity + 2u) % 3u; VkFgImage* curr = &r->fg_history[curr_idx]; @@ -3257,8 +3217,6 @@ static bool fg_submit(VkRenderer* r, FgMode mode, float phase) { r->pipelines.fg_interp_pipe_layout, 0, 1, use_fwd ? &r->fg_interp_set_deep[parity] : &r->fg_interp_set[parity], 0, NULL); struct { float resW, resH, phase, occLo, occHi, mode; } ipc; - // interp norm = 2/resolution must equal 1/flow_field_res so warp magnitude is correct at any - // flowScale (reduces to swapchain res at the 0.5 default). Pass 2*field_res. ipc.resW = 2.0f * (float)r->fg_motion[parity].width; ipc.resH = 2.0f * (float)r->fg_motion[parity].height; ipc.phase = phase; ipc.occLo = r->fg_occ_lo; ipc.occHi = r->fg_occ_hi; @@ -3366,8 +3324,6 @@ static void fg_sleep_to(VkRenderer* r, uint64_t deadline) { while (clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ts, NULL) == EINTR) {} } -// Swapchain recreate from the worker (only the worker touches the swapchain while FG is on). Takes -// render_mutex because it tears down fg_history, which the GL HOLD writes. 
static void fg_worker_recreate(VkRenderer* r) { pthread_mutex_lock(&r->render_mutex); r->surface_ready = false; @@ -3379,21 +3335,17 @@ static void fg_worker_recreate(VkRenderer* r) { pthread_mutex_unlock(&r->render_mutex); } -// Carried between generate and present so the next frame's GPU work overlaps the current frame's deadline -// wait: each output frame then has ~2 present intervals of GPU budget instead of one. typedef struct FgPending { bool valid; - bool need_recreate; // acquire returned OUT_OF_DATE + bool need_recreate; uint32_t image_index; VkSemaphore render_finished; VkSwapchainKHR swapchain; uint64_t deadline_ns; - bool recreate_after; // acquire SUBOPTIMAL + bool recreate_after; bool do_interp; } FgPending; -// Generate ONE queued job: acquire, record flow+generate (or present_last blit), submit. The caller paces and -// presents the returned handle one frame later, so this job's GPU runs during that wait. static FgPending fg_worker_generate(VkRenderer* r, const FgJob* job) { FgPending p = {0}; if (!r->surface_ready || !r->swapchain || r->swapchain_image_count == 0 @@ -3459,8 +3411,6 @@ static FgPending fg_worker_generate(VkRenderer* r, const FgJob* job) { if (f->in_flight) vkWaitForFences(r->device, 1, &f->in_flight, VK_TRUE, UINT64_MAX); bool want_interp = (job->mode == FG_MODE_INTERP); - // Bounded (not UINT64_MAX) so the worker re-checks fg_gen_running ~10x/s and the stop-join never hangs - // on a surface that stopped releasing images. Interp in a non-blocking mode drops immediately. uint64_t acq_timeout = (want_interp && r->active_present_mode != VK_PRESENT_MODE_FIFO_KHR) ? 
0u : 100000000ull; uint32_t image_index = 0; VkResult acq = vkAcquireNextImageKHR(r->device, r->swapchain, acq_timeout, @@ -3476,8 +3426,6 @@ static FgPending fg_worker_generate(VkRenderer* r, const FgJob* job) { pthread_mutex_lock(&r->render_mutex); if (!r->fg_built) { pthread_mutex_unlock(&r->render_mutex); g_fg_dropped++; return p; } - // A job whose pair was reused by 2+ newer promotes falls back to present_last of the LIVE newest - // frame (never drop the acquired image — that would strand its semaphore). bool stale = (uint32_t)(r->fg_promote_seq - job->seq) >= 2u; bool do_interp = want_interp && !stale && r->fg_history_count >= 2u; uint32_t curr_idx = do_interp ? job->curr_idx : r->fg_history_curr; @@ -3544,13 +3492,18 @@ static FgPending fg_worker_generate(VkRenderer* r, const FgJob* job) { r->fg_dump_last_phase = job->phase; if (r->fg_dump_supported && r->fg_dump_armed && r->fg_dump_count < FG_DUMP_N && !(r->fg_dump_count == 0 && job->phase > fg_dump_prevph + 0.01f)) { - fg_record_dump(r, f->cmd, r->fg_cnn.gen[genslot].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, r->fg_dump_count); - VK_LOGI("fgdump[%u] phase=%.3f prev=%u curr=%u seq=%u gen=%ux%u", r->fg_dump_count, job->phase, prev_idx, curr_idx, job->seq, r->fg_cnn.gen[genslot].w, r->fg_cnn.gen[genslot].h); - if (r->fg_dump_count == 0) { // capture the pair's real frames once, into slots 8 and 9 - fg_record_dump(r, f->cmd, r->fg_history[prev_idx].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 8); - fg_record_dump(r, f->cmd, curr->image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 9); + if (r->fg_dump_count == 0) s_fgseq_last_curr = 0xFFFFFFFFu; + if (curr_idx != s_fgseq_last_curr && r->fg_dump_count < FG_DUMP_N) { + fg_record_dump(r, f->cmd, r->fg_history[prev_idx].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, r->fg_dims.width, r->fg_dims.height, r->fg_dump_count, 0); + VK_LOGI("fgseq[%u] REAL (prev=%u curr=%u)", r->fg_dump_count, prev_idx, curr_idx); + r->fg_dump_count++; + s_fgseq_last_curr = 
curr_idx; + } + if (r->fg_dump_count < FG_DUMP_N) { + fg_record_dump(r, f->cmd, r->fg_cnn.gen[genslot].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, r->fg_dims.width, r->fg_dims.height, r->fg_dump_count, 0); + VK_LOGI("fgseq[%u] GENERATED (phase=%.3f prev=%u curr=%u)", r->fg_dump_count, job->phase, prev_idx, curr_idx); + r->fg_dump_count++; } - r->fg_dump_count++; } } @@ -3722,8 +3675,6 @@ static void* fg_gen_loop(void* arg) { setpriority(PRIO_PROCESS, 0, -8); FgPending pending = {0}; while (r->fg_gen_running) { - // Wait for the next job. While a frame is pending, cap the wait (~2 vsync) so a content stall can - // still flush the pending frame instead of holding it. bool got_job; if (pending.valid) { uint64_t wait_ns = (r->fg_display_period_ns ? r->fg_display_period_ns : 16666667ull) * 2u; @@ -3785,13 +3736,13 @@ static void fg_worker_start(VkRenderer* r) { static void fg_worker_stop(VkRenderer* r) { if (!r->fg_gen_started) return; - pthread_mutex_lock(&r->render_mutex); // serialize with fg_enqueue: once this is false (under the - r->fg_gen_started = false; // lock) no future fg_enqueue will sem_post, and any in-flight - r->fg_gen_running = 0; // one already finished its sem_post before we got the lock. 
+ pthread_mutex_lock(&r->render_mutex); // serialize with fg_enqueue + r->fg_gen_started = false; + r->fg_gen_running = 0; pthread_mutex_unlock(&r->render_mutex); - sem_post(&r->fg_gen_sem); // wake the worker (semaphore still valid here) + sem_post(&r->fg_gen_sem); pthread_join(r->fg_gen_thread, NULL); - sem_destroy(&r->fg_gen_sem); // safe: no producer can sem_post this anymore + sem_destroy(&r->fg_gen_sem); pthread_mutex_lock(&r->queue_mutex); vkQueueWaitIdle(r->graphics_queue); pthread_mutex_unlock(&r->queue_mutex); fg_worker_destroy_resources(r); VK_LOGI("fg-gen worker stopped"); @@ -3803,8 +3754,6 @@ static void fg_worker_stop(VkRenderer* r) { #define JNI_FN(name) Java_com_winlator_cmod_runtime_display_renderer_VulkanRenderer_##name -// Native FG pump: a pthread running its own ALooper + AChoreographer; each vsync it calls -// back into VulkanRenderer.fgPumpTickFromNative(frameTimeNanos). static JavaVM* g_pump_jvm = NULL; static jobject g_pump_renderer = NULL; static jmethodID g_pump_tick = NULL; @@ -3827,15 +3776,15 @@ static void fg_pump_frame(long frameTimeNanos, void* data) { static void* fg_pump_loop(void* arg) { (void)arg; - prctl(PR_SET_NAME, "fg-pump", 0, 0, 0); // visible in /proc//task/*/comm for verification - setpriority(PRIO_PROCESS, 0, -8); // urgent-display: fire the vsync callback without scheduling lag + prctl(PR_SET_NAME, "fg-pump", 0, 0, 0); + setpriority(PRIO_PROCESS, 0, -8); JNIEnv* env = NULL; (*g_pump_jvm)->AttachCurrentThread(g_pump_jvm, &env, NULL); ALooper_prepare(ALOOPER_PREPARE_ALLOW_NON_CALLBACKS); g_pump_chor = AChoreographer_getInstance(); if (g_pump_chor) AChoreographer_postFrameCallback(g_pump_chor, fg_pump_frame, NULL); else VK_LOGE("AChoreographer_getInstance returned NULL"); - while (g_pump_running) ALooper_pollOnce(100, NULL, NULL, NULL); // vsync wakes it; 100ms re-checks running + while (g_pump_running) ALooper_pollOnce(100, NULL, NULL, NULL); g_pump_chor = NULL; (*g_pump_jvm)->DetachCurrentThread(g_pump_jvm); return 
NULL; @@ -3878,7 +3827,7 @@ JNIEXPORT jlong JNICALL JNI_FN(nativeCreate)(JNIEnv* env, jclass clazz, r->fg_occ_lo = 0.06f; r->fg_occ_hi = 0.25f; r->fg_min_step = 1; - r->fg_flow_scale = 0.5f; // default = legacy half-res flow; presets override (Eco 0.2 .. Max 0.8) + r->fg_flow_scale = 0.5f; r->fg_use_cnn = cnn_wanted(); r->fg_cnn_gen = true; r->validation_enabled = (enableValidationLayers == JNI_TRUE); @@ -4017,8 +3966,7 @@ JNIEXPORT void JNICALL JNI_FN(nativeDestroy)(JNIEnv* env, jclass clazz, jlong ha destroy_debug_messenger(r); if (r->instance)vkDestroyInstance(r->instance, NULL); - // Clear dispatch BEFORE dlclose so a stray call from another thread faults on NULL - // rather than jumping into freed library memory. + // Clear dispatch BEFORE dlclose so a stray call faults on NULL, not freed memory. vkd_unload(); if (r->vulkan_handle) { dlclose(r->vulkan_handle); r->vulkan_handle = NULL; } @@ -4157,8 +4105,7 @@ JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGeneration)(JNIEnv* env, jclass claz VK_LOGI("Frame generation %s (fp16=%d)", now ? "ENABLED" : "disabled", r->fg_float16_supported); } -// Present mode actually in use (Java convention: 0 FIFO, 1 MAILBOX, 2 IMMEDIATE). FG uses this to -// know whether presents are non-blocking — only then may it post above the panel's idle refresh. +// Present mode actually in use (Java convention: 0 FIFO, 1 MAILBOX, 2 IMMEDIATE). 
JNIEXPORT jint JNICALL JNI_FN(nativeGetActivePresentMode)(JNIEnv* env, jclass clazz, jlong handle) { (void)env; (void)clazz; VkRenderer* r = (VkRenderer*)(intptr_t)handle; @@ -4173,13 +4120,10 @@ JNIEXPORT jint JNICALL JNI_FN(nativeGetActivePresentMode)(JNIEnv* env, jclass cl JNIEXPORT jboolean JNICALL JNI_FN(nativeFrameGenerationSupported)(JNIEnv* env, jclass clazz, jlong handle) { (void)env; (void)clazz; VkRenderer* r = (VkRenderer*)(intptr_t)handle; - // Compute + rgba16f storage are universal on Vulkan 1.1 Android GPUs, and the fp32 motion - // shader covers devices without shaderFloat16, so FG is effectively always available. return (r != NULL) ? JNI_TRUE : JNI_FALSE; } -// Monotonic count of actual vkQueuePresentKHR calls (real + interpolated). The HUD derives -// Display FPS from deltas of this; Engine FPS stays on the X11-Present path in Java. +// Monotonic count of actual vkQueuePresentKHR calls (real + interpolated). JNIEXPORT jlong JNICALL JNI_FN(nativeGetDisplayFrameCount)(JNIEnv* env, jclass clazz, jlong handle) { (void)env; (void)clazz; VkRenderer* r = (VkRenderer*)(intptr_t)handle; @@ -4197,8 +4141,7 @@ JNIEXPORT jboolean JNICALL JNI_FN(nativeRenderHold)(JNIEnv* env, jclass clazz, j return fg_submit(r, FG_MODE_HOLD, 0.5f) ? JNI_TRUE : JNI_FALSE; } -// Content-dedup telemetry for the Java scheduler: out[0]=promote count, out[1]=last promote time (ns), -// out[2]=duplicates dropped, out[3]=distinct total. +// out[0]=promote count, out[1]=last promote time (ns), out[2]=duplicates dropped, out[3]=distinct total. JNIEXPORT void JNICALL JNI_FN(nativeFgPromoteInfo)(JNIEnv* env, jclass clazz, jlong handle, jlongArray out) { (void)clazz; VkRenderer* r = (VkRenderer*)(intptr_t)handle; @@ -4248,8 +4191,7 @@ JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGenParams)(JNIEnv* env, jclass clazz VK_LOGI("FG params set: model(occLo)=%.2f minStep(quality)=%d", lo, r->fg_min_step); } -// Preset flow-resolution dial [0.2,1.0]. 
Sets the desired scale; the render thread rebuilds the -// motion fields (fg_ensure_resources) when it differs from the built value. NOT a per-frame call. +// Preset flow-resolution dial [0.2,1.0]; the render thread rebuilds the motion fields when it changes. JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGenFlowScale)(JNIEnv* env, jclass clazz, jlong handle, jfloat flowScale) { (void)env; (void)clazz; VkRenderer* r = (VkRenderer*)(intptr_t)handle; @@ -4288,8 +4230,7 @@ JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGenUseCnn)(JNIEnv* env, jclass clazz pthread_mutex_unlock(&r->render_mutex); } -// Generation method: false = interpolation, true = extrapolation. Re-primes the cadence so the -// new method starts from a clean pair. +// Generation method: false = interpolation, true = extrapolation. JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGenExtrapolate)(JNIEnv* env, jclass clazz, jlong handle, jboolean extrapolate) { (void)env; (void)clazz; VkRenderer* r = (VkRenderer*)(intptr_t)handle; @@ -4459,8 +4400,6 @@ JNIEXPORT void JNICALL JNI_FN(nativeSetScene)(JNIEnv* env, jclass clazz, jlong h } pthread_mutex_unlock(&r->scene_mutex); - // Offscreen rebuild is handled in record_and_submit_frame under render_mutex; nothing - // here needs to touch swapchain-tied resources. } // No-op, kept for Java-side ABI compatibility (FPS pacing is enforced elsewhere). @@ -4498,12 +4437,9 @@ JNIEXPORT void JNICALL JNI_FN(nativeSetPresentMode)(JNIEnv* env, jclass clazz, j if (r->target_present_mode == vk_mode) return; r->target_present_mode = vk_mode; - // Rebuild swapchain only if one currently exists; otherwise the next create_swapchain - // (e.g. on first surface attach) will pick up the new mode automatically. if (!r->surface) return; - // The FG worker owns the swapchain, so stop it before tearing it down and restart it after. 
bool restart_worker = r->fg_gen_started; - fg_worker_stop(r); // no-op if not running; takes render_mutex internally + joins the worker + fg_worker_stop(r); lifecycle_begin(r); if (r->device) vkDeviceWaitIdle(r->device); uint32_t fw = r->surface_extent.width; @@ -4569,8 +4505,6 @@ JNIEXPORT jboolean JNICALL TEX_FN(nativeUpdate)(JNIEnv* env, jclass clazz, jlong ? JNI_TRUE : JNI_FALSE; } -// Grow-only scratch for parsed batch entries. Render-thread-only, so unlocked; every element -// is fully populated before use, so no zeroing. (Mirrors get_prepared_scratch in vk_image.c.) static VkTextureBatchUpload* get_entry_scratch(VkRenderer* r, uint32_t count) { if (r->batch_entry_cap < count) { uint32_t new_cap = r->batch_entry_cap ? r->batch_entry_cap : 64; diff --git a/app/src/main/cpp/winlator/vk/vk_state.h b/app/src/main/cpp/winlator/vk/vk_state.h index 8b043b393..fb96ca2ef 100644 --- a/app/src/main/cpp/winlator/vk/vk_state.h +++ b/app/src/main/cpp/winlator/vk/vk_state.h @@ -11,7 +11,6 @@ #include #include -// All vk* calls route through the dispatch table (do not include directly). #include "vk_dispatch.h" #define VK_LOG_TAG "VkRenderer" @@ -23,19 +22,17 @@ #define VK_MAX_SWAPCHAIN_IMAGES 8 #define FG_JOB_RING 6u -// A queued FG present job, snapshotted at enqueue and executed by the worker pthread. typedef struct FgJob { uint8_t mode; // 1 = INTERP, 2 = PRESENT_LAST - uint8_t deep; // bidirectional warp (model-1) + uint8_t deep; float phase; - uint32_t curr_idx; // history slots, snapshotted at enqueue + uint32_t curr_idx; uint32_t prev_idx; - uint64_t deadline_ns; // free content-rate present target, ns CLOCK_MONOTONIC (worker paces to this) - uint32_t seq; // fg_promote_seq snapshot — worker drops the job if the slot was reused + uint64_t deadline_ns; + uint32_t seq; } FgJob; #define VK_MAX_EFFECTS 8 #define VK_MAX_RENDERABLE_WINDOWS 64 -// Number of in-flight upload slots. 
#define VK_STAGING_POOL_SIZE 8 #define VK_CHECK(expr) do { \ @@ -46,16 +43,15 @@ typedef struct FgJob { } while (0) // ============================================================ -// Texture (drives both regular CPU-uploaded images and AHB imports) +// Texture // ============================================================ -// A CPU-uploaded texture's slice of an image sub-allocator block (defined below). struct VkMemBlock; typedef struct VkSuballoc { struct VkMemBlock* block; // owning block (NULL => not sub-allocated) - VkDeviceMemory memory; // == block->memory, cached for vkBindImageMemory - VkDeviceSize bind_offset; // aligned offset the image is bound at - VkDeviceSize span_offset; // reserved span start/length, returned on free + VkDeviceMemory memory; + VkDeviceSize bind_offset; + VkDeviceSize span_offset; VkDeviceSize span_size; } VkSuballoc; @@ -63,28 +59,22 @@ typedef struct VkTexture { VkImage image; VkImageView view; VkDeviceMemory memory; - VkSampler sampler; // owned per-texture (simple); could be cached + VkSampler sampler; // owned per-texture VkSamplerYcbcrConversion ycbcr; // VK_NULL_HANDLE if unused - VkDescriptorSet descriptor_set; // one per texture, lives until destruction + VkDescriptorSet descriptor_set; uint32_t width; uint32_t height; VkFormat format; VkImageLayout layout; - // Lifetime: when set, owned by texture and freed on destroy. AHardwareBuffer* ahb; - // Track readiness: true once image+view+sampler are valid. bool ready; - // True if this texture should never be uploaded to (e.g. AHB scanout). - bool external; - // Prevent duplicate deferred frees if Java schedules destruction more than once. + bool external; // never uploaded to (e.g. AHB scanout) bool destroy_scheduled; - // suballocated: backing comes from the shared sub-allocator (suballoc) and `memory` is - // unused. AHB imports and the dedicated fallback leave this false. 
- bool suballocated; + bool suballocated; // backing from the shared sub-allocator; `memory` unused VkSuballoc suballoc; } VkTexture; @@ -125,14 +115,14 @@ typedef enum VkEffectType { typedef struct VkEffectSlot { VkEffectType type; - int mode; // effect-specific mode - float param0; // generic + int mode; + float param0; float param1; - float param2; // generic + float param2; } VkEffectSlot; // ============================================================ -// Scene snapshot (mutex-protected, written from Java threads, read on render thread) +// Scene snapshot // ============================================================ typedef struct VkRenderableWindow { @@ -140,7 +130,7 @@ typedef struct VkRenderableWindow { int x, y; uint32_t width, height; float u0, v0, u1, v1; - bool direct_scanout; // hint, currently unused + bool direct_scanout; // unused } VkRenderableWindow; typedef struct VkScene { @@ -154,14 +144,12 @@ typedef struct VkScene { uint32_t cursor_height; bool cursor_visible; - // Transform parameters - tmpXForm2 of GLRenderer applied to all windows. float xform[6]; bool scissor_enabled; int scissor_x, scissor_y, scissor_w, scissor_h; int viewport_x, viewport_y, viewport_w, viewport_h; bool viewport_set; - // Render dims (logical screen size). 
uint32_t screen_width; uint32_t screen_height; uint32_t source_width; @@ -180,8 +168,8 @@ typedef struct VkScene { typedef struct VkPipelineSet { VkDescriptorSetLayout sampler_set_layout; - VkPipelineLayout window_layout; // push constants: xform[6] + viewSize - VkPipelineLayout effect_layout; // push constants: resolution + effect params + VkPipelineLayout window_layout; // push: xform[6] + viewSize + VkPipelineLayout effect_layout; // push: resolution + effect params VkPipeline window_pipeline; VkPipeline cursor_pipeline; VkPipeline blit_pipeline; @@ -195,13 +183,13 @@ typedef struct VkPipelineSet { VkRenderPass swapchain_pass; // load=clear, store=store, final=present VkRenderPass offscreen_pass; // load=clear, store=store, final=shader-read - // --- Frame generation (created once with the rest; persist across swapchain rebuilds) --- - VkDescriptorSetLayout fg_motion_layout; // set0: binding0,1 sampler(prev,curr) + binding2 STORAGE_IMAGE(mv), COMPUTE - VkDescriptorSetLayout fg_interp_layout; // set0: 4x COMBINED_IMAGE_SAMPLER (prev,curr,mvBwd,mvFwd), FRAGMENT - VkPipelineLayout fg_motion_pipe_layout; // [motion] set + 32B compute push range - VkPipelineLayout fg_interp_pipe_layout; // [interp] set + 24B fragment push range - VkPipeline fg_motion_pipeline; // compute (block matching) - VkPipeline fg_interp_pipeline; // graphics (interpolation, swapchain_pass) + // --- Frame generation --- + VkDescriptorSetLayout fg_motion_layout; // set0: sampler(prev,curr) + STORAGE_IMAGE(mv), COMPUTE + VkDescriptorSetLayout fg_interp_layout; // set0: 4x COMBINED_IMAGE_SAMPLER, FRAGMENT + VkPipelineLayout fg_motion_pipe_layout; // motion set + 32B compute push range + VkPipelineLayout fg_interp_pipe_layout; // interp set + 24B fragment push range + VkPipeline fg_motion_pipeline; // compute + VkPipeline fg_interp_pipeline; // graphics (swapchain_pass) VkDescriptorSetLayout cnn_pyramid_dsl, cnn_conv_dsl, cnn_cost9_dsl, cnn_flowreg_dsl, cnn_warpfollow_dsl, cnn_generate_dsl; @@ 
-250,13 +238,12 @@ typedef struct VkSgsr1State { uint32_t height; } VkSgsr1State; -// A history frame (full res, render target + sampled) or the half-res motion field (rgba16f). typedef struct VkFgImage { VkImage image; VkImageView view; VkDeviceMemory memory; VkFramebuffer framebuffer; // history targets only; VK_NULL_HANDLE for the motion field - VkDescriptorSet blit_set; // history only: single-binding set (sampler_set_layout) for present + VkDescriptorSet blit_set; // history only uint32_t width, height; } VkFgImage; @@ -299,10 +286,10 @@ typedef struct VkFgCnn { VkDeviceMemory wMem[64]; VkDeviceSize wLen[64]; - // terminal generate (wnfg_13 occlusion + wnfg_04 generate) + // wnfg_13 occlusion + wnfg_04 generate VkCnnImg occOut[3][6]; VkCnnImg gen[3]; // RGBA16F generated frame ring, full swapchain res - VkDescriptorSet genSet[3]; // sampler_set_layout, binding0 = gen[i].view (present blit) + VkDescriptorSet genSet[3]; // binding0 = gen[i].view (present blit) VkBuffer genUbo[3]; // per-frame {mvScale, t, _} for wnfg_04 VkDeviceMemory genUboMem[3]; void* genUboMap[3]; @@ -315,21 +302,21 @@ typedef struct VkFgCnn { typedef struct VkStagingSlot { pthread_mutex_t mutex; // held by current owner from acquire to release - VkCommandPool cmd_pool; // exclusive to this slot, no global cmd pool sync needed + VkCommandPool cmd_pool; // exclusive to this slot VkCommandBuffer cmd; VkBuffer buffer; VkDeviceMemory memory; void* mapped; // persistently mapped HOST_VISIBLE memory - VkDeviceSize size; // current allocation; grows on demand - VkFence fence; // signaled when this slot's last submission completes + VkDeviceSize size; // grows on demand + VkFence fence; } VkStagingSlot; typedef struct VkStagingPool { VkStagingSlot slots[VK_STAGING_POOL_SIZE]; - uint32_t valid_slots; // count of slots whose per-slot mutex is initialized + uint32_t valid_slots; // slots whose per-slot mutex is initialized uint64_t next; // round-robin counter pthread_mutex_t mutex; // protects `next` 
only - bool mutex_init; // pool-mutex initialization flag (for safe destroy) + bool mutex_init; bool initialized; } VkStagingPool; @@ -348,30 +335,24 @@ typedef struct VkGraveSlot { // ============================================================ typedef struct VkDeviceCaps { - // Identity uint32_t vendor_id; uint32_t device_id; uint32_t driver_version; bool is_adreno; // vendor_id == 0x5143 (Qualcomm) - // Limits / sizing VkPhysicalDeviceLimits limits; uint32_t descriptor_pool_capacity; - // Format choices resolved against driver feature support VkFormat offscreen_format; // BGRA preferred, RGBA fallback VkFormat upload_format; // BGRA preferred; RGBA fallback uses CPU-side swizzle bool upload_needs_bgra_swizzle; - // Diagnostic bool ahb_bgra_supported; // VK_FORMAT_B8G8R8A8_UNORM importable from AHB } VkDeviceCaps; // ============================================================ // Image sub-allocator // ============================================================ -// CPU-uploaded textures share large DEVICE_LOCAL blocks via a first-fit free list; -// AHB imports stay dedicated. #define VK_SUBALLOC_BLOCK_SIZE (32u * 1024u * 1024u) // 32 MiB default block @@ -391,7 +372,7 @@ typedef struct VkMemBlock { typedef struct VkImageSuballocator { VkMemBlock* blocks; - VkDeviceSize block_size; // size used when carving a new block + VkDeviceSize block_size; pthread_mutex_t mutex; // alloc (producer threads) vs free (render thread) bool mutex_init; } VkImageSuballocator; @@ -404,9 +385,8 @@ typedef struct VkRenderer { // Lifecycle bool initialized; bool surface_ready; - // Set when using a fallback swapchain whose preTransform differs from currentTransform. 
bool ignore_suboptimal; - pthread_mutex_t scene_mutex; // guards r->scene + graveyard slots; held briefly by all + pthread_mutex_t scene_mutex; // guards r->scene + graveyard slots pthread_mutex_t queue_mutex; // serializes vkQueueSubmit across threads pthread_mutex_t texture_mutex; // guards live_textures pthread_mutex_t descriptor_mutex;// external sync for descriptor_pool alloc/free @@ -451,32 +431,32 @@ typedef struct VkRenderer { bool fg_enabled; bool fg_float16_supported; // shaderFloat16 available (selects the fp16 motion shader) bool fg_built; // history + motion images allocated at fg_dims - VkExtent2D fg_dims; // extent the fg images were built for + VkExtent2D fg_dims; VkFgImage fg_history[3]; // composited-scene ring; fg_history_curr = newest - VkFgImage fg_motion[3]; // per-parity rgba16f half-res backward-flow ring (1 per history slot) - VkFgImage fg_motion_fwd[3]; // per-parity rgba16f half-res forward-flow ring (Quality bidirectional) - VkFgImage fg_coarse[3]; // per-parity quarter-res backward coarse-flow (coarse-to-fine seed) - VkFgImage fg_coarse_fwd[3]; // per-parity quarter-res forward coarse-flow - VkSampler fg_sampler; // linear, clamp — for all fg sampled reads - VkDescriptorSet fg_motion_set[3]; // [curr] prev,curr,coarse samplers + motion storage (fine pass) - VkDescriptorSet fg_motion_set_fwd[3]; // [curr] swapped prev,curr + fwd-coarse + fwd-motion storage - VkDescriptorSet fg_coarse_set[3]; // [curr] prev,curr + coarse-bwd storage (coarse pass) - VkDescriptorSet fg_coarse_set_fwd[3]; // [curr] swapped prev,curr + coarse-fwd storage - VkDescriptorSet fg_interp_set[3]; // [curr] prev,curr,mvBwd,mvFwd samplers (interpolate.frag) - VkDescriptorSet fg_interp_set_deep[3]; // deep mode: interp the pair one step behind the newest + VkFgImage fg_motion[3]; // half-res backward-flow ring + VkFgImage fg_motion_fwd[3]; // half-res forward-flow ring + VkFgImage fg_coarse[3]; // quarter-res backward coarse-flow + VkFgImage fg_coarse_fwd[3]; // 
quarter-res forward coarse-flow + VkSampler fg_sampler; // linear, clamp + VkDescriptorSet fg_motion_set[3]; // prev,curr,coarse samplers + motion storage + VkDescriptorSet fg_motion_set_fwd[3]; + VkDescriptorSet fg_coarse_set[3]; + VkDescriptorSet fg_coarse_set_fwd[3]; + VkDescriptorSet fg_interp_set[3]; // prev,curr,mvBwd,mvFwd samplers + VkDescriptorSet fg_interp_set_deep[3]; VkFence fg_slot_fence[3]; // last submit that used each history slot - uint32_t fg_history_curr; // index (0..2) of the most-recent composited frame - uint32_t fg_history_count; // 0,1,2,3 — valid history frames - uint64_t fg_present_count; // actual vkQueuePresentKHR calls; guarded by queue_mutex - bool fg_motion_valid; // backward flow current for the live pair (reused across multi-interp) - bool fg_motion_fwd_valid; // forward flow current — computed on the 2nd interp (Quality) to spread cost - bool fg_deep_mode; // quality pipeline: bidirectional warp (adds a forward flow) + uint32_t fg_history_curr; // index of the most-recent composited frame + uint32_t fg_history_count; // valid history frames + uint64_t fg_present_count; // vkQueuePresentKHR calls; guarded by queue_mutex + bool fg_motion_valid; + bool fg_motion_fwd_valid; + bool fg_deep_mode; // bidirectional warp bool fg_extrapolate; // false=interpolate, true=extrapolate forward - uint32_t fg_target_fif; // requested compositor frames-in-flight 1..3 (applied live) - float fg_occ_lo; // interpolate.frag consistency lower bound (smoothness) - float fg_occ_hi; // interpolate.frag consistency upper bound (smoothness) - int32_t fg_min_step; // motion.comp lowest TSS step (quality preset; 1 = full search) - float fg_flow_scale; // flow-field resolution scale [0.2,1.0] (preset GPU-cost dial) + uint32_t fg_target_fif; // requested compositor frames-in-flight 1..3 + float fg_occ_lo; // consistency lower bound + float fg_occ_hi; // consistency upper bound + int32_t fg_min_step; // lowest TSS step (1 = full search) + float fg_flow_scale; // 
flow-field resolution scale [0.2,1.0] float fg_built_flow_scale; // flow_scale baked into the current motion resources bool fg_use_cnn; @@ -485,42 +465,38 @@ typedef struct VkRenderer { uint32_t fg_cnn_flow_seq; VkFgCnn fg_cnn; - // --- Content-duplicate detection ------------------------------------------------------------ - // Each composited frame is downsampled to a tiny host buffer; the HOLD promotes the interp - // pair only on a genuine content change so duplicate inputs don't advance. + // --- Content-duplicate detection --- VkImage fg_sig_img; // tiny blit target, reused each HOLD VkDeviceMemory fg_sig_img_mem; - VkBuffer fg_sig_buf[3]; // per-slot host-visible downsample of each history slot + VkBuffer fg_sig_buf[3]; // per-slot host-visible downsample VkDeviceMemory fg_sig_buf_mem[3]; - void* fg_sig_ptr[3]; // persistent map of fg_sig_buf + void* fg_sig_ptr[3]; bool fg_sig_supported; // blit+readback path created OK (else dedup disabled) - int32_t fg_stage_slot; // history slot holding the pending (un-promoted) frame, -1 = none - VkFence fg_stage_fence; // fence that produced fg_sig_buf[fg_stage_slot] - uint64_t fg_last_promote_ns; // last promotion time (freeze backstop) - double fg_last_sig_delta; // last measured content delta (diagnostics) - uint64_t fg_dup_dropped, fg_distinct; // dedup telemetry - uint64_t fg_promote_count; // monotonic count of promotions (distinct content committed) - uint64_t fg_promote_ns; // CLOCK_MONOTONIC of the most recent promotion (for Java phase anchor) - - // --- Debug burst dump (debug.winnative.fgdump 1) -------------------------------------------- - // Captures FG_DUMP_N consecutive generated frames + the pair's two real history frames to disk. 
- bool fg_dump_supported; // dump image + buffers created OK - bool fg_dump_armed; // a burst is in progress - bool fg_dump_seen_zero; // prop read "0" since the last dump (edge-trigger gate) - uint32_t fg_dump_count; // frames captured so far in the current burst - float fg_dump_last_phase; // previous interp phase (start a burst at a pair boundary) - VkImage fg_dump_img; // 480x270 RGBA8 blit target (reused per capture) + int32_t fg_stage_slot; // history slot holding the pending frame, -1 = none + VkFence fg_stage_fence; + uint64_t fg_last_promote_ns; + double fg_last_sig_delta; + uint64_t fg_dup_dropped, fg_distinct; + uint64_t fg_promote_count; + uint64_t fg_promote_ns; // CLOCK_MONOTONIC of the most recent promotion + + // --- Debug burst dump (debug.winnative.fgdump 1) --- + bool fg_dump_supported; + bool fg_dump_armed; + bool fg_dump_seen_zero; + uint32_t fg_dump_count; + float fg_dump_last_phase; + VkImage fg_dump_img; // 480x270 RGBA8 blit target VkDeviceMemory fg_dump_img_mem; VkBuffer fg_dump_buf[10]; // 8 gen + prev + curr, host-visible VkDeviceMemory fg_dump_buf_mem[10]; - void* fg_dump_ptr[10]; // persistent map of fg_dump_buf + void* fg_dump_ptr[10]; // Quad vertex buffer (window/cursor) VkBuffer quad_vbo; VkDeviceMemory quad_vbo_memory; - // Shared sampler for all CPU-uploaded textures and AHB textures that don't need a Ycbcr - // conversion. Created once at init. + // Shared sampler for CPU-uploaded and non-Ycbcr AHB textures. VkSampler shared_sampler; VkSampler shared_sampler_nearest; VkSampler shared_sampler_cubic; @@ -556,23 +532,22 @@ typedef struct VkRenderer { // Cached device capabilities populated by query_device_caps(). VkDeviceCaps caps; - // Function pointers loaded via vkGetDeviceProcAddr (not all are statically exported by - // the Android Vulkan loader, even in Vulkan 1.1). + // Function pointers loaded via vkGetDeviceProcAddr. 
PFN_vkGetAndroidHardwareBufferPropertiesANDROID fnGetAhbProps; PFN_vkCreateSamplerYcbcrConversion fnCreateYcbcr; PFN_vkDestroySamplerYcbcrConversion fnDestroyYcbcr; PFN_vkCreateDebugUtilsMessengerEXT fnCreateDebugUtilsMessenger; PFN_vkDestroyDebugUtilsMessengerEXT fnDestroyDebugUtilsMessenger; - // VK_GOOGLE_display_timing — capability-gated FG present-pacing hint + telemetry (no-op when absent). + // VK_GOOGLE_display_timing — present-pacing hint + telemetry (no-op when absent). bool ext_display_timing; uint64_t refresh_duration_ns; // panel vsync period from the swapchain (fallback) uint64_t fg_present_period_ns; // target inter-present interval (ns) fed from Java - uint64_t fg_present_deadline_ns; // unsnapped deadline accumulator (target-rate grid) - uint64_t fg_present_target_ns; // free content-rate sleep/present target for the next present - uint64_t fg_display_period_ns; // live panel vsync period fed from Java (Choreographer EMA) - uint64_t fg_vsync_anchor_ns; // latest Choreographer vsync timestamp (CLOCK_MONOTONIC) - uint64_t fg_prev_arrival_ns; // real-frame arrival times, for time-based interp phase + uint64_t fg_present_deadline_ns; // unsnapped deadline accumulator + uint64_t fg_present_target_ns; // present target for the next present + uint64_t fg_display_period_ns; // live panel vsync period fed from Java + uint64_t fg_vsync_anchor_ns; // latest Choreographer vsync timestamp + uint64_t fg_prev_arrival_ns; // real-frame arrival times uint64_t fg_curr_arrival_ns; uint64_t fg_t_last_ns; // present-interval telemetry accumulators uint32_t fg_t_count; @@ -580,18 +555,17 @@ typedef struct VkRenderer { double fg_t_sumsq_ms; double fg_t_min_ms; double fg_t_max_ms; - double fg_fw_sum_ms; // in_flight fence-wait telemetry (GL-thread block before present) + double fg_fw_sum_ms; // in_flight fence-wait telemetry double fg_fw_max_ms; uint32_t fg_fw_n; - float fg_dbg_phase[8]; // interp phases accumulated for the in-progress period - float fg_dbg_done[8]; // 
last completed period's phases (logged in telemetry) + float fg_dbg_phase[8]; + float fg_dbg_done[8]; uint32_t fg_dbg_n; uint32_t fg_dbg_done_n; uint64_t fg_dbg_last_curr; uint32_t fg_present_id; - // FG generation worker: GL thread enqueues; this pthread runs flow+generate+pace+present - // and owns the swapchain present path while FG is on. + // FG generation worker: GL thread enqueues; this pthread runs flow+generate+pace+present. FgJob fg_job_ring[FG_JOB_RING]; // SPSC: GL produces (tail), worker consumes (head) volatile uint32_t fg_job_head; volatile uint32_t fg_job_tail; @@ -599,16 +573,16 @@ typedef struct VkRenderer { pthread_t fg_gen_thread; volatile int fg_gen_running; bool fg_gen_started; - VkCommandPool fg_worker_pool; // worker-owned (command pools are not thread-safe) + VkCommandPool fg_worker_pool; // worker-owned VkFrame fg_worker_frames[3]; // worker-owned cmd + fence + image_available uint32_t fg_worker_index; volatile uint32_t fg_promote_seq; // ++ on each HOLD promote; jobs snapshot it - volatile uint32_t fg_swapchain_gen; // ++ on swapchain recreate; worker drops present across a change + volatile uint32_t fg_swapchain_gen; // ++ on swapchain recreate PFN_vkGetRefreshCycleDurationGOOGLE fnGetRefreshCycleDuration; PFN_vkGetPastPresentationTimingGOOGLE fnGetPastPresentationTiming; - // VK_NV_optical_flow: driver-accelerated motion estimation (replaces the classical block-match flow). + // VK_NV_optical_flow: driver-accelerated motion estimation. bool fg_optical_flow; // extension available + feature enabled PFN_vkGetPhysicalDeviceOpticalFlowImageFormatsNV fnOFFormats; PFN_vkCreateOpticalFlowSessionNV fnOFCreate; @@ -622,21 +596,18 @@ typedef struct VkRenderer { // Image sub-allocator for CPU-uploaded textures (created in nativeCreate after device). VkImageSuballocator image_suballoc; - // Grow-only scratch for the batch upload path (render-thread-only, so unlocked; zero - // steady-state heap traffic). 
- VkTextureBatchUpload* batch_entry_scratch; // nativeBatchUpdate: parsed JNI entries + // Grow-only scratch for the batch upload path (render-thread-only). + VkTextureBatchUpload* batch_entry_scratch; // parsed JNI entries uint32_t batch_entry_cap; - void* batch_prepared_scratch; // vkr_texture_batch_update: PreparedBatchUpload[] + void* batch_prepared_scratch; // PreparedBatchUpload[] uint32_t batch_prepared_cap; // Scene state VkScene scene; - // Compositor present mode requested by Java (default FIFO). Validated against - // device-supported modes in create_swapchain; falls back to FIFO if unavailable. + // Present mode requested by Java; falls back to FIFO if unavailable. VkPresentModeKHR target_present_mode; - // Present mode actually selected by the last create_swapchain (target may fall back). Read by - // Java (nativeGetActivePresentMode) so frame generation knows whether presents are non-blocking. + // Present mode actually selected by the last create_swapchain. VkPresentModeKHR active_present_mode; } VkRenderer; @@ -665,8 +636,7 @@ void vkr_image_barrier(VkCommandBuffer cmd, VkImage image, VkImageLayout f VkAccessFlags src_access, VkAccessFlags dst_access); bool vkr_create_sampler(VkRenderer* r, VkSamplerYcbcrConversion ycbcr, VkSampler* out); void vkr_retarget_shared_sampler(VkRenderer* r); -// Async layout transition through the staging pool; does not wait for the GPU. -// Returns false on submit failure. +// Async layout transition through the staging pool; returns false on submit failure. bool vkr_submit_async_transition(VkRenderer* r, VkImage image, VkImageLayout from, VkImageLayout to, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, @@ -678,7 +648,6 @@ void vkr_staging_pool_destroy(VkRenderer* r); VkStagingSlot* vkr_staging_pool_acquire(VkRenderer* r, VkDeviceSize needed); void vkr_staging_pool_release(VkStagingSlot* slot); -// Image sub-allocator lifecycle (alloc/free are static in vk_image.c). 
Destroy after all -// textures are released. +// Image sub-allocator lifecycle (alloc/free are static in vk_image.c). void vkr_suballoc_init(VkRenderer* r); void vkr_suballoc_destroy(VkRenderer* r); diff --git a/app/src/main/runtime/display/XServerDisplayActivity.java b/app/src/main/runtime/display/XServerDisplayActivity.java index 900932a6f..1e5c5e2e5 100644 --- a/app/src/main/runtime/display/XServerDisplayActivity.java +++ b/app/src/main/runtime/display/XServerDisplayActivity.java @@ -697,7 +697,6 @@ private void applyPreferredRefreshRate() { if (renderer != null && renderer.isFrameGenerationEnabled()) { int panelMax = RefreshRateUtils.getMaxSupportedRefreshRate(this); renderer.setFrameGenDisplayCap(panelMax); - // Hold the panel's native max mode while FG is on rather than down-switching. RefreshRateUtils.applyPreferredRefreshRate(this, panelMax, panelMax); requestSurfaceFrameRate((float) panelMax); lastLoggedRefreshHz = 0f; @@ -3911,7 +3910,7 @@ private void handleNavigationBackPressed() { private void openDrawerMenu() { releasePointerCapture(); if (xServerView != null && xServerView.getRenderer() != null) - xServerView.getRenderer().fgSetOverlayActive(true); // overlay GPU contention isn't a game slowdown + xServerView.getRenderer().fgSetOverlayActive(true); renderDrawerMenu(); if (drawerStateHolder != null) { drawerStateHolder.openDrawer(); @@ -3926,7 +3925,7 @@ private void closeDrawerMenu() { drawerStateHolder.closeDrawer(); } if (xServerView != null && xServerView.getRenderer() != null) - xServerView.getRenderer().fgSetOverlayActive(false); // clears overlay + re-anchors the FG clock fresh + xServerView.getRenderer().fgSetOverlayActive(false); tryCapturePointer(); } @@ -6644,14 +6643,6 @@ private void setupUI() { FrameLayout rootView = xServerDisplayFrame; xServerView = new XServerSurfaceView(this, xServer); final VulkanRenderer renderer = xServerView.getRenderer(); - // The compositor only needs the guest driver (Turnip) when native frame - // generation 
runs its optical-flow compute in the compositor process. For normal - // rendering use the System (Qualcomm) driver: it imports the game's presented AHB - // correctly on all GPUs, whereas the guest Turnip's dedicated-AHB import mis-reads - // the producer's tile layout on Adreno 840 -> vertical-stripe corruption. This is - // a regression from guest-matching the compositor for FG (a6acd95c); the same - // WN-Turnip rendered MHS fine on this device before that. FG-off games (the - // default) keep the pre-FG System path that always worked. boolean fgWantsCompositorDriver = fgPrefBool("native_frame_generation", false); String compositorGraphicsDriver = "System"; if (fgWantsCompositorDriver) { @@ -7313,10 +7304,6 @@ private void extractGraphicsDriverFiles() { String bcnEmulation = graphicsDriverConfig.get("bcnEmulation"); String bcnEmulationType = graphicsDriverConfig.get("bcnEmulationType"); - // The libvulkan_wrapper's BCn emulation patches the Adreno driver to advertise - // native BC support; that patch corrupts rendering on newer Adreno GPUs (Adreno - // 840 whole-frame vertical stripes). Mesa Turnip decodes BC itself, so on Adreno - // force "none" (no wrapper patch). Non-Adreno GPUs keep the configured value. if (bcnEmulation == null || com.winlator.cmod.runtime.system.GPUInformation.isAdrenoGPU(this)) { bcnEmulation = "none"; } diff --git a/app/src/main/runtime/display/renderer/VulkanRenderer.java b/app/src/main/runtime/display/renderer/VulkanRenderer.java index 9ae7995b8..2492b5143 100644 --- a/app/src/main/runtime/display/renderer/VulkanRenderer.java +++ b/app/src/main/runtime/display/renderer/VulkanRenderer.java @@ -235,7 +235,6 @@ public void setFrameGeneration(boolean enabled) { synchronized (this) { if (nativeHandle != 0) { nativeSetFrameGeneration(nativeHandle, enabled); - // Non-blocking present so the worker's deadline pacer drives the present instant (not FIFO/vsync). nativeSetPresentMode(nativeHandle, enabled ? 
PRESENT_MODE_MAILBOX : requestedPresentMode); fgActivePresentMode = nativeGetActivePresentMode(nativeHandle); } @@ -427,20 +426,15 @@ private void fgCadenceDiag() { if (nativeHandle != 0) { nativeFgPromoteInfo(nativeHandle, fgPromoteInfo); dupDrop = fgPromoteInfo[2]; distinct = fgPromoteInfo[3]; } long dDup = dupDrop - fgDiagPrevDup, dDist = distinct - fgDiagPrevDist; fgDiagPrevDup = dupDrop; fgDiagPrevDist = distinct; - // Adaptive multiplier: when the delivered rate can't reach the effective target, step the - // working multiplier down (floor 2x); fgMultiplier stays the user ceiling. Window >1.5s ignored. double secs = (double) (now - fgDiagLastNs) / 1e9; if (fgLockedGameHz > 0.0 && secs > 0.0 && secs <= 1.5) { double deliveredHz = (double) (fgDiagInterp + fgDiagReal) / secs; double targetEff = Math.max(1, fgCadenceM) * fgLockedGameHz; if (fgDisplayCapHz > 0) targetEff = Math.min(targetEff, (double) fgDisplayCapHz); - // Step down only on a sustained shortfall (>=4 consecutive slow seconds). Floor 2x. if (deliveredHz > 0.0 && deliveredHz < 0.85 * targetEff && !fgOverlayActive) { fgRecoverSecs = 0; if (++fgBoundSecs >= 4 && fgEffectiveMultiplier > 2) { fgEffectiveMultiplier--; fgBoundSecs = 0; } } else if (deliveredHz >= 0.95 * targetEff && fgEffectiveMultiplier < fgMultiplier && !fgOverlayActive) { - // Delivery is keeping up with the current working multiplier and we're below the user - // ceiling -> climb back up so transient load doesn't permanently strand us at a low rate. fgBoundSecs = 0; if (++fgRecoverSecs >= 3) { fgEffectiveMultiplier++; fgRecoverSecs = 0; } } else { @@ -458,7 +452,6 @@ private void fgCadenceDiag() { } private long fgDiagDedupDropped, fgDiagAccepted, fgDiagPrevDup, fgDiagPrevDist; - // GL-thread wall-time per present, bucketed by composite (HOLD) vs interp. Reports every ~2s. 
private void fgInstrument(long usCpu, boolean wasHold) { double ms = usCpu / 1000.0; if (wasHold) { fgInstHoldN++; fgInstHoldSum += ms; if (ms > fgInstHoldMax) fgInstHoldMax = ms; } @@ -475,9 +468,6 @@ private void fgInstrument(long usCpu, boolean wasHold) { } } - // Slot-grid placement: with the panel pinned to ~M x gameHz, each game period spans M display - // ticks; tick k since arrival presents an interp at phase (k+1)/M, or the real frame at the end. - // A continuous-phase fallback covers non-integer panel:game ratios. private final long[] fgPromoteInfo = new long[4]; private long fgPromoteSeen = 0; private long fgLastPromoteNs = 0, fgPrevPromoteNs = 0; // times of the last two distinct content frames @@ -529,7 +519,6 @@ private int fgEmitOne() { if (fgPromoteInfo[0] != fgPromoteSeen) { fgPromoteSeen = fgPromoteInfo[0]; promoted = true; - // Anchor to the precise buffer-swap arrival time, not the vblank-quantized promote time. long pNs = fgLastGameNs != 0L ? fgLastGameNs : (fgPromoteInfo[1] != 0L ? fgPromoteInfo[1] : System.nanoTime()); if (fgLastPromoteNs != 0L) { @@ -538,7 +527,6 @@ private int fgEmitOne() { fgContentPeriodNs = fgContentPeriodNs == 0L ? d : fgContentPeriodNs + (d - fgContentPeriodNs) / 8L; double inst = 1.0e9 / (double) fgContentPeriodNs; - // Light EMA toward the measured content rate (converges fast, rejects outliers). fgLockedGameHz = fgLockedGameHz <= 0.0 ? inst : fgLockedGameHz + (inst - fgLockedGameHz) * 0.25; fgGameDriftFrames = 0; @@ -562,20 +550,12 @@ private int fgEmitOne() { if (newGame || dirty) { nativePresentLast(nativeHandle, 0f, fgPrevPromoteNs, fgLastPromoteNs); return 2; } return 0; } - // NOTE: a UI-only recomposite (dirty && !newGame) is handled at the END, only when - // the content interval is fully spanned (static). 
Handling it here unconditionally - // collapsed interpolation to zero during motion: X11 damage events fire a recomposite - // every vblank while the camera moves, so this branch would present sharp instead of - // running the interp cadence (interp 30->0/s during a camera spin). Defer it. - // Real frame just promoted: show it sharp; the gate was restarted at 0 in the promote block. if (promoted) { nativePresentLast(nativeHandle, 0f, fgPrevPromoteNs, fgLastPromoteNs); return 2; } if (period <= 0L) return 0; - // Fill the panel: emit min(eff, slots) unique frames per content interval, spread evenly with a - // Bresenham gate. No divisor-snapping (which used to collapse e.g. 3x@4-slots down to 2x = half rate). int eff = Math.max(2, fgEffectiveMultiplier); long disp = fgDisplayPeriodNs; int slots = eff; @@ -592,7 +572,6 @@ private int fgEmitOne() { if (dirty && !newGame) { nativePresentLast(nativeHandle, 0f, fgPrevPromoteNs, fgLastPromoteNs); return 2; } return 0; // hold for the next promote } - // vblank 0 already showed the real frame; place the (emits-1) interps evenly across the rest. boolean emit = (int) ((long) vi * emits / slots) != (int) ((long) (vi - 1) * emits / slots); if (!emit) return 0; // between gates — hold the current frame double phase = (double) vi / (double) slots; // deterministic slot phase, jitter-free @@ -614,7 +593,6 @@ private int fgComputeInterps() { long disp = fgDisplayPeriodNs, game = fgGamePeriodNs; if (disp <= 0L || game <= 0L) return 0; if (fgActivePresentMode == PRESENT_MODE_FIFO) { - // Vsync-locked: insert what the current refresh affords. Epsilon absorbs EMA jitter. int slots = (int) Math.floor((double) game / (double) disp + 1e-3); return Math.max(0, Math.min(maxInterps, slots - 1)); } @@ -623,13 +601,10 @@ private int fgComputeInterps() { return Math.max(0, Math.min(maxInterps, interps)); } - // One present per pump tick: the pump fires once per vblank, so one present per vblank is the panel ceiling. 
private int fgComputePerTick() { return 1; } - // Vote the FG post rate on the content surface, then notify the activity so it mirrors the target - // into the window's preferredDisplayModeId. 0 clears both when FG turns off. private void fgApplyFrameRateHint(double targetHz, long nowNs) { if (Build.VERSION.SDK_INT < Build.VERSION_CODES.R) return; float rate = frameGenEnabled && targetHz > 0.0 ? (float) Math.round(targetHz) : 0f; @@ -685,9 +660,6 @@ public void attachSurface(Surface surface) { fgSurface = surface; fgFrameRateHint = -1f; // fresh surface carries no frame-rate preference; re-apply if (nativeHandle == 0) { - // Keep the compositor on the guest-matched driver (Turnip) for AHB-tiling parity. - // Turnip carries a chip8 VK_NV_optical_flow compute implementation but ships it - // disabled; the native side force-enables it via FD_DEV_FEATURES before device init. nativeHandle = nativeCreate(shouldEnableValidationLayers(), graphicsDriverName, xServerView.getContext().getApplicationContext()); if (nativeHandle == 0) { @@ -1076,13 +1048,8 @@ public void onPointerMove(short x, short y) { @Override public void onFramePresented(Window window, WindowManager.FrameSource source, int serial) { - // DRI3_BUFFER fires at pixmap allocation, not a visible change; the real present already wakes us. Skip it. if (source == WindowManager.FrameSource.DRI3_BUFFER) return; if (frameGenEnabled) { - // An actual game-window frame — the signal that drives FG's hold+interpolate cadence. - // De-duplicate re-presented frames by scanout-buffer identity: a present that re-points at - // the same object as the last accepted frame is a duplicate buffer to drop. Only trust - // identity once >=2 distinct buffers have been seen; never drop for >100ms (freeze backstop). Drawable scanoutNow = (window != null && window.getContent() != null) ? 
window.getContent().getScanoutSource() : null; if (scanoutNow != null) { @@ -1098,7 +1065,6 @@ public void onFramePresented(Window window, WindowManager.FrameSource source, in fgDiagAccepted++; fgLastScanoutSrc = scanoutNow; fgLastAcceptNs = now; - // Trigger a HOLD; native stages + content-de-duplicates it. Cadence is driven by promotes. fgLastGameNs = now; fgNewScene.set(true); scheduleFgPump(); @@ -1312,9 +1278,6 @@ public void setFpsLimit(int fps) { public static final int PRESENT_MODE_MAILBOX = 1; public static final int PRESENT_MODE_IMMEDIATE = 2; - // Cached so callers can set a mode before the native renderer exists. Applied during - // attachSurface() right after nativeCreate. Updates after init forward straight to the - // native side and trigger a swapchain rebuild. private int requestedPresentMode = PRESENT_MODE_FIFO; public void setPresentMode(int mode) { @@ -1352,8 +1315,6 @@ public void setUnviewableWMClasses(String... names) { } public void enforceFpsLimit() { - // FPS limiting is now performed in native (after queue submit/present), so this - // method is a no-op kept for source compatibility with any external callers. } // ---- JNI --------------------------------------------------------------- From 9a70d45079cd30cf0090d0efddacac244f25ce26 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Tue, 23 Jun 2026 17:58:02 -0400 Subject: [PATCH 45/46] Frame-gen: fix intermediate-warp sign + pacing; add controlled-motion harness Warp: signed mvScale so the midpoint gather uses -0.5*flow. The old +0.25 was wrong-sign and caused ghosting/shake at intermediate phases (endpoints were unaffected, which is why raising m0 made it worse). Debug-viz moved to a flags bit; default m0 -0.25. Pacing: snap the interp present deadline to the even vblank grid; complete the motion to curr on a late-frame hold. Harness (debug, default-off): debug.winnative.fgsynth rigid-shift / fgpat noise field + full-res crop + flow-field dump for ground-truth warp verification. 
--- app/src/main/cpp/CMakeLists.txt | 1 + .../cpp/winlator/vk/shaders/cnn_generate.comp | 29 ++++++++-- .../winlator/vk/shaders/fg_synthshift.comp | 26 +++++++++ app/src/main/cpp/winlator/vk/vk_cnn_fg.c | 51 +++++++++++++--- app/src/main/cpp/winlator/vk/vk_renderer.c | 58 +++++++++++++++++-- app/src/main/cpp/winlator/vk/vk_state.h | 3 + .../display/renderer/VulkanRenderer.java | 13 ++++- 7 files changed, 163 insertions(+), 18 deletions(-) create mode 100644 app/src/main/cpp/winlator/vk/shaders/fg_synthshift.comp diff --git a/app/src/main/cpp/CMakeLists.txt b/app/src/main/cpp/CMakeLists.txt index 04e220e70..0cd99569b 100644 --- a/app/src/main/cpp/CMakeLists.txt +++ b/app/src/main/cpp/CMakeLists.txt @@ -88,6 +88,7 @@ set(SHADER_LIST "cnn_flowreg:comp:cnn_flowreg_comp" "cnn_occlusion:comp:cnn_occlusion_comp" "cnn_generate:comp:cnn_generate_comp" + "fg_synthshift:comp:fg_synthshift_comp" ) set(SHADER_HEADERS "") diff --git a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp index 9ddfe31c9..6d628a3fa 100644 --- a/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp +++ b/app/src/main/cpp/winlator/vk/shaders/cnn_generate.comp @@ -28,13 +28,34 @@ void main() { vec2 numer = vec2(p) + 0.5; vec2 uvc = numer / texSize; - float m0 = abs(pc.mvScale); + float m0 = pc.mvScale; // signed: midpoint gather = -0.5*flow float t = pc.t; float a = 2.0 * t; float b = 2.0 * (1.0 - t); - vec2 flB = texture(flowB, uvc).xy * m0; - vec2 flF = texture(flowF, uvc).xy * m0; + int smR = (pc.flags >> 4) & 7; // bilateral flow-denoise radius (flags bits 4-6) + vec2 flB, flF; + if (smR > 0) { + vec3 cc = texture(backColor, uvc).rgb; // guide on prev color: smooth within objects, keep edges + vec2 sB = vec2(0.0), sF = vec2(0.0); float ws = 0.0; + for (int dy = -smR; dy <= smR; dy++) + for (int dx = -smR; dx <= smR; dx++) { + vec2 off = vec2(float(dx), float(dy)) / texSize; + vec3 cn = texture(backColor, uvc + off).rgb; + float dc = 
dot(cn - cc, cn - cc); + float w = exp(-dc * 30.0) * exp(-float(dx*dx + dy*dy) * 0.25); + sB += texture(flowB, uvc + off).xy * w; + sF += texture(flowF, uvc + off).xy * w; + ws += w; + } + flB = sB / max(ws, 1e-4) * m0; + flF = sF / max(ws, 1e-4) * m0; + } else { + flB = texture(flowB, uvc).xy * m0; + flF = texture(flowF, uvc).xy * m0; + } + if ((pc.flags & 1) != 0) flF = -flF; + if ((pc.flags & 2) != 0) flF *= 0.5; vec2 c0 = (numer + flB * a) / texSize; vec2 c1 = (numer - flB * b) / texSize; @@ -46,7 +67,7 @@ void main() { vec3 k2 = texture(backColor, c2).rgb; vec3 k3 = texture(fwdColor, c3).rgb; - if (pc.mvScale < 0.0) { imageStore(uDst, p, vec4(abs(k2 - k3), 1.0)); return; } + if ((pc.flags & 8) != 0) { imageStore(uDst, p, vec4(abs(k2 - k3), 1.0)); return; } // debug viz (moved off the sign) vec4 L = vec4(texture(logits, c0).x, texture(logits, c1).y, texture(logits, c2).z, texture(logits, c3).w); diff --git a/app/src/main/cpp/winlator/vk/shaders/fg_synthshift.comp b/app/src/main/cpp/winlator/vk/shaders/fg_synthshift.comp new file mode 100644 index 000000000..c3bd526e3 --- /dev/null +++ b/app/src/main/cpp/winlator/vk/shaders/fg_synthshift.comp @@ -0,0 +1,26 @@ +#version 450 +// Controlled-motion harness: pattern==0 rigid shift of uSrc; pattern!=0 alias-free value-noise. 
+layout(local_size_x = 16, local_size_y = 16) in; +layout(set = 0, binding = 0) uniform sampler2D uSrc; +layout(set = 0, binding = 1, rgba8) uniform writeonly image2D uDst; +layout(push_constant) uniform PC { ivec2 size; float shiftX; float pattern; } pc; + +float hash(vec2 c) { return fract(sin(dot(c, vec2(12.9898, 78.233))) * 43758.5453); } +float vnoise(vec2 x) { + vec2 i = floor(x), f = fract(x); f = f * f * (3.0 - 2.0 * f); + float a = hash(i), b = hash(i + vec2(1, 0)), c = hash(i + vec2(0, 1)), d = hash(i + vec2(1, 1)); + return mix(mix(a, b, f.x), mix(c, d, f.x), f.y); +} + +void main() { + ivec2 p = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(p, pc.size))) return; + if (pc.pattern > 0.5) { + vec2 q = (vec2(p) - vec2(pc.shiftX, 0.0)) * 0.06; // ~16px features: medium-freq, trackable + float n0 = vnoise(q) * 0.6 + vnoise(q * 2.7) * 0.3 + vnoise(q * 6.1) * 0.1; + imageStore(uDst, p, vec4(n0, vnoise(q + 31.7), vnoise(q + 91.3), 1.0)); + } else { + vec2 uv = (vec2(p) + 0.5 - vec2(pc.shiftX, 0.0)) / vec2(pc.size); + imageStore(uDst, p, texture(uSrc, clamp(uv, vec2(0.0), vec2(1.0)))); + } +} diff --git a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c index 65524cf9c..7a948bdfa 100644 --- a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c +++ b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c @@ -92,16 +92,16 @@ static void destroy_cnn_pipelines(VkRenderer* r) { VkPipeline pipes[] = { P->cnn_pyramid_pipe, P->cnn_conv_pipe, P->cnn_cost9_pipe, P->cnn_flowreg_pipe, P->cnn_warpfollow_pipe, P->cnn_generate_pipe, P->gh_d5_pipe, P->gh_d6_pipe, P->gh_d7_pipe, P->gh_d8_pipe, P->gh_d9_pipe, P->gh_d10_pipe, - P->gh_occ_pipe, P->gh_gen_pipe }; + P->gh_occ_pipe, P->gh_gen_pipe, P->fg_synth_pipe }; VkPipelineLayout pls[] = { P->cnn_pyramid_pl, P->cnn_conv_pl, P->cnn_cost9_pl, P->cnn_flowreg_pl, P->cnn_warpfollow_pl, P->cnn_generate_pl, P->gh_d5_pl, P->gh_d6_pl, P->gh_d7_pl, P->gh_d8_pl, P->gh_d9_pl, P->gh_d10_pl, - P->gh_occ_pl, P->gh_gen_pl 
}; + P->gh_occ_pl, P->gh_gen_pl, P->fg_synth_pl }; VkDescriptorSetLayout dsls[] = { P->cnn_pyramid_dsl, P->cnn_conv_dsl, P->cnn_cost9_dsl, P->cnn_flowreg_dsl, P->cnn_warpfollow_dsl, P->cnn_generate_dsl, P->gh_d5_dsl, P->gh_d6_dsl, P->gh_d7_dsl, P->gh_d8_dsl, P->gh_d9_dsl, P->gh_d10_dsl, - P->gh_occ_dsl, P->gh_gen_dsl }; - for (int i = 0; i < 14; i++) { + P->gh_occ_dsl, P->gh_gen_dsl, P->fg_synth_dsl }; + for (int i = 0; i < 15; i++) { if (pipes[i]) vkDestroyPipeline(r->device, pipes[i], NULL); if (pls[i]) vkDestroyPipelineLayout(r->device, pls[i], NULL); if (dsls[i]) vkDestroyDescriptorSetLayout(r->device, dsls[i], NULL); @@ -117,6 +117,7 @@ static void destroy_cnn_pipelines(VkRenderer* r) { P->gh_d5_dsl = P->gh_d6_dsl = P->gh_d7_dsl = P->gh_d8_dsl = P->gh_d9_dsl = P->gh_d10_dsl = VK_NULL_HANDLE; P->gh_occ_pipe = P->gh_gen_pipe = VK_NULL_HANDLE; P->gh_occ_pl = P->gh_gen_pl = VK_NULL_HANDLE; + P->fg_synth_pipe = VK_NULL_HANDLE; P->fg_synth_pl = VK_NULL_HANDLE; P->fg_synth_dsl = VK_NULL_HANDLE; P->gh_occ_dsl = P->gh_gen_dsl = VK_NULL_HANDLE; } @@ -157,6 +158,11 @@ static bool create_cnn_pipelines(VkRenderer* r) { P->cnn_warpfollow_dsl, &P->cnn_warpfollow_pl, &P->cnn_warpfollow_pipe)) goto cnn_fail; if (!cnn_make_pipe(r, cnn_generate_comp, cnn_generate_comp_size, P->cnn_generate_dsl, &P->cnn_generate_pl, &P->cnn_generate_pipe)) goto cnn_fail; + { const CnnBind synth[] = { {0,S},{1,I} }; + P->fg_synth_dsl = cnn_make_dsl(r, synth, 2); + if (!P->fg_synth_dsl) goto cnn_fail; + if (!cnn_make_pipe(r, fg_synthshift_comp, fg_synthshift_comp_size, + P->fg_synth_dsl, &P->fg_synth_pl, &P->fg_synth_pipe)) goto cnn_fail; } if (!cnn_make_gh_pipe(r, wnfg_25_spv, wnfg_25_spv_size, 1, 6, 1, &P->gh_d5_dsl, &P->gh_d5_pl, &P->gh_d5_pipe)) goto cnn_fail; @@ -635,6 +641,32 @@ static void cnn_concat4(VkCommandBuffer cmd, VkCnnImg* lo2, VkCnnImg* hi2, VkCnn VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); } +// 
Controlled-motion harness: write a shifted/pattern field into dst via rgba8 storeViews. +static void fg_synth_shift(VkRenderer* r, VkCommandBuffer cmd, VkFgImage* prev, VkFgImage* curr, float shiftX, int pattern) { + VkPipelineSet* P = &r->pipelines; + if (!P->fg_synth_pipe || !prev->storeView || !curr->storeView) return; + VkDescriptorSet ds = cnn_alloc(r, P->fg_synth_dsl); if (!ds) return; + vkr_image_barrier(cmd, curr->image, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, VK_ACCESS_SHADER_WRITE_BIT); + VkDescriptorImageInfo s0 = { r->fg_sampler, prev->storeView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; + VkDescriptorImageInfo i1 = { VK_NULL_HANDLE, curr->storeView, VK_IMAGE_LAYOUT_GENERAL }; + VkWriteDescriptorSet w[2] = { + cnn_wimg(ds, 0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &s0), + cnn_wimg(ds, 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &i1) }; + vkUpdateDescriptorSets(r->device, 2, w, 0, NULL); + struct { int32_t sx, sy; float shiftX, pat; } pc = { (int32_t)curr->width, (int32_t)curr->height, shiftX, (float)pattern }; + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, P->fg_synth_pipe); + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, P->fg_synth_pl, 0, 1, &ds, 0, NULL); + vkCmdPushConstants(cmd, P->fg_synth_pl, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pc), &pc); + vkCmdDispatch(cmd, (curr->width + 15u) / 16u, (curr->height + 15u) / 16u, 1u); + vkr_image_barrier(cmd, curr->image, + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); +} + static void cnn_flow_pass(VkRenderer* r, VkCommandBuffer cmd, uint32_t parity, VkFgImage* prevFrame, VkFgImage* currFrame, bool forward, VkFgImage* outFlow) { @@ -769,7 +801,11 @@ static void cnn_generate_frame(VkRenderer* r, 
VkCommandBuffer cmd, uint32_t pari float t = phase < 0.0f ? 0.0f : (phase > 1.0f ? 1.0f : phase); char m0s[16] = {0}; __system_property_get("debug.winnative.fgm0", m0s); - float m0 = m0s[0] ? (float)atof(m0s) : 0.25f; + float m0 = m0s[0] ? (float)atof(m0s) : -0.25f; // SIGNED: midpoint gather needs flB=-0.5*flow; +0.25 was wrong-sign (ghosting) + char f2s[16] = {0}; __system_property_get("debug.winnative.fgflow2", f2s); + int flow2 = f2s[0] ? atoi(f2s) : 0; // 1 = feed distinct backward flow to s35 + char flgs[16] = {0}; __system_property_get("debug.winnative.fgflags", flgs); + int genflags = flgs[0] ? atoi(flgs) : 16; // default R=1 bilateral flow denoise (bits4-6); bit0 negate flF; bit1 scale flF 0.5 cnn_to_write(cmd, C->gen[slot].image, 1); { @@ -777,7 +813,8 @@ static void cnn_generate_frame(VkRenderer* r, VkCommandBuffer cmd, uint32_t pari VkDescriptorImageInfo s32 = {r->fg_sampler, prevView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo s33 = {r->fg_sampler, currView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo s34 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; - VkDescriptorImageInfo s35 = {r->fg_sampler, r->fg_motion[parity].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkImageView flow2v = (flow2 && r->fg_motion_fwd_valid) ? 
r->fg_motion_fwd[parity].view : r->fg_motion[parity].view; + VkDescriptorImageInfo s35 = {r->fg_sampler, flow2v, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo s36 = {r->fg_sampler, C->logits[0].view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo oi = {VK_NULL_HANDLE, C->gen[slot].view, VK_IMAGE_LAYOUT_GENERAL}; VkWriteDescriptorSet w[6] = { @@ -789,7 +826,7 @@ static void cnn_generate_frame(VkRenderer* r, VkCommandBuffer cmd, uint32_t pari cnn_wimg(ds, 48, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &oi), }; vkUpdateDescriptorSets(r->device, 6, w, 0, NULL); - CnnPC pc = {0}; pc.sx = (int32_t)gw; pc.sy = (int32_t)gh; pc.t = t; pc.mvScale = m0; + CnnPC pc = {0}; pc.sx = (int32_t)gw; pc.sy = (int32_t)gh; pc.t = t; pc.mvScale = m0; pc.flags = genflags; vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, P->cnn_generate_pipe); vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, P->cnn_generate_pl, 0, 1, &ds, 0, NULL); vkCmdPushConstants(cmd, P->cnn_generate_pl, VK_SHADER_STAGE_COMPUTE_BIT, 0, 32, &pc); diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index 2f5b5ea98..8649bea97 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -48,6 +48,7 @@ #include "shaders/cnn_correlation_warpfollow_comp.spv.h" #include "shaders/cnn_flowreg_comp.spv.h" #include "shaders/cnn_generate_comp.spv.h" +#include "shaders/fg_synthshift_comp.spv.h" #include "wnfg_spv/wnfg_04_spv.h" #include "wnfg_spv/wnfg_13_spv.h" #include "wnfg_spv/wnfg_25_spv.h" @@ -2414,6 +2415,7 @@ static void fg_destroy_resources(VkRenderer* r) { if (o->blit_set) vkr_free_descriptor_set(r, o->blit_set); if (o->framebuffer) vkDestroyFramebuffer(r->device, o->framebuffer, NULL); if (o->view) vkDestroyImageView(r->device, o->view, NULL); + if (o->storeView) vkDestroyImageView(r->device, o->storeView, NULL); if (o->image) vkDestroyImage(r->device, o->image, NULL); if (o->memory) 
vkFreeMemory(r->device, o->memory, NULL); memset(o, 0, sizeof(*o)); @@ -2461,7 +2463,9 @@ static bool fg_create_color_target(VkRenderer* r, VkFgImage* o, uint32_t w, uint ic.mipLevels = 1; ic.arrayLayers = 1; ic.samples = VK_SAMPLE_COUNT_1_BIT; ic.tiling = VK_IMAGE_TILING_OPTIMAL; - ic.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + ic.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; // rgba8 storage alias (synth-shift harness) + ic.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT + | VK_IMAGE_USAGE_STORAGE_BIT; ic.sharingMode = VK_SHARING_MODE_EXCLUSIVE; ic.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; if (vkCreateImage(r->device, &ic, NULL, &o->image) != VK_SUCCESS) return false; @@ -2480,6 +2484,9 @@ static bool fg_create_color_target(VkRenderer* r, VkFgImage* o, uint32_t w, uint vi.subresourceRange.levelCount = 1; vi.subresourceRange.layerCount = 1; if (vkCreateImageView(r->device, &vi, NULL, &o->view) != VK_SUCCESS) return false; + vi.format = VK_FORMAT_R8G8B8A8_UNORM; // rgba8 storage alias for the synth-shift harness + if (vkCreateImageView(r->device, &vi, NULL, &o->storeView) != VK_SUCCESS) return false; + VkFramebufferCreateInfo fb = {VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO}; fb.renderPass = r->pipelines.offscreen_pass; fb.attachmentCount = 1; fb.pAttachments = &o->view; @@ -2910,6 +2917,8 @@ static bool fg_create_resources(VkRenderer* r, uint32_t w, uint32_t h) { static bool fg_ensure_resources(VkRenderer* r) { if (r->swapchain_extent.width == 0 || r->swapchain_extent.height == 0) return false; if (!r->pipelines_built) return false; + { char fss[16] = {0}; __system_property_get("debug.winnative.fgflowscale", fss); + if (fss[0]) { float v = (float)atof(fss); if (v >= 0.2f && v <= 1.0f) r->fg_flow_scale = v; } } if (r->fg_built && r->fg_dims.width == r->swapchain_extent.width && r->fg_dims.height == r->swapchain_extent.height @@ -2944,11 +2953,20 @@ static uint64_t g_fg_dropped = 0; static uint64_t 
fg_compute_deadline(VkRenderer* r, float phase) { uint64_t now = now_monotonic_ns(); uint64_t ca = r->fg_curr_arrival_ns, pa = r->fg_prev_arrival_ns; + // disp = the vblank period Java derives slots from, so the snap matches the slot cadence + uint64_t disp = r->fg_display_period_ns ? r->fg_display_period_ns : r->refresh_duration_ns; uint64_t period = r->fg_present_period_ns ? r->fg_present_period_ns : r->refresh_duration_ns; uint64_t deadline; if (ca != 0 && pa != 0 && ca > pa) { - uint64_t cp = ca - pa; // content period from the two real arrivals if (phase < 0.0f) phase = 0.0f; + // snap the content period to whole vblanks (same EMA as slots) -> presents on the even grid + uint64_t cp = ca - pa; + uint64_t ema = r->fg_content_period_ns ? r->fg_content_period_ns : cp; + if (disp != 0) { + uint64_t k = (uint64_t)(((double)ema / (double)disp) + 0.5); // == fgEmitOne slots + if (k < 1u) k = 1u; + cp = k * disp; + } deadline = ca + (uint64_t)((double)phase * (double)cp); } else if (period != 0) { deadline = r->fg_present_deadline_ns + period; // pre-lock fallback: even period grid @@ -3449,6 +3467,21 @@ static FgPending fg_worker_generate(VkRenderer* r, const FgJob* job) { VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, hist_dst, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + { char syn[16] = {0}; __system_property_get("debug.winnative.fgsynth", syn); // controlled-motion harness + int synN = syn[0] ? atoi(syn) : 0; + if (synN != 0 && prev->storeView && curr->storeView) { + char patS[8] = {0}; __system_property_get("debug.winnative.fgpat", patS); + int pat = patS[0] ? 
atoi(patS) : 0; + if (pat) { + fg_synth_shift(r, f->cmd, curr, prev, 0.0f, 1); // prev = noise pattern + fg_synth_shift(r, f->cmd, prev, curr, (float)synN, 1); // curr = pattern shifted right by N + } else { + fg_synth_shift(r, f->cmd, prev, curr, (float)synN, 0); // curr = real prev shifted by N + } + r->fg_cnn.featValid[curr_idx] = false; + r->fg_cnn.featValid[prev_idx] = false; + r->fg_motion_valid = false; r->fg_motion_fwd_valid = false; // recompute flow on the shifted input + } } if (r->fg_use_cnn && r->fg_cnn_capable && r->fg_cnn.ready) { if (!r->fg_motion_valid) { cnn_flow_pass(r, f->cmd, parity, prev, curr, false, &r->fg_motion[parity]); @@ -3490,20 +3523,27 @@ static FgPending fg_worker_generate(VkRenderer* r, const FgJob* job) { fg_dump_poll(r); float fg_dump_prevph = r->fg_dump_last_phase; r->fg_dump_last_phase = job->phase; + int synDump = 0; { char s2[16] = {0}; __system_property_get("debug.winnative.fgsynth", s2); synDump = s2[0] ? atoi(s2) : 0; } + int rawm = synDump ? 1 : 0; // full-res 1:1 crop under the harness (no downscale confound) if (r->fg_dump_supported && r->fg_dump_armed && r->fg_dump_count < FG_DUMP_N && !(r->fg_dump_count == 0 && job->phase > fg_dump_prevph + 0.01f)) { if (r->fg_dump_count == 0) s_fgseq_last_curr = 0xFFFFFFFFu; if (curr_idx != s_fgseq_last_curr && r->fg_dump_count < FG_DUMP_N) { - fg_record_dump(r, f->cmd, r->fg_history[prev_idx].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, r->fg_dims.width, r->fg_dims.height, r->fg_dump_count, 0); + fg_record_dump(r, f->cmd, r->fg_history[prev_idx].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, r->fg_dims.width, r->fg_dims.height, r->fg_dump_count, rawm); VK_LOGI("fgseq[%u] REAL (prev=%u curr=%u)", r->fg_dump_count, prev_idx, curr_idx); r->fg_dump_count++; s_fgseq_last_curr = curr_idx; } if (r->fg_dump_count < FG_DUMP_N) { - fg_record_dump(r, f->cmd, r->fg_cnn.gen[genslot].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, r->fg_dims.width, r->fg_dims.height, r->fg_dump_count, 0); 
+ fg_record_dump(r, f->cmd, r->fg_cnn.gen[genslot].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, r->fg_dims.width, r->fg_dims.height, r->fg_dump_count, rawm); VK_LOGI("fgseq[%u] GENERATED (phase=%.3f prev=%u curr=%u)", r->fg_dump_count, job->phase, prev_idx, curr_idx); r->fg_dump_count++; } + if (synDump && r->fg_dump_count < FG_DUMP_N) { // harness: raw F16 flow field to test recovery of the known shift + fg_record_dump(r, f->cmd, r->fg_motion[parity].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, r->fg_motion[parity].width, r->fg_motion[parity].height, r->fg_dump_count, 1); + VK_LOGI("fgseq[%u] FLOW (flowres=%ux%u)", r->fg_dump_count, r->fg_motion[parity].width, r->fg_motion[parity].height); + r->fg_dump_count++; + } } } @@ -4254,15 +4294,23 @@ JNIEXPORT void JNICALL JNI_FN(nativeSetFrameGenFramesInFlight)(JNIEnv* env, jcla r->fg_target_fif = fif; } -JNIEXPORT void JNICALL JNI_FN(nativeSetVsyncTiming)(JNIEnv* env, jclass clazz, jlong handle, jlong periodNs, jlong displayPeriodNs, jlong vsyncNs) { +JNIEXPORT void JNICALL JNI_FN(nativeSetVsyncTiming)(JNIEnv* env, jclass clazz, jlong handle, jlong periodNs, jlong displayPeriodNs, jlong contentPeriodNs, jlong vsyncNs) { (void)env; (void)clazz; VkRenderer* r = (VkRenderer*)(intptr_t)handle; if (!r) return; r->fg_present_period_ns = periodNs > 0 ? (uint64_t)periodNs : 0; r->fg_display_period_ns = displayPeriodNs > 0 ? (uint64_t)displayPeriodNs : 0; + r->fg_content_period_ns = contentPeriodNs > 0 ? (uint64_t)contentPeriodNs : 0; r->fg_vsync_anchor_ns = vsyncNs > 0 ? (uint64_t)vsyncNs : 0; } +JNIEXPORT jint JNICALL JNI_FN(nativeGetFillHolds)(JNIEnv* env, jclass clazz) { + (void)env; (void)clazz; + char v[PROP_VALUE_MAX] = {0}; + __system_property_get("debug.winnative.fgfill", v); + return v[0] ? (jint)atoi(v) : 1; // default: complete the motion to curr on a late-frame hold +} + // Scene byte buffer layout (must mirror VulkanRenderer.java offsets). Native-endian, packed. 
#define SCENE_OFF_CURSOR_HANDLE 0 #define SCENE_OFF_WINDOW_HANDLES 8 /* int64 × VK_MAX_RENDERABLE_WINDOWS */ diff --git a/app/src/main/cpp/winlator/vk/vk_state.h b/app/src/main/cpp/winlator/vk/vk_state.h index fb96ca2ef..865deff55 100644 --- a/app/src/main/cpp/winlator/vk/vk_state.h +++ b/app/src/main/cpp/winlator/vk/vk_state.h @@ -201,6 +201,7 @@ typedef struct VkPipelineSet { VkDescriptorSetLayout gh_d5_dsl, gh_d6_dsl, gh_d7_dsl, gh_d8_dsl, gh_d9_dsl, gh_d10_dsl; VkPipelineLayout gh_d5_pl, gh_d6_pl, gh_d7_pl, gh_d8_pl, gh_d9_pl, gh_d10_pl; VkPipeline gh_d5_pipe, gh_d6_pipe, gh_d7_pipe, gh_d8_pipe, gh_d9_pipe, gh_d10_pipe; + VkDescriptorSetLayout fg_synth_dsl; VkPipelineLayout fg_synth_pl; VkPipeline fg_synth_pipe; VkDescriptorSetLayout gh_occ_dsl, gh_gen_dsl; // wnfg_13 occlusion, wnfg_04 generate VkPipelineLayout gh_occ_pl, gh_gen_pl; @@ -241,6 +242,7 @@ typedef struct VkSgsr1State { typedef struct VkFgImage { VkImage image; VkImageView view; + VkImageView storeView; // rgba8 mutable view for the synth-shift harness (history only) VkDeviceMemory memory; VkFramebuffer framebuffer; // history targets only; VK_NULL_HANDLE for the motion field VkDescriptorSet blit_set; // history only @@ -546,6 +548,7 @@ typedef struct VkRenderer { uint64_t fg_present_deadline_ns; // unsnapped deadline accumulator uint64_t fg_present_target_ns; // present target for the next present uint64_t fg_display_period_ns; // live panel vsync period fed from Java + uint64_t fg_content_period_ns; // EMA content interval fed from Java (matches slots) uint64_t fg_vsync_anchor_ns; // latest Choreographer vsync timestamp uint64_t fg_prev_arrival_ns; // real-frame arrival times uint64_t fg_curr_arrival_ns; diff --git a/app/src/main/runtime/display/renderer/VulkanRenderer.java b/app/src/main/runtime/display/renderer/VulkanRenderer.java index 2492b5143..dc5a3a0fc 100644 --- a/app/src/main/runtime/display/renderer/VulkanRenderer.java +++ b/app/src/main/runtime/display/renderer/VulkanRenderer.java @@ 
-384,7 +384,7 @@ private void fgPumpTickFromNative(long frameTimeNanos) { if (nativeHandle != 0) { double th = fgTargetHz(); nativeSetVsyncTiming(nativeHandle, th > 0.0 ? (long) (1.0e9 / th) : fgDisplayPeriodNs, - fgDisplayPeriodNs, frameTimeNanos); + fgDisplayPeriodNs, fgContentPeriodNs, frameTimeNanos); fgApplyFrameRateHint(th, frameTimeNanos); } } else if (d >= 100_000_000L) { @@ -472,6 +472,8 @@ private void fgInstrument(long usCpu, boolean wasHold) { private long fgPromoteSeen = 0; private long fgLastPromoteNs = 0, fgPrevPromoteNs = 0; // times of the last two distinct content frames private long fgContentPeriodNs = 0; // EMA of the interval between distinct content frames + private int fgFillHolds = 1; // debug.winnative.fgfill: complete motion to curr on late holds (0=off) + private int fgFillCtr = 0; private int fgPromoteSlotIdx = 0; // display ticks since the last promote private int fgVblankSincePromote = 0; // vblanks since the last real frame — drives the steady output gate private volatile int fgCadenceM = 2; // divisor-snapped multiplier actually used by the cadence @@ -508,6 +510,7 @@ public void fgClearOverlayIfActive() { private int fgEmitOne() { if (fgResyncPending) { fgResyncPending = false; doFgResync(); } + if ((fgFillCtr++ & 63) == 0) fgFillHolds = nativeGetFillHolds(); boolean newGame = fgNewScene.getAndSet(false); boolean dirty = fgSceneDirty.getAndSet(false); fgEmitWasHold = newGame || dirty; @@ -570,6 +573,11 @@ private int fgEmitOne() { if (vi >= slots) { // interval fully spanned (content static) // Now a UI-only recomposite (cursor) warrants a sharp redraw; otherwise hold. 
if (dirty && !newGame) { nativePresentLast(nativeHandle, 0f, fgPrevPromoteNs, fgLastPromoteNs); return 2; } + if (fgFillHolds > 0 && vi < slots + fgFillHolds) { // next real frame late: carry the motion + double phase = (double) vi / (double) slots; // >=1.0 -> reach curr then extrapolate forward + nativeRenderInterp(nativeHandle, (float) phase, fgPrevPromoteNs, fgLastPromoteNs); + return 1; + } return 0; // hold for the next promote } boolean emit = (int) ((long) vi * emits / slots) != (int) ((long) (vi - 1) * emits / slots); @@ -1346,7 +1354,8 @@ private static native long nativeCreate(boolean enableValidationLayers, private static native void nativeSetFrameGenExtrapolate(long handle, boolean extrapolate); private static native void nativeSetFrameGenFramesInFlight(long handle, int framesInFlight); private static native int nativeGetActivePresentMode(long handle); - private static native void nativeSetVsyncTiming(long handle, long periodNs, long displayPeriodNs, long vsyncNs); + private static native int nativeGetFillHolds(); + private static native void nativeSetVsyncTiming(long handle, long periodNs, long displayPeriodNs, long contentPeriodNs, long vsyncNs); private static native void nativeFgPumpStart(Object renderer); private static native void nativeFgPumpStop(); } From 74a4be050c53b7ceec87c78b7876eabaf051b863 Mon Sep 17 00:00:00 2001 From: MaxsTechReview Date: Wed, 24 Jun 2026 11:08:48 -0400 Subject: [PATCH 46/46] Frame-gen producer re-port (partial): fix degenerate gamma stage conv36 was dispatched cinT=4 but the trained wnfg_36 is cinT=2 (8ch = prev4 + curr4), so it read past its weight file -> bias-only, content-independent features that froze the whole flow chain. Fixed: cnn_concat2 feeds prev.L0 ++ curr.L0 at cinT=2, plus the missing trained wnfg_45 (8->16) expansion before conv42. Verified on-device the gamma now produces content-varying features. The flow magnitude is still a fixed ~19 regardless of motion - the producer never tracked motion. 
Root-caused to the cost volume (wnfg_14) self-correlating combined prev+curr features plus a dominant flow-regression bias, with the coarsest pyramid level sub-pixel for the test shifts. Feeding the cost stage separated per-frame features is correct but insufficient alone; the full fix is a multi-stage RE, documented separately. Flow images get TRANSFER_SRC so the controlled-motion harness can dump them. --- app/src/main/cpp/CMakeLists.txt | 2 +- .../winlator/vk/shaders/fg_synthshift.comp | 7 +- app/src/main/cpp/winlator/vk/vk_cnn_fg.c | 69 ++++++++++++++++-- app/src/main/cpp/winlator/vk/vk_renderer.c | 9 ++- app/src/main/cpp/winlator/vk/vk_state.h | 3 + .../vk/weights_v2/wnfg_45.weights.fp16 | Bin 0 -> 3552 bytes 6 files changed, 75 insertions(+), 15 deletions(-) create mode 100644 app/src/main/cpp/winlator/vk/weights_v2/wnfg_45.weights.fp16 diff --git a/app/src/main/cpp/CMakeLists.txt b/app/src/main/cpp/CMakeLists.txt index 0cd99569b..44929a5f7 100644 --- a/app/src/main/cpp/CMakeLists.txt +++ b/app/src/main/cpp/CMakeLists.txt @@ -118,7 +118,7 @@ endforeach() set(WEIGHTS_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/winlator/vk/weights_v2") set(BIN2C_BYTES_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/winlator/vk/bin2c_bytes.cmake") -set(WEIGHTS_LIST 05 06 07 14 20 21 22 24 25 26 27 28 29 36 37 42 51) +set(WEIGHTS_LIST 05 06 07 14 20 21 22 24 25 26 27 28 29 36 37 42 45 51) foreach(id ${WEIGHTS_LIST}) set(winput "${WEIGHTS_SRC_DIR}/wnfg_${id}.weights.fp16") set(whdr "${SHADER_OUT_DIR}/wnfg_${id}_weights.h") diff --git a/app/src/main/cpp/winlator/vk/shaders/fg_synthshift.comp b/app/src/main/cpp/winlator/vk/shaders/fg_synthshift.comp index c3bd526e3..f97143307 100644 --- a/app/src/main/cpp/winlator/vk/shaders/fg_synthshift.comp +++ b/app/src/main/cpp/winlator/vk/shaders/fg_synthshift.comp @@ -16,9 +16,10 @@ void main() { ivec2 p = ivec2(gl_GlobalInvocationID.xy); if (any(greaterThanEqual(p, pc.size))) return; if (pc.pattern > 0.5) { - vec2 q = (vec2(p) - vec2(pc.shiftX, 0.0)) * 0.06; 
// ~16px features: medium-freq, trackable - float n0 = vnoise(q) * 0.6 + vnoise(q * 2.7) * 0.3 + vnoise(q * 6.1) * 0.1; - imageStore(uDst, p, vec4(n0, vnoise(q + 31.7), vnoise(q + 91.3), 1.0)); + vec2 q = (vec2(p) - vec2(pc.shiftX, 0.0)) * 0.018; // ~55px features: larger than test shifts, alias-free + float n0 = vnoise(q) * 0.6 + vnoise(q * 1.9) * 0.4; + float n1 = vnoise(q + 31.7) * 0.6 + vnoise(q * 1.9 + 31.7) * 0.4; + imageStore(uDst, p, vec4(n0, n1, vnoise(q + 91.3), 1.0)); } else { vec2 uv = (vec2(p) + 0.5 - vec2(pc.shiftX, 0.0)) / vec2(pc.size); imageStore(uDst, p, texture(uSrc, clamp(uv, vec2(0.0), vec2(1.0)))); diff --git a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c index 7a948bdfa..77463bd4e 100644 --- a/app/src/main/cpp/winlator/vk/vk_cnn_fg.c +++ b/app/src/main/cpp/winlator/vk/vk_cnn_fg.c @@ -280,7 +280,8 @@ static void fg_destroy_cnn_resources(VkRenderer* r) { } C->featValid[0] = C->featValid[1] = C->featValid[2] = false; for (int L = 0; L < CNN_LEVELS; L++) { - cnn_free_img(r, &C->feat8_pair[L]); cnn_free_img(r, &C->dpair[L]); + cnn_free_img(r, &C->feat8_pair[L]); cnn_free_img(r, &C->gPair8[L]); + cnn_free_img(r, &C->gExpIn[L]); cnn_free_img(r, &C->hG23b[L]); cnn_free_img(r, &C->dpair[L]); cnn_free_img(r, &C->hG0[L]); cnn_free_img(r, &C->hG1[L]); cnn_free_img(r, &C->hG23[L]); cnn_free_img(r, &C->hG4[L]); cnn_free_img(r, &C->hD0[L]); cnn_free_img(r, &C->hD1[L]); @@ -334,7 +335,7 @@ static bool fg_create_cnn_resources(VkRenderer* r, uint32_t w, uint32_t h) { #define CNN_W(ID) if (!cnn_make_ssbo(r, ID, wnfg_##ID##_weights, (size_t)wnfg_##ID##_weights_size)) return false CNN_W(05); CNN_W(06); CNN_W(07); CNN_W(14); CNN_W(20); CNN_W(21); CNN_W(22); CNN_W(24); CNN_W(25); CNN_W(26); CNN_W(27); CNN_W(28); CNN_W(29); - CNN_W(36); CNN_W(37); CNN_W(42); CNN_W(51); + CNN_W(36); CNN_W(37); CNN_W(42); CNN_W(45); CNN_W(51); #undef CNN_W { @@ -373,6 +374,9 @@ static bool fg_create_cnn_resources(VkRenderer* r, uint32_t w, 
uint32_t h) { } for (int L = 0; L < CNN_LEVELS; L++) { if (!cnn_make_img(r, &C->feat8_pair[L], fw[L], fh[L], RGBA8, 4, true)) return false; + if (!cnn_make_img(r, &C->gPair8[L], fw[L], fh[L], RGBA8, 2, true)) return false; + if (!cnn_make_img(r, &C->gExpIn[L], fw[L], fh[L], RGBA8, 3, true)) return false; + if (!cnn_make_img(r, &C->hG23b[L], fw[L], fh[L], RGBA8, 4, true)) return false; if (!cnn_make_img(r, &C->hG0[L], fw[L], fh[L], RGBA8, 2, true)) return false; if (!cnn_make_img(r, &C->hG1[L], fw[L], fh[L], RGBA8, 2, true)) return false; if (!cnn_make_img(r, &C->hG23[L], fw[L], fh[L], RGBA8, 4, true)) return false; @@ -641,6 +645,51 @@ static void cnn_concat4(VkCommandBuffer cmd, VkCnnImg* lo2, VkCnnImg* hi2, VkCnn VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); } +// dst2.layer0 = a.layer0, dst2.layer1 = b.layer0 (prev/curr 4ch each -> 8ch wnfg_36 input). +static void cnn_concat2(VkCommandBuffer cmd, VkCnnImg* a, VkCnnImg* b, VkCnnImg* dst2) { + cnn_to_write(cmd, dst2->image, 2); + cnn_barrier_ml(cmd, a->image, 2, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_READ_BIT, VK_ACCESS_TRANSFER_READ_BIT); + cnn_barrier_ml(cmd, b->image, 2, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_READ_BIT, VK_ACCESS_TRANSFER_READ_BIT); + cnn_barrier_ml(cmd, dst2->image, 2, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT); + VkImageCopy cp[2]; memset(cp, 0, sizeof(cp)); + cp[0].srcSubresource=(VkImageSubresourceLayers){VK_IMAGE_ASPECT_COLOR_BIT,0,0,1}; + 
cp[0].dstSubresource=(VkImageSubresourceLayers){VK_IMAGE_ASPECT_COLOR_BIT,0,0,1}; + cp[0].extent=(VkExtent3D){dst2->w, dst2->h, 1}; + cp[1]=cp[0]; cp[1].dstSubresource.baseArrayLayer=1; // dst layer1 <- b layer0 + vkCmdCopyImage(cmd, a->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst2->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &cp[0]); + vkCmdCopyImage(cmd, b->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst2->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &cp[1]); + cnn_barrier_ml(cmd, a->image, 2, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_SHADER_READ_BIT); + cnn_barrier_ml(cmd, b->image, 2, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_SHADER_READ_BIT); + cnn_barrier_ml(cmd, dst2->image, 2, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); +} + +// dst3(3 layers) = src2.layer0 ++ src2.layer1 ++ 0 (wnfg_45 input; aux b34 group zeroed for now). 
+static void cnn_pad3z(VkCommandBuffer cmd, VkCnnImg* src2, VkCnnImg* dst3) { + cnn_to_write(cmd, dst3->image, 3); + cnn_barrier_ml(cmd, src2->image, 2, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_READ_BIT, VK_ACCESS_TRANSFER_READ_BIT); + cnn_barrier_ml(cmd, dst3->image, 3, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT); + VkImageCopy cp; memset(&cp, 0, sizeof(cp)); + cp.srcSubresource=(VkImageSubresourceLayers){VK_IMAGE_ASPECT_COLOR_BIT,0,0,2}; + cp.dstSubresource=(VkImageSubresourceLayers){VK_IMAGE_ASPECT_COLOR_BIT,0,0,2}; + cp.extent=(VkExtent3D){dst3->w, dst3->h, 1}; + vkCmdCopyImage(cmd, src2->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst3->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &cp); + VkClearColorValue z; memset(&z, 0, sizeof(z)); + VkImageSubresourceRange rng = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 2, 1}; // layer 2 only + vkCmdClearColorImage(cmd, dst3->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &z, 1, &rng); + cnn_barrier_ml(cmd, src2->image, 2, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_SHADER_READ_BIT); + cnn_barrier_ml(cmd, dst3->image, 3, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); +} + // Controlled-motion harness: write a shifted/pattern field into dst via rgba8 storeViews. 
static void fg_synth_shift(VkRenderer* r, VkCommandBuffer cmd, VkFgImage* prev, VkFgImage* curr, float shiftX, int pattern) { VkPipelineSet* P = &r->pipelines; @@ -695,23 +744,29 @@ static void cnn_flow_pass(VkRenderer* r, VkCommandBuffer cmd, uint32_t parity, VkImageView seedView = (L == CNN_FLOW_LEVELS - 1) ? C->seedBlack.view : ((L+1 <= 2) ? C->flowRef[L+1].view : C->flowMid[L+1].view); - cnn_concat4(cmd, &fp->feat8[L], &fc->feat8[L], &C->feat8_pair[L]); + cnn_concat2(cmd, &fp->feat8[L], &fc->feat8[L], &C->gPair8[L]); // prev.L0 ++ curr.L0 = 8ch (wnfg_36 wants cinT=2) cnn_to_write(cmd, C->hG0[L].image, 2); - cnn_conv_dispatch(r, cmd, C->feat8_pair[L].view, fc->luma[L].view, C->hG0[L].view, 36, 4, 2, 0, w, h); + cnn_conv_dispatch(r, cmd, C->gPair8[L].view, fc->luma[L].view, C->hG0[L].view, 36, 2, 2, 0, w, h); cnn_to_read(cmd, C->hG0[L].image, 2); cnn_to_write(cmd, C->hG1[L].image, 2); cnn_conv_dispatch(r, cmd, C->hG0[L].view, fc->luma[L].view, C->hG1[L].view, 37, 2, 2, 0, w, h); cnn_to_read(cmd, C->hG1[L].image, 2); + // wnfg_45 expansion: hG1 (8ch, +zero aux) -> hG23 (16ch) + cnn_pad3z(cmd, &C->hG1[L], &C->gExpIn[L]); cnn_to_write(cmd, C->hG23[L].image, 4); - cnn_conv_dispatch(r, cmd, C->hG1[L].view, fc->luma[L].view, C->hG23[L].view, 42, 4, 4, 0, w, h); + cnn_conv_dispatch(r, cmd, C->gExpIn[L].view, fc->luma[L].view, C->hG23[L].view, 45, 3, 4, 0, w, h); cnn_to_read(cmd, C->hG23[L].image, 4); + // wnfg_42: hG23 (16ch) -> hG23b (16ch) + cnn_to_write(cmd, C->hG23b[L].image, 4); + cnn_conv_dispatch(r, cmd, C->hG23[L].view, fc->luma[L].view, C->hG23b[L].view, 42, 4, 4, 0, w, h); + cnn_to_read(cmd, C->hG23b[L].image, 4); cnn_to_write(cmd, C->hG4[L].image, 2); - cnn_conv_dispatch(r, cmd, C->hG23[L].view, seedView, C->hG4[L].view, 21, 3, 2, 0, w, h); + cnn_conv_dispatch(r, cmd, C->hG23b[L].view, seedView, C->hG4[L].view, 21, 3, 2, 0, w, h); cnn_to_read(cmd, C->hG4[L].image, 2); cnn_to_write(cmd, C->hD0[L].image, 3); - { VkImageView 
in5[5]={C->hG4[L].layerView[0],C->hG4[L].layerView[1],C->hG23[L].layerView[2],C->hG23[L].layerView[3],seedView}; + { VkImageView in5[5]={fp->feat8[L].layerView[0],fp->feat8[L].layerView[1],fc->feat8[L].layerView[0],fc->feat8[L].layerView[1],seedView}; // REF=prev, SEARCH=curr (separated frames so the correlation tracks motion) VkImageView out3[3]={C->hD0[L].layerView[0],C->hD0[L].layerView[1],C->hD0[L].layerView[2]}; cnn_cost9_dispatch(r, cmd, in5, out3, 14, w, h); } cnn_to_read(cmd, C->hD0[L].image, 3); diff --git a/app/src/main/cpp/winlator/vk/vk_renderer.c b/app/src/main/cpp/winlator/vk/vk_renderer.c index 8649bea97..34a15147b 100644 --- a/app/src/main/cpp/winlator/vk/vk_renderer.c +++ b/app/src/main/cpp/winlator/vk/vk_renderer.c @@ -73,6 +73,7 @@ #include "shaders/wnfg_36_weights.h" #include "shaders/wnfg_37_weights.h" #include "shaders/wnfg_42_weights.h" +#include "shaders/wnfg_45_weights.h" #include "shaders/wnfg_51_weights.h" static uint64_t now_monotonic_ns(void) { @@ -2514,7 +2515,7 @@ static bool fg_create_motion(VkRenderer* r, VkFgImage* o, uint32_t w, uint32_t h ic.mipLevels = 1; ic.arrayLayers = 1; ic.samples = VK_SAMPLE_COUNT_1_BIT; ic.tiling = VK_IMAGE_TILING_OPTIMAL; - ic.usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + ic.usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; // TRANSFER_SRC so the flow can be dumped ic.sharingMode = VK_SHARING_MODE_EXCLUSIVE; ic.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; if (vkCreateImage(r->device, &ic, NULL, &o->image) != VK_SUCCESS) return false; @@ -2722,8 +2723,8 @@ static void fg_record_dump(VkRenderer* r, VkCommandBuffer cmd, VkImage srcImg, V if (!r->fg_dump_supported || bufIdx >= FG_DUMP_BUFS) return; vkr_image_barrier(cmd, srcImg, srcLayout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT); + 
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT); if (rawF16) { uint32_t cw = srcW < 512u ? srcW : 512u, chh = srcH < 512u ? srcH : 512u; VkBufferImageCopy rcp = {0}; @@ -3539,7 +3540,7 @@ static FgPending fg_worker_generate(VkRenderer* r, const FgJob* job) { VK_LOGI("fgseq[%u] GENERATED (phase=%.3f prev=%u curr=%u)", r->fg_dump_count, job->phase, prev_idx, curr_idx); r->fg_dump_count++; } - if (synDump && r->fg_dump_count < FG_DUMP_N) { // harness: raw F16 flow field to test recovery of the known shift + if (synDump && r->fg_dump_count < FG_DUMP_N) { // harness: raw F16 flow field fg_record_dump(r, f->cmd, r->fg_motion[parity].image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, r->fg_motion[parity].width, r->fg_motion[parity].height, r->fg_dump_count, 1); VK_LOGI("fgseq[%u] FLOW (flowres=%ux%u)", r->fg_dump_count, r->fg_motion[parity].width, r->fg_motion[parity].height); r->fg_dump_count++; diff --git a/app/src/main/cpp/winlator/vk/vk_state.h b/app/src/main/cpp/winlator/vk/vk_state.h index 865deff55..1e8d064e4 100644 --- a/app/src/main/cpp/winlator/vk/vk_state.h +++ b/app/src/main/cpp/winlator/vk/vk_state.h @@ -273,6 +273,9 @@ typedef struct VkFgCnn { VkCnnFeatSet feat[3]; bool featValid[3]; VkCnnImg feat8_pair[CNN_LEVELS]; + VkCnnImg gPair8[CNN_LEVELS]; // wnfg_36 input: prev.feat8 L0 ++ curr.feat8 L0 (8ch, cinT=2) + VkCnnImg gExpIn[CNN_LEVELS]; // wnfg_45 input: hG1.L0 ++ hG1.L1 ++ 0 (3-layer, cinT=3) + VkCnnImg hG23b[CNN_LEVELS]; // wnfg_42 output (16ch) when wnfg_45 expansion is present VkCnnImg hG0[CNN_LEVELS], hG1[CNN_LEVELS], hG23[CNN_LEVELS], hG4[CNN_LEVELS]; VkCnnImg hD0[CNN_LEVELS], hD1[CNN_LEVELS], hD2[CNN_LEVELS], hD3[CNN_LEVELS]; VkCnnImg hD5[CNN_LEVELS], hD6[CNN_LEVELS], hD7[CNN_LEVELS], hD8[CNN_LEVELS]; diff --git a/app/src/main/cpp/winlator/vk/weights_v2/wnfg_45.weights.fp16 b/app/src/main/cpp/winlator/vk/weights_v2/wnfg_45.weights.fp16 
new file mode 100644 index 0000000000000000000000000000000000000000..a7052c1e602a117a1a34f7749075db4322fb00ea GIT binary patch literal 3552 zcmXw5cUTnH9u;d0ArM=@ATHZ?c6tGgC?HYPSP)}HvA19$wqPfAjT&E6l-_n>cXrCX zv$M7sYa~7+#_}oA#N?^bXrd?E$vvcaE2=8=~I{p2xa61~W?n0d_n;PQFTly4b!*5TyOM9`R_Fm-mVdW$8D^26@GHP4ffJthH5w55yFk@?Y0bnDvTbx9nr7OC zKIENbthRt&rJV)aN~3I}16kIf2~;id^lRE3x7{$q`%cuhQldx6CzE%LF_S0)LBN1Y+?t>9)97 zXb<kH)7U;{l53{SMJFa)(`6!(v8roI-DXYo3H$JDX0fsfmcf-+ zHpTlk2Bt@!!t#`G;yxbbkkzYlH8)r~H<)cw7Chu{6xs_Z@I{~(aDhE>b-oDCRd^qM?78vYL|?LG@kJhZ z#I0veij#!ryQ07s#8ax5^nI%mLutM;QQr)^(~E==+?l|?6`ToE3SCQF z!?Br;m7OnZac{2%cHDoESXhd!Qtg)SQnzN zavjqZv-$>HthbQE^h`L#U*~)(Pm}v7J%brkM{5#*m|r~^TtoM(=_YL{Gx(`Md9&7M z)Mxw#rlXvhiFA1r@wy1*NgKrfIkVCK|M$aHqwb9Qrc@dA5u{I2sZsyxkA&B?C&2~P zacW}RJ<+cyY7)30wzpn^F=P>OpUhAmD+BpLYaSczejCLI`l>gTS+)B^70*Je{CIpKBI$lQNIOO^2jcVF)`1=`W)gHM8Y+&%rj)l6Z-<o6f^lm%M)jNZoz>np$!X$As;p(9(N(Xk43riHPYcF%9r^y25)tU-=TE%@`yJ7IIc~7*1o8S`{v(dxO~ce;Na{bhw(}NlwqJL4!d=cd#fCQt z`CyiJxm^T7?w~bG8o+II+~+&FN-2 zV&??9UOKC8O?*T|%U$6_`W)NYdXIClOR8_sV}w(I27Div&Gqp-rVfIZdIN*QdvHBI z7C3z2xJ-Vl4X<2HB-=S{HW;VP5+g&O)9W#3iC_O%8P3q`W-y4F#LcH7lqhA9`vg5u zJRwff>$K(CF)0EsdzNQ$MN2~zNa7pFQ zjDzpZ5R%*dO|V2LW8d}Vg0GeHMPa@RWqV5gW)FdNf8!LNmG1f0A!9<-tL5J%6Db&wI zTa^GV;b-knc0PAf&iCB}nq@!Rnf)VSH?s+!3ConFgD&(uKyF6a1ZR zht>4Za_ktVRJF484!l`$S-!*`N*Gq0Emyez$$e?-!zapc#jD2I5|pNIS# zLVS8V82qOr|BpvHWFX(iA|1@=J8z@!oU`-;G5iSlhDK?!{#$69{7^rjC)n;BTA(y{ zv+FgbybfX^M3&jLdeVj literal 0 HcmV?d00001