mipgen updates

This commit is contained in:
Turanszki Janos
2018-08-06 23:37:03 +01:00
parent 0e9fff9d3c
commit f45f08e8ee
9 changed files with 56 additions and 40 deletions
+1 -2
View File
@@ -175,14 +175,13 @@ RendererWindow::RendererWindow(wiGUI* gui, Renderable3DComponent* component) : G
tessellationCheckBox->SetEnabled(wiRenderer::GetDevice()->CheckCapability(wiGraphicsTypes::GraphicsDevice::GRAPHICSDEVICE_CAPABILITY_TESSELLATION));
advancedRefractionsCheckBox = new wiCheckBox("Advanced Refractions: ");
advancedRefractionsCheckBox->SetTooltip("Enable advanced refraction rendering: rough transparent materials will be more matte. This needs additional support from the graphics driver.");
advancedRefractionsCheckBox->SetTooltip("Enable advanced refraction rendering: rough transparent materials will be more matte.");
advancedRefractionsCheckBox->SetPos(XMFLOAT2(x, y += step));
advancedRefractionsCheckBox->OnClick([=](wiEventArgs args) {
wiRenderer::SetAdvancedRefractionsEnabled(args.bValue);
});
advancedRefractionsCheckBox->SetCheck(wiRenderer::GetAdvancedRefractionsEnabled());
rendererWindow->AddWidget(advancedRefractionsCheckBox);
advancedRefractionsCheckBox->SetEnabled(wiRenderer::GetDevice()->CheckCapability(wiGraphicsTypes::GraphicsDevice::GRAPHICSDEVICE_CAPABILITY_UNORDEREDACCESSTEXTURE_LOAD_FORMAT_EXT));
alphaCompositionCheckBox = new wiCheckBox("Alpha Composition: ");
alphaCompositionCheckBox->SetTooltip("Enable Alpha Composition. Enables softer alpha blending on partly solid geometry (eg. vegetation) but rendering performance will be slower.");
@@ -8,44 +8,61 @@
TEXTURE2D(input, float4, TEXSLOT_UNIQUE0);
RWTEXTURE2D(input_output, MIP_OUTPUT_FORMAT, 0);
// Shader requires feature: Typed UAV additional format loads!
static const uint TILE_BORDER = 4;
static const uint TILE_SIZE = TILE_BORDER + GENERATEMIPCHAIN_2D_BLOCK_SIZE + TILE_BORDER;
groupshared float4 tile[TILE_SIZE][TILE_SIZE];
[numthreads(GENERATEMIPCHAIN_2D_BLOCK_SIZE, GENERATEMIPCHAIN_2D_BLOCK_SIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 Gid : SV_GroupID)
{
#ifndef SHADERCOMPILER_SPIRV
uint i;
// Determine whether the thread is alive (a thread is alive when its dispatch thread ID directly indexes an output pixel)
if (DTid.x < outputResolution.x && DTid.y < outputResolution.y)
// First, we prewarm the tile cache, including border region:
const uint2 tile_upperleft = Gid.xy * GENERATEMIPCHAIN_2D_BLOCK_SIZE - TILE_BORDER;
const uint2 co[] = {
uint2(0, 0), uint2(1, 0),
uint2(0, 1), uint2(1, 1)
};
for (i = 0; i < 4; ++i)
{
// Do a bilinear sample first and write it out:
input_output[DTid.xy] = input.SampleLevel(sampler_linear_clamp, (DTid.xy + 0.5f) / (float2)outputResolution.xy, 0);
DeviceMemoryBarrier();
const uint2 coord = GTid.xy * 2 + co[i];
tile[coord.x][coord.y] = input.SampleLevel(sampler_linear_clamp, (tile_upperleft + coord + 1.0f) / (float2)outputResolution.xy, 0);
}
GroupMemoryBarrierWithGroupSync();
uint i = 0;
float4 sum = 0;
const int2 thread_to_cache = GTid.xy + TILE_BORDER;
// Gather samples in the X (horizontal) direction:
[unroll]
for (i = 0; i < 9; ++i)
{
sum += input_output[DTid.xy + uint2(gaussianOffsets[i], 0)] * gaussianWeightsNormalized[i];
}
// Write out the result of the horizontal blur:
DeviceMemoryBarrier();
input_output[DTid.xy] = sum;
DeviceMemoryBarrier();
sum = 0;
float4 sum = 0;
// Gather samples in the Y (vertical) direction:
[unroll]
for (i = 0; i < 9; ++i)
{
sum += input_output[DTid.xy + uint2(0, gaussianOffsets[i])] * gaussianWeightsNormalized[i];
}
// Write out the result of the vertical blur:
DeviceMemoryBarrier();
input_output[DTid.xy] = sum;
// Then each thread processes just one pixel within tile, excluding border:
// Horizontal accumulation for each tile pixel, with help of the border region
[unroll]
for (i = 0; i < 9; ++i)
{
const uint2 coord = thread_to_cache + int2(gaussianOffsets[i], 0);
sum += tile[coord.x][coord.y] * gaussianWeightsNormalized[i];
}
#endif
// Write the horizontally accumulated value back into the tile cache (excluding border region):
tile[thread_to_cache.x][thread_to_cache.y] = sum;
GroupMemoryBarrierWithGroupSync();
sum = 0;
// Vertical accumulation for each tile pixel, with help of the border region
[unroll]
for (i = 0; i < 9; ++i)
{
const uint2 coord = thread_to_cache + int2(0, gaussianOffsets[i]);
sum += tile[coord.x][coord.y] * gaussianWeightsNormalized[i];
}
if (DTid.x < outputResolution.x && DTid.y < outputResolution.y)
{
// Each valid thread writes out one pixel:
input_output[DTid.xy] = sum;
}
}
@@ -15,6 +15,6 @@ void main(uint3 DTid : SV_DispatchThreadID)
{
if (DTid.x < outputResolution.x && DTid.y < outputResolution.y)
{
output[DTid.xy] = input.SampleLevel(customsampler, (DTid.xy + 0.5f) / (float2)outputResolution.xy, 0);
output[DTid.xy] = input.SampleLevel(customsampler, (DTid.xy + 1.0f) / (float2)outputResolution.xy, 0);
}
}
@@ -18,7 +18,7 @@ void main(uint3 DTid : SV_DispatchThreadID)
if (DTid.x < outputResolution.x && DTid.y < outputResolution.y && DTid.z < outputResolution.z)
{
// Do a bilinear sample first and write it out:
input_output[DTid] = input.SampleLevel(sampler_linear_clamp, (DTid + 0.5f) / (float3)outputResolution, 0);
input_output[DTid] = input.SampleLevel(sampler_linear_clamp, (DTid + 1.0f) / (float3)outputResolution, 0);
DeviceMemoryBarrier();
uint i = 0;
@@ -15,6 +15,6 @@ void main( uint3 DTid : SV_DispatchThreadID )
{
if (DTid.x < outputResolution.x && DTid.y < outputResolution.y && DTid.z < outputResolution.z)
{
output[DTid] = input.SampleLevel(customsampler, (DTid + 0.5f) / (float3)outputResolution, 0);
output[DTid] = input.SampleLevel(customsampler, (DTid + 1.0f) / (float3)outputResolution, 0);
}
}
@@ -15,7 +15,7 @@ void main(uint3 DTid : SV_DispatchThreadID)
{
if (DTid.x < outputResolution.x && DTid.y < outputResolution.y)
{
float2 uv = (DTid.xy + 0.5f) / outputResolution.xy;
float2 uv = (DTid.xy + 1.0f) / outputResolution.xy;
float3 N = UV_to_CubeMap(uv, DTid.z);
output[uint3(DTid.xy, DTid.z + arrayIndex * 6)] = input.SampleLevel(customsampler, float4(N, arrayIndex), 0);
@@ -15,7 +15,7 @@ void main(uint3 DTid : SV_DispatchThreadID)
{
if (DTid.x < outputResolution.x && DTid.y < outputResolution.y)
{
float2 uv = (DTid.xy + 0.5f) / outputResolution.xy;
float2 uv = (DTid.xy + 1.0f) / outputResolution.xy;
float3 N = UV_to_CubeMap(uv, DTid.z);
output[DTid.xyz] = input.SampleLevel(customsampler, N, 0);
+1 -1
View File
@@ -19,7 +19,7 @@ static const float gaussianWeightsNormalized[9] = {
gaussWeight3 * gaussNormalization,
gaussWeight4 * gaussNormalization,
};
static const uint gaussianOffsets[9] = {
static const int gaussianOffsets[9] = {
-4, -3, -2, -1, 0, 1, 2, 3, 4
};
+1 -1
View File
@@ -9,7 +9,7 @@ namespace wiVersion
// minor features, major updates
const int minor = 19;
// minor bug fixes, alterations, refactors, updates
const int revision = 2;
const int revision = 3;
long GetVersion()