impostor culling optimization: reduce to 1 atomic operation per wave

This commit is contained in:
Turánszki János
2022-11-13 17:31:00 +01:00
parent 497263e376
commit bafe687dda
+61 -56
View File
@@ -31,73 +31,78 @@ void main(uint3 DTid : SV_DispatchThreadID)
ShaderMeshInstance instance = load_instance(instanceIndex);
ShaderGeometry geometry = load_geometry(instance.geometryOffset);
[branch]
if (geometry.impostorSliceOffset < 0)
return;
float dist = distance(GetCamera().position, instance.center);
[branch]
if (dist < instance.fadeDistance - instance.radius)
return;
const bool distance_culled = dist < (instance.fadeDistance - instance.radius);
// Frustum culling:
ShaderSphere sphere;
sphere.center = instance.center;
sphere.radius = instance.radius;
[branch]
if (!GetCamera().frustum.intersects(sphere))
return;
uint indexOffset;
InterlockedAdd(output_indirect[0].IndexCountPerInstance, 6u, indexOffset);
uint impostorOffset = indexOffset / 6u;
uint vertexOffset = impostorOffset * 4u;
const bool visible = geometry.impostorSliceOffset >= 0 && !distance_culled && GetCamera().frustum.intersects(sphere);
// Write out indices:
output_indices[indexOffset + 0] = vertexOffset + 0;
output_indices[indexOffset + 1] = vertexOffset + 1;
output_indices[indexOffset + 2] = vertexOffset + 2;
output_indices[indexOffset + 3] = vertexOffset + 2;
output_indices[indexOffset + 4] = vertexOffset + 1;
output_indices[indexOffset + 5] = vertexOffset + 3;
// We rotate the billboard to face camera, but unlike emitted particles,
// they don't rotate according to camera rotation, but the camera position relative
// to the impostor (at least for now)
float3 origin = instance.center;
float3 up = float3(0, 1, 0);
float3 face = GetCamera().position - origin;
face.y = 0; // only rotate around Y axis!
face = normalize(face);
float3 right = normalize(cross(face, up));
// Decide which slice to show according to billboard facing direction:
uint slice = uint(geometry.impostorSliceOffset);
float angle = acos(dot(face.xz, float2(0, 1))) / PI;
if (cross(face, float3(0, 0, 1)).y < 0)
// Optimization: reduce to 1 atomic operation per wave
const uint waveAppendCount = WaveActiveCountBits(visible);
uint waveOffset;
if (WaveIsFirstLane() && waveAppendCount > 0)
{
angle = 2 - angle;
InterlockedAdd(output_indirect[0].IndexCountPerInstance, waveAppendCount * 6u, waveOffset);
}
angle *= 0.5f;
angle = saturate(angle - 0.0001);
slice += uint(angle * impostorCaptureAngles) * 3;
waveOffset = WaveReadLaneFirst(waveOffset);
const float dither = max(0, instance.fadeDistance - dist) / instance.radius;
// Write out per impostor data:
uint2 data = 0;
data.x |= slice & 0xFFFFFF;
data.x |= (uint(dither * 255) & 0xFF) << 24u;
data.y = instance.color;
output_impostor_data.Store2(impostorOffset * sizeof(uint2), data);
// Write out vertices:
for (uint vertexID = 0; vertexID < 4; ++vertexID)
[branch]
if (visible)
{
float3 pos = BILLBOARD[vertexID];
pos = mul(pos, float3x3(right, up, face));
pos *= instance.radius;
pos += instance.center;
output_vertices_pos_nor.Store4((vertexOffset + vertexID) * sizeof(uint4), uint4(asuint(pos), pack_unitvector(face)));
const uint indexOffset = waveOffset + WavePrefixSum(6u);
const uint impostorOffset = indexOffset / 6u;
const uint vertexOffset = impostorOffset * 4u;
// Write out indices:
output_indices[indexOffset + 0] = vertexOffset + 0;
output_indices[indexOffset + 1] = vertexOffset + 1;
output_indices[indexOffset + 2] = vertexOffset + 2;
output_indices[indexOffset + 3] = vertexOffset + 2;
output_indices[indexOffset + 4] = vertexOffset + 1;
output_indices[indexOffset + 5] = vertexOffset + 3;
// We rotate the billboard to face camera, but unlike emitted particles,
// they don't rotate according to camera rotation, but the camera position relative
// to the impostor (at least for now)
float3 origin = instance.center;
float3 up = float3(0, 1, 0);
float3 face = GetCamera().position - origin;
face.y = 0; // only rotate around Y axis!
face = normalize(face);
float3 right = normalize(cross(face, up));
// Decide which slice to show according to billboard facing direction:
uint slice = uint(geometry.impostorSliceOffset);
float angle = acos(dot(face.xz, float2(0, 1))) / PI;
if (cross(face, float3(0, 0, 1)).y < 0)
{
angle = 2 - angle;
}
angle *= 0.5f;
angle = saturate(angle - 0.0001);
slice += uint(angle * impostorCaptureAngles) * 3;
const float dither = max(0, instance.fadeDistance - dist) / instance.radius;
// Write out per impostor data:
uint2 data = 0;
data.x |= slice & 0xFFFFFF;
data.x |= (uint(dither * 255) & 0xFF) << 24u;
data.y = instance.color;
output_impostor_data.Store2(impostorOffset * sizeof(uint2), data);
// Write out vertices:
for (uint vertexID = 0; vertexID < 4; ++vertexID)
{
float3 pos = BILLBOARD[vertexID];
pos = mul(pos, float3x3(right, up, face));
pos *= instance.radius;
pos += instance.center;
output_vertices_pos_nor.Store4((vertexOffset + vertexID) * sizeof(uint4), uint4(asuint(pos), pack_unitvector(face)));
}
}
}