impostor culling optimization: reduce to 1 atomic operation per wave
This commit is contained in:
@@ -31,73 +31,78 @@ void main(uint3 DTid : SV_DispatchThreadID)
|
||||
ShaderMeshInstance instance = load_instance(instanceIndex);
|
||||
ShaderGeometry geometry = load_geometry(instance.geometryOffset);
|
||||
|
||||
[branch]
|
||||
if (geometry.impostorSliceOffset < 0)
|
||||
return;
|
||||
|
||||
float dist = distance(GetCamera().position, instance.center);
|
||||
[branch]
|
||||
if (dist < instance.fadeDistance - instance.radius)
|
||||
return;
|
||||
const bool distance_culled = dist < (instance.fadeDistance - instance.radius);
|
||||
|
||||
// Frustum culling:
|
||||
ShaderSphere sphere;
|
||||
sphere.center = instance.center;
|
||||
sphere.radius = instance.radius;
|
||||
[branch]
|
||||
if (!GetCamera().frustum.intersects(sphere))
|
||||
return;
|
||||
|
||||
uint indexOffset;
|
||||
InterlockedAdd(output_indirect[0].IndexCountPerInstance, 6u, indexOffset);
|
||||
uint impostorOffset = indexOffset / 6u;
|
||||
uint vertexOffset = impostorOffset * 4u;
|
||||
const bool visible = geometry.impostorSliceOffset >= 0 && !distance_culled && GetCamera().frustum.intersects(sphere);
|
||||
|
||||
// Write out indices:
|
||||
output_indices[indexOffset + 0] = vertexOffset + 0;
|
||||
output_indices[indexOffset + 1] = vertexOffset + 1;
|
||||
output_indices[indexOffset + 2] = vertexOffset + 2;
|
||||
output_indices[indexOffset + 3] = vertexOffset + 2;
|
||||
output_indices[indexOffset + 4] = vertexOffset + 1;
|
||||
output_indices[indexOffset + 5] = vertexOffset + 3;
|
||||
|
||||
// We rotate the billboard to face camera, but unlike emitted particles,
|
||||
// they don't rotate according to camera rotation, but the camera position relative
|
||||
// to the impostor (at least for now)
|
||||
float3 origin = instance.center;
|
||||
float3 up = float3(0, 1, 0);
|
||||
float3 face = GetCamera().position - origin;
|
||||
face.y = 0; // only rotate around Y axis!
|
||||
face = normalize(face);
|
||||
float3 right = normalize(cross(face, up));
|
||||
|
||||
// Decide which slice to show according to billboard facing direction:
|
||||
uint slice = uint(geometry.impostorSliceOffset);
|
||||
float angle = acos(dot(face.xz, float2(0, 1))) / PI;
|
||||
if (cross(face, float3(0, 0, 1)).y < 0)
|
||||
// Optimization: reduce to 1 atomic operation per wave
|
||||
const uint waveAppendCount = WaveActiveCountBits(visible);
|
||||
uint waveOffset;
|
||||
if (WaveIsFirstLane() && waveAppendCount > 0)
|
||||
{
|
||||
angle = 2 - angle;
|
||||
InterlockedAdd(output_indirect[0].IndexCountPerInstance, waveAppendCount * 6u, waveOffset);
|
||||
}
|
||||
angle *= 0.5f;
|
||||
angle = saturate(angle - 0.0001);
|
||||
slice += uint(angle * impostorCaptureAngles) * 3;
|
||||
waveOffset = WaveReadLaneFirst(waveOffset);
|
||||
|
||||
const float dither = max(0, instance.fadeDistance - dist) / instance.radius;
|
||||
|
||||
// Write out per impostor data:
|
||||
uint2 data = 0;
|
||||
data.x |= slice & 0xFFFFFF;
|
||||
data.x |= (uint(dither * 255) & 0xFF) << 24u;
|
||||
data.y = instance.color;
|
||||
output_impostor_data.Store2(impostorOffset * sizeof(uint2), data);
|
||||
|
||||
// Write out vertices:
|
||||
for (uint vertexID = 0; vertexID < 4; ++vertexID)
|
||||
[branch]
|
||||
if (visible)
|
||||
{
|
||||
float3 pos = BILLBOARD[vertexID];
|
||||
pos = mul(pos, float3x3(right, up, face));
|
||||
pos *= instance.radius;
|
||||
pos += instance.center;
|
||||
output_vertices_pos_nor.Store4((vertexOffset + vertexID) * sizeof(uint4), uint4(asuint(pos), pack_unitvector(face)));
|
||||
const uint indexOffset = waveOffset + WavePrefixSum(6u);
|
||||
const uint impostorOffset = indexOffset / 6u;
|
||||
const uint vertexOffset = impostorOffset * 4u;
|
||||
|
||||
// Write out indices:
|
||||
output_indices[indexOffset + 0] = vertexOffset + 0;
|
||||
output_indices[indexOffset + 1] = vertexOffset + 1;
|
||||
output_indices[indexOffset + 2] = vertexOffset + 2;
|
||||
output_indices[indexOffset + 3] = vertexOffset + 2;
|
||||
output_indices[indexOffset + 4] = vertexOffset + 1;
|
||||
output_indices[indexOffset + 5] = vertexOffset + 3;
|
||||
|
||||
// We rotate the billboard to face camera, but unlike emitted particles,
|
||||
// they don't rotate according to camera rotation, but the camera position relative
|
||||
// to the impostor (at least for now)
|
||||
float3 origin = instance.center;
|
||||
float3 up = float3(0, 1, 0);
|
||||
float3 face = GetCamera().position - origin;
|
||||
face.y = 0; // only rotate around Y axis!
|
||||
face = normalize(face);
|
||||
float3 right = normalize(cross(face, up));
|
||||
|
||||
// Decide which slice to show according to billboard facing direction:
|
||||
uint slice = uint(geometry.impostorSliceOffset);
|
||||
float angle = acos(dot(face.xz, float2(0, 1))) / PI;
|
||||
if (cross(face, float3(0, 0, 1)).y < 0)
|
||||
{
|
||||
angle = 2 - angle;
|
||||
}
|
||||
angle *= 0.5f;
|
||||
angle = saturate(angle - 0.0001);
|
||||
slice += uint(angle * impostorCaptureAngles) * 3;
|
||||
|
||||
const float dither = max(0, instance.fadeDistance - dist) / instance.radius;
|
||||
|
||||
// Write out per impostor data:
|
||||
uint2 data = 0;
|
||||
data.x |= slice & 0xFFFFFF;
|
||||
data.x |= (uint(dither * 255) & 0xFF) << 24u;
|
||||
data.y = instance.color;
|
||||
output_impostor_data.Store2(impostorOffset * sizeof(uint2), data);
|
||||
|
||||
// Write out vertices:
|
||||
for (uint vertexID = 0; vertexID < 4; ++vertexID)
|
||||
{
|
||||
float3 pos = BILLBOARD[vertexID];
|
||||
pos = mul(pos, float3x3(right, up, face));
|
||||
pos *= instance.radius;
|
||||
pos += instance.center;
|
||||
output_vertices_pos_nor.Store4((vertexOffset + vertexID) * sizeof(uint4), uint4(asuint(pos), pack_unitvector(face)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user