general improvements (#1049)

- Entity is now 64-bit uint
- terrain chunk hashing improvement
- structure memory alignment improvements
- light shafts improvements
This commit is contained in:
Turánszki János
2025-01-31 08:25:02 +01:00
committed by GitHub
parent ed5b4a0e9c
commit 12f6abd4de
14 changed files with 303 additions and 291 deletions
+1 -1
View File
@@ -411,7 +411,7 @@ void GeneralWindow::Create(EditorComponent* _editor)
}
theme.shadow_highlight = !focusModeCheckBox.GetCheck();
theme.shadow_highlight_spread = 0.6f;
theme.shadow_highlight_spread = 0.4f;
theme.shadow_highlight_color = theme_color_focus;
theme.shadow_highlight_color.x *= 1.4f;
theme.shadow_highlight_color.y *= 1.4f;
@@ -653,15 +653,18 @@ struct alignas(16) ShaderTransform
struct alignas(16) ShaderMeshInstance
{
uint uid;
uint64_t uid;
uint flags; // high 8 bits: user stencilRef
uint layerMask;
uint meshletOffset; // offset in the global meshlet buffer for first subset (for LOD0)
uint geometryOffset; // offset of all geometries for currently active LOD
uint geometryCount; // number of all geometries in currently active LOD
uint meshletOffset; // offset in the global meshlet buffer for first subset (for LOD0)
uint geometryOffset; // offset of all geometries for currently active LOD
uint geometryCount; // number of all geometries in currently active LOD
uint baseGeometryOffset; // offset of all geometries of the instance (if no LODs, then it is equal to geometryOffset)
uint2 rimHighlight; // packed half4
uint baseGeometryCount; // number of all geometries of the instance (if no LODs, then it is equal to geometryCount)
float fadeDistance;
uint2 color; // packed half4
uint2 emissive; // packed half4
@@ -671,10 +674,6 @@ struct alignas(16) ShaderMeshInstance
int lightmap;
uint alphaTest_size; // packed half2
uint2 rimHighlight; // packed half4
float fadeDistance;
float padding;
float3 center;
float radius;
@@ -49,14 +49,13 @@ struct Surfel
// This per-surfel structure will store all additional persistent data per surfel that isn't needed at GI lookup
struct SurfelData
{
uint64_t uid;
uint2 primitiveID;
uint bary;
uint uid;
uint bary;
uint raydata; // 24bit rayOffset, 8bit rayCount
uint properties; // 8bit life frames, 8bit recycle frames, 1bit backface normal
float max_inconsistency;
int padding1;
inline uint GetRayOffset() { return raydata & 0xFFFFFF; }
inline uint GetRayCount() { return (raydata >> 24u) & 0xFF; }
+13 -19
View File
@@ -3,40 +3,34 @@
PUSHCONSTANT(postprocess, PostProcess);
Texture2D<float4> input : register(t0);
Texture2D<half4> input : register(t0);
RWTexture2D<float4> output : register(u0);
RWTexture2D<half4> output : register(u0);
static const uint NUM_SAMPLES = 32;
static const uint UNROLL_GRANULARITY = 8;
static const uint NUM_SAMPLES = 64;
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
float2 uv = (DTid.xy + 0.5f) * postprocess.resolution_rcp;
float2 uv = (DTid.xy + 0.5) * postprocess.resolution_rcp;
float3 color = input.SampleLevel(sampler_linear_clamp, uv, 0).rgb;
half3 color = input.SampleLevel(sampler_linear_clamp, uv, 0).rgb;
float2 lightPos = postprocess.params1.xy;
float2 deltaTexCoord = uv - lightPos;
deltaTexCoord *= postprocess.params0.x / NUM_SAMPLES;
float illuminationDecay = 1.0f;
half illuminationDecay = 1.0;
[loop] // loop big part (balance register pressure)
for (uint i = 0; i < NUM_SAMPLES / UNROLL_GRANULARITY; i++)
for (uint i = 0; i < NUM_SAMPLES; i++)
{
[unroll] // unroll small parts (balance register pressure)
for (uint j = 0; j < UNROLL_GRANULARITY; ++j)
{
uv.xy -= deltaTexCoord;
float3 sam = input.SampleLevel(sampler_linear_clamp, uv.xy, 0).rgb;
sam *= illuminationDecay * postprocess.params0.y;
color.rgb += sam;
illuminationDecay *= postprocess.params0.z;
}
uv.xy -= deltaTexCoord;
half3 sam = input.SampleLevel(sampler_linear_clamp, uv.xy, 0).rgb;
sam *= illuminationDecay * postprocess.params0.y;
color.rgb += sam;
illuminationDecay *= postprocess.params0.z;
}
color *= postprocess.params0.w;
output[DTid.xy] = float4(color, 1);
output[DTid.xy] = half4(color, 1);
}
+12 -6
View File
@@ -19,9 +19,10 @@ namespace wi::ecs
// The Entity is a global unique persistent identifier within the entity-component system
// It can be stored and used for the duration of the application
// The entity can be a different value on a different run of the application, if it was serialized
// It must be only serialized with the SerializeEntity() function. It will ensure that entities still match with their components correctly after serialization
using Entity = uint32_t;
inline constexpr Entity INVALID_ENTITY = 0;
// It must only be serialized with the SerializeEntity() function if persistence is needed across different program runs;
// this ensures that entities still match with their components correctly after serialization
using Entity = uint64_t;
inline static constexpr Entity INVALID_ENTITY = 0;
// Runtime can create a new entity with this
inline Entity CreateEntity()
{
@@ -142,9 +143,14 @@ namespace wi::ecs
// reservedCount : how many components can be held initially before growing the container
ComponentManager(size_t reservedCount = 0)
{
components.reserve(reservedCount);
entities.reserve(reservedCount);
lookup.reserve(reservedCount);
Reserve(reservedCount);
}
// Reserve storage up front in the components, entities and lookup containers
//	count : capacity to request from each of the three containers
inline void Reserve(size_t count)
{
components.reserve(count);
entities.reserve(count);
lookup.reserve(count);
}
// Clear the whole container
+3 -3
View File
@@ -60,7 +60,6 @@ namespace wi
wi::graphics::CommandList cmd
);
mutable bool gpu_initialized = false;
void InitializeGPUDataIfNeeded(wi::graphics::CommandList cmd);
void Draw(
@@ -69,6 +68,8 @@ namespace wi
wi::graphics::CommandList cmd
) const;
wi::ecs::Entity meshID = wi::ecs::INVALID_ENTITY;
enum FLAGS
{
EMPTY = 0,
@@ -78,8 +79,6 @@ namespace wi
};
uint32_t _flags = EMPTY;
wi::ecs::Entity meshID = wi::ecs::INVALID_ENTITY;
uint32_t strandCount = 0;
uint32_t segmentCount = 1;
uint32_t randomSeed = 1;
@@ -106,6 +105,7 @@ namespace wi
mutable bool regenerate_frame = true;
wi::graphics::Format position_format = wi::graphics::Format::R16G16B16A16_UNORM;
mutable bool must_rebuild_blas = true;
mutable bool gpu_initialized = false;
void Serialize(wi::Archive& archive, wi::ecs::EntitySerializer& seri);
+14 -3
View File
@@ -36,6 +36,7 @@ namespace wi
rtShadow = {};
rtSun[0] = {};
rtSun[1] = {};
rtSun[2] = {};
rtSun_resolved = {};
rtGUIBlurredBackground[0] = {};
rtGUIBlurredBackground[1] = {};
@@ -1914,6 +1915,8 @@ namespace wi
device->EventBegin("Light Shafts", cmd);
const Texture* texture_fullres = nullptr;
// Render sun stencil cutout:
{
if (getMSAASampleCount() > 1)
@@ -1931,6 +1934,7 @@ namespace wi
RenderPassImage::Resolve(&rtSun_resolved),
};
device->RenderPassBegin(rp, arraysize(rp), cmd);
texture_fullres = &rtSun_resolved;
}
else
{
@@ -1946,6 +1950,7 @@ namespace wi
RenderPassImage::RenderTarget(&rtSun[0], RenderPassImage::LoadOp::CLEAR),
};
device->RenderPassBegin(rp, arraysize(rp), cmd);
texture_fullres = &rtSun[0];
}
Viewport vp;
@@ -1977,10 +1982,13 @@ namespace wi
1.0f, 1.0f, 0.1f, 1.0f,
camera->GetProjection(), camera->GetView(), XMMatrixIdentity());
{
// Downsample to low res first:
wi::renderer::Postprocess_Downsample4x(*texture_fullres, rtSun[2], cmd);
XMFLOAT2 sun;
XMStoreFloat2(&sun, sunPos);
wi::renderer::Postprocess_LightShafts(
getMSAASampleCount() > 1 ? rtSun_resolved : rtSun[0],
rtSun[2],
rtSun[1],
cmd,
sun,
@@ -2919,10 +2927,12 @@ namespace wi
desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
desc.sample_count = 1;
desc.width = internalResolution.x / 2;
desc.height = internalResolution.y / 2;
desc.width = internalResolution.x / 4;
desc.height = internalResolution.y / 4;
device->CreateTexture(&desc, nullptr, &rtSun[1]);
device->SetName(&rtSun[1], "rtSun[1]");
device->CreateTexture(&desc, nullptr, &rtSun[2]);
device->SetName(&rtSun[2], "rtSun[2]");
if (getMSAASampleCount() > 1)
{
@@ -2937,6 +2947,7 @@ namespace wi
{
rtSun[0] = {};
rtSun[1] = {};
rtSun[2] = {};
rtSun_resolved = {};
}
}
+1 -1
View File
@@ -107,7 +107,7 @@ namespace wi
wi::graphics::Texture rtBloom_tmp; // temporary for bloom downsampling
wi::graphics::Texture rtAO; // full res AO
wi::graphics::Texture rtShadow; // raytraced shadows mask
wi::graphics::Texture rtSun[2]; // 0: sun render target used for lightshafts (can be MSAA), 1: radial blurred lightshafts
wi::graphics::Texture rtSun[3]; // 0: sun render target used for lightshafts (can be MSAA), 1: radial blurred lightshafts, 2: downsampled sun render target used as the lightshafts input
wi::graphics::Texture rtSun_resolved; // sun render target, but the resolved version if MSAA is enabled
wi::graphics::Texture rtGUIBlurredBackground[3]; // downsampled, gaussian blurred scene for GUI
wi::graphics::Texture rtShadingRate; // UINT8 shading rate per tile
+1 -1
View File
@@ -4417,7 +4417,7 @@ namespace wi::scene
const float dist = std::sqrt(distsq);
const float dist_to_sphere = dist - radius;
object.lod = uint32_t(dist_to_sphere * object.lod_distance_multiplier);
object.lod = std::min(object.lod, mesh.GetLODCount() - 1);
object.lod = std::min(object.lod, uint16_t(mesh.GetLODCount() - 1));
}
}
+1 -1
View File
@@ -4796,7 +4796,7 @@ int MeshComponent_BindLua::SetMeshSubsetMaterialID(lua_State* L)
if (argc >= 2)
{
size_t subsetindex = (uint32_t)wi::lua::SGetLongLong(L, 1);
Entity entity = (uint32_t)wi::lua::SGetLongLong(L, 2);
Entity entity = (Entity)wi::lua::SGetLongLong(L, 2);
const uint32_t lod_count = component->GetLODCount();
for (uint32_t lod = 0; lod < lod_count; ++lod)
File diff suppressed because it is too large Load Diff
+3 -2
View File
@@ -332,7 +332,7 @@ namespace wi::terrain
lod_count++;
}
locker.lock();
std::scoped_lock lck(locker);
free(atlas);
if (tile_count > 1)
{
@@ -852,7 +852,7 @@ namespace wi::terrain
}
// Start the generation on a background thread and keep it running until the next frame
wi::jobsystem::Execute(generator->workload, [=](wi::jobsystem::JobArgs args) {
wi::jobsystem::Execute(generator->workload, [=](wi::jobsystem::JobArgs a) {
wi::Timer timer;
bool generated_something = false;
@@ -926,6 +926,7 @@ namespace wi::terrain
// Do a parallel for loop over all the chunk's vertices and compute their properties:
wi::jobsystem::context ctx;
ctx.priority = wi::jobsystem::Priority::Low;
wi::jobsystem::Dispatch(ctx, vertexCount, chunk_width, [&](wi::jobsystem::JobArgs args) {
uint32_t index = args.jobIndex;
const float x = (float(index % chunk_width) - chunk_half_width) * chunk_scale;
+17 -11
View File
@@ -14,14 +14,21 @@ namespace wi::terrain
{
struct Chunk
{
int x, z;
union
{
struct
{
int x, z;
};
uint64_t raw = 0;
};
constexpr bool operator==(const Chunk& other) const
{
return (x == other.x) && (z == other.z);
return raw == other.raw;
}
inline size_t compute_hash() const
constexpr uint64_t compute_hash() const
{
return ((std::hash<int>()(x) ^ (std::hash<int>()(z) << 1)) >> 1);
return raw;
}
};
}
@@ -31,7 +38,7 @@ namespace std
template <>
struct hash<wi::terrain::Chunk>
{
inline size_t operator()(const wi::terrain::Chunk& chunk) const
constexpr uint64_t operator()(const wi::terrain::Chunk& chunk) const
{
return chunk.compute_hash();
}
@@ -95,7 +102,7 @@ namespace wi::terrain
wi::graphics::GPUBuffer pageBuffer;
wi::graphics::GPUBuffer pageBuffer_CPU_upload[wi::graphics::GraphicsDevice::GetBufferCount()];
bool data_available_CPU[wi::graphics::GraphicsDevice::GetBufferCount()] = {};
int cpu_resource_id = 0;
int16_t cpu_resource_id = 0;
uint32_t resolution = 0;
void init(uint32_t resolution);
@@ -213,10 +220,10 @@ namespace wi::terrain
wi::ecs::Entity entity = wi::ecs::INVALID_ENTITY;
wi::ecs::Entity grass_entity = wi::ecs::INVALID_ENTITY;
wi::ecs::Entity props_entity = wi::ecs::INVALID_ENTITY;
const XMFLOAT3* mesh_vertex_positions = nullptr;
float prop_density_current = 1;
wi::HairParticleSystem grass;
float grass_density_current = 1;
const XMFLOAT3* mesh_vertex_positions = nullptr;
wi::HairParticleSystem grass;
wi::vector<BlendmapLayer> blendmap_layers;
wi::graphics::Texture blendmap;
wi::primitive::Sphere sphere;
@@ -231,7 +238,6 @@ namespace wi::terrain
while (blendmap_layers.size() < materialIndex + 1)
{
blendmap_layers.emplace_back().pixels.resize(vertexCount);
std::fill(blendmap_layers.back().pixels.begin(), blendmap_layers.back().pixels.end(), 0);
}
}
};
@@ -284,15 +290,15 @@ namespace wi::terrain
int grass_chunk_dist = 1;
// For generating scene on a background thread:
float generation_time_budget_milliseconds = 8; // after this much time, the generation thread will start to exit. This can help avoid a very long running, resource consuming and slow cancellation generation
std::shared_ptr<Generator> generator;
float generation_time_budget_milliseconds = 12; // after this much time, the generation thread will exit. This helps avoid a very long-running, resource-consuming generation that is slow to cancel
wi::vector<VirtualTexture*> virtual_textures_in_use;
wi::graphics::Sampler sampler;
VirtualTextureAtlas atlas;
int chunk_buffer_range = 3; // how many chunks to upload to GPU in X and Z directions
wi::graphics::GPUBuffer chunk_buffer;
int chunk_buffer_range = 3; // how many chunks to upload to GPU in X and Z directions
constexpr bool IsCenterToCamEnabled() const { return _flags & CENTER_TO_CAM; }
constexpr bool IsRemovalEnabled() const { return _flags & REMOVAL; }
+2 -2
View File
@@ -9,7 +9,7 @@ namespace wi::version
// minor features, major updates, breaking compatibility changes
const int minor = 71;
// minor bug fixes, alterations, refactors, updates
const int revision = 668;
const int revision = 669;
const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);
@@ -30,7 +30,7 @@ namespace wi::version
return version_string.c_str();
}
#define SUPPORTERS "Nemerle, James Webb, Quifeng Jin, TheGameCreators, Joseph Goldin, Yuri, Sergey K, Yukawa Kanta, Dragon Josh, John, LurkingNinja, Bernardo Del Castillo, Invictus, Scott Hunt, Yazan Altaki, Tuan NV, Robert MacGregor, cybernescence, Alexander Dahlin, blueapples, Delhills, NI NI, Sherief, ktopoet, Justin Macklin, Cédric Fabre, TogetherTeam, Bartosz Boczula, Arne Koenig, Ivan Trajchev, nathants, Fahd Ahmed, Gabriel Jadderson, SAS_Controller, Dominik Madarász, Segfault, Mike amanfo, Dennis Brakhane, rookie, Peter Moore, therealjtgill, Nicolas Embleton, Desuuc, radino1977, Anthony Curtis, manni heck, Matthias Hölzl, Phyffer, Lucas Pinheiro, Tapkaara, gpman, Anthony Python, Gnowos, Klaus, slaughternaut, Paul Brain, Connor Greaves, Alexandr, Lee Bamber, MCAlarm MC2, Titoutan, Willow, Aldo, lokimx, K. Osterman, Nomad, ykl, Alex Krokos, Timmy, Avaflow, mat, Hexegonel Samael Michael, Joe Spataro, soru, GeniokV, Mammoth, Ignacio, datae, Jason Rice, MarsBEKET, Tim, Twisty, Zelf ieats kiezen, Romildo Franco, zNachoh, Dmitriy, Alex Minerva, Stefan Kent, Natty, Sunny Krishna, Vilmos Malárik, Ferrata, Rossakis, Stefana Andrei, Taylor, Gunnar Kriik, 赟 杨, Rex, Lemon Brother, fixy, meta_leap"
#define SUPPORTERS "Nemerle, James Webb, Quifeng Jin, TheGameCreators, Joseph Goldin, Yuri, Sergey K, Yukawa Kanta, Dragon Josh, John, LurkingNinja, Bernardo Del Castillo, Invictus, Scott Hunt, Yazan Altaki, Tuan NV, Robert MacGregor, cybernescence, Alexander Dahlin, blueapples, Delhills, NI NI, Sherief, ktopoet, Justin Macklin, Cédric Fabre, TogetherTeam, Bartosz Boczula, Arne Koenig, Ivan Trajchev, nathants, Fahd Ahmed, Gabriel Jadderson, SAS_Controller, Dominik Madarász, Segfault, Mike amanfo, Dennis Brakhane, rookie, Peter Moore, therealjtgill, Nicolas Embleton, Desuuc, radino1977, Anthony Curtis, manni heck, Matthias Hölzl, Phyffer, Lucas Pinheiro, Tapkaara, gpman, Anthony Python, Gnowos, Klaus, slaughternaut, Paul Brain, Connor Greaves, Alexandr, Lee Bamber, MCAlarm MC2, Titoutan, Willow, Aldo, lokimx, K. Osterman, Nomad, ykl, Alex Krokos, Timmy, Avaflow, mat, Hexegonel Samael Michael, Joe Spataro, soru, GeniokV, Mammoth, Ignacio, datae, Jason Rice, MarsBEKET, Tim, Twisty, Zelf ieats kiezen, Romildo Franco, zNachoh, Dmitriy, Alex Minerva, Stefan Kent, Natty, Sunny Krishna, Vilmos Malárik, Ferrata, Rossakis, Stefana Andrei, Taylor, Gunnar Kriik, 赟 杨, Rex, Lemon Brother, fixy, meta_leap, Edik, jusik5348"
const char* GetCreditsString()
{