lightmap leak fixes; updated xatlas; ortho shader fixes; (#993)

This commit is contained in:
Turánszki János
2024-12-21 16:02:42 +01:00
committed by GitHub
parent fa5fc4b21d
commit 2fda798086
19 changed files with 2314 additions and 2277 deletions
+18 -6
View File
@@ -249,12 +249,14 @@ void MeshWindow::Create(EditorComponent* _editor)
flipCullingButton.SetPos(XMFLOAT2(mod_x, y += step));
flipCullingButton.OnClick([&](wi::gui::EventArgs args) {
wi::scene::Scene& scene = editor->GetCurrentScene();
wi::unordered_set<MeshComponent*> visited_meshes; // fix double visit (straight mesh + object->mesh)
for (auto& x : editor->translator.selected)
{
MeshComponent* mesh = get_mesh(scene, x);
if (mesh == nullptr)
if (mesh == nullptr || visited_meshes.count(mesh) > 0)
continue;
mesh->FlipCulling();
visited_meshes.insert(mesh);
}
SetEntity(entity, subset);
});
@@ -266,12 +268,14 @@ void MeshWindow::Create(EditorComponent* _editor)
flipNormalsButton.SetPos(XMFLOAT2(mod_x, y += step));
flipNormalsButton.OnClick([&](wi::gui::EventArgs args) {
wi::scene::Scene& scene = editor->GetCurrentScene();
wi::unordered_set<MeshComponent*> visited_meshes; // fix double visit (straight mesh + object->mesh)
for (auto& x : editor->translator.selected)
{
MeshComponent* mesh = get_mesh(scene, x);
if (mesh == nullptr)
if (mesh == nullptr || visited_meshes.count(mesh) > 0)
continue;
mesh->FlipNormals();
visited_meshes.insert(mesh);
}
SetEntity(entity, subset);
});
@@ -283,12 +287,14 @@ void MeshWindow::Create(EditorComponent* _editor)
computeNormalsSmoothButton.SetPos(XMFLOAT2(mod_x, y += step));
computeNormalsSmoothButton.OnClick([&](wi::gui::EventArgs args) {
wi::scene::Scene& scene = editor->GetCurrentScene();
wi::unordered_set<MeshComponent*> visited_meshes; // fix double visit (straight mesh + object->mesh)
for (auto& x : editor->translator.selected)
{
MeshComponent* mesh = get_mesh(scene, x);
if (mesh == nullptr)
if (mesh == nullptr || visited_meshes.count(mesh) > 0)
continue;
mesh->ComputeNormals(MeshComponent::COMPUTE_NORMALS_SMOOTH);
visited_meshes.insert(mesh);
}
SetEntity(entity, subset);
});
@@ -300,12 +306,14 @@ void MeshWindow::Create(EditorComponent* _editor)
computeNormalsHardButton.SetPos(XMFLOAT2(mod_x, y += step));
computeNormalsHardButton.OnClick([&](wi::gui::EventArgs args) {
wi::scene::Scene& scene = editor->GetCurrentScene();
wi::unordered_set<MeshComponent*> visited_meshes; // fix double visit (straight mesh + object->mesh)
for (auto& x : editor->translator.selected)
{
MeshComponent* mesh = get_mesh(scene, x);
if (mesh == nullptr)
if (mesh == nullptr || visited_meshes.count(mesh) > 0)
continue;
mesh->ComputeNormals(MeshComponent::COMPUTE_NORMALS_HARD);
visited_meshes.insert(mesh);
}
SetEntity(entity, subset);
});
@@ -317,12 +325,14 @@ void MeshWindow::Create(EditorComponent* _editor)
recenterButton.SetPos(XMFLOAT2(mod_x, y += step));
recenterButton.OnClick([&](wi::gui::EventArgs args) {
wi::scene::Scene& scene = editor->GetCurrentScene();
wi::unordered_set<MeshComponent*> visited_meshes; // fix double visit (straight mesh + object->mesh)
for (auto& x : editor->translator.selected)
{
MeshComponent* mesh = get_mesh(scene, x);
if (mesh == nullptr)
if (mesh == nullptr || visited_meshes.count(mesh) > 0)
continue;
mesh->Recenter();
visited_meshes.insert(mesh);
}
});
AddWidget(&recenterButton);
@@ -333,12 +343,14 @@ void MeshWindow::Create(EditorComponent* _editor)
recenterToBottomButton.SetPos(XMFLOAT2(mod_x, y += step));
recenterToBottomButton.OnClick([&](wi::gui::EventArgs args) {
wi::scene::Scene& scene = editor->GetCurrentScene();
wi::unordered_set<MeshComponent*> visited_meshes; // fix double visit (straight mesh + object->mesh)
for (auto& x : editor->translator.selected)
{
MeshComponent* mesh = get_mesh(scene, x);
if (mesh == nullptr)
if (mesh == nullptr || visited_meshes.count(mesh) > 0)
continue;
mesh->RecenterToBottom();
visited_meshes.insert(mesh);
}
});
AddWidget(&recenterToBottomButton);
+7 -7
View File
@@ -84,7 +84,7 @@ static Atlas_Dim GenerateMeshAtlas(MeshComponent& meshcomponent, uint32_t resolu
mesh.indexCount = (int)meshcomponent.indices.size();
mesh.indexData = meshcomponent.indices.data();
mesh.indexFormat = xatlas::IndexFormat::UInt32;
xatlas::AddMeshError::Enum error = xatlas::AddMesh(atlas, mesh);
xatlas::AddMeshError error = xatlas::AddMesh(atlas, mesh);
if (error != xatlas::AddMeshError::Success) {
wi::helper::messageBox(xatlas::StringForEnum(error), "Adding mesh to xatlas failed!");
return dim;
@@ -94,13 +94,15 @@ static Atlas_Dim GenerateMeshAtlas(MeshComponent& meshcomponent, uint32_t resolu
// Generate atlas:
{
xatlas::ChartOptions chartoptions;
xatlas::ParameterizeOptions parametrizeoptions;
xatlas::PackOptions packoptions;
chartoptions.useInputMeshUvs = true;
chartoptions.fixWinding = true;
xatlas::PackOptions packoptions;
packoptions.resolution = resolution;
packoptions.blockAlign = true;
packoptions.padding = 2;
xatlas::Generate(atlas, chartoptions, parametrizeoptions, packoptions);
xatlas::Generate(atlas, chartoptions, packoptions);
dim.width = atlas->width;
dim.height = atlas->height;
@@ -543,13 +545,11 @@ void ObjectWindow::Create(EditorComponent* _editor)
y += step;
lightmapResolutionSlider.Create(32, 1024, 128, 1024 - 32, "Lightmap resolution: ");
lightmapResolutionSlider.Create(32, 8192, 512, 8192 - 32, "Lightmap resolution: ");
lightmapResolutionSlider.SetTooltip("Set the approximate resolution for this object's lightmap. This will be packed into the larger global lightmap later.");
lightmapResolutionSlider.SetSize(XMFLOAT2(wid, hei));
lightmapResolutionSlider.SetPos(XMFLOAT2(x, y += step));
lightmapResolutionSlider.OnSlide([&](wi::gui::EventArgs args) {
// unfortunately, we must be pow2 with full float lightmap format, otherwise it could be unlimited (but accumulation blending would suffer then)
// or at least for me, downloading the lightmap was glitching out when non-pow 2 and RGBA32_FLOAT format
lightmapResolutionSlider.SetValue(float(wi::math::GetNextPowerOfTwo(uint32_t(args.fValue))));
});
AddWidget(&lightmapResolutionSlider);
+1976 -2106
View File
File diff suppressed because it is too large Load Diff
+71 -73
View File
@@ -31,20 +31,18 @@ Copyright NVIDIA Corporation 2006 -- Ignacio Castano <icastano@nvidia.com>
#pragma once
#ifndef XATLAS_H
#define XATLAS_H
#include <stddef.h>
#include <stdint.h>
namespace xatlas {
struct ChartType
enum class ChartType
{
enum Enum
{
Planar,
Ortho,
LSCM,
Piecewise,
Invalid
};
Planar,
Ortho,
LSCM,
Piecewise,
Invalid
};
// A group of connected faces, belonging to a single atlas.
@@ -53,7 +51,7 @@ struct Chart
uint32_t *faceArray;
uint32_t atlasIndex; // Sub-atlas index.
uint32_t faceCount;
ChartType::Enum type;
ChartType type;
uint32_t material;
};
@@ -87,12 +85,12 @@ struct Atlas
{
uint32_t *image;
Mesh *meshes; // The output meshes, corresponding to each AddMesh call.
float *utilization; // Normalized atlas texel utilization array. E.g. a value of 0.8 means 20% empty space. atlasCount in length.
uint32_t width; // Atlas width in texels.
uint32_t height; // Atlas height in texels.
uint32_t atlasCount; // Number of sub-atlases. Equal to 0 unless PackOptions resolution is changed from default (0).
uint32_t chartCount; // Total number of charts in all meshes.
uint32_t meshCount; // Number of output meshes. Equal to the number of times AddMesh was called.
float *utilization; // Normalized atlas texel utilization array. E.g. a value of 0.8 means 20% empty space. atlasCount in length.
float texelsPerUnit; // Equal to PackOptions texelsPerUnit if texelsPerUnit > 0, otherwise an estimated value to match PackOptions resolution.
};
@@ -101,13 +99,10 @@ Atlas *Create();
void Destroy(Atlas *atlas);
struct IndexFormat
enum class IndexFormat
{
enum Enum
{
UInt16,
UInt32
};
UInt16,
UInt32
};
// Input mesh declaration.
@@ -117,36 +112,43 @@ struct MeshDecl
const void *vertexNormalData = nullptr; // optional
const void *vertexUvData = nullptr; // optional. The input UVs are provided as a hint to the chart generator.
const void *indexData = nullptr; // optional
// Optional. indexCount / 3 (triangle count) in length.
// Optional. Must be faceCount in length.
// Don't atlas faces set to true. Ignored faces still exist in the output meshes, Vertex uv is set to (0, 0) and Vertex atlasIndex to -1.
const bool *faceIgnoreData = nullptr;
// Optional. Must be faceCount in length.
// Only faces with the same material will be assigned to the same chart.
const uint32_t *faceMaterialData = nullptr;
// Optional. Must be faceCount in length.
// Polygon / n-gon support. Faces are assumed to be triangles if this is null.
const uint8_t *faceVertexCount = nullptr;
uint32_t vertexCount = 0;
uint32_t vertexPositionStride = 0;
uint32_t vertexNormalStride = 0; // optional
uint32_t vertexUvStride = 0; // optional
uint32_t indexCount = 0;
int32_t indexOffset = 0; // optional. Add this offset to all indices.
IndexFormat::Enum indexFormat = IndexFormat::UInt16;
uint32_t faceCount = 0; // Optional if faceVertexCount is null. Otherwise assumed to be indexCount / 3.
IndexFormat indexFormat = IndexFormat::UInt16;
// Vertex positions within epsilon distance of each other are considered colocal.
float epsilon = 1.192092896e-07F;
};
struct AddMeshError
enum class AddMeshError
{
enum Enum
{
Success, // No error.
Error, // Unspecified error.
IndexOutOfRange, // An index is >= MeshDecl vertexCount.
InvalidIndexCount // Not evenly divisible by 3 - expecting triangles.
};
Success, // No error.
Error, // Unspecified error.
IndexOutOfRange, // An index is >= MeshDecl vertexCount.
InvalidFaceVertexCount, // Must be >= 3.
InvalidIndexCount // Not evenly divisible by 3 - expecting triangles.
};
// Add a mesh to the atlas. MeshDecl data is copied, so it can be freed after AddMesh returns.
AddMeshError::Enum AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t meshCountHint = 0);
AddMeshError AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t meshCountHint = 0);
// Wait for AddMesh async processing to finish. ComputeCharts / Generate call this internally.
void AddMeshJoin(Atlas *atlas);
@@ -155,19 +157,23 @@ struct UvMeshDecl
{
const void *vertexUvData = nullptr;
const void *indexData = nullptr; // optional
const uint32_t *faceMaterialData = nullptr; // Optional. Faces with different materials won't be assigned to the same chart. Must be indexCount / 3 in length.
const uint32_t *faceMaterialData = nullptr; // Optional. Overlapping UVs should be assigned a different material. Must be indexCount / 3 in length.
uint32_t vertexCount = 0;
uint32_t vertexStride = 0;
uint32_t indexCount = 0;
int32_t indexOffset = 0; // optional. Add this offset to all indices.
IndexFormat::Enum indexFormat = IndexFormat::UInt16;
bool rotateCharts = true;
IndexFormat indexFormat = IndexFormat::UInt16;
};
AddMeshError::Enum AddUvMesh(Atlas *atlas, const UvMeshDecl &decl);
AddMeshError AddUvMesh(Atlas *atlas, const UvMeshDecl &decl);
// Custom parameterization function. texcoords initial values are an orthogonal parameterization.
typedef void (*ParameterizeFunc)(const float *positions, float *texcoords, uint32_t vertexCount, const uint32_t *indices, uint32_t indexCount);
struct ChartOptions
{
ParameterizeFunc paramFunc = nullptr;
float maxChartArea = 0.0f; // Don't grow charts to be larger than this. 0 means no limit.
float maxBoundaryLength = 0.0f; // Don't grow charts to have a longer boundary than this. 0 means no limit.
@@ -180,38 +186,16 @@ struct ChartOptions
float maxCost = 2.0f; // If total of all metrics * weights > maxCost, don't grow chart. Lower values result in more charts.
uint32_t maxIterations = 1; // Number of iterations of the chart growing and seeding phases. Higher values result in better charts.
bool useInputMeshUvs = false; // Use MeshDecl::vertexUvData for charts.
bool fixWinding = false; // Enforce consistent texture coordinate winding.
};
// Call after all AddMesh calls. Can be called multiple times to recompute charts with different options.
void ComputeCharts(Atlas *atlas, ChartOptions options = ChartOptions());
// Custom parameterization function. texcoords initial values are an orthogonal parameterization.
typedef void (*ParameterizeFunc)(const float *positions, float *texcoords, uint32_t vertexCount, const uint32_t *indices, uint32_t indexCount);
struct ParameterizeOptions
{
ParameterizeFunc func = nullptr;
bool closeHoles = true; // If the custom parameterization function works with multiple boundaries, this can be set to false to improve performance.
bool fixTJunctions = true; // If meshes don't have T-junctions, this can be set to false to improve performance.
};
// Call after ComputeCharts. Can be called multiple times to re-parameterize charts with a different ParameterizeFunc.
void ParameterizeCharts(Atlas *atlas, ParameterizeOptions options = ParameterizeOptions());
struct PackOptions
{
// Leave space around charts for texels that would be sampled by bilinear filtering.
bool bilinear = true;
// Align charts to 4x4 blocks. Also improves packing speed, since there are fewer possible chart locations to consider.
bool blockAlign = false;
// Slower, but gives the best result. If false, use random chart placement.
bool bruteForce = false;
// Create Atlas::image
bool createImage = false;
// Charts larger than this will be scaled down. 0 means no limit.
uint32_t maxChartSize = 0;
@@ -227,29 +211,43 @@ struct PackOptions
// If not 0, and texelsPerUnit is not 0, generate one or more atlases with that exact resolution.
// If not 0, and texelsPerUnit is 0, texelsPerUnit is estimated to approximately match the resolution.
uint32_t resolution = 0;
// Leave space around charts for texels that would be sampled by bilinear filtering.
bool bilinear = true;
// Align charts to 4x4 blocks. Also improves packing speed, since there are fewer possible chart locations to consider.
bool blockAlign = false;
// Slower, but gives the best result. If false, use random chart placement.
bool bruteForce = false;
// Create Atlas::image
bool createImage = false;
// Rotate charts to the axis of their convex hull.
bool rotateChartsToAxis = true;
// Rotate charts to improve packing.
bool rotateCharts = true;
};
// Call after ParameterizeCharts. Can be called multiple times to re-pack charts with different options.
// Call after ComputeCharts. Can be called multiple times to re-pack charts with different options.
void PackCharts(Atlas *atlas, PackOptions packOptions = PackOptions());
// Equivalent to calling ComputeCharts, ParameterizeCharts and PackCharts in sequence. Can be called multiple times to regenerate with different options.
void Generate(Atlas *atlas, ChartOptions chartOptions = ChartOptions(), ParameterizeOptions parameterizeOptions = ParameterizeOptions(), PackOptions packOptions = PackOptions());
// Equivalent to calling ComputeCharts and PackCharts in sequence. Can be called multiple times to regenerate with different options.
void Generate(Atlas *atlas, ChartOptions chartOptions = ChartOptions(), PackOptions packOptions = PackOptions());
// Progress tracking.
struct ProgressCategory
enum class ProgressCategory
{
enum Enum
{
AddMesh,
ComputeCharts,
ParameterizeCharts,
PackCharts,
BuildOutputMeshes
};
AddMesh,
ComputeCharts,
PackCharts,
BuildOutputMeshes
};
// May be called from any thread. Return false to cancel.
typedef bool (*ProgressFunc)(ProgressCategory::Enum category, int progress, void *userData);
typedef bool (*ProgressFunc)(ProgressCategory category, int progress, void *userData);
void SetProgressCallback(Atlas *atlas, ProgressFunc progressFunc = nullptr, void *progressUserData = nullptr);
@@ -263,8 +261,8 @@ typedef int (*PrintFunc)(const char *, ...);
void SetPrint(PrintFunc print, bool verbose);
// Helper functions for error messages.
const char *StringForEnum(AddMeshError::Enum error);
const char *StringForEnum(ProgressCategory::Enum category);
const char *StringForEnum(AddMeshError error);
const char *StringForEnum(ProgressCategory category);
} // namespace xatlas
+26 -10
View File
@@ -34,25 +34,41 @@
// are--the sRGB curve needs to be removed before involving the colors in linear mathematics such
// as physically based lighting.
// Note: modified for Wicked Engine to use macros, for better half precision mapping
float3 ApplySRGBCurve( float3 x )
{
// Approximately pow(x, 1.0 / 2.2)
return select(x < 0.0031308, 12.92 * x, 1.055 * pow(x, 1.0 / 2.4) - 0.055);
}
// Approximately pow(x, 1.0 / 2.2)
#define ApplySRGBCurve( x ) select(x < 0.0031308, 12.92 * x, 1.055 * pow(x, 1.0 / 2.4) - 0.055)
// Approximately pow(x, 2.2)
#define RemoveSRGBCurve( x ) select(x < 0.04045, x / 12.92, pow((x + 0.055) / 1.055, 2.4))
float3 RemoveSRGBCurve( float3 x )
{
// Approximately pow(x, 2.2)
return select(x < 0.04045, x / 12.92, pow((x + 0.055) / 1.055, 2.4));
}
// These functions avoid pow() to efficiently approximate sRGB with an error < 0.4%.
#define ApplySRGBCurve_Fast( x ) select(x < 0.0031308, 12.92 * x, 1.13005 * sqrt(x - 0.00228) - 0.13448 * x + 0.005719)
float3 ApplySRGBCurve_Fast( float3 x )
{
return select(x < 0.0031308, 12.92 * x, 1.13005 * sqrt(x - 0.00228) - 0.13448 * x + 0.005719);
}
#define RemoveSRGBCurve_Fast( x ) select(x < 0.04045, x / 12.92, -7.43605 * x - 31.24297 * sqrt(-0.53792 * x + 1.279924) + 35.34864)
float3 RemoveSRGBCurve_Fast( float3 x )
{
return select(x < 0.04045, x / 12.92, -7.43605 * x - 31.24297 * sqrt(-0.53792 * x + 1.279924) + 35.34864);
}
// The OETF recommended for content shown on HDTVs. This "gamma ramp" may increase contrast as
// appropriate for viewing in a dark environment. Always use this curve with Limited RGB as it is
// used in conjunction with HDTVs.
#define ApplyREC709Curve( x ) select(x < 0.0181, 4.5 * x, 1.0993 * pow(x, 0.45) - 0.0993)
float3 ApplyREC709Curve( float3 x )
{
return select(x < 0.0181, 4.5 * x, 1.0993 * pow(x, 0.45) - 0.0993);
}
#define RemoveREC709Curve( x ) select(x < 0.08145, x / 4.5, pow((x + 0.0993) / 1.0993, 1.0 / 0.45))
float3 RemoveREC709Curve( float3 x )
{
return select(x < 0.08145, x / 4.5, pow((x + 0.0993) / 1.0993, 1.0 / 0.45));
}
// This is the new HDR transfer function, also called "PQ" for perceptual quantizer. Note that REC2084
// does not also refer to a color space. REC2084 is typically used with the REC2020 color space.
+17 -43
View File
@@ -5,54 +5,28 @@ Texture2D lightmap_input : register(t0);
RWTexture2D<float4> lightmap_output : register(u0);
static const int TILE_BORDER = 4;
static const uint TILE_SIZE = POSTPROCESS_BLOCKSIZE + TILE_BORDER * 2;
groupshared uint2 tile_cache[TILE_SIZE*TILE_SIZE];
// Offsets of the 8 texels surrounding the current one, tried in this order by
// the expand pass: the four edge neighbors first, then the four diagonals.
// Each of the eight entries must be distinct so that no neighbor is skipped.
static const int2 offsets[] = {
	int2(0, -1),
	int2(0, 1),
	int2(-1, 0),
	int2(1, 0),
	int2(-1, -1),
	int2(1, -1),
	int2(1, 1),
	int2(-1, 1), // was a second int2(-1, -1), which left this diagonal unsampled
};
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex)
void main(uint3 DTid : SV_DispatchThreadID)
{
const int2 tile_upperleft = Gid.xy * POSTPROCESS_BLOCKSIZE - TILE_BORDER;
for (uint t = groupIndex; t < TILE_SIZE * TILE_SIZE; t += POSTPROCESS_BLOCKSIZE * POSTPROCESS_BLOCKSIZE)
{
const uint2 pixel = tile_upperleft + unflatten2D(t, TILE_SIZE);
tile_cache[t] = pack_half4(lightmap_input[pixel]);
}
GroupMemoryBarrierWithGroupSync();
int2 pixel = DTid.xy;
float4 color = lightmap_input[pixel];
float4 color = unpack_half4(tile_cache[flatten2D(GTid.xy + TILE_BORDER, TILE_SIZE)]);
if (color.a < 1)
for (uint i = 0; (i < arraysize(offsets)) && (color.a < 1); ++i)
{
// spin outwards from center in spiral pattern and take the first sample which has valid opacity:
int generation = TILE_BORDER;
for (int growth = 0; (growth < generation) && (color.a < 1); ++growth)
{
const int side = 2 * (growth + 1);
int x = -growth - 1;
int y = -growth - 1;
for (int i = 0; (i < side) && (color.a < 1); ++i)
{
color = unpack_half4(tile_cache[flatten2D(GTid.xy + TILE_BORDER + int2(x, y), TILE_SIZE)]);
x++;
}
for (int i = 0; (i < side) && (color.a < 1); ++i)
{
color = unpack_half4(tile_cache[flatten2D(GTid.xy + TILE_BORDER + int2(x, y), TILE_SIZE)]);
y++;
}
for (int i = 0; (i < side) && (color.a < 1); ++i)
{
color = unpack_half4(tile_cache[flatten2D(GTid.xy + TILE_BORDER + int2(x, y), TILE_SIZE)]);
x--;
}
for (int i = 0; (i < side) && (color.a < 1); ++i)
{
color = unpack_half4(tile_cache[flatten2D(GTid.xy + TILE_BORDER + int2(x, y), TILE_SIZE)]);
y--;
}
}
color = lightmap_input[pixel + offsets[i]];
}
lightmap_output[DTid.xy] = color;
lightmap_output[pixel] = color;
}
+108 -7
View File
@@ -1,20 +1,109 @@
#define RAY_BACKFACE_CULLING
#define TEXTURE_SLOT_NONUNIFORM
#include "globals.hlsli"
#include "raytracingHF.hlsli"
#include "lightingHF.hlsli"
#include "stochasticSSRHF.hlsli"
// This value specifies after which bounce the anyhit will be disabled:
static const uint ANYTHIT_CUTOFF_AFTER_BOUNCE_COUNT = 1;
static const uint ANYTHIT_CUTOFF_AFTER_BOUNCE_COUNT = 4;
struct Input
{
float4 pos : SV_POSITION;
float2 uv : TEXCOORD;
float3 pos3D : WORLDPOSITION;
float3 normal : NORMAL;
centroid float2 uv : TEXCOORD;
centroid float3 pos3D : WORLDPOSITION;
centroid float3 normal : NORMAL;
};
// 2D probe directions iterated by BakeryPixelPush: +x, -x, +y, -y.
// They are used as the xy part of a float3 that is multiplied with the tangent frame.
static const float2 tangent_directions[] = {
float2(1, 0),
float2(-1, 0),
float2(0, 1),
float2(0, -1),
};
// Bakery pixel pushing: https://ndotl.wordpress.com/2018/08/29/baking-artifact-free-lightmaps/
// This can push the position outside of an enclosed area within a pixel to remove shadow leaks.
// Instead of the shadow texel reaching outside, this will make light go inside, which is better in most cases.
//	P           : position to start tracing from; overwritten with the pushed position when a backface is hit
//	N           : normal used to offset the probe ray origin and to build the tangent frame
//	UV          : texture coordinate passed to compute_tangent_frame
//	rng         : random number generator, only consumed by the non-RTAPI trace path
//	bakerydebug : set to 1 when a push happened, otherwise left untouched
void BakeryPixelPush(inout float3 P, in float3 N, in float2 UV, inout RNG rng, inout float bakerydebug)
{
// Largest per-axis change of P between neighboring pixels (screen-space derivatives):
float3 dUV1 = max(abs(ddx(P)), abs(ddy(P)));
float dPos = max(max(dUV1.x, dUV1.y), dUV1.z);
dPos = dPos * SQRT2; // convert to diagonal (small overshoot)
float3x3 TBN = compute_tangent_frame(N, P, UV);
// Probe each direction in turn; the first backface hit decides the push and ends the function.
for (uint i = 0; i < arraysize(tangent_directions); ++i)
{
RayDesc ray;
ray.Origin = P + N * 0.0001;
// NOTE(review): the third component is 1, so if TBN's third axis is the normal this ray is tilted
// away from the surface instead of lying in the tangent plane - confirm this is intended.
ray.Direction = normalize(mul(float3(tangent_directions[i], 1), TBN));
ray.TMin = 0.0001;
ray.TMax = dPos; // only search within roughly one pixel's footprint
bool backface_hit = false;
float3 hit_pos = 0;
float3 hit_nor = 0;
Surface surface;
surface.init();
surface.V = -ray.Direction;
#ifdef RTAPI
// Inline ray query path:
uint flags = 0;
wiRayQuery q;
q.TraceRayInline(
scene_acceleration_structure, // RaytracingAccelerationStructure AccelerationStructure
flags, // uint RayFlags
xTraceUserData.y, // uint InstanceInclusionMask
ray // RayDesc Ray
);
while (q.Proceed());
// Only a committed hit on the back side of a triangle counts:
if (q.CommittedStatus() == COMMITTED_TRIANGLE_HIT && !q.CommittedTriangleFrontFace())
{
backface_hit = true;
hit_pos = q.WorldRayOrigin() + q.WorldRayDirection() * q.CommittedRayT();
PrimitiveID prim;
prim.primitiveIndex = q.CommittedPrimitiveIndex();
prim.instanceIndex = q.CommittedInstanceID();
prim.subsetIndex = q.CommittedGeometryIndex();
surface.SetBackface(!q.CommittedTriangleFrontFace());
surface.hit_depth = q.CommittedRayT();
// If the hit surface can't be loaded, give up and leave P unchanged:
if (!surface.load(prim, q.CommittedTriangleBarycentrics()))
return;
hit_nor = surface.facenormal;
}
#else
// Fallback trace path (this is the one that uses rng):
RayHit hit = TraceRay_Closest(ray, xTraceUserData.y, rng);
if (hit.distance < FLT_MAX && hit.is_backface)
{
backface_hit = true;
hit_pos = ray.Origin + ray.Direction * hit.distance;
surface.SetBackface(hit.is_backface);
surface.hit_depth = hit.distance;
// If the hit surface can't be loaded, give up and leave P unchanged:
if (!surface.load(hit.primitiveID, hit.bary))
return;
hit_nor = surface.facenormal;
}
#endif // RTAPI
if (backface_hit)
{
bakerydebug = 1;
// Move P to the hit point, offset by 0.001 against the hit surface's face normal:
P = hit_pos - hit_nor * 0.001;
return;
}
}
}
float4 main(Input input) : SV_TARGET
{
Surface surface;
@@ -24,11 +113,16 @@ float4 main(Input input) : SV_TARGET
RNG rng;
rng.init((uint2)input.pos.xy, xTraceSampleIndex);
float3 P = input.pos3D;
float bakerydebug = 0;
BakeryPixelPush(P, surface.N, input.uv, rng, bakerydebug);
float2 uv = input.uv;
RayDesc ray;
ray.Origin = input.pos3D;
ray.Origin = P;
ray.Direction = sample_hemisphere_cos(surface.N, rng);
ray.TMin = 0.001;
ray.TMin = 0.0001;
ray.TMax = FLT_MAX;
float3 result = 0;
float3 energy = 1;
@@ -151,7 +245,7 @@ float4 main(Input input) : SV_TARGET
RayDesc newRay;
newRay.Origin = surface.P;
newRay.TMin = 0.001;
newRay.TMin = 0.0001;
newRay.TMax = dist;
newRay.Direction = L + max3(surface.sss);
@@ -254,6 +348,8 @@ float4 main(Input input) : SV_TARGET
prim.instanceIndex = q.CommittedInstanceID();
prim.subsetIndex = q.CommittedGeometryIndex();
surface.SetBackface(!q.CommittedTriangleFrontFace());
if (!surface.load(prim, q.CommittedTriangleBarycentrics()))
return 0;
@@ -261,6 +357,8 @@ float4 main(Input input) : SV_TARGET
// ray origin updated for next bounce:
ray.Origin = ray.Origin + ray.Direction * hit.distance;
surface.SetBackface(hit.is_backface);
if (!surface.load(hit.primitiveID, hit.bary))
return 0;
@@ -308,5 +406,8 @@ float4 main(Input input) : SV_TARGET
}
//if(bakerydebug > 0)
// result = float3(1,0,0);
return float4(result, xTraceAccumulationFactor);
}
+5 -4
View File
@@ -6,16 +6,16 @@ PUSHCONSTANT(push, LightmapPushConstants);
struct Output
{
float4 pos : SV_POSITION;
float2 uv : TEXCOORD;
float3 pos3D : WORLDPOSITION;
float3 normal : NORMAL;
centroid float2 uv : TEXCOORD;
centroid float3 pos3D : WORLDPOSITION;
centroid float3 normal : NORMAL;
};
Output main(uint vertexID : SV_VertexID)
{
ShaderMeshInstance inst = load_instance(push.instanceIndex);
float3 pos = bindless_buffers_float4[push.vb_pos_wind][vertexID].xyz;
half3 nor = bindless_buffers_half4[push.vb_nor][vertexID].xyz;
float3 nor = bindless_buffers_float4[push.vb_nor][vertexID].xyz;
float2 atl = bindless_buffers_float2[push.vb_atl][vertexID];
Output output;
@@ -23,6 +23,7 @@ Output main(uint vertexID : SV_VertexID)
output.pos = float4(atl, 0, 1);
output.pos.xy = output.pos.xy * 2 - 1;
output.pos.y *= -1;
output.pos.xy += xTracePixelOffset;
output.uv = atl;
+1 -1
View File
@@ -41,7 +41,7 @@ void main(uint2 DTid : SV_DispatchThreadID)
const float3 N = decode_oct(texture_normal[jitterPixel]);
const float3 P = reconstruct_position(jitterUV, depth);
const float3 V = normalize(GetCamera().position - P);
const float3 V = normalize(GetCamera().frustum_corners.screen_to_nearplane(uv) - P); // ortho support
RayPayload payload;
payload.data = 0;
+1 -1
View File
@@ -48,7 +48,7 @@ void main(uint2 DTid : SV_DispatchThreadID)
const float3 N = decode_oct(texture_normal[jitterPixel]);
const float3 P = reconstruct_position(jitterUV, depth);
const float3 V = normalize(GetCamera().position - P);
const float3 V = normalize(GetCamera().frustum_corners.screen_to_nearplane(uv) - P); // ortho support
const float4 GGX = ReflectionDir_GGX(V, N, roughness, blue_noise(DTid.xy).xy);
const float3 R = GGX.xyz;
+1 -1
View File
@@ -53,7 +53,7 @@ void RTReflection_Raygen()
const float3 N = decode_oct(texture_normal[jitterPixel]);
const float3 P = reconstruct_position(jitterUV, depth);
const float3 V = normalize(GetCamera().position - P);
const float3 V = normalize(GetCamera().frustum_corners.screen_to_nearplane(uv) - P); // ortho support
const float4 GGX = ReflectionDir_GGX(V, N, roughness, blue_noise(DTid.xy).xy);
const float3 R = GGX.xyz;
+2 -2
View File
@@ -86,7 +86,7 @@ uint3 hash33(uint3 x)
[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
const float2 uv = (DTid.xy + 0.5f) * postprocess.resolution_rcp;
const float2 uv = (DTid.xy + 0.5) * postprocess.resolution_rcp;
const float depth = texture_depth[DTid.xy * 2];
const float roughness = texture_roughness[DTid.xy * 2];
@@ -102,7 +102,7 @@ void main(uint3 DTid : SV_DispatchThreadID)
// Everything in world space:
const float3 P = reconstruct_position(uv, depth);
const float3 N = decode_oct(texture_normal[DTid.xy * 2]);
const float3 V = normalize(GetCamera().position - P);
const float3 V = normalize(GetCamera().frustum_corners.screen_to_nearplane(uv) - P); // ortho support
const float NdotV = saturate(dot(N, V));
const float resolveSpatialScale = saturate(roughness * 5.0); // roughness 0.2 is destination
@@ -9,11 +9,11 @@ float4 main(VertexToPixel input) : SV_Target
{
ShaderEntity light = load_entity(directional_lights().first_item() + (uint)g_xColor.x);
float2 ScreenCoord = input.pos2D.xy / input.pos2D.w * float2(0.5f, -0.5f) + 0.5f;
float2 ScreenCoord = input.pos2D.xy / input.pos2D.w * float2(0.5, -0.5) + 0.5;
float4 depths = texture_depth.GatherRed(sampler_point_clamp, ScreenCoord);
float depth = max(depths.x, max(depths.y, max(depths.z, depths.w)));
float3 P = reconstruct_position(ScreenCoord, depth);
float3 V = GetCamera().position - P;
float3 V = GetCamera().frustum_corners.screen_to_nearplane(ScreenCoord) - P; // ortho support
float cameraDistance = length(V);
V /= cameraDistance;
@@ -8,11 +8,11 @@ float4 main(VertexToPixel input) : SV_TARGET
{
ShaderEntity light = load_entity(pointlights().first_item() + (uint)g_xColor.x);
float2 ScreenCoord = input.pos2D.xy / input.pos2D.w * float2(0.5f, -0.5f) + 0.5f;
float2 ScreenCoord = input.pos2D.xy / input.pos2D.w * float2(0.5, -0.5) + 0.5;
float4 depths = texture_depth.GatherRed(sampler_point_clamp, ScreenCoord);
float depth = max(input.pos.z, max(depths.x, max(depths.y, max(depths.z, depths.w))));
float3 P = reconstruct_position(ScreenCoord, depth);
float3 V = GetCamera().position - P;
float3 V = GetCamera().frustum_corners.screen_to_nearplane(ScreenCoord) - P; // ortho support
float cameraDistance = length(V);
V /= cameraDistance;
@@ -40,11 +40,12 @@ float4 main(VertexToPixel input) : SV_TARGET
{
ShaderEntity light = load_entity(spotlights().first_item() + (uint)g_xColor.x);
float2 ScreenCoord = input.pos2D.xy / input.pos2D.w * float2(0.5f, -0.5f) + 0.5f;
float2 ScreenCoord = input.pos2D.xy / input.pos2D.w * float2(0.5, -0.5) + 0.5;
float4 depths = texture_depth.GatherRed(sampler_point_clamp, ScreenCoord);
float depth = max(input.pos.z, max(depths.x, max(depths.y, max(depths.z, depths.w))));
float3 P = reconstruct_position(ScreenCoord, depth);
float3 V = GetCamera().position - P;
float3 nearP = GetCamera().frustum_corners.screen_to_nearplane(ScreenCoord);
float3 V = nearP - P; // ortho support
float cameraDistance = length(V);
V /= cameraDistance;
@@ -74,7 +75,7 @@ float4 main(VertexToPixel input) : SV_TARGET
float2 sina2_cosa2 = unpack_half2(asuint(g_xColor.z));
if(intersectInfiniteCone(GetCamera().position, -V, light.position, light.GetDirection(), sina2_cosa2.x, sina2_cosa2.y, tnear, tfar))
{
rayEnd = GetCamera().position - V * max(0, tnear);
rayEnd = nearP - V * max(0, tnear);
//return float4(1,0,0,1);
}
}
@@ -19,7 +19,7 @@ void main(uint3 DTid : SV_DispatchThreadID)
const float roughness = texture_roughness.SampleLevel(sampler_point_clamp, uv, 0);
const float3 N = decode_oct(texture_normal.SampleLevel(sampler_point_clamp, uv, 0));
const float3 P = reconstruct_position(uv, depth);
const float3 V = normalize(GetCamera().position - P);
const float3 V = normalize(GetCamera().frustum_corners.screen_to_nearplane(uv) - P); // ortho support
Texture3D<half4> voxels = bindless_textures3D_half4[GetFrame().vxgi.texture_radiance];
half4 color = ConeTraceSpecular(voxels, P, N, V, roughness * roughness, pixel);
+67 -7
View File
@@ -2304,10 +2304,15 @@ void SetUpStates()
rs = rasterizers[RSTYPE_DOUBLESIDED];
// Note: conservative raster can produce bright lightmap pixels, so now it's disabled!
//if (device->CheckCapability(GraphicsDeviceCapability::CONSERVATIVE_RASTERIZATION))
//{
// rs.conservative_rasterization_enable = true;
//}
//else
{
rs.forced_sample_count = 8; // MSAA approximation of conservative rasterization
}
rasterizers[RSTYPE_LIGHTMAP] = rs;
@@ -10355,11 +10360,19 @@ void RefreshLightmaps(const Scene& scene, CommandList cmd)
cb.xTraceResolution_rcp.y = 1.0f / cb.xTraceResolution.y;
cb.xTraceAccumulationFactor = 1.0f / (object.lightmapIterationCount + 1.0f); // accumulation factor (alpha)
cb.xTraceUserData.x = raytraceBounceCount;
XMFLOAT4 halton = wi::math::GetHaltonSequence(object.lightmapIterationCount); // for jittering the rasterization (good for eliminating atlas border artifacts)
cb.xTracePixelOffset.x = (halton.x * 2 - 1) * cb.xTraceResolution_rcp.x;
cb.xTracePixelOffset.y = (halton.y * 2 - 1) * cb.xTraceResolution_rcp.y;
cb.xTracePixelOffset.x *= 1.4f; // boost the jitter by a bit
cb.xTracePixelOffset.y *= 1.4f; // boost the jitter by a bit
uint8_t instanceInclusionMask = 0xFF;
cb.xTraceUserData.y = instanceInclusionMask;
cb.xTraceSampleIndex = object.lightmapIterationCount;
device->BindDynamicConstantBuffer(cb, CB_GETBINDSLOT(RaytracingCB), cmd);
uint32_t indexStart = ~0u;
uint32_t indexEnd = 0;
uint32_t first_subset = 0;
uint32_t last_subset = 0;
mesh.GetLODSubsetRange(0, first_subset, last_subset);
@@ -10368,25 +10381,72 @@ void RefreshLightmaps(const Scene& scene, CommandList cmd)
const MeshComponent::MeshSubset& subset = mesh.subsets[subsetIndex];
if (subset.indexCount == 0)
continue;
device->DrawIndexed(subset.indexCount, subset.indexOffset, 0, cmd);
indexStart = std::min(indexStart, subset.indexOffset);
indexEnd = std::max(indexEnd, subset.indexOffset + subset.indexCount);
}
if (indexEnd > indexStart)
{
const uint32_t indexCount = indexEnd - indexStart;
device->DrawIndexed(indexCount, indexStart, 0, cmd);
object.lightmapIterationCount++;
}
object.lightmapIterationCount++;
device->RenderPassEnd(cmd);
// Expand opaque areas:
{
device->EventBegin("Lightmap expand", cmd);
static Texture lightmap_expand_temp;
if (lightmap_expand_temp.desc.width < object.lightmap.desc.width || lightmap_expand_temp.desc.height < object.lightmap.desc.height)
{
lightmap_expand_temp.desc = object.lightmap.desc;
device->CreateTexture(&lightmap_expand_temp.desc, nullptr, &lightmap_expand_temp);
device->Barrier(GPUBarrier::Image(&lightmap_expand_temp, lightmap_expand_temp.desc.layout, ResourceState::UNORDERED_ACCESS), cmd);
device->ClearUAV(&lightmap_expand_temp, 0, cmd);
device->Barrier(GPUBarrier::Image(&lightmap_expand_temp, ResourceState::UNORDERED_ACCESS, lightmap_expand_temp.desc.layout), cmd);
}
device->BindComputeShader(&shaders[CSTYPE_LIGHTMAP_EXPAND], cmd);
device->BindResource(&object.lightmap_render, 0, cmd);
// render -> lightmap
{
device->BindResource(&object.lightmap_render, 0, cmd);
device->BindUAV(&object.lightmap, 0, cmd);
device->BindUAV(&object.lightmap, 0, cmd);
device->Barrier(GPUBarrier::Image(&object.lightmap, object.lightmap.desc.layout, ResourceState::UNORDERED_ACCESS), cmd);
device->Barrier(GPUBarrier::Image(&object.lightmap, object.lightmap.desc.layout, ResourceState::UNORDERED_ACCESS), cmd);
device->Dispatch((desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE, (desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE, 1, cmd);
device->Dispatch((desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE, (desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE, 1, cmd);
device->Barrier(GPUBarrier::Image(&object.lightmap, ResourceState::UNORDERED_ACCESS, object.lightmap.desc.layout), cmd);
}
for (int repeat = 0; repeat < 2; ++repeat)
{
// lightmap -> temp
{
device->BindResource(&object.lightmap, 0, cmd);
device->BindUAV(&lightmap_expand_temp, 0, cmd);
device->Barrier(GPUBarrier::Image(&object.lightmap, ResourceState::UNORDERED_ACCESS, object.lightmap.desc.layout), cmd);
device->Barrier(GPUBarrier::Image(&lightmap_expand_temp, lightmap_expand_temp.desc.layout, ResourceState::UNORDERED_ACCESS), cmd);
device->Dispatch((desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE, (desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE, 1, cmd);
device->Barrier(GPUBarrier::Image(&lightmap_expand_temp, ResourceState::UNORDERED_ACCESS, lightmap_expand_temp.desc.layout), cmd);
}
// temp -> lightmap
{
device->BindResource(&lightmap_expand_temp, 0, cmd);
device->BindUAV(&object.lightmap, 0, cmd);
device->Barrier(GPUBarrier::Image(&object.lightmap, object.lightmap.desc.layout, ResourceState::UNORDERED_ACCESS), cmd);
device->Dispatch((desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE, (desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE, 1, cmd);
device->Barrier(GPUBarrier::Image(&object.lightmap, ResourceState::UNORDERED_ACCESS, object.lightmap.desc.layout), cmd);
}
}
device->EventEnd(cmd);
}
device->EventEnd(cmd);
+4
View File
@@ -4611,6 +4611,10 @@ namespace wi::scene
{
object.lightmap.desc.format = Format::R32G32B32A32_FLOAT;
}
else if (lightmap_size == object.lightmapWidth * object.lightmapHeight * sizeof(XMHALF4))
{
object.lightmap.desc.format = Format::R16G16B16A16_FLOAT;
}
else if (lightmap_size == object.lightmapWidth * object.lightmapHeight * sizeof(PackedVector::XMFLOAT3PK))
{
object.lightmap.desc.format = Format::R11G11B10_FLOAT;
+1 -1
View File
@@ -9,7 +9,7 @@ namespace wi::version
// minor features, major updates, breaking compatibility changes
const int minor = 71;
// minor bug fixes, alterations, refactors, updates
const int revision = 636;
const int revision = 637;
const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);