diff --git a/Editor/CameraWindow.cpp b/Editor/CameraWindow.cpp index 0b2268a13..0d2cdd479 100644 --- a/Editor/CameraWindow.cpp +++ b/Editor/CameraWindow.cpp @@ -73,7 +73,7 @@ CameraWindow::CameraWindow(wiGUI* gui) :GUI(gui) - cameraWindow->Translate(XMFLOAT3(30, 30, 0)); + cameraWindow->Translate(XMFLOAT3(800, 500, 0)); cameraWindow->SetVisible(false); } diff --git a/Editor/Editor.cpp b/Editor/Editor.cpp index a63671544..30b81d0e1 100644 --- a/Editor/Editor.cpp +++ b/Editor/Editor.cpp @@ -14,6 +14,7 @@ #include "AnimationWindow.h" #include "EmitterWindow.h" #include "ForceFieldWindow.h" +#include "OceanWindow.h" #include // openfile #include @@ -262,6 +263,7 @@ void EditorComponent::ChangeRenderPath(RENDERPATH path) emitterWnd = new EmitterWindow(&GetGUI()); emitterWnd->SetMaterialWnd(materialWnd); forceFieldWnd = new ForceFieldWindow(&GetGUI()); + oceanWnd = new OceanWindow(&GetGUI()); } void EditorComponent::DeleteWindows() { @@ -276,6 +278,7 @@ void EditorComponent::DeleteWindows() SAFE_DELETE(animWnd); SAFE_DELETE(emitterWnd); SAFE_DELETE(forceFieldWnd); + SAFE_DELETE(oceanWnd); } void EditorComponent::Initialize() @@ -292,6 +295,7 @@ void EditorComponent::Initialize() SAFE_INIT(animWnd); SAFE_INIT(emitterWnd); SAFE_INIT(forceFieldWnd); + SAFE_INIT(oceanWnd); SAFE_INIT(loader); @@ -466,6 +470,15 @@ void EditorComponent::Load() }); GetGUI().AddWidget(forceFieldWnd_Toggle); + wiButton* oceanWnd_Toggle = new wiButton("Ocean"); + oceanWnd_Toggle->SetTooltip("Ocean Simulator properties"); + oceanWnd_Toggle->SetPos(XMFLOAT2(x += step, screenH - 40)); + oceanWnd_Toggle->SetSize(XMFLOAT2(100, 40)); + oceanWnd_Toggle->OnClick([=](wiEventArgs args) { + oceanWnd->oceanWindow->SetVisible(!oceanWnd->oceanWindow->IsVisible()); + }); + GetGUI().AddWidget(oceanWnd_Toggle); + //////////////////////////////////////////////////////////////////////////////////// @@ -929,6 +942,24 @@ void EditorComponent::Update(float dt) originalMouse = wiInputManager::GetInstance()->getpointer(); 
} + const float buttonrotSpeed = 2.0f / 60.0f; + if (wiInputManager::GetInstance()->down(VK_LEFT)) + { + xDif -= buttonrotSpeed; + } + if (wiInputManager::GetInstance()->down(VK_RIGHT)) + { + xDif += buttonrotSpeed; + } + if (wiInputManager::GetInstance()->down(VK_UP)) + { + yDif -= buttonrotSpeed; + } + if (wiInputManager::GetInstance()->down(VK_DOWN)) + { + yDif += buttonrotSpeed; + } + Camera* cam = wiRenderer::getCamera(); if (cameraWnd->fpscamera) @@ -1372,6 +1403,13 @@ void EditorComponent::Compose() { renderPath->Compose(); + //if (wiRenderer::GetOcean()) + //{ + // wiImageEffects fx(500, 500, 500, 500); + // fx.blendFlag = BLENDMODE_OPAQUE; + // wiImage::Draw(wiRenderer::GetOcean()->getDisplacementMap(), fx, GRAPHICSTHREAD_IMMEDIATE); + //} + //__super::Compose(); for (auto& x : wiRenderer::GetScene().models) @@ -1521,7 +1559,6 @@ void EditorComponent::Compose() } } - } void EditorComponent::Unload() { diff --git a/Editor/Editor.h b/Editor/Editor.h index 6735c1b27..1c1c134a0 100644 --- a/Editor/Editor.h +++ b/Editor/Editor.h @@ -14,6 +14,7 @@ class LightWindow; class AnimationWindow; class EmitterWindow; class ForceFieldWindow; +class OceanWindow; class EditorLoadingScreen : public LoadingScreenComponent { @@ -45,6 +46,7 @@ public: AnimationWindow* animWnd; EmitterWindow* emitterWnd; ForceFieldWindow* forceFieldWnd; + OceanWindow* oceanWnd; Editor* main; diff --git a/Editor/Editor.vcxproj b/Editor/Editor.vcxproj index ffa45db89..2947be691 100644 --- a/Editor/Editor.vcxproj +++ b/Editor/Editor.vcxproj @@ -177,6 +177,7 @@ + @@ -197,6 +198,7 @@ + diff --git a/Editor/Editor.vcxproj.filters b/Editor/Editor.vcxproj.filters index 159aa2f1a..6ad593974 100644 --- a/Editor/Editor.vcxproj.filters +++ b/Editor/Editor.vcxproj.filters @@ -67,6 +67,9 @@ Code + + Code + @@ -117,6 +120,9 @@ Code + + Code + diff --git a/Editor/OceanWindow.cpp b/Editor/OceanWindow.cpp new file mode 100644 index 000000000..c698ee7d8 --- /dev/null +++ b/Editor/OceanWindow.cpp @@ -0,0 +1,103 @@ 
+#include "stdafx.h" +#include "OceanWindow.h" + + +OceanWindow::OceanWindow(wiGUI* gui) :GUI(gui) +{ + assert(GUI && "Invalid GUI!"); + + float screenW = (float)wiRenderer::GetDevice()->GetScreenWidth(); + float screenH = (float)wiRenderer::GetDevice()->GetScreenHeight(); + + + oceanWindow = new wiWindow(GUI, "Ocean Window"); + oceanWindow->SetSize(XMFLOAT2(700, 300)); + GUI->AddWidget(oceanWindow); + + float x = 200; + float y = 0; + float inc = 35; + + enabledCheckBox = new wiCheckBox("Ocean simulation enabled: "); + enabledCheckBox->SetPos(XMFLOAT2(x, y += inc)); + enabledCheckBox->OnClick([&](wiEventArgs args) { + wiRenderer::SetOceanEnabled(args.bValue, params); + }); + enabledCheckBox->SetCheck(wiRenderer::GetOcean() != nullptr); + oceanWindow->AddWidget(enabledCheckBox); + + + patchSizeSlider = new wiSlider(1, 2000, 1000, 100000, "Patch size: "); + patchSizeSlider->SetSize(XMFLOAT2(100, 30)); + patchSizeSlider->SetPos(XMFLOAT2(x, y += inc)); + patchSizeSlider->SetValue(params.patch_length); + patchSizeSlider->OnSlide([&](wiEventArgs args) { + params.patch_length = args.fValue; + wiRenderer::SetOceanEnabled(enabledCheckBox->GetCheck(), params); + }); + oceanWindow->AddWidget(patchSizeSlider); + + waveAmplitudeSlider = new wiSlider(0, 100, 1000, 100000, "Wave amplitude: "); + waveAmplitudeSlider->SetSize(XMFLOAT2(100, 30)); + waveAmplitudeSlider->SetPos(XMFLOAT2(x, y += inc)); + waveAmplitudeSlider->SetValue(params.wave_amplitude); + waveAmplitudeSlider->OnSlide([&](wiEventArgs args) { + params.wave_amplitude = args.fValue; + wiRenderer::SetOceanEnabled(enabledCheckBox->GetCheck(), params); + }); + oceanWindow->AddWidget(waveAmplitudeSlider); + + choppyScaleSlider = new wiSlider(0, 10, 1000, 100000, "Choppiness: "); + choppyScaleSlider->SetSize(XMFLOAT2(100, 30)); + choppyScaleSlider->SetPos(XMFLOAT2(x, y += inc)); + choppyScaleSlider->SetValue(params.choppy_scale); + choppyScaleSlider->OnSlide([&](wiEventArgs args) { + params.choppy_scale = args.fValue; + 
wiRenderer::SetOceanEnabled(enabledCheckBox->GetCheck(), params); + }); + oceanWindow->AddWidget(choppyScaleSlider); + + windDependencySlider = new wiSlider(0, 1, 1000, 100000, "Wind dependency: "); + windDependencySlider->SetSize(XMFLOAT2(100, 30)); + windDependencySlider->SetPos(XMFLOAT2(x, y += inc)); + windDependencySlider->SetValue(params.wind_dependency); + windDependencySlider->OnSlide([&](wiEventArgs args) { + params.wind_dependency = args.fValue; + wiRenderer::SetOceanEnabled(enabledCheckBox->GetCheck(), params); + }); + oceanWindow->AddWidget(windDependencySlider); + + timeScaleSlider = new wiSlider(0, 4, 1000, 100000, "Time scale: "); + timeScaleSlider->SetSize(XMFLOAT2(100, 30)); + timeScaleSlider->SetPos(XMFLOAT2(x, y += inc)); + timeScaleSlider->SetValue(params.time_scale); + timeScaleSlider->OnSlide([&](wiEventArgs args) { + params.time_scale = args.fValue; + wiRenderer::SetOceanEnabled(enabledCheckBox->GetCheck(), params); + }); + oceanWindow->AddWidget(timeScaleSlider); + + + colorPicker = new wiColorPicker(GUI, "Water Color"); + colorPicker->SetPos(XMFLOAT2(380, 30)); + colorPicker->RemoveWidgets(); + colorPicker->SetVisible(true); + colorPicker->SetEnabled(true); + colorPicker->OnColorChanged([&](wiEventArgs args) { + if (wiRenderer::GetOcean() != nullptr) + wiRenderer::GetOcean()->waterColor = XMFLOAT3(args.color.x, args.color.y, args.color.z); + }); + oceanWindow->AddWidget(colorPicker); + + + oceanWindow->Translate(XMFLOAT3(800, 50, 0)); + oceanWindow->SetVisible(false); +} + + +OceanWindow::~OceanWindow() +{ + oceanWindow->RemoveWidgets(true); + GUI->RemoveWidget(oceanWindow); + SAFE_DELETE(oceanWindow); +} diff --git a/Editor/OceanWindow.h b/Editor/OceanWindow.h new file mode 100644 index 000000000..9f9297a06 --- /dev/null +++ b/Editor/OceanWindow.h @@ -0,0 +1,31 @@ +#pragma once + +#include "wiOcean.h" + +class wiGUI; +class wiWindow; +class wiLabel; +class wiCheckBox; +class wiSlider; +class wiColorPicker; + +class OceanWindow +{ +public: 
+ OceanWindow(wiGUI* gui); + ~OceanWindow(); + + wiOceanParameter params; + + wiGUI* GUI; + + wiWindow* oceanWindow; + wiCheckBox* enabledCheckBox; + wiSlider* patchSizeSlider; + wiSlider* waveAmplitudeSlider; + wiSlider* choppyScaleSlider; + wiSlider* windDependencySlider; + wiSlider* timeScaleSlider; + wiColorPicker* colorPicker; +}; + diff --git a/Editor/perlin_noise.dds b/Editor/perlin_noise.dds new file mode 100644 index 000000000..0fe2ff0a5 Binary files /dev/null and b/Editor/perlin_noise.dds differ diff --git a/WickedEngine/ConstantBufferMapping.h b/WickedEngine/ConstantBufferMapping.h index cde75e1e0..ff517e3e4 100644 --- a/WickedEngine/ConstantBufferMapping.h +++ b/WickedEngine/ConstantBufferMapping.h @@ -6,30 +6,35 @@ // Persistent buffers: // These are bound once and are alive forever -#define CBSLOT_RENDERER_WORLD 0 -#define CBSLOT_RENDERER_FRAME 1 -#define CBSLOT_RENDERER_CAMERA 2 -#define CBSLOT_RENDERER_MISC 3 +#define CBSLOT_RENDERER_WORLD 0 +#define CBSLOT_RENDERER_FRAME 1 +#define CBSLOT_RENDERER_CAMERA 2 +#define CBSLOT_RENDERER_MISC 3 -#define CBSLOT_IMAGE_IMAGE 4 -#define CBSLOT_IMAGE_POSTPROCESS 5 +#define CBSLOT_IMAGE_IMAGE 4 +#define CBSLOT_IMAGE_POSTPROCESS 5 -#define CBSLOT_API 6 +#define CBSLOT_API 6 // On demand buffers: // These are bound on demand and alive until another is bound at the same slot -#define CBSLOT_RENDERER_MATERIAL 7 -#define CBSLOT_RENDERER_CUBEMAPRENDER 8 -#define CBSLOT_RENDERER_VOLUMELIGHT 8 -#define CBSLOT_RENDERER_DECAL 8 -#define CBSLOT_RENDERER_TESSELLATION 8 -#define CBSLOT_RENDERER_DISPATCHPARAMS 8 -#define CBSLOT_RENDERER_VOXELIZER 8 +#define CBSLOT_RENDERER_MATERIAL 7 +#define CBSLOT_RENDERER_CUBEMAPRENDER 8 +#define CBSLOT_RENDERER_VOLUMELIGHT 8 +#define CBSLOT_RENDERER_DECAL 8 +#define CBSLOT_RENDERER_TESSELLATION 8 +#define CBSLOT_RENDERER_DISPATCHPARAMS 8 +#define CBSLOT_RENDERER_VOXELIZER 8 -#define CBSLOT_OTHER_EMITTEDPARTICLE 8 -#define CBSLOT_OTHER_HAIRPARTICLE 8 -#define CBSLOT_OTHER_LENSFLARE 8 
+#define CBSLOT_OTHER_EMITTEDPARTICLE 8 +#define CBSLOT_OTHER_HAIRPARTICLE 8 +#define CBSLOT_OTHER_LENSFLARE 8 +#define CBSLOT_OTHER_FFTGENERATOR 8 +#define CBSLOT_OTHER_OCEAN_SIMULATION_IMMUTABLE 8 +#define CBSLOT_OTHER_OCEAN_SIMULATION_PERFRAME 9 +#define CBSLOT_OTHER_OCEAN_RENDER_SHADING 8 +#define CBSLOT_OTHER_OCEAN_RENDER_PATCH 9 diff --git a/WickedEngine/ShaderInterop_FFTGenerator.h b/WickedEngine/ShaderInterop_FFTGenerator.h new file mode 100644 index 000000000..516888ce1 --- /dev/null +++ b/WickedEngine/ShaderInterop_FFTGenerator.h @@ -0,0 +1,16 @@ +#ifndef _SHADERINTEROP_FFTGENERATOR_H_ +#define _SHADERINTEROP_FFTGENERATOR_H_ +#include "ShaderInterop.h" + +CBUFFER(FFTGeneratorCB, CBSLOT_OTHER_FFTGENERATOR) +{ + uint thread_count; + uint ostride; + uint istride; + uint pstride; + + float phase_base; + float3 FFTGeneratorCB_padding; +}; + +#endif // _SHADERINTEROP_FFTGENERATOR_H_ diff --git a/WickedEngine/ShaderInterop_Ocean.h b/WickedEngine/ShaderInterop_Ocean.h new file mode 100644 index 000000000..079ba974d --- /dev/null +++ b/WickedEngine/ShaderInterop_Ocean.h @@ -0,0 +1,76 @@ +#ifndef _SHADERINTEROP_OCEAN_H_ +#define _SHADERINTEROP_OCEAN_H_ +#include "ShaderInterop.h" + +#define OCEAN_COMPUTE_TILESIZE 16 + +// Simulation constants: + +CBUFFER(Ocean_Simulation_ImmutableCB, CBSLOT_OTHER_OCEAN_SIMULATION_IMMUTABLE) +{ + uint g_ActualDim; + uint g_InWidth; + uint g_OutWidth; + uint g_OutHeight; + + uint g_DtxAddressOffset; + uint g_DtyAddressOffset; + uint2 Ocean_Simulation_ImmutableCB_padding; +}; + +CBUFFER(Ocean_Simulation_PerFrameCB, CBSLOT_OTHER_OCEAN_SIMULATION_PERFRAME) +{ + float g_Time; + float g_ChoppyScale; + float g_GridLen; + float Ocean_Simulation_PerFrameCB_padding; +}; + + +// Rendering constants: + +CBUFFER(Ocean_Rendering_ShadingCB, CBSLOT_OTHER_OCEAN_RENDER_SHADING) +{ + float3 g_SkyColor; + float g_TexelLength_x2; + + float3 g_WaterbodyColor; + float g_UVScale; + + float g_Shineness; + float3 g_SunDir; + + float g_UVOffset; + float3 
g_SunColor; + + // The parameter is used for fixing an artifact + float3 g_BendParam; + float Ocean_Rendering_ShadingCB_padding0; + + // Perlin noise for distant wave crest + float g_PerlinSize; + float3 g_PerlinAmplitude; + + float3 g_PerlinOctave; + float Ocean_Rendering_ShadingCB_padding1; + + float3 g_PerlinGradient; + float Ocean_Rendering_ShadingCB_padding2; +}; + +// Per draw call constants +CBUFFER(Ocean_Rendering_PatchCB, CBSLOT_OTHER_OCEAN_RENDER_PATCH) +{ + // Transform matrices + matrix g_matLocal; + matrix g_matWorldViewProj; + + // Misc per draw call constants + float2 g_UVBase; + float2 g_PerlinMovement; + + float3 g_LocalEye; + float Ocean_Rendering_PatchCB_padding; +}; + +#endif // _SHADERINTEROP_OCEAN_H_ diff --git a/WickedEngine/WickedEngine.h b/WickedEngine/WickedEngine.h index d815a984d..9d147e042 100644 --- a/WickedEngine/WickedEngine.h +++ b/WickedEngine/WickedEngine.h @@ -56,6 +56,7 @@ #include "wiSpinLock.h" #include "wiRectPacker.h" #include "wiProfiler.h" +#include "wiOcean.h" #include "RenderableComponent.h" #include "Renderable2DComponent.h" diff --git a/WickedEngine/WickedEngine_SHADERS.vcxproj b/WickedEngine/WickedEngine_SHADERS.vcxproj index a21373572..cdb6670bd 100644 --- a/WickedEngine/WickedEngine_SHADERS.vcxproj +++ b/WickedEngine/WickedEngine_SHADERS.vcxproj @@ -32,6 +32,7 @@ + @@ -182,6 +183,14 @@ Vertex + + Compute + 5.0 + + + Compute + 5.0 + Pixel @@ -445,6 +454,27 @@ Vertex + + Compute + 5.0 + + + Pixel + + + Pixel + + + Vertex + + + Compute + 5.0 + + + Compute + 5.0 + Pixel diff --git a/WickedEngine/WickedEngine_SHADERS.vcxproj.filters b/WickedEngine/WickedEngine_SHADERS.vcxproj.filters index 080f4e313..42d70480d 100644 --- a/WickedEngine/WickedEngine_SHADERS.vcxproj.filters +++ b/WickedEngine/WickedEngine_SHADERS.vcxproj.filters @@ -115,6 +115,9 @@ HF + + HF + @@ -651,6 +654,30 @@ VS + + CS + + + CS + + + CS + + + VS + + + PS + + + PS + + + CS + + + CS + diff --git a/WickedEngine/WickedEngine_SHARED.vcxitems 
b/WickedEngine/WickedEngine_SHARED.vcxitems index a4019a06b..7fa360db4 100644 --- a/WickedEngine/WickedEngine_SHARED.vcxitems +++ b/WickedEngine/WickedEngine_SHARED.vcxitems @@ -236,10 +236,13 @@ + + + @@ -333,6 +336,7 @@ + @@ -511,6 +515,7 @@ + @@ -671,6 +676,7 @@ + diff --git a/WickedEngine/WickedEngine_SHARED.vcxitems.filters b/WickedEngine/WickedEngine_SHARED.vcxitems.filters index 5a07e96aa..09759cad2 100644 --- a/WickedEngine/WickedEngine_SHARED.vcxitems.filters +++ b/WickedEngine/WickedEngine_SHARED.vcxitems.filters @@ -1104,6 +1104,18 @@ ENGINE\Graphics\GPUMapping + + ENGINE\Graphics + + + ENGINE\Graphics + + + ENGINE\Graphics\GPUMapping + + + ENGINE\Graphics\GPUMapping + @@ -1889,6 +1901,12 @@ ENGINE\Components + + ENGINE\Graphics + + + ENGINE\Graphics + diff --git a/WickedEngine/fft_512x512_c2c_CS.hlsl b/WickedEngine/fft_512x512_c2c_CS.hlsl new file mode 100644 index 000000000..338bb5de4 --- /dev/null +++ b/WickedEngine/fft_512x512_c2c_CS.hlsl @@ -0,0 +1,159 @@ +#include "ShaderInterop_FFTGenerator.h" + +#define COS_PI_4_16 0.70710678118654752440084436210485f +#define TWIDDLE_1_8 COS_PI_4_16, -COS_PI_4_16 +#define TWIDDLE_3_8 -COS_PI_4_16, -COS_PI_4_16 + +#define COHERENCY_GRANULARITY 128 + + +void FT2(inout float2 a, inout float2 b) +{ + float t; + + t = a.x; + a.x += b.x; + b.x = t - b.x; + + t = a.y; + a.y += b.y; + b.y = t - b.y; +} + +void CMUL_forward(inout float2 a, float bx, float by) +{ + float t = a.x; + a.x = t * bx - a.y * by; + a.y = t * by + a.y * bx; +} + +void UPD_forward(inout float2 a, inout float2 b) +{ + float A = a.x; + float B = b.y; + + a.x += b.y; + b.y = a.y + b.x; + a.y -= b.x; + b.x = A - B; +} + +void FFT_forward_4(inout float2 D[8]) +{ + FT2(D[0], D[2]); + FT2(D[1], D[3]); + FT2(D[0], D[1]); + + UPD_forward(D[2], D[3]); +} + +void FFT_forward_8(inout float2 D[8]) +{ + FT2(D[0], D[4]); + FT2(D[1], D[5]); + FT2(D[2], D[6]); + FT2(D[3], D[7]); + + UPD_forward(D[4], D[6]); + UPD_forward(D[5], D[7]); + + CMUL_forward(D[5], 
TWIDDLE_1_8); + CMUL_forward(D[7], TWIDDLE_3_8); + + FFT_forward_4(D); + FT2(D[4], D[5]); + FT2(D[6], D[7]); +} + +void TWIDDLE(inout float2 d, float phase) +{ + float tx, ty; + + sincos(phase, ty, tx); + float t = d.x; + d.x = t * tx - d.y * ty; + d.y = t * ty + d.y * tx; +} + +void TWIDDLE_8(inout float2 D[8], float phase) +{ + TWIDDLE(D[4], 1 * phase); + TWIDDLE(D[2], 2 * phase); + TWIDDLE(D[6], 3 * phase); + TWIDDLE(D[1], 4 * phase); + TWIDDLE(D[5], 5 * phase); + TWIDDLE(D[3], 6 * phase); + TWIDDLE(D[7], 7 * phase); +} + +STRUCTUREDBUFFER(g_SrcData, float2, TEXSLOT_ONDEMAND0); +RWSTRUCTUREDBUFFER(g_DstData, float2, 0); + +#ifndef FFT_V2 + +[numthreads(COHERENCY_GRANULARITY, 1, 1)] +void main(uint3 thread_id : SV_DispatchThreadID) +{ + if (thread_id.x >= thread_count) + return; + + // Fetch 8 complex numbers + float2 D[8]; + + uint i; + uint imod = thread_id.x & (istride - 1); + uint iaddr = ((thread_id.x - imod) << 3) + imod; + for (i = 0; i < 8; i++) + D[i] = g_SrcData[iaddr + i * istride]; + + // Math + FFT_forward_8(D); + uint p = thread_id.x & (istride - pstride); + float phase = phase_base * (float)p; + TWIDDLE_8(D, phase); + + // Store the result + uint omod = thread_id.x & (ostride - 1); + uint oaddr = ((thread_id.x - omod) << 3) + omod; + g_DstData[oaddr + 0 * ostride] = D[0]; + g_DstData[oaddr + 1 * ostride] = D[4]; + g_DstData[oaddr + 2 * ostride] = D[2]; + g_DstData[oaddr + 3 * ostride] = D[6]; + g_DstData[oaddr + 4 * ostride] = D[1]; + g_DstData[oaddr + 5 * ostride] = D[5]; + g_DstData[oaddr + 6 * ostride] = D[3]; + g_DstData[oaddr + 7 * ostride] = D[7]; +} + +#else + +[numthreads(COHERENCY_GRANULARITY, 1, 1)] +void main(uint3 thread_id : SV_DispatchThreadID) +{ + if (thread_id.x >= thread_count) + return; + + // Fetch 8 complex numbers + uint i; + float2 D[8]; + uint iaddr = thread_id.x << 3; + for (i = 0; i < 8; i++) + D[i] = g_SrcData[iaddr + i]; + + // Math + FFT_forward_8(D); + + // Store the result + uint omod = thread_id.x & (ostride - 1); + 
uint oaddr = ((thread_id.x - omod) << 3) + omod; + g_DstData[oaddr + 0 * ostride] = D[0]; + g_DstData[oaddr + 1 * ostride] = D[4]; + g_DstData[oaddr + 2 * ostride] = D[2]; + g_DstData[oaddr + 3 * ostride] = D[6]; + g_DstData[oaddr + 4 * ostride] = D[1]; + g_DstData[oaddr + 5 * ostride] = D[5]; + g_DstData[oaddr + 6 * ostride] = D[3]; + g_DstData[oaddr + 7 * ostride] = D[7]; +} + +#endif // FFT_V2 diff --git a/WickedEngine/fft_512x512_c2c_v2_CS.hlsl b/WickedEngine/fft_512x512_c2c_v2_CS.hlsl new file mode 100644 index 000000000..c2ad805cf --- /dev/null +++ b/WickedEngine/fft_512x512_c2c_v2_CS.hlsl @@ -0,0 +1,3 @@ +#define FFT_V2 + +#include "fft_512x512_c2c_CS.hlsl" diff --git a/WickedEngine/oceanSimulatorCS.hlsl b/WickedEngine/oceanSimulatorCS.hlsl new file mode 100644 index 000000000..c42e4c508 --- /dev/null +++ b/WickedEngine/oceanSimulatorCS.hlsl @@ -0,0 +1,46 @@ +#include "ShaderInterop_Ocean.h" + +#define PI 3.1415926536f + +STRUCTUREDBUFFER(g_InputH0, float2, TEXSLOT_ONDEMAND0); +STRUCTUREDBUFFER(g_InputOmega, float, TEXSLOT_ONDEMAND1); +RWSTRUCTUREDBUFFER(g_OutputHt, float2, 0); + +// H(0) -> H(t) +[numthreads(OCEAN_COMPUTE_TILESIZE, OCEAN_COMPUTE_TILESIZE, 1)] +void main(uint3 DTid : SV_DispatchThreadID) +{ + int in_index = DTid.y * g_InWidth + DTid.x; + int in_mindex = (g_ActualDim - DTid.y) * g_InWidth + (g_ActualDim - DTid.x); + int out_index = DTid.y * g_OutWidth + DTid.x; + + // H(0) -> H(t) + float2 h0_k = g_InputH0[in_index]; + float2 h0_mk = g_InputH0[in_mindex]; + float sin_v, cos_v; + sincos(g_InputOmega[in_index] * g_Time, sin_v, cos_v); + + float2 ht; + ht.x = (h0_k.x + h0_mk.x) * cos_v - (h0_k.y + h0_mk.y) * sin_v; + ht.y = (h0_k.x - h0_mk.x) * sin_v + (h0_k.y - h0_mk.y) * cos_v; + + // H(t) -> Dx(t), Dy(t) + float kx = DTid.x - g_ActualDim * 0.5f; + float ky = DTid.y - g_ActualDim * 0.5f; + float sqr_k = kx * kx + ky * ky; + float rsqr_k = 0; + if (sqr_k > 1e-12f) + rsqr_k = 1 / sqrt(sqr_k); + //float rsqr_k = 1 / sqrtf(kx * kx + ky * ky); + kx 
*= rsqr_k; + ky *= rsqr_k; + float2 dt_x = float2(ht.y * kx, -ht.x * kx); + float2 dt_y = float2(ht.y * ky, -ht.x * ky); + + if ((DTid.x < g_OutWidth) && (DTid.y < g_OutHeight)) + { + g_OutputHt[out_index] = ht; + g_OutputHt[out_index + g_DtxAddressOffset] = dt_x; + g_OutputHt[out_index + g_DtyAddressOffset] = dt_y; + } +} diff --git a/WickedEngine/oceanSurfaceHF.hlsli b/WickedEngine/oceanSurfaceHF.hlsli new file mode 100644 index 000000000..0cf75ae09 --- /dev/null +++ b/WickedEngine/oceanSurfaceHF.hlsli @@ -0,0 +1,24 @@ +#ifndef _OCEAN_SURFACE_HF_ +#define _OCEAN_SURFACE_HF_ +#include "globals.hlsli" +#include "ShaderInterop_Ocean.h" + + +#define PATCH_BLEND_BEGIN 100 +#define PATCH_BLEND_END 2000 + + +#define g_texDisplacement texture_0 // FFT wave displacement map in VS +#define g_texPerlin texture_1 // FFT wave gradient map in PS +#define g_texGradient texture_2 // Perlin wave displacement & gradient map in both VS & PS +TEXTURE1D(g_texFresnel, float4, TEXSLOT_ONDEMAND3); // Fresnel factor lookup table +#define g_texReflectCube texture_env_global + +struct VS_OUTPUT +{ + float4 Position : SV_POSITION; + float2 TexCoord : TEXCOORD0; + float3 LocalPos : TEXCOORD1; +}; + +#endif // _OCEAN_SURFACE_HF_ diff --git a/WickedEngine/oceanSurfacePS.hlsl b/WickedEngine/oceanSurfacePS.hlsl new file mode 100644 index 000000000..49344016f --- /dev/null +++ b/WickedEngine/oceanSurfacePS.hlsl @@ -0,0 +1,79 @@ +#include "globals.hlsli" +#include "oceanSurfaceHF.hlsli" + +float4 main(VS_OUTPUT In) : SV_Target +{ + // Calculate eye vector. 
+ float3 eye_vec = g_LocalEye - In.LocalPos; + float3 eye_dir = normalize(eye_vec); + + + // --------------- Blend perlin noise for reducing the tiling artifacts + + // Blend displacement to avoid tiling artifact + float dist_2d = length(eye_vec.xy); + float blend_factor = (PATCH_BLEND_END - dist_2d) / (PATCH_BLEND_END - PATCH_BLEND_BEGIN); + blend_factor = clamp(blend_factor * blend_factor * blend_factor, 0, 1); + + // Compose perlin waves from three octaves + float2 perlin_tc = In.TexCoord * g_PerlinSize + g_UVBase; + float2 perlin_tc0 = (blend_factor < 1) ? perlin_tc * g_PerlinOctave.x + g_PerlinMovement : 0; + float2 perlin_tc1 = (blend_factor < 1) ? perlin_tc * g_PerlinOctave.y + g_PerlinMovement : 0; + float2 perlin_tc2 = (blend_factor < 1) ? perlin_tc * g_PerlinOctave.z + g_PerlinMovement : 0; + + float2 perlin_0 = g_texPerlin.Sample(sampler_aniso_wrap, perlin_tc0).xy; + float2 perlin_1 = g_texPerlin.Sample(sampler_aniso_wrap, perlin_tc1).xy; + float2 perlin_2 = g_texPerlin.Sample(sampler_aniso_wrap, perlin_tc2).xy; + + float2 perlin = (perlin_0 * g_PerlinGradient.x + perlin_1 * g_PerlinGradient.y + perlin_2 * g_PerlinGradient.z); + + + // --------------- Water body color + + // Texcoord mash optimization: Texcoord of FFT wave is not required when blend_factor > 1 + float2 fft_tc = (blend_factor > 0) ? In.TexCoord : 0; + + float2 grad = g_texGradient.Sample(sampler_aniso_wrap, fft_tc).xy; + grad = lerp(perlin, grad, blend_factor); + + // Calculate normal here. + float3 normal = normalize(float3(grad, g_TexelLength_x2)); + // Reflected ray + float3 reflect_vec = reflect(-eye_dir, normal); + // dot(N, V) + float cos_angle = dot(normal, eye_dir); + + // A coarse way to handle transmitted light + float3 body_color = g_WaterbodyColor; + + + // --------------- Reflected color + + // ramp.x for fresnel term. 
ramp.y for sky blending + float4 ramp = g_texFresnel.Sample(sampler_linear_clamp, cos_angle).xyzw; + // A workaround to deal with "indirect reflection vectors" (which are rays requiring multiple + // reflections to reach the sky). + if (reflect_vec.z < g_BendParam.x) + ramp = lerp(ramp, g_BendParam.z, (g_BendParam.x - reflect_vec.z) / (g_BendParam.x - g_BendParam.y)); + reflect_vec.z = max(0, reflect_vec.z); + + float3 reflection = g_texReflectCube.Sample(sampler_linear_clamp, reflect_vec).xyz; + // Hack bit: making higher contrast + reflection = reflection * reflection * 2.5f; + + // Blend with predefined sky color + float3 reflected_color = lerp(g_SkyColor, reflection, ramp.y); + + // Combine waterbody color and reflected color + float3 water_color = lerp(body_color, reflected_color, ramp.x); + + + // --------------- Sun spots + + float cos_spec = clamp(dot(reflect_vec, g_SunDir), 0, 1); + float sun_spot = pow(cos_spec, g_Shineness); + water_color += g_SunColor * sun_spot; + + + return float4(water_color, 1); +} diff --git a/WickedEngine/oceanSurfaceSimplePS.hlsl b/WickedEngine/oceanSurfaceSimplePS.hlsl new file mode 100644 index 000000000..65d8e119a --- /dev/null +++ b/WickedEngine/oceanSurfaceSimplePS.hlsl @@ -0,0 +1,4 @@ +float4 main() : SV_TARGET +{ + return float4(1.0f, 1.0f, 1.0f, 1.0f); +} diff --git a/WickedEngine/oceanSurfaceVS.hlsl b/WickedEngine/oceanSurfaceVS.hlsl new file mode 100644 index 000000000..4c6c8cb4a --- /dev/null +++ b/WickedEngine/oceanSurfaceVS.hlsl @@ -0,0 +1,46 @@ +#include "globals.hlsli" +#include "oceanSurfaceHF.hlsli" + +VS_OUTPUT main(float2 vPos : POSITION) +{ + VS_OUTPUT Output; + + // Local position + float4 pos_local = mul(float4(vPos, 0, 1), g_matLocal); + // UV + float2 uv_local = pos_local.xy * g_UVScale + g_UVOffset; + + // Blend displacement to avoid tiling artifact + float3 eye_vec = pos_local.xyz - g_LocalEye; + float dist_2d = length(eye_vec.xy); + float blend_factor = (PATCH_BLEND_END - dist_2d) / (PATCH_BLEND_END - 
PATCH_BLEND_BEGIN); + blend_factor = clamp(blend_factor, 0, 1); + + // Add perlin noise to distant patches + float perlin = 0; + if (blend_factor < 1) + { + float2 perlin_tc = uv_local * g_PerlinSize + g_UVBase; + float perlin_0 = g_texPerlin.SampleLevel(sampler_aniso_wrap, perlin_tc * g_PerlinOctave.x + g_PerlinMovement, 0).w; + float perlin_1 = g_texPerlin.SampleLevel(sampler_aniso_wrap, perlin_tc * g_PerlinOctave.y + g_PerlinMovement, 0).w; + float perlin_2 = g_texPerlin.SampleLevel(sampler_aniso_wrap, perlin_tc * g_PerlinOctave.z + g_PerlinMovement, 0).w; + + perlin = perlin_0 * g_PerlinAmplitude.x + perlin_1 * g_PerlinAmplitude.y + perlin_2 * g_PerlinAmplitude.z; + } + + // Displacement map + float3 displacement = 0; + if (blend_factor > 0) + displacement = g_texDisplacement.SampleLevel(sampler_point_wrap, uv_local, 0).xyz; + displacement = lerp(float3(0, 0, perlin), displacement, blend_factor); + pos_local.xyz += displacement; + + // Transform + Output.Position = mul(pos_local, g_matWorldViewProj); + Output.LocalPos = pos_local.xyz; + + // Pass thru texture coordinate + Output.TexCoord = uv_local; + + return Output; +} diff --git a/WickedEngine/oceanUpdateDisplacementMapCS.hlsl b/WickedEngine/oceanUpdateDisplacementMapCS.hlsl new file mode 100644 index 000000000..ff5605720 --- /dev/null +++ b/WickedEngine/oceanUpdateDisplacementMapCS.hlsl @@ -0,0 +1,19 @@ +#include "ShaderInterop_Ocean.h" + +STRUCTUREDBUFFER(g_InputDxyz, float2, TEXSLOT_ONDEMAND0); +RWTEXTURE2D(output, float4, 0); + +[numthreads(OCEAN_COMPUTE_TILESIZE, OCEAN_COMPUTE_TILESIZE, 1)] +void main(uint3 DTid : SV_DispatchThreadID) +{ + uint addr = g_OutWidth * DTid.y + DTid.x; + + // cos(pi * (m1 + m2)) + int sign_correction = ((DTid.x + DTid.y) & 1) ? 
-1 : 1; + + float dx = g_InputDxyz[addr + g_DtxAddressOffset].x * sign_correction * g_ChoppyScale; + float dy = g_InputDxyz[addr + g_DtyAddressOffset].x * sign_correction * g_ChoppyScale; + float dz = g_InputDxyz[addr].x * sign_correction; + + output[DTid.xy] = float4(dx, dy, dz, 1); +} diff --git a/WickedEngine/oceanUpdateGradientFoldingCS.hlsl b/WickedEngine/oceanUpdateGradientFoldingCS.hlsl new file mode 100644 index 000000000..c0d9b8edf --- /dev/null +++ b/WickedEngine/oceanUpdateGradientFoldingCS.hlsl @@ -0,0 +1,39 @@ +#include "globals.hlsli" +#include "ShaderInterop_Ocean.h" + +#define xDisplacementMap texture_0 +RWTEXTURE2D(output, float4, 0); + +[numthreads(OCEAN_COMPUTE_TILESIZE, OCEAN_COMPUTE_TILESIZE, 1)] +void main( uint3 DTid : SV_DispatchThreadID ) +{ + // Sample neighbour texels + float2 one_texel = float2(1.0f / (float)g_OutWidth, 1.0f / (float)g_OutHeight); + + float2 uv = (float2)DTid.xy / float2(g_OutWidth, g_OutHeight); + + float2 tc_left = float2(uv.x - one_texel.x, uv.y); + float2 tc_right = float2(uv.x + one_texel.x, uv.y); + float2 tc_back = float2(uv.x, uv.y - one_texel.y); + float2 tc_front = float2(uv.x, uv.y + one_texel.y); + + float3 displace_left = xDisplacementMap.SampleLevel(sampler_linear_clamp, tc_left, 0).xyz; + float3 displace_right = xDisplacementMap.SampleLevel(sampler_linear_clamp, tc_right, 0).xyz; + float3 displace_back = xDisplacementMap.SampleLevel(sampler_linear_clamp, tc_back, 0).xyz; + float3 displace_front = xDisplacementMap.SampleLevel(sampler_linear_clamp, tc_front, 0).xyz; + + // Do not store the actual normal value. Using gradient instead, which preserves two differential values. 
+ float2 gradient = { -(displace_right.z - displace_left.z), -(displace_front.z - displace_back.z) }; + + + // Calculate Jacobian corelation from the partial differential of height field + float2 Dx = (displace_right.xy - displace_left.xy) * g_ChoppyScale * g_GridLen; + float2 Dy = (displace_front.xy - displace_back.xy) * g_ChoppyScale * g_GridLen; + float J = (1.0f + Dx.x) * (1.0f + Dy.y) - Dx.y * Dy.x; + + // Practical subsurface scale calculation: max[0, (1 - J) + Amplitude * (2 * Coverage - 1)]. + float fold = max(1.0f - J, 0); + + // Output + output[DTid.xy] = float4(gradient, 0, fold); +} \ No newline at end of file diff --git a/WickedEngine/wiFFTGenerator.cpp b/WickedEngine/wiFFTGenerator.cpp new file mode 100644 index 000000000..805135af2 --- /dev/null +++ b/WickedEngine/wiFFTGenerator.cpp @@ -0,0 +1,228 @@ +#include "wiFFTGenerator.h" +#include "wiResourceManager.h" +#include "wiRenderer.h" +#include "ShaderInterop_FFTGenerator.h" + +#include +#include +#include + +using namespace wiGraphicsTypes; + +ComputeShader* CSFFT_512x512_Data_t::pRadix008A_CS = nullptr; +ComputeShader* CSFFT_512x512_Data_t::pRadix008A_CS2 = nullptr; + +void radix008A(CSFFT512x512_Plan* fft_plan, + GPUUnorderedResource* pUAV_Dst, + GPUResource* pSRV_Src, + UINT thread_count, + UINT istride, + GRAPHICSTHREAD threadID) +{ + // Setup execution configuration + UINT grid = thread_count / COHERENCY_GRANULARITY; + + GraphicsDevice* device = wiRenderer::GetDevice(); + + // Buffers + GPUResource* cs_srvs[1] = { pSRV_Src }; + device->BindResourcesCS(cs_srvs, TEXSLOT_ONDEMAND0, 1, threadID); + + GPUUnorderedResource* cs_uavs[1] = { pUAV_Dst }; + device->BindUnorderedAccessResourcesCS(cs_uavs, 0, 1, threadID); + + // Shader + if (istride > 1) + device->BindCS(fft_plan->pRadix008A_CS, threadID); + else + device->BindCS(fft_plan->pRadix008A_CS2, threadID); + + // Execute + device->Dispatch(grid, 1, 1, threadID); + + // Unbind resource + device->UnBindResources(TEXSLOT_ONDEMAND0, 1, threadID); + 
device->UnBindUnorderedAccessResources(0, 1, threadID); +} + +void fft_512x512_c2c(CSFFT512x512_Plan* fft_plan, + GPUUnorderedResource* pUAV_Dst, + GPUResource* pSRV_Dst, + GPUResource* pSRV_Src, + GRAPHICSTHREAD threadID) +{ + const UINT thread_count = fft_plan->slices * (512 * 512) / 8; + GPUUnorderedResource* pUAV_Tmp = fft_plan->pUAV_Tmp; + GPUResource* pSRV_Tmp = fft_plan->pSRV_Tmp; + GraphicsDevice* device = wiRenderer::GetDevice(); + GPUBuffer* cs_cbs; + + UINT istride = 512 * 512 / 8; + cs_cbs = fft_plan->pRadix008A_CB[0]; + device->BindConstantBufferCS(&cs_cbs[0], CB_GETBINDSLOT(FFTGeneratorCB), threadID); + radix008A(fft_plan, pUAV_Tmp, pSRV_Src, thread_count, istride, threadID); + + istride /= 8; + cs_cbs = fft_plan->pRadix008A_CB[1]; + device->BindConstantBufferCS(&cs_cbs[0], CB_GETBINDSLOT(FFTGeneratorCB), threadID); + radix008A(fft_plan, pUAV_Dst, pSRV_Tmp, thread_count, istride, threadID); + + istride /= 8; + cs_cbs = fft_plan->pRadix008A_CB[2]; + device->BindConstantBufferCS(&cs_cbs[0], CB_GETBINDSLOT(FFTGeneratorCB), threadID); + radix008A(fft_plan, pUAV_Tmp, pSRV_Dst, thread_count, istride, threadID); + + istride /= 8; + cs_cbs = fft_plan->pRadix008A_CB[3]; + device->BindConstantBufferCS(&cs_cbs[0], CB_GETBINDSLOT(FFTGeneratorCB), threadID); + radix008A(fft_plan, pUAV_Dst, pSRV_Tmp, thread_count, istride, threadID); + + istride /= 8; + cs_cbs = fft_plan->pRadix008A_CB[4]; + device->BindConstantBufferCS(&cs_cbs[0], CB_GETBINDSLOT(FFTGeneratorCB), threadID); + radix008A(fft_plan, pUAV_Tmp, pSRV_Dst, thread_count, istride, threadID); + + istride /= 8; + cs_cbs = fft_plan->pRadix008A_CB[5]; + device->BindConstantBufferCS(&cs_cbs[0], CB_GETBINDSLOT(FFTGeneratorCB), threadID); + radix008A(fft_plan, pUAV_Dst, pSRV_Tmp, thread_count, istride, threadID); +} + +void create_cbuffers_512x512(CSFFT512x512_Plan* plan, GraphicsDevice* device, UINT slices) +{ + // Create 6 cbuffers for 512x512 transform. 
+ + GPUBufferDesc cb_desc; + cb_desc.Usage = USAGE_IMMUTABLE; + cb_desc.BindFlags = BIND_CONSTANT_BUFFER; + cb_desc.CPUAccessFlags = 0; + cb_desc.MiscFlags = 0; + cb_desc.ByteWidth = sizeof(FFTGeneratorCB); + cb_desc.StructureByteStride = 0; + + SubresourceData cb_data; + cb_data.SysMemPitch = 0; + cb_data.SysMemSlicePitch = 0; + + //struct CB_Structure + //{ + // UINT thread_count; + // UINT ostride; + // UINT istride; + // UINT pstride; + // float phase_base; + //}; + + for (int i = 0; i < ARRAYSIZE(plan->pRadix008A_CB); ++i) + { + plan->pRadix008A_CB[i] = new GPUBuffer; + } + + // Buffer 0 + const UINT thread_count = slices * (512 * 512) / 8; + UINT ostride = 512 * 512 / 8; + UINT istride = ostride; + double phase_base = -TWO_PI / (512.0 * 512.0); + + FFTGeneratorCB cb_data_buf0 = { thread_count, ostride, istride, 512, (float)phase_base }; + cb_data.pSysMem = &cb_data_buf0; + + device->CreateBuffer(&cb_desc, &cb_data, plan->pRadix008A_CB[0]); + assert(plan->pRadix008A_CB[0]); + + // Buffer 1 + istride /= 8; + phase_base *= 8.0; + + FFTGeneratorCB cb_data_buf1 = { thread_count, ostride, istride, 512, (float)phase_base }; + cb_data.pSysMem = &cb_data_buf1; + + device->CreateBuffer(&cb_desc, &cb_data, plan->pRadix008A_CB[1]); + assert(plan->pRadix008A_CB[1]); + + // Buffer 2 + istride /= 8; + phase_base *= 8.0; + + FFTGeneratorCB cb_data_buf2 = { thread_count, ostride, istride, 512, (float)phase_base }; + cb_data.pSysMem = &cb_data_buf2; + + device->CreateBuffer(&cb_desc, &cb_data, plan->pRadix008A_CB[2]); + assert(plan->pRadix008A_CB[2]); + + // Buffer 3 + istride /= 8; + phase_base *= 8.0; + ostride /= 512; + + FFTGeneratorCB cb_data_buf3 = { thread_count, ostride, istride, 1, (float)phase_base }; + cb_data.pSysMem = &cb_data_buf3; + + device->CreateBuffer(&cb_desc, &cb_data, plan->pRadix008A_CB[3]); + assert(plan->pRadix008A_CB[3]); + + // Buffer 4 + istride /= 8; + phase_base *= 8.0; + + FFTGeneratorCB cb_data_buf4 = { thread_count, ostride, istride, 1, 
(float)phase_base }; + cb_data.pSysMem = &cb_data_buf4; + + device->CreateBuffer(&cb_desc, &cb_data, plan->pRadix008A_CB[4]); + assert(plan->pRadix008A_CB[4]); + + // Buffer 5 + istride /= 8; + phase_base *= 8.0; + + FFTGeneratorCB cb_data_buf5 = { thread_count, ostride, istride, 1, (float)phase_base }; + cb_data.pSysMem = &cb_data_buf5; + + device->CreateBuffer(&cb_desc, &cb_data, plan->pRadix008A_CB[5]); + assert(plan->pRadix008A_CB[5]); +} + +void fft512x512_create_plan(CSFFT512x512_Plan* plan, UINT slices) +{ + GraphicsDevice* device = wiRenderer::GetDevice(); + + plan->slices = slices; + + + // Constants + // Create 6 cbuffers for 512x512 transform + create_cbuffers_512x512(plan, device, slices); + + // Temp buffer + GPUBufferDesc buf_desc; + buf_desc.ByteWidth = sizeof(float) * 2 * (512 * slices) * 512; + buf_desc.Usage = USAGE_DEFAULT; + buf_desc.BindFlags = BIND_UNORDERED_ACCESS | BIND_SHADER_RESOURCE; + buf_desc.CPUAccessFlags = 0; + buf_desc.MiscFlags = RESOURCE_MISC_BUFFER_STRUCTURED; + buf_desc.StructureByteStride = sizeof(float) * 2; + + plan->pBuffer_Tmp = new GPUBuffer; + device->CreateBuffer(&buf_desc, nullptr, plan->pBuffer_Tmp); + + plan->pSRV_Tmp = (GPUResource*)plan->pBuffer_Tmp; + plan->pUAV_Tmp = (GPUUnorderedResource*)plan->pBuffer_Tmp; +} + +void fft512x512_destroy_plan(CSFFT512x512_Plan* plan) +{ + SAFE_DELETE(plan->pBuffer_Tmp); + + for (int i = 0; i < 6; i++) + SAFE_DELETE(plan->pRadix008A_CB[i]); +} + + + +void CSFFT_512x512_Data_t::LoadShaders() +{ + + pRadix008A_CS = static_cast(wiResourceManager::GetShaderManager()->add(wiRenderer::SHADERPATH + "fft_512x512_c2c_CS.cso", wiResourceManager::COMPUTESHADER)); + pRadix008A_CS2 = static_cast(wiResourceManager::GetShaderManager()->add(wiRenderer::SHADERPATH + "fft_512x512_c2c_v2_CS.cso", wiResourceManager::COMPUTESHADER)); + +} diff --git a/WickedEngine/wiFFTGenerator.h b/WickedEngine/wiFFTGenerator.h new file mode 100644 index 000000000..651eef567 --- /dev/null +++ 
b/WickedEngine/wiFFTGenerator.h @@ -0,0 +1,57 @@ +#ifndef _FFT_GENERATOR_H_ +#define _FFT_GENERATOR_H_ + +#include "CommonInclude.h" +#include "wiGraphicsAPI.h" + + +//Memory access coherency (in threads) +#define COHERENCY_GRANULARITY 128 + + +/////////////////////////////////////////////////////////////////////////////// +// Common types +/////////////////////////////////////////////////////////////////////////////// + +typedef struct CSFFT_512x512_Data_t +{ + static wiGraphicsTypes::ComputeShader* pRadix008A_CS; + static wiGraphicsTypes::ComputeShader* pRadix008A_CS2; + + // More than one array can be transformed at same time + UINT slices; + + // For 512x512 config, we need 6 constant buffers + wiGraphicsTypes::GPUBuffer* pRadix008A_CB[6]; + + // Temporary buffers + wiGraphicsTypes::GPUBuffer* pBuffer_Tmp; + wiGraphicsTypes::GPUUnorderedResource* pUAV_Tmp; + wiGraphicsTypes::GPUResource* pSRV_Tmp; + + static void LoadShaders(); +} CSFFT512x512_Plan; + +//////////////////////////////////////////////////////////////////////////////// +// Common constants +//////////////////////////////////////////////////////////////////////////////// +#define TWO_PI 6.283185307179586476925286766559 + +#define FFT_DIMENSIONS 3U +#define FFT_PLAN_SIZE_LIMIT (1U << 27) + +#define FFT_FORWARD -1 +#define FFT_INVERSE 1 + + +void fft512x512_create_plan(CSFFT512x512_Plan* plan, UINT slices); +void fft512x512_destroy_plan(CSFFT512x512_Plan* plan); + +void fft_512x512_c2c(CSFFT512x512_Plan* fft_plan, + wiGraphicsTypes::GPUUnorderedResource* pUAV_Dst, + wiGraphicsTypes::GPUResource* pSRV_Dst, + wiGraphicsTypes::GPUResource* pSRV_Src, + GRAPHICSTHREAD threadID); + + +#endif // _FFT_GENERATOR_H_ diff --git a/WickedEngine/wiInitializer.cpp b/WickedEngine/wiInitializer.cpp index 217b72027..f4d697b15 100644 --- a/WickedEngine/wiInitializer.cpp +++ b/WickedEngine/wiInitializer.cpp @@ -7,6 +7,7 @@ #include "wiBackLog.h" #include "wiCpuInfo.h" #include "wiSound.h" +#include "wiOcean.h" #include 
"wiHelper.h" using namespace std; @@ -28,6 +29,8 @@ namespace wiInitializer wiFont::Initialize(); wiFont::SetUpStaticComponents(); + wiOcean::SetUpStatic(); + if (FAILED(wiSoundEffect::Initialize()) || FAILED(wiMusic::Initialize())) { stringstream ss(""); diff --git a/WickedEngine/wiLoader.h b/WickedEngine/wiLoader.h index 8bacba557..adc388e5c 100644 --- a/WickedEngine/wiLoader.h +++ b/WickedEngine/wiLoader.h @@ -1078,49 +1078,49 @@ struct Camera:public Transform{ XMStoreFloat4x4(&this->InvProjection, InvP); } - XMVECTOR GetEye() + XMVECTOR GetEye() const { return XMLoadFloat3(&translation); } - XMVECTOR GetAt() + XMVECTOR GetAt() const { return XMLoadFloat3(&At); } - XMVECTOR GetUp() + XMVECTOR GetUp() const { return XMLoadFloat3(&Up); } - XMVECTOR GetRight() + XMVECTOR GetRight() const { return XMVector3Cross(GetAt(), GetUp()); } - XMMATRIX GetView() + XMMATRIX GetView() const { return XMLoadFloat4x4(&View); } - XMMATRIX GetInvView() + XMMATRIX GetInvView() const { return XMLoadFloat4x4(&InvView); } - XMMATRIX GetProjection() + XMMATRIX GetProjection() const { return XMLoadFloat4x4(&Projection); } - XMMATRIX GetInvProjection() + XMMATRIX GetInvProjection() const { return XMLoadFloat4x4(&InvProjection); } - XMMATRIX GetViewProjection() + XMMATRIX GetViewProjection() const { return XMLoadFloat4x4(&VP); } - XMMATRIX GetInvViewProjection() + XMMATRIX GetInvViewProjection() const { return XMLoadFloat4x4(&InvVP); } // when the projection matrix is modified for reverse zbuffering, this returns the normal projection - XMMATRIX GetRealProjection() + XMMATRIX GetRealProjection() const { return XMLoadFloat4x4(&realProjection); } diff --git a/WickedEngine/wiOcean.cpp b/WickedEngine/wiOcean.cpp new file mode 100644 index 000000000..18d893685 --- /dev/null +++ b/WickedEngine/wiOcean.cpp @@ -0,0 +1,1338 @@ +#include "wiOcean.h" +#include "wiRenderer.h" +#include "wiResourceManager.h" +#include "ShaderInterop_Ocean.h" + +using namespace wiGraphicsTypes; +using namespace std; + 
+ComputeShader* wiOcean::m_pUpdateSpectrumCS = nullptr; +ComputeShader* wiOcean::m_pUpdateDisplacementMapCS = nullptr; +ComputeShader* wiOcean::m_pUpdateGradientFoldingCS = nullptr; +VertexShader* wiOcean::g_pOceanSurfVS = nullptr; +PixelShader* wiOcean::g_pWireframePS = nullptr; +PixelShader* wiOcean::g_pOceanSurfPS = nullptr; + +VertexLayout* wiOcean::g_pMeshLayout = nullptr; +Texture1D* wiOcean::g_pFresnelMap = nullptr; +Texture2D* wiOcean::g_pPerlinMap = nullptr; +GPUBuffer* wiOcean::g_pPerCallCB = nullptr; +GPUBuffer* wiOcean::g_pShadingCB = nullptr; +RasterizerState* wiOcean::g_pRSState_Solid = nullptr; +RasterizerState* wiOcean::g_pRSState_Wireframe = nullptr; +DepthStencilState* wiOcean::g_pDSState_Disable = nullptr; +BlendState* wiOcean::g_pBState_Transparent = nullptr; + +CSFFT512x512_Plan wiOcean::m_fft_plan; + +// Disable warning "conditional expression is constant" +#pragma warning(disable:4127) + + +#define HALF_SQRT_2 0.7071068f +#define GRAV_ACCEL 981.0f // The acceleration of gravity, cm/s^2 + +// Generating gaussian random number with mean 0 and standard deviation 1. 
+float Gauss() +{ + float u1 = rand() / (float)RAND_MAX; + float u2 = rand() / (float)RAND_MAX; + if (u1 < 1e-6f) + u1 = 1e-6f; + return sqrtf(-2 * logf(u1)) * cosf(2 * XM_PI * u2); +} + +// Phillips Spectrum +// K: normalized wave vector, W: wind direction, v: wind velocity, a: amplitude constant +float Phillips(XMFLOAT2 K, XMFLOAT2 W, float v, float a, float dir_depend) +{ + // largest possible wave from constant wind of velocity v + float l = v * v / GRAV_ACCEL; + // damp out waves with very small length w << l + float w = l / 1000; + + float Ksqr = K.x * K.x + K.y * K.y; + float Kcos = K.x * W.x + K.y * W.y; + float phillips = a * expf(-1 / (l * l * Ksqr)) / (Ksqr * Ksqr * Ksqr) * (Kcos * Kcos); + + // filter out waves moving opposite to wind + if (Kcos < 0) + phillips *= dir_depend; + + // damp out waves with very small length w << l + return phillips * expf(-Ksqr * w * w); +} + +void createBufferAndUAV(void* data, UINT byte_width, UINT byte_stride, GPUBuffer** ppBuffer) +{ + *ppBuffer = new GPUBuffer; + + // Create buffer + GPUBufferDesc buf_desc; + buf_desc.ByteWidth = byte_width; + buf_desc.Usage = USAGE_DEFAULT; + buf_desc.BindFlags = BIND_UNORDERED_ACCESS | BIND_SHADER_RESOURCE; + buf_desc.CPUAccessFlags = 0; + buf_desc.MiscFlags = RESOURCE_MISC_BUFFER_STRUCTURED; + buf_desc.StructureByteStride = byte_stride; + + SubresourceData init_data; + init_data.pSysMem = data; + + wiRenderer::GetDevice()->CreateBuffer(&buf_desc, data != NULL ? 
&init_data : NULL, *ppBuffer); + + + //assert(*ppBuffer); + + //// Create undordered access view + //UNORDERED_ACCESS_VIEW_DESC uav_desc; + //uav_desc.Format = DXGI_FORMAT_UNKNOWN; + //uav_desc.ViewDimension = UAV_DIMENSION_BUFFER; + //uav_desc.Buffer.FirstElement = 0; + //uav_desc.Buffer.NumElements = byte_width / byte_stride; + //uav_desc.Buffer.Flags = 0; + + //device->CreateUnorderedAccessView(*ppBuffer, &uav_desc, ppUAV); + //assert(*ppUAV); + + //// Create shader resource view + //SHADER_RESOURCE_VIEW_DESC srv_desc; + //srv_desc.Format = DXGI_FORMAT_UNKNOWN; + //srv_desc.ViewDimension = SRV_DIMENSION_BUFFER; + //srv_desc.Buffer.FirstElement = 0; + //srv_desc.Buffer.NumElements = byte_width / byte_stride; + + //device->CreateShaderResourceView(*ppBuffer, &srv_desc, ppSRV); + //assert(*ppSRV); +} + +void createTextureAndViews(UINT width, UINT height, FORMAT format, Texture2D** ppTex) +{ + // Create 2D texture + Texture2DDesc tex_desc; + tex_desc.Width = width; + tex_desc.Height = height; + tex_desc.MipLevels = 0; + tex_desc.ArraySize = 1; + tex_desc.Format = format; + tex_desc.SampleDesc.Count = 1; + tex_desc.SampleDesc.Quality = 0; + tex_desc.Usage = USAGE_DEFAULT; + tex_desc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS | BIND_RENDER_TARGET; + tex_desc.CPUAccessFlags = 0; + tex_desc.MiscFlags = RESOURCE_MISC_GENERATE_MIPS; + + *ppTex = new Texture2D; + wiRenderer::GetDevice()->CreateTexture2D(&tex_desc, NULL, ppTex); + + + //assert(*ppTex); + + //// Create shader resource view + //(*ppTex)->GetDesc(&tex_desc); + //if (ppSRV) + //{ + // SHADER_RESOURCE_VIEW_DESC srv_desc; + // srv_desc.Format = format; + // srv_desc.ViewDimension = SRV_DIMENSION_TEXTURE2D; + // srv_desc.Texture2D.MipLevels = tex_desc.MipLevels; + // srv_desc.Texture2D.MostDetailedMip = 0; + + // device->CreateShaderResourceView(*ppTex, &srv_desc, ppSRV); + // assert(*ppSRV); + //} + + //// Create render target view + //if (ppRTV) + //{ + // RENDER_TARGET_VIEW_DESC rtv_desc; + // 
rtv_desc.Format = format; + // rtv_desc.ViewDimension = RTV_DIMENSION_TEXTURE2D; + // rtv_desc.Texture2D.MipSlice = 0; + + // device->CreateRenderTargetView(*ppTex, &rtv_desc, ppRTV); + // assert(*ppRTV); + //} +} + + + +wiOcean::wiOcean(const wiOceanParameter& params) +{ + m_param = params; + + // Height map H(0) + int height_map_size = (params.dmap_dim + 4) * (params.dmap_dim + 1); + XMFLOAT2* h0_data = new XMFLOAT2[height_map_size * sizeof(XMFLOAT2)]; + float* omega_data = new float[height_map_size * sizeof(float)]; + initHeightMap(h0_data, omega_data); + + int hmap_dim = params.dmap_dim; + int input_full_size = (hmap_dim + 4) * (hmap_dim + 1); + // This value should be (hmap_dim / 2 + 1) * hmap_dim, but we use full sized buffer here for simplicity. + int input_half_size = hmap_dim * hmap_dim; + int output_size = hmap_dim * hmap_dim; + + // For filling the buffer with zeroes. + char* zero_data = new char[3 * output_size * sizeof(float) * 2]; + memset(zero_data, 0, 3 * output_size * sizeof(float) * 2); + + // RW buffer allocations + // H0 + UINT float2_stride = 2 * sizeof(float); + createBufferAndUAV(h0_data, input_full_size * float2_stride, float2_stride, &m_pBuffer_Float2_H0); + + // Notice: The following 3 buffers should be half sized buffer because of conjugate symmetric input. But + // we use full sized buffers due to the CS4.0 restriction. + + // Put H(t), Dx(t) and Dy(t) into one buffer because CS4.0 allows only 1 UAV at a time + createBufferAndUAV(zero_data, 3 * input_half_size * float2_stride, float2_stride, &m_pBuffer_Float2_Ht); + + // omega + createBufferAndUAV(omega_data, input_full_size * sizeof(float), sizeof(float), &m_pBuffer_Float_Omega); + + // Notice: The following 3 should be real number data. But here we use the complex numbers and C2C FFT + // due to the CS4.0 restriction. 
+ // Put Dz, Dx and Dy into one buffer because CS4.0 allows only 1 UAV at a time + createBufferAndUAV(zero_data, 3 * output_size * float2_stride, float2_stride, &m_pBuffer_Float_Dxyz); + + SAFE_DELETE_ARRAY(zero_data); + SAFE_DELETE_ARRAY(h0_data); + SAFE_DELETE_ARRAY(omega_data); + + + createTextureAndViews(hmap_dim, hmap_dim, FORMAT_R32G32B32A32_FLOAT, &m_pDisplacementMap); + createTextureAndViews(hmap_dim, hmap_dim, FORMAT_R16G16B16A16_FLOAT, &m_pGradientMap); + + + // Constant buffers + UINT actual_dim = m_param.dmap_dim; + UINT input_width = actual_dim + 4; + // We use full sized data here. The value "output_width" should be actual_dim/2+1 though. + UINT output_width = actual_dim; + UINT output_height = actual_dim; + UINT dtx_offset = actual_dim * actual_dim; + UINT dty_offset = actual_dim * actual_dim * 2; + Ocean_Simulation_ImmutableCB immutable_consts = { actual_dim, input_width, output_width, output_height, dtx_offset, dty_offset }; + SubresourceData init_cb0; + init_cb0.pSysMem = &immutable_consts; + + GPUBufferDesc cb_desc; + cb_desc.Usage = USAGE_IMMUTABLE; + cb_desc.BindFlags = BIND_CONSTANT_BUFFER; + cb_desc.CPUAccessFlags = 0; + cb_desc.MiscFlags = 0; + cb_desc.ByteWidth = sizeof(Ocean_Simulation_ImmutableCB); + m_pImmutableCB = new GPUBuffer; + wiRenderer::GetDevice()->CreateBuffer(&cb_desc, &init_cb0, m_pImmutableCB); + + cb_desc.Usage = USAGE_DYNAMIC; + cb_desc.BindFlags = BIND_CONSTANT_BUFFER; + cb_desc.CPUAccessFlags = CPU_ACCESS_WRITE; + cb_desc.MiscFlags = 0; + cb_desc.ByteWidth = sizeof(Ocean_Simulation_PerFrameCB); + m_pPerFrameCB = new GPUBuffer; + wiRenderer::GetDevice()->CreateBuffer(&cb_desc, nullptr, m_pPerFrameCB); + + + initRenderResource(); +} + +wiOcean::~wiOcean() +{ + + SAFE_DELETE(m_pBuffer_Float2_H0); + SAFE_DELETE(m_pBuffer_Float_Omega); + SAFE_DELETE(m_pBuffer_Float2_Ht); + SAFE_DELETE(m_pBuffer_Float_Dxyz); + + SAFE_DELETE(m_pDisplacementMap); + SAFE_DELETE(m_pGradientMap); + + + SAFE_DELETE(m_pImmutableCB); + 
SAFE_DELETE(m_pPerFrameCB); + + + cleanupRenderResource(); +} + + + + +// Simulation functions: + + + +// Initialize the vector field. +// wlen_x: width of wave tile, in meters +// wlen_y: length of wave tile, in meters +void wiOcean::initHeightMap(XMFLOAT2* out_h0, float* out_omega) +{ + int i, j; + XMFLOAT2 K; + + XMFLOAT2 wind_dir; + XMStoreFloat2(&wind_dir, XMVector2Normalize(XMLoadFloat2(&m_param.wind_dir))); + float a = m_param.wave_amplitude * 1e-7f; // It is too small. We must scale it for editing. + float v = m_param.wind_speed; + float dir_depend = m_param.wind_dependency; + + int height_map_dim = m_param.dmap_dim; + float patch_length = m_param.patch_length; + + // initialize random generator. + srand(0); + + for (i = 0; i <= height_map_dim; i++) + { + // K is wave-vector, range [-|DX/W, |DX/W], [-|DY/H, |DY/H] + K.y = (-height_map_dim / 2.0f + i) * (2 * XM_PI / patch_length); + + for (j = 0; j <= height_map_dim; j++) + { + K.x = (-height_map_dim / 2.0f + j) * (2 * XM_PI / patch_length); + + float phil = (K.x == 0 && K.y == 0) ? 0 : sqrtf(Phillips(K, wind_dir, v, a, dir_depend)); + + out_h0[i * (height_map_dim + 4) + j].x = float(phil * Gauss() * HALF_SQRT_2); + out_h0[i * (height_map_dim + 4) + j].y = float(phil * Gauss() * HALF_SQRT_2); + + // The angular frequency is following the dispersion relation: + // out_omega^2 = g*k + // The equation of Gerstner wave: + // x = x0 - K/k * A * sin(dot(K, x0) - sqrt(g * k) * t), x is a 2D vector. + // z = A * cos(dot(K, x0) - sqrt(g * k) * t) + // Gerstner wave shows that a point on a simple sinusoid wave is doing a uniform circular + // motion with the center (x0, y0, z0), radius A, and the circular plane is parallel to + // vector K. 
+ out_omega[i * (height_map_dim + 4) + j] = sqrtf(GRAV_ACCEL * sqrtf(K.x * K.x + K.y * K.y)); + } + } +} + +void wiOcean::UpdateDisplacementMap(float time, GRAPHICSTHREAD threadID) +{ + GraphicsDevice* device = wiRenderer::GetDevice(); + + device->EventBegin("OceanSimulator", threadID); + + // ---------------------------- H(0) -> H(t), D(x, t), D(y, t) -------------------------------- + device->BindCS(m_pUpdateSpectrumCS, threadID); + + // Buffers + GPUResource* cs0_srvs[2] = { + m_pBuffer_Float2_H0, + m_pBuffer_Float_Omega + }; + device->BindResourcesCS(cs0_srvs, TEXSLOT_ONDEMAND0, 2, threadID); + + GPUUnorderedResource* cs0_uavs[1] = { m_pBuffer_Float2_Ht }; + device->BindUnorderedAccessResourcesCS(cs0_uavs, 0, 1, threadID); + + Ocean_Simulation_PerFrameCB perFrameData; + perFrameData.g_Time = time * m_param.time_scale; + perFrameData.g_ChoppyScale = m_param.choppy_scale; + perFrameData.g_GridLen = m_param.dmap_dim / m_param.patch_length; + device->UpdateBuffer(m_pPerFrameCB, &perFrameData, threadID); + + device->BindConstantBufferCS(m_pImmutableCB, CB_GETBINDSLOT(Ocean_Simulation_ImmutableCB), threadID); + device->BindConstantBufferCS(m_pPerFrameCB, CB_GETBINDSLOT(Ocean_Simulation_PerFrameCB), threadID); + + // Run the CS + UINT group_count_x = (m_param.dmap_dim + OCEAN_COMPUTE_TILESIZE - 1) / OCEAN_COMPUTE_TILESIZE; + UINT group_count_y = (m_param.dmap_dim + OCEAN_COMPUTE_TILESIZE - 1) / OCEAN_COMPUTE_TILESIZE; + device->Dispatch(group_count_x, group_count_y, 1, threadID); + + device->UnBindUnorderedAccessResources(0, 1, threadID); + device->UnBindResources(TEXSLOT_ONDEMAND0, 2, threadID); + + + // ------------------------------------ Perform FFT ------------------------------------------- + fft_512x512_c2c(&m_fft_plan, m_pBuffer_Float_Dxyz, m_pBuffer_Float_Dxyz, m_pBuffer_Float2_Ht, threadID); + + + + device->BindConstantBufferCS(m_pImmutableCB, CB_GETBINDSLOT(Ocean_Simulation_ImmutableCB), threadID); + device->BindConstantBufferCS(m_pPerFrameCB, 
CB_GETBINDSLOT(Ocean_Simulation_PerFrameCB), threadID); + + + // Update displacement map: + device->BindCS(m_pUpdateDisplacementMapCS, threadID); + GPUUnorderedResource* cs_uavs[] = { m_pDisplacementMap }; + device->BindUnorderedAccessResourcesCS(cs_uavs, 0, 1, threadID); + GPUResource* cs_srvs[1] = { m_pBuffer_Float_Dxyz }; + device->BindResourcesCS(cs_srvs, TEXSLOT_ONDEMAND0, 1, threadID); + device->Dispatch(m_param.dmap_dim / OCEAN_COMPUTE_TILESIZE, m_param.dmap_dim / OCEAN_COMPUTE_TILESIZE, 1, threadID); + + + // Update gradient map: + device->BindCS(m_pUpdateGradientFoldingCS, threadID); + cs_uavs[0] = { m_pGradientMap }; + device->BindUnorderedAccessResourcesCS(cs_uavs, 0, 1, threadID); + cs_srvs[0] = m_pDisplacementMap; + device->BindResourcesCS(cs_srvs, TEXSLOT_ONDEMAND0, 1, threadID); + device->Dispatch(m_param.dmap_dim / OCEAN_COMPUTE_TILESIZE, m_param.dmap_dim / OCEAN_COMPUTE_TILESIZE, 1, threadID); + + // Unbind + device->UnBindUnorderedAccessResources(0, 1, threadID); + device->UnBindResources(TEXSLOT_ONDEMAND0, 1, threadID); + device->BindCS(nullptr, threadID); + + + device->GenerateMips(m_pGradientMap, threadID); + + + device->EventEnd(threadID); +} + +Texture2D* wiOcean::getDisplacementMap() +{ + return m_pDisplacementMap; +} + +Texture2D* wiOcean::getGradientMap() +{ + return m_pGradientMap; +} + + +const wiOceanParameter& wiOcean::getParameters() +{ + return m_param; +} + + + + + + +// Rendering functions: + + +void wiOcean::initRenderResource() +{ + GraphicsDevice* device = wiRenderer::GetDevice(); + + g_WindDir = m_param.wind_dir; + + createSurfaceMesh(); +} + +void wiOcean::cleanupRenderResource() +{ + SAFE_DELETE(g_pMeshIB); + SAFE_DELETE(g_pMeshVB); + + g_render_list.clear(); +} + +#define MESH_INDEX_2D(x, y) (((y) + vert_rect.bottom) * (g_MeshDim + 1) + (x) + vert_rect.left) + +// Generate boundary mesh for a patch. 
Return the number of generated indices +int wiOcean::generateBoundaryMesh(int left_degree, int right_degree, int bottom_degree, int top_degree, + RECT vert_rect, DWORD* output) +{ + // Triangle list for bottom boundary + int i, j; + int counter = 0; + int width = vert_rect.right - vert_rect.left; + + if (bottom_degree > 0) + { + int b_step = width / bottom_degree; + + for (i = 0; i < width; i += b_step) + { + output[counter++] = MESH_INDEX_2D(i, 0); + output[counter++] = MESH_INDEX_2D(i + b_step / 2, 1); + output[counter++] = MESH_INDEX_2D(i + b_step, 0); + + for (j = 0; j < b_step / 2; j++) + { + if (i == 0 && j == 0 && left_degree > 0) + continue; + + output[counter++] = MESH_INDEX_2D(i, 0); + output[counter++] = MESH_INDEX_2D(i + j, 1); + output[counter++] = MESH_INDEX_2D(i + j + 1, 1); + } + + for (j = b_step / 2; j < b_step; j++) + { + if (i == width - b_step && j == b_step - 1 && right_degree > 0) + continue; + + output[counter++] = MESH_INDEX_2D(i + b_step, 0); + output[counter++] = MESH_INDEX_2D(i + j, 1); + output[counter++] = MESH_INDEX_2D(i + j + 1, 1); + } + } + } + + // Right boundary + int height = vert_rect.top - vert_rect.bottom; + + if (right_degree > 0) + { + int r_step = height / right_degree; + + for (i = 0; i < height; i += r_step) + { + output[counter++] = MESH_INDEX_2D(width, i); + output[counter++] = MESH_INDEX_2D(width - 1, i + r_step / 2); + output[counter++] = MESH_INDEX_2D(width, i + r_step); + + for (j = 0; j < r_step / 2; j++) + { + if (i == 0 && j == 0 && bottom_degree > 0) + continue; + + output[counter++] = MESH_INDEX_2D(width, i); + output[counter++] = MESH_INDEX_2D(width - 1, i + j); + output[counter++] = MESH_INDEX_2D(width - 1, i + j + 1); + } + + for (j = r_step / 2; j < r_step; j++) + { + if (i == height - r_step && j == r_step - 1 && top_degree > 0) + continue; + + output[counter++] = MESH_INDEX_2D(width, i + r_step); + output[counter++] = MESH_INDEX_2D(width - 1, i + j); + output[counter++] = MESH_INDEX_2D(width - 1, i + j + 
1); + } + } + } + + // Top boundary + if (top_degree > 0) + { + int t_step = width / top_degree; + + for (i = 0; i < width; i += t_step) + { + output[counter++] = MESH_INDEX_2D(i, height); + output[counter++] = MESH_INDEX_2D(i + t_step / 2, height - 1); + output[counter++] = MESH_INDEX_2D(i + t_step, height); + + for (j = 0; j < t_step / 2; j++) + { + if (i == 0 && j == 0 && left_degree > 0) + continue; + + output[counter++] = MESH_INDEX_2D(i, height); + output[counter++] = MESH_INDEX_2D(i + j, height - 1); + output[counter++] = MESH_INDEX_2D(i + j + 1, height - 1); + } + + for (j = t_step / 2; j < t_step; j++) + { + if (i == width - t_step && j == t_step - 1 && right_degree > 0) + continue; + + output[counter++] = MESH_INDEX_2D(i + t_step, height); + output[counter++] = MESH_INDEX_2D(i + j, height - 1); + output[counter++] = MESH_INDEX_2D(i + j + 1, height - 1); + } + } + } + + // Left boundary + if (left_degree > 0) + { + int l_step = height / left_degree; + + for (i = 0; i < height; i += l_step) + { + output[counter++] = MESH_INDEX_2D(0, i); + output[counter++] = MESH_INDEX_2D(1, i + l_step / 2); + output[counter++] = MESH_INDEX_2D(0, i + l_step); + + for (j = 0; j < l_step / 2; j++) + { + if (i == 0 && j == 0 && bottom_degree > 0) + continue; + + output[counter++] = MESH_INDEX_2D(0, i); + output[counter++] = MESH_INDEX_2D(1, i + j); + output[counter++] = MESH_INDEX_2D(1, i + j + 1); + } + + for (j = l_step / 2; j < l_step; j++) + { + if (i == height - l_step && j == l_step - 1 && top_degree > 0) + continue; + + output[counter++] = MESH_INDEX_2D(0, i + l_step); + output[counter++] = MESH_INDEX_2D(1, i + j); + output[counter++] = MESH_INDEX_2D(1, i + j + 1); + } + } + } + + return counter; +} + +// Generate boundary mesh for a patch. 
Return the number of generated indices +int wiOcean::generateInnerMesh(RECT vert_rect, DWORD* output) +{ + int i, j; + int counter = 0; + int width = vert_rect.right - vert_rect.left; + int height = vert_rect.top - vert_rect.bottom; + + bool reverse = false; + for (i = 0; i < height; i++) + { + if (reverse == false) + { + output[counter++] = MESH_INDEX_2D(0, i); + output[counter++] = MESH_INDEX_2D(0, i + 1); + for (j = 0; j < width; j++) + { + output[counter++] = MESH_INDEX_2D(j + 1, i); + output[counter++] = MESH_INDEX_2D(j + 1, i + 1); + } + } + else + { + output[counter++] = MESH_INDEX_2D(width, i); + output[counter++] = MESH_INDEX_2D(width, i + 1); + for (j = width - 1; j >= 0; j--) + { + output[counter++] = MESH_INDEX_2D(j, i); + output[counter++] = MESH_INDEX_2D(j, i + 1); + } + } + + reverse = !reverse; + } + + return counter; +} + +void wiOcean::createSurfaceMesh() +{ + GraphicsDevice* device = wiRenderer::GetDevice(); + + // --------------------------------- Vertex Buffer ------------------------------- + int num_verts = (g_MeshDim + 1) * (g_MeshDim + 1); + ocean_vertex* pV = new ocean_vertex[num_verts]; + assert(pV); + + int i, j; + for (i = 0; i <= g_MeshDim; i++) + { + for (j = 0; j <= g_MeshDim; j++) + { + pV[i * (g_MeshDim + 1) + j].index_x = (float)j; + pV[i * (g_MeshDim + 1) + j].index_y = (float)i; + } + } + + GPUBufferDesc vb_desc; + vb_desc.ByteWidth = num_verts * sizeof(ocean_vertex); + vb_desc.Usage = USAGE_IMMUTABLE; + vb_desc.BindFlags = BIND_VERTEX_BUFFER; + vb_desc.CPUAccessFlags = 0; + vb_desc.MiscFlags = 0; + vb_desc.StructureByteStride = sizeof(ocean_vertex); + + SubresourceData init_data; + init_data.pSysMem = pV; + init_data.SysMemPitch = 0; + init_data.SysMemSlicePitch = 0; + + g_pMeshVB = new GPUBuffer; + device->CreateBuffer(&vb_desc, &init_data, g_pMeshVB); + + SAFE_DELETE_ARRAY(pV); + + + // --------------------------------- Index Buffer ------------------------------- + // The index numbers for all mesh LODs (up to 256x256) + 
const int index_size_lookup[] = { 0, 0, 4284, 18828, 69444, 254412, 956916, 3689820, 14464836 }; + + memset(&g_mesh_patterns[0][0][0][0][0], 0, sizeof(g_mesh_patterns)); + + g_Lods = 0; + for (i = g_MeshDim; i > 1; i >>= 1) + g_Lods++; + + // Generate patch meshes. Each patch contains two parts: the inner mesh which is a regular + // grids in a triangle strip. The boundary mesh is constructed w.r.t. the edge degrees to + // meet water-tight requirement. + DWORD* index_array = new DWORD[index_size_lookup[g_Lods]]; + assert(index_array); + + int offset = 0; + int level_size = g_MeshDim; + + // Enumerate patterns + for (int level = 0; level <= g_Lods - 2; level++) + { + int left_degree = level_size; + + for (int left_type = 0; left_type < 3; left_type++) + { + int right_degree = level_size; + + for (int right_type = 0; right_type < 3; right_type++) + { + int bottom_degree = level_size; + + for (int bottom_type = 0; bottom_type < 3; bottom_type++) + { + int top_degree = level_size; + + for (int top_type = 0; top_type < 3; top_type++) + { + QuadRenderParam* pattern = &g_mesh_patterns[level][left_type][right_type][bottom_type][top_type]; + + // Inner mesh (triangle strip) + RECT inner_rect; + inner_rect.left = (left_degree == level_size) ? 0 : 1; + inner_rect.right = (right_degree == level_size) ? level_size : level_size - 1; + inner_rect.bottom = (bottom_degree == level_size) ? 0 : 1; + inner_rect.top = (top_degree == level_size) ? level_size : level_size - 1; + + int num_new_indices = generateInnerMesh(inner_rect, index_array + offset); + + pattern->inner_start_index = offset; + pattern->num_inner_verts = (level_size + 1) * (level_size + 1); + pattern->num_inner_faces = num_new_indices - 2; + offset += num_new_indices; + + // Boundary mesh (triangle list) + int l_degree = (left_degree == level_size) ? 0 : left_degree; + int r_degree = (right_degree == level_size) ? 0 : right_degree; + int b_degree = (bottom_degree == level_size) ? 
0 : bottom_degree; + int t_degree = (top_degree == level_size) ? 0 : top_degree; + + RECT outer_rect = { 0, level_size, level_size, 0 }; + num_new_indices = generateBoundaryMesh(l_degree, r_degree, b_degree, t_degree, outer_rect, index_array + offset); + + pattern->boundary_start_index = offset; + pattern->num_boundary_verts = (level_size + 1) * (level_size + 1); + pattern->num_boundary_faces = num_new_indices / 3; + offset += num_new_indices; + + top_degree /= 2; + } + bottom_degree /= 2; + } + right_degree /= 2; + } + left_degree /= 2; + } + level_size /= 2; + } + + assert(offset == index_size_lookup[g_Lods]); + + GPUBufferDesc ib_desc; + ib_desc.ByteWidth = index_size_lookup[g_Lods] * sizeof(DWORD); + ib_desc.Usage = USAGE_IMMUTABLE; + ib_desc.BindFlags = BIND_INDEX_BUFFER; + ib_desc.CPUAccessFlags = 0; + ib_desc.MiscFlags = 0; + ib_desc.StructureByteStride = sizeof(DWORD); + + init_data.pSysMem = index_array; + + g_pMeshIB = new GPUBuffer; + device->CreateBuffer(&ib_desc, &init_data, g_pMeshIB); + + SAFE_DELETE_ARRAY(index_array); +} + +bool wiOcean::checkNodeVisibility(const QuadNode& quad_node, const Camera& camera) +{ + // Plane equation setup + + XMMATRIX matProj = camera.GetRealProjection(); + + // Left plane + float fov_x = atan(1.0f / XMVectorGetX(matProj.r[0])); + XMVECTOR plane_left = XMVectorSet(cos(fov_x), 0, sin(fov_x), 0); + // Right plane + XMVECTOR plane_right = XMVectorSet(-cos(fov_x), 0, sin(fov_x), 0); + + // Bottom plane + float fov_y = atan(1.0f / XMVectorGetY(matProj.r[1])); + XMVECTOR plane_bottom = XMVectorSet(0, cos(fov_y), sin(fov_y), 0); + // Top plane + XMVECTOR plane_top = XMVectorSet(0, -cos(fov_y), sin(fov_y), 0); + + // Test quad corners against view frustum in view space + XMVECTOR corner_verts[4]; + corner_verts[0] = XMVectorSet(quad_node.bottom_left.x, quad_node.bottom_left.y, 0, 1); + corner_verts[1] = corner_verts[0] + XMVectorSet(quad_node.length, 0, 0, 0); + corner_verts[2] = corner_verts[0] + XMVectorSet(quad_node.length, 
quad_node.length, 0, 0); + corner_verts[3] = corner_verts[0] + XMVectorSet(0, quad_node.length, 0, 0); + + XMMATRIX matView = XMMATRIX(1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1) * camera.GetView(); + corner_verts[0] = XMVector4Transform(corner_verts[0], matView); + corner_verts[1] = XMVector4Transform(corner_verts[1], matView); + corner_verts[2] = XMVector4Transform(corner_verts[2], matView); + corner_verts[3] = XMVector4Transform(corner_verts[3], matView); + + // Test against eye plane + if (XMVectorGetZ(corner_verts[0]) < 0 && XMVectorGetZ(corner_verts[1]) < 0 && XMVectorGetZ(corner_verts[2]) < 0 && XMVectorGetZ(corner_verts[3]) < 0) + return false; + + // Test against left plane + float dist_0 = XMVectorGetX(XMVector4Dot(corner_verts[0], plane_left)); + float dist_1 = XMVectorGetX(XMVector4Dot(corner_verts[1], plane_left)); + float dist_2 = XMVectorGetX(XMVector4Dot(corner_verts[2], plane_left)); + float dist_3 = XMVectorGetX(XMVector4Dot(corner_verts[3], plane_left)); + if (dist_0 < 0 && dist_1 < 0 && dist_2 < 0 && dist_3 < 0) + return false; + + // Test against right plane + dist_0 = XMVectorGetX(XMVector4Dot(corner_verts[0], plane_right)); + dist_1 = XMVectorGetX(XMVector4Dot(corner_verts[1], plane_right)); + dist_2 = XMVectorGetX(XMVector4Dot(corner_verts[2], plane_right)); + dist_3 = XMVectorGetX(XMVector4Dot(corner_verts[3], plane_right)); + if (dist_0 < 0 && dist_1 < 0 && dist_2 < 0 && dist_3 < 0) + return false; + + // Test against bottom plane + dist_0 = XMVectorGetX(XMVector4Dot(corner_verts[0], plane_bottom)); + dist_1 = XMVectorGetX(XMVector4Dot(corner_verts[1], plane_bottom)); + dist_2 = XMVectorGetX(XMVector4Dot(corner_verts[2], plane_bottom)); + dist_3 = XMVectorGetX(XMVector4Dot(corner_verts[3], plane_bottom)); + if (dist_0 < 0 && dist_1 < 0 && dist_2 < 0 && dist_3 < 0) + return false; + + // Test against top plane + dist_0 = XMVectorGetX(XMVector4Dot(corner_verts[0], plane_top)); + dist_1 = XMVectorGetX(XMVector4Dot(corner_verts[1], 
plane_top)); + dist_2 = XMVectorGetX(XMVector4Dot(corner_verts[2], plane_top)); + dist_3 = XMVectorGetX(XMVector4Dot(corner_verts[3], plane_top)); + if (dist_0 < 0 && dist_1 < 0 && dist_2 < 0 && dist_3 < 0) + return false; + + return true; +} + +float wiOcean::estimateGridCoverage(const QuadNode& quad_node, const Camera& camera, float screen_area) +{ + // Estimate projected area + + // Test 16 points on the quad and find out the biggest one. + const static float sample_pos[16][2] = + { + { 0, 0 }, + { 0, 1 }, + { 1, 0 }, + { 1, 1 }, + { 0.5f, 0.333f }, + { 0.25f, 0.667f }, + { 0.75f, 0.111f }, + { 0.125f, 0.444f }, + { 0.625f, 0.778f }, + { 0.375f, 0.222f }, + { 0.875f, 0.556f }, + { 0.0625f, 0.889f }, + { 0.5625f, 0.037f }, + { 0.3125f, 0.37f }, + { 0.8125f, 0.704f }, + { 0.1875f, 0.148f }, + }; + + XMMATRIX matProj = camera.GetRealProjection(); + XMFLOAT3 eye = camera.translation; + XMVECTOR eye_point = XMVectorSet(eye.x, eye.z, eye.y, 0); + float grid_len_world = quad_node.length / g_MeshDim; + + float max_area_proj = 0; + for (int i = 0; i < 16; i++) + { + XMVECTOR test_point = XMVectorSet(quad_node.bottom_left.x + quad_node.length * sample_pos[i][0], quad_node.bottom_left.y + quad_node.length * sample_pos[i][1], 0, 0); + XMVECTOR eye_vec = test_point - eye_point; + float dist = XMVectorGetX(XMVector3Length(eye_vec)); + + float area_world = grid_len_world * grid_len_world;// * abs(eye_point.z) / sqrt(nearest_sqr_dist); + float area_proj = area_world * XMVectorGetX(matProj.r[0]) * XMVectorGetY(matProj.r[1]) / (dist * dist); + + if (max_area_proj < area_proj) + max_area_proj = area_proj; + } + + float pixel_coverage = max_area_proj * screen_area * 0.25f; + + return pixel_coverage; +} + +bool wiOcean::isLeaf(const QuadNode& quad_node) +{ + return (quad_node.sub_node[0] == -1 && quad_node.sub_node[1] == -1 && quad_node.sub_node[2] == -1 && quad_node.sub_node[3] == -1); +} + +int wiOcean::searchLeaf(const vector& node_list, const XMFLOAT2& point) +{ + int index = 
-1; + + int size = (int)node_list.size(); + // Nothing to search in an empty list; bail out before indexing node_list[size - 1]. + if (size == 0) + return -1; + + // Start from the last entry: buildNodeList pushes a node after its sub-nodes, so the root is stored last. + QuadNode node = node_list[size - 1]; + + while (!isLeaf(node)) + { + bool found = false; + + for (int i = 0; i < 4; i++) + { + index = node.sub_node[i]; + if (index == -1) + continue; + + QuadNode sub_node = node_list[index]; + if (point.x >= sub_node.bottom_left.x && point.x <= sub_node.bottom_left.x + sub_node.length && + point.y >= sub_node.bottom_left.y && point.y <= sub_node.bottom_left.y + sub_node.length) + { + node = sub_node; + found = true; + break; + } + } + + if (!found) + return -1; + } + + return index; +} + +// Select the index pattern used to draw quad_node. A point just outside the middle of each edge is looked up in the render list, and each edge is classified (type 0, 1 or 2) from the relative grid scale of the leaf found there. +wiOcean::QuadRenderParam& wiOcean::selectMeshPattern(const QuadNode& quad_node) +{ + // Check 4 adjacent quad. Each probe point lies half a patch_length beyond the midpoint of one edge. + XMVECTOR bottom_left = XMLoadFloat2(&quad_node.bottom_left); + XMVECTOR tmp; + + XMFLOAT2 point_left; + tmp = bottom_left + XMVectorSet(-m_param.patch_length * 0.5f, quad_node.length * 0.5f, 0, 0); + XMStoreFloat2(&point_left, tmp); + int left_adj_index = searchLeaf(g_render_list, point_left); + + XMFLOAT2 point_right; + tmp = bottom_left + XMVectorSet(quad_node.length + m_param.patch_length * 0.5f, quad_node.length * 0.5f, 0, 0); + XMStoreFloat2(&point_right, tmp); + int right_adj_index = searchLeaf(g_render_list, point_right); + + XMFLOAT2 point_bottom; + tmp = bottom_left + XMVectorSet(quad_node.length * 0.5f, -m_param.patch_length * 0.5f, 0, 0); + XMStoreFloat2(&point_bottom, tmp); + int bottom_adj_index = searchLeaf(g_render_list, point_bottom); + + XMFLOAT2 point_top; + tmp = bottom_left + XMVectorSet(quad_node.length * 0.5f, quad_node.length + m_param.patch_length * 0.5f, 0, 0); + XMStoreFloat2(&point_top, tmp); + int top_adj_index = searchLeaf(g_render_list, point_top); + + int left_type = 0; + if (left_adj_index != -1 && g_render_list[left_adj_index].length > quad_node.length * 0.999f) + { + QuadNode adj_node = g_render_list[left_adj_index]; + float scale = adj_node.length / quad_node.length * (g_MeshDim >> quad_node.lod) / (g_MeshDim >> adj_node.lod); + if (scale > 3.999f) + 
left_type = 2; + else if (scale > 1.999f) + left_type = 1; + } + + int right_type = 0; + if (right_adj_index != -1 && g_render_list[right_adj_index].length > quad_node.length * 0.999f) + { + QuadNode adj_node = g_render_list[right_adj_index]; + float scale = adj_node.length / quad_node.length * (g_MeshDim >> quad_node.lod) / (g_MeshDim >> adj_node.lod); + if (scale > 3.999f) + right_type = 2; + else if (scale > 1.999f) + right_type = 1; + } + + int bottom_type = 0; + if (bottom_adj_index != -1 && g_render_list[bottom_adj_index].length > quad_node.length * 0.999f) + { + QuadNode adj_node = g_render_list[bottom_adj_index]; + float scale = adj_node.length / quad_node.length * (g_MeshDim >> quad_node.lod) / (g_MeshDim >> adj_node.lod); + if (scale > 3.999f) + bottom_type = 2; + else if (scale > 1.999f) + bottom_type = 1; + } + + int top_type = 0; + if (top_adj_index != -1 && g_render_list[top_adj_index].length > quad_node.length * 0.999f) + { + QuadNode adj_node = g_render_list[top_adj_index]; + float scale = adj_node.length / quad_node.length * (g_MeshDim >> quad_node.lod) / (g_MeshDim >> adj_node.lod); + if (scale > 3.999f) + top_type = 2; + else if (scale > 1.999f) + top_type = 1; + } + + // Check lookup table, indexed [lod][L][R][B][T] + return g_mesh_patterns[quad_node.lod][left_type][right_type][bottom_type][top_type]; +} + +// Add quad_node to the render list if it passes the visibility test, recursing into its sub-quads when it needs subdividing. Return value: the node's position in the list if it was pushed, -1 otherwise. +int wiOcean::buildNodeList(QuadNode& quad_node, const Camera& camera) +{ + // Check against view frustum + if (!checkNodeVisibility(quad_node, camera)) + return -1; + + // Estimate the min grid coverage + auto res = wiRenderer::GetInternalResolution(); + float min_coverage = estimateGridCoverage(quad_node, camera, (float)res.x*res.y); + + // Recursively attach sub-nodes. 
+ bool visible = true; + XMVECTOR bottom_left = XMLoadFloat2(&quad_node.bottom_left); + XMFLOAT2 tmp; + if (min_coverage > g_UpperGridCoverage && quad_node.length > m_param.patch_length) + { + // Recursive rendering for sub-quads. + QuadNode sub_node_0 = { quad_node.bottom_left, quad_node.length / 2, 0,{ -1, -1, -1, -1 } }; + quad_node.sub_node[0] = buildNodeList(sub_node_0, camera); + + XMStoreFloat2(&tmp, bottom_left + XMVectorSet(quad_node.length / 2, 0, 0, 0)); + QuadNode sub_node_1 = { tmp, quad_node.length / 2, 0,{ -1, -1, -1, -1 } }; + quad_node.sub_node[1] = buildNodeList(sub_node_1, camera); + + XMStoreFloat2(&tmp, bottom_left + XMVectorSet(quad_node.length / 2, quad_node.length / 2, 0, 0)); + QuadNode sub_node_2 = { tmp, quad_node.length / 2, 0,{ -1, -1, -1, -1 } }; + quad_node.sub_node[2] = buildNodeList(sub_node_2, camera); + + XMStoreFloat2(&tmp, bottom_left + XMVectorSet(0, quad_node.length / 2, 0, 0)); + QuadNode sub_node_3 = { tmp, quad_node.length / 2, 0,{ -1, -1, -1, -1 } }; + quad_node.sub_node[3] = buildNodeList(sub_node_3, camera); + + visible = !isLeaf(quad_node); + } + + if (visible) + { + // Estimate mesh LOD + int lod = 0; + for (lod = 0; lod < g_Lods - 1; lod++) + { + if (min_coverage > g_UpperGridCoverage) + break; + min_coverage *= 4; + } + + // We don't use 1x1 and 2x2 patch. So the highest level is g_Lods - 2. 
+ quad_node.lod = min(lod, g_Lods - 2); + } + else + return -1; + + // Insert into the list + int position = (int)g_render_list.size(); + g_render_list.push_back(quad_node); + + return position; +} + +void wiOcean::Render(const Camera* camera, float time, GRAPHICSTHREAD threadID) +{ + GraphicsDevice* device = wiRenderer::GetDevice(); + bool wire = wiRenderer::IsWireRender(); + + // Build rendering list + g_render_list.clear(); + float ocean_extent = m_param.patch_length * (1 << g_FurthestCover); + QuadNode root_node = { XMFLOAT2(-ocean_extent * 0.5f, -ocean_extent * 0.5f), ocean_extent, 0,{ -1,-1,-1,-1 } }; + buildNodeList(root_node, *camera); + + // Matrices + XMMATRIX matView = XMMATRIX(1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1) * camera->GetView(); + XMMATRIX matProj = camera->GetProjection(); + + // VS & PS + device->BindVS(g_pOceanSurfVS, threadID); + device->BindPS(wire ? g_pWireframePS : g_pOceanSurfPS, threadID); + + // Textures + GPUResource* vs_srvs[2] = { m_pDisplacementMap, g_pPerlinMap }; + device->BindResourcesVS(&vs_srvs[0], TEXSLOT_ONDEMAND0, 2, threadID); + + GPUResource* ps_srvs[4] = { g_pPerlinMap, m_pGradientMap, g_pFresnelMap, wiRenderer::GetEnviromentMap() }; + device->BindResourcesPS(&ps_srvs[0], TEXSLOT_ONDEMAND1, 4, threadID); + + // IA setup + device->BindIndexBuffer(g_pMeshIB, INDEXBUFFER_FORMAT::INDEXFORMAT_32BIT, 0, threadID); + + GPUBuffer* vbs[1] = { g_pMeshVB }; + UINT strides[1] = { sizeof(ocean_vertex) }; + UINT offsets[1] = { 0 }; + device->BindVertexBuffers(&vbs[0], 0, 1, &strides[0], &offsets[0], threadID); + + device->BindVertexLayout(g_pMeshLayout, threadID); + + // State blocks + device->BindRasterizerState(wire ? 
g_pRSState_Wireframe : g_pRSState_Solid, threadID); + device->BindDepthStencilState(g_pDSState_Disable, 0, threadID); + + // Shading constants: filled once here, before the per-patch loop + + Ocean_Rendering_ShadingCB shading_data; + // Grid side length * 2 + shading_data.g_TexelLength_x2 = m_param.patch_length / m_param.dmap_dim * 2; + // Color + shading_data.g_SkyColor = g_SkyColor; + shading_data.g_WaterbodyColor = waterColor; + // Texcoord + shading_data.g_UVScale = 1.0f / m_param.patch_length; + shading_data.g_UVOffset = 0.5f / m_param.dmap_dim; + // Perlin + shading_data.g_PerlinSize = g_PerlinSize; + shading_data.g_PerlinAmplitude = g_PerlinAmplitude; + shading_data.g_PerlinGradient = g_PerlinGradient; + shading_data.g_PerlinOctave = g_PerlinOctave; + // Multiple reflection workaround + shading_data.g_BendParam = g_BendParam; + // Sun streaks + shading_data.g_SunColor = g_SunColor; + shading_data.g_SunDir = g_SunDir; + shading_data.g_Shineness = g_Shineness; + + // Upload the shading constants and bind the same buffer to both the vertex and pixel stages + device->UpdateBuffer(g_pShadingCB, &shading_data, threadID); + + device->BindConstantBufferVS(g_pShadingCB, CB_GETBINDSLOT(Ocean_Rendering_ShadingCB), threadID); + device->BindConstantBufferPS(g_pShadingCB, CB_GETBINDSLOT(Ocean_Rendering_ShadingCB), threadID); + + // We assume the center of the ocean surface at (0, 0, 0). 
+ for (int i = 0; i < (int)g_render_list.size(); i++) + { + QuadNode& node = g_render_list[i]; + + if (!isLeaf(node)) + continue; + + // Check adjacent patches and select mesh pattern + QuadRenderParam& render_param = selectMeshPattern(node); + + // Find the right LOD to render + int level_size = g_MeshDim; + for (int lod = 0; lod < node.lod; lod++) + level_size >>= 1; + + // Matrices and constants + Ocean_Rendering_PatchCB call_consts; + + // Expand of the local coordinate to world space patch size + XMMATRIX matScale = XMMatrixScaling(node.length / level_size, node.length / level_size, 0); + call_consts.g_matLocal = XMMatrixTranspose(matScale); + + // WVP matrix + XMMATRIX matWorld = XMMatrixTranslation(node.bottom_left.x, node.bottom_left.y, 0); + XMMATRIX matWVP = matWorld * matView * matProj; + call_consts.g_matWorldViewProj = XMMatrixTranspose(matWVP); + + // Texcoord for perlin noise + XMVECTOR uv_base = XMLoadFloat2(&node.bottom_left) / m_param.patch_length * g_PerlinSize; + XMStoreFloat2(&call_consts.g_UVBase, uv_base); + + // Constant g_PerlinSpeed need to be adjusted mannually + XMVECTOR perlin_move = -XMLoadFloat2(&g_WindDir) * time * g_PerlinSpeed; + XMStoreFloat2(&call_consts.g_PerlinMovement, perlin_move); + + // Eye point + XMMATRIX matInvWV = XMMatrixInverse(nullptr, matWorld * matView); + XMVECTOR vLocalEye = XMVector3TransformCoord(XMVectorSet(0, 0, 0, 1), matInvWV); + XMStoreFloat3(&call_consts.g_LocalEye, vLocalEye); + + device->UpdateBuffer(g_pPerCallCB, &call_consts, threadID); + + device->BindConstantBufferVS(g_pPerCallCB, CB_GETBINDSLOT(Ocean_Rendering_PatchCB), threadID); + device->BindConstantBufferPS(g_pPerCallCB, CB_GETBINDSLOT(Ocean_Rendering_PatchCB), threadID); + + // Perform draw call + if (render_param.num_inner_faces > 0) + { + // Inner mesh of the patch + device->BindPrimitiveTopology(TRIANGLESTRIP, threadID); + device->DrawIndexed(render_param.num_inner_faces + 2, render_param.inner_start_index, 0, threadID); + } + + if 
(render_param.num_boundary_faces > 0) + { + // Boundary mesh of the patch + device->BindPrimitiveTopology(TRIANGLELIST, threadID); + device->DrawIndexed(render_param.num_boundary_faces * 3, render_param.boundary_start_index, 0, threadID); + } + } +} + + +void wiOcean::LoadShaders() +{ + + m_pUpdateSpectrumCS = static_cast(wiResourceManager::GetShaderManager()->add(wiRenderer::SHADERPATH + "oceanSimulatorCS.cso", wiResourceManager::COMPUTESHADER)); + m_pUpdateDisplacementMapCS = static_cast(wiResourceManager::GetShaderManager()->add(wiRenderer::SHADERPATH + "oceanUpdateDisplacementMapCS.cso", wiResourceManager::COMPUTESHADER)); + m_pUpdateGradientFoldingCS = static_cast(wiResourceManager::GetShaderManager()->add(wiRenderer::SHADERPATH + "oceanUpdateGradientFoldingCS.cso", wiResourceManager::COMPUTESHADER)); + + + { + VertexLayoutDesc layout[] = + { + { "POSITION", 0, FORMAT_R32G32_FLOAT, 0, 0, INPUT_PER_VERTEX_DATA, 0 }, + }; + UINT numElements = ARRAYSIZE(layout); + VertexShaderInfo* vsinfo = static_cast(wiResourceManager::GetShaderManager()->add(wiRenderer::SHADERPATH + "oceanSurfaceVS.cso", wiResourceManager::VERTEXSHADER, layout, numElements)); + if (vsinfo != nullptr) { + g_pOceanSurfVS = vsinfo->vertexShader; + g_pMeshLayout = vsinfo->vertexLayout; + } + } + + g_pOceanSurfPS = static_cast(wiResourceManager::GetShaderManager()->add(wiRenderer::SHADERPATH + "oceanSurfacePS.cso", wiResourceManager::PIXELSHADER)); + g_pWireframePS = static_cast(wiResourceManager::GetShaderManager()->add(wiRenderer::SHADERPATH + "oceanSurfaceSimplePS.cso", wiResourceManager::PIXELSHADER)); + +} + +void wiOcean::SetUpStatic() +{ + LoadShaders(); + CSFFT_512x512_Data_t::LoadShaders(); + fft512x512_create_plan(&m_fft_plan, 3); + + GraphicsDevice* device = wiRenderer::GetDevice(); + + static const int FRESNEL_TEX_SIZE = 256; + static const float g_SkyBlending = 16.0f; + + uint32_t* buffer = new uint32_t[FRESNEL_TEX_SIZE]; + for (int i = 0; i < FRESNEL_TEX_SIZE; i++) + { + float cos_a = 
i / (FLOAT)FRESNEL_TEX_SIZE; + // Using water's refraction index 1.33 + uint32_t fresnel = (uint32_t)(XMVectorGetX(XMFresnelTerm(XMVectorSet(cos_a, cos_a, cos_a, cos_a), XMVectorSet(1.33f, 1.33f, 1.33f, 1.33f))) * 255); + + uint32_t sky_blend = (uint32_t)(powf(1 / (1 + cos_a), g_SkyBlending) * 255); + + buffer[i] = (sky_blend << 8) | fresnel; + } + + + + Texture1DDesc tex_desc; + tex_desc.Width = FRESNEL_TEX_SIZE; + tex_desc.MipLevels = 1; + tex_desc.ArraySize = 1; + tex_desc.Format = FORMAT_R8G8B8A8_UNORM; + tex_desc.Usage = USAGE_IMMUTABLE; + tex_desc.BindFlags = BIND_SHADER_RESOURCE; + tex_desc.CPUAccessFlags = 0; + tex_desc.MiscFlags = 0; + + SubresourceData init_data; + init_data.pSysMem = buffer; + init_data.SysMemPitch = 0; + init_data.SysMemSlicePitch = 0; + + HRESULT hr = wiRenderer::GetDevice()->CreateTexture1D(&tex_desc, &init_data, &g_pFresnelMap); + assert(SUCCEEDED(hr)); + + delete[] buffer; + + + wiRenderer::GetDevice()->CreateTextureFromFile("perlin_noise.dds", &g_pPerlinMap, true, GRAPHICSTHREAD_IMMEDIATE); + + + // Constants + GPUBufferDesc cb_desc; + cb_desc.Usage = USAGE_DYNAMIC; + cb_desc.BindFlags = BIND_CONSTANT_BUFFER; + cb_desc.CPUAccessFlags = CPU_ACCESS_WRITE; + cb_desc.MiscFlags = 0; + cb_desc.ByteWidth = sizeof(Ocean_Rendering_PatchCB); + cb_desc.StructureByteStride = 0; + g_pPerCallCB = new GPUBuffer; + device->CreateBuffer(&cb_desc, nullptr, g_pPerCallCB); + + + cb_desc.Usage = USAGE_DYNAMIC; + cb_desc.CPUAccessFlags = CPU_ACCESS_WRITE; + cb_desc.ByteWidth = sizeof(Ocean_Rendering_ShadingCB); + cb_desc.StructureByteStride = 0; + g_pShadingCB = new GPUBuffer; + device->CreateBuffer(&cb_desc, nullptr, g_pShadingCB); + + // State blocks + RasterizerStateDesc ras_desc; + ras_desc.FillMode = FILL_SOLID; + ras_desc.CullMode = CULL_NONE; + ras_desc.FrontCounterClockwise = false; + ras_desc.DepthBias = 0; + ras_desc.SlopeScaledDepthBias = 0.0f; + ras_desc.DepthBiasClamp = 0.0f; + ras_desc.DepthClipEnable = true; + ras_desc.ScissorEnable = 
false; + ras_desc.MultisampleEnable = true; + ras_desc.AntialiasedLineEnable = false; + + g_pRSState_Solid = new RasterizerState; + device->CreateRasterizerState(&ras_desc, g_pRSState_Solid); + + ras_desc.FillMode = FILL_WIREFRAME; + + g_pRSState_Wireframe = new RasterizerState; + device->CreateRasterizerState(&ras_desc, g_pRSState_Wireframe); + + DepthStencilStateDesc depth_desc; + memset(&depth_desc, 0, sizeof(DepthStencilStateDesc)); + depth_desc.DepthEnable = true; + depth_desc.DepthWriteMask = DEPTH_WRITE_MASK_ALL; + depth_desc.DepthFunc = COMPARISON_GREATER; + depth_desc.StencilEnable = false; + g_pDSState_Disable = new DepthStencilState; + device->CreateDepthStencilState(&depth_desc, g_pDSState_Disable); + + BlendStateDesc blend_desc; + memset(&blend_desc, 0, sizeof(BlendStateDesc)); + blend_desc.AlphaToCoverageEnable = false; + blend_desc.IndependentBlendEnable = false; + blend_desc.RenderTarget[0].BlendEnable = true; + blend_desc.RenderTarget[0].SrcBlend = BLEND_SRC_ALPHA; + blend_desc.RenderTarget[0].DestBlend = BLEND_INV_SRC_ALPHA; + blend_desc.RenderTarget[0].BlendOp = BLEND_OP_ADD; + blend_desc.RenderTarget[0].SrcBlendAlpha = BLEND_ONE; + blend_desc.RenderTarget[0].DestBlendAlpha = BLEND_ZERO; + blend_desc.RenderTarget[0].BlendOpAlpha = BLEND_OP_ADD; + blend_desc.RenderTarget[0].RenderTargetWriteMask = COLOR_WRITE_ENABLE_ALL; + g_pBState_Transparent = new BlendState; + device->CreateBlendState(&blend_desc, g_pBState_Transparent); +} + +void wiOcean::CleanUpStatic() +{ + fft512x512_destroy_plan(&m_fft_plan); + + SAFE_DELETE(m_pUpdateSpectrumCS); + SAFE_DELETE(m_pUpdateDisplacementMapCS); + SAFE_DELETE(m_pUpdateGradientFoldingCS); + + SAFE_DELETE(g_pOceanSurfVS); + SAFE_DELETE(g_pOceanSurfPS); + SAFE_DELETE(g_pWireframePS); + + SAFE_DELETE(g_pMeshLayout); + + SAFE_DELETE(g_pFresnelMap); + SAFE_DELETE(g_pPerlinMap); + + SAFE_DELETE(g_pPerCallCB); + SAFE_DELETE(g_pShadingCB); + + SAFE_DELETE(g_pRSState_Solid); + SAFE_DELETE(g_pRSState_Wireframe); + 
SAFE_DELETE(g_pDSState_Disable); + SAFE_DELETE(g_pBState_Transparent); +} diff --git a/WickedEngine/wiOcean.h b/WickedEngine/wiOcean.h new file mode 100644 index 000000000..4287fe8fa --- /dev/null +++ b/WickedEngine/wiOcean.h @@ -0,0 +1,239 @@ +#ifndef _OCEAN_SIMULATOR_H +#define _OCEAN_SIMULATOR_H + +#include "CommonInclude.h" +#include "wiGraphicsAPI.h" +#include "wiFFTGenerator.h" + +#include + + +struct Camera; + +struct wiOceanParameter +{ + // Must be power of 2. + int dmap_dim; + // Typical value is 1000 ~ 2000 + float patch_length; + + // Adjust the time interval for simulation. + float time_scale; + // Amplitude for transverse wave. Around 1.0 + float wave_amplitude; + // Wind direction. Normalization not required. + XMFLOAT2 wind_dir; + // Around 100 ~ 1000 + float wind_speed; + // This value damps out the waves against the wind direction. + // Smaller value means higher wind dependency. + float wind_dependency; + // The amplitude for longitudinal wave. Must be positive. + float choppy_scale; + + wiOceanParameter() + { + // Original version: + //dmap_dim = 512; + //patch_length = 2000.0f; + //time_scale = 0.8f; + //wave_amplitude = 0.35f; + //wind_dir = XMFLOAT2(0.8f, 0.6f); + //wind_speed = 600.0f; + //wind_dependency = 0.07f; + //choppy_scale = 1.3f; + + // Scaled version: + dmap_dim = 512; + patch_length = 200.0f; + time_scale = 0.8f; + wave_amplitude = 80.0f; + wind_dir = XMFLOAT2(0.8f, 0.6f); + wind_speed = 600.0f; + wind_dependency = 0.07f; + choppy_scale = 1.3f; + } +}; + + +class wiOcean +{ +public: + wiOcean(const wiOceanParameter& params); + ~wiOcean(); + + // -------------------------- Initialization & simulation routines ------------------------ + + // Update ocean wave when tick arrives. 
+ void UpdateDisplacementMap(float time, GRAPHICSTHREAD threadID); + void Render(const Camera* camera, float time, GRAPHICSTHREAD threadID); + + // Texture access + wiGraphicsTypes::Texture2D* getDisplacementMap(); + wiGraphicsTypes::Texture2D* getGradientMap(); + + const wiOceanParameter& getParameters(); + + static void LoadShaders(); + static void SetUpStatic(); + static void CleanUpStatic(); + + XMFLOAT3 waterColor = XMFLOAT3(0.07f, 0.15f, 0.2f); + +protected: + wiOceanParameter m_param; + + // Simulation params: + + wiGraphicsTypes::Texture2D* m_pDisplacementMap; // (RGBA32F) + wiGraphicsTypes::Texture2D* m_pGradientMap; // (RGBA16F) + + // Initialize the vector field. + void initHeightMap(XMFLOAT2* out_h0, float* out_omega); + + + // ----------------------------------- CS simulation data --------------------------------- + + // Initial height field H(0) generated by Phillips spectrum & Gauss distribution. + wiGraphicsTypes::GPUBuffer* m_pBuffer_Float2_H0; + + // Angular frequency + wiGraphicsTypes::GPUBuffer* m_pBuffer_Float_Omega; + + // Height field H(t), choppy field Dx(t) and Dy(t) in frequency domain, updated each frame. + wiGraphicsTypes::GPUBuffer* m_pBuffer_Float2_Ht; + + // Height & choppy buffer in the space domain, corresponding to H(t), Dx(t) and Dy(t) + wiGraphicsTypes::GPUBuffer* m_pBuffer_Float_Dxyz; + + // Shaders, layouts and constants + static wiGraphicsTypes::ComputeShader* m_pUpdateSpectrumCS; + static wiGraphicsTypes::ComputeShader* m_pUpdateDisplacementMapCS; + static wiGraphicsTypes::ComputeShader* m_pUpdateGradientFoldingCS; + + wiGraphicsTypes::GPUBuffer* m_pImmutableCB; + wiGraphicsTypes::GPUBuffer* m_pPerFrameCB; + + // FFT wrap-up + static CSFFT512x512_Plan m_fft_plan; + + + + // Rendering params: + struct ocean_vertex + { + float index_x; + float index_y; + }; + + // Mesh properties: + + // Mesh grid dimension, must be 2^n. 4x4 ~ 256x256 + int g_MeshDim = 128; + // Subdivision thredshold. 
Any quad covers more pixels than this value needs to be subdivided. + float g_UpperGridCoverage = 64.0f; + // Draw distance = g_PatchLength * 2^g_FurthestCover + int g_FurthestCover = 8; + + + // Shading properties: + // Two colors for waterbody and sky color + XMFLOAT3 g_SkyColor = XMFLOAT3(0.38f, 0.45f, 0.56f); + // Blending term for sky cubemap + float g_SkyBlending = 16.0f; + + // Perlin wave parameters + float g_PerlinSize = 1.0f; + float g_PerlinSpeed = 0.06f; + XMFLOAT3 g_PerlinAmplitude = XMFLOAT3(35, 42, 57); + XMFLOAT3 g_PerlinGradient = XMFLOAT3(1.4f, 1.6f, 2.2f); + XMFLOAT3 g_PerlinOctave = XMFLOAT3(1.12f, 0.59f, 0.23f); + XMFLOAT2 g_WindDir; + + XMFLOAT3 g_BendParam = XMFLOAT3(0.1f, -0.4f, 0.2f); + + // Sunspot parameters + XMFLOAT3 g_SunDir = XMFLOAT3(0.936016f, -0.343206f, 0.0780013f); + XMFLOAT3 g_SunColor = XMFLOAT3(1.0f, 1.0f, 0.6f); + float g_Shineness = 400.0f; + + + + + struct QuadNode + { + XMFLOAT2 bottom_left; + float length; + int lod; + + int sub_node[4]; + }; + + struct QuadRenderParam + { + UINT num_inner_verts; + UINT num_inner_faces; + UINT inner_start_index; + + UINT num_boundary_verts; + UINT num_boundary_faces; + UINT boundary_start_index; + }; + + // Quad-tree LOD, 0 to 9 (1x1 ~ 512x512) + int g_Lods = 0; + // Pattern lookup array. Filled at init time. + QuadRenderParam g_mesh_patterns[9][3][3][3][3]; + // Pick a proper mesh pattern according to the adjacent patches. 
+ QuadRenderParam& selectMeshPattern(const QuadNode& quad_node); + + // Rendering list + std::vector g_render_list; + int buildNodeList(QuadNode& quad_node, const Camera& camera); + + // D3D11 buffers and layout + wiGraphicsTypes::GPUBuffer* g_pMeshVB = nullptr; + wiGraphicsTypes::GPUBuffer* g_pMeshIB = nullptr; + static wiGraphicsTypes::VertexLayout* g_pMeshLayout; + + // Color look up 1D texture + static wiGraphicsTypes::Texture1D* g_pFresnelMap; + + // Distant perlin wave + static wiGraphicsTypes::Texture2D* g_pPerlinMap; + + // HLSL shaders + static wiGraphicsTypes::VertexShader* g_pOceanSurfVS; + static wiGraphicsTypes::PixelShader* g_pOceanSurfPS; + static wiGraphicsTypes::PixelShader* g_pWireframePS; + + static wiGraphicsTypes::GPUBuffer* g_pPerCallCB; + static wiGraphicsTypes::GPUBuffer* g_pShadingCB; + + // State blocks + static wiGraphicsTypes::RasterizerState* g_pRSState_Solid; + static wiGraphicsTypes::RasterizerState* g_pRSState_Wireframe; + static wiGraphicsTypes::DepthStencilState* g_pDSState_Disable; + static wiGraphicsTypes::BlendState* g_pBState_Transparent; + + + // init & cleanup + void initRenderResource(); + void cleanupRenderResource(); + // create a triangle strip mesh for ocean surface. + void createSurfaceMesh(); + // create color/fresnel lookup table. 
+ void createFresnelMap(); + // create perlin noise texture for far-sight rendering + void loadTextures(); + + int generateBoundaryMesh(int left_degree, int right_degree, int bottom_degree, int top_degree, + RECT vert_rect, DWORD* output); + int generateInnerMesh(RECT vert_rect, DWORD* output); + bool checkNodeVisibility(const QuadNode& quad_node, const Camera& camera); + float estimateGridCoverage(const QuadNode& quad_node, const Camera& camera, float screen_area); + bool isLeaf(const QuadNode& quad_node); + int searchLeaf(const std::vector& node_list, const XMFLOAT2& point); +}; + +#endif // _OCEAN_SIMULATOR_H diff --git a/WickedEngine/wiRenderer.cpp b/WickedEngine/wiRenderer.cpp index d8983f266..a38dc8187 100644 --- a/WickedEngine/wiRenderer.cpp +++ b/WickedEngine/wiRenderer.cpp @@ -24,6 +24,7 @@ #include "wiRectPacker.h" #include "wiBackLog.h" #include "wiProfiler.h" +#include #include @@ -76,6 +77,7 @@ int wiRenderer::visibleCount; wiRenderTarget wiRenderer::normalMapRT, wiRenderer::imagesRT, wiRenderer::imagesRTAdd; Camera *wiRenderer::cam = nullptr, *wiRenderer::refCam = nullptr, *wiRenderer::prevFrameCam = nullptr; PHYSICS* wiRenderer::physicsEngine = nullptr; +wiOcean* wiRenderer::ocean = nullptr; string wiRenderer::SHADERPATH = "shaders/"; #pragma endregion @@ -936,6 +938,8 @@ void wiRenderer::ReloadShaders(const std::string& path) wiFont::LoadShaders(); wiImage::LoadShaders(); wiLensFlare::LoadShaders(); + wiOcean::LoadShaders(); + CSFFT_512x512_Data_t::LoadShaders(); GetDevice()->UNLOCK(); } @@ -2126,6 +2130,12 @@ void wiRenderer::UpdateRenderData(GRAPHICSTHREAD threadID) hair->ComputeCulling(getCamera(), threadID); } + // Compute water simulation: + if (ocean != nullptr) + { + ocean->UpdateDisplacementMap(renderTime, threadID); + } + // Render out of date environment probes: RefreshEnvProbes(threadID); @@ -4669,6 +4679,11 @@ void wiRenderer::DrawWorldTransparent(Camera* camera, SHADERTYPE shaderType, Tex 
GetDevice()->BindResourcePS(resourceBuffers[RBTYPE_ENTITYINDEXLIST_TRANSPARENT], SBSLOT_ENTITYINDEXLIST, threadID); } + if (ocean != nullptr) + { + ocean->Render(camera, renderTime, threadID); + } + if (grass) { GetDevice()->BindDepthStencilState(depthStencils[DSSTYPE_HAIRALPHACOMPOSITION], STENCILREF_DEFAULT, threadID); // minimizes overdraw by depthcomp = less @@ -6604,3 +6619,13 @@ bool wiRenderer::GetAdvancedRefractionsEnabled() { return advancedRefractions && GetDevice()->CheckCapability(GraphicsDevice::GRAPHICSDEVICE_CAPABILITY_UNORDEREDACCESSTEXTURE_LOAD_FORMAT_EXT); } + +void wiRenderer::SetOceanEnabled(bool enabled, const wiOceanParameter& params) +{ + SAFE_DELETE(ocean); + + if (enabled) + { + ocean = new wiOcean(params); + } +} diff --git a/WickedEngine/wiRenderer.h b/WickedEngine/wiRenderer.h index 973508490..f8d1b27f6 100644 --- a/WickedEngine/wiRenderer.h +++ b/WickedEngine/wiRenderer.h @@ -42,6 +42,8 @@ struct Cullable; class PHYSICS; class wiRenderTarget; class wiWaterPlane; +class wiOcean; +struct wiOceanParameter; typedef std::map MeshCollection; typedef std::map MaterialCollection; @@ -534,6 +536,10 @@ public: static PHYSICS* physicsEngine; static void SynchronizeWithPhysicsEngine(float dt = 1.0f / 60.0f); + static wiOcean* ocean; + static void SetOceanEnabled(bool enabled, const wiOceanParameter& params); + static wiOcean* GetOcean() { return ocean; } + static Model* LoadModel(const std::string& dir, const std::string& name, const XMMATRIX& transform = XMMatrixIdentity(), const std::string& ident = "common"); static void LoadWorldInfo(const std::string& dir, const std::string& name); static void LoadDefaultLighting(); diff --git a/models/Sample/textures/water_bump.dds b/models/Sample/textures/water_bump.dds new file mode 100644 index 000000000..ec70d109f Binary files /dev/null and b/models/Sample/textures/water_bump.dds differ