Files
WickedEngine/WickedEngine/wiProfiler.cpp
T
2022-06-11 19:42:54 +02:00

379 lines
8.9 KiB
C++

#include "wiProfiler.h"
#include "wiGraphicsDevice.h"
#include "wiFont.h"
#include "wiImage.h"
#include "wiTimer.h"
#include "wiTextureHelper.h"
#include "wiHelper.h"
#include "wiUnorderedMap.h"
#include "wiBacklog.h"
#if __has_include("Superluminal/PerformanceAPI_capi.h")
#include "Superluminal/PerformanceAPI_capi.h"
#include "Superluminal/PerformanceAPI_loader.h"
#endif // superluminal
#include <string>
#include <stack>
#include <mutex>
#include <atomic>
#include <sstream>
using namespace wi::graphics;
namespace wi::profiler
{
bool ENABLED = false;
bool ENABLED_REQUEST = false;
bool initialized = false;
std::mutex lock;
range_id cpu_frame;
range_id gpu_frame;
GPUQueryHeap queryHeap;
GPUBuffer queryResultBuffer[GraphicsDevice::GetBufferCount() + 1];
std::atomic<uint32_t> nextQuery{ 0 };
int queryheap_idx = 0;
#if PERFORMANCEAPI_ENABLED
PerformanceAPI_ModuleHandle superluminal_handle = {};
PerformanceAPI_Functions superluminal_functions = {};
#endif // PERFORMANCEAPI_ENABLED
struct Range
{
bool in_use = false;
std::string name;
float times[20] = {};
int avg_counter = 0;
float time = 0;
CommandList cmd;
wi::Timer cpuTimer;
int gpuBegin[arraysize(queryResultBuffer)];
int gpuEnd[arraysize(queryResultBuffer)];
bool IsCPURange() const { return !cmd.IsValid(); }
};
wi::unordered_map<size_t, Range> ranges;
void BeginFrame()
{
if (ENABLED_REQUEST != ENABLED)
{
ranges.clear();
ENABLED = ENABLED_REQUEST;
}
if (!ENABLED)
return;
if (!initialized)
{
initialized = true;
ranges.reserve(100);
GraphicsDevice* device = wi::graphics::GetDevice();
GPUQueryHeapDesc desc;
desc.type = GpuQueryType::TIMESTAMP;
desc.query_count = 1024;
bool success = device->CreateQueryHeap(&desc, &queryHeap);
assert(success);
GPUBufferDesc bd;
bd.usage = Usage::READBACK;
bd.size = desc.query_count * sizeof(uint64_t);
for (int i = 0; i < arraysize(queryResultBuffer); ++i)
{
success = device->CreateBuffer(&bd, nullptr, &queryResultBuffer[i]);
assert(success);
}
#if PERFORMANCEAPI_ENABLED
superluminal_handle = PerformanceAPI_LoadFrom(L"PerformanceAPI.dll", &superluminal_functions);
if (superluminal_handle)
{
wi::backlog::post("[wi::profiler] Superluminal Performance API loaded");
}
#endif // PERFORMANCEAPI_ENABLED
}
cpu_frame = BeginRangeCPU("CPU Frame");
GraphicsDevice* device = wi::graphics::GetDevice();
CommandList cmd = device->BeginCommandList();
device->QueryReset(
&queryHeap,
0,
queryHeap.desc.query_count,
cmd
);
gpu_frame = BeginRangeGPU("GPU Frame", cmd);
}
void EndFrame(CommandList cmd)
{
if (!ENABLED || !initialized)
return;
GraphicsDevice* device = wi::graphics::GetDevice();
// note: read the GPU Frame end range manually because it will be on a separate command list than start point:
auto& gpu_range = ranges[gpu_frame];
gpu_range.gpuEnd[queryheap_idx] = nextQuery.fetch_add(1);
device->QueryEnd(&queryHeap, gpu_range.gpuEnd[queryheap_idx], cmd);
EndRange(cpu_frame);
double gpu_frequency = (double)device->GetTimestampFrequency() / 1000.0;
device->QueryResolve(
&queryHeap,
0,
nextQuery.load(),
&queryResultBuffer[queryheap_idx],
0ull,
cmd
);
nextQuery.store(0);
queryheap_idx = (queryheap_idx + 1) % arraysize(queryResultBuffer);
uint64_t* queryResults = (uint64_t*)queryResultBuffer[queryheap_idx].mapped_data;
for (auto& x : ranges)
{
auto& range = x.second;
if (!range.IsCPURange())
{
int begin_query = range.gpuBegin[queryheap_idx];
int end_query = range.gpuEnd[queryheap_idx];
if (queryResultBuffer[queryheap_idx].mapped_data != nullptr && begin_query >= 0 && end_query >= 0)
{
uint64_t begin_result = queryResults[begin_query];
uint64_t end_result = queryResults[end_query];
range.time = (float)abs((double)(end_result - begin_result) / gpu_frequency);
}
range.gpuBegin[queryheap_idx] = -1;
range.gpuEnd[queryheap_idx] = -1;
}
range.times[range.avg_counter++ % arraysize(range.times)] = range.time;
if (range.avg_counter > arraysize(range.times))
{
float avg_time = 0;
for (int i = 0; i < arraysize(range.times); ++i)
{
avg_time += range.times[i];
}
range.time = avg_time / arraysize(range.times);
}
range.in_use = false;
}
}
range_id BeginRangeCPU(const char* name)
{
if (!ENABLED || !initialized)
return 0;
#if PERFORMANCEAPI_ENABLED
if (superluminal_handle)
{
superluminal_functions.BeginEvent(name, nullptr, 0xFF0000FF);
}
#endif // PERFORMANCEAPI_ENABLED
range_id id = wi::helper::string_hash(name);
lock.lock();
// If one range name is hit multiple times, differentiate between them!
size_t differentiator = 0;
while (ranges[id].in_use)
{
wi::helper::hash_combine(id, differentiator++);
}
ranges[id].in_use = true;
ranges[id].name = name;
ranges[id].cpuTimer.record();
lock.unlock();
return id;
}
range_id BeginRangeGPU(const char* name, CommandList cmd)
{
if (!ENABLED || !initialized)
return 0;
range_id id = wi::helper::string_hash(name);
lock.lock();
// If one range name is hit multiple times, differentiate between them!
size_t differentiator = 0;
while (ranges[id].in_use)
{
wi::helper::hash_combine(id, differentiator++);
}
ranges[id].in_use = true;
ranges[id].name = name;
ranges[id].cmd = cmd;
ranges[id].gpuBegin[queryheap_idx] = nextQuery.fetch_add(1);
wi::graphics::GetDevice()->QueryEnd(&queryHeap, ranges[id].gpuBegin[queryheap_idx], cmd);
lock.unlock();
return id;
}
void EndRange(range_id id)
{
if (!ENABLED || !initialized)
return;
lock.lock();
auto it = ranges.find(id);
if (it != ranges.end())
{
if (it->second.IsCPURange())
{
it->second.time = (float)it->second.cpuTimer.elapsed();
#if PERFORMANCEAPI_ENABLED
if (superluminal_handle)
{
superluminal_functions.EndEvent();
}
#endif // PERFORMANCEAPI_ENABLED
}
else
{
ranges[id].gpuEnd[queryheap_idx] = nextQuery.fetch_add(1);
wi::graphics::GetDevice()->QueryEnd(&queryHeap, it->second.gpuEnd[queryheap_idx], it->second.cmd);
}
}
else
{
assert(0);
}
lock.unlock();
}
struct Hits
{
uint32_t num_hits = 0;
float total_time = 0;
};
wi::unordered_map<std::string, Hits> time_cache_cpu;
wi::unordered_map<std::string, Hits> time_cache_gpu;
void DrawData(
const wi::Canvas& canvas,
float x,
float y,
CommandList cmd,
ColorSpace colorspace
)
{
if (!ENABLED || !initialized)
return;
wi::image::SetCanvas(canvas);
wi::font::SetCanvas(canvas);
std::stringstream ss("");
ss.precision(2);
ss << "Frame Profiler Ranges:\n----------------------------\n";
for (auto& x : ranges)
{
if (x.second.IsCPURange())
{
if (x.first == cpu_frame)
continue;
time_cache_cpu[x.second.name].num_hits++;
time_cache_cpu[x.second.name].total_time += x.second.time;
}
else
{
if (x.first == gpu_frame)
continue;
time_cache_gpu[x.second.name].num_hits++;
time_cache_gpu[x.second.name].total_time += x.second.time;
}
}
// Print CPU ranges:
ss << ranges[cpu_frame].name << ": " << std::fixed << ranges[cpu_frame].time << " ms" << std::endl;
for (auto& x : time_cache_cpu)
{
if (x.second.num_hits > 1)
{
ss << "\t" << x.first << " (" << x.second.num_hits << "x)" << ": " << std::fixed << x.second.total_time << " ms" << std::endl;
}
else
{
ss << "\t" << x.first << ": " << std::fixed << x.second.total_time << " ms" << std::endl;
}
x.second.num_hits = 0;
x.second.total_time = 0;
}
ss << std::endl;
// Print GPU ranges:
ss << ranges[gpu_frame].name << ": " << std::fixed << ranges[gpu_frame].time << " ms" << std::endl;
for (auto& x : time_cache_gpu)
{
if (x.second.num_hits > 1)
{
ss << "\t" << x.first << " (" << x.second.num_hits << "x)" << ": " << std::fixed << x.second.total_time << " ms" << std::endl;
}
else
{
ss << "\t" << x.first << ": " << std::fixed << x.second.total_time << " ms" << std::endl;
}
x.second.num_hits = 0;
x.second.total_time = 0;
}
wi::font::Params params = wi::font::Params(x, y, wi::font::WIFONTSIZE_DEFAULT - 4, wi::font::WIFALIGN_LEFT, wi::font::WIFALIGN_TOP, wi::Color(255, 255, 255, 255), wi::Color(0, 0, 0, 255));
wi::image::Params fx;
fx.pos.x = (float)params.posX;
fx.pos.y = (float)params.posY;
fx.siz.x = (float)wi::font::TextWidth(ss.str(), params);
fx.siz.y = (float)wi::font::TextHeight(ss.str(), params);
fx.color = wi::Color(20, 20, 20, 230);
if (colorspace != ColorSpace::SRGB)
{
params.enableLinearOutputMapping(9);
fx.enableLinearOutputMapping(9);
}
wi::image::Draw(wi::texturehelper::getWhite(), fx, cmd);
wi::font::Draw(ss.str(), params, cmd);
}
void SetEnabled(bool value)
{
// Don't enable/disable the profiler immediately, only on the next frame
// to avoid enabling inside a Begin/End by mistake
ENABLED_REQUEST = value;
}
bool IsEnabled()
{
return ENABLED;
}
}