379 lines
8.9 KiB
C++
379 lines
8.9 KiB
C++
#include "wiProfiler.h"
|
|
#include "wiGraphicsDevice.h"
|
|
#include "wiFont.h"
|
|
#include "wiImage.h"
|
|
#include "wiTimer.h"
|
|
#include "wiTextureHelper.h"
|
|
#include "wiHelper.h"
|
|
#include "wiUnorderedMap.h"
|
|
#include "wiBacklog.h"
|
|
|
|
#if __has_include("Superluminal/PerformanceAPI_capi.h")
|
|
#include "Superluminal/PerformanceAPI_capi.h"
|
|
#include "Superluminal/PerformanceAPI_loader.h"
|
|
#endif // superluminal
|
|
|
|
#include <string>
|
|
#include <stack>
|
|
#include <mutex>
|
|
#include <atomic>
|
|
#include <sstream>
|
|
|
|
using namespace wi::graphics;
|
|
|
|
namespace wi::profiler
|
|
{
|
|
bool ENABLED = false;
|
|
bool ENABLED_REQUEST = false;
|
|
bool initialized = false;
|
|
std::mutex lock;
|
|
range_id cpu_frame;
|
|
range_id gpu_frame;
|
|
GPUQueryHeap queryHeap;
|
|
GPUBuffer queryResultBuffer[GraphicsDevice::GetBufferCount() + 1];
|
|
std::atomic<uint32_t> nextQuery{ 0 };
|
|
int queryheap_idx = 0;
|
|
|
|
#if PERFORMANCEAPI_ENABLED
|
|
PerformanceAPI_ModuleHandle superluminal_handle = {};
|
|
PerformanceAPI_Functions superluminal_functions = {};
|
|
#endif // PERFORMANCEAPI_ENABLED
|
|
|
|
struct Range
|
|
{
|
|
bool in_use = false;
|
|
std::string name;
|
|
float times[20] = {};
|
|
int avg_counter = 0;
|
|
float time = 0;
|
|
CommandList cmd;
|
|
|
|
wi::Timer cpuTimer;
|
|
|
|
int gpuBegin[arraysize(queryResultBuffer)];
|
|
int gpuEnd[arraysize(queryResultBuffer)];
|
|
|
|
bool IsCPURange() const { return !cmd.IsValid(); }
|
|
};
|
|
wi::unordered_map<size_t, Range> ranges;
|
|
|
|
void BeginFrame()
|
|
{
|
|
if (ENABLED_REQUEST != ENABLED)
|
|
{
|
|
ranges.clear();
|
|
ENABLED = ENABLED_REQUEST;
|
|
}
|
|
|
|
if (!ENABLED)
|
|
return;
|
|
|
|
if (!initialized)
|
|
{
|
|
initialized = true;
|
|
|
|
ranges.reserve(100);
|
|
|
|
GraphicsDevice* device = wi::graphics::GetDevice();
|
|
|
|
GPUQueryHeapDesc desc;
|
|
desc.type = GpuQueryType::TIMESTAMP;
|
|
desc.query_count = 1024;
|
|
bool success = device->CreateQueryHeap(&desc, &queryHeap);
|
|
assert(success);
|
|
|
|
GPUBufferDesc bd;
|
|
bd.usage = Usage::READBACK;
|
|
bd.size = desc.query_count * sizeof(uint64_t);
|
|
|
|
for (int i = 0; i < arraysize(queryResultBuffer); ++i)
|
|
{
|
|
success = device->CreateBuffer(&bd, nullptr, &queryResultBuffer[i]);
|
|
assert(success);
|
|
}
|
|
|
|
#if PERFORMANCEAPI_ENABLED
|
|
superluminal_handle = PerformanceAPI_LoadFrom(L"PerformanceAPI.dll", &superluminal_functions);
|
|
if (superluminal_handle)
|
|
{
|
|
wi::backlog::post("[wi::profiler] Superluminal Performance API loaded");
|
|
}
|
|
#endif // PERFORMANCEAPI_ENABLED
|
|
}
|
|
|
|
cpu_frame = BeginRangeCPU("CPU Frame");
|
|
|
|
GraphicsDevice* device = wi::graphics::GetDevice();
|
|
CommandList cmd = device->BeginCommandList();
|
|
|
|
device->QueryReset(
|
|
&queryHeap,
|
|
0,
|
|
queryHeap.desc.query_count,
|
|
cmd
|
|
);
|
|
|
|
gpu_frame = BeginRangeGPU("GPU Frame", cmd);
|
|
}
|
|
void EndFrame(CommandList cmd)
|
|
{
|
|
if (!ENABLED || !initialized)
|
|
return;
|
|
|
|
GraphicsDevice* device = wi::graphics::GetDevice();
|
|
|
|
// note: read the GPU Frame end range manually because it will be on a separate command list than start point:
|
|
auto& gpu_range = ranges[gpu_frame];
|
|
gpu_range.gpuEnd[queryheap_idx] = nextQuery.fetch_add(1);
|
|
device->QueryEnd(&queryHeap, gpu_range.gpuEnd[queryheap_idx], cmd);
|
|
|
|
EndRange(cpu_frame);
|
|
|
|
double gpu_frequency = (double)device->GetTimestampFrequency() / 1000.0;
|
|
|
|
device->QueryResolve(
|
|
&queryHeap,
|
|
0,
|
|
nextQuery.load(),
|
|
&queryResultBuffer[queryheap_idx],
|
|
0ull,
|
|
cmd
|
|
);
|
|
|
|
nextQuery.store(0);
|
|
queryheap_idx = (queryheap_idx + 1) % arraysize(queryResultBuffer);
|
|
uint64_t* queryResults = (uint64_t*)queryResultBuffer[queryheap_idx].mapped_data;
|
|
|
|
for (auto& x : ranges)
|
|
{
|
|
auto& range = x.second;
|
|
|
|
if (!range.IsCPURange())
|
|
{
|
|
int begin_query = range.gpuBegin[queryheap_idx];
|
|
int end_query = range.gpuEnd[queryheap_idx];
|
|
if (queryResultBuffer[queryheap_idx].mapped_data != nullptr && begin_query >= 0 && end_query >= 0)
|
|
{
|
|
uint64_t begin_result = queryResults[begin_query];
|
|
uint64_t end_result = queryResults[end_query];
|
|
range.time = (float)abs((double)(end_result - begin_result) / gpu_frequency);
|
|
}
|
|
range.gpuBegin[queryheap_idx] = -1;
|
|
range.gpuEnd[queryheap_idx] = -1;
|
|
}
|
|
range.times[range.avg_counter++ % arraysize(range.times)] = range.time;
|
|
|
|
if (range.avg_counter > arraysize(range.times))
|
|
{
|
|
float avg_time = 0;
|
|
for (int i = 0; i < arraysize(range.times); ++i)
|
|
{
|
|
avg_time += range.times[i];
|
|
}
|
|
range.time = avg_time / arraysize(range.times);
|
|
}
|
|
|
|
range.in_use = false;
|
|
}
|
|
}
|
|
|
|
range_id BeginRangeCPU(const char* name)
|
|
{
|
|
if (!ENABLED || !initialized)
|
|
return 0;
|
|
|
|
#if PERFORMANCEAPI_ENABLED
|
|
if (superluminal_handle)
|
|
{
|
|
superluminal_functions.BeginEvent(name, nullptr, 0xFF0000FF);
|
|
}
|
|
#endif // PERFORMANCEAPI_ENABLED
|
|
|
|
range_id id = wi::helper::string_hash(name);
|
|
|
|
lock.lock();
|
|
|
|
// If one range name is hit multiple times, differentiate between them!
|
|
size_t differentiator = 0;
|
|
while (ranges[id].in_use)
|
|
{
|
|
wi::helper::hash_combine(id, differentiator++);
|
|
}
|
|
ranges[id].in_use = true;
|
|
ranges[id].name = name;
|
|
ranges[id].cpuTimer.record();
|
|
|
|
lock.unlock();
|
|
|
|
return id;
|
|
}
|
|
range_id BeginRangeGPU(const char* name, CommandList cmd)
|
|
{
|
|
if (!ENABLED || !initialized)
|
|
return 0;
|
|
|
|
range_id id = wi::helper::string_hash(name);
|
|
|
|
lock.lock();
|
|
|
|
// If one range name is hit multiple times, differentiate between them!
|
|
size_t differentiator = 0;
|
|
while (ranges[id].in_use)
|
|
{
|
|
wi::helper::hash_combine(id, differentiator++);
|
|
}
|
|
ranges[id].in_use = true;
|
|
ranges[id].name = name;
|
|
ranges[id].cmd = cmd;
|
|
|
|
ranges[id].gpuBegin[queryheap_idx] = nextQuery.fetch_add(1);
|
|
wi::graphics::GetDevice()->QueryEnd(&queryHeap, ranges[id].gpuBegin[queryheap_idx], cmd);
|
|
|
|
lock.unlock();
|
|
|
|
return id;
|
|
}
|
|
void EndRange(range_id id)
|
|
{
|
|
if (!ENABLED || !initialized)
|
|
return;
|
|
|
|
lock.lock();
|
|
|
|
auto it = ranges.find(id);
|
|
if (it != ranges.end())
|
|
{
|
|
if (it->second.IsCPURange())
|
|
{
|
|
it->second.time = (float)it->second.cpuTimer.elapsed();
|
|
|
|
#if PERFORMANCEAPI_ENABLED
|
|
if (superluminal_handle)
|
|
{
|
|
superluminal_functions.EndEvent();
|
|
}
|
|
#endif // PERFORMANCEAPI_ENABLED
|
|
}
|
|
else
|
|
{
|
|
ranges[id].gpuEnd[queryheap_idx] = nextQuery.fetch_add(1);
|
|
wi::graphics::GetDevice()->QueryEnd(&queryHeap, it->second.gpuEnd[queryheap_idx], it->second.cmd);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
assert(0);
|
|
}
|
|
|
|
lock.unlock();
|
|
}
|
|
|
|
struct Hits
|
|
{
|
|
uint32_t num_hits = 0;
|
|
float total_time = 0;
|
|
};
|
|
wi::unordered_map<std::string, Hits> time_cache_cpu;
|
|
wi::unordered_map<std::string, Hits> time_cache_gpu;
|
|
void DrawData(
|
|
const wi::Canvas& canvas,
|
|
float x,
|
|
float y,
|
|
CommandList cmd,
|
|
ColorSpace colorspace
|
|
)
|
|
{
|
|
if (!ENABLED || !initialized)
|
|
return;
|
|
|
|
wi::image::SetCanvas(canvas);
|
|
wi::font::SetCanvas(canvas);
|
|
|
|
std::stringstream ss("");
|
|
ss.precision(2);
|
|
ss << "Frame Profiler Ranges:\n----------------------------\n";
|
|
|
|
for (auto& x : ranges)
|
|
{
|
|
if (x.second.IsCPURange())
|
|
{
|
|
if (x.first == cpu_frame)
|
|
continue;
|
|
time_cache_cpu[x.second.name].num_hits++;
|
|
time_cache_cpu[x.second.name].total_time += x.second.time;
|
|
}
|
|
else
|
|
{
|
|
if (x.first == gpu_frame)
|
|
continue;
|
|
time_cache_gpu[x.second.name].num_hits++;
|
|
time_cache_gpu[x.second.name].total_time += x.second.time;
|
|
}
|
|
}
|
|
|
|
// Print CPU ranges:
|
|
ss << ranges[cpu_frame].name << ": " << std::fixed << ranges[cpu_frame].time << " ms" << std::endl;
|
|
for (auto& x : time_cache_cpu)
|
|
{
|
|
if (x.second.num_hits > 1)
|
|
{
|
|
ss << "\t" << x.first << " (" << x.second.num_hits << "x)" << ": " << std::fixed << x.second.total_time << " ms" << std::endl;
|
|
}
|
|
else
|
|
{
|
|
ss << "\t" << x.first << ": " << std::fixed << x.second.total_time << " ms" << std::endl;
|
|
}
|
|
x.second.num_hits = 0;
|
|
x.second.total_time = 0;
|
|
}
|
|
ss << std::endl;
|
|
|
|
// Print GPU ranges:
|
|
ss << ranges[gpu_frame].name << ": " << std::fixed << ranges[gpu_frame].time << " ms" << std::endl;
|
|
for (auto& x : time_cache_gpu)
|
|
{
|
|
if (x.second.num_hits > 1)
|
|
{
|
|
ss << "\t" << x.first << " (" << x.second.num_hits << "x)" << ": " << std::fixed << x.second.total_time << " ms" << std::endl;
|
|
}
|
|
else
|
|
{
|
|
ss << "\t" << x.first << ": " << std::fixed << x.second.total_time << " ms" << std::endl;
|
|
}
|
|
x.second.num_hits = 0;
|
|
x.second.total_time = 0;
|
|
}
|
|
|
|
wi::font::Params params = wi::font::Params(x, y, wi::font::WIFONTSIZE_DEFAULT - 4, wi::font::WIFALIGN_LEFT, wi::font::WIFALIGN_TOP, wi::Color(255, 255, 255, 255), wi::Color(0, 0, 0, 255));
|
|
|
|
wi::image::Params fx;
|
|
fx.pos.x = (float)params.posX;
|
|
fx.pos.y = (float)params.posY;
|
|
fx.siz.x = (float)wi::font::TextWidth(ss.str(), params);
|
|
fx.siz.y = (float)wi::font::TextHeight(ss.str(), params);
|
|
fx.color = wi::Color(20, 20, 20, 230);
|
|
|
|
if (colorspace != ColorSpace::SRGB)
|
|
{
|
|
params.enableLinearOutputMapping(9);
|
|
fx.enableLinearOutputMapping(9);
|
|
}
|
|
|
|
wi::image::Draw(wi::texturehelper::getWhite(), fx, cmd);
|
|
wi::font::Draw(ss.str(), params, cmd);
|
|
}
|
|
|
|
void SetEnabled(bool value)
|
|
{
|
|
// Don't enable/disable the profiler immediately, only on the next frame
|
|
// to avoid enabling inside a Begin/End by mistake
|
|
ENABLED_REQUEST = value;
|
|
}
|
|
|
|
bool IsEnabled()
|
|
{
|
|
return ENABLED;
|
|
}
|
|
|
|
}
|