From 35481dc9cb4391342bdafc9188ce4caee6551a62 Mon Sep 17 00:00:00 2001 From: Glenn Watson Date: Mon, 6 Mar 2017 15:09:42 +1000 Subject: [PATCH] Expose (a subset of) the profile counters via an API. This will allow us to add external reporting of some of the profile stats for each test run in wrench. --- webrender/src/device.rs | 31 +++++++++---- webrender/src/profiler.rs | 8 ++++ webrender/src/renderer.rs | 91 +++++++++++++++++++++++++++++++++++---- 3 files changed, 113 insertions(+), 17 deletions(-) diff --git a/webrender/src/device.rs b/webrender/src/device.rs index 194d88749e..dd55b7b07a 100644 --- a/webrender/src/device.rs +++ b/webrender/src/device.rs @@ -22,6 +22,9 @@ use std::path::PathBuf; use webrender_traits::{ColorF, ImageFormat}; use webrender_traits::{DeviceIntPoint, DeviceIntRect, DeviceIntSize, DeviceUintSize}; +#[derive(Debug, Copy, Clone)] +pub struct FrameId(usize); + #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] const GL_FORMAT_A: gl::GLuint = gl::RED; @@ -497,6 +500,7 @@ pub struct GpuFrameProfile { samples: Vec>, next_query: usize, pending_query: gl::GLuint, + frame_id: FrameId, } impl GpuFrameProfile { @@ -509,6 +513,7 @@ impl GpuFrameProfile { samples: Vec::new(), next_query: 0, pending_query: 0, + frame_id: FrameId(0), } } @@ -522,7 +527,8 @@ impl GpuFrameProfile { } } - fn begin_frame(&mut self) { + fn begin_frame(&mut self, frame_id: FrameId) { + self.frame_id = frame_id; self.next_query = 0; self.pending_query = 0; self.samples.clear(); @@ -572,7 +578,7 @@ impl GpuFrameProfile { } fn is_valid(&self) -> bool { - self.next_query <= MAX_EVENTS_PER_FRAME + self.next_query > 0 && self.next_query <= MAX_EVENTS_PER_FRAME } #[cfg(not(target_os = "android"))] @@ -619,18 +625,18 @@ impl GpuProfiler { } } - pub fn build_samples(&mut self) -> Option>> { + pub fn build_samples(&mut self) -> Option<(FrameId, Vec>)> { let frame = &mut self.frames[self.next_frame]; if frame.is_valid() { - Some(frame.build_samples()) + 
Some((frame.frame_id, frame.build_samples())) } else { None } } - pub fn begin_frame(&mut self) { + pub fn begin_frame(&mut self, frame_id: FrameId) { let frame = &mut self.frames[self.next_frame]; - frame.begin_frame(); + frame.begin_frame(frame_id); } pub fn end_frame(&mut self) { @@ -821,6 +827,10 @@ pub struct Device { next_vao_id: gl::GLuint, max_texture_size: u32, + + // Frame counter. This is used to map between CPU + // frames and GPU frames. + frame_id: FrameId, } impl Device { @@ -860,7 +870,8 @@ impl Device { next_vao_id: 1, //file_watcher: file_watcher, - max_texture_size: gl::get_integer_v(gl::MAX_TEXTURE_SIZE) as u32 + max_texture_size: gl::get_integer_v(gl::MAX_TEXTURE_SIZE) as u32, + frame_id: FrameId(0), } } @@ -903,7 +914,7 @@ impl Device { } } - pub fn begin_frame(&mut self, device_pixel_ratio: f32) { + pub fn begin_frame(&mut self, device_pixel_ratio: f32) -> FrameId { debug_assert!(!self.inside_frame); self.inside_frame = true; self.device_pixel_ratio = device_pixel_ratio; @@ -938,6 +949,8 @@ impl Device { // Default is sampler 0, always gl::active_texture(gl::TEXTURE0); + + self.frame_id } pub fn bind_texture(&mut self, @@ -1826,6 +1839,8 @@ impl Device { } gl::active_texture(gl::TEXTURE0); + + self.frame_id.0 += 1; } pub fn assign_ubo_binding(&self, program_id: ProgramId, name: &str, value: u32) -> u32 { diff --git a/webrender/src/profiler.rs b/webrender/src/profiler.rs index c239abf537..5bcc984baa 100644 --- a/webrender/src/profiler.rs +++ b/webrender/src/profiler.rs @@ -68,6 +68,10 @@ impl IntProfileCounter { pub fn set(&mut self, amount: usize) { self.value = amount; } + + pub fn get(&self) -> usize { + self.value + } } impl ProfileCounter for IntProfileCounter { @@ -153,6 +157,10 @@ impl TimeProfileCounter { self.nanoseconds += ns; val } + + pub fn get(&self) -> u64 { + self.nanoseconds + } } impl ProfileCounter for TimeProfileCounter { diff --git a/webrender/src/renderer.rs b/webrender/src/renderer.rs index 1fa2e13ef2..c4408be8d1 100644 
--- a/webrender/src/renderer.rs +++ b/webrender/src/renderer.rs @@ -11,8 +11,8 @@ use debug_colors; use debug_render::DebugRenderer; -use device::{DepthFunction, Device, ProgramId, TextureId, VertexFormat, GpuMarker, GpuProfiler}; -use device::{TextureFilter, VAOId, VertexUsageHint, FileWatcherHandler, TextureTarget, ShaderError}; +use device::{DepthFunction, Device, FrameId, ProgramId, TextureId, VertexFormat, GpuMarker, GpuProfiler}; +use device::{GpuSample, TextureFilter, VAOId, VertexUsageHint, FileWatcherHandler, TextureTarget, ShaderError}; use euclid::Matrix4D; use fnv::FnvHasher; use frame_builder::FrameBuilderConfig; @@ -29,7 +29,7 @@ use render_backend::RenderBackend; use render_task::RenderTaskData; use std; use std::cmp; -use std::collections::HashMap; +use std::collections::{HashMap, VecDeque}; use std::f32; use std::hash::BuildHasherDefault; use std::marker::PhantomData; @@ -81,6 +81,44 @@ pub enum RendererKind { OSMesa, } +#[derive(Debug)] +pub struct GpuProfile { + pub frame_id: FrameId, + pub paint_time_ns: u64, +} + +impl GpuProfile { + fn new(frame_id: FrameId, samples: &[GpuSample]) -> GpuProfile { + let mut paint_time_ns = 0; + for sample in samples { + paint_time_ns += sample.time_ns; + } + GpuProfile { + frame_id: frame_id, + paint_time_ns: paint_time_ns, + } + } +} + +#[derive(Debug)] +pub struct CpuProfile { + pub frame_id: FrameId, + pub composite_time_ns: u64, + pub draw_calls: usize, +} + +impl CpuProfile { + fn new(frame_id: FrameId, + composite_time_ns: u64, + draw_calls: usize) -> CpuProfile { + CpuProfile { + frame_id: frame_id, + composite_time_ns: composite_time_ns, + draw_calls: draw_calls, + } + } +} + #[derive(Debug, Copy, Clone, PartialEq)] pub enum BlendMode { None, @@ -431,6 +469,7 @@ pub struct Renderer { notifier: Arc>>>, enable_profiler: bool, + max_recorded_profiles: usize, clear_framebuffer: bool, clear_color: ColorF, debug: DebugRenderer, @@ -479,7 +518,12 @@ pub struct Renderer { // Optional trait object that handles 
WebVR commands. // Some WebVR commands such as SubmitFrame must be synced with the WebGL render thread. - vr_compositor_handler: Arc>>> + vr_compositor_handler: Arc>>>, + + /// List of profile results from previous frames. Can be retrieved + /// via get_frame_profiles(). + cpu_profiles: VecDeque<CpuProfile>, + gpu_profiles: VecDeque<GpuProfile>, } #[derive(Debug)] @@ -846,6 +890,7 @@ impl Renderer { profile_counters: RendererProfileCounters::new(), profiler: Profiler::new(), enable_profiler: options.enable_profiler, + max_recorded_profiles: options.max_recorded_profiles, clear_framebuffer: options.clear_framebuffer, clear_color: options.clear_color, last_time: 0, @@ -862,7 +907,9 @@ impl Renderer { dummy_cache_texture_id: dummy_cache_texture_id, external_image_handler: None, external_images: HashMap::with_hasher(Default::default()), - vr_compositor_handler: vr_compositor + vr_compositor_handler: vr_compositor, + cpu_profiles: VecDeque::new(), + gpu_profiles: VecDeque::new(), }; let sender = RenderApiSender::new(api_tx, payload_tx); @@ -964,6 +1011,13 @@ impl Renderer { self.external_image_handler = Some(handler); } + /// Retrieve (and clear) the current list of recorded frame profiles. + pub fn get_frame_profiles(&mut self) -> (Vec<CpuProfile>, Vec<GpuProfile>) { + let cpu_profiles = self.cpu_profiles.drain(..).collect(); + let gpu_profiles = self.gpu_profiles.drain(..).collect(); + (cpu_profiles, gpu_profiles) + } + /// Renders the current frame. /// /// A Frame is supplied by calling [set_root_stacking_context()][newframe]. @@ -977,13 +1031,19 @@ impl Renderer { // Block CPU waiting for last frame's GPU profiles to arrive. // In general this shouldn't block unless heavily GPU limited. 
- if let Some(samples) = self.gpu_profile.build_samples() { + if let Some((gpu_frame_id, samples)) = self.gpu_profile.build_samples() { + if self.max_recorded_profiles > 0 { + while self.gpu_profiles.len() >= self.max_recorded_profiles { + self.gpu_profiles.pop_front(); + } + self.gpu_profiles.push_back(GpuProfile::new(gpu_frame_id, &samples)); + } profile_timers.gpu_samples = samples; } - profile_timers.cpu_time.profile(|| { - self.device.begin_frame(frame.device_pixel_ratio); - self.gpu_profile.begin_frame(); + let cpu_frame_id = profile_timers.cpu_time.profile(|| { + let cpu_frame_id = self.device.begin_frame(frame.device_pixel_ratio); + self.gpu_profile.begin_frame(cpu_frame_id); { let _gm = self.gpu_profile.add_marker(GPU_TAG_INIT); @@ -998,12 +1058,23 @@ impl Renderer { self.draw_tile_frame(frame, &framebuffer_size); self.gpu_profile.end_frame(); + cpu_frame_id }); let current_time = precise_time_ns(); let ns = current_time - self.last_time; self.profile_counters.frame_time.set(ns); + if self.max_recorded_profiles > 0 { + while self.cpu_profiles.len() >= self.max_recorded_profiles { + self.cpu_profiles.pop_front(); + } + let cpu_profile = CpuProfile::new(cpu_frame_id, + profile_timers.cpu_time.get(), + self.profile_counters.draw_calls.get()); + self.cpu_profiles.push_back(cpu_profile); + } + if self.enable_profiler { self.profiler.draw_profile(&frame.profile_counters, &self.backend_profile_counters, @@ -1770,6 +1841,7 @@ pub struct RendererOptions { pub resource_override_path: Option, pub enable_aa: bool, pub enable_profiler: bool, + pub max_recorded_profiles: usize, pub debug: bool, pub enable_scrollbars: bool, pub precache_shaders: bool, @@ -1791,6 +1863,7 @@ impl Default for RendererOptions { resource_override_path: None, enable_aa: true, enable_profiler: false, + max_recorded_profiles: 0, debug: false, enable_scrollbars: false, precache_shaders: false,