From 19731c1610f0370128f9efe2714720b279bd49e4 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Tue, 14 Nov 2017 20:54:18 -0500 Subject: [PATCH] GPU queries refactor --- .taskcluster.yml | 4 +- .travis.yml | 2 +- debugger/js/app.js | 26 ++ webrender/Cargo.toml | 1 - webrender/examples/common/boilerplate.rs | 10 +- webrender/src/debug_render.rs | 5 +- webrender/src/debug_server.rs | 4 + webrender/src/device.rs | 302 --------------------- webrender/src/lib.rs | 1 + webrender/src/profiler.rs | 43 ++- webrender/src/query.rs | 321 +++++++++++++++++++++++ webrender/src/renderer.rs | 123 +++++---- webrender_api/src/api.rs | 18 +- wrench/Cargo.toml | 2 +- wrench/src/main.rs | 5 +- wrench/src/wrench.rs | 3 +- 16 files changed, 473 insertions(+), 397 deletions(-) create mode 100644 webrender/src/query.rs diff --git a/.taskcluster.yml b/.taskcluster.yml index 91bf037242..a8f71f5a9f 100644 --- a/.taskcluster.yml +++ b/.taskcluster.yml @@ -98,7 +98,7 @@ tasks: servo-tidy && (cd webrender_api && cargo test --verbose --features "ipc") && (cd webrender && cargo build --verbose --no-default-features) && - (cd webrender && cargo build --verbose --features profiler,query) && + (cd webrender && cargo build --verbose --features profiler) && (cargo test --all --verbose) routes: - "index.garbage.webrender.ci.{{event.head.user.login}}.{{event.head.repo.branch}}.linux-debug" @@ -171,7 +171,7 @@ tasks: export PKG_CONFIG_PATH="/usr/local/opt/zlib/lib/pkgconfig:$PKG_CONFIG_PATH" && (cd webrender_api && cargo test --verbose --features "ipc") && (cd webrender && cargo build --verbose --no-default-features) && - (cd webrender && cargo build --verbose --features profiler,query) && + (cd webrender && cargo build --verbose --features profiler) && (cargo test --all --verbose) routes: - "index.garbage.webrender.ci.{{event.head.user.login}}.{{event.head.repo.branch}}.osx-debug" diff --git a/.travis.yml b/.travis.yml index 3287a6c7fb..fc25713132 100644 --- a/.travis.yml +++ b/.travis.yml @@ -37,7 +37,7 @@ script: - servo-tidy - if [ $BUILD_KIND = DEBUG ]; then (cd webrender_api && cargo test --verbose --features "ipc"); fi - if [ $BUILD_KIND = DEBUG ]; then (cd webrender && cargo build --verbose --no-default-features); fi - - if [ $BUILD_KIND = DEBUG ]; then (cd webrender && cargo build --verbose --features profiler,query); fi + - if [ $BUILD_KIND = DEBUG ]; then (cd webrender && cargo build --verbose --features profiler); fi - if [ $BUILD_KIND = DEBUG ]; then (cargo test --all --verbose); fi - if [ $BUILD_KIND = RELEASE ]; then (cd wrench && python headless.py reftest); fi - if [ $BUILD_KIND = RELEASE ]; then (cd wrench && cargo build --release); fi diff --git a/debugger/js/app.js b/debugger/js/app.js index ebfed730ab..5ca2028d11 100644 --- a/debugger/js/app.js +++ b/debugger/js/app.js @@ -147,6 +147,20 @@ Vue.component('options', { connection.send("disable_alpha_rects_debug"); } } + setGpuTimeQueries(enabled) { + if (enabled) { + connection.send("enable_gpu_time_queries"); + } else { + connection.send("disable_gpu_time_queries"); + } + } + setGpuSampleQueries(enabled) { + if (enabled) { + connection.send("enable_gpu_sample_queries"); + } else { + connection.send("disable_gpu_sample_queries"); + } + } }, template: `
@@ -174,6 +188,18 @@ Vue.component('options', { Alpha primitive rects debugger
+
+ +
+
+ +
` }) diff --git a/webrender/Cargo.toml b/webrender/Cargo.toml index e5cb224cb3..babf300c39 100644 --- a/webrender/Cargo.toml +++ b/webrender/Cargo.toml @@ -11,7 +11,6 @@ default = ["freetype-lib"] freetype-lib = ["freetype/servo-freetype-sys"] profiler = ["thread_profiler/thread_profiler"] debugger = ["ws", "serde_json", "serde", "serde_derive"] -query = [] [dependencies] app_units = "0.5.6" diff --git a/webrender/examples/common/boilerplate.rs b/webrender/examples/common/boilerplate.rs index 2822c0c8c5..6f7d56f333 100644 --- a/webrender/examples/common/boilerplate.rs +++ b/webrender/examples/common/boilerplate.rs @@ -182,8 +182,7 @@ pub fn main_wrapper(example: &mut Example, options: Option break 'outer, + glutin::Event::KeyboardInput(_, _, Some(glutin::VirtualKeyCode::Escape)) => break 'outer, glutin::Event::KeyboardInput( glutin::ElementState::Pressed, @@ -221,6 +220,13 @@ pub fn main_wrapper(example: &mut Example, options: Option { + renderer.toggle_queries_enabled(); + } glutin::Event::KeyboardInput( glutin::ElementState::Pressed, _, diff --git a/webrender/src/debug_render.rs b/webrender/src/debug_render.rs index d388a0c8e2..b2bdfc1dd1 100644 --- a/webrender/src/debug_render.rs +++ b/webrender/src/debug_render.rs @@ -4,7 +4,7 @@ use api::{ColorU, DeviceIntRect, DeviceUintSize, ImageFormat}; use debug_font_data; -use device::{Device, GpuMarker, Program, Texture, TextureSlot, VertexDescriptor, VAO}; +use device::{Device, Program, Texture, TextureSlot, VertexDescriptor, VAO}; use device::{TextureFilter, TextureTarget, VertexAttribute, VertexAttributeKind, VertexUsageHint}; use euclid::{Point2D, Rect, Size2D, Transform3D}; use internal_types::{ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE}; @@ -105,7 +105,7 @@ pub struct DebugRenderer { } impl DebugRenderer { - pub fn new(device: &mut Device) -> DebugRenderer { + pub fn new(device: &mut Device) -> Self { let font_program = device.create_program("debug_font", "", &DESC_FONT).unwrap(); device.bind_shader_samplers(&font_program, &[("sColor0", DebugSampler::Font)]); @@ -263,7 +263,6 @@ impl DebugRenderer { } pub fn render(&mut self, device: &mut Device, viewport_size: &DeviceUintSize) { - let _gm = GpuMarker::new(device.rc_gl(), "debug"); device.disable_depth(); device.set_blend(true); device.set_blend_mode_premultiplied_alpha(); diff --git a/webrender/src/debug_server.rs b/webrender/src/debug_server.rs index 7924ff5e42..ae895158b2 100644 --- a/webrender/src/debug_server.rs +++ b/webrender/src/debug_server.rs @@ -53,6 +53,10 @@ impl ws::Handler for Server { "disable_render_target_debug" => DebugCommand::EnableRenderTargetDebug(false), "enable_alpha_rects_debug" => DebugCommand::EnableAlphaRectsDebug(true), "disable_alpha_rects_debug" => DebugCommand::EnableAlphaRectsDebug(false), + "enable_gpu_time_queries" => DebugCommand::EnableGpuTimeQueries(true), + "disable_gpu_time_queries" => DebugCommand::EnableGpuTimeQueries(false), + "enable_gpu_sample_queries" => DebugCommand::EnableGpuSampleQueries(true), + "disable_gpu_sample_queries" => DebugCommand::EnableGpuSampleQueries(false), "fetch_passes" => DebugCommand::FetchPasses, "fetch_documents" => DebugCommand::FetchDocuments, "fetch_clipscrolltree" => DebugCommand::FetchClipScrollTree, diff --git a/webrender/src/device.rs b/webrender/src/device.rs index 41a9823b1c..013d7f56b9 100644 --- a/webrender/src/device.rs +++ b/webrender/src/device.rs @@ -485,308 +485,6 @@ pub struct VBOId(gl::GLuint); #[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)] struct IBOId(gl::GLuint); -#[cfg(feature = "query")] -const MAX_PROFILE_FRAMES: usize = 4; - -pub trait NamedTag { - fn get_label(&self) -> &str; -} - -#[derive(Debug, Clone)] -pub struct GpuTimer { - pub tag: T, - pub time_ns: u64, -} - -#[derive(Debug, Clone)] -pub struct GpuSampler { - pub tag: T, - pub count: u64, -} - -#[cfg(feature = "query")] -pub struct QuerySet { - set: Vec, - data: Vec, - pending: gl::GLuint, -} - -#[cfg(feature = "query")] -impl QuerySet { - fn new(set: Vec) -> Self { - QuerySet { - set, - data: Vec::new(), - pending: 0, - } - } - - fn reset(&mut self) { - self.data.clear(); - self.pending = 0; - } - - fn add(&mut self, value: T) -> Option { - assert_eq!(self.pending, 0); - self.set.get(self.data.len()).cloned().map(|query_id| { - self.data.push(value); - self.pending = query_id; - query_id - }) - } - - fn take(&mut self, fun: F) -> Vec { - let mut data = mem::replace(&mut self.data, Vec::new()); - for (value, &query) in data.iter_mut().zip(self.set.iter()) { - fun(value, query) - } - data - } -} - -#[cfg(feature = "query")] -pub struct GpuFrameProfile { - gl: Rc, - timers: QuerySet>, - samplers: QuerySet>, - frame_id: FrameId, - inside_frame: bool, -} - -#[cfg(feature = "query")] -impl GpuFrameProfile { - const MAX_TIMERS_PER_FRAME: usize = 256; - // disable samplers on OSX due to driver bugs - #[cfg(target_os = "macos")] - const MAX_SAMPLERS_PER_FRAME: usize = 0; - #[cfg(not(target_os = "macos"))] - const MAX_SAMPLERS_PER_FRAME: usize = 16; - - fn new(gl: Rc) -> Self { - assert_eq!(gl.get_type(), gl::GlType::Gl); - let time_queries = gl.gen_queries(Self::MAX_TIMERS_PER_FRAME as _); - let sample_queries = gl.gen_queries(Self::MAX_SAMPLERS_PER_FRAME as _); - - GpuFrameProfile { - gl, - timers: QuerySet::new(time_queries), - samplers: QuerySet::new(sample_queries), - frame_id: FrameId(0), - inside_frame: false, - } - } - - fn begin_frame(&mut self, frame_id: FrameId) { - self.frame_id = frame_id; - self.timers.reset(); - self.samplers.reset(); - self.inside_frame = true; - } - - fn end_frame(&mut self) { - self.done_marker(); - self.done_sampler(); - self.inside_frame = false; - } - - fn done_marker(&mut self) { - debug_assert!(self.inside_frame); - if self.timers.pending != 0 { - self.gl.end_query(gl::TIME_ELAPSED); - self.timers.pending = 0; - } - } - - fn add_marker(&mut self, tag: T) -> GpuMarker - where - T: NamedTag, - { - self.done_marker(); - - let marker = GpuMarker::new(&self.gl, tag.get_label()); - - if let Some(query) = self.timers.add(GpuTimer { tag, time_ns: 0 }) { - self.gl.begin_query(gl::TIME_ELAPSED, query); - } - - marker - } - - fn done_sampler(&mut self) { - debug_assert!(self.inside_frame); - if self.samplers.pending != 0 { - self.gl.end_query(gl::SAMPLES_PASSED); - self.samplers.pending = 0; - } - } - - fn add_sampler(&mut self, tag: T) - where - T: NamedTag, - { - self.done_sampler(); - - if let Some(query) = self.samplers.add(GpuSampler { tag, count: 0 }) { - self.gl.begin_query(gl::SAMPLES_PASSED, query); - } - } - - fn is_valid(&self) -> bool { - !self.timers.set.is_empty() || !self.samplers.set.is_empty() - } - - fn build_samples(&mut self) -> (Vec>, Vec>) { - debug_assert!(!self.inside_frame); - let gl = &self.gl; - - ( - self.timers.take(|timer, query| { - timer.time_ns = gl.get_query_object_ui64v(query, gl::QUERY_RESULT) - }), - self.samplers.take(|sampler, query| { - sampler.count = gl.get_query_object_ui64v(query, gl::QUERY_RESULT) - }), - ) - } -} - -#[cfg(feature = "query")] -impl Drop for GpuFrameProfile { - fn drop(&mut self) { - if !self.timers.set.is_empty() { - self.gl.delete_queries(&self.timers.set); - } - if !self.samplers.set.is_empty() { - self.gl.delete_queries(&self.samplers.set); - } - } -} - -#[cfg(feature = "query")] -pub struct GpuProfiler { - frames: [GpuFrameProfile; MAX_PROFILE_FRAMES], - next_frame: usize, -} - -#[cfg(feature = "query")] -impl GpuProfiler { - pub fn new(gl: &Rc) -> Self { - GpuProfiler { - next_frame: 0, - frames: [ - GpuFrameProfile::new(Rc::clone(gl)), - GpuFrameProfile::new(Rc::clone(gl)), - GpuFrameProfile::new(Rc::clone(gl)), - GpuFrameProfile::new(Rc::clone(gl)), - ], - } - } - - pub fn build_samples(&mut self) -> Option<(FrameId, Vec>, Vec>)> { - let frame = &mut self.frames[self.next_frame]; - if frame.is_valid() { - let (timers, samplers) = frame.build_samples(); - Some((frame.frame_id, timers, samplers)) - } else { - None - } - } - - pub fn begin_frame(&mut self, frame_id: FrameId) { - let frame = &mut self.frames[self.next_frame]; - frame.begin_frame(frame_id); - } - - pub fn end_frame(&mut self) { - let frame = &mut self.frames[self.next_frame]; - frame.end_frame(); - self.next_frame = (self.next_frame + 1) % MAX_PROFILE_FRAMES; - } - - pub fn add_marker(&mut self, tag: T) -> GpuMarker - where - T: NamedTag, - { - self.frames[self.next_frame].add_marker(tag) - } - - pub fn add_sampler(&mut self, tag: T) - where - T: NamedTag, - { - self.frames[self.next_frame].add_sampler(tag) - } - - pub fn done_sampler(&mut self) { - self.frames[self.next_frame].done_sampler() - } -} - -#[cfg(not(feature = "query"))] -pub struct GpuProfiler(Option); - -#[cfg(not(feature = "query"))] -impl GpuProfiler { - pub fn new(_: &Rc) -> Self { - GpuProfiler(None) - } - - pub fn build_samples(&mut self) -> Option<(FrameId, Vec>, Vec>)> { - None - } - - pub fn begin_frame(&mut self, _: FrameId) {} - - pub fn end_frame(&mut self) {} - - pub fn add_marker(&mut self, _: T) -> GpuMarker { - GpuMarker {} - } - - pub fn add_sampler(&mut self, _: T) {} - - pub fn done_sampler(&mut self) {} -} - - -#[must_use] -pub struct GpuMarker { - #[cfg(feature = "query")] - gl: Rc, -} - -#[cfg(feature = "query")] -impl GpuMarker { - pub fn new(gl: &Rc, message: &str) -> Self { - debug_assert_eq!(gl.get_type(), gl::GlType::Gl); - gl.push_group_marker_ext(message); - GpuMarker { gl: Rc::clone(gl) } - } - - pub fn fire(gl: &gl::Gl, message: &str) { - debug_assert_eq!(gl.get_type(), gl::GlType::Gl); - gl.insert_event_marker_ext(message); - } -} - -#[cfg(feature = "query")] -impl Drop for GpuMarker { - fn drop(&mut self) { - self.gl.pop_group_marker_ext(); - } -} - -#[cfg(not(feature = "query"))] -impl GpuMarker { - #[inline] - pub fn new(_: &Rc, _: &str) -> Self { - GpuMarker{} - } - #[inline] - pub fn fire(_: &gl::Gl, _: &str) {} -} - - #[derive(Debug, Copy, Clone)] pub enum VertexUsageHint { Static, diff --git a/webrender/src/lib.rs b/webrender/src/lib.rs index f0cae58b65..f3ad6dacdf 100644 --- a/webrender/src/lib.rs +++ b/webrender/src/lib.rs @@ -76,6 +76,7 @@ mod picture; mod prim_store; mod print_tree; mod profiler; +mod query; mod record; mod render_backend; mod render_task; diff --git a/webrender/src/profiler.rs b/webrender/src/profiler.rs index d99d45aca4..0befc36f37 100644 --- a/webrender/src/profiler.rs +++ b/webrender/src/profiler.rs @@ -4,11 +4,10 @@ use api::{ColorF, ColorU}; use debug_render::DebugRenderer; -use device::{Device, GpuMarker, GpuSampler, GpuTimer, NamedTag}; use euclid::{Point2D, Rect, Size2D, vec2}; +use query::{GpuSampler, GpuTimer, NamedTag}; use std::collections::vec_deque::VecDeque; -use std::f32; -use std::mem; +use std::{f32, mem}; use time::precise_time_ns; const GRAPH_WIDTH: f32 = 1024.0; @@ -794,7 +793,6 @@ impl Profiler { pub fn draw_profile( &mut self, - device: &mut Device, frame_profile: &FrameProfileCounters, backend_profile: &BackendProfileCounters, renderer_profile: &RendererProfileCounters, @@ -803,15 +801,14 @@ impl Profiler { screen_fraction: f32, debug_renderer: &mut DebugRenderer, ) { - let _gm = GpuMarker::new(device.rc_gl(), "profile"); self.x_left = 20.0; self.y_left = 40.0; self.x_right = 400.0; self.y_right = 40.0; let mut gpu_time = 0; - let gpu_samples = mem::replace(&mut renderer_timers.gpu_samples, Vec::new()); - for sample in &gpu_samples { + let gpu_timers = mem::replace(&mut renderer_timers.gpu_samples, Vec::new()); + for sample in &gpu_timers { gpu_time += sample.time_ns; } renderer_timers.gpu_time.set(gpu_time); @@ -882,22 +879,24 @@ impl Profiler { false, ); - let mut samplers = Vec::::new(); - // Gathering unique GPU samplers. This has O(N^2) complexity, - // but we only have a few samplers per target. - for sampler in gpu_samplers { - let value = sampler.count as f32 * screen_fraction; - match samplers.iter().position(|s| { - s.description as *const _ == sampler.tag.label as *const _ - }) { - Some(pos) => samplers[pos].value += value, - None => samplers.push(FloatProfileCounter { - description: sampler.tag.label, - value, - }), + if !gpu_samplers.is_empty() { + let mut samplers = Vec::::new(); + // Gathering unique GPU samplers. This has O(N^2) complexity, + // but we only have a few samplers per target. + for sampler in gpu_samplers { + let value = sampler.count as f32 * screen_fraction; + match samplers.iter().position(|s| { + s.description as *const _ == sampler.tag.label as *const _ + }) { + Some(pos) => samplers[pos].value += value, + None => samplers.push(FloatProfileCounter { + description: sampler.tag.label, + value, + }), + } } + self.draw_counters(&samplers, debug_renderer, false); } - self.draw_counters(&samplers, debug_renderer, false); self.backend_time .push(backend_profile.total_time.nanoseconds); @@ -906,7 +905,7 @@ impl Profiler { self.ipc_time .push(backend_profile.ipc.total_time.nanoseconds); self.gpu_time.push(gpu_time); - self.gpu_frames.push(gpu_time, gpu_samples); + self.gpu_frames.push(gpu_time, gpu_timers); let rect = diff --git a/webrender/src/query.rs b/webrender/src/query.rs new file mode 100644 index 0000000000..402559ca68 --- /dev/null +++ b/webrender/src/query.rs @@ -0,0 +1,321 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use gleam::gl; +use std::mem; +use std::rc::Rc; + +use device::FrameId; + + +pub trait NamedTag { + fn get_label(&self) -> &str; +} + +#[derive(Debug, Clone)] +pub struct GpuTimer { + pub tag: T, + pub time_ns: u64, +} + +#[derive(Debug, Clone)] +pub struct GpuSampler { + pub tag: T, + pub count: u64, +} + +pub struct QuerySet { + set: Vec, + data: Vec, + pending: gl::GLuint, +} + +impl QuerySet { + fn new() -> Self { + QuerySet { + set: Vec::new(), + data: Vec::new(), + pending: 0, + } + } + + fn reset(&mut self) { + self.data.clear(); + self.pending = 0; + } + + fn add(&mut self, value: T) -> Option { + assert_eq!(self.pending, 0); + self.set.get(self.data.len()).cloned().map(|query_id| { + self.data.push(value); + self.pending = query_id; + query_id + }) + } + + fn take(&mut self, fun: F) -> Vec { + let mut data = mem::replace(&mut self.data, Vec::new()); + for (value, &query) in data.iter_mut().zip(self.set.iter()) { + fun(value, query) + } + data + } +} + +pub struct GpuFrameProfile { + gl: Rc, + timers: QuerySet>, + samplers: QuerySet>, + frame_id: FrameId, + inside_frame: bool, +} + +impl GpuFrameProfile { + fn new(gl: Rc) -> Self { + assert_eq!(gl.get_type(), gl::GlType::Gl); + + GpuFrameProfile { + gl, + timers: QuerySet::new(), + samplers: QuerySet::new(), + frame_id: FrameId::new(0), + inside_frame: false, + } + } + + fn enable_timers(&mut self, count: i32) { + self.timers.set = self.gl.gen_queries(count); + } + + fn disable_timers(&mut self) { + if !self.timers.set.is_empty() { + self.gl.delete_queries(&self.timers.set); + } + self.timers.set = Vec::new(); + } + + fn enable_samplers(&mut self, count: i32) { + self.samplers.set = self.gl.gen_queries(count); + } + + fn disable_samplers(&mut self) { + if !self.samplers.set.is_empty() { + self.gl.delete_queries(&self.samplers.set); + } + self.samplers.set = Vec::new(); + } + + fn begin_frame(&mut self, frame_id: FrameId) { + self.frame_id = frame_id; + self.timers.reset(); + self.samplers.reset(); + self.inside_frame = true; + } + + fn end_frame(&mut self) { + self.finish_timer(); + self.finish_sampler(); + self.inside_frame = false; + } + + fn finish_timer(&mut self) { + debug_assert!(self.inside_frame); + if self.timers.pending != 0 { + self.gl.end_query(gl::TIME_ELAPSED); + self.timers.pending = 0; + } + } + + fn finish_sampler(&mut self) { + debug_assert!(self.inside_frame); + if self.samplers.pending != 0 { + self.gl.end_query(gl::SAMPLES_PASSED); + self.samplers.pending = 0; + } + } +} + +impl GpuFrameProfile { + fn start_timer(&mut self, tag: T) -> GpuTimeQuery { + self.finish_timer(); + + let marker = GpuMarker::new(&self.gl, tag.get_label()); + + if let Some(query) = self.timers.add(GpuTimer { tag, time_ns: 0 }) { + self.gl.begin_query(gl::TIME_ELAPSED, query); + } + + GpuTimeQuery(marker) + } + + fn start_sampler(&mut self, tag: T) -> GpuSampleQuery { + self.finish_sampler(); + + if let Some(query) = self.samplers.add(GpuSampler { tag, count: 0 }) { + self.gl.begin_query(gl::SAMPLES_PASSED, query); + } + + GpuSampleQuery + } + + fn build_samples(&mut self) -> (FrameId, Vec>, Vec>) { + debug_assert!(!self.inside_frame); + let gl = &self.gl; + + ( + self.frame_id, + self.timers.take(|timer, query| { + timer.time_ns = gl.get_query_object_ui64v(query, gl::QUERY_RESULT) + }), + self.samplers.take(|sampler, query| { + sampler.count = gl.get_query_object_ui64v(query, gl::QUERY_RESULT) + }), + ) + } +} + +impl Drop for GpuFrameProfile { + fn drop(&mut self) { + self.disable_timers(); + self.disable_samplers(); + } +} + +pub struct GpuProfiler { + gl: Rc, + frames: Vec>, + next_frame: usize, +} + +impl GpuProfiler { + pub fn new(gl: Rc) -> Self { + const MAX_PROFILE_FRAMES: usize = 4; + let frames = (0 .. MAX_PROFILE_FRAMES) + .map(|_| GpuFrameProfile::new(Rc::clone(&gl))) + .collect(); + + GpuProfiler { + gl, + next_frame: 0, + frames, + } + } + + pub fn enable_timers(&mut self) { + const MAX_TIMERS_PER_FRAME: i32 = 256; + + for frame in &mut self.frames { + frame.enable_timers(MAX_TIMERS_PER_FRAME); + } + } + + pub fn disable_timers(&mut self) { + for frame in &mut self.frames { + frame.disable_timers(); + } + } + + pub fn toggle_timers_enabled(&mut self) { + if self.frames[0].timers.set.is_empty() { + self.enable_timers(); + } else { + self.disable_timers(); + } + } + + pub fn enable_samplers(&mut self) { + const MAX_SAMPLERS_PER_FRAME: i32 = 16; + if cfg!(target_os = "macos") { + warn!("Expect OSX driver bugs related to sample queries") + } + + for frame in &mut self.frames { + frame.enable_samplers(MAX_SAMPLERS_PER_FRAME); + } + } + + pub fn disable_samplers(&mut self) { + for frame in &mut self.frames { + frame.disable_samplers(); + } + } + + pub fn toggle_samplers_enabled(&mut self) { + if self.frames[0].samplers.set.is_empty() { + self.enable_samplers(); + } else { + self.disable_samplers(); + } + } +} + +impl GpuProfiler { + pub fn build_samples(&mut self) -> (FrameId, Vec>, Vec>) { + self.frames[self.next_frame].build_samples() + } + + pub fn begin_frame(&mut self, frame_id: FrameId) { + self.frames[self.next_frame].begin_frame(frame_id); + } + + pub fn end_frame(&mut self) { + self.frames[self.next_frame].end_frame(); + self.next_frame = (self.next_frame + 1) % self.frames.len(); + } + + pub fn start_timer(&mut self, tag: T) -> GpuTimeQuery { + self.frames[self.next_frame].start_timer(tag) + } + + pub fn start_sampler(&mut self, tag: T) -> GpuSampleQuery { + self.frames[self.next_frame].start_sampler(tag) + } + + pub fn finish_sampler(&mut self, _sampler: GpuSampleQuery) { + self.frames[self.next_frame].finish_sampler() + } + + pub fn start_marker(&mut self, label: &str) -> GpuMarker { + GpuMarker::new(&self.gl, label) + } + + pub fn place_marker(&mut self, label: &str) { + GpuMarker::fire(&self.gl, label) + } +} + +#[must_use] +pub struct GpuMarker { + gl: Option>, +} + +impl GpuMarker { + fn new(gl: &Rc, message: &str) -> Self { + if gl.get_type() == gl::GlType::Gl { + gl.push_group_marker_ext(message); + GpuMarker { gl: Some(Rc::clone(gl)) } + } else { + GpuMarker { gl: None } + } + } + + fn fire(gl: &Rc, message: &str) { + if gl.get_type() == gl::GlType::Gl { + gl.insert_event_marker_ext(message); + } + } +} + +impl Drop for GpuMarker { + fn drop(&mut self) { + if let Some(ref gl) = self.gl { + gl.pop_group_marker_ext(); + } + } +} + +#[must_use] +pub struct GpuTimeQuery(GpuMarker); +#[must_use] +pub struct GpuSampleQuery; diff --git a/webrender/src/renderer.rs b/webrender/src/renderer.rs index f33fd1dd14..199e867a36 100644 --- a/webrender/src/renderer.rs +++ b/webrender/src/renderer.rs @@ -24,11 +24,11 @@ use debug_colors; use debug_render::DebugRenderer; #[cfg(feature = "debugger")] use debug_server::{self, DebugServer}; -use device::{DepthFunction, Device, FrameId, GpuMarker, GpuProfiler, Program, Texture, +use device::{DepthFunction, Device, FrameId, Program, Texture, VertexDescriptor, PBO}; use device::{get_gl_format_bgra, ExternalTexture, FBOId, TextureSlot, VertexAttribute, VertexAttributeKind}; -use device::{FileWatcherHandler, GpuTimer, ShaderError, TextureFilter, TextureTarget, +use device::{FileWatcherHandler, ShaderError, TextureFilter, TextureTarget, VertexUsageHint, VAO}; use euclid::{rect, Transform3D}; use frame_builder::FrameBuilderConfig; @@ -41,6 +41,7 @@ use internal_types::{CacheTextureId, FastHashMap, RendererFrame, ResultMsg, Text use internal_types::{DebugOutput, RenderTargetMode, TextureUpdateList, TextureUpdateSource}; use profiler::{BackendProfileCounters, Profiler}; use profiler::{GpuProfileTag, RendererProfileCounters, RendererProfileTimers}; +use query::{GpuProfiler, GpuTimer}; use rayon::Configuration as ThreadPoolConfig; use rayon::ThreadPool; use record::ApiRecordingReceiver; @@ -1838,8 +1839,7 @@ impl Renderer { }; let gpu_cache_texture = CacheTexture::new(&mut device); - - let gpu_profile = GpuProfiler::new(device.rc_gl()); + let gpu_profile = GpuProfiler::new(Rc::clone(device.rc_gl())); let renderer = Renderer { result_rx, @@ -2153,6 +2153,16 @@ impl Renderer { } else { self.debug_flags.remove(DebugFlags::ALPHA_PRIM_DBG); }, + DebugCommand::EnableGpuTimeQueries(enable) => if enable { + self.gpu_profile.enable_timers(); + } else { + self.gpu_profile.disable_timers(); + }, + DebugCommand::EnableGpuSampleQueries(enable) => if enable { + self.gpu_profile.enable_samplers(); + } else { + self.gpu_profile.disable_samplers(); + }, DebugCommand::FetchDocuments => {} DebugCommand::FetchClipScrollTree => {} DebugCommand::FetchPasses => { @@ -2162,6 +2172,11 @@ impl Renderer { } } + pub fn toggle_queries_enabled(&mut self) { + self.gpu_profile.toggle_timers_enabled(); + self.gpu_profile.toggle_samplers_enabled(); + } + /// Set a callback for handling external images. pub fn set_external_image_handler(&mut self, handler: Box) { self.external_image_handler = Some(handler); @@ -2189,30 +2204,26 @@ impl Renderer { if let Some(mut frame) = self.current_frame.take() { if let Some(ref mut frame) = frame.frame { let mut profile_timers = RendererProfileTimers::new(); - let mut profile_samplers = Vec::new(); - - { - //Note: avoiding `self.gpu_profile.add_marker` - it would block here - let _gm = GpuMarker::new(self.device.rc_gl(), "build samples"); + let profile_samplers = { + let _gm = self.gpu_profile.start_marker("build samples"); // Block CPU waiting for last frame's GPU profiles to arrive. // In general this shouldn't block unless heavily GPU limited. - if let Some((gpu_frame_id, timers, samplers)) = self.gpu_profile.build_samples() - { - if self.max_recorded_profiles > 0 { - while self.gpu_profiles.len() >= self.max_recorded_profiles { - self.gpu_profiles.pop_front(); - } - self.gpu_profiles - .push_back(GpuProfile::new(gpu_frame_id, &timers)); + let (gpu_frame_id, timers, samplers) = self.gpu_profile.build_samples(); + + if self.max_recorded_profiles > 0 { + while self.gpu_profiles.len() >= self.max_recorded_profiles { + self.gpu_profiles.pop_front(); } - profile_timers.gpu_samples = timers; - profile_samplers = samplers; + self.gpu_profiles + .push_back(GpuProfile::new(gpu_frame_id, &timers)); } - } + profile_timers.gpu_samples = timers; + samplers + }; let cpu_frame_id = profile_timers.cpu_time.profile(|| { let cpu_frame_id = { - let _gm = GpuMarker::new(self.device.rc_gl(), "begin frame"); + let _gm = self.gpu_profile.start_marker("begin frame"); let frame_id = self.device.begin_frame(frame.device_pixel_ratio); self.gpu_profile.begin_frame(frame_id); @@ -2257,10 +2268,10 @@ impl Renderer { } if self.debug_flags.contains(DebugFlags::PROFILER_DBG) { + let _gm = self.gpu_profile.start_marker("profile"); let screen_fraction = 1.0 / //TODO: take device/pixel ratio into equation? (framebuffer_size.width as f32 * framebuffer_size.height as f32); self.profiler.draw_profile( - &mut self.device, &frame.profile_counters, &self.backend_profile_counters, &self.profile_counters, @@ -2274,13 +2285,16 @@ impl Renderer { self.profile_counters.reset(); self.profile_counters.frame_counter.inc(); - let debug_size = DeviceUintSize::new( - framebuffer_size.width as u32, - framebuffer_size.height as u32, - ); - self.debug.render(&mut self.device, &debug_size); { - let _gm = GpuMarker::new(self.device.rc_gl(), "end frame"); + let _gm = self.gpu_profile.start_marker("debug"); + let debug_size = DeviceUintSize::new( + framebuffer_size.width as u32, + framebuffer_size.height as u32, + ); + self.debug.render(&mut self.device, &debug_size); + } + { + let _gm = self.gpu_profile.start_marker("end frame"); self.device.end_frame(); } self.last_time = current_time; @@ -2304,7 +2318,7 @@ impl Renderer { } fn update_gpu_cache(&mut self, frame: &mut Frame) { - let _gm = GpuMarker::new(self.device.rc_gl(), "gpu cache update"); + let _gm = self.gpu_profile.start_marker("gpu cache update"); for update_list in self.pending_gpu_cache_updates.drain(..) { self.gpu_cache_texture .update(&mut self.device, &update_list); @@ -2314,7 +2328,7 @@ impl Renderer { } fn update_texture_cache(&mut self) { - let _gm = GpuMarker::new(self.device.rc_gl(), "texture cache update"); + let _gm = self.gpu_profile.start_marker("texture cache update"); let mut pending_texture_updates = mem::replace(&mut self.pending_texture_updates, vec![]); for update_list in pending_texture_updates.drain(..) { @@ -2711,7 +2725,7 @@ impl Renderer { _ => {} } - let _gm = self.gpu_profile.add_marker(marker); + let _timer = self.gpu_profile.start_timer(marker); self.draw_instanced_batch(instances, VertexArrayKind::Primitive, &key.textures); } @@ -2750,7 +2764,7 @@ impl Renderer { frame_id: FrameId, ) { { - let _gm = self.gpu_profile.add_marker(GPU_TAG_SETUP_TARGET); + let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_TARGET); self.device .bind_draw_target(render_target, Some(target_size)); self.device.disable_depth(); @@ -2781,7 +2795,7 @@ impl Renderer { // fast path blur shaders for common // blur radii with fixed weights. if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() { - let _gm = self.gpu_profile.add_marker(GPU_TAG_BLUR); + let _timer = self.gpu_profile.start_timer(GPU_TAG_BLUR); self.device.set_blend(false); self.cs_blur_rgba8 @@ -2816,7 +2830,7 @@ impl Renderer { self.device.set_blend(true); self.device.set_blend_mode_premultiplied_alpha(); - let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_TEXT_RUN); + let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_TEXT_RUN); self.cs_text_run .bind(&mut self.device, projection, 0, &mut self.renderer_errors); for (texture_id, instances) in &target.text_run_cache_prims { @@ -2833,7 +2847,7 @@ impl Renderer { self.device.set_blend(true); self.device.set_blend_mode_premultiplied_alpha(); - let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_LINE); + let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_LINE); self.cs_line .bind(&mut self.device, projection, 0, &mut self.renderer_errors); self.draw_instanced_batch( @@ -2846,11 +2860,11 @@ impl Renderer { //TODO: record the pixel count for cached primitives if !target.alpha_batcher.is_empty() { - let _gm2 = GpuMarker::new(self.device.rc_gl(), "alpha batches"); + let _gl = self.gpu_profile.start_marker("alpha batches"); self.device.set_blend(false); let mut prev_blend_mode = BlendMode::None; - self.gpu_profile.add_sampler(GPU_SAMPLER_TAG_OPAQUE); + let opaque_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_OPAQUE); //Note: depth equality is needed for split planes self.device.set_depth_func(DepthFunction::LessEqual); @@ -2878,7 +2892,8 @@ impl Renderer { } self.device.disable_depth_write(); - self.gpu_profile.add_sampler(GPU_SAMPLER_TAG_TRANSPARENT); + self.gpu_profile.finish_sampler(opaque_sampler); + let transparent_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT); for batch in &target.alpha_batcher.batch_list.alpha_batch_list.batches { if self.debug_flags.contains(DebugFlags::ALPHA_PRIM_DBG) { @@ -2904,7 +2919,7 @@ impl Renderer { // 1) Use dual source blending where available (almost all recent hardware). // 2) Use frame buffer fetch where available (most modern hardware). // 3) Consider the old constant color blend method where no clip is applied. - let _gm = self.gpu_profile.add_marker(GPU_TAG_PRIM_TEXT_RUN); + let _timer = self.gpu_profile.start_timer(GPU_TAG_PRIM_TEXT_RUN); self.device.set_blend(true); @@ -3078,7 +3093,7 @@ impl Renderer { self.device.disable_depth(); self.device.set_blend(false); - self.gpu_profile.done_sampler(); + self.gpu_profile.finish_sampler(transparent_sampler); } // For any registered image outputs on this render target, @@ -3123,10 +3138,10 @@ impl Renderer { projection: &Transform3D, render_tasks: &RenderTaskTree, ) { - self.gpu_profile.add_sampler(GPU_SAMPLER_TAG_ALPHA); + let alpha_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_ALPHA); { - let _gm = self.gpu_profile.add_marker(GPU_TAG_SETUP_TARGET); + let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_TARGET); self.device .bind_draw_target(Some(render_target), Some(target_size)); self.device.disable_depth(); @@ -3157,7 +3172,7 @@ impl Renderer { // fast path blur shaders for common // blur radii with fixed weights. if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() { - let _gm = self.gpu_profile.add_marker(GPU_TAG_BLUR); + let _timer = self.gpu_profile.start_timer(GPU_TAG_BLUR); self.device.set_blend(false); self.cs_blur_a8 @@ -3185,7 +3200,7 @@ impl Renderer { if !target.brush_mask_corners.is_empty() { self.device.set_blend(false); - let _gm = self.gpu_profile.add_marker(GPU_TAG_BRUSH_MASK); + let _timer = self.gpu_profile.start_timer(GPU_TAG_BRUSH_MASK); self.brush_mask_corner .bind(&mut self.device, projection, 0, &mut self.renderer_errors); self.draw_instanced_batch( @@ -3198,7 +3213,7 @@ impl Renderer { if !target.brush_mask_rounded_rects.is_empty() { self.device.set_blend(false); - let _gm = self.gpu_profile.add_marker(GPU_TAG_BRUSH_MASK); + let _timer = self.gpu_profile.start_timer(GPU_TAG_BRUSH_MASK); self.brush_mask_rounded_rect .bind(&mut self.device, projection, 0, &mut self.renderer_errors); self.draw_instanced_batch( @@ -3210,13 +3225,13 @@ impl Renderer { // Draw the clip items into the tiled alpha mask. { - let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_CLIP); + let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_CLIP); // If we have border corner clips, the first step is to clear out the // area in the clip mask. This allows drawing multiple invididual clip // in regions below. if !target.clip_batcher.border_clears.is_empty() { - let _gm2 = GpuMarker::new(self.device.rc_gl(), "clip borders [clear]"); + let _gm = self.gpu_profile.start_marker("clip borders [clear]"); self.device.set_blend(false); self.cs_clip_border .bind(&mut self.device, projection, 0, &mut self.renderer_errors); @@ -3229,7 +3244,7 @@ impl Renderer { // Draw any dots or dashes for border corners. if !target.clip_batcher.borders.is_empty() { - let _gm2 = GpuMarker::new(self.device.rc_gl(), "clip borders"); + let _gm = self.gpu_profile.start_marker("clip borders"); // We are masking in parts of the corner (dots or dashes) here. // Blend mode is set to max to allow drawing multiple dots. // The individual dots and dashes in a border never overlap, so using @@ -3251,7 +3266,7 @@ impl Renderer { // draw rounded cornered rectangles if !target.clip_batcher.rectangles.is_empty() { - let _gm2 = GpuMarker::new(self.device.rc_gl(), "clip rectangles"); + let _gm = self.gpu_profile.start_marker("clip rectangles"); self.cs_clip_rectangle.bind( &mut self.device, projection, @@ -3266,7 +3281,7 @@ impl Renderer { } // draw image masks for (mask_texture_id, items) in target.clip_batcher.images.iter() { - let _gm2 = GpuMarker::new(self.device.rc_gl(), "clip images"); + let _gm = self.gpu_profile.start_marker("clip images"); let textures = BatchTextures { colors: [ mask_texture_id.clone(), @@ -3280,7 +3295,7 @@ impl Renderer { } } - self.gpu_profile.done_sampler(); + self.gpu_profile.finish_sampler(alpha_sampler); } fn update_deferred_resolves(&mut self, frame: &mut Frame) { @@ -3294,7 +3309,7 @@ impl Renderer { .expect("Found external image, but no handler set!"); for deferred_resolve in &frame.deferred_resolves { - GpuMarker::fire(self.device.gl(), "deferred resolve"); + self.gpu_profile.place_marker("deferred resolve"); let props = &deferred_resolve.image_properties; let ext_image = props .external_image @@ -3365,7 +3380,7 @@ impl Renderer { } fn start_frame(&mut self, frame: &mut Frame) { - let _gm = self.gpu_profile.add_marker(GPU_TAG_SETUP_DATA); + let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_DATA); // Assign render targets to the passes. for pass in &mut frame.passes { @@ -3447,7 +3462,7 @@ impl Renderer { framebuffer_size: DeviceUintSize, frame_id: FrameId, ) { - let _gm = GpuMarker::new(self.device.rc_gl(), "tile frame draw"); + let _gm = self.gpu_profile.start_marker("tile frame draw"); // Some tests use a restricted viewport smaller than the main screen size. // Ensure we clear the framebuffer in these tests. diff --git a/webrender_api/src/api.rs b/webrender_api/src/api.rs index 45d1411037..2ba990106e 100644 --- a/webrender_api/src/api.rs +++ b/webrender_api/src/api.rs @@ -243,19 +243,23 @@ impl fmt::Debug for DocumentMsg { #[derive(Debug, Clone, Deserialize, Serialize)] pub enum DebugCommand { - // Display the frame profiler on screen. + /// Display the frame profiler on screen. EnableProfiler(bool), - // Display all texture cache pages on screen. + /// Display all texture cache pages on screen. EnableTextureCacheDebug(bool), - // Display intermediate render targets on screen. + /// Display intermediate render targets on screen. EnableRenderTargetDebug(bool), - // Display alpha primitive rects. + /// Display alpha primitive rects. EnableAlphaRectsDebug(bool), - // Fetch current documents and display lists. + /// Display GPU timing results. + EnableGpuTimeQueries(bool), + /// Display GPU overdraw results + EnableGpuSampleQueries(bool), + /// Fetch current documents and display lists. FetchDocuments, - // Fetch current passes and batches. + /// Fetch current passes and batches. FetchPasses, - // Fetch clip-scroll tree. + /// Fetch clip-scroll tree. FetchClipScrollTree, } diff --git a/wrench/Cargo.toml b/wrench/Cargo.toml index 3b48f9ce8c..be3e892413 100644 --- a/wrench/Cargo.toml +++ b/wrench/Cargo.toml @@ -24,7 +24,7 @@ time = "0.1" crossbeam = "0.2" osmesa-sys = { version = "0.1.2", optional = true } osmesa-src = { git = "https://github.com/servo/osmesa-src", optional = true } -webrender = {path = "../webrender", features=["debugger","query"]} +webrender = {path = "../webrender", features=["debugger"]} serde = {version = "1.0", features = ["derive"] } [features] diff --git a/wrench/src/main.rs b/wrench/src/main.rs index 4f12280cb6..a237abb99f 100644 --- a/wrench/src/main.rs +++ b/wrench/src/main.rs @@ -577,7 +577,7 @@ fn main() { } glutin::Event::KeyboardInput(ElementState::Pressed, _scan_code, Some(vk)) => match vk { - VirtualKeyCode::Escape | VirtualKeyCode::Q => { + VirtualKeyCode::Escape => { break 'outer; } VirtualKeyCode::P => { @@ -600,6 +600,9 @@ fn main() { flags.toggle(webrender::DebugFlags::ALPHA_PRIM_DBG); wrench.renderer.set_debug_flags(flags); } + VirtualKeyCode::Q => { + wrench.renderer.toggle_queries_enabled(); + } VirtualKeyCode::M => { wrench.api.notify_memory_pressure(); } diff --git a/wrench/src/wrench.rs b/wrench/src/wrench.rs index d9848ead1b..6d4d72d589 100644 --- a/wrench/src/wrench.rs +++ b/wrench/src/wrench.rs @@ -478,13 +478,14 @@ impl Wrench { pub fn show_onscreen_help(&mut self) { let help_lines = [ - "Esc, Q - Quit", + "Esc - Quit", "H - Toggle help", "R - Toggle recreating display items each frame", "P - Toggle profiler", "O - Toggle showing intermediate targets", "I - Toggle showing texture caches", "B - Toggle showing alpha primitive rects", + "Q - Toggle GPU queries for time and samples", "M - Trigger memory pressure event", ];