diff --git a/.taskcluster.yml b/.taskcluster.yml
index 91bf037242..a8f71f5a9f 100644
--- a/.taskcluster.yml
+++ b/.taskcluster.yml
@@ -98,7 +98,7 @@ tasks:
servo-tidy &&
(cd webrender_api && cargo test --verbose --features "ipc") &&
(cd webrender && cargo build --verbose --no-default-features) &&
- (cd webrender && cargo build --verbose --features profiler,query) &&
+ (cd webrender && cargo build --verbose --features profiler) &&
(cargo test --all --verbose)
routes:
- "index.garbage.webrender.ci.{{event.head.user.login}}.{{event.head.repo.branch}}.linux-debug"
@@ -171,7 +171,7 @@ tasks:
export PKG_CONFIG_PATH="/usr/local/opt/zlib/lib/pkgconfig:$PKG_CONFIG_PATH" &&
(cd webrender_api && cargo test --verbose --features "ipc") &&
(cd webrender && cargo build --verbose --no-default-features) &&
- (cd webrender && cargo build --verbose --features profiler,query) &&
+ (cd webrender && cargo build --verbose --features profiler) &&
(cargo test --all --verbose)
routes:
- "index.garbage.webrender.ci.{{event.head.user.login}}.{{event.head.repo.branch}}.osx-debug"
diff --git a/.travis.yml b/.travis.yml
index 3287a6c7fb..fc25713132 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -37,7 +37,7 @@ script:
- servo-tidy
- if [ $BUILD_KIND = DEBUG ]; then (cd webrender_api && cargo test --verbose --features "ipc"); fi
- if [ $BUILD_KIND = DEBUG ]; then (cd webrender && cargo build --verbose --no-default-features); fi
- - if [ $BUILD_KIND = DEBUG ]; then (cd webrender && cargo build --verbose --features profiler,query); fi
+ - if [ $BUILD_KIND = DEBUG ]; then (cd webrender && cargo build --verbose --features profiler); fi
- if [ $BUILD_KIND = DEBUG ]; then (cargo test --all --verbose); fi
- if [ $BUILD_KIND = RELEASE ]; then (cd wrench && python headless.py reftest); fi
- if [ $BUILD_KIND = RELEASE ]; then (cd wrench && cargo build --release); fi
diff --git a/debugger/js/app.js b/debugger/js/app.js
index ebfed730ab..5ca2028d11 100644
--- a/debugger/js/app.js
+++ b/debugger/js/app.js
@@ -147,6 +147,20 @@ Vue.component('options', {
connection.send("disable_alpha_rects_debug");
}
}
+ setGpuTimeQueries(enabled) {
+ if (enabled) {
+ connection.send("enable_gpu_time_queries");
+ } else {
+ connection.send("disable_gpu_time_queries");
+ }
+ }
+ setGpuSampleQueries(enabled) {
+ if (enabled) {
+ connection.send("enable_gpu_sample_queries");
+ } else {
+ connection.send("disable_gpu_sample_queries");
+ }
+ }
},
template: `
@@ -174,6 +188,18 @@ Vue.component('options', {
Alpha primitive rects debugger
+
+
+
+
+
+
`
})
diff --git a/webrender/Cargo.toml b/webrender/Cargo.toml
index e5cb224cb3..babf300c39 100644
--- a/webrender/Cargo.toml
+++ b/webrender/Cargo.toml
@@ -11,7 +11,6 @@ default = ["freetype-lib"]
freetype-lib = ["freetype/servo-freetype-sys"]
profiler = ["thread_profiler/thread_profiler"]
debugger = ["ws", "serde_json", "serde", "serde_derive"]
-query = []
[dependencies]
app_units = "0.5.6"
diff --git a/webrender/examples/common/boilerplate.rs b/webrender/examples/common/boilerplate.rs
index 2822c0c8c5..6f7d56f333 100644
--- a/webrender/examples/common/boilerplate.rs
+++ b/webrender/examples/common/boilerplate.rs
@@ -182,8 +182,7 @@ pub fn main_wrapper(example: &mut Example, options: Option break 'outer,
+ glutin::Event::KeyboardInput(_, _, Some(glutin::VirtualKeyCode::Escape)) => break 'outer,
glutin::Event::KeyboardInput(
glutin::ElementState::Pressed,
@@ -221,6 +220,13 @@ pub fn main_wrapper(example: &mut Example, options: Option {
+ renderer.toggle_queries_enabled();
+ }
glutin::Event::KeyboardInput(
glutin::ElementState::Pressed,
_,
diff --git a/webrender/src/debug_render.rs b/webrender/src/debug_render.rs
index d388a0c8e2..b2bdfc1dd1 100644
--- a/webrender/src/debug_render.rs
+++ b/webrender/src/debug_render.rs
@@ -4,7 +4,7 @@
use api::{ColorU, DeviceIntRect, DeviceUintSize, ImageFormat};
use debug_font_data;
-use device::{Device, GpuMarker, Program, Texture, TextureSlot, VertexDescriptor, VAO};
+use device::{Device, Program, Texture, TextureSlot, VertexDescriptor, VAO};
use device::{TextureFilter, TextureTarget, VertexAttribute, VertexAttributeKind, VertexUsageHint};
use euclid::{Point2D, Rect, Size2D, Transform3D};
use internal_types::{ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE};
@@ -105,7 +105,7 @@ pub struct DebugRenderer {
}
impl DebugRenderer {
- pub fn new(device: &mut Device) -> DebugRenderer {
+ pub fn new(device: &mut Device) -> Self {
let font_program = device.create_program("debug_font", "", &DESC_FONT).unwrap();
device.bind_shader_samplers(&font_program, &[("sColor0", DebugSampler::Font)]);
@@ -263,7 +263,6 @@ impl DebugRenderer {
}
pub fn render(&mut self, device: &mut Device, viewport_size: &DeviceUintSize) {
- let _gm = GpuMarker::new(device.rc_gl(), "debug");
device.disable_depth();
device.set_blend(true);
device.set_blend_mode_premultiplied_alpha();
diff --git a/webrender/src/debug_server.rs b/webrender/src/debug_server.rs
index 7924ff5e42..ae895158b2 100644
--- a/webrender/src/debug_server.rs
+++ b/webrender/src/debug_server.rs
@@ -53,6 +53,10 @@ impl ws::Handler for Server {
"disable_render_target_debug" => DebugCommand::EnableRenderTargetDebug(false),
"enable_alpha_rects_debug" => DebugCommand::EnableAlphaRectsDebug(true),
"disable_alpha_rects_debug" => DebugCommand::EnableAlphaRectsDebug(false),
+ "enable_gpu_time_queries" => DebugCommand::EnableGpuTimeQueries(true),
+ "disable_gpu_time_queries" => DebugCommand::EnableGpuTimeQueries(false),
+ "enable_gpu_sample_queries" => DebugCommand::EnableGpuSampleQueries(true),
+ "disable_gpu_sample_queries" => DebugCommand::EnableGpuSampleQueries(false),
"fetch_passes" => DebugCommand::FetchPasses,
"fetch_documents" => DebugCommand::FetchDocuments,
"fetch_clipscrolltree" => DebugCommand::FetchClipScrollTree,
diff --git a/webrender/src/device.rs b/webrender/src/device.rs
index 41a9823b1c..013d7f56b9 100644
--- a/webrender/src/device.rs
+++ b/webrender/src/device.rs
@@ -485,308 +485,6 @@ pub struct VBOId(gl::GLuint);
#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
struct IBOId(gl::GLuint);
-#[cfg(feature = "query")]
-const MAX_PROFILE_FRAMES: usize = 4;
-
-pub trait NamedTag {
- fn get_label(&self) -> &str;
-}
-
-#[derive(Debug, Clone)]
-pub struct GpuTimer {
- pub tag: T,
- pub time_ns: u64,
-}
-
-#[derive(Debug, Clone)]
-pub struct GpuSampler {
- pub tag: T,
- pub count: u64,
-}
-
-#[cfg(feature = "query")]
-pub struct QuerySet {
- set: Vec,
- data: Vec,
- pending: gl::GLuint,
-}
-
-#[cfg(feature = "query")]
-impl QuerySet {
- fn new(set: Vec) -> Self {
- QuerySet {
- set,
- data: Vec::new(),
- pending: 0,
- }
- }
-
- fn reset(&mut self) {
- self.data.clear();
- self.pending = 0;
- }
-
- fn add(&mut self, value: T) -> Option {
- assert_eq!(self.pending, 0);
- self.set.get(self.data.len()).cloned().map(|query_id| {
- self.data.push(value);
- self.pending = query_id;
- query_id
- })
- }
-
- fn take(&mut self, fun: F) -> Vec {
- let mut data = mem::replace(&mut self.data, Vec::new());
- for (value, &query) in data.iter_mut().zip(self.set.iter()) {
- fun(value, query)
- }
- data
- }
-}
-
-#[cfg(feature = "query")]
-pub struct GpuFrameProfile {
- gl: Rc,
- timers: QuerySet>,
- samplers: QuerySet>,
- frame_id: FrameId,
- inside_frame: bool,
-}
-
-#[cfg(feature = "query")]
-impl GpuFrameProfile {
- const MAX_TIMERS_PER_FRAME: usize = 256;
- // disable samplers on OSX due to driver bugs
- #[cfg(target_os = "macos")]
- const MAX_SAMPLERS_PER_FRAME: usize = 0;
- #[cfg(not(target_os = "macos"))]
- const MAX_SAMPLERS_PER_FRAME: usize = 16;
-
- fn new(gl: Rc) -> Self {
- assert_eq!(gl.get_type(), gl::GlType::Gl);
- let time_queries = gl.gen_queries(Self::MAX_TIMERS_PER_FRAME as _);
- let sample_queries = gl.gen_queries(Self::MAX_SAMPLERS_PER_FRAME as _);
-
- GpuFrameProfile {
- gl,
- timers: QuerySet::new(time_queries),
- samplers: QuerySet::new(sample_queries),
- frame_id: FrameId(0),
- inside_frame: false,
- }
- }
-
- fn begin_frame(&mut self, frame_id: FrameId) {
- self.frame_id = frame_id;
- self.timers.reset();
- self.samplers.reset();
- self.inside_frame = true;
- }
-
- fn end_frame(&mut self) {
- self.done_marker();
- self.done_sampler();
- self.inside_frame = false;
- }
-
- fn done_marker(&mut self) {
- debug_assert!(self.inside_frame);
- if self.timers.pending != 0 {
- self.gl.end_query(gl::TIME_ELAPSED);
- self.timers.pending = 0;
- }
- }
-
- fn add_marker(&mut self, tag: T) -> GpuMarker
- where
- T: NamedTag,
- {
- self.done_marker();
-
- let marker = GpuMarker::new(&self.gl, tag.get_label());
-
- if let Some(query) = self.timers.add(GpuTimer { tag, time_ns: 0 }) {
- self.gl.begin_query(gl::TIME_ELAPSED, query);
- }
-
- marker
- }
-
- fn done_sampler(&mut self) {
- debug_assert!(self.inside_frame);
- if self.samplers.pending != 0 {
- self.gl.end_query(gl::SAMPLES_PASSED);
- self.samplers.pending = 0;
- }
- }
-
- fn add_sampler(&mut self, tag: T)
- where
- T: NamedTag,
- {
- self.done_sampler();
-
- if let Some(query) = self.samplers.add(GpuSampler { tag, count: 0 }) {
- self.gl.begin_query(gl::SAMPLES_PASSED, query);
- }
- }
-
- fn is_valid(&self) -> bool {
- !self.timers.set.is_empty() || !self.samplers.set.is_empty()
- }
-
- fn build_samples(&mut self) -> (Vec>, Vec>) {
- debug_assert!(!self.inside_frame);
- let gl = &self.gl;
-
- (
- self.timers.take(|timer, query| {
- timer.time_ns = gl.get_query_object_ui64v(query, gl::QUERY_RESULT)
- }),
- self.samplers.take(|sampler, query| {
- sampler.count = gl.get_query_object_ui64v(query, gl::QUERY_RESULT)
- }),
- )
- }
-}
-
-#[cfg(feature = "query")]
-impl Drop for GpuFrameProfile {
- fn drop(&mut self) {
- if !self.timers.set.is_empty() {
- self.gl.delete_queries(&self.timers.set);
- }
- if !self.samplers.set.is_empty() {
- self.gl.delete_queries(&self.samplers.set);
- }
- }
-}
-
-#[cfg(feature = "query")]
-pub struct GpuProfiler {
- frames: [GpuFrameProfile; MAX_PROFILE_FRAMES],
- next_frame: usize,
-}
-
-#[cfg(feature = "query")]
-impl GpuProfiler {
- pub fn new(gl: &Rc) -> Self {
- GpuProfiler {
- next_frame: 0,
- frames: [
- GpuFrameProfile::new(Rc::clone(gl)),
- GpuFrameProfile::new(Rc::clone(gl)),
- GpuFrameProfile::new(Rc::clone(gl)),
- GpuFrameProfile::new(Rc::clone(gl)),
- ],
- }
- }
-
- pub fn build_samples(&mut self) -> Option<(FrameId, Vec>, Vec>)> {
- let frame = &mut self.frames[self.next_frame];
- if frame.is_valid() {
- let (timers, samplers) = frame.build_samples();
- Some((frame.frame_id, timers, samplers))
- } else {
- None
- }
- }
-
- pub fn begin_frame(&mut self, frame_id: FrameId) {
- let frame = &mut self.frames[self.next_frame];
- frame.begin_frame(frame_id);
- }
-
- pub fn end_frame(&mut self) {
- let frame = &mut self.frames[self.next_frame];
- frame.end_frame();
- self.next_frame = (self.next_frame + 1) % MAX_PROFILE_FRAMES;
- }
-
- pub fn add_marker(&mut self, tag: T) -> GpuMarker
- where
- T: NamedTag,
- {
- self.frames[self.next_frame].add_marker(tag)
- }
-
- pub fn add_sampler(&mut self, tag: T)
- where
- T: NamedTag,
- {
- self.frames[self.next_frame].add_sampler(tag)
- }
-
- pub fn done_sampler(&mut self) {
- self.frames[self.next_frame].done_sampler()
- }
-}
-
-#[cfg(not(feature = "query"))]
-pub struct GpuProfiler(Option);
-
-#[cfg(not(feature = "query"))]
-impl GpuProfiler {
- pub fn new(_: &Rc) -> Self {
- GpuProfiler(None)
- }
-
- pub fn build_samples(&mut self) -> Option<(FrameId, Vec>, Vec>)> {
- None
- }
-
- pub fn begin_frame(&mut self, _: FrameId) {}
-
- pub fn end_frame(&mut self) {}
-
- pub fn add_marker(&mut self, _: T) -> GpuMarker {
- GpuMarker {}
- }
-
- pub fn add_sampler(&mut self, _: T) {}
-
- pub fn done_sampler(&mut self) {}
-}
-
-
-#[must_use]
-pub struct GpuMarker {
- #[cfg(feature = "query")]
- gl: Rc,
-}
-
-#[cfg(feature = "query")]
-impl GpuMarker {
- pub fn new(gl: &Rc, message: &str) -> Self {
- debug_assert_eq!(gl.get_type(), gl::GlType::Gl);
- gl.push_group_marker_ext(message);
- GpuMarker { gl: Rc::clone(gl) }
- }
-
- pub fn fire(gl: &gl::Gl, message: &str) {
- debug_assert_eq!(gl.get_type(), gl::GlType::Gl);
- gl.insert_event_marker_ext(message);
- }
-}
-
-#[cfg(feature = "query")]
-impl Drop for GpuMarker {
- fn drop(&mut self) {
- self.gl.pop_group_marker_ext();
- }
-}
-
-#[cfg(not(feature = "query"))]
-impl GpuMarker {
- #[inline]
- pub fn new(_: &Rc, _: &str) -> Self {
- GpuMarker{}
- }
- #[inline]
- pub fn fire(_: &gl::Gl, _: &str) {}
-}
-
-
#[derive(Debug, Copy, Clone)]
pub enum VertexUsageHint {
Static,
diff --git a/webrender/src/lib.rs b/webrender/src/lib.rs
index f0cae58b65..f3ad6dacdf 100644
--- a/webrender/src/lib.rs
+++ b/webrender/src/lib.rs
@@ -76,6 +76,7 @@ mod picture;
mod prim_store;
mod print_tree;
mod profiler;
+mod query;
mod record;
mod render_backend;
mod render_task;
diff --git a/webrender/src/profiler.rs b/webrender/src/profiler.rs
index d99d45aca4..0befc36f37 100644
--- a/webrender/src/profiler.rs
+++ b/webrender/src/profiler.rs
@@ -4,11 +4,10 @@
use api::{ColorF, ColorU};
use debug_render::DebugRenderer;
-use device::{Device, GpuMarker, GpuSampler, GpuTimer, NamedTag};
use euclid::{Point2D, Rect, Size2D, vec2};
+use query::{GpuSampler, GpuTimer, NamedTag};
use std::collections::vec_deque::VecDeque;
-use std::f32;
-use std::mem;
+use std::{f32, mem};
use time::precise_time_ns;
const GRAPH_WIDTH: f32 = 1024.0;
@@ -794,7 +793,6 @@ impl Profiler {
pub fn draw_profile(
&mut self,
- device: &mut Device,
frame_profile: &FrameProfileCounters,
backend_profile: &BackendProfileCounters,
renderer_profile: &RendererProfileCounters,
@@ -803,15 +801,14 @@ impl Profiler {
screen_fraction: f32,
debug_renderer: &mut DebugRenderer,
) {
- let _gm = GpuMarker::new(device.rc_gl(), "profile");
self.x_left = 20.0;
self.y_left = 40.0;
self.x_right = 400.0;
self.y_right = 40.0;
let mut gpu_time = 0;
- let gpu_samples = mem::replace(&mut renderer_timers.gpu_samples, Vec::new());
- for sample in &gpu_samples {
+ let gpu_timers = mem::replace(&mut renderer_timers.gpu_samples, Vec::new());
+ for sample in &gpu_timers {
gpu_time += sample.time_ns;
}
renderer_timers.gpu_time.set(gpu_time);
@@ -882,22 +879,24 @@ impl Profiler {
false,
);
- let mut samplers = Vec::::new();
- // Gathering unique GPU samplers. This has O(N^2) complexity,
- // but we only have a few samplers per target.
- for sampler in gpu_samplers {
- let value = sampler.count as f32 * screen_fraction;
- match samplers.iter().position(|s| {
- s.description as *const _ == sampler.tag.label as *const _
- }) {
- Some(pos) => samplers[pos].value += value,
- None => samplers.push(FloatProfileCounter {
- description: sampler.tag.label,
- value,
- }),
+ if !gpu_samplers.is_empty() {
+ let mut samplers = Vec::::new();
+ // Gathering unique GPU samplers. This has O(N^2) complexity,
+ // but we only have a few samplers per target.
+ for sampler in gpu_samplers {
+ let value = sampler.count as f32 * screen_fraction;
+ match samplers.iter().position(|s| {
+ s.description as *const _ == sampler.tag.label as *const _
+ }) {
+ Some(pos) => samplers[pos].value += value,
+ None => samplers.push(FloatProfileCounter {
+ description: sampler.tag.label,
+ value,
+ }),
+ }
}
+ self.draw_counters(&samplers, debug_renderer, false);
}
- self.draw_counters(&samplers, debug_renderer, false);
self.backend_time
.push(backend_profile.total_time.nanoseconds);
@@ -906,7 +905,7 @@ impl Profiler {
self.ipc_time
.push(backend_profile.ipc.total_time.nanoseconds);
self.gpu_time.push(gpu_time);
- self.gpu_frames.push(gpu_time, gpu_samples);
+ self.gpu_frames.push(gpu_time, gpu_timers);
let rect =
diff --git a/webrender/src/query.rs b/webrender/src/query.rs
new file mode 100644
index 0000000000..402559ca68
--- /dev/null
+++ b/webrender/src/query.rs
@@ -0,0 +1,321 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use gleam::gl;
+use std::mem;
+use std::rc::Rc;
+
+use device::FrameId;
+
+
+/// A tag attached to GPU queries that can supply a human-readable label.
+pub trait NamedTag {
+ /// Label for this tag; used as the debug group-marker text when a timer
+ /// is started with the tag.
+ fn get_label(&self) -> &str;
+}
+
+/// Outcome of a single `TIME_ELAPSED` query.
+#[derive(Debug, Clone)]
+pub struct GpuTimer<T> {
+    /// Tag the timer was started with.
+    pub tag: T,
+    /// Query result read back from GL; 0 until the samples are built.
+    pub time_ns: u64,
+}
+
+/// Outcome of a single `SAMPLES_PASSED` query.
+#[derive(Debug, Clone)]
+pub struct GpuSampler<T> {
+    /// Tag the sampler was started with.
+    pub tag: T,
+    /// Query result read back from GL; 0 until the samples are built.
+    pub count: u64,
+}
+
+/// A pool of GL query object names together with the values recorded
+/// against them during one frame.
+pub struct QuerySet<T> {
+    /// Query object names available to this set; empty while disabled.
+    set: Vec<gl::GLuint>,
+    /// Values added so far; `data[i]` is paired with `set[i]`.
+    data: Vec<T>,
+    /// Name of the query that has been begun and not yet ended, or 0.
+    pending: gl::GLuint,
+}
+
+impl<T> QuerySet<T> {
+    /// Creates an empty set that owns no query objects.
+    fn new() -> Self {
+        QuerySet {
+            set: Vec::new(),
+            data: Vec::new(),
+            pending: 0,
+        }
+    }
+
+    /// Forgets the recorded values and the pending query, keeping the
+    /// query object names for reuse.
+    fn reset(&mut self) {
+        self.data.clear();
+        self.pending = 0;
+    }
+
+    /// Records `value` against the next unused query name and marks that
+    /// query as pending.
+    ///
+    /// Returns the query name, or `None` (dropping `value`) when every name
+    /// in the set is already used, which is always the case while the set
+    /// is empty. Panics if a query is still pending.
+    fn add(&mut self, value: T) -> Option<gl::GLuint> {
+        assert_eq!(self.pending, 0);
+        self.set.get(self.data.len()).cloned().map(|query_id| {
+            self.data.push(value);
+            self.pending = query_id;
+            query_id
+        })
+    }
+
+    /// Moves the recorded values out, calling `fun` on each value with the
+    /// query name it was recorded against so the result can be filled in.
+    fn take<F: Fn(&mut T, gl::GLuint)>(&mut self, fun: F) -> Vec<T> {
+        let mut data = mem::replace(&mut self.data, Vec::new());
+        for (value, &query) in data.iter_mut().zip(self.set.iter()) {
+            fun(value, query)
+        }
+        data
+    }
+}
+
+/// GPU timer and sampler queries recorded for a single frame.
+pub struct GpuFrameProfile<T> {
+    gl: Rc<gl::Gl>,
+    timers: QuerySet<GpuTimer<T>>,
+    samplers: QuerySet<GpuSampler<T>>,
+    /// Id of the frame most recently begun on this profile.
+    frame_id: FrameId,
+    /// True between `begin_frame` and `end_frame`.
+    inside_frame: bool,
+}
+
+impl<T> GpuFrameProfile<T> {
+    /// Creates a profile that owns no query objects yet; nothing is
+    /// recorded until `enable_timers` / `enable_samplers` is called.
+    ///
+    /// Panics unless the context type is `gl::GlType::Gl`.
+    fn new(gl: Rc<gl::Gl>) -> Self {
+        assert_eq!(gl.get_type(), gl::GlType::Gl);
+
+        GpuFrameProfile {
+            gl,
+            timers: QuerySet::new(),
+            samplers: QuerySet::new(),
+            frame_id: FrameId::new(0),
+            inside_frame: false,
+        }
+    }
+
+    /// Allocates `count` query objects for timers.
+    ///
+    /// Does nothing when timers are already enabled: overwriting `set`
+    /// would drop the names it holds without deleting them, leaking the
+    /// GL query objects on a repeated enable request.
+    fn enable_timers(&mut self, count: i32) {
+        if self.timers.set.is_empty() {
+            self.timers.set = self.gl.gen_queries(count);
+        }
+    }
+
+    /// Deletes the timer query objects, if any, and leaves the set empty.
+    fn disable_timers(&mut self) {
+        if !self.timers.set.is_empty() {
+            self.gl.delete_queries(&self.timers.set);
+        }
+        self.timers.set = Vec::new();
+    }
+
+    /// Allocates `count` query objects for samplers.
+    ///
+    /// Does nothing when samplers are already enabled, for the same reason
+    /// as `enable_timers`.
+    fn enable_samplers(&mut self, count: i32) {
+        if self.samplers.set.is_empty() {
+            self.samplers.set = self.gl.gen_queries(count);
+        }
+    }
+
+    /// Deletes the sampler query objects, if any, and leaves the set empty.
+    fn disable_samplers(&mut self) {
+        if !self.samplers.set.is_empty() {
+            self.gl.delete_queries(&self.samplers.set);
+        }
+        self.samplers.set = Vec::new();
+    }
+
+    /// Starts recording for `frame_id`, discarding previously recorded values.
+    fn begin_frame(&mut self, frame_id: FrameId) {
+        self.frame_id = frame_id;
+        self.timers.reset();
+        self.samplers.reset();
+        self.inside_frame = true;
+    }
+
+    /// Ends any query still in flight and marks the frame as finished.
+    fn end_frame(&mut self) {
+        self.finish_timer();
+        self.finish_sampler();
+        self.inside_frame = false;
+    }
+
+    /// Ends the pending `TIME_ELAPSED` query, if there is one.
+    fn finish_timer(&mut self) {
+        debug_assert!(self.inside_frame);
+        if self.timers.pending != 0 {
+            self.gl.end_query(gl::TIME_ELAPSED);
+            self.timers.pending = 0;
+        }
+    }
+
+    /// Ends the pending `SAMPLES_PASSED` query, if there is one.
+    fn finish_sampler(&mut self) {
+        debug_assert!(self.inside_frame);
+        if self.samplers.pending != 0 {
+            self.gl.end_query(gl::SAMPLES_PASSED);
+            self.samplers.pending = 0;
+        }
+    }
+}
+
+impl<T: NamedTag> GpuFrameProfile<T> {
+    /// Ends the previous timer, opens a debug group marker labelled with
+    /// `tag`, and begins a `TIME_ELAPSED` query if a query object is free.
+    ///
+    /// The returned guard holds the marker, which is popped when it drops.
+    fn start_timer(&mut self, tag: T) -> GpuTimeQuery {
+        self.finish_timer();
+
+        // The label must be read before `tag` is moved into the timer.
+        let marker = GpuMarker::new(&self.gl, tag.get_label());
+
+        if let Some(query) = self.timers.add(GpuTimer { tag, time_ns: 0 }) {
+            self.gl.begin_query(gl::TIME_ELAPSED, query);
+        }
+
+        GpuTimeQuery(marker)
+    }
+
+    /// Ends the previous sampler and begins a `SAMPLES_PASSED` query for
+    /// `tag` if a query object is free.
+    fn start_sampler(&mut self, tag: T) -> GpuSampleQuery {
+        self.finish_sampler();
+
+        if let Some(query) = self.samplers.add(GpuSampler { tag, count: 0 }) {
+            self.gl.begin_query(gl::SAMPLES_PASSED, query);
+        }
+
+        GpuSampleQuery
+    }
+
+    /// Reads back the result of every recorded query and returns the frame
+    /// id with the filled-in timers and samplers. Must not be called
+    /// between `begin_frame` and `end_frame` (debug-asserted).
+    fn build_samples(&mut self) -> (FrameId, Vec<GpuTimer<T>>, Vec<GpuSampler<T>>) {
+        debug_assert!(!self.inside_frame);
+        let gl = &self.gl;
+
+        (
+            self.frame_id,
+            self.timers.take(|timer, query| {
+                timer.time_ns = gl.get_query_object_ui64v(query, gl::QUERY_RESULT)
+            }),
+            self.samplers.take(|sampler, query| {
+                sampler.count = gl.get_query_object_ui64v(query, gl::QUERY_RESULT)
+            }),
+        )
+    }
+}
+
+/// Deletes any query objects still owned when the profile goes away.
+impl<T> Drop for GpuFrameProfile<T> {
+    fn drop(&mut self) {
+        self.disable_timers();
+        self.disable_samplers();
+    }
+}
+
+/// A ring of per-frame GPU profiles plus the GL handle used for markers.
+pub struct GpuProfiler<T> {
+    gl: Rc<gl::Gl>,
+    frames: Vec<GpuFrameProfile<T>>,
+    /// Index into `frames` of the profile used for the current frame.
+    next_frame: usize,
+}
+
+impl<T> GpuProfiler<T> {
+    /// Creates a profiler with a fixed number of frame slots and with both
+    /// timers and samplers disabled.
+    pub fn new(gl: Rc<gl::Gl>) -> Self {
+        const MAX_PROFILE_FRAMES: usize = 4;
+        let frames = (0 .. MAX_PROFILE_FRAMES)
+            .map(|_| GpuFrameProfile::new(Rc::clone(&gl)))
+            .collect();
+
+        GpuProfiler {
+            gl,
+            next_frame: 0,
+            frames,
+        }
+    }
+
+    /// Enables time queries on every frame slot.
+    pub fn enable_timers(&mut self) {
+        const MAX_TIMERS_PER_FRAME: i32 = 256;
+
+        for frame in &mut self.frames {
+            frame.enable_timers(MAX_TIMERS_PER_FRAME);
+        }
+    }
+
+    /// Disables time queries on every frame slot.
+    pub fn disable_timers(&mut self) {
+        for frame in &mut self.frames {
+            frame.disable_timers();
+        }
+    }
+
+    /// Flips time queries on or off, judging the current state from the
+    /// first frame slot.
+    pub fn toggle_timers_enabled(&mut self) {
+        if self.frames[0].timers.set.is_empty() {
+            self.enable_timers();
+        } else {
+            self.disable_timers();
+        }
+    }
+
+    /// Enables sample queries on every frame slot, logging a warning on
+    /// macOS first.
+    pub fn enable_samplers(&mut self) {
+        const MAX_SAMPLERS_PER_FRAME: i32 = 16;
+        if cfg!(target_os = "macos") {
+            warn!("Expect OSX driver bugs related to sample queries")
+        }
+
+        for frame in &mut self.frames {
+            frame.enable_samplers(MAX_SAMPLERS_PER_FRAME);
+        }
+    }
+
+    /// Disables sample queries on every frame slot.
+    pub fn disable_samplers(&mut self) {
+        for frame in &mut self.frames {
+            frame.disable_samplers();
+        }
+    }
+
+    /// Flips sample queries on or off, judging the current state from the
+    /// first frame slot.
+    pub fn toggle_samplers_enabled(&mut self) {
+        if self.frames[0].samplers.set.is_empty() {
+            self.enable_samplers();
+        } else {
+            self.disable_samplers();
+        }
+    }
+}
+
+impl<T: NamedTag> GpuProfiler<T> {
+    /// Reads back the samples stored in the current frame slot, i.e. the
+    /// slot the next `begin_frame` is about to reuse.
+    pub fn build_samples(&mut self) -> (FrameId, Vec<GpuTimer<T>>, Vec<GpuSampler<T>>) {
+        self.frames[self.next_frame].build_samples()
+    }
+
+    /// Starts recording `frame_id` into the current frame slot.
+    pub fn begin_frame(&mut self, frame_id: FrameId) {
+        self.frames[self.next_frame].begin_frame(frame_id);
+    }
+
+    /// Finishes the current frame slot and advances to the next one,
+    /// wrapping around at the end of the ring.
+    pub fn end_frame(&mut self) {
+        self.frames[self.next_frame].end_frame();
+        self.next_frame = (self.next_frame + 1) % self.frames.len();
+    }
+
+    /// Starts a time query for `tag` in the current frame slot.
+    pub fn start_timer(&mut self, tag: T) -> GpuTimeQuery {
+        self.frames[self.next_frame].start_timer(tag)
+    }
+
+    /// Starts a sample query for `tag` in the current frame slot.
+    pub fn start_sampler(&mut self, tag: T) -> GpuSampleQuery {
+        self.frames[self.next_frame].start_sampler(tag)
+    }
+
+    /// Ends the pending sample query; the token is consumed only to tie
+    /// this call to an earlier `start_sampler`.
+    pub fn finish_sampler(&mut self, _sampler: GpuSampleQuery) {
+        self.frames[self.next_frame].finish_sampler()
+    }
+
+    /// Opens a debug group marker that stays open until the returned
+    /// guard is dropped.
+    pub fn start_marker(&mut self, label: &str) -> GpuMarker {
+        GpuMarker::new(&self.gl, label)
+    }
+
+    /// Inserts a single debug event marker.
+    pub fn place_marker(&mut self, label: &str) {
+        GpuMarker::fire(&self.gl, label)
+    }
+}
+
+/// Guard for a debug group marker; the group is popped when it is dropped.
+#[must_use]
+pub struct GpuMarker {
+    /// GL handle to pop the group on; `None` when no group was pushed.
+    gl: Option<Rc<gl::Gl>>,
+}
+
+impl GpuMarker {
+    /// Pushes a group marker labelled `message` when the context type is
+    /// `gl::GlType::Gl`; otherwise returns an inert guard.
+    fn new(gl: &Rc<gl::Gl>, message: &str) -> Self {
+        if gl.get_type() == gl::GlType::Gl {
+            gl.push_group_marker_ext(message);
+            GpuMarker { gl: Some(Rc::clone(gl)) }
+        } else {
+            GpuMarker { gl: None }
+        }
+    }
+
+    /// Inserts a one-off event marker labelled `message` when the context
+    /// type is `gl::GlType::Gl`; otherwise does nothing.
+    fn fire(gl: &Rc<gl::Gl>, message: &str) {
+        if gl.get_type() == gl::GlType::Gl {
+            gl.insert_event_marker_ext(message);
+        }
+    }
+}
+
+/// Closes the marker group on drop, but only if one was opened.
+impl Drop for GpuMarker {
+    fn drop(&mut self) {
+        match self.gl {
+            Some(ref context) => context.pop_group_marker_ext(),
+            None => {}
+        }
+    }
+}
+
+/// Guard returned by `start_timer`; it owns the group marker opened for the
+/// timer, so the marker is popped when this value is dropped.
+#[must_use]
+pub struct GpuTimeQuery(GpuMarker);
+/// Token returned by `start_sampler` and handed back to `finish_sampler`.
+#[must_use]
+pub struct GpuSampleQuery;
diff --git a/webrender/src/renderer.rs b/webrender/src/renderer.rs
index f33fd1dd14..199e867a36 100644
--- a/webrender/src/renderer.rs
+++ b/webrender/src/renderer.rs
@@ -24,11 +24,11 @@ use debug_colors;
use debug_render::DebugRenderer;
#[cfg(feature = "debugger")]
use debug_server::{self, DebugServer};
-use device::{DepthFunction, Device, FrameId, GpuMarker, GpuProfiler, Program, Texture,
+use device::{DepthFunction, Device, FrameId, Program, Texture,
VertexDescriptor, PBO};
use device::{get_gl_format_bgra, ExternalTexture, FBOId, TextureSlot, VertexAttribute,
VertexAttributeKind};
-use device::{FileWatcherHandler, GpuTimer, ShaderError, TextureFilter, TextureTarget,
+use device::{FileWatcherHandler, ShaderError, TextureFilter, TextureTarget,
VertexUsageHint, VAO};
use euclid::{rect, Transform3D};
use frame_builder::FrameBuilderConfig;
@@ -41,6 +41,7 @@ use internal_types::{CacheTextureId, FastHashMap, RendererFrame, ResultMsg, Text
use internal_types::{DebugOutput, RenderTargetMode, TextureUpdateList, TextureUpdateSource};
use profiler::{BackendProfileCounters, Profiler};
use profiler::{GpuProfileTag, RendererProfileCounters, RendererProfileTimers};
+use query::{GpuProfiler, GpuTimer};
use rayon::Configuration as ThreadPoolConfig;
use rayon::ThreadPool;
use record::ApiRecordingReceiver;
@@ -1838,8 +1839,7 @@ impl Renderer {
};
let gpu_cache_texture = CacheTexture::new(&mut device);
-
- let gpu_profile = GpuProfiler::new(device.rc_gl());
+ let gpu_profile = GpuProfiler::new(Rc::clone(device.rc_gl()));
let renderer = Renderer {
result_rx,
@@ -2153,6 +2153,16 @@ impl Renderer {
} else {
self.debug_flags.remove(DebugFlags::ALPHA_PRIM_DBG);
},
+ DebugCommand::EnableGpuTimeQueries(enable) => if enable {
+ self.gpu_profile.enable_timers();
+ } else {
+ self.gpu_profile.disable_timers();
+ },
+ DebugCommand::EnableGpuSampleQueries(enable) => if enable {
+ self.gpu_profile.enable_samplers();
+ } else {
+ self.gpu_profile.disable_samplers();
+ },
DebugCommand::FetchDocuments => {}
DebugCommand::FetchClipScrollTree => {}
DebugCommand::FetchPasses => {
@@ -2162,6 +2172,11 @@ impl Renderer {
}
}
+ /// Toggles GPU time queries and GPU sample queries. Each kind is flipped
+ /// from its own current state, so the two are not forced to match.
+ pub fn toggle_queries_enabled(&mut self) {
+ self.gpu_profile.toggle_timers_enabled();
+ self.gpu_profile.toggle_samplers_enabled();
+ }
+
/// Set a callback for handling external images.
pub fn set_external_image_handler(&mut self, handler: Box) {
self.external_image_handler = Some(handler);
@@ -2189,30 +2204,26 @@ impl Renderer {
if let Some(mut frame) = self.current_frame.take() {
if let Some(ref mut frame) = frame.frame {
let mut profile_timers = RendererProfileTimers::new();
- let mut profile_samplers = Vec::new();
-
- {
- //Note: avoiding `self.gpu_profile.add_marker` - it would block here
- let _gm = GpuMarker::new(self.device.rc_gl(), "build samples");
+ let profile_samplers = {
+ let _gm = self.gpu_profile.start_marker("build samples");
// Block CPU waiting for last frame's GPU profiles to arrive.
// In general this shouldn't block unless heavily GPU limited.
- if let Some((gpu_frame_id, timers, samplers)) = self.gpu_profile.build_samples()
- {
- if self.max_recorded_profiles > 0 {
- while self.gpu_profiles.len() >= self.max_recorded_profiles {
- self.gpu_profiles.pop_front();
- }
- self.gpu_profiles
- .push_back(GpuProfile::new(gpu_frame_id, &timers));
+ let (gpu_frame_id, timers, samplers) = self.gpu_profile.build_samples();
+
+ if self.max_recorded_profiles > 0 {
+ while self.gpu_profiles.len() >= self.max_recorded_profiles {
+ self.gpu_profiles.pop_front();
}
- profile_timers.gpu_samples = timers;
- profile_samplers = samplers;
+ self.gpu_profiles
+ .push_back(GpuProfile::new(gpu_frame_id, &timers));
}
- }
+ profile_timers.gpu_samples = timers;
+ samplers
+ };
let cpu_frame_id = profile_timers.cpu_time.profile(|| {
let cpu_frame_id = {
- let _gm = GpuMarker::new(self.device.rc_gl(), "begin frame");
+ let _gm = self.gpu_profile.start_marker("begin frame");
let frame_id = self.device.begin_frame(frame.device_pixel_ratio);
self.gpu_profile.begin_frame(frame_id);
@@ -2257,10 +2268,10 @@ impl Renderer {
}
if self.debug_flags.contains(DebugFlags::PROFILER_DBG) {
+ let _gm = self.gpu_profile.start_marker("profile");
let screen_fraction = 1.0 / //TODO: take device/pixel ratio into equation?
(framebuffer_size.width as f32 * framebuffer_size.height as f32);
self.profiler.draw_profile(
- &mut self.device,
&frame.profile_counters,
&self.backend_profile_counters,
&self.profile_counters,
@@ -2274,13 +2285,16 @@ impl Renderer {
self.profile_counters.reset();
self.profile_counters.frame_counter.inc();
- let debug_size = DeviceUintSize::new(
- framebuffer_size.width as u32,
- framebuffer_size.height as u32,
- );
- self.debug.render(&mut self.device, &debug_size);
{
- let _gm = GpuMarker::new(self.device.rc_gl(), "end frame");
+ let _gm = self.gpu_profile.start_marker("debug");
+ let debug_size = DeviceUintSize::new(
+ framebuffer_size.width as u32,
+ framebuffer_size.height as u32,
+ );
+ self.debug.render(&mut self.device, &debug_size);
+ }
+ {
+ let _gm = self.gpu_profile.start_marker("end frame");
self.device.end_frame();
}
self.last_time = current_time;
@@ -2304,7 +2318,7 @@ impl Renderer {
}
fn update_gpu_cache(&mut self, frame: &mut Frame) {
- let _gm = GpuMarker::new(self.device.rc_gl(), "gpu cache update");
+ let _gm = self.gpu_profile.start_marker("gpu cache update");
for update_list in self.pending_gpu_cache_updates.drain(..) {
self.gpu_cache_texture
.update(&mut self.device, &update_list);
@@ -2314,7 +2328,7 @@ impl Renderer {
}
fn update_texture_cache(&mut self) {
- let _gm = GpuMarker::new(self.device.rc_gl(), "texture cache update");
+ let _gm = self.gpu_profile.start_marker("texture cache update");
let mut pending_texture_updates = mem::replace(&mut self.pending_texture_updates, vec![]);
for update_list in pending_texture_updates.drain(..) {
@@ -2711,7 +2725,7 @@ impl Renderer {
_ => {}
}
- let _gm = self.gpu_profile.add_marker(marker);
+ let _timer = self.gpu_profile.start_timer(marker);
self.draw_instanced_batch(instances, VertexArrayKind::Primitive, &key.textures);
}
@@ -2750,7 +2764,7 @@ impl Renderer {
frame_id: FrameId,
) {
{
- let _gm = self.gpu_profile.add_marker(GPU_TAG_SETUP_TARGET);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_TARGET);
self.device
.bind_draw_target(render_target, Some(target_size));
self.device.disable_depth();
@@ -2781,7 +2795,7 @@ impl Renderer {
// fast path blur shaders for common
// blur radii with fixed weights.
if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
- let _gm = self.gpu_profile.add_marker(GPU_TAG_BLUR);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_BLUR);
self.device.set_blend(false);
self.cs_blur_rgba8
@@ -2816,7 +2830,7 @@ impl Renderer {
self.device.set_blend(true);
self.device.set_blend_mode_premultiplied_alpha();
- let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_TEXT_RUN);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_TEXT_RUN);
self.cs_text_run
.bind(&mut self.device, projection, 0, &mut self.renderer_errors);
for (texture_id, instances) in &target.text_run_cache_prims {
@@ -2833,7 +2847,7 @@ impl Renderer {
self.device.set_blend(true);
self.device.set_blend_mode_premultiplied_alpha();
- let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_LINE);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_LINE);
self.cs_line
.bind(&mut self.device, projection, 0, &mut self.renderer_errors);
self.draw_instanced_batch(
@@ -2846,11 +2860,11 @@ impl Renderer {
//TODO: record the pixel count for cached primitives
if !target.alpha_batcher.is_empty() {
- let _gm2 = GpuMarker::new(self.device.rc_gl(), "alpha batches");
+ let _gl = self.gpu_profile.start_marker("alpha batches");
self.device.set_blend(false);
let mut prev_blend_mode = BlendMode::None;
- self.gpu_profile.add_sampler(GPU_SAMPLER_TAG_OPAQUE);
+ let opaque_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
//Note: depth equality is needed for split planes
self.device.set_depth_func(DepthFunction::LessEqual);
@@ -2878,7 +2892,8 @@ impl Renderer {
}
self.device.disable_depth_write();
- self.gpu_profile.add_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
+ self.gpu_profile.finish_sampler(opaque_sampler);
+ let transparent_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
for batch in &target.alpha_batcher.batch_list.alpha_batch_list.batches {
if self.debug_flags.contains(DebugFlags::ALPHA_PRIM_DBG) {
@@ -2904,7 +2919,7 @@ impl Renderer {
// 1) Use dual source blending where available (almost all recent hardware).
// 2) Use frame buffer fetch where available (most modern hardware).
// 3) Consider the old constant color blend method where no clip is applied.
- let _gm = self.gpu_profile.add_marker(GPU_TAG_PRIM_TEXT_RUN);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_PRIM_TEXT_RUN);
self.device.set_blend(true);
@@ -3078,7 +3093,7 @@ impl Renderer {
self.device.disable_depth();
self.device.set_blend(false);
- self.gpu_profile.done_sampler();
+ self.gpu_profile.finish_sampler(transparent_sampler);
}
// For any registered image outputs on this render target,
@@ -3123,10 +3138,10 @@ impl Renderer {
projection: &Transform3D,
render_tasks: &RenderTaskTree,
) {
- self.gpu_profile.add_sampler(GPU_SAMPLER_TAG_ALPHA);
+ let alpha_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_ALPHA);
{
- let _gm = self.gpu_profile.add_marker(GPU_TAG_SETUP_TARGET);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_TARGET);
self.device
.bind_draw_target(Some(render_target), Some(target_size));
self.device.disable_depth();
@@ -3157,7 +3172,7 @@ impl Renderer {
// fast path blur shaders for common
// blur radii with fixed weights.
if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
- let _gm = self.gpu_profile.add_marker(GPU_TAG_BLUR);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_BLUR);
self.device.set_blend(false);
self.cs_blur_a8
@@ -3185,7 +3200,7 @@ impl Renderer {
if !target.brush_mask_corners.is_empty() {
self.device.set_blend(false);
- let _gm = self.gpu_profile.add_marker(GPU_TAG_BRUSH_MASK);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_BRUSH_MASK);
self.brush_mask_corner
.bind(&mut self.device, projection, 0, &mut self.renderer_errors);
self.draw_instanced_batch(
@@ -3198,7 +3213,7 @@ impl Renderer {
if !target.brush_mask_rounded_rects.is_empty() {
self.device.set_blend(false);
- let _gm = self.gpu_profile.add_marker(GPU_TAG_BRUSH_MASK);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_BRUSH_MASK);
self.brush_mask_rounded_rect
.bind(&mut self.device, projection, 0, &mut self.renderer_errors);
self.draw_instanced_batch(
@@ -3210,13 +3225,13 @@ impl Renderer {
// Draw the clip items into the tiled alpha mask.
{
- let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_CLIP);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_CLIP);
// If we have border corner clips, the first step is to clear out the
// area in the clip mask. This allows drawing multiple invididual clip
// in regions below.
if !target.clip_batcher.border_clears.is_empty() {
- let _gm2 = GpuMarker::new(self.device.rc_gl(), "clip borders [clear]");
+ let _gm = self.gpu_profile.start_marker("clip borders [clear]");
self.device.set_blend(false);
self.cs_clip_border
.bind(&mut self.device, projection, 0, &mut self.renderer_errors);
@@ -3229,7 +3244,7 @@ impl Renderer {
// Draw any dots or dashes for border corners.
if !target.clip_batcher.borders.is_empty() {
- let _gm2 = GpuMarker::new(self.device.rc_gl(), "clip borders");
+ let _gm = self.gpu_profile.start_marker("clip borders");
// We are masking in parts of the corner (dots or dashes) here.
// Blend mode is set to max to allow drawing multiple dots.
// The individual dots and dashes in a border never overlap, so using
@@ -3251,7 +3266,7 @@ impl Renderer {
// draw rounded cornered rectangles
if !target.clip_batcher.rectangles.is_empty() {
- let _gm2 = GpuMarker::new(self.device.rc_gl(), "clip rectangles");
+ let _gm = self.gpu_profile.start_marker("clip rectangles");
self.cs_clip_rectangle.bind(
&mut self.device,
projection,
@@ -3266,7 +3281,7 @@ impl Renderer {
}
// draw image masks
for (mask_texture_id, items) in target.clip_batcher.images.iter() {
- let _gm2 = GpuMarker::new(self.device.rc_gl(), "clip images");
+ let _gm = self.gpu_profile.start_marker("clip images");
let textures = BatchTextures {
colors: [
mask_texture_id.clone(),
@@ -3280,7 +3295,7 @@ impl Renderer {
}
}
- self.gpu_profile.done_sampler();
+ self.gpu_profile.finish_sampler(alpha_sampler);
}
fn update_deferred_resolves(&mut self, frame: &mut Frame) {
@@ -3294,7 +3309,7 @@ impl Renderer {
.expect("Found external image, but no handler set!");
for deferred_resolve in &frame.deferred_resolves {
- GpuMarker::fire(self.device.gl(), "deferred resolve");
+ self.gpu_profile.place_marker("deferred resolve");
let props = &deferred_resolve.image_properties;
let ext_image = props
.external_image
@@ -3365,7 +3380,7 @@ impl Renderer {
}
fn start_frame(&mut self, frame: &mut Frame) {
- let _gm = self.gpu_profile.add_marker(GPU_TAG_SETUP_DATA);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_DATA);
// Assign render targets to the passes.
for pass in &mut frame.passes {
@@ -3447,7 +3462,7 @@ impl Renderer {
framebuffer_size: DeviceUintSize,
frame_id: FrameId,
) {
- let _gm = GpuMarker::new(self.device.rc_gl(), "tile frame draw");
+ let _gm = self.gpu_profile.start_marker("tile frame draw");
// Some tests use a restricted viewport smaller than the main screen size.
// Ensure we clear the framebuffer in these tests.
diff --git a/webrender_api/src/api.rs b/webrender_api/src/api.rs
index 45d1411037..2ba990106e 100644
--- a/webrender_api/src/api.rs
+++ b/webrender_api/src/api.rs
@@ -243,19 +243,23 @@ impl fmt::Debug for DocumentMsg {
#[derive(Debug, Clone, Deserialize, Serialize)]
pub enum DebugCommand {
- // Display the frame profiler on screen.
+ /// Display the frame profiler on screen.
EnableProfiler(bool),
- // Display all texture cache pages on screen.
+ /// Display all texture cache pages on screen.
EnableTextureCacheDebug(bool),
- // Display intermediate render targets on screen.
+ /// Display intermediate render targets on screen.
EnableRenderTargetDebug(bool),
- // Display alpha primitive rects.
+ /// Display alpha primitive rects.
EnableAlphaRectsDebug(bool),
- // Fetch current documents and display lists.
+ /// Display GPU timing results.
+ EnableGpuTimeQueries(bool),
+ /// Display GPU overdraw results.
+ EnableGpuSampleQueries(bool),
+ /// Fetch current documents and display lists.
FetchDocuments,
- // Fetch current passes and batches.
+ /// Fetch current passes and batches.
FetchPasses,
- // Fetch clip-scroll tree.
+ /// Fetch clip-scroll tree.
FetchClipScrollTree,
}
diff --git a/wrench/Cargo.toml b/wrench/Cargo.toml
index 3b48f9ce8c..be3e892413 100644
--- a/wrench/Cargo.toml
+++ b/wrench/Cargo.toml
@@ -24,7 +24,7 @@ time = "0.1"
crossbeam = "0.2"
osmesa-sys = { version = "0.1.2", optional = true }
osmesa-src = { git = "https://github.com/servo/osmesa-src", optional = true }
-webrender = {path = "../webrender", features=["debugger","query"]}
+webrender = {path = "../webrender", features=["debugger"]}
serde = {version = "1.0", features = ["derive"] }
[features]
diff --git a/wrench/src/main.rs b/wrench/src/main.rs
index 4f12280cb6..a237abb99f 100644
--- a/wrench/src/main.rs
+++ b/wrench/src/main.rs
@@ -577,7 +577,7 @@ fn main() {
}
glutin::Event::KeyboardInput(ElementState::Pressed, _scan_code, Some(vk)) => match vk {
- VirtualKeyCode::Escape | VirtualKeyCode::Q => {
+ VirtualKeyCode::Escape => {
break 'outer;
}
VirtualKeyCode::P => {
@@ -600,6 +600,9 @@ fn main() {
flags.toggle(webrender::DebugFlags::ALPHA_PRIM_DBG);
wrench.renderer.set_debug_flags(flags);
}
+ VirtualKeyCode::Q => {
+ wrench.renderer.toggle_queries_enabled();
+ }
VirtualKeyCode::M => {
wrench.api.notify_memory_pressure();
}
diff --git a/wrench/src/wrench.rs b/wrench/src/wrench.rs
index d9848ead1b..6d4d72d589 100644
--- a/wrench/src/wrench.rs
+++ b/wrench/src/wrench.rs
@@ -478,13 +478,14 @@ impl Wrench {
pub fn show_onscreen_help(&mut self) {
let help_lines = [
- "Esc, Q - Quit",
+ "Esc - Quit",
"H - Toggle help",
"R - Toggle recreating display items each frame",
"P - Toggle profiler",
"O - Toggle showing intermediate targets",
"I - Toggle showing texture caches",
"B - Toggle showing alpha primitive rects",
+ "Q - Toggle GPU queries for time and samples",
"M - Trigger memory pressure event",
];