From 196d6c015e4ffe081f7f6e9072eb65363172596a Mon Sep 17 00:00:00 2001 From: Bobby Holley Date: Tue, 30 Oct 2018 15:13:32 -0700 Subject: [PATCH 1/8] Track the total number of GPU bytes allocated by WebRender. This is useful for sanity-checking memory reports, and we'll also use it for our texture eviction heuristic. Differential Revision: https://phabricator.services.mozilla.com/D10672 --- webrender/src/device/gl.rs | 49 +++++++++++++++++++++++++++++++++----- webrender/src/lib.rs | 2 +- webrender_api/src/api.rs | 8 +++++++ 3 files changed, 52 insertions(+), 7 deletions(-) diff --git a/webrender/src/device/gl.rs b/webrender/src/device/gl.rs index 7d73344279..022b09adff 100644 --- a/webrender/src/device/gl.rs +++ b/webrender/src/device/gl.rs @@ -26,6 +26,7 @@ use std::ptr; use std::rc::Rc; use std::slice; use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, ATOMIC_USIZE_INIT, Ordering}; use std::thread; #[derive(Debug, Copy, Clone, PartialEq, Ord, Eq, PartialOrd)] @@ -33,6 +34,30 @@ use std::thread; #[cfg_attr(feature = "replay", derive(Deserialize))] pub struct FrameId(usize); +/// Tracks the total number of GPU bytes allocated across all WebRender instances. +/// +/// Assuming all WebRender instances run on the same thread, this doesn't need +/// to be atomic per se, but we make it atomic to satisfy the thread safety +/// invariants in the type system. We could also put the value in TLS, but that +/// would be more expensive to access. +static GPU_BYTES_ALLOCATED: AtomicUsize = ATOMIC_USIZE_INIT; + +/// Returns the number of GPU bytes currently allocated. +pub fn total_gpu_bytes_allocated() -> usize { + GPU_BYTES_ALLOCATED.load(Ordering::Relaxed) +} + +/// Records an allocation in VRAM. +fn record_gpu_alloc(num_bytes: usize) { + GPU_BYTES_ALLOCATED.fetch_add(num_bytes, Ordering::Relaxed); +} + +/// Records an deallocation in VRAM. +fn record_gpu_free(num_bytes: usize) { + let old = GPU_BYTES_ALLOCATED.fetch_sub(num_bytes, Ordering::Relaxed); + assert!(old >= num_bytes, "Freeing {} bytes but only {} allocated", num_bytes, old); +} + impl FrameId { pub fn new(value: usize) -> Self { FrameId(value) @@ -117,6 +142,14 @@ pub unsafe trait Texel: Copy {} unsafe impl Texel for u8 {} unsafe impl Texel for f32 {} +/// Returns the size in bytes of a depth target with the given dimensions. +fn depth_target_size_in_bytes(dimensions: &DeviceUintSize) -> usize { + // DEPTH24 textures generally reserve 3 bytes for depth and 1 byte + // for stencil, so we measure them as 32 bits. + let pixels = dimensions.width * dimensions.height; + (pixels as usize) * 4 +} + #[derive(Clone, Copy, Debug, PartialEq)] pub enum ReadPixelsFormat { Standard(ImageFormat), @@ -1428,6 +1461,8 @@ impl Device { } } + record_gpu_alloc(texture.size_in_bytes()); + texture } @@ -1591,6 +1626,9 @@ impl Device { refcount: 0, } }); + if target.refcount == 0 { + record_gpu_alloc(depth_target_size_in_bytes(&dimensions)); + } target.refcount += 1; target.rbo_id } @@ -1603,8 +1641,9 @@ impl Device { debug_assert!(entry.get().refcount != 0); entry.get_mut().refcount -= 1; if entry.get().refcount == 0 { - let t = entry.remove(); - self.gl.delete_renderbuffers(&[t.rbo_id.0]); + let (dimensions, target) = entry.remove_entry(); + self.gl.delete_renderbuffers(&[target.rbo_id.0]); + record_gpu_free(depth_target_size_in_bytes(&dimensions)); } } @@ -1627,6 +1666,7 @@ impl Device { pub fn delete_texture(&mut self, mut texture: Texture) { debug_assert!(self.inside_frame); + record_gpu_free(texture.size_in_bytes()); let had_depth = texture.supports_depth(); self.deinit_fbos(&mut texture.fbos); self.deinit_fbos(&mut texture.fbos_with_depth); @@ -2465,10 +2505,7 @@ impl Device { pub fn report_memory(&self) -> MemoryReport { let mut report = MemoryReport::default(); for dim in self.depth_targets.keys() { - // DEPTH24 textures generally reserve 3 bytes for depth and 1 byte - // for stencil, so we measure them as 32 bytes. - let pixels: u32 = dim.width * dim.height; - report.depth_target_textures += (pixels as usize) * 4; + report.depth_target_textures += depth_target_size_in_bytes(dim); } report } diff --git a/webrender/src/lib.rs b/webrender/src/lib.rs index 2400b83be8..a29433d66c 100644 --- a/webrender/src/lib.rs +++ b/webrender/src/lib.rs @@ -182,7 +182,7 @@ pub extern crate webrender_api; #[doc(hidden)] pub use device::{build_shader_strings, ReadPixelsFormat, UploadMethod, VertexUsageHint}; pub use device::{ProgramBinary, ProgramCache, ProgramCacheObserver, ProgramSources}; -pub use device::{Device}; +pub use device::{Device, total_gpu_bytes_allocated}; pub use frame_builder::ChasePrimitive; pub use renderer::{AsyncPropertySampler, CpuProfile, DebugFlags, OutputImageHandler, RendererKind}; pub use renderer::{ExternalImage, ExternalImageHandler, ExternalImageSource, GpuProfile}; diff --git a/webrender_api/src/api.rs b/webrender_api/src/api.rs index f12234833d..7f3c065ddb 100644 --- a/webrender_api/src/api.rs +++ b/webrender_api/src/api.rs @@ -792,6 +792,10 @@ pub struct MemoryReport { pub render_target_textures: usize, pub texture_cache_textures: usize, pub depth_target_textures: usize, + // + // GPU memory total (tracked separately, should equal the sum of the above). + // + pub total_gpu_bytes_allocated: usize, } impl ::std::ops::AddAssign for MemoryReport { @@ -810,6 +814,10 @@ impl ::std::ops::AddAssign for MemoryReport { self.render_target_textures += other.render_target_textures; self.texture_cache_textures += other.texture_cache_textures; self.depth_target_textures += other.depth_target_textures; + + // The total_gpu_memory value accounts for all WebRender instances, and + // thus can't be aggregated. It should really be reported out of band, + // but putting it in this struct facilitates sending it across Gecko IPC. } } From 1cde227490460c09a31d8daacc0801e828a80c5a Mon Sep 17 00:00:00 2001 From: Bobby Holley Date: Wed, 31 Oct 2018 10:43:49 -0700 Subject: [PATCH 2/8] Switch GpuCache frame_id tracking from device::FrameId to render_backend::FrameId. It should really be using the latter, so that it can avoid hand-rolling the frame increments and just use the one already available on frame_builder. In the next patch, we rename the types to make the difference clearer. Differential Revision: https://phabricator.services.mozilla.com/D10673 --- webrender/src/frame_builder.rs | 2 +- webrender/src/gpu_cache.rs | 10 +++++----- webrender/src/render_backend.rs | 18 ++++++++++++++++++ webrender/src/renderer.rs | 10 +++++----- webrender/src/tiling.rs | 4 ++-- 5 files changed, 31 insertions(+), 13 deletions(-) diff --git a/webrender/src/frame_builder.rs b/webrender/src/frame_builder.rs index 90af43f385..14436caae8 100644 --- a/webrender/src/frame_builder.rs +++ b/webrender/src/frame_builder.rs @@ -337,7 +337,7 @@ impl FrameBuilder { .set(self.prim_store.prim_count()); resource_cache.begin_frame(frame_id); - gpu_cache.begin_frame(); + gpu_cache.begin_frame(frame_id); let mut transform_palette = TransformPalette::new(); clip_scroll_tree.update_tree( diff --git a/webrender/src/gpu_cache.rs b/webrender/src/gpu_cache.rs index 4ba82e1bd5..fc55d87dfc 100644 --- a/webrender/src/gpu_cache.rs +++ b/webrender/src/gpu_cache.rs @@ -26,9 +26,9 @@ use api::{PremultipliedColorF, TexelRect}; use api::{VoidPtrToSizeFn}; -use device::FrameId; use euclid::TypedRect; use profiler::GpuCacheProfileCounters; +use render_backend::FrameId; use renderer::MAX_VERTEX_TEXTURE_WIDTH; use std::{mem, u16, u32}; use std::ops::Add; @@ -36,7 +36,7 @@ use std::os::raw::c_void; pub const GPU_CACHE_INITIAL_HEIGHT: u32 = 512; -const FRAMES_BEFORE_EVICTION: usize = 10; +const FRAMES_BEFORE_EVICTION: u32 = 10; const NEW_ROWS_PER_RESIZE: u32 = 512; #[derive(Debug, Copy, Clone, Eq, PartialEq)] @@ -549,7 +549,7 @@ pub struct GpuCache { impl GpuCache { pub fn new() -> Self { GpuCache { - frame_id: FrameId::new(0), + frame_id: FrameId::invalid(), texture: Texture::new(), saved_block_count: 0, in_debug: false, @@ -557,9 +557,9 @@ impl GpuCache { } /// Begin a new frame. - pub fn begin_frame(&mut self) { + pub fn begin_frame(&mut self, frame_id: FrameId) { debug_assert!(self.texture.pending_blocks.is_empty()); - self.frame_id = self.frame_id + 1; + self.frame_id = frame_id; self.texture.evict_old_blocks(self.frame_id); self.saved_block_count = 0; } diff --git a/webrender/src/render_backend.rs b/webrender/src/render_backend.rs index 346088c7de..ac22624a81 100644 --- a/webrender/src/render_backend.rs +++ b/webrender/src/render_backend.rs @@ -85,6 +85,24 @@ impl DocumentView { #[cfg_attr(feature = "replay", derive(Deserialize))] pub struct FrameId(pub u32); +impl FrameId { + /// Returns an invalid sentinel FrameId, which will always compare less than + /// any valid FrameId. + /// + /// FIXME(bholley): That last part isn't entirely true yet, but will be shortly. + pub fn invalid() -> Self { + FrameId(0) + } +} + +impl ::std::ops::Add for FrameId { + type Output = FrameId; + + fn add(self, other: u32) -> FrameId { + FrameId(self.0 + other) + } +} + // A collection of resources that are shared by clips, primitives // between display lists. #[cfg_attr(feature = "capture", derive(Serialize))] diff --git a/webrender/src/renderer.rs b/webrender/src/renderer.rs index bceac6748b..4e4711ce5d 100644 --- a/webrender/src/renderer.rs +++ b/webrender/src/renderer.rs @@ -1557,7 +1557,7 @@ pub struct Renderer { #[cfg(feature = "debug_renderer")] gpu_cache_debug_chunks: Vec, - gpu_cache_frame_id: FrameId, + gpu_cache_frame_id: ::render_backend::FrameId, gpu_cache_overflow: bool, pipeline_info: PipelineInfo, @@ -2022,7 +2022,7 @@ impl Renderer { gpu_cache_texture, #[cfg(feature = "debug_renderer")] gpu_cache_debug_chunks: Vec::new(), - gpu_cache_frame_id: FrameId::new(0), + gpu_cache_frame_id: ::render_backend::FrameId::invalid(), gpu_cache_overflow: false, texture_cache_upload_pbo, texture_resolver, @@ -2736,7 +2736,7 @@ impl Renderer { let gpu_cache_height = self.gpu_cache_texture.get_height(); if gpu_cache_height != 0 && GPU_CACHE_RESIZE_TEST { self.pending_gpu_cache_updates.push(GpuCacheUpdateList { - frame_id: FrameId::new(0), + frame_id: ::render_backend::FrameId::invalid(), height: gpu_cache_height, blocks: vec![[1f32; 4].into()], updates: Vec::new(), @@ -3745,7 +3745,7 @@ impl Renderer { .expect("Found external image, but no handler set!"); let mut list = GpuCacheUpdateList { - frame_id: FrameId::new(0), + frame_id: ::render_backend::FrameId::invalid(), height: self.gpu_cache_texture.get_height(), blocks: Vec::new(), updates: Vec::new(), @@ -4708,7 +4708,7 @@ struct PlainTexture { #[cfg_attr(feature = "replay", derive(Deserialize))] struct PlainRenderer { gpu_cache: PlainTexture, - gpu_cache_frame_id: FrameId, + gpu_cache_frame_id: ::render_backend::FrameId, textures: FastHashMap, external_images: Vec } diff --git a/webrender/src/tiling.rs b/webrender/src/tiling.rs index 8fe39ed1fc..dba71b2490 100644 --- a/webrender/src/tiling.rs +++ b/webrender/src/tiling.rs @@ -8,7 +8,7 @@ use api::{MixBlendMode, PipelineId, DeviceRect, LayoutSize}; use batch::{AlphaBatchBuilder, AlphaBatchContainer, ClipBatcher, resolve_image}; use clip::ClipStore; use clip_scroll_tree::{ClipScrollTree}; -use device::{FrameId, Texture}; +use device::{Texture}; #[cfg(feature = "pathfinder")] use euclid::{TypedPoint2D, TypedVector2D}; use gpu_cache::{GpuCache}; @@ -20,7 +20,7 @@ use pathfinder_partitioner::mesh::Mesh; use picture::SurfaceInfo; use prim_store::{PrimitiveStore, DeferredResolve}; use profiler::FrameProfileCounters; -use render_backend::FrameResources; +use render_backend::{FrameId, FrameResources}; use render_task::{BlitSource, RenderTaskAddress, RenderTaskId, RenderTaskKind}; use render_task::{BlurTask, ClearMode, GlyphTask, RenderTaskLocation, RenderTaskTree, ScalingTask}; use resource_cache::ResourceCache; From d82754dc5c40245212a5a5610f7b0eaa1a61359c Mon Sep 17 00:00:00 2001 From: Bobby Holley Date: Wed, 31 Oct 2018 10:22:20 -0700 Subject: [PATCH 3/8] Rename device::FrameId to device::GpuFrameId. This type identified frames as sequenced by the device layer, whereas render_backend::FrameId identifies frames as sequenced by the render backend. I'm not sure whether there are known cases where these will diverge, but it seems plausible that they might, and so we should more clearly distinguish the types. Differential Revision: https://phabricator.services.mozilla.com/D10674 --- webrender/src/device/gl.rs | 27 ++++++++++++++------------- webrender/src/device/query_gl.rs | 14 +++++++------- webrender/src/renderer.rs | 30 +++++++++++++++--------------- 3 files changed, 36 insertions(+), 35 deletions(-) diff --git a/webrender/src/device/gl.rs b/webrender/src/device/gl.rs index 022b09adff..0fd6f40be4 100644 --- a/webrender/src/device/gl.rs +++ b/webrender/src/device/gl.rs @@ -29,10 +29,11 @@ use std::sync::Arc; use std::sync::atomic::{AtomicUsize, ATOMIC_USIZE_INIT, Ordering}; use std::thread; +/// Sequence number for frames, as tracked by the device layer. #[derive(Debug, Copy, Clone, PartialEq, Ord, Eq, PartialOrd)] #[cfg_attr(feature = "capture", derive(Serialize))] #[cfg_attr(feature = "replay", derive(Deserialize))] -pub struct FrameId(usize); +pub struct GpuFrameId(usize); /// Tracks the total number of GPU bytes allocated across all WebRender instances. /// @@ -58,17 +59,17 @@ fn record_gpu_free(num_bytes: usize) { assert!(old >= num_bytes, "Freeing {} bytes but only {} allocated", num_bytes, old); } -impl FrameId { +impl GpuFrameId { pub fn new(value: usize) -> Self { - FrameId(value) + GpuFrameId(value) } } -impl Add for FrameId { - type Output = FrameId; +impl Add for GpuFrameId { + type Output = GpuFrameId; - fn add(self, other: usize) -> FrameId { - FrameId(self.0 + other) + fn add(self, other: usize) -> GpuFrameId { + GpuFrameId(self.0 + other) } } @@ -498,7 +499,7 @@ pub struct Texture { /// configurations). But that would complicate a lot of logic in this module, /// and FBOs are cheap enough to create. fbos_with_depth: Vec, - last_frame_used: FrameId, + last_frame_used: GpuFrameId, } impl Texture { @@ -523,13 +524,13 @@ impl Texture { !self.fbos_with_depth.is_empty() } - pub fn used_in_frame(&self, frame_id: FrameId) -> bool { + pub fn used_in_frame(&self, frame_id: GpuFrameId) -> bool { self.last_frame_used == frame_id } /// Returns true if this texture was used within `threshold` frames of /// the current frame. - pub fn used_recently(&self, current_frame_id: FrameId, threshold: usize) -> bool { + pub fn used_recently(&self, current_frame_id: GpuFrameId, threshold: usize) -> bool { self.last_frame_used + threshold >= current_frame_id } @@ -813,7 +814,7 @@ pub struct Device { // Frame counter. This is used to map between CPU // frames and GPU frames. - frame_id: FrameId, + frame_id: GpuFrameId, /// Whether glTexStorage* is supported. We prefer this over glTexImage* /// because it guarantees that mipmaps won't be generated (which they @@ -970,7 +971,7 @@ impl Device { max_texture_size, renderer_name, cached_programs, - frame_id: FrameId(0), + frame_id: GpuFrameId(0), extensions, supports_texture_storage, } @@ -1055,7 +1056,7 @@ impl Device { } } - pub fn begin_frame(&mut self) -> FrameId { + pub fn begin_frame(&mut self) -> GpuFrameId { debug_assert!(!self.inside_frame); self.inside_frame = true; diff --git a/webrender/src/device/query_gl.rs b/webrender/src/device/query_gl.rs index 3e7a0d00d8..0c62f5d69d 100644 --- a/webrender/src/device/query_gl.rs +++ b/webrender/src/device/query_gl.rs @@ -7,7 +7,7 @@ use gleam::gl; use std::mem; use std::rc::Rc; -use device::FrameId; +use device::GpuFrameId; pub trait NamedTag { @@ -69,7 +69,7 @@ pub struct GpuFrameProfile { gl: Rc, timers: QuerySet>, samplers: QuerySet>, - frame_id: FrameId, + frame_id: GpuFrameId, inside_frame: bool, ext_debug_marker: bool } @@ -80,7 +80,7 @@ impl GpuFrameProfile { gl, timers: QuerySet::new(), samplers: QuerySet::new(), - frame_id: FrameId::new(0), + frame_id: GpuFrameId::new(0), inside_frame: false, ext_debug_marker } @@ -108,7 +108,7 @@ impl GpuFrameProfile { self.samplers.set = Vec::new(); } - fn begin_frame(&mut self, frame_id: FrameId) { + fn begin_frame(&mut self, frame_id: GpuFrameId) { self.frame_id = frame_id; self.timers.reset(); self.samplers.reset(); @@ -162,7 +162,7 @@ impl GpuFrameProfile { } #[cfg(feature = "debug_renderer")] - fn build_samples(&mut self) -> (FrameId, Vec>, Vec>) { + fn build_samples(&mut self) -> (GpuFrameId, Vec>, Vec>) { debug_assert!(!self.inside_frame); let gl = &self.gl; @@ -241,11 +241,11 @@ impl GpuProfiler { impl GpuProfiler { #[cfg(feature = "debug_renderer")] - pub fn build_samples(&mut self) -> (FrameId, Vec>, Vec>) { + pub fn build_samples(&mut self) -> (GpuFrameId, Vec>, Vec>) { self.frames[self.next_frame].build_samples() } - pub fn begin_frame(&mut self, frame_id: FrameId) { + pub fn begin_frame(&mut self, frame_id: GpuFrameId) { self.frames[self.next_frame].begin_frame(frame_id); } diff --git a/webrender/src/renderer.rs b/webrender/src/renderer.rs index 4e4711ce5d..0f1ce06e27 100644 --- a/webrender/src/renderer.rs +++ b/webrender/src/renderer.rs @@ -35,7 +35,7 @@ use batch::{BatchKind, BatchTextures, BrushBatchKind}; #[cfg(any(feature = "capture", feature = "replay"))] use capture::{CaptureConfig, ExternalCaptureImage, PlainExternalImage}; use debug_colors; -use device::{DepthFunction, Device, FrameId, Program, UploadMethod, Texture, PBO}; +use device::{DepthFunction, Device, GpuFrameId, Program, UploadMethod, Texture, PBO}; use device::{DrawTarget, ExternalTexture, FBOId, ReadTarget, TextureSlot}; use device::{ShaderError, TextureFilter, VertexUsageHint, VAO, VBO, CustomVAO}; @@ -62,7 +62,7 @@ use profiler::{BackendProfileCounters, FrameProfileCounters, use device::query::GpuProfiler; use rayon::{ThreadPool, ThreadPoolBuilder}; use record::ApiRecordingReceiver; -use render_backend::RenderBackend; +use render_backend::{FrameId, RenderBackend}; use scene_builder::{SceneBuilder, LowPrioritySceneBuilder}; use shade::{Shaders, WrShaders}; use smallvec::SmallVec; @@ -705,13 +705,13 @@ pub enum RendererKind { #[derive(Debug)] pub struct GpuProfile { - pub frame_id: FrameId, + pub frame_id: GpuFrameId, pub paint_time_ns: u64, } impl GpuProfile { #[cfg(feature = "debug_renderer")] - fn new(frame_id: FrameId, timers: &[GpuTimer]) -> GpuProfile { + fn new(frame_id: GpuFrameId, timers: &[GpuTimer]) -> GpuProfile { let mut paint_time_ns = 0; for timer in timers { paint_time_ns += timer.time_ns; @@ -725,7 +725,7 @@ impl GpuProfile { #[derive(Debug)] pub struct CpuProfile { - pub frame_id: FrameId, + pub frame_id: GpuFrameId, pub backend_time_ns: u64, pub composite_time_ns: u64, pub draw_calls: usize, @@ -733,7 +733,7 @@ pub struct CpuProfile { impl CpuProfile { fn new( - frame_id: FrameId, + frame_id: GpuFrameId, backend_time_ns: u64, composite_time_ns: u64, draw_calls: usize, @@ -853,7 +853,7 @@ impl TextureResolver { assert!(self.saved_targets.is_empty()); } - fn end_frame(&mut self, device: &mut Device, frame_id: FrameId) { + fn end_frame(&mut self, device: &mut Device, frame_id: GpuFrameId) { // return the cached targets to the pool self.end_pass(device, None, None); // return the saved targets as well @@ -1441,7 +1441,7 @@ impl VertexDataTexture { } struct FrameOutput { - last_access: FrameId, + last_access: GpuFrameId, fbo_id: FBOId, } @@ -1557,7 +1557,7 @@ pub struct Renderer { #[cfg(feature = "debug_renderer")] gpu_cache_debug_chunks: Vec, - gpu_cache_frame_id: ::render_backend::FrameId, + gpu_cache_frame_id: FrameId, gpu_cache_overflow: bool, pipeline_info: PipelineInfo, @@ -2022,7 +2022,7 @@ impl Renderer { gpu_cache_texture, #[cfg(feature = "debug_renderer")] gpu_cache_debug_chunks: Vec::new(), - gpu_cache_frame_id: ::render_backend::FrameId::invalid(), + gpu_cache_frame_id: FrameId::invalid(), gpu_cache_overflow: false, texture_cache_upload_pbo, texture_resolver, @@ -2736,7 +2736,7 @@ impl Renderer { let gpu_cache_height = self.gpu_cache_texture.get_height(); if gpu_cache_height != 0 && GPU_CACHE_RESIZE_TEST { self.pending_gpu_cache_updates.push(GpuCacheUpdateList { - frame_id: ::render_backend::FrameId::invalid(), + frame_id: FrameId::invalid(), height: gpu_cache_height, blocks: vec![[1f32; 4].into()], updates: Vec::new(), @@ -3109,7 +3109,7 @@ impl Renderer { clear_color: Option<[f32; 4]>, render_tasks: &RenderTaskTree, projection: &Transform3D, - frame_id: FrameId, + frame_id: GpuFrameId, stats: &mut RendererStats, ) { self.profile_counters.color_targets.inc(); @@ -3745,7 +3745,7 @@ impl Renderer { .expect("Found external image, but no handler set!"); let mut list = GpuCacheUpdateList { - frame_id: ::render_backend::FrameId::invalid(), + frame_id: FrameId::invalid(), height: self.gpu_cache_texture.get_height(), blocks: Vec::new(), updates: Vec::new(), @@ -3945,7 +3945,7 @@ impl Renderer { frame: &mut Frame, framebuffer_size: Option, framebuffer_depth_is_ready: bool, - frame_id: FrameId, + frame_id: GpuFrameId, stats: &mut RendererStats, ) { let _gm = self.gpu_profile.start_marker("tile frame draw"); @@ -4708,7 +4708,7 @@ struct PlainTexture { #[cfg_attr(feature = "replay", derive(Deserialize))] struct PlainRenderer { gpu_cache: PlainTexture, - gpu_cache_frame_id: ::render_backend::FrameId, + gpu_cache_frame_id: FrameId, textures: FastHashMap, external_images: Vec } From 80ddfc3e4c64c26dbde0387e76f4c4ce562bae69 Mon Sep 17 00:00:00 2001 From: Bobby Holley Date: Wed, 31 Oct 2018 11:41:47 -0700 Subject: [PATCH 4/8] Clean up semantics around render_backend::FrameId. Differential Revision: https://phabricator.services.mozilla.com/D10675 --- webrender/src/glyph_rasterizer/mod.rs | 2 +- webrender/src/gpu_cache.rs | 2 +- webrender/src/prim_store.rs | 2 +- webrender/src/render_backend.rs | 49 +++++++++++++++++++++------ webrender/src/resource_cache.rs | 4 +-- webrender/src/texture_cache.rs | 8 ++--- 6 files changed, 48 insertions(+), 19 deletions(-) diff --git a/webrender/src/glyph_rasterizer/mod.rs b/webrender/src/glyph_rasterizer/mod.rs index 16e8ac4016..03a3a7a798 100644 --- a/webrender/src/glyph_rasterizer/mod.rs +++ b/webrender/src/glyph_rasterizer/mod.rs @@ -734,7 +734,7 @@ mod test_glyph_rasterizer { let mut gpu_cache = GpuCache::new(); let mut texture_cache = TextureCache::new(2048); let mut render_task_cache = RenderTaskCache::new(); - let mut render_task_tree = RenderTaskTree::new(FrameId(0)); + let mut render_task_tree = RenderTaskTree::new(FrameId::invalid()); let mut special_render_passes = SpecialRenderPasses::new(&DeviceIntSize::new(1366, 768)); let mut font_file = diff --git a/webrender/src/gpu_cache.rs b/webrender/src/gpu_cache.rs index fc55d87dfc..0258f70c2a 100644 --- a/webrender/src/gpu_cache.rs +++ b/webrender/src/gpu_cache.rs @@ -36,7 +36,7 @@ use std::os::raw::c_void; pub const GPU_CACHE_INITIAL_HEIGHT: u32 = 512; -const FRAMES_BEFORE_EVICTION: u32 = 10; +const FRAMES_BEFORE_EVICTION: usize = 10; const NEW_ROWS_PER_RESIZE: u32 = 512; #[derive(Debug, Copy, Clone, Eq, PartialEq)] diff --git a/webrender/src/prim_store.rs b/webrender/src/prim_store.rs index 5b03ef969b..40c395ed8b 100644 --- a/webrender/src/prim_store.rs +++ b/webrender/src/prim_store.rs @@ -1814,7 +1814,7 @@ impl PrimitiveInstance { combined_local_clip_rect: LayoutRect::zero(), clipped_world_rect: None, #[cfg(debug_assertions)] - prepared_frame_id: FrameId(0), + prepared_frame_id: FrameId::invalid(), #[cfg(debug_assertions)] id: PrimitiveDebugId(NEXT_PRIM_ID.fetch_add(1, Ordering::Relaxed)), clip_task_id: None, diff --git a/webrender/src/render_backend.rs b/webrender/src/render_backend.rs index ac22624a81..1352f051d0 100644 --- a/webrender/src/render_backend.rs +++ b/webrender/src/render_backend.rs @@ -83,26 +83,52 @@ impl DocumentView { #[derive(Copy, Clone, Hash, PartialEq, PartialOrd, Debug, Eq, Ord)] #[cfg_attr(feature = "capture", derive(Serialize))] #[cfg_attr(feature = "replay", derive(Deserialize))] -pub struct FrameId(pub u32); +pub struct FrameId(usize); impl FrameId { /// Returns an invalid sentinel FrameId, which will always compare less than /// any valid FrameId. - /// - /// FIXME(bholley): That last part isn't entirely true yet, but will be shortly. pub fn invalid() -> Self { FrameId(0) } -} -impl ::std::ops::Add for FrameId { - type Output = FrameId; + /// Returns a FrameId corresponding to the first frame. + /// + /// Note that we use 0 as the internal id here because the current code + /// increments the frame id at the beginning of the frame, rather than + /// at the end, and we want the first frame to be 1. It would probably + /// be sensible to move the advance() call to after frame-building, and + /// then make this method return FrameId(1). + pub fn first() -> Self { + FrameId(0) + } + + /// Returns the backing usize for this FrameId. + pub fn as_usize(&self) -> usize { + self.0 + } + + /// Advances this FrameId to the next frame. + fn advance(&mut self) { + self.0 += 1; + } +} - fn add(self, other: u32) -> FrameId { +impl ::std::ops::Add for FrameId { + type Output = Self; + fn add(self, other: usize) -> FrameId { FrameId(self.0 + other) } } +impl ::std::ops::Sub for FrameId { + type Output = Self; + fn sub(self, other: usize) -> FrameId { + assert!(self.0 >= other, "Underflow subtracting FrameIds"); + FrameId(self.0 - other) + } +} + // A collection of resources that are shared by clips, primitives // between display lists. #[cfg_attr(feature = "capture", derive(Serialize))] @@ -190,7 +216,7 @@ impl Document { device_pixel_ratio: default_device_pixel_ratio, }, clip_scroll_tree: ClipScrollTree::new(), - frame_id: FrameId(0), + frame_id: FrameId::first(), frame_builder: None, output_pipelines: FastHashSet::default(), hit_tester: None, @@ -319,6 +345,9 @@ impl Document { let accumulated_scale_factor = self.view.accumulated_scale_factor(); let pan = self.view.pan.to_f32() / accumulated_scale_factor; + assert!(self.frame_id != FrameId::invalid(), + "First frame increment must happen before build_frame()"); + let frame = { let frame_builder = self.frame_builder.as_mut().unwrap(); let frame = frame_builder.build( @@ -420,7 +449,7 @@ impl Document { self.clip_scroll_tree.finalize_and_apply_pending_scroll_offsets(old_scrolling_states); // Advance to the next frame. - self.frame_id.0 += 1; + self.frame_id.advance(); } } @@ -1500,7 +1529,7 @@ impl RenderBackend { removed_pipelines: Vec::new(), view: view.clone(), clip_scroll_tree: ClipScrollTree::new(), - frame_id: FrameId(0), + frame_id: FrameId::first(), frame_builder: Some(FrameBuilder::empty()), output_pipelines: FastHashSet::default(), dynamic_properties: SceneProperties::new(), diff --git a/webrender/src/resource_cache.rs b/webrender/src/resource_cache.rs index 5269afb627..1bf3eec460 100644 --- a/webrender/src/resource_cache.rs +++ b/webrender/src/resource_cache.rs @@ -444,7 +444,7 @@ impl ResourceCache { cached_glyph_dimensions: FastHashMap::default(), texture_cache, state: State::Idle, - current_frame_id: FrameId(0), + current_frame_id: FrameId::invalid(), pending_image_requests: FastHashSet::default(), glyph_rasterizer, blob_image_handler, @@ -2032,7 +2032,7 @@ impl ResourceCache { self.texture_cache = cached.textures; } None => { - self.current_frame_id = FrameId(0); + self.current_frame_id = FrameId::invalid(); self.cached_glyphs.clear(); self.cached_glyph_dimensions.clear(); self.cached_images.clear(); diff --git a/webrender/src/texture_cache.rs b/webrender/src/texture_cache.rs index 7f93ec42a8..e8f5f8f266 100644 --- a/webrender/src/texture_cache.rs +++ b/webrender/src/texture_cache.rs @@ -271,7 +271,7 @@ impl TextureCache { ), next_id: CacheTextureId(1), pending_updates: TextureUpdateList::new(), - frame_id: FrameId(0), + frame_id: FrameId::invalid(), entries: FreeList::new(), standalone_entry_handles: Vec::new(), shared_entry_handles: Vec::new(), @@ -587,9 +587,9 @@ impl TextureCache { pub fn mark_unused(&mut self, handle: &TextureCacheHandle) { if let Some(ref handle) = handle.entry { if let Some(entry) = self.entries.get_opt_mut(handle) { - // Set a very low last accessed frame to make it very likely that this entry - // will get cleaned up next time we try to expire entries. - entry.last_access = FrameId(0); + // Set last accessed frame to invalid to ensure it gets cleaned up + // next time we expire entries. + entry.last_access = FrameId::invalid(); entry.eviction = Eviction::Auto; } } From a0fb35803be206d86556cb269ea28c575604c2c3 Mon Sep 17 00:00:00 2001 From: Bobby Holley Date: Mon, 29 Oct 2018 15:01:33 -0700 Subject: [PATCH 5/8] Improve some documentation. Differential Revision: https://phabricator.services.mozilla.com/D10676 --- webrender/src/freelist.rs | 22 ++++++++++++++++++---- webrender/src/texture_cache.rs | 12 +++--------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/webrender/src/freelist.rs b/webrender/src/freelist.rs index cbf22d3d92..2af644eed4 100644 --- a/webrender/src/freelist.rs +++ b/webrender/src/freelist.rs @@ -2,11 +2,25 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -use std::marker::PhantomData; +//! A generic backing store for caches. +//! +//! `FreeList` is a simple vector-backed data structure where each entry in the +//! vector contains an Option. It maintains an index-based (rather than +//! pointer-based) free list to efficiently locate the next unused entry. If all +//! entries are occupied, insertion appends a new element to the vector. +//! +//! It also supports both strong and weak handle semantics. There is exactly one +//! (non-Clonable) strong handle per occupied entry, which must be passed by +//! value into `free()` to release an entry. Strong handles can produce an +//! unlimited number of (Clonable) weak handles, which are used to perform +//! lookups which may fail of the entry has been freed. A per-entry epoch ensures +//! that weak handle lookups properly fail even if the entry has been freed and +//! reused. +//! +//! TODO(gw): Add an occupied list head, for fast iteration of the occupied list +//! to implement retain() style functionality. -// TODO(gw): Add an occupied list head, for fast -// iteration of the occupied list to implement -// retain() style functionality. +use std::marker::PhantomData; #[derive(Debug, Copy, Clone, PartialEq)] #[cfg_attr(feature = "capture", derive(Serialize))] diff --git a/webrender/src/texture_cache.rs b/webrender/src/texture_cache.rs index e8f5f8f266..49aee0566b 100644 --- a/webrender/src/texture_cache.rs +++ b/webrender/src/texture_cache.rs @@ -700,8 +700,7 @@ impl TextureCache { fn free(&mut self, entry: CacheEntry) -> Option<&TextureRegion> { match entry.kind { EntryKind::Standalone { .. } => { - // This is a standalone texture allocation. Just push it back onto the free - // list. + // This is a standalone texture allocation. Free it directly. self.pending_updates.push(TextureUpdate { id: entry.texture_id, op: TextureUpdateOp::Free, @@ -761,13 +760,8 @@ impl TextureCache { height: texture_array.dimensions, format: descriptor.format, filter: texture_array.filter, - // TODO(gw): Creating a render target here is only used - // for the texture cache debugger display. In - // the future, we should change the debug - // display to use a shader that blits the - // texture, and then we can remove this - // memory allocation (same for the other - // standalone texture below). + // This needs to be a render target because some render + // tasks get rendered into the texture cache. render_target: Some(RenderTargetInfo { has_depth: false }), layer_count: texture_array.layer_count as i32, }, From 2b0604e9d5ba89ccd5ccd548b907b069bfbab70f Mon Sep 17 00:00:00 2001 From: Bobby Holley Date: Mon, 29 Oct 2018 16:59:43 -0700 Subject: [PATCH 6/8] Support always-invalid WeakFreeListHandle and eliminate Option from TextureCacheHandle. This simplifies the texture cache code. Differential Revision: https://phabricator.services.mozilla.com/D10677 --- webrender/src/freelist.rs | 71 +++++- .../src/glyph_rasterizer/no_pathfinder.rs | 2 +- webrender/src/lib.rs | 10 + webrender/src/render_task.rs | 2 +- webrender/src/resource_cache.rs | 6 +- webrender/src/texture_cache.rs | 202 +++++++----------- 6 files changed, 155 insertions(+), 138 deletions(-) diff --git a/webrender/src/freelist.rs b/webrender/src/freelist.rs index 2af644eed4..98ae68ee10 100644 --- a/webrender/src/freelist.rs +++ b/webrender/src/freelist.rs @@ -20,6 +20,7 @@ //! TODO(gw): Add an occupied list head, for fast iteration of the occupied list //! to implement retain() style functionality. +use std::fmt; use std::marker::PhantomData; #[derive(Debug, Copy, Clone, PartialEq)] @@ -27,7 +28,20 @@ use std::marker::PhantomData; #[cfg_attr(feature = "replay", derive(Deserialize))] struct Epoch(u32); -#[derive(Debug)] +impl Epoch { + /// Mints a new epoch. + /// + /// We start at 1 so that 0 is always an invalid epoch. + fn new() -> Self { + Epoch(1) + } + + /// Returns an always-invalid epoch. + fn invalid() -> Self { + Epoch(0) + } +} + #[cfg_attr(feature = "capture", derive(Serialize))] #[cfg_attr(feature = "replay", derive(Deserialize))] pub struct FreeListHandle { @@ -36,6 +50,16 @@ pub struct FreeListHandle { _marker: PhantomData, } +/// More-compact textual representation for debug logging. +impl fmt::Debug for FreeListHandle { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("StrongHandle") + .field("index", &self.index) + .field("epoch", &self.epoch.0) + .finish() + } +} + impl FreeListHandle { pub fn weak(&self) -> WeakFreeListHandle { WeakFreeListHandle { @@ -56,7 +80,12 @@ impl Clone for WeakFreeListHandle { } } -#[derive(Debug)] +impl PartialEq for WeakFreeListHandle { + fn eq(&self, other: &Self) -> bool { + self.index == other.index && self.epoch == other.epoch + } +} + #[cfg_attr(feature = "capture", derive(Serialize))] #[cfg_attr(feature = "replay", derive(Deserialize))] pub struct WeakFreeListHandle { @@ -65,6 +94,27 @@ pub struct WeakFreeListHandle { _marker: PhantomData, } +/// More-compact textual representation for debug logging. +impl fmt::Debug for WeakFreeListHandle { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("WeakHandle") + .field("index", &self.index) + .field("epoch", &self.epoch.0) + .finish() + } +} + +impl WeakFreeListHandle { + /// Returns an always-invalid handle. + pub fn invalid() -> Self { + Self { + index: 0, + epoch: Epoch::invalid(), + _marker: PhantomData, + } + } +} + #[derive(Debug)] #[cfg_attr(feature = "capture", derive(Serialize))] #[cfg_attr(feature = "replay", derive(Deserialize))] @@ -90,10 +140,21 @@ pub enum UpsertResult { } impl FreeList { + /// Mints a new `FreeList` with no entries. + /// + /// Triggers a 1-entry allocation. pub fn new() -> Self { + // We guarantee that we never have zero entries by starting with one + // free entry. This allows WeakFreeListHandle::invalid() to work + // without adding any additional branches. + let first_slot = Slot { + next: None, + epoch: Epoch::new(), + value: None, + }; FreeList { - slots: Vec::new(), - free_list_head: None, + slots: vec![first_slot], + free_list_head: Some(0), active_count: 0, _marker: PhantomData, } @@ -168,7 +229,7 @@ impl FreeList { } None => { let index = self.slots.len() as u32; - let epoch = Epoch(0); + let epoch = Epoch::new(); self.slots.push(Slot { next: None, diff --git a/webrender/src/glyph_rasterizer/no_pathfinder.rs b/webrender/src/glyph_rasterizer/no_pathfinder.rs index abdc2a7736..e94496564a 100644 --- a/webrender/src/glyph_rasterizer/no_pathfinder.rs +++ b/webrender/src/glyph_rasterizer/no_pathfinder.rs @@ -169,7 +169,7 @@ impl GlyphRasterizer { } GlyphRasterResult::Bitmap(glyph) => { assert_eq!((glyph.left.fract(), glyph.top.fract()), (0.0, 0.0)); - let mut texture_cache_handle = TextureCacheHandle::new(); + let mut texture_cache_handle = TextureCacheHandle::invalid(); texture_cache.request(&texture_cache_handle, gpu_cache); texture_cache.update( &mut texture_cache_handle, diff --git a/webrender/src/lib.rs b/webrender/src/lib.rs index a29433d66c..e1fef4faa7 100644 --- a/webrender/src/lib.rs +++ b/webrender/src/lib.rs @@ -40,6 +40,16 @@ they're nestable. [notifier]: renderer/struct.Renderer.html#method.set_render_notifier */ +// Cribbed from the |matches| crate, for simplicity. +macro_rules! matches { + ($expression:expr, $($pattern:tt)+) => { + match $expression { + $($pattern)+ => true, + _ => false + } + } +} + #[macro_use] extern crate bitflags; #[macro_use] diff --git a/webrender/src/render_task.rs b/webrender/src/render_task.rs index 6903580cc8..6516230424 100644 --- a/webrender/src/render_task.rs +++ b/webrender/src/render_task.rs @@ -1248,7 +1248,7 @@ impl RenderTaskCache { .entry(key) .or_insert_with(|| { let entry = RenderTaskCacheEntry { - handle: TextureCacheHandle::new(), + handle: TextureCacheHandle::invalid(), pending_render_task_id: None, user_data, is_opaque, diff --git a/webrender/src/resource_cache.rs b/webrender/src/resource_cache.rs index 1bf3eec460..e45b7ee1d8 100644 --- a/webrender/src/resource_cache.rs +++ b/webrender/src/resource_cache.rs @@ -924,7 +924,7 @@ impl ResourceCache { let untiled_entry = match entry { &mut ImageResult::UntiledAuto(ref mut entry) => { Some(mem::replace(entry, CachedImageInfo { - texture_cache_handle: TextureCacheHandle::new(), + texture_cache_handle: TextureCacheHandle::invalid(), dirty_rect: None, manual_eviction: false, })) @@ -947,7 +947,7 @@ impl ResourceCache { Vacant(entry) => { entry.insert(if request.is_untiled_auto() { ImageResult::UntiledAuto(CachedImageInfo { - texture_cache_handle: TextureCacheHandle::new(), + texture_cache_handle: TextureCacheHandle::invalid(), dirty_rect: Some(template.descriptor.full_rect()), manual_eviction: false, }) @@ -964,7 +964,7 @@ impl ResourceCache { ImageResult::Multi(ref mut entries) => { entries.entry(request.into()) .or_insert(CachedImageInfo { - texture_cache_handle: TextureCacheHandle::new(), + texture_cache_handle: TextureCacheHandle::invalid(), dirty_rect: Some(template.descriptor.full_rect()), manual_eviction: false, }) diff --git a/webrender/src/texture_cache.rs b/webrender/src/texture_cache.rs index 49aee0566b..f338aa9b49 100644 --- a/webrender/src/texture_cache.rs +++ b/webrender/src/texture_cache.rs @@ -39,6 +39,13 @@ enum EntryKind { }, } +impl EntryKind { + /// Returns true if this corresponds to a standalone cache entry. + fn is_standalone(&self) -> bool { + matches!(*self, EntryKind::Standalone) + } +} + #[derive(Debug)] pub enum CacheEntryMarker {} @@ -130,26 +137,14 @@ impl CacheEntry { } } -type WeakCacheEntryHandle = WeakFreeListHandle; - -// A texture cache handle is a weak reference to a cache entry. -// If the handle has not been inserted into the cache yet, the -// value will be None. Even when the value is Some(), the location -// may not actually be valid if it has been evicted by the cache. -// In this case, the cache handle needs to re-upload this item -// to the texture cache (see request() below). -#[derive(Debug)] -#[cfg_attr(feature = "capture", derive(Serialize))] -#[cfg_attr(feature = "replay", derive(Deserialize))] -pub struct TextureCacheHandle { - entry: Option, -} -impl TextureCacheHandle { - pub fn new() -> Self { - TextureCacheHandle { entry: None } - } -} +/// A texture cache handle is a weak reference to a cache entry. +/// +/// If the handle has not been inserted into the cache yet, or if the entry was +/// previously inserted and then evicted, lookup of the handle will fail, and +/// the cache handle needs to re-upload this item to the texture cache (see +/// request() below). +pub type TextureCacheHandle = WeakFreeListHandle; #[derive(Copy, Clone, Debug, PartialEq, Eq)] #[cfg_attr(feature = "capture", derive(Serialize))] @@ -358,18 +353,13 @@ impl TextureCache { // texture cache (either never uploaded, or has been // evicted on a previous frame). pub fn request(&mut self, handle: &TextureCacheHandle, gpu_cache: &mut GpuCache) -> bool { - match handle.entry { - Some(ref handle) => { - match self.entries.get_opt_mut(handle) { - // If an image is requested that is already in the cache, - // refresh the GPU cache data associated with this item. - Some(entry) => { - entry.last_access = self.frame_id; - entry.update_gpu_cache(gpu_cache); - false - } - None => true, - } + match self.entries.get_opt_mut(handle) { + // If an image is requested that is already in the cache, + // refresh the GPU cache data associated with this item. + Some(entry) => { + entry.last_access = self.frame_id; + entry.update_gpu_cache(gpu_cache); + false } None => true, } @@ -379,10 +369,7 @@ impl TextureCache { // texture cache (either never uploaded, or has been // evicted on a previous frame). pub fn needs_upload(&self, handle: &TextureCacheHandle) -> bool { - match handle.entry { - Some(ref handle) => self.entries.get_opt(handle).is_none(), - None => true, - } + self.entries.get_opt(handle).is_none() } pub fn max_texture_size(&self) -> u32 { @@ -413,20 +400,12 @@ impl TextureCache { // - Never been in the cache // - Has been in the cache but was evicted. // - Exists in the cache but dimensions / format have changed. - let realloc = match handle.entry { - Some(ref handle) => { - match self.entries.get_opt(handle) { - Some(entry) => { - entry.size != descriptor.size || entry.format != descriptor.format - } - None => { - // Was previously allocated but has been evicted. - true - } - } + let realloc = match self.entries.get_opt(handle) { + Some(entry) => { + entry.size != descriptor.size || entry.format != descriptor.format } None => { - // This handle has not been allocated yet. + // Not allocated, or was previously allocated but has been evicted. true } }; @@ -444,8 +423,7 @@ impl TextureCache { dirty_rect = None; } - let entry = self.entries - .get_opt_mut(handle.entry.as_ref().unwrap()) + let entry = self.entries.get_opt_mut(handle) .expect("BUG: handle must be valid now"); // Install the new eviction notice for this update, if applicable. @@ -514,9 +492,7 @@ impl TextureCache { // Check if a given texture handle has a valid allocation // in the texture cache. pub fn is_allocated(&self, handle: &TextureCacheHandle) -> bool { - handle.entry.as_ref().map_or(false, |handle| { - self.entries.get_opt(handle).is_some() - }) + self.entries.get_opt(handle).is_some() } // Retrieve the details of an item in the cache. This is used @@ -525,30 +501,25 @@ impl TextureCache { // This function will assert in debug modes if the caller // tries to get a handle that was not requested this frame. pub fn get(&self, handle: &TextureCacheHandle) -> CacheItem { - match handle.entry { - Some(ref handle) => { - let entry = self.entries - .get_opt(handle) - .expect("BUG: was dropped from cache or not updated!"); - debug_assert_eq!(entry.last_access, self.frame_id); - let (layer_index, origin) = match entry.kind { - EntryKind::Standalone { .. } => { - (0, DeviceUintPoint::zero()) - } - EntryKind::Cache { - layer_index, - origin, - .. - } => (layer_index, origin), - }; - CacheItem { - uv_rect_handle: entry.uv_rect_handle, - texture_id: TextureSource::TextureCache(entry.texture_id), - uv_rect: DeviceUintRect::new(origin, entry.size), - texture_layer: layer_index as i32, - } + let entry = self.entries + .get_opt(handle) + .expect("BUG: was dropped from cache or not updated!"); + debug_assert_eq!(entry.last_access, self.frame_id); + let (layer_index, origin) = match entry.kind { + EntryKind::Standalone { .. } => { + (0, DeviceUintPoint::zero()) } - None => panic!("BUG: handle not requested earlier in frame"), + EntryKind::Cache { + layer_index, + origin, + .. + } => (layer_index, origin), + }; + CacheItem { + uv_rect_handle: entry.uv_rect_handle, + texture_id: TextureSource::TextureCache(entry.texture_id), + uv_rect: DeviceUintRect::new(origin, entry.size), + texture_layer: layer_index as i32, } } @@ -560,11 +531,6 @@ impl TextureCache { &self, handle: &TextureCacheHandle, ) -> (CacheTextureId, LayerIndex, DeviceUintRect) { - let handle = handle - .entry - .as_ref() - .expect("BUG: handle not requested earlier in frame"); - let entry = self.entries .get_opt(handle) .expect("BUG: was dropped from cache or not updated!"); @@ -585,13 +551,11 @@ impl TextureCache { } pub fn mark_unused(&mut self, handle: &TextureCacheHandle) { - if let Some(ref handle) = handle.entry { - if let Some(entry) = self.entries.get_opt_mut(handle) { - // Set last accessed frame to invalid to ensure it gets cleaned up - // next time we expire entries. - entry.last_access = FrameId::invalid(); - entry.eviction = Eviction::Auto; - } + if let Some(entry) = self.entries.get_opt_mut(handle) { + // Set last accessed frame to invalid to ensure it gets cleaned up + // next time we expire entries. + entry.last_access = FrameId::invalid(); + entry.eviction = Eviction::Auto; } } @@ -889,50 +853,32 @@ impl TextureCache { allocated_in_shared_cache = false; } - let new_cache_entry = new_cache_entry.expect("BUG: must have allocated by now"); - // We need to update the texture cache handle now, so that it - // points to the correct location. - let new_entry_handle = match handle.entry { - Some(ref existing_entry) => { - // If the handle already exists, there's two possibilities: - // 1) It points to a valid entry in the freelist. - // 2) It points to a stale entry in the freelist (i.e. has been evicted). - // - // For (1) we want to replace the cache entry with our - // newly updated location. We also need to ensure that - // the storage (region or standalone) associated with the - // previous entry here gets freed. - // - // For (2) we need to add the data to a new location - // in the freelist. - // - // This is managed with a database style upsert operation. - match self.entries.upsert(existing_entry, new_cache_entry) { - UpsertResult::Updated(old_entry) => { - self.free(old_entry); - None - } - UpsertResult::Inserted(new_handle) => Some(new_handle), - } + // If the handle points to a valid cache entry, we want to replace the + // cache entry with our newly updated location. We also need to ensure + // that the storage (region or standalone) associated with the previous + // entry here gets freed. + // + // If the handle is invalid, we need to insert the data, and append the + // result to the corresponding vector. + // + // This is managed with a database style upsert operation. + match self.entries.upsert(handle, new_cache_entry) { + UpsertResult::Updated(old_entry) => { + assert_eq!(allocated_in_shared_cache, !old_entry.kind.is_standalone(), + "We currently have a bug in this edge case, since we don't \ + move strong handles from one list to the other. This gets \ + fixed in the next patch."); + self.free(old_entry); } - None => { - // This handle has never been allocated, so just - // insert a new cache entry. - Some(self.entries.insert(new_cache_entry)) - } - }; - - // If the cache entry is new, update it in the cache handle. - if let Some(new_entry_handle) = new_entry_handle { - handle.entry = Some(new_entry_handle.weak()); - // Store the strong handle in the list that we scan for - // cache evictions. - if allocated_in_shared_cache { - self.shared_entry_handles.push(new_entry_handle); - } else { - self.standalone_entry_handles.push(new_entry_handle); + UpsertResult::Inserted(new_handle) => { + *handle = new_handle.weak(); + if allocated_in_shared_cache { + self.shared_entry_handles.push(new_handle); + } else { + self.standalone_entry_handles.push(new_handle); + } } } } From 95cf6380ac567603301b296945302405cbb05bca Mon Sep 17 00:00:00 2001 From: Bobby Holley Date: Tue, 30 Oct 2018 13:44:08 -0700 Subject: [PATCH 7/8] Properly handle entries moving between shared and standalone. We also switch the boolean from 'in_shared_cache' to 'standalone', since the latter is a bit shorter. Differential Revision: https://phabricator.services.mozilla.com/D10678 --- webrender/src/texture_cache.rs | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/webrender/src/texture_cache.rs b/webrender/src/texture_cache.rs index f338aa9b49..a7210ac40f 100644 --- a/webrender/src/texture_cache.rs +++ b/webrender/src/texture_cache.rs @@ -792,7 +792,7 @@ impl TextureCache { filter, &descriptor, ); - let mut allocated_in_shared_cache = true; + let mut allocated_standalone = false; let mut new_cache_entry = None; let frame_id = self.frame_id; @@ -851,7 +851,7 @@ impl TextureCache { uv_rect_kind, )); - allocated_in_shared_cache = false; + allocated_standalone = true; } let new_cache_entry = new_cache_entry.expect("BUG: must have allocated by now"); @@ -866,18 +866,26 @@ impl TextureCache { // This is managed with a database style upsert operation. match self.entries.upsert(handle, new_cache_entry) { UpsertResult::Updated(old_entry) => { - assert_eq!(allocated_in_shared_cache, !old_entry.kind.is_standalone(), - "We currently have a bug in this edge case, since we don't \ - move strong handles from one list to the other. This gets \ - fixed in the next patch."); + if allocated_standalone != old_entry.kind.is_standalone() { + // Handle the rare case than an update moves an entry from + // shared to standalone or vice versa. This involves a linear + // search, but should be rare enough not to matter. + let (from, to) = if allocated_standalone { + (&mut self.shared_entry_handles, &mut self.standalone_entry_handles) + } else { + (&mut self.standalone_entry_handles, &mut self.shared_entry_handles) + }; + let idx = from.iter().position(|h| h.weak() == *handle).unwrap(); + to.push(from.remove(idx)); + } self.free(old_entry); } UpsertResult::Inserted(new_handle) => { *handle = new_handle.weak(); - if allocated_in_shared_cache { - self.shared_entry_handles.push(new_handle); - } else { + if allocated_standalone { self.standalone_entry_handles.push(new_handle); + } else { + self.shared_entry_handles.push(new_handle); } } } From 231b522bc9db1388449dfa3359099c142f2ea71a Mon Sep 17 00:00:00 2001 From: Bobby Holley Date: Tue, 30 Oct 2018 16:15:16 -0700 Subject: [PATCH 8/8] Fix standalone texture eviction. Differential Revision: https://phabricator.services.mozilla.com/D10679 --- webrender/src/render_task.rs | 10 +++- webrender/src/texture_cache.rs | 102 +++++++++++++++++++++------------ 2 files changed, 73 insertions(+), 39 deletions(-) diff --git a/webrender/src/render_task.rs b/webrender/src/render_task.rs index 6516230424..1cc474fdd6 100644 --- a/webrender/src/render_task.rs +++ b/webrender/src/render_task.rs @@ -1201,6 +1201,14 @@ impl RenderTaskCache { // Allocate space in the texture cache, but don't supply // and CPU-side data to be uploaded. + // + // Note that we currently use Eager eviction for cached render + // tasks, which means that any cached item not used in the last + // frame is discarded. There's room to be a lot smarter here, + // especially by considering the relative costs of re-rendering + // each type of item (box shadow blurs are an order of magnitude + // more expensive than borders, for example). Telemetry could + // inform our decisions here as well. texture_cache.update( &mut entry.handle, descriptor, @@ -1211,7 +1219,7 @@ impl RenderTaskCache { gpu_cache, None, render_task.uv_rect_kind(), - Eviction::Auto, + Eviction::Eager, ); // Get the allocation details in the texture cache, and store diff --git a/webrender/src/texture_cache.rs b/webrender/src/texture_cache.rs index a7210ac40f..e5d1c60a2e 100644 --- a/webrender/src/texture_cache.rs +++ b/webrender/src/texture_cache.rs @@ -5,7 +5,7 @@ use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize}; use api::{ExternalImageType, ImageData, ImageFormat}; use api::ImageDescriptor; -use device::TextureFilter; +use device::{TextureFilter, total_gpu_bytes_allocated}; use freelist::{FreeList, FreeListHandle, UpsertResult, WeakFreeListHandle}; use gpu_cache::{GpuCache, GpuCacheHandle}; use gpu_types::{ImageSource, UvRectKind}; @@ -146,12 +146,21 @@ impl CacheEntry { /// request() below). pub type TextureCacheHandle = WeakFreeListHandle; +/// Describes the eviction policy for a given entry in the texture cache. #[derive(Copy, Clone, Debug, PartialEq, Eq)] #[cfg_attr(feature = "capture", derive(Serialize))] #[cfg_attr(feature = "replay", derive(Deserialize))] pub enum Eviction { + /// The entry will be evicted under the normal rules (which differ between + /// standalone and shared entries). Auto, + /// The entry will not be evicted until the policy is explicitly set to a + /// different value. Manual, + /// The entry will be evicted if it was not used in the last frame. + /// + /// FIXME(bholley): Currently this only applies to the standalone case. + Eager, } // An eviction notice is a shared condition useful for detecting @@ -559,46 +568,63 @@ impl TextureCache { } } - // Expire old standalone textures. + /// Expires old standalone textures. Called at the end of every frame. + /// + /// Most of the time, standalone cache entries correspond to images whose + /// width or height is greater than the region size in the shared cache, i.e. + /// 512 pixels. Cached render tasks also frequently get standalone entries, + /// but those use the Eviction::Eager policy (for now). So the tradeoff here + /// is largely around reducing texture upload jank while keeping memory usage + /// at an acceptable level. + /// + /// Our eviction scheme is based on the age of the entry, i.e. the difference + /// between the current frame index and that of the last frame in + /// which the entry was used. It does not directly consider the size of the + /// entry, but does consider overall memory usage by WebRender, by making + /// eviction increasingly aggressive as overall memory usage increases. fn expire_old_standalone_entries(&mut self) { - let mut eviction_candidates = Vec::new(); - let mut retained_entries = Vec::new(); - - // Build a list of eviction candidates (which are - // anything not used this frame). - for handle in self.standalone_entry_handles.drain(..) { - let entry = self.entries.get(&handle); - if entry.eviction == Eviction::Manual || entry.last_access == self.frame_id { - retained_entries.push(handle); - } else { - eviction_candidates.push(handle); + // These parameters are based on some discussion and local tuning, but + // no hard measurement. There may be room for improvement. + // + // See discussion at https://mozilla.logbot.info/gfx/20181030#c15541654 + const MAX_FRAME_AGE_WITHOUT_PRESSURE: f64 = 75.0; + const MAX_MEMORY_PRESSURE_BYTES: f64 = (500 * 1024 * 1024) as f64; + + // Compute the memory pressure factor in the range of [0, 1.0]. + let pressure_factor = + (total_gpu_bytes_allocated() as f64 / MAX_MEMORY_PRESSURE_BYTES as f64).min(1.0); + + // Use the pressure factor to compute the maximum number of frames that + // a standalone texture can go unused before being evicted. + let max_frame_age_raw = + ((1.0 - pressure_factor) * MAX_FRAME_AGE_WITHOUT_PRESSURE) as usize; + + // We clamp max_frame_age to frame_id - 1 so that entries with FrameId(0) + // always get evicted, even early in the lifetime of the Renderer. + let max_frame_age = max_frame_age_raw.min(self.frame_id.as_usize() - 1); + + // Compute the oldest FrameId for which we will not evict. + let frame_id_threshold = self.frame_id - max_frame_age; + + // Iterate over the entries in reverse order, evicting the ones older than + // the frame age threshold. Reverse order avoids iterator invalidation when + // removing entries. + for i in (0..self.standalone_entry_handles.len()).rev() { + let evict = { + let entry = self.entries.get(&self.standalone_entry_handles[i]); + match entry.eviction { + Eviction::Manual => false, + Eviction::Auto => entry.last_access < frame_id_threshold, + Eviction::Eager => entry.last_access < self.frame_id, + } + }; + if evict { + let handle = self.standalone_entry_handles.swap_remove(i); + let entry = self.entries.free(handle); + entry.evict(); + self.free(entry); } } - - // Sort by access time so we remove the oldest ones first. - eviction_candidates.sort_by_key(|handle| { - let entry = self.entries.get(handle); - entry.last_access - }); - - // We only allow an arbitrary number of unused - // standalone textures to remain in GPU memory. - // TODO(gw): We should make this a better heuristic, - // for example based on total memory size. - if eviction_candidates.len() > 32 { - let entries_to_keep = eviction_candidates.split_off(32); - retained_entries.extend(entries_to_keep); - } - - // Free the selected items - for handle in eviction_candidates { - let entry = self.entries.free(handle); - entry.evict(); - self.free(entry); - } - - // Keep a record of the remaining handles for next frame. - self.standalone_entry_handles = retained_entries; } // Expire old shared items. Pass in the allocation size