diff --git a/webrender/src/device/gl.rs b/webrender/src/device/gl.rs index 7d73344279..0fd6f40be4 100644 --- a/webrender/src/device/gl.rs +++ b/webrender/src/device/gl.rs @@ -26,24 +26,50 @@ use std::ptr; use std::rc::Rc; use std::slice; use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, ATOMIC_USIZE_INIT, Ordering}; use std::thread; +/// Sequence number for frames, as tracked by the device layer. #[derive(Debug, Copy, Clone, PartialEq, Ord, Eq, PartialOrd)] #[cfg_attr(feature = "capture", derive(Serialize))] #[cfg_attr(feature = "replay", derive(Deserialize))] -pub struct FrameId(usize); +pub struct GpuFrameId(usize); -impl FrameId { +/// Tracks the total number of GPU bytes allocated across all WebRender instances. +/// +/// Assuming all WebRender instances run on the same thread, this doesn't need +/// to be atomic per se, but we make it atomic to satisfy the thread safety +/// invariants in the type system. We could also put the value in TLS, but that +/// would be more expensive to access. +static GPU_BYTES_ALLOCATED: AtomicUsize = ATOMIC_USIZE_INIT; + +/// Returns the number of GPU bytes currently allocated. +pub fn total_gpu_bytes_allocated() -> usize { + GPU_BYTES_ALLOCATED.load(Ordering::Relaxed) +} + +/// Records an allocation in VRAM. +fn record_gpu_alloc(num_bytes: usize) { + GPU_BYTES_ALLOCATED.fetch_add(num_bytes, Ordering::Relaxed); +} + +/// Records a deallocation in VRAM. 
+fn record_gpu_free(num_bytes: usize) { + let old = GPU_BYTES_ALLOCATED.fetch_sub(num_bytes, Ordering::Relaxed); + assert!(old >= num_bytes, "Freeing {} bytes but only {} allocated", num_bytes, old); +} + +impl GpuFrameId { pub fn new(value: usize) -> Self { - FrameId(value) + GpuFrameId(value) } } -impl Add for FrameId { - type Output = FrameId; +impl Add for GpuFrameId { + type Output = GpuFrameId; - fn add(self, other: usize) -> FrameId { - FrameId(self.0 + other) + fn add(self, other: usize) -> GpuFrameId { + GpuFrameId(self.0 + other) } } @@ -117,6 +143,14 @@ pub unsafe trait Texel: Copy {} unsafe impl Texel for u8 {} unsafe impl Texel for f32 {} +/// Returns the size in bytes of a depth target with the given dimensions. +fn depth_target_size_in_bytes(dimensions: &DeviceUintSize) -> usize { + // DEPTH24 textures generally reserve 3 bytes for depth and 1 byte + // for stencil, so we measure them as 32 bits. + let pixels = dimensions.width * dimensions.height; + (pixels as usize) * 4 +} + #[derive(Clone, Copy, Debug, PartialEq)] pub enum ReadPixelsFormat { Standard(ImageFormat), @@ -465,7 +499,7 @@ pub struct Texture { /// configurations). But that would complicate a lot of logic in this module, /// and FBOs are cheap enough to create. fbos_with_depth: Vec, - last_frame_used: FrameId, + last_frame_used: GpuFrameId, } impl Texture { @@ -490,13 +524,13 @@ impl Texture { !self.fbos_with_depth.is_empty() } - pub fn used_in_frame(&self, frame_id: FrameId) -> bool { + pub fn used_in_frame(&self, frame_id: GpuFrameId) -> bool { self.last_frame_used == frame_id } /// Returns true if this texture was used within `threshold` frames of /// the current frame. - pub fn used_recently(&self, current_frame_id: FrameId, threshold: usize) -> bool { + pub fn used_recently(&self, current_frame_id: GpuFrameId, threshold: usize) -> bool { self.last_frame_used + threshold >= current_frame_id } @@ -780,7 +814,7 @@ pub struct Device { // Frame counter. 
This is used to map between CPU // frames and GPU frames. - frame_id: FrameId, + frame_id: GpuFrameId, /// Whether glTexStorage* is supported. We prefer this over glTexImage* /// because it guarantees that mipmaps won't be generated (which they @@ -937,7 +971,7 @@ impl Device { max_texture_size, renderer_name, cached_programs, - frame_id: FrameId(0), + frame_id: GpuFrameId(0), extensions, supports_texture_storage, } @@ -1022,7 +1056,7 @@ impl Device { } } - pub fn begin_frame(&mut self) -> FrameId { + pub fn begin_frame(&mut self) -> GpuFrameId { debug_assert!(!self.inside_frame); self.inside_frame = true; @@ -1428,6 +1462,8 @@ impl Device { } } + record_gpu_alloc(texture.size_in_bytes()); + texture } @@ -1591,6 +1627,9 @@ impl Device { refcount: 0, } }); + if target.refcount == 0 { + record_gpu_alloc(depth_target_size_in_bytes(&dimensions)); + } target.refcount += 1; target.rbo_id } @@ -1603,8 +1642,9 @@ impl Device { debug_assert!(entry.get().refcount != 0); entry.get_mut().refcount -= 1; if entry.get().refcount == 0 { - let t = entry.remove(); - self.gl.delete_renderbuffers(&[t.rbo_id.0]); + let (dimensions, target) = entry.remove_entry(); + self.gl.delete_renderbuffers(&[target.rbo_id.0]); + record_gpu_free(depth_target_size_in_bytes(&dimensions)); } } @@ -1627,6 +1667,7 @@ impl Device { pub fn delete_texture(&mut self, mut texture: Texture) { debug_assert!(self.inside_frame); + record_gpu_free(texture.size_in_bytes()); let had_depth = texture.supports_depth(); self.deinit_fbos(&mut texture.fbos); self.deinit_fbos(&mut texture.fbos_with_depth); @@ -2465,10 +2506,7 @@ impl Device { pub fn report_memory(&self) -> MemoryReport { let mut report = MemoryReport::default(); for dim in self.depth_targets.keys() { - // DEPTH24 textures generally reserve 3 bytes for depth and 1 byte - // for stencil, so we measure them as 32 bytes. 
- let pixels: u32 = dim.width * dim.height; - report.depth_target_textures += (pixels as usize) * 4; + report.depth_target_textures += depth_target_size_in_bytes(dim); } report } diff --git a/webrender/src/device/query_gl.rs b/webrender/src/device/query_gl.rs index 3e7a0d00d8..0c62f5d69d 100644 --- a/webrender/src/device/query_gl.rs +++ b/webrender/src/device/query_gl.rs @@ -7,7 +7,7 @@ use gleam::gl; use std::mem; use std::rc::Rc; -use device::FrameId; +use device::GpuFrameId; pub trait NamedTag { @@ -69,7 +69,7 @@ pub struct GpuFrameProfile { gl: Rc, timers: QuerySet>, samplers: QuerySet>, - frame_id: FrameId, + frame_id: GpuFrameId, inside_frame: bool, ext_debug_marker: bool } @@ -80,7 +80,7 @@ impl GpuFrameProfile { gl, timers: QuerySet::new(), samplers: QuerySet::new(), - frame_id: FrameId::new(0), + frame_id: GpuFrameId::new(0), inside_frame: false, ext_debug_marker } @@ -108,7 +108,7 @@ impl GpuFrameProfile { self.samplers.set = Vec::new(); } - fn begin_frame(&mut self, frame_id: FrameId) { + fn begin_frame(&mut self, frame_id: GpuFrameId) { self.frame_id = frame_id; self.timers.reset(); self.samplers.reset(); @@ -162,7 +162,7 @@ impl GpuFrameProfile { } #[cfg(feature = "debug_renderer")] - fn build_samples(&mut self) -> (FrameId, Vec>, Vec>) { + fn build_samples(&mut self) -> (GpuFrameId, Vec>, Vec>) { debug_assert!(!self.inside_frame); let gl = &self.gl; @@ -241,11 +241,11 @@ impl GpuProfiler { impl GpuProfiler { #[cfg(feature = "debug_renderer")] - pub fn build_samples(&mut self) -> (FrameId, Vec>, Vec>) { + pub fn build_samples(&mut self) -> (GpuFrameId, Vec>, Vec>) { self.frames[self.next_frame].build_samples() } - pub fn begin_frame(&mut self, frame_id: FrameId) { + pub fn begin_frame(&mut self, frame_id: GpuFrameId) { self.frames[self.next_frame].begin_frame(frame_id); } diff --git a/webrender/src/frame_builder.rs b/webrender/src/frame_builder.rs index 90af43f385..14436caae8 100644 --- a/webrender/src/frame_builder.rs +++ 
b/webrender/src/frame_builder.rs @@ -337,7 +337,7 @@ impl FrameBuilder { .set(self.prim_store.prim_count()); resource_cache.begin_frame(frame_id); - gpu_cache.begin_frame(); + gpu_cache.begin_frame(frame_id); let mut transform_palette = TransformPalette::new(); clip_scroll_tree.update_tree( diff --git a/webrender/src/freelist.rs b/webrender/src/freelist.rs index cbf22d3d92..98ae68ee10 100644 --- a/webrender/src/freelist.rs +++ b/webrender/src/freelist.rs @@ -2,18 +2,46 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -use std::marker::PhantomData; +//! A generic backing store for caches. +//! +//! `FreeList` is a simple vector-backed data structure where each entry in the +//! vector contains an Option. It maintains an index-based (rather than +//! pointer-based) free list to efficiently locate the next unused entry. If all +//! entries are occupied, insertion appends a new element to the vector. +//! +//! It also supports both strong and weak handle semantics. There is exactly one +//! (non-Clonable) strong handle per occupied entry, which must be passed by +//! value into `free()` to release an entry. Strong handles can produce an +//! unlimited number of (Clonable) weak handles, which are used to perform +//! lookups which may fail if the entry has been freed. A per-entry epoch ensures +//! that weak handle lookups properly fail even if the entry has been freed and +//! reused. +//! +//! TODO(gw): Add an occupied list head, for fast iteration of the occupied list +//! to implement retain() style functionality. -// TODO(gw): Add an occupied list head, for fast -// iteration of the occupied list to implement -// retain() style functionality. 
+use std::fmt; +use std::marker::PhantomData; #[derive(Debug, Copy, Clone, PartialEq)] #[cfg_attr(feature = "capture", derive(Serialize))] #[cfg_attr(feature = "replay", derive(Deserialize))] struct Epoch(u32); -#[derive(Debug)] +impl Epoch { + /// Mints a new epoch. + /// + /// We start at 1 so that 0 is always an invalid epoch. + fn new() -> Self { + Epoch(1) + } + + /// Returns an always-invalid epoch. + fn invalid() -> Self { + Epoch(0) + } +} + #[cfg_attr(feature = "capture", derive(Serialize))] #[cfg_attr(feature = "replay", derive(Deserialize))] pub struct FreeListHandle { @@ -22,6 +50,16 @@ pub struct FreeListHandle { _marker: PhantomData, } +/// More-compact textual representation for debug logging. +impl fmt::Debug for FreeListHandle { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("StrongHandle") + .field("index", &self.index) + .field("epoch", &self.epoch.0) + .finish() + } +} + impl FreeListHandle { pub fn weak(&self) -> WeakFreeListHandle { WeakFreeListHandle { @@ -42,7 +80,12 @@ impl Clone for WeakFreeListHandle { } } -#[derive(Debug)] +impl PartialEq for WeakFreeListHandle { + fn eq(&self, other: &Self) -> bool { + self.index == other.index && self.epoch == other.epoch + } +} + #[cfg_attr(feature = "capture", derive(Serialize))] #[cfg_attr(feature = "replay", derive(Deserialize))] pub struct WeakFreeListHandle { @@ -51,6 +94,27 @@ pub struct WeakFreeListHandle { _marker: PhantomData, } +/// More-compact textual representation for debug logging. +impl fmt::Debug for WeakFreeListHandle { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("WeakHandle") + .field("index", &self.index) + .field("epoch", &self.epoch.0) + .finish() + } +} + +impl WeakFreeListHandle { + /// Returns an always-invalid handle. 
+ pub fn invalid() -> Self { + Self { + index: 0, + epoch: Epoch::invalid(), + _marker: PhantomData, + } + } +} + #[derive(Debug)] #[cfg_attr(feature = "capture", derive(Serialize))] #[cfg_attr(feature = "replay", derive(Deserialize))] @@ -76,10 +140,21 @@ pub enum UpsertResult { } impl FreeList { + /// Mints a new `FreeList` with no entries. + /// + /// Triggers a 1-entry allocation. pub fn new() -> Self { + // We guarantee that we never have zero entries by starting with one + // free entry. This allows WeakFreeListHandle::invalid() to work + // without adding any additional branches. + let first_slot = Slot { + next: None, + epoch: Epoch::new(), + value: None, + }; FreeList { - slots: Vec::new(), - free_list_head: None, + slots: vec![first_slot], + free_list_head: Some(0), active_count: 0, _marker: PhantomData, } @@ -154,7 +229,7 @@ impl FreeList { } None => { let index = self.slots.len() as u32; - let epoch = Epoch(0); + let epoch = Epoch::new(); self.slots.push(Slot { next: None, diff --git a/webrender/src/glyph_rasterizer/mod.rs b/webrender/src/glyph_rasterizer/mod.rs index 16e8ac4016..03a3a7a798 100644 --- a/webrender/src/glyph_rasterizer/mod.rs +++ b/webrender/src/glyph_rasterizer/mod.rs @@ -734,7 +734,7 @@ mod test_glyph_rasterizer { let mut gpu_cache = GpuCache::new(); let mut texture_cache = TextureCache::new(2048); let mut render_task_cache = RenderTaskCache::new(); - let mut render_task_tree = RenderTaskTree::new(FrameId(0)); + let mut render_task_tree = RenderTaskTree::new(FrameId::invalid()); let mut special_render_passes = SpecialRenderPasses::new(&DeviceIntSize::new(1366, 768)); let mut font_file = diff --git a/webrender/src/glyph_rasterizer/no_pathfinder.rs b/webrender/src/glyph_rasterizer/no_pathfinder.rs index abdc2a7736..e94496564a 100644 --- a/webrender/src/glyph_rasterizer/no_pathfinder.rs +++ b/webrender/src/glyph_rasterizer/no_pathfinder.rs @@ -169,7 +169,7 @@ impl GlyphRasterizer { } GlyphRasterResult::Bitmap(glyph) => { 
assert_eq!((glyph.left.fract(), glyph.top.fract()), (0.0, 0.0)); - let mut texture_cache_handle = TextureCacheHandle::new(); + let mut texture_cache_handle = TextureCacheHandle::invalid(); texture_cache.request(&texture_cache_handle, gpu_cache); texture_cache.update( &mut texture_cache_handle, diff --git a/webrender/src/gpu_cache.rs b/webrender/src/gpu_cache.rs index 4ba82e1bd5..0258f70c2a 100644 --- a/webrender/src/gpu_cache.rs +++ b/webrender/src/gpu_cache.rs @@ -26,9 +26,9 @@ use api::{PremultipliedColorF, TexelRect}; use api::{VoidPtrToSizeFn}; -use device::FrameId; use euclid::TypedRect; use profiler::GpuCacheProfileCounters; +use render_backend::FrameId; use renderer::MAX_VERTEX_TEXTURE_WIDTH; use std::{mem, u16, u32}; use std::ops::Add; @@ -549,7 +549,7 @@ pub struct GpuCache { impl GpuCache { pub fn new() -> Self { GpuCache { - frame_id: FrameId::new(0), + frame_id: FrameId::invalid(), texture: Texture::new(), saved_block_count: 0, in_debug: false, @@ -557,9 +557,9 @@ impl GpuCache { } /// Begin a new frame. - pub fn begin_frame(&mut self) { + pub fn begin_frame(&mut self, frame_id: FrameId) { debug_assert!(self.texture.pending_blocks.is_empty()); - self.frame_id = self.frame_id + 1; + self.frame_id = frame_id; self.texture.evict_old_blocks(self.frame_id); self.saved_block_count = 0; } diff --git a/webrender/src/lib.rs b/webrender/src/lib.rs index 2400b83be8..e1fef4faa7 100644 --- a/webrender/src/lib.rs +++ b/webrender/src/lib.rs @@ -40,6 +40,16 @@ they're nestable. [notifier]: renderer/struct.Renderer.html#method.set_render_notifier */ +// Cribbed from the |matches| crate, for simplicity. +macro_rules! 
matches { + ($expression:expr, $($pattern:tt)+) => { + match $expression { + $($pattern)+ => true, + _ => false + } + } +} + #[macro_use] extern crate bitflags; #[macro_use] @@ -182,7 +192,7 @@ pub extern crate webrender_api; #[doc(hidden)] pub use device::{build_shader_strings, ReadPixelsFormat, UploadMethod, VertexUsageHint}; pub use device::{ProgramBinary, ProgramCache, ProgramCacheObserver, ProgramSources}; -pub use device::{Device}; +pub use device::{Device, total_gpu_bytes_allocated}; pub use frame_builder::ChasePrimitive; pub use renderer::{AsyncPropertySampler, CpuProfile, DebugFlags, OutputImageHandler, RendererKind}; pub use renderer::{ExternalImage, ExternalImageHandler, ExternalImageSource, GpuProfile}; diff --git a/webrender/src/prim_store.rs b/webrender/src/prim_store.rs index 5b03ef969b..40c395ed8b 100644 --- a/webrender/src/prim_store.rs +++ b/webrender/src/prim_store.rs @@ -1814,7 +1814,7 @@ impl PrimitiveInstance { combined_local_clip_rect: LayoutRect::zero(), clipped_world_rect: None, #[cfg(debug_assertions)] - prepared_frame_id: FrameId(0), + prepared_frame_id: FrameId::invalid(), #[cfg(debug_assertions)] id: PrimitiveDebugId(NEXT_PRIM_ID.fetch_add(1, Ordering::Relaxed)), clip_task_id: None, diff --git a/webrender/src/render_backend.rs b/webrender/src/render_backend.rs index 346088c7de..1352f051d0 100644 --- a/webrender/src/render_backend.rs +++ b/webrender/src/render_backend.rs @@ -83,7 +83,51 @@ impl DocumentView { #[derive(Copy, Clone, Hash, PartialEq, PartialOrd, Debug, Eq, Ord)] #[cfg_attr(feature = "capture", derive(Serialize))] #[cfg_attr(feature = "replay", derive(Deserialize))] -pub struct FrameId(pub u32); +pub struct FrameId(usize); + +impl FrameId { + /// Returns an invalid sentinel FrameId, which will always compare less than + /// any valid FrameId. + pub fn invalid() -> Self { + FrameId(0) + } + + /// Returns a FrameId corresponding to the first frame. 
+ /// + /// Note that we use 0 as the internal id here because the current code + /// increments the frame id at the beginning of the frame, rather than + /// at the end, and we want the first frame to be 1. It would probably + /// be sensible to move the advance() call to after frame-building, and + /// then make this method return FrameId(1). + pub fn first() -> Self { + FrameId(0) + } + + /// Returns the backing usize for this FrameId. + pub fn as_usize(&self) -> usize { + self.0 + } + + /// Advances this FrameId to the next frame. + fn advance(&mut self) { + self.0 += 1; + } +} + +impl ::std::ops::Add for FrameId { + type Output = Self; + fn add(self, other: usize) -> FrameId { + FrameId(self.0 + other) + } +} + +impl ::std::ops::Sub for FrameId { + type Output = Self; + fn sub(self, other: usize) -> FrameId { + assert!(self.0 >= other, "Underflow subtracting FrameIds"); + FrameId(self.0 - other) + } +} // A collection of resources that are shared by clips, primitives // between display lists. @@ -172,7 +216,7 @@ impl Document { device_pixel_ratio: default_device_pixel_ratio, }, clip_scroll_tree: ClipScrollTree::new(), - frame_id: FrameId(0), + frame_id: FrameId::first(), frame_builder: None, output_pipelines: FastHashSet::default(), hit_tester: None, @@ -301,6 +345,9 @@ impl Document { let accumulated_scale_factor = self.view.accumulated_scale_factor(); let pan = self.view.pan.to_f32() / accumulated_scale_factor; + assert!(self.frame_id != FrameId::invalid(), + "First frame increment must happen before build_frame()"); + let frame = { let frame_builder = self.frame_builder.as_mut().unwrap(); let frame = frame_builder.build( @@ -402,7 +449,7 @@ impl Document { self.clip_scroll_tree.finalize_and_apply_pending_scroll_offsets(old_scrolling_states); // Advance to the next frame. 
- self.frame_id.0 += 1; + self.frame_id.advance(); } } @@ -1482,7 +1529,7 @@ impl RenderBackend { removed_pipelines: Vec::new(), view: view.clone(), clip_scroll_tree: ClipScrollTree::new(), - frame_id: FrameId(0), + frame_id: FrameId::first(), frame_builder: Some(FrameBuilder::empty()), output_pipelines: FastHashSet::default(), dynamic_properties: SceneProperties::new(), diff --git a/webrender/src/render_task.rs b/webrender/src/render_task.rs index 6903580cc8..1cc474fdd6 100644 --- a/webrender/src/render_task.rs +++ b/webrender/src/render_task.rs @@ -1201,6 +1201,14 @@ impl RenderTaskCache { // Allocate space in the texture cache, but don't supply // and CPU-side data to be uploaded. + // + // Note that we currently use Eager eviction for cached render + // tasks, which means that any cached item not used in the last + // frame is discarded. There's room to be a lot smarter here, + // especially by considering the relative costs of re-rendering + // each type of item (box shadow blurs are an order of magnitude + // more expensive than borders, for example). Telemetry could + // inform our decisions here as well. 
texture_cache.update( &mut entry.handle, descriptor, @@ -1211,7 +1219,7 @@ impl RenderTaskCache { gpu_cache, None, render_task.uv_rect_kind(), - Eviction::Auto, + Eviction::Eager, ); // Get the allocation details in the texture cache, and store @@ -1248,7 +1256,7 @@ impl RenderTaskCache { .entry(key) .or_insert_with(|| { let entry = RenderTaskCacheEntry { - handle: TextureCacheHandle::new(), + handle: TextureCacheHandle::invalid(), pending_render_task_id: None, user_data, is_opaque, diff --git a/webrender/src/renderer.rs b/webrender/src/renderer.rs index bceac6748b..0f1ce06e27 100644 --- a/webrender/src/renderer.rs +++ b/webrender/src/renderer.rs @@ -35,7 +35,7 @@ use batch::{BatchKind, BatchTextures, BrushBatchKind}; #[cfg(any(feature = "capture", feature = "replay"))] use capture::{CaptureConfig, ExternalCaptureImage, PlainExternalImage}; use debug_colors; -use device::{DepthFunction, Device, FrameId, Program, UploadMethod, Texture, PBO}; +use device::{DepthFunction, Device, GpuFrameId, Program, UploadMethod, Texture, PBO}; use device::{DrawTarget, ExternalTexture, FBOId, ReadTarget, TextureSlot}; use device::{ShaderError, TextureFilter, VertexUsageHint, VAO, VBO, CustomVAO}; @@ -62,7 +62,7 @@ use profiler::{BackendProfileCounters, FrameProfileCounters, use device::query::GpuProfiler; use rayon::{ThreadPool, ThreadPoolBuilder}; use record::ApiRecordingReceiver; -use render_backend::RenderBackend; +use render_backend::{FrameId, RenderBackend}; use scene_builder::{SceneBuilder, LowPrioritySceneBuilder}; use shade::{Shaders, WrShaders}; use smallvec::SmallVec; @@ -705,13 +705,13 @@ pub enum RendererKind { #[derive(Debug)] pub struct GpuProfile { - pub frame_id: FrameId, + pub frame_id: GpuFrameId, pub paint_time_ns: u64, } impl GpuProfile { #[cfg(feature = "debug_renderer")] - fn new(frame_id: FrameId, timers: &[GpuTimer]) -> GpuProfile { + fn new(frame_id: GpuFrameId, timers: &[GpuTimer]) -> GpuProfile { let mut paint_time_ns = 0; for timer in timers { 
paint_time_ns += timer.time_ns; @@ -725,7 +725,7 @@ impl GpuProfile { #[derive(Debug)] pub struct CpuProfile { - pub frame_id: FrameId, + pub frame_id: GpuFrameId, pub backend_time_ns: u64, pub composite_time_ns: u64, pub draw_calls: usize, @@ -733,7 +733,7 @@ pub struct CpuProfile { impl CpuProfile { fn new( - frame_id: FrameId, + frame_id: GpuFrameId, backend_time_ns: u64, composite_time_ns: u64, draw_calls: usize, @@ -853,7 +853,7 @@ impl TextureResolver { assert!(self.saved_targets.is_empty()); } - fn end_frame(&mut self, device: &mut Device, frame_id: FrameId) { + fn end_frame(&mut self, device: &mut Device, frame_id: GpuFrameId) { // return the cached targets to the pool self.end_pass(device, None, None); // return the saved targets as well @@ -1441,7 +1441,7 @@ impl VertexDataTexture { } struct FrameOutput { - last_access: FrameId, + last_access: GpuFrameId, fbo_id: FBOId, } @@ -2022,7 +2022,7 @@ impl Renderer { gpu_cache_texture, #[cfg(feature = "debug_renderer")] gpu_cache_debug_chunks: Vec::new(), - gpu_cache_frame_id: FrameId::new(0), + gpu_cache_frame_id: FrameId::invalid(), gpu_cache_overflow: false, texture_cache_upload_pbo, texture_resolver, @@ -2736,7 +2736,7 @@ impl Renderer { let gpu_cache_height = self.gpu_cache_texture.get_height(); if gpu_cache_height != 0 && GPU_CACHE_RESIZE_TEST { self.pending_gpu_cache_updates.push(GpuCacheUpdateList { - frame_id: FrameId::new(0), + frame_id: FrameId::invalid(), height: gpu_cache_height, blocks: vec![[1f32; 4].into()], updates: Vec::new(), @@ -3109,7 +3109,7 @@ impl Renderer { clear_color: Option<[f32; 4]>, render_tasks: &RenderTaskTree, projection: &Transform3D, - frame_id: FrameId, + frame_id: GpuFrameId, stats: &mut RendererStats, ) { self.profile_counters.color_targets.inc(); @@ -3745,7 +3745,7 @@ impl Renderer { .expect("Found external image, but no handler set!"); let mut list = GpuCacheUpdateList { - frame_id: FrameId::new(0), + frame_id: FrameId::invalid(), height: 
self.gpu_cache_texture.get_height(), blocks: Vec::new(), updates: Vec::new(), @@ -3945,7 +3945,7 @@ impl Renderer { frame: &mut Frame, framebuffer_size: Option, framebuffer_depth_is_ready: bool, - frame_id: FrameId, + frame_id: GpuFrameId, stats: &mut RendererStats, ) { let _gm = self.gpu_profile.start_marker("tile frame draw"); diff --git a/webrender/src/resource_cache.rs b/webrender/src/resource_cache.rs index 5269afb627..e45b7ee1d8 100644 --- a/webrender/src/resource_cache.rs +++ b/webrender/src/resource_cache.rs @@ -444,7 +444,7 @@ impl ResourceCache { cached_glyph_dimensions: FastHashMap::default(), texture_cache, state: State::Idle, - current_frame_id: FrameId(0), + current_frame_id: FrameId::invalid(), pending_image_requests: FastHashSet::default(), glyph_rasterizer, blob_image_handler, @@ -924,7 +924,7 @@ impl ResourceCache { let untiled_entry = match entry { &mut ImageResult::UntiledAuto(ref mut entry) => { Some(mem::replace(entry, CachedImageInfo { - texture_cache_handle: TextureCacheHandle::new(), + texture_cache_handle: TextureCacheHandle::invalid(), dirty_rect: None, manual_eviction: false, })) @@ -947,7 +947,7 @@ impl ResourceCache { Vacant(entry) => { entry.insert(if request.is_untiled_auto() { ImageResult::UntiledAuto(CachedImageInfo { - texture_cache_handle: TextureCacheHandle::new(), + texture_cache_handle: TextureCacheHandle::invalid(), dirty_rect: Some(template.descriptor.full_rect()), manual_eviction: false, }) @@ -964,7 +964,7 @@ impl ResourceCache { ImageResult::Multi(ref mut entries) => { entries.entry(request.into()) .or_insert(CachedImageInfo { - texture_cache_handle: TextureCacheHandle::new(), + texture_cache_handle: TextureCacheHandle::invalid(), dirty_rect: Some(template.descriptor.full_rect()), manual_eviction: false, }) @@ -2032,7 +2032,7 @@ impl ResourceCache { self.texture_cache = cached.textures; } None => { - self.current_frame_id = FrameId(0); + self.current_frame_id = FrameId::invalid(); self.cached_glyphs.clear(); 
self.cached_glyph_dimensions.clear(); self.cached_images.clear(); diff --git a/webrender/src/texture_cache.rs b/webrender/src/texture_cache.rs index 7f93ec42a8..e5d1c60a2e 100644 --- a/webrender/src/texture_cache.rs +++ b/webrender/src/texture_cache.rs @@ -5,7 +5,7 @@ use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize}; use api::{ExternalImageType, ImageData, ImageFormat}; use api::ImageDescriptor; -use device::TextureFilter; +use device::{TextureFilter, total_gpu_bytes_allocated}; use freelist::{FreeList, FreeListHandle, UpsertResult, WeakFreeListHandle}; use gpu_cache::{GpuCache, GpuCacheHandle}; use gpu_types::{ImageSource, UvRectKind}; @@ -39,6 +39,13 @@ enum EntryKind { }, } +impl EntryKind { + /// Returns true if this corresponds to a standalone cache entry. + fn is_standalone(&self) -> bool { + matches!(*self, EntryKind::Standalone) + } +} + #[derive(Debug)] pub enum CacheEntryMarker {} @@ -130,33 +137,30 @@ impl CacheEntry { } } -type WeakCacheEntryHandle = WeakFreeListHandle; - -// A texture cache handle is a weak reference to a cache entry. -// If the handle has not been inserted into the cache yet, the -// value will be None. Even when the value is Some(), the location -// may not actually be valid if it has been evicted by the cache. -// In this case, the cache handle needs to re-upload this item -// to the texture cache (see request() below). -#[derive(Debug)] -#[cfg_attr(feature = "capture", derive(Serialize))] -#[cfg_attr(feature = "replay", derive(Deserialize))] -pub struct TextureCacheHandle { - entry: Option, -} -impl TextureCacheHandle { - pub fn new() -> Self { - TextureCacheHandle { entry: None } - } -} +/// A texture cache handle is a weak reference to a cache entry. +/// +/// If the handle has not been inserted into the cache yet, or if the entry was +/// previously inserted and then evicted, lookup of the handle will fail, and +/// the cache handle needs to re-upload this item to the texture cache (see +/// request() below). 
+pub type TextureCacheHandle = WeakFreeListHandle; +/// Describes the eviction policy for a given entry in the texture cache. #[derive(Copy, Clone, Debug, PartialEq, Eq)] #[cfg_attr(feature = "capture", derive(Serialize))] #[cfg_attr(feature = "replay", derive(Deserialize))] pub enum Eviction { + /// The entry will be evicted under the normal rules (which differ between + /// standalone and shared entries). Auto, + /// The entry will not be evicted until the policy is explicitly set to a + /// different value. Manual, + /// The entry will be evicted if it was not used in the last frame. + /// + /// FIXME(bholley): Currently this only applies to the standalone case. + Eager, } // An eviction notice is a shared condition useful for detecting @@ -271,7 +275,7 @@ impl TextureCache { ), next_id: CacheTextureId(1), pending_updates: TextureUpdateList::new(), - frame_id: FrameId(0), + frame_id: FrameId::invalid(), entries: FreeList::new(), standalone_entry_handles: Vec::new(), shared_entry_handles: Vec::new(), @@ -358,18 +362,13 @@ impl TextureCache { // texture cache (either never uploaded, or has been // evicted on a previous frame). pub fn request(&mut self, handle: &TextureCacheHandle, gpu_cache: &mut GpuCache) -> bool { - match handle.entry { - Some(ref handle) => { - match self.entries.get_opt_mut(handle) { - // If an image is requested that is already in the cache, - // refresh the GPU cache data associated with this item. - Some(entry) => { - entry.last_access = self.frame_id; - entry.update_gpu_cache(gpu_cache); - false - } - None => true, - } + match self.entries.get_opt_mut(handle) { + // If an image is requested that is already in the cache, + // refresh the GPU cache data associated with this item. + Some(entry) => { + entry.last_access = self.frame_id; + entry.update_gpu_cache(gpu_cache); + false } None => true, } @@ -379,10 +378,7 @@ impl TextureCache { // texture cache (either never uploaded, or has been // evicted on a previous frame). 
pub fn needs_upload(&self, handle: &TextureCacheHandle) -> bool { - match handle.entry { - Some(ref handle) => self.entries.get_opt(handle).is_none(), - None => true, - } + self.entries.get_opt(handle).is_none() } pub fn max_texture_size(&self) -> u32 { @@ -413,20 +409,12 @@ impl TextureCache { // - Never been in the cache // - Has been in the cache but was evicted. // - Exists in the cache but dimensions / format have changed. - let realloc = match handle.entry { - Some(ref handle) => { - match self.entries.get_opt(handle) { - Some(entry) => { - entry.size != descriptor.size || entry.format != descriptor.format - } - None => { - // Was previously allocated but has been evicted. - true - } - } + let realloc = match self.entries.get_opt(handle) { + Some(entry) => { + entry.size != descriptor.size || entry.format != descriptor.format } None => { - // This handle has not been allocated yet. + // Not allocated, or was previously allocated but has been evicted. true } }; @@ -444,8 +432,7 @@ impl TextureCache { dirty_rect = None; } - let entry = self.entries - .get_opt_mut(handle.entry.as_ref().unwrap()) + let entry = self.entries.get_opt_mut(handle) .expect("BUG: handle must be valid now"); // Install the new eviction notice for this update, if applicable. @@ -514,9 +501,7 @@ impl TextureCache { // Check if a given texture handle has a valid allocation // in the texture cache. pub fn is_allocated(&self, handle: &TextureCacheHandle) -> bool { - handle.entry.as_ref().map_or(false, |handle| { - self.entries.get_opt(handle).is_some() - }) + self.entries.get_opt(handle).is_some() } // Retrieve the details of an item in the cache. This is used @@ -525,30 +510,25 @@ impl TextureCache { // This function will assert in debug modes if the caller // tries to get a handle that was not requested this frame. 
pub fn get(&self, handle: &TextureCacheHandle) -> CacheItem { - match handle.entry { - Some(ref handle) => { - let entry = self.entries - .get_opt(handle) - .expect("BUG: was dropped from cache or not updated!"); - debug_assert_eq!(entry.last_access, self.frame_id); - let (layer_index, origin) = match entry.kind { - EntryKind::Standalone { .. } => { - (0, DeviceUintPoint::zero()) - } - EntryKind::Cache { - layer_index, - origin, - .. - } => (layer_index, origin), - }; - CacheItem { - uv_rect_handle: entry.uv_rect_handle, - texture_id: TextureSource::TextureCache(entry.texture_id), - uv_rect: DeviceUintRect::new(origin, entry.size), - texture_layer: layer_index as i32, - } + let entry = self.entries + .get_opt(handle) + .expect("BUG: was dropped from cache or not updated!"); + debug_assert_eq!(entry.last_access, self.frame_id); + let (layer_index, origin) = match entry.kind { + EntryKind::Standalone { .. } => { + (0, DeviceUintPoint::zero()) } - None => panic!("BUG: handle not requested earlier in frame"), + EntryKind::Cache { + layer_index, + origin, + .. + } => (layer_index, origin), + }; + CacheItem { + uv_rect_handle: entry.uv_rect_handle, + texture_id: TextureSource::TextureCache(entry.texture_id), + uv_rect: DeviceUintRect::new(origin, entry.size), + texture_layer: layer_index as i32, } } @@ -560,11 +540,6 @@ impl TextureCache { &self, handle: &TextureCacheHandle, ) -> (CacheTextureId, LayerIndex, DeviceUintRect) { - let handle = handle - .entry - .as_ref() - .expect("BUG: handle not requested earlier in frame"); - let entry = self.entries .get_opt(handle) .expect("BUG: was dropped from cache or not updated!"); @@ -585,56 +560,71 @@ impl TextureCache { } pub fn mark_unused(&mut self, handle: &TextureCacheHandle) { - if let Some(ref handle) = handle.entry { - if let Some(entry) = self.entries.get_opt_mut(handle) { - // Set a very low last accessed frame to make it very likely that this entry - // will get cleaned up next time we try to expire entries. 
- entry.last_access = FrameId(0); - entry.eviction = Eviction::Auto; - } + if let Some(entry) = self.entries.get_opt_mut(handle) { + // Set last accessed frame to invalid to ensure it gets cleaned up + // next time we expire entries. + entry.last_access = FrameId::invalid(); + entry.eviction = Eviction::Auto; } } - // Expire old standalone textures. + /// Expires old standalone textures. Called at the end of every frame. + /// + /// Most of the time, standalone cache entries correspond to images whose + /// width or height is greater than the region size in the shared cache, i.e. + /// 512 pixels. Cached render tasks also frequently get standalone entries, + /// but those use the Eviction::Eager policy (for now). So the tradeoff here + /// is largely around reducing texture upload jank while keeping memory usage + /// at an acceptable level. + /// + /// Our eviction scheme is based on the age of the entry, i.e. the difference + /// between the current frame index and that of the last frame in + /// which the entry was used. It does not directly consider the size of the + /// entry, but does consider overall memory usage by WebRender, by making + /// eviction increasingly aggressive as overall memory usage increases. fn expire_old_standalone_entries(&mut self) { - let mut eviction_candidates = Vec::new(); - let mut retained_entries = Vec::new(); - - // Build a list of eviction candidates (which are - // anything not used this frame). - for handle in self.standalone_entry_handles.drain(..) { - let entry = self.entries.get(&handle); - if entry.eviction == Eviction::Manual || entry.last_access == self.frame_id { - retained_entries.push(handle); - } else { - eviction_candidates.push(handle); + // These parameters are based on some discussion and local tuning, but + // no hard measurement. There may be room for improvement. 
+ // + // See discussion at https://mozilla.logbot.info/gfx/20181030#c15541654 + const MAX_FRAME_AGE_WITHOUT_PRESSURE: f64 = 75.0; + const MAX_MEMORY_PRESSURE_BYTES: f64 = (500 * 1024 * 1024) as f64; + + // Compute the memory pressure factor in the range of [0, 1.0]. + let pressure_factor = + (total_gpu_bytes_allocated() as f64 / MAX_MEMORY_PRESSURE_BYTES as f64).min(1.0); + + // Use the pressure factor to compute the maximum number of frames that + // a standalone texture can go unused before being evicted. + let max_frame_age_raw = + ((1.0 - pressure_factor) * MAX_FRAME_AGE_WITHOUT_PRESSURE) as usize; + + // We clamp max_frame_age to frame_id - 1 so that entries with FrameId(0) + // always get evicted, even early in the lifetime of the Renderer. + let max_frame_age = max_frame_age_raw.min(self.frame_id.as_usize() - 1); + + // Compute the oldest FrameId for which we will not evict. + let frame_id_threshold = self.frame_id - max_frame_age; + + // Iterate over the entries in reverse order, evicting the ones older than + // the frame age threshold. Reverse order avoids iterator invalidation when + // removing entries. + for i in (0..self.standalone_entry_handles.len()).rev() { + let evict = { + let entry = self.entries.get(&self.standalone_entry_handles[i]); + match entry.eviction { + Eviction::Manual => false, + Eviction::Auto => entry.last_access < frame_id_threshold, + Eviction::Eager => entry.last_access < self.frame_id, + } + }; + if evict { + let handle = self.standalone_entry_handles.swap_remove(i); + let entry = self.entries.free(handle); + entry.evict(); + self.free(entry); } } - - // Sort by access time so we remove the oldest ones first. - eviction_candidates.sort_by_key(|handle| { - let entry = self.entries.get(handle); - entry.last_access - }); - - // We only allow an arbitrary number of unused - // standalone textures to remain in GPU memory. - // TODO(gw): We should make this a better heuristic, - // for example based on total memory size. 
- if eviction_candidates.len() > 32 { - let entries_to_keep = eviction_candidates.split_off(32); - retained_entries.extend(entries_to_keep); - } - - // Free the selected items - for handle in eviction_candidates { - let entry = self.entries.free(handle); - entry.evict(); - self.free(entry); - } - - // Keep a record of the remaining handles for next frame. - self.standalone_entry_handles = retained_entries; } // Expire old shared items. Pass in the allocation size @@ -700,8 +690,7 @@ impl TextureCache { fn free(&mut self, entry: CacheEntry) -> Option<&TextureRegion> { match entry.kind { EntryKind::Standalone { .. } => { - // This is a standalone texture allocation. Just push it back onto the free - // list. + // This is a standalone texture allocation. Free it directly. self.pending_updates.push(TextureUpdate { id: entry.texture_id, op: TextureUpdateOp::Free, @@ -761,13 +750,8 @@ impl TextureCache { height: texture_array.dimensions, format: descriptor.format, filter: texture_array.filter, - // TODO(gw): Creating a render target here is only used - // for the texture cache debugger display. In - // the future, we should change the debug - // display to use a shader that blits the - // texture, and then we can remove this - // memory allocation (same for the other - // standalone texture below). + // This needs to be a render target because some render + // tasks get rendered into the texture cache. 
render_target: Some(RenderTargetInfo { has_depth: false }), layer_count: texture_array.layer_count as i32, }, @@ -834,7 +818,7 @@ impl TextureCache { filter, &descriptor, ); - let mut allocated_in_shared_cache = true; + let mut allocated_standalone = false; let mut new_cache_entry = None; let frame_id = self.frame_id; @@ -893,52 +877,42 @@ impl TextureCache { uv_rect_kind, )); - allocated_in_shared_cache = false; + allocated_standalone = true; } - let new_cache_entry = new_cache_entry.expect("BUG: must have allocated by now"); - // We need to update the texture cache handle now, so that it - // points to the correct location. - let new_entry_handle = match handle.entry { - Some(ref existing_entry) => { - // If the handle already exists, there's two possibilities: - // 1) It points to a valid entry in the freelist. - // 2) It points to a stale entry in the freelist (i.e. has been evicted). - // - // For (1) we want to replace the cache entry with our - // newly updated location. We also need to ensure that - // the storage (region or standalone) associated with the - // previous entry here gets freed. - // - // For (2) we need to add the data to a new location - // in the freelist. - // - // This is managed with a database style upsert operation. - match self.entries.upsert(existing_entry, new_cache_entry) { - UpsertResult::Updated(old_entry) => { - self.free(old_entry); - None - } - UpsertResult::Inserted(new_handle) => Some(new_handle), + // If the handle points to a valid cache entry, we want to replace the + // cache entry with our newly updated location. We also need to ensure + // that the storage (region or standalone) associated with the previous + // entry here gets freed. + // + // If the handle is invalid, we need to insert the data, and append the + // result to the corresponding vector. + // + // This is managed with a database style upsert operation. 
+ match self.entries.upsert(handle, new_cache_entry) { + UpsertResult::Updated(old_entry) => { + if allocated_standalone != old_entry.kind.is_standalone() { + // Handle the rare case that an update moves an entry from + // shared to standalone or vice versa. This involves a linear + // search, but should be rare enough not to matter. + let (from, to) = if allocated_standalone { + (&mut self.shared_entry_handles, &mut self.standalone_entry_handles) + } else { + (&mut self.standalone_entry_handles, &mut self.shared_entry_handles) + }; + let idx = from.iter().position(|h| h.weak() == *handle).unwrap(); + to.push(from.remove(idx)); } + self.free(old_entry); } - None => { - // This handle has never been allocated, so just - // insert a new cache entry. - Some(self.entries.insert(new_cache_entry)) - } - }; - - // If the cache entry is new, update it in the cache handle. - if let Some(new_entry_handle) = new_entry_handle { - handle.entry = Some(new_entry_handle.weak()); - // Store the strong handle in the list that we scan for - // cache evictions.
- if allocated_in_shared_cache { - self.shared_entry_handles.push(new_entry_handle); - } else { - self.standalone_entry_handles.push(new_entry_handle); + UpsertResult::Inserted(new_handle) => { + *handle = new_handle.weak(); + if allocated_standalone { + self.standalone_entry_handles.push(new_handle); + } else { + self.shared_entry_handles.push(new_handle); + } } } } diff --git a/webrender/src/tiling.rs b/webrender/src/tiling.rs index 8fe39ed1fc..dba71b2490 100644 --- a/webrender/src/tiling.rs +++ b/webrender/src/tiling.rs @@ -8,7 +8,7 @@ use api::{MixBlendMode, PipelineId, DeviceRect, LayoutSize}; use batch::{AlphaBatchBuilder, AlphaBatchContainer, ClipBatcher, resolve_image}; use clip::ClipStore; use clip_scroll_tree::{ClipScrollTree}; -use device::{FrameId, Texture}; +use device::{Texture}; #[cfg(feature = "pathfinder")] use euclid::{TypedPoint2D, TypedVector2D}; use gpu_cache::{GpuCache}; @@ -20,7 +20,7 @@ use pathfinder_partitioner::mesh::Mesh; use picture::SurfaceInfo; use prim_store::{PrimitiveStore, DeferredResolve}; use profiler::FrameProfileCounters; -use render_backend::FrameResources; +use render_backend::{FrameId, FrameResources}; use render_task::{BlitSource, RenderTaskAddress, RenderTaskId, RenderTaskKind}; use render_task::{BlurTask, ClearMode, GlyphTask, RenderTaskLocation, RenderTaskTree, ScalingTask}; use resource_cache::ResourceCache; diff --git a/webrender_api/src/api.rs b/webrender_api/src/api.rs index f12234833d..7f3c065ddb 100644 --- a/webrender_api/src/api.rs +++ b/webrender_api/src/api.rs @@ -792,6 +792,10 @@ pub struct MemoryReport { pub render_target_textures: usize, pub texture_cache_textures: usize, pub depth_target_textures: usize, + // + // GPU memory total (tracked separately, should equal the sum of the above). 
+ // + pub total_gpu_bytes_allocated: usize, } impl ::std::ops::AddAssign for MemoryReport { @@ -810,6 +814,10 @@ impl ::std::ops::AddAssign for MemoryReport { self.render_target_textures += other.render_target_textures; self.texture_cache_textures += other.texture_cache_textures; self.depth_target_textures += other.depth_target_textures; + + // The total_gpu_bytes_allocated value accounts for all WebRender instances, + // and thus can't be aggregated. It should really be reported out of band, + // but putting it in this struct facilitates sending it across Gecko IPC. } }