From d516a3c680ccc744c350838e2f5d24e75f73eb5f Mon Sep 17 00:00:00 2001 From: Nicolas Silva Date: Fri, 3 Apr 2020 01:11:25 +0000 Subject: [PATCH 1/7] Bug 1626614 - Don't reset ipc profile counters every frame. r=Bert Differential Revision: https://phabricator.services.mozilla.com/D69227 [ghsync] From https://hg.mozilla.org/mozilla-central/rev/27e38d793959eb04381346a00a46cd59bc009933 --- webrender/src/profiler.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/webrender/src/profiler.rs b/webrender/src/profiler.rs index da3f6902f9..b92e80b63a 100644 --- a/webrender/src/profiler.rs +++ b/webrender/src/profiler.rs @@ -753,6 +753,12 @@ impl IpcProfileCounters { consume_end: u64, display_len: usize, ) { + self.total_time.reset(); + self.build_time.reset(); + self.consume_time.reset(); + self.send_time.reset(); + self.display_lists.reset(); + let build_time = build_end - build_start; let consume_time = consume_end - consume_start; let send_time = consume_start - send_start; @@ -837,11 +843,6 @@ impl BackendProfileCounters { pub fn reset(&mut self) { self.total_time.reset(); - self.ipc.total_time.reset(); - self.ipc.build_time.reset(); - self.ipc.consume_time.reset(); - self.ipc.send_time.reset(); - self.ipc.display_lists.reset(); self.resources.texture_cache.rasterized_blob_pixels.reset(); } } From a93c8e3d7ae69b96cb5c00dd05e1d3331d4282f5 Mon Sep 17 00:00:00 2001 From: Nicolas Silva Date: Fri, 3 Apr 2020 01:11:35 +0000 Subject: [PATCH 2/7] Bug 1626666 - Rework the ipc profile counters. r=Bert,gw Before this patch: - Consume time merely is the time it takes to push something into a vector (always displays zero). - Total IPC time and the DisplayList IPC graph measure the time between api.set_display_list and the render backend picking the message up, plus the time it took to build the display list (but doesn't take into account the time it took for actual IPC in between). - Send time is only the time between api.set_display_list and the render backend picking the message up but doesn't take into account the time it took between the content thread sending the DL and the compositor thread forwarding it. After this patch: - Content send time measures the time between the content thread sending the display list and the compositor forwarding it (actual IPC). - Api send time measures the time between the compostor thread forwarding the DL and the render backend picking it up. - Consume time is removed. - Total send time is the sum of content and api times. - Display list build times and display list IPC (total send time) are on separate graphs. Depends on D69227 Differential Revision: https://phabricator.services.mozilla.com/D69228 [ghsync] From https://hg.mozilla.org/mozilla-central/rev/ea22f0b1e2a0d0a237048452f930529bc773ffbd --- webrender/src/profiler.rs | 64 +++++++++++++++++++-------------- webrender/src/render_backend.rs | 6 ---- 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/webrender/src/profiler.rs b/webrender/src/profiler.rs index b92e80b63a..9ae6316378 100644 --- a/webrender/src/profiler.rs +++ b/webrender/src/profiler.rs @@ -43,7 +43,6 @@ pub mod expected { pub const GPU_CACHE_BLOCKS_UPDATED: Range = 0..1000; pub const GPU_CACHE_BLOCKS_SAVED: Range = 0..50_000; pub const DISPLAY_LIST_BUILD_TIME: Range = 0.0..3.0; - pub const DISPLAY_LIST_CONSUME_TIME: Range = 0.0..2.0; pub const MAX_SCENE_BUILD_TIME: Range = 0.0..3.0; pub const DISPLAY_LIST_SEND_TIME: Range = 0.0..1.0; pub const DISPLAY_LIST_TOTAL_TIME: Range = 0.0..4.0; @@ -704,9 +703,13 @@ pub struct ResourceProfileCounters { #[derive(Clone)] pub struct IpcProfileCounters { pub build_time: TimeProfileCounter, - pub consume_time: TimeProfileCounter, - pub send_time: TimeProfileCounter, - pub total_time: TimeProfileCounter, + /// Time between when the display list is built and when it is sent by the API. + pub content_send_time: TimeProfileCounter, + /// Time between sending the SetDisplayList from the API and picking it up on + /// the render backend thread. + pub api_send_time: TimeProfileCounter, + /// Sum of content_send_time and api_send_time. + pub total_send_time: TimeProfileCounter, pub display_lists: ResourceProfileCounter, } @@ -750,22 +753,21 @@ impl IpcProfileCounters { build_end: u64, send_start: u64, consume_start: u64, - consume_end: u64, display_len: usize, ) { - self.total_time.reset(); self.build_time.reset(); - self.consume_time.reset(); - self.send_time.reset(); + self.content_send_time.reset(); + self.api_send_time.reset(); + self.total_send_time.reset(); self.display_lists.reset(); let build_time = build_end - build_start; - let consume_time = consume_end - consume_start; - let send_time = consume_start - send_start; + let content_send_time = send_start - build_end; + let api_send_time = consume_start - send_start; self.build_time.inc(build_time); - self.consume_time.inc(consume_time); - self.send_time.inc(send_time); - self.total_time.inc(build_time + consume_time + send_time); + self.content_send_time.inc(content_send_time); + self.api_send_time.inc(api_send_time); + self.total_send_time.inc(content_send_time + api_send_time); self.display_lists.inc(display_len); } } @@ -801,23 +803,23 @@ impl BackendProfileCounters { }, ipc: IpcProfileCounters { build_time: TimeProfileCounter::new( - "Display List Build Time", false, + "DisplayList Build Time", false, Some(expected::DISPLAY_LIST_BUILD_TIME) ), - consume_time: TimeProfileCounter::new( - "Display List Consume Time", false, - Some(expected::DISPLAY_LIST_CONSUME_TIME), + content_send_time: TimeProfileCounter::new( + "Content Send Time", false, + Some(expected::DISPLAY_LIST_SEND_TIME), ), - send_time: TimeProfileCounter::new( - "Display List Send Time", false, + api_send_time: TimeProfileCounter::new( + "API Send Time", false, Some(expected::DISPLAY_LIST_SEND_TIME), ), - total_time: TimeProfileCounter::new( - "Total Display List Time", false, + total_send_time: TimeProfileCounter::new( + "Total IPC Time", false, Some(expected::DISPLAY_LIST_TOTAL_TIME), ), display_lists: ResourceProfileCounter::new( - "Display Lists Sent", + "DisplayLists Sent", None, Some(expected::DISPLAY_LIST_MB), ), }, @@ -1220,6 +1222,7 @@ pub struct Profiler { renderer_graph: ProfileGraph, gpu_graph: ProfileGraph, ipc_graph: ProfileGraph, + display_list_build_graph: ProfileGraph, scene_build_graph: ProfileGraph, blob_raster_graph: ProfileGraph, backend_time: AverageTimeProfileCounter, @@ -1244,6 +1247,7 @@ impl Profiler { renderer_graph: ProfileGraph::new(600, to_ms_scale, "Renderer:", "ms"), gpu_graph: ProfileGraph::new(600, to_ms_scale, "GPU:", "ms"), ipc_graph: ProfileGraph::new(600, to_ms_scale, "IPC:", "ms"), + display_list_build_graph: ProfileGraph::new(600, to_ms_scale, "DisplayList build", "ms"), scene_build_graph: ProfileGraph::new(600, to_ms_scale, "Scene build:", "ms"), blob_raster_graph: ProfileGraph::new(600, 1.0, "Rasterized blob pixels:", "px"), gpu_frames: GpuFrameCollection::new(), @@ -1583,9 +1587,9 @@ impl Profiler { Profiler::draw_counters( &[ &backend_profile.ipc.build_time, - &backend_profile.ipc.send_time, - &backend_profile.ipc.consume_time, - &backend_profile.ipc.total_time, + &backend_profile.ipc.content_send_time, + &backend_profile.ipc.api_send_time, + &backend_profile.ipc.total_send_time, ], None, debug_renderer, @@ -1667,6 +1671,10 @@ impl Profiler { .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "DisplayList IPC", debug_renderer); self.draw_state.y_right += rect.size.height + PROFILE_PADDING; + let rect = self.display_list_build_graph + .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "DisplayList build", debug_renderer); + self.draw_state.y_right += rect.size.height + PROFILE_PADDING; + let rect = self.scene_build_graph .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "Scene build", debug_renderer); self.draw_state.y_right += rect.size.height + PROFILE_PADDING; @@ -1778,12 +1786,14 @@ impl Profiler { .push(renderer_timers.cpu_time.nanoseconds); self.renderer_time.set(renderer_timers.cpu_time.nanoseconds); self.ipc_graph - .push(backend_profile.ipc.total_time.nanoseconds); + .push(backend_profile.ipc.total_send_time.nanoseconds); + self.display_list_build_graph + .push(backend_profile.ipc.build_time.nanoseconds); self.scene_build_graph .push(backend_profile.scene_build_time.nanoseconds); self.blob_raster_graph .push(backend_profile.resources.texture_cache.rasterized_blob_pixels.size as u64); - self.ipc_time.set(backend_profile.ipc.total_time.nanoseconds); + self.ipc_time.set(backend_profile.ipc.total_send_time.nanoseconds); self.gpu_graph.push(gpu_graph); self.gpu_time.set(gpu_graph); self.gpu_frames.push(gpu_graph, gpu_graphrs); diff --git a/webrender/src/render_backend.rs b/webrender/src/render_backend.rs index 358b4a1221..854a60fbaa 100644 --- a/webrender/src/render_backend.rs +++ b/webrender/src/render_backend.rs @@ -858,17 +858,11 @@ impl RenderBackend { content_size, }); - // Note: this isn't quite right as auxiliary values will be - // pulled out somewhere in the prim_store, but aux values are - // really simple and cheap to access, so it's not a big deal. - let display_list_consumed_time = precise_time_ns(); - ipc_profile_counters.set( builder_start_time, builder_finish_time, send_start_time, display_list_received_time, - display_list_consumed_time, display_list_len, ); } From 90bbfb7e24f0a8d277b4ca7add6cc7538654ff5e Mon Sep 17 00:00:00 2001 From: Nicolas Silva Date: Fri, 3 Apr 2020 01:11:45 +0000 Subject: [PATCH 3/7] Bug 1625365 - Improve the blob image overview documentation. r=jrmuizel Differential Revision: https://phabricator.services.mozilla.com/D68985 [ghsync] From https://hg.mozilla.org/mozilla-central/rev/7ce59e5c51d63f3f88a882092e59a9f1bf3453a5 --- webrender/doc/blob.md | 46 +++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/webrender/doc/blob.md b/webrender/doc/blob.md index d7ae917a7e..b910f6f76a 100644 --- a/webrender/doc/blob.md +++ b/webrender/doc/blob.md @@ -1,17 +1,43 @@ # Blob images -The blob image mechanism now has two traits: -- [`BlobImageHandler`](https://github.com/servo/webrender/pull/2785/files#diff-2b72a28a40b83edf41a59adfd46b1a11R188) is roughly the equivalent of the previous `BlobImageRenderer` except that it doesn't do any rendering (it manages the state of the blob commands, and resources like fonts). -- [`AsyncBlobImageRasterizer`](https://github.com/servo/webrender/pull/2785/files#diff-2b72a28a40b83edf41a59adfd46b1a11R211) is created by the handler and sent over to the scene builder thread. the async rasterizer is meant to be a snapshot of the state of blob image commands that can execute the commands if provided some requests. +Blob image is fallback mechanism for webrender that Gecko uses to render primitives that aren't currently supported by webrender. The main idea is to provide webrender with a custom handler that can take arbitray drawing commands serialized as buffers of bytes (the blobs) and turn them into images that webrender internally will treat as regular images. -When receiving a transaction, the render backend / resource cache look at the list of added and updated blob images in that transaction, [collect the list of blob images and tiles that need to be rendered](https://github.com/servo/webrender/pull/2785/files#diff-77cbdf7ba9ebae81feb38a64c21b8454R848), create a rasterizer, and ship the two to the scene builder. -After building the scene the rasterizer gets handed the list of blob requests and [does all of the rasterization](https://github.com/servo/webrender/pull/2785/files#diff-856af4d4ff2333d4204e7e5a87a93c58R153), blocking the scene builder thread until the work is done. +At the API level, blob images are treated as other images. They are resources created and associated with image keys, and are used in the display list with regular image display items. -When the scene building and rasterization is done, the render backend receives the rasterized blobs and [stores them](https://github.com/servo/webrender/pull/2785/files#diff-77cbdf7ba9ebae81feb38a64c21b8454R520) so that they are available when frame building needs them. -Because blob images can be huge, we don't always want to rasterize them entirely during scene building. To decide what should be rasterized, we rely on gecko giving us a hint through the added `set_image_visible_area` API. When the render backend receives that message [it decides which tiles are going to be rasterized](https://github.com/servo/webrender/pull/2785/files#diff-77cbdf7ba9ebae81feb38a64c21b8454R469). This information is also used to [decide which tiles to evict](https://github.com/servo/webrender/pull/2785/files#diff-77cbdf7ba9ebae81feb38a64c21b8454R430), so that we don't keep thousands of tiles if we scroll through a massive blob image. The idea is for the visible area to correspond to the size of the display list. +## Active area -Sometimes, however, Gecko gets this visible area "wrong", or at least gives webrender a certain visible area but eventually webrender requests tiles during frame building that weren't in that area. I think that this is inevitable because the culling logic in gecko and webrender works very differently, so relying on them to match exactly is fragile at best. -So to work around this type of situation, [keep around the async blob rasterizer](https://github.com/servo/webrender/pull/2785/files#diff-3722af8f0bcba9c3ce197a9aa3052014R769) that we sent to the scene builder, and store it in the resource cache when we swap the scene. This blob rasterizer represents the state of the blob commands at the time the transaction was built (and is potentially different from the state of the blob image handler). Frame building [collects a list of blob images](https://github.com/servo/webrender/pull/2785/files#diff-77cbdf7ba9ebae81feb38a64c21b8454R811) (or blob tiles) that are not already rasterized, and asks the current async blob rasterizer to rasterize them synchronously on the render backend. The hope is that this would happen rarely. +In order to support scrolling very large content, blob images don't necessarily have a finite size. They can grow in any direction. At any time they do have an "active area", also called "visible area" which defines the portion that has to be rasterized. Typically this active area moves along large blob images depending on the scroll position. +The coordinate system of active area the *should* be the one of the blob's drawing commands (this is really up to the blob handler implementation to enforce that, Gecko does), and its scale should correspond to device pixels. The active area's coordinates can be negative. -Another important detail is that for this to work, resources that are used by blob images (so currently only fonts), need to be in sync with the blobs. Fortunately, fonts are currently immutable so we mostly need to make sure they are added [before the transaction](https://github.com/servo/webrender/pull/2785/files#diff-77cbdf7ba9ebae81feb38a64c21b8454R440) is built and [removed after](https://github.com/servo/webrender/pull/2785/files#diff-77cbdf7ba9ebae81feb38a64c21b8454R400) the transaction is swapped. If blob images were to use images, then we'd have to either do the same for these images (and disallow updating them), or maintain the state of images before and after scene building like we effectively do for blobs. +As far as positioning goes, the active area maps to the image display item's bounds. In other words the content at the top-left corner of the active area will be rendered on screen at the position of the top-left corner of the display item's local rect. + +In Gecko, the active area corresponds to the intersection of the fallback content's rect and the displayport. + +The terms "visible area" and "visible rect" are used a lot in the blobs code, unfortunately they collide with frame building's visibility/culling terminology. They don't correspond to what is visible to the user, but rather what is in the displayport. + + +## Tiling + +Blob images can be either tiled or non-tiled. Non-tiled blob images support invalid rects while tiled blob images track only validty at the tile level. In gecko all blobs are tiled with a tile size of 256x256. + +Just like regular tiled images, blob image tiles along the border of the image are shrinked to fit the remaining size. The only difference is that the tiling pattern always starts at the top-left corner for regular images (smaller boundary tiles only along the right and bottom edges), while it can be aribtrarily positioned for blob images (smaller boundary tiles potentially on all sides). + +The tiling logic is in webrender/src/image.rs. + + +## Async rasterization + +Blobs are typically too slow to rasterize on the critical path. We try to avoid blocking frame building on blob image rasterization. In order to do that we rasterize blobs as part of scene building. Rather than rasterize tiles on demand from visibility informating, we rasterize the entire active area during scene building. This means we potentially process a lot more content than will be displayed if the user doesn't scroll through all of the visible area. + +When the render backend receives a transaction, it looks for all new and update blob images, and generate blob rasterization requests for all tiles of the blob images that overlap their active area. The requests are bundled with an `AsyncBlobImageRasterizer` object in the transaction that is sent to the scene builder thread. The async rasterizer is created by the `BlobImageHandler` at each transaction. It is a snapshot of the state of the blobs as well as external information such as fonts, and does the actual rasterization. + +While tiles are rasterized eagerly during scene building, their content is uploaded lazily to the texture cache depending on the result of the visibility pass during frame building. + + +## Late rasterization + +In some case we run into a missing blob image during frame building and have to rasterize it synchronously. This happens when a rasterized tile is uploaded to the texture cache (at which point the CPU side is discarded), the texture cache entry expires and after scrolling back into view the tile is needed again. +We should really keep the rasterized blobs around just like we keep regular images in the cache. Hopefully this section will become obsolete eventually and we'll be able to remove late blob rasterization. + +The information needed for async rasterization corresponds to the state of blobs before scene building while late rasterization needs the state of blobs after the last complete scene build. This means we have to be careful about which version we manipulate in the resource cache. From 6fd3705f46e21072884401e7dd7311e1fc74b292 Mon Sep 17 00:00:00 2001 From: Nicolas Silva Date: Fri, 3 Apr 2020 01:11:56 +0000 Subject: [PATCH 4/7] Bug 1624627 - Reimplement the slow frame indicator. r=jrmuizel This removes the WebRender side of the previous slow frame indicator and replace it with a simple implementation that only looks at the CPU time on the render backend and renderer thread involved for building a frame. A followup patch will add a separate indicator for when the displaylist/ipc/scene bits take too long. Differential Revision: https://phabricator.services.mozilla.com/D69247 [ghsync] From https://hg.mozilla.org/mozilla-central/rev/3ca010d7a52d68e7fbc2e91edaeb980cdd6e76fb --- webrender/src/profiler.rs | 6 +++++- webrender/src/renderer.rs | 11 +++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/webrender/src/profiler.rs b/webrender/src/profiler.rs index 9ae6316378..7ecaa8cc3a 100644 --- a/webrender/src/profiler.rs +++ b/webrender/src/profiler.rs @@ -485,6 +485,10 @@ impl TimeProfileCounter { pub fn get(&self) -> u64 { self.nanoseconds } + + pub fn get_ms(&self) -> f64 { + self.nanoseconds as f64 / 1000000.0 + } } impl ProfileCounter for TimeProfileCounter { @@ -496,7 +500,7 @@ impl ProfileCounter for TimeProfileCounter { if self.invert { format!("{:.2} fps", 1000000000.0 / self.nanoseconds as f64) } else { - format!("{:.2} ms", self.nanoseconds as f64 / 1000000.0) + format!("{:.2} ms", self.get_ms()) } } diff --git a/webrender/src/renderer.rs b/webrender/src/renderer.rs index 4f8fb06395..b56c991c67 100644 --- a/webrender/src/renderer.rs +++ b/webrender/src/renderer.rs @@ -3020,10 +3020,6 @@ impl Renderer { (cpu_profiles, gpu_profiles) } - pub fn notify_slow_frame(&mut self) { - self.slow_frame_indicator.changed(); - } - /// Reset the current partial present state. This forces the entire framebuffer /// to be refreshed next time `render` is called. pub fn force_redraw(&mut self) { @@ -3343,6 +3339,13 @@ impl Renderer { self.profile_counters.frame_time.set(ns); } + let frame_cpu_time_ns = self.backend_profile_counters.total_time.get() + + profile_timers.cpu_time.get(); + let frame_cpu_time_ms = frame_cpu_time_ns as f64 / 1000000.0; + if frame_cpu_time_ms > 16.0 { + self.slow_frame_indicator.changed(); + } + if self.max_recorded_profiles > 0 { while self.cpu_profiles.len() >= self.max_recorded_profiles { self.cpu_profiles.pop_front(); From afa7706f5f11de58462d47a5e77362026bd3892a Mon Sep 17 00:00:00 2001 From: Nicolas Silva Date: Fri, 3 Apr 2020 01:12:06 +0000 Subject: [PATCH 5/7] Bug 1624627 - Add a slow transaction indicator. r=jrmuizel It moves when DL building + IPC + scene building takes more than 100ms. Depends on D69247 Differential Revision: https://phabricator.services.mozilla.com/D69254 [ghsync] From https://hg.mozilla.org/mozilla-central/rev/af4366c6c434242d656db2c4b6779dd50a35b94a --- webrender/src/profiler.rs | 5 ++++- webrender/src/render_backend.rs | 1 + webrender/src/renderer.rs | 19 ++++++++++++++++++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/webrender/src/profiler.rs b/webrender/src/profiler.rs index 7ecaa8cc3a..0d1305e4d8 100644 --- a/webrender/src/profiler.rs +++ b/webrender/src/profiler.rs @@ -693,6 +693,7 @@ pub struct BackendProfileCounters { pub resources: ResourceProfileCounters, pub ipc: IpcProfileCounters, pub intern: InternProfileCounters, + pub scene_changed: bool, } #[derive(Clone)] @@ -844,12 +845,14 @@ impl BackendProfileCounters { filter_data: ResourceProfileCounter::new("Interned filter data", None, None), backdrop: ResourceProfileCounter::new("Interned backdrops", None, None), }, + scene_changed: false, } } pub fn reset(&mut self) { self.total_time.reset(); self.resources.texture_cache.rasterized_blob_pixels.reset(); + self.scene_changed = false; } } @@ -1772,7 +1775,7 @@ impl Profiler { style: ProfileStyle, ) { self.draw_state.x_left = 20.0; - self.draw_state.y_left = 40.0; + self.draw_state.y_left = 50.0; self.draw_state.x_right = 450.0; self.draw_state.y_right = 40.0; diff --git a/webrender/src/render_backend.rs b/webrender/src/render_backend.rs index 854a60fbaa..c11fb7e246 100644 --- a/webrender/src/render_backend.rs +++ b/webrender/src/render_backend.rs @@ -915,6 +915,7 @@ impl RenderBackend { let scene_build_time = txn.scene_build_end_time - txn.scene_build_start_time; profile_counters.scene_build_time.set(scene_build_time); + profile_counters.scene_changed = true; } if let Some(doc) = self.documents.get_mut(&txn.document_id) { diff --git a/webrender/src/renderer.rs b/webrender/src/renderer.rs index b56c991c67..87ef69a971 100644 --- a/webrender/src/renderer.rs +++ b/webrender/src/renderer.rs @@ -1883,6 +1883,7 @@ pub struct Renderer { new_frame_indicator: ChangeIndicator, new_scene_indicator: ChangeIndicator, slow_frame_indicator: ChangeIndicator, + slow_txn_indicator: ChangeIndicator, last_time: u64, @@ -2465,6 +2466,7 @@ impl Renderer { new_frame_indicator: ChangeIndicator::new(), new_scene_indicator: ChangeIndicator::new(), slow_frame_indicator: ChangeIndicator::new(), + slow_txn_indicator: ChangeIndicator::new(), max_recorded_profiles: options.max_recorded_profiles, clear_color: options.clear_color, enable_clear_scissor: options.enable_clear_scissor, @@ -3346,6 +3348,16 @@ impl Renderer { self.slow_frame_indicator.changed(); } + if self.backend_profile_counters.scene_changed { + let txn_time_ns = self.backend_profile_counters.ipc.total_send_time.get() + + self.backend_profile_counters.ipc.build_time.get() + + self.backend_profile_counters.scene_build_time.get(); + let txn_time_ms = txn_time_ns as f64 / 1000000.0; + if txn_time_ms > 100.0 { + self.slow_txn_indicator.changed(); + } + } + if self.max_recorded_profiles > 0 { while self.cpu_profiles.len() >= self.max_recorded_profiles { self.cpu_profiles.pop_front(); @@ -3412,8 +3424,13 @@ impl Renderer { if self.debug_flags.contains(DebugFlags::SLOW_FRAME_INDICATOR) { if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) { - self.slow_frame_indicator.draw( + self.slow_txn_indicator.draw( x, 0.0, + ColorU::new(250, 80, 80, 255), + debug_renderer, + ); + self.slow_frame_indicator.draw( + x, 10.0, ColorU::new(220, 30, 10, 255), debug_renderer, ); From 5e35f2165553e5dc5ec4e045c269e9fb451f9ae6 Mon Sep 17 00:00:00 2001 From: Nicolas Silva Date: Fri, 3 Apr 2020 01:12:16 +0000 Subject: [PATCH 6/7] Bug 1626666 - Ensure ipc profile counters follow transactions through the scene builder thread. r=jrmuizel Instead of collecting so-called ipc counters when receving the SetDisplayList on the render backend, pass the information through the scene builder thread and update the profile on the render backend after the scene is swapped. This prevents ipc counters to be displayed while the transaction is still being processed by the scene builder thread. Differential Revision: https://phabricator.services.mozilla.com/D69414 [ghsync] From https://hg.mozilla.org/mozilla-central/rev/1a729c8ea63134fb2746cad5639cde022ea4919b --- webrender/src/profiler.rs | 43 +++++++++++++++------------ webrender/src/render_backend.rs | 42 +++++++++++++++----------- webrender/src/renderer.rs | 4 +-- webrender/src/scene_builder_thread.rs | 24 +++++++++++++++ 4 files changed, 74 insertions(+), 39 deletions(-) diff --git a/webrender/src/profiler.rs b/webrender/src/profiler.rs index 0d1305e4d8..0d80e1f6ee 100644 --- a/webrender/src/profiler.rs +++ b/webrender/src/profiler.rs @@ -689,7 +689,6 @@ impl GpuCacheProfileCounters { #[derive(Clone)] pub struct BackendProfileCounters { pub total_time: TimeProfileCounter, - pub scene_build_time: TimeProfileCounter, pub resources: ResourceProfileCounters, pub ipc: IpcProfileCounters, pub intern: InternProfileCounters, @@ -707,11 +706,12 @@ pub struct ResourceProfileCounters { #[derive(Clone)] pub struct IpcProfileCounters { - pub build_time: TimeProfileCounter, + pub display_list_build_time: TimeProfileCounter, + pub scene_build_time: TimeProfileCounter, /// Time between when the display list is built and when it is sent by the API. pub content_send_time: TimeProfileCounter, /// Time between sending the SetDisplayList from the API and picking it up on - /// the render backend thread. + /// the render scene builder thread. pub api_send_time: TimeProfileCounter, /// Sum of content_send_time and api_send_time. pub total_send_time: TimeProfileCounter, @@ -754,22 +754,26 @@ enumerate_interners!(declare_intern_profile_counters); impl IpcProfileCounters { pub fn set( &mut self, - build_start: u64, - build_end: u64, + dl_build_start: u64, + dl_build_end: u64, send_start: u64, - consume_start: u64, + scene_build_start: u64, + scene_build_end: u64, display_len: usize, ) { - self.build_time.reset(); + self.display_list_build_time.reset(); self.content_send_time.reset(); self.api_send_time.reset(); self.total_send_time.reset(); + self.scene_build_time.reset(); self.display_lists.reset(); - let build_time = build_end - build_start; - let content_send_time = send_start - build_end; - let api_send_time = consume_start - send_start; - self.build_time.inc(build_time); + let dl_build_time = dl_build_end - dl_build_start; + let scene_build_time = scene_build_end - scene_build_start; + let content_send_time = send_start - dl_build_end; + let api_send_time = scene_build_start - send_start; + self.display_list_build_time.inc(dl_build_time); + self.scene_build_time.inc(scene_build_time); self.content_send_time.inc(content_send_time); self.api_send_time.inc(api_send_time); self.total_send_time.inc(content_send_time + api_send_time); @@ -784,10 +788,6 @@ impl BackendProfileCounters { "Backend CPU Time", false, Some(expected::MAX_BACKEND_CPU_TIME), ), - scene_build_time: TimeProfileCounter::new( - "Scene build time", false, - Some(expected::MAX_SCENE_BUILD_TIME), - ), resources: ResourceProfileCounters { font_templates: ResourceProfileCounter::new( "Font Templates", @@ -807,10 +807,14 @@ impl BackendProfileCounters { gpu_cache: GpuCacheProfileCounters::new(), }, ipc: IpcProfileCounters { - build_time: TimeProfileCounter::new( + display_list_build_time: TimeProfileCounter::new( "DisplayList Build Time", false, Some(expected::DISPLAY_LIST_BUILD_TIME) ), + scene_build_time: TimeProfileCounter::new( + "Scene build time", false, + Some(expected::MAX_SCENE_BUILD_TIME), + ), content_send_time: TimeProfileCounter::new( "Content Send Time", false, Some(expected::DISPLAY_LIST_SEND_TIME), @@ -1593,7 +1597,8 @@ impl Profiler { Profiler::draw_counters( &[ - &backend_profile.ipc.build_time, + &backend_profile.ipc.display_list_build_time, + &backend_profile.ipc.scene_build_time, &backend_profile.ipc.content_send_time, &backend_profile.ipc.api_send_time, &backend_profile.ipc.total_send_time, @@ -1795,9 +1800,9 @@ impl Profiler { self.ipc_graph .push(backend_profile.ipc.total_send_time.nanoseconds); self.display_list_build_graph - .push(backend_profile.ipc.build_time.nanoseconds); + .push(backend_profile.ipc.display_list_build_time.nanoseconds); self.scene_build_graph - .push(backend_profile.scene_build_time.nanoseconds); + .push(backend_profile.ipc.scene_build_time.nanoseconds); self.blob_raster_graph .push(backend_profile.resources.texture_cache.rasterized_blob_pixels.size as u64); self.ipc_time.set(backend_profile.ipc.total_send_time.nanoseconds); diff --git a/webrender/src/render_backend.rs b/webrender/src/render_backend.rs index c11fb7e246..b8b75d7cc4 100644 --- a/webrender/src/render_backend.rs +++ b/webrender/src/render_backend.rs @@ -35,7 +35,7 @@ use crate::picture::{RetainedTiles, TileCacheLogger}; use crate::prim_store::{PrimitiveScratchBuffer, PrimitiveInstance}; use crate::prim_store::{PrimitiveInstanceKind, PrimTemplateCommonData}; use crate::prim_store::interned::*; -use crate::profiler::{BackendProfileCounters, IpcProfileCounters, ResourceProfileCounters}; +use crate::profiler::{BackendProfileCounters, ResourceProfileCounters}; use crate::record::ApiRecordingReceiver; use crate::record::LogRecorder; use crate::render_task_graph::RenderTaskGraphCounters; @@ -786,7 +786,6 @@ impl RenderBackend { message: SceneMsg, frame_counter: u32, txn: &mut Transaction, - ipc_profile_counters: &mut IpcProfileCounters, ) { let doc = self.documents.get_mut(&document_id).expect("No document?"); @@ -845,9 +844,8 @@ impl RenderBackend { } let display_list_len = built_display_list.data().len(); - let (builder_start_time, builder_finish_time, send_start_time) = + let (builder_start_time_ns, builder_end_time_ns, send_time_ns) = built_display_list.times(); - let display_list_received_time = precise_time_ns(); txn.display_list_updates.push(DisplayListUpdate { built_display_list, @@ -856,15 +854,16 @@ impl RenderBackend { background, viewport_size, content_size, + timings: TransactionTimings { + builder_start_time_ns, + builder_end_time_ns, + send_time_ns, + scene_build_start_time_ns: 0, + scene_build_end_time_ns: 0, + blob_rasterization_end_time_ns: 0, + display_list_len, + }, }); - - ipc_profile_counters.set( - builder_start_time, - builder_finish_time, - send_start_time, - display_list_received_time, - display_list_len, - ); } SceneMsg::SetRootPipeline(pipeline_id) => { profile_scope!("SetRootPipeline"); @@ -911,11 +910,19 @@ impl RenderBackend { for mut txn in txns.drain(..) { let has_built_scene = txn.built_scene.is_some(); - if has_built_scene { - let scene_build_time = - txn.scene_build_end_time - txn.scene_build_start_time; - profile_counters.scene_build_time.set(scene_build_time); - profile_counters.scene_changed = true; + if let Some(timings) = txn.timings { + if has_built_scene { + profile_counters.scene_changed = true; + } + + profile_counters.ipc.set( + timings.builder_start_time_ns, + timings.builder_end_time_ns, + timings.send_time_ns, + timings.scene_build_start_time_ns, + timings.scene_build_end_time_ns, + timings.display_list_len, + ); } if let Some(doc) = self.documents.get_mut(&txn.document_id) { @@ -1397,7 +1404,6 @@ impl RenderBackend { scene_msg, *frame_counter, &mut txn, - &mut profile_counters.ipc, ) } diff --git a/webrender/src/renderer.rs b/webrender/src/renderer.rs index 87ef69a971..cad12dfa56 100644 --- a/webrender/src/renderer.rs +++ b/webrender/src/renderer.rs @@ -3350,8 +3350,8 @@ impl Renderer { if self.backend_profile_counters.scene_changed { let txn_time_ns = self.backend_profile_counters.ipc.total_send_time.get() - + self.backend_profile_counters.ipc.build_time.get() - + self.backend_profile_counters.scene_build_time.get(); + + self.backend_profile_counters.ipc.display_list_build_time.get() + + self.backend_profile_counters.ipc.scene_build_time.get(); let txn_time_ms = txn_time_ns as f64 / 1000000.0; if txn_time_ms > 100.0 { self.slow_txn_indicator.changed(); diff --git a/webrender/src/scene_builder_thread.rs b/webrender/src/scene_builder_thread.rs index c9de7740cc..ec2d58eee9 100644 --- a/webrender/src/scene_builder_thread.rs +++ b/webrender/src/scene_builder_thread.rs @@ -39,6 +39,17 @@ use crate::debug_server; #[cfg(feature = "debugger")] use api::{BuiltDisplayListIter, DisplayItem}; +/// Various timing information that will be truned into +/// IpcProfileCounters later down the pipeline. +pub struct TransactionTimings { + pub builder_start_time_ns: u64, + pub builder_end_time_ns: u64, + pub send_time_ns: u64, + pub scene_build_start_time_ns: u64, + pub scene_build_end_time_ns: u64, + pub blob_rasterization_end_time_ns: u64, + pub display_list_len: usize, +} /// Represents the work associated to a transaction before scene building. pub struct Transaction { @@ -102,6 +113,7 @@ pub struct BuiltTransaction { pub scene_build_end_time: u64, pub render_frame: bool, pub invalidate_rendered_frame: bool, + pub timings: Option, } pub struct DisplayListUpdate { @@ -111,6 +123,7 @@ pub struct DisplayListUpdate { pub background: Option, pub viewport_size: LayoutSize, pub content_size: LayoutSize, + pub timings: TransactionTimings, } /// Contains the render backend data needed to build a scene. @@ -452,6 +465,7 @@ impl SceneBuilderThread { scene_build_start_time, scene_build_end_time: precise_time_ns(), interner_updates, + timings: None, })]; self.forward_built_transactions(txns); @@ -548,6 +562,7 @@ impl SceneBuilderThread { self.removed_pipelines.insert(pipeline_id); } + let mut timings = None; for update in txn.display_list_updates.drain(..) { if self.removed_pipelines.contains(&update.pipeline_id) { continue; @@ -561,6 +576,8 @@ impl SceneBuilderThread { update.viewport_size, update.content_size, ); + + timings = Some(update.timings); } self.removed_pipelines.clear(); @@ -596,6 +613,12 @@ impl SceneBuilderThread { let is_low_priority = false; txn.rasterize_blobs(is_low_priority); + if let Some(timings) = timings.as_mut() { + timings.blob_rasterization_end_time_ns = precise_time_ns(); + timings.scene_build_start_time_ns = scene_build_start_time; + timings.scene_build_end_time_ns = scene_build_end_time; + } + drain_filter( &mut txn.notifications, |n| { n.when() == Checkpoint::SceneBuilt }, @@ -620,6 +643,7 @@ impl SceneBuilderThread { interner_updates, scene_build_start_time, scene_build_end_time, + timings, }) } From 9956e79f4a59275494f72f4191c0d6c47a8f1c36 Mon Sep 17 00:00:00 2001 From: Nicolas Silva Date: Fri, 3 Apr 2020 01:12:26 +0000 Subject: [PATCH 7/7] Bug 1626666 - Rename IpcProfileCounters into TransactionProfileCounters. r=jrmuizel Few of the counters actually have anything to do with IPC although they all relate to events of layout transactions. Depends on D69414 Differential Revision: https://phabricator.services.mozilla.com/D69415 [ghsync] From https://hg.mozilla.org/mozilla-central/rev/d2771cdff401747f61d5302cdf7a284eb851b1ce --- webrender/src/profiler.rs | 30 +++++++++++++-------------- webrender/src/render_backend.rs | 2 +- webrender/src/renderer.rs | 6 +++--- webrender/src/scene_builder_thread.rs | 2 +- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/webrender/src/profiler.rs b/webrender/src/profiler.rs index 0d80e1f6ee..5cb88e9170 100644 --- a/webrender/src/profiler.rs +++ b/webrender/src/profiler.rs @@ -690,7 +690,7 @@ impl GpuCacheProfileCounters { pub struct BackendProfileCounters { pub total_time: TimeProfileCounter, pub resources: ResourceProfileCounters, - pub ipc: IpcProfileCounters, + pub txn: TransactionProfileCounters, pub intern: InternProfileCounters, pub scene_changed: bool, } @@ -705,7 +705,7 @@ pub struct ResourceProfileCounters { } #[derive(Clone)] -pub struct IpcProfileCounters { +pub struct TransactionProfileCounters { pub display_list_build_time: TimeProfileCounter, pub scene_build_time: TimeProfileCounter, /// Time between when the display list is built and when it is sent by the API. @@ -751,7 +751,7 @@ macro_rules! declare_intern_profile_counters { enumerate_interners!(declare_intern_profile_counters); -impl IpcProfileCounters { +impl TransactionProfileCounters { pub fn set( &mut self, dl_build_start: u64, @@ -806,7 +806,7 @@ impl BackendProfileCounters { texture_cache: TextureCacheProfileCounters::new(), gpu_cache: GpuCacheProfileCounters::new(), }, - ipc: IpcProfileCounters { + txn: TransactionProfileCounters { display_list_build_time: TimeProfileCounter::new( "DisplayList Build Time", false, Some(expected::DISPLAY_LIST_BUILD_TIME) @@ -1587,7 +1587,7 @@ impl Profiler { &backend_profile.resources.texture_cache.pages_alpha8_linear, &backend_profile.resources.texture_cache.pages_color8_linear, &backend_profile.resources.texture_cache.pages_color8_nearest, - &backend_profile.ipc.display_lists, + &backend_profile.txn.display_lists, ], None, debug_renderer, @@ -1597,11 +1597,11 @@ impl Profiler { Profiler::draw_counters( &[ - &backend_profile.ipc.display_list_build_time, - &backend_profile.ipc.scene_build_time, - &backend_profile.ipc.content_send_time, - &backend_profile.ipc.api_send_time, - &backend_profile.ipc.total_send_time, + &backend_profile.txn.display_list_build_time, + &backend_profile.txn.scene_build_time, + &backend_profile.txn.content_send_time, + &backend_profile.txn.api_send_time, + &backend_profile.txn.total_send_time, ], None, debug_renderer, @@ -1751,7 +1751,7 @@ impl Profiler { &backend_profile.resources.image_templates, &backend_profile.resources.font_templates, &backend_profile.resources.texture_cache.rasterized_blob_pixels, - &backend_profile.ipc.display_lists, + &backend_profile.txn.display_lists, ], ]; @@ -1798,14 +1798,14 @@ impl Profiler { .push(renderer_timers.cpu_time.nanoseconds); self.renderer_time.set(renderer_timers.cpu_time.nanoseconds); self.ipc_graph - .push(backend_profile.ipc.total_send_time.nanoseconds); + .push(backend_profile.txn.total_send_time.nanoseconds); self.display_list_build_graph - .push(backend_profile.ipc.display_list_build_time.nanoseconds); + .push(backend_profile.txn.display_list_build_time.nanoseconds); self.scene_build_graph - .push(backend_profile.ipc.scene_build_time.nanoseconds); + .push(backend_profile.txn.scene_build_time.nanoseconds); self.blob_raster_graph .push(backend_profile.resources.texture_cache.rasterized_blob_pixels.size as u64); - self.ipc_time.set(backend_profile.ipc.total_send_time.nanoseconds); + self.ipc_time.set(backend_profile.txn.total_send_time.nanoseconds); self.gpu_graph.push(gpu_graph); self.gpu_time.set(gpu_graph); self.gpu_frames.push(gpu_graph, gpu_graphrs); diff --git a/webrender/src/render_backend.rs b/webrender/src/render_backend.rs index b8b75d7cc4..3ac8c00e42 100644 --- a/webrender/src/render_backend.rs +++ b/webrender/src/render_backend.rs @@ -915,7 +915,7 @@ impl RenderBackend { profile_counters.scene_changed = true; } - profile_counters.ipc.set( + profile_counters.txn.set( timings.builder_start_time_ns, timings.builder_end_time_ns, timings.send_time_ns, diff --git a/webrender/src/renderer.rs b/webrender/src/renderer.rs index cad12dfa56..fe17540e59 100644 --- a/webrender/src/renderer.rs +++ b/webrender/src/renderer.rs @@ -3349,9 +3349,9 @@ impl Renderer { } if self.backend_profile_counters.scene_changed { - let txn_time_ns = self.backend_profile_counters.ipc.total_send_time.get() - + self.backend_profile_counters.ipc.display_list_build_time.get() - + self.backend_profile_counters.ipc.scene_build_time.get(); + let txn_time_ns = self.backend_profile_counters.txn.total_send_time.get() + + self.backend_profile_counters.txn.display_list_build_time.get() + + self.backend_profile_counters.txn.scene_build_time.get(); let txn_time_ms = txn_time_ns as f64 / 1000000.0; if txn_time_ms > 100.0 { self.slow_txn_indicator.changed(); diff --git a/webrender/src/scene_builder_thread.rs b/webrender/src/scene_builder_thread.rs index ec2d58eee9..e8d582b3fe 100644 --- a/webrender/src/scene_builder_thread.rs +++ b/webrender/src/scene_builder_thread.rs @@ -40,7 +40,7 @@ use crate::debug_server; use api::{BuiltDisplayListIter, DisplayItem}; /// Various timing information that will be truned into -/// IpcProfileCounters later down the pipeline. +/// TransactionProfileCounters later down the pipeline. pub struct TransactionTimings { pub builder_start_time_ns: u64, pub builder_end_time_ns: u64,