diff --git a/glsl-to-cxx/src/hir.rs b/glsl-to-cxx/src/hir.rs index e1511e613d..e9b39fecf6 100644 --- a/glsl-to-cxx/src/hir.rs +++ b/glsl-to-cxx/src/hir.rs @@ -2295,20 +2295,13 @@ fn translate_expression(state: &mut State, e: &syntax::Expr) -> Expr { _ => panic!(), }); - let mut sel = SwizzleSelector::parse(i.as_str()); + let sel = SwizzleSelector::parse(i.as_str()); - if let ExprKind::Variable(ref mut sym) = &mut e.kind { + if let ExprKind::Variable(ref sym) = &mut e.kind { if state.sym(*sym).name == "gl_FragCoord" { for c in &sel.components { state.used_fragcoord |= 1 << c; } - *sym = state.lookup("gl_FragCoordXY").unwrap(); - for c in &mut sel.components { - if *c >= 2 { - *c -= 2; - *sym = state.lookup("gl_FragCoordZW").unwrap(); - } - } } } @@ -3447,14 +3440,6 @@ pub fn ast_to_hir(state: &mut State, tu: &syntax::TranslationUnit) -> Translatio "gl_FragCoord", SymDecl::Global(StorageClass::In, None, Type::new(Vec4), RunClass::Vector), ); - state.declare( - "gl_FragCoordXY", - SymDecl::Global(StorageClass::In, None, Type::new(Vec2), RunClass::Vector), - ); - state.declare( - "gl_FragCoordZW", - SymDecl::Global(StorageClass::In, None, Type::new(Vec2), RunClass::Scalar), - ); state.declare( "gl_FragColor", SymDecl::Global(StorageClass::Out, None, Type::new(Vec4), RunClass::Vector), diff --git a/glsl-to-cxx/src/lib.rs b/glsl-to-cxx/src/lib.rs index a4b2873380..9e09d83670 100644 --- a/glsl-to-cxx/src/lib.rs +++ b/glsl-to-cxx/src/lib.rs @@ -183,6 +183,7 @@ fn translate_shader( deps: RefCell::new(Vec::new()), vector_mask: 0, uses_discard: false, + uses_perspective: name.contains("PERSPECTIVE"), has_draw_span_rgba8: false, has_draw_span_r8: false, used_globals: RefCell::new(Vec::new()), @@ -528,7 +529,7 @@ fn write_bind_attrib_location(state: &mut OutputState, attribs: &[hir::SymRef]) for i in attribs { let sym = state.hir.sym(*i); write!(state, - " if (strcmp(\"{0}\", name) == 0) {{\ + " if (strcmp(\"{0}\", name) == 0) {{ \ return attrib_locations.{0} != NULL_ATTRIB ? attrib_locations.{0} : -1;\ }}\n", sym.name.as_str()); @@ -703,20 +704,57 @@ fn write_read_inputs(state: &mut OutputState, inputs: &[hir::SymRef]) { write!(state, "}}\n"); write!(state, "InterpInputs interp_step;\n"); + + let mut has_perspective: bool = false; + if state.uses_perspective { + for i in inputs { + let sym = state.hir.sym(*i); + match &sym.decl { + hir::SymDecl::Global(_, _, ty, run_class) => { + if *run_class != hir::RunClass::Scalar { + if !has_perspective { + has_perspective = true; + write!(state, "struct InterpPerspective {{\n"); + } + show_type(state, ty); + write!(state, " {};\n", sym.name.as_str()); + } + } + _ => panic!(), + } + } + if has_perspective { + write!(state, "}};\n"); + write!(state, "InterpPerspective interp_perspective;\n"); + } + } + write!(state, "static void read_interp_inputs(\ Self *self, const InterpInputs *init, const InterpInputs *step, float step_width) {{\n"); + if has_perspective { + write!(state, " Float w = 1.0f / self->gl_FragCoord.w;\n"); + } for i in inputs { let sym = state.hir.sym(*i); match &sym.decl { hir::SymDecl::Global(_, _, _, run_class) => { if *run_class != hir::RunClass::Scalar { let name = sym.name.as_str(); - write!( - state, - " self->{0} = init_interp(init->{0}, step->{0});\n", - name - ); + if has_perspective { + write!( + state, + " self->interp_perspective.{0} = init_interp(init->{0}, step->{0});\n", + name + ); + write!(state, " self->{0} = self->interp_perspective.{0} * w;\n", name); + } else { + write!( + state, + " self->{0} = init_interp(init->{0}, step->{0});\n", + name + ); + } write!( state, " self->interp_step.{0} = step->{0} * step_width;\n", @@ -733,13 +771,24 @@ fn write_read_inputs(state: &mut OutputState, inputs: &[hir::SymRef]) { if (state.hir.used_fragcoord & 1) != 0 { write!(state, " step_fragcoord();\n"); } + if state.uses_perspective { + write!(state, " step_perspective();\n"); + } + if has_perspective { + write!(state, " Float w = 1.0f / gl_FragCoord.w;\n"); + } for i in inputs { let sym = state.hir.sym(*i); match &sym.decl { hir::SymDecl::Global(_, _, _, run_class) => { if *run_class != hir::RunClass::Scalar { let name = sym.name.as_str(); - write!(state, " {} += interp_step.{};\n", name, name); + if has_perspective { + write!(state, " interp_perspective.{0} += interp_step.{0};\n", name); + write!(state, " {0} = w * interp_perspective.{0};\n", name); + } else { + write!(state, " {0} += interp_step.{0};\n", name); + } } } _ => panic!(), @@ -755,13 +804,24 @@ fn write_read_inputs(state: &mut OutputState, inputs: &[hir::SymRef]) { if (state.hir.used_fragcoord & 1) != 0 { write!(state, " step_fragcoord(chunks);\n"); } + if state.uses_perspective { + write!(state, " step_perspective(chunks);\n"); + } + if has_perspective { + write!(state, " Float w = 1.0f / gl_FragCoord.w;\n"); + } for i in inputs { let sym = state.hir.sym(*i); match &sym.decl { hir::SymDecl::Global(_, _, _, run_class) => { if *run_class != hir::RunClass::Scalar { let name = sym.name.as_str(); - write!(state, " {} += interp_step.{} * chunks;\n", name, name); + if has_perspective { + write!(state, " interp_perspective.{0} += interp_step.{0} * chunks;\n", name); + write!(state, " {0} = interp_perspective.{0} * w;\n", name); + } else { + write!(state, " {0} += interp_step.{0} * chunks;\n", name); + } } } _ => panic!(), @@ -820,6 +880,7 @@ pub struct OutputState { deps: RefCell>, vector_mask: u32, uses_discard: bool, + uses_perspective: bool, has_draw_span_rgba8: bool, has_draw_span_r8: bool, used_globals: RefCell>, @@ -3596,9 +3657,6 @@ pub fn show_translation_unit(state: &mut OutputState, tu: &hir::TranslationUnit) fn write_abi(state: &mut OutputState) { match state.kind { ShaderKind::Fragment => { - state.write("static bool use_discard(Self*) { return "); - state.write(if state.uses_discard { "true" } else { "false" }); - state.write("; }\n"); state.write("static void run(Self *self) {\n"); if state.uses_discard { state.write(" self->isPixelDiscarded = false;\n"); @@ -3612,12 +3670,12 @@ fn write_abi(state: &mut OutputState) { state.write("}\n"); if state.has_draw_span_rgba8 { state.write( - "static void draw_span_RGBA8(Self* self, uint32_t* buf, int len) {\ + "static void draw_span_RGBA8(Self* self, uint32_t* buf, int len) { \ dispatch_draw_span(self, buf, len); }\n"); } if state.has_draw_span_r8 { state.write( - "static void draw_span_R8(Self* self, uint8_t* buf, int len) {\ + "static void draw_span_R8(Self* self, uint8_t* buf, int len) { \ dispatch_draw_span(self, buf, len); }\n"); } @@ -3645,13 +3703,18 @@ fn write_abi(state: &mut OutputState) { state.write(" init_span_func = (InitSpanFunc)&read_interp_inputs;\n"); state.write(" run_func = (RunFunc)&run;\n"); state.write(" skip_func = (SkipFunc)&skip;\n"); - state.write(" use_discard_func = (UseDiscardFunc)&use_discard;\n"); if state.has_draw_span_rgba8 { state.write(" draw_span_RGBA8_func = (DrawSpanRGBA8Func)&draw_span_RGBA8;\n"); } if state.has_draw_span_r8 { state.write(" draw_span_R8_func = (DrawSpanR8Func)&draw_span_R8;\n"); } + if state.uses_discard { + state.write(" enable_discard();\n"); + } + if state.uses_perspective { + state.write(" enable_perspective();\n"); + } } ShaderKind::Vertex => { state.write(" init_batch_func = (InitBatchFunc)&bind_textures;\n"); diff --git a/swgl/src/gl.cc b/swgl/src/gl.cc index f2679e7ac2..8d6628318b 100644 --- a/swgl/src/gl.cc +++ b/swgl/src/gl.cc @@ -436,11 +436,13 @@ struct Context { GLuint texture_2d_binding = 0; GLuint texture_3d_binding = 0; GLuint texture_2d_array_binding = 0; + GLuint texture_rectangle_binding = 0; void unlink(GLuint n) { ::unlink(texture_2d_binding, n); ::unlink(texture_3d_binding, n); ::unlink(texture_2d_array_binding, n); + ::unlink(texture_rectangle_binding, n); } }; TextureUnit texture_units[MAX_TEXTURE_UNITS]; @@ -478,6 +480,8 @@ struct Context { return texture_units[active_texture_unit].texture_2d_array_binding; case GL_TEXTURE_3D: return texture_units[active_texture_unit].texture_3d_binding; + case GL_TEXTURE_RECTANGLE: + return texture_units[active_texture_unit].texture_rectangle_binding; case GL_TIME_ELAPSED: return time_elapsed_query; case GL_SAMPLES_PASSED: @@ -494,6 +498,22 @@ struct Context { return unknown_binding; } } + + Texture& get_texture(sampler2D, int unit) { + return textures[texture_units[unit].texture_2d_binding]; + } + + Texture& get_texture(isampler2D, int unit) { + return textures[texture_units[unit].texture_2d_binding]; + } + + Texture& get_texture(sampler2DArray, int unit) { + return textures[texture_units[unit].texture_2d_array_binding]; + } + + Texture& get_texture(sampler2DRect, int unit) { + return textures[texture_units[unit].texture_rectangle_binding]; + } }; static Context* ctx = nullptr; static ProgramImpl* program_impl = nullptr; @@ -530,7 +550,7 @@ static inline void init_sampler(S* s, Texture& t) { template S* lookup_sampler(S* s, int texture) { - Texture& t = ctx->textures[ctx->texture_units[texture].texture_2d_binding]; + Texture& t = ctx->get_texture(s, texture); if (!t.buf) { *s = S(); } else { @@ -542,7 +562,7 @@ S* lookup_sampler(S* s, int texture) { template S* lookup_isampler(S* s, int texture) { - Texture& t = ctx->textures[ctx->texture_units[texture].texture_2d_binding]; + Texture& t = ctx->get_texture(s, texture); if (!t.buf) { *s = S(); } else { @@ -553,8 +573,7 @@ S* lookup_isampler(S* s, int texture) { template S* lookup_sampler_array(S* s, int texture) { - Texture& t = - ctx->textures[ctx->texture_units[texture].texture_2d_array_binding]; + Texture& t = ctx->get_texture(s, texture); if (!t.buf) { *s = S(); } else { @@ -2211,10 +2230,9 @@ static ALWAYS_INLINE int check_depth8(uint16_t z, uint16_t* zbuf, } template -static ALWAYS_INLINE bool check_depth4(uint16_t z, uint16_t* zbuf, +static ALWAYS_INLINE bool check_depth4(ZMask4 src, uint16_t* zbuf, ZMask4& outmask, int span = 0) { ZMask4 dest = unaligned_load(zbuf); - ZMask4 src = int16_t(z); // Invert the depth test to check which pixels failed and should be discarded. ZMask4 mask = ctx->depthfunc == GL_LEQUAL ? @@ -2236,26 +2254,42 @@ static ALWAYS_INLINE bool check_depth4(uint16_t z, uint16_t* zbuf, return true; } -static inline ZMask4 packZMask4(Bool a) { +template +static ALWAYS_INLINE bool check_depth4(uint16_t z, uint16_t* zbuf, + ZMask4& outmask, int span = 0) { + return check_depth4(ZMask4(int16_t(z)), zbuf, outmask, + span); +} + +template +static inline ZMask4 packZMask4(T a) { #if USE_SSE2 return lowHalf(bit_cast(_mm_packs_epi32(a, a))); #elif USE_NEON - return vqmovun_s32(a); + return vqmovn_s32(a); #else return CONVERT(a, ZMask4); #endif } -static ALWAYS_INLINE void discard_depth(uint16_t z, uint16_t* zbuf, +static ALWAYS_INLINE ZMask4 packDepth() { + return packZMask4(cast(fragment_shader->gl_FragCoord.z * 0xFFFF) - 0x8000); +} + +static ALWAYS_INLINE void discard_depth(ZMask4 src, uint16_t* zbuf, ZMask4 mask) { if (ctx->depthmask) { ZMask4 dest = unaligned_load(zbuf); - ZMask4 src = int16_t(z); mask |= packZMask4(fragment_shader->isPixelDiscarded); unaligned_store(zbuf, (mask & dest) | (~mask & src)); } } +static ALWAYS_INLINE void discard_depth(uint16_t z, uint16_t* zbuf, + ZMask4 mask) { + discard_depth(ZMask4(int16_t(z)), zbuf, mask); +} + static inline WideRGBA8 pack_pixels_RGBA8(const vec4& v) { ivec4 i = roundfast(v, 255.49f); HalfRGBA8 xz = packRGBA8(i.z, i.x); @@ -2394,31 +2428,6 @@ UNUSED static inline void commit_texture_span(uint32_t* buf, uint32_t* src, } } -template -static inline void commit_output(uint32_t* buf, uint16_t z, uint16_t* zbuf) { - ZMask4 zmask; - if (check_depth4(z, zbuf, zmask)) { - commit_output(buf, unpack(zmask, buf)); - if (DISCARD) { - discard_depth(z, zbuf, zmask); - } - } else { - fragment_shader->skip(); - } -} - -template -static inline void commit_output(uint32_t* buf, uint16_t z, uint16_t* zbuf, - int span) { - ZMask4 zmask; - if (check_depth4(z, zbuf, zmask, span)) { - commit_output(buf, unpack(zmask, buf)); - if (DISCARD) { - discard_depth(z, zbuf, zmask); - } - } -} - static inline PackedRGBA8 span_mask_RGBA8(int span) { return bit_cast(I32(span) < I32{1, 2, 3, 4}); } @@ -2500,8 +2509,17 @@ UNUSED static inline void commit_solid_span(uint8_t* buf, PackedR8 r, int len) { } } +static inline WideR8 span_mask_R8(int span) { + return bit_cast(WideR8(span) < WideR8{1, 2, 3, 4}); +} + template -static inline void commit_output(uint8_t* buf, uint16_t z, uint16_t* zbuf) { +static inline void commit_output(uint8_t* buf, int span) { + commit_output(buf, span_mask_R8(span)); +} + +template +static inline void commit_output(P* buf, Z z, uint16_t* zbuf) { ZMask4 zmask; if (check_depth4(z, zbuf, zmask)) { commit_output(buf, unpack(zmask, buf)); @@ -2513,9 +2531,8 @@ static inline void commit_output(uint8_t* buf, uint16_t z, uint16_t* zbuf) { } } -template -static inline void commit_output(uint8_t* buf, uint16_t z, uint16_t* zbuf, - int span) { +template +static inline void commit_output(P* buf, Z z, uint16_t* zbuf, int span) { ZMask4 zmask; if (check_depth4(z, zbuf, zmask, span)) { commit_output(buf, unpack(zmask, buf)); @@ -2525,15 +2542,6 @@ static inline void commit_output(uint8_t* buf, uint16_t z, uint16_t* zbuf, } } -static inline WideR8 span_mask_R8(int span) { - return bit_cast(WideR8(span) < WideR8{1, 2, 3, 4}); -} - -template -static inline void commit_output(uint8_t* buf, int span) { - commit_output(buf, span_mask_R8(span)); -} - static const size_t MAX_FLATS = 64; typedef float Flats[MAX_FLATS]; @@ -2637,15 +2645,46 @@ static inline void draw_depth_span(uint16_t z, P* buf, uint16_t* depth, } } -typedef vec2_scalar Point; +typedef vec2_scalar Point2D; +typedef vec4_scalar Point3D; + +struct ClipRect { + float x0; + float y0; + float x1; + float y1; + + ClipRect(Texture& t) : x0(0), y0(0), x1(t.width), y1(t.height) { + if (ctx->scissortest) { + scissor(ctx->scissor); + } + } + + void scissor(const IntRect& scissor) { + x0 = max(x0, float(scissor.x)); + y0 = max(y0, float(scissor.y)); + x1 = min(x1, float(scissor.x + scissor.width)); + y1 = min(y1, float(scissor.y + scissor.height)); + } + + template + bool overlaps(int nump, const P* p) const { + int sides = 0; + for (int i = 0; i < nump; i++) { + sides |= p[i].x < x1 ? (p[i].x > x0 ? 1 | 2 : 1) : 2; + sides |= p[i].y < y1 ? (p[i].y > y0 ? 4 | 8 : 4) : 8; + } + return sides == 0xF; + } +}; template -static inline void draw_quad_spans(int nump, Point p[4], uint16_t z, +static inline void draw_quad_spans(int nump, Point2D p[4], uint16_t z, Interpolants interp_outs[4], Texture& colortex, int layer, - Texture& depthtex, float fx0, float fy0, - float fx1, float fy1) { - Point l0, r0, l1, r1; + Texture& depthtex, + const ClipRect& clipRect) { + Point2D l0, r0, l1, r1; int l0i, r0i, l1i, r1i; { int top = nump > 3 && p[3].y < p[2].y @@ -2696,7 +2735,7 @@ static inline void draw_quad_spans(int nump, Point p[4], uint16_t z, float rk = 1.0f / (r1.y - r0.y); float rm = (r1.x - r0.x) * rk; assert(l0.y == r0.y); - float y = floor(max(l0.y, fy0) + 0.5f) + 0.5f; + float y = floor(max(l0.y, clipRect.y0) + 0.5f) + 0.5f; lx += (y - l0.y) * lm; rx += (y - r0.y) * rm; Interpolants lo = interp_outs[l0i]; @@ -2710,7 +2749,7 @@ static inline void draw_quad_spans(int nump, Point p[4], uint16_t z, uint16_t* fdepth = (uint16_t*)depthtex.buf + int(y) * depthtex.stride(sizeof(uint16_t)) / sizeof(uint16_t); - while (y < fy1) { + while (y < clipRect.y1) { if (y > l1.y) { l0i = l1i; l0 = l1; @@ -2737,8 +2776,8 @@ static inline void draw_quad_spans(int nump, Point p[4], uint16_t z, rom = (interp_outs[r1i] - ro) * rk; ro += rom * (y - r0.y); } - int startx = int(max(min(lx, rx), fx0) + 0.5f); - int endx = int(min(max(lx, rx), fx1) + 0.5f); + int startx = int(max(min(lx, rx), clipRect.x0) + 0.5f); + int endx = int(min(max(lx, rx), clipRect.x1) + 0.5f); int span = endx - startx; if (span > 0) { ctx->shaded_rows++; @@ -2798,8 +2837,8 @@ static inline void draw_quad_spans(int nump, Point p[4], uint16_t z, } } } - fragment_shader->gl_FragCoordXY.x = init_interp(startx + 0.5f, 1); - fragment_shader->gl_FragCoordXY.y = y; + fragment_shader->gl_FragCoord.x = init_interp(startx + 0.5f, 1); + fragment_shader->gl_FragCoord.y = y; { Interpolants step = (ro - lo) * (1.0f / (rx - lx)); Interpolants o = lo + step * (startx + 0.5f - lx); @@ -2857,7 +2896,7 @@ static inline void draw_quad_spans(int nump, Point p[4], uint16_t z, commit_output(buf, z, depth, span); } } else { - for (; span >= 4; span -= 4, buf += 4, depth += 4) { + for (; span >= 4; span -= 4, buf += 4) { commit_output(buf); } if (span > 0) { @@ -2877,60 +2916,338 @@ static inline void draw_quad_spans(int nump, Point p[4], uint16_t z, } } +template +static inline void draw_perspective_spans(int nump, Point3D* p, + Interpolants* interp_outs, + Texture& colortex, int layer, + Texture& depthtex, + const ClipRect& clipRect) { + Point3D l0, r0, l1, r1; + int l0i, r0i, l1i, r1i; + { + // find a top point + int top = 0; + for (int i = 1; i < nump; i++) { + if (p[i].y < p[top].y) { + top = i; + } + } + // find left-most top point + l0i = top; + for (int i = top + 1; i < nump && p[i].y == p[top].y; i++) { + l0i = i; + } + if (l0i == nump - 1) { + for (int i = 0; i <= top && p[i].y == p[top].y; i++) { + l0i = i; + } + } + // find right-most top point + r0i = top; + for (int i = top - 1; i >= 0 && p[i].y == p[top].y; i--) { + r0i = i; + } + if (r0i == 0) { + for (int i = nump - 1; i >= top && p[i].y == p[top].y; i--) { + r0i = i; + } + } + l1i = NEXT_POINT(l0i); + r1i = PREV_POINT(r0i); + l0 = p[l0i]; + r0 = p[r0i]; + l1 = p[l1i]; + r1 = p[r1i]; + } + + Point3D lc = l0; + float lk = 1.0f / (l1.y - l0.y); + Point3D lm = (l1 - l0) * lk; + Point3D rc = r0; + float rk = 1.0f / (r1.y - r0.y); + Point3D rm = (r1 - r0) * rk; + assert(l0.y == r0.y); + float y = floor(max(l0.y, clipRect.y0) + 0.5f) + 0.5f; + lc += (y - l0.y) * lm; + rc += (y - r0.y) * rm; + Interpolants lo = interp_outs[l0i] * l0.w; + Interpolants lom = (interp_outs[l1i] * l1.w - lo) * lk; + lo = lo + lom * (y - l0.y); + Interpolants ro = interp_outs[r0i] * r0.w; + Interpolants rom = (interp_outs[r1i] * r1.w - ro) * rk; + ro = ro + rom * (y - r0.y); + P* fbuf = (P*)colortex.buf + (layer * colortex.height + int(y)) * + colortex.stride(sizeof(P)) / sizeof(P); + uint16_t* fdepth = + (uint16_t*)depthtex.buf + + int(y) * depthtex.stride(sizeof(uint16_t)) / sizeof(uint16_t); + while (y < clipRect.y1) { + if (y > l1.y) { + l0i = l1i; + l0 = l1; + l1i = NEXT_POINT(l1i); + l1 = p[l1i]; + if (l1.y <= l0.y) break; + lk = 1.0f / (l1.y - l0.y); + lm = (l1 - l0) * lk; + lc = l0 + (y - l0.y) * lm; + lo = interp_outs[l0i] * l0.w; + lom = (interp_outs[l1i] * l1.w - lo) * lk; + lo += lom * (y - l0.y); + } + if (y > r1.y) { + r0i = r1i; + r0 = r1; + r1i = PREV_POINT(r1i); + r1 = p[r1i]; + if (r1.y <= r0.y) break; + rk = 1.0f / (r1.y - r0.y); + rm = (r1 - r0) * rk; + rc = r0 + (y - r0.y) * rm; + ro = interp_outs[r0i] * r0.w; + rom = (interp_outs[r1i] * r1.w - ro) * rk; + ro += rom * (y - r0.y); + } + int startx = int(max(min(lc.x, rc.x), clipRect.x0) + 0.5f); + int endx = int(min(max(lc.x, rc.x), clipRect.x1) + 0.5f); + int span = endx - startx; + if (span > 0) { + ctx->shaded_rows++; + ctx->shaded_pixels += span; + P* buf = fbuf + startx; + uint16_t* depth = fdepth + startx; + bool use_depth = depthtex.buf != nullptr; + bool use_discard = fragment_shader->use_discard(); + if (depthtex.delay_clear) { + int yi = int(y); + uint32_t& mask = depthtex.cleared_rows[yi / 32]; + if ((mask & (1 << (yi & 31))) == 0) { + mask |= 1 << (yi & 31); + depthtex.delay_clear--; + clear_buffer(depthtex, depthtex.clear_val, 0, + depthtex.width, yi, yi + 1); + } + } + if (colortex.delay_clear) { + int yi = int(y); + uint32_t& mask = colortex.cleared_rows[yi / 32]; + if ((mask & (1 << (yi & 31))) == 0) { + mask |= 1 << (yi & 31); + colortex.delay_clear--; + if (use_depth || blend_key || use_discard) { + clear_buffer

(colortex, colortex.clear_val, 0, colortex.width, yi, + yi + 1, layer); + } else if (startx > 0 || endx < colortex.width) { + clear_buffer

(colortex, colortex.clear_val, 0, colortex.width, yi, + yi + 1, layer, startx, endx); + } + } + } + fragment_shader->gl_FragCoord.x = init_interp(startx + 0.5f, 1); + fragment_shader->gl_FragCoord.y = y; + { + vec2_scalar stepZW = + (rc.sel(Z, W) - lc.sel(Z, W)) * (1.0f / (rc.x - lc.x)); + vec2_scalar zw = lc.sel(Z, W) + stepZW * (startx + 0.5f - lc.x); + fragment_shader->gl_FragCoord.z = init_interp(zw.x, stepZW.x); + fragment_shader->gl_FragCoord.w = init_interp(zw.y, stepZW.y); + fragment_shader->stepZW = stepZW * 4.0f; + Interpolants step = (ro - lo) * (1.0f / (rc.x - lc.x)); + Interpolants o = lo + step * (startx + 0.5f - lc.x); + fragment_shader->init_span(&o, &step, 4.0f); + } + if (!use_discard) { + if (use_depth) { + for (; span >= 4; span -= 4, buf += 4, depth += 4) { + commit_output(buf, packDepth(), depth); + } + if (span > 0) { + commit_output(buf, packDepth(), depth, span); + } + } else { + for (; span >= 4; span -= 4, buf += 4) { + commit_output(buf); + } + if (span > 0) { + commit_output(buf, span); + } + } + } else { + if (use_depth) { + for (; span >= 4; span -= 4, buf += 4, depth += 4) { + commit_output(buf, packDepth(), depth); + } + if (span > 0) { + commit_output(buf, packDepth(), depth, span); + } + } else { + for (; span >= 4; span -= 4, buf += 4) { + commit_output(buf); + } + if (span > 0) { + commit_output(buf, span); + } + } + } + } + lc += lm; + rc += rm; + y++; + lo += lom; + ro += rom; + fbuf += colortex.stride(sizeof(P)) / sizeof(P); + fdepth += depthtex.stride(sizeof(uint16_t)) / sizeof(uint16_t); + } +} + +// Clip a primitive against the near or far Z planes, producing intermediate +// vertexes with interpolated attributes that will no longer intersect the +// selected plane. This overwrites the vertexes in-place, producing at most +// N+1 vertexes for each invocation, so appropriate storage should be reserved +// before calling. +template +static int clip_near_far(int nump, Point3D* p, Interpolants* interp) { + int numClip = 0; + Point3D prev = p[nump - 1]; + Interpolants prevInterp = interp[nump - 1]; + float prevDist = SIDE * prev.z - prev.w; + for (int i = 0; i < nump; i++) { + Point3D cur = p[i]; + Interpolants curInterp = interp[i]; + float curDist = SIDE * cur.z - cur.w; + if (curDist < 0.0f && prevDist < 0.0f) { + p[numClip] = cur; + interp[numClip] = curInterp; + numClip++; + } else if (curDist < 0.0f || prevDist < 0.0f) { + float k = prevDist / (prevDist - curDist); + p[numClip] = prev + (cur - prev) * k; + interp[numClip] = prevInterp + (curInterp - prevInterp) * k; + numClip++; + } + prev = cur; + prevInterp = curInterp; + prevDist = curDist; + } + return numClip; +} + +// Draws a perspective-correct 3D primitive with varying Z value, as opposed +// to a simple 2D planar primitive with a constant Z value that could be +// trivially Z rejected. This requires clipping the primitive against the near +// and far planes to ensure it stays within the valid Z-buffer range. The Z +// and W of each fragment of the primitives are interpolated across the +// generated spans and then depth-tested as appropriate. +// Additionally, vertex attributes must be interpolated with perspective- +// correction by dividing by W before interpolation, and then later multiplied +// by W again to produce the final correct attribute value for each fragment. +// This process is expensive and should be avoided if possible for primitive +// batches that are known ahead of time to not need perspective-correction. +// To trigger this path, the shader should use the PERSPECTIVE feature so that +// the glsl-to-cxx compiler can generate the appropriate interpolation code +// needed to participate with SWGL's perspective-correction. +static void draw_perspective(int nump, Texture& colortex, int layer, + Texture& depthtex) { + Flats flat_outs; + Interpolants interp_outs[6] = {0}; + vertex_shader->run((char*)flat_outs, (char*)interp_outs, + sizeof(Interpolants)); + + Point3D p[6]; + vec4 pos = vertex_shader->gl_Position; + vec3_scalar scale(ctx->viewport.width * 0.5f, ctx->viewport.height * 0.5f, + 0.5f); + vec3_scalar offset(ctx->viewport.x, ctx->viewport.y, 0.0f); + offset += scale; + if (test_none(pos.z < -pos.w || pos.z > pos.w)) { + Float w = 1.0f / pos.w; + vec3 screen = pos.sel(X, Y, Z) * w * scale + offset; + p[0] = Point3D(screen.x.x, screen.y.x, screen.z.x, w.x); + p[1] = Point3D(screen.x.y, screen.y.y, screen.z.y, w.y); + p[2] = Point3D(screen.x.z, screen.y.z, screen.z.z, w.z); + p[3] = Point3D(screen.x.w, screen.y.w, screen.z.w, w.w); + } else { + p[0] = Point3D(pos.x.x, pos.y.x, pos.z.x, pos.w.x); + p[1] = Point3D(pos.x.y, pos.y.y, pos.z.y, pos.w.y); + p[2] = Point3D(pos.x.z, pos.y.z, pos.z.z, pos.w.z); + p[3] = Point3D(pos.x.w, pos.y.w, pos.z.w, pos.w.w); + nump = clip_near_far<-1>(nump, p, interp_outs); + if (nump < 3) { + return; + } + nump = clip_near_far<1>(nump, p, interp_outs); + if (nump < 3) { + return; + } + for (int i = 0; i < nump; i++) { + float w = 1.0f / p[i].w; + p[i] = Point3D(p[i].sel(X, Y, Z) * w * scale + offset, w); + } + } + + ClipRect clipRect(colortex); + if (!clipRect.overlaps(nump, p)) { + return; + } + + fragment_shader->init_primitive(flat_outs); + + if (colortex.internal_format == GL_RGBA8) { + draw_perspective_spans(nump, p, interp_outs, colortex, layer, + depthtex, clipRect); + } else if (colortex.internal_format == GL_R8) { + draw_perspective_spans(nump, p, interp_outs, colortex, layer, + depthtex, clipRect); + } else { + assert(false); + } +} + static void draw_quad(int nump, Texture& colortex, int layer, Texture& depthtex) { + if (fragment_shader->use_perspective()) { + draw_perspective(nump, colortex, layer, depthtex); + return; + } + Flats flat_outs; Interpolants interp_outs[4] = {0}; vertex_shader->run((char*)flat_outs, (char*)interp_outs, sizeof(Interpolants)); - Float w = 1.0f / vertex_shader->gl_Position.w; - vec3 clip = vertex_shader->gl_Position.sel(X, Y, Z) * w; - vec3 screen = (clip + 1) * vec3(ctx->viewport.width / 2, - ctx->viewport.height / 2, 0.5f) + - vec3(ctx->viewport.x, ctx->viewport.y, 0); - Point p[4] = {{screen.x.x, screen.y.x}, - {screen.x.y, screen.y.y}, - {screen.x.z, screen.y.z}, - {screen.x.w, screen.y.w}}; - - auto top_left = min(min(p[0], p[1]), p[2]); - auto bot_right = max(max(p[0], p[1]), p[2]); - if (nump > 3) { - top_left = min(top_left, p[3]); - bot_right = max(bot_right, p[3]); - } - // debugf("bbox: %f %f %f %f\n", top_left.x, top_left.y, bot_right.x, - // bot_right.y); - - float fx0 = 0; - float fy0 = 0; - float fx1 = colortex.width; - float fy1 = colortex.height; - if (ctx->scissortest) { - fx0 = max(fx0, float(ctx->scissor.x)); - fy0 = max(fy0, float(ctx->scissor.y)); - fx1 = min(fx1, float(ctx->scissor.x + ctx->scissor.width)); - fy1 = min(fy1, float(ctx->scissor.y + ctx->scissor.height)); - } - if (top_left.x >= fx1 || top_left.y >= fy1 || bot_right.x <= fx0 || - bot_right.y <= fy0) { + vec4 pos = vertex_shader->gl_Position; + float w = 1.0f / pos.w.x; + vec2 screen = + (pos.sel(X, Y) * w + 1) * + vec2_scalar(ctx->viewport.width * 0.5f, ctx->viewport.height * 0.5f) + + vec2_scalar(ctx->viewport.x, ctx->viewport.y); + Point2D p[4] = {{screen.x.x, screen.y.x}, + {screen.x.y, screen.y.y}, + {screen.x.z, screen.y.z}, + {screen.x.w, screen.y.w}}; + + ClipRect clipRect(colortex); + if (!clipRect.overlaps(nump, p)) { return; } + float screenZ = (vertex_shader->gl_Position.z.x * w + 1) * 0.5f; + if (screenZ < 0 || screenZ > 1) { + return; + } // SSE2 does not support unsigned comparison, so bias Z to be negative. - uint16_t z = uint16_t(0xFFFF * screen.z.x) - 0x8000; - fragment_shader->gl_FragCoordZW.x = screen.z.x; - fragment_shader->gl_FragCoordZW.y = w.x; + uint16_t z = uint16_t(0xFFFF * screenZ) - 0x8000; + fragment_shader->gl_FragCoord.z = screenZ; + fragment_shader->gl_FragCoord.w = w; fragment_shader->init_primitive(flat_outs); if (colortex.internal_format == GL_RGBA8) { draw_quad_spans(nump, p, z, interp_outs, colortex, layer, - depthtex, fx0, fy0, fx1, fy1); + depthtex, clipRect); } else if (colortex.internal_format == GL_R8) { draw_quad_spans(nump, p, z, interp_outs, colortex, layer, depthtex, - fx0, fy0, fx1, fy1); + clipRect); } else { assert(false); } diff --git a/swgl/src/gl_defs.h b/swgl/src/gl_defs.h index 3cd774e49b..c7e87230a3 100644 --- a/swgl/src/gl_defs.h +++ b/swgl/src/gl_defs.h @@ -100,6 +100,7 @@ typedef intptr_t GLintptr; #define GL_TEXTURE_2D 0x0DE1 #define GL_TEXTURE_3D 0x806F #define GL_TEXTURE_2D_ARRAY 0x8C1A +#define GL_TEXTURE_RECTANGLE 0x84F5 #define GL_TEXTURE0 0x84C0 #define GL_TEXTURE1 0x84C1 #define GL_TEXTURE2 0x84C2 diff --git a/swgl/src/glsl.h b/swgl/src/glsl.h index d5e492f6ef..4832ee73bd 100644 --- a/swgl/src/glsl.h +++ b/swgl/src/glsl.h @@ -1129,6 +1129,9 @@ struct vec3_scalar { return vec2_scalar_ref(select(c1), select(c2)); } + friend vec3_scalar operator*(vec3_scalar a, vec3_scalar b) { + return vec3_scalar{a.x * b.x, a.y * b.y, a.z * b.z}; + } friend vec3_scalar operator*(vec3_scalar a, float b) { return vec3_scalar{a.x * b, a.y * b, a.z * b}; } @@ -1144,6 +1147,13 @@ struct vec3_scalar { return vec3_scalar{a.x / b, a.y / b, a.z / b}; } + vec3_scalar operator+=(vec3_scalar a) { + x += a.x; + y += a.y; + z += a.z; + return *this; + } + friend bool operator==(const vec3_scalar& l, const vec3_scalar& r) { return l.x == r.x && l.y == r.y && l.z == r.z; } @@ -1361,6 +1371,7 @@ struct vec4_scalar { constexpr vec4_scalar(float a) : x(a), y(a), z(a), w(a) {} constexpr vec4_scalar(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {} + vec4_scalar(vec3_scalar xyz, float w) : x(xyz.x), y(xyz.y), z(xyz.z), w(w) {} float& select(XYZW c) { switch (c) { @@ -1414,6 +1425,15 @@ struct vec4_scalar { friend vec4_scalar operator/(vec4_scalar a, vec4_scalar b) { return vec4_scalar{a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w}; } + + vec4_scalar& operator+=(vec4_scalar a) { + x += a.x; + y += a.y; + z += a.z; + w += a.w; + return *this; + } + vec4_scalar& operator/=(vec4_scalar a) { x /= a.x; y /= a.y; @@ -2691,11 +2711,6 @@ vec4 texture(sampler2DRect sampler, vec2 P) { } } -vec4 texture(sampler2DArray sampler, vec3 P, Float layer) { - assert(0); - return vec4(); -} - vec4 texture(sampler2DArray sampler, vec3 P) { if (sampler->filter == TextureFilter::LINEAR) { I32 zoffset = @@ -2716,6 +2731,11 @@ vec4 texture(sampler2DArray sampler, vec3 P) { } } +vec4 texture(sampler2DArray sampler, vec3 P, float bias) { + assert(bias == 0.0); + return texture(sampler, P); +} + vec4 textureLod(sampler2DArray sampler, vec3 P, float lod) { assert(lod == 0.0); return texture(sampler, P); diff --git a/swgl/src/program.h b/swgl/src/program.h index 120a5a2bb5..0779f630f2 100644 --- a/swgl/src/program.h +++ b/swgl/src/program.h @@ -79,7 +79,6 @@ struct FragmentShaderImpl : ShaderImpl { const void* step, float step_width); typedef void (*RunFunc)(FragmentShaderImpl*); typedef void (*SkipFunc)(FragmentShaderImpl*, int chunks); - typedef bool (*UseDiscardFunc)(FragmentShaderImpl*); typedef void (*DrawSpanRGBA8Func)(FragmentShaderImpl*, uint32_t* buf, int len); typedef void (*DrawSpanR8Func)(FragmentShaderImpl*, uint8_t* buf, int len); @@ -89,20 +88,41 @@ struct FragmentShaderImpl : ShaderImpl { InitSpanFunc init_span_func = nullptr; RunFunc run_func = nullptr; SkipFunc skip_func = nullptr; - UseDiscardFunc use_discard_func = nullptr; DrawSpanRGBA8Func draw_span_RGBA8_func = nullptr; DrawSpanR8Func draw_span_R8_func = nullptr; - vec2 gl_FragCoordXY; - vec2_scalar gl_FragCoordZW; + enum FLAGS { + DISCARD = 1 << 0, + PERSPECTIVE = 1 << 1, + }; + int flags = 0; + void enable_discard() { flags |= DISCARD; } + void enable_perspective() { flags |= PERSPECTIVE; } + ALWAYS_INLINE bool use_discard() const { return (flags & DISCARD) != 0; } + ALWAYS_INLINE bool use_perspective() const { + return (flags & PERSPECTIVE) != 0; + } + + vec4 gl_FragCoord; + vec2_scalar stepZW; Bool isPixelDiscarded; vec4 gl_FragColor; vec4 gl_SecondaryFragColor; - ALWAYS_INLINE void step_fragcoord() { gl_FragCoordXY.x += 4; } + ALWAYS_INLINE void step_fragcoord() { gl_FragCoord.x += 4; } ALWAYS_INLINE void step_fragcoord(int chunks) { - gl_FragCoordXY.x += 4 * chunks; + gl_FragCoord.x += 4 * chunks; + } + + ALWAYS_INLINE void step_perspective() { + gl_FragCoord.z += stepZW.x; + gl_FragCoord.w += stepZW.y; + } + + ALWAYS_INLINE void step_perspective(int chunks) { + gl_FragCoord.z += stepZW.x * chunks; + gl_FragCoord.w += stepZW.y * chunks; } void init_batch(ProgramImpl* prog) { (*init_batch_func)(this, prog); } @@ -120,8 +140,6 @@ struct FragmentShaderImpl : ShaderImpl { ALWAYS_INLINE void skip(int chunks = 1) { (*skip_func)(this, chunks); } - ALWAYS_INLINE bool use_discard() { return (*use_discard_func)(this); } - ALWAYS_INLINE void draw_span(uint32_t* buf, int len) { (*draw_span_RGBA8_func)(this, buf, len); } diff --git a/swgl/src/vector_type.h b/swgl/src/vector_type.h index da291cc587..fdfe154c37 100644 --- a/swgl/src/vector_type.h +++ b/swgl/src/vector_type.h @@ -304,6 +304,7 @@ struct VectorType { # define xyxy swizzle(0, 1, 0, 1) # define zwzw swizzle(2, 3, 2, 3) # define zyxw swizzle(2, 1, 0, 3) +# define xyzz swizzle(0, 1, 2, 2) # define xxxxyyyy XXXXYYYY() VectorType XXXXYYYY() const { return swizzle(0, 0, 0, 0).combine(swizzle(1, 1, 1, 1)); diff --git a/webrender/res/cs_svg_filter.glsl b/webrender/res/cs_svg_filter.glsl index 62d808a1e3..cfb6dd13cf 100644 --- a/webrender/res/cs_svg_filter.glsl +++ b/webrender/res/cs_svg_filter.glsl @@ -509,7 +509,7 @@ vec4 composite(vec4 Cs, vec4 Cb, int mode) { vec4 sampleInUvRect(sampler2DArray sampler, vec3 uv, vec4 uvRect) { vec2 clamped = clamp(uv.xy, uvRect.xy, uvRect.zw); - return texture(sampler, vec3(clamped, uv.z), 0.0); + return texture(sampler, vec3(clamped, uv.z)); } void main(void) {