From 9a4a0cf32fc75d2566f122efab6a350daaf3c028 Mon Sep 17 00:00:00 2001 From: bogdanvlviv Date: Sun, 5 May 2019 18:25:00 +0000 Subject: [PATCH 1/3] Add `Array#extract` The method removes and returns the elements for which the block returns a true value. If no block is given, an Enumerator is returned instead. ```ruby numbers = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] odd_numbers = numbers.extract { |number| number.odd? } # => [1, 3, 5, 7, 9] numbers # => [0, 2, 4, 6, 8] ``` This method was added to Active Support as `extract!` in https://github.com/rails/rails/pull/33137 In this post, you can find use cases of this method https://bogdanvlviv.com/posts/ruby/rails/array-extract-to-activesupport-6-0.html --- array.c | 56 ++++++++++++++++++++++++++++++++++++++ benchmark/array_extract.rb | 10 +++++++ test/ruby/test_array.rb | 45 ++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+) create mode 100644 benchmark/array_extract.rb diff --git a/array.c b/array.c index 65c9a96de8c64e..6335eb340675b6 100644 --- a/array.c +++ b/array.c @@ -3569,6 +3569,61 @@ rb_ary_reject(VALUE ary) return rejected_ary; } +static VALUE +extract_i(VALUE a) +{ + volatile struct select_bang_arg *arg = (void *)a; + VALUE ary = arg->ary; + VALUE result = rb_ary_new(); + long i1, i2; + + for (i1 = i2 = 0; i1 < RARRAY_LEN(ary); arg->len[0] = ++i1) { + VALUE v = RARRAY_AREF(ary, i1); + if (RTEST(rb_yield(v))) { + rb_ary_push(result, v); + } else { + if (i1 != i2) { + rb_ary_store(ary, i2, v); + } + arg->len[1] = ++i2; + } + } + + return result; +} + +static VALUE +ary_extract(VALUE ary) +{ + struct select_bang_arg args; + rb_ary_modify_check(ary); + args.ary = ary; + args.len[0] = args.len[1] = 0; + return rb_ensure(extract_i, (VALUE)&args, select_bang_ensure, (VALUE)&args); +} + +/* + * call-seq: + * ary.extract {|item| block} -> new_ary + * ary.extract -> Enumerator + * + * Removes and returns the elements for which the block evaluates to +true+. 
+ * + * If no block is given, an Enumerator is returned instead. + * + * numbers = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + * odd_numbers = numbers.extract { |number| number.odd? } # => [1, 3, 5, 7, 9] + * numbers # => [0, 2, 4, 6, 8] + */ + +static VALUE +rb_ary_extract(VALUE ary) +{ + RETURN_SIZED_ENUMERATOR(ary, 0, 0, ary_enum_length); + rb_ary_modify(ary); + return ary_extract(ary); +} + /* * call-seq: * ary.delete_if {|item| block} -> ary @@ -6862,6 +6917,7 @@ Init_Array(void) rb_define_method(rb_cArray, "delete_if", rb_ary_delete_if, 0); rb_define_method(rb_cArray, "reject", rb_ary_reject, 0); rb_define_method(rb_cArray, "reject!", rb_ary_reject_bang, 0); + rb_define_method(rb_cArray, "extract", rb_ary_extract, 0); rb_define_method(rb_cArray, "zip", rb_ary_zip, -1); rb_define_method(rb_cArray, "transpose", rb_ary_transpose, 0); rb_define_method(rb_cArray, "replace", rb_ary_replace, 1); diff --git a/benchmark/array_extract.rb b/benchmark/array_extract.rb new file mode 100644 index 00000000000000..bbc5f90eaab8e5 --- /dev/null +++ b/benchmark/array_extract.rb @@ -0,0 +1,10 @@ +require 'benchmark' + +Benchmark.bmbm do |x| + x.report('Array#extract') do + arrays = Array.new(1000) { (0..10000).to_a } + arrays.each do |numbers| + _odd_numbers = numbers.extract { |number| number.odd? } + end + end +end diff --git a/test/ruby/test_array.rb b/test/ruby/test_array.rb index df9f08a26ddc90..c96d24ad44f251 100644 --- a/test/ruby/test_array.rb +++ b/test/ruby/test_array.rb @@ -1381,6 +1381,51 @@ def test_iseq_shared_array_reject! [1, 3, 4]], c, bug90781 end + def test_extract + numbers = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + array_id = numbers.object_id + + odd_numbers = numbers.extract(&:odd?) 
+ + assert_equal [1, 3, 5, 7, 9], odd_numbers + assert_equal [0, 2, 4, 6, 8], numbers + assert_equal array_id, numbers.object_id + + numbers = [0, 1, 2, 3] + assert_equal [], numbers.extract { false } + assert_equal [0, 1, 2, 3], numbers + + assert_equal [0, 1, 2, 3], numbers.extract { true } + assert_equal [], numbers + end + + def test_extract_without_block + numbers = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + array_id = numbers.object_id + + extract_enumerator = numbers.extract + + assert_instance_of Enumerator, extract_enumerator + assert_equal numbers.size, extract_enumerator.size + + odd_numbers = extract_enumerator.each(&:odd?) + + assert_equal [1, 3, 5, 7, 9], odd_numbers + assert_equal [0, 2, 4, 6, 8], numbers + assert_equal array_id, numbers.object_id + end + + def test_extract_on_empty_array + empty_array = [] + array_id = empty_array.object_id + + new_empty_array = empty_array.extract {} + + assert_equal [], new_empty_array + assert_equal [], empty_array + assert_equal array_id, empty_array.object_id + end + def test_replace a = @cls[ 1, 2, 3] a_id = a.__id__ From 8e2193dc1e9d0a3fd513d2e93184c5f2f4f9be22 Mon Sep 17 00:00:00 2001 From: bogdanvlviv Date: Sun, 5 May 2019 19:49:57 +0000 Subject: [PATCH 2/3] Add `Hash#extract` The method removes and returns the key/value pairs for which the block evaluates to +true+. If no block is given, an Enumerator is returned instead. ```ruby h = {a: 100, b: 200, c: 300} h.extract {|k, v| v > 150} # => {:b=>200, :c=>300} h # => {:a=>100} ``` Note that there is a method `extract!` in Active Support that was added in 2009, see https://github.com/rails/rails/commit/8dcf91ca113579646e95b0fd7a864dfb6512a53b But I think we should upstream `extract!` to Ruby as `slice!`. 
--- benchmark/hash_extract.rb | 10 +++++++++ hash.c | 40 ++++++++++++++++++++++++++++++++++++ test/ruby/test_hash.rb | 43 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+) create mode 100644 benchmark/hash_extract.rb diff --git a/benchmark/hash_extract.rb b/benchmark/hash_extract.rb new file mode 100644 index 00000000000000..cf72c7b8b3a868 --- /dev/null +++ b/benchmark/hash_extract.rb @@ -0,0 +1,10 @@ +require 'benchmark' + +Benchmark.bmbm do |x| + x.report('Hash#extract') do + 10000.times do + hash_for_extract = { a: 1, b: 2, c: 3, d: 4 } + _executed = hash_for_extract.extract {|k, v| v > 2} + end + end +end diff --git a/hash.c b/hash.c index b4b0415c84b2f0..48145a5b68728e 100644 --- a/hash.c +++ b/hash.c @@ -2370,6 +2370,45 @@ rb_hash_reject(VALUE hash) return result; } +static int +extract_i(VALUE key, VALUE value, VALUE result) +{ + if (RTEST(rb_yield_values(2, key, value))) { + rb_hash_aset(result, key, value); + return ST_DELETE; + } + return ST_CONTINUE; +} + +/* + * call-seq: + * hsh.extract {|key, value| block} -> new_hash + * hsh.extract -> an_enumerator + * + * Removes and returns the key/value pairs for which the block evaluates to +true+. + * + * If no block is given, an Enumerator is returned instead. 
+ * + * h = {a: 100, b: 200, c: 300} + * h.extract {|k, v| v > 150} # => {:b=>200, :c=>300} + * h # => {:a=>100} + */ + +VALUE +rb_hash_extract(VALUE hash) +{ + st_index_t n; + VALUE result; + + RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size); + rb_hash_modify(hash); + n = RHASH_SIZE(hash); + result = rb_hash_new(); + if (!n) return result; + rb_hash_foreach(hash, extract_i, result); + return result; +} + /* * call-seq: * hsh.slice(*keys) -> a_hash @@ -5999,6 +6038,7 @@ Init_Hash(void) rb_define_method(rb_cHash, "filter!", rb_hash_select_bang, 0); rb_define_method(rb_cHash, "reject", rb_hash_reject, 0); rb_define_method(rb_cHash, "reject!", rb_hash_reject_bang, 0); + rb_define_method(rb_cHash, "extract", rb_hash_extract, 0); rb_define_method(rb_cHash, "slice", rb_hash_slice, -1); rb_define_method(rb_cHash, "clear", rb_hash_clear, 0); rb_define_method(rb_cHash, "invert", rb_hash_invert, 0); diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb index c934d1015eb51d..6ee33f4ccd4423 100644 --- a/test/ruby/test_hash.rb +++ b/test/ruby/test_hash.rb @@ -735,6 +735,49 @@ def test_reject! 
assert_equal(h3, h) end + def test_extract + h = {a: 1, b: 2, c: 3, d: 4} + hash_id = h.object_id + + assert_equal({c: 3, d: 4}, h.extract {|k, v| v > 2}) + assert_equal({a: 1, b: 2}, h) + assert_equal hash_id, h.object_id + + h = {a: 1, b: 2, c: 3, d: 4} + assert_equal({}, h.extract {false}) + assert_equal({a: 1, b: 2, c: 3, d: 4}, h) + + assert_equal({a: 1, b: 2, c: 3, d: 4}, h.extract {true}) + assert_equal({}, h) + end + + def test_extract_without_block + h = {a: 1, b: 2, c: 3, d: 4} + hash_id = h.object_id + + extract_enumerator = h.extract + + assert_instance_of Enumerator, extract_enumerator + assert_equal h.size, extract_enumerator.size + + extracted_hash = extract_enumerator.each {|k, v| v > 2} + + assert_equal({c: 3, d: 4}, extracted_hash) + assert_equal({a: 1, b: 2}, h) + assert_equal hash_id, h.object_id + end + + def test_extract_on_empty_hash + empty_hash = {} + hash_id = empty_hash.object_id + + new_empty_hash = empty_hash.extract {} + + assert_equal({}, new_empty_hash) + assert_equal({}, empty_hash) + assert_equal hash_id, empty_hash.object_id + end + def test_replace h = @cls[ 1 => 2, 3 => 4 ] h1 = h.replace(@cls[ 9 => 8, 7 => 6 ]) From 591c6818c745669c297fdb40345a5f8b0809db8b Mon Sep 17 00:00:00 2001 From: bogdanvlviv Date: Sun, 5 May 2019 20:25:33 +0000 Subject: [PATCH 3/3] Add `ENV.extract` The method removes and returns the key/value pairs for which the block evaluates to +true+. If no block is given, an Enumerator is returned instead. 
```ruby ENV.extract {|k, v| k == "PORT"} # => {"PORT"=>"3000"} ``` --- benchmark/env_extract.rb | 13 ++++++++++ hash.c | 38 +++++++++++++++++++++++++++++ test/ruby/test_env.rb | 52 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+) create mode 100644 benchmark/env_extract.rb diff --git a/benchmark/env_extract.rb b/benchmark/env_extract.rb new file mode 100644 index 00000000000000..7211379f5cb996 --- /dev/null +++ b/benchmark/env_extract.rb @@ -0,0 +1,13 @@ +require 'benchmark' + +Benchmark.bmbm do |x| + x.report('ENV.extract') do + 10000.times do + ENV.clear + ENV['foo'] = 'bar' + ENV['baz'] = 'qux' + ENV['bar'] = 'rab' + _extracted = ENV.extract {|k, v| v == 'qux'} + end + end +end diff --git a/hash.c b/hash.c index 48145a5b68728e..8f857a481073c1 100644 --- a/hash.c +++ b/hash.c @@ -5252,6 +5252,43 @@ env_delete_if(VALUE ehash) return envtbl; } +/* + * call-seq: + * ENV.extract {|key, value| block} -> Hash + * ENV.extract -> Enumerator + * + * Removes and returns the key/value pairs for which the block evaluates to +true+. + * + * If no block is given, an Enumerator is returned instead. 
+ * + * ENV.extract {|k, v| k == "PORT"} # => {"PORT"=>"3000"} + */ +static VALUE +env_extract(VALUE ehash) +{ + VALUE result; + VALUE keys; + long i; + + RETURN_SIZED_ENUMERATOR(ehash, 0, 0, rb_env_size); + result = rb_hash_new(); + keys = env_keys(); + RBASIC_CLEAR_CLASS(keys); + for (i=0; i Array @@ -6100,6 +6137,7 @@ Init_Hash(void) rb_define_singleton_method(envtbl, "clear", rb_env_clear, 0); rb_define_singleton_method(envtbl, "reject", env_reject, 0); rb_define_singleton_method(envtbl, "reject!", env_reject_bang, 0); + rb_define_singleton_method(envtbl, "extract", env_extract, 0); rb_define_singleton_method(envtbl, "select", env_select, 0); rb_define_singleton_method(envtbl, "select!", env_select_bang, 0); rb_define_singleton_method(envtbl, "filter", env_select, 0); diff --git a/test/ruby/test_env.rb b/test/ruby/test_env.rb index 6343642ac1be66..2e61ff150f40d3 100644 --- a/test/ruby/test_env.rb +++ b/test/ruby/test_env.rb @@ -292,6 +292,58 @@ def test_slice assert_equal({"foo"=>"bar", "baz"=>"qux"}, ENV.slice("foo", "baz")) end + def test_extract + ENV.clear + ENV["foo"] = "bar" + ENV["baz"] = "qux" + ENV["bar"] = "rab" + env_id = ENV.object_id + + assert_equal({"baz" => "qux"}, ENV.extract {|k, v| v == "qux"}) + assert_equal({"foo" => "bar", "bar" => "rab"}, ENV.to_hash) + assert_equal env_id, ENV.object_id + + ENV.clear + ENV["foo"] = "bar" + ENV["baz"] = "qux" + ENV["bar"] = "rab" + assert_equal({}, ENV.extract {false}) + assert_equal({"foo" => "bar", "baz" => "qux", "bar" => "rab"}, ENV.to_hash) + + assert_equal({"foo" => "bar", "baz" => "qux", "bar" => "rab"}, ENV.extract {true}) + assert_equal({}, ENV.to_hash) + end + + def test_extract_without_block + ENV.clear + ENV["foo"] = "bar" + ENV["baz"] = "qux" + ENV["bar"] = "rab" + env_id = ENV.object_id + + extract_enumerator = ENV.extract + + assert_instance_of Enumerator, extract_enumerator + assert_equal ENV.size, extract_enumerator.size + + extracted_hash = extract_enumerator.each {|k, v| v == "qux"} + + 
assert_equal({"baz" => "qux"}, extracted_hash) + assert_equal({"foo" => "bar", "bar" => "rab"}, ENV.to_hash) + assert_equal env_id, ENV.object_id + end + + def test_extract_on_empty_env + ENV.clear + env_id = ENV.object_id + + new_empty_hash = ENV.extract {} + + assert_equal({}, new_empty_hash) + assert_equal({}, ENV.to_hash) + assert_equal env_id, ENV.object_id + end + def test_clear ENV.clear assert_equal(0, ENV.size)