Project

General

Profile

Feature #17016 » scan_left_example.rb

Small script showing `scan` usage with other approaches to the same behavior - parker (Parker Finch), 07/10/2020 08:52 PM

 
require "net/http"
require "tempfile"
require "uri"

# This is a small example of how `#scan_left` could be used, and how the
# functionality could be had with other approaches.
#
# Say that we're reading a large file (in this case the text of 'The Adventures
# of Sherlock Holmes') and want to find how many times "Watson" is mentioned by
# the point "Sherlock" has been mentioned 10 times.
#
# This needs to be done lazily, since we don't want to process the entire file!
#
# Example run:
# > ruby ../scan_left_example.rb
# With scan: {"sherlock"=>10, "watson"=>6}
# With map: {"sherlock"=>10, "watson"=>6}
# With each: {"sherlock"=>10, "watson"=>6}
# With inject: {"sherlock"=>102, "watson"=>81}

# Plain-text edition of 'The Adventures of Sherlock Holmes' that gets fetched.
URL = "http://www.gutenberg.org/files/1661/1661-0.txt".freeze

# Starting tallies used as the seed by every approach in this script. Frozen
# because the same object is shared by all of them: an accidental in-place
# mutation in one approach would otherwise corrupt the others.
INITIAL_STATE = { "sherlock" => 0, "watson" => 0 }.freeze

# Temporary file that the downloaded text is written to and later streamed from.
BIG_FILE = Tempfile.new

# Downloads the document at URL into BIG_FILE, writing the response body to
# disk chunk by chunk as it arrives.
#
# Raises the Net::HTTP error produced by Net::HTTPResponse#value when the
# server does not answer with a 2xx status, rather than silently saving an
# error or redirect page as if it were the text.
def download
  uri = URI(URL)

  # Turn TLS on only when the URL asks for it, so http and https both work.
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
    request = Net::HTTP::Get.new(uri)

    http.request(request) do |response|
      # Fail fast on a non-success status before the file is touched.
      response.value

      # File.open instead of Kernel#open: the latter spawns a subprocess when
      # the path begins with "|".
      File.open(BIG_FILE.path, "w") do |io|
        response.read_body { |chunk| io.write(chunk) }
      end
    end
  end
end

# Returns a lazy enumerator over the lines of BIG_FILE; this is the stream
# that every approach processes.
#
# File.foreach opens the file only once enumeration starts and closes it when
# enumeration finishes or is cut short, so repeated calls do not leave open
# file handles behind (File.open(...).each_line never closed its handle).
def stream
  File.foreach(BIG_FILE.path).lazy
end

# State transition function: folds one line of text into the running tallies.
#
# `state` is a hash with "sherlock" and "watson" counts so far; `line` is the
# string to search (matching is case-insensitive). Returns a new hash with the
# updated counts and leaves `state` untouched.
def new_state(state, line)
  patterns = { "sherlock" => /sherlock/i, "watson" => /watson/i }

  patterns.to_h do |name, pattern|
    [name, state[name] + line.scan(pattern).size]
  end
end

# The `scan` approach: hand the stream to `scan_left` together with the seed
# state and let each line be folded in through `new_state`.
def with_scan
  lines = stream

  lines.scan_left(INITIAL_STATE) do |counts, text|
    new_state(counts, text)
  end
end

# The `map` approach: the same transformation, but the running state has to
# live in a local variable that the block reassigns on every line. Returns
# whatever `stream.map` returns, yielding the state after each line.
def with_map
  running = INITIAL_STATE

  stream.map do |text|
    running = new_state(running, text)
  end
end

# The `each` approach. It cannot be chained into further lazy steps the way
# `map` or `scan` can, because the loop itself has to decide when to stop.
# The stopping rule is known here, so the method bails out as soon as
# `end_state?` holds; otherwise it returns the state after the last line.
def with_each
  counts = INITIAL_STATE

  stream.each do |text|
    counts = new_state(counts, text)
    # Stop consuming the stream the moment the condition is met.
    return counts if end_state?(counts)
  end

  counts
end

# The `inject` approach. It is not lazy, so it cannot give the desired
# behavior: it folds every line of the stream and returns only the final state.
def with_inject
  stream.reduce(INITIAL_STATE) do |counts, text|
    new_state(counts, text)
  end
end

# Helper predicate for the stopping condition: true once the "sherlock" count
# in `state` has reached 10 or more.
def end_state?(state)
  sherlock_mentions = state["sherlock"]
  sherlock_mentions >= 10
end

# Fetch the text once up front so that it can be streamed from disk.
download

# For the lazy approaches, pull states until the first one that satisfies the
# end condition and print it; `with_each` and `with_inject` already return a
# single state.
goal_reached = method(:end_state?)

puts "With scan: #{with_scan.find(&goal_reached)}"
puts "With map: #{with_map.find(&goal_reached)}"
puts "With each: #{with_each}"
puts "With inject: #{with_inject}"
    (1-1/1)