|
require "net/http"
|
|
require "tempfile"
|
|
require "uri"
|
|
|
|
# This is a small example of how `#scan_left` could be used, and how the same
# functionality could be achieved with other approaches.
#
# Say that we're reading a large file (in this case the text of 'The Adventures
# of Sherlock Holmes') and want to find how many times "Watson" has been
# mentioned by the point "Sherlock" has been mentioned 10 times.
#
# This needs to be done lazily, since we don't want to process the entire file!
#
# Example run:
# > ruby ../scan_left_example.rb
# With scan: {"sherlock"=>10, "watson"=>6}
# With map: {"sherlock"=>10, "watson"=>6}
# With each: {"sherlock"=>10, "watson"=>6}
# With inject: {"sherlock"=>102, "watson"=>81}
|
|
|
|
# Location of the text that this example downloads and scans.
URL = "http://www.gutenberg.org/files/1661/1661-0.txt".freeze

# Seed counts handed to every approach. Frozen because the same object is
# shared by all of them, so an accidental in-place change in one approach
# must not be able to skew the others.
INITIAL_STATE = { "sherlock" => 0, "watson" => 0 }.freeze

# Scratch file on disk that receives the downloaded text.
BIG_FILE = Tempfile.new
|
|
|
|
# Fetches the document at URL and stores it in BIG_FILE.
#
# The body is written to disk chunk by chunk as it arrives, so the whole
# response is never held in memory at once.
#
# Raises the error produced by Net::HTTPResponse#value when the server answers
# with anything other than a 2xx status.
def download
  uri = URI(URL)

  # Turn on TLS only when the URL asks for it; plain http URLs are unaffected.
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
    http.request(Net::HTTP::Get.new(uri)) do |response|
      # Fail loudly rather than saving an error or redirect page as the text.
      response.value

      # File.open (unlike Kernel#open) never interprets the path as a command,
      # and binary mode stores the received bytes untouched.
      File.open(BIG_FILE.path, "wb") do |io|
        response.read_body { |chunk| io.write(chunk) }
      end
    end
  end
end
|
|
|
|
# This is the lazy stream that we'll process: the lines of BIG_FILE, produced
# one at a time.
#
# File.foreach opens the file only once enumeration starts and closes it when
# enumeration finishes or is cut short (for example by `find` or `first`), so
# repeated calls do not leave file handles open.
#
# Returns an Enumerator::Lazy over the lines of the file.
def stream
  File.foreach(BIG_FILE.path).lazy
end
|
|
|
|
# State transition function: folds one line of text into the running counts.
#
# state - Hash with Integer counts under the "sherlock" and "watson" keys.
# line  - String to search; matching is case-insensitive.
#
# Returns a brand-new Hash with both counts increased by the number of matches
# found in the line. The state passed in is left untouched.
def new_state(state, line)
  patterns = { "sherlock" => /sherlock/i, "watson" => /watson/i }

  patterns.each_with_object({}) do |(name, pattern), counts|
    counts[name] = state[name] + line.scan(pattern).size
  end
end
|
|
|
|
# Approach 1: let `scan_left` thread the state through the stream, starting
# from INITIAL_STATE and applying `new_state` to each line in turn.
def with_scan
  stream.scan_left(INITIAL_STATE) do |counts, text|
    new_state(counts, text)
  end
end
|
|
|
|
# Approach 2: a plain `map` over the stream. The running counts have to live in
# a local variable outside the block, which the block reassigns on every line.
#
# Returns whatever `stream.map` returns, yielding the updated counts after
# each line.
def with_map
  running = INITIAL_STATE

  stream.map do |text|
    running = new_state(running, text)
  end
end
|
|
|
|
# Approach 3: iterate with `each`. This cannot be chained into further lazy
# steps, because the loop itself has to know when to stop. Here the stopping
# rule is known, so it is checked by hand after every line; that makes this
# approach less flexible than `map` or `scan`.
#
# Returns the counts at the moment the end condition is first met, or the
# final counts if the stream runs out first.
def with_each
  counts = INITIAL_STATE

  stream.each do |text|
    counts = new_state(counts, text)
    # Manual stop: nothing downstream can decide this for us.
    return counts if end_state?(counts)
  end

  counts
end
|
|
|
|
# Approach 4: fold the stream with `reduce`. The fold is not lazy, so it
# consumes every line and cannot stop at the end condition.
#
# Returns the counts accumulated over the entire stream.
def with_inject
  stream.reduce(INITIAL_STATE) do |counts, text|
    new_state(counts, text)
  end
end
|
|
|
|
# Helper predicate for the stopping condition.
#
# state - Hash holding the running count under the "sherlock" key.
#
# Returns true once that count is 10 or more, false otherwise.
def end_state?(state)
  sherlock_mentions = state["sherlock"]
  sherlock_mentions >= 10
end
|
|
|
|
# Fetch the text up front so that every approach below streams the same file.
download

# For the approaches that yield a sequence of states, report the first state
# that satisfies the end condition; the other two report their result directly.
reached_end = method(:end_state?)

puts "With scan: #{with_scan.find(&reached_end)}"
puts "With map: #{with_map.find(&reached_end)}"
puts "With each: #{with_each}"
puts "With inject: #{with_inject}"
|