Project

General

Profile

Feature #17016 » scan_left_example.rb

Small script showing `scan` usage with other approaches to the same behavior - parker (Parker Finch), 07/10/2020 08:52 PM

 
1
require "net/http"
2
require "tempfile"
3
require "uri"
4

    
5
# This is a small example of how `#scan_left` could be used, and how the
# same functionality could be achieved with other approaches.
#
# Say that we're reading a large file (in this case the text of 'The Adventures
# of Sherlock Holmes') and want to find how many times "Watson" has been
# mentioned by the point at which "Sherlock" has been mentioned 10 times.
#
# This needs to be done lazily, since we don't want to process the entire file!
#
# Example run:
# > ruby ../scan_left_example.rb
# With scan: {"sherlock"=>10, "watson"=>6}
# With map: {"sherlock"=>10, "watson"=>6}
# With each: {"sherlock"=>10, "watson"=>6}
# With inject: {"sherlock"=>102, "watson"=>81}
20

    
21
# Location of the text that `download` fetches.
URL = "http://www.gutenberg.org/files/1661/1661-0.txt".freeze

# Starting tally for every approach. Frozen because this one object is used
# as the seed state everywhere; `new_state` builds a fresh hash on each step
# and never writes to its input.
INITIAL_STATE = { "sherlock" => 0, "watson" => 0 }.freeze

# Temporary file that `download` fills and `stream` reads back.
BIG_FILE = Tempfile.new
26

    
27
# Fetches URL and writes the response body to BIG_FILE.
#
# The body is consumed chunk by chunk via `read_body` and each chunk is
# written as it arrives, so the full text is never held in memory at once.
#
# NOTE(review): the response status is not inspected, so the body of a
# redirect or error response would be written to the file unchanged.
def download
  uri = URI(URL)

  # Turn on TLS only when the URL's scheme asks for it; a plain "http" URL
  # connects exactly as before.
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
    request = Net::HTTP::Get.new(uri)

    http.request(request) do |response|
      # File.open (not Kernel#open) so the argument is only ever treated as a
      # file path. Binary mode stores the received bytes untouched.
      File.open(BIG_FILE.path, "wb") do |io|
        response.read_body { |chunk| io.write(chunk) }
      end
    end
  end
end
43

    
44
# This is the lazy stream that we'll process: a lazy enumerator over the
# lines of BIG_FILE.
#
# `File.foreach` without a block returns an enumerator that opens the file
# when iteration starts and closes it when iteration ends (including when a
# consumer such as `find` stops early), so no file handle is left open.
def stream
  File.foreach(BIG_FILE.path).lazy
end
48

    
49
# This is our state transition function.
#
# Returns a new hash in which the case-insensitive occurrences of "sherlock"
# and "watson" found in `line` are added to the counts held in `state`.
# `state` itself is left untouched.
def new_state(state, line)
  {
    "sherlock" => state["sherlock"] + line.scan(/sherlock/i).length,
    "watson" => state["watson"] + line.scan(/watson/i).length
  }
end
59

    
60
# Transform the stream with the `scan` operation, folding each line into the
# running state via `new_state`, starting from INITIAL_STATE. Returns
# whatever `scan_left` returns.
#
# NOTE(review): `scan_left` is not defined anywhere in this file; it appears
# to be the method this example is written to demonstrate, so the receiver
# returned by `stream` must provide it.
def with_scan
  stream.scan_left(INITIAL_STATE) { |state, line| new_state(state, line) }
end
64

    
65
# Alternatively, transform the stream with a `map`, which requires an explicit
# variable that is changed in the block.
#
# Returns the mapped stream, whose elements are the successive states.
# Because the running state lives in a local captured by the block, it is not
# reset between enumerations: enumerating the result a second time continues
# from wherever the first enumeration left off.
def with_map
  state = INITIAL_STATE

  stream.map { |line| state = new_state(state, line) }
end
72

    
73
# We can't get the same behavior with `each` because we would need to know when
# to stop iterating.
#
# Returns the state reached when `end_state?` first holds, or the state after
# the last line if it never does.
def with_each
  state = INITIAL_STATE

  stream.each do |line|
    state = new_state(state, line)
    # We would need to know to stop iterating right here! So we can't chain this
    # through to further methods. Since in this example we know our criteria we
    # can manually break here, but this is less flexible than `map` or `scan`.
    break if end_state?(state)
  end

  state
end
88

    
89
# We can't get the desired behavior with `inject` because it is not lazy: it
# folds every line of the stream into the state and returns only the final
# state, with no opportunity to stop early.
def with_inject
  stream.inject(INITIAL_STATE) { |state, line| new_state(state, line) }
end
93

    
94
# This is just a helper method that represents when our condition is
# satisfied: returns true once the "sherlock" count in `state` has reached 10,
# false otherwise.
def end_state?(state)
  state["sherlock"] >= 10
end
98

    
99
# Fetch the text first so that every approach streams the same local file.
download

# The scan and map approaches return a sequence of states, so take the first
# one that satisfies `end_state?`. Each result is printed before the next
# approach runs.
scan_result = with_scan.find { |state| end_state?(state) }
puts "With scan: #{scan_result}"

map_result = with_map.find { |state| end_state?(state) }
puts "With map: #{map_result}"

# The each and inject approaches return a single state directly.
each_result = with_each
puts "With each: #{each_result}"

inject_result = with_inject
puts "With inject: #{inject_result}"