Class: Net::HTTPResponse

Inherits:
Object show all
Defined in:
/home/apoc/projects/ruby/rbot/lib/rbot/core/utils/httputil.rb

Instance Attribute Summary (collapse)

Instance Method Summary (collapse)

Instance Attribute Details

- (Object) no_cache

Returns the value of attribute no_cache



38
39
40
# File '/home/apoc/projects/ruby/rbot/lib/rbot/core/utils/httputil.rb', line 38

# Reader for the +no_cache+ instance variable (nil until it has been assigned).
def no_cache
  return @no_cache
end

Instance Method Details

- (Object) body_charset(str = self.raw_body)



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File '/home/apoc/projects/ruby/rbot/lib/rbot/core/utils/httputil.rb', line 43

# Collect the character sets the body may be encoded in.
#
# Candidates are gathered, in this order, from: a hard-coded ISO-8859-1
# fallback, the charset parameter of the Content-Type header, and either an
# XML processing instruction or an HTML <meta http-equiv="Content-Type">
# tag found in +str+.
#
# str:: the body text to sniff (defaults to raw_body); it is not modified
#
# Returns an Array of unique charset names, or nil when the Content-Type
# (assumed to be text/html if the header is missing) is neither text/* nor
# XML-like.
def body_charset(str=self.raw_body)
  ctype = self['content-type'] || 'text/html'
  return nil unless ctype =~ /^text/i || ctype =~ /x(ht)?ml/i

  charsets = ['ISO-8859-1'] # should be in config

  if ctype.match(/charset=["']?([^\s"']+)["']?/i)
    charsets << $1
    debug "charset #{charsets.last} added from header"
  end

  # str might be invalid utf-8 that will crash on the pattern match, so the
  # patterns run against a scrubbed version. The scrubbing is done on a
  # private copy: replacing bytes in the caller's string would damage a
  # body that is really in another charset before anyone decodes it.
  probe = str.dup.force_encoding('UTF-8')
  probe.encode!('UTF-8', 'UTF-8', :invalid => :replace)

  case probe
  when /<\?xml\s[^>]*encoding=['"]([^\s"'>]+)["'][^>]*\?>/i
    charsets << $1
    debug "xml charset #{charsets.last} added from xml pi"
  when /<(meta\s[^>]*http-equiv=["']?Content-Type["']?[^>]*)>/i
    meta = $1
    # Case-insensitive like the patterns above: markup such as
    # CONTENT="text/html; CHARSET=..." is common in older pages.
    if meta =~ /charset=['"]?([^\s'";]+)['"]?/i
      charsets << $1
      debug "html charset #{charsets.last} added from meta"
    end
  end
  return charsets.uniq
end

- (Object) body_to_utf(str)



70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File '/home/apoc/projects/ruby/rbot/lib/rbot/core/utils/httputil.rb', line 70

# Convert +str+ to UTF-8 using the charsets reported by body_charset.
#
# The candidates are tried from last to first, and the first one that
# decodes without raising wins. Invalid byte sequences are dropped during
# the hop through UTF-16le.
#
# str:: the (already decompressed) body; it is not modified
#
# Returns a new UTF-8 String, or +str+ itself when body_charset returns nil
# or every candidate charset fails.
def body_to_utf(str)
  # Sniff on a copy so that charset detection cannot alter the bytes that
  # are about to be decoded.
  charsets = self.body_charset(str && str.dup)
  return str unless charsets

  charsets.reverse_each do |charset|
    begin
      debug "try decoding using #{charset}"
      # Tag a copy rather than the argument: callers may keep appending raw
      # bytes to the string they passed in.
      tagged = str.dup.force_encoding(charset)
      return tagged.encode('UTF-16le', :invalid => :replace, :replace => '').encode('UTF-8')
    rescue
      # Unknown charset name or failed conversion: try the next candidate.
      error 'failed to use encoding'
      error $!
    end
  end

  return str
end

- (Object) cooked_body



140
141
142
# File '/home/apoc/projects/ruby/rbot/lib/rbot/core/utils/httputil.rb', line 140

# Return the raw body after it has been passed through decompress_body and
# then body_to_utf, in that order.
def cooked_body
  inflated = self.decompress_body(self.raw_body)
  self.body_to_utf(inflated)
end

- (Object) decompress_body(str)



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File '/home/apoc/projects/ruby/rbot/lib/rbot/core/utils/httputil.rb', line 91

# Undo the content coding named by the Content-Encoding header.
#
# str:: the body as received
#
# Returns the decoded body. +str+ itself is returned when there is nothing
# to undo: no header, an empty or "identity" value, or a charset name that
# the server put in Content-Encoding by mistake (in which case the charset
# is appended to the Content-Type header instead).
#
# Raises Zlib::Error when a deflate body cannot be inflated, and
# RuntimeError for any other Content-Encoding value.
def decompress_body(str)
  method = self['content-encoding']
  case method
  when nil, /\A(?:identity)?\z/i
    # Header absent, empty, or naming the identity coding.
    return str
  when /gzip/ # Matches gzip, x-gzip, and the non-rfc-compliant gzip;q=\d sent by some servers
    debug "gunzipping body"
    begin
      return Zlib::GzipReader.new(StringIO.new(str)).read
    rescue Zlib::Error => e
      # If we can't unpack the whole stream (e.g. because we're doing a
      # partial read), salvage the bytes that can still be decoded.
      debug "full gunzipping failed (#{e}), trying to recover as much as possible"
      ret = String.new # String.new without arguments is an empty ASCII-8BIT string
      begin
        Zlib::GzipReader.new(StringIO.new(str)).each_byte { |byte|
          ret << byte
        }
      rescue
        # Best effort: keep whatever was decoded before the stream broke.
      end
      return ret
    end
  when 'deflate'
    debug "inflating body"
    # From http://www.koders.com/ruby/fid927B4382397E5115AC0ABE21181AB5C1CBDD5C17.aspx?s=thread:
    # -MAX_WBITS stops zlib from looking for a zlib header
    inflater = Zlib::Inflate.new(-Zlib::MAX_WBITS)
    begin
      return inflater.inflate(str)
    rescue Zlib::Error => e
      # Servers that follow the HTTP spec wrap the deflate stream in a zlib
      # header, which the header-less attempt above rejects; retry expecting
      # that header before giving up.
      debug "raw inflation failed (#{e}), retrying with a zlib header"
      begin
        return Zlib::Inflate.inflate(str)
      rescue Zlib::Error
        # Report the failure of the first attempt, as before.
        raise e
      end
    ensure
      # Release the zlib stream instead of waiting for the GC.
      inflater.close
    end
  when /^(?:iso-8859-\d+|windows-\d+|utf-8|utf8)$/i
    # B0rked servers (Freshmeat being one of them) sometimes return the charset
    # in the content-encoding; in this case we assume that the document has
    # a standard content-encoding
    old_hsh = self.to_hash
    # Same default as body_charset uses when the Content-Type header is missing.
    ctype = self['content-type'] || 'text/html'
    self['content-type'] = "#{ctype}; charset=#{method.downcase}"
    warning "Charset vs content-encoding confusion, trying to recover: from\n#{old_hsh.pretty_inspect}to\n#{self.to_hash.pretty_inspect}"
    return str
  else
    debug self.to_hash
    raise "Unhandled content encoding #{method}"
  end
end

- (Object) partial_body(size = 0, &block)

Read chunks from the body until we have at least size bytes, yielding the partial text at each chunk. Return the partial body.



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File '/home/apoc/projects/ruby/rbot/lib/rbot/core/utils/httputil.rb', line 146

# Read chunks from the body until we have at least +size+ bytes, yielding
# the partial text at each chunk. Return the partial body.
#
# size::  number of received bytes after which reading stops; with 0 or
#         nil the loop is never cut short
# block:: optional; called after every chunk with everything received so
#         far, run through decompress_body and body_to_utf
#
# If the body has already been read (@read is set), body() is used as the
# partial and the block is called once.
def partial_body(size=0, &block)
  # The conversion helpers may re-tag or rewrite the string they are given.
  # Hand them a copy, so that the buffer stays a plain byte string that
  # further chunks can be appended to (and so that body() is left intact).
  cook = lambda { |raw| self.body_to_utf(self.decompress_body(raw && raw.dup)) }

  partial = String.new

  if @read
    debug "using body() as partial"
    partial = self.body
    yield cook.call(partial) if block_given?
  else
    debug "disabling cache"
    self.no_cache = true
    self.read_body { |chunk|
      partial << chunk
      yield cook.call(partial) if block_given?
      # Count bytes, not characters, as the contract above promises.
      break if size && size > 0 && partial.bytesize >= size
    }
  end

  return cook.call(partial)
end