Object
The Mechanize library is used for automating interaction with a website. It can follow links and submit forms. Form fields can be populated and submitted. A history of visited URLs is maintained and can be queried.
require 'rubygems'
require 'mechanize'
require 'logger'
# Build an agent that writes its log to mech.log and identifies itself
# with the 'Mac Safari' user agent alias.
agent = Mechanize.new do |mech|
  mech.log = Logger.new("mech.log")
end
agent.user_agent_alias = 'Mac Safari'

# Fetch the Google home page and look up the form named "f".
home_page = agent.get("http://www.google.com/")
query_form = home_page.form_with(:name => "f")

# Fill in the field named "q", submit the form and print the response body.
query_form.field_with(:name => "q").value = "Hello"
results_page = agent.submit(query_form)
puts results_page.body
The version of Mechanize you are using.
User Agent aliases
(Not documented)
# File lib/mechanize.rb, line 109
# Subclass hook. Gives +child+ this class's HTML parser and logger unless
# the child already has values of its own, then passes the notification
# up the chain with +super+.
def inherited(child)
  child.html_parser = html_parser unless child.html_parser
  child.log = log unless child.log
  super
end
(Not documented)
# File lib/mechanize.rb, line 116
# Sets every agent setting to its default value and then, when a block is
# given, yields the new agent so the caller can adjust it.
def initialize
  # Settings exposed through accessors.
  @cookie_jar = CookieJar.new
  @log = nil
  @open_timeout = @read_timeout = nil
  @user_agent = AGENT_ALIASES['Mechanize']
  @watch_for_set = nil
  @history_added = nil

  # OpenSSL configuration.
  @ca_file = nil # server certificate file
  # Callback for OpenSSL errors raised while verifying the server
  # certificate chain. Useful for debugging, or for ignoring errors by
  # always returning _true_.
  @verify_callback = nil
  @cert = nil # certificate
  @key = nil  # private key
  @pass = nil # password

  @redirect_ok = true # follow redirects?
  @gzip_enabled = true

  # Settings exposed through readers.
  @history = Mechanize::History.new
  @pluggable_parser = PluggableParser.new

  # Authentication state.
  @user = nil           # auth user
  @password = nil       # auth password
  @digest = nil         # DigestAuth digest
  @auth_hash = {}       # keeps track of urls for sending auth
  @request_headers = {} # request headers to be used

  # Proxy settings.
  @proxy_addr = nil
  @proxy_pass = nil
  @proxy_port = nil
  @proxy_user = nil

  @conditional_requests = true

  @follow_meta_refresh = false
  @redirection_limit = 20

  # Connection cache and keep-alive.
  @connection_cache = {}
  @keep_alive_time = 300
  @keep_alive = true

  # Unknown schemes lazily get a handler that raises; the four known
  # schemes all share one handler that returns the link unchanged.
  pass_through = lambda { |link, page| link }
  @scheme_handlers = Hash.new { |handlers, scheme|
    handlers[scheme] = lambda { |link, page|
      raise UnsupportedSchemeError.new(scheme)
    }
  }
  %w[http https relative file].each do |known_scheme|
    @scheme_handlers[known_scheme] = pass_through
  end

  @pre_connect_hook = Chain::PreConnectHook.new
  @post_connect_hook = Chain::PostConnectHook.new

  @html_parser = self.class.html_parser

  yield self if block_given?
end
Sets the user and password to be used for authentication.
# File lib/mechanize.rb, line 213
# Stores the credentials to use for authentication.
#
# +user+::     value stored in <tt>@user</tt>
# +password+:: value stored in <tt>@password</tt>
#
# Returns +password+.
def auth(user, password)
  @user, @password = user, password
  password
end
Equivalent to the browser back button. Returns the most recent page visited.
# File lib/mechanize.rb, line 320
# Removes the newest entry from <tt>@history</tt> and returns it.
def back
  @history.pop
end
Clicks the Mechanize::Link object passed in and returns the page fetched.
# File lib/mechanize.rb, line 311
# Follows +link+ and returns the page fetched by #get.
#
# +link+:: either an object responding to +href+ (and optionally +page+),
#          or any object indexable with <tt>link['href']</tt> /
#          <tt>link['src']</tt>.
#
# The referer sent is the link's own page when it has one, otherwise the
# current page.
def click(link)
  # Ask whether the link knows its page rather than rescuing whatever
  # link.page raises: a blanket rescue would also hide genuine errors.
  referer = link.respond_to?(:page) ? link.page : nil
  href = link.respond_to?(:href) ? link.href : (link['href'] || link['src'])
  get(:url => href, :referer => (referer || current_page()))
end
Returns the current page loaded by Mechanize
# File lib/mechanize.rb, line 408
# Returns the newest entry in <tt>@history</tt> without removing it.
def current_page
  @history.last
end
DELETE to url with query_params, and setting options:
delete('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
# File lib/mechanize.rb, line 280
# Sends a DELETE request to +url+.
#
# +url+::          target of the request
# +query_params+:: parameters passed through to #head
# +options+::      extra request options; <tt>:verb</tt> is always
#                  overridden with <tt>:delete</tt>
#
# The request is delegated to #head with the verb replaced. The resulting
# page is added to the history and returned.
def delete(url, query_params = {}, options = {})
  delete_options = options.merge(:verb => :delete)
  response_page = head(url, query_params, delete_options)
  add_to_history(response_page)
  response_page
end
Fetches the URL passed in and returns a page.
# File lib/mechanize.rb, line 220
# Fetches a URL, records the page in the history, yields it when a block
# is given, and returns it.
#
# Two calling conventions are accepted:
#
# * <tt>get(url, parameters = [], referer = nil)</tt> - positional form.
#   When +parameters+ does not respond to +each+ it is taken to be the
#   referer (legacy two-argument form).
# * <tt>get(:url => ..., :params => ..., :referer => ..., :headers => ...,
#   :verb => ...)</tt> - hash form; <tt>:url</tt> is mandatory.
#
# Raises ArgumentError when the hash form has no <tt>:url</tt>.
def get(options, parameters = [], referer = nil)
  verb = :get

  if options.is_a?(Hash)
    raise ArgumentError.new("url must be specified") unless url = options[:url]
    parameters = options[:params] || []
    referer = options[:referer]
    headers = options[:headers]
    verb = options[:verb] || verb
  else
    url = options
    unless parameters.respond_to?(:each) # FIXME: Remove this in 0.8.0
      referer = parameters
      parameters = []
    end
  end

  # Without an explicit referer, a URL starting with "http" gets a blank
  # page as referer; anything else falls back to the current page, or to
  # a blank page when there is no history yet.
  unless referer
    inherited_referer = (url.to_s =~ /^http/) ? nil : current_page
    referer = inherited_referer || Page.new(nil, {'content-type'=>'text/html'})
  end

  # FIXME: Huge hack so that using a URI as a referer works.  I need to
  # refactor everything to pass around URIs but still support
  # Mechanize::Page#base
  unless referer.is_a?(Mechanize::File)
    referer_uri = referer.is_a?(String) ? URI.parse(referer) : referer
    referer = Page.new(referer_uri, {'content-type' => 'text/html'})
  end

  fetched = fetch_page(
    :uri     => url,
    :referer => referer,
    :headers => headers || {},
    :verb    => verb,
    :params  => parameters
  )
  add_to_history(fetched)
  yield fetched if block_given?
  fetched
end
Fetch a file and return the contents of the file.
# File lib/mechanize.rb, line 305
# Fetches +url+ with #get and returns the +body+ of the resulting page.
def get_file(url)
  fetched = get(url)
  fetched.body
end
HEAD to url with query_params, and setting options:
head('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
# File lib/mechanize.rb, line 291
# Sends a HEAD request to +url+.
#
# +url+::          target of the request
# +query_params+:: sent as the request's <tt>:params</tt>
# +options+::      overrides for the default request options
#                  (<tt>:uri</tt>, <tt>:headers</tt>, <tt>:params</tt>,
#                  <tt>:verb</tt>)
#
# Yields the page when a block is given and returns it. The page is not
# added to the history here.
def head(url, query_params = {}, options = {})
  defaults = {
    :uri     => url,
    :headers => {},
    :params  => query_params,
    :verb    => :head
  }
  response_page = fetch_page(defaults.merge(options))
  yield response_page if block_given?
  response_page
end
(Not documented)
# File lib/mechanize.rb, line 185
# Returns the logger held by this object's class.
def log
  self.class.log
end
(Not documented)
# File lib/mechanize.rb, line 184
# Stores +l+ as the logger on this object's class, not on the instance.
def log=(l)
  self.class.log = l
end
(Not documented)
# File lib/mechanize.rb, line 183
# Returns the +max_size+ of the page history.
def max_history
  @history.max_size
end
(Not documented)
# File lib/mechanize.rb, line 182
# Sets the +max_size+ of the page history to +length+.
def max_history=(length)
  @history.max_size = length
end
Posts to the given URL with the request entity. The request entity is specified by either a string, or a list of key-value pairs represented by a hash or an array of arrays.
Examples:
agent.post('http://example.com/', "foo" => "bar")
agent.post('http://example.com/', [ ["foo", "bar"] ])
agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')
# File lib/mechanize.rb, line 334
# Posts +query+ to +url+ and returns the resulting page.
#
# +url+::     target of the request
# +query+::   a String, sent verbatim as the request entity, or an
#             enumerable of key/value pairs turned into form fields;
#             IO values become file uploads and switch the form to
#             multipart/form-data
# +headers+:: extra request headers
def post(url, query={}, headers={})
  return request_with_entity(:post, url, query, :headers => headers) if query.is_a?(String)

  # Stand-in for a parsed <form> node: a Hash of attributes that also
  # answers +search+ with no matches.
  fake_node = {}
  def fake_node.search(*args)
    []
  end
  fake_node['method'] = 'POST'
  fake_node['enctype'] = 'application/x-www-form-urlencoded'

  form = Form.new(fake_node)
  query.each do |name, value|
    if value.is_a?(IO)
      form.enctype = 'multipart/form-data'
      upload = Form::FileUpload.new({'name' => name.to_s}, ::File.basename(value.path))
      upload.file_data = value.read
      form.file_uploads << upload
    else
      form.fields << Form::Field.new({'name' => name.to_s}, value)
    end
  end

  post_form(url, form, headers)
end
(Not documented)
# File lib/mechanize.rb, line 191
# Returns the +hooks+ collection of the post-connect hook.
def post_connect_hooks
  @post_connect_hook.hooks
end
(Not documented)
# File lib/mechanize.rb, line 187
# Returns the +hooks+ collection of the pre-connect hook.
def pre_connect_hooks
  @pre_connect_hook.hooks
end
PUT to url with entity, and setting options:
put('http://tenderlovemaking.com/', 'new content', :headers => {'Content-Type' => 'text/plain'})
# File lib/mechanize.rb, line 271
# Sends a PUT request carrying +entity+ to +url+.
#
# +url+::     target of the request
# +entity+::  request body
# +options+:: passed through to #request_with_entity (e.g. <tt>:headers</tt>)
#
# Returns whatever #request_with_entity returns.
def put(url, entity, options = {})
  request_with_entity(:put, url, entity, options)
end
(Not documented)
# File lib/mechanize.rb, line 382
# Sends +entity+ to +url+ using the HTTP method +verb+, records the
# resulting page in the history and returns it.
#
# +verb+::    HTTP method as a symbol (e.g. <tt>:put</tt>, <tt>:post</tt>)
# +url+::     target of the request
# +entity+::  request body
# +options+:: overrides for <tt>:uri</tt>, <tt>:referer</tt> and
#             <tt>:headers</tt>; caller-supplied headers win over the
#             default Content-Type / Content-Length
def request_with_entity(verb, url, entity, options={})
  cur_page = current_page || Page.new( nil, {'content-type'=>'text/html'})

  options = {
    :uri => url,
    :referer => cur_page,
    :headers => {},
  }.update(options)

  # Content-Length counts bytes, not characters. String#size would
  # under-report any entity containing multibyte characters.
  entity_length = entity.respond_to?(:bytesize) ? entity.bytesize : entity.size

  headers = {
    'Content-Type' => 'application/octet-stream',
    'Content-Length' => entity_length.to_s,
  }.update(options[:headers] || {}) # tolerate an explicit :headers => nil

  options.update({
    :verb => verb,
    :params => [entity],
    :headers => headers,
  })

  page = fetch_page(options)
  add_to_history(page)
  page
end
Sets the proxy address, port, user, and password. addr should be a host name, with no “http://” prefix.
# File lib/mechanize.rb, line 197
# Stores the proxy settings.
#
# +addr+:: proxy host
# +port+:: proxy port
# +user+:: proxy user name (optional)
# +pass+:: proxy password (optional)
#
# Returns the four values as an array, in that order.
def set_proxy(addr, port, user = nil, pass = nil)
  @proxy_addr = addr
  @proxy_port = port
  @proxy_user = user
  @proxy_pass = pass
  [addr, port, user, pass]
end
Submit a form with an optional button. Without a button:
page = agent.get('http://example.com')
agent.submit(page.forms.first)
With a button:
agent.submit(page.forms.first, page.forms.first.buttons.first)
# File lib/mechanize.rb, line 366
# Submits +form+ and returns the resulting page.
#
# +form+::    the form to submit
# +button+::  optional button to include in the submitted query
# +headers+:: extra request headers
#
# POST forms go through #post_form. GET forms go through #get with any
# query string stripped from the form's action and the form's own page as
# referer. Any other method raises a RuntimeError.
def submit(form, button=nil, headers={})
  form.add_button_to_query(button) if button

  http_method = form.method.upcase
  if http_method == 'POST'
    post_form(form.action, form, headers)
  elsif http_method == 'GET'
    get(
      :url     => form.action.gsub(/\?[^\?]*$/, ''),
      :params  => form.build_query,
      :headers => headers,
      :referer => form.page
    )
  else
    raise "unsupported method: #{form.method.upcase}"
  end
end
Runs given block, then resets the page history as it was before. self is given as a parameter to the block. Returns the value of the block.
# File lib/mechanize.rb, line 427
# Yields +self+ to the block and then puts the page history back to a
# copy taken before the block ran, even when the block raises.
#
# Returns the value of the block.
def transact
  saved_history = @history.dup
  begin
    yield self
  ensure
    @history = saved_history
  end
end
Set the user agent for the Mechanize object. See AGENT_ALIASES
# File lib/mechanize.rb, line 203
# Sets the user agent string by looking +al+ up in AGENT_ALIASES.
#
# Raises a RuntimeError when the alias has no entry.
def user_agent_alias=(al)
  agent_string = AGENT_ALIASES[al]
  raise("unknown agent alias") unless agent_string
  self.user_agent = agent_string
end
(Not documented)
# File lib/mechanize.rb, line 638
# Pushes +page+ onto the history together with its resolved URI, then
# invokes the +history_added+ callback with the page when one is set.
def add_to_history(page)
  resolved_uri = resolve(page.uri)
  @history.push(page, resolved_uri)
  history_added.call(page) if history_added
end
uri is an absolute URI
# File lib/mechanize.rb, line 470
# Performs one HTTP request described by +params+ and returns the
# resulting page, following meta refreshes, HTTP redirects and
# authentication challenges by calling itself recursively.
#
# +params+ is merged over the defaults below. Keys read in this method:
# <tt>:uri</tt>, <tt>:referer</tt>, <tt>:verb</tt>, <tt>:params</tt>,
# <tt>:headers</tt>, <tt>:redirects</tt>.
#
# Raises RedirectLimitReachedError when following another redirect would
# exceed +redirection_limit+, and ResponseCodeError for unauthorized
# responses that cannot be retried and for unhandled response classes.
def fetch_page(params)
  options = {
    :request => nil,
    :response => nil,
    :connection => nil,
    :referer => current_page(),
    :uri => nil,
    :verb => :get,
    :agent => self,
    :redirects => 0,
    :params => [],
    :headers => {},
  }.merge(params)

  # Handlers run, in this order, against the options hash before the
  # request is sent. The values read back from +options+ below are
  # presumably filled in by these handlers (their code is not shown here).
  before_connect = Chain.new([
    Chain::URIResolver.new(@scheme_handlers),
    Chain::ParameterResolver.new,
    Chain::RequestResolver.new,
    Chain::ConnectionResolver.new(
      @connection_cache,
      @keep_alive,
      @proxy_addr,
      @proxy_port,
      @proxy_user,
      @proxy_pass
    ),
    Chain::SSLResolver.new(@ca_file, @verify_callback, @cert, @key, @pass),
    Chain::AuthHeaders.new(@auth_hash, @user, @password, @digest),
    Chain::HeaderResolver.new(
      @keep_alive,
      @keep_alive_time,
      @cookie_jar,
      @user_agent,
      @gzip_enabled,
      @request_headers
    ),
    Chain::CustomHeaders.new,
    @pre_connect_hook,
  ])
  before_connect.handle(options)

  uri = options[:uri]
  request = options[:request]
  cur_page = options[:referer]
  request_data = options[:params]
  redirects = options[:redirects]
  http_obj = options[:connection]

  # Add If-Modified-Since if page is in history
  # (only when conditional requests are enabled - see the trailing modifier)
  if( (page = visited_page(uri)) && page.response['Last-Modified'] )
    request['If-Modified-Since'] = page.response['Last-Modified']
  end if(@conditional_requests)

  # NOTE(review): the matching mu_unlock below is not inside an ensure, so
  # an exception raised between here and there appears to leave the lock
  # held - confirm against the connection's mu_lock implementation.
  http_obj.mu_lock
  # Specify timeouts if given
  http_obj.open_timeout = @open_timeout if @open_timeout
  http_obj.read_timeout = @read_timeout if @read_timeout
  http_obj.start unless http_obj.started?

  # Log specified headers for the request
  log.info("#{ request.class }: #{ request.path }") if log
  request.each_header do |k, v|
    log.debug("request-header: #{ k } => #{ v }")
  end if log

  # Send the request
  # On EOFError / ECONNRESET / EPIPE the connection is finished and
  # restarted and the request is retried. The error is re-raised once two
  # retries have already been made.
  attempts = 0
  begin
    response = http_obj.request(request, *request_data) { |r|
      connection_chain = Chain.new([
        Chain::ResponseReader.new(r),
        Chain::BodyDecodingHandler.new,
      ])
      connection_chain.handle(options)
    }
  rescue EOFError, Errno::ECONNRESET, Errno::EPIPE => x
    log.error("Rescuing EOF error") if log
    http_obj.finish
    raise x if attempts >= 2
    request.body = nil
    http_obj.start
    attempts += 1
    retry
  end

  # Handlers run against the options hash after the response was read.
  after_connect = Chain.new([
    @post_connect_hook,
    Chain::ResponseBodyParser.new(@pluggable_parser, @watch_for_set),
    Chain::ResponseHeaderHandler.new(@cookie_jar, @connection_cache),
  ])
  after_connect.handle(options)
  http_obj.mu_unlock

  res_klass = options[:res_klass]
  response_body = options[:response_body] # not read again in this method
  page = options[:page]

  log.info("status: #{ page.code }") if log

  # Meta refresh handling: a refresh found on the page (page.meta) takes
  # precedence over a "refresh" response header. The redirect limit is
  # only checked on the header branch.
  if follow_meta_refresh
    redirect_uri = nil
    referer = page
    if (page.respond_to?(:meta) && (redirect = page.meta.first))
      redirect_uri = redirect.uri.to_s
      sleep redirect.node['delay'].to_f
      referer = Page.new(nil, {'content-type'=>'text/html'})
    elsif refresh = response['refresh']
      delay, redirect_uri = Page::Meta.parse(refresh, uri)
      raise StandardError, "Invalid refresh http header" unless delay
      if redirects + 1 > redirection_limit
        raise RedirectLimitReachedError.new(page, redirects)
      end
      sleep delay.to_f
    end
    if redirect_uri
      # Record the refreshing page itself before following the refresh.
      @history.push(page, page.uri)
      return fetch_page(
        :uri => redirect_uri,
        :referer => referer,
        :params => [],
        :verb => :get,
        :redirects => redirects + 1
      )
    end
  end

  # res_klass is compared as a class: <= matches the class or a subclass.
  return page if res_klass <= Net::HTTPSuccess

  if res_klass == Net::HTTPNotModified
    log.debug("Got cached page") if log
    return visited_page(uri) || page
  elsif res_klass <= Net::HTTPRedirection
    return page unless follow_redirect?
    log.info("follow redirect to: #{ response['Location'] }") if log
    from_uri = page.uri
    raise RedirectLimitReachedError.new(page, redirects) if redirects + 1 > redirection_limit
    # A redirected HEAD stays a HEAD; every other verb is followed with GET.
    redirect_verb = options[:verb] == :head ? :head : :get
    page = fetch_page(
      :uri => response['Location'].to_s,
      :referer => page,
      :params => [],
      :verb => redirect_verb,
      :redirects => redirects + 1
    )
    # The final page is stored in the history under the redirecting URI.
    @history.push(page, from_uri)
    return page
  elsif res_klass <= Net::HTTPUnauthorized
    # Give up when no credentials are configured, or when an auth scheme
    # was already recorded for this host.
    raise ResponseCodeError.new(page) unless @user || @password
    raise ResponseCodeError.new(page) if @auth_hash.has_key?(uri.host)
    if response['www-authenticate'] =~ /Digest/i
      @auth_hash[uri.host] = :digest
      if response['server'] =~ /Microsoft-IIS/
        @auth_hash[uri.host] = :iis_digest
      end
      @digest = response['www-authenticate']
    else
      @auth_hash[uri.host] = :basic
    end

    # Repeat the same request now that the auth scheme for the host is known.
    return fetch_page(
      :uri => uri,
      :referer => cur_page,
      :verb => request.method.downcase.to_sym,
      :params => request_data,
      :headers => options[:headers]
    )
  end

  raise ResponseCodeError.new(page), "Unhandled response", caller
end
(Not documented)
# File lib/mechanize.rb, line 448
# POSTs the encoded contents of +form+ to +url+, records the resulting
# page in the history and returns it.
#
# +url+::     target of the request
# +form+::    object providing +page+, +request_data+ and +enctype+
# +headers+:: extra request headers; these win over the default
#             Content-Type / Content-Length
#
# The referer is the form's page, else the current page, else a blank page.
def post_form(url, form, headers = {})
  cur_page = form.page || current_page ||
    Page.new( nil, {'content-type'=>'text/html'})

  request_data = form.request_data

  log.debug("query: #{ request_data.inspect }") if log

  # Content-Length counts bytes, not characters. String#size would
  # under-report a body containing multibyte characters.
  content_length =
    request_data.respond_to?(:bytesize) ? request_data.bytesize : request_data.size

  # fetch the page
  page = fetch_page(
    :uri => url,
    :referer => cur_page,
    :verb => :post,
    :params => [request_data],
    :headers => {
      'Content-Type' => form.enctype,
      'Content-Length' => content_length.to_s,
    }.merge(headers)
  )
  add_to_history(page)
  page
end
Disabled; run with --debug to generate this.
Generated with the Darkfish Rdoc Generator 1.1.6.