class Mechanize::Page

This class encapsulates an HTML page. If Mechanize finds a content type of 'text/html', this class will be instantiated and returned.

Example:

require 'mechanize'

agent = Mechanize.new
agent.get('http://google.com/').class # => Mechanize::Page

Constants

DEFAULT_RESPONSE

Attributes

encodings[R]

Possible encodings for this page based on HTTP headers and meta elements

mech[RW]

Public Class Methods

charset(content_type) click to toggle source
# File lib/mechanize/page.rb, line 432
# Extracts the charset parameter from a Content-Type value.
#
# content_type:: a Content-Type string such as "text/html; charset=utf-8".
#                nil is tolerated and yields nil.
#
# Returns the charset token exactly as written, or nil when no charset
# parameter is present or when its value is exactly 'none'.
def charset(content_type)
  return nil unless content_type

  # Media-type parameter names are case-insensitive, so "Charset=" and
  # "CHARSET=" must be recognised as well; hence the /i flag.
  charset = content_type[/;(?:\s*,)?\s*charset\s*=\s*([^()<>@,;:\\"\/\[\]?={}\s]+)/i, 1]
  return nil if charset == 'none'
  charset
end
Also aliased as: charset_from_content_type
charset_from_content_type(content_type)
Alias for: charset
meta_charset(body) click to toggle source

Retrieves all charsets from meta tags in body

# File lib/mechanize/page.rb, line 454
# Collects the charsets declared by <meta> tags in +body+.
#
# For each <meta ...> tag, in document order:
# * a direct charset=... attribute match wins and its value is used;
# * otherwise, if the tag is an http-equiv content-type declaration, the
#   charset is extracted from its content attribute via +charset+.
#
# Tag and attribute names are matched case-insensitively, so <META CHARSET=...>
# and http-equiv="Content-Type" are recognised.
#
# Returns an Array of charset strings; tags that yield nothing are dropped.
def self.meta_charset(body)
  body.scan(/<meta .*?>/i).map do |meta|
    if meta =~ /charset\s*=\s*(["'])?\s*(.+)\s*\1/i
      $2
    elsif meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i
      # nil when the tag has no usable content attribute
      content = meta[/content\s*=\s*(["'])?(.*?)\1/i, 2]
      charset(content) if content
    end
  end.compact
end
meta_content_type(body) click to toggle source

Retrieves the last content-type set by a meta tag in body

# File lib/mechanize/page.rb, line 472
# Finds the content type declared by the last http-equiv content-type
# <meta> tag in +body+.
#
# Tags are examined from last to first. The first http-equiv content-type
# tag found decides the result: the value of its content attribute, or nil
# when that attribute cannot be extracted. Tag and attribute names are
# matched case-insensitively, and whitespace around the "=" of the content
# attribute is accepted (as meta_charset already does).
#
# Returns the content attribute value as a String, or nil when no such tag
# exists.
def self.meta_content_type(body)
  body.scan(/<meta .*?>/i).reverse_each do |meta|
    next unless meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i

    return meta[/content\s*=\s*(["'])?(.*?)\1/i, 2]
  end

  nil
end
new(uri=nil, response=nil, body=nil, code=nil, mech=nil) click to toggle source
Calls superclass method Mechanize::File.new
# File lib/mechanize/page.rb, line 27
# Builds a page and collects the candidate encodings for its body.
#
# uri, response, body, code:: passed on to the superclass initializer;
#                             +response+ falls back to DEFAULT_RESPONSE
#                             when nil.
# mech:: the owning Mechanize agent, or nil. Any other truthy value raises
#        a RuntimeError with the message 'no'.
#
# @encodings is filled in this order: nil, the charset detected from the
# body, charsets from the response headers, charsets from <meta> tags, and
# finally the agent's default encoding. #parser walks this list in reverse
# when no explicit encoding is set, so later entries are tried first.
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  response ||= DEFAULT_RESPONSE

  @meta_content_type = nil
  @encoding = nil
  @encodings = [nil]
  raise 'no' if mech and not Mechanize === mech
  @mech = mech

  # Start with every memoized element list and the parser cleared.
  reset

  # Detection runs before the body is re-tagged as binary below.
  @encodings << Mechanize::Util.detect_charset(body) if body

  @encodings.concat self.class.response_header_charset(response)

  if body
    # Force the encoding to be 8BIT so we can perform regular expressions.
    # We'll set it to the detected encoding later
    # (force_encoding re-tags the caller's string in place.)
    body.force_encoding 'ASCII-8BIT' if body.respond_to? :force_encoding

    @encodings.concat self.class.meta_charset body

    # A <meta> content type, when present, takes precedence in #content_type.
    meta_content_type = self.class.meta_content_type body
    @meta_content_type = meta_content_type if meta_content_type
  end

  @encodings << mech.default_encoding if mech and mech.default_encoding

  super uri, response, body, code
end
response_header_charset(response) click to toggle source
# File lib/mechanize/page.rb, line 441
# Collects the charsets announced by the content-type headers of +response+.
#
# response:: any object whose #each yields (header, value) pairs.
#
# Only headers named exactly 'content-type' whose value mentions "charset"
# (in any letter case) are considered; each is passed through +charset+.
#
# Returns an Array with one entry per qualifying header. An entry is
# whatever +charset+ returned for that header value.
def self.response_header_charset(response)
  charsets = []
  response.each do |header, value|
    # /i: parameter names are case-insensitive ("Charset=" is valid).
    next unless header == 'content-type' && value =~ /charset/i

    charsets << charset(value)
  end
  charsets
end

Public Instance Methods

/()
Alias for: search
at() click to toggle source

Search through the page for path under namespace using Nokogiri's at. The path may be either a CSS or XPath expression.

See also Nokogiri::XML::Node#at

# File lib/mechanize/page.rb, line 212
# Defines #at by forwarding the call to the object returned by #parser.
# NOTE(review): def_delegator is presumably Forwardable's macro; confirm.
def_delegator :parser, :at, :at
base_with(criteria) click to toggle source

Find a single base tag matching criteria. Example:

page.base_with(:href => /foo/).click
# File lib/mechanize/page.rb, line 271
  
bases() click to toggle source

Return a list of all base tags

# File lib/mechanize/page.rb, line 383
# Returns the page's <base> elements wrapped as Base objects.
# The list is built on first use and cached in @bases until #reset.
def bases
  return @bases if @bases

  @bases = search('base').map do |base_node|
    Base.new(base_node, @mech, self)
  end
end
bases_with(criteria) click to toggle source

Find all base tags matching criteria. Example:

page.bases_with(:href => /foo/).each do |base|
  puts base.href
end
# File lib/mechanize/page.rb, line 282
# Macro call; the generated docs attribute bases_with to this line.
# NOTE(review): presumably base_with is generated here too; confirm
# against the definition of elements_with.
elements_with :base
canonical_uri() click to toggle source

Return the canonical URI for the page if there is a link tag with rel="canonical" and an href attribute. Returns nil otherwise.

# File lib/mechanize/page.rb, line 177
# Returns the page's canonical URI, taken from the first
# <link rel="canonical" href="..."> element, or nil when there is none.
#
# If the href is rejected as an invalid URI it is escaped with
# Mechanize::Util.uri_escape and parsed again.
def canonical_uri
  canonical = at('link[@rel="canonical"][@href]')
  return nil unless canonical

  href = canonical['href']
  URI(href)
rescue URI::InvalidURIError
  URI(Mechanize::Util.uri_escape(href))
end
content_type() click to toggle source

Get the content type

# File lib/mechanize/page.rb, line 188
# Returns the page's content type: the one declared by a <meta> tag when
# one was recorded, otherwise the response's 'content-type' header.
def content_type
  return @meta_content_type if @meta_content_type

  response['content-type']
end
detected_encoding() click to toggle source
# File lib/mechanize/page.rb, line 74
# Runs Mechanize::Util.detect_charset over the current body and returns
# its result. Nothing is cached; detection runs on every call.
def detected_encoding
  Mechanize::Util.detect_charset body
end
encoding() click to toggle source
# File lib/mechanize/page.rb, line 94
# Returns the encoding reported by the parsed document, or nil when the
# parser result does not respond to #encoding (for example when it is nil).
def encoding
  doc = parser
  doc.encoding if doc.respond_to?(:encoding)
end
encoding=(encoding) click to toggle source
# File lib/mechanize/page.rb, line 78
# Sets the encoding used to parse the page.
#
# #reset discards the cached parser and every memoized element list, so the
# next call to #parser re-parses the body with the new encoding. The former
# "compare with the current parser's encoding" branch ran after #reset had
# already set @parser to nil and could never execute; it has been removed.
#
# encoding:: the encoding name to parse with, or nil to fall back to the
#            automatic selection in #parser.
#
# Returns +encoding+.
def encoding=(encoding)
  reset

  @encoding = encoding
end
encoding_error?(parser=nil) click to toggle source

Returns whether the parser result contains errors related to encoding. A false result only means that the parser reported no encoding errors; it does not guarantee that the encoding is valid.

# File lib/mechanize/page.rb, line 100
# Tells whether a parse result contains encoding-related errors.
#
# parser:: the parsed document to inspect; defaults to this page's #parser.
#          It must respond to #errors, each error responding to #message.
#
# Returns true when any error message contains "indicate encoding",
# "Invalid char" or "input conversion failed", and false otherwise
# (including when there are no errors at all).
#
# The pattern is written on one line on purpose: the previous multi-line
# literal had no /x flag, so its line breaks and indentation were part of
# the pattern and the second and third alternatives could never match.
def encoding_error?(parser=nil)
  parser ||= self.parser

  parser.errors.any? do |error|
    error.message =~ /indicate encoding|Invalid char|input conversion failed/
  end
end
form_with(criteria) click to toggle source

Find a single form matching criteria. Example:

page.form_with(:action => '/post/login.php') do |f|
  ...
end
# File lib/mechanize/page.rb, line 226
  
forms() click to toggle source

Return a list of all form tags

# File lib/mechanize/page.rb, line 362
# Returns the page's <form> elements wrapped as Mechanize::Form objects.
# A form without an action gets the page's own URI (@uri.to_s) as its
# action. The list is built on first use and cached in @forms.
def forms
  return @forms if @forms

  @forms = search('form').map do |form_node|
    built = Mechanize::Form.new(form_node, @mech, self)
    built.action = @uri.to_s unless built.action
    built
  end
end
forms_with(criteria) click to toggle source

Find all forms matching criteria. Example:

page.forms_with(:action => '/post/login.php').each do |f|
  ...
end
# File lib/mechanize/page.rb, line 237
# Macro call; the generated docs attribute forms_with to this line.
# NOTE(review): presumably form_with is generated here too; confirm
# against the definition of elements_with.
elements_with :form
frame_with(criteria) click to toggle source

Find a single frame tag matching criteria. Example:

page.frame_with(:src => /foo/).click
# File lib/mechanize/page.rb, line 293
  
frames() click to toggle source

Return a list of all frame tags

# File lib/mechanize/page.rb, line 390
# Returns the page's <frame> elements wrapped as Frame objects.
# The list is built on first use and cached in @frames until #reset.
def frames
  return @frames if @frames

  @frames = search('frame').map do |frame_node|
    Frame.new(frame_node, @mech, self)
  end
end
frames_with(criteria) click to toggle source

Find all frame tags matching criteria. Example:

page.frames_with(:src => /foo/).each do |frame|
  p frame.src
end
# File lib/mechanize/page.rb, line 304
# Macro call; the generated docs attribute frames_with to this line.
# NOTE(review): presumably frame_with is generated here too; confirm
# against the definition of elements_with.
elements_with :frame
iframe_with(criteria) click to toggle source

Find a single iframe tag matching criteria. Example:

page.iframe_with(:src => /foo/).click
# File lib/mechanize/page.rb, line 315
  
iframes() click to toggle source

Return a list of all iframe tags

# File lib/mechanize/page.rb, line 397
# Returns the page's <iframe> elements. Each one is wrapped in a Frame
# object, the same wrapper #frames uses. Cached in @iframes until #reset.
def iframes
  return @iframes if @iframes

  @iframes = search('iframe').map do |iframe_node|
    Frame.new(iframe_node, @mech, self)
  end
end
iframes_with(criteria) click to toggle source

Find all iframe tags matching criteria. Example:

page.iframes_with(:src => /foo/).each do |iframe|
  p iframe.src
end
# File lib/mechanize/page.rb, line 326
# Macro call; the generated docs attribute iframes_with to this line.
# NOTE(review): presumably iframe_with is generated here too; confirm
# against the definition of elements_with.
elements_with :iframe
image_urls() click to toggle source
# File lib/mechanize/page.rb, line 409
# Returns the distinct URLs of the page's images, in order of first
# appearance. The list is built on first use and cached in @image_urls.
def image_urls
  return @image_urls if @image_urls

  urls = images.map { |image| image.url }
  @image_urls = urls.uniq
end
image_with(criteria) click to toggle source

Find a single image matching criteria. Example:

page.image_with(:alt => /main/).fetch.save
# File lib/mechanize/page.rb, line 337
  
images() click to toggle source

Return a list of all img tags

# File lib/mechanize/page.rb, line 404
# Returns the page's <img> elements wrapped as Image objects.
# The list is built on first use and cached in @images.
def images
  return @images if @images

  @images = search('img').map do |img_node|
    Image.new(img_node, self)
  end
end
images_with(criteria) click to toggle source

Find all images matching criteria. Example:

page.images_with(:src => /jpg\Z/).each do |img|
  img.fetch.save
end
# File lib/mechanize/page.rb, line 348
# Macro call; the generated docs attribute images_with to this line.
# NOTE(review): presumably image_with is generated here too; confirm
# against the definition of elements_with.
elements_with :image
labels() click to toggle source

Return a list of all label tags

# File lib/mechanize/page.rb, line 415
# Returns the page's <label> elements wrapped as Label objects.
# The list is built on first use and cached in @labels until #reset.
def labels
  return @labels if @labels

  @labels = search('label').map do |label_node|
    Label.new(label_node, self)
  end
end
labels_hash() click to toggle source
# File lib/mechanize/page.rb, line 420
# Returns a Hash mapping each label's 'for' attribute value to its Label.
# Labels whose #for is nil or false are left out; when several labels
# share a key the last one wins. Cached in @labels_hash until #reset.
def labels_hash
  @labels_hash ||= labels.inject({}) do |by_target, label|
    by_target[label.node['for']] = label if label.for
    by_target
  end
end
meta_charset() click to toggle source
# File lib/mechanize/page.rb, line 70
# Instance-level shortcut: applies the class-level meta_charset to this
# page's body and returns its result.
def meta_charset
  page_class = self.class
  page_class.meta_charset body
end
meta_refresh() click to toggle source

Return a list of all meta refresh elements

# File lib/mechanize/page.rb, line 373
# Returns the MetaRefresh objects built from the page's <meta> elements.
#
# When the agent's follow_meta_refresh is :anywhere every <meta> element is
# considered; otherwise only those directly under <head>. Nodes for which
# MetaRefresh.from_node returns nil are dropped. Cached in @meta_refresh.
def meta_refresh
  selector = if @mech.follow_meta_refresh == :anywhere
               'meta'
             else
               'head > meta'
             end

  return @meta_refresh if @meta_refresh

  candidates = search(selector).map { |node| MetaRefresh.from_node(node, self) }
  @meta_refresh = candidates.compact
end
parser() click to toggle source
# File lib/mechanize/page.rb, line 110
# Returns the parsed document for this page, building and caching it on
# first use. Returns nil when the page has no body.
#
# Encoding choice, in priority order:
# 1. @encoding, when one has been set (see #encoding=);
# 2. the agent's default_encoding, when its force_default_encoding is on;
# 3. otherwise each entry of @encodings, last to first, stopping at the
#    first parse for which encoding_error? is false.
def parser
  return @parser if @parser
  return nil unless @body

  if @encoding then
    @parser = @mech.html_parser.parse html_body, nil, @encoding
  elsif mech.force_default_encoding then
    @parser = @mech.html_parser.parse html_body, nil, @mech.default_encoding
  else
    # If every candidate reports encoding errors, the parse produced by the
    # last candidate tried (the first entry of @encodings) is kept.
    @encodings.reverse_each do |encoding|
      @parser = @mech.html_parser.parse html_body, nil, encoding

      break unless encoding_error? @parser
    end
  end

  @parser
end
Also aliased as: root
reset() click to toggle source
# File lib/mechanize/page.rb, line 162
# Discards the cached parser and every memoized element list so they are
# rebuilt from the body on next access.
#
# @images and @image_urls are cleared as well: #images and #image_urls
# memoize into them, and leaving them set kept results built from the
# previous parse alive after an encoding change.
#
# Returns nil.
def reset
  @bases = nil
  @forms = nil
  @frames = nil
  @iframes = nil
  @images = nil
  @image_urls = nil
  @links = nil
  @labels = nil
  @labels_hash = nil
  @meta_refresh = nil
  @parser = nil
  @title = nil
end
response_header_charset() click to toggle source
# File lib/mechanize/page.rb, line 66
# Instance-level shortcut: applies the class-level response_header_charset
# to this page's response and returns its result.
def response_header_charset
  page_class = self.class
  page_class.response_header_charset response
end
root()
Alias for: parser
title() click to toggle source
# File lib/mechanize/page.rb, line 58
# Returns the text of the page's <title> element(s), or nil when there is
# no parsed document or the text is empty. A non-nil title is cached in
# @title; a nil result is looked up again on the next call.
def title
  return @title if @title

  doc = parser
  unless doc
    @title = nil
    return nil
  end

  text = doc.search('title').inner_text
  @title = text.empty? ? nil : text
end