module Scrubyt
  ##
  #=<tt>Fetching pages (and related functionality)</tt>
  #
  #Since a lot of things happen during (and before) the fetching of a
  #document, all fetching-related functionality was moved into a separate
  #module - so if you are looking for anything that loads a document (even
  #by submitting a form or clicking a link), or for related things such as
  #setting a proxy, you should find it here.
  module FetchAction

    # Shared navigation state. These are class variables, so all users of
    # FetchAction read and modify the same values.
    @@current_doc_url = nil       # url or file name of the current document
    @@current_doc_protocol = nil  # 'http', 'https' or 'file' (see determine_protocol)
    @@base_dir = nil              # directory prefix used by handle_relative_path
    @@host_name = nil             # base url that relative urls are resolved against
    # Declared up front so that the getters and restore_host_name return nil
    # instead of raising NameError when they are called before the first fetch.
    @@original_host_name = nil    # first host name recorded by store_host_name
    @@mechanize_doc = nil         # page object of the last non-file fetch
    @@hpricot_doc = nil           # parsed Hpricot document of the last fetch
    @@agent = WWW::Mechanize.new  # single agent shared by every request
    @@history = []                # stack of documents saved by store_page

    ##
    #Action to fetch a document (either a file or a http address)
    #
    #*parameters*
    #
    #_doc_url_ - the url or file name to fetch
    #
    #_args_ - optional; if given, the first element is read as an options hash
    #with the keys :proxy, :mechanize_doc, :resolve, :basic_auth, :user_agent,
    #:open_timeout and :read_timeout
    #
    #The parsed document is stored in @@hpricot_doc. For non-file documents the
    #page is stored in @@mechanize_doc and the host name is updated.
    def self.fetch(doc_url, *args)
      mechanize_doc = nil
      resolve = :full

      unless args.empty?
        options = args[0]
        mechanize_doc = options[:mechanize_doc]
        # Fall back to :full (the default used when no options are given) so a
        # relative url never reaches handle_relative_url with a nil resolve mode.
        resolve = options[:resolve] || :full
        proxy = options[:proxy]
        basic_auth = options[:basic_auth]
        user_agent = options[:user_agent] || "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.4) Gecko/20061201 Firefox/2.0.0.4 (Ubuntu-feisty)"
        open_timeout = options[:open_timeout]
        read_timeout = options[:read_timeout]

        # The second argument is the read timeout (it used to be basic_auth).
        set_timeouts(open_timeout, read_timeout) if open_timeout || read_timeout
        parse_and_set_proxy(proxy) if proxy
        # The user agent is (re)set whenever an options hash is passed.
        set_user_agent(user_agent)
        parse_and_set_basic_auth(basic_auth) if basic_auth
      end

      @@current_doc_url = doc_url
      @@current_doc_protocol = determine_protocol

      if mechanize_doc.nil? && @@current_doc_protocol != 'file'
        # Both helpers may rewrite @@current_doc_url into an absolute url.
        handle_relative_path(doc_url)
        handle_relative_url(doc_url, resolve)

        Scrubyt.log :ACTION, "fetching document: #{@@current_doc_url}"
        @@mechanize_doc = @@agent.get(@@current_doc_url)
      else
        # Either the caller already has the page (submit / click_link) or the
        # document is a local file, in which case this stays nil.
        @@mechanize_doc = mechanize_doc
      end

      if @@current_doc_protocol == 'file'
        # Block form closes the handle once the content has been read.
        # NOTE(review): Kernel#open treats names starting with "|" as commands;
        # File.open would be safer if only plain files are expected here.
        content = open(@@current_doc_url) { |io| io.read }
        @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(content))
      else
        @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc.body))
        store_host_name(self.get_current_doc_url)   # in case we're on a new host
      end
    end

    ##
    #Forwards the connection timeouts to the shared agent
    #
    #*parameters*
    #
    #_open_ - the open timeout, passed on unchanged
    #
    #_read_ - the read timeout, passed on unchanged
    #
    #NOTE(review): this relies on the agent responding to set_timeouts; the
    #definition of that method is not visible in this file - confirm it exists.
    def self.set_timeouts(open, read)
      @@agent.set_timeouts(open, read)
    end

    ##
    #Submit a form and fetch the page that comes back
    #
    #*parameters*
    #
    #_current_form_ - the form to submit
    #
    #_button_ - optional button to submit the form with
    #
    #_type_ - when truthy (and a button is given) the form submits itself
    #through current_form.submit instead of going through the agent
    def self.submit(current_form, button=nil, type=nil)
      Scrubyt.log :ACTION, 'Submitting form...'
      response =
        if button.nil?
          @@agent.submit(current_form)
        elsif type
          current_form.submit(button)
        else
          @@agent.submit(current_form, button)
        end
      @@current_doc_url = response.uri.to_s
      Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
      # Hand the already loaded page to fetch so it is not requested twice.
      fetch(@@current_doc_url, :mechanize_doc => response)
    end

    ##
    #Click the link specified by the text (or by a compound example)
    #
    #*parameters*
    #
    #_link_spec_ - a Hash is looked up as a compound example, anything else
    #is looked up as the text of the link
    #
    #_index_ - passed on to the lookup (default 0)
    def self.click_link(link_spec,index = 0)
      Scrubyt.log :ACTION, "Clicking link specified by: %p" % link_spec
      matched_node =
        if link_spec.is_a?(Hash)
          CompoundExampleLookup.find_node_from_compund_example(@@hpricot_doc, link_spec, false, index)
        else
          SimpleExampleLookup.find_node_from_text(@@hpricot_doc, link_spec, false, index)
        end
      # The match may not carry the href itself, so look for the nearest node that does.
      link_node = XPathUtils.find_nearest_node_with_attribute(matched_node, 'href')
      next_page = @@agent.click(link_node)
      @@current_doc_url = next_page.uri.to_s
      Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
      fetch(@@current_doc_url, :mechanize_doc => next_page)
    end

    ##
    #Follow the href of an area element (image map) of the current page
    #
    #*parameters*
    #
    #_index_ - position of the area within the "//area" search result (default 0)
    def self.click_image_map(index = 0)
      Scrubyt.log :ACTION, "Clicking image map at index: %p" % index
      area = @@mechanize_doc.search("//area")[index]
      next_page = @@agent.get(area['href'])
      @@current_doc_url = next_page.uri.to_s
      Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
      fetch(@@current_doc_url, :mechanize_doc => next_page)
    end

    ##
    #Returns the url (or file name) of the current document, as last set by
    #fetch, submit, click_link or click_image_map. It can be queried at any
    #point; typically it is used when the navigation is over and the result
    #document is passed to the wrapper.
    def self.get_current_doc_url
      @@current_doc_url
    end

    ##
    #Returns the page object stored in @@mechanize_doc by the last fetch.
    #After fetching a local file without a :mechanize_doc option this is nil,
    #because fetch then stores the (nil) option value.
    def self.get_mechanize_doc
      @@mechanize_doc
    end

    ##
    #Returns the current Hpricot document: the one parsed by the last fetch,
    #or the one put back by restore_page.
    def self.get_hpricot_doc
      @@hpricot_doc
    end

    ##
    #Returns the currently stored host name (see store_host_name and
    #restore_host_name). Unlike the getters above, this is an instance method.
    def get_host_name
      @@host_name
    end

    ##
    #Resets the host name to the first one that store_host_name recorded.
    #Nothing happens (and nil is returned) while the current document is a file.
    def restore_host_name
      @@host_name = @@original_host_name unless @@current_doc_protocol == 'file'
    end

    ##
    #Saves the current Hpricot document on top of the history stack so that
    #restore_page can bring it back later.
    def store_page
      @@history << @@hpricot_doc
    end

    ##
    #Takes the most recently stored document off the history stack and makes
    #it the current Hpricot document. With an empty history, pop returns nil
    #and the current document becomes nil.
    def restore_page
      @@hpricot_doc = @@history.pop
    end

    ##
    #Instance-level wrapper that delegates to FetchAction.store_host_name
    #
    #*parameters*
    #
    #_doc_url_ - passed on unchanged
    def store_host_name(doc_url)
      FetchAction.store_host_name(doc_url)
    end

    ##
    #Remember the base url ("host name") that relative urls are resolved against
    #
    #For http/https documents this is the uri of the current page cut after
    #its last slash; the trailing slash itself is then dropped. The very first
    #value is additionally kept in @@original_host_name for restore_host_name.
    #
    #*parameters*
    #
    #_doc_url_ - used only as a fallback when no host name is known yet
    def self.store_host_name(doc_url)
      if %w[http https].include?(@@current_doc_protocol)
        current_uri = @@mechanize_doc.uri.to_s
        # Match on the scheme the page really has: after a redirect it can
        # differ from the protocol of the requested url. A uri without any
        # slash after the host (e.g. "http://example.com") is used as is
        # instead of failing on a nil match.
        @@host_name = current_uri[%r{\Ahttps?://.+/}] || current_uri
      end
      @@host_name = doc_url if @@host_name.nil?
      # Removes a single trailing slash; unlike indexing the last character
      # this is also safe for an empty string.
      @@host_name = @@host_name.chomp('/')
      @@original_host_name ||= @@host_name
    end #end of method store_host_name

    ##
    #Works out the protocol of @@current_doc_url: 'https', 'http' (also for
    #urls starting with "www") or 'file' for everything else.
    #
    #A url that looks like a file while the previous document was fetched via
    #http(s) keeps the previous protocol.
    def self.determine_protocol
      previous = @@current_doc_protocol
      detected = case @@current_doc_url
                 when /^https/ then 'https'
                 when /^http/, /^www/ then 'http'
                 else 'file'
                 end
      if detected == 'file' && (previous == 'http' || previous == 'https')
        previous
      else
        detected
      end
    end

    ##
    #Parse a proxy specification and configure the shared agent with it
    #
    #*parameters*
    #
    #_proxy_ - string of the form "host:port"; the host part may itself contain
    #colons (e.g. "http://proxy.example.com:8080")
    #
    #An error is logged and the process is terminated with exit if the host or
    #the port is missing.
    def self.parse_and_set_proxy(proxy)
      # Split on the LAST colon so everything in front of the port stays in the
      # host. A specification without a colon (such as a bare "localhost") has
      # no port and is rejected, instead of using the host string as the port.
      @@host, separator, @@port = proxy.rpartition(':')
      if separator.empty? || @@host.empty? || @@port.empty?
        Scrubyt.log :ERROR, "Invalid proxy specification! Neither host nor port can be nil!"
        exit
      end
      Scrubyt.log :ACTION, "Setting proxy: host=<#{@@host}>, port=<#{@@port}>"
      @@agent.set_proxy(@@host, @@port)
    end

    ##
    #Parse basic authentication credentials and hand them to the shared agent
    #
    #*parameters*
    #
    #_basic_auth_ - string of the form "login@password"
    def self.parse_and_set_basic_auth(basic_auth)
      # Split only on the first "@" so that a password which contains "@"
      # is kept whole instead of being cut at its first "@".
      login, pass = basic_auth.split('@', 2)
      # NOTE(review): the password is written to the log in clear text.
      Scrubyt.log :ACTION, "Basic authentication: login=<#{login}>, pass=<#{pass}>"
      @@agent.basic_auth(login, pass)
    end

    ##
    #Logs the user agent string and sets it on the shared agent
    #
    #*parameters*
    #
    #_user_agent_ - the value assigned to the agent's user_agent attribute
    def self.set_user_agent(user_agent)
      Scrubyt.log :ACTION, "Setting user-agent to #{user_agent}"
      @@agent.user_agent = user_agent
    end

    ##
    #Resolve a relative file path against the remembered base directory
    #
    #On the first call for a file document the directory part of the url (up
    #to the last slash) is remembered as base directory. Afterwards a url that
    #does not contain the base directory gets it prepended.
    #
    #NOTE(review): fetch in this file only calls this method for non-file
    #protocols, so the branch that sets the base directory looks unreachable
    #from there - confirm whether other callers exist.
    #
    #*parameters*
    #
    #_doc_url_ - the url or file name passed to fetch
    def self.handle_relative_path(doc_url)
      if @@base_dir.nil?
        @@base_dir = doc_url.scan(/.+\//)[0] if @@current_doc_protocol == 'file'
      elsif !doc_url.include?(@@base_dir)
        # Plain substring test: the base dir is not a regular expression.
        # The current url is only touched when the prefix is really needed;
        # it is no longer reset to nil when doc_url already contains it.
        @@current_doc_url = @@base_dir + doc_url
      end
    end

    ##
    #Turn a relative url into an absolute one and store it in
    #@@current_doc_url. Urls starting with "http" are left untouched.
    #
    #*parameters*
    #
    #_doc_url_ - the url as passed to fetch
    #
    #_resolve_ - :full prepends the stored host name (base url), :host prepends
    #only its scheme-and-host part, any other value is itself used as prefix
    def self.handle_relative_url(doc_url, resolve)
      return if doc_url =~ /^http/

      relative_url = doc_url
      if relative_url !~ /^\//
        if relative_url[0..0] == '?'
          # Query-only link: it is appended to the uri of the current page
          # (or of the first page in the agent's history for popup pages).
          current_uri = @@mechanize_doc.uri.to_s
          current_uri = @@agent.history.first.uri.to_s if current_uri =~ /\/popup\//
          if current_uri.include?('?')
            # The current uri already has a query: keep it only up to its last slash.
            current_uri = current_uri.scan(/.+\//)[0]
          else
            current_uri += '/' unless current_uri[-1..-1] == '/'
          end
          @@current_doc_url = current_uri + relative_url
          return
        end
        relative_url = '/' + relative_url
      end

      case resolve
        when :full
          # Plain substring test: interpolating the host name into a regexp
          # made characters such as "." or "?" act as metacharacters and could
          # even raise a RegexpError.
          if !@@host_name.nil? && !relative_url.include?(@@host_name)
            @@current_doc_url = @@host_name + relative_url
          end
          # Drops every repeated "/"-separated segment, presumably to remove the
          # overlap when the host name ends with the directory the url starts with.
          # NOTE(review): this also removes segments that repeat legitimately.
          @@current_doc_url = @@current_doc_url.split('/').uniq.join('/')
        when :host
          # Exactly two slashes means the host name has no path part.
          base_host_name = (@@host_name.count("/") == 2 ? @@host_name : @@host_name.scan(/(http.+?\/\/.+?)\//)[0][0])
          @@current_doc_url = base_host_name + relative_url
        else
          # custom resolving: the given value is the prefix
          @@current_doc_url = resolve + relative_url
      end
    end
  end
end
