#!/usr/bin/ruby
###################################################################
#
# Author : Tim Hobbs
# Date : 11/12/2004
# Site : http://www.pahanna.com
# Email : uniquely_tim@yahoo.com
#
# Version 1.01 on 11/24/2004
#
# Change Log :
#
# 11/24/2004 Added the config file for setup
# 11/24/2004 Wrapped in a class
#
# Timothy's Goods and Services License
# Copyright 2004 Timothy Hobbs. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# --------------------------- DISCLAIMER -------------------------
#
# THIS SOFTWARE IS PROVIDED BY TIMOTHY HOBBS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
# TIMOTHY HOBBS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation
# are those of the author(s) and should not be interpreted as representing
# official policies, either expressed or implied, of anyone represented by the
# author(s) in an employment or 'work for hire' capacity.
#
#######################################################################

#####################
# Usage Information
#####################
# Requires a config file like the one below. The element and attribute
# names are the ones WEBIndexer reads; the attribute values are placeholders.
#
# <config>
#   <ftp site="ftp.example.com" user="name" password="secret" use="yes" />
#   <directories local="/path/to/local/web" remote="/path/on/site" />
#   <files index_file="index.xml" index_path="/search" exclude="\.(gif|jpg)$" />
#   <keys>
#     <key>source</key>
#     <key>calendar</key>
#     <key>ruby</key>
#     <key>time</key>
#     <key>xml</key>
#     <key>search</key>
#     <key>txmpc</key>
#     <key>javascript</key>
#     <key>gumstix</key>
#   </keys>
# </config>
#
# Call Like "web_indexer.rb path-to-config-file"

######################
# REQUIRED MODULES
######################
require 'rexml/document'
include REXML
# net/ftp is required on demand by WEBIndexer#send_file, so a run with
# use="no" does not need the FTP library to be installed.

# Walks a local web tree, records which configured keys occur in which
# files, writes the result as an XML index and optionally uploads it by FTP.
class WEBIndexer
  # Name of the root element of the generated index document.
  # NOTE(review): the original created the document from an empty string, so
  # it had no root and adding the first <location> failed. The intended root
  # name is not recoverable from this file -- confirm 'locations' against
  # whatever consumes the index.
  ROOT_ELEMENT = 'locations'

  # Loads the configuration.
  #
  # config_path - path to the XML config file; defaults to the first
  #               command-line argument, as the original script used.
  #
  # Raises ArgumentError when the path is missing, the file does not exist,
  # or a required config element/attribute is absent.
  def initialize(config_path = ARGV[0])
    if config_path.nil?
      raise ArgumentError, "Requires first [ and only ] argument to be the path to the config file"
    end
    raise ArgumentError, "Cannot find #{config_path}" unless File.exist?(config_path)

    load_config(config_path)
  end

  ########################
  # PROGRAM MAIN LOOP
  ########################
  # Indexes every directory below the local web root (and the root itself),
  # saves the XML index to the configured local path, then hands it to
  # send_file for upload.
  def main
    # Start from a fresh document with a root element on every run.
    @doc = REXML::Document.new
    @doc.add_element(ROOT_ELEMENT)

    directories_to_index.each { |dir| fx(dir + "/**") }

    # Block form closes the output file even if writing fails.
    File.open(@index_local_path, "w") { |out| @doc.write(out) }

    send_file(@index_local_path, @index_remote_path)
  end

  #########################
  # FIND FILES TO SEARCH
  #########################
  # Searches every non-excluded file matched by the glob pattern d and prints
  # the remote location of each file in which at least one key was found.
  #
  # d - glob pattern, e.g. "/some/dir/**"
  def fx(d)
    Dir[d].sort.each do |path|
      next if File.directory?(path)
      next if @excluded_pattern && path =~ @excluded_pattern

      puts @web_remote + path.sub(@web_local, '') if sx(path)
    end
  end

  ##############
  # FTP A FILE
  ##############
  # Uploads local to remote as a text file unless the config says use="no".
  # FTP protocol errors are reported on stderr rather than raised.
  def send_file(local, remote)
    return if @use_ftp == "no"

    require 'net/ftp'
    ftp = nil
    begin
      ftp = Net::FTP.new(@ftp_site)
      ftp.login(@ftp_user, @ftp_password)
      # passive mode, to get past the firewall
      ftp.passive = true
      ftp.puttextfile(local, remote)
    rescue Net::FTPError => e
      # Use the message: concatenating the exception object itself to a
      # String raises TypeError.
      $stderr.puts "FTP failed: #{e.message}"
    ensure
      ftp.close if ftp
    end
  end

  ##################################
  # ADD TEXT 'KEYS' TO XML OUTPUT
  ##################################
  # Searches the file fn for the configured keys. When any are found, appends
  # a <location url=... desc=...> element holding one <key desc=...> per
  # distinct key to the index document.
  #
  # Returns true when at least one key was found, false otherwise.
  def sx(fn)
    found = keys_in_file(fn)
    return false if found.empty?

    keys = REXML::Element.new("keys")
    found.each do |text|
      key = REXML::Element.new("key")
      key.attributes["desc"] = text
      keys << key
    end

    relative = fn.sub(@web_local, '')
    location = REXML::Element.new("location")
    location << keys
    location.attributes["url"] = relative
    location.attributes["desc"] = relative

    @doc.root << location
    true
  end

  private

  # Reads every setting from the XML config file into instance variables.
  def load_config(config_path)
    xml = File.open(config_path, "r") { |io| REXML::Document.new(io) }

    # user authentication on the ftp site; use="no" disables the upload
    ftp = config_element(xml, "/config/ftp")
    @ftp_site = ftp.attributes['site']
    @ftp_user = ftp.attributes['user']
    @ftp_password = ftp.attributes['password']
    @use_ftp = ftp.attributes['use']

    # base of the local web files and base of the web site
    dirs = config_element(xml, "/config/directories")
    @web_local = config_attribute(dirs, 'local')
    @web_remote = config_attribute(dirs, 'remote')

    # index output file name and its path from the web root
    files = config_element(xml, "/config/files")
    @index_file = config_attribute(files, 'index_file')
    @index_path = config_attribute(files, 'index_path')
    @index_local_path = @web_local + @index_path + "/" + @index_file
    @index_remote_path = @web_remote + @index_path + "/" + @index_file

    # Files whose path matches this regexp are not searched. A missing or
    # empty attribute excludes nothing (an empty regexp would match every
    # path and silently exclude all files).
    exclude = files.attributes['exclude']
    @excluded_pattern = if exclude.nil? || exclude.empty?
                          nil
                        else
                          Regexp.new(exclude)
                        end

    # Each key's text is used as a regexp, compiled once here. Empty <key/>
    # elements are skipped: they have no text to search for.
    keywords = REXML::XPath.match(xml, "/config/keys/key").map { |element| element.text }
    keywords = keywords.reject { |text| text.nil? || text.empty? }.uniq
    @key_patterns = keywords.map { |text| [text, Regexp.new(text)] }
  end

  # Returns the first element matching path, or raises ArgumentError.
  def config_element(xml, path)
    element = REXML::XPath.first(xml, path)
    raise ArgumentError, "Config file has no #{path} element" if element.nil?

    element
  end

  # Returns the named attribute of element, or raises ArgumentError.
  def config_attribute(element, name)
    value = element.attributes[name]
    raise ArgumentError, "Config element <#{element.name}> has no '#{name}' attribute" if value.nil?

    value
  end

  # Returns every directory below the local web root, breadth first, followed
  # by the root itself. Built as a separate list: the original deleted from
  # the array it was iterating, which skipped the entry after each file.
  def directories_to_index
    found = []
    queue = subdirectories(@web_local)
    until queue.empty?
      dir = queue.shift
      found << dir
      queue.concat(subdirectories(dir))
    end
    found << @web_local
  end

  # Returns the directories directly inside dir.
  def subdirectories(dir)
    Dir[dir + "/**"].select { |entry| File.directory?(entry) }
  end

  # Returns the distinct keys occurring in file fn, in the order they are
  # first encountered (line by line, keys in config order within a line).
  def keys_in_file(fn)
    found = []
    File.foreach(fn) do |line|
      # Regexp matching raises ArgumentError on invalid byte sequences, which
      # binary files in the web tree will contain.
      text_line = line.scrub
      @key_patterns.each do |text, pattern|
        # Exact membership test: a substring test would treat "java" as
        # already seen once "javascript" had been recorded.
        next if found.include?(text)

        found << text if text_line =~ pattern
      end
      break if found.size == @key_patterns.size
    end
    found
  end
end

#---- make the class and call main
config_path = ARGV[0]
if config_path.nil?
  puts "Requires first [ and only ] argument to be the path to the config file"
elsif !File.exist?(config_path)
  puts "Cannot find " + config_path
else
  i = WEBIndexer.new(config_path)
  i.main
end