#!/usr/bin/ruby
###################################################################
#
# Author : Tim Hobbs
# Date : 11/12/2004
# Site : http://www.pahanna.com
# Email : uniquely_tim@yahoo.com
#
# Version 1.01 on 11/24/2004
#
# Change Log :
#
# 11/24/2004 Added the config file for setup
# 11/24/2004 Wrapped in a class
#
# Timothy's Goods and Services License
# Copyright 2004 Timothy Hobbs. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# --------------------------- DISCLAIMER -------------------------
#
# THIS SOFTWARE IS PROVIDED BY TIMOTHY HOBBS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
# TIMOTHY HOBBS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation
# are those of the author(s) and should not be interpreted as representing
# official policies, either expressed or implied, of anyone represented by the
# author(s) in an employment or 'work for hire' capacity.
#
#######################################################################
#####################
# Usage Information
#####################
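# Requires a config file like the following [ attribute values are placeholders ] :
#
# <config>
#   <ftp site="FTP-HOST" user="FTP-USER" password="FTP-PASSWORD" use="yes" />
#   <directories local="LOCAL-WEB-ROOT" remote="REMOTE-WEB-ROOT" />
#   <files index_file="INDEX-FILE-NAME" index_path="INDEX-PATH-FROM-WEB-ROOT" exclude="REGEXP-OF-FILES-TO-SKIP" />
#   <keys>
#     <key>source</key>
#     <key>calendar</key>
#     <key>ruby</key>
#     <key>time</key>
#     <key>xml</key>
#     <key>search</key>
#     <key>txmpc</key>
#     <key>javascript</key>
#     <key>gumstix</key>
#   </keys>
# </config>
#
# Set use="no" on the ftp element to skip the FTP upload.
#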
# Call Like "web_indexer.rb path-to-config-file"
######################
# REQUIRED MODULES
######################
require 'rexml/document'
include REXML
require 'net/ftp'
# Builds an XML index of which configured keywords occur in which files under a
# local web directory, and optionally uploads that index by FTP.
#
# All settings are read from the XML config file named by ARGV[0] while the
# class body is evaluated, so the script exits right here when the argument is
# missing or the file cannot be found.
class WEBIndexer
  ##################################################
  # CLASS LEVEL DEFINITIONS [ LOAD FROM XML FILE ]
  ##################################################
  #--- must have at least the config file name
  if ARGV[0].nil?
    puts "Requires first [ and only ] argument to be the path to the config file"
    exit
  end
  #--- File.exist? is used because File.exists? was removed in Ruby 3.2
  unless File.exist?(ARGV[0])
    puts "Cannot find " + ARGV[0]
    exit
  end
  #---- processing the xml input [ block form closes the config file once parsed ]
  xml = File.open(ARGV[0], "r") { |io| REXML::Document.new(io) }
  #---- first element at an XPath; stops with a readable message when it is absent
  section = lambda do |path|
    node = xml.elements.to_a(path).first
    abort("Config file " + ARGV[0] + " has no " + path + " element") if node.nil?
    node
  end
  #---- user authentication on your ftp site
  ftp_config = section.call("/config/ftp")
  @@ftp_site = ftp_config.attributes['site']
  @@ftp_user = ftp_config.attributes['user']
  @@ftp_password = ftp_config.attributes['password']
  #---- if "no", then file will not be uploaded to the ftp, anything else, it will
  @@use_ftp = ftp_config.attributes['use']
  #---- local and remote file paths
  directories_config = section.call("/config/directories")
  #---- the base of your local web files
  @@web_local = directories_config.attributes['local']
  #---- the base of your web site
  @@web_remote = directories_config.attributes['remote']
  #---- relevant files for search and output
  files_config = section.call("/config/files")
  #---- index output file name
  @@index_file = files_config.attributes['index_file']
  #---- index output path from web root
  @@index_path = files_config.attributes['index_path']
  #---- full path calculations
  @@index_local_path = @@web_local + @@index_path + "/" + @@index_file
  @@index_remote_path = @@web_remote + @@index_path + "/" + @@index_file
  #---- regular expression source; files whose path matches it are NOT searched
  @@excluded_files = files_config.attributes['exclude']
  #---- glob for the entries directly under the web root [ main no longer overwrites it ]
  @@root = @@web_local + "/**"
  #---- the keys to search [ each one is used as a regular expression source ]
  @@keywords = []
  REXML::XPath.each(xml, "/config/keys/key") do |element|
    #---- an empty <key/> has no text and cannot be compiled into a pattern
    @@keywords.push(element.text) unless element.text.nil?
  end

  ########################
  # PROGRAM MAIN LOOP
  ########################
  #---- Searches every directory below @@web_local [ and @@web_local itself,
  #---- last ], writes the XML index to @@index_local_path and hands it to
  #---- send_file for upload.
  def main
    #---- Create the output XML document. It needs a root element because sx
    #---- appends each <location> to @@doc.root.
    #---- NOTE(review): the root element name "locations" is an assumption;
    #---- confirm it against whatever consumes the index file.
    @@doc = REXML::Document.new
    @@doc.add_element("locations")
    #---- breadth-first walk collecting every sub directory
    directories = []
    pending = [@@web_local]
    until pending.empty?
      current = pending.shift
      Dir[current + "/**"].each do |entry|
        next unless File.directory?(entry)
        directories.push(entry)
        pending.push(entry)
      end
    end
    #---- add the original root directory
    directories.push(@@web_local)
    #---- search files and build the XML output; the list is complete before
    #---- iterating, so no directory is skipped
    directories.each { |dir| fx(dir + "/**") }
    #---- Save the XML file output [ block form closes the file ]
    File.open(@@index_local_path, "w") { |out| @@doc.write(out) }
    #---- FTP the file to the site
    send_file(@@index_local_path, @@index_remote_path)
  end

  #########################
  # FIND FILES TO SEARCH
  #########################
  #---- Searches each plain file matched by glob [ d ], in sorted order, unless
  #---- its path matches the exclude pattern, and prints the remote URL of every
  #---- file in which sx found at least one key.
  def fx(d)
    #---- compile the exclusion once per call; no exclude attribute means nothing is excluded
    excluded = @@excluded_files.nil? ? nil : Regexp.new(@@excluded_files)
    Dir[d].sort.each do |x|
      next if File.directory?(x)
      next if excluded && x =~ excluded
      #---- search the file and print results
      puts @@web_remote + x.sub(@@web_local, '') if sx(x)
    end
  end

  ##############
  # FTP A FILE
  ##############
  #---- Uploads [ local ] to [ remote ] as a text file, unless the config sets
  #---- use="no". An FTP protocol error is reported on stderr and not re-raised.
  def send_file(local, remote)
    return if @@use_ftp == "no"
    ftp = nil
    #---- send file by FTP to the web site
    begin
      ftp = Net::FTP.new(@@ftp_site)
      ftp.login(@@ftp_user, @@ftp_password)
      #---- to get past the firewall...
      ftp.passive = true
      ftp.puttextfile(local, remote)
    rescue Net::FTPError => e
      #---- use the message: a String cannot be concatenated with the exception itself
      $stderr.puts "FTP failed: " + e.message
    ensure
      ftp.close if ftp
    end
  end

  ##################################
  # ADD TEXT 'KEYS' TO XML OUTPUT
  ##################################
  #---- Searches file [ fn ] line by line for every keyword. When at least one
  #---- keyword matches, a <location> element holding one <key> per matched
  #---- keyword is appended to the root of @@doc [ main must have created it ].
  #---- Returns true when something was found, false otherwise.
  def sx(fn)
    #---- compile each keyword once instead of once per line
    patterns = @@keywords.map { |text| [text, Regexp.new(text)] }
    #---- exact record of the keys already added, so a key that is a substring
    #---- of an earlier one [ "java" after "javascript" ] is still reported
    seen = {}
    keys = REXML::Element.new("keys")
    File.open(fn, "r") do |f|
      f.each do |line|
        patterns.each do |text, pattern|
          #--- process only keys we have not seen before
          next if seen[text]
          next unless line =~ pattern
          seen[text] = true
          #---- add XML for the new key
          key = REXML::Element.new("key")
          key.attributes["desc"] = text
          keys << key
        end
      end
    end
    return false if seen.empty?
    #---- we found some keys, so add this group under the document root
    relative = fn.sub(@@web_local, '')
    location = REXML::Element.new("location")
    location << keys
    location.attributes["url"] = relative
    location.attributes["desc"] = relative
    @@doc.root << location
    true
  end
end
#---- instantiate the indexer and run its main loop straight away
WEBIndexer.new.main