<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" /><style type="text/css"><!--
#msg dl { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; }
#msg dl a { font-weight: bold}
#msg dl a:link { color:#fc3; }
#msg dl a:active { color:#ff0; }
#msg dl a:visited { color:#cc6; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
#msg pre { overflow: auto; background: #ffc; border: 1px #fc0 solid; padding: 6px; }
#msg ul, pre { overflow: auto; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff {padding:10px 0;}
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
#patch .lines, .info {color:#888;background:#fff;}
--></style>
<title>[608] trunk/alexandria: new provider worldcat.org</title>
</head>
<body>
<div id="msg">
<dl>
<dt>Revision</dt> <dd>608</dd>
<dt>Author</dt> <dd>laurusnobilis</dd>
<dt>Date</dt> <dd>2007-02-19 04:52:35 -0500 (Mon, 19 Feb 2007)</dd>
</dl>
<h3>Log Message</h3>
<pre>new provider worldcat.org</pre>
<h3>Modified Paths</h3>
<ul>
<li><a href="#trunkalexandrialibalexandriabook_providersrb">trunk/alexandria/lib/alexandria/book_providers.rb</a></li>
<li><a href="#trunkalexandriateststest_providersrb">trunk/alexandria/tests/test_providers.rb</a></li>
</ul>
<h3>Added Paths</h3>
<ul>
<li><a href="#trunkalexandrialibalexandriabook_providersworldcatrb">trunk/alexandria/lib/alexandria/book_providers/worldcat.rb</a></li>
</ul>
</div>
<div id="patch">
<h3>Diff</h3>
<a id="trunkalexandrialibalexandriabook_providersworldcatrb"></a>
<div class="addfile"><h4>Added: trunk/alexandria/lib/alexandria/book_providers/worldcat.rb (0 => 608)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/lib/alexandria/book_providers/worldcat.rb         (rev 0)
+++ trunk/alexandria/lib/alexandria/book_providers/worldcat.rb        2007-02-19 09:52:35 UTC (rev 608)
</span><span class="lines">@@ -0,0 +1,153 @@
</span><ins>+# Copyright (C) 2007 Marco Costantini
+# based on ibs_it.rb by Claudio Belotti
+#
+# Alexandria is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# Alexandria is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with Alexandria; see the file COPYING. If not,
+# write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+# http://en.wikipedia.org/wiki/WorldCat
+# See http://www.oclc.org/worldcat/policies/terms/
+
+require 'fileutils'
+require 'net/http'
+require 'open-uri'
+#require 'cgi'
+
+module Alexandria
+class BookProviders
+ class WorldcatProvider &lt; GenericProvider
+ BASE_URI = "http://worldcat.org"
+ CACHE_DIR = File.join(Alexandria::Library::DIR, '.worldcat_cache')
+ REFERER = BASE_URI
+ def initialize
+ super("Worldcat", "Worldcat")
+ FileUtils.mkdir_p(CACHE_DIR) unless File.exists?(CACHE_DIR)
+ # no preferences for the moment
+ at_exit { clean_cache }
+ end
+
+ def search(criterion, type)
+ req = BASE_URI + "/"
+ req += case type
+ when SEARCH_BY_ISBN
+ "isbn/"
+
+ when SEARCH_BY_TITLE
+ "search?q=ti%3A"
+
+ when SEARCH_BY_AUTHORS
+ "search?q=au%3A"
+
+ when SEARCH_BY_KEYWORD
+ "search?q="
+
+ else
+ raise InvalidSearchTypeError
+
+ end
+
+ # this provider supports both isbn-10 and isbn-13
+ req += CGI.escape(criterion)
+ p req if $DEBUG
+         data = transport.get(URI.parse(req))
+ if type == SEARCH_BY_ISBN
+ to_book(data) #rescue raise NoResultsError
+ else
+ begin
+ results = []
+ each_book_page(data) do |code, title|
+ results &lt;&lt; to_book(transport.get(URI.parse(BASE_URI + "/oclc/" + code)))
+ end
+ return results
+ rescue
+ raise NoResultsError
+ end
+ end
+ end
+
+ def url(book)
+ return nil unless book.isbn
+ BASE_URI + "/isbn/" + book.isbn
+ end
+
+ #######
+ private
+ #######
+
+ def to_book(data)
+
+ raise unless md = /&lt;h1 class="title"&gt; (&lt;div class=vernacular lang="[^"]+"&gt;)?([^&lt;]+)/.match(data)
+ title = CGI.unescape(md[2].strip)
+
+         authors = []
+ if md = /title="Search for more by this author"&gt;([^&lt;]+)/.match(data)
+ authors = [CGI.unescape(md[1].strip)]
+# md[1].strip.split(', ').each { |a| authors << CGI.unescape(a.strip) }
+ end
+
+ raise unless md = /&lt;strong&gt;ISBN: &lt;\/strong&gt;\w+\W+(\d+)\D/.match(data)
+ isbn = md[1].strip
+
+# The provider returns
+# City : Publisher[ ; City2 : Publisher2], *year? [&amp;copy;year]
+# currently the match is not good in case of City2 : Publisher2 and in case of &amp;copy;year
+
+ if md = /&lt;li class="publisher"&gt;&lt;strong&gt;Publisher: &lt;\/strong&gt;[^:&lt;]+ : ([^&lt;]+), [^,&lt;]*(\d\d\d\d).?&lt;\/li&gt;/.match(data)
+         publisher = CGI.unescape(md[1].strip)
+ publish_year = CGI.unescape(md[2].strip)[-4 .. -1].to_i
+ publish_year = nil if publish_year == 0
+ else
+ publisher = nil
+ publish_year = nil
+ end
+
+ edition = nil
+
+
+ if md = /&lt;td class="illustration"&gt;&lt;img src="([^"]+)/.match(data)
+ cover_url = BASE_URI + md[1].strip
+ cover_filename = isbn + ".tmp"
+ Dir.chdir(CACHE_DIR) do
+ File.open(cover_filename, "w") do |file|
+ file.write open(cover_url, "Referer" => REFERER ).read
+ end
+ end
+
+ medium_cover = CACHE_DIR + "/" + cover_filename
+ if File.size(medium_cover) > 0
+ puts medium_cover + " has non-0 size" if $DEBUG
+ return [ Book.new(title, authors, isbn, publisher, publish_year, edition),medium_cover ]
+ end
+ puts medium_cover + " has 0 size, removing ..." if $DEBUG
+ File.delete(medium_cover)
+ end
+ return [ Book.new(title, authors, isbn, publisher, publish_year, edition) ]
+ end
+
+ def each_book_page(data)
+ raise if data.scan(/&lt;div class="name"&gt;&lt;a href="\/oclc\/(\d+)&amp;/) { |a| yield a}.empty?
+ end
+
+ def clean_cache
+ #FIXME begin ... rescue ... end?
+ Dir.chdir(CACHE_DIR) do
+ Dir.glob("*.tmp") do |file|
+ puts "removing " + file if $DEBUG
+ File.delete(file)
+ end
+ end
+ end
+ end
+end
+end
</ins></span></pre></div>
<a id="trunkalexandrialibalexandriabook_providersrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/lib/alexandria/book_providers.rb (607 => 608)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/lib/alexandria/book_providers.rb        2007-02-18 15:07:36 UTC (rev 607)
+++ trunk/alexandria/lib/alexandria/book_providers.rb        2007-02-19 09:52:35 UTC (rev 608)
</span><span class="lines">@@ -239,6 +239,7 @@
</span><span class="cx"> require 'alexandria/book_providers/ls'
</span><span class="cx"> require 'alexandria/book_providers/bol_it'
</span><span class="cx"> require 'alexandria/book_providers/webster_it'
</span><ins>+ require 'alexandria/book_providers/worldcat'
</ins><span class="cx">
</span><span class="cx"> # Ruby/Amazon is optional
</span><span class="cx"> begin
</span></span></pre></div>
<a id="trunkalexandriateststest_providersrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/tests/test_providers.rb (607 => 608)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/tests/test_providers.rb        2007-02-18 15:07:36 UTC (rev 607)
+++ trunk/alexandria/tests/test_providers.rb        2007-02-19 09:52:35 UTC (rev 608)
</span><span class="lines">@@ -121,4 +121,12 @@
</span><span class="cx"> '9782894723388')
</span><span class="cx"> end
</span><span class="cx">
</span><ins>+ def test_worldcat
+ __test_provider(Alexandria::BookProviders::WorldcatProvider,
+ '9780521247108')
+ # this one is with &lt;div class=vernacular lang="[^"]+"&gt;)
+ __test_provider(Alexandria::BookProviders::WorldcatProvider,
+ '9785941454136')
+ end
+
</ins><span class="cx"> end
</span></span></pre>
</div>
</div>
</body>
</html>