<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" /><style type="text/css"><!--
/* Commit summary box: revision, author and date rendered as a definition list. */
#msg dl { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt;  }
/* Link states are listed in link, visited, active order. All three rules have
   the same specificity, so the last matching rule wins; :active has to come
   after :visited or a visited link would never show its active colour. */
#msg dl a { font-weight: bold}
#msg dl a:link    { color:#fc3; }
#msg dl a:visited { color:#cc6; }
#msg dl a:active  { color:#ff0; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
/* Log message and path lists scroll instead of overflowing the message box. */
#msg pre { overflow: auto; background: #ffc; border: 1px #fc0 solid; padding: 6px; }
#msg ul, pre { overflow: auto; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
/* Diff area: one bordered box per changed file, with a coloured heading bar. */
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff  {padding:10px 0;}
/* Every diff fragment is a block so its background spans the full line width. */
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
/* Added lines are green, removed lines are red; the default underline and
   strike-through decorations are switched off. */
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
/* File header and hunk header lines. .info is scoped to #patch like .lines so
   the rule cannot leak onto unrelated elements that use the same class name. */
#patch .lines, #patch .info {color:#888;background:#fff;}
--></style>
<title>[608] trunk/alexandria: new provider worldcat.org</title>
</head>
<body>

<div id="msg">
  <!-- Commit metadata: revision number, committer and commit timestamp. -->
  <dl>
    <dt>Revision</dt>
    <dd>608</dd>
    <dt>Author</dt>
    <dd>laurusnobilis</dd>
    <dt>Date</dt>
    <dd>2007-02-19 04:52:35 -0500 (Mon, 19 Feb 2007)</dd>
  </dl>

  <!-- Commit log message, shown verbatim. -->
  <h3>Log Message</h3>
  <pre>new provider worldcat.org</pre>

  <!-- Changed paths; each entry links to the anchor of its diff further down the page. -->
  <h3>Modified Paths</h3>
  <ul>
    <li><a href="#trunkalexandrialibalexandriabook_providersrb">trunk/alexandria/lib/alexandria/book_providers.rb</a></li>
    <li><a href="#trunkalexandriateststest_providersrb">trunk/alexandria/tests/test_providers.rb</a></li>
  </ul>

  <h3>Added Paths</h3>
  <ul>
    <li><a href="#trunkalexandrialibalexandriabook_providersworldcatrb">trunk/alexandria/lib/alexandria/book_providers/worldcat.rb</a></li>
  </ul>
</div>
<div id="patch">
<h3>Diff</h3>
<a id="trunkalexandrialibalexandriabook_providersworldcatrb"></a>
<div class="addfile"><h4>Added: trunk/alexandria/lib/alexandria/book_providers/worldcat.rb (0 => 608)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/lib/alexandria/book_providers/worldcat.rb                                (rev 0)
+++ trunk/alexandria/lib/alexandria/book_providers/worldcat.rb        2007-02-19 09:52:35 UTC (rev 608)
</span><span class="lines">@@ -0,0 +1,153 @@
</span><ins>+# Copyright (C) 2007 Marco Costantini
+# based on ibs_it.rb by Claudio Belotti
+#
+# Alexandria is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# Alexandria is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with Alexandria; see the file COPYING.  If not,
+# write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+# http://en.wikipedia.org/wiki/WorldCat
+# See http://www.oclc.org/worldcat/policies/terms/
+
+require 'fileutils'
+require 'net/http'
+require 'open-uri'
+#require 'cgi'
+
+module Alexandria
+class BookProviders
+    class WorldcatProvider &lt; GenericProvider
+        BASE_URI = &quot;http://worldcat.org&quot;
+        CACHE_DIR = File.join(Alexandria::Library::DIR, '.worldcat_cache')
+        REFERER = BASE_URI
+        def initialize
+            super(&quot;Worldcat&quot;, &quot;Worldcat&quot;)
+            FileUtils.mkdir_p(CACHE_DIR) unless File.exists?(CACHE_DIR)            
+            # no preferences for the moment
+            at_exit { clean_cache }
+        end
+        
+        def search(criterion, type)
+            req = BASE_URI + &quot;/&quot;
+            req += case type
+                when SEARCH_BY_ISBN
+                    &quot;isbn/&quot;
+
+                when SEARCH_BY_TITLE
+                    &quot;search?q=ti%3A&quot;
+
+                when SEARCH_BY_AUTHORS
+                    &quot;search?q=au%3A&quot;
+
+                when SEARCH_BY_KEYWORD
+                    &quot;search?q=&quot;
+
+                else
+                    raise InvalidSearchTypeError
+
+            end
+            
+            # this provider supports both isbn-10 and isbn-13
+            req += CGI.escape(criterion)
+            p req if $DEBUG
+                data = transport.get(URI.parse(req))
+            if type == SEARCH_BY_ISBN
+                to_book(data) #rescue raise NoResultsError
+            else
+                begin
+                    results = [] 
+                    each_book_page(data) do |code, title|
+                        results &lt;&lt; to_book(transport.get(URI.parse(BASE_URI + &quot;/oclc/&quot; + code)))
+                    end
+                    return results 
+                rescue
+                    raise NoResultsError
+                end
+            end
+        end
+
+        def url(book)
+            return nil unless book.isbn
+            BASE_URI + &quot;/isbn/&quot; + book.isbn
+        end
+
+        #######
+        private
+        #######
+    
+        def to_book(data)
+
+            raise unless md = /&lt;h1 class=&quot;title&quot;&gt; (&lt;div class=vernacular lang=&quot;[^&quot;]+&quot;&gt;)?([^&lt;]+)/.match(data)
+            title = CGI.unescape(md[2].strip)
+
+            authors = []
+            if md = /title=&quot;Search for more by this author&quot;&gt;([^&lt;]+)/.match(data)
+                 authors = [CGI.unescape(md[1].strip)]
+#                 md[1].strip.split(', ').each { |a| authors &lt;&lt; CGI.unescape(a.strip) }
+            end
+
+            raise unless md = /&lt;strong&gt;ISBN: &lt;\/strong&gt;\w+\W+(\d+)\D/.match(data)
+            isbn = md[1].strip
+
+# The provider returns
+# City : Publisher[ ; City2 : Publisher2], *year? [&amp;copy;year]
+# currently the match is not good in case of City2 : Publisher2 and in case of &amp;copy;year
+
+            if md = /&lt;li class=&quot;publisher&quot;&gt;&lt;strong&gt;Publisher: &lt;\/strong&gt;[^:&lt;]+ : ([^&lt;]+), [^,&lt;]*(\d\d\d\d).?&lt;\/li&gt;/.match(data)
+                publisher = CGI.unescape(md[1].strip)
+                publish_year = CGI.unescape(md[2].strip)[-4 .. -1].to_i
+                publish_year = nil if publish_year == 0
+            else
+                publisher = nil
+                publish_year = nil
+             end
+
+             edition = nil
+
+
+  if md = /&lt;td class=&quot;illustration&quot;&gt;&lt;img src=&quot;([^&quot;]+)/.match(data)
+            cover_url = BASE_URI + md[1].strip
+            cover_filename = isbn + &quot;.tmp&quot;
+            Dir.chdir(CACHE_DIR) do
+                File.open(cover_filename, &quot;w&quot;) do |file|
+                    file.write open(cover_url, &quot;Referer&quot; =&gt; REFERER ).read
+                end                    
+            end
+
+            medium_cover = CACHE_DIR + &quot;/&quot; + cover_filename
+            if File.size(medium_cover) &gt; 0
+                puts medium_cover + &quot; has non-0 size&quot; if $DEBUG
+                return [ Book.new(title, authors, isbn, publisher, publish_year, edition),medium_cover ]
+            end
+            puts medium_cover + &quot; has 0 size, removing ...&quot; if $DEBUG
+            File.delete(medium_cover)
+  end
+            return [ Book.new(title, authors, isbn, publisher, publish_year, edition) ]
+        end
+
+        def each_book_page(data)
+            raise if data.scan(/&lt;div class=&quot;name&quot;&gt;&lt;a href=&quot;\/oclc\/(\d+)&amp;/) { |a| yield a}.empty?
+        end
+    
+        def clean_cache
+            #FIXME begin ... rescue ... end?
+            Dir.chdir(CACHE_DIR) do
+                Dir.glob(&quot;*.tmp&quot;) do |file|
+                    puts &quot;removing &quot; + file if $DEBUG
+                    File.delete(file)    
+                end
+            end
+        end
+    end
+end
+end
</ins></span></pre></div>
<a id="trunkalexandrialibalexandriabook_providersrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/lib/alexandria/book_providers.rb (607 => 608)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/lib/alexandria/book_providers.rb        2007-02-18 15:07:36 UTC (rev 607)
+++ trunk/alexandria/lib/alexandria/book_providers.rb        2007-02-19 09:52:35 UTC (rev 608)
</span><span class="lines">@@ -239,6 +239,7 @@
</span><span class="cx">         require 'alexandria/book_providers/ls'
</span><span class="cx">         require 'alexandria/book_providers/bol_it'
</span><span class="cx">         require 'alexandria/book_providers/webster_it'
</span><ins>+        require 'alexandria/book_providers/worldcat'
</ins><span class="cx"> 
</span><span class="cx">         # Ruby/Amazon is optional
</span><span class="cx">         begin
</span></span></pre></div>
<a id="trunkalexandriateststest_providersrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/tests/test_providers.rb (607 => 608)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/tests/test_providers.rb        2007-02-18 15:07:36 UTC (rev 607)
+++ trunk/alexandria/tests/test_providers.rb        2007-02-19 09:52:35 UTC (rev 608)
</span><span class="lines">@@ -121,4 +121,12 @@
</span><span class="cx">                         '9782894723388') 
</span><span class="cx">     end
</span><span class="cx"> 
</span><ins>+    def test_worldcat
+        __test_provider(Alexandria::BookProviders::WorldcatProvider,
+                        '9780521247108') 
+        # this one is with &lt;div class=vernacular lang=&quot;[^&quot;]+&quot;&gt;)
+        __test_provider(Alexandria::BookProviders::WorldcatProvider,
+                        '9785941454136') 
+    end
+
</ins><span class="cx"> end
</span></span></pre>
</div>
</div>

</body>
</html>