<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" /><style type="text/css"><!--
/* Commit summary box: the Revision / Author / Date definition list. */
#msg dl { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dt { float: left; width: 6em; font-weight: bold; }
/* Append a colon after every label so the markup can stay punctuation-free. */
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt;  }
/* Link states inside the summary box. These three rules share the same
   specificity, so source order decides the winner: :active has to come
   after :visited, otherwise a visited link never shows its active colour. */
#msg dl a { font-weight: bold}
#msg dl a:link    { color:#fc3; }
#msg dl a:visited { color:#cc6; }
#msg dl a:active  { color:#ff0; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
/* Log message block; scrolls instead of overflowing the page. */
#msg pre { overflow: auto; background: #ffc; border: 1px #fc0 solid; padding: 6px; }
/* Note: the bare "pre" selector is document-wide, so it also covers the diff blocks. */
#msg ul, pre { overflow: auto; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
/* Diff section: one bordered container per changed file, with an h4 title bar. */
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff  {padding:10px 0;}
/* Every diff line is wrapped in a span, ins or del; display:block gives each
   one a full-width row so its background colour spans the whole line. */
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
/* Added lines (ins) and removed lines (del): tinted background, no underline or strike-through. */
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
/* Hunk markers (.lines) and the old/new file header (.info): grey text on white. */
#patch .lines, .info {color:#888;background:#fff;}
--></style>
<title>[605] trunk/alexandria: provider Deastore, working well enough</title>
</head>
<body>

<div id="msg">
<dl>
<dt>Revision</dt> <dd>605</dd>
<dt>Author</dt> <dd>laurusnobilis</dd>
<dt>Date</dt> <dd>2007-02-16 13:54:50 -0500 (Fri, 16 Feb 2007)</dd>
</dl>

<h3>Log Message</h3>
<pre>provider Deastore, working well enough</pre>

<h3>Modified Paths</h3>
<ul>
<li><a href="#trunkalexandrialibalexandriabook_providersdea_store_itrb">trunk/alexandria/lib/alexandria/book_providers/dea_store_it.rb</a></li>
<li><a href="#trunkalexandriateststest_providersrb">trunk/alexandria/tests/test_providers.rb</a></li>
</ul>

</div>
<div id="patch">
<h3>Diff</h3>
<a id="trunkalexandrialibalexandriabook_providersdea_store_itrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/lib/alexandria/book_providers/dea_store_it.rb (604 => 605)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/lib/alexandria/book_providers/dea_store_it.rb        2007-02-16 04:06:46 UTC (rev 604)
+++ trunk/alexandria/lib/alexandria/book_providers/dea_store_it.rb        2007-02-16 18:54:50 UTC (rev 605)
</span><span class="lines">@@ -16,17 +16,21 @@
</span><span class="cx"> # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
</span><span class="cx"> # Boston, MA 02111-1307, USA.
</span><span class="cx"> 
</span><ins>+require 'fileutils'
</ins><span class="cx"> require 'net/http'
</span><ins>+require 'mechanize'
</ins><span class="cx"> #require 'cgi'
</span><del>-require 'mechanize'
</del><span class="cx"> 
</span><span class="cx"> module Alexandria
</span><span class="cx"> class BookProviders
</span><span class="cx">     class DeaStore_itProvider &lt; GenericProvider
</span><span class="cx">         BASE_URI = &quot;http://www.deastore.com&quot;
</span><ins>+        CACHE_DIR = File.join(Alexandria::Library::DIR, '.deastore_it_cache')
</ins><span class="cx">         def initialize
</span><span class="cx">             super(&quot;DeaStore_it&quot;, &quot;DeaStore Italia&quot;)
</span><ins>+            FileUtils.mkdir_p(CACHE_DIR) unless File.exists?(CACHE_DIR)            
</ins><span class="cx">             # no preferences for the moment
</span><ins>+            at_exit { clean_cache }
</ins><span class="cx">         end
</span><span class="cx">         
</span><span class="cx">         def search(criterion, type)
</span><span class="lines">@@ -49,21 +53,27 @@
</span><span class="cx"> 
</span><span class="cx">             end
</span><span class="cx">             
</span><ins>+if type == SEARCH_BY_ISBN
+            req += Library.canonicalise_isbn(criterion)
+else
</ins><span class="cx">             req += CGI.escape(criterion)
</span><ins>+end
</ins><span class="cx">             p req if $DEBUG
</span><del>-                        agent = WWW::Mechanize.new
-                        agent.user_agent_alias = 'Mac Safari'
-                #data = transport.get(URI.parse(req))
-                data = agent.get(URI.parse(req))
-                data = agent.get(URI.parse(req)) rescue data = agent.get(URI.parse(req)) #try again
</del><ins>+
+            agent = WWW::Mechanize.new
+            agent.user_agent_alias = 'Mac Safari'
+            #data = transport.get(URI.parse(req))
+            data = agent.get(URI.parse(req)).content
+
</ins><span class="cx">             if type == SEARCH_BY_ISBN
</span><span class="cx">                 to_book(data) #rescue raise NoResultsError
</span><span class="cx">             else
</span><span class="cx">                 begin
</span><span class="cx">                     results = [] 
</span><span class="cx">                     each_book_page(data) do |code, title|
</span><del>-                        #results &lt;&lt; to_book(transport.get(URI.parse(&quot;http://www.internetbookshop.it/ser/serdsp.asp?c=&quot; + code)))
-                        results &lt;&lt; to_book(agent.get(URI.parse(&quot;http://www.internetbookshop.it/ser/serdsp.asp?c=&quot; + code)))
</del><ins>+                        agent = WWW::Mechanize.new
+                        agent.user_agent_alias = 'Mac Safari'
+                        results &lt;&lt; to_book(agent.get(URI.parse(BASE_URI + &quot;/&quot; + code)).content)
</ins><span class="cx">                     end
</span><span class="cx">                     return results 
</span><span class="cx">                 rescue
</span><span class="lines">@@ -74,7 +84,7 @@
</span><span class="cx"> 
</span><span class="cx">         def url(book)
</span><span class="cx">             return nil unless book.isbn
</span><del>-            BASE_URI + &quot;/product.asp?isbn=&quot; + book.isbn
</del><ins>+            BASE_URI + &quot;/product.asp?isbn=&quot; + Library.canonicalise_isbn(book.isbn)
</ins><span class="cx">         end
</span><span class="cx"> 
</span><span class="cx">         #######
</span><span class="lines">@@ -82,42 +92,70 @@
</span><span class="cx">         #######
</span><span class="cx">     
</span><span class="cx">         def to_book(data)
</span><del>-                        data = data.content
-        data = data.convert(&quot;UTF-8&quot;, &quot;windows-1252&quot;)
</del><ins>+            data = data.convert(&quot;UTF-8&quot;, &quot;windows-1252&quot;)
</ins><span class="cx"> 
</span><del>-            raise &quot;No title.&quot; unless md = /&lt;span class=&quot;BDtitoloLibro&quot;&gt; (.+)&lt;\/span&gt;/.match(data)
</del><ins>+            raise &quot;No title.&quot; unless md = /&lt;span class=&quot;BDtitoloLibro&quot;&gt;([^&lt;]+)/.match(data)
</ins><span class="cx">             title = CGI.unescape(md[1].strip)
</span><span class="cx"> 
</span><span class="cx">             authors = []
</span><del>-            if md = /&lt;span class=&quot;BDauthLibro&quot;&gt;by:(.+)&lt;\/span&gt;&lt;span class=&quot;BDformatoLibro&quot;&gt;/.match(data)
-                md[1].strip.split('; ').each { |a| authors &lt;&lt; CGI.unescape(a.strip) }
</del><ins>+            if md = /&lt;span class=&quot;BDauthLibro&quot;&gt;by:([^&lt;]+)/.match(data)
+                md[1].strip.split('- ').each { |a| authors &lt;&lt; CGI.unescape(a.strip) }
</ins><span class="cx">             end
</span><span class="cx"> 
</span><del>-            raise &quot;No ISBN&quot; unless md = /&lt;span class=&quot;isbn&quot;&gt;(.+)&lt;\/span&gt;&lt;br \/&gt;/.match(data)
</del><ins>+            raise &quot;No ISBN&quot; unless md = /&lt;span class=&quot;BDEticLibro&quot;&gt;ISBN 13: &lt;\/span&gt;&lt;span class=&quot;isbn&quot;&gt;([^&lt;]+)/.match(data)
</ins><span class="cx">             isbn = md[1].strip.gsub!(&quot;-&quot;,&quot;&quot;)
</span><span class="cx"> 
</span><del>-            raise &quot;No Publisher&quot; unless md = /&lt;span class=&quot;BDEticLibro&quot;&gt;Publisher &amp;amp; Imprint&lt;\/span&gt;(.+)&lt;\/p&gt;/.match(data)
</del><ins>+            raise &quot;No Publisher&quot; unless md = /&lt;span class=&quot;BDeditoreLibro&quot;&gt;([^&lt;]+)/.match(data)
</ins><span class="cx">                 publisher = CGI.unescape(md[1].strip)
</span><span class="cx"> 
</span><del>-            unless md = /&lt;strong&gt;More info&lt;\/strong&gt;&lt;\/font&gt;&lt;br&gt;&lt;font face=&quot;Verdana, Geneva, Arial, Helvetica, sans-serif&quot; style=&quot;font-size : 7.5pt;&quot; size=&quot;1&quot;&gt;([^&lt;]+)/.match(data)
</del><ins>+            unless md = /&lt;span class=&quot;BDEticLibro&quot;&gt;More info&lt;\/span&gt;&lt;br \/&gt;([^&lt;]+)/.match(data)
</ins><span class="cx">                     edition = nil
</span><span class="cx">             else
</span><span class="cx">                     edition = CGI.unescape(md[1].strip)
</span><span class="cx">             end
</span><span class="cx"> 
</span><del>-                        publish_year = 0
-            if data =~ /Ingrandire immagine/
-                        small_cover = &quot;http://www.deastore.com/covers/ie_cd1/batch1/&quot; + isbn + &quot;.jpg&quot;
-                        medium_cover = &quot;http://www.deastore.com/covers/ie_cd1/batch2/&quot; + isbn + &quot;.jpg&quot;
-                        # big_cover = &quot;http://www.deastore.com/covers/ie_cd1/batch3/&quot; + isbn + &quot;.jpg&quot;
-                    return [ Book.new(title, authors, isbn, publisher, edition),medium_cover ]
</del><ins>+            publish_year = nil
+            if md = /&lt;span class=&quot;BDdataPubbLibro&quot;&gt;([^&lt;]+)/.match(data)
+                publish_year = CGI.unescape(md[1].strip)[-4 .. -1].to_i
+                publish_year = nil if publish_year == 0 or publish_year == 1900
</ins><span class="cx">             end
</span><del>-                return [ Book.new(title, authors, isbn, publisher, publish_year, edition)]
</del><ins>+
+  if md = /&lt;div class=&quot;imageLg&quot;&gt;&lt;a href=&quot;javascript:void\(''\);&quot; onclick=&quot;popUpCover\('\/covers\/([^\/]+)/.match(data)
+            cover_url = BASE_URI + &quot;/covers/&quot; + md[1].strip + &quot;/batch1/&quot; + Library.canonicalise_isbn(isbn) + &quot;.jpg&quot; # use batch2 or batch3 for bigger images
+
+            cover_filename = isbn + &quot;.tmp&quot;
+            Dir.chdir(CACHE_DIR) do
+                File.open(cover_filename, &quot;w&quot;) do |file|
+                    agent = WWW::Mechanize.new
+                    agent.user_agent_alias = 'Mac Safari'
+                    file.write agent.get(URI.parse(cover_url)).content
+                end                    
+            end
+
+            medium_cover = CACHE_DIR + &quot;/&quot; + cover_filename
+            if File.size(medium_cover) &gt; 0
+                puts medium_cover + &quot; has non-0 size&quot; if $DEBUG
+                return [ Book.new(title, authors, isbn, publisher, publish_year, edition),medium_cover ]
+            end
+            puts medium_cover + &quot; has 0 size, removing ...&quot; if $DEBUG
+            File.delete(medium_cover)
+  end
+            return [ Book.new(title, authors, isbn, publisher, publish_year, edition) ]
</ins><span class="cx">         end
</span><span class="cx"> 
</span><span class="cx">         def each_book_page(data)
</span><del>-                raise if data.scan(/&lt;a href=&quot;http:\/\/www.internetbookshop.it\/ser\/serdsp.asp\?shop=1&amp;amp;c=([\w\d]+)&quot;&gt;&lt;b&gt;([^&lt;]+)/) { |a| yield a}.empty?
</del><ins>+                raise if data.scan(/&lt;span class=&quot;BDtitoloLibro&quot;&gt;&lt;a href=&quot;([^&quot;]+)/) { |a| yield a}.empty?
</ins><span class="cx">         end
</span><ins>+    
+        def clean_cache
+            #FIXME begin ... rescue ... end?
+            Dir.chdir(CACHE_DIR) do
+                Dir.glob(&quot;*.tmp&quot;) do |file|
+                    puts &quot;removing &quot; + file if $DEBUG
+                    File.delete(file)    
+                end
+            end
+        end
</ins><span class="cx">     end
</span><span class="cx"> end
</span><span class="cx"> end
</span></span></pre></div>
<a id="trunkalexandriateststest_providersrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/tests/test_providers.rb (604 => 605)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/tests/test_providers.rb        2007-02-16 04:06:46 UTC (rev 604)
+++ trunk/alexandria/tests/test_providers.rb        2007-02-16 18:54:50 UTC (rev 605)
</span><span class="lines">@@ -70,12 +70,11 @@
</span><span class="cx">                         '3896673300') 
</span><span class="cx">     end
</span><span class="cx"> 
</span><del>-    def test_ibs_it_1 # this tests a book without image but with author
</del><ins>+    def test_ibs_it
+        # this tests a book without image but with author
</ins><span class="cx">         __test_provider(Alexandria::BookProviders::IBS_itProvider,
</span><span class="cx">                         '9788886973816') 
</span><del>-    end
-    
-    def test_ibs_it_2 # this tests a book with image but without author
</del><ins>+        # this tests a book with image but without author
</ins><span class="cx">         __test_provider(Alexandria::BookProviders::IBS_itProvider,
</span><span class="cx">                         '9788807710148') 
</span><span class="cx">     end
</span><span class="lines">@@ -92,7 +91,9 @@
</span><span class="cx"> 
</span><span class="cx">     def test_dea
</span><span class="cx">         __test_provider(Alexandria::BookProviders::DeaStore_itProvider,
</span><del>-                        '881701298X') 
</del><ins>+                        '9788817012980') 
+        __test_provider(Alexandria::BookProviders::DeaStore_itProvider,
+                        '9788806134747') 
</ins><span class="cx">     end
</span><span class="cx"> 
</span><span class="cx">     def test_bol
</span><span class="lines">@@ -101,8 +102,18 @@
</span><span class="cx">     end
</span><span class="cx"> 
</span><span class="cx">     def test_webster
</span><ins>+        # BIT
</ins><span class="cx">         __test_provider(Alexandria::BookProviders::Webster_itProvider,
</span><span class="cx">                         '9788817012980') 
</span><ins>+        # BUK
+        __test_provider(Alexandria::BookProviders::Webster_itProvider,
+                        '9781853260803') 
+        # BUS
+        __test_provider(Alexandria::BookProviders::Webster_itProvider,
+                        '9780307237699') 
+        # BDE
+        __test_provider(Alexandria::BookProviders::Webster_itProvider,
+                        '9783442460878') 
</ins><span class="cx">     end
</span><span class="cx"> 
</span><span class="cx">     def test_renaud
</span></span></pre>
</div>
</div>

</body>
</html>