<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" /><style type="text/css"><!--
/* ==== Commit summary box (#msg): metadata list, log message, changed paths ==== */
#msg dl { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
/* Labels float left in a fixed 6em column and get a generated ':' suffix. */
#msg dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; }
#msg dl a { font-weight: bold}
/* Link states are declared in link, visited, active order. The three selectors
   have equal specificity, so the last matching rule wins; :active has to follow
   :visited or its colour would never show while a visited link is activated. */
#msg dl a:link    { color:#fc3; }
#msg dl a:visited { color:#cc6; }
#msg dl a:active  { color:#ff0; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
/* Log message block: scrolls instead of overflowing when lines are too wide. */
#msg pre { overflow: auto; background: #ffc; border: 1px #fc0 solid; padding: 6px; }
#msg ul, pre { overflow: auto; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }

/* ==== Diff area (#patch): one bordered box per changed file ==== */
#patch { width: 100%; }
/* Per-file heading bar. */
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff  {padding:10px 0;}
/* Spans, ins and del are rendered as blocks with the same horizontal padding so
   their backgrounds cover the full row width. */
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
/* Added lines: green background; removed lines: red background; the default
   underline / strike-through decoration is switched off. */
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
/* Hunk headers (.lines) and file header lines (.info) are grey on white. */
#patch .lines, .info {color:#888;background:#fff;}
--></style>
<title>[592] trunk/alexandria: provider bol.it, now working well enough</title>
</head>
<body>

<!-- Commit summary: revision metadata, the log message, and one in-page link per
     modified path. Each href targets the id of the empty anchor element placed
     immediately before that file's diff in the "patch" section below. -->
<div id="msg">
<dl>
<dt>Revision</dt> <dd>592</dd>
<dt>Author</dt> <dd>laurusnobilis</dd>
<dt>Date</dt> <dd>2007-02-15 02:54:27 -0500 (Thu, 15 Feb 2007)</dd>
</dl>

<h3>Log Message</h3>
<pre>provider bol.it, now working well enough</pre>

<h3>Modified Paths</h3>
<ul>
<li><a href="#trunkalexandrialibalexandriabook_providersbol_itrb">trunk/alexandria/lib/alexandria/book_providers/bol_it.rb</a></li>
<li><a href="#trunkalexandrialibalexandriabook_providersrb">trunk/alexandria/lib/alexandria/book_providers.rb</a></li>
<li><a href="#trunkalexandriateststest_providersrb">trunk/alexandria/tests/test_providers.rb</a></li>
</ul>

</div>
<div id="patch">
<h3>Diff</h3>
<a id="trunkalexandrialibalexandriabook_providersbol_itrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/lib/alexandria/book_providers/bol_it.rb (591 => 592)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/lib/alexandria/book_providers/bol_it.rb        2007-02-15 07:24:07 UTC (rev 591)
+++ trunk/alexandria/lib/alexandria/book_providers/bol_it.rb        2007-02-15 07:54:27 UTC (rev 592)
</span><span class="lines">@@ -26,7 +26,8 @@
</span><span class="cx">     class BOL_itProvider &lt; GenericProvider
</span><span class="cx">         BASE_URI = &quot;http://www.bol.it&quot;
</span><span class="cx">         CACHE_DIR = File.join(Alexandria::Library::DIR, '.bol_it_cache')
</span><del>-        REFERER = &quot;http://www.bol.it&quot;
</del><ins>+        REFERER = BASE_URI
+        LOCALE = &quot;libri&quot; # possible locales are: &quot;libri&quot;, &quot;inglesi&quot;, &quot;video&quot;, &quot;musica&quot;, &quot;choco&quot;
</ins><span class="cx">         def initialize
</span><span class="cx">             super(&quot;BOL_it&quot;, &quot;BOL Italia&quot;)
</span><span class="cx">             FileUtils.mkdir_p(CACHE_DIR) unless File.exists?(CACHE_DIR)            
</span><span class="lines">@@ -35,7 +36,7 @@
</span><span class="cx">         end
</span><span class="cx">         
</span><span class="cx">         def search(criterion, type)
</span><del>-            req = BASE_URI + &quot;/libri/&quot;
</del><ins>+            req = BASE_URI + &quot;/&quot; + LOCALE + &quot;/&quot;
</ins><span class="cx">             req += case type
</span><span class="cx">                 when SEARCH_BY_ISBN
</span><span class="cx">                     &quot;scheda/&quot;
</span><span class="lines">@@ -56,7 +57,7 @@
</span><span class="cx">             
</span><span class="cx"> if type == SEARCH_BY_ISBN
</span><span class="cx"> ## warning: this provider uses pages like http://www.bol.it/libri/scheda/ea978888584104 with 12 numbers, without the checksum
</span><del>-            req += &quot;ea978&quot; + Library.canonicalise_isbn(criterion)[0 .. 8] + &quot;.html&quot;
</del><ins>+            req += &quot;ea978&quot; + Library.canonicalise_isbn(criterion)[0 .. -2] + &quot;.html&quot;
</ins><span class="cx"> else
</span><span class="cx">             req += CGI.escape(criterion)
</span><span class="cx"> end
</span><span class="lines">@@ -68,7 +69,7 @@
</span><span class="cx">                 begin
</span><span class="cx">                     results = [] 
</span><span class="cx">                     each_book_page(data) do |code, title|
</span><del>-                        results &lt;&lt; to_book(transport.get(URI.parse(&quot;http://www.bol.it/libri/scheda/ea978&quot; + code)))
</del><ins>+                        results &lt;&lt; to_book(transport.get(URI.parse(BASE_URI + &quot;/#{LOCALE}/scheda/ea&quot; + code)))
</ins><span class="cx">                     end
</span><span class="cx">                     return results 
</span><span class="cx">                 rescue
</span><span class="lines">@@ -79,7 +80,7 @@
</span><span class="cx"> 
</span><span class="cx">         def url(book)
</span><span class="cx">             return nil unless book.isbn
</span><del>-            &quot;http://www.bol.it/libri/scheda/ea978&quot; + Library.canonicalise_isbn(book.isbn)[0 .. 8] + &quot;.html&quot;
</del><ins>+            BASE_URI + &quot;/#{LOCALE}/scheda/ea978&quot; + Library.canonicalise_isbn(book.isbn)[0 .. -2] + &quot;.html&quot;
</ins><span class="cx">         end
</span><span class="cx"> 
</span><span class="cx">         #######
</span><span class="lines">@@ -89,8 +90,8 @@
</span><span class="cx">         def to_book(data)
</span><span class="cx">             raise unless md = /&lt;INPUT type =hidden name =&quot;mailTitolo&quot; value=&quot;([^&quot;]+)/.match(data)
</span><span class="cx">             title = CGI.unescape(md[1].strip)
</span><ins>+
</ins><span class="cx">             authors = []
</span><del>-            
</del><span class="cx">           if md = /&lt;INPUT type =HIDDEN name =&quot;mailAutore&quot; value=&quot;([^&quot;]+)/.match(data)
</span><span class="cx">             md[1].split(', ').each { |a| authors &lt;&lt; CGI.unescape(a.strip) }
</span><span class="cx">           end
</span><span class="lines">@@ -98,15 +99,21 @@
</span><span class="cx">             raise unless md = /&lt;INPUT type =HIDDEN name =&quot;mailEAN&quot; value=&quot;([^&quot;]+)/.match(data)
</span><span class="cx">             isbn = md[1].strip
</span><span class="cx">             isbn += String( Library.ean_checksum( Library.extract_numbers( isbn ) ) )
</span><ins>+            isbn = Library.canonicalise_isbn(isbn)
</ins><span class="cx"> 
</span><span class="cx">             raise unless md = /&lt;INPUT type =HIDDEN name =&quot;mailEditore&quot; value=&quot;([^&quot;]+)/.match(data)
</span><span class="cx">                 publisher = CGI.unescape(md[1].strip)
</span><span class="cx"> 
</span><span class="cx">             raise unless md = /&lt;INPUT type =HIDDEN name =&quot;mailFormato&quot; value=&quot;([^&quot;]+)/.match(data)
</span><span class="cx">             edition = CGI.unescape(md[1].strip)
</span><del>-            if md = /\&amp;nbsp\;\|\&amp;nbsp\;([^&amp;]+)\&amp;nbsp\;\|\&amp;nbsp\;/.match(data) and md[1] != &quot;0&quot;
-                edition = CGI.unescape(md[1].strip) + &quot; p., &quot; + edition
</del><ins>+            if md = /#{edition}\&amp;nbsp\;\|\&amp;nbsp\;(\d+)\&amp;nbsp\;\|\&amp;nbsp\;/.match(data)
+                nr_pages = CGI.unescape(md[1].strip)
+            elsif md = / (\d+) pagine \| /.match(data)
+                nr_pages = CGI.unescape(md[1].strip)
</ins><span class="cx">             end
</span><ins>+            if nr_pages != &quot;0&quot; and  nr_pages != nil
+                edition = nr_pages + &quot; p., &quot; + edition
+            end
</ins><span class="cx"> 
</span><span class="cx">             publish_year = nil
</span><span class="cx">             if md = /&lt;INPUT type =HIDDEN name =&quot;mailAnnoPubbl&quot; value=&quot;([^&quot;]+)/.match(data)
</span><span class="lines">@@ -114,7 +121,7 @@
</span><span class="cx">                 publish_year = nil if publish_year == 0
</span><span class="cx">             end
</span><span class="cx">           
</span><del>-            cover_url = &quot;http://www.bol.it/bol/includes/tornaImmagine.jsp?cdSoc=BL&amp;ean=&quot; + isbn[0 .. 11] + &quot;&amp;tipoOggetto=PIB&amp;cdSito=BL&quot; 
</del><ins>+            cover_url = BASE_URI + &quot;/bol/includes/tornaImmagine.jsp?cdSoc=BL&amp;ean=&quot; + isbn[0 .. 11] + &quot;&amp;tipoOggetto=PIB&amp;cdSito=BL&quot; # use &quot;FRB&quot; instead of &quot;PIB&quot; for smaller images
</ins><span class="cx">             cover_filename = isbn + &quot;.tmp&quot;
</span><span class="cx">             Dir.chdir(CACHE_DIR) do
</span><span class="cx">                 File.open(cover_filename, &quot;w&quot;) do |file|
</span><span class="lines">@@ -123,7 +130,7 @@
</span><span class="cx">             end
</span><span class="cx"> 
</span><span class="cx">             medium_cover = CACHE_DIR + &quot;/&quot; + cover_filename
</span><del>-            if File.size(medium_cover) &gt; 0
</del><ins>+            if File.size(medium_cover) &gt; 43
</ins><span class="cx">                 puts medium_cover + &quot; has non-0 size&quot; if $DEBUG
</span><span class="cx">                 return [ Book.new(title, authors, isbn, publisher, publish_year, edition),medium_cover ]
</span><span class="cx">             end
</span><span class="lines">@@ -133,7 +140,7 @@
</span><span class="cx">         end
</span><span class="cx"> 
</span><span class="cx">         def each_book_page(data)
</span><del>-                raise if data.scan(/&lt;a href=&quot;\/libri\/scheda\/ea978([\d]+).html&quot;&gt;/) { |a| yield a}.empty?
</del><ins>+                raise if data.scan(/&lt;a href=&quot;\/#{LOCALE}\/scheda\/ea(\d+)\.html;jsessionid=([^&quot;]+)&quot;&gt;(\s*)Scheda completa(\s*)&lt;\/a&gt;/) { |a| yield a}.empty?
</ins><span class="cx">         end
</span><span class="cx">     
</span><span class="cx">         def clean_cache
</span><span class="lines">@@ -148,4 +155,3 @@
</span><span class="cx">     end
</span><span class="cx"> end
</span><span class="cx"> end
</span><del>-
</del></span></pre></div>
<a id="trunkalexandrialibalexandriabook_providersrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/lib/alexandria/book_providers.rb (591 => 592)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/lib/alexandria/book_providers.rb        2007-02-15 07:24:07 UTC (rev 591)
+++ trunk/alexandria/lib/alexandria/book_providers.rb        2007-02-15 07:54:27 UTC (rev 592)
</span><span class="lines">@@ -238,6 +238,8 @@
</span><span class="cx">         require 'alexandria/book_providers/renaud'
</span><span class="cx">         require 'alexandria/book_providers/adlibris'
</span><span class="cx">         require 'alexandria/book_providers/ls'
</span><ins>+        require 'alexandria/book_providers/bol_it'
+
</ins><span class="cx">         
</span><span class="cx">         # mechanize is optional
</span><span class="cx">         begin
</span></span></pre></div>
<a id="trunkalexandriateststest_providersrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/tests/test_providers.rb (591 => 592)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/tests/test_providers.rb        2007-02-15 07:24:07 UTC (rev 591)
+++ trunk/alexandria/tests/test_providers.rb        2007-02-15 07:54:27 UTC (rev 592)
</span><span class="lines">@@ -73,4 +73,9 @@
</span><span class="cx">                         '881701298X') 
</span><span class="cx">     end
</span><span class="cx"> 
</span><ins>+    def test_bol
+        __test_provider(Alexandria::BookProviders::BOL_itProvider,
+                        '881701298X') 
+    end
+
</ins><span class="cx"> end
</span></span></pre>
</div>
</div>

</body>
</html>