<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" /><style type="text/css"><!--
/* Commit summary (#msg): the revision/author/date list, the log message and the changed-path list. */
#msg dl { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dt { float: left; width: 6em; font-weight: bold; }
/* Append a colon after every term label ("Revision:", "Author:", ...). */
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; }
/* Links inside the summary list. The pseudo-classes are kept in link, visited,
   active order: the three selectors have equal specificity, so the last
   matching rule wins, and :active has to come after :visited for the pressed
   colour to show on links that were already visited. */
#msg dl a { font-weight: bold}
#msg dl a:link { color:#fc3; }
#msg dl a:visited { color:#cc6; }
#msg dl a:active { color:#ff0; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
/* Log message box; scrolls instead of overflowing when a line is too long. */
#msg pre { overflow: auto; background: #ffc; border: 1px #fc0 solid; padding: 6px; }
#msg ul, pre { overflow: auto; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
/* Diff section (#patch): one bordered box per changed file. */
#patch { width: 100%; }
/* Per-file title bar. */
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff {padding:10px 0;}
/* Every run of diff lines is a block-level box so its background spans the full width. */
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
/* Added lines on green, removed lines on red; the default underline and strike-through are turned off. */
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
/* Hunk headers and the file header lines are greyed out on white. */
#patch .lines, .info {color:#888;background:#fff;}
--></style>
<title>[592] trunk/alexandria: provider bol.it, now working well enough</title>
</head>
<body>
<div id="msg">
<dl>
<dt>Revision</dt> <dd>592</dd>
<dt>Author</dt> <dd>laurusnobilis</dd>
<dt>Date</dt> <dd>2007-02-15 02:54:27 -0500 (Thu, 15 Feb 2007)</dd>
</dl>
<h3>Log Message</h3>
<pre>provider bol.it, now working well enough</pre>
<h3>Modified Paths</h3>
<ul>
<li><a href="#trunkalexandrialibalexandriabook_providersbol_itrb">trunk/alexandria/lib/alexandria/book_providers/bol_it.rb</a></li>
<li><a href="#trunkalexandrialibalexandriabook_providersrb">trunk/alexandria/lib/alexandria/book_providers.rb</a></li>
<li><a href="#trunkalexandriateststest_providersrb">trunk/alexandria/tests/test_providers.rb</a></li>
</ul>
</div>
<div id="patch">
<h3>Diff</h3>
<a id="trunkalexandrialibalexandriabook_providersbol_itrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/lib/alexandria/book_providers/bol_it.rb (591 => 592)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/lib/alexandria/book_providers/bol_it.rb        2007-02-15 07:24:07 UTC (rev 591)
+++ trunk/alexandria/lib/alexandria/book_providers/bol_it.rb        2007-02-15 07:54:27 UTC (rev 592)
</span><span class="lines">@@ -26,7 +26,8 @@
</span><span class="cx"> class BOL_itProvider &lt; GenericProvider
</span><span class="cx"> BASE_URI = "http://www.bol.it"
</span><span class="cx"> CACHE_DIR = File.join(Alexandria::Library::DIR, '.bol_it_cache')
</span><del>- REFERER = "http://www.bol.it"
</del><ins>+ REFERER = BASE_URI
+ LOCALE = "libri" # possible locales are: "libri", "inglesi", "video", "musica", "choco"
</ins><span class="cx"> def initialize
</span><span class="cx"> super("BOL_it", "BOL Italia")
</span><span class="cx"> FileUtils.mkdir_p(CACHE_DIR) unless File.exists?(CACHE_DIR)
</span><span class="lines">@@ -35,7 +36,7 @@
</span><span class="cx"> end
</span><span class="cx">
</span><span class="cx"> def search(criterion, type)
</span><del>- req = BASE_URI + "/libri/"
</del><ins>+ req = BASE_URI + "/" + LOCALE + "/"
</ins><span class="cx"> req += case type
</span><span class="cx"> when SEARCH_BY_ISBN
</span><span class="cx"> "scheda/"
</span><span class="lines">@@ -56,7 +57,7 @@
</span><span class="cx">
</span><span class="cx"> if type == SEARCH_BY_ISBN
</span><span class="cx"> ## warning: this provider uses pages like http://www.bol.it/libri/scheda/ea978888584104 with 12 numbers, without the checksum
</span><del>- req += "ea978" + Library.canonicalise_isbn(criterion)[0 .. 8] + ".html"
</del><ins>+ req += "ea978" + Library.canonicalise_isbn(criterion)[0 .. -2] + ".html"
</ins><span class="cx"> else
</span><span class="cx"> req += CGI.escape(criterion)
</span><span class="cx"> end
</span><span class="lines">@@ -68,7 +69,7 @@
</span><span class="cx"> begin
</span><span class="cx"> results = []
</span><span class="cx"> each_book_page(data) do |code, title|
</span><del>- results &lt;&lt; to_book(transport.get(URI.parse("http://www.bol.it/libri/scheda/ea978" + code)))
</del><ins>+ results &lt;&lt; to_book(transport.get(URI.parse(BASE_URI + "/#{LOCALE}/scheda/ea" + code)))
</ins><span class="cx"> end
</span><span class="cx"> return results
</span><span class="cx"> rescue
</span><span class="lines">@@ -79,7 +80,7 @@
</span><span class="cx">
</span><span class="cx"> def url(book)
</span><span class="cx"> return nil unless book.isbn
</span><del>- "http://www.bol.it/libri/scheda/ea978" + Library.canonicalise_isbn(book.isbn)[0 .. 8] + ".html"
</del><ins>+ BASE_URI + "/#{LOCALE}/scheda/ea978" + Library.canonicalise_isbn(book.isbn)[0 .. -2] + ".html"
</ins><span class="cx"> end
</span><span class="cx">
</span><span class="cx"> #######
</span><span class="lines">@@ -89,8 +90,8 @@
</span><span class="cx"> def to_book(data)
</span><span class="cx"> raise unless md = /&lt;INPUT type =hidden name ="mailTitolo" value="([^"]+)/.match(data)
</span><span class="cx"> title = CGI.unescape(md[1].strip)
</span><ins>+
</ins><span class="cx"> authors = []
</span><del>-        
</del><span class="cx">          if md = /&lt;INPUT type =HIDDEN name ="mailAutore" value="([^"]+)/.match(data)
</span><span class="cx"> md[1].split(', ').each { |a| authors &lt;&lt; CGI.unescape(a.strip) }
</span><span class="cx"> end
</span><span class="lines">@@ -98,15 +99,21 @@
</span><span class="cx"> raise unless md = /&lt;INPUT type =HIDDEN name ="mailEAN" value="([^"]+)/.match(data)
</span><span class="cx"> isbn = md[1].strip
</span><span class="cx"> isbn += String( Library.ean_checksum( Library.extract_numbers( isbn ) ) )
</span><ins>+ isbn = Library.canonicalise_isbn(isbn)
</ins><span class="cx">
</span><span class="cx"> raise unless md = /&lt;INPUT type =HIDDEN name ="mailEditore" value="([^"]+)/.match(data)
</span><span class="cx">          publisher = CGI.unescape(md[1].strip)
</span><span class="cx">
</span><span class="cx"> raise unless md = /&lt;INPUT type =HIDDEN name ="mailFormato" value="([^"]+)/.match(data)
</span><span class="cx"> edition = CGI.unescape(md[1].strip)
</span><del>- if md = /\&amp;nbsp\;\|\&amp;nbsp\;([^&amp;]+)\&amp;nbsp\;\|\&amp;nbsp\;/.match(data) and md[1] != "0"
- edition = CGI.unescape(md[1].strip) + " p., " + edition
</del><ins>+ if md = /#{edition}\&amp;nbsp\;\|\&amp;nbsp\;(\d+)\&amp;nbsp\;\|\&amp;nbsp\;/.match(data)
+ nr_pages = CGI.unescape(md[1].strip)
+ elsif md = / (\d+) pagine \| /.match(data)
+ nr_pages = CGI.unescape(md[1].strip)
</ins><span class="cx"> end
</span><ins>+ if nr_pages != "0" and nr_pages != nil
+ edition = nr_pages + " p., " + edition
+ end
</ins><span class="cx">
</span><span class="cx"> publish_year = nil
</span><span class="cx"> if md = /&lt;INPUT type =HIDDEN name ="mailAnnoPubbl" value="([^"]+)/.match(data)
</span><span class="lines">@@ -114,7 +121,7 @@
</span><span class="cx"> publish_year = nil if publish_year == 0
</span><span class="cx"> end
</span><span class="cx">
</span><del>- cover_url = "http://www.bol.it/bol/includes/tornaImmagine.jsp?cdSoc=BL&amp;ean=" + isbn[0 .. 11] + "&amp;tipoOggetto=PIB&amp;cdSito=BL"
</del><ins>+ cover_url = BASE_URI + "/bol/includes/tornaImmagine.jsp?cdSoc=BL&amp;ean=" + isbn[0 .. 11] + "&amp;tipoOggetto=PIB&amp;cdSito=BL" # use "FRB" instead of "PIB" for smaller images
</ins><span class="cx"> cover_filename = isbn + ".tmp"
</span><span class="cx"> Dir.chdir(CACHE_DIR) do
</span><span class="cx"> File.open(cover_filename, "w") do |file|
</span><span class="lines">@@ -123,7 +130,7 @@
</span><span class="cx"> end
</span><span class="cx">
</span><span class="cx"> medium_cover = CACHE_DIR + "/" + cover_filename
</span><del>- if File.size(medium_cover) > 0
</del><ins>+ if File.size(medium_cover) > 43
</ins><span class="cx"> puts medium_cover + " has non-0 size" if $DEBUG
</span><span class="cx"> return [ Book.new(title, authors, isbn, publisher, publish_year, edition),medium_cover ]
</span><span class="cx"> end
</span><span class="lines">@@ -133,7 +140,7 @@
</span><span class="cx"> end
</span><span class="cx">
</span><span class="cx"> def each_book_page(data)
</span><del>-         raise if data.scan(/&lt;a href="\/libri\/scheda\/ea978([\d]+).html"&gt;/) { |a| yield a}.empty?
</del><ins>+         raise if data.scan(/&lt;a href="\/#{LOCALE}\/scheda\/ea(\d+)\.html;jsessionid=([^"]+)"&gt;(\s*)Scheda completa(\s*)&lt;\/a&gt;/) { |a| yield a}.empty?
</ins><span class="cx"> end
</span><span class="cx">
</span><span class="cx"> def clean_cache
</span><span class="lines">@@ -148,4 +155,3 @@
</span><span class="cx"> end
</span><span class="cx"> end
</span><span class="cx"> end
</span><del>-
</del></span></pre></div>
<a id="trunkalexandrialibalexandriabook_providersrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/lib/alexandria/book_providers.rb (591 => 592)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/lib/alexandria/book_providers.rb        2007-02-15 07:24:07 UTC (rev 591)
+++ trunk/alexandria/lib/alexandria/book_providers.rb        2007-02-15 07:54:27 UTC (rev 592)
</span><span class="lines">@@ -238,6 +238,8 @@
</span><span class="cx"> require 'alexandria/book_providers/renaud'
</span><span class="cx"> require 'alexandria/book_providers/adlibris'
</span><span class="cx"> require 'alexandria/book_providers/ls'
</span><ins>+ require 'alexandria/book_providers/bol_it'
+
</ins><span class="cx">
</span><span class="cx"> # mechanize is optional
</span><span class="cx"> begin
</span></span></pre></div>
<a id="trunkalexandriateststest_providersrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/tests/test_providers.rb (591 => 592)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/tests/test_providers.rb        2007-02-15 07:24:07 UTC (rev 591)
+++ trunk/alexandria/tests/test_providers.rb        2007-02-15 07:54:27 UTC (rev 592)
</span><span class="lines">@@ -73,4 +73,9 @@
</span><span class="cx"> '881701298X')
</span><span class="cx"> end
</span><span class="cx">
</span><ins>+ def test_bol
+ __test_provider(Alexandria::BookProviders::BOL_itProvider,
+ '881701298X')
+ end
+
</ins><span class="cx"> end
</span></span></pre>
</div>
</div>
</body>
</html>