<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" /><style type="text/css"><!--
/* Styles for the commit notification: #msg is the revision summary, #patch is the rendered diff. */

/* Revision metadata list (Revision / Author / Date). */
#msg dl { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dt { float: left; width: 6em; font-weight: bold; }
/* Append a colon after each metadata label. */
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; }
#msg dl a { font-weight: bold}
/* Link states are listed link, visited, active: the three selectors have equal
   specificity, so the last matching rule wins. With :active last, a visited link
   still shows its active colour while it is being pressed. */
#msg dl a:link { color:#fc3; }
#msg dl a:visited { color:#cc6; }
#msg dl a:active { color:#ff0; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
/* Log message box; long lines scroll instead of widening the page. */
#msg pre { overflow: auto; background: #ffc; border: 1px #fc0 solid; padding: 6px; }
#msg ul, pre { overflow: auto; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }

/* Diff section: one bordered box per changed file, with an h4 title bar. */
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff {padding:10px 0;}
/* Each diff line group is a block so its background spans the full width. */
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
/* Added lines are green, removed lines are red; the default ins/del text decoration is switched off. */
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
/* Hunk headers and file header lines are greyed out on white. */
#patch .lines, .info {color:#888;background:#fff;}
--></style>
<title>[605] trunk/alexandria: provider Deastore, working well enough</title>
</head>
<body>
<div id="msg">
<dl>
<dt>Revision</dt> <dd>605</dd>
<dt>Author</dt> <dd>laurusnobilis</dd>
<dt>Date</dt> <dd>2007-02-16 13:54:50 -0500 (Fri, 16 Feb 2007)</dd>
</dl>
<h3>Log Message</h3>
<pre>provider Deastore, working well enough</pre>
<h3>Modified Paths</h3>
<ul>
<li><a href="#trunkalexandrialibalexandriabook_providersdea_store_itrb">trunk/alexandria/lib/alexandria/book_providers/dea_store_it.rb</a></li>
<li><a href="#trunkalexandriateststest_providersrb">trunk/alexandria/tests/test_providers.rb</a></li>
</ul>
</div>
<div id="patch">
<h3>Diff</h3>
<a id="trunkalexandrialibalexandriabook_providersdea_store_itrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/lib/alexandria/book_providers/dea_store_it.rb (604 => 605)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/lib/alexandria/book_providers/dea_store_it.rb        2007-02-16 04:06:46 UTC (rev 604)
+++ trunk/alexandria/lib/alexandria/book_providers/dea_store_it.rb        2007-02-16 18:54:50 UTC (rev 605)
</span><span class="lines">@@ -16,17 +16,21 @@
</span><span class="cx"> # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
</span><span class="cx"> # Boston, MA 02111-1307, USA.
</span><span class="cx">
</span><ins>+require 'fileutils'
</ins><span class="cx"> require 'net/http'
</span><ins>+require 'mechanize'
</ins><span class="cx"> #require 'cgi'
</span><del>-require 'mechanize'
</del><span class="cx">
</span><span class="cx"> module Alexandria
</span><span class="cx"> class BookProviders
</span><span class="cx"> class DeaStore_itProvider &lt; GenericProvider
</span><span class="cx"> BASE_URI = "http://www.deastore.com"
</span><ins>+ CACHE_DIR = File.join(Alexandria::Library::DIR, '.deastore_it_cache')
</ins><span class="cx"> def initialize
</span><span class="cx"> super("DeaStore_it", "DeaStore Italia")
</span><ins>+ FileUtils.mkdir_p(CACHE_DIR) unless File.exists?(CACHE_DIR)
</ins><span class="cx"> # no preferences for the moment
</span><ins>+ at_exit { clean_cache }
</ins><span class="cx"> end
</span><span class="cx">
</span><span class="cx"> def search(criterion, type)
</span><span class="lines">@@ -49,21 +53,27 @@
</span><span class="cx">
</span><span class="cx"> end
</span><span class="cx">
</span><ins>+if type == SEARCH_BY_ISBN
+ req += Library.canonicalise_isbn(criterion)
+else
</ins><span class="cx"> req += CGI.escape(criterion)
</span><ins>+end
</ins><span class="cx"> p req if $DEBUG
</span><del>-                        agent = WWW::Mechanize.new
-                        agent.user_agent_alias = 'Mac Safari'
-         #data = transport.get(URI.parse(req))
-         data = agent.get(URI.parse(req))
-                data = agent.get(URI.parse(req)) rescue data = agent.get(URI.parse(req)) #try again
</del><ins>+
+ agent = WWW::Mechanize.new
+ agent.user_agent_alias = 'Mac Safari'
+ #data = transport.get(URI.parse(req))
+ data = agent.get(URI.parse(req)).content
+
</ins><span class="cx"> if type == SEARCH_BY_ISBN
</span><span class="cx"> to_book(data) #rescue raise NoResultsError
</span><span class="cx"> else
</span><span class="cx"> begin
</span><span class="cx"> results = []
</span><span class="cx"> each_book_page(data) do |code, title|
</span><del>- #results &lt;&lt; to_book(transport.get(URI.parse("http://www.internetbookshop.it/ser/serdsp.asp?c=" + code)))
- results &lt;&lt; to_book(agent.get(URI.parse("http://www.internetbookshop.it/ser/serdsp.asp?c=" + code)))
</del><ins>+ agent = WWW::Mechanize.new
+ agent.user_agent_alias = 'Mac Safari'
+ results &lt;&lt; to_book(agent.get(URI.parse(BASE_URI + "/" + code)).content)
</ins><span class="cx"> end
</span><span class="cx"> return results
</span><span class="cx"> rescue
</span><span class="lines">@@ -74,7 +84,7 @@
</span><span class="cx">
</span><span class="cx"> def url(book)
</span><span class="cx"> return nil unless book.isbn
</span><del>- BASE_URI + "/product.asp?isbn=" + book.isbn
</del><ins>+ BASE_URI + "/product.asp?isbn=" + Library.canonicalise_isbn(book.isbn)
</ins><span class="cx"> end
</span><span class="cx">
</span><span class="cx"> #######
</span><span class="lines">@@ -82,42 +92,70 @@
</span><span class="cx"> #######
</span><span class="cx">
</span><span class="cx"> def to_book(data)
</span><del>-                        data = data.content
- data = data.convert("UTF-8", "windows-1252")
</del><ins>+ data = data.convert("UTF-8", "windows-1252")
</ins><span class="cx">
</span><del>- raise "No title." unless md = /&lt;span class="BDtitoloLibro"&gt; (.+)&lt;\/span&gt;/.match(data)
</del><ins>+ raise "No title." unless md = /&lt;span class="BDtitoloLibro"&gt;([^&lt;]+)/.match(data)
</ins><span class="cx"> title = CGI.unescape(md[1].strip)
</span><span class="cx">
</span><span class="cx"> authors = []
</span><del>- if md = /&lt;span class="BDauthLibro"&gt;by:(.+)&lt;\/span&gt;&lt;span class="BDformatoLibro"&gt;/.match(data)
- md[1].strip.split('; ').each { |a| authors &lt;&lt; CGI.unescape(a.strip) }
</del><ins>+         if md = /&lt;span class="BDauthLibro"&gt;by:([^&lt;]+)/.match(data)
+ md[1].strip.split('- ').each { |a| authors &lt;&lt; CGI.unescape(a.strip) }
</ins><span class="cx"> end
</span><span class="cx">
</span><del>- raise "No ISBN" unless md = /&lt;span class="isbn"&gt;(.+)&lt;\/span&gt;&lt;br \/&gt;/.match(data)
</del><ins>+ raise "No ISBN" unless md = /&lt;span class="BDEticLibro"&gt;ISBN 13: &lt;\/span&gt;&lt;span class="isbn"&gt;([^&lt;]+)/.match(data)
</ins><span class="cx"> isbn = md[1].strip.gsub!("-","")
</span><span class="cx">
</span><del>- raise "No Publisher" unless md = /&lt;span class="BDEticLibro"&gt;Publisher &amp;amp; Imprint&lt;\/span&gt;(.+)&lt;\/p&gt;/.match(data)
</del><ins>+ raise "No Publisher" unless md = /&lt;span class="BDeditoreLibro"&gt;([^&lt;]+)/.match(data)
</ins><span class="cx">          publisher = CGI.unescape(md[1].strip)
</span><span class="cx">
</span><del>- unless md = /&lt;strong&gt;More info&lt;\/strong&gt;&lt;\/font&gt;&lt;br&gt;&lt;font face="Verdana, Geneva, Arial, Helvetica, sans-serif" style="font-size : 7.5pt;" size="1"&gt;([^&lt;]+)/.match(data)
</del><ins>+ unless md = /&lt;span class="BDEticLibro"&gt;More info&lt;\/span&gt;&lt;br \/&gt;([^&lt;]+)/.match(data)
</ins><span class="cx">         edition = nil
</span><span class="cx"> else
</span><span class="cx">         edition = CGI.unescape(md[1].strip)
</span><span class="cx"> end
</span><span class="cx">
</span><del>-                        publish_year = 0
- if data =~ /Ingrandire immagine/
-                  small_cover = "http://www.deastore.com/covers/ie_cd1/batch1/" + isbn + ".jpg"
-                  medium_cover = "http://www.deastore.com/covers/ie_cd1/batch2/" + isbn + ".jpg"
-                  # big_cover = "http://www.deastore.com/covers/ie_cd1/batch3/" + isbn + ".jpg"
-         return [ Book.new(title, authors, isbn, publisher, edition),medium_cover ]
</del><ins>+ publish_year = nil
+ if md = /&lt;span class="BDdataPubbLibro"&gt;([^&lt;]+)/.match(data)
+ publish_year = CGI.unescape(md[1].strip)[-4 .. -1].to_i
+ publish_year = nil if publish_year == 0 or publish_year == 1900
</ins><span class="cx"> end
</span><del>-         return [ Book.new(title, authors, isbn, publisher, publish_year, edition)]
</del><ins>+
+ if md = /&lt;div class="imageLg"&gt;&lt;a href="javascript:void\(''\);" onclick="popUpCover\('\/covers\/([^\/]+)/.match(data)
+ cover_url = BASE_URI + "/covers/" + md[1].strip + "/batch1/" + Library.canonicalise_isbn(isbn) + ".jpg" # use batch2 or batch3 for bigger images
+
+ cover_filename = isbn + ".tmp"
+ Dir.chdir(CACHE_DIR) do
+ File.open(cover_filename, "w") do |file|
+ agent = WWW::Mechanize.new
+ agent.user_agent_alias = 'Mac Safari'
+ file.write agent.get(URI.parse(cover_url)).content
+ end
+ end
+
+ medium_cover = CACHE_DIR + "/" + cover_filename
+ if File.size(medium_cover) > 0
+ puts medium_cover + " has non-0 size" if $DEBUG
+ return [ Book.new(title, authors, isbn, publisher, publish_year, edition),medium_cover ]
+ end
+ puts medium_cover + " has 0 size, removing ..." if $DEBUG
+ File.delete(medium_cover)
+ end
+ return [ Book.new(title, authors, isbn, publisher, publish_year, edition) ]
</ins><span class="cx"> end
</span><span class="cx">
</span><span class="cx"> def each_book_page(data)
</span><del>-         raise if data.scan(/&lt;a href="http:\/\/www.internetbookshop.it\/ser\/serdsp.asp\?shop=1&amp;amp;c=([\w\d]+)"&gt;&lt;b&gt;([^&lt;]+)/) { |a| yield a}.empty?
</del><ins>+         raise if data.scan(/&lt;span class="BDtitoloLibro"&gt;&lt;a href="([^"]+)/) { |a| yield a}.empty?
</ins><span class="cx"> end
</span><ins>+
+ def clean_cache
+ #FIXME begin ... rescue ... end?
+ Dir.chdir(CACHE_DIR) do
+ Dir.glob("*.tmp") do |file|
+ puts "removing " + file if $DEBUG
+ File.delete(file)
+ end
+ end
+ end
</ins><span class="cx"> end
</span><span class="cx"> end
</span><span class="cx"> end
</span></span></pre></div>
<a id="trunkalexandriateststest_providersrb"></a>
<div class="modfile"><h4>Modified: trunk/alexandria/tests/test_providers.rb (604 => 605)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/alexandria/tests/test_providers.rb        2007-02-16 04:06:46 UTC (rev 604)
+++ trunk/alexandria/tests/test_providers.rb        2007-02-16 18:54:50 UTC (rev 605)
</span><span class="lines">@@ -70,12 +70,11 @@
</span><span class="cx"> '3896673300')
</span><span class="cx"> end
</span><span class="cx">
</span><del>- def test_ibs_it_1 # this tests a book without image but with author
</del><ins>+ def test_ibs_it
+ # this tests a book without image but with author
</ins><span class="cx"> __test_provider(Alexandria::BookProviders::IBS_itProvider,
</span><span class="cx"> '9788886973816')
</span><del>- end
-
- def test_ibs_it_2 # this tests a book with image but without author
</del><ins>+ # this tests a book with image but without author
</ins><span class="cx"> __test_provider(Alexandria::BookProviders::IBS_itProvider,
</span><span class="cx"> '9788807710148')
</span><span class="cx"> end
</span><span class="lines">@@ -92,7 +91,9 @@
</span><span class="cx">
</span><span class="cx"> def test_dea
</span><span class="cx"> __test_provider(Alexandria::BookProviders::DeaStore_itProvider,
</span><del>- '881701298X')
</del><ins>+ '9788817012980')
+ __test_provider(Alexandria::BookProviders::DeaStore_itProvider,
+ '9788806134747')
</ins><span class="cx"> end
</span><span class="cx">
</span><span class="cx"> def test_bol
</span><span class="lines">@@ -101,8 +102,18 @@
</span><span class="cx"> end
</span><span class="cx">
</span><span class="cx"> def test_webster
</span><ins>+ # BIT
</ins><span class="cx"> __test_provider(Alexandria::BookProviders::Webster_itProvider,
</span><span class="cx"> '9788817012980')
</span><ins>+ # BUK
+ __test_provider(Alexandria::BookProviders::Webster_itProvider,
+ '9781853260803')
+ # BUS
+ __test_provider(Alexandria::BookProviders::Webster_itProvider,
+ '9780307237699')
+ # BDE
+ __test_provider(Alexandria::BookProviders::Webster_itProvider,
+ '9783442460878')
</ins><span class="cx"> end
</span><span class="cx">
</span><span class="cx"> def test_renaud
</span></span></pre>
</div>
</div>
</body>
</html>