<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<!--
  Inline stylesheet for the commit notification page.
  #msg styles the commit summary; #patch styles the per-file diffs.
-->
<style type="text/css">
/* The rules are deliberately NOT wrapped in an SGML comment: this is an XHTML
   document, and an XML parser may discard comment contents, which would drop
   the whole stylesheet. */

/* Commit summary: metadata list (dl/dt/dd), log message and path lists. */
#msg dl { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt;  }
#msg dl a { font-weight: bold}
/* Link states have equal specificity, so source order decides the winner.
   :active must come after :visited or a visited link never shows its
   active colour. */
#msg dl a:link    { color:#fc3; }
#msg dl a:visited { color:#cc6; }
#msg dl a:active  { color:#ff0; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
#msg pre, #msg p { overflow: auto; background: #ffc; border: 1px #fc0 solid; padding: 6px; }
#msg ul { overflow: auto; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }

/* Diff section: one bordered box per changed file, with a coloured title bar. */
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff  {padding:10px 0;}
/* Every diff line is a span, ins or del rendered as a full-width block row. */
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
/* File header (.info) and hunk header (.lines) rows; both scoped to #patch
   like every other diff rule. */
#patch .lines, #patch .info {color:#888;background:#fff;}
</style>
<title>[561] blacklight_importer/src: Updates to support more permissive reading</title>
</head>
<body>

<div id="msg">
  <!-- Commit metadata as name/value pairs. -->
  <dl>
    <dt>Revision</dt>
    <dd>561</dd>
    <dt>Author</dt>
    <dd>haschart</dd>
    <dt>Date</dt>
    <dd>2008-05-14 17:43:20 -0400 (Wed, 14 May 2008)</dd>
  </dl>

  <!-- Log message exactly as committed; pre keeps its whitespace. -->
  <h3>Log Message</h3>
  <pre>Updates to support more permissive reading</pre>

  <!-- Each entry links to the anchor placed before that file's diff. -->
  <h3>Modified Paths</h3>
  <ul>
    <li><a href="#blacklight_importersrcMarcFilteredReaderjava">blacklight_importer/src/MarcFilteredReader.java</a></li>
    <li><a href="#blacklight_importersrcMarcImporterjava">blacklight_importer/src/MarcImporter.java</a></li>
    <li><a href="#blacklight_importersrcMarcPrinterjava">blacklight_importer/src/MarcPrinter.java</a></li>
  </ul>
</div>
<div id="patch">
<h3>Diff</h3>
<!--
  Diff of blacklight_importer/src/MarcFilteredReader.java, rev 560 to 561.
  The empty anchor is the target of the matching "Modified Paths" link.
  Inside the pre block every line break and space is rendered, so the rows are
  kept verbatim: span.info = file header, span.lines = hunk header,
  span.cx = context, ins = added lines, del = removed lines.
-->
<a id="blacklight_importersrcMarcFilteredReaderjava"></a>
<div class="modfile"><h4>Modified: blacklight_importer/src/MarcFilteredReader.java (560 => 561)</h4>
<pre class="diff"><span>
<span class="info">--- blacklight_importer/src/MarcFilteredReader.java        2008-05-14 21:42:42 UTC (rev 560)
+++ blacklight_importer/src/MarcFilteredReader.java        2008-05-14 21:43:20 UTC (rev 561)
</span><span class="lines">@@ -1,5 +1,6 @@
</span><span class="cx"> 
</span><span class="cx"> 
</span><ins>+import org.marc4j.MarcException;
</ins><span class="cx"> import org.marc4j.MarcReader;
</span><span class="cx"> import org.marc4j.marc.Record;
</span><span class="cx"> import java.util.Set;
</span><span class="lines">@@ -57,9 +58,16 @@
</span><span class="cx">         while (currentRecord == null)
</span><span class="cx">         {
</span><span class="cx">             if (!reader.hasNext()) return(null);
</span><del>-            Record rec = reader.next();
-            if (includeRecordIfFieldPresent != null)
</del><ins>+            Record rec = null;
+            try {
+                rec = reader.next();
+            }
+            catch (MarcException me)
</ins><span class="cx">             {
</span><ins>+                System.err.println(&quot;Error reading Marc Record: &quot;+ me.getMessage());               
+            }
+            if (rec != null &amp;&amp; includeRecordIfFieldPresent != null)
+            {
</ins><span class="cx">                 Set&lt;String&gt; fields = SolrIndexer.getFieldList(rec, includeRecordIfFieldPresent);
</span><span class="cx">                 if (fields.size() != 0)
</span><span class="cx">                 {
</span><span class="lines">@@ -69,9 +77,9 @@
</span><span class="cx">                     }
</span><span class="cx">                 }
</span><span class="cx">             }
</span><del>-            if (includeRecordIfFieldMissing != null)
</del><ins>+            if (rec != null &amp;&amp; includeRecordIfFieldMissing != null)
</ins><span class="cx">             {
</span><del>-                Set&lt;String&gt; fields = SolrIndexer.getFieldList(rec, includeRecordIfFieldPresent);
</del><ins>+                Set&lt;String&gt; fields = SolrIndexer.getFieldList(rec, includeRecordIfFieldMissing);
</ins><span class="cx">                 if ((fields.size() == 0 &amp;&amp; includeRecordIfFieldDoesntContain == null) ||
</span><span class="cx">                     (fields.size() != 0 &amp;&amp; includeRecordIfFieldDoesntContain != null &amp;&amp; !Utils.setItemContains(fields, includeRecordIfFieldDoesntContain)))
</span><span class="cx">                 {
</span></span></pre></div>
<!--
  Diff of blacklight_importer/src/MarcImporter.java, rev 560 to 561.
  The empty anchor is the target of the matching "Modified Paths" link.
  Inside the pre block every line break and space is rendered, so the rows are
  kept verbatim: span.info = file header, span.lines = hunk header,
  span.cx = context, ins = added lines, del = removed lines.
-->
<a id="blacklight_importersrcMarcImporterjava"></a>
<div class="modfile"><h4>Modified: blacklight_importer/src/MarcImporter.java (560 => 561)</h4>
<pre class="diff"><span>
<span class="info">--- blacklight_importer/src/MarcImporter.java        2008-05-14 21:42:42 UTC (rev 560)
+++ blacklight_importer/src/MarcImporter.java        2008-05-14 21:43:20 UTC (rev 561)
</span><span class="lines">@@ -38,6 +38,7 @@
</span><span class="cx"> import java.util.Properties;
</span><span class="cx"> 
</span><span class="cx"> import marcoverride.MarcDirStreamReader;
</span><ins>+import marcoverride.MarcPermissiveStreamReader;
</ins><span class="cx"> 
</span><span class="cx"> import org.apache.solr.core.SolrConfig;
</span><span class="cx"> import org.apache.solr.core.SolrCore;
</span><span class="lines">@@ -149,7 +150,8 @@
</span><span class="cx">             }
</span><span class="cx">         }
</span><span class="cx">         SolrHostURL = getProperty(props, &quot;solr.hosturl&quot;);
</span><del>-        
</del><ins>+
+        boolean permissiveReader = Boolean.parseBoolean(System.getProperty(&quot;marc.permissive&quot;));        
</ins><span class="cx">         verbose = Boolean.parseBoolean(getProperty(props, &quot;marc.verbose&quot;));
</span><span class="cx">         to_utf_8 = Boolean.parseBoolean(getProperty(props, &quot;marc.to_utf_8&quot;));
</span><span class="cx">         deleteRecordListFilename = getProperty(props, &quot;marc.ids_to_delete&quot;);
</span><span class="lines">@@ -162,11 +164,11 @@
</span><span class="cx">         reader = null;
</span><span class="cx">         if (source.equals(&quot;FILE&quot;))
</span><span class="cx">         {
</span><del>-            reader = new MarcStreamReader(new FileInputStream(getProperty(props, &quot;marc.path&quot;).trim()));
</del><ins>+            reader = new MarcPermissiveStreamReader(new FileInputStream(getProperty(props, &quot;marc.path&quot;).trim()), permissiveReader);
</ins><span class="cx">         }
</span><span class="cx">         else if (source.equals(&quot;DIR&quot;))
</span><span class="cx">         {
</span><del>-            reader = new MarcDirStreamReader(getProperty(props, &quot;marc.path&quot;).trim());
</del><ins>+            reader = new MarcDirStreamReader(getProperty(props, &quot;marc.path&quot;).trim(), permissiveReader);
</ins><span class="cx">         }
</span><span class="cx">         else if (source.equals(&quot;Z3950&quot;))
</span><span class="cx">         {
</span><span class="lines">@@ -262,6 +264,18 @@
</span><span class="cx">                 System.out.println(&quot;Adding record &quot; + recordCounter + &quot;: &quot; + record.getControlNumber());
</span><span class="cx">                 addToIndex(record);
</span><span class="cx">             }
</span><ins>+            catch (org.apache.solr.common.SolrException e)
+            {
+               if (e.getMessage().contains(&quot;missing required fields&quot;))
+               {
+                   System.err.println(&quot;Warning : &quot; + e.getMessage()+  &quot;at record count = &quot;+ recordCounter);
+               }
+               else
+               {
+                   System.err.println(&quot;Error indexing&quot;);
+                   e.printStackTrace();
+               }
+            }
</ins><span class="cx">             catch(Exception e)
</span><span class="cx">             {
</span><span class="cx">                 // keep going?
</span><span class="lines">@@ -283,6 +297,7 @@
</span><span class="cx">         
</span><span class="cx">         // finish up
</span><span class="cx">         addcmd.doc = builder.getDoc();
</span><ins>+        
</ins><span class="cx">         if (verbose)
</span><span class="cx">         {
</span><span class="cx">             System.out.println(record.toString());
</span></span></pre></div>
<!--
  Diff of blacklight_importer/src/MarcPrinter.java, rev 560 to 561.
  The empty anchor is the target of the matching "Modified Paths" link.
  Inside the pre block every line break and space is rendered, so the rows are
  kept verbatim: span.info = file header, span.lines = hunk header,
  span.cx = context, ins = added lines, del = removed lines.
-->
<a id="blacklight_importersrcMarcPrinterjava"></a>
<div class="modfile"><h4>Modified: blacklight_importer/src/MarcPrinter.java (560 => 561)</h4>
<pre class="diff"><span>
<span class="info">--- blacklight_importer/src/MarcPrinter.java        2008-05-14 21:42:42 UTC (rev 560)
+++ blacklight_importer/src/MarcPrinter.java        2008-05-14 21:43:20 UTC (rev 561)
</span><span class="lines">@@ -16,8 +16,10 @@
</span><span class="cx"> import java.util.Set;
</span><span class="cx"> 
</span><span class="cx"> import marcoverride.MarcDirStreamReader;
</span><ins>+import marcoverride.MarcPermissiveStreamReader;
</ins><span class="cx"> 
</span><span class="cx"> import org.apache.solr.update.DeleteUpdateCommand;
</span><ins>+import org.marc4j.MarcException;
</ins><span class="cx"> import org.marc4j.MarcReader;
</span><span class="cx"> import org.marc4j.MarcStreamReader;
</span><span class="cx"> import org.marc4j.MarcStreamWriter;
</span><span class="lines">@@ -42,15 +44,16 @@
</span><span class="cx">         String fileStr = args[1];
</span><span class="cx">         File file = new File(fileStr);
</span><span class="cx">         MarcReader reader;
</span><ins>+        boolean permissiveReader = Boolean.parseBoolean(System.getProperty(&quot;marc.permissive&quot;));
</ins><span class="cx">         
</span><span class="cx">         if (file.isDirectory())
</span><span class="cx">         {
</span><del>-            reader = new MarcDirStreamReader(file);            
</del><ins>+            reader = new MarcDirStreamReader(file, permissiveReader);            
</ins><span class="cx">         }
</span><span class="cx">         else
</span><span class="cx">         {       
</span><span class="cx">             InputStream in = new FileInputStream(file);
</span><del>-            reader = new MarcStreamReader(in);
</del><ins>+            reader = new MarcPermissiveStreamReader(in, permissiveReader);
</ins><span class="cx">         }
</span><span class="cx">             
</span><span class="cx">         String marcIncludeIfPresent = System.getProperty(&quot;marc.include_if_present&quot;);
</span><span class="lines">@@ -101,20 +104,63 @@
</span><span class="cx">             map = indexer.findMap(fieldVals[3]);
</span><span class="cx">         }
</span><span class="cx"> 
</span><del>-        if (mode.equals(&quot;translate&quot;))
</del><ins>+        if (mode.equals(&quot;count&quot;))
</ins><span class="cx">         {
</span><ins>+            int count = 0;
+            while (reader.hasNext()) 
+            {
+                try {
+                    Record rec = reader.next();
+                    count++;
+                    if (count % 1000 == 0) System.err.println(count);
+                    if (verbose) System.out.println(rec.toString());
+                }
+                catch (MarcException me)
+                {
+                    System.err.println(&quot;Error reading Record &quot;+ me.getMessage());
+                }
+            }
+            System.out.println(&quot;Total records= &quot;+ count);
+        }
+        else if (mode.equals(&quot;translate&quot;))
+        {
</ins><span class="cx">             to_utf_8 = true;
</span><span class="cx">             writer = new MarcStreamWriter(System.out, &quot;UTF-8&quot;);
</span><span class="cx">             while (reader.hasNext()) 
</span><span class="cx">             {
</span><del>-                Record rec = reader.next();
-                writer.write(rec);
</del><ins>+                try {
+                    Record rec = reader.next();
+                    writer.write(rec);
+                }
+                catch (MarcException me)
+                {
+                    System.err.println(&quot;Error reading Record &quot;+ me.getMessage());
+                }
+
</ins><span class="cx">             }
</span><ins>+            writer.close();
</ins><span class="cx">         }
</span><span class="cx">         else if (mode.equals(&quot;print&quot;))
</span><span class="cx">         {
</span><span class="cx">             while (reader.hasNext()) 
</span><span class="cx">             {
</span><ins>+                try {
+                    Record rec = reader.next();
+//                    Leader ldr = rec.getLeader();
+//                    if (ldr.getBaseAddressOfData() != 0) continue;
+                    if (verbose) System.out.println(rec.toString());
+                }
+                catch (MarcException me)
+                {
+                    System.err.println(&quot;Error reading Marc Record: &quot;+ me.getMessage());                                   
+                }
+            }
+        }
+
+        else if (mode.equals(&quot;era&quot;))
+        {
+            while (reader.hasNext()) 
+            {
</ins><span class="cx">                 Record rec = reader.next();
</span><span class="cx">                 if (verbose) System.out.println(rec.toString());
</span><span class="cx">                 if (map != null)
</span></span></pre>
</div>
</div>

</body>
</html>