<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" /><style type="text/css"><!--
/* Commit metadata list (Revision / Author / Date): light text on a blue panel. */
#msg dl { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
/* Labels float left in a fixed 6em column; the ':' separator is generated content. */
#msg dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
/* Shared typeface and size for the message area and the header/footer banners. */
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; }
/* Links inside the metadata list. The state rules are listed in link, visited,
   active order: all three have equal specificity, so the last matching rule wins
   and :active has to follow :visited to take effect on an already-visited link. */
#msg dl a { font-weight: bold}
#msg dl a:link { color:#fc3; }
#msg dl a:visited { color:#cc6; }
#msg dl a:active { color:#ff0; }
/* Section headings ("Log Message", "Modified Paths", "Diff"). */
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
/* Log message text and the path list scroll instead of overflowing their boxes. */
#msg pre, #msg p { overflow: auto; background: #ffc; border: 1px #fc0 solid; padding: 6px; }
#msg ul { overflow: auto; }
/* Banner styling for elements with id "header" / "footer". */
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
/* Diff area: one bordered box per changed file, each titled by an h4 bar. */
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff {padding:10px 0;}
/* span / ins / del inside the diff are block-level so their background fills the whole line. */
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
/* Added text on green, removed text on red; the default text decoration is switched off. */
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
/* Hunk markers and file header lines are shown grey on white. */
#patch .lines, .info {color:#888;background:#fff;}
--></style>
<title>[607] trunk/solr/conf/schema.xml:
schema change to use plugin and library for normalizing Unicode characters</title>
</head>
<body>
<div id="msg">
<dl>
<dt>Revision</dt> <dd>607</dd>
<dt>Author</dt> <dd>haschart</dd>
<dt>Date</dt> <dd>2008-06-18 11:50:41 -0400 (Wed, 18 Jun 2008)</dd>
</dl>
<h3>Log Message</h3>
<pre>schema change to use plugin and library for normalizing Unicode characters</pre>
<h3>Modified Paths</h3>
<ul>
<li><a href="#trunksolrconfschemaxml">trunk/solr/conf/schema.xml</a></li>
</ul>
</div>
<div id="patch">
<h3>Diff</h3>
<a id="trunksolrconfschemaxml"></a>
<div class="modfile"><h4>Modified: trunk/solr/conf/schema.xml (606 => 607)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/solr/conf/schema.xml        2008-06-18 15:50:00 UTC (rev 606)
+++ trunk/solr/conf/schema.xml        2008-06-18 15:50:41 UTC (rev 607)
</span><span class="lines">@@ -1,135 +1,139 @@
</span><del>-&lt;?xml version="1.0" encoding="UTF-8" ?&gt;
-&lt;!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
---&gt;
-
-&lt;!--
- This is the Solr schema file. This file should be named "schema.xml" and
- should be in the conf directory under the solr home
- (i.e. ./solr/conf/schema.xml by default)
- or located where the classloader for the Solr webapp can find it.
-
- This example schema is the recommended starting point for users.
- It should be kept correct and concise, usable out-of-the-box.
-
- For more information, on how to customize this file, please see
- http://wiki.apache.org/solr/SchemaXml
---&gt;
-
-&lt;schema name="solr_int" version="0.2"&gt;
- &lt;types&gt;
- &lt;fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="false"/&gt;
- &lt;fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="false"/&gt;
- &lt;fieldType name="integer" class="solr.IntField" omitNorms="false"/&gt;
- &lt;fieldType name="long" class="solr.LongField" omitNorms="false"/&gt;
- &lt;fieldType name="float" class="solr.FloatField" omitNorms="false"/&gt;
- &lt;fieldType name="double" class="solr.DoubleField" omitNorms="false"/&gt;
- &lt;fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="false"/&gt;
- &lt;fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="false"/&gt;
- &lt;fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="false"/&gt;
- &lt;fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="false"/&gt;
- &lt;fieldType name="date" class="solr.DateField" sortMissingLast="true" omitNorms="false"/&gt;
- &lt;fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"&gt;
- &lt;analyzer&gt;
- &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
- &lt;/analyzer&gt;
- &lt;/fieldType&gt;
-
- &lt;fieldType name="text" class="solr.TextField" positionIncrementGap="100"&gt;
- &lt;analyzer type="index"&gt;
- &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
- &lt;filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/&gt;
- &lt;filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/&gt;
- &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
- &lt;filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/&gt;
- &lt;filter class="solr.RemoveDuplicatesTokenFilterFactory"/&gt;
- &lt;/analyzer&gt;
- &lt;analyzer type="query"&gt;
- &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
- &lt;filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/&gt;
- &lt;filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/&gt;
- &lt;filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/&gt;
- &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
- &lt;filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/&gt;
- &lt;filter class="solr.RemoveDuplicatesTokenFilterFactory"/&gt;
- &lt;/analyzer&gt;
- &lt;/fieldType&gt;
-
- &lt;fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" &gt;
- &lt;analyzer&gt;
- &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
- &lt;filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/&gt;
- &lt;filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/&gt;
- &lt;filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/&gt;
- &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
- &lt;filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/&gt;
- &lt;filter class="solr.RemoveDuplicatesTokenFilterFactory"/&gt;
- &lt;/analyzer&gt;
- &lt;/fieldType&gt;
-
- &lt;fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true"&gt;
- &lt;analyzer&gt;
- &lt;tokenizer class="solr.KeywordTokenizerFactory"/&gt;
- &lt;filter class="solr.LowerCaseFilterFactory" /&gt;
- &lt;filter class="solr.TrimFilterFactory" /&gt;
- &lt;filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z])" replacement="" replace="all"/&gt;
- &lt;/analyzer&gt;
- &lt;/fieldType&gt;
-
- &lt;fieldtype name="text_zh" class="solr.TextField"&gt;
- &lt;analyzer class="org.apache.lucene.analysis.cn.ChineseAnalyzer"/&gt;
- &lt;/fieldtype&gt;
-        
-        &lt;fieldType name="spellText" class="solr.TextField" positionIncrementGap="100"&gt;
-                &lt;analyzer type="index"&gt;
-                        &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
-                        &lt;filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/&gt;
-                        &lt;filter class="solr.StandardFilterFactory"/&gt;
-                        &lt;filter class="solr.RemoveDuplicatesTokenFilterFactory"/&gt;
-                &lt;/analyzer&gt;
-                &lt;analyzer type="query"&gt;
-                        &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
-                        &lt;filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/&gt;
-                        &lt;filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/&gt;
-                        &lt;filter class="solr.StandardFilterFactory"/&gt;
-                        &lt;filter class="solr.RemoveDuplicatesTokenFilterFactory"/&gt;
-                &lt;/analyzer&gt;
-        &lt;/fieldType&gt;
-        
- &lt;/types&gt;
-
-
- &lt;fields&gt;
-        &lt;field name="id" type="string" indexed="true" stored="true" required="true" /&gt;
-        &lt;field name="text" type="text" indexed="true" stored="false" multiValued="true"/&gt;
-        &lt;field name="word" type="spellText" indexed="true" stored="true" /&gt;
-        &lt;!-- Here, default is used to create a "timestamp" field indicating When each document was indexed. --&gt;
-        &lt;field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/&gt;
-        &lt;dynamicField name="*_text" type="text" indexed="true" stored="false" multiValued="true"/&gt;
-        &lt;dynamicField name="*_facet" type="string" indexed="true" stored="true" multiValued="true" omitNorms="true" /&gt;
-        &lt;dynamicField name="*_display" type="text" indexed="false" stored="true" multiValued="true"/&gt;
-        &lt;dynamicField name="*_multisort_i" type="sint" indexed="true" stored="false" multiValued="true"/&gt;
- &lt;/fields&gt;
-
-        &lt;uniqueKey&gt;id&lt;/uniqueKey&gt;
-        &lt;defaultSearchField&gt;text&lt;/defaultSearchField&gt;
-        &lt;copyField source="*_text" dest="text"/&gt;
-        &lt;copyField source="*_text" dest="word"/&gt;
-        &lt;copyField source="*_facet" dest="word"/&gt;
-        &lt;copyField source="*_facet" dest="text"/&gt;
-        &lt;solrQueryParser defaultOperator="AND"/&gt;
-
</del><ins>+&lt;?xml version="1.0" encoding="UTF-8" ?&gt;
+&lt;!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+--&gt;
+
+&lt;!--
+ This is the Solr schema file. This file should be named "schema.xml" and
+ should be in the conf directory under the solr home
+ (i.e. ./solr/conf/schema.xml by default)
+ or located where the classloader for the Solr webapp can find it.
+
+ This example schema is the recommended starting point for users.
+ It should be kept correct and concise, usable out-of-the-box.
+
+ For more information, on how to customize this file, please see
+ http://wiki.apache.org/solr/SchemaXml
+--&gt;
+
+&lt;schema name="solr_int" version="0.2"&gt;
+ &lt;types&gt;
+ &lt;fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="false"/&gt;
+ &lt;fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="false"/&gt;
+ &lt;fieldType name="integer" class="solr.IntField" omitNorms="false"/&gt;
+ &lt;fieldType name="long" class="solr.LongField" omitNorms="false"/&gt;
+ &lt;fieldType name="float" class="solr.FloatField" omitNorms="false"/&gt;
+ &lt;fieldType name="double" class="solr.DoubleField" omitNorms="false"/&gt;
+ &lt;fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="false"/&gt;
+ &lt;fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="false"/&gt;
+ &lt;fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="false"/&gt;
+ &lt;fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="false"/&gt;
+ &lt;fieldType name="date" class="solr.DateField" sortMissingLast="true" omitNorms="false"/&gt;
+ &lt;fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"&gt;
+ &lt;analyzer&gt;
+ &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+ &lt;/analyzer&gt;
+ &lt;/fieldType&gt;
+
+ &lt;fieldType name="text" class="solr.TextField" positionIncrementGap="100"&gt;
+ &lt;analyzer type="index"&gt;
+ &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+ &lt;filter class="schema.UnicodeNormalizationFilterFactory"/&gt;
+ &lt;filter class="solr.ISOLatin1AccentFilterFactory"/&gt;
+ &lt;filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/&gt;
+ &lt;filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/&gt;
+ &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ &lt;filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/&gt;
+ &lt;filter class="solr.RemoveDuplicatesTokenFilterFactory"/&gt;
+ &lt;/analyzer&gt;
+ &lt;analyzer type="query"&gt;
+ &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+ &lt;filter class="schema.UnicodeNormalizationFilterFactory"/&gt;
+ &lt;filter class="solr.ISOLatin1AccentFilterFactory"/&gt;
+ &lt;filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/&gt;
+ &lt;filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/&gt;
+ &lt;filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/&gt;
+ &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ &lt;filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/&gt;
+ &lt;filter class="solr.RemoveDuplicatesTokenFilterFactory"/&gt;
+ &lt;/analyzer&gt;
+ &lt;/fieldType&gt;
+
+ &lt;fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" &gt;
+ &lt;analyzer&gt;
+ &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+ &lt;filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/&gt;
+ &lt;filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/&gt;
+ &lt;filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/&gt;
+ &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ &lt;filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/&gt;
+ &lt;filter class="solr.RemoveDuplicatesTokenFilterFactory"/&gt;
+ &lt;/analyzer&gt;
+ &lt;/fieldType&gt;
+
+ &lt;fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true"&gt;
+ &lt;analyzer&gt;
+ &lt;tokenizer class="solr.KeywordTokenizerFactory"/&gt;
+ &lt;filter class="solr.LowerCaseFilterFactory" /&gt;
+ &lt;filter class="solr.TrimFilterFactory" /&gt;
+ &lt;filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z])" replacement="" replace="all"/&gt;
+ &lt;/analyzer&gt;
+ &lt;/fieldType&gt;
+
+ &lt;fieldtype name="text_zh" class="solr.TextField"&gt;
+ &lt;analyzer class="org.apache.lucene.analysis.cn.ChineseAnalyzer"/&gt;
+ &lt;/fieldtype&gt;
+        
+        &lt;fieldType name="spellText" class="solr.TextField" positionIncrementGap="100"&gt;
+                &lt;analyzer type="index"&gt;
+                        &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+                        &lt;filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/&gt;
+                        &lt;filter class="solr.StandardFilterFactory"/&gt;
+                        &lt;filter class="solr.RemoveDuplicatesTokenFilterFactory"/&gt;
+                &lt;/analyzer&gt;
+                &lt;analyzer type="query"&gt;
+                        &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+                        &lt;filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/&gt;
+                        &lt;filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/&gt;
+                        &lt;filter class="solr.StandardFilterFactory"/&gt;
+                        &lt;filter class="solr.RemoveDuplicatesTokenFilterFactory"/&gt;
+                &lt;/analyzer&gt;
+        &lt;/fieldType&gt;
+        
+ &lt;/types&gt;
+
+
+ &lt;fields&gt;
+        &lt;field name="id" type="string" indexed="true" stored="true" required="true" /&gt;
+        &lt;field name="text" type="text" indexed="true" stored="false" multiValued="true"/&gt;
+        &lt;field name="word" type="spellText" indexed="true" stored="true" /&gt;
+        &lt;!-- Here, default is used to create a "timestamp" field indicating When each document was indexed. --&gt;
+        &lt;field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/&gt;
+        &lt;dynamicField name="*_text" type="text" indexed="true" stored="false" multiValued="true"/&gt;
+        &lt;dynamicField name="*_facet" type="string" indexed="true" stored="true" multiValued="true" omitNorms="true" /&gt;
+        &lt;dynamicField name="*_display" type="text" indexed="false" stored="true" multiValued="true"/&gt;
+        &lt;dynamicField name="*_multisort_i" type="sint" indexed="true" stored="false" multiValued="true"/&gt;
+ &lt;/fields&gt;
+
+        &lt;uniqueKey&gt;id&lt;/uniqueKey&gt;
+        &lt;defaultSearchField&gt;text&lt;/defaultSearchField&gt;
+        &lt;copyField source="*_text" dest="text"/&gt;
+        &lt;copyField source="*_text" dest="word"/&gt;
+        &lt;copyField source="*_facet" dest="word"/&gt;
+        &lt;copyField source="*_facet" dest="text"/&gt;
+        &lt;solrQueryParser defaultOperator="AND"/&gt;
+
</ins><span class="cx"> &lt;/schema&gt;
</span><span class="cx">\ No newline at end of file
</span></span></pre>
</div>
</div>
</body>
</html>