root/trunk/config/bwbuild/.defaults/schema.xml

Revision 3542 (checked in by douglm, 1 year ago)

Solr config files

Line 
1 <?xml version="1.0" encoding="UTF-8" ?>
2 <!--
3  Licensed to the Apache Software Foundation (ASF) under one or more
4  contributor license agreements.  See the NOTICE file distributed with
5  this work for additional information regarding copyright ownership.
6  The ASF licenses this file to You under the Apache License, Version 2.0
7  (the "License"); you may not use this file except in compliance with
8  the License.  You may obtain a copy of the License at
9
10      http://www.apache.org/licenses/LICENSE-2.0
11
12  Unless required by applicable law or agreed to in writing, software
13  distributed under the License is distributed on an "AS IS" BASIS,
14  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  See the License for the specific language governing permissions and
16  limitations under the License.
17 -->
18
19 <!-- 
20  This is the Solr schema file. This file should be named "schema.xml" and
21  should be in the conf directory under the solr home
22  (i.e. ./solr/conf/schema.xml by default)
23  or located where the classloader for the Solr webapp can find it.
24
25  This example schema is the recommended starting point for users.
26  It should be kept correct and concise, usable out-of-the-box.
27
28  For more information, on how to customize this file, please see
29  http://wiki.apache.org/solr/SchemaXml
30
31  PERFORMANCE NOTE: this schema includes many optional features and should not
32  be used for benchmarking.  To improve performance one could
33   - set stored="false" for all fields possible (esp large fields) when you
34     only need to search on the field but don't need to return the original
35     value.
36   - set indexed="false" if you don't need to search on the field, but only
37     return the field as a result of searching on other indexed fields.
38   - remove all unneeded copyField statements
39   - for best index size and searching performance, set "indexed" to false
40     for all general text fields, use copyField to copy them to the
41     catchall "text" field, and use that for searching.
42   - For maximum indexing performance, use the StreamingUpdateSolrServer
43     java client.
44   - Remember to run the JVM in server mode, and use a higher logging level
45     that avoids logging every request
46 -->
47
48 <schema name="bwpublic" version="1.5">
49   <!-- attribute "name" is the name of this schema and is only used for display purposes.
50        Applications should change this to reflect the nature of the search collection.
51        version="x.y" is Solr's version number for the schema syntax and semantics.  It should
52        not normally be changed by applications.
53        1.0: multiValued attribute did not exist, all fields are multiValued by nature
54        1.1: multiValued attribute introduced, false by default
55        1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
56        1.3: removed optional field compress feature
57        1.4: default auto-phrase (QueryParser feature) to off
58        1.5: omitNorms defaults to true for primitive field types (int, float, boolean, string...)
59      -->
60
61   <types>
62     <!-- field type definitions. The "name" attribute is
63        just a label to be used by field definitions.  The "class"
64        attribute and any other attributes determine the real
65        behavior of the fieldType.
66          Class names starting with "solr" refer to java classes in the
67        org.apache.solr.analysis package.
68     -->
69
70     <!-- "string" uses solr.StrField: the value is not analyzed, but indexed/stored verbatim. Declared with sortMissingLast="true". -->
71     <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
72
73     <!-- "boolean" uses solr.BoolField and accepts "true" or "false". Declared with sortMissingLast="true". -->
74     <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
75     <!-- Binary data type (solr.BinaryField). The data should be sent/retrieved as Base64 encoded Strings. -->
76     <fieldType name="binary" class="solr.BinaryField"/>
77
78     <!-- The optional sortMissingLast and sortMissingFirst attributes are
79          currently supported on types that are sorted internally as strings
80          and on numeric types.
81                This includes "string","boolean", and, as of 3.5 (and 4.x),
82                int, float, long, date, double, including the "Trie" variants.
83        - If sortMissingLast="true", then a sort on this field will cause documents
84          without the field to come after documents with the field,
85          regardless of the requested sort order (asc or desc).
86        - If sortMissingFirst="true", then a sort on this field will cause documents
87          without the field to come before documents with the field,
88          regardless of the requested sort order.
89        - If sortMissingLast="false" and sortMissingFirst="false" (the default),
90          then default lucene sorting will be used which places docs without the
91          field first in an ascending sort and last in a descending sort.
92     -->   
93
94     <!--
95       Default numeric field types (each declared with precisionStep="0"). For faster range queries, consider the tint/tfloat/tlong/tdouble types.
96     -->
97     <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
98     <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
99     <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
100     <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
101
102     <!--
103      Numeric field types that index each value at various levels of precision
104      to accelerate range queries when the number of values between the range
105      endpoints is large. See the javadoc for NumericRangeQuery for internal
106      implementation details.
107
108      Smaller precisionStep values (specified in bits) will lead to more tokens
109      indexed per value, slightly larger index size, and faster range queries.
110      A precisionStep of 0 disables indexing at different precision levels; the types below all use precisionStep="8".
111     -->
112     <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
113     <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
114     <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
115     <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
116
117     <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
118          is a more restricted form of the canonical representation of dateTime
119          http://www.w3.org/TR/xmlschema-2/#dateTime   
120          The trailing "Z" designates UTC time and is mandatory.
121          Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
122          All other components are mandatory.
123
124          Expressions can also be used to denote calculations that should be
125          performed relative to "NOW" to determine the value, e.g.:
126
127                NOW/HOUR
128                   ... Round to the start of the current hour
129                NOW-1DAY
130                   ... Exactly 1 day prior to now
131                NOW/DAY+6MONTHS+3DAYS
132                   ... 6 months and 3 days in the future from the start of
133                       the current day
134                      
135          Consult the DateField javadocs for more information.
136
137          Note: this type uses precisionStep="0". For faster range queries, consider the tdate type.
138       -->
139     <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
140
141     <!-- A Trie based date field (solr.TrieDateField with precisionStep="6") for faster date range queries and date faceting. -->
142     <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
143
144
145     <!--
146       Note:
147       These should only be used for compatibility with existing indexes (created with older Solr versions)
148       or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.
149
150       Plain numeric field types that store and index the text
151       value verbatim (and hence don't support range queries, since the
152       lexicographic ordering isn't equal to the numeric ordering)
153     <fieldType name="pint" class="solr.IntField"/>
154     <fieldType name="plong" class="solr.LongField"/>
155     <fieldType name="pfloat" class="solr.FloatField"/>
156     <fieldType name="pdouble" class="solr.DoubleField"/>
157     <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>
158     -->
159
160
161     <!--
162       Note:
163       These should only be used for compatibility with existing indexes (created with older Solr versions).
164       Use Trie based fields instead. As of Solr 3.5 and 4.x, Trie based fields support sortMissingFirst/Last
165
166       Numeric field types that manipulate the value into
167       a string value that isn't human-readable in its internal form,
168       but with a lexicographic ordering the same as the numeric ordering,
169       so that range queries work correctly.
170     <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
171     <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
172     <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
173     <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>
174     -->
175
176
177     <!-- The "RandomSortField" is not used to store or search any
178          data.  You can declare fields of this type in your schema
179          to generate pseudo-random orderings of your docs for sorting
180          purposes.  The ordering is generated based on the field name
181          and the version of the index.  As long as the index version
182          remains unchanged, and the same field name is reused,
183          the ordering of the docs will be consistent. 
184          If you want different pseudo-random orderings of documents
185          for the same version of the index, use a dynamicField and
186          change the name.
187      -->
188     <fieldType name="random" class="solr.RandomSortField" indexed="true" />
189
190     <!-- solr.TextField allows the specification of custom text analyzers
191          specified as a tokenizer and a list of token filters. Different
192          analyzers may be specified for indexing and querying.
193
194          The optional positionIncrementGap puts space between multiple fields of
195          this type on the same document, with the purpose of preventing false phrase
196          matching across fields.
197
198          For more info on customizing your analyzer chain, please see
199          http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
200      -->
201
202     <!-- One can also specify an existing Analyzer class that has a
203          default constructor via the class attribute on the analyzer element
204     <fieldType name="text_greek" class="solr.TextField">
205       <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
206     </fieldType>
207     -->
208
209     <!-- A text field that only splits on whitespace (WhitespaceTokenizer, no token filters) for exact matching of words -->
210     <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
211       <analyzer>
212         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
213       </analyzer>
214     </fieldType>
215
216     <!-- A general text field that has reasonable, generic
217          cross-language defaults: it tokenizes with StandardTokenizer,
218          removes stop words from case-insensitive "stopwords.txt"
219          (empty by default), and lowercases.  At query time only, it
220          also applies synonyms from synonyms.txt (after stop word removal). -->
221     <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
222       <analyzer type="index">
223         <tokenizer class="solr.StandardTokenizerFactory"/>
224         <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
225         <!-- in this example, we will only use synonyms at query time, so the index-time synonym filter stays disabled:
226         <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
227         -->
228         <filter class="solr.LowerCaseFilterFactory"/>
229       </analyzer>
230       <analyzer type="query">
231         <tokenizer class="solr.StandardTokenizerFactory"/>
232         <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
233         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
234         <filter class="solr.LowerCaseFilterFactory"/>
235       </analyzer>
236     </fieldType>
237
238     <!-- A text field with defaults appropriate for English: it
239          tokenizes with StandardTokenizer, removes English stop words
240          (lang/stopwords_en.txt), lowercases, applies EnglishPossessiveFilter,
241          protects words from protwords.txt, and finally applies Porter's stemming.
242          The query time analyzer also applies synonyms from synonyms.txt (before stop word removal). -->
243     <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
244       <analyzer type="index">
245         <tokenizer class="solr.StandardTokenizerFactory"/>
246         <!-- in this example, we will only use synonyms at query time, so the index-time synonym filter stays disabled:
247         <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
248         -->
249         <!-- Case insensitive stop word removal.
250           add enablePositionIncrements=true in both the index and query
251           analyzers to leave a 'gap' for more accurate phrase queries.
252         -->
253         <filter class="solr.StopFilterFactory"
254                 ignoreCase="true"
255                 words="lang/stopwords_en.txt"
256                 enablePositionIncrements="true"
257                 />
258         <filter class="solr.LowerCaseFilterFactory"/>
259         <filter class="solr.EnglishPossessiveFilterFactory"/>
260         <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
261         <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
262         <filter class="solr.EnglishMinimalStemFilterFactory"/>
263         -->
264         <filter class="solr.PorterStemFilterFactory"/>
265       </analyzer>
266       <analyzer type="query">
267         <tokenizer class="solr.StandardTokenizerFactory"/>
268         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
269         <filter class="solr.StopFilterFactory"
270                 ignoreCase="true"
271                 words="lang/stopwords_en.txt"
272                 enablePositionIncrements="true"
273                 />
274         <filter class="solr.LowerCaseFilterFactory"/>
275         <filter class="solr.EnglishPossessiveFilterFactory"/>
276         <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
277         <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
278         <filter class="solr.EnglishMinimalStemFilterFactory"/>
279         -->
280         <filter class="solr.PorterStemFilterFactory"/>
281       </analyzer>
282     </fieldType>
283
284     <!-- A text field with defaults appropriate for English, plus
285          aggressive word-splitting and autophrase features enabled.
286          This field is like text_en, except it tokenizes on whitespace,
287          has no EnglishPossessiveFilter, and adds WordDelimiterFilter to
288          enable splitting and matching of words on case-change, alpha numeric
289          boundaries, and non-alphanumeric chars.  This means certain compound
290          word cases will work, for example query "wi fi" will match
291          document "WiFi" or "wi-fi".  However, other cases will still
292          not match, for example if the query is "wifi" and the
293          document is "wi fi" or if the query is "wi-fi" and the
294          document is "wifi".  catenateWords/catenateNumbers are 1 at index time and 0 at query time.
295         -->
296     <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
297       <analyzer type="index">
298         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
299         <!-- in this example, we will only use synonyms at query time, so the index-time synonym filter stays disabled:
300         <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
301         -->
302         <!-- Case insensitive stop word removal.
303           add enablePositionIncrements=true in both the index and query
304           analyzers to leave a 'gap' for more accurate phrase queries.
305         -->
306         <filter class="solr.StopFilterFactory"
307                 ignoreCase="true"
308                 words="lang/stopwords_en.txt"
309                 enablePositionIncrements="true"
310                 />
311         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
312         <filter class="solr.LowerCaseFilterFactory"/>
313         <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
314         <filter class="solr.PorterStemFilterFactory"/>
315       </analyzer>
316       <analyzer type="query">
317         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
318         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
319         <filter class="solr.StopFilterFactory"
320                 ignoreCase="true"
321                 words="lang/stopwords_en.txt"
322                 enablePositionIncrements="true"
323                 />
324         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
325         <filter class="solr.LowerCaseFilterFactory"/>
326         <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
327         <filter class="solr.PorterStemFilterFactory"/>
328       </analyzer>
329     </fieldType>
330
331     <!-- Less flexible matching, but fewer false matches.  Probably not ideal for product names,
332          but may be good for SKUs.  Can insert dashes in the wrong place and still match.  One analyzer is used for both index and query. -->
333     <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
334       <analyzer>
335         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
336         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
337         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
338         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
339         <filter class="solr.LowerCaseFilterFactory"/>
340         <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
341         <filter class="solr.EnglishMinimalStemFilterFactory"/>
342         <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
343              possible with WordDelimiterFilter in conjunction with stemming. -->
344         <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
345       </analyzer>
346     </fieldType>
347
348     <!-- Like text_general, except the index analyzer adds ReversedWildcardFilter (withOriginal="true"), which reverses the characters of
349          each token, to enable more efficient leading wildcard queries. The query analyzer has no ReversedWildcardFilter. -->
350     <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
351       <analyzer type="index">
352         <tokenizer class="solr.StandardTokenizerFactory"/>
353         <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
354         <filter class="solr.LowerCaseFilterFactory"/>
355         <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
356            maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
357       </analyzer>
358       <analyzer type="query">
359         <tokenizer class="solr.StandardTokenizerFactory"/>
360         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
361         <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
362         <filter class="solr.LowerCaseFilterFactory"/>
363       </analyzer>
364     </fieldType>
365
366     <!-- charFilter + WhitespaceTokenizer  -->
367     <!--
368     <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
369       <analyzer>
370         <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
371         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
372       </analyzer>
373     </fieldType>
374     -->
375
376     <!-- This is an example of using the KeywordTokenizer along
377          with various TokenFilterFactories to produce a sortable field
378          that does not include some properties of the source text
379       -->
380     <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
381       <analyzer>
382         <!-- KeywordTokenizer does no actual tokenizing, so the entire
383              input string is preserved as a single token
384           -->
385         <tokenizer class="solr.KeywordTokenizerFactory"/>
386         <!-- The LowerCase TokenFilter does what you expect, which can be
387              useful when you want your sorting to be case insensitive
388           -->
389         <filter class="solr.LowerCaseFilterFactory" />
390         <!-- The TrimFilter removes any leading or trailing whitespace -->
391         <filter class="solr.TrimFilterFactory" />
392         <!-- The PatternReplaceFilter gives you the flexibility to use
393              Java Regular expression to replace any sequence of characters
394              matching a pattern with an arbitrary replacement string,
395              which may include back references to portions of the original
396              string matched by the pattern.  Here every character that is
397              not a-z is replaced with the empty string.
398              See the Java Regular Expression documentation for more
399              information on pattern and replacement string syntax.
400              
401              http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
402           -->
403         <filter class="solr.PatternReplaceFilterFactory"
404                 pattern="([^a-z])" replacement="" replace="all"
405         />
406       </analyzer>
407     </fieldType>
408     <!-- Phonetic field type: StandardTokenizer followed by DoubleMetaphoneFilter (inject="false"). Indexed but not stored. -->
409     <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" >
410       <analyzer>
411         <tokenizer class="solr.StandardTokenizerFactory"/>
412         <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
413       </analyzer>
414     </fieldType>
415
416     <fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" >
417       <analyzer>
418         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
419         <!--
420         The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
421         a token of "foo|1.4"  would be indexed as "foo" with a payload of 1.4f
422         Attributes of the DelimitedPayloadTokenFilterFactory :
423          "delimiter" - a one character delimiter. Default is | (pipe)
424          "encoder" - how to encode the following value into a payload
425             float -> org.apache.lucene.analysis.payloads.FloatEncoder,
426             integer -> o.a.l.a.p.IntegerEncoder
427             identity -> o.a.l.a.p.IdentityEncoder
428             Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
429          -->
430         <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
431       </analyzer>
432     </fieldType>
433
434     <!-- Lowercases the entire field value, keeping it as a single token (KeywordTokenizer followed by LowerCaseFilter). -->
435     <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
436       <analyzer>
437         <tokenizer class="solr.KeywordTokenizerFactory"/>
438         <filter class="solr.LowerCaseFilterFactory" />
439       </analyzer>
440     </fieldType>
441     <!-- A text field tokenized with PathHierarchyTokenizer (default settings, no token filters). -->
442     <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
443       <analyzer>
444         <tokenizer class="solr.PathHierarchyTokenizerFactory"/>
445       </analyzer>
446     </fieldType>
447    
448
449     <!-- Since fields of this type are by default not stored or indexed
450          (stored="false" indexed="false"), any data added to them will be ignored outright.  -->
451     <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
452
453     <!-- This point type indexes the coordinates as separate fields (subFields)
454       If subFieldType is defined, it references a type, and a dynamic field
455       definition is created matching *___<typename>.  Alternately, if
456       subFieldSuffix is defined, that is used to create the subFields.
457       Example: if subFieldType="double", then the coordinates would be
458         indexed in fields myloc_0___double,myloc_1___double.
459       Example: if subFieldSuffix="_d" (as configured here) then the coordinates
460         would be indexed in fields myloc_0_d,myloc_1_d
461       The subFields are an implementation detail of the fieldType, and end
462       users normally should not need to know about them.
463      -->
464     <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
465
466     <!-- A specialized field (solr.LatLonType, subFieldSuffix="_coordinate") for geospatial search. If indexed, this fieldType must not be multivalued. -->
467     <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
468
469    <!--
470     A Geohash is a compact representation of a latitude/longitude pair in a single field.
471     See http://wiki.apache.org/solr/SpatialSearch
472    -->
473     <fieldType name="geohash" class="solr.GeoHashField"/>
474    <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
475         Parameters:
476           defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
477           precisionStep:   Specifies the precisionStep for the TrieLong field used for the amount
478           providerClass:   Lets you plug in other exchange provider backend:
479                            solr.FileExchangeRateProvider is the default and takes one parameter:
480                              currencyConfig: name of an xml file holding exchange rates
481    -->
482     <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
483              
484    <!-- some examples for different languages (generally ordered by ISO code) -->
485
486     <!-- Arabic: StandardTokenizer, lowercasing, stop words from lang/stopwords_ar.txt, then Arabic normalization and stemming -->
487     <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
488       <analyzer>
489         <tokenizer class="solr.StandardTokenizerFactory"/>
490         <!-- for any non-arabic -->
491         <filter class="solr.LowerCaseFilterFactory"/>
492         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" enablePositionIncrements="true"/>
493         <!-- normalizes ﻯ to ﻱ, etc -->
494         <filter class="solr.ArabicNormalizationFilterFactory"/>
495         <filter class="solr.ArabicStemFilterFactory"/>
496       </analyzer>
497     </fieldType>
498
499     <!-- Bulgarian: StandardTokenizer, lowercasing, stop words from lang/stopwords_bg.txt, then BulgarianStemFilter -->
500     <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
501       <analyzer>
502         <tokenizer class="solr.StandardTokenizerFactory"/>
503         <filter class="solr.LowerCaseFilterFactory"/>
504         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" enablePositionIncrements="true"/>
505         <filter class="solr.BulgarianStemFilterFactory"/>       
506       </analyzer>
507     </fieldType>
508    
509     <!-- Catalan: StandardTokenizer, elision removal, lowercasing, stop words from lang/stopwords_ca.txt, then the Snowball "Catalan" stemmer -->
510     <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
511       <analyzer>
512         <tokenizer class="solr.StandardTokenizerFactory"/>
513         <!-- removes l', etc (articles listed in lang/contractions_ca.txt) -->
514         <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
515         <filter class="solr.LowerCaseFilterFactory"/>
516         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" enablePositionIncrements="true"/>
517         <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>       
518       </analyzer>
519     </fieldType>
520    
521     <!-- CJK bigram: StandardTokenizer, width normalization, lowercasing, then CJKBigramFilter (see text_ja for a Japanese configuration using morphological analysis) -->
522     <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
523       <analyzer>
524         <tokenizer class="solr.StandardTokenizerFactory"/>
525         <!-- normalize width before bigram, as e.g. half-width dakuten combine  -->
526         <filter class="solr.CJKWidthFilterFactory"/>
527         <!-- for any non-CJK -->
528         <filter class="solr.LowerCaseFilterFactory"/>
529         <filter class="solr.CJKBigramFilterFactory"/>
530       </analyzer>
531     </fieldType>
532
533     <!-- Czech: StandardTokenizer, lowercasing, stop words from lang/stopwords_cz.txt, then CzechStemFilter -->
534     <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
535       <analyzer>
536         <tokenizer class="solr.StandardTokenizerFactory"/>
537         <filter class="solr.LowerCaseFilterFactory"/>
538         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" enablePositionIncrements="true"/>
539         <filter class="solr.CzechStemFilterFactory"/>       
540       </analyzer>
541     </fieldType>
542    
543     <!-- Danish: StandardTokenizer, lowercasing, stop words from lang/stopwords_da.txt (format="snowball"), then the Snowball "Danish" stemmer -->
544     <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
545       <analyzer>
546         <tokenizer class="solr.StandardTokenizerFactory"/>
547         <filter class="solr.LowerCaseFilterFactory"/>
548         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" enablePositionIncrements="true"/>
549         <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>       
550       </analyzer>
551     </fieldType>
552    
553     <!-- German: StandardTokenizer, lowercasing, stop words from lang/stopwords_de.txt (format="snowball"), German normalization, then GermanLightStemFilter -->
554     <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
555       <analyzer>
556         <tokenizer class="solr.StandardTokenizerFactory"/>
557         <filter class="solr.LowerCaseFilterFactory"/>
558         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" enablePositionIncrements="true"/>
559         <filter class="solr.GermanNormalizationFilterFactory"/>
560         <filter class="solr.GermanLightStemFilterFactory"/>
561         <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
562         <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
563       </analyzer>
564     </fieldType>
565    
566     <!-- Greek: StandardTokenizer, Greek-specific lowercasing, stop words from lang/stopwords_el.txt (ignoreCase="false"), then GreekStemFilter -->
567     <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
568       <analyzer>
569         <tokenizer class="solr.StandardTokenizerFactory"/>
570         <!-- greek specific lowercase for sigma -->
571         <filter class="solr.GreekLowerCaseFilterFactory"/>
572         <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
573         <filter class="solr.GreekStemFilterFactory"/>
574       </analyzer>
575     </fieldType>
576    
577     <!-- Spanish: StandardTokenizer, lowercase, snowball-format stopwords from lang/stopwords_es.txt, then SpanishLightStemFilterFactory (Snowball alternative is commented out below) -->
578     <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
579       <analyzer>
580         <tokenizer class="solr.StandardTokenizerFactory"/>
581         <filter class="solr.LowerCaseFilterFactory"/>
582         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" enablePositionIncrements="true"/>
583         <filter class="solr.SpanishLightStemFilterFactory"/>
584         <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
585       </analyzer>
586     </fieldType>
587    
588     <!-- Basque: StandardTokenizer, lowercase, stopwords from lang/stopwords_eu.txt, then SnowballPorterFilterFactory (language="Basque") -->
589     <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
590       <analyzer>
591         <tokenizer class="solr.StandardTokenizerFactory"/>
592         <filter class="solr.LowerCaseFilterFactory"/>
593         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" enablePositionIncrements="true"/>
594         <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
595       </analyzer>
596     </fieldType>
597    
598     <!-- Persian: PersianCharFilterFactory ahead of StandardTokenizer, lowercase, Arabic then Persian normalization, stopwords from lang/stopwords_fa.txt last; no stem filter is configured -->
599     <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
600       <analyzer>
601         <!-- for ZWNJ -->
602         <charFilter class="solr.PersianCharFilterFactory"/>
603         <tokenizer class="solr.StandardTokenizerFactory"/>
604         <filter class="solr.LowerCaseFilterFactory"/>
605         <filter class="solr.ArabicNormalizationFilterFactory"/>
606         <filter class="solr.PersianNormalizationFilterFactory"/>
607         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" enablePositionIncrements="true"/>
608       </analyzer>
609     </fieldType>
610    
611     <!-- Finnish: StandardTokenizer, lowercase, snowball-format stopwords from lang/stopwords_fi.txt, then SnowballPorterFilterFactory (language="Finnish"); a light stemmer alternative is commented out below -->
612     <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
613       <analyzer>
614         <tokenizer class="solr.StandardTokenizerFactory"/>
615         <filter class="solr.LowerCaseFilterFactory"/>
616         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" enablePositionIncrements="true"/>
617         <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
618         <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
619       </analyzer>
620     </fieldType>
621    
622     <!-- French: StandardTokenizer, ElisionFilterFactory (lang/contractions_fr.txt) ahead of lowercase, snowball-format stopwords from lang/stopwords_fr.txt, then FrenchLightStemFilterFactory (alternatives are commented out below) -->
623     <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
624       <analyzer>
625         <tokenizer class="solr.StandardTokenizerFactory"/>
626         <!-- removes l', etc -->
627         <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
628         <filter class="solr.LowerCaseFilterFactory"/>
629         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" enablePositionIncrements="true"/>
630         <filter class="solr.FrenchLightStemFilterFactory"/>
631         <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
632         <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
633       </analyzer>
634     </fieldType>
635    
636     <!-- Irish: StandardTokenizer, ElisionFilterFactory, a first StopFilterFactory over lang/hyphenations_ga.txt, IrishLowerCaseFilterFactory, a second StopFilterFactory over lang/stopwords_ga.txt, then SnowballPorterFilterFactory (language="Irish") -->
637     <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
638       <analyzer>
639         <tokenizer class="solr.StandardTokenizerFactory"/>
640         <!-- removes d', etc -->
641         <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
642         <!-- removes n-, etc. position increments is intentionally false! -->
643         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt" enablePositionIncrements="false"/>
644         <filter class="solr.IrishLowerCaseFilterFactory"/>
645         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt" enablePositionIncrements="true"/>
646         <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
647       </analyzer>
648     </fieldType>
649    
650     <!-- Galician: StandardTokenizer, lowercase, stopwords from lang/stopwords_gl.txt, then GalicianStemFilterFactory (minimal stemmer alternative is commented out below) -->
651     <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
652       <analyzer>
653         <tokenizer class="solr.StandardTokenizerFactory"/>
654         <filter class="solr.LowerCaseFilterFactory"/>
655         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" enablePositionIncrements="true"/>
656         <filter class="solr.GalicianStemFilterFactory"/>
657         <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
658       </analyzer>
659     </fieldType>
660    
661     <!-- Hindi: StandardTokenizer, lowercase, IndicNormalizationFilterFactory, HindiNormalizationFilterFactory, stopwords from lang/stopwords_hi.txt, then HindiStemFilterFactory -->
662     <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
663       <analyzer>
664         <tokenizer class="solr.StandardTokenizerFactory"/>
665         <filter class="solr.LowerCaseFilterFactory"/>
666         <!-- normalizes unicode representation -->
667         <filter class="solr.IndicNormalizationFilterFactory"/>
668         <!-- normalizes variation in spelling -->
669         <filter class="solr.HindiNormalizationFilterFactory"/>
670         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" enablePositionIncrements="true"/>
671         <filter class="solr.HindiStemFilterFactory"/>
672       </analyzer>
673     </fieldType>
674    
675     <!-- Hungarian: StandardTokenizer, lowercase, snowball-format stopwords from lang/stopwords_hu.txt, then SnowballPorterFilterFactory (language="Hungarian"); a light stemmer alternative is commented out below -->
676     <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
677       <analyzer>
678         <tokenizer class="solr.StandardTokenizerFactory"/>
679         <filter class="solr.LowerCaseFilterFactory"/>
680         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" enablePositionIncrements="true"/>
681         <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
682         <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->   
683       </analyzer>
684     </fieldType>
685    
686     <!-- Armenian: StandardTokenizer, lowercase, stopwords from lang/stopwords_hy.txt, then SnowballPorterFilterFactory (language="Armenian") -->
687     <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
688       <analyzer>
689         <tokenizer class="solr.StandardTokenizerFactory"/>
690         <filter class="solr.LowerCaseFilterFactory"/>
691         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" enablePositionIncrements="true"/>
692         <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
693       </analyzer>
694     </fieldType>
695    
696     <!-- Indonesian: StandardTokenizer, lowercase, stopwords from lang/stopwords_id.txt, then IndonesianStemFilterFactory with stemDerivational="true" -->
697     <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
698       <analyzer>
699         <tokenizer class="solr.StandardTokenizerFactory"/>
700         <filter class="solr.LowerCaseFilterFactory"/>
701         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" enablePositionIncrements="true"/>
702         <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
703         <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
704       </analyzer>
705     </fieldType>
706    
707     <!-- Italian: StandardTokenizer, ElisionFilterFactory (lang/contractions_it.txt) ahead of lowercase, snowball-format stopwords from lang/stopwords_it.txt, then ItalianLightStemFilterFactory (Snowball alternative is commented out below) -->
708     <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
709       <analyzer>
710         <tokenizer class="solr.StandardTokenizerFactory"/>
711         <!-- removes l', etc -->
712         <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
713         <filter class="solr.LowerCaseFilterFactory"/>
714         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" enablePositionIncrements="true"/>
715         <filter class="solr.ItalianLightStemFilterFactory"/>
716         <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
717       </analyzer>
718     </fieldType>
719    
720     <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming)
721
722          NOTE: If you want to optimize search for precision, use default operator AND in your query
723          parser config with <solrQueryParser defaultOperator="AND"/> further down in this file.  Use
724          OR if you would like to optimize for recall (default).  That solrQueryParser element is marked DEPRECATED further down in this file, which points to the "q.op" request handler parameter instead.
725     -->
726     <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
727       <analyzer>
728       <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer)
729
730            Kuromoji has a search mode (default) that does segmentation useful for search.  A heuristic
731            is used to segment compounds into their parts and the compound itself is kept as a synonym.
732
733            Valid values for attribute mode are:
734               normal: regular segmentation
735               search: segmentation useful for search, with compounds kept as synonyms (default)
736             extended: same as search mode, but unigrams unknown words (experimental)
737
738            For some applications it might be good to use search mode for indexing and normal mode for
739            queries to reduce recall and prevent parts of compounds from being matched and highlighted.
740            Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query.
741
742            Kuromoji also has a convenient user dictionary feature that allows overriding the statistical
743            model with your own entries for segmentation, part-of-speech tags and readings without a need
744            to specify weights.  Notice that user dictionaries have not been subject to extensive testing.
745
746            User dictionary attributes are:
747                      userDictionary: user dictionary filename
748              userDictionaryEncoding: user dictionary encoding (default is UTF-8)
749
750            See lang/userdict_ja.txt for a sample user dictionary file.
751
752            See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
753         -->
754         <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
755         <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
756         <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
757         <filter class="solr.JapaneseBaseFormFilterFactory"/>
758         <!-- Removes tokens with certain part-of-speech tags -->
759         <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" enablePositionIncrements="true"/>
760         <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
761         <filter class="solr.CJKWidthFilterFactory"/>
762         <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking -->
763         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" enablePositionIncrements="true" />
764         <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
765         <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
766         <!-- Lower-cases romaji characters -->
767         <filter class="solr.LowerCaseFilterFactory"/>
768       </analyzer>
769     </fieldType>
770    
771     <!-- Latvian: StandardTokenizer, lowercase, stopwords from lang/stopwords_lv.txt, then LatvianStemFilterFactory -->
772     <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
773       <analyzer>
774         <tokenizer class="solr.StandardTokenizerFactory"/>
775         <filter class="solr.LowerCaseFilterFactory"/>
776         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" enablePositionIncrements="true"/>
777         <filter class="solr.LatvianStemFilterFactory"/>
778       </analyzer>
779     </fieldType>
780    
781     <!-- Dutch: StandardTokenizer, lowercase, snowball-format stopwords from lang/stopwords_nl.txt, StemmerOverrideFilterFactory (lang/stemdict_nl.txt) placed ahead of SnowballPorterFilterFactory (language="Dutch") -->
782     <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
783       <analyzer>
784         <tokenizer class="solr.StandardTokenizerFactory"/>
785         <filter class="solr.LowerCaseFilterFactory"/>
786         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" enablePositionIncrements="true"/>
787         <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
788         <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
789       </analyzer>
790     </fieldType>
791    
792     <!-- Norwegian: StandardTokenizer, lowercase, snowball-format stopwords from lang/stopwords_no.txt, then SnowballPorterFilterFactory (language="Norwegian"); light and minimal alternatives are commented out below -->
793     <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
794       <analyzer>
795         <tokenizer class="solr.StandardTokenizerFactory"/>
796         <filter class="solr.LowerCaseFilterFactory"/>
797         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" enablePositionIncrements="true"/>
798         <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
799         <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> -->
800         <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> -->
801       </analyzer>
802     </fieldType>
803    
804     <!-- Portuguese: StandardTokenizer, lowercase, snowball-format stopwords from lang/stopwords_pt.txt, then PortugueseLightStemFilterFactory (three alternative stemmers are commented out below) -->
805     <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
806       <analyzer>
807         <tokenizer class="solr.StandardTokenizerFactory"/>
808         <filter class="solr.LowerCaseFilterFactory"/>
809         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" enablePositionIncrements="true"/>
810         <filter class="solr.PortugueseLightStemFilterFactory"/>
811         <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
812         <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
813         <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->
814       </analyzer>
815     </fieldType>
816    
817     <!-- Romanian: StandardTokenizer, lowercase, stopwords from lang/stopwords_ro.txt, then SnowballPorterFilterFactory (language="Romanian") -->
818     <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
819       <analyzer>
820         <tokenizer class="solr.StandardTokenizerFactory"/>
821         <filter class="solr.LowerCaseFilterFactory"/>
822         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" enablePositionIncrements="true"/>
823         <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
824       </analyzer>
825     </fieldType>
826    
827     <!-- Russian: StandardTokenizer, lowercase, snowball-format stopwords from lang/stopwords_ru.txt, then SnowballPorterFilterFactory (language="Russian"); a light stemmer alternative is commented out below -->
828     <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
829       <analyzer>
830         <tokenizer class="solr.StandardTokenizerFactory"/>
831         <filter class="solr.LowerCaseFilterFactory"/>
832         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" enablePositionIncrements="true"/>
833         <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
834         <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> -->
835       </analyzer>
836     </fieldType>
837    
838     <!-- Swedish: StandardTokenizer, lowercase, snowball-format stopwords from lang/stopwords_sv.txt, then SnowballPorterFilterFactory (language="Swedish"); a light stemmer alternative is commented out below -->
839     <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
840       <analyzer>
841         <tokenizer class="solr.StandardTokenizerFactory"/>
842         <filter class="solr.LowerCaseFilterFactory"/>
843         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" enablePositionIncrements="true"/>
844         <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
845         <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
846       </analyzer>
847     </fieldType>
848    
849     <!-- Thai: StandardTokenizer, lowercase, ThaiWordFilterFactory, then stopwords from lang/stopwords_th.txt; no stem filter is configured -->
850     <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
851       <analyzer>
852         <tokenizer class="solr.StandardTokenizerFactory"/>
853         <filter class="solr.LowerCaseFilterFactory"/>
854         <filter class="solr.ThaiWordFilterFactory"/>
855         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" enablePositionIncrements="true"/>
856       </analyzer>
857     </fieldType>
858    
859     <!-- Turkish: StandardTokenizer, TurkishLowerCaseFilterFactory, stopwords from lang/stopwords_tr.txt matched with ignoreCase="false", then SnowballPorterFilterFactory (language="Turkish") -->
860     <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
861       <analyzer>
862         <tokenizer class="solr.StandardTokenizerFactory"/>
863         <filter class="solr.TurkishLowerCaseFilterFactory"/>
864         <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" enablePositionIncrements="true"/>
865         <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
866       </analyzer>
867     </fieldType>
868  </types>
869
870
871  <fields>
872    <!-- Valid attributes for fields:
873      name: mandatory - the name for the field
874      type: mandatory - the name of a previously defined type from the
875        <types> section
876      indexed: true if this field should be indexed (searchable or sortable)
877      stored: true if this field should be retrievable
878      multiValued: true if this field may contain multiple values per document
879      omitNorms: (expert) set to true to omit the norms associated with
880        this field (this disables length normalization and index-time
881        boosting for the field, and saves some memory).  Only full-text
882        fields or fields that need an index-time boost need norms.
883        Norms are omitted for primitive (non-analyzed) types by default.
884      termVectors: [false] set to true to store the term vector for a
885        given field.
886        When using MoreLikeThis, fields used for similarity should be
887        stored for best performance.
888      termPositions: Store position information with the term vector. 
889        This will increase storage costs.
890      termOffsets: Store offset information with the term vector. This
891        will increase storage costs.
892      default: a value that should be used if no value is specified
893        when adding a document.
894    -->
895
896    <!-- key is a manufactured multi-part string - will be unique; it is named as the schema's uniqueKey after the fields section -->
897    <field name="key" type="string" indexed="true" stored="true" required="true" />
898
899    <!-- type of the indexed entity - related to the class of object (calendar, event, etc.) -->
900    <field name="itemType" type="string" indexed="true" stored="true" required="true" />
901
902    <!-- type of an indexed event class - may be task, event, etc.; not marked required, unlike the other fields in this group -->
903    <field name="eventType" type="string" indexed="true" stored="true" />
904
905    <!-- name of the indexed entity -->
906    <field name="name" type="string" indexed="true" stored="true" required="true" />
907
908    <!-- path to the indexed entity -->
909    <field name="path" type="string" indexed="true" stored="true" required="true" />
910
911    <!--  Event/todo/journal fields.  NOTE(review): several fields below give only stored= and no indexed=; whether they are searchable depends on defaults not shown here - confirm  -->
912
913    <field name="uid" type="string" stored="true" />
914
915    <!-- start_utc and end_utc define the range
916         start_local is the local time in the entity - may be null
917         start_tzid is the timezone - may also be null
918         start_floating is the float flag (boolean); the end_* fields below mirror the start_* fields
919      -->
920    <field name="start_utc" type="date" indexed="true" stored="true" />
921    <field name="start_local" type="string" stored="true" />
922    <field name="start_tzid" type="string" stored="true" />
923    <field name="start_floating" type="boolean" stored="true" />
924
925    <field name="end_utc" type="date" indexed="true" stored="true" />
926    <field name="end_local" type="string" stored="true" />
927    <field name="end_tzid" type="string" stored="true" />
928    <field name="end_floating" type="boolean" stored="true" />
929
930    <field name="start_present" type="boolean" stored="true" />
931    <field name="end_type" type="string" stored="true" />
932
933    <field name="recurrenceid" type="string" stored="true" />
934
935    <field name="comment" type="text_general" indexed="true" stored="true"/>
936    <field name="contact" type="text_general" indexed="true" stored="true"/>
937
938    <field name="cost" type="string" stored="true"/>
939    <field name="image_url" type="string" stored="true"/>
940    <field name="thumb_image_url" type="string" stored="true"/>
941
942    <field name="location_str" type="text_general" indexed="true" stored="true"/>
943    <field name="location_uid" type="string" indexed="true" stored="true"/>
944    <field name="location" type="location" indexed="true" stored="true"/>
945
946    <field name="resources" type="text_general" indexed="true" stored="true" multiValued="true"/>
947    <field name="topical_area" type="string" indexed="true" stored="true" multiValued="true"/>
948
949    <!--  Common fields.  NOTE(review): category_uid sets indexed= but no stored=, unlike its neighbours - confirm the omission is intended  -->
950
951    <field name="summary" type="text_general" indexed="true" stored="true"/>
952    <field name="description" type="text_general" indexed="true" stored="true"/>
953
954    <field name="created" type="date" indexed="true" stored="true"/>
955    <field name="last_modified" type="date" indexed="true" stored="true"/>
956    <field name="category" type="text_general" indexed="true" stored="true" multiValued="true"/>
957    <field name="category_uid" type="string" indexed="true" multiValued="true"/>
958
959    <field name="creator" type="string" indexed="true" stored="true" required="true" />
960    <field name="owner" type="string" indexed="true" stored="true" required="true" />
961
962    <!-- catchall field, containing all other searchable text fields (implemented
963         via copyField further on in this schema) -->
964    <field name="default" type="text_general" indexed="true" stored="false" multiValued="true"/>
965
966    <!-- catchall text field that indexes tokens both normally and in reverse for efficient
967         leading wildcard queries.  NOTE(review): no copyField visible in this schema targets default_rev - confirm how it is populated. -->
968    <field name="default_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
969
970    <!-- (disabled leftover example) non-tokenized version of manufacturer to make it easier to sort or group
971         results by manufacturer.  copied from "manu" via copyField; no "manu" field is defined in this fields section
972    <field name="manu_exact" type="string" indexed="true" stored="false"/>
973     -->
974  
975    <!-- A "timestamp" field using
976         a default value of "NOW" to indicate when each document was indexed.
977      -->
978    <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
979
980    <!-- Dynamic field definitions.  If a field name is not found, dynamicFields
981         will be used if the name matches any of the patterns.
982         RESTRICTION: the glob-like pattern in the name attribute must have
983         a "*" only at the start or the end.
984         EXAMPLE:  name="*_i" will match any field ending in _i (like myid_i, z_i)
985         Longer patterns will be matched first.  If patterns of equal length
986         both match, the first one appearing in the schema will be used.  -->
987    <dynamicField name="*_i"  type="int"    indexed="true"  stored="true"/>
988    <dynamicField name="*_s"  type="string"  indexed="true"  stored="true"/>
989    <dynamicField name="*_l"  type="long"   indexed="true"  stored="true"/>
990    <dynamicField name="*_t"  type="text_general"    indexed="true"  stored="true"/>
991    <dynamicField name="*_txt" type="text_general"    indexed="true"  stored="true" multiValued="true"/>
992    <dynamicField name="*_en"  type="text_en"    indexed="true"  stored="true" multiValued="true" />
993    <dynamicField name="*_b"  type="boolean" indexed="true"  stored="true"/>
994    <dynamicField name="*_f"  type="float"  indexed="true"  stored="true"/>
995    <dynamicField name="*_d"  type="double" indexed="true"  stored="true"/>
996
997    <!-- Type used to index the lat and lon components for the "location" FieldType -->
998    <dynamicField name="*_coordinate"  type="tdouble" indexed="true"  stored="false"/>
999
1000    <dynamicField name="*_dt" type="date"    indexed="true"  stored="true"/>
1001    <dynamicField name="*_p"  type="location" indexed="true" stored="true"/>
1002
1003    <!-- some trie-coded dynamic fields for faster range queries -->
1004    <dynamicField name="*_ti" type="tint"    indexed="true"  stored="true"/>
1005    <dynamicField name="*_tl" type="tlong"   indexed="true"  stored="true"/>
1006    <dynamicField name="*_tf" type="tfloat"  indexed="true"  stored="true"/>
1007    <dynamicField name="*_td" type="tdouble" indexed="true"  stored="true"/>
1008    <dynamicField name="*_tdt" type="tdate"  indexed="true"  stored="true"/>
1009    
1010    <!-- <dynamicField name="*_pi"  type="pint"    indexed="true"  stored="true"/> -->
1011    <dynamicField name="*_c"   type="currency" indexed="true"  stored="true"/>
1012
1013    <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
1014    <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
1015
1016    <dynamicField name="random_*" type="random" />
1017
1018    <!-- Uncomment the following to ignore any fields that don't already match an existing
1019         field name or dynamic field, rather than reporting them as an error.
1020         Alternatively, change the type="ignored" to some other type e.g. "text" if you want
1021         unknown fields indexed and/or stored by default -->
1022    <!--dynamicField name="*" type="ignored" multiValued="true" /-->
1023    
1024  </fields>
1025
1026  <!-- Field to use to determine and enforce document uniqueness (here the "key" field).
1027       Unless this field is marked with required="false", it will be a required field
1028    -->
1029  <uniqueKey>key</uniqueKey>
1030
1031   <!-- field for the QueryParser to use when an explicit fieldname is absent
1032  DEPRECATED: specify "df" in your request handler instead.
1033  <defaultSearchField>text</defaultSearchField> -->
1034
1035   <!-- SolrQueryParser configuration: defaultOperator="AND|OR"
1036  DEPRECATED: specify "q.op" in your request handler to switch from the default of OR.
1037  <solrQueryParser defaultOperator="OR"/> -->
1038
1039   <!-- copyField commands copy one field to another at the time a document
1040         is added to the index.  It's used either to index the same field differently,
1041         or to add multiple fields to the same field for easier/faster searching.  The rules below all feed the "default" field.  -->
1042
1043    <copyField source="comment" dest="default"/>
1044    <copyField source="contact" dest="default"/>
1045    <copyField source="location_str" dest="default"/>
1046    <copyField source="resources" dest="default"/>
1047    <copyField source="summary" dest="default"/>
1048    <copyField source="description" dest="default"/>
1049
1050    <!-- Above, multiple source fields are copied to the "default" field.
1051           Another way to map multiple source fields to the same
1052           destination field is to use the dynamic field syntax.
1053           copyField also supports a maxChars to copy setting.  -->
1054            
1055    <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
1056
1057    <!-- copy name to alphaNameSort, a field designed for sorting by name -->
1058    <!-- <copyField source="name" dest="alphaNameSort"/> -->
1059  
1060
1061  <!-- Similarity is the scoring routine for each document vs. a query.
1062       A custom similarity may be specified here, but the default is fine
1063       for most applications.  -->
1064  <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
1065  <!-- ... OR ...
1066       Specify a SimilarityFactory class name implementation
1067       allowing parameters to be used.
1068  -->
1069  <!--
1070  <similarity class="com.example.solr.CustomSimilarityFactory">
1071    <str name="paramkey">param value</str>
1072  </similarity>
1073  -->
1074
1075
1076 </schema>
Note: See TracBrowser for help on using the browser.