I know the documents I’m indexing are written in Spanish, and adding the following filters to my field definition, I believe I have resolved my problem:
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="Spanish" />
In other words, my searchable content is defined thus:
<field name=“text" type="text_general" indexed="true" stored="true" multiValued="false" />
And “text_general” is defined to include the filters in both the index and query sections:
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory" />
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="Spanish" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory" />
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true" />
<filter class="solr.LowerCaseFilterFactory" />
<filter class="solr.SnowballPorterFilterFactory" language="Spanish" />
</analyzer>
</fieldType>
|