Hi there,
I have a question about the way Lucene computes the length norm (or field norm) for its documents.
My documents are indexed using Solr.
These are the documents that were indexed (ignore 'score'; it is not part of the document itself):
<doc>
<float name="score">1.00711</float>
<str name="_id">ejn01:2560000000075596</str>
<str name="title">Journal of neurology research</str>
</doc>
<doc>
<float name="score">1.00711</float>
<str name="_id">ejn01:954925518616</str>
<str name="title">Journal of neurology</str>
</doc>
The field "title" has the following definition in schema.xml:
<fieldType name="utf8text" class="solr.TextField" positionIncrementGap="100" omitNorms="false">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="1024"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" format="solr" ignoreCase="false" expand="true" tokenizerFactory="solr.WhitespaceTokenizerFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="1024"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" format="solr" ignoreCase="false" expand="true" tokenizerFactory="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>
If I use the query "journal of neurology", both documents have the same score, although the second document is more exact. Supplying a phrase query does not fix the issue. I also see that the computed fieldNorm is "0.5" for both documents. Does this have something to do with the loss of precision when storing the length norm into one byte?
These are all the supplied parameters (defaults in solrconfig.xml):
<str name="lowercaseOperators">false</str>
<str name="mm">-10%</str>
<str name="pf">author^3 title^2</str>
<str name="sort">score desc</str>
<arr name="bq">
<str>source:ser01^10</str>
<str>source:ejn01^10</str>
<str>(*:* -type:article)^999</str>
</arr>
<str name="echoParams">all</str>
<str name="df">all</str>
<str name="tie">0</str>
<str name="qf">
author^15 title^10 subject^1 summary^1 library^1 location^1 publisher^1 place_published^1 issn^1 isbn^1
</str>
<str name="q.alt">*:*</str>
<str name="ps">2</str>
<str name="defType">edismax</str>
<str name="q">journal of neurology</str>
<str name="echoParams">all</str>
<str name="sort">score desc</str>
Looking at the computation of the score, I do not see a single difference between them (see below).
Any idea why the fieldNorm is the same for both documents?
Thanks in advance!
Greetings,
Nicolas
<str name="ejn01:2560000000075596">
1.0071099 = (MATCH) sum of:
0.0053001107 = (MATCH) sum of:
0.0017667036 = (MATCH) max of:
0.0017667036 = (MATCH) weight(title:journal^10.0 in 0), product of:
0.005943145 = queryWeight(title:journal^10.0), product of:
10.0 = boost
0.5945349 = idf(docFreq=2, maxDocs=2)
9.996294E-4 = queryNorm
0.29726744 = (MATCH) fieldWeight(title:journal in 0), product of:
1.0 = tf(termFreq(title:journal)=1)
0.5945349 = idf(docFreq=2, maxDocs=2)
0.5 = fieldNorm(field=title, doc=0)
0.0017667036 = (MATCH) max of:
0.0017667036 = (MATCH) weight(title:of^10.0 in 0), product of:
0.005943145 = queryWeight(title:of^10.0), product of:
10.0 = boost
0.5945349 = idf(docFreq=2, maxDocs=2)
9.996294E-4 = queryNorm
0.29726744 = (MATCH) fieldWeight(title:of in 0), product of:
1.0 = tf(termFreq(title:of)=1)
0.5945349 = idf(docFreq=2, maxDocs=2)
0.5 = fieldNorm(field=title, doc=0)
0.0017667036 = (MATCH) max of:
0.0017667036 = (MATCH) weight(title:neurology^10.0 in 0), product of:
0.005943145 = queryWeight(title:neurology^10.0), product of:
10.0 = boost
0.5945349 = idf(docFreq=2, maxDocs=2)
9.996294E-4 = queryNorm
0.29726744 = (MATCH) fieldWeight(title:neurology in 0), product of:
1.0 = tf(termFreq(title:neurology)=1)
0.5945349 = idf(docFreq=2, maxDocs=2)
0.5 = fieldNorm(field=title, doc=0)
0.0031800664 = (MATCH) max of:
0.0031800664 = (MATCH) weight(title:"journal of neurology"~2^2.0 in 0), product of:
0.0035658872 = queryWeight(title:"journal of neurology"~2^2.0), product of:
2.0 = boost
1.7836046 = idf(title: journal=2 of=2 neurology=2)
9.996294E-4 = queryNorm
0.8918023 = fieldWeight(title:"journal of neurology" in 0), product of:
1.0 = tf(phraseFreq=1.0)
1.7836046 = idf(title: journal=2 of=2 neurology=2)
0.5 = fieldNorm(field=title, doc=0)
0.99862975 = (MATCH) sum of:
0.99862975 = (MATCH) MatchAllDocsQuery, product of:
0.99862975 = queryNorm
</str>
<str name="ejn01:954925518616">
1.0071099 = (MATCH) sum of:
0.0053001107 = (MATCH) sum of:
0.0017667036 = (MATCH) max of:
0.0017667036 = (MATCH) weight(title:journal^10.0 in 1), product of:
0.005943145 = queryWeight(title:journal^10.0), product of:
10.0 = boost
0.5945349 = idf(docFreq=2, maxDocs=2)
9.996294E-4 = queryNorm
0.29726744 = (MATCH) fieldWeight(title:journal in 1), product of:
1.0 = tf(termFreq(title:journal)=1)
0.5945349 = idf(docFreq=2, maxDocs=2)
0.5 = fieldNorm(field=title, doc=1)
0.0017667036 = (MATCH) max of:
0.0017667036 = (MATCH) weight(title:of^10.0 in 1), product of:
0.005943145 = queryWeight(title:of^10.0), product of:
10.0 = boost
0.5945349 = idf(docFreq=2, maxDocs=2)
9.996294E-4 = queryNorm
0.29726744 = (MATCH) fieldWeight(title:of in 1), product of:
1.0 = tf(termFreq(title:of)=1)
0.5945349 = idf(docFreq=2, maxDocs=2)
0.5 = fieldNorm(field=title, doc=1)
0.0017667036 = (MATCH) max of:
0.0017667036 = (MATCH) weight(title:neurology^10.0 in 1), product of:
0.005943145 = queryWeight(title:neurology^10.0), product of:
10.0 = boost
0.5945349 = idf(docFreq=2, maxDocs=2)
9.996294E-4 = queryNorm
0.29726744 = (MATCH) fieldWeight(title:neurology in 1), product of:
1.0 = tf(termFreq(title:neurology)=1)
0.5945349 = idf(docFreq=2, maxDocs=2)
0.5 = fieldNorm(field=title, doc=1)
0.0031800664 = (MATCH) max of:
0.0031800664 = (MATCH) weight(title:"journal of neurology"~2^2.0 in 1), product of:
0.0035658872 = queryWeight(title:"journal of neurology"~2^2.0), product of:
2.0 = boost
1.7836046 = idf(title: journal=2 of=2 neurology=2)
9.996294E-4 = queryNorm
0.8918023 = fieldWeight(title:"journal of neurology" in 1), product of:
1.0 = tf(phraseFreq=1.0)
1.7836046 = idf(title: journal=2 of=2 neurology=2)
0.5 = fieldNorm(field=title, doc=1)
0.99862975 = (MATCH) sum of:
0.99862975 = (MATCH) MatchAllDocsQuery, product of:
0.99862975 = queryNorm
</str>
|