From d83b1300a8b469fa19e5fd9ae8264f6fa448bb18 Mon Sep 17 00:00:00 2001 From: Doug Turnbull Date: Tue, 21 Nov 2017 14:02:09 -0500 Subject: [PATCH 01/25] Makes QueryBuilder synonym matching configurable --- .../org/apache/lucene/util/QueryBuilder.java | 28 ++++++++++++++ .../apache/lucene/util/TestQueryBuilder.java | 37 +++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java index f077bfd642d9..503c82b5d6cf 100644 --- a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java @@ -31,6 +31,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; @@ -62,6 +63,15 @@ public class QueryBuilder { protected boolean enablePositionIncrements = true; protected boolean enableGraphQueries = true; protected boolean autoGenerateMultiTermSynonymsPhraseQuery = false; + protected SYN_MATCH_TYPE synQueryType = SYN_MATCH_TYPE.BLENDED; + + public static enum SYN_MATCH_TYPE { + BLENDED, /* default, blends doc freq for terms in same posn: SynonymQuery(A B)*/ + BEST_SYN, /* picks dismax over synonyms, choosing best scoring per doc: (A | B) */ + MOST_SYN /* picks combination (A OR B). The more synonyms the better*/ + } + + protected boolean blendSynonyms = true; /** Creates a new QueryBuilder using the given analyzer. */ public QueryBuilder(Analyzer analyzer) { @@ -258,6 +268,10 @@ public boolean getEnableGraphQueries() { return enableGraphQueries; } + public void setSynQueryType(SYN_MATCH_TYPE v) { synQueryType = v;} + + public SYN_MATCH_TYPE getSynQueryType() { return synQueryType;} + /** * Creates a query from a token stream. * @@ -621,6 +635,20 @@ protected BooleanQuery.Builder newBooleanQuery() { * @return new Query instance */ protected Query newSynonymQuery(Term terms[]) { + if (synQueryType == SYN_MATCH_TYPE.BEST_SYN) { + List currPosnClauses = new ArrayList(); + for (Term term : terms) { + currPosnClauses.add(newTermQuery(term)); + } + DisjunctionMaxQuery dm = new DisjunctionMaxQuery(currPosnClauses, 0.0f); + return dm; + } else if (synQueryType == SYN_MATCH_TYPE.MOST_SYN) { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (Term term : terms) { + builder.add(newTermQuery(term), BooleanClause.Occur.SHOULD); + } + return builder.build(); + } return new SynonymQuery(terms); } diff --git a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java index fece16697cf4..25abc71515e4 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java @@ -18,6 +18,8 @@ import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; @@ -31,6 +33,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; @@ -144,6 +147,40 @@ public void testSynonyms() throws Exception { assertEquals(expected, builder.createBooleanQuery("field", "dogs", BooleanClause.Occur.MUST)); assertEquals(expected, builder.createPhraseQuery("field", "dogs")); } + + + /** synonym dismax expansion instead of blended */ + public void testBestSynonyms() throws Exception { + List queries = new ArrayList(); + queries.add(new TermQuery(new Term("field", "dogs"))); + queries.add(new TermQuery(new Term("field", "dog"))); + + DisjunctionMaxQuery expected = new DisjunctionMaxQuery(queries, 0.0f); + QueryBuilder builder = new QueryBuilder(new MockSynonymAnalyzer()); + builder.setSynQueryType(QueryBuilder.SYN_MATCH_TYPE.BEST_SYN); + + assertEquals(expected, builder.createBooleanQuery("field", "dogs")); + assertEquals(expected, builder.createPhraseQuery("field", "dogs")); + assertEquals(expected, builder.createBooleanQuery("field", "dogs", BooleanClause.Occur.MUST)); + assertEquals(expected, builder.createPhraseQuery("field", "dogs")); + } + + + /** synonym boolean expansion instead of blended */ + public void testMostSynonyms() throws Exception { + Query expected = new BooleanQuery.Builder() + .add(new TermQuery(new Term("field", "dogs")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("field", "dog")), BooleanClause.Occur.SHOULD) + .build(); + + QueryBuilder builder = new QueryBuilder(new MockSynonymAnalyzer()); + builder.setSynQueryType(QueryBuilder.SYN_MATCH_TYPE.MOST_SYN); + + assertEquals(expected, builder.createBooleanQuery("field", "dogs")); + assertEquals(expected, builder.createPhraseQuery("field", "dogs")); + assertEquals(expected, builder.createBooleanQuery("field", "dogs", BooleanClause.Occur.MUST)); + assertEquals(expected, builder.createPhraseQuery("field", "dogs")); + } /** forms multiphrase query */ public void testSynonymsPhrase() throws Exception { From f279435b46f81232181a658be5e856bdbca9924f Mon Sep 17 00:00:00 2001 From: Doug Turnbull Date: Tue, 21 Nov 2017 15:03:54 -0500 Subject: [PATCH 02/25] plumb through the field type setting --- .../org/apache/lucene/util/QueryBuilder.java | 8 +++--- .../apache/lucene/util/TestQueryBuilder.java | 4 +-- .../org/apache/solr/parser/QueryParser.java | 5 ++-- .../solr/parser/SolrQueryParserBase.java | 28 +++++++++++++++---- .../org/apache/solr/schema/FieldType.java | 3 ++ .../org/apache/solr/schema/TextField.java | 9 ++++++ .../solr/search/ExtendedDismaxQParser.java | 5 ++-- .../solr/search/TestExtendedDismaxParser.java | 5 ++-- 8 files changed, 49 insertions(+), 18 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java index 503c82b5d6cf..05c59f7c2b31 100644 --- a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java @@ -67,8 +67,8 @@ public class QueryBuilder { public static enum SYN_MATCH_TYPE { BLENDED, /* default, blends doc freq for terms in same posn: SynonymQuery(A B)*/ - BEST_SYN, /* picks dismax over synonyms, choosing best scoring per doc: (A | B) */ - MOST_SYN /* picks combination (A OR B). The more synonyms the better*/ + BEST, /* picks dismax over synonyms, choosing best scoring per doc: (A | B) */ + MOST /* picks combination (A OR B). The more synonyms the better*/ } protected boolean blendSynonyms = true; @@ -635,14 +635,14 @@ protected BooleanQuery.Builder newBooleanQuery() { * @return new Query instance */ protected Query newSynonymQuery(Term terms[]) { - if (synQueryType == SYN_MATCH_TYPE.BEST_SYN) { + if (synQueryType == SYN_MATCH_TYPE.BEST) { List currPosnClauses = new ArrayList(); for (Term term : terms) { currPosnClauses.add(newTermQuery(term)); } DisjunctionMaxQuery dm = new DisjunctionMaxQuery(currPosnClauses, 0.0f); return dm; - } else if (synQueryType == SYN_MATCH_TYPE.MOST_SYN) { + } else if (synQueryType == SYN_MATCH_TYPE.MOST) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (Term term : terms) { builder.add(newTermQuery(term), BooleanClause.Occur.SHOULD); diff --git a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java index 25abc71515e4..67ce75f63a9a 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java @@ -157,7 +157,7 @@ public void testBestSynonyms() throws Exception { DisjunctionMaxQuery expected = new DisjunctionMaxQuery(queries, 0.0f); QueryBuilder builder = new QueryBuilder(new MockSynonymAnalyzer()); - builder.setSynQueryType(QueryBuilder.SYN_MATCH_TYPE.BEST_SYN); + builder.setSynQueryType(QueryBuilder.SYN_MATCH_TYPE.BEST); assertEquals(expected, builder.createBooleanQuery("field", "dogs")); assertEquals(expected, builder.createPhraseQuery("field", "dogs")); @@ -174,7 +174,7 @@ public void testMostSynonyms() throws Exception { .build(); QueryBuilder builder = new QueryBuilder(new MockSynonymAnalyzer()); - builder.setSynQueryType(QueryBuilder.SYN_MATCH_TYPE.MOST_SYN); + builder.setSynQueryType(QueryBuilder.SYN_MATCH_TYPE.MOST); assertEquals(expected, builder.createBooleanQuery("field", "dogs")); assertEquals(expected, builder.createPhraseQuery("field", "dogs")); diff --git a/solr/core/src/java/org/apache/solr/parser/QueryParser.java b/solr/core/src/java/org/apache/solr/parser/QueryParser.java index 50e36dd916af..34e444316efc 100644 --- a/solr/core/src/java/org/apache/solr/parser/QueryParser.java +++ b/solr/core/src/java/org/apache/solr/parser/QueryParser.java @@ -52,13 +52,14 @@ private static boolean allowedPostMultiTerm(int tokenKind) { @Override protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, - boolean quoted, boolean fieldAutoGenPhraseQueries, boolean fieldEnableGraphQueries) + boolean quoted, boolean fieldAutoGenPhraseQueries, boolean fieldEnableGraphQueries, + SYN_MATCH_TYPE synMatchType) throws SyntaxError { setAutoGenerateMultiTermSynonymsPhraseQuery(fieldAutoGenPhraseQueries || getAutoGeneratePhraseQueries()); // Don't auto-quote graph-aware field queries boolean treatAsQuoted = getSplitOnWhitespace() ? (quoted || fieldAutoGenPhraseQueries || getAutoGeneratePhraseQueries()) : quoted; - return super.newFieldQuery(analyzer, field, queryText, treatAsQuoted, false, fieldEnableGraphQueries); + return super.newFieldQuery(analyzer, field, queryText, treatAsQuoted, false, fieldEnableGraphQueries, synMatchType); } // * Query ::= ( Clause )* diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java index 4c62e853fdae..b65ab210d47c 100644 --- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java +++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java @@ -16,6 +16,7 @@ */ package org.apache.solr.parser; +import javax.xml.soap.Text; import java.io.StringReader; import java.util.ArrayList; import java.util.Collections; @@ -438,13 +439,15 @@ protected void addMultiTermClause(List clauses, Query q) { } protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, - boolean quoted, boolean fieldAutoGenPhraseQueries, boolean fieldEnableGraphQueries) + boolean quoted, boolean fieldAutoGenPhraseQueries, boolean fieldEnableGraphQueries, + QueryBuilder.SYN_MATCH_TYPE synQueryType) throws SyntaxError { BooleanClause.Occur occur = operator == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD; setEnableGraphQueries(fieldEnableGraphQueries); Query query = createFieldQuery(analyzer, occur, field, queryText, quoted || fieldAutoGenPhraseQueries || autoGeneratePhraseQueries, phraseSlop); setEnableGraphQueries(true); // reset back to default + setSynQueryType(synQueryType); return query; } @@ -639,8 +642,13 @@ protected Query getBooleanQuery(List clauses) throws SyntaxError boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries(); boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField)ft).getEnableGraphQueries(); + SYN_MATCH_TYPE synMatchType = SYN_MATCH_TYPE.BLENDED; + if (ft instanceof TextField) { + synMatchType = ((TextField)(ft)).getSynonymQueryType(); + } + subq = newFieldQuery(getAnalyzer(), sfield.getName(), rawq.getJoinedExternalVal(), - false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries); + false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries, synMatchType); booleanBuilder.add(subq, BooleanClause.Occur.SHOULD); } else { for (String externalVal : rawq.getExternalVals()) { @@ -957,7 +965,11 @@ protected Query getFieldQuery(String field, String queryText, boolean quoted, bo if (ft.isTokenized() && sf.indexed()) { boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries(); boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField)ft).getEnableGraphQueries(); - return newFieldQuery(getAnalyzer(), field, queryText, quoted, fieldAutoGenPhraseQueries, fieldEnableGraphQueries); + SYN_MATCH_TYPE synMatchType = SYN_MATCH_TYPE.BLENDED; + if (ft instanceof TextField) { + synMatchType = ((TextField)(ft)).getSynonymQueryType(); + } + return newFieldQuery(getAnalyzer(), field, queryText, quoted, fieldAutoGenPhraseQueries, fieldEnableGraphQueries, synMatchType); } else { if (raw) { return new RawQuery(sf, queryText); @@ -968,7 +980,7 @@ protected Query getFieldQuery(String field, String queryText, boolean quoted, bo } // default to a normal field query - return newFieldQuery(getAnalyzer(), field, queryText, quoted, false, true); + return newFieldQuery(getAnalyzer(), field, queryText, quoted, false, true, SYN_MATCH_TYPE.BLENDED); } // Assumption: quoted is always false @@ -1002,8 +1014,12 @@ protected Query getFieldQuery(String field, List queryTerms, boolean raw String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms); boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries(); boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField)ft).getEnableGraphQueries(); + SYN_MATCH_TYPE synMatchType = SYN_MATCH_TYPE.BLENDED; + if (ft instanceof TextField) { + synMatchType = ((TextField)(ft)).getSynonymQueryType(); + } return newFieldQuery - (getAnalyzer(), field, queryText, false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries); + (getAnalyzer(), field, queryText, false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries, synMatchType); } else { if (raw) { return new RawQuery(sf, queryTerms); @@ -1035,7 +1051,7 @@ protected Query getFieldQuery(String field, List queryTerms, boolean raw // default to a normal field query String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms); - return newFieldQuery(getAnalyzer(), field, queryText, false, false, true); + return newFieldQuery(getAnalyzer(), field, queryText, false, false, true, SYN_MATCH_TYPE.BLENDED); } protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1){ diff --git a/solr/core/src/java/org/apache/solr/schema/FieldType.java b/solr/core/src/java/org/apache/solr/schema/FieldType.java index 5494cf11080b..ed920df187a4 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java @@ -51,6 +51,7 @@ import org.apache.lucene.search.SortedSetSortField; import org.apache.lucene.search.SortedNumericSelector; import org.apache.lucene.search.SortedSetSelector; +import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.similarities.Similarity; @@ -905,6 +906,7 @@ protected void checkSupportsDocValues() { protected static final String ENABLE_GRAPH_QUERIES = "enableGraphQueries"; private static final String ARGS = "args"; private static final String POSITION_INCREMENT_GAP = "positionIncrementGap"; + protected static final String SYNONYM_QUERY_TYPE = "synonymQueryType"; /** * Get a map of property name -> value for this field type. @@ -926,6 +928,7 @@ public SimpleOrderedMap getNamedPropertyValues(boolean showDefaults) { if (this instanceof TextField) { namedPropertyValues.add(AUTO_GENERATE_PHRASE_QUERIES, ((TextField) this).getAutoGeneratePhraseQueries()); namedPropertyValues.add(ENABLE_GRAPH_QUERIES, ((TextField) this).getEnableGraphQueries()); + namedPropertyValues.add(SYNONYM_QUERY_TYPE, ((TextField) this).getSynonymQueryType()); } namedPropertyValues.add(getPropertyName(INDEXED), hasProperty(INDEXED)); namedPropertyValues.add(getPropertyName(STORED), hasProperty(STORED)); diff --git a/solr/core/src/java/org/apache/solr/schema/TextField.java b/solr/core/src/java/org/apache/solr/schema/TextField.java index effadc40caeb..022bfa7a4a9b 100644 --- a/solr/core/src/java/org/apache/solr/schema/TextField.java +++ b/solr/core/src/java/org/apache/solr/schema/TextField.java @@ -41,6 +41,7 @@ public class TextField extends FieldType { protected boolean autoGeneratePhraseQueries; protected boolean enableGraphQueries; + protected QueryBuilder.SYN_MATCH_TYPE synonymQueryType; /** * Analyzer set by schema for text types to use when searching fields @@ -72,6 +73,12 @@ protected void init(IndexSchema schema, Map args) { String autoGeneratePhraseQueriesStr = args.remove(AUTO_GENERATE_PHRASE_QUERIES); if (autoGeneratePhraseQueriesStr != null) autoGeneratePhraseQueries = Boolean.parseBoolean(autoGeneratePhraseQueriesStr); + + synonymQueryType = QueryBuilder.SYN_MATCH_TYPE.BLENDED; + String synonymQueryTypeStr = args.remove(SYNONYM_QUERY_TYPE); + if (synonymQueryTypeStr != null) { + synonymQueryType = QueryBuilder.SYN_MATCH_TYPE.valueOf(synonymQueryTypeStr); + } enableGraphQueries = true; String enableGraphQueriesStr = args.remove(ENABLE_GRAPH_QUERIES); @@ -104,6 +111,8 @@ public boolean getEnableGraphQueries() { return enableGraphQueries; } + public QueryBuilder.SYN_MATCH_TYPE getSynonymQueryType() {return synonymQueryType;} + @Override public SortField getSortField(SchemaField field, boolean reverse) { /* :TODO: maybe warn if isTokenized(), but doesn't use LimitTokenCountFilter in its chain? */ diff --git a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java index 173f0393420a..b77f806277ae 100644 --- a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java +++ b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java @@ -1080,7 +1080,8 @@ protected Query getPrefixQuery(String field, String val) throws SyntaxError { @Override protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, - boolean quoted, boolean fieldAutoGenPhraseQueries, boolean enableGraphQueries) + boolean quoted, boolean fieldAutoGenPhraseQueries, boolean enableGraphQueries, + SYN_MATCH_TYPE synMatchType) throws SyntaxError { Analyzer actualAnalyzer; if (removeStopFilter) { @@ -1094,7 +1095,7 @@ protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, } else { actualAnalyzer = parser.getReq().getSchema().getFieldType(field).getQueryAnalyzer(); } - return super.newFieldQuery(actualAnalyzer, field, queryText, quoted, fieldAutoGenPhraseQueries, enableGraphQueries); + return super.newFieldQuery(actualAnalyzer, field, queryText, quoted, fieldAutoGenPhraseQueries, enableGraphQueries, synMatchType); } @Override diff --git a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java index 0dc327db376b..9add2b251cb3 100644 --- a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java +++ b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java @@ -2001,10 +2001,11 @@ protected Query getFieldQuery(String field, **/ @Override protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, - boolean quoted, boolean fieldAutoGenPhraseQueries, boolean fieldEnableGraphQueries) + boolean quoted, boolean fieldAutoGenPhraseQueries, + boolean fieldEnableGraphQueries, SYN_MATCH_TYPE synMatchType) throws SyntaxError { Query q = super.newFieldQuery - (analyzer, field, queryText, quoted, fieldAutoGenPhraseQueries, fieldEnableGraphQueries); + (analyzer, field, queryText, quoted, fieldAutoGenPhraseQueries, fieldEnableGraphQueries, synMatchType); if (q instanceof BooleanQuery) { boolean rewrittenSubQ = false; // dirty flag: rebuild the repacked query? BooleanQuery.Builder builder = newBooleanQuery(); From 1e9e41c4cccff10effd4a29da30c378ee21dac3d Mon Sep 17 00:00:00 2001 From: Doug Turnbull Date: Tue, 21 Nov 2017 15:17:56 -0500 Subject: [PATCH 03/25] Fix enum style --- .../org/apache/lucene/search/BlendedTermQuery.java | 2 +- .../java/org/apache/lucene/util/QueryBuilder.java | 12 ++++++------ .../org/apache/lucene/util/TestQueryBuilder.java | 4 ++-- .../java/org/apache/solr/parser/QueryParser.java | 2 +- .../org/apache/solr/parser/SolrQueryParserBase.java | 13 ++++++------- .../src/java/org/apache/solr/schema/TextField.java | 8 ++++---- .../apache/solr/search/ExtendedDismaxQParser.java | 2 +- .../solr/search/TestExtendedDismaxParser.java | 2 +- 8 files changed, 22 insertions(+), 23 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java index 219d4535827c..f846c8d1f5c1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java @@ -246,7 +246,7 @@ public int hashCode() { @Override public String toString(String field) { - StringBuilder builder = new StringBuilder("Blended("); + StringBuilder builder = new StringBuilder("BLENDED("); for (int i = 0; i < terms.length; ++i) { if (i != 0) { builder.append(" "); diff --git a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java index 05c59f7c2b31..3b3ba2f8c05c 100644 --- a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java @@ -63,9 +63,9 @@ public class QueryBuilder { protected boolean enablePositionIncrements = true; protected boolean enableGraphQueries = true; protected boolean autoGenerateMultiTermSynonymsPhraseQuery = false; - protected SYN_MATCH_TYPE synQueryType = SYN_MATCH_TYPE.BLENDED; + protected SynQueryType synQueryType = SynQueryType.BLENDED; - public static enum SYN_MATCH_TYPE { + public static enum SynQueryType { BLENDED, /* default, blends doc freq for terms in same posn: SynonymQuery(A B)*/ BEST, /* picks dismax over synonyms, choosing best scoring per doc: (A | B) */ MOST /* picks combination (A OR B). The more synonyms the better*/ @@ -268,9 +268,9 @@ public boolean getEnableGraphQueries() { return enableGraphQueries; } - public void setSynQueryType(SYN_MATCH_TYPE v) { synQueryType = v;} + public void setSynQueryType(SynQueryType v) { synQueryType = v;} - public SYN_MATCH_TYPE getSynQueryType() { return synQueryType;} + public SynQueryType getSynQueryType() { return synQueryType;} /** * Creates a query from a token stream. @@ -635,14 +635,14 @@ protected BooleanQuery.Builder newBooleanQuery() { * @return new Query instance */ protected Query newSynonymQuery(Term terms[]) { - if (synQueryType == SYN_MATCH_TYPE.BEST) { + if (synQueryType == SynQueryType.BEST) { List currPosnClauses = new ArrayList(); for (Term term : terms) { currPosnClauses.add(newTermQuery(term)); } DisjunctionMaxQuery dm = new DisjunctionMaxQuery(currPosnClauses, 0.0f); return dm; - } else if (synQueryType == SYN_MATCH_TYPE.MOST) { + } else if (synQueryType == SynQueryType.MOST) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (Term term : terms) { builder.add(newTermQuery(term), BooleanClause.Occur.SHOULD); diff --git a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java index 67ce75f63a9a..95c079ad0ef4 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java @@ -157,7 +157,7 @@ public void testBestSynonyms() throws Exception { DisjunctionMaxQuery expected = new DisjunctionMaxQuery(queries, 0.0f); QueryBuilder builder = new QueryBuilder(new MockSynonymAnalyzer()); - builder.setSynQueryType(QueryBuilder.SYN_MATCH_TYPE.BEST); + builder.setSynQueryType(QueryBuilder.SynQueryType.BEST); assertEquals(expected, builder.createBooleanQuery("field", "dogs")); assertEquals(expected, builder.createPhraseQuery("field", "dogs")); @@ -174,7 +174,7 @@ public void testMostSynonyms() throws Exception { .build(); QueryBuilder builder = new QueryBuilder(new MockSynonymAnalyzer()); - builder.setSynQueryType(QueryBuilder.SYN_MATCH_TYPE.MOST); + builder.setSynQueryType(QueryBuilder.SynQueryType.MOST); assertEquals(expected, builder.createBooleanQuery("field", "dogs")); assertEquals(expected, builder.createPhraseQuery("field", "dogs")); diff --git a/solr/core/src/java/org/apache/solr/parser/QueryParser.java b/solr/core/src/java/org/apache/solr/parser/QueryParser.java index 34e444316efc..54f6cecb9262 100644 --- a/solr/core/src/java/org/apache/solr/parser/QueryParser.java +++ b/solr/core/src/java/org/apache/solr/parser/QueryParser.java @@ -53,7 +53,7 @@ private static boolean allowedPostMultiTerm(int tokenKind) { @Override protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted, boolean fieldAutoGenPhraseQueries, boolean fieldEnableGraphQueries, - SYN_MATCH_TYPE synMatchType) + SynQueryType synMatchType) throws SyntaxError { setAutoGenerateMultiTermSynonymsPhraseQuery(fieldAutoGenPhraseQueries || getAutoGeneratePhraseQueries()); // Don't auto-quote graph-aware field queries diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java index b65ab210d47c..7c40c0d10ee6 100644 --- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java +++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java @@ -16,7 +16,6 @@ */ package org.apache.solr.parser; -import javax.xml.soap.Text; import java.io.StringReader; import java.util.ArrayList; import java.util.Collections; @@ -440,7 +439,7 @@ protected void addMultiTermClause(List clauses, Query q) { protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted, boolean fieldAutoGenPhraseQueries, boolean fieldEnableGraphQueries, - QueryBuilder.SYN_MATCH_TYPE synQueryType) + SynQueryType synQueryType) throws SyntaxError { BooleanClause.Occur occur = operator == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD; setEnableGraphQueries(fieldEnableGraphQueries); @@ -642,7 +641,7 @@ protected Query getBooleanQuery(List clauses) throws SyntaxError boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries(); boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField)ft).getEnableGraphQueries(); - SYN_MATCH_TYPE synMatchType = SYN_MATCH_TYPE.BLENDED; + SynQueryType synMatchType = SynQueryType.BLENDED; if (ft instanceof TextField) { synMatchType = ((TextField)(ft)).getSynonymQueryType(); } @@ -965,7 +964,7 @@ protected Query getFieldQuery(String field, String queryText, boolean quoted, bo if (ft.isTokenized() && sf.indexed()) { boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries(); boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField)ft).getEnableGraphQueries(); - SYN_MATCH_TYPE synMatchType = SYN_MATCH_TYPE.BLENDED; + SynQueryType synMatchType = SynQueryType.BLENDED; if (ft instanceof TextField) { synMatchType = ((TextField)(ft)).getSynonymQueryType(); } @@ -980,7 +979,7 @@ protected Query getFieldQuery(String field, String queryText, boolean quoted, bo } // default to a normal field query - return newFieldQuery(getAnalyzer(), field, queryText, quoted, false, true, SYN_MATCH_TYPE.BLENDED); + return newFieldQuery(getAnalyzer(), field, queryText, quoted, false, true, SynQueryType.BLENDED); } // Assumption: quoted is always false @@ -1014,7 +1013,7 @@ protected Query getFieldQuery(String field, List queryTerms, boolean raw String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms); boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries(); boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField)ft).getEnableGraphQueries(); - SYN_MATCH_TYPE synMatchType = SYN_MATCH_TYPE.BLENDED; + SynQueryType synMatchType = SynQueryType.BLENDED; if (ft instanceof TextField) { synMatchType = ((TextField)(ft)).getSynonymQueryType(); } @@ -1051,7 +1050,7 @@ protected Query getFieldQuery(String field, List queryTerms, boolean raw // default to a normal field query String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms); - return newFieldQuery(getAnalyzer(), field, queryText, false, false, true, SYN_MATCH_TYPE.BLENDED); + return newFieldQuery(getAnalyzer(), field, queryText, false, false, true, SynQueryType.BLENDED); } protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1){ diff --git a/solr/core/src/java/org/apache/solr/schema/TextField.java b/solr/core/src/java/org/apache/solr/schema/TextField.java index 022bfa7a4a9b..860e700db3d6 100644 --- a/solr/core/src/java/org/apache/solr/schema/TextField.java +++ b/solr/core/src/java/org/apache/solr/schema/TextField.java @@ -41,7 +41,7 @@ public class TextField extends FieldType { protected boolean autoGeneratePhraseQueries; protected boolean enableGraphQueries; - protected QueryBuilder.SYN_MATCH_TYPE synonymQueryType; + protected QueryBuilder.SynQueryType synonymQueryType; /** * Analyzer set by schema for text types to use when searching fields @@ -74,10 +74,10 @@ protected void init(IndexSchema schema, Map args) { if (autoGeneratePhraseQueriesStr != null) autoGeneratePhraseQueries = Boolean.parseBoolean(autoGeneratePhraseQueriesStr); - synonymQueryType = QueryBuilder.SYN_MATCH_TYPE.BLENDED; + synonymQueryType = QueryBuilder.SynQueryType.BLENDED; String synonymQueryTypeStr = args.remove(SYNONYM_QUERY_TYPE); if (synonymQueryTypeStr != null) { - synonymQueryType = QueryBuilder.SYN_MATCH_TYPE.valueOf(synonymQueryTypeStr); + synonymQueryType = QueryBuilder.SynQueryType.valueOf(synonymQueryTypeStr); } enableGraphQueries = true; @@ -111,7 +111,7 @@ public boolean getEnableGraphQueries() { return enableGraphQueries; } - public QueryBuilder.SYN_MATCH_TYPE getSynonymQueryType() {return synonymQueryType;} + public QueryBuilder.SynQueryType getSynonymQueryType() {return synonymQueryType;} @Override public SortField getSortField(SchemaField field, boolean reverse) { diff --git a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java index b77f806277ae..931f186c19da 100644 --- a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java +++ b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java @@ -1081,7 +1081,7 @@ protected Query getPrefixQuery(String field, String val) throws SyntaxError { @Override protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted, boolean fieldAutoGenPhraseQueries, boolean enableGraphQueries, - SYN_MATCH_TYPE synMatchType) + SynQueryType synMatchType) throws SyntaxError { Analyzer actualAnalyzer; if (removeStopFilter) { diff --git a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java index 9add2b251cb3..e66e3d1c5815 100644 --- a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java +++ b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java @@ -2002,7 +2002,7 @@ protected Query getFieldQuery(String field, @Override protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted, boolean fieldAutoGenPhraseQueries, - boolean fieldEnableGraphQueries, SYN_MATCH_TYPE synMatchType) + boolean fieldEnableGraphQueries, SynQueryType synMatchType) throws SyntaxError { Query q = super.newFieldQuery (analyzer, field, queryText, quoted, fieldAutoGenPhraseQueries, fieldEnableGraphQueries, synMatchType); From 8eb875fcccf533d3799b15d266c724c868e13d34 Mon Sep 17 00:00:00 2001 From: Doug Turnbull Date: Tue, 21 Nov 2017 16:47:04 -0500 Subject: [PATCH 04/25] Renaming to scoreOverlaps --- .../org/apache/lucene/util/QueryBuilder.java | 18 ++++++++-------- .../apache/lucene/util/TestQueryBuilder.java | 6 +++--- .../org/apache/solr/parser/QueryParser.java | 2 +- .../solr/parser/SolrQueryParserBase.java | 21 ++++++++++--------- .../org/apache/solr/schema/FieldType.java | 5 ++--- .../org/apache/solr/schema/TextField.java | 12 +++++------ .../solr/search/ExtendedDismaxQParser.java | 2 +- .../solr/search/TestExtendedDismaxParser.java | 2 +- 8 files changed, 34 insertions(+), 34 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java index 3b3ba2f8c05c..283f5ec17e59 100644 --- a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java @@ -63,12 +63,12 @@ public class QueryBuilder { protected boolean enablePositionIncrements = true; protected boolean enableGraphQueries = true; protected boolean autoGenerateMultiTermSynonymsPhraseQuery = false; - protected SynQueryType synQueryType = SynQueryType.BLENDED; + protected ScoreOverlaps scoreOverlaps = ScoreOverlaps.AS_SAME_TERM; - public static enum SynQueryType { - BLENDED, /* default, blends doc freq for terms in same posn: SynonymQuery(A B)*/ - BEST, /* picks dismax over synonyms, choosing best scoring per doc: (A | B) */ - MOST /* picks combination (A OR B). The more synonyms the better*/ + public static enum ScoreOverlaps { + AS_SAME_TERM, /* default, blends doc freq for terms in same posn: SynonymQuery(A B)*/ + PICK_BEST, /* picks dismax over synonyms, choosing best scoring per doc: (A | B) */ + AS_DISTINCT_TERMS /* picks combination (A OR B).*/ } protected boolean blendSynonyms = true; @@ -268,9 +268,9 @@ public boolean getEnableGraphQueries() { return enableGraphQueries; } - public void setSynQueryType(SynQueryType v) { synQueryType = v;} + public void setScoreOverlaps(ScoreOverlaps v) { scoreOverlaps = v;} - public SynQueryType getSynQueryType() { return synQueryType;} + public ScoreOverlaps getScoreOverlaps() { return scoreOverlaps;} /** * Creates a query from a token stream. @@ -635,14 +635,14 @@ protected BooleanQuery.Builder newBooleanQuery() { * @return new Query instance */ protected Query newSynonymQuery(Term terms[]) { - if (synQueryType == SynQueryType.BEST) { + if (scoreOverlaps == ScoreOverlaps.PICK_BEST) { List currPosnClauses = new ArrayList(); for (Term term : terms) { currPosnClauses.add(newTermQuery(term)); } DisjunctionMaxQuery dm = new DisjunctionMaxQuery(currPosnClauses, 0.0f); return dm; - } else if (synQueryType == SynQueryType.MOST) { + } else if (scoreOverlaps == ScoreOverlaps.AS_DISTINCT_TERMS) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (Term term : terms) { builder.add(newTermQuery(term), BooleanClause.Occur.SHOULD); diff --git a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java index 95c079ad0ef4..a00642bdcdfe 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java @@ -157,7 +157,7 @@ public void testBestSynonyms() throws Exception { DisjunctionMaxQuery expected = new DisjunctionMaxQuery(queries, 0.0f); QueryBuilder builder = new QueryBuilder(new MockSynonymAnalyzer()); - builder.setSynQueryType(QueryBuilder.SynQueryType.BEST); + builder.setScoreOverlaps(QueryBuilder.ScoreOverlaps.PICK_BEST); assertEquals(expected, builder.createBooleanQuery("field", "dogs")); assertEquals(expected, builder.createPhraseQuery("field", "dogs")); @@ -167,14 +167,14 @@ public void testBestSynonyms() throws Exception { /** synonym boolean expansion instead of blended */ - public void testMostSynonyms() throws Exception { + public void testSynonymsDistinct() throws Exception { Query expected = new BooleanQuery.Builder() .add(new TermQuery(new Term("field", "dogs")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("field", "dog")), BooleanClause.Occur.SHOULD) .build(); QueryBuilder builder = new QueryBuilder(new MockSynonymAnalyzer()); - builder.setSynQueryType(QueryBuilder.SynQueryType.MOST); + builder.setScoreOverlaps(QueryBuilder.ScoreOverlaps.AS_DISTINCT_TERMS); assertEquals(expected, builder.createBooleanQuery("field", "dogs")); assertEquals(expected, builder.createPhraseQuery("field", "dogs")); diff --git a/solr/core/src/java/org/apache/solr/parser/QueryParser.java b/solr/core/src/java/org/apache/solr/parser/QueryParser.java index 54f6cecb9262..6a14b2ea9e6d 100644 --- a/solr/core/src/java/org/apache/solr/parser/QueryParser.java +++ b/solr/core/src/java/org/apache/solr/parser/QueryParser.java @@ -53,7 +53,7 @@ private static boolean allowedPostMultiTerm(int tokenKind) { @Override protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted, boolean fieldAutoGenPhraseQueries, boolean fieldEnableGraphQueries, - SynQueryType synMatchType) + ScoreOverlaps synMatchType) throws SyntaxError { setAutoGenerateMultiTermSynonymsPhraseQuery(fieldAutoGenPhraseQueries || getAutoGeneratePhraseQueries()); // Don't auto-quote graph-aware field queries diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java index 7c40c0d10ee6..04512ed2ee57 100644 --- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java +++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java @@ -439,14 +439,15 @@ protected void addMultiTermClause(List clauses, Query q) { protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted, boolean fieldAutoGenPhraseQueries, boolean fieldEnableGraphQueries, - SynQueryType synQueryType) + ScoreOverlaps scoreOverlaps) throws SyntaxError { BooleanClause.Occur occur = operator == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD; setEnableGraphQueries(fieldEnableGraphQueries); + setScoreOverlaps(scoreOverlaps); Query query = createFieldQuery(analyzer, occur, field, queryText, quoted || fieldAutoGenPhraseQueries || autoGeneratePhraseQueries, phraseSlop); setEnableGraphQueries(true); // reset back to default - setSynQueryType(synQueryType); + setScoreOverlaps(ScoreOverlaps.AS_SAME_TERM); return query; } @@ -641,9 +642,9 @@ protected Query getBooleanQuery(List clauses) throws SyntaxError boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries(); boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField)ft).getEnableGraphQueries(); - SynQueryType synMatchType = SynQueryType.BLENDED; + ScoreOverlaps synMatchType = ScoreOverlaps.AS_SAME_TERM; if (ft instanceof TextField) { - synMatchType = ((TextField)(ft)).getSynonymQueryType(); + synMatchType = ((TextField)(ft)).getScoreOverlapsMethod(); } subq = newFieldQuery(getAnalyzer(), sfield.getName(), rawq.getJoinedExternalVal(), @@ -964,9 +965,9 @@ protected Query getFieldQuery(String field, String queryText, boolean quoted, bo if (ft.isTokenized() && sf.indexed()) { boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries(); boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField)ft).getEnableGraphQueries(); - SynQueryType synMatchType = SynQueryType.BLENDED; + ScoreOverlaps synMatchType = ScoreOverlaps.AS_SAME_TERM; if (ft instanceof TextField) { - synMatchType = ((TextField)(ft)).getSynonymQueryType(); + synMatchType = ((TextField)(ft)).getScoreOverlapsMethod(); } return newFieldQuery(getAnalyzer(), field, queryText, quoted, fieldAutoGenPhraseQueries, fieldEnableGraphQueries, synMatchType); } else { @@ -979,7 +980,7 @@ protected Query getFieldQuery(String field, String queryText, boolean quoted, bo } // default to a normal field query - return newFieldQuery(getAnalyzer(), field, queryText, quoted, false, true, SynQueryType.BLENDED); + return newFieldQuery(getAnalyzer(), field, queryText, quoted, false, true, ScoreOverlaps.AS_SAME_TERM); } // Assumption: quoted is always false @@ -1013,9 +1014,9 @@ protected Query getFieldQuery(String field, List queryTerms, boolean raw String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms); boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries(); boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField)ft).getEnableGraphQueries(); - SynQueryType synMatchType = SynQueryType.BLENDED; + ScoreOverlaps synMatchType = ScoreOverlaps.AS_SAME_TERM; if (ft instanceof TextField) { - synMatchType = ((TextField)(ft)).getSynonymQueryType(); + synMatchType = ((TextField)(ft)).getScoreOverlapsMethod(); } return newFieldQuery (getAnalyzer(), field, queryText, false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries, synMatchType); @@ -1050,7 +1051,7 @@ protected Query getFieldQuery(String field, List queryTerms, boolean raw // default to a normal field query String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms); - return newFieldQuery(getAnalyzer(), field, queryText, false, false, true, SynQueryType.BLENDED); + return newFieldQuery(getAnalyzer(), field, queryText, false, false, true, ScoreOverlaps.AS_SAME_TERM); } protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1){ diff --git a/solr/core/src/java/org/apache/solr/schema/FieldType.java b/solr/core/src/java/org/apache/solr/schema/FieldType.java index ed920df187a4..d3b6ebfe2758 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java @@ -51,7 +51,6 @@ import org.apache.lucene.search.SortedSetSortField; import org.apache.lucene.search.SortedNumericSelector; import org.apache.lucene.search.SortedSetSelector; -import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.similarities.Similarity; @@ -906,7 +905,7 @@ protected void checkSupportsDocValues() { protected static final String ENABLE_GRAPH_QUERIES = "enableGraphQueries"; private static final String ARGS = "args"; private static final String POSITION_INCREMENT_GAP = "positionIncrementGap"; - protected static final String SYNONYM_QUERY_TYPE = "synonymQueryType"; + protected static final String SCORE_OVERLAPS = "scoreOverlaps"; /** * Get a map of property name -> value for this field type. @@ -928,7 +927,7 @@ public SimpleOrderedMap getNamedPropertyValues(boolean showDefaults) { if (this instanceof TextField) { namedPropertyValues.add(AUTO_GENERATE_PHRASE_QUERIES, ((TextField) this).getAutoGeneratePhraseQueries()); namedPropertyValues.add(ENABLE_GRAPH_QUERIES, ((TextField) this).getEnableGraphQueries()); - namedPropertyValues.add(SYNONYM_QUERY_TYPE, ((TextField) this).getSynonymQueryType()); + namedPropertyValues.add(SCORE_OVERLAPS, ((TextField) this).getScoreOverlapsMethod()); } namedPropertyValues.add(getPropertyName(INDEXED), hasProperty(INDEXED)); namedPropertyValues.add(getPropertyName(STORED), hasProperty(STORED)); diff --git a/solr/core/src/java/org/apache/solr/schema/TextField.java b/solr/core/src/java/org/apache/solr/schema/TextField.java index 860e700db3d6..3a44e99c692e 100644 --- a/solr/core/src/java/org/apache/solr/schema/TextField.java +++ b/solr/core/src/java/org/apache/solr/schema/TextField.java @@ -41,7 +41,7 @@ public class TextField extends FieldType { protected boolean autoGeneratePhraseQueries; protected boolean enableGraphQueries; - protected QueryBuilder.SynQueryType synonymQueryType; + protected QueryBuilder.ScoreOverlaps scoreOverlapsMethod; /** * Analyzer set by schema for text types to use when searching fields @@ -74,10 +74,10 @@ protected void init(IndexSchema schema, Map args) { if (autoGeneratePhraseQueriesStr != null) autoGeneratePhraseQueries = Boolean.parseBoolean(autoGeneratePhraseQueriesStr); - synonymQueryType = QueryBuilder.SynQueryType.BLENDED; - String synonymQueryTypeStr = args.remove(SYNONYM_QUERY_TYPE); - if (synonymQueryTypeStr != null) { - synonymQueryType = QueryBuilder.SynQueryType.valueOf(synonymQueryTypeStr); + scoreOverlapsMethod = QueryBuilder.ScoreOverlaps.AS_SAME_TERM; + String scoreOverlapsMethod = args.remove(SCORE_OVERLAPS); + if (scoreOverlapsMethod != null) { + this.scoreOverlapsMethod = QueryBuilder.ScoreOverlaps.valueOf(scoreOverlapsMethod.toUpperCase()); } enableGraphQueries = true; @@ -111,7 +111,7 @@ public boolean getEnableGraphQueries() { return enableGraphQueries; } - public QueryBuilder.SynQueryType getSynonymQueryType() {return synonymQueryType;} + public QueryBuilder.ScoreOverlaps getScoreOverlapsMethod() {return scoreOverlapsMethod;} @Override public SortField getSortField(SchemaField field, boolean reverse) { diff --git a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java index 931f186c19da..00e4048e3f2a 100644 --- a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java +++ b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java @@ -1081,7 +1081,7 @@ protected Query getPrefixQuery(String field, String val) throws SyntaxError { @Override protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted, boolean fieldAutoGenPhraseQueries, boolean enableGraphQueries, - SynQueryType synMatchType) + ScoreOverlaps synMatchType) throws SyntaxError { Analyzer actualAnalyzer; if (removeStopFilter) { diff --git a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java index e66e3d1c5815..e222bcf71eb2 100644 --- a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java +++ b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java @@ -2002,7 +2002,7 @@ protected Query getFieldQuery(String field, @Override protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted, boolean fieldAutoGenPhraseQueries, - boolean fieldEnableGraphQueries, SynQueryType synMatchType) + boolean fieldEnableGraphQueries, ScoreOverlaps synMatchType) throws SyntaxError { Query q = super.newFieldQuery (analyzer, field, queryText, quoted, fieldAutoGenPhraseQueries, fieldEnableGraphQueries, synMatchType); From 28be940e6b09e7f06f0ed39269cb94d4758cf5f9 Mon Sep 17 00:00:00 2001 From: Erick Date: Tue, 21 Nov 2017 09:55:40 -0800 Subject: [PATCH 05/25] SOLR-11426: TestLazyCores fails too often, debugging --- .../java/org/apache/solr/core/SolrCores.java | 30 ++++++++----------- .../solr/update/DirectUpdateHandler2.java | 2 +- .../solrconfig.snippet.randomindexconfig.xml | 3 ++ .../org/apache/solr/core/TestLazyCores.java | 8 +++++ 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/core/SolrCores.java b/solr/core/src/java/org/apache/solr/core/SolrCores.java index 52b106380d90..546686e445a8 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrCores.java +++ b/solr/core/src/java/org/apache/solr/core/SolrCores.java @@ -19,17 +19,12 @@ import com.google.common.collect.Lists; import org.apache.http.annotation.Experimental; import org.apache.solr.common.SolrException; -import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.logging.MDCLoggingContext; -import org.apache.solr.request.LocalSolrQueryRequest; -import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.update.CommitUpdateCommand; import org.apache.solr.util.DefaultSolrThreadFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.Collection; @@ -539,21 +534,22 @@ public boolean isCoreLoading(String name) { return false; } - // Let transient cache implementation tell us when it ages out a corel + // Let transient cache implementation tell us when it ages out a core @Override public void update(Observable o, Object arg) { synchronized (modifyLock) { - SolrCore core = (SolrCore) arg; - SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams()); - CommitUpdateCommand cmd = new CommitUpdateCommand(req, false); - cmd.openSearcher = false; - cmd.waitSearcher = false; - try { - core.getUpdateHandler().commit(cmd); - } catch (IOException e) { - log.warn("Caught exception trying to close a transient core, ignoring as it should be benign"); - } - pendingCloses.add(core); // Essentially just queue this core up for closing. + // Erick Erickson debugging TestLazyCores. With this un-commented, we get no testLazyCores failures. +// SolrCore core = (SolrCore) arg; +// SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams()); +// CommitUpdateCommand cmd = new CommitUpdateCommand(req, false); +// cmd.openSearcher = false; +// cmd.waitSearcher = false; +// try { +// core.getUpdateHandler().commit(cmd); +// } catch (IOException e) { +// log.warn("Caught exception trying to close a transient core, ignoring as it should be benign"); +// } + pendingCloses.add((SolrCore) arg); // Essentially just queue this core up for closing. modifyLock.notifyAll(); // Wakes up closer thread too } } diff --git a/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java b/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java index d51c97022044..21a689640d12 100644 --- a/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java +++ b/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java @@ -813,7 +813,7 @@ public UpdateLog getUpdateLog() { @Override public void close() throws IOException { log.debug("closing " + this); - + commitTracker.close(); softCommitTracker.close(); diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml index ecf1f1468fc6..8f90d26273dc 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml @@ -43,4 +43,7 @@ A solrconfig.xml snippet containing indexConfig settings for randomized testing. to vary the lockType canset it as needed. --> ${solr.tests.lockType:single} + + ${solr.tests.infostream:false} + diff --git a/solr/core/src/test/org/apache/solr/core/TestLazyCores.java b/solr/core/src/test/org/apache/solr/core/TestLazyCores.java index 6a5697a93f4e..4c787d62d695 100644 --- a/solr/core/src/test/org/apache/solr/core/TestLazyCores.java +++ b/solr/core/src/test/org/apache/solr/core/TestLazyCores.java @@ -786,6 +786,9 @@ public void run() { // Cores 2, 3, 6, 7, 8, 9 are transient @Test public void testNoCommit() throws Exception { + String infoStream = System.getProperty("solr.tests.infostream"); + System.setProperty("solr.tests.infostream","true"); + CoreContainer cc = init(); String[] coreList = new String[]{ "collection2", @@ -832,6 +835,11 @@ public void testNoCommit() throws Exception { } finally { cc.shutdown(); } + if (infoStream != null) { + System.setProperty("solr.tests.infostream", infoStream); + } else { + System.clearProperty("solr.tests.infostream"); + } } private void add10(SolrCore core) throws IOException { From 76216d780e501e26467b9ffaadea76d3861f77ea Mon Sep 17 00:00:00 2001 From: Cassandra Targett Date: Tue, 21 Nov 2017 13:41:27 -0600 Subject: [PATCH 06/25] SOLR-11573: Add solr-root-paths param to allow pulling in code blocks from outside solr-ref-guide tree --- solr/solr-ref-guide/build.xml | 5 +---- solr/solr-ref-guide/src/_config.yml.template | 2 ++ solr/solr-ref-guide/src/using-solrj.adoc | 22 +++++++++++--------- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/solr/solr-ref-guide/build.xml b/solr/solr-ref-guide/build.xml index 6f989b911949..d7b1c7c6b60a 100644 --- a/solr/solr-ref-guide/build.xml +++ b/solr/solr-ref-guide/build.xml @@ -133,10 +133,6 @@ - Copying all examples to build directory... - - - Copying all non template files from src ... @@ -210,6 +206,7 @@ + diff --git a/solr/solr-ref-guide/src/_config.yml.template b/solr/solr-ref-guide/src/_config.yml.template index 9d40209324b2..04942a455605 100755 --- a/solr/solr-ref-guide/src/_config.yml.template +++ b/solr/solr-ref-guide/src/_config.yml.template @@ -69,6 +69,7 @@ asciidoc: {} # NOTE: If you add any attributes here for use in adoc files, you almost certainly need to also add # them to the ant task for building the PDF as well. solr-attributes: &solr-attributes-ref + solr-root-path: "../../../" solr-guide-draft-status: "${solr-guide-draft-status}" solr-guide-version: "${solr-guide-version}" solr-guide-version-path: "${solr-guide-version-path}" @@ -79,6 +80,7 @@ solr-attributes: &solr-attributes-ref build-year: "${current.year}" asciidoctor: + safe: 0 attributes: <<: *solr-attributes-ref icons: "font" diff --git a/solr/solr-ref-guide/src/using-solrj.adoc b/solr/solr-ref-guide/src/using-solrj.adoc index 4151032d4c07..822b0d0fd314 100644 --- a/solr/solr-ref-guide/src/using-solrj.adoc +++ b/solr/solr-ref-guide/src/using-solrj.adoc @@ -1,4 +1,6 @@ = Using SolrJ +:solr-root-path: ../../ +:example-source-dir: {solr-root-path}solrj/src/test/org/apache/solr/client/ref_guide_examples/ // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information @@ -95,8 +97,8 @@ Most SolrJ configuration happens at the `SolrClient` level. The most common/imp ==== Base URLs Most `SolrClient` implementations (with the notable exception of `CloudSolrClient`) require users to specify one or more Solr base URLs, which the client then uses to send HTTP requests to Solr. The path users include on the base URL they provide has an effect on the behavior of the created client from that point on. -. A URL with a path pointing to a specific core or collection (e.g., `http://hostname:8983/solr/core1`). When a core or collection is specified in the base URL, subsequent requests made with that client are not required to re-specify the affected collection. However, the client is limited to sending requests to that core/collection, and can not send requests to any others. -. A URL pointing to the root Solr path (e.g., `http://hostname:8983/solr`). When no core or collection is specified in the base URL, requests can be made to any core/collection, but the affected core/collection must be specified on all requests. +. A URL with a path pointing to a specific core or collection (e.g., `\http://hostname:8983/solr/core1`). When a core or collection is specified in the base URL, subsequent requests made with that client are not required to re-specify the affected collection. However, the client is limited to sending requests to that core/collection, and can not send requests to any others. +. A URL pointing to the root Solr path (e.g., `\http://hostname:8983/solr`). When no core or collection is specified in the base URL, requests can be made to any core/collection, but the affected core/collection must be specified on all requests. Generally speaking, if your `SolrClient` will only be used on a single core/collection, including that entity in the path is the most convenient. Where more flexibility is required, the collection/core should be excluded. @@ -105,7 +107,7 @@ All `SolrClient` implementations allow users to specify the connection and read [source,java,indent=0] ---- -include::UsingSolrJRefGuideExamplesTest.java[tag=solrj-solrclient-timeouts] +include::{example-source-dir}UsingSolrJRefGuideExamplesTest.java[tag=solrj-solrclient-timeouts] ---- When these values are not explicitly provided, SolrJ falls back to using the defaults for the OS/environment is running on. @@ -117,14 +119,14 @@ The following snippet uses a SolrClient to query Solr's "techproducts" example c [source,java,indent=0] ---- -include::UsingSolrJRefGuideExamplesTest.java[tag=solrj-query-with-raw-solrparams] +include::{example-source-dir}UsingSolrJRefGuideExamplesTest.java[tag=solrj-query-with-raw-solrparams] ---- `SolrParams` has a `SolrQuery` subclass, which provides some convenience methods that greatly simplifies query creation. The following snippet shows how the query from the previous example can be built using some of the convenience methods in `SolrQuery`: [source,java,indent=0] ---- -include::UsingSolrJRefGuideExamplesTest.java[tag=solrj-query-with-solrquery] +include::{example-source-dir}UsingSolrJRefGuideExamplesTest.java[tag=solrj-query-with-solrquery] ---- == Indexing in SolrJ @@ -135,7 +137,7 @@ The following example shows how to use SolrJ to add a document to Solr's "techpr [source,java,indent=0] ---- -include::UsingSolrJRefGuideExamplesTest.java[tag=solrj-index-with-raw-solrinputdoc] +include::{example-source-dir}UsingSolrJRefGuideExamplesTest.java[tag=solrj-index-with-raw-solrinputdoc] ---- CAUTION: The indexing examples above are intended to show syntax. For brevity, they break several Solr indexing best-practices. Under normal circumstances, documents should be indexed in larger batches, instead of one at a time. It is also suggested that Solr administrators commit documents using Solr's autocommit settings, and not using explicit `commit()` invocations. @@ -149,21 +151,21 @@ The example snippet below shows an annotated `TechProduct` class that can be use [source,java,indent=0] ---- -include::UsingSolrJRefGuideExamplesTest.java[tag=solrj-techproduct-value-type] +include::{example-source-dir}UsingSolrJRefGuideExamplesTest.java[tag=solrj-techproduct-value-type] ---- Application code with access to the annotated `TechProduct` class above can index `TechProduct` objects directly without any conversion, as in the example snippet below: [source,java,indent=0] ---- -include::UsingSolrJRefGuideExamplesTest.java[tag=solrj-index-bean-value-type] +include::{example-source-dir}UsingSolrJRefGuideExamplesTest.java[tag=solrj-index-bean-value-type] ---- Similarly, search results can be converted directly into bean objects using the `getBeans()` method on `QueryResponse`: [source,java,indent=0] ---- -include::UsingSolrJRefGuideExamplesTest.java[tag=solrj-query-bean-value-type] +include::{example-source-dir}UsingSolrJRefGuideExamplesTest.java[tag=solrj-query-bean-value-type] ---- == Other APIs @@ -173,5 +175,5 @@ The example below shows how SolrJ users can call the CLUSTERSTATUS API of SolrCl [source,java,indent=0] ---- -include::UsingSolrJRefGuideExamplesTest.java[tag=solrj-other-apis] +include::{example-source-dir}UsingSolrJRefGuideExamplesTest.java[tag=solrj-other-apis] ---- From 9b005379f99f3f809b5beaf976e97667db4465b5 Mon Sep 17 00:00:00 2001 From: Karl Wright Date: Tue, 21 Nov 2017 18:05:20 -0500 Subject: [PATCH 07/25] LUCENE-8054: Use backbounds to stop spuriosly rejecting points that are within the exact circle. --- .../spatial3d/geom/GeoBaseAreaShape.java | 2 +- .../lucene/spatial3d/geom/GeoExactCircle.java | 135 +++++++++++++++--- .../lucene/spatial3d/geom/GeoCircleTest.java | 12 ++ .../geom/RandomGeoShapeRelationshipTest.java | 25 ++++ 4 files changed, 157 insertions(+), 17 deletions(-) diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoBaseAreaShape.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoBaseAreaShape.java index a2b2fa1b0c12..0a1d5799e78e 100644 --- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoBaseAreaShape.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoBaseAreaShape.java @@ -110,7 +110,7 @@ public int getRelationship(GeoShape geoShape) { if (insideGeoAreaShape == ALL_INSIDE && insideShape==ALL_INSIDE) { return GeoArea.OVERLAPS; } - + if (intersects(geoShape)){ return GeoArea.OVERLAPS; } diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoExactCircle.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoExactCircle.java index a7b1b6058593..cedcab55aec5 100644 --- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoExactCircle.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoExactCircle.java @@ -40,7 +40,9 @@ class GeoExactCircle extends GeoBaseCircle { /** Planes describing the circle */ protected final List circlePlanes; /** Bounds for the planes */ - protected final Map eitherBounds; + protected final Map eitherBounds; + /** Back bounds for the planes */ + protected final Map backBounds; /** A point that is on the world and on the circle plane */ protected final GeoPoint[] edgePoints; /** The set of notable points for each edge */ @@ -78,11 +80,6 @@ public GeoExactCircle(final PlanetModel planetModel, final double lat, final dou actualAccuracy = accuracy; } - // Since the provide cutoff angle is really a surface distance, we need to use the point-on-bearing even for spheres. - final List circlePlanes = new ArrayList<>(); - // If it turns out that there's only one circle plane, this array will be populated but unused - final List notableEdgePoints = new ArrayList<>(); - // We construct approximation planes until we have a low enough error estimate final List slices = new ArrayList<>(100); // Construct four cardinal points, and then we'll build the first two planes @@ -103,7 +100,10 @@ public GeoExactCircle(final PlanetModel planetModel, final double lat, final dou slices.add(new ApproximationSlice(center, southPoint, Math.PI, northPoint, Math.PI * 2.0, westPoint, Math.PI * 1.5)); edgePoint = northPoint; } - + //System.out.println("Edgepoint = " + edgePoint); + + final List activeSlices = new ArrayList<>(); + // Now, iterate over slices until we have converted all of them into safe SidedPlanes. while (slices.size() > 0) { // Peel off a slice from the back @@ -115,12 +115,17 @@ public GeoExactCircle(final PlanetModel planetModel, final double lat, final dou final GeoPoint interpPoint1 = planetModel.surfacePointOnBearing(center, cutoffAngle, interpPoint1Bearing); final double interpPoint2Bearing = (thisSlice.point2Bearing + thisSlice.middlePointBearing) * 0.5; final GeoPoint interpPoint2 = planetModel.surfacePointOnBearing(center, cutoffAngle, interpPoint2Bearing); + // Is this point on the plane? (that is, is the approximation good enough?) if (Math.abs(thisSlice.plane.evaluate(interpPoint1)) < actualAccuracy && Math.abs(thisSlice.plane.evaluate(interpPoint2)) < actualAccuracy) { - // Good enough; add it to the list of planes, unless it was identical to the previous plane - if (circlePlanes.size() == 0 || !circlePlanes.get(circlePlanes.size()-1).isNumericallyIdentical(thisSlice.plane)) { - circlePlanes.add(thisSlice.plane); - notableEdgePoints.add(new GeoPoint[]{thisSlice.endPoint1, thisSlice.endPoint2}); + if (activeSlices.size() == 0 || !activeSlices.get(activeSlices.size()-1).plane.isNumericallyIdentical(thisSlice.plane)) { + activeSlices.add(new PlaneDescription(thisSlice.plane, thisSlice.endPoint1, thisSlice.endPoint2, thisSlice.middlePoint)); + //System.out.println("Point1 bearing = "+thisSlice.point1Bearing); + } else if (activeSlices.size() > 0) { + // Numerically identical plane; create a new slice to replace the one there. + final PlaneDescription oldSlice = activeSlices.remove(activeSlices.size()-1); + activeSlices.add(new PlaneDescription(thisSlice.plane, oldSlice.endPoint1, thisSlice.endPoint2, thisSlice.endPoint1)); + //System.out.println(" new endpoint2 bearing: "+thisSlice.point2Bearing); } } else { // Split the plane into two, and add it back to the end @@ -135,24 +140,93 @@ public GeoExactCircle(final PlanetModel planetModel, final double lat, final dou } } + // Since the provide cutoff angle is really a surface distance, we need to use the point-on-bearing even for spheres. + final List circlePlanes = new ArrayList<>(activeSlices.size()); + // If it turns out that there's only one circle plane, this array will be populated but unused + final List notableEdgePoints = new ArrayList<>(activeSlices.size()); + // Back planes + final List backPlanes = new ArrayList<>(activeSlices.size()); + + // Compute bounding planes and actual circle planes + for (int i = 0; i < activeSlices.size(); i++) { + final PlaneDescription pd = activeSlices.get(i); + // Calculate the backplane + final Membership thisPlane = pd.plane; + // Go back through all the earlier points until we find one that's not within + GeoPoint backArticulationPoint = null; + for (int j = 1; j < activeSlices.size(); j++) { + int k = i - j; + if (k < 0) { + k += activeSlices.size(); + } + final GeoPoint thisPoint = activeSlices.get(k).endPoint1; + if (!thisPlane.isWithin(thisPoint)) { + // Back up a notch + k++; + if (k >= activeSlices.size()) { + k -= activeSlices.size(); + } + backArticulationPoint = activeSlices.get(k).endPoint1; + break; + } + } + // Go forward until we find one that's not within + GeoPoint forwardArticulationPoint = null; + for (int j = 1; j < activeSlices.size(); j++) { + int k = i + j; + if (k >= activeSlices.size()) { + k -= activeSlices.size(); + } + final GeoPoint thisPoint = activeSlices.get(k).endPoint2; + if (!thisPlane.isWithin(thisPoint)) { + // back up + k--; + if (k < 0) { + k += activeSlices.size(); + } + forwardArticulationPoint = activeSlices.get(k).endPoint2; + break; + } + } + + final Membership backPlane; + if (backArticulationPoint != null && forwardArticulationPoint != null) { + // We want a sided plane that goes through both identified articulation points and the center of the world. + backPlane = new SidedPlane(pd.onSidePoint, true, backArticulationPoint, forwardArticulationPoint); + } else { + backPlane = null; + } + + circlePlanes.add(pd.plane); + backPlanes.add(backPlane); + notableEdgePoints.add(new GeoPoint[]{pd.endPoint1, pd.endPoint2}); + } + //System.out.println("Number of planes needed: "+circlePlanes.size()); this.edgePoints = new GeoPoint[]{edgePoint}; this.circlePlanes = circlePlanes; // Compute bounds if (circlePlanes.size() == 1) { + this.backBounds = null; this.eitherBounds = null; this.notableEdgePoints = null; } else { this.notableEdgePoints = notableEdgePoints; + this.backBounds = new HashMap<>(circlePlanes.size()); this.eitherBounds = new HashMap<>(circlePlanes.size()); for (int i = 0; i < circlePlanes.size(); i++) { final SidedPlane thisPlane = circlePlanes.get(i); final SidedPlane previousPlane = (i == 0)?circlePlanes.get(circlePlanes.size()-1):circlePlanes.get(i-1); final SidedPlane nextPlane = (i == circlePlanes.size()-1)?circlePlanes.get(0):circlePlanes.get(i+1); + if (backPlanes.get(i) != null) { + backBounds.put(thisPlane, backPlanes.get(i)); + } eitherBounds.put(thisPlane, new EitherBound(previousPlane, nextPlane)); } } + + //System.out.println("Is edgepoint within? "+isWithin(edgePoint)); } @@ -222,8 +296,11 @@ public boolean isWithin(final double x, final double y, final double z) { return true; } for (final Membership plane : circlePlanes) { - if (!plane.isWithin(x, y, z)) { - return false; + final Membership backPlane = (backBounds==null)?null:backBounds.get(plane); + if (backPlane == null || backPlane.isWithin(x, y, z)) { + if (!plane.isWithin(x, y, z)) { + return false; + } } } return true; @@ -318,14 +395,14 @@ public String toString() { */ protected static class EitherBound implements Membership { - protected final SidedPlane sideBound1; - protected final SidedPlane sideBound2; + protected final Membership sideBound1; + protected final Membership sideBound2; /** Constructor. * @param sideBound1 is the first side bound. * @param sideBound2 is the second side bound. */ - public EitherBound(final SidedPlane sideBound1, final SidedPlane sideBound2) { + public EitherBound(final Membership sideBound1, final Membership sideBound2) { this.sideBound1 = sideBound1; this.sideBound2 = sideBound2; } @@ -346,6 +423,22 @@ public String toString() { } } + /** A temporary description of a plane that's part of an exact circle. + */ + protected static class PlaneDescription { + public final SidedPlane plane; + public final GeoPoint endPoint1; + public final GeoPoint endPoint2; + public final GeoPoint onSidePoint; + + public PlaneDescription(final SidedPlane plane, final GeoPoint endPoint1, final GeoPoint endPoint2, final GeoPoint onSidePoint) { + this.plane = plane; + this.endPoint1 = endPoint1; + this.endPoint2 = endPoint2; + this.onSidePoint = onSidePoint; + } + } + /** A temporary description of a section of circle. */ protected static class ApproximationSlice { @@ -372,8 +465,18 @@ public ApproximationSlice(final GeoPoint center, if (this.plane == null) { throw new IllegalArgumentException("Either circle is too large to fit on ellipsoid or accuracy is too high; could not construct a plane with endPoint1="+endPoint1+" bearing "+point1Bearing+", endPoint2="+endPoint2+" bearing "+point2Bearing+", middle="+middlePoint+" bearing "+middlePointBearing); } + if (plane.isWithin(center) == false || !plane.evaluateIsZero(endPoint1) || !plane.evaluateIsZero(endPoint2) || !plane.evaluateIsZero(middlePoint)) + throw new IllegalStateException("SidedPlane constructor built a bad plane!!"); + } + + @Override + public String toString() { + return "{end point 1 = " + endPoint1 + " bearing 1 = "+point1Bearing + + " end point 2 = " + endPoint2 + " bearing 2 = " + point2Bearing + + " middle point = " + middlePoint + " middle bearing = " + middlePointBearing + "}"; } } + } diff --git a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoCircleTest.java b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoCircleTest.java index a486038836e7..9206e4f70e8f 100755 --- a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoCircleTest.java +++ b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoCircleTest.java @@ -22,6 +22,18 @@ public class GeoCircleTest extends LuceneTestCase { + @Test + public void testExactCircleLUCENE8054() { + // [junit4] > Throwable #1: java.lang.AssertionError: circle1: GeoExactCircle: + // {planetmodel=PlanetModel.WGS84, center=[lat=-1.2097332228999564, lon=0.749061883738567([X=0.25823775418663625, Y=0.2401212674846636, Z=-0.9338185278804293])], + // radius=0.20785254459485322(11.909073566339822), accuracy=6.710701666727661E-9} + // [junit4] > circle2: GeoExactCircle: {planetmodel=PlanetModel.WGS84, center=[lat=-1.2097332228999564, lon=0.749061883738567([X=0.25823775418663625, Y=0.2401212674846636, Z=-0.9338185278804293])], + // radius=0.20701584142315682(11.861134005896407), accuracy=1.0E-5} + final GeoCircle c1 = new GeoExactCircle(PlanetModel.WGS84, -1.2097332228999564, 0.749061883738567, 0.20785254459485322, 6.710701666727661E-9); + final GeoCircle c2 = new GeoExactCircle(PlanetModel.WGS84, -1.2097332228999564, 0.749061883738567, 0.20701584142315682, 1.0E-5); + assertTrue("cannot be disjoint", c1.getRelationship(c2) != GeoArea.DISJOINT); + } + @Test public void testExactCircle() { GeoCircle c; diff --git a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoShapeRelationshipTest.java b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoShapeRelationshipTest.java index 9c791b7f238b..48de9fddcb7a 100644 --- a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoShapeRelationshipTest.java +++ b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoShapeRelationshipTest.java @@ -285,4 +285,29 @@ public void testRandomOverlaps() { assertEquals(b.toString(), GeoArea.OVERLAPS, rel); } } + + /** + * in LUCENE-8054 we have problems with exact circles that have + * edges that are close together. This test creates those circles with the same + * center and slightly different radius. It is able to reproduce + * the problem. + */ + @Test + @Repeat(iterations = 100) + public void testRandom_LUCENE8054() { + PlanetModel planetModel = PlanetModel.WGS84; + GeoCircle circle1 = (GeoCircle) randomGeoAreaShape(EXACT_CIRCLE, planetModel); + // new radius, a bit smaller than the generated one! + double radius = circle1.getRadius() * (1 - 0.01 * random().nextDouble()); + //circle with same center and new radius + GeoCircle circle2 = GeoCircleFactory.makeExactGeoCircle(planetModel, + circle1.getCenter().getLatitude(), + circle1.getCenter().getLongitude(), + radius, 1e-5 ); + StringBuilder b = new StringBuilder(); + b.append("circle1: " + circle1 + "\n"); + b.append("circle2: " + circle2); + //It cannot be disjoint, same center! + assertTrue(b.toString(), circle1.getRelationship(circle2) != GeoArea.DISJOINT); + } } From e016db39a6798ee7c27b6dbe43a14da2f2f54eae Mon Sep 17 00:00:00 2001 From: Karl Wright Date: Wed, 22 Nov 2017 06:51:19 -0500 Subject: [PATCH 08/25] LUCENE-8056: Use perpendicular bounding planes for segments of exact circles. --- lucene/CHANGES.txt | 3 +++ .../lucene/spatial3d/geom/GeoExactCircle.java | 24 ++++++++----------- .../lucene/spatial3d/geom/GeoCircleTest.java | 15 +++++++++++- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index d7fd0c37c05e..b6618fdb2e92 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -77,6 +77,9 @@ New Features Bug Fixes +* LUCENE-8056: Exact circle segment bounding suffered from precision errors. + (Karl Wright) + * LUCENE-8054: Fix the exact circle case where relationships fail when the planet model has c <= ab, because the planes are constructed incorrectly. (Ignacio Vera) diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoExactCircle.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoExactCircle.java index cedcab55aec5..bbd1ff03248f 100644 --- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoExactCircle.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoExactCircle.java @@ -145,7 +145,9 @@ public GeoExactCircle(final PlanetModel planetModel, final double lat, final dou // If it turns out that there's only one circle plane, this array will be populated but unused final List notableEdgePoints = new ArrayList<>(activeSlices.size()); // Back planes - final List backPlanes = new ArrayList<>(activeSlices.size()); + final Map backPlanes = new HashMap<>(activeSlices.size()); + // Bounds + final Map bounds = new HashMap<>(activeSlices.size()); // Compute bounding planes and actual circle planes for (int i = 0; i < activeSlices.size(); i++) { @@ -198,13 +200,15 @@ public GeoExactCircle(final PlanetModel planetModel, final double lat, final dou } circlePlanes.add(pd.plane); - backPlanes.add(backPlane); + if (backPlane != null) { + backPlanes.put(pd.plane, backPlane); + } notableEdgePoints.add(new GeoPoint[]{pd.endPoint1, pd.endPoint2}); + bounds.put(pd.plane, new EitherBound(new SidedPlane(pd.onSidePoint, pd.endPoint1, center), new SidedPlane(pd.onSidePoint, pd.endPoint2, center))); } //System.out.println("Number of planes needed: "+circlePlanes.size()); - this.edgePoints = new GeoPoint[]{edgePoint}; this.circlePlanes = circlePlanes; // Compute bounds if (circlePlanes.size() == 1) { @@ -213,19 +217,11 @@ public GeoExactCircle(final PlanetModel planetModel, final double lat, final dou this.notableEdgePoints = null; } else { this.notableEdgePoints = notableEdgePoints; - this.backBounds = new HashMap<>(circlePlanes.size()); - this.eitherBounds = new HashMap<>(circlePlanes.size()); - for (int i = 0; i < circlePlanes.size(); i++) { - final SidedPlane thisPlane = circlePlanes.get(i); - final SidedPlane previousPlane = (i == 0)?circlePlanes.get(circlePlanes.size()-1):circlePlanes.get(i-1); - final SidedPlane nextPlane = (i == circlePlanes.size()-1)?circlePlanes.get(0):circlePlanes.get(i+1); - if (backPlanes.get(i) != null) { - backBounds.put(thisPlane, backPlanes.get(i)); - } - eitherBounds.put(thisPlane, new EitherBound(previousPlane, nextPlane)); - } + this.eitherBounds = bounds; + this.backBounds = backPlanes; } + this.edgePoints = new GeoPoint[]{edgePoint}; //System.out.println("Is edgepoint within? "+isWithin(edgePoint)); } diff --git a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoCircleTest.java b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoCircleTest.java index 9206e4f70e8f..d203b095f293 100755 --- a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoCircleTest.java +++ b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoCircleTest.java @@ -520,5 +520,18 @@ public void testLUCENE8054(){ int rel = circle1.getRelationship(circle2); assertTrue(rel != GeoArea.DISJOINT); } - + + @Test + public void testLUCENE8056(){ + GeoCircle circle = GeoCircleFactory.makeExactGeoCircle(PlanetModel.WGS84, 0.647941905154693, 0.8542472362428436, 0.8917883700569315, 1.2173787103955335E-8); + GeoBBox bBox = GeoBBoxFactory.makeGeoBBox(PlanetModel.WGS84, 0.5890486225480862, 0.4908738521234052, 1.9634954084936207, 2.159844949342983); + //Center iis out of the shape + assertFalse(circle.isWithin(bBox.getCenter())); + //Edge point is in the shape + assertTrue(circle.isWithin(bBox.getEdgePoints()[0])); + //Shape should intersect!!! + assertTrue(bBox.getRelationship(circle) == GeoArea.OVERLAPS); + } + + } From 7a715573310502ab569ecfcf396052b84cd13f7a Mon Sep 17 00:00:00 2001 From: David Smiley Date: Wed, 22 Nov 2017 08:42:42 -0500 Subject: [PATCH 09/25] SOLR-11501: limit query parser switching to thwart hacking * starting a query with {!...} only works when the default parser is lucene or func. * edismax now requires uf=_query_ to allow advanced sub-queries. --- solr/CHANGES.txt | 19 ++++ .../solr/parser/SolrQueryParserBase.java | 29 +++++- .../solr/search/ExtendedDismaxQParser.java | 90 ++----------------- .../java/org/apache/solr/search/Grouping.java | 2 +- .../org/apache/solr/search/LuceneQParser.java | 1 + .../solr/search/NestedQParserPlugin.java | 5 ++ .../java/org/apache/solr/search/QParser.java | 56 +++++++----- .../join/ChildFieldValueSourceParser.java | 4 +- .../apache/solr/DisMaxRequestHandlerTest.java | 20 +++++ .../solr/highlight/HighlighterTest.java | 2 +- .../apache/solr/search/QueryEqualityTest.java | 2 +- .../TestComplexPhraseQParserPlugin.java | 10 +++ .../solr/search/TestExtendedDismaxParser.java | 36 ++++++-- .../apache/solr/search/TestQueryTypes.java | 2 +- 14 files changed, 160 insertions(+), 118 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 694a5bfcc873..2e3ff42df117 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -45,6 +45,25 @@ Apache UIMA 2.3.1 Apache ZooKeeper 3.4.10 Jetty 9.3.20.v20170531 +Upgrade Notes +---------------------- + +* SOLR-11501: Starting a query string with local-params {!myparser ...} is used to switch the query parser to another, + and is intended for use by Solr system developers, not end users doing searches. To reduce negative side-effects of + unintended hack-ability, we've limited the cases that local-params will be parsed to only contexts in which the + default parser is "lucene" or "func". + So if defType=edismax then q={!myparser ...} won't work. In that example, put the desired query parser into defType. + Another example is if deftype=edismax then hl.q={!myparser ...} won't work for the same reason. In that example, + either put the desired query parser into hl.qparser or set hl.qparser=lucene. Most users won't run into these cases + but some will and must change. + If you must have full backwards compatibility, use luceneMatchVersion=7.1.0 or something earlier. (David Smiley) + +* SOLR-11501: The edismax parser by default no longer allows subqueries that specify a Solr parser using either + local-params, or the older _query_ magic field trick. For example + {!prefix f=myfield v=enterp} or _query_:"{!prefix f=myfield v=enterp}" are not supported by default anymore. + If you want to allow power-users to do this, set uf=*,_query_ or some other value that includes _query_. + If you must have full backwards compatibility, use luceneMatchVersion=7.1.0 or something earlier. (David Smiley) + New Features ---------------------- * SOLR-11448: Implement an option in collection commands to wait for final results. (ab) diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java index 04512ed2ee57..e7e303cbac05 100644 --- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java +++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java @@ -43,6 +43,7 @@ import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.QueryBuilder; +import org.apache.lucene.util.Version; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Operations; @@ -96,6 +97,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder { int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; boolean autoGeneratePhraseQueries = false; + boolean allowSubQueryParsing = false; int flags; protected IndexSchema schema; @@ -189,6 +191,10 @@ public void init(String defaultField, QParser parser) { this.flags = parser.getFlags(); this.defaultField = defaultField; setAnalyzer(schema.getQueryAnalyzer()); + // TODO in 8.0(?) remove this. Prior to 7.2 we defaulted to allowing sub-query parsing by default + if (!parser.getReq().getCore().getSolrConfig().luceneMatchVersion.onOrAfter(Version.LUCENE_7_2_0)) { + setAllowSubQueryParsing(true); + } // otherwise defaults to false } // Turn on the "filter" bit and return the previous flags for the caller to save @@ -307,6 +313,22 @@ public int getPhraseSlop() { return phraseSlop; } + /** @see #setAllowLeadingWildcard(boolean) */ + public boolean isAllowSubQueryParsing() { + return allowSubQueryParsing; + } + + /** + * Set to enable subqueries to be parsed. If now allowed, the default, a {@link SyntaxError} + * will likely be thrown. + * Here is the preferred syntax using local-params: + * {!prefix f=field v=foo} + * and here is the older one, using a magic field name: + * _query_:"{!prefix f=field v=foo}". + */ + public void setAllowSubQueryParsing(boolean allowSubQueryParsing) { + this.allowSubQueryParsing = allowSubQueryParsing; + } /** * Set to true to allow leading wildcard characters. @@ -947,7 +969,7 @@ protected Query getFieldQuery(String field, String queryText, boolean quoted, bo } else { // intercept magic field name of "_" to use as a hook for our // own functions. - if (field.charAt(0) == '_' && parser != null) { + if (allowSubQueryParsing && field.charAt(0) == '_' && parser != null) { MagicFieldName magic = MagicFieldName.get(field); if (null != magic) { subQParser = parser.subQuery(queryText, magic.subParser); @@ -995,7 +1017,7 @@ protected Query getFieldQuery(String field, List queryTerms, boolean raw } else { // intercept magic field name of "_" to use as a hook for our // own functions. - if (field.charAt(0) == '_' && parser != null) { + if (allowSubQueryParsing && field.charAt(0) == '_' && parser != null) { MagicFieldName magic = MagicFieldName.get(field); if (null != magic) { subQParser = parser.subQuery(String.join(" ", queryTerms), magic.subParser); @@ -1148,6 +1170,9 @@ protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) // called from parser protected Query getLocalParams(String qfield, String lparams) throws SyntaxError { + if (!allowSubQueryParsing) { + throw new SyntaxError("local-params subquery is disabled"); + } QParser nested = parser.subQuery(lparams, null); return nested.getQuery(); } diff --git a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java index 00e4048e3f2a..999a069975f2 100644 --- a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java +++ b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java @@ -146,6 +146,7 @@ public Query parse() throws SyntaxError { addAliasesFromRequest(up, config.tiebreaker); up.setPhraseSlop(config.qslop); // slop for explicit user phrase queries up.setAllowLeadingWildcard(true); + up.setAllowSubQueryParsing(config.userFields.isAllowed(MagicFieldName.QUERY.field)); // defer escaping and only do if lucene parsing fails, or we need phrases // parsing fails. Need to sloppy phrase queries anyway though. @@ -618,85 +619,7 @@ public void addDebugInfo(NamedList debugInfo) { } debugInfo.add("boostfuncs", getReq().getParams().getParams(DisMaxParams.BF)); } - - - // FIXME: Not in use - // public static CharSequence partialEscape(CharSequence s) { - // StringBuilder sb = new StringBuilder(); - // - // int len = s.length(); - // for (int i = 0; i < len; i++) { - // char c = s.charAt(i); - // if (c == ':') { - // // look forward to make sure it's something that won't - // // cause a parse exception (something that won't be escaped... like - // // +,-,:, whitespace - // if (i+10) { - // char ch = s.charAt(i+1); - // if (!(Character.isWhitespace(ch) || ch=='+' || ch=='-' || ch==':')) { - // // OK, at this point the chars after the ':' will be fine. - // // now look back and try to determine if this is a fieldname - // // [+,-]? [letter,_] [letter digit,_,-,.]* - // // This won't cover *all* possible lucene fieldnames, but we should - // // only pick nice names to begin with - // int start, pos; - // for (start=i-1; start>=0; start--) { - // ch = s.charAt(start); - // if (Character.isWhitespace(ch)) break; - // } - // - // // skip whitespace - // pos = start+1; - // - // // skip leading + or - - // ch = s.charAt(pos); - // if (ch=='+' || ch=='-') { - // pos++; - // } - // - // // we don't need to explicitly check for end of string - // // since ':' will act as our sentinal - // - // // first char can't be '-' or '.' - // ch = s.charAt(pos++); - // if (Character.isJavaIdentifierPart(ch)) { - // - // for(;;) { - // ch = s.charAt(pos++); - // if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) { - // break; - // } - // } - // - // if (pos<=i) { - // // OK, we got to the ':' and everything looked like a valid fieldname, so - // // don't escape the ':' - // sb.append(':'); - // continue; // jump back to start of outer-most loop - // } - // - // } - // - // - // } - // } - // - // // we fell through to here, so we should escape this like other reserved chars. - // sb.append('\\'); - // } - // else if (c == '\\' || c == '!' || c == '(' || c == ')' || - // c == '^' || c == '[' || c == ']' || - // c == '{' || c == '}' || c == '~' || c == '*' || c == '?' - // ) - // { - // sb.append('\\'); - // } - // sb.append(c); - // } - // return sb; - // } - - + protected static class Clause { boolean isBareWord() { @@ -1510,7 +1433,7 @@ static class UserFields { private DynamicField[] dynamicUserFields; private DynamicField[] negativeDynamicUserFields; - UserFields(Map ufm) { + UserFields(Map ufm, boolean forbidSubQueryByDefault) { userFieldsMap = ufm; if (0 == userFieldsMap.size()) { userFieldsMap.put("*", null); @@ -1527,6 +1450,10 @@ static class UserFields { dynUserFields.add(new DynamicField(f)); } } + // unless "_query_" was expressly allowed, we forbid it. + if (forbidSubQueryByDefault && !userFieldsMap.containsKey(MagicFieldName.QUERY.field)) { + userFieldsMap.put("-" + MagicFieldName.QUERY.field, null); + } Collections.sort(dynUserFields); dynamicUserFields = dynUserFields.toArray(new DynamicField[dynUserFields.size()]); Collections.sort(negDynUserFields); @@ -1675,7 +1602,8 @@ public ExtendedDismaxConfiguration(SolrParams localParams, SolrParams params, SolrQueryRequest req) { solrParams = SolrParams.wrapDefaults(localParams, params); minShouldMatch = DisMaxQParser.parseMinShouldMatch(req.getSchema(), solrParams); // req.getSearcher() here causes searcher refcount imbalance - userFields = new UserFields(U.parseFieldBoosts(solrParams.getParams(DMP.UF))); + final boolean forbidSubQueryByDefault = req.getCore().getSolrConfig().luceneMatchVersion.onOrAfter(Version.LUCENE_7_2_0); + userFields = new UserFields(U.parseFieldBoosts(solrParams.getParams(DMP.UF)), forbidSubQueryByDefault); try { queryFields = DisMaxQParser.parseQueryFields(req.getSchema(), solrParams); // req.getSearcher() here causes searcher refcount imbalance } catch (SyntaxError e) { diff --git a/solr/core/src/java/org/apache/solr/search/Grouping.java b/solr/core/src/java/org/apache/solr/search/Grouping.java index 245320dc17fc..fe781d8aacee 100644 --- a/solr/core/src/java/org/apache/solr/search/Grouping.java +++ b/solr/core/src/java/org/apache/solr/search/Grouping.java @@ -177,7 +177,7 @@ public void addFieldCommand(String field, SolrQueryRequest request) throws Synta } public void addFunctionCommand(String groupByStr, SolrQueryRequest request) throws SyntaxError { - QParser parser = QParser.getParser(groupByStr, "func", request); + QParser parser = QParser.getParser(groupByStr, FunctionQParserPlugin.NAME, request); Query q = parser.getQuery(); final Grouping.Command gc; if (q instanceof FunctionQuery) { diff --git a/solr/core/src/java/org/apache/solr/search/LuceneQParser.java b/solr/core/src/java/org/apache/solr/search/LuceneQParser.java index 753b36b6d0d1..ab6f97da1e35 100644 --- a/solr/core/src/java/org/apache/solr/search/LuceneQParser.java +++ b/solr/core/src/java/org/apache/solr/search/LuceneQParser.java @@ -44,6 +44,7 @@ public Query parse() throws SyntaxError { lparser.setDefaultOperator(QueryParsing.parseOP(getParam(QueryParsing.OP))); lparser.setSplitOnWhitespace(StrUtils.parseBool (getParam(QueryParsing.SPLIT_ON_WHITESPACE), SolrQueryParser.DEFAULT_SPLIT_ON_WHITESPACE)); + lparser.setAllowSubQueryParsing(true); return lparser.parse(qstr); } diff --git a/solr/core/src/java/org/apache/solr/search/NestedQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/NestedQParserPlugin.java index 9b0dc5a07973..1cf282fd7908 100644 --- a/solr/core/src/java/org/apache/solr/search/NestedQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/NestedQParserPlugin.java @@ -18,6 +18,7 @@ import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.Query; +import org.apache.solr.common.SolrException; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.request.SolrQueryRequest; @@ -36,6 +37,10 @@ public class NestedQParserPlugin extends QParserPlugin { @Override public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { + if (localParams == null) { // avoid an NPE later + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "the 'query' QParser must be invoked with local-params, e.g. {!query defType=...}"); + } return new QParser(qstr, localParams, params, req) { QParser baseParser; ValueSource vs; diff --git a/solr/core/src/java/org/apache/solr/search/QParser.java b/solr/core/src/java/org/apache/solr/search/QParser.java index 7392cbcd27b6..54b84e7129c4 100644 --- a/solr/core/src/java/org/apache/solr/search/QParser.java +++ b/solr/core/src/java/org/apache/solr/search/QParser.java @@ -161,8 +161,9 @@ public void setString(String s) { /** * Returns the resulting query from this QParser, calling parse() only the - * first time and caching the Query result. + * first time and caching the Query result. A null return is possible! */ + //TODO never return null; standardize the semantics public Query getQuery() throws SyntaxError { if (query==null) { query=parse(); @@ -292,9 +293,10 @@ public void addDebugInfo(NamedList debugInfo) { debugInfo.add("QParser", this.getClass().getSimpleName()); } - /** Create a QParser to parse qstr, + /** + * Create a {@link QParser} to parse qstr, * using the "lucene" (QParserPlugin.DEFAULT_QTYPE) query parser. - * The query parser may be overridden by local parameters in the query + * The query parser may be overridden by local-params in the query * string itself. For example if * qstr={!prefix f=myfield}foo * then the prefix query parser will be used. @@ -303,25 +305,41 @@ public static QParser getParser(String qstr, SolrQueryRequest req) throws Syntax return getParser(qstr, QParserPlugin.DEFAULT_QTYPE, req); } - /** Create a QParser to parse qstr, - * assuming that the default query parser is defaultParser. - * The query parser may be overridden by local parameters in the query - * string itself. For example if defaultParser="dismax" - * and qstr=foo, then the dismax query parser will be used - * to parse and construct the query object. However - * if qstr={!prefix f=myfield}foo - * then the prefix query parser will be used. + /** + * Create a {@link QParser} to parse qstr using the defaultParser. + * Note that local-params is only parsed when the defaultParser is "lucene" or "func". */ public static QParser getParser(String qstr, String defaultParser, SolrQueryRequest req) throws SyntaxError { - // SolrParams localParams = QueryParsing.getLocalParams(qstr, req.getParams()); + boolean allowLocalParams = defaultParser == null || defaultParser.equals(QParserPlugin.DEFAULT_QTYPE) + || defaultParser.equals(FunctionQParserPlugin.NAME); + return getParser(qstr, defaultParser, allowLocalParams, req); + } + /** + * Expert: Create a {@link QParser} to parse {@code qstr} using the {@code parserName} parser, while allowing a + * toggle for whether local-params may be parsed. + * The query parser may be overridden by local parameters in the query string itself + * (assuming {@code allowLocalParams}. + * For example if parserName=dismax and qstr=foo, + * then the dismax query parser will be used to parse and construct the query object. + * However if qstr={!prefix f=myfield}foo then the prefix query parser will be used. + * + * @param allowLocalParams Whether this query parser should parse local-params syntax. + * Note that the "lucene" query parser natively parses local-params regardless. + * @lucene.internal + */ + public static QParser getParser(String qstr, String parserName, boolean allowLocalParams, SolrQueryRequest req) throws SyntaxError { + // SolrParams localParams = QueryParsing.getLocalParams(qstr, req.getParams()); + if (parserName == null) { + parserName = QParserPlugin.DEFAULT_QTYPE;//"lucene" + } String stringIncludingLocalParams = qstr; ModifiableSolrParams localParams = null; SolrParams globalParams = req.getParams(); boolean valFollowedParams = true; int localParamsEnd = -1; - if (qstr != null && qstr.startsWith(QueryParsing.LOCALPARAM_START)) { + if (allowLocalParams && qstr != null && qstr.startsWith(QueryParsing.LOCALPARAM_START)) { localParams = new ModifiableSolrParams(); localParamsEnd = QueryParsing.parseLocalParams(qstr, 0, localParams, globalParams); @@ -329,26 +347,18 @@ public static QParser getParser(String qstr, String defaultParser, SolrQueryRequ if (val != null) { // val was directly specified in localParams via v= or v=$arg valFollowedParams = false; + //TODO if remainder of query string after '}' is non-empty, then what? Throw error? Fall back to lucene QParser? } else { // use the remainder of the string as the value valFollowedParams = true; val = qstr.substring(localParamsEnd); localParams.set(QueryParsing.V, val); } - } - - String parserName; - - if (localParams == null) { - parserName = defaultParser; - } else { - parserName = localParams.get(QueryParsing.TYPE,defaultParser); + parserName = localParams.get(QueryParsing.TYPE,parserName); qstr = localParams.get("v"); } - parserName = parserName==null ? QParserPlugin.DEFAULT_QTYPE : parserName; - QParserPlugin qplug = req.getCore().getQueryPlugin(parserName); QParser parser = qplug.createParser(qstr, localParams, req.getParams(), req); diff --git a/solr/core/src/java/org/apache/solr/search/join/ChildFieldValueSourceParser.java b/solr/core/src/java/org/apache/solr/search/join/ChildFieldValueSourceParser.java index fcd21b32dc60..aa2b77d533d2 100644 --- a/solr/core/src/java/org/apache/solr/search/join/ChildFieldValueSourceParser.java +++ b/solr/core/src/java/org/apache/solr/search/join/ChildFieldValueSourceParser.java @@ -162,8 +162,8 @@ public ValueSource parse(FunctionQParser fp) throws SyntaxError { final Query query; if (fp.hasMoreArguments()){ query = fp.parseNestedQuery(); - }else{ - query = fp.subQuery(fp.getParam(CommonParams.Q), BlockJoinParentQParserPlugin.NAME).getQuery(); + } else { + query = fp.subQuery(fp.getParam(CommonParams.Q), null).getQuery(); } BitSetProducer parentFilter; diff --git a/solr/core/src/test/org/apache/solr/DisMaxRequestHandlerTest.java b/solr/core/src/test/org/apache/solr/DisMaxRequestHandlerTest.java index 386f6900e716..dbd4c64c46ab 100644 --- a/solr/core/src/test/org/apache/solr/DisMaxRequestHandlerTest.java +++ b/solr/core/src/test/org/apache/solr/DisMaxRequestHandlerTest.java @@ -169,6 +169,26 @@ public void doTestSomeStuff(final String qt) throws Exception { "q", "\"cool chick\"" ) ,"//*[@numFound='1']" ); + + } + + @Test + public void testSubQueriesNotSupported() { + // See org.apache.solr.search.TestSolrQueryParser.testNestedQueryModifiers() + assertQ("don't parse subqueries", + req("defType", "dismax", + "df", "doesnotexist_s", + "q", "_query_:\"{!v=$qq}\"", + "qq", "features_t:cool") + ,"//*[@numFound='0']" + ); + assertQ("don't parse subqueries", + req("defType", "dismax", + "df", "doesnotexist_s", + "q", "{!v=$qq}", + "qq", "features_t:cool") + ,"//*[@numFound='0']" + ); } @Test diff --git a/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java b/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java index 759de00cc2e8..f345441e71e5 100644 --- a/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java +++ b/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java @@ -1007,7 +1007,7 @@ public void testHlQParameter() { "//lst[@name='highlighting']/lst[@name='1']" + "/arr[@name='title']/str='Apache Software Foundation'"); assertQ("hl.q parameter uses localparam parser definition correctly", - req("q", "Apache", "defType", "edismax", "qf", "title t_text", "hl", "true", "hl.fl", "title", "hl.q", "{!edismax}Software"), + req("q", "Apache", "defType", "edismax", "qf", "title t_text", "hl", "true", "hl.fl", "title", "hl.q", "{!edismax}Software", "hl.qparser", "lucene"), "//lst[@name='highlighting']/lst[@name='1']" + "/arr[@name='title']/str='Apache Software Foundation'"); assertQ("hl.q parameter uses defType correctly", diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java index ae85089f8984..a52ba5661e1e 100644 --- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java +++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java @@ -1049,7 +1049,7 @@ protected void assertQueryEquals(final String defType, SolrQueryResponse rsp = new SolrQueryResponse(); SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req,rsp)); for (int i = 0; i < inputs.length; i++) { - queries[i] = (QParser.getParser(inputs[i], defType, req).getQuery()); + queries[i] = QParser.getParser(inputs[i], defType, true, req).getQuery(); } } finally { SolrRequestInfo.clearRequestInfo(); diff --git a/solr/core/src/test/org/apache/solr/search/TestComplexPhraseQParserPlugin.java b/solr/core/src/test/org/apache/solr/search/TestComplexPhraseQParserPlugin.java index 02060a98acb3..4964d5b83ebd 100644 --- a/solr/core/src/test/org/apache/solr/search/TestComplexPhraseQParserPlugin.java +++ b/solr/core/src/test/org/apache/solr/search/TestComplexPhraseQParserPlugin.java @@ -16,6 +16,7 @@ */ package org.apache.solr.search; +import org.apache.solr.common.SolrException; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.HighlightParams; import org.apache.solr.util.AbstractSolrTestCase; @@ -164,6 +165,15 @@ public void test() { "//result[@numFound='2']" ); + assertQEx("don't parse subqueries", + "SyntaxError", + sumLRF.makeRequest("_query_:\"{!prefix f=name v=smi}\""), SolrException.ErrorCode.BAD_REQUEST + ); + assertQEx("don't parse subqueries", + "SyntaxError", + sumLRF.makeRequest("{!prefix f=name v=smi}"), SolrException.ErrorCode.BAD_REQUEST + ); + } @Test diff --git a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java index e222bcf71eb2..6666c29a2a6f 100644 --- a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java +++ b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java @@ -51,7 +51,7 @@ public static void beforeClass() throws Exception { index(); } - public static void index() throws Exception { + public static void index() throws Exception { assertU(adoc("id", "42", "trait_ss", "Tool", "trait_ss", "Obnoxious", "name", "Zapp Brannigan")); assertU(adoc("id", "43" , @@ -393,13 +393,22 @@ public void testFocusQueryParser() { // special psuedo-fields like _query_ and _val_ - // special fields (and real field id) should be included by default + // _query_ should be excluded by default assertQ(req("defType", "edismax", "mm", "100%", "fq", "id:51", - "q", "_query_:\"{!geofilt d=20 sfield=store pt=12.34,-56.78}\""), - oner); - // should also work when explicitly allowed + "q", "_query_:\"{!geofilt d=20 sfield=store pt=12.34,-56.78}\"", + "debugQuery", "true"), + nor, + "//str[@name='parsedquery_toString'][.='+(((text:queri) (text:\"geofilt d 20 sfield store pt 12 34 56 78\"))~2)']"); + // again; this time use embedded local-params style + assertQ(req("defType", "edismax", + "mm", "100%", + "fq", "id:51", + "q", " {!geofilt d=20 sfield=store pt=12.34,-56.78}"),//notice leading space + nor); + + // should work when explicitly allowed assertQ(req("defType", "edismax", "mm", "100%", "fq", "id:51", @@ -413,6 +422,14 @@ public void testFocusQueryParser() { "uf", "_query_", "q", "_query_:\"{!geofilt d=20 sfield=store pt=12.34,-56.78}\""), oner); + // again; this time use embedded local-params style + assertQ(req("defType", "edismax", + "mm", "100%", + "fq", "id:51", + "uf", "id", + "uf", "_query_", + "q", " {!geofilt d=20 sfield=store pt=12.34,-56.78}"),//notice leading space + oner); // should fail when prohibited assertQ(req("defType", "edismax", @@ -424,7 +441,7 @@ public void testFocusQueryParser() { assertQ(req("defType", "edismax", "mm", "100%", "fq", "id:51", - "uf", "id", // excluded by ommision + "uf", "id", // excluded by omission "q", "_query_:\"{!geofilt d=20 sfield=store pt=12.34,-56.78}\""), nor); @@ -2047,4 +2064,11 @@ public void testShingleQueries() throws Exception { , "/response/numFound==1" ); } + + /** SOLR-11512 */ + @Test(expected=SolrException.class) + public void killInfiniteRecursionParse() throws Exception { + assertJQ(req("defType", "edismax", "q", "*", "qq", "{!edismax v=something}", "bq", "{!edismax v=$qq}")); + } + } diff --git a/solr/core/src/test/org/apache/solr/search/TestQueryTypes.java b/solr/core/src/test/org/apache/solr/search/TestQueryTypes.java index 0945ea2413cf..ef976a3b9013 100644 --- a/solr/core/src/test/org/apache/solr/search/TestQueryTypes.java +++ b/solr/core/src/test/org/apache/solr/search/TestQueryTypes.java @@ -430,7 +430,7 @@ public void testQueryTypes() { ); assertQ("test nested nested query", - req("q","_query_:\"{!query defType=query v=$q1}\"", "q1","{!v=$q2}","q2","{!prefix f=v_t v=$qqq}","qqq","hel") + req("q","_query_:\"{!query v=$q1}\"", "q1","{!v=$q2}","q2","{!prefix f=v_t v=$qqq}","qqq","hel") ,"//result[@numFound='2']" ); assertQ("Test text field with no analysis doesn't NPE with wildcards (SOLR-4318)", From 3c91feb1f996bf693d4f88e673b1336e4958b512 Mon Sep 17 00:00:00 2001 From: Karl Wright Date: Wed, 22 Nov 2017 11:07:52 -0500 Subject: [PATCH 10/25] LUCENE-8057: getBounds() for exact circle did not include segment endpoints. --- .../lucene/spatial3d/geom/GeoExactCircle.java | 7 ++++++- .../lucene/spatial3d/geom/GeoCircleTest.java | 19 ++++++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoExactCircle.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoExactCircle.java index bbd1ff03248f..8bba244c0af4 100644 --- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoExactCircle.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoExactCircle.java @@ -356,8 +356,13 @@ public void getBounds(Bounds bounds) { return; } // Add bounds for all circle planes - for (final SidedPlane plane : circlePlanes) { + for (int edgeIndex = 0; edgeIndex < circlePlanes.size(); edgeIndex++) { + final SidedPlane plane = circlePlanes.get(edgeIndex); bounds.addPlane(planetModel, plane, eitherBounds.get(plane)); + final GeoPoint[] points = notableEdgePoints.get(edgeIndex); + for (final GeoPoint point : points) { + bounds.addPoint(point); + } // We don't bother to compute the intersection bounds since, unless the planet model is pathological, we expect planes to be intersecting at shallow // angles. } diff --git a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoCircleTest.java b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoCircleTest.java index d203b095f293..3f35ef305cf3 100755 --- a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoCircleTest.java +++ b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoCircleTest.java @@ -533,5 +533,22 @@ public void testLUCENE8056(){ assertTrue(bBox.getRelationship(circle) == GeoArea.OVERLAPS); } - + @Test + public void testExactCircleBounds() { + + GeoPoint center = new GeoPoint(PlanetModel.WGS84, 0, 0); + // Construct four cardinal points, and then we'll build the first two planes + final GeoPoint northPoint = PlanetModel.WGS84.surfacePointOnBearing(center, 1, 0.0); + final GeoPoint southPoint = PlanetModel.WGS84.surfacePointOnBearing(center, 1, Math.PI); + final GeoPoint eastPoint = PlanetModel.WGS84.surfacePointOnBearing(center, 1, Math.PI * 0.5); + final GeoPoint westPoint = PlanetModel.WGS84.surfacePointOnBearing(center, 1, Math.PI * 1.5); + + GeoCircle circle = GeoCircleFactory.makeExactGeoCircle(PlanetModel.WGS84, 0, 0, 1, 1e-6); + LatLonBounds bounds = new LatLonBounds(); + circle.getBounds(bounds); + assertEquals(northPoint.getLatitude(), bounds.getMaxLatitude(), 1e-2); + assertEquals(southPoint.getLatitude(), bounds.getMinLatitude(), 1e-2); + assertEquals(westPoint.getLongitude(), bounds.getLeftLongitude(), 1e-2); + assertEquals(eastPoint.getLongitude(), bounds.getRightLongitude(), 1e-2); + } } From ff90389b9821b0dc64dd34655c7415b5ce622cd0 Mon Sep 17 00:00:00 2001 From: Karl Wright Date: Wed, 22 Nov 2017 11:11:21 -0500 Subject: [PATCH 11/25] LUCENE-8057: Update CHANGES.txt --- lucene/CHANGES.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index b6618fdb2e92..6c9f702e8485 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -77,6 +77,9 @@ New Features Bug Fixes +* LUCENE-8057: Exact circle bounds computation was incorrect. + (Ignacio Vera) + * LUCENE-8056: Exact circle segment bounding suffered from precision errors. (Karl Wright) From 218ad4453bd539d3855292a606944d024801bf04 Mon Sep 17 00:00:00 2001 From: Doug Turnbull Date: Wed, 22 Nov 2017 11:28:27 -0500 Subject: [PATCH 12/25] Move scoreOverlaps functionality down to Solr level --- .../org/apache/lucene/util/QueryBuilder.java | 26 -------- .../apache/lucene/util/TestQueryBuilder.java | 34 ---------- .../solr/parser/SolrQueryParserBase.java | 40 ++++++++++++ .../org/apache/solr/schema/TextField.java | 9 +-- .../solr/collection1/conf/schema12.xml | 65 +++++++++++++++++++ .../solr/collection1/conf/synonyms.txt | 8 ++- .../solr/search/TestExtendedDismaxParser.java | 35 ++++++++++ 7 files changed, 152 insertions(+), 65 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java index 283f5ec17e59..d6f88894a78e 100644 --- a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java @@ -63,16 +63,8 @@ public class QueryBuilder { protected boolean enablePositionIncrements = true; protected boolean enableGraphQueries = true; protected boolean autoGenerateMultiTermSynonymsPhraseQuery = false; - protected ScoreOverlaps scoreOverlaps = ScoreOverlaps.AS_SAME_TERM; - public static enum ScoreOverlaps { - AS_SAME_TERM, /* default, blends doc freq for terms in same posn: SynonymQuery(A B)*/ - PICK_BEST, /* picks dismax over synonyms, choosing best scoring per doc: (A | B) */ - AS_DISTINCT_TERMS /* picks combination (A OR B).*/ - } - protected boolean blendSynonyms = true; - /** Creates a new QueryBuilder using the given analyzer. */ public QueryBuilder(Analyzer analyzer) { this.analyzer = analyzer; @@ -268,10 +260,6 @@ public boolean getEnableGraphQueries() { return enableGraphQueries; } - public void setScoreOverlaps(ScoreOverlaps v) { scoreOverlaps = v;} - - public ScoreOverlaps getScoreOverlaps() { return scoreOverlaps;} - /** * Creates a query from a token stream. * @@ -635,20 +623,6 @@ protected BooleanQuery.Builder newBooleanQuery() { * @return new Query instance */ protected Query newSynonymQuery(Term terms[]) { - if (scoreOverlaps == ScoreOverlaps.PICK_BEST) { - List currPosnClauses = new ArrayList(); - for (Term term : terms) { - currPosnClauses.add(newTermQuery(term)); - } - DisjunctionMaxQuery dm = new DisjunctionMaxQuery(currPosnClauses, 0.0f); - return dm; - } else if (scoreOverlaps == ScoreOverlaps.AS_DISTINCT_TERMS) { - BooleanQuery.Builder builder = new BooleanQuery.Builder(); - for (Term term : terms) { - builder.add(newTermQuery(term), BooleanClause.Occur.SHOULD); - } - return builder.build(); - } return new SynonymQuery(terms); } diff --git a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java index a00642bdcdfe..96bd8a9555dc 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java @@ -147,40 +147,6 @@ public void testSynonyms() throws Exception { assertEquals(expected, builder.createBooleanQuery("field", "dogs", BooleanClause.Occur.MUST)); assertEquals(expected, builder.createPhraseQuery("field", "dogs")); } - - - /** synonym dismax expansion instead of blended */ - public void testBestSynonyms() throws Exception { - List queries = new ArrayList(); - queries.add(new TermQuery(new Term("field", "dogs"))); - queries.add(new TermQuery(new Term("field", "dog"))); - - DisjunctionMaxQuery expected = new DisjunctionMaxQuery(queries, 0.0f); - QueryBuilder builder = new QueryBuilder(new MockSynonymAnalyzer()); - builder.setScoreOverlaps(QueryBuilder.ScoreOverlaps.PICK_BEST); - - assertEquals(expected, builder.createBooleanQuery("field", "dogs")); - assertEquals(expected, builder.createPhraseQuery("field", "dogs")); - assertEquals(expected, builder.createBooleanQuery("field", "dogs", BooleanClause.Occur.MUST)); - assertEquals(expected, builder.createPhraseQuery("field", "dogs")); - } - - - /** synonym boolean expansion instead of blended */ - public void testSynonymsDistinct() throws Exception { - Query expected = new BooleanQuery.Builder() - .add(new TermQuery(new Term("field", "dogs")), BooleanClause.Occur.SHOULD) - .add(new TermQuery(new Term("field", "dog")), BooleanClause.Occur.SHOULD) - .build(); - - QueryBuilder builder = new QueryBuilder(new MockSynonymAnalyzer()); - builder.setScoreOverlaps(QueryBuilder.ScoreOverlaps.AS_DISTINCT_TERMS); - - assertEquals(expected, builder.createBooleanQuery("field", "dogs")); - assertEquals(expected, builder.createPhraseQuery("field", "dogs")); - assertEquals(expected, builder.createBooleanQuery("field", "dogs", BooleanClause.Occur.MUST)); - assertEquals(expected, builder.createPhraseQuery("field", "dogs")); - } /** forms multiphrase query */ public void testSynonymsPhrase() throws Exception { diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java index e7e303cbac05..414fc6ca5a38 100644 --- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java +++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java @@ -34,6 +34,7 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; @@ -78,6 +79,14 @@ public abstract class SolrQueryParserBase extends QueryBuilder { static final int MOD_NOT = 10; static final int MOD_REQ = 11; + protected ScoreOverlaps scoreOverlaps = ScoreOverlaps.AS_SAME_TERM; + + public static enum ScoreOverlaps { + AS_SAME_TERM, /* default, blends doc freq for terms in same posn: SynonymQuery(A B)*/ + PICK_BEST, /* picks dismax over synonyms, choosing best scoring per doc: (A | B) */ + AS_DISTINCT_TERMS /* picks combination (A OR B).*/ + } + // make it possible to call setDefaultOperator() without accessing // the nested class: /** Alternative form of QueryParser.Operator.AND */ @@ -330,6 +339,18 @@ public void setAllowSubQueryParsing(boolean allowSubQueryParsing) { this.allowSubQueryParsing = allowSubQueryParsing; } + /** + * Set how overlapping query terms should be scored, as if they're the same term, + * picking highest scoring term, or OR'ing them together + * @param scoreOverlaps the overlap scorring method + */ + public void setScoreOverlaps(ScoreOverlaps scoreOverlaps) {this.scoreOverlaps = scoreOverlaps;} + + /** + * Gets how overlapping query terms should be scored + */ + public ScoreOverlaps getScoreOverlaps() {return this.scoreOverlaps;} + /** * Set to true to allow leading wildcard characters. *

@@ -542,6 +563,25 @@ protected Query newRegexpQuery(Term regexp) { return query; } + @Override + protected Query newSynonymQuery(Term terms[]) { + if (scoreOverlaps == ScoreOverlaps.PICK_BEST) { + List currPosnClauses = new ArrayList(); + for (Term term : terms) { + currPosnClauses.add(newTermQuery(term)); + } + DisjunctionMaxQuery dm = new DisjunctionMaxQuery(currPosnClauses, 0.0f); + return dm; + } else if (scoreOverlaps == ScoreOverlaps.AS_DISTINCT_TERMS) { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (Term term : terms) { + builder.add(newTermQuery(term), BooleanClause.Occur.SHOULD); + } + return builder.build(); + } + return super.newSynonymQuery(terms); + } + /** * Builds a new FuzzyQuery instance * @param term Term diff --git a/solr/core/src/java/org/apache/solr/schema/TextField.java b/solr/core/src/java/org/apache/solr/schema/TextField.java index 3a44e99c692e..e1fa4bc3463b 100644 --- a/solr/core/src/java/org/apache/solr/schema/TextField.java +++ b/solr/core/src/java/org/apache/solr/schema/TextField.java @@ -29,6 +29,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.QueryBuilder; import org.apache.solr.common.SolrException; +import org.apache.solr.parser.SolrQueryParserBase; import org.apache.solr.query.SolrRangeQuery; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; @@ -41,7 +42,7 @@ public class TextField extends FieldType { protected boolean autoGeneratePhraseQueries; protected boolean enableGraphQueries; - protected QueryBuilder.ScoreOverlaps scoreOverlapsMethod; + protected SolrQueryParserBase.ScoreOverlaps scoreOverlapsMethod; /** * Analyzer set by schema for text types to use when searching fields @@ -74,10 +75,10 @@ protected void init(IndexSchema schema, Map args) { if (autoGeneratePhraseQueriesStr != null) autoGeneratePhraseQueries = Boolean.parseBoolean(autoGeneratePhraseQueriesStr); - scoreOverlapsMethod = QueryBuilder.ScoreOverlaps.AS_SAME_TERM; + scoreOverlapsMethod = SolrQueryParserBase.ScoreOverlaps.AS_SAME_TERM; String scoreOverlapsMethod = args.remove(SCORE_OVERLAPS); if (scoreOverlapsMethod != null) { - this.scoreOverlapsMethod = QueryBuilder.ScoreOverlaps.valueOf(scoreOverlapsMethod.toUpperCase()); + this.scoreOverlapsMethod = SolrQueryParserBase.ScoreOverlaps.valueOf(scoreOverlapsMethod.toUpperCase()); } enableGraphQueries = true; @@ -111,7 +112,7 @@ public boolean getEnableGraphQueries() { return enableGraphQueries; } - public QueryBuilder.ScoreOverlaps getScoreOverlapsMethod() {return scoreOverlapsMethod;} + public SolrQueryParserBase.ScoreOverlaps getScoreOverlapsMethod() {return scoreOverlapsMethod;} @Override public SortField getSortField(SchemaField field, boolean reverse) { diff --git a/solr/core/src/test-files/solr/collection1/conf/schema12.xml b/solr/core/src/test-files/solr/collection1/conf/schema12.xml index 225b88ed4059..f62d307858bb 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema12.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema12.xml @@ -167,6 +167,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -590,6 +651,10 @@ + + + + diff --git a/solr/core/src/test-files/solr/collection1/conf/synonyms.txt b/solr/core/src/test-files/solr/collection1/conf/synonyms.txt index 54cf2cc11c6a..68dbf0bf62b1 100644 --- a/solr/core/src/test-files/solr/collection1/conf/synonyms.txt +++ b/solr/core/src/test-files/solr/collection1/conf/synonyms.txt @@ -31,4 +31,10 @@ pixima => pixma # multiword synonyms wi fi => wifi -crow blackbird, grackle \ No newline at end of file +crow blackbird, grackle + +# Synonyms used in semantic expansion +tabby => tabby, cat, feline, animal +persian => persian, cat, feline, animal + +jeans, denim pants \ No newline at end of file diff --git a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java index 6666c29a2a6f..1a8f1bbbfb79 100644 --- a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java +++ b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java @@ -25,6 +25,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.Term; +import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; @@ -96,6 +97,9 @@ public static void index() throws Exception { assertU(adoc("id", "71", "text_sw", "ties")); assertU(adoc("id", "72", "text_sw", "wifi ATM")); assertU(adoc("id", "73", "shingle23", "A B X D E")); +// assertU(adoc("id", "74", "text_pick_best", "tabby")); +// assertU(adoc("id", "74", "text_as_distinct", "persian")); + assertU(commit()); } @@ -1794,6 +1798,37 @@ public void testOperatorsAndMultiWordSynonyms() throws Exception { ); } + public void testOverlapTermScoringQueries() throws Exception { + ModifiableSolrParams edismaxParams = new ModifiableSolrParams(); + edismaxParams.add("qf", "t_pick_best_foo"); + + QParser qParser = QParser.getParser("tabby", "edismax", req(edismaxParams)); + Query q = qParser.getQuery(); + assertEquals("+((t_pick_best_foo:tabbi | t_pick_best_foo:cat | t_pick_best_foo:felin | t_pick_best_foo:anim))", q.toString()); + + edismaxParams = new ModifiableSolrParams(); + edismaxParams.add("qf", "t_as_distinct_foo"); + qParser = QParser.getParser("tabby", "edismax", req(edismaxParams)); + q = qParser.getQuery(); + assertEquals("+((t_as_distinct_foo:tabbi t_as_distinct_foo:cat t_as_distinct_foo:felin t_as_distinct_foo:anim))", q.toString()); + + /*confirm autoGeneratePhraseQueries always builds OR queries*/ + edismaxParams = new ModifiableSolrParams(); + edismaxParams.add("qf", "t_as_distinct_foo"); + edismaxParams.add("sow", "false"); + qParser = QParser.getParser("jeans", "edismax", req(edismaxParams)); + q = qParser.getQuery(); + assertEquals("+(((t_as_distinct_foo:\"denim pant\" t_as_distinct_foo:jean)))", q.toString()); + + edismaxParams = new ModifiableSolrParams(); + edismaxParams.add("qf", "t_pick_best_foo"); + edismaxParams.add("sow", "false"); + qParser = QParser.getParser("jeans", "edismax", req(edismaxParams)); + q = qParser.getQuery(); + assertEquals("+(((t_pick_best_foo:\"denim pant\" t_pick_best_foo:jean)))", q.toString()); + + } + public void testAutoGeneratePhraseQueries() throws Exception { ModifiableSolrParams noSowParams = new ModifiableSolrParams(); noSowParams.add("df", "text"); From 3ebc2cbf1107c084dde780910e1e0b2caa03173c Mon Sep 17 00:00:00 2001 From: Doug Turnbull Date: Wed, 22 Nov 2017 16:20:30 -0500 Subject: [PATCH 13/25] Undo unintentional changes --- .../src/java/org/apache/lucene/search/BlendedTermQuery.java | 2 +- lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java | 2 -- .../core/src/test/org/apache/lucene/util/TestQueryBuilder.java | 3 --- 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java index f846c8d1f5c1..219d4535827c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java @@ -246,7 +246,7 @@ public int hashCode() { @Override public String toString(String field) { - StringBuilder builder = new StringBuilder("BLENDED("); + StringBuilder builder = new StringBuilder("Blended("); for (int i = 0; i < terms.length; ++i) { if (i != 0) { builder.append(" "); diff --git a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java index d6f88894a78e..2cb066bd2085 100644 --- a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java @@ -31,7 +31,6 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; @@ -64,7 +63,6 @@ public class QueryBuilder { protected boolean enableGraphQueries = true; protected boolean autoGenerateMultiTermSynonymsPhraseQuery = false; - /** Creates a new QueryBuilder using the given analyzer. */ public QueryBuilder(Analyzer analyzer) { this.analyzer = analyzer; diff --git a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java index 96bd8a9555dc..fece16697cf4 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java @@ -18,8 +18,6 @@ import java.io.IOException; -import java.util.ArrayList; -import java.util.List; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; @@ -33,7 +31,6 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; From 6d9711d24b2659dbeae4e9590e16249018e1f043 Mon Sep 17 00:00:00 2001 From: Doug Turnbull Date: Wed, 22 Nov 2017 16:28:21 -0500 Subject: [PATCH 14/25] Adds changes.txt node --- solr/CHANGES.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 2e3ff42df117..f845a5a2bda2 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -30,7 +30,8 @@ Apache ZooKeeper 3.4.10 Jetty 9.3.20.v20170531 -(No Changes) +SOLR-11662: You can configure whether SynonymQuery, a Dismax, or BooleanQuery occurs over query terms that overlap +their position (Doug Turnbull) ================== 7.2.0 ================== From c21d453c75a59c7e0ef64e4c9832b52598cbc1a4 Mon Sep 17 00:00:00 2001 From: Doug Turnbull Date: Mon, 27 Nov 2017 20:59:15 -0500 Subject: [PATCH 15/25] Change test to use params() --- .../solr/search/TestExtendedDismaxParser.java | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java index 1a8f1bbbfb79..22103af830c6 100644 --- a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java +++ b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java @@ -1799,30 +1799,26 @@ public void testOperatorsAndMultiWordSynonyms() throws Exception { } public void testOverlapTermScoringQueries() throws Exception { - ModifiableSolrParams edismaxParams = new ModifiableSolrParams(); - edismaxParams.add("qf", "t_pick_best_foo"); + ModifiableSolrParams edismaxParams = params("qf", "t_pick_best_foo"); QParser qParser = QParser.getParser("tabby", "edismax", req(edismaxParams)); Query q = qParser.getQuery(); assertEquals("+((t_pick_best_foo:tabbi | t_pick_best_foo:cat | t_pick_best_foo:felin | t_pick_best_foo:anim))", q.toString()); - edismaxParams = new ModifiableSolrParams(); - edismaxParams.add("qf", "t_as_distinct_foo"); + edismaxParams = params("qf", "t_as_distinct_foo"); qParser = QParser.getParser("tabby", "edismax", req(edismaxParams)); q = qParser.getQuery(); assertEquals("+((t_as_distinct_foo:tabbi t_as_distinct_foo:cat t_as_distinct_foo:felin t_as_distinct_foo:anim))", q.toString()); /*confirm autoGeneratePhraseQueries always builds OR queries*/ - edismaxParams = new ModifiableSolrParams(); - edismaxParams.add("qf", "t_as_distinct_foo"); - edismaxParams.add("sow", "false"); + edismaxParams = params("qf", "t_as_distinct_foo", + "sow", "false"); qParser = QParser.getParser("jeans", "edismax", req(edismaxParams)); q = qParser.getQuery(); assertEquals("+(((t_as_distinct_foo:\"denim pant\" t_as_distinct_foo:jean)))", q.toString()); - edismaxParams = new ModifiableSolrParams(); - edismaxParams.add("qf", "t_pick_best_foo"); - edismaxParams.add("sow", "false"); + edismaxParams = params("qf", "t_pick_best_foo", + "sow", "false"); qParser = QParser.getParser("jeans", "edismax", req(edismaxParams)); q = qParser.getQuery(); assertEquals("+(((t_pick_best_foo:\"denim pant\" t_pick_best_foo:jean)))", q.toString()); From 649a8190d9fb83f0030b311a434ea30e32e8e183 Mon Sep 17 00:00:00 2001 From: Doug Turnbull Date: Mon, 27 Nov 2017 20:59:38 -0500 Subject: [PATCH 16/25] Document enum; change to case statement --- .../solr/parser/SolrQueryParserBase.java | 77 +++++++++++++------ 1 file changed, 53 insertions(+), 24 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java index 612c43ab1dc9..46ff74e297f1 100644 --- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java +++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java @@ -62,6 +62,8 @@ import org.apache.solr.search.SolrConstantScoreQuery; import org.apache.solr.search.SyntaxError; +import static org.apache.solr.parser.SolrQueryParserBase.ScoreOverlaps.*; + /** This class is overridden by QueryParser in QueryParser.jj * and acts to separate the majority of the Java code from the .jj grammar file. */ @@ -79,12 +81,37 @@ public abstract class SolrQueryParserBase extends QueryBuilder { static final int MOD_NOT = 10; static final int MOD_REQ = 11; - protected ScoreOverlaps scoreOverlaps = ScoreOverlaps.AS_SAME_TERM; + protected ScoreOverlaps scoreOverlaps = AS_SAME_TERM; + /** + * Query strategy when analyzed query terms overlap the same position (ie synonyms) + * consider if pants and khakis are query time synonyms + * + *

  • {@link #AS_SAME_TERM}
  • + *
  • {@link #PICK_BEST}
  • + *
  • {@link #AS_DISTINCT_TERMS}
  • + */ public static enum ScoreOverlaps { - AS_SAME_TERM, /* default, blends doc freq for terms in same posn: SynonymQuery(A B)*/ - PICK_BEST, /* picks dismax over synonyms, choosing best scoring per doc: (A | B) */ - AS_DISTINCT_TERMS /* picks combination (A OR B).*/ + /** (default) synonym terms share doc freq + * so if "pants" has df 500, and "khakis" a df of 50, uses 500 df when scoring both terms + * appropriate for exact synonyms + * see {@link org.apache.lucene.search.SynonymQuery} + * */ + AS_SAME_TERM, + + /** highest scoring term match chosen (ie dismax) + * so if "pants" has df 500, and "khakis" a df of 50, khakis matches are scored higher + * appropriate when more specific synonyms should score higher + * */ + PICK_BEST, + + /** each synonym scored indepedently, then added together (ie boolean query) + * so if "pants" has df 500, and "khakis" a df of 50, khakis matches are scored higher but + * summed with any "pants" matches + * appropriate when more specific synonyms should score higher, but we don't want to ignore + * less specific synonyms + * */ + AS_DISTINCT_TERMS } // make it possible to call setDefaultOperator() without accessing @@ -491,7 +518,7 @@ protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, Query query = createFieldQuery(analyzer, occur, field, queryText, quoted || fieldAutoGenPhraseQueries || autoGeneratePhraseQueries, phraseSlop); setEnableGraphQueries(true); // reset back to default - setScoreOverlaps(ScoreOverlaps.AS_SAME_TERM); + setScoreOverlaps(AS_SAME_TERM); return query; } @@ -566,21 +593,23 @@ protected Query newRegexpQuery(Term regexp) { @Override protected Query newSynonymQuery(Term terms[]) { - if (scoreOverlaps == ScoreOverlaps.PICK_BEST) { - List currPosnClauses = new ArrayList(); - for (Term term : terms) { - currPosnClauses.add(newTermQuery(term)); - } - DisjunctionMaxQuery dm = new DisjunctionMaxQuery(currPosnClauses, 0.0f); - return dm; - } else if (scoreOverlaps == ScoreOverlaps.AS_DISTINCT_TERMS) { - BooleanQuery.Builder builder = new BooleanQuery.Builder(); - for (Term term : terms) { - builder.add(newTermQuery(term), BooleanClause.Occur.SHOULD); - } - return builder.build(); + switch (scoreOverlaps) { + case PICK_BEST: + List currPosnClauses = new ArrayList(terms.length); + for (Term term : terms) { + currPosnClauses.add(newTermQuery(term)); + } + DisjunctionMaxQuery dm = new DisjunctionMaxQuery(currPosnClauses, 0.0f); + return dm; + case AS_DISTINCT_TERMS: + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (Term term : terms) { + builder.add(newTermQuery(term), BooleanClause.Occur.SHOULD); + } + return builder.build(); + default: + return super.newSynonymQuery(terms); } - return super.newSynonymQuery(terms); } /** @@ -705,7 +734,7 @@ protected Query getBooleanQuery(List clauses) throws SyntaxError boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries(); boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField)ft).getEnableGraphQueries(); - ScoreOverlaps synMatchType = ScoreOverlaps.AS_SAME_TERM; + ScoreOverlaps synMatchType = AS_SAME_TERM; if (ft instanceof TextField) { synMatchType = ((TextField)(ft)).getScoreOverlapsMethod(); } @@ -1028,7 +1057,7 @@ protected Query getFieldQuery(String field, String queryText, boolean quoted, bo if (ft.isTokenized() && sf.indexed()) { boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries(); boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField)ft).getEnableGraphQueries(); - ScoreOverlaps synMatchType = ScoreOverlaps.AS_SAME_TERM; + ScoreOverlaps synMatchType = AS_SAME_TERM; if (ft instanceof TextField) { synMatchType = ((TextField)(ft)).getScoreOverlapsMethod(); } @@ -1043,7 +1072,7 @@ protected Query getFieldQuery(String field, String queryText, boolean quoted, bo } // default to a normal field query - return newFieldQuery(getAnalyzer(), field, queryText, quoted, false, true, ScoreOverlaps.AS_SAME_TERM); + return newFieldQuery(getAnalyzer(), field, queryText, quoted, false, true, AS_SAME_TERM); } // Assumption: quoted is always false @@ -1077,7 +1106,7 @@ protected Query getFieldQuery(String field, List queryTerms, boolean raw String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms); boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries(); boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField)ft).getEnableGraphQueries(); - ScoreOverlaps synMatchType = ScoreOverlaps.AS_SAME_TERM; + ScoreOverlaps synMatchType = AS_SAME_TERM; if (ft instanceof TextField) { synMatchType = ((TextField)(ft)).getScoreOverlapsMethod(); } @@ -1114,7 +1143,7 @@ protected Query getFieldQuery(String field, List queryTerms, boolean raw // default to a normal field query String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms); - return newFieldQuery(getAnalyzer(), field, queryText, false, false, true, ScoreOverlaps.AS_SAME_TERM); + return newFieldQuery(getAnalyzer(), field, queryText, false, false, true, AS_SAME_TERM); } protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1){ From 2034e173d5269092a01ee5e372a5fa90aa45753c Mon Sep 17 00:00:00 2001 From: Doug Turnbull Date: Tue, 28 Nov 2017 19:00:25 -0500 Subject: [PATCH 17/25] Move test to SolrQueryParser --- .../solr/search/TestExtendedDismaxParser.java | 27 ----------------- .../solr/search/TestSolrQueryParser.java | 30 ++++++++++++++++++- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java index 22103af830c6..65b806084853 100644 --- a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java +++ b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java @@ -1798,33 +1798,6 @@ public void testOperatorsAndMultiWordSynonyms() throws Exception { ); } - public void testOverlapTermScoringQueries() throws Exception { - ModifiableSolrParams edismaxParams = params("qf", "t_pick_best_foo"); - - QParser qParser = QParser.getParser("tabby", "edismax", req(edismaxParams)); - Query q = qParser.getQuery(); - assertEquals("+((t_pick_best_foo:tabbi | t_pick_best_foo:cat | t_pick_best_foo:felin | t_pick_best_foo:anim))", q.toString()); - - edismaxParams = params("qf", "t_as_distinct_foo"); - qParser = QParser.getParser("tabby", "edismax", req(edismaxParams)); - q = qParser.getQuery(); - assertEquals("+((t_as_distinct_foo:tabbi t_as_distinct_foo:cat t_as_distinct_foo:felin t_as_distinct_foo:anim))", q.toString()); - - /*confirm autoGeneratePhraseQueries always builds OR queries*/ - edismaxParams = params("qf", "t_as_distinct_foo", - "sow", "false"); - qParser = QParser.getParser("jeans", "edismax", req(edismaxParams)); - q = qParser.getQuery(); - assertEquals("+(((t_as_distinct_foo:\"denim pant\" t_as_distinct_foo:jean)))", q.toString()); - - edismaxParams = params("qf", "t_pick_best_foo", - "sow", "false"); - qParser = QParser.getParser("jeans", "edismax", req(edismaxParams)); - q = qParser.getQuery(); - assertEquals("+(((t_pick_best_foo:\"denim pant\" t_pick_best_foo:jean)))", q.toString()); - - } - public void testAutoGeneratePhraseQueries() throws Exception { ModifiableSolrParams noSowParams = new ModifiableSolrParams(); noSowParams.add("df", "text"); diff --git a/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java b/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java index 1db6b5a7c229..5571b5dab2ec 100644 --- a/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java +++ b/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java @@ -1057,7 +1057,35 @@ public void testShingleQueries() throws Exception { , "/response/numFound==1" ); } - + + + public void testOverlapTermScoringQueries() throws Exception { + ModifiableSolrParams edismaxParams = params("qf", "t_pick_best_foo"); + + QParser qParser = QParser.getParser("tabby", "edismax", req(edismaxParams)); + Query q = qParser.getQuery(); + assertEquals("+((t_pick_best_foo:tabbi | t_pick_best_foo:cat | t_pick_best_foo:felin | t_pick_best_foo:anim))", q.toString()); + + edismaxParams = params("qf", "t_as_distinct_foo"); + qParser = QParser.getParser("tabby", "edismax", req(edismaxParams)); + q = qParser.getQuery(); + assertEquals("+((t_as_distinct_foo:tabbi t_as_distinct_foo:cat t_as_distinct_foo:felin t_as_distinct_foo:anim))", q.toString()); + + /*confirm autoGeneratePhraseQueries always builds OR queries*/ + edismaxParams = params("qf", "t_as_distinct_foo", + "sow", "false"); + qParser = QParser.getParser("jeans", "edismax", req(edismaxParams)); + q = qParser.getQuery(); + assertEquals("+(((t_as_distinct_foo:\"denim pant\" t_as_distinct_foo:jean)))", q.toString()); + + edismaxParams = params("qf", "t_pick_best_foo", + "sow", "false"); + qParser = QParser.getParser("jeans", "edismax", req(edismaxParams)); + q = qParser.getQuery(); + assertEquals("+(((t_pick_best_foo:\"denim pant\" t_pick_best_foo:jean)))", q.toString()); + + } + @Test public void testBadRequestInSetQuery() throws SyntaxError { SolrQueryRequest req = req(); From 3c10b3267e9b3d504e986526e6083fa6e6f11e13 Mon Sep 17 00:00:00 2001 From: Doug Turnbull Date: Tue, 28 Nov 2017 19:02:22 -0500 Subject: [PATCH 18/25] Add synonym comment --- .../src/java/org/apache/solr/parser/SolrQueryParserBase.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java index 46ff74e297f1..da56980c8344 100644 --- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java +++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java @@ -367,7 +367,7 @@ public void setAllowSubQueryParsing(boolean allowSubQueryParsing) { } /** - * Set how overlapping query terms should be scored, as if they're the same term, + * Set how overlapping query terms (ie synonyms) should be scored, as if they're the same term, * picking highest scoring term, or OR'ing them together * @param scoreOverlaps the overlap scorring method */ From f4c4849712d53c216936bce1f292109517165969 Mon Sep 17 00:00:00 2001 From: Doug Turnbull Date: Tue, 28 Nov 2017 19:39:35 -0500 Subject: [PATCH 19/25] Rename scoreOverlaps to synonymQueryStyle --- .../org/apache/solr/parser/QueryParser.java | 4 +- .../solr/parser/SolrQueryParserBase.java | 38 +++++++++---------- .../org/apache/solr/schema/FieldType.java | 4 +- .../org/apache/solr/schema/TextField.java | 12 +++--- .../solr/search/ExtendedDismaxQParser.java | 4 +- .../solr/collection1/conf/schema12.xml | 4 +- .../solr/search/TestExtendedDismaxParser.java | 5 +-- .../solr/search/TestSolrQueryParser.java | 2 +- 8 files changed, 36 insertions(+), 37 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/parser/QueryParser.java b/solr/core/src/java/org/apache/solr/parser/QueryParser.java index 6a14b2ea9e6d..518cdefaa791 100644 --- a/solr/core/src/java/org/apache/solr/parser/QueryParser.java +++ b/solr/core/src/java/org/apache/solr/parser/QueryParser.java @@ -53,13 +53,13 @@ private static boolean allowedPostMultiTerm(int tokenKind) { @Override protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted, boolean fieldAutoGenPhraseQueries, boolean fieldEnableGraphQueries, - ScoreOverlaps synMatchType) + SynonymQueryStyle synonymQueryStyle) throws SyntaxError { setAutoGenerateMultiTermSynonymsPhraseQuery(fieldAutoGenPhraseQueries || getAutoGeneratePhraseQueries()); // Don't auto-quote graph-aware field queries boolean treatAsQuoted = getSplitOnWhitespace() ? (quoted || fieldAutoGenPhraseQueries || getAutoGeneratePhraseQueries()) : quoted; - return super.newFieldQuery(analyzer, field, queryText, treatAsQuoted, false, fieldEnableGraphQueries, synMatchType); + return super.newFieldQuery(analyzer, field, queryText, treatAsQuoted, false, fieldEnableGraphQueries, synonymQueryStyle); } // * Query ::= ( Clause )* diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java index da56980c8344..fdf790716cdf 100644 --- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java +++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java @@ -62,7 +62,7 @@ import org.apache.solr.search.SolrConstantScoreQuery; import org.apache.solr.search.SyntaxError; -import static org.apache.solr.parser.SolrQueryParserBase.ScoreOverlaps.*; +import static org.apache.solr.parser.SolrQueryParserBase.SynonymQueryStyle.*; /** This class is overridden by QueryParser in QueryParser.jj * and acts to separate the majority of the Java code from the .jj grammar file. @@ -81,7 +81,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder { static final int MOD_NOT = 10; static final int MOD_REQ = 11; - protected ScoreOverlaps scoreOverlaps = AS_SAME_TERM; + protected SynonymQueryStyle synonymQueryStyle = AS_SAME_TERM; /** * Query strategy when analyzed query terms overlap the same position (ie synonyms) @@ -91,7 +91,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder { *
  • {@link #PICK_BEST}
  • *
  • {@link #AS_DISTINCT_TERMS}
  • */ - public static enum ScoreOverlaps { + public static enum SynonymQueryStyle { /** (default) synonym terms share doc freq * so if "pants" has df 500, and "khakis" a df of 50, uses 500 df when scoring both terms * appropriate for exact synonyms @@ -369,14 +369,14 @@ public void setAllowSubQueryParsing(boolean allowSubQueryParsing) { /** * Set how overlapping query terms (ie synonyms) should be scored, as if they're the same term, * picking highest scoring term, or OR'ing them together - * @param scoreOverlaps the overlap scorring method + * @param synonymQueryStyle how to score terms that overlap see {{@link SynonymQueryStyle}} */ - public void setScoreOverlaps(ScoreOverlaps scoreOverlaps) {this.scoreOverlaps = scoreOverlaps;} + public void setSynonymQueryStyle(SynonymQueryStyle synonymQueryStyle) {this.synonymQueryStyle = synonymQueryStyle;} /** * Gets how overlapping query terms should be scored */ - public ScoreOverlaps getScoreOverlaps() {return this.scoreOverlaps;} + public SynonymQueryStyle getSynonymQueryStyle() {return this.synonymQueryStyle;} /** @@ -510,15 +510,15 @@ protected void addMultiTermClause(List clauses, Query q) { protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted, boolean fieldAutoGenPhraseQueries, boolean fieldEnableGraphQueries, - ScoreOverlaps scoreOverlaps) + SynonymQueryStyle synonymQueryStyle) throws SyntaxError { BooleanClause.Occur occur = operator == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD; setEnableGraphQueries(fieldEnableGraphQueries); - setScoreOverlaps(scoreOverlaps); + setSynonymQueryStyle(synonymQueryStyle); Query query = createFieldQuery(analyzer, occur, field, queryText, quoted || fieldAutoGenPhraseQueries || autoGeneratePhraseQueries, phraseSlop); setEnableGraphQueries(true); // reset back to default - setScoreOverlaps(AS_SAME_TERM); + setSynonymQueryStyle(AS_SAME_TERM); return query; } @@ -593,7 +593,7 @@ protected Query newRegexpQuery(Term regexp) { @Override protected Query newSynonymQuery(Term terms[]) { - switch (scoreOverlaps) { + switch (synonymQueryStyle) { case PICK_BEST: List currPosnClauses = new ArrayList(terms.length); for (Term term : terms) { @@ -734,13 +734,13 @@ protected Query getBooleanQuery(List clauses) throws SyntaxError boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries(); boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField)ft).getEnableGraphQueries(); - ScoreOverlaps synMatchType = AS_SAME_TERM; + SynonymQueryStyle synonymQueryStyle = AS_SAME_TERM; if (ft instanceof TextField) { - synMatchType = ((TextField)(ft)).getScoreOverlapsMethod(); + synonymQueryStyle = ((TextField)(ft)).getSynonymQueryStyle(); } subq = newFieldQuery(getAnalyzer(), sfield.getName(), rawq.getJoinedExternalVal(), - false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries, synMatchType); + false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries, synonymQueryStyle); booleanBuilder.add(subq, BooleanClause.Occur.SHOULD); } else { for (String externalVal : rawq.getExternalVals()) { @@ -1057,11 +1057,11 @@ protected Query getFieldQuery(String field, String queryText, boolean quoted, bo if (ft.isTokenized() && sf.indexed()) { boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries(); boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField)ft).getEnableGraphQueries(); - ScoreOverlaps synMatchType = AS_SAME_TERM; + SynonymQueryStyle synonymQueryStyle = AS_SAME_TERM; if (ft instanceof TextField) { - synMatchType = ((TextField)(ft)).getScoreOverlapsMethod(); + synonymQueryStyle = ((TextField)(ft)).getSynonymQueryStyle(); } - return newFieldQuery(getAnalyzer(), field, queryText, quoted, fieldAutoGenPhraseQueries, fieldEnableGraphQueries, synMatchType); + return newFieldQuery(getAnalyzer(), field, queryText, quoted, fieldAutoGenPhraseQueries, fieldEnableGraphQueries, synonymQueryStyle); } else { if (raw) { return new RawQuery(sf, queryText); @@ -1106,12 +1106,12 @@ protected Query getFieldQuery(String field, List queryTerms, boolean raw String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms); boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries(); boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField)ft).getEnableGraphQueries(); - ScoreOverlaps synMatchType = AS_SAME_TERM; + SynonymQueryStyle synonymQueryStyle = AS_SAME_TERM; if (ft instanceof TextField) { - synMatchType = ((TextField)(ft)).getScoreOverlapsMethod(); + synonymQueryStyle = ((TextField)(ft)).getSynonymQueryStyle(); } return newFieldQuery - (getAnalyzer(), field, queryText, false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries, synMatchType); + (getAnalyzer(), field, queryText, false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries, synonymQueryStyle); } else { if (raw) { return new RawQuery(sf, queryTerms); diff --git a/solr/core/src/java/org/apache/solr/schema/FieldType.java b/solr/core/src/java/org/apache/solr/schema/FieldType.java index d3b6ebfe2758..31ef6ec7ac78 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java @@ -905,7 +905,7 @@ protected void checkSupportsDocValues() { protected static final String ENABLE_GRAPH_QUERIES = "enableGraphQueries"; private static final String ARGS = "args"; private static final String POSITION_INCREMENT_GAP = "positionIncrementGap"; - protected static final String SCORE_OVERLAPS = "scoreOverlaps"; + protected static final String SYNONYM_QUERY_STYLE = "synonymQueryStyle"; /** * Get a map of property name -> value for this field type. @@ -927,7 +927,7 @@ public SimpleOrderedMap getNamedPropertyValues(boolean showDefaults) { if (this instanceof TextField) { namedPropertyValues.add(AUTO_GENERATE_PHRASE_QUERIES, ((TextField) this).getAutoGeneratePhraseQueries()); namedPropertyValues.add(ENABLE_GRAPH_QUERIES, ((TextField) this).getEnableGraphQueries()); - namedPropertyValues.add(SCORE_OVERLAPS, ((TextField) this).getScoreOverlapsMethod()); + namedPropertyValues.add(SYNONYM_QUERY_STYLE, ((TextField) this).getSynonymQueryStyle()); } namedPropertyValues.add(getPropertyName(INDEXED), hasProperty(INDEXED)); namedPropertyValues.add(getPropertyName(STORED), hasProperty(STORED)); diff --git a/solr/core/src/java/org/apache/solr/schema/TextField.java b/solr/core/src/java/org/apache/solr/schema/TextField.java index e1fa4bc3463b..2fcddc2ecad6 100644 --- a/solr/core/src/java/org/apache/solr/schema/TextField.java +++ b/solr/core/src/java/org/apache/solr/schema/TextField.java @@ -42,7 +42,7 @@ public class TextField extends FieldType { protected boolean autoGeneratePhraseQueries; protected boolean enableGraphQueries; - protected SolrQueryParserBase.ScoreOverlaps scoreOverlapsMethod; + protected SolrQueryParserBase.SynonymQueryStyle synonymQueryStyle; /** * Analyzer set by schema for text types to use when searching fields @@ -75,10 +75,10 @@ protected void init(IndexSchema schema, Map args) { if (autoGeneratePhraseQueriesStr != null) autoGeneratePhraseQueries = Boolean.parseBoolean(autoGeneratePhraseQueriesStr); - scoreOverlapsMethod = SolrQueryParserBase.ScoreOverlaps.AS_SAME_TERM; - String scoreOverlapsMethod = args.remove(SCORE_OVERLAPS); - if (scoreOverlapsMethod != null) { - this.scoreOverlapsMethod = SolrQueryParserBase.ScoreOverlaps.valueOf(scoreOverlapsMethod.toUpperCase()); + synonymQueryStyle = SolrQueryParserBase.SynonymQueryStyle.AS_SAME_TERM; + String synonymQueryStyle = args.remove(SYNONYM_QUERY_STYLE); + if (synonymQueryStyle != null) { + this.synonymQueryStyle = SolrQueryParserBase.SynonymQueryStyle.valueOf(synonymQueryStyle.toUpperCase()); } enableGraphQueries = true; @@ -112,7 +112,7 @@ public boolean getEnableGraphQueries() { return enableGraphQueries; } - public SolrQueryParserBase.ScoreOverlaps getScoreOverlapsMethod() {return scoreOverlapsMethod;} + public SolrQueryParserBase.SynonymQueryStyle getSynonymQueryStyle() {return synonymQueryStyle;} @Override public SortField getSortField(SchemaField field, boolean reverse) { diff --git a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java index 999a069975f2..c48d127db0e8 100644 --- a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java +++ b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java @@ -1004,7 +1004,7 @@ protected Query getPrefixQuery(String field, String val) throws SyntaxError { @Override protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted, boolean fieldAutoGenPhraseQueries, boolean enableGraphQueries, - ScoreOverlaps synMatchType) + SynonymQueryStyle synonymQueryStyle) throws SyntaxError { Analyzer actualAnalyzer; if (removeStopFilter) { @@ -1018,7 +1018,7 @@ protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, } else { actualAnalyzer = parser.getReq().getSchema().getFieldType(field).getQueryAnalyzer(); } - return super.newFieldQuery(actualAnalyzer, field, queryText, quoted, fieldAutoGenPhraseQueries, enableGraphQueries, synMatchType); + return super.newFieldQuery(actualAnalyzer, field, queryText, quoted, fieldAutoGenPhraseQueries, enableGraphQueries, synonymQueryStyle); } @Override diff --git a/solr/core/src/test-files/solr/collection1/conf/schema12.xml b/solr/core/src/test-files/solr/collection1/conf/schema12.xml index f62d307858bb..97b6ed0df81d 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema12.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema12.xml @@ -168,7 +168,7 @@ - + - + Date: Fri, 1 Dec 2017 20:47:29 -0500 Subject: [PATCH 20/25] Target syntax of lucene qparser --- .../solr/search/TestSolrQueryParser.java | 26 ++++++------------- 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java b/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java index 8a9ac89c860e..4bf4cc515eda 100644 --- a/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java +++ b/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java @@ -1060,29 +1060,19 @@ public void testShingleQueries() throws Exception { public void testSynonymQueryStyle() throws Exception { - ModifiableSolrParams edismaxParams = params("qf", "t_pick_best_foo"); - QParser qParser = QParser.getParser("tabby", "edismax", req(edismaxParams)); - Query q = qParser.getQuery(); - assertEquals("+((t_pick_best_foo:tabbi | t_pick_best_foo:cat | t_pick_best_foo:felin | t_pick_best_foo:anim))", q.toString()); + Query q = QParser.getParser("tabby", req(params("df", "t_pick_best_foo"))).getQuery(); + assertEquals("(t_pick_best_foo:tabbi | t_pick_best_foo:cat | t_pick_best_foo:felin | t_pick_best_foo:anim)", q.toString()); - edismaxParams = params("qf", "t_as_distinct_foo"); - qParser = QParser.getParser("tabby", "edismax", req(edismaxParams)); - q = qParser.getQuery(); - assertEquals("+((t_as_distinct_foo:tabbi t_as_distinct_foo:cat t_as_distinct_foo:felin t_as_distinct_foo:anim))", q.toString()); + q = QParser.getParser("tabby", req(params("df", "t_as_distinct_foo"))).getQuery(); + assertEquals("t_as_distinct_foo:tabbi t_as_distinct_foo:cat t_as_distinct_foo:felin t_as_distinct_foo:anim", q.toString()); /*confirm autoGeneratePhraseQueries always builds OR queries*/ - edismaxParams = params("qf", "t_as_distinct_foo", - "sow", "false"); - qParser = QParser.getParser("jeans", "edismax", req(edismaxParams)); - q = qParser.getQuery(); - assertEquals("+(((t_as_distinct_foo:\"denim pant\" t_as_distinct_foo:jean)))", q.toString()); + q = QParser.getParser("jeans", req(params("df", "t_as_distinct_foo", "sow", "false"))).getQuery(); + assertEquals("(t_as_distinct_foo:\"denim pant\" t_as_distinct_foo:jean)", q.toString()); - edismaxParams = params("qf", "t_pick_best_foo", - "sow", "false"); - qParser = QParser.getParser("jeans", "edismax", req(edismaxParams)); - q = qParser.getQuery(); - assertEquals("+(((t_pick_best_foo:\"denim pant\" t_pick_best_foo:jean)))", q.toString()); + q = QParser.getParser("jeans", req(params("df", "t_pick_best_foo", "sow", "false"))).getQuery(); + assertEquals("(t_pick_best_foo:\"denim pant\" t_pick_best_foo:jean)", q.toString()); } From 0ee839032eeaca975e26ec4f37db644e7c08373b Mon Sep 17 00:00:00 2001 From: Doug Turnbull Date: Fri, 1 Dec 2017 20:48:05 -0500 Subject: [PATCH 21/25] Expect one of three enum values, default throw assertion --- .../org/apache/solr/parser/SolrQueryParserBase.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java index fdf790716cdf..0068c9aaa0f0 100644 --- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java +++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java @@ -87,9 +87,9 @@ public abstract class SolrQueryParserBase extends QueryBuilder { * Query strategy when analyzed query terms overlap the same position (ie synonyms) * consider if pants and khakis are query time synonyms * - *
  • {@link #AS_SAME_TERM}
  • - *
  • {@link #PICK_BEST}
  • - *
  • {@link #AS_DISTINCT_TERMS}
  • + * {@link #AS_SAME_TERM} + * {@link #PICK_BEST} + * {@link #AS_DISTINCT_TERMS} */ public static enum SynonymQueryStyle { /** (default) synonym terms share doc freq @@ -607,8 +607,10 @@ protected Query newSynonymQuery(Term terms[]) { builder.add(newTermQuery(term), BooleanClause.Occur.SHOULD); } return builder.build(); - default: + case AS_SAME_TERM: return super.newSynonymQuery(terms); + default: + throw new AssertionError("unrecognized synonymQueryStyle passed when creating newSynonymQuery"); } } From 74292cfa6900f1e84e0b6d57ed9cec4bdaedcdde Mon Sep 17 00:00:00 2001 From: Doug Turnbull Date: Sat, 2 Dec 2017 14:12:17 -0500 Subject: [PATCH 22/25] Update field-type-definitions-and-properties.adoc --- .../src/field-type-definitions-and-properties.adoc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc b/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc index 54fda3cf8edb..5a774dfaeb63 100644 --- a/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc +++ b/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc @@ -87,6 +87,13 @@ For multivalued fields, specifies a distance between multiple values, which prev `autoGeneratePhraseQueries`:: For text fields. If `true`, Solr automatically generates phrase queries for adjacent terms. If `false`, terms must be enclosed in double-quotes to be treated as phrases. +`synonymQueryStyle`:: +Query used to combine scores of overlapping query terms (ie synonyms). Consider a search for "blue tee" with query-time synonyms `tshirt,tee`. ++ +Use `as_same_term` (default) to blend terms, ie `SynonymQuery(tshirt,tee)` where each term will be treated as equally important. Use `pick_best` to select the most significant synonym when scoring `Dismax(tee,tshirt)`. Use `as_distinct_terms` to bias scoring towards the most significant synonym `(pants OR slacks)`. ++ +`as_same_term` is appropriatte when terms are true synonyms (television, tv). `pick_best` and `as_distinct_terms` are appropriatte when synonyms are expanding to hyponyms (q=jeans w/ jeans=>jeans,pants) and you want exact to come before parent and sibling concepts. See this http://opensourceconnections.com/blog/2017/11/21/solr-synonyms-mea-culpa/[blog article]. + `enableGraphQueries`:: For text fields, applicable when querying with <> (which is the default for the `sow` parameter). Use `true`, the default, for field types with query analyzers including graph-aware filters, e.g., <> and <>. + From 013e12dd549ad104c5f8f3c6720d2d588c2ccddc Mon Sep 17 00:00:00 2001 From: Doug Turnbull Date: Sun, 3 Dec 2017 18:25:37 -0500 Subject: [PATCH 23/25] Fix spelling error --- .../src/field-type-definitions-and-properties.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc b/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc index 5a774dfaeb63..854d248f8180 100644 --- a/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc +++ b/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc @@ -92,7 +92,7 @@ Query used to combine scores of overlapping query terms (ie synonyms). Consider + Use `as_same_term` (default) to blend terms, ie `SynonymQuery(tshirt,tee)` where each term will be treated as equally important. Use `pick_best` to select the most significant synonym when scoring `Dismax(tee,tshirt)`. Use `as_distinct_terms` to bias scoring towards the most significant synonym `(pants OR slacks)`. + -`as_same_term` is appropriatte when terms are true synonyms (television, tv). `pick_best` and `as_distinct_terms` are appropriatte when synonyms are expanding to hyponyms (q=jeans w/ jeans=>jeans,pants) and you want exact to come before parent and sibling concepts. See this http://opensourceconnections.com/blog/2017/11/21/solr-synonyms-mea-culpa/[blog article]. +`as_same_term` is appropriate when terms are true synonyms (television, tv). Use `pick_best` or `as_distinct_terms` when synonyms are expanding to hyponyms (q=jeans w/ jeans=>jeans,pants) and you want exact to come before parent and sibling concepts. See this http://opensourceconnections.com/blog/2017/11/21/solr-synonyms-mea-culpa/[blog article]. `enableGraphQueries`:: For text fields, applicable when querying with <> (which is the default for the `sow` parameter). Use `true`, the default, for field types with query analyzers including graph-aware filters, e.g., <> and <>. From cf1fc4733606c3330355b037345507fddc513f29 Mon Sep 17 00:00:00 2001 From: Doug Turnbull Date: Sun, 3 Dec 2017 19:08:23 -0500 Subject: [PATCH 24/25] ie -> i.e. --- .../src/field-type-definitions-and-properties.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc b/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc index 854d248f8180..b5be77fe0d68 100644 --- a/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc +++ b/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc @@ -88,9 +88,9 @@ For multivalued fields, specifies a distance between multiple values, which prev `autoGeneratePhraseQueries`:: For text fields. If `true`, Solr automatically generates phrase queries for adjacent terms. If `false`, terms must be enclosed in double-quotes to be treated as phrases. `synonymQueryStyle`:: -Query used to combine scores of overlapping query terms (ie synonyms). Consider a search for "blue tee" with query-time synonyms `tshirt,tee`. +Query used to combine scores of overlapping query terms (i.e. synonyms). Consider a search for "blue tee" with query-time synonyms `tshirt,tee`. + -Use `as_same_term` (default) to blend terms, ie `SynonymQuery(tshirt,tee)` where each term will be treated as equally important. Use `pick_best` to select the most significant synonym when scoring `Dismax(tee,tshirt)`. Use `as_distinct_terms` to bias scoring towards the most significant synonym `(pants OR slacks)`. +Use `as_same_term` (default) to blend terms, i.e. `SynonymQuery(tshirt,tee)` where each term will be treated as equally important. Use `pick_best` to select the most significant synonym when scoring `Dismax(tee,tshirt)`. Use `as_distinct_terms` to bias scoring towards the most significant synonym `(pants OR slacks)`. + `as_same_term` is appropriate when terms are true synonyms (television, tv). Use `pick_best` or `as_distinct_terms` when synonyms are expanding to hyponyms (q=jeans w/ jeans=>jeans,pants) and you want exact to come before parent and sibling concepts. See this http://opensourceconnections.com/blog/2017/11/21/solr-synonyms-mea-culpa/[blog article]. From e18c2bc7a010a2790b4736313b81b7dc8e3cdeb7 Mon Sep 17 00:00:00 2001 From: Doug Turnbull Date: Mon, 4 Dec 2017 15:22:27 -0500 Subject: [PATCH 25/25] Escape => for synonym example --- .../src/field-type-definitions-and-properties.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc b/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc index b5be77fe0d68..d7ae09f2ed7a 100644 --- a/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc +++ b/solr/solr-ref-guide/src/field-type-definitions-and-properties.adoc @@ -92,7 +92,7 @@ Query used to combine scores of overlapping query terms (i.e. synonyms). Conside + Use `as_same_term` (default) to blend terms, i.e. `SynonymQuery(tshirt,tee)` where each term will be treated as equally important. Use `pick_best` to select the most significant synonym when scoring `Dismax(tee,tshirt)`. Use `as_distinct_terms` to bias scoring towards the most significant synonym `(pants OR slacks)`. + -`as_same_term` is appropriate when terms are true synonyms (television, tv). Use `pick_best` or `as_distinct_terms` when synonyms are expanding to hyponyms (q=jeans w/ jeans=>jeans,pants) and you want exact to come before parent and sibling concepts. See this http://opensourceconnections.com/blog/2017/11/21/solr-synonyms-mea-culpa/[blog article]. +`as_same_term` is appropriate when terms are true synonyms (television, tv). Use `pick_best` or `as_distinct_terms` when synonyms are expanding to hyponyms (q=jeans w/ `jeans\=>jeans,pants`) and you want exact to come before parent and sibling concepts. See this http://opensourceconnections.com/blog/2017/11/21/solr-synonyms-mea-culpa/[blog article]. `enableGraphQueries`:: For text fields, applicable when querying with <> (which is the default for the `sow` parameter). Use `true`, the default, for field types with query analyzers including graph-aware filters, e.g., <> and <>.