Example 76 with TermQuery

Use of org.apache.lucene.search.TermQuery in project neo4j by neo4j.

In the class SimpleUniquenessVerifier, method verify:

@Override
public void verify(PropertyAccessor accessor, int[] propKeyIds) throws IndexEntryConflictException, IOException {
    try {
        DuplicateCheckingCollector collector = DuplicateCheckingCollector.forProperties(accessor, propKeyIds);
        IndexSearcher searcher = indexSearcher();
        for (LeafReaderContext leafReaderContext : searcher.getIndexReader().leaves()) {
            Fields fields = leafReaderContext.reader().fields();
            for (String field : fields) {
                if (LuceneDocumentStructure.NODE_ID_KEY.equals(field)) {
                    continue;
                }
                TermsEnum terms = LuceneDocumentStructure.originalTerms(fields.terms(field), field);
                BytesRef termsRef;
                while ((termsRef = terms.next()) != null) {
                    // A docFreq greater than one means at least two documents index the same
                    // property value, so re-run a TermQuery and let the collector report the conflict.
                    if (terms.docFreq() > 1) {
                        collector.reset();
                        searcher.search(new TermQuery(new Term(field, termsRef)), collector);
                    }
                }
            }
        }
    } catch (IOException e) {
        Throwable cause = e.getCause();
        if (cause instanceof IndexEntryConflictException) {
            throw (IndexEntryConflictException) cause;
        }
        throw e;
    }
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), TermQuery (org.apache.lucene.search.TermQuery), Fields (org.apache.lucene.index.Fields), LeafReaderContext (org.apache.lucene.index.LeafReaderContext), Term (org.apache.lucene.index.Term), IOException (java.io.IOException), IndexEntryConflictException (org.neo4j.kernel.api.exceptions.index.IndexEntryConflictException), BytesRef (org.apache.lucene.util.BytesRef), TermsEnum (org.apache.lucene.index.TermsEnum)
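
For context, the core pattern of this verifier can be shown in isolation. The sketch below is not neo4j code: it walks the terms of one field, treats a docFreq greater than one as a possible duplicate, and confirms it with a TermQuery. The field handling, the use of MultiFields.getTerms, and the reporting via searcher.count are assumptions made for this sketch.

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;

final class DuplicateScanSketch {
    static void scan(IndexSearcher searcher, String field) throws IOException {
        IndexReader reader = searcher.getIndexReader();
        // All terms of the field across segments (Lucene 6.x API, as in the example above).
        Terms terms = MultiFields.getTerms(reader, field);
        if (terms == null) {
            return; // field is not indexed
        }
        TermsEnum termsEnum = terms.iterator();
        BytesRef term;
        while ((term = termsEnum.next()) != null) {
            // docFreq > 1 means at least two documents index this exact value.
            if (termsEnum.docFreq() > 1) {
                int hits = searcher.count(new TermQuery(new Term(field, BytesRef.deepCopyOf(term))));
                System.out.println("duplicate value " + term.utf8ToString() + " in " + hits + " docs");
            }
        }
    }
}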

Example 77 with TermQuery

Use of org.apache.lucene.search.TermQuery in project elasticsearch by elastic.

In the class NestedAggregatorTests, method testResetRootDocId:

public void testResetRootDocId() throws Exception {
    Directory directory = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, iwc);
    List<Document> documents = new ArrayList<>();
    // 1 segment with 1 root document and 3 nested sub docs
    Document document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    indexWriter.addDocuments(documents);
    indexWriter.commit();
    documents.clear();
    // 1 segment with:
    // 1 document, with 1 nested subdoc
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#2", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#2", UidFieldMapper.Defaults.FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    indexWriter.addDocuments(documents);
    documents.clear();
    // and 1 document, with 1 nested subdoc
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#3", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#3", UidFieldMapper.Defaults.FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    indexWriter.addDocuments(documents);
    indexWriter.commit();
    indexWriter.close();
    IndexService indexService = createIndex("test");
    DirectoryReader directoryReader = DirectoryReader.open(directory);
    directoryReader = ElasticsearchDirectoryReader.wrap(directoryReader, new ShardId(indexService.index(), 0));
    IndexSearcher searcher = new IndexSearcher(directoryReader);
    indexService.mapperService().merge("test", new CompressedXContent(PutMappingRequest.buildFromSimplifiedDef("test", "nested_field", "type=nested").string()), MapperService.MergeReason.MAPPING_UPDATE, false);
    SearchContext context = createSearchContext(indexService);
    AggregatorFactories.Builder builder = AggregatorFactories.builder();
    NestedAggregationBuilder factory = new NestedAggregationBuilder("test", "nested_field");
    builder.addAggregator(factory);
    AggregatorFactories factories = builder.build(context, null);
    context.aggregations(new SearchContextAggregations(factories));
    Aggregator[] aggs = factories.createTopLevelAggregators();
    BucketCollector collector = BucketCollector.wrap(Arrays.asList(aggs));
    collector.preCollection();
    // A regular search always excludes nested docs, so we use Queries.newNonNestedFilter() here (otherwise MatchAllDocsQuery would be sufficient).
    // We exclude the root doc with uid type#2; this triggers the bug if the root doc isn't reset when a new segment is processed,
    // because root doc type#3 and root doc type#1 have the same per-segment docid.
    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    bq.add(Queries.newNonNestedFilter(), Occur.MUST);
    bq.add(new TermQuery(new Term(UidFieldMapper.NAME, "type#2")), Occur.MUST_NOT);
    searcher.search(new ConstantScoreQuery(bq.build()), collector);
    collector.postCollection();
    Nested nested = (Nested) aggs[0].buildAggregation(0);
    // The bug manifests as 6 docs being returned: when currentRootDoc isn't reset, the child docs from the first segment are emitted as hits again.
    assertThat(nested.getDocCount(), equalTo(4L));
    directoryReader.close();
    directory.close();
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), BooleanQuery (org.apache.lucene.search.BooleanQuery), IndexService (org.elasticsearch.index.IndexService), ArrayList (java.util.ArrayList), SearchContext (org.elasticsearch.search.internal.SearchContext), Document (org.apache.lucene.document.Document), ShardId (org.elasticsearch.index.shard.ShardId), Field (org.apache.lucene.document.Field), CompressedXContent (org.elasticsearch.common.compress.CompressedXContent), AggregatorFactories (org.elasticsearch.search.aggregations.AggregatorFactories), ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery), Directory (org.apache.lucene.store.Directory), TermQuery (org.apache.lucene.search.TermQuery), ElasticsearchDirectoryReader (org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader), DirectoryReader (org.apache.lucene.index.DirectoryReader), SearchContextAggregations (org.elasticsearch.search.aggregations.SearchContextAggregations), Aggregator (org.elasticsearch.search.aggregations.Aggregator), Term (org.apache.lucene.index.Term), BucketCollector (org.elasticsearch.search.aggregations.BucketCollector), RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter), IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)
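
The test above relies on Lucene document blocks: IndexWriter.addDocuments writes the nested (child) documents first and the root document last, as one contiguous block per root. A minimal plain-Lucene sketch of that layout, joining child matches back to parents, is shown below; the field names, the "parent"/"child" markers, and the use of ToParentBlockJoinQuery are illustrative assumptions, not code from the test.

import java.util.Arrays;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.join.BitSetProducer;
import org.apache.lucene.search.join.QueryBitSetProducer;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.lucene.store.RAMDirectory;

public class BlockJoinSketch {
    public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));

        Document child1 = new Document();
        child1.add(new StringField("type", "child", Field.Store.NO));
        Document child2 = new Document();
        child2.add(new StringField("type", "child", Field.Store.NO));
        Document parent = new Document();
        parent.add(new StringField("type", "parent", Field.Store.NO));

        // The parent must be the last document of the block, mirroring the order used in the test.
        writer.addDocuments(Arrays.asList(child1, child2, parent));
        writer.close();

        IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
        // Parents are identified by a filter; children are matched with a TermQuery
        // and joined back to their enclosing parent document.
        BitSetProducer parents = new QueryBitSetProducer(new TermQuery(new Term("type", "parent")));
        Query childQuery = new TermQuery(new Term("type", "child"));
        Query toParent = new ToParentBlockJoinQuery(childQuery, parents, ScoreMode.None);
        System.out.println(searcher.count(toParent)); // 1: one parent has matching children
    }
}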

Example 78 with TermQuery

Use of org.apache.lucene.search.TermQuery in project elasticsearch by elastic.

In the class SumAggregatorTests, method testQueryFiltering:

public void testQueryFiltering() throws IOException {
    testCase(new TermQuery(new Term("match", "yes")), iw -> {
        iw.addDocument(Arrays.asList(new StringField("match", "yes", Field.Store.NO), new NumericDocValuesField(FIELD_NAME, 1)));
        iw.addDocument(Arrays.asList(new StringField("match", "no", Field.Store.NO), new NumericDocValuesField(FIELD_NAME, 2)));
        iw.addDocument(Arrays.asList(new StringField("match", "yes", Field.Store.NO), new NumericDocValuesField(FIELD_NAME, 3)));
        iw.addDocument(Arrays.asList(new StringField("match", "no", Field.Store.NO), new NumericDocValuesField(FIELD_NAME, 4)));
        iw.addDocument(Arrays.asList(new StringField("match", "yes", Field.Store.NO), new NumericDocValuesField(FIELD_NAME, 5)));
    }, count -> assertEquals(9L, count.getValue(), 0d)); // only the "match"="yes" docs are summed: 1 + 3 + 5 = 9
}
Also used: TermQuery (org.apache.lucene.search.TermQuery), NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField), SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField), StringField (org.apache.lucene.document.StringField), Term (org.apache.lucene.index.Term)

Example 79 with TermQuery

Use of org.apache.lucene.search.TermQuery in project elasticsearch by elastic.

In the class PlainHighlighterTests, method checkGeoQueryHighlighting:

public void checkGeoQueryHighlighting(Query geoQuery) throws IOException, InvalidTokenOffsetsException {
    Map<String, Analyzer> analysers = new HashMap<>();
    analysers.put("text", new StandardAnalyzer());
    FieldNameAnalyzer fieldNameAnalyzer = new FieldNameAnalyzer(analysers);
    Query termQuery = new TermQuery(new Term("text", "failure"));
    Query boolQuery = new BooleanQuery.Builder().add(new BooleanClause(geoQuery, BooleanClause.Occur.SHOULD)).add(new BooleanClause(termQuery, BooleanClause.Occur.SHOULD)).build();
    org.apache.lucene.search.highlight.Highlighter highlighter = new org.apache.lucene.search.highlight.Highlighter(new CustomQueryScorer(boolQuery));
    String fragment = highlighter.getBestFragment(fieldNameAnalyzer.tokenStream("text", "Arbitrary text field which should not cause " + "a failure"), "Arbitrary text field which should not cause a failure");
    assertThat(fragment, equalTo("Arbitrary text field which should not cause a <B>failure</B>"));
    Query rewritten = boolQuery.rewrite(null);
    highlighter = new org.apache.lucene.search.highlight.Highlighter(new CustomQueryScorer(rewritten));
    fragment = highlighter.getBestFragment(fieldNameAnalyzer.tokenStream("text", "Arbitrary text field which should not cause " + "a failure"), "Arbitrary text field which should not cause a failure");
    assertThat(fragment, equalTo("Arbitrary text field which should not cause a <B>failure</B>"));
}
Also used: TermQuery (org.apache.lucene.search.TermQuery), BooleanQuery (org.apache.lucene.search.BooleanQuery), Query (org.apache.lucene.search.Query), PhraseQuery (org.apache.lucene.search.PhraseQuery), GeoPointDistanceQuery (org.apache.lucene.spatial.geopoint.search.GeoPointDistanceQuery), GeoPointInBBoxQuery (org.apache.lucene.spatial.geopoint.search.GeoPointInBBoxQuery), GeoPointInPolygonQuery (org.apache.lucene.spatial.geopoint.search.GeoPointInPolygonQuery), HashMap (java.util.HashMap), FieldNameAnalyzer (org.elasticsearch.index.analysis.FieldNameAnalyzer), CustomQueryScorer (org.elasticsearch.search.fetch.subphase.highlight.CustomQueryScorer), Term (org.apache.lucene.index.Term), BooleanClause (org.apache.lucene.search.BooleanClause), StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer), Map (java.util.Map)
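
The same highlighting flow works with plain Lucene's QueryScorer in place of Elasticsearch's CustomQueryScorer. The following is a minimal sketch with a made-up field name and input text, shown only to isolate the Highlighter call used above.

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;

final class HighlightSketch {
    static String highlight(String text) throws IOException, InvalidTokenOffsetsException {
        TermQuery query = new TermQuery(new Term("text", "failure"));
        Highlighter highlighter = new Highlighter(new QueryScorer(query));
        // The default formatter wraps matched terms in <B>...</B>, as asserted in the test above.
        return highlighter.getBestFragment(new StandardAnalyzer(), "text", text);
    }
}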

Example 80 with TermQuery

Use of org.apache.lucene.search.TermQuery in project elasticsearch by elastic.

In the class QueryPhaseTests, method countTestCase:

private void countTestCase(boolean withDeletions) throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    final int numDocs = scaledRandomIntBetween(100, 200);
    for (int i = 0; i < numDocs; ++i) {
        Document doc = new Document();
        if (randomBoolean()) {
            doc.add(new StringField("foo", "bar", Store.NO));
        }
        if (randomBoolean()) {
            doc.add(new StringField("foo", "baz", Store.NO));
        }
        if (withDeletions && (rarely() || i == 0)) {
            doc.add(new StringField("delete", "yes", Store.NO));
        }
        w.addDocument(doc);
    }
    if (withDeletions) {
        w.deleteDocuments(new Term("delete", "yes"));
    }
    final IndexReader reader = w.getReader();
    Query matchAll = new MatchAllDocsQuery();
    Query matchAllCsq = new ConstantScoreQuery(matchAll);
    Query tq = new TermQuery(new Term("foo", "bar"));
    Query tCsq = new ConstantScoreQuery(tq);
    BooleanQuery bq = new BooleanQuery.Builder().add(matchAll, Occur.SHOULD).add(tq, Occur.MUST).build();
    countTestCase(matchAll, reader, false);
    countTestCase(matchAllCsq, reader, false);
    countTestCase(tq, reader, withDeletions);
    countTestCase(tCsq, reader, withDeletions);
    countTestCase(bq, reader, true);
    reader.close();
    w.close();
    dir.close();
}
Also used: TermQuery (org.apache.lucene.search.TermQuery), BooleanQuery (org.apache.lucene.search.BooleanQuery), Query (org.apache.lucene.search.Query), MatchNoDocsQuery (org.apache.lucene.search.MatchNoDocsQuery), ParsedQuery (org.elasticsearch.index.query.ParsedQuery), ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery), MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery), Term (org.apache.lucene.index.Term), Document (org.apache.lucene.document.Document), StringField (org.apache.lucene.document.StringField), IndexReader (org.apache.lucene.index.IndexReader), RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter), Directory (org.apache.lucene.store.Directory), IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)
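
For reference, the two counting paths exercised by countTestCase can be sketched directly against an IndexSearcher. The field name and the assertion below are illustrative assumptions, not part of the test.

import java.io.IOException;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TotalHitCountCollector;

final class CountSketch {
    static int countBoth(IndexSearcher searcher) throws IOException {
        Query query = new TermQuery(new Term("foo", "bar"));
        // Shortcut path: may be answered from index statistics when there are no deletions.
        int viaCount = searcher.count(query);
        // Explicit path: always collects the matching documents.
        TotalHitCountCollector collector = new TotalHitCountCollector();
        searcher.search(query, collector);
        assert viaCount == collector.getTotalHits();
        return viaCount;
    }
}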

Aggregations

TermQuery (org.apache.lucene.search.TermQuery): 673
Term (org.apache.lucene.index.Term): 560
BooleanQuery (org.apache.lucene.search.BooleanQuery): 343
Query (org.apache.lucene.search.Query): 275
IndexSearcher (org.apache.lucene.search.IndexSearcher): 252
Document (org.apache.lucene.document.Document): 210
TopDocs (org.apache.lucene.search.TopDocs): 164
Directory (org.apache.lucene.store.Directory): 164
IndexReader (org.apache.lucene.index.IndexReader): 125
MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 125
PhraseQuery (org.apache.lucene.search.PhraseQuery): 122
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 116
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 114
SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 97
BoostQuery (org.apache.lucene.search.BoostQuery): 85
Field (org.apache.lucene.document.Field): 81
Test (org.junit.Test): 75
PrefixQuery (org.apache.lucene.search.PrefixQuery): 74
ArrayList (java.util.ArrayList): 72
FuzzyQuery (org.apache.lucene.search.FuzzyQuery): 62