Search in sources :

Example 26 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class QueryPhaseTests method countTestCase.

/**
 * Builds a small random index (optionally with deleted documents) and runs the
 * query-based {@code countTestCase} overload against a set of representative queries.
 *
 * @param withDeletions when {@code true}, some documents (always including the first one)
 *                      are tagged with {@code delete:yes} and deleted before the reader is opened
 */
private void countTestCase(boolean withDeletions) throws Exception {
    Directory directory = newDirectory();
    // merging is switched off via NoMergePolicy
    IndexWriterConfig config = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

    final int docCount = scaledRandomIntBetween(100, 200);
    for (int docIdx = 0; docIdx < docCount; docIdx++) {
        Document document = new Document();
        if (randomBoolean()) {
            document.add(new StringField("foo", "bar", Store.NO));
        }
        if (randomBoolean()) {
            document.add(new StringField("foo", "baz", Store.NO));
        }
        // the first document is always tagged so that at least one deletion happens
        final boolean markForDeletion = withDeletions && (rarely() || docIdx == 0);
        if (markForDeletion) {
            document.add(new StringField("delete", "yes", Store.NO));
        }
        writer.addDocument(document);
    }
    if (withDeletions) {
        writer.deleteDocuments(new Term("delete", "yes"));
    }
    final IndexReader reader = writer.getReader();

    Query everything = new MatchAllDocsQuery();
    Query fooBar = new TermQuery(new Term("foo", "bar"));
    BooleanQuery everythingAndFooBar = new BooleanQuery.Builder()
        .add(everything, Occur.SHOULD)
        .add(fooBar, Occur.MUST)
        .build();

    // NOTE(review): the meaning of the third argument is defined by the overload, which is not shown here
    countTestCase(everything, reader, false);
    countTestCase(new ConstantScoreQuery(everything), reader, false);
    countTestCase(fooBar, reader, withDeletions);
    countTestCase(new ConstantScoreQuery(fooBar), reader, withDeletions);
    countTestCase(everythingAndFooBar, reader, true);

    reader.close();
    writer.close();
    directory.close();
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) ParsedQuery(org.elasticsearch.index.query.ParsedQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) StringField(org.apache.lucene.document.StringField) IndexReader(org.apache.lucene.index.IndexReader) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 27 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class PercolateQueryTests method testPercolateQuery.

/**
 * Indexes four documents, each paired with one stored query, and verifies that
 * {@code PercolateQuery} returns the expected hits, hit order and explanations when
 * percolating the text "the quick brown fox jumps over the lazy dog".
 *
 * Relies on the {@code indexWriter}, {@code directory} and {@code directoryReader}
 * fields, which are declared outside this method.
 */
public void testPercolateQuery() throws Exception {
    List<Iterable<? extends IndexableField>> docs = new ArrayList<>();
    List<Query> queries = new ArrayList<>();
    // the query store resolves a query by its position in the queries list,
    // so queries and docs are added pairwise in the same order
    PercolateQuery.QueryStore queryStore = ctx -> queries::get;
    // doc 0: term query, selected by "a"
    queries.add(new TermQuery(new Term("field", "fox")));
    docs.add(Collections.singleton(new StringField("select", "a", Field.Store.NO)));
    // doc 1: span near query, selected by "b"
    SpanNearQuery.Builder snp = new SpanNearQuery.Builder("field", true);
    snp.addClause(new SpanTermQuery(new Term("field", "jumps")));
    snp.addClause(new SpanTermQuery(new Term("field", "lazy")));
    snp.addClause(new SpanTermQuery(new Term("field", "dog")));
    snp.setSlop(2);
    queries.add(snp.build());
    docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO)));
    // doc 2: phrase query, selected by "b"
    PhraseQuery.Builder pq1 = new PhraseQuery.Builder();
    pq1.add(new Term("field", "quick"));
    pq1.add(new Term("field", "brown"));
    pq1.add(new Term("field", "jumps"));
    pq1.setSlop(1);
    queries.add(pq1.build());
    docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO)));
    // doc 3: boolean query, selected by "b"
    BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
    bq1.add(new TermQuery(new Term("field", "quick")), BooleanClause.Occur.MUST);
    bq1.add(new TermQuery(new Term("field", "brown")), BooleanClause.Occur.MUST);
    bq1.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST);
    queries.add(bq1.build());
    docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO)));
    indexWriter.addDocuments(docs);
    indexWriter.close();
    directoryReader = DirectoryReader.open(directory);
    IndexSearcher shardSearcher = newSearcher(directoryReader);
    // the document being percolated lives in an in-memory index
    MemoryIndex memoryIndex = new MemoryIndex();
    memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    IndexSearcher percolateSearcher = memoryIndex.createSearcher();
    // no scoring, wrapping it in a constant score query:
    Query query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("a"), new TermQuery(new Term("select", "a")), percolateSearcher, new MatchNoDocsQuery("")));
    TopDocs topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(1));
    assertThat(topDocs.scoreDocs.length, equalTo(1));
    assertThat(topDocs.scoreDocs[0].doc, equalTo(0));
    Explanation explanation = shardSearcher.explain(query, 0);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
    // candidates selected by "b": docs 1, 2 and 3 are all expected to match
    query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("b"), new TermQuery(new Term("select", "b")), percolateSearcher, new MatchNoDocsQuery("")));
    topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(3));
    assertThat(topDocs.scoreDocs.length, equalTo(3));
    assertThat(topDocs.scoreDocs[0].doc, equalTo(1));
    explanation = shardSearcher.explain(query, 1);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
    assertThat(topDocs.scoreDocs[1].doc, equalTo(2));
    explanation = shardSearcher.explain(query, 2);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score));
    assertThat(topDocs.scoreDocs[2].doc, equalTo(3));
    // explain the document of the third hit (doc 3), not doc 2 a second time
    explanation = shardSearcher.explain(query, 3);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score));
    // match-all candidate query: all four documents are expected as hits
    query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("c"), new MatchAllDocsQuery(), percolateSearcher, new MatchAllDocsQuery()));
    topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(4));
    // without the constant score wrapper the hits are expected in the order 3, 2, 1,
    // and every explanation is expected to carry exactly one detail
    query = new PercolateQuery("type", queryStore, new BytesArray("{}"), new TermQuery(new Term("select", "b")), percolateSearcher, new MatchNoDocsQuery(""));
    topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(3));
    assertThat(topDocs.scoreDocs.length, equalTo(3));
    assertThat(topDocs.scoreDocs[0].doc, equalTo(3));
    explanation = shardSearcher.explain(query, 3);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
    assertThat(explanation.getDetails(), arrayWithSize(1));
    assertThat(topDocs.scoreDocs[1].doc, equalTo(2));
    explanation = shardSearcher.explain(query, 2);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score));
    assertThat(explanation.getDetails(), arrayWithSize(1));
    assertThat(topDocs.scoreDocs[2].doc, equalTo(1));
    explanation = shardSearcher.explain(query, 1);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score));
    assertThat(explanation.getDetails(), arrayWithSize(1));
}
Also used : Query(org.apache.lucene.search.Query) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) NoMergePolicy(org.apache.lucene.index.NoMergePolicy) Matchers.arrayWithSize(org.hamcrest.Matchers.arrayWithSize) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) StringField(org.apache.lucene.document.StringField) IndexableField(org.apache.lucene.index.IndexableField) Term(org.apache.lucene.index.Term) MemoryIndex(org.apache.lucene.index.memory.MemoryIndex) PhraseQuery(org.apache.lucene.search.PhraseQuery) WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) ArrayList(java.util.ArrayList) BytesArray(org.elasticsearch.common.bytes.BytesArray) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) Directory(org.apache.lucene.store.Directory) After(org.junit.After) ESTestCase(org.elasticsearch.test.ESTestCase) Before(org.junit.Before) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TopDocs(org.apache.lucene.search.TopDocs) Explanation(org.apache.lucene.search.Explanation) DirectoryReader(org.apache.lucene.index.DirectoryReader) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) BooleanClause(org.apache.lucene.search.BooleanClause) IndexWriter(org.apache.lucene.index.IndexWriter) TermQuery(org.apache.lucene.search.TermQuery) List(java.util.List) BooleanQuery(org.apache.lucene.search.BooleanQuery) Field(org.apache.lucene.document.Field) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Matchers.is(org.hamcrest.Matchers.is) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Collections(java.util.Collections) IndexSearcher(org.apache.lucene.search.IndexSearcher) IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) 
ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) Explanation(org.apache.lucene.search.Explanation) ArrayList(java.util.ArrayList) TopDocs(org.apache.lucene.search.TopDocs) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) BytesArray(org.elasticsearch.common.bytes.BytesArray) PhraseQuery(org.apache.lucene.search.PhraseQuery) Term(org.apache.lucene.index.Term) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) IndexableField(org.apache.lucene.index.IndexableField) MemoryIndex(org.apache.lucene.index.memory.MemoryIndex) StringField(org.apache.lucene.document.StringField) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 28 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class MockFSDirectoryService method checkIndex.

/**
 * Runs Lucene's {@link CheckIndex} against the directory of the given store.
 * <p>
 * Nothing is done if a reference on the store cannot be acquired or if no index exists in
 * the directory. An unclean index, or a {@link LockObtainFailedException} while opening the
 * checker, sets {@code ESTestCase.checkIndexFailed}. Exceptions raised inside this method
 * (including the ones it throws itself) are caught and logged at warn level rather than
 * propagated to the caller.
 *
 * @param logger  logger that receives the start/end markers and the check output
 * @param store   store whose directory is checked; its reference count is incremented for
 *                the duration of the check and decremented afterwards
 * @param shardId shard id used in the "IndexWriter is still open" error message
 */
public static void checkIndex(Logger logger, Store store, ShardId shardId) {
    if (store.tryIncRef()) {
        logger.info("start check index");
        try {
            Directory dir = store.directory();
            if (!Lucene.indexExists(dir)) {
                return;
            }
            try (CheckIndex checkIndex = new CheckIndex(dir)) {
                BytesStreamOutput os = new BytesStreamOutput();
                PrintStream out = new PrintStream(os, false, StandardCharsets.UTF_8.name());
                checkIndex.setInfoStream(out);
                CheckIndex.Status status = checkIndex.checkIndex();
                // flush after the check has written its report, so that everything is in
                // the buffer before it is read below
                out.flush();
                if (!status.clean) {
                    ESTestCase.checkIndexFailed = true;
                    logger.warn("check index [failure] index files={}\n{}", Arrays.toString(dir.listAll()), os.bytes().utf8ToString());
                    throw new IOException("index check failure");
                } else if (logger.isDebugEnabled()) {
                    logger.debug("check index [success]\n{}", os.bytes().utf8ToString());
                }
            } catch (LockObtainFailedException e) {
                ESTestCase.checkIndexFailed = true;
                throw new IllegalStateException("IndexWriter is still open on shard " + shardId, e);
            }
        } catch (Exception e) {
            logger.warn("failed to check index", e);
        } finally {
            logger.info("end check index");
            store.decRef();
        }
    }
}
Also used : PrintStream(java.io.PrintStream) LockObtainFailedException(org.apache.lucene.store.LockObtainFailedException) IOException(java.io.IOException) CheckIndex(org.apache.lucene.index.CheckIndex) BytesStreamOutput(org.elasticsearch.common.io.stream.BytesStreamOutput) LockObtainFailedException(org.apache.lucene.store.LockObtainFailedException) IOException(java.io.IOException) Directory(org.apache.lucene.store.Directory)

Example 29 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class CorruptionUtils method corruptFile.

/**
 * Corrupts a random file at a random position.
 * <p>
 * One of the given files is picked at random, a single byte at a random offset is
 * incremented by one and written back, and the checksums before and after the change are
 * logged. If the checksums still agree (a collision), the calling test is skipped via
 * {@code assumeTrue}.
 *
 * @param random source of randomness for picking the file and the offset
 * @param files  candidate files; must be non-empty, and the picked one must be a regular file
 * @throws IOException if reading or writing the picked file fails
 */
public static void corruptFile(Random random, Path... files) throws IOException {
    assertTrue("files must be non-empty", files.length > 0);
    final Path fileToCorrupt = RandomPicks.randomFrom(random, files);
    assertTrue(fileToCorrupt + " is not a file", Files.isRegularFile(fileToCorrupt));
    try (Directory dir = FSDirectory.open(fileToCorrupt.toAbsolutePath().getParent())) {
        long checksumBeforeCorruption;
        try (IndexInput input = dir.openInput(fileToCorrupt.getFileName().toString(), IOContext.DEFAULT)) {
            checksumBeforeCorruption = CodecUtil.retrieveChecksum(input);
        }
        try (FileChannel raf = FileChannel.open(fileToCorrupt, StandardOpenOption.READ, StandardOpenOption.WRITE)) {
            // read
            raf.position(random.nextInt((int) Math.min(Integer.MAX_VALUE, raf.size())));
            long filePointer = raf.position();
            ByteBuffer bb = ByteBuffer.wrap(new byte[1]);
            raf.read(bb);
            bb.flip();
            // corrupt
            byte oldValue = bb.get(0);
            byte newValue = (byte) (oldValue + 1);
            bb.put(0, newValue);
            // rewrite
            raf.position(filePointer);
            raf.write(bb);
            // mask to the low 8 bits so that bytes >= 0x80 are not printed sign-extended (e.g. ffffff80)
            logger.info("Corrupting file --  flipping at position {} from {} to {} file: {}", filePointer, Integer.toHexString(oldValue & 0xFF), Integer.toHexString(newValue & 0xFF), fileToCorrupt.getFileName());
        }
        long checksumAfterCorruption;
        long actualChecksumAfterCorruption;
        try (ChecksumIndexInput input = dir.openChecksumInput(fileToCorrupt.getFileName().toString(), IOContext.DEFAULT)) {
            assertThat(input.getFilePointer(), is(0L));
            // one long is the checksum... 8 bytes
            input.seek(input.length() - 8);
            // checksum as reported by the input at this position, followed by the value stored in the file
            checksumAfterCorruption = input.getChecksum();
            actualChecksumAfterCorruption = input.readLong();
        }
        // we need to add assumptions here that the checksums actually really don't match there is a small chance to get collisions
        // in the checksum which is ok though....
        StringBuilder msg = new StringBuilder();
        msg.append("before: [").append(checksumBeforeCorruption).append("] ");
        msg.append("after: [").append(checksumAfterCorruption).append("] ");
        msg.append("checksum value after corruption: [").append(actualChecksumAfterCorruption).append("] ");
        msg.append("file: ").append(fileToCorrupt.getFileName()).append(" length: ").append(dir.fileLength(fileToCorrupt.getFileName().toString()));
        logger.info("Checksum {}", msg);
        // proceed only if either the computed checksum or the stored checksum value differs from the original
        assumeTrue("Checksum collision - " + msg.toString(),
            checksumAfterCorruption != checksumBeforeCorruption
                || actualChecksumAfterCorruption != checksumBeforeCorruption);
        assertThat("no file corrupted", fileToCorrupt, notNullValue());
    }
}
Also used : Path(java.nio.file.Path) ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) FileChannel(java.nio.channels.FileChannel) IndexInput(org.apache.lucene.store.IndexInput) ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) ByteBuffer(java.nio.ByteBuffer) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)

Example 30 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class FreqTermsEnumTests method setUp.

/**
 * Builds the random index used by the tests together with the three reference frequency
 * maps ({@code referenceAll}, {@code referenceNotDeleted}, {@code referenceFilter}) and the
 * {@code filter} query over the ids of a random subset of the non-deleted documents.
 */
@Before
@Override
public void setUp() throws Exception {
    super.setUp();
    referenceAll = new HashMap<>();
    referenceNotDeleted = new HashMap<>();
    referenceFilter = new HashMap<>();

    Directory directory = newDirectory();
    // use keyword analyzer we rely on the stored field holding the exact term.
    IndexWriterConfig writerConfig = newIndexWriterConfig(new KeywordAnalyzer());
    if (frequently()) {
        // we don't want to do any merges, so we won't expunge deletes
        writerConfig.setMergePolicy(NoMergePolicy.INSTANCE);
    }
    iw = new IndexWriter(directory, writerConfig);

    // random vocabulary
    terms = new String[scaledRandomIntBetween(10, 300)];
    for (int termIdx = 0; termIdx < terms.length; termIdx++) {
        terms[termIdx] = randomAsciiOfLength(5);
    }

    // build every document up front: an id plus, for a random subset of terms, 1-3 occurrences each
    final int docCount = scaledRandomIntBetween(30, 300);
    Document[] documents = new Document[docCount];
    for (int docIdx = 0; docIdx < docCount; docIdx++) {
        Document document = new Document();
        document.add(new StringField("id", Integer.toString(docIdx), Field.Store.YES));
        documents[docIdx] = document;
        for (String term : terms) {
            if (!randomBoolean()) {
                int occurrences = randomIntBetween(1, 3);
                for (int remaining = occurrences; remaining > 0; remaining--) {
                    document.add(new TextField("field", term, Field.Store.YES));
                }
            }
        }
    }

    // index the documents, committing now and then
    for (Document document : documents) {
        iw.addDocument(document);
        if (rarely()) {
            iw.commit();
        }
    }

    // delete a random subset of the documents and remember their ids
    Set<String> deletedIds = new HashSet<>();
    for (Document document : documents) {
        if (randomInt(5) == 2) {
            String id = document.getField("id").stringValue();
            deletedIds.add(id);
            iw.deleteDocuments(new Term("id", id));
        }
    }

    for (String term : terms) {
        referenceAll.put(term, new FreqHolder());
        referenceFilter.put(term, new FreqHolder());
        referenceNotDeleted.put(term, new FreqHolder());
    }

    // now go over each doc, build the relevant references and filter
    reader = DirectoryReader.open(iw);
    List<BytesRef> filterTerms = new ArrayList<>();
    final int maxDoc = reader.maxDoc();
    for (int docId = 0; docId < maxDoc; docId++) {
        Document stored = reader.document(docId);
        addFreqs(stored, referenceAll);
        String id = stored.getField("id").stringValue();
        if (deletedIds.contains(id)) {
            continue;
        }
        addFreqs(stored, referenceNotDeleted);
        if (randomBoolean()) {
            filterTerms.add(new BytesRef(id));
            addFreqs(stored, referenceFilter);
        }
    }
    filter = new TermInSetQuery("id", filterTerms);
}
Also used : KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IndexWriter(org.apache.lucene.index.IndexWriter) TermInSetQuery(org.apache.lucene.search.TermInSetQuery) StringField(org.apache.lucene.document.StringField) TextField(org.apache.lucene.document.TextField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) HashSet(java.util.HashSet) Before(org.junit.Before)

Aggregations

Directory (org.apache.lucene.store.Directory)2188 Document (org.apache.lucene.document.Document)1374 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)816 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)669 IndexReader (org.apache.lucene.index.IndexReader)590 IndexSearcher (org.apache.lucene.search.IndexSearcher)381 BytesRef (org.apache.lucene.util.BytesRef)376 RAMDirectory (org.apache.lucene.store.RAMDirectory)360 Term (org.apache.lucene.index.Term)325 StringField (org.apache.lucene.document.StringField)313 IndexWriter (org.apache.lucene.index.IndexWriter)309 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)271 TextField (org.apache.lucene.document.TextField)259 Field (org.apache.lucene.document.Field)257 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)257 Test (org.junit.Test)236 FSDirectory (org.apache.lucene.store.FSDirectory)215 DirectoryReader (org.apache.lucene.index.DirectoryReader)193 Analyzer (org.apache.lucene.analysis.Analyzer)192 FieldType (org.apache.lucene.document.FieldType)173