Use of org.apache.lucene.store.Directory in project elasticsearch by elastic.
From the class QueryPhaseTests, method countTestCase:
private void countTestCase(boolean withDeletions) throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    final int numDocs = scaledRandomIntBetween(100, 200);
    for (int i = 0; i < numDocs; ++i) {
        Document doc = new Document();
        if (randomBoolean()) {
            doc.add(new StringField("foo", "bar", Store.NO));
        }
        if (randomBoolean()) {
            doc.add(new StringField("foo", "baz", Store.NO));
        }
        if (withDeletions && (rarely() || i == 0)) {
            doc.add(new StringField("delete", "yes", Store.NO));
        }
        w.addDocument(doc);
    }
    if (withDeletions) {
        w.deleteDocuments(new Term("delete", "yes"));
    }
    final IndexReader reader = w.getReader();
    Query matchAll = new MatchAllDocsQuery();
    Query matchAllCsq = new ConstantScoreQuery(matchAll);
    Query tq = new TermQuery(new Term("foo", "bar"));
    Query tCsq = new ConstantScoreQuery(tq);
    BooleanQuery bq = new BooleanQuery.Builder().add(matchAll, Occur.SHOULD).add(tq, Occur.MUST).build();
    countTestCase(matchAll, reader, false);
    countTestCase(matchAllCsq, reader, false);
    countTestCase(tq, reader, withDeletions);
    countTestCase(tCsq, reader, withDeletions);
    countTestCase(bq, reader, true);
    reader.close();
    w.close();
    dir.close();
}
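For context, the lifecycle this test drives is plain Lucene: open a Directory, index through a writer, search through a reader, close everything. Below is a minimal standalone sketch of that sequence without the Elasticsearch test helpers (newDirectory(), RandomIndexWriter); the class name is hypothetical, and RAMDirectory is chosen to match the Lucene 6.x era of this code (later Lucene versions replaced it with ByteBuffersDirectory).

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class DirectoryLifecycleSketch { // hypothetical class, for illustration only
    public static void main(String[] args) throws Exception {
        try (Directory dir = new RAMDirectory()) {
            // index one document, then close the writer to make it visible
            try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new WhitespaceAnalyzer()))) {
                Document doc = new Document();
                doc.add(new StringField("foo", "bar", Store.NO));
                w.addDocument(doc);
            }
            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                // count matches without collecting or scoring hits
                System.out.println(searcher.count(new TermQuery(new Term("foo", "bar")))); // prints 1
            }
        }
    }
}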
Use of org.apache.lucene.store.Directory in project elasticsearch by elastic.
From the class PercolateQueryTests, method testPercolateQuery:
public void testPercolateQuery() throws Exception {
    List<Iterable<? extends IndexableField>> docs = new ArrayList<>();
    List<Query> queries = new ArrayList<>();
    PercolateQuery.QueryStore queryStore = ctx -> queries::get;
    queries.add(new TermQuery(new Term("field", "fox")));
    docs.add(Collections.singleton(new StringField("select", "a", Field.Store.NO)));
    SpanNearQuery.Builder snp = new SpanNearQuery.Builder("field", true);
    snp.addClause(new SpanTermQuery(new Term("field", "jumps")));
    snp.addClause(new SpanTermQuery(new Term("field", "lazy")));
    snp.addClause(new SpanTermQuery(new Term("field", "dog")));
    snp.setSlop(2);
    queries.add(snp.build());
    docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO)));
    PhraseQuery.Builder pq1 = new PhraseQuery.Builder();
    pq1.add(new Term("field", "quick"));
    pq1.add(new Term("field", "brown"));
    pq1.add(new Term("field", "jumps"));
    pq1.setSlop(1);
    queries.add(pq1.build());
    docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO)));
    BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
    bq1.add(new TermQuery(new Term("field", "quick")), BooleanClause.Occur.MUST);
    bq1.add(new TermQuery(new Term("field", "brown")), BooleanClause.Occur.MUST);
    bq1.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST);
    queries.add(bq1.build());
    docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO)));
    indexWriter.addDocuments(docs);
    indexWriter.close();
    directoryReader = DirectoryReader.open(directory);
    IndexSearcher shardSearcher = newSearcher(directoryReader);
    MemoryIndex memoryIndex = new MemoryIndex();
    memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    IndexSearcher percolateSearcher = memoryIndex.createSearcher();
    // no scoring, wrapping it in a constant score query:
    Query query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("a"), new TermQuery(new Term("select", "a")), percolateSearcher, new MatchNoDocsQuery("")));
    TopDocs topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(1));
    assertThat(topDocs.scoreDocs.length, equalTo(1));
    assertThat(topDocs.scoreDocs[0].doc, equalTo(0));
    Explanation explanation = shardSearcher.explain(query, 0);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
    query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("b"), new TermQuery(new Term("select", "b")), percolateSearcher, new MatchNoDocsQuery("")));
    topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(3));
    assertThat(topDocs.scoreDocs.length, equalTo(3));
    assertThat(topDocs.scoreDocs[0].doc, equalTo(1));
    explanation = shardSearcher.explain(query, 1);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
    assertThat(topDocs.scoreDocs[1].doc, equalTo(2));
    explanation = shardSearcher.explain(query, 2);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score));
    assertThat(topDocs.scoreDocs[2].doc, equalTo(3));
    explanation = shardSearcher.explain(query, 3);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score));
    query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("c"), new MatchAllDocsQuery(), percolateSearcher, new MatchAllDocsQuery()));
    topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(4));
    query = new PercolateQuery("type", queryStore, new BytesArray("{}"), new TermQuery(new Term("select", "b")), percolateSearcher, new MatchNoDocsQuery(""));
    topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(3));
    assertThat(topDocs.scoreDocs.length, equalTo(3));
    assertThat(topDocs.scoreDocs[0].doc, equalTo(3));
    explanation = shardSearcher.explain(query, 3);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
    assertThat(explanation.getDetails(), arrayWithSize(1));
    assertThat(topDocs.scoreDocs[1].doc, equalTo(2));
    explanation = shardSearcher.explain(query, 2);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score));
    assertThat(explanation.getDetails(), arrayWithSize(1));
    assertThat(topDocs.scoreDocs[2].doc, equalTo(1));
    explanation = shardSearcher.explain(query, 1);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score));
    assertThat(explanation.getDetails(), arrayWithSize(1));
}
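The heart of the percolator is the MemoryIndex seen above: the document being percolated is indexed into a transient one-document index, and each registered query is then run against it. A minimal standalone sketch of that matching step (the class name is hypothetical, for illustration only):

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.TermQuery;

public class MemoryIndexSketch { // hypothetical class name
    public static void main(String[] args) throws Exception {
        MemoryIndex memoryIndex = new MemoryIndex();
        memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
        // MemoryIndex.search returns the query's score against the single
        // in-memory document, or 0.0f if the query does not match it
        float score = memoryIndex.search(new TermQuery(new Term("field", "fox")));
        System.out.println(score > 0.0f ? "stored query matches" : "no match");
    }
}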
Use of org.apache.lucene.store.Directory in project elasticsearch by elastic.
From the class MockFSDirectoryService, method checkIndex:
public static void checkIndex(Logger logger, Store store, ShardId shardId) {
    if (store.tryIncRef()) {
        logger.info("start check index");
        try {
            Directory dir = store.directory();
            if (!Lucene.indexExists(dir)) {
                return;
            }
            try (CheckIndex checkIndex = new CheckIndex(dir)) {
                BytesStreamOutput os = new BytesStreamOutput();
                PrintStream out = new PrintStream(os, false, StandardCharsets.UTF_8.name());
                checkIndex.setInfoStream(out);
                out.flush();
                CheckIndex.Status status = checkIndex.checkIndex();
                if (!status.clean) {
                    ESTestCase.checkIndexFailed = true;
                    logger.warn("check index [failure] index files={}\n{}", Arrays.toString(dir.listAll()), os.bytes().utf8ToString());
                    throw new IOException("index check failure");
                } else {
                    if (logger.isDebugEnabled()) {
                        logger.debug("check index [success]\n{}", os.bytes().utf8ToString());
                    }
                }
            } catch (LockObtainFailedException e) {
                ESTestCase.checkIndexFailed = true;
                throw new IllegalStateException("IndexWriter is still open on shard " + shardId, e);
            }
        } catch (Exception e) {
            logger.warn("failed to check index", e);
        } finally {
            logger.info("end check index");
            store.decRef();
        }
    }
}
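CheckIndex can be pointed at any Directory, not just a Store obtained from the test framework. A minimal sketch of a direct invocation follows; the index path is a placeholder assumption, and the class name is hypothetical:

import java.nio.file.Paths;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CheckIndexSketch { // hypothetical class name
    public static void main(String[] args) throws Exception {
        // "/path/to/index" is a placeholder; point it at a real Lucene index directory
        try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"));
             CheckIndex checkIndex = new CheckIndex(dir)) {
            // as in the method above, construction fails with LockObtainFailedException
            // if an IndexWriter still holds the directory's write lock
            CheckIndex.Status status = checkIndex.checkIndex();
            System.out.println(status.clean ? "index is clean" : "index is corrupted");
        }
    }
}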
Use of org.apache.lucene.store.Directory in project elasticsearch by elastic.
From the class CorruptionUtils, method corruptFile:
/**
* Corrupts a random file at a random position
*/
public static void corruptFile(Random random, Path... files) throws IOException {
    assertTrue("files must be non-empty", files.length > 0);
    final Path fileToCorrupt = RandomPicks.randomFrom(random, files);
    assertTrue(fileToCorrupt + " is not a file", Files.isRegularFile(fileToCorrupt));
    try (Directory dir = FSDirectory.open(fileToCorrupt.toAbsolutePath().getParent())) {
        long checksumBeforeCorruption;
        try (IndexInput input = dir.openInput(fileToCorrupt.getFileName().toString(), IOContext.DEFAULT)) {
            checksumBeforeCorruption = CodecUtil.retrieveChecksum(input);
        }
        try (FileChannel raf = FileChannel.open(fileToCorrupt, StandardOpenOption.READ, StandardOpenOption.WRITE)) {
            // read one byte at a random position
            raf.position(random.nextInt((int) Math.min(Integer.MAX_VALUE, raf.size())));
            long filePointer = raf.position();
            ByteBuffer bb = ByteBuffer.wrap(new byte[1]);
            raf.read(bb);
            bb.flip();
            // corrupt it
            byte oldValue = bb.get(0);
            byte newValue = (byte) (oldValue + 1);
            bb.put(0, newValue);
            // write it back
            raf.position(filePointer);
            raf.write(bb);
            logger.info("Corrupting file -- flipping at position {} from {} to {} file: {}", filePointer, Integer.toHexString(oldValue), Integer.toHexString(newValue), fileToCorrupt.getFileName());
        }
        long checksumAfterCorruption;
        long actualChecksumAfterCorruption;
        try (ChecksumIndexInput input = dir.openChecksumInput(fileToCorrupt.getFileName().toString(), IOContext.DEFAULT)) {
            assertThat(input.getFilePointer(), is(0L));
            // the stored checksum is a single long: the last 8 bytes of the file
            input.seek(input.length() - 8);
            checksumAfterCorruption = input.getChecksum();
            actualChecksumAfterCorruption = input.readLong();
        }
        // assume the checksums really differ; there is a small chance of a
        // checksum collision, which is acceptable for this test
        StringBuilder msg = new StringBuilder();
        msg.append("before: [").append(checksumBeforeCorruption).append("] ");
        msg.append("after: [").append(checksumAfterCorruption).append("] ");
        msg.append("checksum value after corruption: [").append(actualChecksumAfterCorruption).append("] ");
        msg.append("file: ").append(fileToCorrupt.getFileName()).append(" length: ").append(dir.fileLength(fileToCorrupt.getFileName().toString()));
        logger.info("Checksum {}", msg);
        assumeTrue("Checksum collision - " + msg.toString(), // collision
            checksumAfterCorruption != checksumBeforeCorruption || // checksum corrupted
            actualChecksumAfterCorruption != checksumBeforeCorruption);
        assertThat("no file corrupted", fileToCorrupt, notNullValue());
    }
}
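The seek to length() - 8 works because every Lucene index file ends with a codec footer whose final eight bytes hold the file's checksum. A minimal sketch of verifying a whole file against that footer; the path and file name are placeholder assumptions, and the class name is hypothetical:

import java.nio.file.Paths;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;

public class ChecksumSketch { // hypothetical class name
    public static void main(String[] args) throws Exception {
        try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"))) { // placeholder path
            // "segments_1" is a placeholder; any Lucene index file carries a codec footer
            try (IndexInput in = dir.openInput("segments_1", IOContext.READONCE)) {
                // recomputes the checksum over the whole file and throws
                // CorruptIndexException if it does not match the stored footer value
                long checksum = CodecUtil.checksumEntireFile(in);
                System.out.println("checksum OK: " + checksum);
            }
        }
    }
}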
Use of org.apache.lucene.store.Directory in project elasticsearch by elastic.
From the class FreqTermsEnumTests, method setUp:
@Before
@Override
public void setUp() throws Exception {
    super.setUp();
    referenceAll = new HashMap<>();
    referenceNotDeleted = new HashMap<>();
    referenceFilter = new HashMap<>();
    Directory dir = newDirectory();
    // use a keyword analyzer; we rely on the stored field holding the exact term
    IndexWriterConfig conf = newIndexWriterConfig(new KeywordAnalyzer());
    if (frequently()) {
        // we don't want to do any merges, so we won't expunge deletes
        conf.setMergePolicy(NoMergePolicy.INSTANCE);
    }
    iw = new IndexWriter(dir, conf);
    terms = new String[scaledRandomIntBetween(10, 300)];
    for (int i = 0; i < terms.length; i++) {
        terms[i] = randomAsciiOfLength(5);
    }
    int numberOfDocs = scaledRandomIntBetween(30, 300);
    Document[] docs = new Document[numberOfDocs];
    for (int i = 0; i < numberOfDocs; i++) {
        Document doc = new Document();
        doc.add(new StringField("id", Integer.toString(i), Field.Store.YES));
        docs[i] = doc;
        for (String term : terms) {
            if (randomBoolean()) {
                continue;
            }
            int freq = randomIntBetween(1, 3);
            for (int j = 0; j < freq; j++) {
                doc.add(new TextField("field", term, Field.Store.YES));
            }
        }
    }
    for (int i = 0; i < docs.length; i++) {
        Document doc = docs[i];
        iw.addDocument(doc);
        if (rarely()) {
            iw.commit();
        }
    }
    Set<String> deletedIds = new HashSet<>();
    for (int i = 0; i < docs.length; i++) {
        Document doc = docs[i];
        if (randomInt(5) == 2) {
            Term idTerm = new Term("id", doc.getField("id").stringValue());
            deletedIds.add(idTerm.text());
            iw.deleteDocuments(idTerm);
        }
    }
    for (String term : terms) {
        referenceAll.put(term, new FreqHolder());
        referenceFilter.put(term, new FreqHolder());
        referenceNotDeleted.put(term, new FreqHolder());
    }
    // now go over each doc, build the relevant references and filter
    reader = DirectoryReader.open(iw);
    List<BytesRef> filterTerms = new ArrayList<>();
    for (int docId = 0; docId < reader.maxDoc(); docId++) {
        Document doc = reader.document(docId);
        addFreqs(doc, referenceAll);
        if (!deletedIds.contains(doc.getField("id").stringValue())) {
            addFreqs(doc, referenceNotDeleted);
            if (randomBoolean()) {
                filterTerms.add(new BytesRef(doc.getField("id").stringValue()));
                addFreqs(doc, referenceFilter);
            }
        }
    }
    filter = new TermInSetQuery("id", filterTerms);
}
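The reference maps built here are later compared against frequencies that FreqTermsEnum reads out of the terms dictionary. A minimal sketch of pulling docFreq and totalTermFreq straight from a reader; the path is a placeholder, the class name is hypothetical, and MultiFields is assumed to be the era-appropriate API for this Lucene version:

import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class TermFreqSketch { // hypothetical class name
    public static void main(String[] args) throws Exception {
        try (Directory dir = FSDirectory.open(Paths.get("/path/to/index")); // placeholder path
             DirectoryReader reader = DirectoryReader.open(dir)) {
            Terms terms = MultiFields.getTerms(reader, "field");
            if (terms != null) {
                TermsEnum te = terms.iterator();
                for (BytesRef term = te.next(); term != null; term = te.next()) {
                    // docFreq still counts deleted docs until a merge expunges them,
                    // which is why the test keeps separate "all" and "not deleted"
                    // reference maps
                    System.out.println(term.utf8ToString() + " docFreq=" + te.docFreq() + " totalTermFreq=" + te.totalTermFreq());
                }
            }
        }
    }
}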