Use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.
Class TestNRTReplication, method testFullClusterCrash:
@Nightly
public void testFullClusterCrash() throws Exception {
  Path path1 = createTempDir("1");
  NodeProcess primary = startNode(-1, 0, path1, -1, true);
  Path path2 = createTempDir("2");
  NodeProcess replica1 = startNode(primary.tcpPort, 1, path2, -1, true);
  Path path3 = createTempDir("3");
  NodeProcess replica2 = startNode(primary.tcpPort, 2, path3, -1, true);
  sendReplicasToPrimary(primary, replica1, replica2);
  // Index 50 docs into primary:
  LineFileDocs docs = new LineFileDocs(random());
  long primaryVersion1 = 0;
  for (int iter = 0; iter < 5; iter++) {
    try (Connection c = new Connection(primary.tcpPort)) {
      c.out.writeByte(SimplePrimaryNode.CMD_INDEXING);
      for (int i = 0; i < 10; i++) {
        Document doc = docs.nextDoc();
        primary.addOrUpdateDocument(c, doc, false);
      }
    }
    // Refresh primary, which also pushes to replicas:
    primaryVersion1 = primary.flush(0);
    assertTrue(primaryVersion1 > 0);
  }
  // Wait for replicas to sync up:
  waitForVersionAndHits(replica1, primaryVersion1, 50);
  waitForVersionAndHits(replica2, primaryVersion1, 50);
  primary.commit();
  replica1.commit();
  replica2.commit();
  // Index 10 more docs, but don't sync to replicas:
  try (Connection c = new Connection(primary.tcpPort)) {
    c.out.writeByte(SimplePrimaryNode.CMD_INDEXING);
    for (int i = 0; i < 10; i++) {
      Document doc = docs.nextDoc();
      primary.addOrUpdateDocument(c, doc, false);
    }
  }
  // Full cluster crash
  primary.crash();
  replica1.crash();
  replica2.crash();
  // Full cluster restart
  primary = startNode(-1, 0, path1, -1, true);
  replica1 = startNode(primary.tcpPort, 1, path2, -1, true);
  replica2 = startNode(primary.tcpPort, 2, path3, -1, true);
  // Only 50 hits are expected: the last 10 docs were indexed after the commit, so they are lost in the crash.
  // The expected version is -1 because it's unpredictable how IW changes the segments version on init:
  assertVersionAndHits(primary, -1, 50);
  assertVersionAndHits(replica1, primary.initInfosVersion, 50);
  assertVersionAndHits(replica2, primary.initInfosVersion, 50);
  primary.close();
  replica1.close();
  replica2.close();
}
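The replication tests above all rely on the same LineFileDocs idiom: construct it from the test's Random, pull documents with nextDoc(), and close it when done. A minimal sketch of that idiom outside the replication harness (assuming a LuceneTestCase subclass, so random(), newDirectory() and newIndexWriterConfig() are available; this is not part of TestNRTReplication itself):

// Minimal sketch: index documents pulled from LineFileDocs with a plain IndexWriter.
try (Directory dir = newDirectory();
     LineFileDocs docs = new LineFileDocs(random());
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())))) {
  for (int i = 0; i < 50; i++) {
    writer.addDocument(docs.nextDoc());
  }
  writer.commit();
}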
Use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.
Class TestNRTReplication, method testIndexingWhileReplicaIsDown:
// Start up, index 10 docs, replicate, commit, crash, index more docs, replicate, then restart the replica
@Nightly
public void testIndexingWhileReplicaIsDown() throws Exception {
  Path primaryPath = createTempDir("primary");
  NodeProcess primary = startNode(-1, 0, primaryPath, -1, false);
  Path replicaPath = createTempDir("replica");
  NodeProcess replica = startNode(primary.tcpPort, 1, replicaPath, -1, true);
  sendReplicasToPrimary(primary, replica);
  // Index 10 docs into primary:
  LineFileDocs docs = new LineFileDocs(random());
  try (Connection c = new Connection(primary.tcpPort)) {
    c.out.writeByte(SimplePrimaryNode.CMD_INDEXING);
    for (int i = 0; i < 10; i++) {
      Document doc = docs.nextDoc();
      primary.addOrUpdateDocument(c, doc, false);
    }
  }
  // Refresh primary, which also pushes to replica:
  long primaryVersion1 = primary.flush(0);
  assertTrue(primaryVersion1 > 0);
  // Wait for replica to sync up:
  waitForVersionAndHits(replica, primaryVersion1, 10);
  // Commit and crash replica:
  replica.commit();
  replica.crash();
  sendReplicasToPrimary(primary);
  // Index 10 more docs, while replica is down
  try (Connection c = new Connection(primary.tcpPort)) {
    c.out.writeByte(SimplePrimaryNode.CMD_INDEXING);
    for (int i = 0; i < 10; i++) {
      Document doc = docs.nextDoc();
      primary.addOrUpdateDocument(c, doc, false);
    }
  }
  // And flush:
  long primaryVersion2 = primary.flush(0);
  assertTrue(primaryVersion2 > primaryVersion1);
  // Now restart replica:
  replica = startNode(primary.tcpPort, 1, replicaPath, -1, false);
  sendReplicasToPrimary(primary, replica);
  // On startup the replica still searches its last commit:
  assertVersionAndHits(replica, primaryVersion1, 10);
  // Now ask replica to sync:
  replica.newNRTPoint(primaryVersion2, 0, primary.tcpPort);
  waitForVersionAndHits(replica, primaryVersion2, 20);
  replica.close();
  primary.close();
}
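Both tests repeat the same ten-document indexing block over a Connection to the primary; when extending these tests it factors naturally into a small helper. A sketch, where indexDocs is a hypothetical helper name and not part of the test class:

// Hypothetical helper (not in TestNRTReplication): index `count` docs from
// LineFileDocs into the primary over one Connection, mirroring the inline blocks above.
private void indexDocs(NodeProcess primary, LineFileDocs docs, int count) throws Exception {
  try (Connection c = new Connection(primary.tcpPort)) {
    c.out.writeByte(SimplePrimaryNode.CMD_INDEXING);
    for (int i = 0; i < count; i++) {
      primary.addOrUpdateDocument(c, docs.nextDoc(), false);
    }
  }
}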
Use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.
Class TestFreeTextSuggester, method testWiki:
@Ignore
public void testWiki() throws Exception {
  final LineFileDocs lfd = new LineFileDocs(null, "/lucenedata/enwiki/enwiki-20120502-lines-1k.txt");
  // Skip header:
  lfd.nextDoc();
  Analyzer analyzer = new MockAnalyzer(random());
  FreeTextSuggester sug = new FreeTextSuggester(analyzer);
  sug.build(new InputIterator() {

    private int count;

    @Override
    public long weight() {
      return 1;
    }

    @Override
    public BytesRef next() {
      Document doc;
      try {
        doc = lfd.nextDoc();
      } catch (IOException ioe) {
        throw new RuntimeException(ioe);
      }
      if (doc == null) {
        return null;
      }
      if (count++ == 10000) {
        return null;
      }
      return new BytesRef(doc.get("body"));
    }

    @Override
    public BytesRef payload() {
      return null;
    }

    @Override
    public boolean hasPayloads() {
      return false;
    }

    @Override
    public Set<BytesRef> contexts() {
      return null;
    }

    @Override
    public boolean hasContexts() {
      return false;
    }
  });
  if (VERBOSE) {
    System.out.println(sug.ramBytesUsed() + " bytes");
    List<LookupResult> results = sug.lookup("general r", 10);
    System.out.println("results:");
    for (LookupResult result : results) {
      System.out.println(" " + result);
    }
  }
  analyzer.close();
}
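After build(), the suggester is queried through lookup(), as the VERBOSE block above does; each LookupResult carries the suggested text and its weight. A short usage fragment, assuming a suggester built as in the test (as there, the surrounding method should declare throws Exception):

// Sketch: query the built suggester and print each suggestion with its weight.
List<LookupResult> results = sug.lookup("general r", 10);
for (LookupResult result : results) {
  // LookupResult.key is the suggested text, LookupResult.value its weight.
  System.out.println(result.key + " => " + result.value);
}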
Use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.
Class BaseTestCheckIndex, method testChecksumsOnly:
public void testChecksumsOnly(Directory dir) throws IOException {
  LineFileDocs lf = new LineFileDocs(random());
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(analyzer));
  for (int i = 0; i < 100; i++) {
    iw.addDocument(lf.nextDoc());
  }
  iw.addDocument(new Document());
  iw.commit();
  iw.close();
  lf.close();
  ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
  CheckIndex checker = new CheckIndex(dir);
  checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
  if (VERBOSE)
    checker.setInfoStream(System.out);
  CheckIndex.Status indexStatus = checker.checkIndex();
  assertTrue(indexStatus.clean);
  checker.close();
  analyzer.close();
}
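The same programmatic check is useful outside the test framework; the report CheckIndex writes to its infoStream says which segment or part of the index failed. A small sketch along those lines, where verifyIndex is a hypothetical helper and not a Lucene API:

// Hypothetical helper: run CheckIndex on a Directory and dump its report if the index is not clean.
static boolean verifyIndex(Directory dir) throws IOException {
  ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
  try (CheckIndex checker = new CheckIndex(dir)) {
    checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
    CheckIndex.Status status = checker.checkIndex();
    if (!status.clean) {
      // Surface the full CheckIndex report for diagnosis.
      System.out.println(bos.toString(IOUtils.UTF_8));
    }
    return status.clean;
  }
}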
Use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.
Class ThreadedIndexingAndSearchingTestCase, method runTest:
public void runTest(String testName) throws Exception {
  failed.set(false);
  addCount.set(0);
  delCount.set(0);
  packCount.set(0);
  final long t0 = System.currentTimeMillis();
  Random random = new Random(random().nextLong());
  final LineFileDocs docs = new LineFileDocs(random);
  final Path tempDir = createTempDir(testName);
  // some subclasses rely on this being MDW
  dir = getDirectory(newMockFSDirectory(tempDir));
  if (dir instanceof BaseDirectoryWrapper) {
    // don't double-checkIndex, we do it ourselves.
    ((BaseDirectoryWrapper) dir).setCheckIndexOnClose(false);
  }
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  final IndexWriterConfig conf = newIndexWriterConfig(analyzer).setCommitOnClose(false);
  conf.setInfoStream(new FailOnNonBulkMergesInfoStream());
  if (conf.getMergePolicy() instanceof MockRandomMergePolicy) {
    ((MockRandomMergePolicy) conf.getMergePolicy()).setDoNonBulkMerges(false);
  }
  if (LuceneTestCase.TEST_NIGHTLY) {
    // newIWConfig makes smallish max seg size, which
    // results in tons and tons of segments for this test
    // when run nightly:
    MergePolicy mp = conf.getMergePolicy();
    if (mp instanceof TieredMergePolicy) {
      ((TieredMergePolicy) mp).setMaxMergedSegmentMB(5000.);
    } else if (mp instanceof LogByteSizeMergePolicy) {
      ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1000.);
    } else if (mp instanceof LogMergePolicy) {
      ((LogMergePolicy) mp).setMaxMergeDocs(100000);
    }
    // when running nightly, merging can still have crazy parameters,
    // and might use many per-field codecs. turn on CFS for IW flushes
    // and ensure CFS ratio is reasonable to keep it contained.
    conf.setUseCompoundFile(true);
    mp.setNoCFSRatio(Math.max(0.25d, mp.getNoCFSRatio()));
  }
  conf.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {

    @Override
    public void warm(LeafReader reader) throws IOException {
      if (VERBOSE) {
        System.out.println("TEST: now warm merged reader=" + reader);
      }
      warmed.put(((SegmentReader) reader).core, Boolean.TRUE);
      final int maxDoc = reader.maxDoc();
      final Bits liveDocs = reader.getLiveDocs();
      int sum = 0;
      final int inc = Math.max(1, maxDoc / 50);
      for (int docID = 0; docID < maxDoc; docID += inc) {
        if (liveDocs == null || liveDocs.get(docID)) {
          final Document doc = reader.document(docID);
          sum += doc.getFields().size();
        }
      }
      IndexSearcher searcher = newSearcher(reader, false);
      sum += searcher.search(new TermQuery(new Term("body", "united")), 10).totalHits;
      if (VERBOSE) {
        System.out.println("TEST: warm visited " + sum + " fields");
      }
    }
  });
  if (VERBOSE) {
    conf.setInfoStream(new PrintStreamInfoStream(System.out) {

      @Override
      public void message(String component, String message) {
        if ("TP".equals(component)) {
          // ignore test points!
          return;
        }
        super.message(component, message);
      }
    });
  }
  writer = new IndexWriter(dir, conf);
  TestUtil.reduceOpenFiles(writer);
  final ExecutorService es = random().nextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName));
  doAfterWriter(es);
  final int NUM_INDEX_THREADS = TestUtil.nextInt(random(), 2, 4);
  final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;
  final Set<String> delIDs = Collections.synchronizedSet(new HashSet<String>());
  final Set<String> delPackIDs = Collections.synchronizedSet(new HashSet<String>());
  final List<SubDocs> allSubDocs = Collections.synchronizedList(new ArrayList<SubDocs>());
  final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC * 1000;
  final Thread[] indexThreads = launchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);
  if (VERBOSE) {
    System.out.println("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (System.currentTimeMillis() - t0) + " ms]");
  }
  // Let index build up a bit
  Thread.sleep(100);
  doSearching(es, stopTime);
  if (VERBOSE) {
    System.out.println("TEST: all searching done [" + (System.currentTimeMillis() - t0) + " ms]");
  }
  for (Thread thread : indexThreads) {
    thread.join();
  }
  if (VERBOSE) {
    System.out.println("TEST: done join indexing threads [" + (System.currentTimeMillis() - t0) + " ms]; addCount=" + addCount + " delCount=" + delCount);
  }
  final IndexSearcher s = getFinalSearcher();
  if (VERBOSE) {
    System.out.println("TEST: finalSearcher=" + s);
  }
  assertFalse(failed.get());
  boolean doFail = false;
  // Verify: make sure delIDs are in fact deleted:
  for (String id : delIDs) {
    final TopDocs hits = s.search(new TermQuery(new Term("docid", id)), 1);
    if (hits.totalHits != 0) {
      System.out.println("doc id=" + id + " is supposed to be deleted, but got " + hits.totalHits + " hits; first docID=" + hits.scoreDocs[0].doc);
      doFail = true;
    }
  }
  // Verify: make sure delPackIDs are in fact deleted:
  for (String id : delPackIDs) {
    final TopDocs hits = s.search(new TermQuery(new Term("packID", id)), 1);
    if (hits.totalHits != 0) {
      System.out.println("packID=" + id + " is supposed to be deleted, but got " + hits.totalHits + " matches");
      doFail = true;
    }
  }
  // Verify: make sure each group of sub-docs are still in docID order:
  for (SubDocs subDocs : allSubDocs) {
    TopDocs hits = s.search(new TermQuery(new Term("packID", subDocs.packID)), 20);
    if (!subDocs.deleted) {
      // We sort by relevance but the scores should be identical so sort falls back to by docID:
      if (hits.totalHits != subDocs.subIDs.size()) {
        System.out.println("packID=" + subDocs.packID + ": expected " + subDocs.subIDs.size() + " hits but got " + hits.totalHits);
        doFail = true;
      } else {
        int lastDocID = -1;
        int startDocID = -1;
        for (ScoreDoc scoreDoc : hits.scoreDocs) {
          final int docID = scoreDoc.doc;
          if (lastDocID != -1) {
            assertEquals(1 + lastDocID, docID);
          } else {
            startDocID = docID;
          }
          lastDocID = docID;
          final Document doc = s.doc(docID);
          assertEquals(subDocs.packID, doc.get("packID"));
        }
        lastDocID = startDocID - 1;
        for (String subID : subDocs.subIDs) {
          hits = s.search(new TermQuery(new Term("docid", subID)), 1);
          assertEquals(1, hits.totalHits);
          final int docID = hits.scoreDocs[0].doc;
          if (lastDocID != -1) {
            assertEquals(1 + lastDocID, docID);
          }
          lastDocID = docID;
        }
      }
    } else {
      // because we can re-use packID for update:
      for (String subID : subDocs.subIDs) {
        assertEquals(0, s.search(new TermQuery(new Term("docid", subID)), 1).totalHits);
      }
    }
  }
  // Verify: make sure all not-deleted docs are in fact
  // not deleted:
  final int endID = Integer.parseInt(docs.nextDoc().get("docid"));
  docs.close();
  for (int id = 0; id < endID; id++) {
    String stringID = "" + id;
    if (!delIDs.contains(stringID)) {
      final TopDocs hits = s.search(new TermQuery(new Term("docid", stringID)), 1);
      if (hits.totalHits != 1) {
        System.out.println("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.totalHits + "; delIDs=" + delIDs);
        doFail = true;
      }
    }
  }
  assertFalse(doFail);
  assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), s.getIndexReader().numDocs());
  releaseSearcher(s);
  writer.commit();
  assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), writer.numDocs());
  doClose();
  try {
    writer.commit();
  } finally {
    writer.close();
  }
  // Cannot shut down the executor until after the writer is closed: the writer's merged segment
  // warmer uses an IndexSearcher to run searches, and that IndexSearcher may be using this es!
  if (es != null) {
    es.shutdown();
    es.awaitTermination(1, TimeUnit.SECONDS);
  }
  TestUtil.checkIndex(dir);
  dir.close();
  if (VERBOSE) {
    System.out.println("TEST: done [" + (System.currentTimeMillis() - t0) + " ms]");
  }
}
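The merged-segment warmer configured in runTest is a useful pattern on its own: it visits stored fields and runs a query against each freshly merged segment before NRT readers see it, so the first real search does not pay the warm-up cost. A trimmed sketch of just that piece on a plain IndexWriterConfig, without the test bookkeeping such as the warmed map, and assuming an Analyzer named analyzer in scope:

// Sketch: warm each newly merged segment with one stored-fields visit and one query.
IndexWriterConfig conf = new IndexWriterConfig(analyzer);
conf.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
  @Override
  public void warm(LeafReader reader) throws IOException {
    if (reader.maxDoc() > 0) {
      reader.document(0); // touch stored fields
    }
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.search(new TermQuery(new Term("body", "united")), 10);
  }
});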