Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache: class TestIndexWriterExceptions, method testAddDocsNonAbortingException.
public void testAddDocsNonAbortingException() throws Exception {
  final Directory dir = newDirectory();
  final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  final int numDocs1 = random().nextInt(25);
  for (int docCount = 0; docCount < numDocs1; docCount++) {
    Document doc = new Document();
    doc.add(newTextField("content", "good content", Field.Store.NO));
    w.addDocument(doc);
  }
  final List<Document> docs = new ArrayList<>();
  for (int docCount = 0; docCount < 7; docCount++) {
    Document doc = new Document();
    docs.add(doc);
    doc.add(newStringField("id", docCount + "", Field.Store.NO));
    doc.add(newTextField("content", "silly content " + docCount, Field.Store.NO));
    if (docCount == 4) {
      Field f = newTextField("crash", "", Field.Store.NO);
      doc.add(f);
      MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      tokenizer.setReader(new StringReader("crash me on the 4th token"));
      // disable workflow checking as we forcefully close() in exceptional cases.
      tokenizer.setEnableChecks(false);
      f.setTokenStream(new CrashingFilter("crash", tokenizer));
    }
  }
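  // addDocuments is atomic: the analysis exception on the document where docCount == 4
  // rolls back the whole block, so none of the seven "silly content" docs are indexed.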
  IOException expected = expectThrows(IOException.class, () -> {
    w.addDocuments(docs);
  });
  assertEquals(CRASH_FAIL_MESSAGE, expected.getMessage());
  final int numDocs2 = random().nextInt(25);
  for (int docCount = 0; docCount < numDocs2; docCount++) {
    Document doc = new Document();
    doc.add(newTextField("content", "good content", Field.Store.NO));
    w.addDocument(doc);
  }
  final IndexReader r = w.getReader();
  w.close();
  final IndexSearcher s = newSearcher(r);
  PhraseQuery pq = new PhraseQuery("content", "silly", "good");
  assertEquals(0, s.search(pq, 1).totalHits);
  pq = new PhraseQuery("content", "good", "content");
  assertEquals(numDocs1 + numDocs2, s.search(pq, 1).totalHits);
  r.close();
  dir.close();
}
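CrashingFilter and CRASH_FAIL_MESSAGE are declared elsewhere in TestIndexWriterExceptions and are not shown in this snippet. Below is a minimal, hedged sketch of a filter with the behavior the test relies on; the class name, token count, and message text are assumptions, not the project's actual code.

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

// Hedged sketch only: throws a non-aborting IOException once a few tokens of the
// "crash" field have been produced, leaving other fields untouched.
class CrashingFilterSketch extends TokenFilter {
  static final String CRASH_FAIL_MESSAGE = "I'm experiencing problems"; // assumed text
  private final String fieldName;
  private int count;

  CrashingFilterSketch(String fieldName, TokenStream in) {
    super(in);
    this.fieldName = fieldName;
  }

  @Override
  public boolean incrementToken() throws IOException {
    if ("crash".equals(fieldName) && count++ >= 4) {
      throw new IOException(CRASH_FAIL_MESSAGE);
    }
    return input.incrementToken();
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    count = 0;
  }
}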
Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache: class TestIndexWriterExceptions, method testUpdateDocsNonAbortingException.
public void testUpdateDocsNonAbortingException() throws Exception {
  final Directory dir = newDirectory();
  final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  final int numDocs1 = random().nextInt(25);
  for (int docCount = 0; docCount < numDocs1; docCount++) {
    Document doc = new Document();
    doc.add(newTextField("content", "good content", Field.Store.NO));
    w.addDocument(doc);
  }
  // Use addDocuments (no exception) to get docs into the index:
  final List<Document> docs = new ArrayList<>();
  final int numDocs2 = random().nextInt(25);
  for (int docCount = 0; docCount < numDocs2; docCount++) {
    Document doc = new Document();
    docs.add(doc);
    doc.add(newStringField("subid", "subs", Field.Store.NO));
    doc.add(newStringField("id", docCount + "", Field.Store.NO));
    doc.add(newTextField("content", "silly content " + docCount, Field.Store.NO));
  }
  w.addDocuments(docs);
  final int numDocs3 = random().nextInt(25);
  for (int docCount = 0; docCount < numDocs3; docCount++) {
    Document doc = new Document();
    doc.add(newTextField("content", "good content", Field.Store.NO));
    w.addDocument(doc);
  }
  docs.clear();
  final int limit = TestUtil.nextInt(random(), 2, 25);
  final int crashAt = random().nextInt(limit);
  for (int docCount = 0; docCount < limit; docCount++) {
    Document doc = new Document();
    docs.add(doc);
    doc.add(newStringField("id", docCount + "", Field.Store.NO));
    doc.add(newTextField("content", "silly content " + docCount, Field.Store.NO));
    if (docCount == crashAt) {
      Field f = newTextField("crash", "", Field.Store.NO);
      doc.add(f);
      MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      tokenizer.setReader(new StringReader("crash me on the 4th token"));
      // disable workflow checking as we forcefully close() in exceptional cases.
      tokenizer.setEnableChecks(false);
      f.setTokenStream(new CrashingFilter("crash", tokenizer));
    }
  }
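  // The block update should fail during analysis; because updateDocuments is atomic, the
  // original "subid" block added above must remain searchable.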
  IOException expected = expectThrows(IOException.class, () -> {
    w.updateDocuments(new Term("subid", "subs"), docs);
  });
  assertEquals(CRASH_FAIL_MESSAGE, expected.getMessage());
  final int numDocs4 = random().nextInt(25);
  for (int docCount = 0; docCount < numDocs4; docCount++) {
    Document doc = new Document();
    doc.add(newTextField("content", "good content", Field.Store.NO));
    w.addDocument(doc);
  }
  final IndexReader r = w.getReader();
  w.close();
  final IndexSearcher s = newSearcher(r);
  PhraseQuery pq = new PhraseQuery("content", "silly", "content");
  assertEquals(numDocs2, s.search(pq, 1).totalHits);
  pq = new PhraseQuery("content", "good", "content");
  assertEquals(numDocs1 + numDocs3 + numDocs4, s.search(pq, 1).totalHits);
  r.close();
  dir.close();
}
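For context, here is a hedged sketch of the atomic block-update pattern this test exercises. It assumes an already-open IndexWriter named writer and reuses the imports of the test class; the field names and values are illustrative only.

// Hedged sketch, not part of the test above.
List<Document> block = new ArrayList<>();
for (int i = 0; i < 3; i++) {
  Document d = new Document();
  d.add(new StringField("subid", "subs", Field.Store.NO));               // shared block key
  d.add(new TextField("content", "replacement content " + i, Field.Store.NO));
  block.add(d);
}
// updateDocuments deletes every document matching the term and adds the new block in one
// atomic step; if analysis throws a non-aborting exception mid-block, the old block is kept.
writer.updateDocuments(new Term("subid", "subs"), block);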
Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache: class TestIndexWriterExceptions, method testDocumentsWriterExceptionThreads.
public void testDocumentsWriterExceptionThreads() throws Exception {
  Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
    @Override
    public TokenStreamComponents createComponents(String fieldName) {
      MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      // disable workflow checking as we forcefully close() in exceptional cases.
      tokenizer.setEnableChecks(false);
      return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
    }
  };
  final int NUM_THREAD = 3;
  final int NUM_ITER = 100;
  for (int i = 0; i < 2; i++) {
    Directory dir = newDirectory();
    {
      // don't use a merge policy here; merge policies depend on the DWPThreadPool and its
      // max thread states etc.
      final IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer)
          .setMaxBufferedDocs(-1)
          .setMergePolicy(NoMergePolicy.INSTANCE));
      final int finalI = i;
      Thread[] threads = new Thread[NUM_THREAD];
      for (int t = 0; t < NUM_THREAD; t++) {
        threads[t] = new Thread() {
          @Override
          public void run() {
            try {
              for (int iter = 0; iter < NUM_ITER; iter++) {
                Document doc = new Document();
                doc.add(newField("contents", "here are some contents", DocCopyIterator.custom5));
                writer.addDocument(doc);
                writer.addDocument(doc);
                doc.add(newField("crash", "this should crash after 4 terms", DocCopyIterator.custom5));
                doc.add(newField("other", "this will not get indexed", DocCopyIterator.custom5));
                expectThrows(IOException.class, () -> {
                  writer.addDocument(doc);
                });
                if (0 == finalI) {
                  Document extraDoc = new Document();
                  extraDoc.add(newField("contents", "here are some contents", DocCopyIterator.custom5));
                  writer.addDocument(extraDoc);
                  writer.addDocument(extraDoc);
                }
              }
            } catch (Throwable t) {
              synchronized (this) {
                System.out.println(Thread.currentThread().getName() + ": ERROR: hit unexpected exception");
                t.printStackTrace(System.out);
              }
              fail();
            }
          }
        };
        threads[t].start();
      }
      for (int t = 0; t < NUM_THREAD; t++) threads[t].join();
      writer.close();
    }
    IndexReader reader = DirectoryReader.open(dir);
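    // Per iteration each thread adds 2 good docs plus 1 doc whose analysis crashes (the
    // crashed doc still consumes a doc ID and is marked deleted), plus 2 extra docs when
    // i == 0, giving (3 + (1 - i) * 2) docs per thread per iteration in maxDoc.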
    int expected = (3 + (1 - i) * 2) * NUM_THREAD * NUM_ITER;
    assertEquals("i=" + i, expected, reader.docFreq(new Term("contents", "here")));
    assertEquals(expected, reader.maxDoc());
    int numDel = 0;
    final Bits liveDocs = MultiFields.getLiveDocs(reader);
    assertNotNull(liveDocs);
    for (int j = 0; j < reader.maxDoc(); j++) {
      if (!liveDocs.get(j)) {
        numDel++;
      } else {
        reader.document(j);
        reader.getTermVectors(j);
      }
    }
    reader.close();
    assertEquals(NUM_THREAD * NUM_ITER, numDel);
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer).setMaxBufferedDocs(10));
    Document doc = new Document();
    doc.add(newField("contents", "here are some contents", DocCopyIterator.custom5));
    for (int j = 0; j < 17; j++) writer.addDocument(doc);
    writer.forceMerge(1);
    writer.close();
    reader = DirectoryReader.open(dir);
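    // forceMerge(1) drops the NUM_THREAD * NUM_ITER deleted (crashed) docs while the 17
    // freshly added docs are kept, hence the adjustment below.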
    expected += 17 - NUM_THREAD * NUM_ITER;
    assertEquals(expected, reader.docFreq(new Term("contents", "here")));
    assertEquals(expected, reader.maxDoc());
    assertNull(MultiFields.getLiveDocs(reader));
    for (int j = 0; j < reader.maxDoc(); j++) {
      reader.document(j);
      reader.getTermVectors(j);
    }
    reader.close();
    dir.close();
  }
}
Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache: class TestIndexWriterExceptions, method testExceptionJustBeforeFlush.
// LUCENE-1208
public void testExceptionJustBeforeFlush() throws IOException {
  Directory dir = newDirectory();
  final AtomicBoolean doCrash = new AtomicBoolean();
  Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
    @Override
    public TokenStreamComponents createComponents(String fieldName) {
      MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      // disable workflow checking as we forcefully close() in exceptional cases.
      tokenizer.setEnableChecks(false);
      TokenStream stream = tokenizer;
      if (doCrash.get()) {
        stream = new CrashingFilter(fieldName, stream);
      }
      return new TokenStreamComponents(tokenizer, stream);
    }
  };
  IndexWriter w = RandomIndexWriter.mockIndexWriter(random(), dir,
      newIndexWriterConfig(analyzer).setMaxBufferedDocs(2), new TestPoint1());
  Document doc = new Document();
  doc.add(newTextField("field", "a field", Field.Store.YES));
  w.addDocument(doc);
  Document crashDoc = new Document();
  crashDoc.add(newTextField("crash", "do it on token 4", Field.Store.YES));
  doCrash.set(true);
  expectThrows(IOException.class, () -> {
    w.addDocument(crashDoc);
  });
  w.addDocument(doc);
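  // Hedged addition, not in the original test: because the crash above is non-aborting,
  // an NRT reader opened at this point would be expected to see only the two documents
  // that were added successfully, e.g.:
  //   try (IndexReader checkReader = DirectoryReader.open(w)) {
  //     assertEquals(2, checkReader.numDocs());
  //   }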
  w.close();
  dir.close();
}
Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache: class TestLatvianStemmer, method setUp.
@Override
public void setUp() throws Exception {
  super.setUp();
  a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer, new LatvianStemFilter(tokenizer));
    }
  };
}
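The Analyzer field a used above is declared elsewhere in TestLatvianStemmer. A hedged sketch of the cleanup these analysis tests commonly pair with such a setUp follows; the real class may already include an equivalent tearDown.

@Override
public void tearDown() throws Exception {
  a.close(); // Analyzer is Closeable; release its cached token stream components
  super.tearDown();
}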