Use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.
The class TestBackwardsCompatibility, method testCreateSortedIndex.
// ant test -Dtestcase=TestBackwardsCompatibility -Dtestmethod=testCreateSortedIndex -Dtests.codec=default -Dtests.useSecurityManager=false -Dtests.bwcdir=/tmp/sorted
public void testCreateSortedIndex() throws Exception {
  Path indexDir = getIndexDir().resolve("sorted");
  Files.deleteIfExists(indexDir);
  Directory dir = newFSDirectory(indexDir);
  LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
  mp.setNoCFSRatio(1.0);
  mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  // TODO: remove randomness
  IndexWriterConfig conf = new IndexWriterConfig(analyzer);
  conf.setMergePolicy(mp);
  conf.setUseCompoundFile(false);
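  // sort each segment (and the final force-merged segment) by dateDV in descending order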
  conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true)));
  IndexWriter writer = new IndexWriter(dir, conf);
  LineFileDocs docs = new LineFileDocs(random());
  SimpleDateFormat parser = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT);
  parser.setTimeZone(TimeZone.getTimeZone("UTC"));
  ParsePosition position = new ParsePosition(0);
  Field dateDVField = null;
  for (int i = 0; i < 50; i++) {
    Document doc = docs.nextDoc();
    String dateString = doc.get("date");
    position.setIndex(0);
    Date date = parser.parse(dateString, position);
    if (position.getErrorIndex() != -1) {
      throw new AssertionError("failed to parse \"" + dateString + "\" as date");
    }
    if (position.getIndex() != dateString.length()) {
      throw new AssertionError("failed to parse \"" + dateString + "\" as date");
    }
    if (dateDVField == null) {
      dateDVField = new NumericDocValuesField("dateDV", 0L);
      doc.add(dateDVField);
    }
    dateDVField.setLongValue(date.getTime());
    if (i == 250) {
      writer.commit();
    }
    writer.addDocument(doc);
  }
  writer.forceMerge(1);
  writer.close();
  dir.close();
}
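The test only writes and force-merges the sorted index; it never re-opens it here. A minimal sketch (not part of the original test) of how the resulting order could be checked afterwards, reusing indexDir and the "dateDV" field from above; the reader re-open, the dir2/previous names, and the non-increasing check are illustrative additions:

// Sketch only: re-open the index and assert that dateDV is non-increasing,
// matching the descending index sort configured on the writer above.
try (Directory dir2 = newFSDirectory(indexDir);
     DirectoryReader reader = DirectoryReader.open(dir2)) {
  NumericDocValues dateDV = MultiDocValues.getNumericValues(reader, "dateDV");
  assertNotNull(dateDV);
  long previous = Long.MAX_VALUE;
  for (int docID = dateDV.nextDoc(); docID != DocIdSetIterator.NO_MORE_DOCS; docID = dateDV.nextDoc()) {
    assertTrue("docs must be sorted by dateDV descending", dateDV.longValue() <= previous);
    previous = dateDV.longValue();
  }
}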
Use of org.apache.lucene.util.LineFileDocs in project elasticsearch by elastic.
The class TranslogTests, method testWithRandomException.
/**
* This test adds operations to the translog which might randomly throw an IOException. The only thing this test verifies is
* that we can, after we hit an exception, open and recover the translog successfully and retrieve all successfully synced operations
* from the transaction log.
*/
public void testWithRandomException() throws IOException {
  final int runs = randomIntBetween(5, 10);
  for (int run = 0; run < runs; run++) {
    Path tempDir = createTempDir();
    final FailSwitch fail = new FailSwitch();
    fail.failRandomly();
    TranslogConfig config = getTranslogConfig(tempDir);
    final int numOps = randomIntBetween(100, 200);
    List<String> syncedDocs = new ArrayList<>();
    List<String> unsynced = new ArrayList<>();
    if (randomBoolean()) {
      fail.onceFailedFailAlways();
    }
    Translog.TranslogGeneration generation = null;
    try {
      final Translog failableTLog = getFailableTranslog(fail, config, randomBoolean(), false, generation);
      try {
        // writes pretty big docs so we cross buffer borders regularly
        LineFileDocs lineFileDocs = new LineFileDocs(random());
        for (int opsAdded = 0; opsAdded < numOps; opsAdded++) {
          String doc = lineFileDocs.nextDoc().toString();
          failableTLog.add(new Translog.Index("test", "" + opsAdded, doc.getBytes(Charset.forName("UTF-8"))));
          unsynced.add(doc);
          if (randomBoolean()) {
            failableTLog.sync();
            syncedDocs.addAll(unsynced);
            unsynced.clear();
          }
          if (randomFloat() < 0.1) {
            // we have to sync here first, otherwise we don't know whether the sync succeeded if the commit fails
            failableTLog.sync();
            syncedDocs.addAll(unsynced);
            unsynced.clear();
            if (randomBoolean()) {
              failableTLog.prepareCommit();
            }
            failableTLog.commit();
            syncedDocs.clear();
          }
        }
        // we survived all the randomness!!!
        // let's close the translog; if that succeeds we are fully synced again. If we skip this we close the translog
        // in the finally block but never copy the unsynced docs over to syncedDocs, and the assertion further down fails...
        failableTLog.close();
        syncedDocs.addAll(unsynced);
        unsynced.clear();
      } catch (TranslogException | MockDirectoryWrapper.FakeIOException ex) {
        // fair enough
      } catch (IOException ex) {
        assertEquals(ex.getMessage(), "__FAKE__ no space left on device");
      } finally {
        Checkpoint checkpoint = Translog.readCheckpoint(config.getTranslogPath());
        if (checkpoint.numOps == unsynced.size() + syncedDocs.size()) {
          // failed in fsync but got fully written
          syncedDocs.addAll(unsynced);
          unsynced.clear();
        }
        generation = failableTLog.getGeneration();
        IOUtils.closeWhileHandlingException(failableTLog);
      }
    } catch (TranslogException | MockDirectoryWrapper.FakeIOException ex) {
      // failed - that's ok, we didn't even create it
    } catch (IOException ex) {
      assertEquals(ex.getMessage(), "__FAKE__ no space left on device");
    }
    // now randomly open this failing tlog again just to make sure we can also recover from failing during recovery
    if (randomBoolean()) {
      try {
        IOUtils.close(getFailableTranslog(fail, config, randomBoolean(), false, generation));
      } catch (TranslogException | MockDirectoryWrapper.FakeIOException ex) {
        // failed - that's ok, we didn't even create it
      } catch (IOException ex) {
        assertEquals(ex.getMessage(), "__FAKE__ no space left on device");
      }
    }
    // we don't want to fail here, but we might, since we write a new checkpoint and create a new tlog file
    fail.failNever();
    try (Translog translog = new Translog(config, generation, () -> SequenceNumbersService.UNASSIGNED_SEQ_NO)) {
      Translog.Snapshot snapshot = translog.newSnapshot();
      assertEquals(syncedDocs.size(), snapshot.totalOperations());
      for (int i = 0; i < syncedDocs.size(); i++) {
        Translog.Operation next = snapshot.next();
        assertNotNull("operation " + i + " must be non-null", next);
        assertEquals(syncedDocs.get(i), next.getSource().source.utf8ToString());
      }
    }
  }
}
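The assertions at the end of the method rely on a simple round trip: the string form of each LineFileDocs document is stored as the UTF-8 source of a Translog.Index operation and compared back after recovery. An isolated sketch of that round trip, assuming a plain Translog named translog opened the same way as in the final try-with-resources block (the "0" id is illustrative), using only calls that already appear above:

// Sketch: doc string -> UTF-8 bytes -> Translog.Index source -> string round trip
// that the recovery assertions depend on.
LineFileDocs lineFileDocs = new LineFileDocs(random());
String doc = lineFileDocs.nextDoc().toString();
translog.add(new Translog.Index("test", "0", doc.getBytes(Charset.forName("UTF-8"))));
translog.sync();
Translog.Snapshot snapshot = translog.newSnapshot();
assertEquals(doc, snapshot.next().getSource().source.utf8ToString());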
Use of org.apache.lucene.util.LineFileDocs in project elasticsearch by elastic.
The class TranslogTests, method testTragicEventCanBeAnyException.
public void testTragicEventCanBeAnyException() throws IOException {
  Path tempDir = createTempDir();
  final FailSwitch fail = new FailSwitch();
  TranslogConfig config = getTranslogConfig(tempDir);
  Translog translog = getFailableTranslog(fail, config, false, true, null);
  // writes pretty big docs so we cross buffer borders regularly
  LineFileDocs lineFileDocs = new LineFileDocs(random());
  translog.add(new Translog.Index("test", "1", lineFileDocs.nextDoc().toString().getBytes(Charset.forName("UTF-8"))));
  fail.failAlways();
  try {
    Translog.Location location = translog.add(new Translog.Index("test", "2", lineFileDocs.nextDoc().toString().getBytes(Charset.forName("UTF-8"))));
    if (randomBoolean()) {
      translog.ensureSynced(location);
    } else {
      translog.sync();
    }
    // TODO once we have a mock FS that can simulate we can also fail on plain sync
    fail("the injected failure should have tripped the translog");
  } catch (UnknownException ex) {
    // w00t
  } catch (TranslogException ex) {
    assertTrue(ex.getCause() instanceof UnknownException);
  }
  assertFalse(translog.isOpen());
  assertTrue(translog.getTragicException() instanceof UnknownException);
}
Use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.
The class TestCustomNorms, method testFloatNorms.
public void testFloatNorms() throws IOException {
  Directory dir = newDirectory();
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  IndexWriterConfig config = newIndexWriterConfig(analyzer);
  Similarity provider = new MySimProvider();
  config.setSimilarity(provider);
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
  final LineFileDocs docs = new LineFileDocs(random());
  int num = atLeast(100);
  for (int i = 0; i < num; i++) {
    Document doc = docs.nextDoc();
    int boost = TestUtil.nextInt(random(), 1, 10);
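    // the field value repeats the boost 'boost' times, so the token count of FLOAT_TEST_FIELD equals the boost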
    String value = IntStream.range(0, boost).mapToObj(k -> Integer.toString(boost)).collect(Collectors.joining(" "));
    Field f = new TextField(FLOAT_TEST_FIELD, value, Field.Store.YES);
    doc.add(f);
    writer.addDocument(doc);
    doc.removeField(FLOAT_TEST_FIELD);
    if (rarely()) {
      writer.commit();
    }
  }
  writer.commit();
  writer.close();
  DirectoryReader open = DirectoryReader.open(dir);
  NumericDocValues norms = MultiDocValues.getNormValues(open, FLOAT_TEST_FIELD);
  assertNotNull(norms);
  for (int i = 0; i < open.maxDoc(); i++) {
    Document document = open.document(i);
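    // the first token of the stored value is the boost used at index time; the custom MySimProvider similarity
    // is expected to have recorded that token count as the norm, which the next two asserts verify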
    int expected = Integer.parseInt(document.get(FLOAT_TEST_FIELD).split(" ")[0]);
    assertEquals(i, norms.nextDoc());
    assertEquals(expected, norms.longValue());
  }
  open.close();
  dir.close();
  docs.close();
}
Use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.
The class TestTermsEnum, method test.
public void test() throws Exception {
  Random random = new Random(random().nextLong());
  final LineFileDocs docs = new LineFileDocs(random);
  final Directory d = newDirectory();
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  final RandomIndexWriter w = new RandomIndexWriter(random(), d, analyzer);
  final int numDocs = atLeast(10);
  for (int docCount = 0; docCount < numDocs; docCount++) {
    w.addDocument(docs.nextDoc());
  }
  final IndexReader r = w.getReader();
  w.close();
  final List<BytesRef> terms = new ArrayList<>();
  final TermsEnum termsEnum = MultiFields.getTerms(r, "body").iterator();
  BytesRef term;
  while ((term = termsEnum.next()) != null) {
    terms.add(BytesRef.deepCopyOf(term));
  }
  if (VERBOSE) {
    System.out.println("TEST: " + terms.size() + " terms");
  }
  int upto = -1;
  final int iters = atLeast(200);
  for (int iter = 0; iter < iters; iter++) {
    final boolean isEnd;
    if (upto != -1 && random().nextBoolean()) {
      // next
      if (VERBOSE) {
        System.out.println("TEST: iter next");
      }
      isEnd = termsEnum.next() == null;
      upto++;
      if (isEnd) {
        if (VERBOSE) {
          System.out.println(" end");
        }
        assertEquals(upto, terms.size());
        upto = -1;
      } else {
        if (VERBOSE) {
          System.out.println(" got term=" + termsEnum.term().utf8ToString() + " expected=" + terms.get(upto).utf8ToString());
        }
        assertTrue(upto < terms.size());
        assertEquals(terms.get(upto), termsEnum.term());
      }
    } else {
      final BytesRef target;
      final String exists;
      if (random().nextBoolean()) {
        // likely fake term
        if (random().nextBoolean()) {
          target = new BytesRef(TestUtil.randomSimpleString(random()));
        } else {
          target = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
        }
        exists = "likely not";
      } else {
        // real term
        target = terms.get(random().nextInt(terms.size()));
        exists = "yes";
      }
      upto = Collections.binarySearch(terms, target);
      if (random().nextBoolean()) {
        if (VERBOSE) {
          System.out.println("TEST: iter seekCeil target=" + target.utf8ToString() + " exists=" + exists);
        }
        // seekCeil
        final TermsEnum.SeekStatus status = termsEnum.seekCeil(target);
        if (VERBOSE) {
          System.out.println(" got " + status);
        }
        if (upto < 0) {
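          // Collections.binarySearch returned (-(insertion point) - 1) because the target is absent; recover the insertion point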
          upto = -(upto + 1);
          if (upto >= terms.size()) {
            assertEquals(TermsEnum.SeekStatus.END, status);
            upto = -1;
          } else {
            assertEquals(TermsEnum.SeekStatus.NOT_FOUND, status);
            assertEquals(terms.get(upto), termsEnum.term());
          }
        } else {
          assertEquals(TermsEnum.SeekStatus.FOUND, status);
          assertEquals(terms.get(upto), termsEnum.term());
        }
      } else {
        if (VERBOSE) {
          System.out.println("TEST: iter seekExact target=" + target.utf8ToString() + " exists=" + exists);
        }
        // seekExact
        final boolean result = termsEnum.seekExact(target);
        if (VERBOSE) {
          System.out.println(" got " + result);
        }
        if (upto < 0) {
          assertFalse(result);
          upto = -1;
        } else {
          assertTrue(result);
          assertEquals(target, termsEnum.term());
        }
      }
    }
  }
  r.close();
  d.close();
  docs.close();
}