Search in sources :

Example 21 with LineFileDocs

use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.

From the class TestBackwardsCompatibility, method testCreateSortedIndex.

// ant test -Dtestcase=TestBackwardsCompatibility -Dtestmethod=testCreateSortedIndex -Dtests.codec=default -Dtests.useSecurityManager=false -Dtests.bwcdir=/tmp/sorted
public void testCreateSortedIndex() throws Exception {
    Path indexDir = getIndexDir().resolve("sorted");
    Files.deleteIfExists(indexDir);
    Directory dir = newFSDirectory(indexDir);
    // Disable compound files entirely so the generated bwc index has a stable layout.
    LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
    mp.setNoCFSRatio(1.0);
    mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    // TODO: remove randomness
    IndexWriterConfig conf = new IndexWriterConfig(analyzer);
    conf.setMergePolicy(mp);
    conf.setUseCompoundFile(false);
    // The index is sorted by the dateDV doc-values field, descending (reverse=true).
    conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true)));
    IndexWriter writer = new IndexWriter(dir, conf);
    LineFileDocs docs = new LineFileDocs(random());
    SimpleDateFormat parser = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT);
    parser.setTimeZone(TimeZone.getTimeZone("UTC"));
    ParsePosition position = new ParsePosition(0);
    Field dateDVField = null;
    for (int i = 0; i < 50; i++) {
        Document doc = docs.nextDoc();
        String dateString = doc.get("date");
        // Reset and parse with ParsePosition so we can detect both a failed parse
        // and trailing unparsed characters.
        position.setIndex(0);
        Date date = parser.parse(dateString, position);
        if (position.getErrorIndex() != -1) {
            throw new AssertionError("failed to parse \"" + dateString + "\" as date");
        }
        if (position.getIndex() != dateString.length()) {
            throw new AssertionError("failed to parse \"" + dateString + "\" as date");
        }
        if (dateDVField == null) {
            // The field instance is created once and reused for every document;
            // only its value changes per iteration.
            dateDVField = new NumericDocValuesField("dateDV", 0L);
            doc.add(dateDVField);
        }
        dateDVField.setLongValue(date.getTime());
        // BUG FIX: the loop only runs 50 iterations, so the previous "i == 250"
        // guard was dead code and the mid-stream commit never happened. Commit at
        // the halfway point so the index contains multiple segments before the
        // forceMerge below.
        if (i == 25) {
            writer.commit();
        }
        writer.addDocument(doc);
    }
    writer.forceMerge(1);
    writer.close();
    dir.close();
    // BUG FIX: LineFileDocs is Closeable and was previously leaked; sibling tests
    // in this page (testFloatNorms, TestTermsEnum.test) close it.
    docs.close();
}
Also used : Path(java.nio.file.Path) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) Date(java.util.Date) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) SortField(org.apache.lucene.search.SortField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) StringField(org.apache.lucene.document.StringField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Sort(org.apache.lucene.search.Sort) SimpleDateFormat(java.text.SimpleDateFormat) Directory(org.apache.lucene.store.Directory) RAMDirectory(org.apache.lucene.store.RAMDirectory) FSDirectory(org.apache.lucene.store.FSDirectory) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) NIOFSDirectory(org.apache.lucene.store.NIOFSDirectory) LineFileDocs(org.apache.lucene.util.LineFileDocs) ParsePosition(java.text.ParsePosition)

Example 22 with LineFileDocs

use of org.apache.lucene.util.LineFileDocs in project elasticsearch by elastic.

From the class TranslogTests, method testWithRandomException.

/**
 * This test adds operations to the translog which might randomly throw an IOException. The only thing this test verifies is
 * that we can, after we hit an exception, open and recover the translog successfully and retrieve all successfully synced operations
 * from the transaction log.
 */
public void testWithRandomException() throws IOException {
    final int runs = randomIntBetween(5, 10);
    for (int run = 0; run < runs; run++) {
        Path tempDir = createTempDir();
        final FailSwitch fail = new FailSwitch();
        fail.failRandomly();
        TranslogConfig config = getTranslogConfig(tempDir);
        final int numOps = randomIntBetween(100, 200);
        // docs known to have been fsynced before any failure -> must be recoverable
        List<String> syncedDocs = new ArrayList<>();
        // docs added but not yet confirmed synced
        List<String> unsynced = new ArrayList<>();
        if (randomBoolean()) {
            fail.onceFailedFailAlways();
        }
        Translog.TranslogGeneration generation = null;
        try {
            final Translog failableTLog = getFailableTranslog(fail, config, randomBoolean(), false, generation);
            try {
                // writes pretty big docs so we cross buffer borders regularly
                LineFileDocs lineFileDocs = new LineFileDocs(random());
                for (int opsAdded = 0; opsAdded < numOps; opsAdded++) {
                    String doc = lineFileDocs.nextDoc().toString();
                    failableTLog.add(new Translog.Index("test", "" + opsAdded, doc.getBytes(Charset.forName("UTF-8"))));
                    unsynced.add(doc);
                    if (randomBoolean()) {
                        failableTLog.sync();
                        syncedDocs.addAll(unsynced);
                        unsynced.clear();
                    }
                    if (randomFloat() < 0.1) {
                        // we have to sync here first otherwise we don't know if the sync succeeded if the commit fails
                        failableTLog.sync();
                        syncedDocs.addAll(unsynced);
                        unsynced.clear();
                        if (randomBoolean()) {
                            failableTLog.prepareCommit();
                        }
                        failableTLog.commit();
                        syncedDocs.clear();
                    }
                }
                // we survived all the randomness!!!
                // lets close the translog and if it succeeds we are all synced again. If we don't do this we will close
                // it in the finally block but miss to copy over unsynced docs to syncedDocs and fail the assertion down the road...
                failableTLog.close();
                syncedDocs.addAll(unsynced);
                unsynced.clear();
            } catch (TranslogException | MockDirectoryWrapper.FakeIOException ex) {
                // fair enough
            } catch (IOException ex) {
                assertEquals(ex.getMessage(), "__FAKE__ no space left on device");
            } finally {
                Checkpoint checkpoint = Translog.readCheckpoint(config.getTranslogPath());
                if (checkpoint.numOps == unsynced.size() + syncedDocs.size()) {
                    // failed in fsync but got fully written
                    syncedDocs.addAll(unsynced);
                    unsynced.clear();
                }
                generation = failableTLog.getGeneration();
                IOUtils.closeWhileHandlingException(failableTLog);
            }
        } catch (TranslogException | MockDirectoryWrapper.FakeIOException ex) {
            // failed - that's ok, we didn't even create it
        } catch (IOException ex) {
            assertEquals(ex.getMessage(), "__FAKE__ no space left on device");
        }
        // now randomly open this failing tlog again just to make sure we can also recover from failing during recovery
        if (randomBoolean()) {
            try {
                IOUtils.close(getFailableTranslog(fail, config, randomBoolean(), false, generation));
            } catch (TranslogException | MockDirectoryWrapper.FakeIOException ex) {
                // failed - that's ok, we didn't even create it
            } catch (IOException ex) {
                assertEquals(ex.getMessage(), "__FAKE__ no space left on device");
            }
        }
        // we don't wanna fail here but we might since we write a new checkpoint and create a new tlog file
        fail.failNever();
        try (Translog translog = new Translog(config, generation, () -> SequenceNumbersService.UNASSIGNED_SEQ_NO)) {
            Translog.Snapshot snapshot = translog.newSnapshot();
            assertEquals(syncedDocs.size(), snapshot.totalOperations());
            for (int i = 0; i < syncedDocs.size(); i++) {
                Translog.Operation next = snapshot.next();
                // BUG FIX: assert non-null BEFORE dereferencing; previously
                // next.getSource() ran first, so a null operation produced an NPE
                // instead of this assertion's diagnostic message.
                assertNotNull("operation " + i + " must be non-null", next);
                assertEquals(syncedDocs.get(i), next.getSource().source.utf8ToString());
            }
        }
    }
}
Also used : Path(java.nio.file.Path) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) Matchers.hasToString(org.hamcrest.Matchers.hasToString) Matchers.containsString(org.hamcrest.Matchers.containsString) IOException(java.io.IOException) LineFileDocs(org.apache.lucene.util.LineFileDocs)

Example 23 with LineFileDocs

use of org.apache.lucene.util.LineFileDocs in project elasticsearch by elastic.

From the class TranslogTests, method testTragicEventCanBeAnyException.

// Verifies that any exception type (here a simulated UnknownException) raised during
// an add/sync is recorded as the translog's tragic event and closes the translog.
public void testTragicEventCanBeAnyException() throws IOException {
    final Path tempDir = createTempDir();
    final FailSwitch fail = new FailSwitch();
    final TranslogConfig config = getTranslogConfig(tempDir);
    final Translog translog = getFailableTranslog(fail, config, false, true, null);
    // LineFileDocs produces fairly large documents, so writes cross buffer borders regularly.
    final LineFileDocs docSource = new LineFileDocs(random());
    final byte[] firstPayload = docSource.nextDoc().toString().getBytes(Charset.forName("UTF-8"));
    translog.add(new Translog.Index("test", "1", firstPayload));
    // From here on every fallible operation throws.
    fail.failAlways();
    try {
        final byte[] secondPayload = docSource.nextDoc().toString().getBytes(Charset.forName("UTF-8"));
        Translog.Location location = translog.add(new Translog.Index("test", "2", secondPayload));
        if (randomBoolean()) {
            translog.ensureSynced(location);
        } else {
            translog.sync();
        }
        //TODO once we have a mock FS that can simulate we can also fail on plain sync
        fail("WTF");
    } catch (UnknownException ex) {
        // expected: the simulated failure surfaced directly
    } catch (TranslogException ex) {
        // also acceptable: the failure arrives wrapped, but the cause must match
        assertTrue(ex.getCause() instanceof UnknownException);
    }
    // The tragic event must have closed the translog and been recorded on it.
    assertFalse(translog.isOpen());
    assertTrue(translog.getTragicException() instanceof UnknownException);
}
Also used : Path(java.nio.file.Path) Location(org.elasticsearch.index.translog.Translog.Location) LineFileDocs(org.apache.lucene.util.LineFileDocs)

Example 24 with LineFileDocs

use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.

From the class TestCustomNorms, method testFloatNorms.

// Indexes documents whose test field repeats a random boost value 'boost' times,
// then verifies that the custom similarity (MySimProvider) stored that boost as
// the norm for every document.
public void testFloatNorms() throws IOException {
    Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig config = newIndexWriterConfig(analyzer);
    Similarity provider = new MySimProvider();
    config.setSimilarity(provider);
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
    final LineFileDocs docs = new LineFileDocs(random());
    int docCount = atLeast(100);
    for (int i = 0; i < docCount; i++) {
        Document doc = docs.nextDoc();
        int boost = TestUtil.nextInt(random(), 1, 10);
        // Build "boost boost ... boost" (boost repetitions, space-separated):
        // the field value encodes its own expected norm.
        StringBuilder value = new StringBuilder();
        for (int rep = 0; rep < boost; rep++) {
            if (rep > 0) {
                value.append(' ');
            }
            value.append(boost);
        }
        Field field = new TextField(FLOAT_TEST_FIELD, value.toString(), Field.Store.YES);
        doc.add(field);
        writer.addDocument(doc);
        // Remove the field again so the shared LineFileDocs document stays clean.
        doc.removeField(FLOAT_TEST_FIELD);
        if (rarely()) {
            writer.commit();
        }
    }
    writer.commit();
    writer.close();
    DirectoryReader reader = DirectoryReader.open(dir);
    NumericDocValues norms = MultiDocValues.getNormValues(reader, FLOAT_TEST_FIELD);
    assertNotNull(norms);
    for (int docId = 0; docId < reader.maxDoc(); docId++) {
        Document stored = reader.document(docId);
        // The first token of the stored value is the boost that was indexed.
        int expected = Integer.parseInt(stored.get(FLOAT_TEST_FIELD).split(" ")[0]);
        assertEquals(docId, norms.nextDoc());
        assertEquals(expected, norms.longValue());
    }
    reader.close();
    dir.close();
    docs.close();
}
Also used : IntStream(java.util.stream.IntStream) ClassicSimilarity(org.apache.lucene.search.similarities.ClassicSimilarity) TestUtil(org.apache.lucene.util.TestUtil) IOException(java.io.IOException) PerFieldSimilarityWrapper(org.apache.lucene.search.similarities.PerFieldSimilarityWrapper) Collectors(java.util.stream.Collectors) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) CollectionStatistics(org.apache.lucene.search.CollectionStatistics) LineFileDocs(org.apache.lucene.util.LineFileDocs) Field(org.apache.lucene.document.Field) Similarity(org.apache.lucene.search.similarities.Similarity) Directory(org.apache.lucene.store.Directory) LuceneTestCase(org.apache.lucene.util.LuceneTestCase) TextField(org.apache.lucene.document.TextField) TermStatistics(org.apache.lucene.search.TermStatistics) ClassicSimilarity(org.apache.lucene.search.similarities.ClassicSimilarity) Similarity(org.apache.lucene.search.similarities.Similarity) Document(org.apache.lucene.document.Document) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) TextField(org.apache.lucene.document.TextField) Directory(org.apache.lucene.store.Directory) LineFileDocs(org.apache.lucene.util.LineFileDocs)

Example 25 with LineFileDocs

use of org.apache.lucene.util.LineFileDocs in project lucene-solr by apache.

From the class TestTermsEnum, method test.

// End-to-end TermsEnum check: index random line-file docs, snapshot every term of
// the "body" field into a sorted list, then randomly interleave next(), seekCeil()
// and seekExact() calls, validating each result against the in-memory list.
public void test() throws Exception {
    Random random = new Random(random().nextLong());
    final LineFileDocs docs = new LineFileDocs(random);
    final Directory d = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    final RandomIndexWriter w = new RandomIndexWriter(random(), d, analyzer);
    final int numDocs = atLeast(10);
    for (int docCount = 0; docCount < numDocs; docCount++) {
        w.addDocument(docs.nextDoc());
    }
    final IndexReader r = w.getReader();
    w.close();
    // Collect every "body" term in index (sorted) order so seek results can be
    // cross-checked with Collections.binarySearch below.
    final List<BytesRef> terms = new ArrayList<>();
    final TermsEnum termsEnum = MultiFields.getTerms(r, "body").iterator();
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
        // deepCopyOf: the enum reuses its BytesRef, so each term must be copied.
        terms.add(BytesRef.deepCopyOf(term));
    }
    if (VERBOSE) {
        System.out.println("TEST: " + terms.size() + " terms");
    }
    // upto is the enum's expected position in 'terms'; -1 means unpositioned
    // (not yet seeked, exhausted, or last seekExact missed).
    int upto = -1;
    final int iters = atLeast(200);
    for (int iter = 0; iter < iters; iter++) {
        final boolean isEnd;
        if (upto != -1 && random().nextBoolean()) {
            // next
            if (VERBOSE) {
                System.out.println("TEST: iter next");
            }
            isEnd = termsEnum.next() == null;
            upto++;
            if (isEnd) {
                if (VERBOSE) {
                    System.out.println("  end");
                }
                // The enum may only be exhausted exactly after the last term.
                assertEquals(upto, terms.size());
                upto = -1;
            } else {
                if (VERBOSE) {
                    System.out.println("  got term=" + termsEnum.term().utf8ToString() + " expected=" + terms.get(upto).utf8ToString());
                }
                assertTrue(upto < terms.size());
                assertEquals(terms.get(upto), termsEnum.term());
            }
        } else {
            // Pick a seek target: ~50% a random (likely absent) string, else an
            // existing term.
            final BytesRef target;
            final String exists;
            if (random().nextBoolean()) {
                // likely fake term
                if (random().nextBoolean()) {
                    target = new BytesRef(TestUtil.randomSimpleString(random()));
                } else {
                    target = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
                }
                exists = "likely not";
            } else {
                // real term
                target = terms.get(random().nextInt(terms.size()));
                exists = "yes";
            }
            // binarySearch returns the index when found, or (-(insertion point) - 1)
            // when absent — both cases are used below.
            upto = Collections.binarySearch(terms, target);
            if (random().nextBoolean()) {
                if (VERBOSE) {
                    System.out.println("TEST: iter seekCeil target=" + target.utf8ToString() + " exists=" + exists);
                }
                // seekCeil
                final TermsEnum.SeekStatus status = termsEnum.seekCeil(target);
                if (VERBOSE) {
                    System.out.println("  got " + status);
                }
                if (upto < 0) {
                    // Target absent: decode the insertion point; the enum must land
                    // on the next greater term, or report END past the last term.
                    upto = -(upto + 1);
                    if (upto >= terms.size()) {
                        assertEquals(TermsEnum.SeekStatus.END, status);
                        upto = -1;
                    } else {
                        assertEquals(TermsEnum.SeekStatus.NOT_FOUND, status);
                        assertEquals(terms.get(upto), termsEnum.term());
                    }
                } else {
                    assertEquals(TermsEnum.SeekStatus.FOUND, status);
                    assertEquals(terms.get(upto), termsEnum.term());
                }
            } else {
                if (VERBOSE) {
                    System.out.println("TEST: iter seekExact target=" + target.utf8ToString() + " exists=" + exists);
                }
                // seekExact
                final boolean result = termsEnum.seekExact(target);
                if (VERBOSE) {
                    System.out.println("  got " + result);
                }
                if (upto < 0) {
                    // Absent target: seekExact must miss, leaving the enum unpositioned.
                    assertFalse(result);
                    upto = -1;
                } else {
                    assertTrue(result);
                    assertEquals(target, termsEnum.term());
                }
            }
        }
    }
    r.close();
    d.close();
    docs.close();
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BytesRef(org.apache.lucene.util.BytesRef) LineFileDocs(org.apache.lucene.util.LineFileDocs) Directory(org.apache.lucene.store.Directory)

Aggregations

LineFileDocs (org.apache.lucene.util.LineFileDocs)45 Document (org.apache.lucene.document.Document)27 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)24 Directory (org.apache.lucene.store.Directory)17 Path (java.nio.file.Path)16 IOException (java.io.IOException)9 BytesRef (org.apache.lucene.util.BytesRef)8 Random (java.util.Random)7 ByteArrayOutputStream (java.io.ByteArrayOutputStream)5 ArrayList (java.util.ArrayList)5 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)5 IntPoint (org.apache.lucene.document.IntPoint)4 RAMDirectory (org.apache.lucene.store.RAMDirectory)4 HashMap (java.util.HashMap)3 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)3 Analyzer (org.apache.lucene.analysis.Analyzer)3 DoublePoint (org.apache.lucene.document.DoublePoint)3 Field (org.apache.lucene.document.Field)3 FloatPoint (org.apache.lucene.document.FloatPoint)3 LongPoint (org.apache.lucene.document.LongPoint)3