
Example 11 with ByteArrayDataOutput

Use of org.apache.lucene.store.ByteArrayDataOutput in project elasticsearch by elastic.

The class TranslogTests, method testTranslogWriter.

public void testTranslogWriter() throws IOException {
    final TranslogWriter writer = translog.createWriter(0);
    final int numOps = randomIntBetween(8, 128);
    byte[] bytes = new byte[4];
    ByteArrayDataOutput out = new ByteArrayDataOutput(bytes);
    final Set<Long> seenSeqNos = new HashSet<>();
    boolean opsHaveValidSequenceNumbers = randomBoolean();
    for (int i = 0; i < numOps; i++) {
        out.reset(bytes);
        out.writeInt(i);
        long seqNo;
        do {
            seqNo = opsHaveValidSequenceNumbers ? randomNonNegativeLong() : SequenceNumbersService.UNASSIGNED_SEQ_NO;
            opsHaveValidSequenceNumbers = opsHaveValidSequenceNumbers || !rarely();
        } while (seenSeqNos.contains(seqNo));
        if (seqNo != SequenceNumbersService.UNASSIGNED_SEQ_NO) {
            seenSeqNos.add(seqNo);
        }
        writer.add(new BytesArray(bytes), seqNo);
    }
    writer.sync();
    final BaseTranslogReader reader = randomBoolean() ? writer : translog.openReader(writer.path(), Checkpoint.read(translog.location().resolve(Translog.CHECKPOINT_FILE_NAME)));
    for (int i = 0; i < numOps; i++) {
        ByteBuffer buffer = ByteBuffer.allocate(4);
        reader.readBytes(buffer, reader.getFirstOperationOffset() + 4 * i);
        buffer.flip();
        final int value = buffer.getInt();
        assertEquals(i, value);
    }
    final long minSeqNo = seenSeqNos.stream().min(Long::compareTo).orElse(SequenceNumbersService.NO_OPS_PERFORMED);
    final long maxSeqNo = seenSeqNos.stream().max(Long::compareTo).orElse(SequenceNumbersService.NO_OPS_PERFORMED);
    assertThat(reader.getCheckpoint().minSeqNo, equalTo(minSeqNo));
    assertThat(reader.getCheckpoint().maxSeqNo, equalTo(maxSeqNo));
    out.reset(bytes);
    out.writeInt(2048);
    writer.add(new BytesArray(bytes), randomNonNegativeLong());
    if (reader instanceof TranslogReader) {
        ByteBuffer buffer = ByteBuffer.allocate(4);
        try {
            reader.readBytes(buffer, reader.getFirstOperationOffset() + 4 * numOps);
            fail("read past EOF?");
        } catch (EOFException ex) {
            // expected
        }
        ((TranslogReader) reader).close();
    } else {
        // live reader!
        ByteBuffer buffer = ByteBuffer.allocate(4);
        final long pos = reader.getFirstOperationOffset() + 4 * numOps;
        reader.readBytes(buffer, pos);
        buffer.flip();
        final int value = buffer.getInt();
        assertEquals(2048, value);
    }
    IOUtils.close(writer);
}
Also used: BytesArray (org.elasticsearch.common.bytes.BytesArray), ByteBuffer (java.nio.ByteBuffer), ByteArrayDataOutput (org.apache.lucene.store.ByteArrayDataOutput), AtomicLong (java.util.concurrent.atomic.AtomicLong), EOFException (java.io.EOFException), HashSet (java.util.HashSet)
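
The pattern worth noting above is buffer reuse: out.reset(bytes) rewinds the ByteArrayDataOutput to position 0 on the same backing array, so each writeInt overwrites the previous four bytes before writer.add copies them into the translog. Here is a minimal standalone sketch of that round trip using only the public Lucene store API; the class name is ours, not part of the test:

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

public class ReusedBufferSketch {
    public static void main(String[] args) throws Exception {
        byte[] bytes = new byte[4];
        ByteArrayDataOutput out = new ByteArrayDataOutput(bytes);
        ByteArrayDataInput in = new ByteArrayDataInput();
        for (int i = 0; i < 3; i++) {
            // rewind to position 0; the backing array is reused, not reallocated
            out.reset(bytes);
            out.writeInt(i);
            // read back exactly the bytes that were written
            in.reset(bytes, 0, out.getPosition());
            if (in.readInt() != i || !in.eof()) {
                throw new AssertionError("round trip failed at " + i);
            }
        }
    }
}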

Example 12 with ByteArrayDataOutput

Use of org.apache.lucene.store.ByteArrayDataOutput in project lucene-solr by apache.

The class TestCompressingStoredFieldsFormat, method testZDouble.

public void testZDouble() throws Exception {
    // we never need more than 9 bytes
    byte[] buffer = new byte[9];
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    ByteArrayDataInput in = new ByteArrayDataInput(buffer);
    // round-trip small integer values
    for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
        double x = (double) i;
        CompressingStoredFieldsWriter.writeZDouble(out, x);
        in.reset(buffer, 0, out.getPosition());
        double y = CompressingStoredFieldsReader.readZDouble(in);
        assertTrue(in.eof());
        assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
        // check that compression actually works
        if (i >= -1 && i <= 124) {
            // single byte compression
            assertEquals(1, out.getPosition());
        }
        out.reset(buffer);
    }
    // round-trip special values
    double[] special = { -0.0d, +0.0d, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, Double.MIN_VALUE, Double.MAX_VALUE, Double.NaN };
    for (double x : special) {
        CompressingStoredFieldsWriter.writeZDouble(out, x);
        in.reset(buffer, 0, out.getPosition());
        double y = CompressingStoredFieldsReader.readZDouble(in);
        assertTrue(in.eof());
        assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
        out.reset(buffer);
    }
    // round-trip random values
    Random r = random();
    for (int i = 0; i < 100000; i++) {
        double x = r.nextDouble() * (random().nextInt(100) - 50);
        CompressingStoredFieldsWriter.writeZDouble(out, x);
        assertTrue("length=" + out.getPosition() + ", d=" + x, out.getPosition() <= (x < 0 ? 9 : 8));
        in.reset(buffer, 0, out.getPosition());
        double y = CompressingStoredFieldsReader.readZDouble(in);
        assertTrue(in.eof());
        assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
        out.reset(buffer);
    }
    // same with floats
    for (int i = 0; i < 100000; i++) {
        double x = (double) (r.nextFloat() * (random().nextInt(100) - 50));
        CompressingStoredFieldsWriter.writeZDouble(out, x);
        assertTrue("length=" + out.getPosition() + ", d=" + x, out.getPosition() <= 5);
        in.reset(buffer, 0, out.getPosition());
        double y = CompressingStoredFieldsReader.readZDouble(in);
        assertTrue(in.eof());
        assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
        out.reset(buffer);
    }
}
Also used: Random (java.util.Random), ByteArrayDataOutput (org.apache.lucene.store.ByteArrayDataOutput), ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput), IntPoint (org.apache.lucene.document.IntPoint)
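
Note that writeZDouble and readZDouble are package-private statics on CompressingStoredFieldsWriter/Reader, which is why this test can call them: it lives in the same package. The harness itself (write, in.reset(buffer, 0, out.getPosition()), read, assert in.eof()) works for any DataOutput encoding. A sketch of the same shape using the public vLong methods; the class name is ours:

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

public class VLongRoundTripSketch {
    public static void main(String[] args) throws Exception {
        // a non-negative vLong needs at most 9 bytes (7 payload bits per byte)
        byte[] buffer = new byte[9];
        ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
        ByteArrayDataInput in = new ByteArrayDataInput(buffer);
        for (long l : new long[] { 0L, 1L, 127L, 128L, Long.MAX_VALUE }) {
            out.reset(buffer);
            out.writeVLong(l);
            in.reset(buffer, 0, out.getPosition());
            long back = in.readVLong();
            // eof() confirms the decoder consumed exactly what the encoder produced
            if (back != l || !in.eof()) {
                throw new AssertionError("round trip failed for " + l);
            }
        }
    }
}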

Example 13 with ByteArrayDataOutput

Use of org.apache.lucene.store.ByteArrayDataOutput in project lucene-solr by apache.

The class TestCompressingStoredFieldsFormat, method testTLong.

public void testTLong() throws Exception {
    // we never need more than 10 bytes
    byte[] buffer = new byte[10];
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    ByteArrayDataInput in = new ByteArrayDataInput(buffer);
    // round-trip small integer values
    for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
        for (long mul : new long[] { SECOND, HOUR, DAY }) {
            long l1 = (long) i * mul;
            CompressingStoredFieldsWriter.writeTLong(out, l1);
            in.reset(buffer, 0, out.getPosition());
            long l2 = CompressingStoredFieldsReader.readTLong(in);
            assertTrue(in.eof());
            assertEquals(l1, l2);
            // check that compression actually works
            if (i >= -16 && i <= 15) {
                // single byte compression
                assertEquals(1, out.getPosition());
            }
            out.reset(buffer);
        }
    }
    // round-trip random values
    Random r = random();
    for (int i = 0; i < 100000; i++) {
        final int numBits = r.nextInt(65);
        long l1 = r.nextLong() & ((1L << numBits) - 1);
        switch(r.nextInt(4)) {
            case 0:
                l1 *= SECOND;
                break;
            case 1:
                l1 *= HOUR;
                break;
            case 2:
                l1 *= DAY;
                break;
            default:
                break;
        }
        CompressingStoredFieldsWriter.writeTLong(out, l1);
        in.reset(buffer, 0, out.getPosition());
        long l2 = CompressingStoredFieldsReader.readTLong(in);
        assertTrue(in.eof());
        assertEquals(l1, l2);
        out.reset(buffer);
    }
}
Also used: Random (java.util.Random), ByteArrayDataOutput (org.apache.lucene.store.ByteArrayDataOutput), ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput), IntPoint (org.apache.lucene.document.IntPoint)
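
The SECOND/HOUR/DAY multiples are the whole point of the tLong encoding: a timestamp that is an exact multiple of a common time unit can be stored as a small unit tag plus a much smaller quotient. The sketch below shows only that idea; it is NOT Lucene's actual wire format (which packs the tag into header bits and zigzag-encodes the remainder):

public class TLongScaleSketch {
    static final long SECOND = 1000L;
    static final long HOUR = 60 * 60 * SECOND;
    static final long DAY = 24 * HOUR;

    // Conceptual only -- not Lucene's tLong format. Returns {unit, quotient}.
    static long[] scaleDown(long timestamp) {
        for (long unit : new long[] { DAY, HOUR, SECOND }) { // try the coarsest unit first
            if (timestamp % unit == 0) {
                return new long[] { unit, timestamp / unit }; // fewer bits to varint-encode
            }
        }
        return new long[] { 1L, timestamp }; // not a round multiple; store as-is
    }

    public static void main(String[] args) {
        long[] scaled = scaleDown(42L * DAY);
        if (scaled[0] != DAY || scaled[1] != 42L) {
            throw new AssertionError();
        }
    }
}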

Example 14 with ByteArrayDataOutput

Use of org.apache.lucene.store.ByteArrayDataOutput in project lucene-solr by apache.

The class Test2BBinaryDocValues, method testVariableBinary.

// indexes IndexWriter.MAX_DOCS docs with a variable binary field
public void testVariableBinary() throws Exception {
    BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BVariableBinary"));
    if (dir instanceof MockDirectoryWrapper) {
        ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).setRAMBufferSizeMB(256.0).setMergeScheduler(new ConcurrentMergeScheduler()).setMergePolicy(newLogMergePolicy(false, 10)).setOpenMode(IndexWriterConfig.OpenMode.CREATE).setCodec(TestUtil.getDefaultCodec()));
    Document doc = new Document();
    byte[] bytes = new byte[4];
    ByteArrayDataOutput encoder = new ByteArrayDataOutput(bytes);
    BytesRef data = new BytesRef(bytes);
    BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);
    doc.add(dvField);
    for (int i = 0; i < IndexWriter.MAX_DOCS; i++) {
        encoder.reset(bytes);
        // 1, 2, or 3 bytes
        encoder.writeVInt(i % 65535);
        data.length = encoder.getPosition();
        w.addDocument(doc);
        if (i % 100000 == 0) {
            System.out.println("indexed: " + i);
            System.out.flush();
        }
    }
    w.forceMerge(1);
    w.close();
    System.out.println("verifying...");
    System.out.flush();
    DirectoryReader r = DirectoryReader.open(dir);
    int expectedValue = 0;
    ByteArrayDataInput input = new ByteArrayDataInput();
    for (LeafReaderContext context : r.leaves()) {
        LeafReader reader = context.reader();
        BinaryDocValues dv = reader.getBinaryDocValues("dv");
        for (int i = 0; i < reader.maxDoc(); i++) {
            assertEquals(i, dv.nextDoc());
            final BytesRef term = dv.binaryValue();
            input.reset(term.bytes, term.offset, term.length);
            assertEquals(expectedValue % 65535, input.readVInt());
            assertTrue(input.eof());
            expectedValue++;
        }
    }
    r.close();
    dir.close();
}
Also used: MockDirectoryWrapper (org.apache.lucene.store.MockDirectoryWrapper), Document (org.apache.lucene.document.Document), ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput), BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), ByteArrayDataOutput (org.apache.lucene.store.ByteArrayDataOutput), BaseDirectoryWrapper (org.apache.lucene.store.BaseDirectoryWrapper), BytesRef (org.apache.lucene.util.BytesRef)
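
What makes this test viable at IndexWriter.MAX_DOCS scale is that nothing is allocated per document: doc, dvField, data, and bytes are created once, and because the BytesRef aliases the encoder's backing array, rewriting the array and updating data.length is enough to change what the next addDocument indexes. The aliasing in isolation (class name is ours):

import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.BytesRef;

public class SharedBufferSketch {
    public static void main(String[] args) throws Exception {
        byte[] bytes = new byte[4];
        ByteArrayDataOutput encoder = new ByteArrayDataOutput(bytes);
        BytesRef data = new BytesRef(bytes); // aliases the array; no copy is made
        encoder.reset(bytes);
        encoder.writeVInt(300);              // 300 needs two 7-bit groups = 2 bytes
        data.length = encoder.getPosition(); // shrink the view to the bytes written
        System.out.println(data.length);     // prints 2
    }
}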

Example 15 with ByteArrayDataOutput

Use of org.apache.lucene.store.ByteArrayDataOutput in project lucene-solr by apache.

The class Dictionary, method parseAffix.

/**
   * Parses a specific affix rule, putting the result into the provided affix map
   * 
   * @param affixes Map where the result of the parsing will be put
   * @param header Header line of the affix rule
   * @param reader BufferedReader to read the content of the rule from
   * @param conditionPattern {@link String#format(String, Object...)} pattern to be used to generate the condition regex
   *                         pattern
   * @param seenPatterns map from condition -> index of patterns, for deduplication
   * @param seenStrips map from strip -> index of strips, for deduplication
   * @throws IOException Can be thrown while reading the rule
   */
private void parseAffix(TreeMap<String, List<Integer>> affixes, String header, LineNumberReader reader, String conditionPattern, Map<String, Integer> seenPatterns, Map<String, Integer> seenStrips) throws IOException, ParseException {
    BytesRefBuilder scratch = new BytesRefBuilder();
    StringBuilder sb = new StringBuilder();
    String[] args = header.split("\\s+");
    boolean crossProduct = args[2].equals("Y");
    boolean isSuffix = conditionPattern == SUFFIX_CONDITION_REGEX_PATTERN;
    int numLines = Integer.parseInt(args[3]);
    affixData = ArrayUtil.grow(affixData, (currentAffix << 3) + (numLines << 3));
    ByteArrayDataOutput affixWriter = new ByteArrayDataOutput(affixData, currentAffix << 3, numLines << 3);
    for (int i = 0; i < numLines; i++) {
        assert affixWriter.getPosition() == currentAffix << 3;
        String line = reader.readLine();
        String[] ruleArgs = line.split("\\s+");
        // condition is optional
        if (ruleArgs.length < 4) {
            throw new ParseException("The affix file contains a rule with less than four elements: " + line, reader.getLineNumber());
        }
        char flag = flagParsingStrategy.parseFlag(ruleArgs[1]);
        String strip = ruleArgs[2].equals("0") ? "" : ruleArgs[2];
        String affixArg = ruleArgs[3];
        char[] appendFlags = null;
        // first: parse continuation classes out of affix
        int flagSep = affixArg.lastIndexOf('/');
        if (flagSep != -1) {
            String flagPart = affixArg.substring(flagSep + 1);
            affixArg = affixArg.substring(0, flagSep);
            if (aliasCount > 0) {
                flagPart = getAliasValue(Integer.parseInt(flagPart));
            }
            appendFlags = flagParsingStrategy.parseFlags(flagPart);
            Arrays.sort(appendFlags);
            twoStageAffix = true;
        }
        // zero affix -> empty string
        if ("0".equals(affixArg)) {
            affixArg = "";
        }
        String condition = ruleArgs.length > 4 ? ruleArgs[4] : ".";
        // at least the gascon affix file has this issue
        if (condition.startsWith("[") && condition.indexOf(']') == -1) {
            condition = condition + "]";
        }
        // "dash hasn't got special meaning" (we must escape it)
        if (condition.indexOf('-') >= 0) {
            condition = escapeDash(condition);
        }
        final String regex;
        if (".".equals(condition)) {
            // Zero condition is indicated by dot
            regex = ".*";
        } else if (condition.equals(strip)) {
            // TODO: optimize this better:
            regex = ".*";
        // if we remove 'strip' from condition, we don't have to append 'strip' to check it...!
        // but this is complicated...
        } else {
            regex = String.format(Locale.ROOT, conditionPattern, condition);
        }
        // deduplicate patterns
        Integer patternIndex = seenPatterns.get(regex);
        if (patternIndex == null) {
            patternIndex = patterns.size();
            if (patternIndex > Short.MAX_VALUE) {
                throw new UnsupportedOperationException("Too many patterns, please report this to dev@lucene.apache.org");
            }
            seenPatterns.put(regex, patternIndex);
            CharacterRunAutomaton pattern = new CharacterRunAutomaton(new RegExp(regex, RegExp.NONE).toAutomaton());
            patterns.add(pattern);
        }
        Integer stripOrd = seenStrips.get(strip);
        if (stripOrd == null) {
            stripOrd = seenStrips.size();
            seenStrips.put(strip, stripOrd);
            if (stripOrd > Character.MAX_VALUE) {
                throw new UnsupportedOperationException("Too many unique strips, please report this to dev@lucene.apache.org");
            }
        }
        if (appendFlags == null) {
            appendFlags = NOFLAGS;
        }
        encodeFlags(scratch, appendFlags);
        int appendFlagsOrd = flagLookup.add(scratch.get());
        if (appendFlagsOrd < 0) {
            // already exists in our hash
            appendFlagsOrd = (-appendFlagsOrd) - 1;
        } else if (appendFlagsOrd > Short.MAX_VALUE) {
            // this limit is probably flexible, but it's a good sanity check too
            throw new UnsupportedOperationException("Too many unique append flags, please report this to dev@lucene.apache.org");
        }
        affixWriter.writeShort((short) flag);
        affixWriter.writeShort((short) stripOrd.intValue());
        // encode crossProduct into patternIndex
        int patternOrd = patternIndex.intValue() << 1 | (crossProduct ? 1 : 0);
        affixWriter.writeShort((short) patternOrd);
        affixWriter.writeShort((short) appendFlagsOrd);
        if (needsInputCleaning) {
            CharSequence cleaned = cleanInput(affixArg, sb);
            affixArg = cleaned.toString();
        }
        if (isSuffix) {
            affixArg = new StringBuilder(affixArg).reverse().toString();
        }
        List<Integer> list = affixes.get(affixArg);
        if (list == null) {
            list = new ArrayList<>();
            affixes.put(affixArg, list);
        }
        list.add(currentAffix);
        currentAffix++;
    }
}
Also used: BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder), RegExp (org.apache.lucene.util.automaton.RegExp), CharacterRunAutomaton (org.apache.lucene.util.automaton.CharacterRunAutomaton), ByteArrayDataOutput (org.apache.lucene.store.ByteArrayDataOutput), ParseException (java.text.ParseException)
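
Each affix becomes a fixed 8-byte record, the four shorts written by affixWriter, which is why every offset computation in this method is currentAffix << 3. A sketch of decoding one record back with ByteArrayDataInput; it mirrors the writes above, but the class and local names are ours:

import org.apache.lucene.store.ByteArrayDataInput;

public class AffixRecordSketch {
    // Reads back the four shorts parseAffix wrote: flag, stripOrd,
    // patternIndex << 1 | crossProduct, and appendFlagsOrd.
    static void decode(byte[] affixData, int affixOrdinal) throws Exception {
        ByteArrayDataInput in = new ByteArrayDataInput(affixData);
        in.setPosition(affixOrdinal << 3);            // records are 8 bytes apart
        char flag = (char) in.readShort();
        int stripOrd = in.readShort() & 0xffff;       // unsigned: up to Character.MAX_VALUE
        int patternOrd = in.readShort() & 0xffff;
        boolean crossProduct = (patternOrd & 1) == 1; // low bit carries crossProduct
        int patternIndex = patternOrd >>> 1;
        int appendFlagsOrd = in.readShort() & 0xffff;
        System.out.println(flag + " " + stripOrd + " " + patternIndex + " "
            + crossProduct + " " + appendFlagsOrd);
    }
}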

Aggregations

ByteArrayDataOutput (org.apache.lucene.store.ByteArrayDataOutput): 15
ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput): 8
BytesRef (org.apache.lucene.util.BytesRef): 5
Random (java.util.Random): 4
OfflineSorter (org.apache.lucene.util.OfflineSorter): 4
HashSet (java.util.HashSet): 3
IntPoint (org.apache.lucene.document.IntPoint): 3
IndexOutput (org.apache.lucene.store.IndexOutput): 3
BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder): 3
ByteBuffer (java.nio.ByteBuffer): 2
TokenStreamToAutomaton (org.apache.lucene.analysis.TokenStreamToAutomaton): 2
CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder): 2
IntsRef (org.apache.lucene.util.IntsRef): 2
IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder): 2
LimitedFiniteStringsIterator (org.apache.lucene.util.automaton.LimitedFiniteStringsIterator): 2
Builder (org.apache.lucene.util.fst.Builder): 2
PairOutputs (org.apache.lucene.util.fst.PairOutputs): 2
Pair (org.apache.lucene.util.fst.PairOutputs.Pair): 2
BytesArray (org.elasticsearch.common.bytes.BytesArray): 2
EOFException (java.io.EOFException): 1