Use of org.apache.lucene.store.ByteArrayDataOutput in project elasticsearch by elastic.
The class TranslogTests, method testTranslogWriter:
public void testTranslogWriter() throws IOException {
    final TranslogWriter writer = translog.createWriter(0);
    final int numOps = randomIntBetween(8, 128);
    byte[] bytes = new byte[4];
    ByteArrayDataOutput out = new ByteArrayDataOutput(bytes);
    final Set<Long> seenSeqNos = new HashSet<>();
    boolean opsHaveValidSequenceNumbers = randomBoolean();
    for (int i = 0; i < numOps; i++) {
        out.reset(bytes);
        out.writeInt(i);
        long seqNo;
        do {
            seqNo = opsHaveValidSequenceNumbers ? randomNonNegativeLong() : SequenceNumbersService.UNASSIGNED_SEQ_NO;
            opsHaveValidSequenceNumbers = opsHaveValidSequenceNumbers || !rarely();
        } while (seenSeqNos.contains(seqNo));
        if (seqNo != SequenceNumbersService.UNASSIGNED_SEQ_NO) {
            seenSeqNos.add(seqNo);
        }
        writer.add(new BytesArray(bytes), seqNo);
    }
    writer.sync();
    final BaseTranslogReader reader = randomBoolean() ? writer : translog.openReader(writer.path(), Checkpoint.read(translog.location().resolve(Translog.CHECKPOINT_FILE_NAME)));
    for (int i = 0; i < numOps; i++) {
        ByteBuffer buffer = ByteBuffer.allocate(4);
        reader.readBytes(buffer, reader.getFirstOperationOffset() + 4 * i);
        buffer.flip();
        final int value = buffer.getInt();
        assertEquals(i, value);
    }
    final long minSeqNo = seenSeqNos.stream().min(Long::compareTo).orElse(SequenceNumbersService.NO_OPS_PERFORMED);
    final long maxSeqNo = seenSeqNos.stream().max(Long::compareTo).orElse(SequenceNumbersService.NO_OPS_PERFORMED);
    assertThat(reader.getCheckpoint().minSeqNo, equalTo(minSeqNo));
    assertThat(reader.getCheckpoint().maxSeqNo, equalTo(maxSeqNo));
    out.reset(bytes);
    out.writeInt(2048);
    writer.add(new BytesArray(bytes), randomNonNegativeLong());
    if (reader instanceof TranslogReader) {
        ByteBuffer buffer = ByteBuffer.allocate(4);
        try {
            reader.readBytes(buffer, reader.getFirstOperationOffset() + 4 * numOps);
            fail("read past EOF?");
        } catch (EOFException ex) {
            // expected
        }
        ((TranslogReader) reader).close();
    } else {
        // live reader!
        ByteBuffer buffer = ByteBuffer.allocate(4);
        final long pos = reader.getFirstOperationOffset() + 4 * numOps;
        reader.readBytes(buffer, pos);
        buffer.flip();
        final int value = buffer.getInt();
        assertEquals(2048, value);
    }
    IOUtils.close(writer);
}
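The snippets on this page all share one pattern: serialize into a fixed, caller-owned byte[] via ByteArrayDataOutput, use getPosition() to learn how many bytes were written, and reset(bytes) to rewind the buffer for reuse. Below is a minimal, self-contained sketch of that round trip, using only API calls that appear in the examples; the class name RoundTrip and the values are ours, not from either project.

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

public class RoundTrip {
    public static void main(String[] args) throws Exception {
        // the caller allocates and owns the buffer; the output never grows it
        byte[] buffer = new byte[16];
        ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
        ByteArrayDataInput in = new ByteArrayDataInput();
        out.writeVInt(1234);
        // decode exactly the bytes written, nothing more
        in.reset(buffer, 0, out.getPosition());
        System.out.println(in.readVInt());   // 1234
        System.out.println(in.eof());        // true
        // rewind and reuse the same buffer for the next value
        out.reset(buffer);
        out.writeInt(42);                    // fixed-width: always 4 bytes
    }
}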
Use of org.apache.lucene.store.ByteArrayDataOutput in project lucene-solr by apache.
The class TestCompressingStoredFieldsFormat, method testZDouble:
public void testZDouble() throws Exception {
    // we never need more than 9 bytes
    byte[] buffer = new byte[9];
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    ByteArrayDataInput in = new ByteArrayDataInput(buffer);
    // round-trip small integer values
    for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
        double x = (double) i;
        CompressingStoredFieldsWriter.writeZDouble(out, x);
        in.reset(buffer, 0, out.getPosition());
        double y = CompressingStoredFieldsReader.readZDouble(in);
        assertTrue(in.eof());
        assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
        // check that compression actually works
        if (i >= -1 && i <= 124) {
            // single byte compression
            assertEquals(1, out.getPosition());
        }
        out.reset(buffer);
    }
    // round-trip special values
    double[] special = { -0.0d, +0.0d, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, Double.MIN_VALUE, Double.MAX_VALUE, Double.NaN };
    for (double x : special) {
        CompressingStoredFieldsWriter.writeZDouble(out, x);
        in.reset(buffer, 0, out.getPosition());
        double y = CompressingStoredFieldsReader.readZDouble(in);
        assertTrue(in.eof());
        assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
        out.reset(buffer);
    }
    // round-trip random values
    Random r = random();
    for (int i = 0; i < 100000; i++) {
        double x = r.nextDouble() * (random().nextInt(100) - 50);
        CompressingStoredFieldsWriter.writeZDouble(out, x);
        assertTrue("length=" + out.getPosition() + ", d=" + x, out.getPosition() <= (x < 0 ? 9 : 8));
        in.reset(buffer, 0, out.getPosition());
        double y = CompressingStoredFieldsReader.readZDouble(in);
        assertTrue(in.eof());
        assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
        out.reset(buffer);
    }
    // same with floats
    for (int i = 0; i < 100000; i++) {
        double x = (double) (r.nextFloat() * (random().nextInt(100) - 50));
        CompressingStoredFieldsWriter.writeZDouble(out, x);
        assertTrue("length=" + out.getPosition() + ", d=" + x, out.getPosition() <= 5);
        in.reset(buffer, 0, out.getPosition());
        double y = CompressingStoredFieldsReader.readZDouble(in);
        assertTrue(in.eof());
        assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
        out.reset(buffer);
    }
}
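The assertions above characterize the ZDouble encoding's size classes without exposing its bit layout: integral values in [-1, 124] cost a single byte, doubles exactly representable as a float cost at most five bytes, and everything else at most eight (nine for some negative values). A small probe sketch under the same assumptions as the test, including its package-private access to writeZDouble; the printed sizes are the ones the assertions above imply, not read from the implementation:

byte[] scratch = new byte[9];
ByteArrayDataOutput probe = new ByteArrayDataOutput(scratch);
CompressingStoredFieldsWriter.writeZDouble(probe, 100.0);   // integral, in [-1, 124]
System.out.println(probe.getPosition());                    // 1
probe.reset(scratch);
CompressingStoredFieldsWriter.writeZDouble(probe, 0.5);     // exactly representable as a float
System.out.println(probe.getPosition());                    // <= 5
probe.reset(scratch);
CompressingStoredFieldsWriter.writeZDouble(probe, Math.PI); // full-precision double
System.out.println(probe.getPosition());                    // <= 8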
Use of org.apache.lucene.store.ByteArrayDataOutput in project lucene-solr by apache.
The class TestCompressingStoredFieldsFormat, method testTLong:
public void testTLong() throws Exception {
    // we never need more than 10 bytes
    byte[] buffer = new byte[10];
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    ByteArrayDataInput in = new ByteArrayDataInput(buffer);
    // round-trip small integer values
    for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
        for (long mul : new long[] { SECOND, HOUR, DAY }) {
            long l1 = (long) i * mul;
            CompressingStoredFieldsWriter.writeTLong(out, l1);
            in.reset(buffer, 0, out.getPosition());
            long l2 = CompressingStoredFieldsReader.readTLong(in);
            assertTrue(in.eof());
            assertEquals(l1, l2);
            // check that compression actually works
            if (i >= -16 && i <= 15) {
                // single byte compression
                assertEquals(1, out.getPosition());
            }
            out.reset(buffer);
        }
    }
    // round-trip random values
    Random r = random();
    for (int i = 0; i < 100000; i++) {
        final int numBits = r.nextInt(65);
        long l1 = r.nextLong() & ((1L << numBits) - 1);
        switch (r.nextInt(4)) {
            case 0:
                l1 *= SECOND;
                break;
            case 1:
                l1 *= HOUR;
                break;
            case 2:
                l1 *= DAY;
                break;
            default:
                break;
        }
        CompressingStoredFieldsWriter.writeTLong(out, l1);
        in.reset(buffer, 0, out.getPosition());
        long l2 = CompressingStoredFieldsReader.readTLong(in);
        assertTrue(in.eof());
        assertEquals(l1, l2);
        out.reset(buffer);
    }
}
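writeTLong targets timestamps: the nested loops above verify that any multiple of SECOND, HOUR, or DAY whose quotient lies in [-16, 15] encodes to a single byte. A one-value probe sketch under the same assumptions as the test (SECOND, HOUR, and DAY are the test class's millisecond constants):

byte[] scratch = new byte[10];
ByteArrayDataOutput probe = new ByteArrayDataOutput(scratch);
CompressingStoredFieldsWriter.writeTLong(probe, 12 * DAY); // quotient 12 is in [-16, 15]
System.out.println(probe.getPosition());                   // 1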
Use of org.apache.lucene.store.ByteArrayDataOutput in project lucene-solr by apache.
The class Test2BBinaryDocValues, method testVariableBinary:
// indexes IndexWriter.MAX_DOCS docs with a variable binary field
public void testVariableBinary() throws Exception {
    BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BVariableBinary"));
    if (dir instanceof MockDirectoryWrapper) {
        ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }
    IndexWriter w = new IndexWriter(dir,
        new IndexWriterConfig(new MockAnalyzer(random()))
            .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
            .setRAMBufferSizeMB(256.0)
            .setMergeScheduler(new ConcurrentMergeScheduler())
            .setMergePolicy(newLogMergePolicy(false, 10))
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
            .setCodec(TestUtil.getDefaultCodec()));
    Document doc = new Document();
    byte[] bytes = new byte[4];
    ByteArrayDataOutput encoder = new ByteArrayDataOutput(bytes);
    BytesRef data = new BytesRef(bytes);
    BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);
    doc.add(dvField);
    for (int i = 0; i < IndexWriter.MAX_DOCS; i++) {
        encoder.reset(bytes);
        // 1, 2, or 3 bytes
        encoder.writeVInt(i % 65535);
        data.length = encoder.getPosition();
        w.addDocument(doc);
        if (i % 100000 == 0) {
            System.out.println("indexed: " + i);
            System.out.flush();
        }
    }
    w.forceMerge(1);
    w.close();
    System.out.println("verifying...");
    System.out.flush();
    DirectoryReader r = DirectoryReader.open(dir);
    int expectedValue = 0;
    ByteArrayDataInput input = new ByteArrayDataInput();
    for (LeafReaderContext context : r.leaves()) {
        LeafReader reader = context.reader();
        BinaryDocValues dv = reader.getBinaryDocValues("dv");
        for (int i = 0; i < reader.maxDoc(); i++) {
            assertEquals(i, dv.nextDoc());
            final BytesRef term = dv.binaryValue();
            input.reset(term.bytes, term.offset, term.length);
            assertEquals(expectedValue % 65535, input.readVInt());
            assertTrue(input.eof());
            expectedValue++;
        }
    }
    r.close();
    dir.close();
}
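The notable pattern here is allocation-free reuse across roughly 2.1 billion documents: a single Document, BinaryDocValuesField, and backing byte[] are created once, and each iteration only rewrites the buffer and trims the BytesRef to the bytes just written. Distilled into a short sketch; the variable names and loop bound are ours:

byte[] scratch = new byte[4];
ByteArrayDataOutput enc = new ByteArrayDataOutput(scratch);
BytesRef ref = new BytesRef(scratch);  // shares scratch; offset 0
BinaryDocValuesField field = new BinaryDocValuesField("dv", ref);
for (int value = 0; value < 3; value++) {
    enc.reset(scratch);
    enc.writeVInt(value);
    ref.length = enc.getPosition();    // expose only the bytes just written
    // writer.addDocument(doc) would now index the current contents of scratch
}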
Use of org.apache.lucene.store.ByteArrayDataOutput in project lucene-solr by apache.
The class Dictionary, method parseAffix:
/**
 * Parses a specific affix rule, putting the result into the provided affix map.
 *
 * @param affixes Map where the result of the parsing will be put
 * @param header Header line of the affix rule
 * @param reader BufferedReader to read the content of the rule from
 * @param conditionPattern {@link String#format(String, Object...)} pattern to be used to generate the condition
 *                         regex pattern
 * @param seenPatterns map from condition -> index of patterns, for deduplication
 * @param seenStrips map from strip string -> index, for deduplication
 * @throws IOException Can be thrown while reading the rule
 */
private void parseAffix(TreeMap<String, List<Integer>> affixes, String header, LineNumberReader reader, String conditionPattern, Map<String, Integer> seenPatterns, Map<String, Integer> seenStrips) throws IOException, ParseException {
    BytesRefBuilder scratch = new BytesRefBuilder();
    StringBuilder sb = new StringBuilder();
    String[] args = header.split("\\s+");
    boolean crossProduct = args[2].equals("Y");
    boolean isSuffix = conditionPattern == SUFFIX_CONDITION_REGEX_PATTERN;
    int numLines = Integer.parseInt(args[3]);
    affixData = ArrayUtil.grow(affixData, (currentAffix << 3) + (numLines << 3));
    // each affix rule is encoded as four shorts (8 bytes), hence the << 3 shifts
    ByteArrayDataOutput affixWriter = new ByteArrayDataOutput(affixData, currentAffix << 3, numLines << 3);
    for (int i = 0; i < numLines; i++) {
        assert affixWriter.getPosition() == currentAffix << 3;
        String line = reader.readLine();
        String[] ruleArgs = line.split("\\s+");
        // condition is optional
        if (ruleArgs.length < 4) {
            throw new ParseException("The affix file contains a rule with less than four elements: " + line, reader.getLineNumber());
        }
        char flag = flagParsingStrategy.parseFlag(ruleArgs[1]);
        String strip = ruleArgs[2].equals("0") ? "" : ruleArgs[2];
        String affixArg = ruleArgs[3];
        char[] appendFlags = null;
        // first: parse continuation classes out of affix
        int flagSep = affixArg.lastIndexOf('/');
        if (flagSep != -1) {
            String flagPart = affixArg.substring(flagSep + 1);
            affixArg = affixArg.substring(0, flagSep);
            if (aliasCount > 0) {
                flagPart = getAliasValue(Integer.parseInt(flagPart));
            }
            appendFlags = flagParsingStrategy.parseFlags(flagPart);
            Arrays.sort(appendFlags);
            twoStageAffix = true;
        }
        // zero affix -> empty string
        if ("0".equals(affixArg)) {
            affixArg = "";
        }
        String condition = ruleArgs.length > 4 ? ruleArgs[4] : ".";
        // at least the gascon affix file has this issue
        if (condition.startsWith("[") && condition.indexOf(']') == -1) {
            condition = condition + "]";
        }
        // "dash hasn't got special meaning" (we must escape it)
        if (condition.indexOf('-') >= 0) {
            condition = escapeDash(condition);
        }
        final String regex;
        if (".".equals(condition)) {
            // Zero condition is indicated by dot
            regex = ".*";
        } else if (condition.equals(strip)) {
            // TODO: optimize this better:
            regex = ".*";
            // if we remove 'strip' from condition, we don't have to append 'strip' to check it...!
            // but this is complicated...
        } else {
            regex = String.format(Locale.ROOT, conditionPattern, condition);
        }
        // deduplicate patterns
        Integer patternIndex = seenPatterns.get(regex);
        if (patternIndex == null) {
            patternIndex = patterns.size();
            if (patternIndex > Short.MAX_VALUE) {
                throw new UnsupportedOperationException("Too many patterns, please report this to dev@lucene.apache.org");
            }
            seenPatterns.put(regex, patternIndex);
            CharacterRunAutomaton pattern = new CharacterRunAutomaton(new RegExp(regex, RegExp.NONE).toAutomaton());
            patterns.add(pattern);
        }
        Integer stripOrd = seenStrips.get(strip);
        if (stripOrd == null) {
            stripOrd = seenStrips.size();
            seenStrips.put(strip, stripOrd);
            if (stripOrd > Character.MAX_VALUE) {
                throw new UnsupportedOperationException("Too many unique strips, please report this to dev@lucene.apache.org");
            }
        }
        if (appendFlags == null) {
            appendFlags = NOFLAGS;
        }
        encodeFlags(scratch, appendFlags);
        int appendFlagsOrd = flagLookup.add(scratch.get());
        if (appendFlagsOrd < 0) {
            // already exists in our hash
            appendFlagsOrd = (-appendFlagsOrd) - 1;
        } else if (appendFlagsOrd > Short.MAX_VALUE) {
            // this limit is probably flexible, but it's a good sanity check too
            throw new UnsupportedOperationException("Too many unique append flags, please report this to dev@lucene.apache.org");
        }
        affixWriter.writeShort((short) flag);
        affixWriter.writeShort((short) stripOrd.intValue());
        // encode crossProduct into patternIndex
        int patternOrd = patternIndex.intValue() << 1 | (crossProduct ? 1 : 0);
        affixWriter.writeShort((short) patternOrd);
        affixWriter.writeShort((short) appendFlagsOrd);
        if (needsInputCleaning) {
            CharSequence cleaned = cleanInput(affixArg, sb);
            affixArg = cleaned.toString();
        }
        if (isSuffix) {
            affixArg = new StringBuilder(affixArg).reverse().toString();
        }
        List<Integer> list = affixes.get(affixArg);
        if (list == null) {
            list = new ArrayList<>();
            affixes.put(affixArg, list);
        }
        list.add(currentAffix);
        currentAffix++;
    }
}
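Each rule thus occupies exactly eight bytes of affixData: four shorts holding the flag, the strip ordinal, the pattern index with the cross-product flag folded into bit 0, and the append-flags ordinal. A hedged sketch of reading one entry back; decodeAffix is our illustrative helper mirroring the four writeShort calls above, not a Lucene method:

static void decodeAffix(byte[] affixData, int affixOrd) throws IOException {
    // each entry starts at affixOrd * 8, matching parseAffix's << 3 layout
    ByteArrayDataInput in = new ByteArrayDataInput(affixData, affixOrd << 3, 8);
    char flag = (char) in.readShort();
    int stripOrd = in.readShort() & 0xFFFF;           // strip ordinals may exceed Short.MAX_VALUE
    int patternPacked = in.readShort() & 0xFFFF;
    int patternIndex = patternPacked >>> 1;           // high bits: index into patterns
    boolean crossProduct = (patternPacked & 1) != 0;  // low bit: cross-product marker
    int appendFlagsOrd = in.readShort();              // index into flagLookup
    System.out.println(flag + " " + stripOrd + " " + patternIndex + " " + crossProduct + " " + appendFlagsOrd);
}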