Use of org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode in project hbase by apache.
The class ColumnSectionWriter, method compilerInternals().
protected void compilerInternals() {
  tokenizer.setNodeFirstInsertionIndexes();
  tokenizer.appendNodes(nonLeaves, true, false);
  tokenizer.appendNodes(leaves, false, true);
  allNodes = Lists.newArrayListWithCapacity(nonLeaves.size() + leaves.size());
  allNodes.addAll(nonLeaves);
  allNodes.addAll(leaves);
  columnNodeWriters = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(allNodes));
  for (int i = 0; i < allNodes.size(); ++i) {
    TokenizerNode node = allNodes.get(i);
    columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, this.nodeType));
  }
  // leaf widths are known at this point, so add them up
  int totalBytesWithoutOffsets = 0;
  for (int i = allNodes.size() - 1; i >= 0; --i) {
    ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
    // leaves store all but their first token byte
    totalBytesWithoutOffsets += columnNodeWriter.getWidthUsingPlaceholderForOffsetWidth(0);
  }
  // figure out how wide our offset FInts are
  int parentOffsetWidth = 0;
  while (true) {
    ++parentOffsetWidth;
    int numBytesFinder = totalBytesWithoutOffsets + parentOffsetWidth * allNodes.size();
    if (numBytesFinder < UFIntTool.maxValueForNumBytes(parentOffsetWidth)) {
      // it fits
      numBytes = numBytesFinder;
      break;
    }
  }
  if (this.nodeType == ColumnNodeType.FAMILY) {
    blockMeta.setFamilyOffsetWidth(parentOffsetWidth);
  } else if (this.nodeType == ColumnNodeType.QUALIFIER) {
    blockMeta.setQualifierOffsetWidth(parentOffsetWidth);
  } else {
    blockMeta.setTagsOffsetWidth(parentOffsetWidth);
  }
  int forwardIndex = 0;
  for (int i = 0; i < allNodes.size(); ++i) {
    TokenizerNode node = allNodes.get(i);
    ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
    int fullNodeWidth = columnNodeWriter.getWidthUsingPlaceholderForOffsetWidth(parentOffsetWidth);
    node.setOutputArrayOffset(forwardIndex);
    columnNodeWriter.setTokenBytes(node.getToken());
    if (node.isRoot()) {
      columnNodeWriter.setParentStartPosition(0);
    } else {
      columnNodeWriter.setParentStartPosition(node.getParent().getOutputArrayOffset());
    }
    forwardIndex += fullNodeWidth;
  }
  tokenizer.appendOutputArrayOffsets(outputArrayOffsets);
}
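The while (true) loop above sizes the parent-offset field: the per-node offset width grows until the section total (node bytes plus one offset per node) can itself be addressed by an offset of that width. Below is a self-contained sketch of the same sizing logic, not HBase code; maxValueForNumBytes mirrors what UFIntTool.maxValueForNumBytes is assumed to return.

// Standalone sketch of the offset-width sizing loop above; not HBase code.
public class OffsetWidthSizingSketch {

  // largest unsigned value representable in numBytes bytes (valid for 1..7 bytes),
  // mirroring what UFIntTool.maxValueForNumBytes is assumed to return
  static long maxValueForNumBytes(int numBytes) {
    return (1L << (8 * numBytes)) - 1;
  }

  // Grow the per-node offset width until the resulting section size
  // (node bytes plus one offset per node) fits in that many bytes.
  static int chooseOffsetWidth(int totalBytesWithoutOffsets, int numNodes) {
    int offsetWidth = 0;
    while (true) {
      ++offsetWidth;
      long totalBytes = totalBytesWithoutOffsets + (long) offsetWidth * numNodes;
      if (totalBytes < maxValueForNumBytes(offsetWidth)) {
        return offsetWidth; // it fits
      }
    }
  }

  public static void main(String[] args) {
    // 300 bytes of node data across 40 nodes -> 2-byte offsets (340 > 255, 380 < 65535)
    System.out.println(chooseOffsetWidth(300, 40));
  }
}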
Use of org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode in project hbase by apache.
The class RowNodeWriter, method writeFan().
/**
 * UVInt: numFanBytes/fanOut
 * bytes: each fan byte
 */
public void writeFan(OutputStream os) throws IOException {
  UVIntTool.writeBytes(fanOut, os);
  if (fanOut <= 0) {
    return;
  }
  ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
  for (int i = 0; i < children.size(); ++i) {
    TokenizerNode child = children.get(i);
    // first byte of each child's token
    os.write(child.getToken().get(0));
  }
}
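On the wire, the fan section is a UVInt count followed by the first token byte of each child. The sketch below shows that layout without HBase classes; the single-byte count is an assumption that only holds while fanOut < 128, whereas UVIntTool handles the general variable-length encoding.

import java.io.ByteArrayOutputStream;
import java.util.List;

// Not HBase code: a sketch of the byte layout produced by writeFan above.
class FanSectionSketch {

  // count byte (assumes fanOut < 128), then the first byte of each child's token
  static byte[] writeFan(List<byte[]> childTokens) {
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    os.write(childTokens.size());
    for (byte[] token : childTokens) {
      os.write(token[0]);
    }
    return os.toByteArray();
  }
}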
Use of org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode in project hbase by apache.
The class RowSectionWriter, method compile().
/****************** methods *******************************/
public RowSectionWriter compile() {
  blockMeta.setMaxRowLength(prefixTreeEncoder.getRowTokenizer().getMaxElementLength());
  prefixTreeEncoder.getRowTokenizer().setNodeFirstInsertionIndexes();
  prefixTreeEncoder.getRowTokenizer().appendNodes(nonLeaves, true, false);
  prefixTreeEncoder.getRowTokenizer().appendNodes(leaves, false, true);
  // track the starting position of each node in final output
  int negativeIndex = 0;
  // create leaf writer nodes
  // leaf widths are known at this point, so add them up
  int totalLeafBytes = 0;
  for (int i = leaves.size() - 1; i >= 0; --i) {
    TokenizerNode leaf = leaves.get(i);
    RowNodeWriter leafWriter = initializeWriter(leafWriters, numLeafWriters, leaf);
    ++numLeafWriters;
    // leaves store all but their first token byte
    int leafNodeWidth = leafWriter.calculateWidthOverrideOffsetWidth(0);
    totalLeafBytes += leafNodeWidth;
    negativeIndex += leafNodeWidth;
    leaf.setNegativeIndex(negativeIndex);
  }
  int totalNonLeafBytesWithoutOffsets = 0;
  int totalChildPointers = 0;
  for (int i = nonLeaves.size() - 1; i >= 0; --i) {
    TokenizerNode nonLeaf = nonLeaves.get(i);
    RowNodeWriter nonLeafWriter = initializeWriter(nonLeafWriters, numNonLeafWriters, nonLeaf);
    ++numNonLeafWriters;
    totalNonLeafBytesWithoutOffsets += nonLeafWriter.calculateWidthOverrideOffsetWidth(0);
    totalChildPointers += nonLeaf.getNumChildren();
  }
  // figure out how wide our offset FInts are
  int offsetWidth = 0;
  while (true) {
    ++offsetWidth;
    int offsetBytes = totalChildPointers * offsetWidth;
    int totalRowBytes = totalNonLeafBytesWithoutOffsets + offsetBytes + totalLeafBytes;
    if (totalRowBytes < UFIntTool.maxValueForNumBytes(offsetWidth)) {
      // it fits
      numBytes = totalRowBytes;
      break;
    }
  }
  blockMeta.setNextNodeOffsetWidth(offsetWidth);
  // populate negativeIndexes
  for (int i = nonLeaves.size() - 1; i >= 0; --i) {
    TokenizerNode nonLeaf = nonLeaves.get(i);
    int writerIndex = nonLeaves.size() - i - 1;
    RowNodeWriter nonLeafWriter = nonLeafWriters.get(writerIndex);
    int nodeWidth = nonLeafWriter.calculateWidth();
    negativeIndex += nodeWidth;
    nonLeaf.setNegativeIndex(negativeIndex);
  }
  return this;
}
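The negativeIndex bookkeeping walks leaves and then non-leaves from last to first, so each node's negativeIndex ends up being its distance back from the end of the row section. Below is a self-contained sketch of that accumulation over plain node widths; the names are illustrative, not HBase APIs.

import java.util.ArrayList;
import java.util.List;

// Not HBase code: illustrates how negativeIndex values accumulate when node
// widths are summed from the last node back toward the first.
class NegativeIndexSketch {

  // Returns, for each node width, that node's distance from the end of the section.
  static List<Integer> assignNegativeIndexes(int[] nodeWidths) {
    List<Integer> negativeIndexes = new ArrayList<>();
    for (int i = 0; i < nodeWidths.length; ++i) {
      negativeIndexes.add(0);
    }
    int negativeIndex = 0;
    for (int i = nodeWidths.length - 1; i >= 0; --i) {
      negativeIndex += nodeWidths[i];
      negativeIndexes.set(i, negativeIndex);
    }
    return negativeIndexes;
  }

  public static void main(String[] args) {
    // widths 5, 3, 4 -> negative indexes [12, 7, 4] (last node sits 4 bytes before the end)
    System.out.println(assignNegativeIndexes(new int[] { 5, 3, 4 }));
  }
}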
Use of org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode in project hbase by apache.
The class TestTokenizer, method testSearching().
@Test
public void testSearching() {
  for (byte[] input : inputs) {
    TokenizerRowSearchResult resultHolder = new TokenizerRowSearchResult();
    builder.getNode(resultHolder, input, 0, input.length);
    TokenizerNode n = resultHolder.getMatchingNode();
    byte[] output = n.getNewByteArray();
    Assert.assertTrue(Bytes.equals(input, output));
  }
}
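The assertion relies on the matching node being able to rebuild the full key it represents, which in a prefix trie means concatenating token bytes from the root down to the node. The sketch below shows that reconstruction with a hypothetical parent-pointer node; it is not the actual TokenizerNode implementation, and getNewByteArray() is only assumed to behave this way.

import java.io.ByteArrayOutputStream;

// Not HBase code: a hypothetical parent-pointer trie node, used only to show
// how a full key can be rebuilt by concatenating tokens from root to node.
class TrieNodeSketch {
  final TrieNodeSketch parent; // null for the root
  final byte[] token;          // the bytes this node adds to its parent's key

  TrieNodeSketch(TrieNodeSketch parent, byte[] token) {
    this.parent = parent;
    this.token = token;
  }

  // Assumed analogue of getNewByteArray(): root token first, this node's token last.
  byte[] rebuildKey() {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    appendTo(out);
    return out.toByteArray();
  }

  private void appendTo(ByteArrayOutputStream out) {
    if (parent != null) {
      parent.appendTo(out);
    }
    out.write(token, 0, token.length);
  }
}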
Use of org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode in project hbase by apache.
The class TestColumnBuilder, method testReaderRoundTrip().
/************* methods ********************************/
@Test
public void testReaderRoundTrip() throws IOException {
  for (int i = 0; i < sortedUniqueColumns.size(); ++i) {
    ByteRange column = sortedUniqueColumns.get(i);
    builder.addSorted(column);
  }
  List<byte[]> builderOutputArrays = builder.getArrays();
  for (int i = 0; i < builderOutputArrays.size(); ++i) {
    byte[] inputArray = sortedUniqueColumns.get(i).deepCopyToNewArray();
    byte[] outputArray = builderOutputArrays.get(i);
    boolean same = Bytes.equals(inputArray, outputArray);
    Assert.assertTrue(same);
  }
  Assert.assertEquals(sortedUniqueColumns.size(), builderOutputArrays.size());
  writer = new ColumnSectionWriter(blockMeta, builder, ColumnNodeType.QUALIFIER);
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  writer.compile().writeBytes(baos);
  bytes = baos.toByteArray();
  buffer = new byte[blockMeta.getMaxQualifierLength()];
  reader = new ColumnReader(buffer, ColumnNodeType.QUALIFIER);
  reader.initOnBlock(blockMeta, new SingleByteBuff(ByteBuffer.wrap(bytes)));
  List<TokenizerNode> builderNodes = Lists.newArrayList();
  builder.appendNodes(builderNodes, true, true);
  int i = 0;
  for (TokenizerNode builderNode : builderNodes) {
    if (!builderNode.hasOccurrences()) {
      continue;
    }
    // we de-duped before adding to builder
    Assert.assertEquals(1, builderNode.getNumOccurrences());
    int position = builderNode.getOutputArrayOffset();
    byte[] output = reader.populateBuffer(position).copyBufferToNewArray();
    boolean same = Bytes.equals(sortedUniqueColumns.get(i).deepCopyToNewArray(), output);
    Assert.assertTrue(same);
    ++i;
  }
}