Search in sources:

Example 1 with TokenizerNode

Use of org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode in project hbase by apache.

The class ColumnSectionWriter, method compilerInternals():

protected void compilerInternals() {
    tokenizer.setNodeFirstInsertionIndexes();
    tokenizer.appendNodes(nonLeaves, true, false);
    tokenizer.appendNodes(leaves, false, true);
    allNodes = Lists.newArrayListWithCapacity(nonLeaves.size() + leaves.size());
    allNodes.addAll(nonLeaves);
    allNodes.addAll(leaves);
    columnNodeWriters = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(allNodes));
    for (int i = 0; i < allNodes.size(); ++i) {
        TokenizerNode node = allNodes.get(i);
        columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, this.nodeType));
    }
    // leaf widths are known at this point, so add them up
    int totalBytesWithoutOffsets = 0;
    for (int i = allNodes.size() - 1; i >= 0; --i) {
        ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
        // leaves store all but their first token byte
        totalBytesWithoutOffsets += columnNodeWriter.getWidthUsingPlaceholderForOffsetWidth(0);
    }
    // figure out how wide our offset FInts are
    int parentOffsetWidth = 0;
    while (true) {
        ++parentOffsetWidth;
        int numBytesFinder = totalBytesWithoutOffsets + parentOffsetWidth * allNodes.size();
        if (numBytesFinder < UFIntTool.maxValueForNumBytes(parentOffsetWidth)) {
            // it fits
            numBytes = numBytesFinder;
            break;
        }
    }
    if (this.nodeType == ColumnNodeType.FAMILY) {
        blockMeta.setFamilyOffsetWidth(parentOffsetWidth);
    } else if (this.nodeType == ColumnNodeType.QUALIFIER) {
        blockMeta.setQualifierOffsetWidth(parentOffsetWidth);
    } else {
        blockMeta.setTagsOffsetWidth(parentOffsetWidth);
    }
    int forwardIndex = 0;
    for (int i = 0; i < allNodes.size(); ++i) {
        TokenizerNode node = allNodes.get(i);
        ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
        int fullNodeWidth = columnNodeWriter.getWidthUsingPlaceholderForOffsetWidth(parentOffsetWidth);
        node.setOutputArrayOffset(forwardIndex);
        columnNodeWriter.setTokenBytes(node.getToken());
        if (node.isRoot()) {
            columnNodeWriter.setParentStartPosition(0);
        } else {
            columnNodeWriter.setParentStartPosition(node.getParent().getOutputArrayOffset());
        }
        forwardIndex += fullNodeWidth;
    }
    tokenizer.appendOutputArrayOffsets(outputArrayOffsets);
}
Also used: TokenizerNode (org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode)
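
The loop in the middle of compilerInternals() sizes the fixed-width parent offsets: it widens parentOffsetWidth one byte at a time until the whole column section (token bytes plus one offset per node) is addressable by an offset of that width. Below is a minimal standalone sketch of that sizing rule, assuming UFIntTool.maxValueForNumBytes(w) is simply 2^(8*w) - 1; the class and helper names are illustrative, not HBase API.

// Hedged sketch: chooseOffsetWidth is a hypothetical helper that mirrors the
// loop in compilerInternals(), not part of HBase.
public final class OffsetWidthSketch {

    // Assumption: UFIntTool.maxValueForNumBytes(w) == 2^(8*w) - 1 for small w.
    static long maxValueForNumBytes(int numBytes) {
        return (1L << (8 * numBytes)) - 1;
    }

    static int chooseOffsetWidth(int totalBytesWithoutOffsets, int numNodes) {
        int width = 0;
        while (true) {
            ++width;
            // candidate section size = payload + one offset field per node
            int candidateSectionSize = totalBytesWithoutOffsets + width * numNodes;
            if (candidateSectionSize < maxValueForNumBytes(width)) {
                return width; // every offset in the section now fits in `width` bytes
            }
        }
    }

    public static void main(String[] args) {
        // 300 nodes totalling 40 KB of token bytes need 2-byte offsets:
        // 40_000 + 2 * 300 = 40_600 < 65_535.
        System.out.println(chooseOffsetWidth(40_000, 300)); // prints 2
    }
}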

Example 2 with TokenizerNode

Use of org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode in project hbase by apache.

The class RowNodeWriter, method writeFan():

/**
   * UVInt: numFanBytes/fanOut
   * bytes: each fan byte
   */
public void writeFan(OutputStream os) throws IOException {
    UVIntTool.writeBytes(fanOut, os);
    if (fanOut <= 0) {
        return;
    }
    ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
    for (int i = 0; i < children.size(); ++i) {
        TokenizerNode child = children.get(i);
        // first byte of each child's token
        os.write(child.getToken().get(0));
    }
}
Also used: TokenizerNode (org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode)
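
The fan section written by writeFan() is just a length prefix followed by the first byte of each child's token, which is what a reader scans to pick the child subtree to descend into. A small sketch of that layout follows; writeUnsignedVarInt is a hypothetical single-byte stand-in for UVIntTool.writeBytes, so the real wire format may differ, but the structure (prefix, then fan bytes) is the point.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

// Hedged sketch of the fan-section layout produced by writeFan().
public final class FanSectionSketch {

    // Assumption: small fan-outs fit in a single byte, so no continuation logic here.
    static void writeUnsignedVarInt(int value, ByteArrayOutputStream os) {
        os.write(value & 0x7F);
    }

    public static void main(String[] args) throws IOException {
        List<byte[]> childTokens = Arrays.asList(
                "apple".getBytes(), "banana".getBytes(), "cherry".getBytes());

        ByteArrayOutputStream os = new ByteArrayOutputStream();
        writeUnsignedVarInt(childTokens.size(), os);   // fan-out = 3
        for (byte[] token : childTokens) {
            os.write(token[0]);                        // 'a', 'b', 'c'
        }
        // Fan section bytes: [3, 97, 98, 99]
        System.out.println(Arrays.toString(os.toByteArray()));
    }
}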

Example 3 with TokenizerNode

Use of org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode in project hbase by apache.

The class RowSectionWriter, method compile():

/****************** methods *******************************/
public RowSectionWriter compile() {
    blockMeta.setMaxRowLength(prefixTreeEncoder.getRowTokenizer().getMaxElementLength());
    prefixTreeEncoder.getRowTokenizer().setNodeFirstInsertionIndexes();
    prefixTreeEncoder.getRowTokenizer().appendNodes(nonLeaves, true, false);
    prefixTreeEncoder.getRowTokenizer().appendNodes(leaves, false, true);
    // track the starting position of each node in final output
    int negativeIndex = 0;
    // create leaf writer nodes
    // leaf widths are known at this point, so add them up
    int totalLeafBytes = 0;
    for (int i = leaves.size() - 1; i >= 0; --i) {
        TokenizerNode leaf = leaves.get(i);
        RowNodeWriter leafWriter = initializeWriter(leafWriters, numLeafWriters, leaf);
        ++numLeafWriters;
        // leaves store all but their first token byte
        int leafNodeWidth = leafWriter.calculateWidthOverrideOffsetWidth(0);
        totalLeafBytes += leafNodeWidth;
        negativeIndex += leafNodeWidth;
        leaf.setNegativeIndex(negativeIndex);
    }
    int totalNonLeafBytesWithoutOffsets = 0;
    int totalChildPointers = 0;
    for (int i = nonLeaves.size() - 1; i >= 0; --i) {
        TokenizerNode nonLeaf = nonLeaves.get(i);
        RowNodeWriter nonLeafWriter = initializeWriter(nonLeafWriters, numNonLeafWriters, nonLeaf);
        ++numNonLeafWriters;
        totalNonLeafBytesWithoutOffsets += nonLeafWriter.calculateWidthOverrideOffsetWidth(0);
        totalChildPointers += nonLeaf.getNumChildren();
    }
    // figure out how wide our offset FInts are
    int offsetWidth = 0;
    while (true) {
        ++offsetWidth;
        int offsetBytes = totalChildPointers * offsetWidth;
        int totalRowBytes = totalNonLeafBytesWithoutOffsets + offsetBytes + totalLeafBytes;
        if (totalRowBytes < UFIntTool.maxValueForNumBytes(offsetWidth)) {
            // it fits
            numBytes = totalRowBytes;
            break;
        }
    }
    blockMeta.setNextNodeOffsetWidth(offsetWidth);
    // populate negativeIndexes
    for (int i = nonLeaves.size() - 1; i >= 0; --i) {
        TokenizerNode nonLeaf = nonLeaves.get(i);
        int writerIndex = nonLeaves.size() - i - 1;
        RowNodeWriter nonLeafWriter = nonLeafWriters.get(writerIndex);
        int nodeWidth = nonLeafWriter.calculateWidth();
        negativeIndex += nodeWidth;
        nonLeaf.setNegativeIndex(negativeIndex);
    }
    return this;
}
Also used: TokenizerNode (org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode)
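
compile() measures nodes back to front: leaves first, then non-leaves, with negativeIndex accumulating each node's width so it ends up as that node's distance from the end of the row section. The following simplified sketch shows only that bookkeeping; it ignores the offset-width refinement between the two passes, and NodeSketch as well as the offset recovery sectionLength - negativeIndex are illustrative assumptions, not HBase API.

import java.util.ArrayList;
import java.util.List;

// Hedged sketch of the negative-index bookkeeping in compile().
public class NegativeIndexSketch {

    static final class NodeSketch {
        final String name;
        final int width;       // serialized width of this row node, in bytes
        int negativeIndex;     // distance from the end of the row section to this node's start
        NodeSketch(String name, int width) { this.name = name; this.width = width; }
    }

    public static void main(String[] args) {
        // Physical layout of the section: non-leaves first, then leaves.
        List<NodeSketch> nonLeaves = List.of(new NodeSketch("root", 6), new NodeSketch("branch", 4));
        List<NodeSketch> leaves = List.of(new NodeSketch("leafA", 5), new NodeSketch("leafB", 7));

        int negativeIndex = 0;
        // Leaves are walked last-to-first, as in compile() ...
        for (int i = leaves.size() - 1; i >= 0; --i) {
            negativeIndex += leaves.get(i).width;
            leaves.get(i).negativeIndex = negativeIndex;
        }
        // ... then non-leaves, also last-to-first, continuing the same counter.
        for (int i = nonLeaves.size() - 1; i >= 0; --i) {
            negativeIndex += nonLeaves.get(i).width;
            nonLeaves.get(i).negativeIndex = negativeIndex;
        }

        int sectionLength = negativeIndex; // 7 + 5 + 4 + 6 = 22 bytes
        // Assumption: a forward offset can be recovered as sectionLength - negativeIndex,
        // giving root=0, branch=6, leafA=10, leafB=15.
        List<NodeSketch> all = new ArrayList<>(nonLeaves);
        all.addAll(leaves);
        for (NodeSketch n : all) {
            System.out.println(n.name + " starts at byte " + (sectionLength - n.negativeIndex));
        }
    }
}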

Example 4 with TokenizerNode

Use of org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode in project hbase by apache.

The class TestTokenizer, method testSearching():

@Test
public void testSearching() {
    for (byte[] input : inputs) {
        TokenizerRowSearchResult resultHolder = new TokenizerRowSearchResult();
        builder.getNode(resultHolder, input, 0, input.length);
        TokenizerNode n = resultHolder.getMatchingNode();
        byte[] output = n.getNewByteArray();
        Assert.assertTrue(Bytes.equals(input, output));
    }
}
Also used: TokenizerNode (org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode), TokenizerRowSearchResult (org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerRowSearchResult), Test (org.junit.Test)
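
The test relies on builder being a Tokenizer pre-loaded with the sorted inputs; getNode() fills the TokenizerRowSearchResult, and the matching node can re-assemble the original key via getNewByteArray(). Below is a sketch of that setup, with the no-arg Tokenizer constructor, addSorted(ByteRange), and SimpleMutableByteRange assumed from the other examples on this page and the HBase util package; verify against your HBase version before relying on it.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerRowSearchResult;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;

// Hedged sketch of a Tokenizer search round trip; not taken verbatim from the test fixtures.
public class TokenizerSearchSketch {
    public static void main(String[] args) {
        List<byte[]> inputs = Arrays.asList(
                Bytes.toBytes("row-aaa"), Bytes.toBytes("row-aab"), Bytes.toBytes("row-b"));

        Tokenizer builder = new Tokenizer();
        for (byte[] input : inputs) {
            // inputs must be added in sorted order
            builder.addSorted(new SimpleMutableByteRange(input));
        }

        for (byte[] input : inputs) {
            TokenizerRowSearchResult resultHolder = new TokenizerRowSearchResult();
            builder.getNode(resultHolder, input, 0, input.length);
            TokenizerNode n = resultHolder.getMatchingNode();
            // getNewByteArray() walks the token chain back to the full key
            System.out.println(Bytes.toString(n.getNewByteArray()));
        }
    }
}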

Example 5 with TokenizerNode

Use of org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode in project hbase by apache.

The class TestColumnBuilder, method testReaderRoundTrip():

/************* methods ********************************/
@Test
public void testReaderRoundTrip() throws IOException {
    for (int i = 0; i < sortedUniqueColumns.size(); ++i) {
        ByteRange column = sortedUniqueColumns.get(i);
        builder.addSorted(column);
    }
    List<byte[]> builderOutputArrays = builder.getArrays();
    for (int i = 0; i < builderOutputArrays.size(); ++i) {
        byte[] inputArray = sortedUniqueColumns.get(i).deepCopyToNewArray();
        byte[] outputArray = builderOutputArrays.get(i);
        boolean same = Bytes.equals(inputArray, outputArray);
        Assert.assertTrue(same);
    }
    Assert.assertEquals(sortedUniqueColumns.size(), builderOutputArrays.size());
    writer = new ColumnSectionWriter(blockMeta, builder, ColumnNodeType.QUALIFIER);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    writer.compile().writeBytes(baos);
    bytes = baos.toByteArray();
    buffer = new byte[blockMeta.getMaxQualifierLength()];
    reader = new ColumnReader(buffer, ColumnNodeType.QUALIFIER);
    reader.initOnBlock(blockMeta, new SingleByteBuff(ByteBuffer.wrap(bytes)));
    List<TokenizerNode> builderNodes = Lists.newArrayList();
    builder.appendNodes(builderNodes, true, true);
    int i = 0;
    for (TokenizerNode builderNode : builderNodes) {
        if (!builderNode.hasOccurrences()) {
            continue;
        }
        // we de-duped before adding to builder
        Assert.assertEquals(1, builderNode.getNumOccurrences());
        int position = builderNode.getOutputArrayOffset();
        byte[] output = reader.populateBuffer(position).copyBufferToNewArray();
        boolean same = Bytes.equals(sortedUniqueColumns.get(i).deepCopyToNewArray(), output);
        Assert.assertTrue(same);
        ++i;
    }
}
Also used: ByteRange (org.apache.hadoop.hbase.util.ByteRange), SingleByteBuff (org.apache.hadoop.hbase.nio.SingleByteBuff), ByteArrayOutputStream (java.io.ByteArrayOutputStream), ColumnReader (org.apache.hadoop.hbase.codec.prefixtree.decode.column.ColumnReader), ColumnSectionWriter (org.apache.hadoop.hbase.codec.prefixtree.encode.column.ColumnSectionWriter), TokenizerNode (org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode), Test (org.junit.Test)

Aggregations

TokenizerNode (org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode) 7
ByteArrayOutputStream (java.io.ByteArrayOutputStream) 2
Test (org.junit.Test) 2
KeyValue (org.apache.hadoop.hbase.KeyValue) 1
ColumnReader (org.apache.hadoop.hbase.codec.prefixtree.decode.column.ColumnReader) 1
PrefixTreeEncoder (org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder) 1
ColumnNodeWriter (org.apache.hadoop.hbase.codec.prefixtree.encode.column.ColumnNodeWriter) 1
ColumnSectionWriter (org.apache.hadoop.hbase.codec.prefixtree.encode.column.ColumnSectionWriter) 1
RowNodeWriter (org.apache.hadoop.hbase.codec.prefixtree.encode.row.RowNodeWriter) 1
TokenizerRowSearchResult (org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerRowSearchResult) 1
SingleByteBuff (org.apache.hadoop.hbase.nio.SingleByteBuff) 1
ByteRange (org.apache.hadoop.hbase.util.ByteRange) 1