
Example 1 with BinaryComparable

Use of org.apache.hadoop.io.BinaryComparable in project hive by apache.

The class ReduceSinkOperator, method toHiveKey.

// Serialize the keys and append the tag
protected HiveKey toHiveKey(Object obj, int tag, Integer distLength) throws SerDeException {
    BinaryComparable key = (BinaryComparable) keySerializer.serialize(obj, keyObjectInspector);
    int keyLength = key.getLength();
    if (tag == -1 || skipTag) {
        keyWritable.set(key.getBytes(), 0, keyLength);
    } else {
        keyWritable.setSize(keyLength + 1);
        System.arraycopy(key.getBytes(), 0, keyWritable.get(), 0, keyLength);
        keyWritable.get()[keyLength] = tagByte[0];
    }
    keyWritable.setDistKeyLength((distLength == null) ? keyLength : distLength);
    return keyWritable;
}
Also used: BinaryComparable (org.apache.hadoop.io.BinaryComparable)
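As a side note, the same append-a-tag-byte technique can be shown with nothing but public Hadoop types; the sketch below uses BytesWritable (itself a BinaryComparable) in place of Hive's HiveKey, and the key bytes and tag value are made up purely for illustration:

import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.BytesWritable;

public class TagAppendSketch {
    public static void main(String[] args) {
        // Pretend these are the serialized key bytes produced by a serializer.
        BinaryComparable key = new BytesWritable(new byte[] { 10, 20, 30 });
        byte tag = 2;
        // Copy the key bytes and append the tag as the trailing byte,
        // mirroring the arraycopy-plus-tag pattern in toHiveKey above.
        BytesWritable tagged = new BytesWritable();
        tagged.setSize(key.getLength() + 1);
        System.arraycopy(key.getBytes(), 0, tagged.getBytes(), 0, key.getLength());
        tagged.getBytes()[key.getLength()] = tag;
        System.out.println("tagged length = " + tagged.getLength());
    }
}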

Example 2 with BinaryComparable

Use of org.apache.hadoop.io.BinaryComparable in project hadoop by apache.

The class TestBinaryPartitioner, method testDefaultOffsets.

@Test
public void testDefaultOffsets() {
    Configuration conf = new Configuration();
    BinaryPartitioner<?> partitioner = ReflectionUtils.newInstance(BinaryPartitioner.class, conf);
    BinaryComparable key1 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
    BinaryComparable key2 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
    int partition1 = partitioner.getPartition(key1, null, 10);
    int partition2 = partitioner.getPartition(key2, null, 10);
    assertEquals(partition1, partition2);
    key1 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
    key2 = new BytesWritable(new byte[] { 6, 2, 3, 4, 5 });
    partition1 = partitioner.getPartition(key1, null, 10);
    partition2 = partitioner.getPartition(key2, null, 10);
    assertTrue(partition1 != partition2);
    key1 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
    key2 = new BytesWritable(new byte[] { 1, 2, 3, 4, 6 });
    partition1 = partitioner.getPartition(key1, null, 10);
    partition2 = partitioner.getPartition(key2, null, 10);
    assertTrue(partition1 != partition2);
}
Also used: BinaryComparable (org.apache.hadoop.io.BinaryComparable), Configuration (org.apache.hadoop.conf.Configuration), BytesWritable (org.apache.hadoop.io.BytesWritable), Test (org.junit.Test)
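Behind these assertions, BinaryPartitioner hashes the configured byte range of each key and maps the hash onto the reducer count. A minimal sketch of that kind of computation, assuming the default offsets (the whole key) and using WritableComparator.hashBytes; this is an approximation of the idea, not a copy of the class itself:

import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.WritableComparator;

public class BinaryPartitionSketch {
    // Hash every byte of the key (the default offsets) and bucket the result.
    static int partitionFor(BinaryComparable key, int numPartitions) {
        int hash = WritableComparator.hashBytes(key.getBytes(), 0, key.getLength());
        return (hash & Integer.MAX_VALUE) % numPartitions;
    }

    public static void main(String[] args) {
        BinaryComparable k1 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
        BinaryComparable k2 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
        // Identical bytes land in the same partition, as testDefaultOffsets asserts.
        System.out.println(partitionFor(k1, 10) == partitionFor(k2, 10));
    }
}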

Example 3 with BinaryComparable

Use of org.apache.hadoop.io.BinaryComparable in project hadoop by apache.

The class TestBinaryPartitioner, method testUpperBound.

@Test
public void testUpperBound() {
    Configuration conf = new Configuration();
    BinaryPartitioner.setRightOffset(conf, 4);
    BinaryPartitioner<?> partitioner = ReflectionUtils.newInstance(BinaryPartitioner.class, conf);
    BinaryComparable key1 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
    BinaryComparable key2 = new BytesWritable(new byte[] { 1, 2, 3, 4, 6 });
    int partition1 = partitioner.getPartition(key1, null, 10);
    int partition2 = partitioner.getPartition(key2, null, 10);
    assertTrue(partition1 != partition2);
}
Also used: BinaryComparable (org.apache.hadoop.io.BinaryComparable), Configuration (org.apache.hadoop.conf.Configuration), BytesWritable (org.apache.hadoop.io.BytesWritable), Test (org.junit.Test)
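setRightOffset is one of a small family of static helpers (setLeftOffset, setRightOffset, setOffsets) that store the byte range in the job configuration before submission. A minimal job-setup sketch, with the job name and key/value classes chosen only for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner;

public class BinaryPartitionerJobSketch {
    public static Job configure(Configuration conf) throws Exception {
        // Partition on bytes 0..4 of the serialized key only.
        BinaryPartitioner.setOffsets(conf, 0, 4);
        Job job = Job.getInstance(conf, "binary-partitioner-sketch");
        job.setMapOutputKeyClass(BytesWritable.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setPartitionerClass(BinaryPartitioner.class);
        return job;
    }
}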

Example 4 with BinaryComparable

Use of org.apache.hadoop.io.BinaryComparable in project hadoop by apache.

The class TotalOrderPartitioner, method setConf.

/**
   * Read in the partition file and build indexing data structures.
   * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
   * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
   * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
   * will be built. Otherwise, keys will be located using a binary search of
   * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
   * defined for this job. The input file must be sorted with the same
   * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.
   */
// keytype from conf not static
@SuppressWarnings("unchecked")
public void setConf(Configuration conf) {
    try {
        this.conf = conf;
        String parts = getPartitionFile(conf);
        final Path partFile = new Path(parts);
        final FileSystem fs = (DEFAULT_PATH.equals(parts))
            ? FileSystem.getLocal(conf) // assume in DistributedCache
            : partFile.getFileSystem(conf);
        Job job = Job.getInstance(conf);
        Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
        K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
        if (splitPoints.length != job.getNumReduceTasks() - 1) {
            throw new IOException("Wrong number of partitions in keyset");
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
        if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length,
                new byte[0],
                // limit large but not huge.
                conf.getInt(MAX_TRIE_DEPTH, 200));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), BinaryComparable (org.apache.hadoop.io.BinaryComparable), IOException (java.io.IOException), RawComparator (org.apache.hadoop.io.RawComparator), FileSystem (org.apache.hadoop.fs.FileSystem), Job (org.apache.hadoop.mapreduce.Job)
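setConf only consumes a partition file that something else has already written; the usual companion is InputSampler, which samples the job's input and writes the split points. A minimal sketch of that wiring, assuming the job's input format and paths are already configured, and with the sampler parameters and reducer count picked arbitrarily:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class TotalOrderSketch {
    public static void configure(Job job, Path partitionFile) throws Exception {
        Configuration conf = job.getConfiguration();
        // Tell the partitioner where setConf() should find the split points.
        TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
        job.setPartitionerClass(TotalOrderPartitioner.class);
        job.setNumReduceTasks(10);
        // Sample ~1% of records (capped at 10000 samples over 100 splits) and
        // write numReduceTasks - 1 sorted split points to the partition file.
        InputSampler.writePartitionFile(job,
            new InputSampler.RandomSampler<Text, Text>(0.01, 10000, 100));
    }
}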

Example 5 with BinaryComparable

Use of org.apache.hadoop.io.BinaryComparable in project hive by apache.

The class VectorDeserializeOrcWriter, method writeOneRow.

@Override
public void writeOneRow(Writable row) throws IOException {
    if (sourceBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
        flushBatch();
    }
    BinaryComparable binComp = (BinaryComparable) row;
    deserializeRead.set(binComp.getBytes(), 0, binComp.getLength());
    // Deserialize and append new row using the current batch size as the index.
    try {
        // Not using ByRef now since it's unsafe for text readers. Might be safe for others.
        vectorDeserializeRow.deserialize(sourceBatch, sourceBatch.size++);
    } catch (Exception e) {
        throw new IOException("DeserializeRead detail: " + vectorDeserializeRow.getDetailedReadPositionString(), e);
    }
}
Also used: BinaryComparable (org.apache.hadoop.io.BinaryComparable), IOException (java.io.IOException), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
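The cast at the top of writeOneRow works because the common Writable key types, Text and BytesWritable among them, extend BinaryComparable, so any of them can hand its raw bytes to a byte-oriented reader. A trivial illustration:

import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;

public class BinaryComparableCastSketch {
    public static void main(String[] args) {
        // Both Text and BytesWritable expose their backing bytes through BinaryComparable.
        BinaryComparable fromText = new Text("row data");
        BinaryComparable fromBytes = new BytesWritable(new byte[] { 1, 2, 3 });
        System.out.println(fromText.getLength() + " " + fromBytes.getLength());
    }
}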

Aggregations

BinaryComparable (org.apache.hadoop.io.BinaryComparable): 11
BytesWritable (org.apache.hadoop.io.BytesWritable): 5
Configuration (org.apache.hadoop.conf.Configuration): 4
Test (org.junit.Test): 4
IOException (java.io.IOException): 2
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 2
Slice (io.airlift.slice.Slice): 1
ArrayList (java.util.ArrayList): 1
FileSystem (org.apache.hadoop.fs.FileSystem): 1
Path (org.apache.hadoop.fs.Path): 1
ExecMapperContext (org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext): 1
HiveCharWritable (org.apache.hadoop.hive.serde2.io.HiveCharWritable): 1
HiveVarcharWritable (org.apache.hadoop.hive.serde2.io.HiveVarcharWritable): 1
ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef): 1
RawComparator (org.apache.hadoop.io.RawComparator): 1
Text (org.apache.hadoop.io.Text): 1
Job (org.apache.hadoop.mapreduce.Job): 1