Search in sources :

Example 6 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project hive by apache.

the class SparkReduceRecordHandler method processVectors.

/**
   * @param values
   * @return true if it is not done and can take more inputs
   */
private <E> boolean processVectors(Iterator<E> values, byte tag) throws HiveException {
    VectorizedRowBatch batch = batches[tag];
    batch.reset();
    buffer.reset();
    /* deserialize key into columns */
    VectorizedBatchUtil.addRowToBatchFrom(keyObject, keyStructInspector, 0, 0, batch, buffer);
    for (int i = 0; i < keysColumnOffset; i++) {
        VectorizedBatchUtil.setRepeatingColumn(batch, i);
    }
    int rowIdx = 0;
    try {
        while (values.hasNext()) {
            /* deserialize value into columns */
            BytesWritable valueWritable = (BytesWritable) values.next();
            Object valueObj = deserializeValue(valueWritable, tag);
            VectorizedBatchUtil.addRowToBatchFrom(valueObj, valueStructInspectors[tag], rowIdx, keysColumnOffset, batch, buffer);
            rowIdx++;
            if (rowIdx >= BATCH_SIZE) {
                VectorizedBatchUtil.setBatchSize(batch, rowIdx);
                reducer.process(batch, tag);
                rowIdx = 0;
                if (isLogInfoEnabled) {
                    logMemoryInfo();
                }
            }
        }
        if (rowIdx > 0) {
            VectorizedBatchUtil.setBatchSize(batch, rowIdx);
            reducer.process(batch, tag);
        }
        if (isLogInfoEnabled) {
            logMemoryInfo();
        }
    } catch (Exception e) {
        String rowString = null;
        try {
            rowString = batch.toString();
        } catch (Exception e2) {
            rowString = "[Error getting row data with exception " + StringUtils.stringifyException(e2) + " ]";
        }
        throw new HiveException("Error while processing vector batch (tag=" + tag + ") " + rowString, e);
    }
    // give me more
    return true;
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) BytesWritable(org.apache.hadoop.io.BytesWritable) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)

Example 7 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project hive by apache.

the class VerifyFast method verifyDeserializeRead.

public static void verifyDeserializeRead(DeserializeRead deserializeRead, PrimitiveTypeInfo primitiveTypeInfo, Writable writable) throws IOException {
    boolean isNull;
    isNull = !deserializeRead.readNextField();
    if (isNull) {
        if (writable != null) {
            TestCase.fail("Field reports null but object is not null (class " + writable.getClass().getName() + ", " + writable.toString() + ")");
        }
        return;
    } else if (writable == null) {
        TestCase.fail("Field report not null but object is null");
    }
    switch(primitiveTypeInfo.getPrimitiveCategory()) {
        case BOOLEAN:
            {
                boolean value = deserializeRead.currentBoolean;
                if (!(writable instanceof BooleanWritable)) {
                    TestCase.fail("Boolean expected writable not Boolean");
                }
                boolean expected = ((BooleanWritable) writable).get();
                if (value != expected) {
                    TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")");
                }
            }
            break;
        case BYTE:
            {
                byte value = deserializeRead.currentByte;
                if (!(writable instanceof ByteWritable)) {
                    TestCase.fail("Byte expected writable not Byte");
                }
                byte expected = ((ByteWritable) writable).get();
                if (value != expected) {
                    TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")");
                }
            }
            break;
        case SHORT:
            {
                short value = deserializeRead.currentShort;
                if (!(writable instanceof ShortWritable)) {
                    TestCase.fail("Short expected writable not Short");
                }
                short expected = ((ShortWritable) writable).get();
                if (value != expected) {
                    TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")");
                }
            }
            break;
        case INT:
            {
                int value = deserializeRead.currentInt;
                if (!(writable instanceof IntWritable)) {
                    TestCase.fail("Integer expected writable not Integer");
                }
                int expected = ((IntWritable) writable).get();
                if (value != expected) {
                    TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")");
                }
            }
            break;
        case LONG:
            {
                long value = deserializeRead.currentLong;
                if (!(writable instanceof LongWritable)) {
                    TestCase.fail("Long expected writable not Long");
                }
                Long expected = ((LongWritable) writable).get();
                if (value != expected) {
                    TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")");
                }
            }
            break;
        case FLOAT:
            {
                float value = deserializeRead.currentFloat;
                if (!(writable instanceof FloatWritable)) {
                    TestCase.fail("Float expected writable not Float");
                }
                float expected = ((FloatWritable) writable).get();
                if (value != expected) {
                    TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")");
                }
            }
            break;
        case DOUBLE:
            {
                double value = deserializeRead.currentDouble;
                if (!(writable instanceof DoubleWritable)) {
                    TestCase.fail("Double expected writable not Double");
                }
                double expected = ((DoubleWritable) writable).get();
                if (value != expected) {
                    TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")");
                }
            }
            break;
        case STRING:
            {
                byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
                Text text = new Text(stringBytes);
                String string = text.toString();
                String expected = ((Text) writable).toString();
                if (!string.equals(expected)) {
                    TestCase.fail("String field mismatch (expected '" + expected + "' found '" + string + "')");
                }
            }
            break;
        case CHAR:
            {
                byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
                Text text = new Text(stringBytes);
                String string = text.toString();
                HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength());
                HiveChar expected = ((HiveCharWritable) writable).getHiveChar();
                if (!hiveChar.equals(expected)) {
                    TestCase.fail("Char field mismatch (expected '" + expected + "' found '" + hiveChar + "')");
                }
            }
            break;
        case VARCHAR:
            {
                byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
                Text text = new Text(stringBytes);
                String string = text.toString();
                HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
                HiveVarchar expected = ((HiveVarcharWritable) writable).getHiveVarchar();
                if (!hiveVarchar.equals(expected)) {
                    TestCase.fail("Varchar field mismatch (expected '" + expected + "' found '" + hiveVarchar + "')");
                }
            }
            break;
        case DECIMAL:
            {
                HiveDecimal value = deserializeRead.currentHiveDecimalWritable.getHiveDecimal();
                if (value == null) {
                    TestCase.fail("Decimal field evaluated to NULL");
                }
                HiveDecimal expected = ((HiveDecimalWritable) writable).getHiveDecimal();
                if (!value.equals(expected)) {
                    DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
                    int precision = decimalTypeInfo.getPrecision();
                    int scale = decimalTypeInfo.getScale();
                    TestCase.fail("Decimal field mismatch (expected " + expected.toString() + " found " + value.toString() + ") precision " + precision + ", scale " + scale);
                }
            }
            break;
        case DATE:
            {
                Date value = deserializeRead.currentDateWritable.get();
                Date expected = ((DateWritable) writable).get();
                if (!value.equals(expected)) {
                    TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
                }
            }
            break;
        case TIMESTAMP:
            {
                Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp();
                Timestamp expected = ((TimestampWritable) writable).getTimestamp();
                if (!value.equals(expected)) {
                    TestCase.fail("Timestamp field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
                }
            }
            break;
        case INTERVAL_YEAR_MONTH:
            {
                HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth();
                HiveIntervalYearMonth expected = ((HiveIntervalYearMonthWritable) writable).getHiveIntervalYearMonth();
                if (!value.equals(expected)) {
                    TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
                }
            }
            break;
        case INTERVAL_DAY_TIME:
            {
                HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime();
                HiveIntervalDayTime expected = ((HiveIntervalDayTimeWritable) writable).getHiveIntervalDayTime();
                if (!value.equals(expected)) {
                    TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
                }
            }
            break;
        case BINARY:
            {
                byte[] byteArray = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
                BytesWritable bytesWritable = (BytesWritable) writable;
                byte[] expected = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
                if (byteArray.length != expected.length) {
                    TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected) + " found " + Arrays.toString(byteArray) + ")");
                }
                for (int b = 0; b < byteArray.length; b++) {
                    if (byteArray[b] != expected[b]) {
                        TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected) + " found " + Arrays.toString(byteArray) + ")");
                    }
                }
            }
            break;
        default:
            throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory());
    }
}
Also used : VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) Timestamp(java.sql.Timestamp) Date(java.sql.Date) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) FloatWritable(org.apache.hadoop.io.FloatWritable) HiveIntervalYearMonth(org.apache.hadoop.hive.common.type.HiveIntervalYearMonth) BooleanWritable(org.apache.hadoop.io.BooleanWritable) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveIntervalDayTime(org.apache.hadoop.hive.common.type.HiveIntervalDayTime)

Example 8 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.

the class TestCombineSequenceFileInputFormat method testFormat.

@Test(timeout = 10000)
public void testFormat() throws IOException, InterruptedException {
    Job job = Job.getInstance(conf);
    Random random = new Random();
    long seed = random.nextLong();
    random.setSeed(seed);
    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);
    final int length = 10000;
    final int numFiles = 10;
    // create files with a variety of lengths
    createFiles(length, numFiles, random, job);
    TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    // create a combine split for the files
    InputFormat<IntWritable, BytesWritable> format = new CombineSequenceFileInputFormat<IntWritable, BytesWritable>();
    for (int i = 0; i < 3; i++) {
        int numSplits = random.nextInt(length / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
        LOG.info("splitting: requesting = " + numSplits);
        List<InputSplit> splits = format.getSplits(job);
        LOG.info("splitting: got =        " + splits.size());
        // we should have a single split as the length is comfortably smaller than
        // the block size
        assertEquals("We got more than one splits!", 1, splits.size());
        InputSplit split = splits.get(0);
        assertEquals("It should be CombineFileSplit", CombineFileSplit.class, split.getClass());
        // check the split
        BitSet bits = new BitSet(length);
        RecordReader<IntWritable, BytesWritable> reader = format.createRecordReader(split, context);
        MapContext<IntWritable, BytesWritable, IntWritable, BytesWritable> mcontext = new MapContextImpl<IntWritable, BytesWritable, IntWritable, BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        assertEquals("reader class is CombineFileRecordReader.", CombineFileRecordReader.class, reader.getClass());
        try {
            while (reader.nextKeyValue()) {
                IntWritable key = reader.getCurrentKey();
                BytesWritable value = reader.getCurrentValue();
                assertNotNull("Value should not be null.", value);
                final int k = key.get();
                LOG.debug("read " + k);
                assertFalse("Key in multiple partitions.", bits.get(k));
                bits.set(k);
            }
        } finally {
            reader.close();
        }
        assertEquals("Some keys in no partition.", length, bits.cardinality());
    }
}
Also used : MapContextImpl(org.apache.hadoop.mapreduce.task.MapContextImpl) BitSet(java.util.BitSet) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) BytesWritable(org.apache.hadoop.io.BytesWritable) Random(java.util.Random) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Example 9 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.

the class TestCombineSequenceFileInputFormat method createFiles.

private static void createFiles(int length, int numFiles, Random random, Job job) throws IOException {
    Range[] ranges = createRanges(length, numFiles, random);
    for (int i = 0; i < numFiles; i++) {
        Path file = new Path(workDir, "test_" + i + ".seq");
        // create a file with length entries
        @SuppressWarnings("deprecation") SequenceFile.Writer writer = SequenceFile.createWriter(localFs, job.getConfiguration(), file, IntWritable.class, BytesWritable.class);
        Range range = ranges[i];
        try {
            for (int j = range.start; j < range.end; j++) {
                IntWritable key = new IntWritable(j);
                byte[] data = new byte[random.nextInt(10)];
                random.nextBytes(data);
                BytesWritable value = new BytesWritable(data);
                writer.append(key, value);
            }
        } finally {
            writer.close();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SequenceFile(org.apache.hadoop.io.SequenceFile) BytesWritable(org.apache.hadoop.io.BytesWritable) IntWritable(org.apache.hadoop.io.IntWritable)

Example 10 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.

the class TestMRSequenceFileAsBinaryInputFormat method testBinary.

@Test
public void testBinary() throws IOException, InterruptedException {
    Job job = Job.getInstance();
    FileSystem fs = FileSystem.getLocal(job.getConfiguration());
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "testbinary.seq");
    Random r = new Random();
    long seed = r.nextLong();
    r.setSeed(seed);
    fs.delete(dir, true);
    FileInputFormat.setInputPaths(job, dir);
    Text tkey = new Text();
    Text tval = new Text();
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job.getConfiguration(), file, Text.class, Text.class);
    try {
        for (int i = 0; i < RECORDS; ++i) {
            tkey.set(Integer.toString(r.nextInt(), 36));
            tval.set(Long.toString(r.nextLong(), 36));
            writer.append(tkey, tval);
        }
    } finally {
        writer.close();
    }
    TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    InputFormat<BytesWritable, BytesWritable> bformat = new SequenceFileAsBinaryInputFormat();
    int count = 0;
    r.setSeed(seed);
    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();
    Text cmpkey = new Text();
    Text cmpval = new Text();
    DataInputBuffer buf = new DataInputBuffer();
    FileInputFormat.setInputPaths(job, file);
    for (InputSplit split : bformat.getSplits(job)) {
        RecordReader<BytesWritable, BytesWritable> reader = bformat.createRecordReader(split, context);
        MapContext<BytesWritable, BytesWritable, BytesWritable, BytesWritable> mcontext = new MapContextImpl<BytesWritable, BytesWritable, BytesWritable, BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        try {
            while (reader.nextKeyValue()) {
                bkey = reader.getCurrentKey();
                bval = reader.getCurrentValue();
                tkey.set(Integer.toString(r.nextInt(), 36));
                tval.set(Long.toString(r.nextLong(), 36));
                buf.reset(bkey.getBytes(), bkey.getLength());
                cmpkey.readFields(buf);
                buf.reset(bval.getBytes(), bval.getLength());
                cmpval.readFields(buf);
                assertTrue("Keys don't match: " + "*" + cmpkey.toString() + ":" + tkey.toString() + "*", cmpkey.toString().equals(tkey.toString()));
                assertTrue("Vals don't match: " + "*" + cmpval.toString() + ":" + tval.toString() + "*", cmpval.toString().equals(tval.toString()));
                ++count;
            }
        } finally {
            reader.close();
        }
    }
    assertEquals("Some records not found", RECORDS, count);
}
Also used : Path(org.apache.hadoop.fs.Path) MapContextImpl(org.apache.hadoop.mapreduce.task.MapContextImpl) Text(org.apache.hadoop.io.Text) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) BytesWritable(org.apache.hadoop.io.BytesWritable) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) Random(java.util.Random) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Test(org.junit.Test)

Aggregations

BytesWritable (org.apache.hadoop.io.BytesWritable)339 Test (org.junit.Test)92 Text (org.apache.hadoop.io.Text)81 LongWritable (org.apache.hadoop.io.LongWritable)66 IntWritable (org.apache.hadoop.io.IntWritable)54 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)51 ArrayList (java.util.ArrayList)48 List (java.util.List)48 Path (org.apache.hadoop.fs.Path)47 IOException (java.io.IOException)42 Configuration (org.apache.hadoop.conf.Configuration)41 FloatWritable (org.apache.hadoop.io.FloatWritable)37 Writable (org.apache.hadoop.io.Writable)36 BooleanWritable (org.apache.hadoop.io.BooleanWritable)35 FileSystem (org.apache.hadoop.fs.FileSystem)28 SequenceFile (org.apache.hadoop.io.SequenceFile)27 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)26 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)26 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)25 Random (java.util.Random)24