Search in sources :

Example 61 with ByteWritable

use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

the class TestOrcFile method testZeroCopySeek.

@Test
public void testZeroCopySeek() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector).stripeSize(200000).bufferSize(65536).rowIndexStride(1000));
    Random rand = new Random(42);
    final int COUNT = 32768;
    long[] intValues = new long[COUNT];
    double[] doubleValues = new double[COUNT];
    String[] stringValues = new String[COUNT];
    BytesWritable[] byteValues = new BytesWritable[COUNT];
    String[] words = new String[128];
    for (int i = 0; i < words.length; ++i) {
        words[i] = Integer.toHexString(rand.nextInt());
    }
    for (int i = 0; i < COUNT / 2; ++i) {
        intValues[2 * i] = rand.nextLong();
        intValues[2 * i + 1] = intValues[2 * i];
        stringValues[2 * i] = words[rand.nextInt(words.length)];
        stringValues[2 * i + 1] = stringValues[2 * i];
    }
    for (int i = 0; i < COUNT; ++i) {
        doubleValues[i] = rand.nextDouble();
        byte[] buf = new byte[20];
        rand.nextBytes(buf);
        byteValues[i] = new BytesWritable(buf);
    }
    for (int i = 0; i < COUNT; ++i) {
        writer.addRow(createRandomRow(intValues, doubleValues, stringValues, byteValues, words, i));
    }
    writer.close();
    writer = null;
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    assertEquals(COUNT, reader.getNumberOfRows());
    /* enable zero copy record reader */
    Configuration conf = new Configuration();
    conf.setBoolean(OrcConf.USE_ZEROCOPY.getHiveConfName(), true);
    RecordReader rows = reader.rows();
    /* all tests are identical to the other seek() tests */
    OrcStruct row = null;
    for (int i = COUNT - 1; i >= 0; --i) {
        // we load the previous buffer of rows
        if (i % COUNT == COUNT - 1) {
            rows.seekToRow(i - (COUNT - 1));
        }
        rows.seekToRow(i);
        row = (OrcStruct) rows.next(row);
        BigRow expected = createRandomRow(intValues, doubleValues, stringValues, byteValues, words, i);
        assertEquals(expected.boolean1.booleanValue(), ((BooleanWritable) row.getFieldValue(0)).get());
        assertEquals(expected.byte1.byteValue(), ((ByteWritable) row.getFieldValue(1)).get());
        assertEquals(expected.short1.shortValue(), ((ShortWritable) row.getFieldValue(2)).get());
        assertEquals(expected.int1.intValue(), ((IntWritable) row.getFieldValue(3)).get());
        assertEquals(expected.long1.longValue(), ((LongWritable) row.getFieldValue(4)).get());
        assertEquals(expected.float1.floatValue(), ((FloatWritable) row.getFieldValue(5)).get(), 0.0001);
        assertEquals(expected.double1.doubleValue(), ((DoubleWritable) row.getFieldValue(6)).get(), 0.0001);
        assertEquals(expected.bytes1, row.getFieldValue(7));
        assertEquals(expected.string1, row.getFieldValue(8));
        List<InnerStruct> expectedList = expected.middle.list;
        List<OrcStruct> actualList = (List) ((OrcStruct) row.getFieldValue(9)).getFieldValue(0);
        compareList(expectedList, actualList);
        compareList(expected.list, (List) row.getFieldValue(10));
    }
    rows.close();
    Iterator<StripeInformation> stripeIterator = reader.getStripes().iterator();
    long offsetOfStripe2 = 0;
    long offsetOfStripe4 = 0;
    long lastRowOfStripe2 = 0;
    for (int i = 0; i < 5; ++i) {
        StripeInformation stripe = stripeIterator.next();
        if (i < 2) {
            lastRowOfStripe2 += stripe.getNumberOfRows();
        } else if (i == 2) {
            offsetOfStripe2 = stripe.getOffset();
            lastRowOfStripe2 += stripe.getNumberOfRows() - 1;
        } else if (i == 4) {
            offsetOfStripe4 = stripe.getOffset();
        }
    }
    boolean[] columns = new boolean[reader.getStatistics().length];
    // long colulmn
    columns[5] = true;
    // text column
    columns[9] = true;
    /* use zero copy record reader */
    rows = reader.rowsOptions(new Reader.Options().range(offsetOfStripe2, offsetOfStripe4 - offsetOfStripe2).include(columns));
    rows.seekToRow(lastRowOfStripe2);
    for (int i = 0; i < 2; ++i) {
        row = (OrcStruct) rows.next(row);
        BigRow expected = createRandomRow(intValues, doubleValues, stringValues, byteValues, words, (int) (lastRowOfStripe2 + i));
        assertEquals(expected.long1.longValue(), ((LongWritable) row.getFieldValue(4)).get());
        assertEquals(expected.string1, row.getFieldValue(8));
    }
    rows.close();
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Random(java.util.Random) List(java.util.List) ArrayList(java.util.ArrayList) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) BytesWritable(org.apache.hadoop.io.BytesWritable) StripeInformation(org.apache.orc.StripeInformation) Test(org.junit.Test)

Example 62 with ByteWritable

use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

the class PerformTestRCFileAndSeqFile method writeSeqenceFileTest.

private void writeSeqenceFileTest(FileSystem fs, int rowCount, Path file, int columnNum, CompressionCodec codec) throws IOException {
    byte[][] columnRandom;
    resetRandomGenerators();
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    columnRandom = new byte[columnNum][];
    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }
    // zero length key is not allowed by block compress writer, so we use a byte
    // writable
    ByteWritable key = new ByteWritable();
    SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, file, ByteWritable.class, BytesRefArrayWritable.class, CompressionType.BLOCK, codec);
    for (int i = 0; i < rowCount; i++) {
        nextRandomRow(columnRandom, bytes);
        seqWriter.append(key, bytes);
    }
    seqWriter.close();
}
Also used : BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) SequenceFile(org.apache.hadoop.io.SequenceFile) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)

Example 63 with ByteWritable

use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

the class PerformTestRCFileAndSeqFile method performSequenceFileRead.

public void performSequenceFileRead(FileSystem fs, int count, Path file) throws IOException {
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
    ByteWritable key = new ByteWritable();
    BytesRefArrayWritable val = new BytesRefArrayWritable();
    for (int i = 0; i < count; i++) {
        reader.next(key, val);
    }
}
Also used : SequenceFile(org.apache.hadoop.io.SequenceFile) BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable)

Example 64 with ByteWritable

use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

the class TestGenericUDFOPMinus method testByteMinusShort.

@Test
public void testByteMinusShort() throws HiveException {
    GenericUDFOPMinus udf = new GenericUDFOPMinus();
    ByteWritable left = new ByteWritable((byte) 4);
    ShortWritable right = new ShortWritable((short) 6);
    ObjectInspector[] inputOIs = { PrimitiveObjectInspectorFactory.writableByteObjectInspector, PrimitiveObjectInspectorFactory.writableShortObjectInspector };
    DeferredObject[] args = { new DeferredJavaObject(left), new DeferredJavaObject(right) };
    PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs);
    Assert.assertEquals(oi.getTypeInfo(), TypeInfoFactory.shortTypeInfo);
    ShortWritable res = (ShortWritable) udf.evaluate(args);
    Assert.assertEquals(-2, res.get());
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) DeferredJavaObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) DeferredObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) Test(org.junit.Test)

Example 65 with ByteWritable

use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

the class TestGenericUDFPower method testBytePowerShort.

@Test
public void testBytePowerShort() throws HiveException {
    GenericUDFPower udf = new GenericUDFPower();
    ByteWritable left = new ByteWritable((byte) 2);
    ShortWritable right = new ShortWritable((short) 4);
    ObjectInspector[] inputOIs = { PrimitiveObjectInspectorFactory.writableByteObjectInspector, PrimitiveObjectInspectorFactory.writableShortObjectInspector };
    DeferredObject[] args = { new DeferredJavaObject(left), new DeferredJavaObject(right) };
    PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs);
    Assert.assertEquals(TypeInfoFactory.doubleTypeInfo, oi.getTypeInfo());
    DoubleWritable res = (DoubleWritable) udf.evaluate(args);
    Assert.assertEquals(new Double(16), new Double(res.get()));
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) DeferredJavaObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) DeferredObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) Test(org.junit.Test)

Aggregations

ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)75 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)49 IntWritable (org.apache.hadoop.io.IntWritable)48 LongWritable (org.apache.hadoop.io.LongWritable)44 Text (org.apache.hadoop.io.Text)43 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)42 FloatWritable (org.apache.hadoop.io.FloatWritable)35 BooleanWritable (org.apache.hadoop.io.BooleanWritable)33 Test (org.junit.Test)27 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)26 BytesWritable (org.apache.hadoop.io.BytesWritable)25 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)22 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)22 DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject)21 DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject)21 TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable)21 ArrayList (java.util.ArrayList)19 DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable)18 HiveIntervalDayTimeWritable (org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable)16 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)14