Example 11 with BytesRefArrayWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.

From the class RCFileOutputFormat, method getRecordWriter.

/**
 * {@inheritDoc}
 */
@Override
public RecordWriter<WritableComparable, BytesRefArrayWritable> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
    Path outputPath = getWorkOutputPath(job);
    FileSystem fs = outputPath.getFileSystem(job);
    Path file = new Path(outputPath, name);
    CompressionCodec codec = null;
    if (getCompressOutput(job)) {
        Class<?> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, job);
    }
    final RCFile.Writer out = new RCFile.Writer(fs, job, file, progress, codec);
    return new RecordWriter<WritableComparable, BytesRefArrayWritable>() {

        @Override
        public void close(Reporter reporter) throws IOException {
            out.close();
        }

        @Override
        public void write(WritableComparable key, BytesRefArrayWritable value) throws IOException {
            out.append(value);
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) RecordWriter(org.apache.hadoop.mapred.RecordWriter) BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) WritableComparable(org.apache.hadoop.io.WritableComparable) FileSystem(org.apache.hadoop.fs.FileSystem) Reporter(org.apache.hadoop.mapred.Reporter) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec)
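
For context, here is a minimal driver sketch, not taken from the Hive sources: the job name, column count, and output path are illustrative. It shows one way to wire this output format into a classic mapred job so that the compression branch above is exercised.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class RCFileWriteJobSketch {

    public static JobConf configure() {
        JobConf job = new JobConf(RCFileWriteJobSketch.class);
        job.setJobName("rcfile-write"); // illustrative job name
        // RCFile needs the column count before any writer is created (see Example 13).
        RCFileOutputFormat.setColumnNumber(job, 8);
        job.setOutputFormat(RCFileOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path("/tmp/rcfile-out")); // illustrative path
        // With compression enabled, getRecordWriter above resolves the codec via
        // getOutputCompressorClass, falling back to DefaultCodec.
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
        return job;
    }
}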

Example 12 with BytesRefArrayWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.

From the class PerformTestRCFileAndSeqFile, method performRCFileReadFirstColumnTest.

public int performRCFileReadFirstColumnTest(FileSystem fs, Path file, int allColumnsNumber, boolean checkCorrect) throws IOException {
    byte[][] checkBytes = null;
    BytesRefArrayWritable checkRow = new BytesRefArrayWritable(allColumnsNumber);
    if (checkCorrect) {
        resetRandomGenerators();
        checkBytes = new byte[allColumnsNumber][];
    }
    int actualReadCount = 0;
    java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
    readCols.add(Integer.valueOf(0));
    ColumnProjectionUtils.appendReadColumns(conf, readCols);
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    while (reader.next(rowID)) {
        reader.getCurrentRow(cols);
        boolean ok = true;
        if (checkCorrect) {
            nextRandomRow(checkBytes, checkRow);
            ok = ok && (checkRow.get(0).equals(cols.get(0)));
        }
        if (!ok) {
            throw new IllegalStateException("Read value does not match written value.");
        }
        actualReadCount++;
    }
    // Close the reader so the underlying stream is released.
    reader.close();
    return actualReadCount;
}
Also used : BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) LongWritable(org.apache.hadoop.io.LongWritable)
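
The interesting call here is ColumnProjectionUtils.appendReadColumns, which is what lets the reader skip the bytes of every unprojected column. Below is a standalone sketch of the same read-one-column pattern; the file path is illustrative and error handling is reduced to a finally block.

import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.RCFile;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.io.LongWritable;

public class ReadFirstColumnSketch {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Project only column 0 so the reader skips the other columns' bytes.
        ColumnProjectionUtils.appendReadColumns(conf, Arrays.asList(0));
        Path file = new Path("/tmp/data.rc"); // illustrative path
        FileSystem fs = file.getFileSystem(conf);
        RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
        LongWritable rowID = new LongWritable();
        BytesRefArrayWritable cols = new BytesRefArrayWritable();
        try {
            while (reader.next(rowID)) {
                reader.getCurrentRow(cols);
                // Only column 0 carries data; projected-out columns are empty refs.
                System.out.println(cols.get(0).getLength());
            }
        } finally {
            reader.close();
        }
    }
}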

Example 13 with BytesRefArrayWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.

From the class PerformTestRCFileAndSeqFile, method writeRCFileTest.

private void writeRCFileTest(FileSystem fs, int rowCount, Path file, int columnNum, CompressionCodec codec) throws IOException {
    fs.delete(file, true);
    resetRandomGenerators();
    RCFileOutputFormat.setColumnNumber(conf, columnNum);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);
    byte[][] columnRandom = new byte[columnNum][];
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }
    for (int i = 0; i < rowCount; i++) {
        nextRandomRow(columnRandom, bytes);
        writer.append(bytes);
    }
    writer.close();
}
Also used : BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)
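
nextRandomRow is a private helper in the same test class. The sketch below, with made-up column values, illustrates the reuse pattern the loop above sets up: each pre-allocated BytesRefWritable cell is re-pointed at a fresh buffer via set rather than replaced with a new writable.

import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

public class FillRowSketch {

    public static void main(String[] args) {
        int columnNum = 3;
        BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
        for (int i = 0; i < columnNum; i++) {
            bytes.set(i, new BytesRefWritable());
        }
        byte[][] columnRandom = new byte[columnNum][];
        for (int i = 0; i < columnNum; i++) {
            // Illustrative values; the real test fills these with random bytes.
            columnRandom[i] = ("col" + i).getBytes();
            // Reuse the existing cell: point it at the new buffer.
            bytes.get(i).set(columnRandom[i], 0, columnRandom[i].length);
        }
        System.out.println(bytes.size()); // 3
    }
}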

Example 14 with BytesRefArrayWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.

From the class TestStatsSerde, method testColumnarSerDe.

/**
 * Test ColumnarSerDe
 */
@Test
public void testColumnarSerDe() throws Throwable {
    try {
        System.out.println("test: testColumnarSerde");
        // Create the SerDe
        ColumnarSerDe serDe = new ColumnarSerDe();
        Configuration conf = new Configuration();
        Properties tbl = createProperties();
        serDe.initialize(conf, tbl, null);
        // Data
        BytesRefArrayWritable braw = new BytesRefArrayWritable(8);
        String[] data = { "123", "456", "789", "1000", "5.3", "hive and hadoop", "1.", "NULL" };
        for (int i = 0; i < 8; i++) {
            braw.set(i, new BytesRefWritable(data[i].getBytes()));
        }
        // Test
        deserializeAndSerializeColumnar(serDe, braw, data);
        System.out.println("test: testColumnarSerde - OK");
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) Configuration(org.apache.hadoop.conf.Configuration) BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) Properties(java.util.Properties) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable) Test(org.junit.Test)
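
createProperties is defined elsewhere in the test class. The sketch below shows a plausible version of it for this eight-value row; the column names are assumptions, while the property keys are the standard serdeConstants ones, with the types colon-separated as Hive expects.

import java.util.Properties;
import org.apache.hadoop.hive.serde.serdeConstants;

public class ColumnarSerDePropsSketch {

    static Properties createProperties() {
        Properties tbl = new Properties();
        // Eight columns to match the eight-element BytesRefArrayWritable above;
        // the names are hypothetical.
        tbl.setProperty(serdeConstants.LIST_COLUMNS,
                "abyte,ashort,aint,along,adouble,astring,anullint,anullstring");
        tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
                "tinyint:smallint:int:bigint:double:string:int:string");
        return tbl;
    }
}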

Example 15 with BytesRefArrayWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.

From the class TestLazyBinaryColumnarSerDe, method testSerDeInnerNulls.

@Test
public void testSerDeInnerNulls() throws SerDeException {
    StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA);
    String cols = ObjectInspectorUtils.getFieldNames(oi);
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, cols);
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
    LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
    serde.initialize(new Configuration(), props, null);
    OuterStruct outerStruct = new OuterStruct();
    outerStruct.mByte = 1;
    outerStruct.mShort = 2;
    outerStruct.mInt = 3;
    outerStruct.mLong = 4L;
    outerStruct.mFloat = 5.01f;
    outerStruct.mDouble = 6.001d;
    outerStruct.mString = "seven";
    outerStruct.mBA = new byte[] { '3' };
    InnerStruct is1 = new InnerStruct(null, 9L);
    InnerStruct is2 = new InnerStruct(10, null);
    outerStruct.mArray = new ArrayList<InnerStruct>(2);
    outerStruct.mArray.add(is1);
    outerStruct.mArray.add(is2);
    outerStruct.mMap = new HashMap<String, InnerStruct>();
    outerStruct.mMap.put(null, new InnerStruct(13, 14L));
    outerStruct.mMap.put("fifteen", null);
    outerStruct.mStruct = new InnerStruct(null, null);
    BytesRefArrayWritable braw = (BytesRefArrayWritable) serde.serialize(outerStruct, oi);
    ObjectInspector out_oi = serde.getObjectInspector();
    Object out_o = serde.deserialize(braw);
    if (0 != ObjectInspectorUtils.compare(outerStruct, oi, out_o, out_oi, new SimpleMapEqualComparer())) {
        System.out.println("expected = " + SerDeUtils.getJSONString(outerStruct, oi));
        System.out.println("actual = " + SerDeUtils.getJSONString(out_o, out_oi));
        fail("Deserialized object does not compare");
    }
}
Also used : StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) SimpleMapEqualComparer(org.apache.hadoop.hive.serde2.objectinspector.SimpleMapEqualComparer) Configuration(org.apache.hadoop.conf.Configuration) Properties(java.util.Properties) Test(org.junit.Test)
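
OuterStruct and InnerStruct are reflection fixtures defined in the same test class. Their shape can be inferred from the assignments above; the sketch below assumes primitive outer fields (so that mByte = 1 compiles) and boxed inner fields (so that the inner nulls being tested are representable). The InnerStruct field names are hypothetical.

import java.util.List;
import java.util.Map;

// Inferred fixture shapes; not a verbatim copy of the test's definitions.
class InnerStruct {
    Integer mInt; // hypothetical names; set via the two-argument constructor above
    Long mLong;

    InnerStruct(Integer i, Long l) {
        mInt = i;
        mLong = l;
    }
}

class OuterStruct {
    byte mByte;
    short mShort;
    int mInt;
    long mLong;
    float mFloat;
    double mDouble;
    String mString;
    byte[] mBA;
    List<InnerStruct> mArray;
    Map<String, InnerStruct> mMap;
    InnerStruct mStruct;
}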

Aggregations

BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable): 28 uses
BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable): 17 uses
Configuration (org.apache.hadoop.conf.Configuration): 13 uses
LongWritable (org.apache.hadoop.io.LongWritable): 12 uses
Path (org.apache.hadoop.fs.Path): 11 uses
Test (org.junit.Test): 11 uses
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 10 uses
Properties (java.util.Properties): 7 uses
RecordReader (org.apache.hadoop.mapred.RecordReader): 7 uses
DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec): 5 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 4 uses
RCFile (org.apache.hadoop.hive.ql.io.RCFile): 4 uses
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 4 uses
JobConf (org.apache.hadoop.mapred.JobConf): 4 uses
IOException (java.io.IOException): 3 uses
ColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe): 3 uses
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 3 uses
SimpleMapEqualComparer (org.apache.hadoop.hive.serde2.objectinspector.SimpleMapEqualComparer): 3 uses
CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 3 uses
Random (java.util.Random): 2 uses