Example 16 with BytesRefArrayWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.

From class TestLazyBinaryColumnarSerDe, method testSerDeOuterNulls:

@Test
public void testSerDeOuterNulls() throws SerDeException {
    StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA);
    String cols = ObjectInspectorUtils.getFieldNames(oi);
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, cols);
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
    LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
    serde.initialize(new Configuration(), props, null);
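    // OuterStruct is left with all of its fields null, so the round trip
    // below exercises the serde's outer-null handling end to end.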
    OuterStruct outerStruct = new OuterStruct();
    BytesRefArrayWritable braw = (BytesRefArrayWritable) serde.serialize(outerStruct, oi);
    ObjectInspector out_oi = serde.getObjectInspector();
    Object out_o = serde.deserialize(braw);
    if (0 != ObjectInspectorUtils.compare(outerStruct, oi, out_o, out_oi, new SimpleMapEqualComparer())) {
        System.out.println("expected = " + SerDeUtils.getJSONString(outerStruct, oi));
        System.out.println("actual = " + SerDeUtils.getJSONString(out_o, out_oi));
        fail("Deserialized object does not compare");
    }
}
Also used: StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), SimpleMapEqualComparer (org.apache.hadoop.hive.serde2.objectinspector.SimpleMapEqualComparer), Configuration (org.apache.hadoop.conf.Configuration), Properties (java.util.Properties), Test (org.junit.Test)
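
The test derives its column names and types from the reflection ObjectInspector. For reference, a minimal sketch of the same initialization with a hand-written schema; the two-column layout and the names "id" and "name" are illustrative, and the colon-separated type string assumes the usual "columns.types" convention:

private LazyBinaryColumnarSerDe newSerDe() throws SerDeException {
    Properties props = new Properties();
    // Comma-separated column names; colon-separated column types.
    props.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "bigint:string");
    LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
    serde.initialize(new Configuration(), props, null);
    return serde;
}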

Example 17 with BytesRefArrayWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project elephant-bird by twitter.

From class RCFilePigStorage, method getNext:

@Override
public Tuple getNext() throws IOException {
    try {
        if (!in.nextKeyValue()) {
            return null;
        }
        BytesRefArrayWritable byteRefs = (BytesRefArrayWritable) in.getCurrentValue();
        boolean isProjected = requiredColumns != null;
        int inputSize = byteRefs.size();
        int tupleSize = isProjected ? requiredColumns.length : inputSize;
        Tuple tuple = tupleFactory.newTuple(tupleSize);
        int tupleIdx = 0;
        for (int i = 0; i < inputSize && tupleIdx < tupleSize; i++) {
            if (!isProjected || i == requiredColumns[tupleIdx]) {
                // set if all the fields are required or the field is projected
                BytesRefWritable ref = byteRefs.get(i);
                if (ref != null && ref.getLength() > 0) {
                    tuple.set(tupleIdx, new DataByteArray(ref.getBytesCopy()));
                }
                tupleIdx++;
            }
        }
        return tuple;
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}
Also used: BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), IOException (java.io.IOException), DataByteArray (org.apache.pig.data.DataByteArray), Tuple (org.apache.pig.data.Tuple), BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)
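
getNext() copies each field out with getBytesCopy() rather than using the zero-copy accessors. A short sketch of the distinction (columnAsString is an illustrative helper; both accessors declare IOException because column bytes may be decompressed lazily):

private String columnAsString(BytesRefArrayWritable byteRefs) throws IOException {
    BytesRefWritable ref = byteRefs.get(0);
    // getData()/getStart()/getLength() would give a zero-copy view into the
    // reader's shared buffer, valid only until the next row is read.
    // getBytesCopy() allocates a fresh array, which is why getNext() above
    // uses it before storing the bytes in a long-lived Tuple.
    return new String(ref.getBytesCopy(), StandardCharsets.UTF_8);
}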

Example 18 with BytesRefArrayWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project elephant-bird by twitter.

From class RCFilePigStorage, method putNext:

@SuppressWarnings("unchecked")
@Override
public void putNext(Tuple t) throws IOException {
    if (rowWritable == null) {
        // initialize
        if (numColumns < 1) {
            throw new IOException("number of columns is not set");
        }
        byteStream = new ByteStream.Output();
        rowWritable = new BytesRefArrayWritable();
        colValRefs = new BytesRefWritable[numColumns];
        for (int i = 0; i < numColumns; i++) {
            colValRefs[i] = new BytesRefWritable();
            rowWritable.set(i, colValRefs[i]);
        }
    }
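    // rowWritable and colValRefs are created once and reused for every row;
    // only the shared byte stream is reset per call.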
    byteStream.reset();
    // write each field as a text (just like PigStorage)
    int sz = t.size();
    int startPos = 0;
    for (int i = 0; i < sz && i < numColumns; i++) {
        StorageUtil.putField(byteStream, t.get(i));
        colValRefs[i].set(byteStream.getData(), startPos, byteStream.getCount() - startPos);
        startPos = byteStream.getCount();
    }
    try {
        writer.write(null, rowWritable);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}
Also used: BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), ByteStream (org.apache.hadoop.hive.serde2.ByteStream), IOException (java.io.IOException), BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)
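
putNext() relies on a buffer-slicing pattern: every field of a row is appended to one shared ByteStream.Output, and each BytesRefWritable records only the (offset, length) of its slice, so no per-field byte[] is allocated. A minimal sketch of that pattern in isolation (packField and its String argument are illustrative):

private BytesRefWritable packField(ByteStream.Output byteStream, String value) throws IOException {
    int startPos = byteStream.getCount();
    byteStream.write(value.getBytes(StandardCharsets.UTF_8));
    BytesRefWritable col = new BytesRefWritable();
    // The reference points at its slice of the shared buffer; nothing is copied.
    col.set(byteStream.getData(), startPos, byteStream.getCount() - startPos);
    return col;
}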

Example 19 with BytesRefArrayWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.

From class TestRCFile, method testReadCorruptFile:

@Test
public void testReadCorruptFile() throws IOException, SerDeException {
    cleanup();
    byte[][] record = { null, null, null, null, null, null, null, null };
    RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record.length);
    final int recCount = 100;
    Random rand = new Random();
    for (int recIdx = 0; recIdx < recCount; recIdx++) {
        for (int i = 0; i < record.length; i++) {
            record[i] = String.valueOf(rand.nextInt()).getBytes(StandardCharsets.UTF_8);
        }
        for (int i = 0; i < record.length; i++) {
            BytesRefWritable cu = new BytesRefWritable(record[i], 0, record[i].length);
            bytes.set(i, cu);
        }
        writer.append(bytes);
        bytes.clear();
    }
    writer.close();
    // Insert junk in middle of file. Assumes file is on local disk.
    RandomAccessFile raf = new RandomAccessFile(file.toUri().getPath(), "rw");
    long corruptOffset = raf.length() / 2;
    LOG.info("corrupting " + raf + " at offset " + corruptOffset);
    raf.seek(corruptOffset);
    raf.writeBytes("junkjunkjunkjunkjunkjunkjunkjunk");
    raf.close();
    // Set the option for tolerating corruptions. The read should succeed.
    Configuration tmpConf = new Configuration(conf);
    tmpConf.setBoolean("hive.io.rcfile.tolerate.corruptions", true);
    RCFile.Reader reader = new RCFile.Reader(fs, file, tmpConf);
    LongWritable rowID = new LongWritable();
    while (true) {
        boolean more = reader.next(rowID);
        if (!more) {
            break;
        }
        BytesRefArrayWritable cols = new BytesRefArrayWritable();
        reader.getCurrentRow(cols);
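        // resetValid(8) sizes the array to the eight written columns and
        // marks them all valid.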
        cols.resetValid(8);
    }
    reader.close();
}
Also used: BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), Configuration (org.apache.hadoop.conf.Configuration), DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec), RecordReader (org.apache.hadoop.mapred.RecordReader), Random (java.util.Random), RandomAccessFile (java.io.RandomAccessFile), LongWritable (org.apache.hadoop.io.LongWritable), BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable), Test (org.junit.Test)
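
Corruption tolerance is one read-side knob; the other benefit of the columnar layout is column projection. A hedged sketch of reading back only a subset of columns from the file written above, assuming ColumnProjectionUtils.appendReadColumns (org.apache.hadoop.hive.serde2.ColumnProjectionUtils) is available in this Hive version and that java.util.Arrays is imported; unread columns are expected to come back as zero-length references:

Configuration projConf = new Configuration(conf);
// Ask the reader to materialize only columns 0 and 2.
ColumnProjectionUtils.appendReadColumns(projConf, Arrays.asList(0, 2));
RCFile.Reader projReader = new RCFile.Reader(fs, file, projConf);
LongWritable rowID = new LongWritable();
BytesRefArrayWritable cols = new BytesRefArrayWritable();
while (projReader.next(rowID)) {
    projReader.getCurrentRow(cols);
    // Only columns 0 and 2 carry data here; the rest stay empty.
}
projReader.close();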

Example 20 with BytesRefArrayWritable

Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.

From class TestRCFile, method testReadOldFileHeader:

@Test
public void testReadOldFileHeader() throws IOException {
    String[] row = new String[] { "Tester", "Bart", "333 X St.", "Reno", "NV", "USA" };
    RCFile.Reader reader = new RCFile.Reader(fs, new Path(HiveTestUtils.getFileFromClasspath("rc-file-v0.rc")), conf);
    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    assertTrue("old file reader first row", reader.next(rowID));
    reader.getCurrentRow(cols);
    assertEquals(row.length, cols.size());
    for (int i = 0; i < cols.size(); ++i) {
        assertEquals(row[i], new String(cols.get(i).getBytesCopy(), StandardCharsets.UTF_8));
    }
    assertFalse("old file reader end", reader.next(rowID));
    reader.close();
}
Also used: Path (org.apache.hadoop.fs.Path), BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), RecordReader (org.apache.hadoop.mapred.RecordReader), LongWritable (org.apache.hadoop.io.LongWritable), StandardCharsets (java.nio.charset.StandardCharsets), Test (org.junit.Test)

Aggregations

BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable): 28 uses
BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable): 17 uses
Configuration (org.apache.hadoop.conf.Configuration): 13 uses
LongWritable (org.apache.hadoop.io.LongWritable): 12 uses
Path (org.apache.hadoop.fs.Path): 11 uses
Test (org.junit.Test): 11 uses
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 10 uses
Properties (java.util.Properties): 7 uses
RecordReader (org.apache.hadoop.mapred.RecordReader): 7 uses
DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec): 5 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 4 uses
RCFile (org.apache.hadoop.hive.ql.io.RCFile): 4 uses
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 4 uses
JobConf (org.apache.hadoop.mapred.JobConf): 4 uses
IOException (java.io.IOException): 3 uses
ColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe): 3 uses
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 3 uses
SimpleMapEqualComparer (org.apache.hadoop.hive.serde2.objectinspector.SimpleMapEqualComparer): 3 uses
CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 3 uses
Random (java.util.Random): 2 uses