Example 36 with StructField

Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.

From class TestRCFile, method testSimpleReadAndWrite:

@Test
public void testSimpleReadAndWrite() throws IOException, SerDeException {
    cleanup();
    byte[][] record_1 = { "123".getBytes("UTF-8"), "456".getBytes("UTF-8"), "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"), new byte[0], "NULL".getBytes("UTF-8") };
    byte[][] record_2 = { "100".getBytes("UTF-8"), "200".getBytes("UTF-8"), "123".getBytes("UTF-8"), "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"), new byte[0], "NULL".getBytes("UTF-8") };
    RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, RCFile.createMetadata(new Text("apple"), new Text("block"), new Text("cat"), new Text("dog")), new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
    for (int i = 0; i < record_1.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, record_1[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    bytes.clear();
    for (int i = 0; i < record_2.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_2[i], 0, record_2[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    writer.close();
    Object[] expectedRecord_1 = { new ByteWritable((byte) 123), new ShortWritable((short) 456), new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"), null, null };
    Object[] expectedRecord_2 = { new ByteWritable((byte) 100), new ShortWritable((short) 200), new IntWritable(123), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"), null, null };
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    assertEquals(new Text("block"), reader.getMetadata().get(new Text("apple")));
    assertEquals(new Text("block"), reader.getMetadataValueOf(new Text("apple")));
    assertEquals(new Text("dog"), reader.getMetadataValueOf(new Text("cat")));
    LongWritable rowID = new LongWritable();
    // Read back both rows and check every field against the expected writables.
    for (int i = 0; i < 2; i++) {
        reader.next(rowID);
        BytesRefArrayWritable cols = new BytesRefArrayWritable();
        reader.getCurrentRow(cols);
        cols.resetValid(8);
        Object row = serDe.deserialize(cols);
        StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
        assertEquals("Field size should be 8", 8, fieldRefs.size());
        for (int j = 0; j < fieldRefs.size(); j++) {
            Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
            Object standardWritableData = ObjectInspectorUtils.copyToStandardObject(fieldData, fieldRefs.get(j).getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
            // assertEquals(message, expected, actual): verify field j of row i.
            if (i == 0) {
                assertEquals("Field " + j, expectedRecord_1[j], standardWritableData);
            } else {
                assertEquals("Field " + j, expectedRecord_2[j], standardWritableData);
            }
        }
    }
    reader.close();
}
Also used: BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec), RecordReader (org.apache.hadoop.mapred.RecordReader), Text (org.apache.hadoop.io.Text), DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable), LongWritable (org.apache.hadoop.io.LongWritable), ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable), IntWritable (org.apache.hadoop.io.IntWritable), BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)
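Distilled from the verification loop above, a minimal sketch of the StructField iteration pattern (the class and method names here are illustrative, not part of the Hive test): given any deserialized row and its StructObjectInspector, copy every field to a standard writable for comparison or inspection.

import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public final class RowFieldDumper {

    // Returns one standard writable per struct field; null field data stays null.
    public static Object[] toStandardWritables(Object row, StructObjectInspector oi) {
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
        Object[] out = new Object[fieldRefs.size()];
        for (int j = 0; j < fieldRefs.size(); j++) {
            Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
            out[j] = ObjectInspectorUtils.copyToStandardObject(fieldData,
                fieldRefs.get(j).getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
        }
        return out;
    }
}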

Example 37 with StructField

Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.

From class TestRCFile, method partialReadTest:

private void partialReadTest(FileSystem fs, int count, Path file) throws IOException, SerDeException {
    LOG.debug("reading " + count + " records");
    long start = System.currentTimeMillis();
    java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
    readCols.add(Integer.valueOf(2));
    readCols.add(Integer.valueOf(3));
    ColumnProjectionUtils.appendReadColumns(conf, readCols);
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    while (reader.next(rowID)) {
        reader.getCurrentRow(cols);
        cols.resetValid(8);
        Object row = serDe.deserialize(cols);
        StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
        assertEquals("Field size should be 8", 8, fieldRefs.size());
        for (int i : readCols) {
            Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
            Object standardWritableData = ObjectInspectorUtils.copyToStandardObject(fieldData, fieldRefs.get(i).getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
            assertEquals("Field " + i, standardWritableData, expectedPartitalFieldsData[i]);
        }
        assertEquals("Class of the serialized object should be BytesRefArrayWritable", BytesRefArrayWritable.class, serDe.getSerializedClass());
        BytesRefArrayWritable serializedBytes = (BytesRefArrayWritable) serDe.serialize(row, oi);
        assertEquals("Serialized data", patialS, serializedBytes);
    }
    reader.close();
    long cost = System.currentTimeMillis() - start;
    LOG.debug("reading fully costs:" + cost + " milliseconds");
}
Also used: BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), RecordReader (org.apache.hadoop.mapred.RecordReader), LongWritable (org.apache.hadoop.io.LongWritable), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
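The projection setup is the key step in the test above; a minimal sketch of just the configuration side (class name illustrative): register the column ids to materialize before opening the reader, and RCFile skips the bytes of every other column.

import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

public final class ProjectionSetup {

    // Appends columns 2 and 3 to the read-column list; columns that are not
    // requested come back as empty byte ranges from RCFile.Reader.
    public static Configuration projectColumns(Configuration conf) {
        ColumnProjectionUtils.appendReadColumns(conf, Arrays.asList(2, 3));
        return conf;
    }
}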

Example 38 with StructField

Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.

From class ThriftFormatter, method convert:

@Override
public Object convert(Object row, ObjectInspector rowOI) throws Exception {
    StructObjectInspector structOI = (StructObjectInspector) rowOI;
    List<? extends StructField> fields = structOI.getAllStructFieldRefs();
    Object[] converted = new Object[fields.size()];
    for (int i = 0; i < converted.length; i++) {
        StructField fieldRef = fields.get(i);
        Object field = structOI.getStructFieldData(row, fieldRef);
        converted[i] = field == null ? null : SerDeUtils.toThriftPayload(field, fieldRef.getFieldObjectInspector(), protocol);
    }
    return converted;
}
Also used: StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
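A hedged variant of the converter above (class name illustrative): the same flattening loop, but copying each field to a plain Java object instead of a Thrift payload, which is often sufficient when no wire protocol is involved.

import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public final class JavaRowFormatter {

    // Flattens a struct row into plain Java objects, preserving nulls.
    public static Object[] convert(Object row, StructObjectInspector structOI) {
        List<? extends StructField> fields = structOI.getAllStructFieldRefs();
        Object[] converted = new Object[fields.size()];
        for (int i = 0; i < fields.size(); i++) {
            StructField fieldRef = fields.get(i);
            Object field = structOI.getStructFieldData(row, fieldRef);
            converted[i] = field == null ? null : ObjectInspectorUtils.copyToStandardObject(
                field, fieldRef.getFieldObjectInspector(), ObjectInspectorCopyOption.JAVA);
        }
        return converted;
    }
}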

Example 39 with StructField

Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.

From class LazySimpleStructObjectInspector, method getStructFieldData:

// Resolve a field's value from lazy struct data, delegating to Avro lazy
// object inspectors where the field (or its map values) is Avro-backed.
@Override
public Object getStructFieldData(Object data, StructField fieldRef) {
    if (data == null) {
        return null;
    }
    StructObject struct = (StructObject) data;
    MyField f = (MyField) fieldRef;
    int fieldID = f.getFieldID();
    assert (fieldID >= 0 && fieldID < fields.size());
    ObjectInspector oi = f.getFieldObjectInspector();
    if (oi instanceof AvroLazyObjectInspector) {
        return ((AvroLazyObjectInspector) oi).getStructFieldData(data, fieldRef);
    }
    if (oi instanceof MapObjectInspector) {
        ObjectInspector valueOI = ((MapObjectInspector) oi).getMapValueObjectInspector();
        if (valueOI instanceof AvroLazyObjectInspector) {
            return ((AvroLazyObjectInspector) valueOI).getStructFieldData(data, fieldRef);
        }
    }
    return struct.getField(fieldID);
}
Also used: AvroLazyObjectInspector (org.apache.hadoop.hive.serde2.avro.AvroLazyObjectInspector), MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), BaseStructObjectInspector (org.apache.hadoop.hive.serde2.BaseStructObjectInspector), StructObject (org.apache.hadoop.hive.serde2.StructObject)
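From the caller's side, the same entry point resolves a single field by name; a minimal sketch (illustrative class name), relying on the null guard shown in the inspector above:

import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public final class FieldLookup {

    // Look up the field ref once, then extract its value; getStructFieldData
    // returns null for a null row, as in LazySimpleStructObjectInspector above.
    public static Object fieldByName(Object row, StructObjectInspector oi, String name) {
        StructField ref = oi.getStructFieldRef(name);
        return oi.getStructFieldData(row, ref);
    }
}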

Example 40 with StructField

Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.

From class TestObjectInspectorUtils, method testObjectInspectorUtils:

public void testObjectInspectorUtils() throws Throwable {
    try {
        ObjectInspector oi1 = ObjectInspectorFactory.getReflectionObjectInspector(Complex.class, ObjectInspectorFactory.ObjectInspectorOptions.THRIFT);
        // metadata
        assertEquals(Category.STRUCT, oi1.getCategory());
        // standard ObjectInspector
        StructObjectInspector soi = (StructObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(oi1);
        List<? extends StructField> fields = soi.getAllStructFieldRefs();
        assertEquals(10, fields.size());
        assertEquals(fields.get(0), soi.getStructFieldRef("aint"));
        // null
        for (int i = 0; i < fields.size(); i++) {
            assertNull(soi.getStructFieldData(null, fields.get(i)));
        }
        // real object
        Complex cc = new Complex();
        cc.setAint(1);
        cc.setAString("test");
        List<Integer> c2 = Arrays.asList(new Integer[] { 1, 2, 3 });
        cc.setLint(c2);
        List<String> c3 = Arrays.asList(new String[] { "one", "two" });
        cc.setLString(c3);
        List<IntString> c4 = new ArrayList<IntString>();
        cc.setLintString(c4);
        cc.setMStringString(null);
        // standard object
        Object c = ObjectInspectorUtils.copyToStandardObject(cc, oi1);
        assertEquals(1, soi.getStructFieldData(c, fields.get(0)));
        assertEquals("test", soi.getStructFieldData(c, fields.get(1)));
        assertEquals(c2, soi.getStructFieldData(c, fields.get(2)));
        assertEquals(c3, soi.getStructFieldData(c, fields.get(3)));
        assertEquals(c4, soi.getStructFieldData(c, fields.get(4)));
        assertNull(soi.getStructFieldData(c, fields.get(5)));
        ArrayList<Object> cfields = new ArrayList<Object>();
        for (int i = 0; i < 10; i++) {
            cfields.add(soi.getStructFieldData(c, fields.get(i)));
        }
        assertEquals(cfields, soi.getStructFieldsDataAsList(c));
        // sub fields
        assertEquals(PrimitiveObjectInspectorFactory.javaIntObjectInspector, fields.get(0).getFieldObjectInspector());
        assertEquals(PrimitiveObjectInspectorFactory.javaStringObjectInspector, fields.get(1).getFieldObjectInspector());
        assertEquals(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaIntObjectInspector), fields.get(2).getFieldObjectInspector());
        assertEquals(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), fields.get(3).getFieldObjectInspector());
        assertEquals(ObjectInspectorUtils.getStandardObjectInspector(ObjectInspectorFactory.getStandardListObjectInspector(ObjectInspectorFactory.getReflectionObjectInspector(IntString.class, ObjectInspectorFactory.ObjectInspectorOptions.THRIFT))), fields.get(4).getFieldObjectInspector());
        assertEquals(ObjectInspectorFactory.getStandardMapObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector, PrimitiveObjectInspectorFactory.javaStringObjectInspector), fields.get(5).getFieldObjectInspector());
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used: ArrayList (java.util.ArrayList), IntString (org.apache.hadoop.hive.serde2.thrift.test.IntString), Complex (org.apache.hadoop.hive.serde2.thrift.test.Complex)
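The reflection path above can also be bypassed; a minimal sketch of building the standard struct object inspector by hand (field names and values here are illustrative): a standard struct row is simply a List whose elements line up with the fields.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public final class StandardOIDemo {

    public static void main(String[] args) {
        StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("aint", "astring"),
            Arrays.<ObjectInspector>asList(
                PrimitiveObjectInspectorFactory.javaIntObjectInspector,
                PrimitiveObjectInspectorFactory.javaStringObjectInspector));
        // Standard struct data is a List in field order.
        List<Object> row = Arrays.<Object>asList(1, "test");
        for (StructField f : soi.getAllStructFieldRefs()) {
            System.out.println(f.getFieldName() + " = " + soi.getStructFieldData(row, f));
        }
    }
}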

Aggregations

StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 147 usages
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 107 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 93 usages
ArrayList (java.util.ArrayList): 75 usages
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 56 usages
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 46 usages
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 42 usages
StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector): 33 usages
LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector): 29 usages
StandardStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector): 28 usages
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 26 usages
List (java.util.List): 25 usages
DoubleObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector): 24 usages
IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector): 24 usages
Test (org.junit.Test): 24 usages
BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector): 22 usages
HiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector): 22 usages
ShortObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector): 22 usages
TimestampObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector): 20 usages
UnionObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector): 19 usages