Search in sources :

Example 26 with ObjectInspectorFactory.getReflectionObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project cdap by caskdata.

the class StandardObjectInspectorsTest method testCollectionObjectInspector.

/**
 * Checks that the reflection object inspector built for non-List collection types
 * ({@code Set<String>} and {@code Queue<String>}) is a {@code StandardListObjectInspector},
 * and that {@code getList} exposes the elements of the inspected collection.
 *
 * @throws Throwable if inspector creation or any assertion fails
 */
@Test
public void testCollectionObjectInspector() throws Throwable {
    // Test with sets
    ObjectInspector oi = ObjectInspectorFactory.getReflectionObjectInspector(new TypeToken<Set<String>>() {
    }.getType());
    Assert.assertTrue(oi instanceof StandardListObjectInspector);
    StandardListObjectInspector loi = (StandardListObjectInspector) oi;
    Set<String> set = Sets.newHashSet("foo", "bar", "foobar");
    List<?> inspectedSet = loi.getList(set);
    // A hash set has no defined order, so only size and membership are checked.
    Assert.assertEquals(3, inspectedSet.size());
    Assert.assertTrue(inspectedSet.contains("foo"));
    Assert.assertTrue(inspectedSet.contains("bar"));
    Assert.assertTrue(inspectedSet.contains("foobar"));
    // Test with queues
    oi = ObjectInspectorFactory.getReflectionObjectInspector(new TypeToken<Queue<String>>() {
    }.getType());
    Assert.assertTrue(oi instanceof StandardListObjectInspector);
    loi = (StandardListObjectInspector) oi;
    Queue<String> queue = new LinkedList<>();
    queue.add("foo");
    queue.add("bar");
    // Inspect the queue itself rather than the set built above; passing the set would
    // leave the queue path untested and make the positional checks depend on hash order.
    List<?> inspectedQueue = loi.getList(queue);
    // A LinkedList iterates in insertion order, so "foo" is expected before "bar".
    // NOTE(review): assumes getList preserves the collection's iteration order - confirm.
    Assert.assertEquals(2, inspectedQueue.size());
    Assert.assertEquals("foo", inspectedQueue.get(0));
    Assert.assertEquals("bar", inspectedQueue.get(1));
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) TypeToken(com.google.common.reflect.TypeToken) Queue(java.util.Queue) LinkedList(java.util.LinkedList) Test(org.junit.Test)

Example 27 with ObjectInspectorFactory.getReflectionObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.

the class TestLazySimpleSerDe method testSerDeParameters.

/**
 * Exercises the deprecated {@code SerDeParameters} usage: a randomly filled
 * {@code MyTestClass} row is round-tripped through a {@code LazySimpleSerDe} and two of
 * its fields are compared against the deserialized values.
 */
@Test
@SuppressWarnings("deprecation")
public void testSerDeParameters() throws SerDeException, IOException {
    // Setup
    LazySimpleSerDe lazySerDe = new LazySimpleSerDe();
    Configuration configuration = new Configuration();
    MyTestClass sourceRow = new MyTestClass();
    ExtraTypeInfo typeInfo = new ExtraTypeInfo();
    // Fixed seed keeps the generated row reproducible between runs.
    sourceRow.randomFill(new Random(1234), typeInfo);
    StructObjectInspector sourceOI = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);

    // Describe the table schema using the column names and types derived from the inspector.
    Properties tableProperties = new Properties();
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, ObjectInspectorUtils.getFieldNames(sourceOI));
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(sourceOI));
    SerDeUtils.initializeSerDe(lazySerDe, configuration, tableProperties, null);
    SerDeParameters deprecatedParams = LazySimpleSerDe.initSerdeParams(configuration, tableProperties, "testSerdeName");

    // Test
    LazyStruct roundTripped = (LazyStruct) serializeAndDeserialize(sourceRow, sourceOI, lazySerDe, deprecatedParams);

    boolean expectedBool = (boolean) sourceRow.myBool;
    boolean actualBool = ((LazyBoolean) roundTripped.getField(0)).getWritableObject().get();
    assertEquals(expectedBool, actualBool);

    int expectedInt = (int) sourceRow.myInt;
    int actualInt = ((LazyInteger) roundTripped.getField(3)).getWritableObject().get();
    assertEquals(expectedInt, actualInt);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Random(java.util.Random) MyTestClass(org.apache.hadoop.hive.serde2.binarysortable.MyTestClass) ExtraTypeInfo(org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo) SerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters) Properties(java.util.Properties) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Example 28 with ObjectInspectorFactory.getReflectionObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.

the class TestLazyBinarySerDe method testLongerSchemaDeserialization1.

/**
 * Tests longer-schema deserialization: a smaller struct ({@code MyTestClassSmaller}) is
 * serialized and then deserialized with the serde of a bigger struct ({@code MyTestClass}).
 * Here the serialized struct has 8 fields and it is deserialized to a struct of 9 fields.
 *
 * @param r source of randomness used to fill each test row
 */
void testLongerSchemaDeserialization1(Random r) throws Throwable {
    // Serializing side: serde derived from the smaller class.
    StructObjectInspector writeOI = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClassSmaller.class, ObjectInspectorOptions.JAVA);
    String writeNames = ObjectInspectorUtils.getFieldNames(writeOI);
    String writeTypes = ObjectInspectorUtils.getFieldTypes(writeOI);
    AbstractSerDe writeSerDe = getSerDe(writeNames, writeTypes);
    // Return value is intentionally unused; the call is kept exactly as in the original test.
    writeSerDe.getObjectInspector();

    // Deserializing side: serde derived from the bigger class.
    StructObjectInspector readOI = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
    String readNames = ObjectInspectorUtils.getFieldNames(readOI);
    String readTypes = ObjectInspectorUtils.getFieldTypes(readOI);
    AbstractSerDe readSerDe = getSerDe(readNames, readTypes);
    ObjectInspector readSerDeOI = readSerDe.getObjectInspector();

    final int rounds = 100;
    for (int round = 0; round < rounds; round++) {
        MyTestClassSmaller input = new MyTestClassSmaller();
        ExtraTypeInfo typeInfo = new ExtraTypeInfo();
        input.randomFill(r, typeInfo);
        BytesWritable bytes = (BytesWritable) writeSerDe.serialize(input, writeOI);
        Object decoded = readSerDe.deserialize(bytes);
        if (compareDiffSizedStructs(input, writeOI, decoded, readSerDeOI) == 0) {
            continue;
        }
        // Mismatch: dump both sides and the raw bytes, then run the equality assertion.
        System.out.println("structs      = " + SerDeUtils.getJSONString(input, writeOI));
        System.out.println("deserialized = " + SerDeUtils.getJSONString(decoded, readSerDeOI));
        System.out.println("serialized   = " + TestBinarySortableSerDe.hexString(bytes));
        assertEquals(input, decoded);
    }
}
Also used : AbstractPrimitiveLazyObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector) LazyBinaryMapObjectInspector(org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector) WritableBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) JavaBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBinaryObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) MyTestClass(org.apache.hadoop.hive.serde2.binarysortable.MyTestClass) ExtraTypeInfo(org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo) BytesWritable(org.apache.hadoop.io.BytesWritable) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 29 with ObjectInspectorFactory.getReflectionObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.

the class TestLazyBinarySerDe method testShorterSchemaDeserialization1.

/**
 * Tests shorter-schema deserialization: a bigger struct ({@code MyTestClass}) is serialized
 * and then deserialized with the serde of a smaller struct ({@code MyTestClassSmaller}).
 * Here the serialized struct has 9 fields and it is deserialized to a struct of 8 fields.
 *
 * @param r source of randomness used to fill each test row
 */
private void testShorterSchemaDeserialization1(Random r) throws Throwable {
    // Serializing side: serde derived from the bigger class.
    StructObjectInspector sourceOI = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
    String sourceNames = ObjectInspectorUtils.getFieldNames(sourceOI);
    String sourceTypes = ObjectInspectorUtils.getFieldTypes(sourceOI);
    AbstractSerDe sourceSerDe = getSerDe(sourceNames, sourceTypes);
    // Return value is intentionally unused; the call is kept exactly as in the original test.
    sourceSerDe.getObjectInspector();

    // Deserializing side: serde derived from the smaller class.
    StructObjectInspector targetOI = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClassSmaller.class, ObjectInspectorOptions.JAVA);
    String targetNames = ObjectInspectorUtils.getFieldNames(targetOI);
    String targetTypes = ObjectInspectorUtils.getFieldTypes(targetOI);
    AbstractSerDe targetSerDe = getSerDe(targetNames, targetTypes);
    ObjectInspector targetSerDeOI = targetSerDe.getObjectInspector();

    final int attempts = 100;
    int attempt = 0;
    while (attempt < attempts) {
        MyTestClass original = new MyTestClass();
        ExtraTypeInfo typeInfo = new ExtraTypeInfo();
        original.randomFill(r, typeInfo);
        BytesWritable payload = (BytesWritable) sourceSerDe.serialize(original, sourceOI);
        Object restored = targetSerDe.deserialize(payload);
        boolean matches = compareDiffSizedStructs(original, sourceOI, restored, targetSerDeOI) == 0;
        if (!matches) {
            // Mismatch: dump both sides and the raw bytes, then run the equality assertion.
            System.out.println("structs      = " + SerDeUtils.getJSONString(original, sourceOI));
            System.out.println("deserialized = " + SerDeUtils.getJSONString(restored, targetSerDeOI));
            System.out.println("serialized   = " + TestBinarySortableSerDe.hexString(payload));
            assertEquals(original, restored);
        }
        attempt++;
    }
}
Also used : AbstractPrimitiveLazyObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector) LazyBinaryMapObjectInspector(org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector) WritableBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) JavaBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBinaryObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) MyTestClass(org.apache.hadoop.hive.serde2.binarysortable.MyTestClass) ExtraTypeInfo(org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo) BytesWritable(org.apache.hadoop.io.BytesWritable) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 30 with ObjectInspectorFactory.getReflectionObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.

the class OrcFileGenerator method generateOrcFile.

/**
 * Generates an ORC file based on the provided record class, in the specified file system
 * at the output path.
 *
 * <p>For every batch, one column of values is generated per constructor parameter of
 * {@code recordClass}; each row is then built by invoking that constructor and is appended
 * to the ORC writer. The record class is validated before the writer is created.
 *
 * @param conf the configuration used to initialize the ORC writer
 * @param fs the file system that will contain the generated ORC file
 * @param outputPath the path where the generated ORC file will be placed
 * @param recordClass a class that defines the record format for the generated ORC file; this
 * class must have exactly one public constructor, and every parameter type of that
 * constructor must have a batch generator registered in {@code TYPE_TO_BATCH_GEN_MAP}
 * @throws UnsupportedOperationException if {@code recordClass} does not expose exactly one
 * public constructor, or a constructor parameter type has no batch generator
 * @throws IOException if creating, writing to, or closing the ORC writer fails
 */
public static void generateOrcFile(Configuration conf, FileSystem fs, Path outputPath, Class recordClass) throws IOException, InstantiationException, IllegalAccessException, InvocationTargetException {
    ObjectInspector inspector;
    // NOTE(review): presumably serializes inspector creation with TestVectorizedORCReader,
    // which shares this lock object - confirm why the lock is needed.
    synchronized (TestVectorizedORCReader.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(recordClass, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }

    // Validate the record class up front so an unusable class is rejected before any
    // writer is created (and subsequently closed) for the output path.
    Constructor<?>[] constructors = recordClass.getConstructors();
    if (constructors.length != 1) {
        throw new UnsupportedOperationException("The provided recordClass must have exactly one constructor.");
    }
    Constructor<?> recordConstructor = constructors[0];
    Class<?>[] columns = recordConstructor.getParameterTypes();
    // The generator lookup does not depend on the batch, so check each column type once.
    for (Class<?> column : columns) {
        if (!TYPE_TO_BATCH_GEN_MAP.containsKey(column)) {
            throw new UnsupportedOperationException("No batch generator defined for type " + column.getName());
        }
    }

    Writer writer = OrcFile.createWriter(fs, outputPath, conf, inspector, 100000, CompressionKind.ZLIB, 10000, 10000);
    try {
        BatchDataDistribution[] dataDist = BatchDataDistribution.values();
        for (int i = 0; i < dataDist.length * 3; i++) {
            // Column-major batch: rows[c] holds the generated values of column c.
            Object[][] rows = new Object[columns.length][];
            for (int c = 0; c < columns.length; c++) {
                // Offsetting by the column index rotates distributions across columns and batches.
                rows[c] = TYPE_TO_BATCH_GEN_MAP.get(columns[c]).generateBatch(dataDist[(i + c) % dataDist.length]);
            }
            // Transpose into row-major form and append one record per row.
            for (int r = 0; r < VectorizedRowBatch.DEFAULT_SIZE; r++) {
                Object[] row = new Object[columns.length];
                for (int c = 0; c < columns.length; c++) {
                    row[c] = rows[c][r];
                }
                writer.addRow(recordConstructor.newInstance(row));
            }
        }
    } finally {
        writer.close();
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Constructor(java.lang.reflect.Constructor) TestVectorizedORCReader(org.apache.hadoop.hive.ql.io.orc.TestVectorizedORCReader) Writer(org.apache.hadoop.hive.ql.io.orc.Writer)

Aggregations

StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 73, Test (org.junit.Test): 64, ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 60, Configuration (org.apache.hadoop.conf.Configuration): 25, StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector): 25, InputSplit (org.apache.hadoop.mapred.InputSplit): 25, BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector): 24, MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 23, Properties (java.util.Properties): 20, IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector): 20, ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 18, BooleanObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector): 18, ByteObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector): 18, DoubleObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector): 18, FloatObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector): 18, HiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector): 18, LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector): 18, ShortObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector): 18, TimestampObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector): 18, RecordWriter (org.apache.hadoop.mapred.RecordWriter): 18