
Example 31 with TypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project nifi by apache.

From the class TestNiFiOrcUtils, method test_getOrcField_map.

@Test
public void test_getOrcField_map() throws Exception {
    final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
    builder.name("map").type().map().values().doubleType().noDefault();
    Schema testSchema = builder.endRecord();
    TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("map").schema());
    assertEquals(TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(), TypeInfoCreator.createDouble()), orcType);
}
Also used : Schema(org.apache.avro.Schema) SchemaBuilder(org.apache.avro.SchemaBuilder) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) Test(org.junit.Test)
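
A quick way to sanity-check the same mapping, shown here as a hedged sketch: Avro maps carry no key schema, so keys are implicitly strings and the ORC side should render as map<string,double>. The JSON-parsed schema below is an assumed alternative to SchemaBuilder, and the final assertion relies on TypeInfo.getTypeName():

    Schema parsed = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"testRecord\",\"namespace\":\"any.data\"," +
        "\"fields\":[{\"name\":\"map\",\"type\":{\"type\":\"map\",\"values\":\"double\"}}]}");
    TypeInfo orcType = NiFiOrcUtils.getOrcField(parsed.getField("map").schema());
    // Avro maps have no key schema; keys are implicitly string
    assertEquals("map<string,double>", orcType.getTypeName());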

Example 32 with TypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project nifi by apache.

From the class TestNiFiOrcUtils, method test_getOrcField_enum.

@Test
public void test_getOrcField_enum() throws Exception {
    final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
    builder.name("enumField").type().enumeration("enum").symbols("a", "b", "c").enumDefault("a");
    Schema testSchema = builder.endRecord();
    TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("enumField").schema());
    assertEquals(TypeInfoCreator.createString(), orcType);
}
Also used : Schema(org.apache.avro.Schema) SchemaBuilder(org.apache.avro.SchemaBuilder) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) Test(org.junit.Test)
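
ORC has no enum type, so the test above shows Avro enums being flattened to string. As a hedged cross-check, assuming TypeInfoCreator.createString() simply returns the Hive string singleton, the same assertion can be phrased against TypeInfoFactory directly:

    // TypeInfoFactory.stringTypeInfo is the canonical Hive string TypeInfo
    assertEquals(TypeInfoFactory.stringTypeInfo, orcType);
    assertEquals("string", orcType.getTypeName());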

Example 33 with TypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project nifi by apache.

From the class TestNiFiOrcUtils, method test_getOrcField_nested_map.

@Test
public void test_getOrcField_nested_map() throws Exception {
    final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
    builder.name("map").type().map().values().map().values().doubleType().noDefault();
    Schema testSchema = builder.endRecord();
    TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("map").schema());
    assertEquals(TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(), TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(), TypeInfoCreator.createDouble())), orcType);
}
Also used : Schema(org.apache.avro.Schema) SchemaBuilder(org.apache.avro.SchemaBuilder) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) Test(org.junit.Test)
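
The nested case composes the same way: the value TypeInfo of the outer map is itself a MapTypeInfo. A hedged sketch of an equivalent check, unwrapping one level via MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo):

    MapTypeInfo outer = (MapTypeInfo) orcType;
    // the outer map's values are themselves map<string,double>
    assertEquals("map<string,double>", outer.getMapValueTypeInfo().getTypeName());
    assertEquals("map<string,map<string,double>>", orcType.getTypeName());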

Example 34 with TypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project nifi by apache.

From the class TestNiFiOrcUtils, method test_getOrcField_array.

@Test
public void test_getOrcField_array() throws Exception {
    final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
    builder.name("array").type().array().items().longType().noDefault();
    Schema testSchema = builder.endRecord();
    TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("array").schema());
    assertEquals(TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createLong()), orcType);
}
Also used : Schema(org.apache.avro.Schema) SchemaBuilder(org.apache.avro.SchemaBuilder) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) Test(org.junit.Test)
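
Avro long maps to Hive's bigint, so the list renders as array<bigint>. A hedged sketch of the same check through ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo), assuming TypeInfoCreator.createLong() mirrors TypeInfoFactory.longTypeInfo:

    ListTypeInfo listType = (ListTypeInfo) orcType;
    assertEquals(TypeInfoFactory.longTypeInfo, listType.getListElementTypeInfo());
    // Hive names the list type array<...> and the long type bigint
    assertEquals("array<bigint>", orcType.getTypeName());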

Example 35 with TypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project nifi by apache.

From the class NiFiOrcUtils, method createWriter.

public static OrcFlowFileWriter createWriter(OutputStream flowFileOutputStream, Path path, Configuration conf, TypeInfo orcSchema, long stripeSize, CompressionKind compress, int bufferSize) throws IOException {
    int rowIndexStride = HiveConf.getIntVar(conf, HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE);
    boolean addBlockPadding = HiveConf.getBoolVar(conf, HIVE_ORC_DEFAULT_BLOCK_PADDING);
    String versionName = HiveConf.getVar(conf, HIVE_ORC_WRITE_FORMAT);
    OrcFile.Version versionValue = (versionName == null) ? OrcFile.Version.CURRENT : OrcFile.Version.byName(versionName);
    OrcFile.EncodingStrategy encodingStrategy;
    String enString = conf.get(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname);
    if (enString == null) {
        encodingStrategy = OrcFile.EncodingStrategy.SPEED;
    } else {
        encodingStrategy = OrcFile.EncodingStrategy.valueOf(enString);
    }
    OrcFile.CompressionStrategy compressionStrategy;
    String compString = conf.get(HiveConf.ConfVars.HIVE_ORC_COMPRESSION_STRATEGY.varname);
    if (compString == null) {
        compressionStrategy = OrcFile.CompressionStrategy.SPEED;
    } else {
        compressionStrategy = OrcFile.CompressionStrategy.valueOf(compString);
    }
    float paddingTolerance = conf.getFloat(HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.varname, HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.defaultFloatVal);
    long blockSizeValue = HiveConf.getLongVar(conf, HIVE_ORC_DEFAULT_BLOCK_SIZE);
    double bloomFilterFpp = BloomFilterIO.DEFAULT_FPP;
    ObjectInspector inspector = OrcStruct.createObjectInspector(orcSchema);
    return new OrcFlowFileWriter(flowFileOutputStream, path, conf, inspector,
            stripeSize, compress, bufferSize, rowIndexStride, getMemoryManager(conf),
            addBlockPadding, versionValue,
            null, // no callback
            encodingStrategy, compressionStrategy, paddingTolerance, blockSizeValue,
            null, // no Bloom Filter column names
            bloomFilterFpp);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector)
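
A hedged usage sketch of createWriter; the file name, struct schema, stripe size, and buffer size below are illustrative assumptions, not values from the NiFi codebase. TypeInfoUtils.getTypeInfoFromTypeString (org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils) builds the ORC schema from a Hive-style type string:

    TypeInfo orcSchema = TypeInfoUtils.getTypeInfoFromTypeString("struct<id:int,name:string>");
    try (OutputStream out = new FileOutputStream("example.orc")) {
        OrcFlowFileWriter writer = NiFiOrcUtils.createWriter(
                out, new Path("example.orc"), new Configuration(), orcSchema,
                // stripe and buffer sizes are illustrative tuning values
                100000L, CompressionKind.ZLIB, 10000);
        // ... write rows here, then close to flush the final stripe
        writer.close();
    }
Also used : TypeInfoUtils(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils) Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) CompressionKind(org.apache.hadoop.hive.ql.io.orc.CompressionKind) FileOutputStream(java.io.FileOutputStream)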

Aggregations

TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) 516
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) 287
ArrayList (java.util.ArrayList) 202
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) 193
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) 167
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) 151
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) 148
MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) 138
Test (org.junit.Test) 135
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) 107
UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) 78
HashMap (java.util.HashMap) 74
PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) 71
CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) 69
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 67
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 63
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) 61
VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) 59
List (java.util.List) 54
HiveConf (org.apache.hadoop.hive.conf.HiveConf) 53