
Example 21 with Converter

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter in project hive by apache.

the class TestNewInputOutputFormat method testNewOutputFormatComplex.

@SuppressWarnings("unchecked")
@Test
// Test output format with complex data type, and with reduce
public void testNewOutputFormatComplex() throws Exception {
    Path inputPath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".txt");
    Path outputPath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc");
    localFs.delete(outputPath, true);
    PrintWriter pw = new PrintWriter(new OutputStreamWriter(localFs.create(inputPath)));
    pw.println("I have eaten");
    pw.println("the plums");
    pw.println("that were in");
    pw.println("the icebox");
    pw.println("and which");
    pw.println("you were probably");
    pw.println("saving");
    pw.println("for breakfast");
    pw.println("Forgive me");
    pw.println("they were delicious");
    pw.println("so sweet");
    pw.println("and so cold");
    pw.close();
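    // Configure a map/reduce job whose output is written as ORC by OrcNewOutputFormat.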
    Job job = new Job(conf, "orc test");
    job.setOutputFormatClass(OrcNewOutputFormat.class);
    job.setJarByClass(TestNewInputOutputFormat.class);
    job.setMapperClass(OrcTestMapper3.class);
    job.setReducerClass(OrcTestReducer3.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(OrcSerdeRow.class);
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    boolean result = job.waitForCompletion(true);
    assertTrue(result);
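    // Read the single reducer's ORC output back and convert each row from the
    // ORC object inspector's representation to standard Java objects for the asserts.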
    Path outputFilePath = new Path(outputPath, "part-r-00000");
    Reader reader = OrcFile.createReader(outputFilePath, OrcFile.readerOptions(conf).filesystem(localFs));
    RecordReader rows = reader.rows();
    ObjectInspector orcOi = reader.getObjectInspector();
    ObjectInspector stoi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(OrcTestReducer3.typeInfo);
    ObjectInspectorConverters.Converter converter = ObjectInspectorConverters.getConverter(orcOi, stoi);
    Object row = rows.next(null);
    List<Object> converted = (List<Object>) converter.convert(row);
    assertEquals(1, converted.get(0));
    assertEquals(1, converted.get(1));
    List<Object> list = (List<Object>) converted.get(2);
    assertEquals(list.size(), 1);
    assertEquals("saving", ((List<Object>) list.get(0)).get(0));
    assertEquals(6, ((List<Object>) list.get(0)).get(1));
    Map<String, Integer> map = (Map<String, Integer>) converted.get(3);
    assertEquals(map.size(), 1);
    assertEquals(map.get("saving"), new Integer(1));
    row = rows.next(null);
    converted = (List<Object>) converter.convert(row);
    assertEquals(2, converted.get(0));
    assertEquals(6, converted.get(1));
    list = (List<Object>) converted.get(2);
    assertEquals(list.size(), 6);
    assertEquals("breakfast", ((List<Object>) list.get(0)).get(0));
    assertEquals(9, ((List<Object>) list.get(0)).get(1));
    map = (Map<String, Integer>) converted.get(3);
    assertEquals(map.size(), 11);
    assertEquals(map.get("the"), new Integer(2));
    row = rows.next(null);
    converted = (List<Object>) converter.convert(row);
    assertEquals(3, converted.get(0));
    assertEquals(5, converted.get(1));
    list = (List<Object>) converted.get(2);
    assertEquals(list.size(), 5);
    assertEquals("cold", ((List<Object>) list.get(0)).get(0));
    assertEquals(4, ((List<Object>) list.get(0)).get(1));
    map = (Map<String, Integer>) converted.get(3);
    assertEquals(map.size(), 13);
    assertEquals(map.get("were"), new Integer(3));
    assertFalse(rows.hasNext());
    localFs.delete(outputPath, true);
}
Also used : Path(org.apache.hadoop.fs.Path) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) ObjectInspectorConverters(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters) OutputStreamWriter(java.io.OutputStreamWriter) ArrayList(java.util.ArrayList) List(java.util.List) Job(org.apache.hadoop.mapreduce.Job) HashMap(java.util.HashMap) Map(java.util.Map) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)
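For readers skimming these examples, here is a minimal standalone sketch of the Converter pattern the test above relies on (the string value is illustrative; getConverter and convert are the actual ObjectInspectorConverters API): choose a source and a destination ObjectInspector, ask the factory for a converter, then feed it objects.

// Convert a plain Java String into Hive's writable representation (Text).
ObjectInspector srcOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
ObjectInspector dstOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
ObjectInspectorConverters.Converter converter = ObjectInspectorConverters.getConverter(srcOI, dstOI);
Object out = converter.convert("saving");  // an org.apache.hadoop.io.Text holding "saving"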

Example 22 with Converter

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter in project hive by apache.

the class TestETypeConverter method testTimestampInt96ConverterGMT.

@Test
public void testTimestampInt96ConverterGMT() {
    PrimitiveConverter converter;
    parent.metadata.put(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, "GMT");
    converter = getETypeConverter(parent, PrimitiveTypeName.INT96, TypeInfoFactory.timestampTypeInfo);
    converter.addBinary(NanoTimeUtils.getNanoTime(ts, Calendar.getInstance(TimeZone.getTimeZone("GMT"))).toBinary());
    parent.assertWritableValue(new TimestampWritable(ts));
}
Also used : PrimitiveConverter(org.apache.parquet.io.api.PrimitiveConverter) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) Test(org.junit.Test)

Example 23 with Converter

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter in project hive by apache.

the class TestETypeConverter method testTimestampInt96ConverterLocal.

@Test
public void testTimestampInt96ConverterLocal() {
    PrimitiveConverter converter;
    // The default time zone should be the JVM's local time
    converter = getETypeConverter(parent, PrimitiveTypeName.INT96, TypeInfoFactory.timestampTypeInfo);
    converter.addBinary(NanoTimeUtils.getNanoTime(ts, Calendar.getInstance()).toBinary());
    parent.assertWritableValue(new TimestampWritable(ts));
}
Also used : PrimitiveConverter(org.apache.parquet.io.api.PrimitiveConverter) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) Test(org.junit.Test)
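Taken together, the two tests above hinge on one fact: the Calendar passed to NanoTimeUtils.getNanoTime participates in the INT96 encoding. A hedged sketch using only the calls already shown (the timestamp literal is illustrative):

// Same wall-clock timestamp, two calendars: the INT96 binaries differ
// whenever the JVM default time zone is not GMT, which is why the write
// zone is recorded via PARQUET_INT96_WRITE_ZONE_PROPERTY.
Timestamp ts = Timestamp.valueOf("2011-01-01 01:01:01.111111111");
Binary gmtBinary = NanoTimeUtils.getNanoTime(ts, Calendar.getInstance(TimeZone.getTimeZone("GMT"))).toBinary();
Binary localBinary = NanoTimeUtils.getNanoTime(ts, Calendar.getInstance()).toBinary();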

Example 24 with Converter

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter in project hive by apache.

the class DynamicPartitionPruner method prunePartitionSingleSource.

@VisibleForTesting
protected void prunePartitionSingleSource(String source, SourceInfo si) throws HiveException {
    if (si.skipPruning.get()) {
        // in this case we've determined that there's too much data
        // to prune dynamically.
        LOG.info("Skip pruning on " + source + ", column " + si.columnName);
        return;
    }
    Set<Object> values = si.values;
    String columnName = si.columnName;
    if (LOG.isDebugEnabled()) {
        StringBuilder sb = new StringBuilder("Pruning ");
        sb.append(columnName);
        sb.append(" with ");
        for (Object value : values) {
            sb.append(value == null ? null : value.toString());
            sb.append(", ");
        }
        LOG.debug(sb.toString());
    }
    ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(si.columnType));
    Converter converter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi);
    StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector(Collections.singletonList(columnName), Collections.singletonList(oi));
    @SuppressWarnings("rawtypes") ExprNodeEvaluator eval = ExprNodeEvaluatorFactory.get(si.partKey);
    eval.initialize(soi);
    applyFilterToPartitions(converter, eval, columnName, values);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ExprNodeEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator) Converter(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
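A reduced sketch of the conversion step above, outside the pruner: pruning values arrive as Java strings, and the converter turns each one into the partition column's writable type before applyFilterToPartitions compares them. The "int" column type here is an illustrative stand-in for si.columnType.

// Build the same kind of string-to-column converter for an int partition column.
ObjectInspector colOI = PrimitiveObjectInspectorFactory
    .getPrimitiveWritableObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo("int"));
Converter converter = ObjectInspectorConverters.getConverter(
    PrimitiveObjectInspectorFactory.javaStringObjectInspector, colOI);
Object writable = converter.convert("42");  // an IntWritable holding 42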

Example 25 with Converter

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter in project hive by apache.

the class MapJoinOperator method getValueObjectInspectors.

@Override
protected List<ObjectInspector> getValueObjectInspectors(byte alias, List<ObjectInspector>[] aliasToObjectInspectors) {
    int[] valueIndex = conf.getValueIndex(alias);
    if (valueIndex == null) {
        return super.getValueObjectInspectors(alias, aliasToObjectInspectors);
    }
    List<ObjectInspector> inspectors = aliasToObjectInspectors[alias];
    int bigPos = conf.getPosBigTable();
    Converter[] converters = new Converter[valueIndex.length];
    List<ObjectInspector> valueOI = new ArrayList<ObjectInspector>();
    for (int i = 0; i < valueIndex.length; i++) {
        if (valueIndex[i] >= 0 && !joinKeysObjectInspectors[bigPos].isEmpty()) {
            if (conf.getNoOuterJoin()) {
                valueOI.add(joinKeysObjectInspectors[bigPos].get(valueIndex[i]));
            } else {
                // It is an outer join. We are going to add the inspector from the
                // inner side, but the key value will come from the outer side, so
                // we need to create a converter from inputOI to outputOI.
                valueOI.add(inspectors.get(i));
                converters[i] = ObjectInspectorConverters.getConverter(joinKeysObjectInspectors[bigPos].get(valueIndex[i]), inspectors.get(i));
            }
        } else {
            valueOI.add(inspectors.get(i));
        }
    }
    unwrapContainer[alias] = new UnwrapRowContainer(alias, valueIndex, converters, hasFilter(alias));
    return valueOI;
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) Converter(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter) UnwrapRowContainer(org.apache.hadoop.hive.ql.exec.persistence.UnwrapRowContainer)
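One property the loop above leans on, hedged as a sketch of ObjectInspectorConverters behavior: slots that need no conversion leave their converters entry null, and even when getConverter is called with equal input and output inspectors it hands back an identity converter, so only genuine inner-to-outer mismatches pay a conversion cost.

// With equal source and destination inspectors, convert is effectively a no-op.
ObjectInspector oi = PrimitiveObjectInspectorFactory.writableIntObjectInspector;
Converter same = ObjectInspectorConverters.getConverter(oi, oi);
Object out = same.convert(new IntWritable(7));  // returns the input unchanged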

Aggregations

ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) 20
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) 18
Converter (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter) 17
UDFArgumentTypeException (org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException) 13
PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) 12
UDFArgumentException (org.apache.hadoop.hive.ql.exec.UDFArgumentException) 9
ArrayList (java.util.ArrayList) 8
UDFArgumentLengthException (org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException) 7
TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable) 6
ConstantObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) 6
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) 6
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 5
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) 5
Test (org.junit.Test) 5
List (java.util.List) 4
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) 4
PrimitiveConverter (org.apache.parquet.io.api.PrimitiveConverter) 4
Properties (java.util.Properties) 3
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema) 3
ByteString (com.google.protobuf.ByteString) 2