
Example 6 with Converter

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter in project hive by apache.

In the class GenericUDFMap, the initialize method:

@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length % 2 != 0) {
        throw new UDFArgumentLengthException("Arguments must be in key/value pairs");
    }
    GenericUDFUtils.ReturnObjectInspectorResolver keyOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
    GenericUDFUtils.ReturnObjectInspectorResolver valueOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
    for (int i = 0; i < arguments.length; i++) {
        if (i % 2 == 0) {
            // Keys
            if (!(arguments[i] instanceof PrimitiveObjectInspector)) {
                throw new UDFArgumentTypeException(i, "Primitive Type is expected but \"" + arguments[i].getTypeName() + "\" is found");
            }
            if (!keyOIResolver.update(arguments[i])) {
                throw new UDFArgumentTypeException(i, "Key type \"" + arguments[i].getTypeName() + "\" is different from preceding key types. " + "Previous key type was \"" + arguments[i - 2].getTypeName() + "\"");
            }
        } else {
            // Values
            if (!valueOIResolver.update(arguments[i]) && !compatibleTypes(arguments[i], arguments[i - 2])) {
                throw new UDFArgumentTypeException(i, "Value type \"" + arguments[i].getTypeName() + "\" is different from preceding value types. " + "Previous value type was \"" + arguments[i - 2].getTypeName() + "\"");
            }
        }
    }
    ObjectInspector keyOI = keyOIResolver.get(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    ObjectInspector valueOI = valueOIResolver.get(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    converters = new Converter[arguments.length];
    for (int i = 0; i < arguments.length; i++) {
        converters[i] = ObjectInspectorConverters.getConverter(arguments[i], i % 2 == 0 ? keyOI : valueOI);
    }
    return ObjectInspectorFactory.getStandardMapObjectInspector(keyOI, valueOI);
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) VoidObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) UDFArgumentLengthException(org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException) UDFArgumentTypeException(org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException)
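
For context, the converters built in initialize above are consumed later at evaluate time, one per argument, alternating key and value. Below is a minimal, self-contained sketch of that pattern; it is not taken from the Hive source, and the class name MapConverterSketch, the sample arguments, and the choice of string key/value inspectors are assumptions for illustration.

import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class MapConverterSketch {
    public static void main(String[] args) {
        // Alternating key/value inspectors, roughly as MAP('a', 1, 'b', 2) would present them.
        ObjectInspector[] argOIs = {
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaIntObjectInspector };
        Object[] argValues = { "a", 1, "b", 2 };
        // Assume the resolvers settled on string keys and string values.
        ObjectInspector keyOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
        ObjectInspector valueOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
        // One converter per argument, mirroring the loop in initialize() above.
        Converter[] converters = new Converter[argOIs.length];
        for (int i = 0; i < argOIs.length; i++) {
            converters[i] = ObjectInspectorConverters.getConverter(argOIs[i], i % 2 == 0 ? keyOI : valueOI);
        }
        // evaluate()-style consumption: convert each key and value, then assemble the map.
        Map<Object, Object> result = new LinkedHashMap<Object, Object>();
        for (int i = 0; i < argValues.length; i += 2) {
            result.put(converters[i].convert(argValues[i]), converters[i + 1].convert(argValues[i + 1]));
        }
        System.out.println(result); // {a=1, b=2}, with the Integer values converted to Strings
    }
}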

Example 7 with Converter

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter in project hive by apache.

In the class GenericUDFSortArrayByField, the initialize method:

@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
    returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
    /** This UDF requires a minimum of 2 arguments: array_name, field name. */
    if (arguments.length < 2) {
        throw new UDFArgumentLengthException("SORT_ARRAY_BY requires minimum 2 arguments, got " + arguments.length);
    }
    /** The first argument must be an array. */
    switch(arguments[0].getCategory()) {
        case LIST:
            listObjectInspector = (ListObjectInspector) arguments[0];
            break;
        default:
            throw new UDFArgumentTypeException(0, "Argument 1 of function SORT_ARRAY_BY must be " + serdeConstants.LIST_TYPE_NAME + ", but " + arguments[0].getTypeName() + " was found.");
    }
    /** Elements inside the first argument (array) must be structs. */
    switch(listObjectInspector.getListElementObjectInspector().getCategory()) {
        case STRUCT:
            structObjectInspector = (StructObjectInspector) listObjectInspector.getListElementObjectInspector();
            break;
        default:
            throw new UDFArgumentTypeException(0, "Element[s] of first argument array in function SORT_ARRAY_BY must be " + serdeConstants.STRUCT_TYPE_NAME + ", but " + listObjectInspector.getTypeName() + " was found.");
    }
    /** All sort field name arguments and the sort order argument must be of string type. */
    converters = new Converter[arguments.length];
    inputTypes = new PrimitiveCategory[arguments.length];
    fields = new StructField[arguments.length - 1];
    noOfInputFields = arguments.length - 1;
    for (int i = 1; i < arguments.length; i++) {
        checkArgPrimitive(arguments, i);
        checkArgGroups(arguments, i, inputTypes, PrimitiveGrouping.STRING_GROUP);
        if (arguments[i] instanceof ConstantObjectInspector) {
            String fieldName = getConstantStringValue(arguments, i);
            /** Check whether a sort order (ASC or DESC) has been specified in the last argument. */
            if (i != 1 && (i == arguments.length - 1) && (fieldName.trim().toUpperCase().equals(SORT_ORDER_TYPE.ASC.name()) || fieldName.trim().toUpperCase().equals(SORT_ORDER_TYPE.DESC.name()))) {
                sortOrder = SORT_ORDER_TYPE.valueOf(fieldName.trim().toUpperCase());
                noOfInputFields -= 1;
                continue;
            }
            fields[i - 1] = structObjectInspector.getStructFieldRef(getConstantStringValue(arguments, i));
        }
        obtainStringConverter(arguments, i, inputTypes, converters);
    }
    ObjectInspector returnOI = returnOIResolver.get(structObjectInspector);
    converters[0] = ObjectInspectorConverters.getConverter(structObjectInspector, returnOI);
    return ObjectInspectorFactory.getStandardListObjectInspector(structObjectInspector);
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) UDFArgumentLengthException(org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException) UDFArgumentTypeException(org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException)
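
The per-argument string handling above goes through obtainStringConverter, which essentially asks ObjectInspectorConverters for a converter to the writable string inspector so any STRING_GROUP argument can be read uniformly as Text. Below is an illustrative stand-alone sketch of that single step; the class name and the field name "company" are made up for the example.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

public class FieldNameConverterSketch {
    public static void main(String[] args) {
        // Converter from a Java String argument to the writable string (Text) representation.
        Converter toText = ObjectInspectorConverters.getConverter(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.writableStringObjectInspector);
        // The sort field name arrives as a constant string and is normalized to Text
        // before being used for the struct field lookup.
        Text fieldName = (Text) toText.convert("company");
        System.out.println(fieldName); // company
    }
}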

Example 8 with Converter

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter in project hive by apache.

In the class TestNewInputOutputFormat, the testNewOutputFormatComplex method:

@SuppressWarnings("unchecked")
@Test
// Test outputformat with complex data type, and with reduce
public void testNewOutputFormatComplex() throws Exception {
    Path inputPath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".txt");
    Path outputPath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc");
    localFs.delete(outputPath, true);
    PrintWriter pw = new PrintWriter(new OutputStreamWriter(localFs.create(inputPath)));
    pw.println("I have eaten");
    pw.println("the plums");
    pw.println("that were in");
    pw.println("the icebox");
    pw.println("and which");
    pw.println("you were probably");
    pw.println("saving");
    pw.println("for breakfast");
    pw.println("Forgive me");
    pw.println("they were delicious");
    pw.println("so sweet");
    pw.println("and so cold");
    pw.close();
    Job job = new Job(conf, "orc test");
    job.setOutputFormatClass(OrcNewOutputFormat.class);
    job.setJarByClass(TestNewInputOutputFormat.class);
    job.setMapperClass(OrcTestMapper3.class);
    job.setReducerClass(OrcTestReducer3.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(OrcSerdeRow.class);
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    boolean result = job.waitForCompletion(true);
    assertTrue(result);
    Path outputFilePath = new Path(outputPath, "part-r-00000");
    Reader reader = OrcFile.createReader(outputFilePath, OrcFile.readerOptions(conf).filesystem(localFs));
    RecordReader rows = reader.rows();
    ObjectInspector orcOi = reader.getObjectInspector();
    ObjectInspector stoi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(OrcTestReducer3.typeInfo);
    ObjectInspectorConverters.Converter converter = ObjectInspectorConverters.getConverter(orcOi, stoi);
    Object row = rows.next(null);
    List<Object> converted = (List<Object>) converter.convert(row);
    assertEquals(1, converted.get(0));
    assertEquals(1, converted.get(1));
    List<Object> list = (List<Object>) converted.get(2);
    assertEquals(list.size(), 1);
    assertEquals("saving", ((List<Object>) list.get(0)).get(0));
    assertEquals(6, ((List<Object>) list.get(0)).get(1));
    Map<String, Integer> map = (Map<String, Integer>) converted.get(3);
    assertEquals(map.size(), 1);
    assertEquals(map.get("saving"), new Integer(1));
    row = rows.next(null);
    converted = (List<Object>) converter.convert(row);
    assertEquals(2, converted.get(0));
    assertEquals(6, converted.get(1));
    list = (List<Object>) converted.get(2);
    assertEquals(list.size(), 6);
    assertEquals("breakfast", ((List<Object>) list.get(0)).get(0));
    assertEquals(9, ((List<Object>) list.get(0)).get(1));
    map = (Map<String, Integer>) converted.get(3);
    assertEquals(map.size(), 11);
    assertEquals(map.get("the"), new Integer(2));
    row = rows.next(null);
    converted = (List<Object>) converter.convert(row);
    assertEquals(3, converted.get(0));
    assertEquals(5, converted.get(1));
    list = (List<Object>) converted.get(2);
    assertEquals(list.size(), 5);
    assertEquals("cold", ((List<Object>) list.get(0)).get(0));
    assertEquals(4, ((List<Object>) list.get(0)).get(1));
    map = (Map<String, Integer>) converted.get(3);
    assertEquals(map.size(), 13);
    assertEquals(map.get("were"), new Integer(3));
    assertFalse(rows.hasNext());
    localFs.delete(outputPath, true);
}
Also used : Path(org.apache.hadoop.fs.Path) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) ObjectInspectorConverters(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters) OutputStreamWriter(java.io.OutputStreamWriter) ArrayList(java.util.ArrayList) List(java.util.List) Job(org.apache.hadoop.mapreduce.Job) HashMap(java.util.HashMap) Map(java.util.Map) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)
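
The assertions above work on plain Java objects because the Converter maps the ORC reader's writable ObjectInspector onto the standard Java ObjectInspector of the same type. Here is a minimal, self-contained sketch of that idea for a single primitive; the class name WritableToJavaSketch and the sample value are illustrative, not taken from the test.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;

public class WritableToJavaSketch {
    public static void main(String[] args) {
        ObjectInspector writableOI = PrimitiveObjectInspectorFactory.writableIntObjectInspector;
        ObjectInspector javaOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
        ObjectInspectorConverters.Converter converter =
            ObjectInspectorConverters.getConverter(writableOI, javaOI);
        // An IntWritable read from a file format comes back as a plain Integer,
        // which is what assertions like assertEquals(1, converted.get(0)) compare against.
        System.out.println(converter.convert(new IntWritable(7))); // 7
    }
}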

Example 9 with Converter

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter in project hive by apache.

In the class DDLTask, the alterTableAlterPart method:

/**
   * Alter partition column type in a table
   *
   * @param db
   *          Database containing the table whose partition column type is altered.
   * @param alterPartitionDesc
   *          Descriptor of the partition column type change.
   * @return Returns 0 when execution succeeds and above 0 if it fails.
   * @throws HiveException
   */
private int alterTableAlterPart(Hive db, AlterTableAlterPartDesc alterPartitionDesc) throws HiveException {
    Table tbl = db.getTable(alterPartitionDesc.getTableName(), true);
    String tabName = alterPartitionDesc.getTableName();
    // This is checked by DDLSemanticAnalyzer
    assert (tbl.isPartitioned());
    List<FieldSchema> newPartitionKeys = new ArrayList<FieldSchema>();
    // Check if the existing partition values can be type-cast to the new column type
    // with a non-null value before trying to alter the partition column type.
    try {
        Set<Partition> partitions = db.getAllPartitionsOf(tbl);
        int colIndex = -1;
        for (FieldSchema col : tbl.getTTable().getPartitionKeys()) {
            colIndex++;
            if (col.getName().compareTo(alterPartitionDesc.getPartKeySpec().getName()) == 0) {
                break;
            }
        }
        if (colIndex == -1 || colIndex == tbl.getTTable().getPartitionKeys().size()) {
            throw new HiveException("Cannot find partition column " + alterPartitionDesc.getPartKeySpec().getName());
        }
        TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString(alterPartitionDesc.getPartKeySpec().getType());
        ObjectInspector outputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
        Converter converter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);
        // For all the existing partitions, check if the value can be type casted to a non-null object
        for (Partition part : partitions) {
            if (part.getName().equals(conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME))) {
                continue;
            }
            try {
                String value = part.getValues().get(colIndex);
                Object convertedValue = converter.convert(value);
                if (convertedValue == null) {
                    throw new HiveException(" Converting from " + TypeInfoFactory.stringTypeInfo + " to " + expectedType + " for value : " + value + " resulted in NULL object");
                }
            } catch (Exception e) {
                throw new HiveException("Exception while converting " + TypeInfoFactory.stringTypeInfo + " to " + expectedType + " for value : " + part.getValues().get(colIndex));
            }
        }
    } catch (Exception e) {
        throw new HiveException("Exception while checking type conversion of existing partition values to " + alterPartitionDesc.getPartKeySpec() + " : " + e.getMessage());
    }
    for (FieldSchema col : tbl.getTTable().getPartitionKeys()) {
        if (col.getName().compareTo(alterPartitionDesc.getPartKeySpec().getName()) == 0) {
            newPartitionKeys.add(alterPartitionDesc.getPartKeySpec());
        } else {
            newPartitionKeys.add(col);
        }
    }
    tbl.getTTable().setPartitionKeys(newPartitionKeys);
    try {
        db.alterTable(tabName, tbl, null);
    } catch (InvalidOperationException e) {
        throw new HiveException(e, ErrorMsg.GENERIC_ERROR, "Unable to alter " + tabName);
    }
    work.getInputs().add(new ReadEntity(tbl));
    // We've already locked the table as the input, don't relock it as the output.
    addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
    return 0;
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) IOException(java.io.IOException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) URISyntaxException(java.net.URISyntaxException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) StringUtils.stringifyException(org.apache.hadoop.util.StringUtils.stringifyException) SQLException(java.sql.SQLException) FileNotFoundException(java.io.FileNotFoundException) HiveAuthzPluginException(org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) StatObjectConverter(org.apache.hadoop.hive.metastore.StatObjectConverter) Converter(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter) HivePrivilegeObject(org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject) HiveLockObject(org.apache.hadoop.hive.ql.lockmgr.HiveLockObject) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
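
The core of the type check above is a Converter from javaStringObjectInspector to the writable inspector of the new partition column type; a null result from convert() means the existing string value cannot be cast. The following is an illustrative stand-alone sketch of just that check; the "int" target type, the sample values, and the class name are assumptions for the example.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class PartitionTypeCheckSketch {
    public static void main(String[] args) {
        // Target type of the altered partition column.
        TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString("int");
        ObjectInspector outputOI =
            TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
        Converter converter = ObjectInspectorConverters.getConverter(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);
        // Existing partition values are strings; a null conversion result would be rejected.
        for (String value : new String[] { "2018", "bad-value" }) {
            Object converted = converter.convert(value);
            System.out.println(value + " -> " + (converted == null ? "NULL (rejected)" : converted));
        }
    }
}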

Example 10 with Converter

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter in project hive by apache.

In the class SparkDynamicPartitionPruner, the prunePartitionSingleSource method:

private void prunePartitionSingleSource(SourceInfo info, MapWork work) throws HiveException {
    Set<Object> values = info.values;
    String columnName = info.columnName;
    ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(info.fieldInspector.getTypeName()));
    ObjectInspectorConverters.Converter converter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi);
    StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector(Collections.singletonList(columnName), Collections.singletonList(oi));
    @SuppressWarnings("rawtypes") ExprNodeEvaluator eval = ExprNodeEvaluatorFactory.get(info.partKey);
    eval.initialize(soi);
    applyFilterToPartitions(work, converter, eval, columnName, values);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspectorConverters(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters) ExprNodeEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator)
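
The converter here turns partition-spec strings into typed writables before they are compared against the pruning values. Below is an illustrative sketch of that setup in isolation; the "date" type name, the sample value, and the class name are assumptions, whereas in the source the type comes from info.fieldInspector.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PrunerConverterSketch {
    public static void main(String[] args) {
        // Writable inspector for the partition column's primitive type.
        ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
            TypeInfoFactory.getPrimitiveTypeInfo("date"));
        ObjectInspectorConverters.Converter converter = ObjectInspectorConverters.getConverter(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi);
        // A partition directory value such as "2016-01-01" becomes a typed date writable.
        System.out.println(converter.convert("2016-01-01"));
    }
}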

Aggregations

ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)20 Converter (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter)17 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)16 UDFArgumentTypeException (org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException)13 PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)10 ArrayList (java.util.ArrayList)8 UDFArgumentException (org.apache.hadoop.hive.ql.exec.UDFArgumentException)8 TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable)6 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)6 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)5 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)5 Test (org.junit.Test)5 List (java.util.List)4 UDFArgumentLengthException (org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException)4 ConstantObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector)4 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)4 PrimitiveConverter (org.apache.parquet.io.api.PrimitiveConverter)4 Properties (java.util.Properties)3 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)3 BytesWritable (org.apache.hadoop.io.BytesWritable)3