Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter in project hive by apache.
Example from class GenericUDFMap, method initialize:
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
  if (arguments.length % 2 != 0) {
    throw new UDFArgumentLengthException("Arguments must be in key/value pairs");
  }
  GenericUDFUtils.ReturnObjectInspectorResolver keyOIResolver =
      new GenericUDFUtils.ReturnObjectInspectorResolver(true);
  GenericUDFUtils.ReturnObjectInspectorResolver valueOIResolver =
      new GenericUDFUtils.ReturnObjectInspectorResolver(true);
  for (int i = 0; i < arguments.length; i++) {
    if (i % 2 == 0) {
      // Keys
      if (!(arguments[i] instanceof PrimitiveObjectInspector)) {
        throw new UDFArgumentTypeException(i, "Primitive Type is expected but \""
            + arguments[i].getTypeName() + "\" is found");
      }
      if (!keyOIResolver.update(arguments[i])) {
        throw new UDFArgumentTypeException(i, "Key type \"" + arguments[i].getTypeName()
            + "\" is different from preceding key types. "
            + "Previous key type was \"" + arguments[i - 2].getTypeName() + "\"");
      }
    } else {
      // Values
      if (!valueOIResolver.update(arguments[i]) && !compatibleTypes(arguments[i], arguments[i - 2])) {
        throw new UDFArgumentTypeException(i, "Value type \"" + arguments[i].getTypeName()
            + "\" is different from preceding value types. "
            + "Previous value type was \"" + arguments[i - 2].getTypeName() + "\"");
      }
    }
  }
  ObjectInspector keyOI = keyOIResolver.get(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
  ObjectInspector valueOI = valueOIResolver.get(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
  converters = new Converter[arguments.length];
  for (int i = 0; i < arguments.length; i++) {
    converters[i] = ObjectInspectorConverters.getConverter(arguments[i], i % 2 == 0 ? keyOI : valueOI);
  }
  return ObjectInspectorFactory.getStandardMapObjectInspector(keyOI, valueOI);
}
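The converters built at the end of initialize are what evaluate later uses to coerce every key and value argument to the resolved common inspectors. Below is a minimal, self-contained sketch of that conversion step on its own; the class name ConverterSketch and the int-to-string pairing are illustrative assumptions, not part of GenericUDFMap.

// Hypothetical standalone example; not part of the Hive source above.
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ConverterSketch {
  public static void main(String[] args) {
    // Source inspector: plain Java ints; target inspector: plain Java strings.
    ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    ObjectInspector stringOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    Converter toStringConv = ObjectInspectorConverters.getConverter(intOI, stringOI);
    // Converts the Integer 42 into the representation expected by stringOI.
    Object converted = toStringConv.convert(42);
    System.out.println(converted); // expected: "42"
  }
}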
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter in project hive by apache.
Example from class GenericUDFSortArrayByField, method initialize:
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
  GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
  returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
  /** This UDF requires a minimum of 2 arguments: array name and field name. */
  if (arguments.length < 2) {
    throw new UDFArgumentLengthException("SORT_ARRAY_BY requires minimum 2 arguments, got "
        + arguments.length);
  }
  /** The first argument must be an array. */
  switch (arguments[0].getCategory()) {
  case LIST:
    listObjectInspector = (ListObjectInspector) arguments[0];
    break;
  default:
    throw new UDFArgumentTypeException(0, "Argument 1 of function SORT_ARRAY_BY must be "
        + serdeConstants.LIST_TYPE_NAME + ", but " + arguments[0].getTypeName() + " was found.");
  }
  /** Elements inside the first argument (array) must be structs. */
  switch (listObjectInspector.getListElementObjectInspector().getCategory()) {
  case STRUCT:
    structObjectInspector = (StructObjectInspector) listObjectInspector.getListElementObjectInspector();
    break;
  default:
    throw new UDFArgumentTypeException(0, "Element[s] of first argument array in function SORT_ARRAY_BY must be "
        + serdeConstants.STRUCT_TYPE_NAME + ", but " + listObjectInspector.getTypeName() + " was found.");
  }
  /** All sort-field names and the sort order must be of string type. */
  converters = new Converter[arguments.length];
  inputTypes = new PrimitiveCategory[arguments.length];
  fields = new StructField[arguments.length - 1];
  noOfInputFields = arguments.length - 1;
  for (int i = 1; i < arguments.length; i++) {
    checkArgPrimitive(arguments, i);
    checkArgGroups(arguments, i, inputTypes, PrimitiveGrouping.STRING_GROUP);
    if (arguments[i] instanceof ConstantObjectInspector) {
      String fieldName = getConstantStringValue(arguments, i);
      /** Check whether a sort order (ASC, DESC) has been specified in the last argument. */
      if (i != 1 && (i == arguments.length - 1)
          && (fieldName.trim().toUpperCase().equals(SORT_ORDER_TYPE.ASC.name())
              || fieldName.trim().toUpperCase().equals(SORT_ORDER_TYPE.DESC.name()))) {
        sortOrder = SORT_ORDER_TYPE.valueOf(fieldName.trim().toUpperCase());
        noOfInputFields -= 1;
        continue;
      }
      fields[i - 1] = structObjectInspector.getStructFieldRef(getConstantStringValue(arguments, i));
    }
    obtainStringConverter(arguments, i, inputTypes, converters);
  }
  ObjectInspector returnOI = returnOIResolver.get(structObjectInspector);
  converters[0] = ObjectInspectorConverters.getConverter(structObjectInspector, returnOI);
  return ObjectInspectorFactory.getStandardListObjectInspector(structObjectInspector);
}
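The ReturnObjectInspectorResolver used above is the piece that works out a single common inspector across several candidate argument inspectors before any Converter is built. A rough, hedged sketch of how it behaves on its own follows; the class name ResolverSketch and the int/double pairing are illustrative assumptions.

// Hypothetical standalone example; not part of GenericUDFSortArrayByField.
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ResolverSketch {
  public static void main(String[] args) throws Exception {
    // true = allow implicit type conversion when unifying the candidate inspectors.
    GenericUDFUtils.ReturnObjectInspectorResolver resolver =
        new GenericUDFUtils.ReturnObjectInspectorResolver(true);
    resolver.update(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
    resolver.update(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);
    // The resolver should settle on the wider numeric type.
    ObjectInspector commonOI = resolver.get();
    System.out.println(commonOI.getTypeName()); // expected: double
  }
}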
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter in project hive by apache.
Example from class TestNewInputOutputFormat, method testNewOutputFormatComplex:
@SuppressWarnings("unchecked")
@Test
// Test output format with a complex data type, and with a reduce phase.
public void testNewOutputFormatComplex() throws Exception {
  Path inputPath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".txt");
  Path outputPath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc");
  localFs.delete(outputPath, true);
  PrintWriter pw = new PrintWriter(new OutputStreamWriter(localFs.create(inputPath)));
  pw.println("I have eaten");
  pw.println("the plums");
  pw.println("that were in");
  pw.println("the icebox");
  pw.println("and which");
  pw.println("you were probably");
  pw.println("saving");
  pw.println("for breakfast");
  pw.println("Forgive me");
  pw.println("they were delicious");
  pw.println("so sweet");
  pw.println("and so cold");
  pw.close();
  Job job = new Job(conf, "orc test");
  job.setOutputFormatClass(OrcNewOutputFormat.class);
  job.setJarByClass(TestNewInputOutputFormat.class);
  job.setMapperClass(OrcTestMapper3.class);
  job.setReducerClass(OrcTestReducer3.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(OrcSerdeRow.class);
  FileInputFormat.addInputPath(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);
  boolean result = job.waitForCompletion(true);
  assertTrue(result);
  Path outputFilePath = new Path(outputPath, "part-r-00000");
  Reader reader = OrcFile.createReader(outputFilePath, OrcFile.readerOptions(conf).filesystem(localFs));
  RecordReader rows = reader.rows();
  ObjectInspector orcOi = reader.getObjectInspector();
  ObjectInspector stoi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(OrcTestReducer3.typeInfo);
  ObjectInspectorConverters.Converter converter = ObjectInspectorConverters.getConverter(orcOi, stoi);
  Object row = rows.next(null);
  List<Object> converted = (List<Object>) converter.convert(row);
  assertEquals(1, converted.get(0));
  assertEquals(1, converted.get(1));
  List<Object> list = (List<Object>) converted.get(2);
  assertEquals(list.size(), 1);
  assertEquals("saving", ((List<Object>) list.get(0)).get(0));
  assertEquals(6, ((List<Object>) list.get(0)).get(1));
  Map<String, Integer> map = (Map<String, Integer>) converted.get(3);
  assertEquals(map.size(), 1);
  assertEquals(map.get("saving"), new Integer(1));
  row = rows.next(null);
  converted = (List<Object>) converter.convert(row);
  assertEquals(2, converted.get(0));
  assertEquals(6, converted.get(1));
  list = (List<Object>) converted.get(2);
  assertEquals(list.size(), 6);
  assertEquals("breakfast", ((List<Object>) list.get(0)).get(0));
  assertEquals(9, ((List<Object>) list.get(0)).get(1));
  map = (Map<String, Integer>) converted.get(3);
  assertEquals(map.size(), 11);
  assertEquals(map.get("the"), new Integer(2));
  row = rows.next(null);
  converted = (List<Object>) converter.convert(row);
  assertEquals(3, converted.get(0));
  assertEquals(5, converted.get(1));
  list = (List<Object>) converted.get(2);
  assertEquals(list.size(), 5);
  assertEquals("cold", ((List<Object>) list.get(0)).get(0));
  assertEquals(4, ((List<Object>) list.get(0)).get(1));
  map = (Map<String, Integer>) converted.get(3);
  assertEquals(map.size(), 13);
  assertEquals(map.get("were"), new Integer(3));
  assertFalse(rows.hasNext());
  localFs.delete(outputPath, true);
}
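What the converter does in this test is turn the writable-backed struct rows returned by the ORC Reader into plain Java objects that ordinary assertEquals calls can compare. The following hedged, self-contained sketch shows that same struct-level conversion in isolation; the field names id/word, the sample values, and the class name StructConverterSketch are made up for illustration.

// Hypothetical standalone example; not part of TestNewInputOutputFormat.
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class StructConverterSketch {
  public static void main(String[] args) {
    // A writable-backed struct inspector, standing in for what an ORC Reader exposes.
    StructObjectInspector writableOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("id", "word"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.writableIntObjectInspector,
            PrimitiveObjectInspectorFactory.writableStringObjectInspector));
    // A plain-Java struct inspector, the shape the test assertions compare against.
    StructObjectInspector javaOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("id", "word"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector));
    ObjectInspectorConverters.Converter converter =
        ObjectInspectorConverters.getConverter(writableOI, javaOI);
    // A standard struct object is just a List of field values.
    List<Object> row = Arrays.<Object>asList(new IntWritable(1), new Text("saving"));
    @SuppressWarnings("unchecked")
    List<Object> converted = (List<Object>) converter.convert(row);
    System.out.println(converted); // expected: [1, saving] as Integer and String
  }
}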
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter in project hive by apache.
Example from class DDLTask, method alterTableAlterPart:
/**
 * Alter partition column type in a table.
 *
 * @param db
 *          Database that contains the table whose partition column is being altered.
 * @param alterPartitionDesc
 *          Describes the new partition column type.
 * @return Returns 0 when execution succeeds and above 0 if it fails.
 * @throws HiveException
 */
private int alterTableAlterPart(Hive db, AlterTableAlterPartDesc alterPartitionDesc) throws HiveException {
  Table tbl = db.getTable(alterPartitionDesc.getTableName(), true);
  String tabName = alterPartitionDesc.getTableName();
  // This is checked by DDLSemanticAnalyzer
  assert (tbl.isPartitioned());
  List<FieldSchema> newPartitionKeys = new ArrayList<FieldSchema>();
  // Check that the existing partition values can be converted to the new column type
  // with a non null value before trying to alter the partition column type.
  try {
    Set<Partition> partitions = db.getAllPartitionsOf(tbl);
    int colIndex = -1;
    for (FieldSchema col : tbl.getTTable().getPartitionKeys()) {
      colIndex++;
      if (col.getName().compareTo(alterPartitionDesc.getPartKeySpec().getName()) == 0) {
        break;
      }
    }
    if (colIndex == -1 || colIndex == tbl.getTTable().getPartitionKeys().size()) {
      throw new HiveException("Cannot find partition column "
          + alterPartitionDesc.getPartKeySpec().getName());
    }
    TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString(alterPartitionDesc.getPartKeySpec().getType());
    ObjectInspector outputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
    Converter converter = ObjectInspectorConverters.getConverter(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);
    // For all the existing partitions, check if the value can be type cast to a non-null object
    for (Partition part : partitions) {
      if (part.getName().equals(conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME))) {
        continue;
      }
      try {
        String value = part.getValues().get(colIndex);
        Object convertedValue = converter.convert(value);
        if (convertedValue == null) {
          throw new HiveException(" Converting from " + TypeInfoFactory.stringTypeInfo + " to "
              + expectedType + " for value : " + value + " resulted in NULL object");
        }
      } catch (Exception e) {
        throw new HiveException("Exception while converting " + TypeInfoFactory.stringTypeInfo + " to "
            + expectedType + " for value : " + part.getValues().get(colIndex));
      }
    }
  } catch (Exception e) {
    throw new HiveException("Exception while checking type conversion of existing partition values to "
        + alterPartitionDesc.getPartKeySpec() + " : " + e.getMessage());
  }
  for (FieldSchema col : tbl.getTTable().getPartitionKeys()) {
    if (col.getName().compareTo(alterPartitionDesc.getPartKeySpec().getName()) == 0) {
      newPartitionKeys.add(alterPartitionDesc.getPartKeySpec());
    } else {
      newPartitionKeys.add(col);
    }
  }
  tbl.getTTable().setPartitionKeys(newPartitionKeys);
  try {
    db.alterTable(tabName, tbl, null);
  } catch (InvalidOperationException e) {
    throw new HiveException(e, ErrorMsg.GENERIC_ERROR, "Unable to alter " + tabName);
  }
  work.getInputs().add(new ReadEntity(tbl));
  // We've already locked the table as the input, don't relock it as the output.
  addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
  return 0;
}
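The core of the check above is the convert-and-test-for-null pattern: a partition value string that cannot be cast to the new column type converts to null, and that is what gets rejected. A small hedged sketch of just that pattern follows; the class name PartitionValueCheckSketch, the choice of "int", and the sample values are illustrative assumptions.

// Hypothetical standalone example; not part of DDLTask.
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class PartitionValueCheckSketch {
  public static void main(String[] args) {
    // Target type of the altered partition column, e.g. "int".
    TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString("int");
    ObjectInspector outputOI =
        TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
    Converter converter = ObjectInspectorConverters.getConverter(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);
    // "2018" is castable to int; "abc" is not and converts to null,
    // which is exactly the case alterTableAlterPart rejects.
    System.out.println(converter.convert("2018")); // e.g. an IntWritable holding 2018
    System.out.println(converter.convert("abc"));  // null
  }
}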
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter in project hive by apache.
Example from class SparkDynamicPartitionPruner, method prunePartitionSingleSource:
private void prunePartitionSingleSource(SourceInfo info, MapWork work) throws HiveException {
  Set<Object> values = info.values;
  String columnName = info.columnName;
  ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
      TypeInfoFactory.getPrimitiveTypeInfo(info.fieldInspector.getTypeName()));
  ObjectInspectorConverters.Converter converter = ObjectInspectorConverters.getConverter(
      PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi);
  StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector(
      Collections.singletonList(columnName), Collections.singletonList(oi));
  @SuppressWarnings("rawtypes")
  ExprNodeEvaluator eval = ExprNodeEvaluatorFactory.get(info.partKey);
  eval.initialize(soi);
  applyFilterToPartitions(work, converter, eval, columnName, values);
}
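Here the converter brings string partition values into the writable representation of the pruning column before they are compared against the values pushed from the other side of the join; the real code does that comparison through an ExprNodeEvaluator built over the single-column struct inspector. The sketch below simplifies the evaluator away and uses a plain set-membership test instead; the class name PruningConverterSketch, the "int" column type, and the sample values are illustrative assumptions.

// Hypothetical simplified example; not part of SparkDynamicPartitionPruner.
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.IntWritable;

public class PruningConverterSketch {
  public static void main(String[] args) {
    // Writable inspector for the partition column's type (here, int).
    ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
        TypeInfoFactory.getPrimitiveTypeInfo("int"));
    ObjectInspectorConverters.Converter converter = ObjectInspectorConverters.getConverter(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi);
    // Values pushed from the other side of the join, already in writable form.
    Set<Object> values = new HashSet<>();
    values.add(new IntWritable(2018));
    // A partition spec value arrives as a string; convert it before the membership test.
    Object converted = converter.convert("2018");
    System.out.println(values.contains(converted)); // true: this partition would be kept
  }
}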