Use of org.apache.orc.TypeDescription in project Hive by Apache.
The class OrcInputFormat, method genIncludedColumns.
public static boolean[] genIncludedColumns(TypeDescription readerSchema, List<Integer> included) {
  boolean[] result = new boolean[readerSchema.getMaximumId() + 1];
  if (included == null) {
    Arrays.fill(result, true);
    return result;
  }
  result[0] = true;
  List<TypeDescription> children = readerSchema.getChildren();
  for (int columnNumber = 0; columnNumber < children.size(); ++columnNumber) {
    if (included.contains(columnNumber)) {
      TypeDescription child = children.get(columnNumber);
      for (int col = child.getId(); col <= child.getMaximumId(); ++col) {
        result[col] = true;
      }
    }
  }
  return result;
}
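A minimal usage sketch (the schema string and column positions below are illustrative, not taken from the Hive tests): the included list holds 0-based top-level column positions, while the returned array is indexed by ORC column id, so selecting a nested column pulls in its whole subtree.

TypeDescription readerSchema = TypeDescription.fromString(
    "struct<name:string,age:int,address:struct<street:string,zip:int>>");
// Keep "age" (top-level position 1) and the whole "address" struct (position 2).
boolean[] include = OrcInputFormat.genIncludedColumns(readerSchema, Arrays.asList(1, 2));
// include => [true, false, true, true, true, true]
// id 0 is the root struct, id 1 ("name") stays false, id 2 is "age",
// ids 3-5 cover "address" and both of its children.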
Use of org.apache.orc.TypeDescription in project Hive by Apache.
The class OrcInputFormat, method typeDescriptionsFromHiveTypeProperty.
/**
* Convert a Hive type property string that contains separated type names into a list of
* TypeDescription objects.
* @param hiveTypeProperty the desired types from hive
* @param maxColumns the maximum number of desired columns
* @return the list of TypeDescription objects.
*/
public static ArrayList<TypeDescription> typeDescriptionsFromHiveTypeProperty(String hiveTypeProperty, int maxColumns) {
  // CONSIDER: We need a type name parser for TypeDescription.
  ArrayList<TypeInfo> typeInfoList = TypeInfoUtils.getTypeInfosFromTypeString(hiveTypeProperty);
  ArrayList<TypeDescription> typeDescrList = new ArrayList<TypeDescription>(typeInfoList.size());
  for (TypeInfo typeInfo : typeInfoList) {
    typeDescrList.add(convertTypeInfo(typeInfo));
    if (typeDescrList.size() >= maxColumns) {
      break;
    }
  }
  return typeDescrList;
}
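A minimal usage sketch, assuming a colon-separated type string of the kind stored in the columns.types table property (the values here are illustrative):

ArrayList<TypeDescription> types = OrcInputFormat.typeDescriptionsFromHiveTypeProperty(
    "bigint:string:array<double>", 2);
// maxColumns caps the result at two entries, bigint and string;
// the trailing array<double> is never converted.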
Use of org.apache.orc.TypeDescription in project Hive by Apache.
The class OrcOutputFormat, method getOptions.
private OrcFile.WriterOptions getOptions(JobConf conf, Properties props) {
  OrcFile.WriterOptions result = OrcFile.writerOptions(props, conf);
  if (props != null) {
    final String columnNameProperty = props.getProperty(IOConstants.COLUMNS);
    final String columnTypeProperty = props.getProperty(IOConstants.COLUMNS_TYPES);
    if (columnNameProperty != null && !columnNameProperty.isEmpty() &&
        columnTypeProperty != null && !columnTypeProperty.isEmpty()) {
      List<String> columnNames;
      List<TypeInfo> columnTypes;
      final String columnNameDelimiter = props.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ?
          props.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
      if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
      } else {
        columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
      }
      if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
      } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
      }
      TypeDescription schema = TypeDescription.createStruct();
      for (int i = 0; i < columnNames.size(); ++i) {
        schema.addField(columnNames.get(i), OrcInputFormat.convertTypeInfo(columnTypes.get(i)));
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("ORC schema = " + schema);
      }
      result.setSchema(schema);
    }
  }
  return result;
}
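Since getOptions is private, the sketch below only reproduces its schema-building step with assumed property values; it is not code from the Hive source:

Properties props = new Properties();
props.setProperty(IOConstants.COLUMNS, "id,name,scores");
props.setProperty(IOConstants.COLUMNS_TYPES, "bigint:string:array<double>");

List<String> columnNames = Arrays.asList(props.getProperty(IOConstants.COLUMNS).split(","));
List<TypeInfo> columnTypes =
    TypeInfoUtils.getTypeInfosFromTypeString(props.getProperty(IOConstants.COLUMNS_TYPES));
TypeDescription schema = TypeDescription.createStruct();
for (int i = 0; i < columnNames.size(); ++i) {
  schema.addField(columnNames.get(i), OrcInputFormat.convertTypeInfo(columnTypes.get(i)));
}
// schema.toString() => "struct<id:bigint,name:string,scores:array<double>>"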
Use of org.apache.orc.TypeDescription in project Hive by Apache.
The class RecordReaderImpl, method nextMap.
static HashMap<Object, Object> nextMap(ColumnVector vector, int row, TypeDescription schema, Object previous) {
  if (vector.isRepeating) {
    row = 0;
  }
  if (vector.noNulls || !vector.isNull[row]) {
    MapColumnVector map = (MapColumnVector) vector;
    int length = (int) map.lengths[row];
    int offset = (int) map.offsets[row];
    TypeDescription keyType = schema.getChildren().get(0);
    TypeDescription valueType = schema.getChildren().get(1);
    HashMap<Object, Object> result;
    if (previous == null || previous.getClass() != HashMap.class) {
      result = new HashMap<Object, Object>(length);
    } else {
      result = (HashMap<Object, Object>) previous;
      // I couldn't think of a good way to reuse the keys and value objects
      // without even more allocations, so take the easy and safe approach.
      result.clear();
    }
    for (int e = 0; e < length; ++e) {
      result.put(nextValue(map.keys, e + offset, keyType, null),
          nextValue(map.values, e + offset, valueType, null));
    }
    return result;
  } else {
    return null;
  }
}
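nextMap is package-private, so the sketch below assumes it is called from within org.apache.hadoop.hive.ql.io.orc; the hand-built vectors are illustrative only. It shows how offsets and lengths slice the shared key and value vectors into one row's map:

TypeDescription mapType = TypeDescription.fromString("map<string,int>");
BytesColumnVector keys = new BytesColumnVector();
LongColumnVector values = new LongColumnVector();
keys.initBuffer();
keys.setVal(0, "a".getBytes(StandardCharsets.UTF_8));
keys.setVal(1, "b".getBytes(StandardCharsets.UTF_8));
values.vector[0] = 1;
values.vector[1] = 2;
MapColumnVector map = new MapColumnVector(1024, keys, values);
map.offsets[0] = 0;  // row 0's entries start at index 0 of keys/values
map.lengths[0] = 2;  // and span two entries

HashMap<Object, Object> result = RecordReaderImpl.nextMap(map, 0, mapType, null);
// result maps Text keys to IntWritable values: {a=1, b=2}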
Use of org.apache.orc.TypeDescription in project Hive by Apache.
The class RecordReaderImpl, method nextStruct.
static OrcStruct nextStruct(ColumnVector vector, int row, TypeDescription schema, Object previous) {
  if (vector.isRepeating) {
    row = 0;
  }
  if (vector.noNulls || !vector.isNull[row]) {
    OrcStruct result;
    List<TypeDescription> childrenTypes = schema.getChildren();
    int numChildren = childrenTypes.size();
    if (previous == null || previous.getClass() != OrcStruct.class) {
      result = new OrcStruct(numChildren);
    } else {
      result = (OrcStruct) previous;
      result.setNumFields(numChildren);
    }
    StructColumnVector struct = (StructColumnVector) vector;
    for (int f = 0; f < numChildren; ++f) {
      result.setFieldValue(f, nextValue(struct.fields[f], row, childrenTypes.get(f), result.getFieldValue(f)));
    }
    return result;
  } else {
    return null;
  }
}
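A similar hand-built sketch for nextStruct (again assuming package-private access from org.apache.hadoop.hive.ql.io.orc; the field names and values are illustrative):

TypeDescription structType = TypeDescription.fromString("struct<id:int,name:string>");
LongColumnVector idCol = new LongColumnVector();
BytesColumnVector nameCol = new BytesColumnVector();
nameCol.initBuffer();
idCol.vector[0] = 42;
nameCol.setVal(0, "orc".getBytes(StandardCharsets.UTF_8));
StructColumnVector struct = new StructColumnVector(1024, idCol, nameCol);

OrcStruct row = RecordReaderImpl.nextStruct(struct, 0, structType, null);
// row.getFieldValue(0) => IntWritable(42), row.getFieldValue(1) => Text("orc")
// Passing the same OrcStruct back as previous on the next call lets it be reused.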