Use of io.jhdf.object.datatype.CompoundDataType in project Drill by Apache.
The class HDF5BatchReader, method getAndMapCompoundData.
/**
* Processes the compound data type which can be found in HDF5 files,
* writing its members into a Drill MAP of repeated fields.
* It automatically flattens anything greater than 2 dimensions.
*
* @param path the HDF5 path to the compound data
* @param reader the HDF5 reader for the data file
* @param rowWriter the rowWriter to write the data
*/
private void getAndMapCompoundData(String path, HdfFile reader, RowSetLoader rowWriter) {
  final String COMPOUND_DATA_FIELD_NAME = "compound_data";
  List<CompoundDataMember> data = ((CompoundDataType) reader.getDatasetByPath(path).getDataType()).getMembers();
  int index;

  // Add map to schema
  SchemaBuilder innerSchema = new SchemaBuilder();
  MapBuilder mapBuilder = innerSchema.addMap(COMPOUND_DATA_FIELD_NAME);

  // Loop to build schema
  for (CompoundDataMember dataMember : data) {
    String dataType = dataMember.getDataType().getJavaType().getName();
    String fieldName = dataMember.getName();

    switch (dataType) {
      case "byte":
        mapBuilder.add(fieldName, MinorType.TINYINT, DataMode.REPEATED);
        break;
      case "short":
        mapBuilder.add(fieldName, MinorType.SMALLINT, DataMode.REPEATED);
        break;
      case "int":
        mapBuilder.add(fieldName, MinorType.INT, DataMode.REPEATED);
        break;
      case "double":
        mapBuilder.add(fieldName, MinorType.FLOAT8, DataMode.REPEATED);
        break;
      case "float":
        mapBuilder.add(fieldName, MinorType.FLOAT4, DataMode.REPEATED);
        break;
      case "long":
        mapBuilder.add(fieldName, MinorType.BIGINT, DataMode.REPEATED);
        break;
      case "boolean":
        mapBuilder.add(fieldName, MinorType.BIT, DataMode.REPEATED);
        break;
      case "java.lang.String":
        mapBuilder.add(fieldName, MinorType.VARCHAR, DataMode.REPEATED);
        break;
      default:
        logger.warn("Drill cannot process data type {} in compound fields.", dataType);
        break;
    }
  }
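
  // Materialize the map schema, then add the compound_data column to the
  // row writer only if it is not already present.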
  TupleMetadata finalInnerSchema = mapBuilder.resumeSchema().buildSchema();
  index = rowWriter.tupleSchema().index(COMPOUND_DATA_FIELD_NAME);
  if (index == -1) {
    index = rowWriter.addColumn(finalInnerSchema.column(COMPOUND_DATA_FIELD_NAME));
  }
  TupleWriter listWriter = rowWriter.column(index).tuple();
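
  // Second pass over the members: copy each field's array values from the
  // dataset into the corresponding repeated column of the map.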
  for (CompoundDataMember dataMember : data) {
    String dataType = dataMember.getDataType().getJavaType().getName();
    String fieldName = dataMember.getName();
    int[] dataLength = reader.getDatasetByPath(path).getDimensions();
    Object rawData = ((LinkedHashMap<String, ?>) reader.getDatasetByPath(path).getData()).get(fieldName);
    ArrayWriter innerWriter = listWriter.array(fieldName);

    for (int i = 0; i < dataLength[0]; i++) {
      switch (dataType) {
        case "byte":
          innerWriter.scalar().setInt(((byte[]) rawData)[i]);
          break;
        case "short":
          innerWriter.scalar().setInt(((short[]) rawData)[i]);
          break;
        case "int":
          innerWriter.scalar().setInt(((int[]) rawData)[i]);
          break;
        case "double":
          innerWriter.scalar().setDouble(((double[]) rawData)[i]);
          break;
        case "float":
          innerWriter.scalar().setFloat(((float[]) rawData)[i]);
          break;
        case "long":
          innerWriter.scalar().setLong(((long[]) rawData)[i]);
          break;
        case "boolean":
          innerWriter.scalar().setBoolean(((boolean[]) rawData)[i]);
          break;
        case "java.lang.String":
          if (((String[]) rawData)[i] != null) {
            innerWriter.scalar().setString(((String[]) rawData)[i]);
          } else {
            innerWriter.scalar().setNull();
          }
          break;
        default:
          logger.warn("Drill cannot process data type {} in compound fields.", dataType);
          break;
      }
    }
  }
}
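
For context, here is a minimal standalone sketch of the jHDF calls the reader above relies on: it opens a file, reads the compound data type's members, and fetches the per-member arrays from getData(). The file name example.h5, the dataset path /compound_ds, and the class name CompoundDumpExample are hypothetical; only the io.jhdf types and methods come from the excerpt.

import io.jhdf.HdfFile;
import io.jhdf.api.Dataset;
import io.jhdf.object.datatype.CompoundDataType;
import io.jhdf.object.datatype.CompoundDataType.CompoundDataMember;

import java.lang.reflect.Array;
import java.nio.file.Paths;
import java.util.LinkedHashMap;
import java.util.List;

public class CompoundDumpExample {
  public static void main(String[] args) {
    // Open the HDF5 file; try-with-resources closes it when done.
    // "example.h5" and "/compound_ds" are placeholder names.
    try (HdfFile hdfFile = new HdfFile(Paths.get("example.h5"))) {
      Dataset dataset = hdfFile.getDatasetByPath("/compound_ds");

      // For a compound dataset the data type describes each member's
      // name and Java type, exactly as the Drill reader consumes it.
      CompoundDataType type = (CompoundDataType) dataset.getDataType();
      List<CompoundDataMember> members = type.getMembers();

      // getData() on a compound dataset yields a LinkedHashMap keyed by
      // member name; each value is a primitive or String array.
      LinkedHashMap<String, ?> data = (LinkedHashMap<String, ?>) dataset.getData();

      for (CompoundDataMember member : members) {
        String name = member.getName();
        Object values = data.get(name);
        Object first = Array.getLength(values) > 0 ? Array.get(values, 0) : null;
        System.out.printf("%s (%s): %d rows, first value = %s%n",
            name, member.getDataType().getJavaType().getName(),
            dataset.getDimensions()[0], first);
      }
    }
  }
}

The two-pass shape of the Drill method mirrors this: one pass over getMembers() to declare the schema, and one pass over the LinkedHashMap from getData() to write the values.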