Search in sources:

Example 1 with CompoundDataMember

use of io.jhdf.object.datatype.CompoundDataType.CompoundDataMember in project drill by apache.

the class HDF5BatchReader method getAndMapCompoundData.

/**
 * Processes the MAP data type, which can be found in HDF5 files.
 * It automatically flattens anything with more than two dimensions.
 *
 * @param path the HDF5 path to the compound data
 * @param reader the HDF5 reader for the data file
 * @param rowWriter the RowSetLoader to which the data is written
 */
private void getAndMapCompoundData(String path, HdfFile reader, RowSetLoader rowWriter) {
    final String COMPOUND_DATA_FIELD_NAME = "compound_data";
    List<CompoundDataMember> data = ((CompoundDataType) reader.getDatasetByPath(path).getDataType()).getMembers();
    int index;
    // Add map to schema
    SchemaBuilder innerSchema = new SchemaBuilder();
    MapBuilder mapBuilder = innerSchema.addMap(COMPOUND_DATA_FIELD_NAME);
    // Loop to build schema
    for (CompoundDataMember dataMember : data) {
        String dataType = dataMember.getDataType().getJavaType().getName();
        String fieldName = dataMember.getName();
        switch(dataType) {
            case "byte":
                mapBuilder.add(fieldName, MinorType.TINYINT, DataMode.REPEATED);
                break;
            case "short":
                mapBuilder.add(fieldName, MinorType.SMALLINT, DataMode.REPEATED);
                break;
            case "int":
                mapBuilder.add(fieldName, MinorType.INT, DataMode.REPEATED);
                break;
            case "double":
                mapBuilder.add(fieldName, MinorType.FLOAT8, DataMode.REPEATED);
                break;
            case "float":
                mapBuilder.add(fieldName, MinorType.FLOAT4, DataMode.REPEATED);
                break;
            case "long":
                mapBuilder.add(fieldName, MinorType.BIGINT, DataMode.REPEATED);
                break;
            case "boolean":
                mapBuilder.add(fieldName, MinorType.BIT, DataMode.REPEATED);
                break;
            case "java.lang.String":
                mapBuilder.add(fieldName, MinorType.VARCHAR, DataMode.REPEATED);
                break;
            default:
                logger.warn("Drill cannot process data type {} in compound fields.", dataType);
                break;
        }
    }
    // Materialize the map schema and add the compound_data column to the row writer if it is not already present
    TupleMetadata finalInnerSchema = mapBuilder.resumeSchema().buildSchema();
    index = rowWriter.tupleSchema().index(COMPOUND_DATA_FIELD_NAME);
    if (index == -1) {
        index = rowWriter.addColumn(finalInnerSchema.column(COMPOUND_DATA_FIELD_NAME));
    }
    TupleWriter listWriter = rowWriter.column(index).tuple();
    // Second pass: write each member's column of values into the corresponding repeated field
    for (CompoundDataMember dataMember : data) {
        String dataType = dataMember.getDataType().getJavaType().getName();
        String fieldName = dataMember.getName();
        int[] dataLength = reader.getDatasetByPath(path).getDimensions();
        Object rawData = ((LinkedHashMap<String, ?>) reader.getDatasetByPath(path).getData()).get(fieldName);
        ArrayWriter innerWriter = listWriter.array(fieldName);
        for (int i = 0; i < dataLength[0]; i++) {
            switch(dataType) {
                case "byte":
                    innerWriter.scalar().setInt(((byte[]) rawData)[i]);
                    break;
                case "short":
                    innerWriter.scalar().setInt(((short[]) rawData)[i]);
                    break;
                case "int":
                    innerWriter.scalar().setInt(((int[]) rawData)[i]);
                    break;
                case "double":
                    innerWriter.scalar().setDouble(((double[]) rawData)[i]);
                    break;
                case "float":
                    innerWriter.scalar().setFloat(((float[]) rawData)[i]);
                    break;
                case "long":
                    innerWriter.scalar().setLong(((long[]) rawData)[i]);
                    break;
                case "boolean":
                    innerWriter.scalar().setBoolean(((boolean[]) rawData)[i]);
                    break;
                case "java.lang.String":
                    if ((((String[]) rawData)[i]) != null) {
                        innerWriter.scalar().setString(((String[]) rawData)[i]);
                    } else {
                        innerWriter.scalar().setNull();
                    }
                    break;
                default:
                    logger.warn("Drill cannot process data type {} in compound fields.", dataType);
                    break;
            }
        }
    }
}
Also used: CompoundDataType(io.jhdf.object.datatype.CompoundDataType) CompoundDataMember(io.jhdf.object.datatype.CompoundDataType.CompoundDataMember) LinkedHashMap(java.util.LinkedHashMap) TupleWriter(org.apache.drill.exec.vector.accessor.TupleWriter) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) MapBuilder(org.apache.drill.exec.record.metadata.MapBuilder) ArrayWriter(org.apache.drill.exec.vector.accessor.ArrayWriter)
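
Both examples depend on how jHDF exposes compound data: Dataset.getData() returns the values as a LinkedHashMap keyed by member name, where each value holds that member's full column as a primitive or String array. This is why the snippet above can cast getData() to LinkedHashMap<String, ?> and look up each CompoundDataMember by name. The standalone sketch below shows that access pattern in isolation; the file name example.h5 and the dataset path /compound_dataset are hypothetical placeholders, not part of the Drill plugin.

import io.jhdf.HdfFile;
import io.jhdf.api.Dataset;
import io.jhdf.object.datatype.CompoundDataType;
import io.jhdf.object.datatype.CompoundDataType.CompoundDataMember;

import java.nio.file.Paths;
import java.util.LinkedHashMap;
import java.util.Map;

public class CompoundDumpExample {
  public static void main(String[] args) throws Exception {
    // "example.h5" and "/compound_dataset" are hypothetical placeholders.
    try (HdfFile hdfFile = new HdfFile(Paths.get("example.h5"))) {
      Dataset dataset = hdfFile.getDatasetByPath("/compound_dataset");
      CompoundDataType type = (CompoundDataType) dataset.getDataType();

      // For compound datasets, jHDF returns getData() as a map from member name
      // to an array holding that member's entire column of values.
      @SuppressWarnings("unchecked")
      Map<String, ?> columns = (LinkedHashMap<String, ?>) dataset.getData();

      for (CompoundDataMember member : type.getMembers()) {
        Object column = columns.get(member.getName());
        System.out.printf("%s -> %s, column array type %s, %d rows%n",
            member.getName(),
            member.getDataType().getJavaType().getName(),
            column.getClass().getSimpleName(),
            dataset.getDimensions()[0]);
      }
    }
  }
}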

Example 2 with CompoundDataMember

use of io.jhdf.object.datatype.CompoundDataType.CompoundDataMember in project drill by apache.

the class HDF5MapDataWriter method getDataWriters.

/**
 * Populates the ArrayList of DataWriters. Since HDF5 maps contain homogeneous
 * columns, it is sufficient to get the first row and iterate through the columns
 * to get the data types and build the schema accordingly.
 */
private void getDataWriters(WriterSpec writerSpec) {
    for (CompoundDataMember dataMember : data) {
        String dataType = dataMember.getDataType().getJavaType().getName();
        String fieldName = dataMember.getName();
        switch(dataType) {
            case "byte":
                dataWriters.add(new HDF5ByteDataWriter(reader, writerSpec, fieldName.replace(UNSAFE_SPACE_SEPARATOR, SAFE_SPACE_SEPARATOR), (byte[]) compoundData.get(fieldName)));
                break;
            case "short":
                dataWriters.add(new HDF5SmallIntDataWriter(reader, writerSpec, fieldName.replace(UNSAFE_SPACE_SEPARATOR, SAFE_SPACE_SEPARATOR), (short[]) compoundData.get(fieldName)));
                break;
            case "int":
                dataWriters.add(new HDF5IntDataWriter(reader, writerSpec, fieldName.replace(UNSAFE_SPACE_SEPARATOR, SAFE_SPACE_SEPARATOR), (int[]) compoundData.get(fieldName)));
                break;
            case "long":
                dataWriters.add(new HDF5LongDataWriter(reader, writerSpec, fieldName.replace(UNSAFE_SPACE_SEPARATOR, SAFE_SPACE_SEPARATOR), (long[]) compoundData.get(fieldName)));
                break;
            case "double":
                dataWriters.add(new HDF5DoubleDataWriter(reader, writerSpec, fieldName.replace(UNSAFE_SPACE_SEPARATOR, SAFE_SPACE_SEPARATOR), (double[]) compoundData.get(fieldName)));
                break;
            case "float":
                dataWriters.add(new HDF5FloatDataWriter(reader, writerSpec, fieldName.replace(UNSAFE_SPACE_SEPARATOR, SAFE_SPACE_SEPARATOR), (float[]) compoundData.get(fieldName)));
                break;
            case "java.lang.String":
                dataWriters.add(new HDF5StringDataWriter(reader, writerSpec, fieldName.replace(UNSAFE_SPACE_SEPARATOR, SAFE_SPACE_SEPARATOR), (String[]) compoundData.get(fieldName)));
                break;
            default:
                // Log unknown data type
                logger.warn("Drill cannot process data type {} in compound fields.", dataType);
                break;
        }
    }
}
Also used: CompoundDataMember(io.jhdf.object.datatype.CompoundDataType.CompoundDataMember)
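
Both examples repeat the same mapping from the Java type name reported by jHDF to a Drill column type. As an illustration only, that mapping could be pulled into a small lookup helper; the class and method names below (CompoundTypeMapping, toMinorType) are hypothetical and not part of the Drill or jHDF APIs.

import org.apache.drill.common.types.TypeProtos.MinorType;

// Hypothetical helper capturing the type mapping used by both switch statements above.
public final class CompoundTypeMapping {

  private CompoundTypeMapping() { }

  /**
   * Maps the Java type name reported by jHDF for a compound member to the
   * corresponding Drill minor type, or returns null for unsupported types.
   */
  public static MinorType toMinorType(String javaTypeName) {
    switch (javaTypeName) {
      case "byte":             return MinorType.TINYINT;
      case "short":            return MinorType.SMALLINT;
      case "int":              return MinorType.INT;
      case "long":             return MinorType.BIGINT;
      case "float":            return MinorType.FLOAT4;
      case "double":           return MinorType.FLOAT8;
      case "boolean":          return MinorType.BIT;
      case "java.lang.String": return MinorType.VARCHAR;
      default:                 return null;
    }
  }
}

With a helper like this, the schema-building loop in getAndMapCompoundData reduces to a null check on the returned type followed by mapBuilder.add(fieldName, minorType, DataMode.REPEATED).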

Aggregations

CompoundDataMember (io.jhdf.object.datatype.CompoundDataType.CompoundDataMember) 2
CompoundDataType (io.jhdf.object.datatype.CompoundDataType) 1
LinkedHashMap (java.util.LinkedHashMap) 1
MapBuilder (org.apache.drill.exec.record.metadata.MapBuilder) 1
SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder) 1
TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata) 1
ArrayWriter (org.apache.drill.exec.vector.accessor.ArrayWriter) 1
TupleWriter (org.apache.drill.exec.vector.accessor.TupleWriter) 1