Use of org.apache.drill.exec.store.hdf5.writers.WriterSpec in project drill by apache.
From the class HDF5BatchReader, method open().
@Override
public boolean open(FileSchemaNegotiator negotiator) {
  split = negotiator.split();
  errorContext = negotiator.parentErrorContext();
  // Since the HDF file reader uses a stream to actually read the file, the file name from the
  // module is incorrect.
  fileName = split.getPath().getName();
  try {
    openFile(negotiator);
  } catch (IOException e) {
    throw UserException.dataReadError(e)
      .addContext("Failed to open input file: %s", split.getPath())
      .addContext(errorContext)
      .build(logger);
  }

  ResultSetLoader loader;
  if (readerConfig.defaultPath == null) {
    // Get file metadata
    List<HDF5DrillMetadata> metadata = getFileMetadata(hdfFile, new ArrayList<>());
    metadataIterator = metadata.iterator();

    // Schema for metadata query
    SchemaBuilder builder = new SchemaBuilder()
      .addNullable(PATH_COLUMN_NAME, MinorType.VARCHAR)
      .addNullable(DATA_TYPE_COLUMN_NAME, MinorType.VARCHAR)
      .addNullable(FILE_NAME_COLUMN_NAME, MinorType.VARCHAR)
      .addNullable(DATA_SIZE_COLUMN_NAME, MinorType.BIGINT)
      .addNullable(IS_LINK_COLUMN_NAME, MinorType.BIT)
      .addNullable(ELEMENT_COUNT_NAME, MinorType.BIGINT)
      .addNullable(DATASET_DATA_TYPE_NAME, MinorType.VARCHAR)
      .addNullable(DIMENSIONS_FIELD_NAME, MinorType.VARCHAR);

    negotiator.tableSchema(builder.buildSchema(), false);
    loader = negotiator.build();
    dimensions = new int[0];
    rowWriter = loader.writer();
  } else {
    // This is the case when the default path is specified. Since the user is explicitly asking
    // for a dataset, Drill can obtain the schema from the dataset's data types and dimensions
    // and map that schema to columns.
    Dataset dataSet = hdfFile.getDatasetByPath(readerConfig.defaultPath);
    dimensions = dataSet.getDimensions();

    loader = negotiator.build();
    rowWriter = loader.writer();
    writerSpec = new WriterSpec(rowWriter, negotiator.providedSchema(), negotiator.parentErrorContext());
    if (dimensions.length <= 1) {
      buildSchemaFor1DimensionalDataset(dataSet);
    } else if (dimensions.length == 2) {
      buildSchemaFor2DimensionalDataset(dataSet);
    } else {
      // Datasets with more than two dimensions are automatically flattened
      // into the 2D representation.
      buildSchemaFor2DimensionalDataset(dataSet);
    }
  }

  if (readerConfig.defaultPath == null) {
    pathWriter = rowWriter.scalar(PATH_COLUMN_NAME);
    dataTypeWriter = rowWriter.scalar(DATA_TYPE_COLUMN_NAME);
    fileNameWriter = rowWriter.scalar(FILE_NAME_COLUMN_NAME);
    dataSizeWriter = rowWriter.scalar(DATA_SIZE_COLUMN_NAME);
    linkWriter = rowWriter.scalar(IS_LINK_COLUMN_NAME);
    elementCountWriter = rowWriter.scalar(ELEMENT_COUNT_NAME);
    datasetTypeWriter = rowWriter.scalar(DATASET_DATA_TYPE_NAME);
    dimensionsWriter = rowWriter.scalar(DIMENSIONS_FIELD_NAME);
  }
  return true;
}
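The two branches above correspond to two query shapes a client can issue: without defaultPath the reader returns only the metadata columns registered in the first branch (PATH_COLUMN_NAME, DATA_TYPE_COLUMN_NAME, and so on), while supplying defaultPath makes it project the dataset itself. The following standalone sketch shows both shapes over JDBC. The connection URL, file path, and dataset path are illustrative assumptions, not values from this source; the table-function form follows the HDF5 format plugin's documented defaultPath option and should be checked against your Drill version.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.Statement;

public class Hdf5QuerySketch {
  public static void main(String[] args) throws Exception {
    // Assumed connection URL for a Drillbit running locally; requires the Drill JDBC driver.
    try (Connection conn = DriverManager.getConnection("jdbc:drill:drillbit=localhost");
         Statement stmt = conn.createStatement()) {

      // No defaultPath: the reader takes the metadata branch and returns the
      // metadata columns built by the SchemaBuilder above.
      ResultSet metadata = stmt.executeQuery(
          "SELECT * FROM dfs.`/tmp/example.h5`");  // hypothetical file
      printColumnNames(metadata);

      // defaultPath supplied through the table-function syntax: the reader takes the
      // dataset branch and derives the schema from the dataset's type and dimensions.
      ResultSet dataset = stmt.executeQuery(
          "SELECT * FROM table(dfs.`/tmp/example.h5` " +
          "(type => 'hdf5', defaultPath => '/some/dataset'))");  // hypothetical dataset path
      printColumnNames(dataset);
    }
  }

  private static void printColumnNames(ResultSet rs) throws Exception {
    ResultSetMetaData md = rs.getMetaData();
    for (int i = 1; i <= md.getColumnCount(); i++) {
      System.out.println(md.getColumnName(i));
    }
  }
}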
Use of org.apache.drill.exec.store.hdf5.writers.WriterSpec in project drill by apache.
From the class HDF5BatchReader, method buildSchemaFor2DimensionalDataset().
/**
 * Builds a Drill schema from a dataset with 2 or more dimensions. HDF5 only
 * supports INT, LONG, DOUBLE and FLOAT for >2D data, so this function is
 * not as inclusive as the 1D function. This function builds the schema by
 * adding DataWriters to the dataWriters array.
 *
 * @param dataset
 *          The dataset which Drill will use to build a schema
 */
private void buildSchemaFor2DimensionalDataset(Dataset dataset) {
  MinorType currentDataType = HDF5Utils.getDataType(dataset.getDataType());

  // Case for null or unknown data types:
  if (currentDataType == null) {
    logger.warn("Couldn't add {}", dataset.getJavaType().getName());
    return;
  }

  long cols = dimensions[1];
  String tempFieldName;
  for (int i = 0; i < cols; i++) {
    switch (currentDataType) {
      case INT:
        tempFieldName = INT_COLUMN_PREFIX + i;
        dataWriters.add(new HDF5IntDataWriter(hdfFile, writerSpec, readerConfig.defaultPath, tempFieldName, i));
        break;
      case BIGINT:
        tempFieldName = LONG_COLUMN_PREFIX + i;
        dataWriters.add(new HDF5LongDataWriter(hdfFile, writerSpec, readerConfig.defaultPath, tempFieldName, i));
        break;
      case FLOAT8:
        tempFieldName = DOUBLE_COLUMN_PREFIX + i;
        dataWriters.add(new HDF5DoubleDataWriter(hdfFile, writerSpec, readerConfig.defaultPath, tempFieldName, i));
        break;
      case FLOAT4:
        tempFieldName = FLOAT_COLUMN_PREFIX + i;
        dataWriters.add(new HDF5FloatDataWriter(hdfFile, writerSpec, readerConfig.defaultPath, tempFieldName, i));
        break;
      default:
        throw new UnsupportedOperationException(currentDataType.name());
    }
  }
}
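The effect of the loop above is that an n x m dataset is exposed as m scalar columns, one DataWriter per column index, each named with a per-type prefix plus that index. The following standalone sketch mimics that column-per-index flattening for a small int matrix; the prefix value and the plain Java lists are assumptions for illustration only and stand in for the plugin's actual column-prefix constant and HDF5*DataWriter classes.

import java.util.ArrayList;
import java.util.List;

public class ColumnFlatteningSketch {
  // Assumed prefix for illustration; the real value is the reader's INT_COLUMN_PREFIX constant.
  private static final String INT_COLUMN_PREFIX = "int_col_";

  public static void main(String[] args) {
    int[][] dataset = {
        {1, 2, 3},
        {4, 5, 6}
    };
    int cols = dataset[0].length;

    // One named "writer" (here just a list) per column index, mirroring the
    // one-DataWriter-per-column loop in buildSchemaFor2DimensionalDataset().
    List<String> fieldNames = new ArrayList<>();
    List<List<Integer>> columns = new ArrayList<>();
    for (int i = 0; i < cols; i++) {
      fieldNames.add(INT_COLUMN_PREFIX + i);
      columns.add(new ArrayList<>());
    }

    // Each dataset row becomes one output row; matrix column i feeds the field named int_col_i.
    for (int[] row : dataset) {
      for (int i = 0; i < cols; i++) {
        columns.get(i).add(row[i]);
      }
    }

    for (int i = 0; i < cols; i++) {
      System.out.println(fieldNames.get(i) + " -> " + columns.get(i));
    }
  }
}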