
Example 1 with Datatype

use of io.tiledb.java.api.Datatype in project TileDB-Spark by TileDB-Inc.

the class TileDBDataReaderPartitionScanLegacy method allocateQuerybuffers.

private void allocateQuerybuffers(long readBufferSize) throws TileDBError {
    metricsUpdater.startTimer(queryAllocBufferTimerName);
    // Create coordinate buffers
    int minDimSize = Integer.MAX_VALUE;
    for (Dimension dimension : arraySchema.getDomain().getDimensions()) {
        int nativeSize = dimension.getType().getNativeSize();
        if (nativeSize < minDimSize)
            minDimSize = nativeSize;
    }
    int ncoords = Math.toIntExact(readBufferSize / minDimSize);
    // loop over all attributes and set the query buffers based on buffer size
    // the query object handles the lifetime of the allocated (offheap) NativeArrays
    int i = 0;
    for (String fieldName : fieldNames) {
        // get the spark column name and match to array schema
        String name = fieldName;
        boolean isVar;
        Datatype type;
        if (domain.hasDimension(name)) {
            Dimension dim = domain.getDimension(name);
            type = dim.getType();
            isVar = dim.isVar();
        } else {
            Attribute attr = arraySchema.getAttribute(name);
            type = attr.getType();
            isVar = attr.isVar();
        }
        boolean nullable = false;
        if (this.arraySchema.hasAttribute(name)) {
            try (Attribute attr = this.arraySchema.getAttribute(name)) {
                nullable = attr.getNullable();
            }
        }
        int nvalues = Math.toIntExact(readBufferSize / type.getNativeSize());
        NativeArray data = new NativeArray(ctx, nvalues, type);
        // attribute is variable length, init the varlen result buffers using the est num offsets
        if (isVar) {
            int noffsets = Math.toIntExact(readBufferSize / TILEDB_UINT64.getNativeSize());
            NativeArray offsets = new NativeArray(ctx, noffsets, TILEDB_UINT64);
            if (nullable) {
                query.setBufferNullable(name, offsets, data, new NativeArray(ctx, nvalues, TILEDB_UINT8));
            } else {
                query.setBuffer(name, offsets, data);
            }
            queryBuffers.set(i++, new Pair<>(offsets, data));
        } else {
            // attribute is fixed length, use the result size estimate for allocation
            if (nullable) {
                query.setBufferNullable(name, new NativeArray(ctx, nvalues, type), new NativeArray(ctx, nvalues, TILEDB_UINT8));
            } else {
                query.setBuffer(name, new NativeArray(ctx, nvalues, type));
            }
            queryBuffers.set(i++, new Pair<>(null, data));
        }
    }
    // Allocate result set batch based on the estimated (upper bound) number of rows / cells
    resultVectors = OnHeapColumnVector.allocateColumns(ncoords, sparkSchema);
    resultBatch = new ColumnarBatch(resultVectors);
    metricsUpdater.finish(queryAllocBufferTimerName);
}
Also used : ColumnarBatch(org.apache.spark.sql.vectorized.ColumnarBatch) Datatype(io.tiledb.java.api.Datatype)
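For reference, a minimal sketch of the sizing arithmetic this method relies on: the field's Datatype native size converts the read buffer's byte budget into an element count for the off-heap NativeArray. It reuses only the TileDB-Java classes already shown (Context, Datatype, NativeArray, TileDBError); the helper name allocateFixedBuffer is illustrative, not part of the project.

// Sketch only; assumes an open Context and a fixed-length field of the given Datatype.
private NativeArray allocateFixedBuffer(Context ctx, Datatype type, long readBufferSize)
        throws TileDBError {
    // number of elements of this datatype that fit in the byte budget
    int nvalues = Math.toIntExact(readBufferSize / type.getNativeSize());
    // off-heap allocation; the Query it is handed to manages its lifetime
    return new NativeArray(ctx, nvalues, type);
}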

Example 2 with Datatype

use of io.tiledb.java.api.Datatype in project TileDB-Spark by TileDB-Inc.

the class TileDBPartitionReaderLegacy method getColumnBatch.

/**
 * For a given Spark field name, dispatch between attribute and dimension buffer copying
 *
 * @param field Spark field to copy query result set
 * @param index Spark field index in the projected schema
 * @return number of values copied into the columnar batch result buffers
 * @throws TileDBError A TileDB exception
 */
private int getColumnBatch(StructField field, int index) throws TileDBError {
    String name = field.name();
    Datatype dataType;
    long cellValNum;
    boolean isVar;
    if (arraySchema.hasAttribute(name)) {
        Attribute attribute = arraySchema.getAttribute(name);
        dataType = attribute.getType();
        cellValNum = attribute.getCellValNum();
        isVar = attribute.isVar();
    } else if (domain.hasDimension(name)) {
        Dimension dimension = domain.getDimension(name);
        dataType = dimension.getType();
        cellValNum = dimension.getCellValNum();
        isVar = dimension.isVar();
    } else {
        throw new TileDBError("Array " + array.getUri() + " has no attribute/dimension with name " + name);
    }
    if (cellValNum > 1) {
        return getVarLengthAttributeColumn(name, dataType, isVar, cellValNum, index);
    } else {
        return getScalarValueColumn(name, dataType, index);
    }
}
Also used : Datatype(io.tiledb.java.api.Datatype)
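As a companion to the dispatch above, a small sketch of the attribute-versus-dimension lookup that produces the Datatype. It uses only the ArraySchema, Domain, Attribute, and Dimension accessors already shown; the helper name fieldDatatype is an assumption for illustration.

// Sketch only: resolve a Spark field name to its TileDB Datatype.
private Datatype fieldDatatype(ArraySchema schema, Domain domain, String name) throws TileDBError {
    if (schema.hasAttribute(name)) {
        // attributes own their Datatype; close the handle after reading it
        try (Attribute attr = schema.getAttribute(name)) {
            return attr.getType();
        }
    } else if (domain.hasDimension(name)) {
        return domain.getDimension(name).getType();
    }
    throw new TileDBError("No attribute or dimension named " + name);
}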

Example 3 with Datatype

use of io.tiledb.java.api.Datatype in project TileDB-Spark by TileDB-Inc.

the class TileDBPartitionReaderLegacy method getDimensionColumn.

@Deprecated
private int getDimensionColumn(String name, int index) throws TileDBError {
    metricsUpdater.startTimer(queryGetDimensionTimerName);
    int bufferLength = 0;
    Dimension dim = domain.getDimension(name);
    Datatype type = dim.getType();
    int ndim = Math.toIntExact(domain.getNDim());
    // perform a strided copy for dimension columnar buffers starting at a dimIdx offset (slow path)
    switch(type) {
        case TILEDB_FLOAT32:
            {
                float[] buffer = (float[]) query.getBuffer(name);
                bufferLength = buffer.length;
                if (resultVectors.length > 0) {
                    resultVectors[index].reset();
                    resultVectors[index].putFloats(0, bufferLength, buffer, 0);
                }
                break;
            }
        case TILEDB_FLOAT64:
            {
                double[] buffer = (double[]) query.getBuffer(name);
                bufferLength = buffer.length;
                if (resultVectors.length > 0) {
                    resultVectors[index].reset();
                    resultVectors[index].putDoubles(0, bufferLength, buffer, 0);
                }
                break;
            }
        case TILEDB_INT8:
            {
                byte[] buffer = (byte[]) query.getBuffer(name);
                bufferLength = buffer.length;
                if (resultVectors.length > 0) {
                    resultVectors[index].reset();
                    resultVectors[index].putBytes(0, bufferLength, buffer, 0);
                }
                break;
            }
        case TILEDB_INT16:
        case TILEDB_UINT8:
            {
                short[] buffer = (short[]) query.getBuffer(name);
                bufferLength = buffer.length;
                if (resultVectors.length > 0) {
                    resultVectors[index].reset();
                    resultVectors[index].putShorts(0, bufferLength, buffer, 0);
                }
                break;
            }
        case TILEDB_UINT16:
        case TILEDB_INT32:
            {
                int[] buffer = (int[]) query.getBuffer(name);
                bufferLength = buffer.length;
                if (resultVectors.length > 0) {
                    resultVectors[index].reset();
                    resultVectors[index].putInts(0, bufferLength, buffer, 0);
                }
                break;
            }
        case TILEDB_INT64:
        case TILEDB_UINT32:
        case TILEDB_UINT64:
        case TILEDB_DATETIME_MS:
            {
                long[] buffer = (long[]) query.getBuffer(name);
                bufferLength = buffer.length;
                if (resultVectors.length > 0) {
                    resultVectors[index].reset();
                    resultVectors[index].putLongs(0, bufferLength, buffer, 0);
                }
                break;
            }
        case TILEDB_DATETIME_DAY:
            {
                long[] buffer = (long[]) query.getBuffer(name);
                bufferLength = buffer.length / ndim;
                if (resultVectors.length > 0) {
                    resultVectors[index].reset();
                    resultVectors[index].putLongs(0, bufferLength, buffer, 0);
                }
                break;
            }
        default:
            {
                throw new TileDBError("Unsupported dimension type for domain " + domain.getType());
            }
    }
    metricsUpdater.finish(queryGetDimensionTimerName);
    return bufferLength;
}
Also used : Datatype(io.tiledb.java.api.Datatype)
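The case groupings in the switch above imply a mapping from TileDB Datatype to the Spark column type being filled; the sketch below restates that mapping compactly. It is illustrative only, not the project's schema-conversion code, and covers just the plain numeric cases; the datetime cases above are copied as raw long values and are left out of this sketch (imports assumed: io.tiledb.java.api.Datatype, org.apache.spark.sql.types.DataType, org.apache.spark.sql.types.DataTypes).

// Sketch only: Spark SQL type corresponding to each numeric Datatype case in the switch above.
static DataType sparkTypeFor(Datatype type) {
    switch (type) {
        case TILEDB_FLOAT32: return DataTypes.FloatType;
        case TILEDB_FLOAT64: return DataTypes.DoubleType;
        case TILEDB_INT8:    return DataTypes.ByteType;
        case TILEDB_INT16:
        case TILEDB_UINT8:   return DataTypes.ShortType;
        case TILEDB_UINT16:
        case TILEDB_INT32:   return DataTypes.IntegerType;
        case TILEDB_INT64:
        case TILEDB_UINT32:
        case TILEDB_UINT64:  return DataTypes.LongType;
        default: throw new IllegalArgumentException("Unsupported Datatype: " + type);
    }
}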

Example 4 with Datatype

use of io.tiledb.java.api.Datatype in project TileDB-Spark by TileDB-Inc.

the class TileDBPartitionReaderLegacy method allocateQuerybuffers.

private void allocateQuerybuffers(long readBufferSize) throws TileDBError {
    metricsUpdater.startTimer(queryAllocBufferTimerName);
    // Create coordinate buffers
    int minDimSize = Integer.MAX_VALUE;
    for (Dimension dimension : arraySchema.getDomain().getDimensions()) {
        int nativeSize = dimension.getType().getNativeSize();
        if (nativeSize < minDimSize)
            minDimSize = nativeSize;
    }
    int ncoords = Math.toIntExact(readBufferSize / minDimSize);
    // loop over all attributes and set the query buffers based on buffer size
    // the query object handles the lifetime of the allocated (offheap) NativeArrays
    int i = 0;
    for (String fieldName : fieldNames) {
        // get the spark column name and match to array schema
        String name = fieldName;
        boolean isVar;
        Datatype type;
        if (domain.hasDimension(name)) {
            Dimension dim = domain.getDimension(name);
            type = dim.getType();
            isVar = dim.isVar();
        } else {
            Attribute attr = arraySchema.getAttribute(name);
            type = attr.getType();
            isVar = attr.isVar();
        }
        boolean nullable = false;
        if (this.arraySchema.hasAttribute(name)) {
            try (Attribute attr = this.arraySchema.getAttribute(name)) {
                nullable = attr.getNullable();
            }
        }
        int nvalues = Math.toIntExact(readBufferSize / type.getNativeSize());
        NativeArray data = new NativeArray(ctx, nvalues, type);
        // attribute is variable length, init the varlen result buffers using the est num offsets
        if (isVar) {
            int noffsets = Math.toIntExact(readBufferSize / TILEDB_UINT64.getNativeSize());
            NativeArray offsets = new NativeArray(ctx, noffsets, TILEDB_UINT64);
            if (nullable) {
                query.setBufferNullable(name, offsets, data, new NativeArray(ctx, nvalues, TILEDB_UINT8));
            } else {
                query.setBuffer(name, offsets, data);
            }
            queryBuffers.set(i++, new Pair<>(offsets, data));
        } else {
            // attribute is fixed length, use the result size estimate for allocation
            if (nullable) {
                query.setBufferNullable(name, new NativeArray(ctx, nvalues, type), new NativeArray(ctx, nvalues, TILEDB_UINT8));
            } else {
                query.setBuffer(name, new NativeArray(ctx, nvalues, type));
            }
            queryBuffers.set(i++, new Pair<>(null, data));
        }
    }
    // Allocate result set batch based on the estimated (upper bound) number of rows / cells
    resultVectors = OnHeapColumnVector.allocateColumns(ncoords, sparkSchema);
    resultBatch = new ColumnarBatch(resultVectors);
    metricsUpdater.finish(queryAllocBufferTimerName);
}
Also used : ColumnarBatch(org.apache.spark.sql.vectorized.ColumnarBatch) Datatype(io.tiledb.java.api.Datatype)
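The variable-length branch above pairs a TILEDB_UINT64 offsets buffer with the data buffer; the sketch below isolates that pairing. It uses only the Query, NativeArray, and Pair calls already present in the method; the helper name allocateVarBuffers is illustrative, and TILEDB_UINT64 is assumed to be statically imported as above.

// Sketch only: allocate and register offsets + data buffers for one var-length field.
private Pair<NativeArray, NativeArray> allocateVarBuffers(
        Context ctx, Query query, String name, Datatype type, long readBufferSize)
        throws TileDBError {
    int nvalues = Math.toIntExact(readBufferSize / type.getNativeSize());
    int noffsets = Math.toIntExact(readBufferSize / TILEDB_UINT64.getNativeSize());
    NativeArray data = new NativeArray(ctx, nvalues, type);
    // one uint64 offset per cell marks where each variable-length value starts
    NativeArray offsets = new NativeArray(ctx, noffsets, TILEDB_UINT64);
    query.setBuffer(name, offsets, data);
    return new Pair<>(offsets, data);
}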

Example 5 with Datatype

use of io.tiledb.java.api.Datatype in project TileDB-Spark by TileDB-Inc.

the class TileDBDataReaderPartitionScanLegacy method getDimensionColumn.

@Deprecated
private int getDimensionColumn(String name, int index) throws TileDBError {
    metricsUpdater.startTimer(queryGetDimensionTimerName);
    int bufferLength = 0;
    Dimension dim = domain.getDimension(name);
    Datatype type = dim.getType();
    int ndim = Math.toIntExact(domain.getNDim());
    // perform a strided copy for dimension columnar buffers starting at a dimIdx offset (slow path)
    switch(type) {
        case TILEDB_FLOAT32:
            {
                float[] buffer = (float[]) query.getBuffer(name);
                bufferLength = buffer.length;
                if (resultVectors.length > 0) {
                    resultVectors[index].reset();
                    resultVectors[index].putFloats(0, bufferLength, buffer, 0);
                }
                break;
            }
        case TILEDB_FLOAT64:
            {
                double[] buffer = (double[]) query.getBuffer(name);
                bufferLength = buffer.length;
                if (resultVectors.length > 0) {
                    resultVectors[index].reset();
                    resultVectors[index].putDoubles(0, bufferLength, buffer, 0);
                }
                break;
            }
        case TILEDB_INT8:
            {
                byte[] buffer = (byte[]) query.getBuffer(name);
                bufferLength = buffer.length;
                if (resultVectors.length > 0) {
                    resultVectors[index].reset();
                    resultVectors[index].putBytes(0, bufferLength, buffer, 0);
                }
                break;
            }
        case TILEDB_INT16:
        case TILEDB_UINT8:
            {
                short[] buffer = (short[]) query.getBuffer(name);
                bufferLength = buffer.length;
                if (resultVectors.length > 0) {
                    resultVectors[index].reset();
                    resultVectors[index].putShorts(0, bufferLength, buffer, 0);
                }
                break;
            }
        case TILEDB_UINT16:
        case TILEDB_INT32:
            {
                int[] buffer = (int[]) query.getBuffer(name);
                bufferLength = buffer.length;
                if (resultVectors.length > 0) {
                    resultVectors[index].reset();
                    resultVectors[index].putInts(0, bufferLength, buffer, 0);
                }
                break;
            }
        case TILEDB_INT64:
        case TILEDB_UINT32:
        case TILEDB_UINT64:
        case TILEDB_DATETIME_MS:
            {
                long[] buffer = (long[]) query.getBuffer(name);
                bufferLength = buffer.length;
                if (resultVectors.length > 0) {
                    resultVectors[index].reset();
                    resultVectors[index].putLongs(0, bufferLength, buffer, 0);
                }
                break;
            }
        case TILEDB_DATETIME_DAY:
            {
                long[] buffer = (long[]) query.getBuffer(name);
                bufferLength = buffer.length / ndim;
                if (resultVectors.length > 0) {
                    resultVectors[index].reset();
                    resultVectors[index].putLongs(0, bufferLength, buffer, 0);
                }
                break;
            }
        default:
            {
                throw new TileDBError("Unsupported dimension type for domain " + domain.getType());
            }
    }
    metricsUpdater.finish(queryGetDimensionTimerName);
    return bufferLength;
}
Also used : Datatype(io.tiledb.java.api.Datatype)
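A short usage note on the return value: the cell count reported by these per-column copies is what ultimately sizes the batch handed back to Spark. The snippet below is illustrative only; the dimension name and column index are placeholders, and setNumRows is the standard ColumnarBatch call.

// Sketch only: after copying a projected column, use the returned cell count as the row count.
int rows = getDimensionColumn("rows", 0);  // "rows" and index 0 are hypothetical
resultBatch.setNumRows(rows);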

Aggregations

Datatype (io.tiledb.java.api.Datatype): 6
ColumnarBatch (org.apache.spark.sql.vectorized.ColumnarBatch): 2