Search in sources :

Example 1 with ArrowWriter

use of org.apache.arrow.vector.ipc.ArrowWriter in project conquery by bakdata.

the method getArrowResult of the class ResultArrowProcessor.

/**
 * Authorizes a download of {@code exec} for {@code subject} and builds a response that
 * streams the execution's results through an {@link ArrowWriter}.
 *
 * <p>The subject must hold {@code READ} and {@code DOWNLOAD} on {@code dataset} and
 * {@code READ} on {@code exec}; {@code authorizeDownloadDatasets} is consulted in addition.
 * If the execution is neither a {@link ManagedQuery} nor a {@link ManagedForm} with exactly
 * one sub query, a response with status {@code SC_UNPROCESSABLE_ENTITY} is returned instead.
 *
 * @param writerProducer  given the response's output stream, yields the factory that creates
 *                        the {@link ArrowWriter} for a {@link VectorSchemaRoot}
 * @param subject         the subject requesting the download
 * @param exec            the execution whose results are streamed
 * @param dataset         the dataset the download is requested on
 * @param datasetRegistry registry used to look up the namespace of {@code dataset}
 * @param pretty          forwarded to the {@link PrintSettings}
 * @param fileExtension   forwarded to {@code makeResponseWithFileName}
 * @param mediaType       forwarded to {@code makeResponseWithFileName}
 * @param config          source of the id printer, the id result infos and the Arrow batch size
 * @return the response produced by {@code makeResponseWithFileName}, or the error response
 *         described above
 */
public static <E extends ManagedExecution<?> & SingleTableResult> Response getArrowResult(Function<OutputStream, Function<VectorSchemaRoot, ArrowWriter>> writerProducer, Subject subject, E exec, Dataset dataset, DatasetRegistry datasetRegistry, boolean pretty, String fileExtension, MediaType mediaType, ConqueryConfig config) {
    final Namespace namespace = datasetRegistry.get(dataset.getId());

    ConqueryMDC.setLocation(subject.getName());
    log.info("Downloading results for {} on dataset {}", exec, dataset);

    // Permission checks on the requested dataset and on the execution itself
    subject.authorize(dataset, Ability.READ);
    subject.authorize(dataset, Ability.DOWNLOAD);
    subject.authorize(exec, Ability.READ);

    // The subject must also be permitted to download on every dataset referenced by the query
    authorizeDownloadDatasets(subject, exec);

    // Only a plain query or a form wrapping exactly one sub query can be rendered as one table
    if (!(exec instanceof ManagedQuery)
        && !(exec instanceof ManagedForm && ((ManagedForm) exec).getSubQueries().size() == 1)) {
        return Response.status(HttpStatus.SC_UNPROCESSABLE_ENTITY, "Execution result is not a single Table").build();
    }

    // Locale of the current request (extracted by the LocaleFilter)
    final Locale requestLocale = I18n.LOCALE.get();
    final IdPrinter idPrinter = config.getFrontend().getQueryUpload().getIdPrinter(subject, exec, namespace);
    final PrintSettings printSettings = new PrintSettings(pretty, requestLocale, datasetRegistry, config, idPrinter::createId);

    // Column descriptions: id columns first, then the columns of the execution's result
    final List<ResultInfo> idInfos = config.getFrontend().getQueryUpload().getIdResultInfos();
    final List<ResultInfo> execInfos = exec.getResultInfos();

    // Rendering is deferred until the response body is actually written
    final StreamingOutput body = output -> {
        final Function<VectorSchemaRoot, ArrowWriter> arrowWriterFactory = writerProducer.apply(output);
        final int batchSize = config.getArrow().getBatchSize();
        renderToStream(arrowWriterFactory, printSettings, batchSize, idInfos, execInfos, exec.streamResults());
    };

    return makeResponseWithFileName(body, exec.getLabelWithoutAutoLabelSuffix(), fileExtension, mediaType, ResultUtil.ContentDispositionOption.ATTACHMENT);
}
Also used : IdPrinter(com.bakdata.conquery.models.identifiable.mapping.IdPrinter) Locale(java.util.Locale) ManagedQuery(com.bakdata.conquery.models.query.ManagedQuery) ConqueryConfig(com.bakdata.conquery.models.config.ConqueryConfig) ManagedForm(com.bakdata.conquery.models.forms.managed.ManagedForm) Subject(com.bakdata.conquery.models.auth.entities.Subject) AuthorizationHelper.authorizeDownloadDatasets(com.bakdata.conquery.models.auth.AuthorizationHelper.authorizeDownloadDatasets) HttpStatus(org.apache.http.HttpStatus) Function(java.util.function.Function) ArrayList(java.util.ArrayList) PrintSettings(com.bakdata.conquery.models.query.PrintSettings) UtilityClass(lombok.experimental.UtilityClass) MediaType(javax.ws.rs.core.MediaType) ManagedExecution(com.bakdata.conquery.models.execution.ManagedExecution) ArrowWriter(org.apache.arrow.vector.ipc.ArrowWriter) Locale(java.util.Locale) ArrowRenderer.renderToStream(com.bakdata.conquery.io.result.arrow.ArrowRenderer.renderToStream) I18n(com.bakdata.conquery.models.i18n.I18n) IdPrinter(com.bakdata.conquery.models.identifiable.mapping.IdPrinter) ResultInfo(com.bakdata.conquery.models.query.resultinfo.ResultInfo) OutputStream(java.io.OutputStream) ResultUtil(com.bakdata.conquery.io.result.ResultUtil) ConqueryMDC(com.bakdata.conquery.util.io.ConqueryMDC) ResultUtil.makeResponseWithFileName(com.bakdata.conquery.io.result.ResultUtil.makeResponseWithFileName) VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) StreamingOutput(javax.ws.rs.core.StreamingOutput) SingleTableResult(com.bakdata.conquery.models.query.SingleTableResult) Dataset(com.bakdata.conquery.models.datasets.Dataset) List(java.util.List) Slf4j(lombok.extern.slf4j.Slf4j) Response(javax.ws.rs.core.Response) Ability(com.bakdata.conquery.models.auth.permissions.Ability) DatasetRegistry(com.bakdata.conquery.models.worker.DatasetRegistry) Namespace(com.bakdata.conquery.models.worker.Namespace) ManagedForm(com.bakdata.conquery.models.forms.managed.ManagedForm) 
PrintSettings(com.bakdata.conquery.models.query.PrintSettings) StreamingOutput(javax.ws.rs.core.StreamingOutput) ManagedQuery(com.bakdata.conquery.models.query.ManagedQuery) ResultInfo(com.bakdata.conquery.models.query.resultinfo.ResultInfo) Namespace(com.bakdata.conquery.models.worker.Namespace)

Example 2 with ArrowWriter

use of org.apache.arrow.vector.ipc.ArrowWriter in project conquery by bakdata.

the method renderToStream of the class ArrowRenderer.

/**
 * Renders the given results into an Arrow stream.
 *
 * <p>A schema is built from the id headers followed by the result infos, a
 * {@link VectorSchemaRoot} is allocated for it, and the rows are written through the
 * {@link ArrowWriter} obtained from {@code writerProducer}. Both the writer and the root are
 * closed when this method returns, also when writing fails, so the buffers taken from the
 * allocator are released again.
 *
 * @param writerProducer creates the writer for the freshly allocated root
 * @param printSettings  settings used for naming the columns, building the writer pipelines
 *                       and supplying the id mapper
 * @param batchSize      forwarded to {@code write}
 * @param idHeaders      descriptions of the id columns (placed first in the schema)
 * @param resultInfo     descriptions of the value columns (placed after the id columns)
 * @param results        the results to write
 * @throws IOException if writing or closing the writer fails
 */
public static void renderToStream(Function<VectorSchemaRoot, ArrowWriter> writerProducer, PrintSettings printSettings, int batchSize, List<ResultInfo> idHeaders, List<ResultInfo> resultInfo, Stream<EntityResult> results) throws IOException {
    // Combine id and value Fields to one vector to build a schema
    final UniqueNamer uniqNamer = new UniqueNamer(printSettings);
    final List<Field> idFields = generateFields(idHeaders, uniqNamer);
    final List<Field> fields = new ArrayList<>(idFields);
    fields.addAll(generateFields(resultInfo, uniqNamer));

    // The root owns allocator-backed buffers; closing the writer alone does not release them,
    // so the root is managed as a resource that is closed after the writer.
    try (VectorSchemaRoot root = VectorSchemaRoot.create(new Schema(fields, null), ROOT_ALLOCATOR)) {
        // Build separate pipelines for id and value, as they have different sources but the same target
        RowConsumer[] idWriters = generateWriterPipeline(root, 0, idHeaders.size(), printSettings, null);
        RowConsumer[] valueWriter = generateWriterPipeline(root, idHeaders.size(), resultInfo.size(), printSettings, resultInfo);

        // Write the data
        try (ArrowWriter writer = writerProducer.apply(root)) {
            write(writer, root, idWriters, valueWriter, printSettings.getIdMapper(), results, batchSize);
        }
    }
}
Also used : Field(org.apache.arrow.vector.types.pojo.Field) VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrowWriter(org.apache.arrow.vector.ipc.ArrowWriter) UniqueNamer(com.bakdata.conquery.models.query.resultinfo.UniqueNamer)

Example 3 with ArrowWriter

use of org.apache.arrow.vector.ipc.ArrowWriter in project snowflake-jdbc by snowflakedb.

the method createArrowFile of the class SFArrowResultSetIT.

/**
 * Writes column-major test data into a new Arrow stream file, split into record batches.
 *
 * <p>{@code data[j]} holds the values of column {@code j}; the number of rows is taken from
 * the first column. Only vectors whose minor type is {@code INT} are populated; columns of
 * any other type are left untouched. When {@code data} has no columns, the file contains
 * the schema but no record batches.
 *
 * @param fileName           name of the file to create in {@code resultFolder}
 * @param schema             schema of the vectors to write
 * @param data               column-major values, one array per column
 * @param rowsPerRecordBatch maximum number of rows per record batch; must be positive when
 *                           there are rows to write
 * @return the written file
 * @throws IOException              if the file cannot be created or written
 * @throws IllegalArgumentException if there are rows to write but {@code rowsPerRecordBatch}
 *                                  is not positive
 */
private File createArrowFile(String fileName, Schema schema, Object[][] data, int rowsPerRecordBatch) throws IOException {
    // Guard against data without columns instead of failing on data[0]
    final int totalRows = data.length == 0 ? 0 : data[0].length;
    if (totalRows > 0 && rowsPerRecordBatch <= 0) {
        // A non-positive batch size would never advance the write position
        throw new IllegalArgumentException("rowsPerRecordBatch must be positive, got " + rowsPerRecordBatch);
    }

    File file = resultFolder.newFile(fileName);
    // Resources are closed in reverse order: writer, then stream, then root. Declaring the
    // stream separately ensures it is closed even if constructing the writer fails.
    try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator);
         FileOutputStream out = new FileOutputStream(file);
         ArrowWriter writer = new ArrowStreamWriter(root, new DictionaryProvider.MapDictionaryProvider(), out)) {
        writer.start();
        for (int i = 0; i < totalRows; ) {
            int rowsToAppend = Math.min(rowsPerRecordBatch, totalRows - i);
            root.setRowCount(rowsToAppend);
            for (int j = 0; j < data.length; j++) {
                FieldVector vector = root.getFieldVectors().get(j);
                switch (vector.getMinorType()) {
                    case INT:
                        writeIntToField(vector, data[j], i, rowsToAppend);
                        break;
                    default:
                        // Other vector types are not populated here
                        break;
                }
            }
            writer.writeBatch();
            i += rowsToAppend;
        }
    }
    return file;
}
Also used : VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) DictionaryProvider(org.apache.arrow.vector.dictionary.DictionaryProvider) FileOutputStream(java.io.FileOutputStream) ArrowWriter(org.apache.arrow.vector.ipc.ArrowWriter) FieldVector(org.apache.arrow.vector.FieldVector) File(java.io.File) ArrowStreamWriter(org.apache.arrow.vector.ipc.ArrowStreamWriter)

Aggregations

VectorSchemaRoot (org.apache.arrow.vector.VectorSchemaRoot)3 ArrowWriter (org.apache.arrow.vector.ipc.ArrowWriter)3 ResultUtil (com.bakdata.conquery.io.result.ResultUtil)1 ResultUtil.makeResponseWithFileName (com.bakdata.conquery.io.result.ResultUtil.makeResponseWithFileName)1 ArrowRenderer.renderToStream (com.bakdata.conquery.io.result.arrow.ArrowRenderer.renderToStream)1 AuthorizationHelper.authorizeDownloadDatasets (com.bakdata.conquery.models.auth.AuthorizationHelper.authorizeDownloadDatasets)1 Subject (com.bakdata.conquery.models.auth.entities.Subject)1 Ability (com.bakdata.conquery.models.auth.permissions.Ability)1 ConqueryConfig (com.bakdata.conquery.models.config.ConqueryConfig)1 Dataset (com.bakdata.conquery.models.datasets.Dataset)1 ManagedExecution (com.bakdata.conquery.models.execution.ManagedExecution)1 ManagedForm (com.bakdata.conquery.models.forms.managed.ManagedForm)1 I18n (com.bakdata.conquery.models.i18n.I18n)1 IdPrinter (com.bakdata.conquery.models.identifiable.mapping.IdPrinter)1 ManagedQuery (com.bakdata.conquery.models.query.ManagedQuery)1 PrintSettings (com.bakdata.conquery.models.query.PrintSettings)1 SingleTableResult (com.bakdata.conquery.models.query.SingleTableResult)1 ResultInfo (com.bakdata.conquery.models.query.resultinfo.ResultInfo)1 UniqueNamer (com.bakdata.conquery.models.query.resultinfo.UniqueNamer)1 DatasetRegistry (com.bakdata.conquery.models.worker.DatasetRegistry)1