Search in sources:

Example 51 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class ExampleMetadataHandlerTest method getPartitions.

@Test
public void getPartitions() throws Exception {
    if (!enableTests) {
        // We do this because until you complete the tutorial these tests will fail. When you attempt to publish
        // using ../tools/publish.sh ... it will set the publishing flag and force these tests. This is how we
        // avoid breaking the build but still have a useful tutorial. We are also duplicating this block
        // on purpose since this is a somewhat odd pattern.
        logger.info("getPartitions: Tests are disabled, to enable them set the 'publishing' environment variable " + "using maven clean install -Dpublishing=true");
        return;
    }
    logger.info("doGetTableLayout - enter");
    // Table schema with three int partition columns: day, month, year.
    Schema tableSchema = SchemaBuilder.newBuilder().addIntField("day").addIntField("month").addIntField("year").build();
    Set<String> partitionCols = new HashSet<>();
    partitionCols.add("day");
    partitionCols.add("month");
    partitionCols.add("year");
    // Constrain each partition column so the handler has something to prune against:
    // day > 0, month > 0, year > 2000.
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("day", SortedRangeSet.copyOf(Types.MinorType.INT.getType(), ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 0)), false));
    constraintsMap.put("month", SortedRangeSet.copyOf(Types.MinorType.INT.getType(), ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 0)), false));
    constraintsMap.put("year", SortedRangeSet.copyOf(Types.MinorType.INT.getType(), ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 2000)), false));
    GetTableLayoutRequest req = null;
    GetTableLayoutResponse res = null;
    try {
        req = new GetTableLayoutRequest(fakeIdentity(), "queryId", "default", new TableName("schema1", "table1"), new Constraints(constraintsMap), tableSchema, partitionCols);
        res = handler.doGetTableLayout(allocator, req);
        logger.info("doGetTableLayout - {}", res);
        Block partitions = res.getPartitions();
        // Log at most the first 10 partition rows for diagnostics.
        for (int row = 0; row < partitions.getRowCount() && row < 10; row++) {
            logger.info("doGetTableLayout:{} {}", row, BlockUtils.rowToString(partitions, row));
        }
        assertTrue(partitions.getRowCount() > 0);
        logger.info("doGetTableLayout: partitions[{}]", partitions.getRowCount());
    } finally {
        // Close each resource independently and null-safely. The previous version called
        // req.close() unconditionally, which threw an NPE (and skipped res.close()) whenever
        // the request constructor or doGetTableLayout failed before both were assigned.
        if (req != null) {
            try {
                req.close();
            } catch (Exception ex) {
                logger.error("doGetTableLayout: ", ex);
            }
        }
        if (res != null) {
            try {
                res.close();
            } catch (Exception ex) {
                logger.error("doGetTableLayout: ", ex);
            }
        }
    }
    logger.info("doGetTableLayout - exit");
}
Also used : HashMap(java.util.HashMap) Schema(org.apache.arrow.vector.types.pojo.Schema) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) Block(com.amazonaws.athena.connector.lambda.data.Block) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 52 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class ExampleRecordHandler method readWithConstraint.

/**
 * Here we generate our simulated row data. A real connector would instead connect to the actual source and read
 * the data corresponding to the requested split.
 *
 * @param spiller A BlockSpiller that should be used to write the row data associated with this Split.
 * The BlockSpiller automatically handles applying constraints, chunking the response, encrypting, and spilling to S3.
 * @param request The ReadRecordsRequest containing the split and other details about what to read.
 * @param queryStatusChecker A QueryStatusChecker that you can use to stop doing work for a query that has already terminated
 */
@Override
protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest request, QueryStatusChecker queryStatusChecker) {
    long startTime = System.currentTimeMillis();
    /**
     * It is important to try and throw any throttling events before writing data since Athena may not be able to
     * continue the query, due to consistency errors, if you throttle after writing data.
     */
    if (simulateThrottle > 0 && count++ % simulateThrottle == 0) {
        logger.info("readWithConstraint: throwing throttle Exception!");
        throw new FederationThrottleException("Please slow down for this simulated throttling event");
    }
    logCaller(request);
    // Partition columns are carried as a comma-separated list in the schema's custom metadata.
    Set<String> partitionCols = new HashSet<>();
    String partitionColsMetadata = request.getSchema().getCustomMetadata().get("partitionCols");
    if (partitionColsMetadata != null) {
        partitionCols.addAll(Arrays.asList(partitionColsMetadata.split(",")));
    }
    // parseInt avoids the Integer boxing/unboxing that Integer.valueOf would introduce here;
    // both throw NumberFormatException for malformed values, so behavior is unchanged.
    int year = Integer.parseInt(request.getSplit().getProperty("year"));
    int month = Integer.parseInt(request.getSplit().getProperty("month"));
    int day = Integer.parseInt(request.getSplit().getProperty("day"));
    final RowContext rowContext = new RowContext(year, month, day);
    // Prefer an Extractor per field (fast path); fall back to a FieldWriterFactory for
    // complex types that have no extractor.
    GeneratedRowWriter.RowWriterBuilder builder = GeneratedRowWriter.newBuilder(request.getConstraints());
    for (Field next : request.getSchema().getFields()) {
        Extractor extractor = makeExtractor(next, rowContext);
        if (extractor != null) {
            builder.withExtractor(next.getName(), extractor);
        } else {
            builder.withFieldWriterFactory(next.getName(), makeFactory(next, rowContext));
        }
    }
    GeneratedRowWriter rowWriter = builder.build();
    for (int i = 0; i < numRowsPerSplit; i++) {
        rowContext.seed = i;
        rowContext.negative = i % 2 == 0;
        // Bail out early if Athena has already terminated the query.
        if (!queryStatusChecker.isQueryRunning()) {
            return;
        }
        // writeRows returns the number of rows written; the row writer reports whether the
        // row passed constraint filtering.
        spiller.writeRows((Block block, int rowNum) -> rowWriter.writeRow(block, rowNum, rowContext) ? 1 : 0);
    }
    logger.info("readWithConstraint: Completed generating rows in {} ms", System.currentTimeMillis() - startTime);
}
Also used : Field(org.apache.arrow.vector.types.pojo.Field) GeneratedRowWriter(com.amazonaws.athena.connector.lambda.data.writers.GeneratedRowWriter) Block(com.amazonaws.athena.connector.lambda.data.Block) FederationThrottleException(com.amazonaws.athena.connector.lambda.exceptions.FederationThrottleException) BigIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.BigIntExtractor) DecimalExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.DecimalExtractor) DateDayExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.DateDayExtractor) TinyIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.TinyIntExtractor) VarBinaryExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.VarBinaryExtractor) BitExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.BitExtractor) IntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.IntExtractor) Extractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.Extractor) Float8Extractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.Float8Extractor) SmallIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.SmallIntExtractor) VarCharExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.VarCharExtractor) Float4Extractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.Float4Extractor) DateMilliExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.DateMilliExtractor) HashSet(java.util.HashSet)

Example 53 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class UserDefinedFunctionHandler method processRows.

/**
 * Processes a group of rows. Takes a block of input data (containing multiple rows), applies the
 * UDF to each row, and returns a block holding the corresponding rows of the single output column.
 * <p>
 * UDF methods are invoked row-by-row in a for loop. Arrow values are converted to Java Objects and
 * then passed into the UDF java method, which may involve significant data copying. Advanced users
 * can override this method and work directly on Arrow data for better performance.
 *
 * @param allocator arrow memory allocator
 * @param udfMethod the extracted java method matching the User-Defined-Function defined in Athena.
 * @param inputRecords input data in Arrow format
 * @param outputSchema output data schema in Arrow format
 * @return output data in Arrow format
 */
protected Block processRows(BlockAllocator allocator, Method udfMethod, Block inputRecords, Schema outputSchema) throws Exception {
    int numRows = inputRecords.getRowCount();
    // One projector per input column; a projector converts Arrow values into Java objects
    // suitable for passing to the UDF method.
    List<ArrowValueProjector> projectors = Lists.newArrayList();
    for (Field inputField : inputRecords.getFields()) {
        FieldReader reader = inputRecords.getFieldReader(inputField.getName());
        projectors.add(ProjectorUtils.createArrowValueProjector(reader));
    }
    // UDFs produce exactly one output column — the first (only) field of the output schema.
    Field outputField = outputSchema.getFields().get(0);
    GeneratedRowWriter rowWriter = createOutputRowWriter(outputField, projectors, udfMethod);
    Block output = allocator.createBlock(outputSchema);
    output.setRowCount(numRows);
    try {
        for (int row = 0; row < numRows; ++row) {
            rowWriter.writeRow(output, row, row);
        }
    } catch (Throwable t) {
        // We own the output block until we hand it back; on failure release its Arrow
        // memory before rethrowing so the allocator does not leak.
        try {
            output.close();
        } catch (Exception e) {
            logger.error("Error closing output block", e);
        }
        throw t;
    }
    return output;
}
Also used : Field(org.apache.arrow.vector.types.pojo.Field) GeneratedRowWriter(com.amazonaws.athena.connector.lambda.data.writers.GeneratedRowWriter) ArrowValueProjector(com.amazonaws.athena.connector.lambda.data.projectors.ArrowValueProjector) Block(com.amazonaws.athena.connector.lambda.data.Block) FieldReader(org.apache.arrow.vector.complex.reader.FieldReader) InvocationTargetException(java.lang.reflect.InvocationTargetException)

Example 54 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class EquatableValueSet method subtract.

/**
 * Computes the set difference left \ right: every value in {@code left} that is NOT present
 * in {@code right}, written into a freshly allocated single-column Block.
 */
private static Block subtract(BlockAllocator allocator, EquatableValueSet left, EquatableValueSet right) {
    Block resultBlock = BlockUtils.newEmptyBlock(allocator, DEFAULT_COLUMN, left.getType());
    FieldVector result = resultBlock.getFieldVector(DEFAULT_COLUMN);
    Block lhsBlock = left.getValues();
    FieldReader lhs = lhsBlock.getFieldReader(DEFAULT_COLUMN);
    int count = 0;
    for (int i = 0; i < lhsBlock.getRowCount(); i++) {
        lhs.setPosition(i);
        // Read the value once; the original called readObject() twice per kept row
        // (membership test + write), paying the Arrow deserialization cost twice.
        Object value = lhs.readObject();
        if (!isPresent(value, right.valueBlock)) {
            BlockUtils.setValue(result, count++, value);
        }
    }
    resultBlock.setRowCount(count);
    return resultBlock;
}
Also used : Block(com.amazonaws.athena.connector.lambda.data.Block) FieldVector(org.apache.arrow.vector.FieldVector) FieldReader(org.apache.arrow.vector.complex.reader.FieldReader)

Example 55 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class EquatableValueSet method intersect.

/**
 * Computes the set intersection left ∩ right: every value in {@code left} that IS present
 * in {@code right}, written into a freshly allocated single-column Block.
 */
private static Block intersect(BlockAllocator allocator, EquatableValueSet left, EquatableValueSet right) {
    Block resultBlock = BlockUtils.newEmptyBlock(allocator, DEFAULT_COLUMN, left.getType());
    FieldVector result = resultBlock.getFieldVector(DEFAULT_COLUMN);
    Block lhsBlock = left.getValues();
    FieldReader lhs = lhsBlock.getFieldReader(DEFAULT_COLUMN);
    int count = 0;
    for (int i = 0; i < lhsBlock.getRowCount(); i++) {
        lhs.setPosition(i);
        // Read the value once; the original called readObject() twice per kept row
        // (membership test + write), paying the Arrow deserialization cost twice.
        Object value = lhs.readObject();
        if (isPresent(value, right.valueBlock)) {
            BlockUtils.setValue(result, count++, value);
        }
    }
    resultBlock.setRowCount(count);
    return resultBlock;
}
Also used : Block(com.amazonaws.athena.connector.lambda.data.Block) FieldVector(org.apache.arrow.vector.FieldVector) FieldReader(org.apache.arrow.vector.complex.reader.FieldReader)

Aggregations

Block (com.amazonaws.athena.connector.lambda.data.Block)113 Test (org.junit.Test)39 HashMap (java.util.HashMap)35 Schema (org.apache.arrow.vector.types.pojo.Schema)35 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)32 Split (com.amazonaws.athena.connector.lambda.domain.Split)31 GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse)28 FieldReader (org.apache.arrow.vector.complex.reader.FieldReader)28 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)27 SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation)23 HashSet (java.util.HashSet)23 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)20 Field (org.apache.arrow.vector.types.pojo.Field)17 GetSplitsRequest (com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest)13 PreparedStatement (java.sql.PreparedStatement)13 ResultSet (java.sql.ResultSet)13 ArrayList (java.util.ArrayList)13 MetadataResponse (com.amazonaws.athena.connector.lambda.metadata.MetadataResponse)12 Connection (java.sql.Connection)12 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)11