Search in sources :

Example 6 with KuduScanToken

use of org.apache.kudu.client.KuduScanToken in project apex-malhar by apache.

the class KuduClientTestCommons method buildMockWiring.

/**
 * Creates a mocked {@link ApexKuduConnection} and wires it to mocked Kudu collaborators
 * (client, session, table and scan token builder) so that the given operator can be
 * exercised without a real Kudu cluster.
 *
 * @param abstractKuduInputOperator the operator whose connection builder lookup and Kryo clone are stubbed
 * @param numScanTokens the number of mocked scan tokens the mocked scan token builder returns
 * @return the mocked connection handle produced by the mocked connection builder
 * @throws Exception declared because the stubbed calls declare checked exceptions
 */
public ApexKuduConnection buildMockWiring(AbstractKuduInputOperator abstractKuduInputOperator, int numScanTokens) throws Exception {
    // create every collaborator mock up front
    ApexKuduConnection connection = PowerMockito.mock(ApexKuduConnection.class);
    ApexKuduConnection.ApexKuduConnectionBuilder connectionBuilder =
        PowerMockito.mock(ApexKuduConnection.ApexKuduConnectionBuilder.class);
    KuduClient client = PowerMockito.mock(KuduClient.class);
    KuduSession session = PowerMockito.mock(KuduSession.class);
    KuduTable table = PowerMockito.mock(KuduTable.class);
    KuduScanToken.KuduScanTokenBuilder scanTokenBuilder =
        PowerMockito.mock(KuduScanToken.KuduScanTokenBuilder.class);

    // one mocked scan token per requested token
    List<KuduScanToken> scanTokens = new ArrayList<>();
    while (scanTokens.size() < numScanTokens) {
        scanTokens.add(PowerMockito.mock(KuduScanToken.class));
    }

    // cloning the operator hands back the very same instance
    PowerMockito.mockStatic(KryoCloneUtils.class);
    when(KryoCloneUtils.cloneObject(abstractKuduInputOperator)).thenReturn(abstractKuduInputOperator);

    // operator -> connection builder -> connection
    when(abstractKuduInputOperator.getApexKuduConnectionInfo()).thenReturn(connectionBuilder);
    when(connectionBuilder.build()).thenReturn(connection);

    // connection accessors
    when(connection.getKuduClient()).thenReturn(client);
    when(connection.getKuduTable()).thenReturn(table);
    when(connection.getKuduSession()).thenReturn(session);
    when(connection.getBuilderForThisConnection()).thenReturn(connectionBuilder);

    // client behaviour
    when(client.newSession()).thenReturn(session);
    when(client.openTable(tableName)).thenReturn(table);
    when(client.newScanTokenBuilder(table)).thenReturn(scanTokenBuilder);

    // table schema and scan planning
    when(table.getSchema()).thenReturn(schemaForUnitTests);
    when(scanTokenBuilder.build()).thenReturn(scanTokens);

    return connection;
}
Also used : KuduScanToken(org.apache.kudu.client.KuduScanToken) KuduSession(org.apache.kudu.client.KuduSession) KuduClient(org.apache.kudu.client.KuduClient) ArrayList(java.util.ArrayList) KuduTable(org.apache.kudu.client.KuduTable)

Example 7 with KuduScanToken

use of org.apache.kudu.client.KuduScanToken in project apex-malhar by apache.

the class AbstractKuduPartitionScanner method preparePlanForScanners.

/**
 * The main logic which takes the parsed query and builds the Kudu scan tokens specific to this query.
 * The scan tokens are sorted (by their natural ordering) before the ones to be executed in the current
 * physical instance of the operator are shortlisted. Since the Kudu scan token builder gives the scan
 * tokens for the whole query and does not differentiate between a distributed system and a single instance
 * system, this method takes the plan as generated by the Kudu scan token builder and then chooses only those
 * segments whose ordinal is part of the partition pie assignment of the parent operator.
 *
 * <p>If the query options carry a read snapshot time that cannot be applied, the problem is logged and the
 * scan plan is built without the snapshot settings.
 *
 * @param parsedQuery The parsed query instance
 * @return A list of partition scan metadata objects that are applicable for this instance of the physical operator
 * i.e. the operator owning this instance of the scanner.
 * @throws IOException If the scan assignment cannot be serialized
 */
public List<KuduPartitionScanAssignmentMeta> preparePlanForScanners(SQLToKuduPredicatesTranslator parsedQuery) throws IOException {
    List<KuduPredicate> predicateList = parsedQuery.getKuduSQLParseTreeListener().getKuduPredicateList();
    // we will have at least one connection
    ApexKuduConnection apexKuduConnection = verifyConnectionStaleness(0);
    KuduScanToken.KuduScanTokenBuilder builder = apexKuduConnection.getKuduClient().newScanTokenBuilder(apexKuduConnection.getKuduTable());
    builder = builder.setProjectedColumnNames(new ArrayList<>(parsedQuery.getKuduSQLParseTreeListener().getListOfColumnsUsed()));
    for (KuduPredicate aPredicate : predicateList) {
        builder = builder.addPredicate(aPredicate);
    }
    builder.setFaultTolerant(parentOperator.isFaultTolerantScanner());
    Map<String, String> optionsUsedForThisQuery = parentOperator.getOptionsEnabledForCurrentQuery();
    if (optionsUsedForThisQuery.containsKey(KuduSQLParseTreeListener.READ_SNAPSHOT_TIME)) {
        try {
            // parseLong yields the primitive directly, no intermediate Long box
            long readSnapShotTime = Long.parseLong(optionsUsedForThisQuery.get(KuduSQLParseTreeListener.READ_SNAPSHOT_TIME));
            builder = builder.readMode(AsyncKuduScanner.ReadMode.READ_AT_SNAPSHOT);
            builder = builder.snapshotTimestampMicros(readSnapShotTime);
            LOG.info("Using read snapshot for this query as " + readSnapShotTime);
        } catch (Exception ex) {
            // best effort: an unusable snapshot time must not fail the whole query
            LOG.error("Cannot parse the Read snaptshot time " + ex.getMessage(), ex);
        }
    }
    List<KuduScanToken> allPossibleScanTokens = builder.build();
    // Make sure we deal with a sorted list of scan tokens; the tokens' own compareTo defines the order
    Collections.sort(allPossibleScanTokens);
    LOG.info(" Query will scan " + allPossibleScanTokens.size() + " tablets");
    if (LOG.isDebugEnabled()) {
        LOG.debug(" Predicates scheduled for this query are " + predicateList.size());
        for (KuduScanToken aScanToken : allPossibleScanTokens) {
            LOG.debug("A tablet scheduled for all operators scanning is " + aScanToken.getTablet());
        }
    }
    List<KuduPartitionScanAssignmentMeta> partitionPieForThisOperator = parentOperator.getPartitionPieAssignment();
    List<KuduPartitionScanAssignmentMeta> returnOfAssignments = new ArrayList<>();
    int totalScansForThisQuery = allPossibleScanTokens.size();
    int counterForPartAssignments = 0;
    for (KuduPartitionScanAssignmentMeta aPartofThePie : partitionPieForThisOperator) {
        int ordinalInPie = aPartofThePie.getOrdinal();
        if (ordinalInPie >= totalScansForThisQuery) {
            // a given query plan might have less scantokens
            continue;
        }
        KuduPartitionScanAssignmentMeta aMetaForThisQuery = new KuduPartitionScanAssignmentMeta();
        aMetaForThisQuery.setTotalSize(totalScansForThisQuery);
        // ordinals of the returned assignments are renumbered from zero for this operator
        aMetaForThisQuery.setOrdinal(counterForPartAssignments);
        counterForPartAssignments += 1;
        aMetaForThisQuery.setCurrentQuery(parsedQuery.getSqlExpresssion());
        // we pick up only those ordinals that are part of the original partition pie assignment
        KuduScanToken aTokenForThisOperator = allPossibleScanTokens.get(ordinalInPie);
        aMetaForThisQuery.setSerializedKuduScanToken(aTokenForThisOperator.serialize());
        returnOfAssignments.add(aMetaForThisQuery);
        if (LOG.isDebugEnabled()) {
            // guarded so the message is not built for every assignment when debug logging is off
            LOG.debug("Added query scan for this operator " + aMetaForThisQuery + " with scan tablet as " + aTokenForThisOperator.getTablet());
        }
    }
    LOG.info(" A total of " + returnOfAssignments.size() + " have been scheduled for this operator");
    return returnOfAssignments;
}
Also used : KuduScanToken(org.apache.kudu.client.KuduScanToken) ApexKuduConnection(org.apache.apex.malhar.kudu.ApexKuduConnection) ArrayList(java.util.ArrayList) KuduPredicate(org.apache.kudu.client.KuduPredicate) IOException(java.io.IOException)

Example 8 with KuduScanToken

use of org.apache.kudu.client.KuduScanToken in project apex-malhar by apache.

the class AbstractKuduInputPartitioner method getListOfPartitionAssignments.

/**
 * Builds a list of scan assignment metadata instances from raw kudu scan tokens as returned by the Kudu Query planner
 *  assuming all of the columns and rows are to be scanned. One metadata instance is created per scan token, with
 *  ordinals running from zero and the total size set to the number of scan tokens.
 * @param partitions The current set of partitions (not read by this method)
 * @param context The current partitioning context (not read by this method)
 * @return The planned scan assignment metadata, one entry per scan token of the select-all-columns plan
 * @throws Exception if no scan tokens could be obtained; when the scan token lookup itself failed, that failure
 * is attached as the cause
 */
public List<KuduPartitionScanAssignmentMeta> getListOfPartitionAssignments(Collection<Partition<AbstractKuduInputOperator>> partitions, PartitioningContext context) throws Exception {
    List<KuduPartitionScanAssignmentMeta> returnList = new ArrayList<>();
    List<KuduScanToken> allColumnsScanTokens = new ArrayList<>();
    // remembered so the root cause is not lost if we have to give up below
    Exception scanTokenLookupFailure = null;
    // we are looking at a first time invocation scenario
    try {
        allColumnsScanTokens.addAll(getKuduScanTokensForSelectAllColumns());
    } catch (Exception e) {
        scanTokenLookupFailure = e;
        LOG.error(" Error while calculating the number of scan tokens for all column projections " + e.getMessage(), e);
    }
    if (allColumnsScanTokens.isEmpty()) {
        LOG.error("No column information could be extracted from the Kudu table");
        // a null cause is permitted and simply means "no known cause"
        throw new Exception("No column information could be extracted from the Kudu table", scanTokenLookupFailure);
    }
    int totalPartitionCount = allColumnsScanTokens.size();
    LOG.info("Determined maximum as " + totalPartitionCount + " tablets for this table");
    for (int i = 0; i < totalPartitionCount; i++) {
        KuduPartitionScanAssignmentMeta aMeta = new KuduPartitionScanAssignmentMeta();
        aMeta.setOrdinal(i);
        aMeta.setTotalSize(totalPartitionCount);
        returnList.add(aMeta);
        LOG.info("A planned scan meta of the total partitions " + aMeta);
    }
    LOG.info("Total kudu partition size is " + returnList.size());
    return returnList;
}
Also used : KuduScanToken(org.apache.kudu.client.KuduScanToken) ArrayList(java.util.ArrayList) KuduPartitionScanAssignmentMeta(org.apache.apex.malhar.kudu.scanner.KuduPartitionScanAssignmentMeta)

Aggregations

KuduScanToken (org.apache.kudu.client.KuduScanToken)8 ArrayList (java.util.ArrayList)6 KuduTable (org.apache.kudu.client.KuduTable)6 KuduClient (org.apache.kudu.client.KuduClient)5 KuduPredicate (org.apache.kudu.client.KuduPredicate)4 KuduSession (org.apache.kudu.client.KuduSession)4 IOException (java.io.IOException)3 KuduScanner (org.apache.kudu.client.KuduScanner)3 List (java.util.List)2 Collectors (java.util.stream.Collectors)2 ColumnSchema (org.apache.kudu.ColumnSchema)2 Schema (org.apache.kudu.Schema)2 KuduException (org.apache.kudu.client.KuduException)2 RowResult (org.apache.kudu.client.RowResult)2 RowResultIterator (org.apache.kudu.client.RowResultIterator)2 Logger (com.facebook.airlift.log.Logger)1 DiscreteValues (com.facebook.presto.common.predicate.DiscreteValues)1 Domain (com.facebook.presto.common.predicate.Domain)1 EquatableValueSet (com.facebook.presto.common.predicate.EquatableValueSet)1 Marker (com.facebook.presto.common.predicate.Marker)1