Search in sources :

Example 1 with KuduPartitionScanAssignmentMeta

use of org.apache.apex.malhar.kudu.scanner.KuduPartitionScanAssignmentMeta in project apex-malhar by apache.

the class AbstractKuduInputOperator method filterTupleBasedOnCurrentState.

/**
 * Decides whether a tuple read from a Kudu scan is emitted on the output port or dropped.
 * <p>
 * A tuple is dropped when any of the following holds:
 * <ul>
 *   <li>the operator is currently in safe mode,</li>
 *   <li>its position in the scan is not beyond the position already recorded for the same scan
 *       assignment, i.e. it is being replayed,</li>
 *   <li>the operator is in the reconciling window phase and
 *       {@code isAllowedInReconcilingWindow} rejects the tuple.</li>
 * </ul>
 * The recorded position of a scan assignment is advanced for every tuple that is not a replay, even
 * when that tuple is dropped for one of the other reasons.
 *
 * @param recordWithMeta the scanned record along with its scan assignment metadata and scan position
 */
protected void filterTupleBasedOnCurrentState(KuduRecordWithMeta<T> recordWithMeta) {
    boolean drop = isCurrentlyInSafeMode;
    KuduPartitionScanAssignmentMeta scanMeta = recordWithMeta.getTabletMetadata();
    long scanPosition = recordWithMeta.getPositionInScan();
    // A tuple at or before the last recorded position of its scan was already seen: it is a replay.
    boolean replayed = windowManagerDataForScans.containsKey(scanMeta)
        && scanPosition <= windowManagerDataForScans.get(scanMeta);
    if (replayed) {
        drop = true;
    } else {
        // First tuple for this scan, or a tuple beyond the recorded position: advance the position.
        windowManagerDataForScans.put(scanMeta, scanPosition);
    }
    // The reconciling check is consulted whenever the operator is in reconciling mode, regardless of
    // whether the tuple is already marked to be dropped.
    if (isCurrentlyInReconcilingMode && !isAllowedInReconcilingWindow(recordWithMeta)) {
        drop = true;
    }
    if (drop) {
        return;
    }
    outputPort.emit(recordWithMeta.getThePayload());
    currentWindowTupleCount++;
}
Also used : KuduPartitionScanAssignmentMeta(org.apache.apex.malhar.kudu.scanner.KuduPartitionScanAssignmentMeta)

Example 2 with KuduPartitionScanAssignmentMeta

use of org.apache.apex.malhar.kudu.scanner.KuduPartitionScanAssignmentMeta in project apex-malhar by apache.

the class AbstractKuduInputOperator method initCurrentState.

/**
 * Restores the starting state of this operator from the window data manager.
 * <p>
 * The restore is attempted only when no scan positions are held in memory and no query is currently
 * being processed. The state saved for the largest completed window is then loaded; if it is
 * non-empty, the query of one of its scan assignments becomes the current query, the saved scan
 * positions are copied into the in-memory map and the query is handed to
 * {@code processForQueryString}. This covers a query that was only partially processed before the
 * previous checkpoint or crash.
 *
 * @throws RuntimeException if the window data manager fails with an {@link IOException}
 */
private void initCurrentState() {
    boolean nothingInFlight = windowManagerDataForScans.size() == 0 && currentQueryBeingProcessed == null;
    if (!nothingInFlight) {
        return;
    }
    // Possibly an application restart, so fetch the exact state that was last persisted.
    Map<KuduPartitionScanAssignmentMeta, Long> restoredState = null;
    try {
        restoredState = (Map<KuduPartitionScanAssignmentMeta, Long>) windowDataManager.retrieve(
            windowDataManager.getLargestCompletedWindow());
    } catch (IOException e) {
        throw new RuntimeException("Error while retrieving the window manager data at the initialization phase", e);
    } catch (NullPointerException ex) {
        // Treated as "no saved state": the failure is logged and the operator starts empty.
        LOG.error("Error while getting the window manager data ", ex);
    }
    if (restoredState == null || restoredState.size() == 0) {
        return;
    }
    // At least one scan assignment was saved; any of them yields the query to resume.
    KuduPartitionScanAssignmentMeta anySavedMeta = restoredState.keySet().iterator().next();
    currentQueryBeingProcessed = anySavedMeta.getCurrentQuery();
    allScansCompleteForThisQuery = false;
    windowManagerDataForScans.putAll(restoredState);
    processForQueryString(currentQueryBeingProcessed);
}
Also used : KuduPartitionScanAssignmentMeta(org.apache.apex.malhar.kudu.scanner.KuduPartitionScanAssignmentMeta) IOException(java.io.IOException)

Example 3 with KuduPartitionScanAssignmentMeta

use of org.apache.apex.malhar.kudu.scanner.KuduPartitionScanAssignmentMeta in project apex-malhar by apache.

the class AbstractKuduInputOperator method processForEndScanMarker.

/**
 * Records that the scan identified by the given end-scan marker has finished and, if every segment
 * planned for the current query on this physical operator instance has finished, flags the query as
 * fully scanned.
 * <p>
 * If no completion entry exists yet for the scan (no begin-scan marker was recorded), an error is
 * logged and the scan is still marked as complete. When no segments are planned for the current
 * query, the query is flagged as complete immediately.
 *
 * @param entryFetchedFromBuffer the end-scan marker entry read from the buffer
 */
protected void processForEndScanMarker(KuduRecordWithMeta<T> entryFetchedFromBuffer) {
    KuduPartitionScanAssignmentMeta finishedScan = entryFetchedFromBuffer.getTabletMetadata();
    if (currentQueryCompletionStatus.get(finishedScan) == null) {
        LOG.error(" End scan marker cannot be precede a Begin Scan marker ");
    }
    currentQueryCompletionStatus.put(finishedScan, true);
    if (plannedSegmentsForCurrentQuery == 0) {
        allScansCompleteForThisQuery = true;
        return;
    }
    // Not every planned segment has reported yet, so the query cannot be complete.
    if (currentQueryCompletionStatus.size() != plannedSegmentsForCurrentQuery) {
        return;
    }
    // Only the completion flags matter here; stop at the first scan that is still running.
    for (boolean scanDone : currentQueryCompletionStatus.values()) {
        if (!scanDone) {
            return;
        }
    }
    allScansCompleteForThisQuery = true;
}
Also used : KuduPartitionScanAssignmentMeta(org.apache.apex.malhar.kudu.scanner.KuduPartitionScanAssignmentMeta)

Example 4 with KuduPartitionScanAssignmentMeta

use of org.apache.apex.malhar.kudu.scanner.KuduPartitionScanAssignmentMeta in project apex-malhar by apache.

the class AbstractKuduInputPartitioner method getListOfPartitionAssignments.

/**
 * Builds one scan assignment metadata instance per Kudu scan token returned by the query planner for
 * a scan that selects all columns.
 * <p>
 * Each returned instance carries its ordinal (0-based) and the total number of scan tokens.
 *
 * @param partitions The current set of partitions (not consulted by this method)
 * @param context The current partitioning context (not consulted by this method)
 * @return one scan assignment metadata instance per scan token
 * @throws Exception if no scan tokens could be obtained, either because the planner returned none or
 *     because planning failed; in the latter case the planning failure is attached as the cause
 */
public List<KuduPartitionScanAssignmentMeta> getListOfPartitionAssignments(Collection<Partition<AbstractKuduInputOperator>> partitions, PartitioningContext context) throws Exception {
    List<KuduScanToken> allColumnsScanTokens = new ArrayList<>();
    Exception scanPlanningFailure = null;
    // we are looking at a first time invocation scenario
    try {
        allColumnsScanTokens.addAll(getKuduScanTokensForSelectAllColumns());
    } catch (Exception e) {
        LOG.error(" Error while calculating the number of scan tokens for all column projections " + e.getMessage(), e);
        // Remember the failure so that it can be reported as the cause below instead of being lost.
        scanPlanningFailure = e;
    }
    if (allColumnsScanTokens.isEmpty()) {
        String message = "No column information could be extracted from the Kudu table";
        LOG.error(message);
        if (scanPlanningFailure != null) {
            throw new Exception(message, scanPlanningFailure);
        }
        throw new Exception(message);
    }
    int totalPartitionCount = allColumnsScanTokens.size();
    LOG.info("Determined maximum as " + totalPartitionCount + " tablets for this table");
    List<KuduPartitionScanAssignmentMeta> returnList = new ArrayList<>(totalPartitionCount);
    for (int i = 0; i < totalPartitionCount; i++) {
        KuduPartitionScanAssignmentMeta aMeta = new KuduPartitionScanAssignmentMeta();
        aMeta.setOrdinal(i);
        aMeta.setTotalSize(totalPartitionCount);
        returnList.add(aMeta);
        LOG.info("A planned scan meta of the total partitions " + aMeta);
    }
    LOG.info("Total kudu partition size is " + returnList.size());
    return returnList;
}
Also used : KuduScanToken(org.apache.kudu.client.KuduScanToken) ArrayList(java.util.ArrayList) KuduPartitionScanAssignmentMeta(org.apache.apex.malhar.kudu.scanner.KuduPartitionScanAssignmentMeta)

Aggregations

KuduPartitionScanAssignmentMeta (org.apache.apex.malhar.kudu.scanner.KuduPartitionScanAssignmentMeta)4 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 KuduScanToken (org.apache.kudu.client.KuduScanToken)1