use of org.apache.apex.malhar.kudu.scanner.KuduPartitionScanAssignmentMeta in project apex-malhar by apache.
the class AbstractKuduInputOperator method filterTupleBasedOnCurrentState.
/**
 * Decides whether a record read from a Kudu scan is emitted on the output port or dropped.
 *
 * <p>A record is dropped when any of the following holds:
 * <ul>
 * <li>the operator is currently in safe mode,</li>
 * <li>the record's position in the scan is not beyond the position already recorded for its tablet metadata,
 * i.e. the record is being replayed,</li>
 * <li>the operator is in the reconciling window phase and {@code isAllowedInReconcilingWindow} rejects the
 * record.</li>
 * </ul>
 *
 * <p>Whenever the record is not a replay, its position is stored as the latest position seen for its tablet
 * metadata, even if the record ends up being dropped for one of the other reasons. Every emitted record also
 * increments the tuple count of the current window.
 *
 * @param recordWithMeta the scanned record along with its tablet metadata and its position in the scan
 */
protected void filterTupleBasedOnCurrentState(KuduRecordWithMeta<T> recordWithMeta) {
  // Safe mode suppresses every tuple, but the position bookkeeping below still has to run.
  boolean suppress = isCurrentlyInSafeMode;

  KuduPartitionScanAssignmentMeta tabletMeta = recordWithMeta.getTabletMetadata();
  long positionInScan = recordWithMeta.getPositionInScan();

  // A position at or below the recorded one means this tuple was already seen for this tablet.
  boolean isReplay = windowManagerDataForScans.containsKey(tabletMeta)
      && positionInScan <= windowManagerDataForScans.get(tabletMeta);
  if (isReplay) {
    // This is the case of a replay and hence do not emit
    suppress = true;
  } else {
    windowManagerDataForScans.put(tabletMeta, positionInScan);
  }

  // In a reconciling window the business logic gets the final say on whether the tuple may go out.
  if (isCurrentlyInReconcilingMode && !isAllowedInReconcilingWindow(recordWithMeta)) {
    suppress = true;
  }

  if (suppress) {
    return;
  }
  outputPort.emit(recordWithMeta.getThePayload());
  currentWindowTupleCount += 1;
}
use of org.apache.apex.malhar.kudu.scanner.KuduPartitionScanAssignmentMeta in project apex-malhar by apache.
the class AbstractKuduInputOperator method initCurrentState.
/**
 * Restores the starting state of this operator from the window data manager.
 *
 * <p>The saved state is consulted only when no scan positions are being tracked yet and no query is currently
 * being processed, which is possibly the case of an application restart. The state of the largest completed
 * window is retrieved; if it holds at least one entry, the query is taken from one of its keys, the scans are
 * marked as not yet complete, the saved positions are copied into the tracked positions and the query is handed
 * to {@code processForQueryString}. Note that the restored query might have been only partially processed before
 * the checkpoint or the crash.
 *
 * <p>An {@link IOException} during retrieval is rethrown as a {@link RuntimeException}. A
 * {@link NullPointerException} during retrieval is logged and no state is restored.
 */
private void initCurrentState() {
  boolean nothingInFlight = windowManagerDataForScans.isEmpty() && (currentQueryBeingProcessed == null);
  if (!nothingInFlight) {
    return;
  }

  // This is the case of an application restart possibly and hence want to get the exact state
  Map<KuduPartitionScanAssignmentMeta, Long> restoredPositions;
  try {
    restoredPositions = (Map<KuduPartitionScanAssignmentMeta, Long>) windowDataManager.retrieve(
        windowDataManager.getLargestCompletedWindow());
  } catch (IOException e) {
    throw new RuntimeException("Error while retrieving the window manager data at the initialization phase", e);
  } catch (NullPointerException ex) {
    LOG.error("Error while getting the window manager data ", ex);
    return;
  }

  if (restoredPositions == null || restoredPositions.isEmpty()) {
    return;
  }

  // There is at least one entry; any key is used to obtain the query that was in flight.
  KuduPartitionScanAssignmentMeta anyMeta = restoredPositions.keySet().iterator().next();
  currentQueryBeingProcessed = anyMeta.getCurrentQuery();
  allScansCompleteForThisQuery = false;
  windowManagerDataForScans.putAll(restoredPositions);
  processForQueryString(currentQueryBeingProcessed);
}
use of org.apache.apex.malhar.kudu.scanner.KuduPartitionScanAssignmentMeta in project apex-malhar by apache.
the class AbstractKuduInputOperator method processForEndScanMarker.
/**
 * Records that the scan segment which produced the given end-of-scan marker has finished, and flags the current
 * query as fully scanned once every segment planned for this physical operator instance has finished.
 *
 * <p>If no completion status was registered for the segment beforehand, an error is logged and the segment is
 * still marked as complete. When no segments are planned for the current query, the query is flagged as complete
 * right away. Otherwise the query is flagged as complete only when the number of tracked segments equals the
 * number of planned segments and every tracked segment is marked complete.
 *
 * @param entryFetchedFromBuffer the end-of-scan marker entry fetched from the buffer
 */
protected void processForEndScanMarker(KuduRecordWithMeta<T> entryFetchedFromBuffer) {
  KuduPartitionScanAssignmentMeta finishedSegment = entryFetchedFromBuffer.getTabletMetadata();

  if (currentQueryCompletionStatus.get(finishedSegment) == null) {
    LOG.error(" End scan marker cannot be precede a Begin Scan marker ");
  }
  currentQueryCompletionStatus.put(finishedSegment, true);

  if (plannedSegmentsForCurrentQuery == 0) {
    allScansCompleteForThisQuery = true;
    return;
  }

  // Not every planned segment has reported in yet, so the query cannot be complete.
  if (currentQueryCompletionStatus.size() != plannedSegmentsForCurrentQuery) {
    return;
  }

  boolean everySegmentDone = true;
  for (Boolean segmentDone : currentQueryCompletionStatus.values()) {
    if (!segmentDone) {
      everySegmentDone = false;
    }
  }

  if (everySegmentDone) {
    allScansCompleteForThisQuery = true;
  }
}
use of org.apache.apex.malhar.kudu.scanner.KuduPartitionScanAssignmentMeta in project apex-malhar by apache.
the class AbstractKuduInputPartitioner method getListOfPartitionAssignments.
/**
 * Builds a list of scan assignment metadata instances from raw kudu scan tokens as returned by the Kudu Query
 * planner assuming all of the columns and rows are to be scanned.
 *
 * <p>One metadata instance is created per scan token. Each instance carries its zero-based ordinal and the total
 * number of scan tokens. The {@code partitions} and {@code context} arguments are not consulted by this
 * implementation.
 *
 * @param partitions The current set of partitions
 * @param context The current partitioning context
 * @return one scan assignment metadata instance per planned scan token; never empty
 * @throws Exception if no scan tokens could be obtained for the Kudu table; when the scan planning itself failed,
 * that failure is attached as the cause
 */
public List<KuduPartitionScanAssignmentMeta> getListOfPartitionAssignments(Collection<Partition<AbstractKuduInputOperator>> partitions, PartitioningContext context) throws Exception {
  List<KuduScanToken> allColumnsScanTokens = new ArrayList<>();
  // Remembered so that the root cause is not lost if we end up with no tokens at all.
  Exception planningFailure = null;
  // we are looking at a first time invocation scenario
  try {
    allColumnsScanTokens.addAll(getKuduScanTokensForSelectAllColumns());
  } catch (Exception e) {
    LOG.error(" Error while calculating the number of scan tokens for all column projections " + e.getMessage(), e);
    planningFailure = e;
  }
  if (allColumnsScanTokens.isEmpty()) {
    LOG.error("No column information could be extracted from the Kudu table");
    // A null cause is permitted and simply means that planning returned no tokens without failing.
    throw new Exception("No column information could be extracted from the Kudu table", planningFailure);
  }
  int totalPartitionCount = allColumnsScanTokens.size();
  LOG.info("Determined maximum as " + totalPartitionCount + " tablets for this table");
  List<KuduPartitionScanAssignmentMeta> returnList = new ArrayList<>(totalPartitionCount);
  for (int i = 0; i < totalPartitionCount; i++) {
    KuduPartitionScanAssignmentMeta aMeta = new KuduPartitionScanAssignmentMeta();
    aMeta.setOrdinal(i);
    aMeta.setTotalSize(totalPartitionCount);
    returnList.add(aMeta);
    LOG.info("A planned scan meta of the total partitions " + aMeta);
  }
  LOG.info("Total kudu partition size is " + returnList.size());
  return returnList;
}
Aggregations