Use of org.apache.apex.malhar.kudu.ApexKuduConnection in project apex-malhar by Apache:
class AbstractKuduInputPartitioner, method getKuduScanTokensForSelectAllColumns.
/**
 * Builds a set of scan tokens. The list of scan tokens are generated as if the entire table is being scanned
 * i.e. a SELECT * FROM TABLE equivalent expression. This list is used to assign the partition pie assignments
 * for all of the planned partition of operators. Each operator gets a part of the PIE as if all columns were
 * selected. Subsequently when a query is to be processed, the query is used to generate the scan tokens applicable
 * for that query. Given that partition pie represents the entire data set, the scan assignments for the current
 * query will be a subset.
 * @return The list of scan tokens as if the entire table is getting scanned.
 * @throws Exception in cases when the connection to kudu cluster cannot be closed.
 */
public List<KuduScanToken> getKuduScanTokensForSelectAllColumns() throws Exception {
    // We are not using the current query for deciding the partition strategy but a SELECT * as
    // we do not want to optimize on just the current query. This prevents rapid throttling of operator
    // instances when the scan patterns are erratic. On the other hand, this might result on under utilized
    // operator resources in the DAG but will be consistent at a minimum.
    ApexKuduConnection apexKuduConnection = prototypeKuduInputOperator.getApexKuduConnectionInfo().build();
    try {
        KuduClient clientHandle = apexKuduConnection.getKuduClient();
        KuduTable table = apexKuduConnection.getKuduTable();
        KuduScanToken.KuduScanTokenBuilder builder = clientHandle.newScanTokenBuilder(table);
        // Project every column of the table so the token set represents a full SELECT * scan.
        List<String> allColumns = new ArrayList<>();
        for (ColumnSchema column : table.getSchema().getColumns()) {
            allColumns.add(column.getName());
        }
        builder.setProjectedColumnNames(allColumns);
        LOG.debug("Building the partition pie assignments for the input operator");
        return builder.build();
    } finally {
        // Always release the connection, even if token building fails; otherwise the
        // kudu client handle would leak on any exception above.
        apexKuduConnection.close();
    }
}
Use of org.apache.apex.malhar.kudu.ApexKuduConnection in project apex-malhar by Apache:
class AbstractKuduPartitionScanner, method verifyConnectionStaleness.
/**
 * Used to renew a connection in case it is dead. There can be scenarios when there is a continuous sequence of
 * queries that do not touch a tablet resulting in inactivity on the kudu client session.
 * @param indexPos The index position in the connection pool
 * @return A renewed connection in case it was dead
 */
public ApexKuduConnection verifyConnectionStaleness(int indexPos) {
    ApexKuduConnection pooledConnection = connectionPoolForThreads.get(indexPos);
    checkNotNull(pooledConnection, "Null connection not expected while checking staleness of existing connection");
    // A live session means the pooled handle is still usable as-is.
    if (!pooledConnection.getKuduSession().isClosed()) {
        return pooledConnection;
    }
    try {
        // Dispose of the wrapper around the dead session before replacing it.
        pooledConnection.close();
    } catch (Exception e) {
        LOG.error(" Could not close a possibly stale kudu connection handle ", e);
    }
    LOG.info("Ripped the old kudu connection out and building a new connection for this scanner");
    // Rebuild from the same builder that produced the stale connection and swap it into the pool.
    ApexKuduConnection replacementConnection = pooledConnection.getBuilderForThisConnection().build();
    connectionPoolForThreads.put(indexPos, replacementConnection);
    return replacementConnection;
}
Use of org.apache.apex.malhar.kudu.ApexKuduConnection in project apex-malhar by Apache:
class AbstractKuduPartitionScanner, method preparePlanForScanners.
/**
 * The main logic which takes the parsed in query and builds the Kudu scan tokens specific to this query.
 * It makes sure that these scan tokens are sorted before the actual scan tokens that are to be executed in the
 * current physical instance of the operator are shortlisted. Since the kudu scan token builder gives the scan
 * tokens for the query and does not differentiate between a distributed system and a single instance system, this
 * method takes the plan as generated by the Kudu scan token builder and then chooses only those segments that were
 * decided to be the responsibility of this operator at partitioning time.
 * @param parsedQuery The parsed query instance
 * @return A list of partition scan metadata objects that are applicable for this instance of the physical operator
 * i.e. the operator owning this instance of the scanner.
 * @throws IOException If the scan assignment cannot be serialized
 */
public List<KuduPartitionScanAssignmentMeta> preparePlanForScanners(SQLToKuduPredicatesTranslator parsedQuery) throws IOException {
    List<KuduPredicate> predicateList = parsedQuery.getKuduSQLParseTreeListener().getKuduPredicateList();
    // we will have atleast one connection
    ApexKuduConnection apexKuduConnection = verifyConnectionStaleness(0);
    KuduScanToken.KuduScanTokenBuilder builder = apexKuduConnection.getKuduClient().newScanTokenBuilder(apexKuduConnection.getKuduTable());
    builder = builder.setProjectedColumnNames(new ArrayList<>(parsedQuery.getKuduSQLParseTreeListener().getListOfColumnsUsed()));
    for (KuduPredicate aPredicate : predicateList) {
        builder = builder.addPredicate(aPredicate);
    }
    builder.setFaultTolerant(parentOperator.isFaultTolerantScanner());
    Map<String, String> optionsUsedForThisQuery = parentOperator.getOptionsEnabledForCurrentQuery();
    if (optionsUsedForThisQuery.containsKey(KuduSQLParseTreeListener.READ_SNAPSHOT_TIME)) {
        try {
            // parseLong avoids the needless boxing of Long.valueOf; a malformed value is
            // logged and the option is simply ignored for this query.
            long readSnapShotTime = Long.parseLong(optionsUsedForThisQuery.get(KuduSQLParseTreeListener.READ_SNAPSHOT_TIME));
            builder = builder.readMode(AsyncKuduScanner.ReadMode.READ_AT_SNAPSHOT);
            builder = builder.snapshotTimestampMicros(readSnapShotTime);
            LOG.info("Using read snapshot for this query as " + readSnapShotTime);
        } catch (Exception ex) {
            LOG.error("Cannot parse the Read snapshot time " + ex.getMessage(), ex);
        }
    }
    List<KuduScanToken> allPossibleScanTokens = builder.build();
    // Make sure we deal with a sorted list of scan tokens so every physical operator
    // observes the tablets in the same order when mapping ordinals to tokens.
    Collections.sort(allPossibleScanTokens, new Comparator<KuduScanToken>() {
        @Override
        public int compare(KuduScanToken left, KuduScanToken right) {
            return left.compareTo(right);
        }
    });
    LOG.info(" Query will scan " + allPossibleScanTokens.size() + " tablets");
    if (LOG.isDebugEnabled()) {
        LOG.debug(" Predicates scheduled for this query are " + predicateList.size());
        for (int i = 0; i < allPossibleScanTokens.size(); i++) {
            LOG.debug("A tablet scheduled for all operators scanning is " + allPossibleScanTokens.get(i).getTablet());
        }
    }
    List<KuduPartitionScanAssignmentMeta> partitionPieForThisOperator = parentOperator.getPartitionPieAssignment();
    List<KuduPartitionScanAssignmentMeta> returnOfAssignments = new ArrayList<>();
    int totalScansForThisQuery = allPossibleScanTokens.size();
    int counterForPartAssignments = 0;
    for (KuduPartitionScanAssignmentMeta aPartofThePie : partitionPieForThisOperator) {
        if (aPartofThePie.getOrdinal() < totalScansForThisQuery) {
            // a given query plan might have less scantokens
            KuduPartitionScanAssignmentMeta aMetaForThisQuery = new KuduPartitionScanAssignmentMeta();
            aMetaForThisQuery.setTotalSize(totalScansForThisQuery);
            aMetaForThisQuery.setOrdinal(counterForPartAssignments);
            counterForPartAssignments += 1;
            aMetaForThisQuery.setCurrentQuery(parsedQuery.getSqlExpresssion());
            // we pick up only those ordinals that are part of the original partition pie assignment
            KuduScanToken aTokenForThisOperator = allPossibleScanTokens.get(aPartofThePie.getOrdinal());
            aMetaForThisQuery.setSerializedKuduScanToken(aTokenForThisOperator.serialize());
            returnOfAssignments.add(aMetaForThisQuery);
            LOG.debug("Added query scan for this operator " + aMetaForThisQuery + " with scan tablet as " + allPossibleScanTokens.get(aPartofThePie.getOrdinal()).getTablet());
        }
    }
    LOG.info(" A total of " + returnOfAssignments.size() + " have been scheduled for this operator");
    return returnOfAssignments;
}
Aggregations