use of org.apache.kudu.client.KuduScanToken in project presto by prestodb.
the class KuduClientSession method buildKuduSplits.
public List<KuduSplit> buildKuduSplits(KuduTableLayoutHandle layoutHandle) {
    reTryKerberos(kerberosAuthEnabled);
    KuduTableHandle tableHandle = layoutHandle.getTableHandle();
    KuduTable table = tableHandle.getTable(this);
    final int primaryKeyColumnCount = table.getSchema().getPrimaryKeyColumnCount();
    KuduScanToken.KuduScanTokenBuilder builder = client.newScanTokenBuilder(table);
    TupleDomain<ColumnHandle> constraintSummary = layoutHandle.getConstraintSummary();
    // An unsatisfiable constraint means no splits need to be generated.
    if (!addConstraintPredicates(table, builder, constraintSummary)) {
        return ImmutableList.of();
    }
    Optional<Set<ColumnHandle>> desiredColumns = layoutHandle.getDesiredColumns();
    if (desiredColumns.isPresent()) {
        if (desiredColumns.get().contains(KuduColumnHandle.ROW_ID_HANDLE)) {
            // The virtual row-id column requires all primary key columns in the projection.
            List<Integer> columnIndexes = IntStream.range(0, primaryKeyColumnCount)
                    .boxed().collect(Collectors.toList());
            for (ColumnHandle columnHandle : desiredColumns.get()) {
                if (columnHandle instanceof KuduColumnHandle) {
                    KuduColumnHandle k = (KuduColumnHandle) columnHandle;
                    int index = k.getOrdinalPosition();
                    if (index >= primaryKeyColumnCount) {
                        columnIndexes.add(index);
                    }
                }
            }
            builder.setProjectedColumnIndexes(columnIndexes);
        }
        else {
            List<Integer> columnIndexes = desiredColumns.get().stream()
                    .map(handle -> ((KuduColumnHandle) handle).getOrdinalPosition())
                    .collect(toImmutableList());
            builder.setProjectedColumnIndexes(columnIndexes);
        }
    }
    List<KuduScanToken> tokens = builder.build();
    return tokens.stream()
            .map(token -> toKuduSplit(tableHandle, token, primaryKeyColumnCount))
            .collect(toImmutableList());
}
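The addConstraintPredicates call (not shown here) is presumably where the TupleDomain is translated into Kudu predicates on the builder, with false signalling an unsatisfiable constraint. A minimal sketch of the single-value case of such a mapping, reusing the table and builder from the snippet above ("some_column" and the value 42 are illustrative assumptions, not from the connector):

// Hedged sketch: map a single-value domain on one column to a Kudu
// equality predicate and attach it to the scan token builder.
// "some_column" and 42L are illustrative assumptions.
ColumnSchema columnSchema = table.getSchema().getColumn("some_column");
KuduPredicate predicate = KuduPredicate.newComparisonPredicate(
        columnSchema, KuduPredicate.ComparisonOp.EQUAL, 42L);
builder.addPredicate(predicate);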
use of org.apache.kudu.client.KuduScanToken in project beam by apache.
the class KuduServiceImpl method createTabletScanners.
@Override
public List<byte[]> createTabletScanners(KuduIO.Read spec) throws KuduException {
    try (KuduClient client = getKuduClient(spec.getMasterAddresses())) {
        KuduTable table = client.openTable(spec.getTable());
        KuduScanToken.KuduScanTokenBuilder builder = client.newScanTokenBuilder(table);
        configureBuilder(spec, table.getSchema(), builder);
        List<KuduScanToken> tokens = builder.build();
        // Serialize each token so it can be shipped to the workers that will run the scans.
        return tokens.stream().map(t -> uncheckCall(t::serialize)).collect(Collectors.toList());
    }
}
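Each byte array returned above is a self-contained description of one tablet scan. On the reading side, KuduScanToken.deserializeIntoScanner rehydrates it into a live scanner; a minimal sketch of that round trip (the helper name scanSerializedToken is assumed):

// Reader-side sketch: turn one serialized token back into a scanner
// and drain its rows. The client must point at the same cluster that
// produced the token.
static void scanSerializedToken(byte[] serialized, KuduClient client) throws Exception {
    KuduScanner scanner = KuduScanToken.deserializeIntoScanner(serialized, client);
    while (scanner.hasMoreRows()) {
        RowResultIterator rows = scanner.nextRows();
        while (rows.hasNext()) {
            RowResult row = rows.next();
            // process row ...
        }
    }
    scanner.close();
}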
use of org.apache.kudu.client.KuduScanToken in project apex-malhar by apache.
the class AbstractKuduInputPartitioner method getKuduScanTokensForSelectAllColumns.
/**
 * Builds a set of scan tokens. The list of scan tokens is generated as if the entire table is being scanned,
 * i.e. the equivalent of a SELECT * FROM TABLE expression. This list is used to assign the partition pie
 * assignments for all of the planned operator partitions. Each operator gets a part of the pie as if all
 * columns were selected. Subsequently, when a query is to be processed, the query is used to generate the
 * scan tokens applicable to that query. Given that the partition pie represents the entire data set, the
 * scan assignments for the current query will be a subset.
 * @return The list of scan tokens as if the entire table is getting scanned.
 * @throws Exception in cases when the connection to the Kudu cluster cannot be closed.
 */
public List<KuduScanToken> getKuduScanTokensForSelectAllColumns() throws Exception {
    // We are not using the current query to decide the partition strategy but a SELECT *, as
    // we do not want to optimize for just the current query. This prevents rapid throttling of operator
    // instances when the scan patterns are erratic. On the other hand, this might result in underutilized
    // operator resources in the DAG, but it keeps the assignments consistent at a minimum.
    ApexKuduConnection apexKuduConnection = prototypeKuduInputOperator.getApexKuduConnectionInfo().build();
    KuduClient clientHandle = apexKuduConnection.getKuduClient();
    KuduTable table = apexKuduConnection.getKuduTable();
    KuduScanToken.KuduScanTokenBuilder builder = clientHandle.newScanTokenBuilder(table);
    // Project every column so the tokens cover the full table scan.
    List<String> allColumns = new ArrayList<>();
    List<ColumnSchema> columnList = apexKuduConnection.getKuduTable().getSchema().getColumns();
    for (ColumnSchema column : columnList) {
        allColumns.add(column.getName());
    }
    builder.setProjectedColumnNames(allColumns);
    LOG.debug("Building the partition pie assignments for the input operator");
    List<KuduScanToken> allPossibleTokens = builder.build();
    apexKuduConnection.close();
    return allPossibleTokens;
}
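A hypothetical sketch of the pie assignment the javadoc describes, assuming a simple round-robin policy over a fixed partition count (both are illustrative assumptions, not the partitioner's actual strategy):

// Hypothetical round-robin pie assignment across operator partitions.
// numPartitions and the round-robin policy are assumptions for
// illustration, not the partitioner's real implementation.
Map<Integer, List<KuduScanToken>> pieAssignments = new HashMap<>();
int numPartitions = 4; // assumed planned partition count
List<KuduScanToken> allTokens = getKuduScanTokensForSelectAllColumns();
for (int i = 0; i < allTokens.size(); i++) {
    pieAssignments.computeIfAbsent(i % numPartitions, k -> new ArrayList<>())
            .add(allTokens.get(i));
}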
use of org.apache.kudu.client.KuduScanToken in project apex-malhar by apache.
the class KuduInputOperatorCommons method truncateTable.
public void truncateTable() throws Exception {
    AbstractKuduPartitionScanner<UnitTestTablePojo, InputOperatorControlTuple> scannerForDeletingRows =
            unitTestStepwiseScanInputOperator.getScanner();
    List<KuduScanToken> scansForAllTablets =
            unitTestStepwiseScanInputOperator.getPartitioner().getKuduScanTokensForSelectAllColumns();
    ApexKuduConnection aCurrentConnection = scannerForDeletingRows.getConnectionPoolForThreads().get(0);
    KuduSession aSessionForDeletes = aCurrentConnection.getKuduClient().newSession();
    KuduTable currentTable = aCurrentConnection.getKuduTable();
    for (KuduScanToken aTabletScanToken : scansForAllTablets) {
        KuduScanner aScanner = aTabletScanToken.intoScanner(aCurrentConnection.getKuduClient());
        while (aScanner.hasMoreRows()) {
            RowResultIterator itrForRows = aScanner.nextRows();
            while (itrForRows.hasNext()) {
                RowResult aRow = itrForRows.next();
                int intRowKey = aRow.getInt("introwkey");
                String stringRowKey = aRow.getString("stringrowkey");
                long timestampRowKey = aRow.getLong("timestamprowkey");
                Delete aDeleteOp = currentTable.newDelete();
                aDeleteOp.getRow().addInt("introwkey", intRowKey);
                aDeleteOp.getRow().addString("stringrowkey", stringRowKey);
                aDeleteOp.getRow().addLong("timestamprowkey", timestampRowKey);
                aSessionForDeletes.apply(aDeleteOp);
            }
        }
    }
    aSessionForDeletes.close();
    // Sleep to allow for scans to complete
    Thread.sleep(2000);
}
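The fixed Thread.sleep at the end is a test-only grace period. An alternative sketch, assuming the session is switched to a buffered flush mode so pending deletes can be flushed and checked explicitly before closing (MANUAL_FLUSH here is an assumption; the test above relies on the client's default mode):

// Sketch: with a buffered flush mode, flush explicitly and inspect
// per-row errors instead of sleeping. MANUAL_FLUSH is an assumption,
// not what the test above actually configures.
aSessionForDeletes.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH);
// ... apply(aDeleteOp) calls as above ...
List<OperationResponse> responses = aSessionForDeletes.flush();
for (OperationResponse response : responses) {
    if (response.hasRowError()) {
        throw new IllegalStateException("Delete failed: " + response.getRowError());
    }
}
aSessionForDeletes.close();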
use of org.apache.kudu.client.KuduScanToken in project hive by apache.
the class KuduInputFormat method computeSplits.
private List<KuduInputSplit> computeSplits(Configuration conf) throws IOException {
    try (KuduClient client = KuduHiveUtils.getKuduClient(conf)) {
        // Hive depends on FileSplits so we get the dummy Path for the Splits.
        Job job = Job.getInstance(conf);
        JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
        Path[] paths = FileInputFormat.getInputPaths(jobContext);
        Path dummyPath = paths[0];
        String tableName = conf.get(KUDU_TABLE_NAME_KEY);
        if (StringUtils.isEmpty(tableName)) {
            throw new IllegalArgumentException(KUDU_TABLE_NAME_KEY + " is not set.");
        }
        if (!client.tableExists(tableName)) {
            throw new IllegalArgumentException("Kudu table does not exist: " + tableName);
        }
        KuduTable table = client.openTable(tableName);
        List<KuduPredicate> predicates = KuduPredicateHandler.getPredicates(conf, table.getSchema());
        KuduScanToken.KuduScanTokenBuilder tokenBuilder = client.newScanTokenBuilder(table)
                .setProjectedColumnNames(getProjectedColumns(conf));
        for (KuduPredicate predicate : predicates) {
            tokenBuilder.addPredicate(predicate);
        }
        List<KuduScanToken> tokens = tokenBuilder.build();
        List<KuduInputSplit> splits = new ArrayList<>(tokens.size());
        for (KuduScanToken token : tokens) {
            List<String> locations = new ArrayList<>(token.getTablet().getReplicas().size());
            for (LocatedTablet.Replica replica : token.getTablet().getReplicas()) {
                locations.add(replica.getRpcHost());
            }
            splits.add(new KuduInputSplit(token, dummyPath, locations.toArray(new String[0])));
        }
        return splits;
    }
}
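getProjectedColumns is defined elsewhere in the class. A hypothetical sketch of the shape such a helper could take, assuming the projection arrives as a comma-separated configuration property (the property name below is invented for illustration, not Hive's actual key):

// Hypothetical helper: read projected column names from an assumed
// comma-separated property. The key "kudu.projected.columns" is an
// illustration, not the real Hive configuration key.
private static List<String> getProjectedColumns(Configuration conf) {
    String columns = conf.get("kudu.projected.columns", "");
    return columns.isEmpty()
            ? Collections.emptyList()
            : Arrays.asList(columns.split(","));
}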