Use of org.voltdb.CSVSnapshotFilter in the voltdb project by VoltDB.
Example: the createSetup method of the CSVSnapshotWritePlan class.
@Override
public Callable<Boolean> createSetup(String file_path, String pathType, String file_nonce,
        long txnId, Map<Integer, Long> partitionTransactionIds, JSONObject jsData,
        SystemProcedureExecutionContext context, final VoltTable result,
        ExtensibleSnapshotDigestData extraSnapshotData, SiteTracker tracker,
        HashinatorSnapshotData hashinatorData, long timestamp) {
    assert (SnapshotSiteProcessor.ExecutionSitesCurrentlySnapshotting.isEmpty());

    /*
     * List of partitions to include if this snapshot is
     * going to be deduped. Attempts to break up the work
     * by seeding an RNG and selecting
     * a random replica to do the work. Will not work in failure
     * cases, but we don't use dedupe when we want durability.
     */
    List<Long> sitesToInclude = CSVSnapshotWritePlan.computeDedupedLocalSites(txnId, tracker);

    // If there's no work to do on this host, just claim success and get out:
    if (sitesToInclude.isEmpty() && !tracker.isFirstHost()) {
        return null;
    }

    final SnapshotRequestConfig config = new SnapshotRequestConfig(jsData, context.getDatabase());
    final AtomicInteger numTables = new AtomicInteger(config.tables.length);
    final SnapshotRegistry.Snapshot snapshotRecord =
            SnapshotRegistry.startSnapshot(txnId, context.getHostId(), file_path, file_nonce,
                    SnapshotFormat.CSV, config.tables);

    boolean noTargetsCreated = true;
    final ArrayList<SnapshotTableTask> partitionedSnapshotTasks = new ArrayList<SnapshotTableTask>();
    final ArrayList<SnapshotTableTask> replicatedSnapshotTasks = new ArrayList<SnapshotTableTask>();
    for (final Table table : config.tables) {
        /*
         * For a deduped CSV snapshot, only produce the replicated tables on the "leader"
         * host.
         */
        if (table.getIsreplicated() && !tracker.isFirstHost()) {
            snapshotRecord.removeTable(table.getTypeName());
            // We'll expect one less table in the global table count
            // in order to be done, too (ENG-4802)
            numTables.decrementAndGet();
            continue;
        }

        List<SnapshotDataFilter> filters = new ArrayList<SnapshotDataFilter>();
        filters.add(new CSVSnapshotFilter(CatalogUtil.getVoltTable(table), ',', null));

        final SnapshotTableTask task = new SnapshotTableTask(table,
                filters.toArray(new SnapshotDataFilter[filters.size()]), null, false);

        if (table.getIsreplicated()) {
            replicatedSnapshotTasks.add(task);
        } else {
            partitionedSnapshotTasks.add(task);
        }

        noTargetsCreated = false;
        result.addRow(context.getHostId(), CoreUtils.getHostnameOrAddress(),
                table.getTypeName(), "SUCCESS", "");
    }

    if (noTargetsCreated) {
        SnapshotRegistry.discardSnapshot(snapshotRecord);
    }

    // CSV snapshots do the partitioned work only on the specified sites for de-duping,
    // but since we've pre-filtered the replicated task list to only contain entries on
    // one node, we can go ahead and distribute them across all of the sites on that node.
    placePartitionedTasks(partitionedSnapshotTasks, sitesToInclude);
    placeReplicatedTasks(replicatedSnapshotTasks, tracker.getSitesForHost(context.getHostId()));

    // All IO work will be deferred and be run on the dedicated snapshot IO thread
    return createDeferredSetup(file_path, pathType, file_nonce, config.tables, txnId,
            partitionTransactionIds, context, extraSnapshotData, timestamp, numTables,
            snapshotRecord, partitionedSnapshotTasks, replicatedSnapshotTasks);
}
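The helper CSVSnapshotWritePlan.computeDedupedLocalSites is referenced above but not shown here. The following is a minimal sketch of the idea described in the dedupe comment: seed an RNG with the snapshot txnId so every host makes the same deterministic choice of one replica per partition, then keep only the chosen sites that are local. The class name, the simplified replica map, and the localSites parameter are assumptions standing in for the real SiteTracker API.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;

// Hypothetical, simplified stand-in for the real computeDedupedLocalSites().
public class DedupedSiteSelectionSketch {
    public static List<Long> computeDedupedLocalSites(long txnId,
                                                      Map<Integer, List<Long>> replicasByPartition,
                                                      Set<Long> localSites) {
        // Seeding with the txnId means every host computes the same selection.
        Random rng = new Random(txnId);
        List<Long> sitesToInclude = new ArrayList<>();

        // Iterate partitions and replicas in a deterministic order so all hosts agree.
        List<Integer> partitions = new ArrayList<>(replicasByPartition.keySet());
        Collections.sort(partitions);
        for (Integer partition : partitions) {
            List<Long> replicas = new ArrayList<>(replicasByPartition.get(partition));
            Collections.sort(replicas);
            // Pick one replica at random to do the work for this partition.
            Long chosen = replicas.get(rng.nextInt(replicas.size()));
            // Each host keeps only the picks that live on this host.
            if (localSites.contains(chosen)) {
                sitesToInclude.add(chosen);
            }
        }
        return sitesToInclude;
    }
}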
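placePartitionedTasks and placeReplicatedTasks are likewise not shown in this snippet. Below is a minimal sketch of the placement described in the comment above: the replicated-table tasks have been pre-filtered to a single host, so they can simply be spread across all of that host's sites. The class name, the generic task type, and the returned map shape are assumptions for illustration, not the real VoltDB API.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Hypothetical sketch of distributing one host's replicated-table tasks
// across all of that host's local sites.
public class TaskPlacementSketch {
    static <T> Map<Long, List<T>> placeReplicatedTasks(List<T> tasks, List<Long> localSites) {
        Map<Long, List<T>> tasksBySite = new HashMap<>();
        for (Long site : localSites) {
            tasksBySite.put(site, new ArrayList<T>());
        }
        int i = 0;
        for (T task : tasks) {
            // Round-robin assignment keeps the per-site workload roughly balanced.
            Long site = localSites.get(i++ % localSites.size());
            tasksBySite.get(site).add(task);
        }
        return tasksBySite;
    }
}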