Use of org.apache.apex.malhar.lib.wal.WindowDataManager in project apex-malhar by apache.
The class TestNiFiInputApplication, method populateDAG:
@Override
public void populateDAG(DAG dag, Configuration conf) {
final SiteToSiteClientConfig clientConfig = new SiteToSiteClient.Builder().url("http://localhost:8080/nifi").portName("Apex").requestBatchCount(5).buildConfig();
final SiteToSiteClient.Builder builder = new SiteToSiteClient.Builder().fromConfig(clientConfig);
final WindowDataManager windowDataManager = new WindowDataManager.NoopWindowDataManager();
NiFiSinglePortInputOperator nifi = dag.addOperator("nifi", new NiFiSinglePortInputOperator(builder, windowDataManager));
ConsoleOutputOperator console = dag.addOperator("console", new ConsoleOutputOperator());
dag.addStream("nifi_console", nifi.outputPort, console.input).setLocality(null);
}
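The NoopWindowDataManager above keeps no per-window state, so recovery after a failure is not idempotent. A minimal sketch of the alternative, assuming FSWindowDataManager from the same org.apache.apex.malhar.lib.wal package is acceptable in this test; only the manager passed to the operator changes.
final WindowDataManager recoveringManager = new FSWindowDataManager();
// persists per-window metadata to the filesystem so the operator can replay the same tuples after a failure
NiFiSinglePortInputOperator nifi = dag.addOperator("nifi", new NiFiSinglePortInputOperator(builder, recoveringManager));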
Use of org.apache.apex.malhar.lib.wal.WindowDataManager in project apex-malhar by apache.
The class AbstractFileInputOperator, method definePartitions:
@Override
public Collection<Partition<AbstractFileInputOperator<T>>> definePartitions(Collection<Partition<AbstractFileInputOperator<T>>> partitions, PartitioningContext context) {
lastRepartition = System.currentTimeMillis();
int totalCount = getNewPartitionCount(partitions, context);
LOG.debug("Computed new partitions: {}", totalCount);
if (totalCount == partitions.size()) {
return partitions;
}
AbstractFileInputOperator<T> tempOperator = partitions.iterator().next().getPartitionedInstance();
MutableLong tempGlobalNumberOfRetries = tempOperator.globalNumberOfRetries;
MutableLong tempGlobalNumberOfFailures = tempOperator.globalNumberOfFailures;
/*
* Build collective state from all instances of the operator.
*/
Set<String> totalProcessedFiles = Sets.newHashSet();
Set<FailedFile> currentFiles = Sets.newHashSet();
List<DirectoryScanner> oldscanners = Lists.newLinkedList();
List<FailedFile> totalFailedFiles = Lists.newLinkedList();
List<String> totalPendingFiles = Lists.newLinkedList();
Set<Integer> deletedOperators = Sets.newHashSet();
for (Partition<AbstractFileInputOperator<T>> partition : partitions) {
AbstractFileInputOperator<T> oper = partition.getPartitionedInstance();
totalProcessedFiles.addAll(oper.processedFiles);
totalFailedFiles.addAll(oper.failedFiles);
totalPendingFiles.addAll(oper.pendingFiles);
currentFiles.addAll(oper.unfinishedFiles);
tempGlobalNumberOfRetries.add(oper.localNumberOfRetries);
tempGlobalNumberOfFailures.add(oper.localNumberOfFailures);
if (oper.currentFile != null) {
currentFiles.add(new FailedFile(oper.currentFile, oper.offset));
}
oldscanners.add(oper.getScanner());
deletedOperators.add(oper.operatorId);
}
/*
* Create partitions of scanners; the scanner's partition method does the state
* transfer for the DirectoryScanner objects.
*/
List<DirectoryScanner> scanners = scanner.partition(totalCount, oldscanners);
Collection<Partition<AbstractFileInputOperator<T>>> newPartitions = Lists.newArrayListWithExpectedSize(totalCount);
List<WindowDataManager> newManagers = windowDataManager.partition(totalCount, deletedOperators);
KryoCloneUtils<AbstractFileInputOperator<T>> cloneUtils = KryoCloneUtils.createCloneUtils(this);
for (int i = 0; i < scanners.size(); i++) {
@SuppressWarnings("unchecked") AbstractFileInputOperator<T> oper = cloneUtils.getClone();
DirectoryScanner scn = scanners.get(i);
oper.setScanner(scn);
// Do state transfer for processed files.
oper.processedFiles.addAll(totalProcessedFiles);
oper.globalNumberOfFailures = tempGlobalNumberOfFailures;
oper.localNumberOfFailures.setValue(0);
oper.globalNumberOfRetries = tempGlobalNumberOfRetries;
oper.localNumberOfRetries.setValue(0);
/* redistribute unfinished files properly */
oper.unfinishedFiles.clear();
oper.currentFile = null;
oper.offset = 0;
Iterator<FailedFile> unfinishedIter = currentFiles.iterator();
while (unfinishedIter.hasNext()) {
FailedFile unfinishedFile = unfinishedIter.next();
if (scn.acceptFile(unfinishedFile.path)) {
oper.unfinishedFiles.add(unfinishedFile);
unfinishedIter.remove();
}
}
/* transfer failed files */
oper.failedFiles.clear();
Iterator<FailedFile> iter = totalFailedFiles.iterator();
while (iter.hasNext()) {
FailedFile ff = iter.next();
if (scn.acceptFile(ff.path)) {
oper.failedFiles.add(ff);
iter.remove();
}
}
/* redistribute pending files properly */
oper.pendingFiles.clear();
Iterator<String> pendingFilesIterator = totalPendingFiles.iterator();
while (pendingFilesIterator.hasNext()) {
String pathString = pendingFilesIterator.next();
if (scn.acceptFile(pathString)) {
oper.pendingFiles.add(pathString);
pendingFilesIterator.remove();
}
}
oper.setWindowDataManager(newManagers.get(i));
newPartitions.add(new DefaultPartition<AbstractFileInputOperator<T>>(oper));
}
LOG.info("definePartitions called returning {} partitions", newPartitions.size());
return newPartitions;
}
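For context, a minimal sketch of how the windowDataManager that definePartitions redistributes is typically assigned when the DAG is built; the LineByLineFileInputOperator subclass and the input directory are illustrative assumptions, not taken from this snippet.
@Override
public void populateDAG(DAG dag, Configuration conf) {
LineByLineFileInputOperator reader = dag.addOperator("reader", new LineByLineFileInputOperator());
// hypothetical input directory
reader.setDirectory("/tmp/input");
// this manager is the one partition()-ed across the new operator instances on repartition
reader.setWindowDataManager(new FSWindowDataManager());
ConsoleOutputOperator console = dag.addOperator("console", new ConsoleOutputOperator());
dag.addStream("lines", reader.output, console.input);
}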
Use of org.apache.apex.malhar.lib.wal.WindowDataManager in project apex-malhar by apache.
The class AbstractKafkaInputOperator, method definePartitions:
@Override
public Collection<Partitioner.Partition<AbstractKafkaInputOperator<K>>> definePartitions(Collection<Partitioner.Partition<AbstractKafkaInputOperator<K>>> partitions, Partitioner.PartitioningContext context) {
// Initialize brokers from zookeepers
getConsumer().initBrokers();
boolean isInitialParitition = true;
// check if it's the initial partition
if (partitions.iterator().hasNext()) {
isInitialParitition = partitions.iterator().next().getStats() == null;
}
// Operator partitions
List<Partitioner.Partition<AbstractKafkaInputOperator<K>>> newPartitions = null;
// initialize the offset
Map<KafkaPartition, Long> initOffset = null;
if (isInitialParitition && offsetManager != null) {
initOffset = offsetManager.loadInitialOffsets();
logger.info("Initial offsets: {} ", "{ " + Joiner.on(", ").useForNull("").withKeyValueSeparator(": ").join(initOffset) + " }");
}
Set<Integer> deletedOperators = Sets.newHashSet();
Collection<Partition<AbstractKafkaInputOperator<K>>> resultPartitions = partitions;
boolean numPartitionsChanged = false;
switch(strategy) {
// Each operator partition will consume from only one kafka partition
case ONE_TO_ONE:
if (isInitialParitition) {
lastRepartitionTime = System.currentTimeMillis();
logger.info("[ONE_TO_ONE]: Initializing partition(s)");
// get partition metadata for topics.
// Whether the operator uses the high-level or the simple kafka consumer, it always creates a temporary simple kafka consumer to get the metadata of the topic
// The initial value of brokerList of the KafkaConsumer is used to retrieve the topic metadata
Map<String, List<PartitionMetadata>> kafkaPartitions = KafkaMetadataUtil.getPartitionsForTopic(getConsumer().brokers, getConsumer().getTopic());
// initialize the number of operator partitions according to number of kafka partitions
newPartitions = new LinkedList<Partitioner.Partition<AbstractKafkaInputOperator<K>>>();
for (Map.Entry<String, List<PartitionMetadata>> kp : kafkaPartitions.entrySet()) {
String clusterId = kp.getKey();
for (PartitionMetadata pm : kp.getValue()) {
logger.info("[ONE_TO_ONE]: Create operator partition for cluster {}, topic {}, kafka partition {} ", clusterId, getConsumer().topic, pm.partitionId());
newPartitions.add(createPartition(Sets.newHashSet(new KafkaPartition(clusterId, consumer.topic, pm.partitionId())), initOffset));
}
}
resultPartitions = newPartitions;
numPartitionsChanged = true;
} else if (newWaitingPartition.size() != 0) {
// add partition for new kafka partition
for (KafkaPartition newPartition : newWaitingPartition) {
logger.info("[ONE_TO_ONE]: Add operator partition for cluster {}, topic {}, partition {}", newPartition.getClusterId(), getConsumer().topic, newPartition.getPartitionId());
partitions.add(createPartition(Sets.newHashSet(newPartition), null));
}
newWaitingPartition.clear();
resultPartitions = partitions;
numPartitionsChanged = true;
}
break;
// Each operator partition may consume from several kafka partitions, and the strategy tries to
// guarantee the total intake rate for each operator partition is below some threshold
case ONE_TO_MANY:
if (getConsumer() instanceof HighlevelKafkaConsumer) {
throw new UnsupportedOperationException("[ONE_TO_MANY]: The high-level consumer is not supported for ONE_TO_MANY partition strategy.");
}
if (isInitialParitition || newWaitingPartition.size() != 0) {
lastRepartitionTime = System.currentTimeMillis();
logger.info("[ONE_TO_MANY]: Initializing partition(s)");
// get partition metadata for topics.
// Whether the operator uses the high-level or the simple kafka consumer, it always creates a temporary simple kafka consumer to get the metadata of the topic
// The initial value of brokerList of the KafkaConsumer is used to retrieve the topic metadata
Map<String, List<PartitionMetadata>> kafkaPartitions = KafkaMetadataUtil.getPartitionsForTopic(getConsumer().brokers, getConsumer().getTopic());
int size = initialPartitionCount;
@SuppressWarnings("unchecked") Set<KafkaPartition>[] kps = (Set<KafkaPartition>[]) Array.newInstance((new HashSet<KafkaPartition>()).getClass(), size);
int i = 0;
for (Map.Entry<String, List<PartitionMetadata>> en : kafkaPartitions.entrySet()) {
String clusterId = en.getKey();
for (PartitionMetadata pm : en.getValue()) {
if (kps[i % size] == null) {
kps[i % size] = new HashSet<KafkaPartition>();
}
kps[i % size].add(new KafkaPartition(clusterId, consumer.topic, pm.partitionId()));
i++;
}
}
size = i > size ? size : i;
newPartitions = new ArrayList<Partitioner.Partition<AbstractKafkaInputOperator<K>>>(size);
for (i = 0; i < size; i++) {
logger.info("[ONE_TO_MANY]: Create operator partition for kafka partition(s): {} ", StringUtils.join(kps[i], ", "));
newPartitions.add(createPartition(kps[i], initOffset));
}
// Add the existing partition Ids to the deleted operators
for (Partition<AbstractKafkaInputOperator<K>> op : partitions) {
deletedOperators.add(op.getPartitionedInstance().operatorId);
}
newWaitingPartition.clear();
resultPartitions = newPartitions;
numPartitionsChanged = true;
}
break;
case ONE_TO_MANY_HEURISTIC:
throw new UnsupportedOperationException("[ONE_TO_MANY_HEURISTIC]: Not implemented yet");
default:
break;
}
if (numPartitionsChanged) {
List<WindowDataManager> managers = windowDataManager.partition(resultPartitions.size(), deletedOperators);
int i = 0;
for (Partition<AbstractKafkaInputOperator<K>> partition : resultPartitions) {
partition.getPartitionedInstance().setWindowDataManager(managers.get(i++));
}
}
return resultPartitions;
}
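A minimal sketch of the DAG-side setup that pairs with the redistribution above, assuming the KafkaSinglePortStringInputOperator subclass from the same contrib module; topic, broker and partition-count configuration is omitted, and only the WindowDataManager wiring is shown.
@Override
public void populateDAG(DAG dag, Configuration conf) {
KafkaSinglePortStringInputOperator kafkaInput = dag.addOperator("kafkaInput", new KafkaSinglePortStringInputOperator());
// the manager set here is split by windowDataManager.partition(...) whenever the partition count changes
kafkaInput.setWindowDataManager(new FSWindowDataManager());
ConsoleOutputOperator console = dag.addOperator("console", new ConsoleOutputOperator());
dag.addStream("kafka_console", kafkaInput.outputPort, console.input);
}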
Use of org.apache.apex.malhar.lib.wal.WindowDataManager in project apex-malhar by apache.
The class TestNiFiOutputApplication, method populateDAG:
@Override
public void populateDAG(DAG dag, Configuration conf) {
final SiteToSiteClientConfig clientConfig = new SiteToSiteClient.Builder().url("http://localhost:8080/nifi").portName("Apex").buildConfig();
final int batchSize = 1;
final SiteToSiteClient.Builder builder = new SiteToSiteClient.Builder().fromConfig(clientConfig);
final NiFiDataPacketBuilder<String> dataPacketBuilder = new StringNiFiDataPacketBuilder();
final WindowDataManager windowDataManager = new WindowDataManager.NoopWindowDataManager();
RandomEventGenerator rand = dag.addOperator("rand", new RandomEventGenerator());
NiFiSinglePortOutputOperator nifi = dag.addOperator("nifi", new NiFiSinglePortOutputOperator(builder, dataPacketBuilder, windowDataManager, batchSize));
dag.addStream("rand_nifi", rand.string_data, nifi.inputPort).setLocality(null);
}
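The StringNiFiDataPacketBuilder used above is not shown in this snippet; a minimal sketch of what such a builder could look like, assuming the NiFiDataPacketBuilder interface exposes a single factory method and a StandardNiFiDataPacket value class (both names are assumptions, not confirmed here).
public static class StringNiFiDataPacketBuilder implements NiFiDataPacketBuilder<String> {
@Override
public NiFiDataPacket createNiFiDataPacket(String s) {
// content is the tuple's bytes; no extra flow-file attributes are attached (assumed API)
return new StandardNiFiDataPacket(s.getBytes(), Collections.<String, String>emptyMap());
}
}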
Use of org.apache.apex.malhar.lib.wal.WindowDataManager in project apex-malhar by apache.
The class AbstractKinesisInputOperator, method definePartitions:
@Override
public Collection<Partition<AbstractKinesisInputOperator>> definePartitions(Collection<Partition<AbstractKinesisInputOperator>> partitions, PartitioningContext context) {
boolean isInitialParitition = partitions.iterator().next().getStats() == null;
// Set the credentials to get the list of shards
if (isInitialParitition) {
try {
KinesisUtil.getInstance().createKinesisClient(accessKey, secretKey, endPoint);
} catch (Exception e) {
throw new RuntimeException("[definePartitions]: Unable to load credentials. ", e);
}
}
List<Shard> shards = KinesisUtil.getInstance().getShardList(getStreamName());
// Operator partitions
List<Partition<AbstractKinesisInputOperator>> newPartitions = null;
Set<Integer> deletedOperators = Sets.newHashSet();
// initialize the shard positions
Map<String, String> initShardPos = null;
if (isInitialParitition && shardManager != null) {
initShardPos = shardManager.loadInitialShardPositions();
}
switch(strategy) {
// Each operator partition will consume from only one kinesis shard
case ONE_TO_ONE:
if (isInitialParitition) {
lastRepartitionTime = System.currentTimeMillis();
logger.info("[ONE_TO_ONE]: Initializing partition(s)");
// initialize the number of operator partitions according to number of shards
newPartitions = new ArrayList<Partition<AbstractKinesisInputOperator>>(shards.size());
for (int i = 0; i < shards.size(); i++) {
logger.info("[ONE_TO_ONE]: Create operator partition for kinesis partition: " + shards.get(i).getShardId() + ", StreamName: " + this.getConsumer().streamName);
newPartitions.add(createPartition(Sets.newHashSet(shards.get(i).getShardId()), initShardPos));
}
} else if (newWaitingPartition.size() != 0) {
// Remove the partitions for the closed shards
removePartitionsForClosedShards(partitions, deletedOperators);
// add partition for new kinesis shard
for (String pid : newWaitingPartition) {
logger.info("[ONE_TO_ONE]: Add operator partition for kinesis partition " + pid);
partitions.add(createPartition(Sets.newHashSet(pid), null));
}
newWaitingPartition.clear();
List<WindowDataManager> managers = windowDataManager.partition(partitions.size(), deletedOperators);
int i = 0;
for (Partition<AbstractKinesisInputOperator> partition : partitions) {
partition.getPartitionedInstance().setWindowDataManager(managers.get(i));
i++;
}
return partitions;
}
break;
// Several kinesis shards may be assigned to one operator partition; afterwards, the framework will dynamically adjust the partitions
case MANY_TO_ONE:
/* This case is handled in two ways.
1. Dynamic Partition: the number of DT partitions depends on the number of open shards.
2. Static Partition: the number of DT partitions is fixed, regardless of whether shards are added or removed.
*/
int size = initialPartitionCount;
if (newWaitingPartition.size() != 0) {
// Get the list of open shards
shards = getOpenShards(partitions);
if (shardsPerPartition > 1) {
size = (int) Math.ceil(shards.size() / (shardsPerPartition * 1.0));
}
initShardPos = shardManager.loadInitialShardPositions();
}
@SuppressWarnings("unchecked") Set<String>[] pIds = (Set<String>[]) Array.newInstance((new HashSet<String>()).getClass(), size);
newPartitions = new ArrayList<Partition<AbstractKinesisInputOperator>>(size);
for (int i = 0; i < shards.size(); i++) {
Shard pm = shards.get(i);
if (pIds[i % size] == null) {
pIds[i % size] = new HashSet<String>();
}
pIds[i % size].add(pm.getShardId());
}
if (isInitialParitition) {
lastRepartitionTime = System.currentTimeMillis();
logger.info("[MANY_TO_ONE]: Initializing partition(s)");
} else {
logger.info("[MANY_TO_ONE]: Add operator partition for kinesis partition(s): " + StringUtils.join(newWaitingPartition, ", ") + ", StreamName: " + this.getConsumer().streamName);
newWaitingPartition.clear();
}
// Add the existing partition Ids to the deleted operators
for (Partition<AbstractKinesisInputOperator> op : partitions) {
deletedOperators.add(op.getPartitionedInstance().operatorId);
}
for (int i = 0; i < pIds.length; i++) {
logger.info("[MANY_TO_ONE]: Create operator partition for kinesis partition(s): " + StringUtils.join(pIds[i], ", ") + ", StreamName: " + this.getConsumer().streamName);
if (pIds[i] != null) {
newPartitions.add(createPartition(pIds[i], initShardPos));
}
}
break;
default:
break;
}
int i = 0;
List<WindowDataManager> managers = windowDataManager.partition(newPartitions.size(), deletedOperators);
for (Partition<AbstractKinesisInputOperator> partition : newPartitions) {
partition.getPartitionedInstance().setWindowDataManager(managers.get(i++));
}
return newPartitions;
}
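A minimal sketch of the corresponding DAG setup, assuming the KinesisStringInputOperator subclass from the same contrib module and bean-style setters for the credentials and stream name; the setter names and values are illustrative assumptions.
@Override
public void populateDAG(DAG dag, Configuration conf) {
KinesisStringInputOperator kinesisInput = dag.addOperator("kinesisInput", new KinesisStringInputOperator());
// assumption: credentials and stream are supplied through setters; values are placeholders
kinesisInput.setAccessKey("<accessKey>");
kinesisInput.setSecretKey("<secretKey>");
kinesisInput.setStreamName("testStream");
// the manager split by windowDataManager.partition(...) in definePartitions above
kinesisInput.setWindowDataManager(new FSWindowDataManager());
ConsoleOutputOperator console = dag.addOperator("console", new ConsoleOutputOperator());
dag.addStream("kinesis_console", kinesisInput.outputPort, console.input);
}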