Use of org.pentaho.di.trans.cluster.TransSplitter in project pentaho-kettle by pentaho.
From the class MasterSlaveIT, method runMultipleCopiesOnMultipleSlaves.
/**
* This test reads a CSV file and sends the data to 3 copies on 3 slave servers.<br>
*/
public void runMultipleCopiesOnMultipleSlaves() throws Exception {
  TransMeta transMeta = loadTransMetaReplaceSlavesInCluster(clusterGenerator,
      "test/org/pentaho/di/cluster/test-multiple-copies-on-multiple-slaves.ktr");
  TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
  TransSplitter transSplitter = Trans.executeClustered(transMeta, config);
  LogChannel logChannel = createLogChannel("cluster unit test <testMultipleCopiesOnMultipleSlaves>");
  long nrErrors = Trans.monitorClusteredTransformation(logChannel, transSplitter, null, 1);
  assertEquals(0L, nrErrors);
  String result = loadFileContent(transMeta, "${java.io.tmpdir}/test-multiple-copies-on-multiple-slaves.txt");
  assertEqualsIgnoreWhitespacesAndCase("100", result);
}
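The helper createClusteredTransExecutionConfiguration() is defined in the tests' shared base class and is not shown on this page. As a rough sketch of what such a helper might set, using public setters that TransExecutionConfiguration exposes (the specific flag values here are an assumption, not the actual helper):

private TransExecutionConfiguration createClusteredTransExecutionConfiguration() {
  TransExecutionConfiguration config = new TransExecutionConfiguration();
  config.setExecutingClustered(true);            // run on the cluster rather than locally
  config.setClusterPosting(true);                // post the generated transformations to the slave servers
  config.setClusterPreparing(true);              // prepare execution on each slave
  config.setClusterStarting(true);               // start execution on each slave
  config.setClusterShowingTransformation(false); // no GUI on the slaves
  config.setLogLevel(LogLevel.BASIC);            // org.pentaho.di.core.logging.LogLevel
  return config;
}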
Use of org.pentaho.di.trans.cluster.TransSplitter in project pentaho-kettle by pentaho.
From the class PartitioningIT, method testPartitioningRepartitioningOnCluster.
/**
* This test reads a CSV file in parallel on the cluster, one copy per slave.<br>
* It then partitions the data on id in 12 partitions (4 per slave).<br>
* After that it re-partitions the data in 9 partitions (3 per slave).<br>
* The assertions below verify 12 result files on disk (files 000 through 011).<br>
* File: "partitioning-repartitioning-on-cluster.ktr"<br>
*/
public void testPartitioningRepartitioningOnCluster() throws Exception {
  init();
  ClusterGenerator clusterGenerator = new ClusterGenerator();
  try {
    clusterGenerator.launchSlaveServers();
    TransMeta transMeta = loadAndModifyTestTransformation(clusterGenerator,
        "src/it/resources/org/pentaho/di/cluster/partitioning-repartitioning-on-cluster.ktr");
    TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
    TransSplitter transSplitter = Trans.executeClustered(transMeta, config);
    long nrErrors = Trans.monitorClusteredTransformation(
        new LogChannel("cluster unit test <testPartitioningRepartitioningOnCluster>"), transSplitter, null, 1);
    assertEquals(0L, nrErrors);
    String[] results = new String[] { "8", "9", "9", "9", "9", "8", "8", "8", "8", "8", "8", "8" };
    String[] files = new String[] { "000", "001", "002", "003", "004", "005", "006", "007", "008", "009", "010", "011" };
    for (int i = 0; i < results.length; i++) {
      String filename = "${java.io.tmpdir}/partitioning-repartitioning-on-cluster-" + files[i] + ".txt";
      String result = loadFileContent(transMeta, filename);
      assertEqualsIgnoreWhitespacesAndCase(results[i], result);
      // Remove the output file: we don't want to leave too much clutter around.
      FileObject file = KettleVFS.getFileObject(transMeta.environmentSubstitute(filename));
      file.delete();
    }
  } catch (Exception e) {
    e.printStackTrace();
    fail(e.toString());
  } finally {
    try {
      clusterGenerator.stopSlaveServers();
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.toString());
    }
  }
}
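Both loadFileContent() and assertEqualsIgnoreWhitespacesAndCase() are inherited helpers that this page does not show. Based on how loadFileContent() is called above, a minimal sketch would resolve variables such as ${java.io.tmpdir} against the transformation and read the file through Kettle's VFS layer (the encoding choice is an assumption):

private String loadFileContent(TransMeta transMeta, String filename) throws Exception {
  // Resolve ${...} variables against the transformation's variable space.
  String realFilename = transMeta.environmentSubstitute(filename);
  // Read the whole file through KettleVFS; Const.XML_ENCODING is "UTF-8".
  return KettleVFS.getTextFileContent(realFilename, Const.XML_ENCODING);
}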
Use of org.pentaho.di.trans.cluster.TransSplitter in project pentaho-kettle by pentaho.
From the class PartitioningIT, method testPartitioningRepartitioningOnCluster3.
/**
* Same as testPartitioningRepartitioningOnCluster() but passing the data to a non-partitioned step on the master.
*
* File: "partitioning-repartitioning-on-cluster3.ktr"<br>
*/
public void testPartitioningRepartitioningOnCluster3() throws Exception {
  init();
  ClusterGenerator clusterGenerator = new ClusterGenerator();
  try {
    clusterGenerator.launchSlaveServers();
    TransMeta transMeta = loadAndModifyTestTransformation(clusterGenerator,
        "src/it/resources/org/pentaho/di/cluster/partitioning-repartitioning-on-cluster3.ktr");
    TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
    TransSplitter transSplitter = Trans.executeClustered(transMeta, config);
    long nrErrors = Trans.monitorClusteredTransformation(
        new LogChannel("cluster unit test <testPartitioningRepartitioningOnCluster3>"), transSplitter, null, 1);
    assertEquals(0L, nrErrors);
    String goldenData = "0;16\n1;17\n2;17\n3;17\n4;17\n5;16";
    String filename = "${java.io.tmpdir}/partitioning-repartitioning-on-cluster3.txt";
    String result = loadFileContent(transMeta, filename);
    assertEqualsIgnoreWhitespacesAndCase(goldenData, result);
    // Cleanup of the output file is disabled here:
    // FileObject file = KettleVFS.getFileObject(transMeta.environmentSubstitute(filename));
    // file.delete();
  } catch (Exception e) {
    e.printStackTrace();
    fail(e.toString());
  } finally {
    try {
      clusterGenerator.stopSlaveServers();
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.toString());
    }
  }
}
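The golden data above ("0;16\n1;17\n...") is compared with assertEqualsIgnoreWhitespacesAndCase(), another inherited helper. Judging purely by its name and usage, a plausible sketch normalizes whitespace and compares case-insensitively (this is an assumption, not the actual implementation):

private static void assertEqualsIgnoreWhitespacesAndCase(String expected, String actual) {
  // Collapse whitespace runs to single spaces, trim, then compare ignoring case.
  String normalizedExpected = expected.replaceAll("\\s+", " ").trim();
  String normalizedActual = actual.replaceAll("\\s+", " ").trim();
  assertTrue("expected [" + normalizedExpected + "] but was [" + normalizedActual + "]",
      normalizedExpected.equalsIgnoreCase(normalizedActual));
}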
Use of org.pentaho.di.trans.cluster.TransSplitter in project pentaho-kettle by pentaho.
From the class PartitioningIT, method testPartitioningSwimmingLanesOnCluster.
/**
* This test reads a CSV file in parallel on the cluster, one copy per slave.<br>
* It then partitions the data on id in 12 partitions (4 per slave) and keeps the data partitioned until written to
* file.<br>
* As such we expect 12 files on disk.<br>
* File: "partitioning-swimming-lanes-on-cluster.ktr"<br>
*/
public void testPartitioningSwimmingLanesOnCluster() throws Exception {
  init();
  ClusterGenerator clusterGenerator = new ClusterGenerator();
  try {
    clusterGenerator.launchSlaveServers();
    TransMeta transMeta = loadAndModifyTestTransformation(clusterGenerator,
        "src/it/resources/org/pentaho/di/cluster/partitioning-swimming-lanes-on-cluster.ktr");
    TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
    TransSplitter transSplitter = Trans.executeClustered(transMeta, config);
    long nrErrors = Trans.monitorClusteredTransformation(
        new LogChannel("cluster unit test <testPartitioningSwimmingLanesOnCluster>"), transSplitter, null, 1);
    assertEquals(0L, nrErrors);
    String[] results = new String[] { "8", "9", "9", "9", "9", "8", "8", "8", "8", "8", "8", "8" };
    String[] files = new String[] { "000", "001", "002", "003", "004", "005", "006", "007", "008", "009", "010", "011" };
    for (int i = 0; i < results.length; i++) {
      String filename = "${java.io.tmpdir}/partitioning-swimming-lanes-on-cluster-" + files[i] + ".txt";
      String result = loadFileContent(transMeta, filename);
      assertEqualsIgnoreWhitespacesAndCase(results[i], result);
      // Remove the output file: we don't want to leave too much clutter around.
      FileObject file = KettleVFS.getFileObject(transMeta.environmentSubstitute(filename));
      file.delete();
    }
  } catch (Exception e) {
    e.printStackTrace();
    fail(e.toString());
  } finally {
    try {
      clusterGenerator.stopSlaveServers();
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.toString());
    }
  }
}
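All three PartitioningIT tests repeat the same launch/stop lifecycle around ClusterGenerator. One way to remove that duplication is to hoist the lifecycle into JUnit fixtures so each test body holds only the scenario itself; a sketch assuming JUnit 4 annotations (the actual tests inline the lifecycle instead):

private ClusterGenerator clusterGenerator;

@Before
public void setUpCluster() throws Exception {
  init();
  clusterGenerator = new ClusterGenerator();
  clusterGenerator.launchSlaveServers();
}

@After
public void tearDownCluster() throws Exception {
  // Runs even when the test body fails, so slave servers never leak between tests.
  clusterGenerator.stopSlaveServers();
}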
Use of org.pentaho.di.trans.cluster.TransSplitter in project pentaho-kettle by pentaho.
From the class Trans, method executeClustered.
/**
 * Execute the transformation in a clustered fashion. The transformation steps are split and collected in a
 * TransSplitter object.
 *
 * @param transMeta the transformation's meta-data
 * @param executionConfiguration the execution configuration
 * @return the transformation splitter object
 * @throws KettleException the kettle exception
 */
public static TransSplitter executeClustered(final TransMeta transMeta,
    final TransExecutionConfiguration executionConfiguration) throws KettleException {
  if (Utils.isEmpty(transMeta.getName())) {
    throw new KettleException("The transformation needs a name to uniquely identify it by on the remote server.");
  }
  TransSplitter transSplitter = new TransSplitter(transMeta);
  transSplitter.splitOriginalTransformation();
  // Pass the clustered run ID to allow for parallel execution of clustered transformations.
  executionConfiguration.getVariables().put(Const.INTERNAL_VARIABLE_CLUSTER_RUN_ID,
      transSplitter.getClusteredRunId());
  executeClustered(transSplitter, executionConfiguration);
  return transSplitter;
}
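Putting it together: a caller needs a named transformation and a cluster-enabled execution configuration; the splitting, run-ID propagation and submission to the slaves all happen inside executeClustered(). A minimal usage sketch (the .ktr path and log-channel name are hypothetical, and error handling is omitted):

TransMeta transMeta = new TransMeta("clustered-example.ktr"); // must have a name
TransExecutionConfiguration config = new TransExecutionConfiguration();
config.setExecutingClustered(true);
config.setClusterPosting(true);
config.setClusterPreparing(true);
config.setClusterStarting(true);

// Split the transformation, then post and start it on the cluster.
TransSplitter transSplitter = Trans.executeClustered(transMeta, config);
// Poll until the clustered run finishes, checking for errors every second.
long nrErrors = Trans.monitorClusteredTransformation(
    new LogChannel("clustered example"), transSplitter, null, 1);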