Search in sources:

Example 6 with TransSplitter

Use of org.pentaho.di.trans.cluster.TransSplitter in project pentaho-kettle by pentaho.

From the class MasterSlaveIT, method runMultipleCopiesOnMultipleSlaves.

/**
 * Reads a CSV file and distributes the rows to 3 step copies running on 3 slave servers.<br>
 */
public void runMultipleCopiesOnMultipleSlaves() throws Exception {
    // Load the clustered transformation and rewire it to the generated slave servers.
    TransMeta meta = loadTransMetaReplaceSlavesInCluster(clusterGenerator, "test/org/pentaho/di/cluster/test-multiple-copies-on-multiple-slaves.ktr");
    TransExecutionConfiguration executionConfig = createClusteredTransExecutionConfiguration();
    // Split the transformation over the cluster and start it.
    TransSplitter splitter = Trans.executeClustered(meta, executionConfig);
    LogChannel log = createLogChannel("cluster unit test <testMultipleCopiesOnMultipleSlaves>");
    // Wait for the clustered run to finish; the return value is the number of errors seen.
    long errorCount = Trans.monitorClusteredTransformation(log, splitter, null, 1);
    assertEquals(0L, errorCount);
    // The transformation writes its result to a temp file; verify the expected content.
    String content = loadFileContent(meta, "${java.io.tmpdir}/test-multiple-copies-on-multiple-slaves.txt");
    assertEqualsIgnoreWhitespacesAndCase("100", content);
}
Also used : TransExecutionConfiguration(org.pentaho.di.trans.TransExecutionConfiguration) TransMeta(org.pentaho.di.trans.TransMeta) LogChannel(org.pentaho.di.core.logging.LogChannel) TransSplitter(org.pentaho.di.trans.cluster.TransSplitter)

Example 7 with TransSplitter

Use of org.pentaho.di.trans.cluster.TransSplitter in project pentaho-kettle by pentaho.

From the class PartitioningIT, method testPartitioningRepartitioningOnCluster.

/**
 * Reads a CSV file in parallel on the cluster, one copy per slave.<br>
 * The data is then partitioned on id into 12 partitions (4 per slave) and afterwards
 * re-partitioned into 9 partitions (3 per slave).<br>
 * NOTE(review): the original javadoc said "we expect 9 result files on disk", but the
 * assertions below check 12 output files ("000".."011") — confirm which is intended.<br>
 * File: "partitioning-repartitioning-on-cluster.ktr"<br>
 */
public void testPartitioningRepartitioningOnCluster() throws Exception {
    init();
    ClusterGenerator clusterGenerator = new ClusterGenerator();
    try {
        clusterGenerator.launchSlaveServers();
        TransMeta transMeta = loadAndModifyTestTransformation(clusterGenerator, "src/it/resources/org/pentaho/di/cluster/partitioning-repartitioning-on-cluster.ktr");
        TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
        TransSplitter transSplitter = Trans.executeClustered(transMeta, config);
        // Fixed copy-paste defect: the log channel was previously labeled
        // <testParallelFileReadOnMaster>, which made cluster logs misleading.
        long nrErrors = Trans.monitorClusteredTransformation(new LogChannel("cluster unit test <testPartitioningRepartitioningOnCluster>"), transSplitter, null, 1);
        assertEquals(0L, nrErrors);
        // Expected row count per output file, indexed by partition number.
        String[] results = { "8", "9", "9", "9", "9", "8", "8", "8", "8", "8", "8", "8" };
        String[] files = { "000", "001", "002", "003", "004", "005", "006", "007", "008", "009", "010", "011" };
        for (int i = 0; i < results.length; i++) {
            String filename = "${java.io.tmpdir}/partitioning-repartitioning-on-cluster-" + files[i] + ".txt";
            String result = loadFileContent(transMeta, filename);
            assertEqualsIgnoreWhitespacesAndCase(results[i], result);
            // Remove the output file: we don't want to leave clutter behind in the temp dir.
            FileObject file = KettleVFS.getFileObject(transMeta.environmentSubstitute(filename));
            file.delete();
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.toString());
    } finally {
        // Always tear the slave servers down, even when the test body failed.
        try {
            clusterGenerator.stopSlaveServers();
        } catch (Exception e) {
            e.printStackTrace();
            fail(e.toString());
        }
    }
}
Also used : TransExecutionConfiguration(org.pentaho.di.trans.TransExecutionConfiguration) TransMeta(org.pentaho.di.trans.TransMeta) LogChannel(org.pentaho.di.core.logging.LogChannel) FileObject(org.apache.commons.vfs2.FileObject) TransSplitter(org.pentaho.di.trans.cluster.TransSplitter)

Example 8 with TransSplitter

Use of org.pentaho.di.trans.cluster.TransSplitter in project pentaho-kettle by pentaho.

From the class PartitioningIT, method testPartitioningRepartitioningOnCluster3.

/**
 * Same as testPartitioningRepartitioningOnCluster() but passing the data to a
 * non-partitioned step on the master.
 *
 * File: "partitioning-repartitioning-on-cluster3.ktr"<br>
 */
public void testPartitioningRepartitioningOnCluster3() throws Exception {
    init();
    ClusterGenerator clusterGenerator = new ClusterGenerator();
    try {
        clusterGenerator.launchSlaveServers();
        TransMeta transMeta = loadAndModifyTestTransformation(clusterGenerator, "src/it/resources/org/pentaho/di/cluster/partitioning-repartitioning-on-cluster3.ktr");
        TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
        TransSplitter transSplitter = Trans.executeClustered(transMeta, config);
        // Fixed copy-paste defect: the log channel was previously labeled
        // <testParallelFileReadOnMaster>, which made cluster logs misleading.
        long nrErrors = Trans.monitorClusteredTransformation(new LogChannel("cluster unit test <testPartitioningRepartitioningOnCluster3>"), transSplitter, null, 1);
        assertEquals(0L, nrErrors);
        // Expected "id;count" pairs, one line per id value.
        String goldenData = "0;16\n1;17\n2;17\n3;17\n4;17\n5;16";
        String filename = "${java.io.tmpdir}/partitioning-repartitioning-on-cluster3.txt";
        String result = loadFileContent(transMeta, filename);
        assertEqualsIgnoreWhitespacesAndCase(goldenData, result);
        // NOTE(review): unlike the sibling tests, the output file is deliberately NOT
        // deleted here (the cleanup was commented out in the original) — confirm whether
        // leaving it behind is intentional.
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.toString());
    } finally {
        // Always tear the slave servers down, even when the test body failed.
        try {
            clusterGenerator.stopSlaveServers();
        } catch (Exception e) {
            e.printStackTrace();
            fail(e.toString());
        }
    }
}
Also used : TransExecutionConfiguration(org.pentaho.di.trans.TransExecutionConfiguration) TransMeta(org.pentaho.di.trans.TransMeta) LogChannel(org.pentaho.di.core.logging.LogChannel) TransSplitter(org.pentaho.di.trans.cluster.TransSplitter)

Example 9 with TransSplitter

Use of org.pentaho.di.trans.cluster.TransSplitter in project pentaho-kettle by pentaho.

From the class PartitioningIT, method testPartitioningSwimmingLanesOnCluster.

/**
 * Reads a CSV file in parallel on the cluster, one copy per slave.<br>
 * The data is then partitioned on id into 12 partitions (4 per slave) and kept
 * partitioned ("swimming lanes") until written to file.<br>
 * As such we expect 12 files on disk.<br>
 * File: "partitioning-swimming-lanes-on-cluster.ktr"<br>
 */
public void testPartitioningSwimmingLanesOnCluster() throws Exception {
    init();
    ClusterGenerator clusterGenerator = new ClusterGenerator();
    try {
        clusterGenerator.launchSlaveServers();
        TransMeta transMeta = loadAndModifyTestTransformation(clusterGenerator, "src/it/resources/org/pentaho/di/cluster/partitioning-swimming-lanes-on-cluster.ktr");
        TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
        TransSplitter transSplitter = Trans.executeClustered(transMeta, config);
        // Fixed copy-paste defect: the log channel was previously labeled
        // <testParallelFileReadOnMaster>, which made cluster logs misleading.
        long nrErrors = Trans.monitorClusteredTransformation(new LogChannel("cluster unit test <testPartitioningSwimmingLanesOnCluster>"), transSplitter, null, 1);
        assertEquals(0L, nrErrors);
        // Expected row count per output file, indexed by partition number.
        String[] results = { "8", "9", "9", "9", "9", "8", "8", "8", "8", "8", "8", "8" };
        String[] files = { "000", "001", "002", "003", "004", "005", "006", "007", "008", "009", "010", "011" };
        for (int i = 0; i < results.length; i++) {
            String filename = "${java.io.tmpdir}/partitioning-swimming-lanes-on-cluster-" + files[i] + ".txt";
            String result = loadFileContent(transMeta, filename);
            assertEqualsIgnoreWhitespacesAndCase(results[i], result);
            // Remove the output file: we don't want to leave clutter behind in the temp dir.
            FileObject file = KettleVFS.getFileObject(transMeta.environmentSubstitute(filename));
            file.delete();
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.toString());
    } finally {
        // Always tear the slave servers down, even when the test body failed.
        try {
            clusterGenerator.stopSlaveServers();
        } catch (Exception e) {
            e.printStackTrace();
            fail(e.toString());
        }
    }
}
Also used : TransExecutionConfiguration(org.pentaho.di.trans.TransExecutionConfiguration) TransMeta(org.pentaho.di.trans.TransMeta) LogChannel(org.pentaho.di.core.logging.LogChannel) FileObject(org.apache.commons.vfs2.FileObject) TransSplitter(org.pentaho.di.trans.cluster.TransSplitter)

Example 10 with TransSplitter

Use of org.pentaho.di.trans.cluster.TransSplitter in project pentaho-kettle by pentaho.

From the class Trans, method executeClustered.

/**
 * Execute the transformation in a clustered fashion: the steps are split over the
 * cluster and collected in a TransSplitter object.
 *
 * @param transMeta
 *          the transformation's meta-data
 * @param executionConfiguration
 *          the execution configuration
 * @return the transformation splitter object describing the clustered run
 * @throws KettleException
 *           the kettle exception
 */
public static TransSplitter executeClustered(final TransMeta transMeta, final TransExecutionConfiguration executionConfiguration) throws KettleException {
    // A name is required so the remote servers can uniquely identify this transformation.
    if (Utils.isEmpty(transMeta.getName())) {
        throw new KettleException("The transformation needs a name to uniquely identify it by on the remote server.");
    }
    final TransSplitter splitter = new TransSplitter(transMeta);
    splitter.splitOriginalTransformation();
    // Propagate the clustered run ID so parallel clustered executions stay distinguishable.
    executionConfiguration.getVariables().put(Const.INTERNAL_VARIABLE_CLUSTER_RUN_ID, splitter.getClusteredRunId());
    executeClustered(splitter, executionConfiguration);
    return splitter;
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) TransSplitter(org.pentaho.di.trans.cluster.TransSplitter)

Aggregations

TransSplitter (org.pentaho.di.trans.cluster.TransSplitter)16 TransMeta (org.pentaho.di.trans.TransMeta)15 TransExecutionConfiguration (org.pentaho.di.trans.TransExecutionConfiguration)14 LogChannel (org.pentaho.di.core.logging.LogChannel)13 Result (org.pentaho.di.core.Result)3 KettleException (org.pentaho.di.core.exception.KettleException)3 FileObject (org.apache.commons.vfs2.FileObject)2 SlaveServer (org.pentaho.di.cluster.SlaveServer)2 KettleExtensionPoint (org.pentaho.di.core.extension.KettleExtensionPoint)2 StepMeta (org.pentaho.di.trans.step.StepMeta)2 ArrayList (java.util.ArrayList)1 Map (java.util.Map)1 ResultFile (org.pentaho.di.core.ResultFile)1 RowMetaAndData (org.pentaho.di.core.RowMetaAndData)1 KettleDatabaseException (org.pentaho.di.core.exception.KettleDatabaseException)1 KettleXMLException (org.pentaho.di.core.exception.KettleXMLException)1 Point (org.pentaho.di.core.gui.Point)1 LogChannelFileWriter (org.pentaho.di.core.logging.LogChannelFileWriter)1 LogLevel (org.pentaho.di.core.logging.LogLevel)1 NamedParams (org.pentaho.di.core.parameters.NamedParams)1