Search in sources :

Example 11 with PartitionSchema

use of org.pentaho.di.partition.PartitionSchema in project pentaho-kettle by pentaho.

the class TransSplitter method generateSlavePartitionSchemas.

/**
 * Distributes the partitions of every partitioned, clustered reference step over the slave servers of that step's
 * cluster schema. The result is stored in {@code slaveServerPartitionsMap}, which maps each slave server to the
 * partition schemas it serves and, per schema, the partition IDs assigned to it.
 * <p>
 * Partitions are handed out in list order, one slave server after the other, wrapping around at the end of the
 * server list. When the server at the current position is a master, the next server in the list is used instead.
 *
 * @throws KettleException
 *           if a step has fewer partitions than its cluster schema has slave servers
 */
private void generateSlavePartitionSchemas() throws KettleException {
    slaveServerPartitionsMap = new Hashtable<SlaveServer, Map<PartitionSchema, List<String>>>();

    for (StepMeta referenceStep : referenceSteps) {
        // Only partitioned steps that run on a cluster are of interest here.
        StepPartitioningMeta partitioning = referenceStep.getStepPartitioningMeta();
        if (partitioning == null || partitioning.getMethodType() == StepPartitioningMeta.PARTITIONING_METHOD_NONE) {
            continue;
        }
        ClusterSchema cluster = referenceStep.getClusterSchema();
        if (cluster == null) {
            continue;
        }

        // Work on a clone so the original transformation is left untouched; the clone keeps the schema name,
        // which is what the matching is done on.
        PartitionSchema schemaCopy = (PartitionSchema) partitioning.getPartitionSchema().clone();

        int slaveCount = cluster.findNrSlaves();
        if (slaveCount == 0) {
            // A cluster without slaves: nothing to distribute.
            continue;
        }

        if (schemaCopy.isDynamicallyDefined()) {
            schemaCopy.expandPartitionsDynamically(slaveCount, originalTransformation);
        }

        int partitionCount = schemaCopy.getPartitionIDs().size();
        if (partitionCount < slaveCount) {
            throw new KettleException("It doesn't make sense to have a partitioned, clustered step with less partitions (" + partitionCount + ") than that there are slave servers (" + slaveCount + ")");
        }

        List<SlaveServer> servers = cluster.getSlaveServers();
        int serverIndex = 0;
        for (String partitionId : schemaCopy.getPartitionIDs()) {
            SlaveServer target = servers.get(serverIndex);
            if (target.isMaster()) {
                // Step over the master and take the next server, wrapping around if needed.
                serverIndex = (serverIndex + 1) % servers.size();
                target = servers.get(serverIndex);
            }

            // Look up (or lazily create) the schema -> partitions map of this slave server.
            Map<PartitionSchema, List<String>> partitionsBySchema = slaveServerPartitionsMap.get(target);
            if (partitionsBySchema == null) {
                partitionsBySchema = new HashMap<PartitionSchema, List<String>>();
                slaveServerPartitionsMap.put(target, partitionsBySchema);
            }

            // Look up (or lazily create) the partition list for this schema.
            List<String> assignedIds = partitionsBySchema.get(schemaCopy);
            if (assignedIds == null) {
                assignedIds = new ArrayList<String>();
                partitionsBySchema.put(schemaCopy, assignedIds);
            }

            // Register each partition ID only once per slave server and schema.
            if (!assignedIds.contains(partitionId)) {
                assignedIds.add(partitionId);
            }

            // Move on to the next slave server, restarting at the head of the list after the last one.
            serverIndex = (serverIndex + 1) % servers.size();
        }
    }
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) PartitionSchema(org.pentaho.di.partition.PartitionSchema) SlaveServer(org.pentaho.di.cluster.SlaveServer) StepPartitioningMeta(org.pentaho.di.trans.step.StepPartitioningMeta) StepMeta(org.pentaho.di.trans.step.StepMeta) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) ClusterSchema(org.pentaho.di.cluster.ClusterSchema)

Example 12 with PartitionSchema

use of org.pentaho.di.partition.PartitionSchema in project pentaho-kettle by pentaho.

the class TransSplitter method addSlaveCopy.

/**
 * Clones a step of the original transformation and adds the clone to the given (slave) transformation.
 * <p>
 * For a partitioned step, the clone is switched to the slave variant of its partition schema (the name is derived
 * with {@code createSlavePartitionSchemaName}) when the target transformation contains such a schema, and its
 * number of copies is forced to 1.
 *
 * @param transMeta
 *          The transformation the copy is added to; also searched for the slave partition schema.
 * @param stepMeta
 *          The step to copy / clone.
 * @param slaveServer
 *          The slave server the copy is intended for (not used by this method).
 * @return the copy of the specified step that was added to {@code transMeta}.
 */
private StepMeta addSlaveCopy(TransMeta transMeta, StepMeta stepMeta, SlaveServer slaveServer) {
    StepMeta slaveStep = (StepMeta) stepMeta.clone();

    // Non-partitioned steps are added as plain clones.
    if (!slaveStep.isPartitioned()) {
        transMeta.addStep(slaveStep);
        return slaveStep;
    }

    StepPartitioningMeta partitioning = slaveStep.getStepPartitioningMeta();
    String slaveSchemaName = createSlavePartitionSchemaName(partitioning.getPartitionSchema().getName());
    PartitionSchema slaveSchema = transMeta.findPartitionSchema(slaveSchemaName);
    if (slaveSchema != null) {
        partitioning.setPartitionSchema(slaveSchema);
    }

    // Always start just a single copy on the slave server; more copies would confuse
    // partitioning and re-partitioning.
    slaveStep.setCopies(1);

    transMeta.addStep(slaveStep);
    return slaveStep;
}
Also used : PartitionSchema(org.pentaho.di.partition.PartitionSchema) StepPartitioningMeta(org.pentaho.di.trans.step.StepPartitioningMeta) StepMeta(org.pentaho.di.trans.step.StepMeta)

Example 13 with PartitionSchema

use of org.pentaho.di.partition.PartitionSchema in project pentaho-kettle by pentaho.

the class KettleDatabaseRepositoryTransDelegate method saveTransformation.

/**
 * Saves the transformation to a repository.
 * <p>
 * The parts are written in this order: notes, database connections, steps, hops, the top level transformation
 * record and its parameters, partition schemas, slave servers, cluster schemas, dependencies, the attributes map
 * and finally the step error handling information. On success the repository is unlocked and committed and the
 * changed flag of the transformation is cleared. If a {@link KettleDatabaseException} occurs (including a
 * cancellation requested through the monitor), the repository is rolled back and the error is rethrown wrapped in
 * a {@link KettleException}.
 *
 * @param transMeta
 *          the transformation metadata to store
 * @param versionComment
 *          the version comment passed along when saving associated objects (database connections, partition
 *          schemas, slave servers and cluster schemas)
 * @param monitor
 *          the way we report progress to the user, can be null if no UI is present
 * @param overwriteAssociated
 *          Overwrite existing object(s)?
 * @throws KettleException
 *           if an error occurs, if no valid repository directory is set, or if the user cancels the save.
 */
public void saveTransformation(TransMeta transMeta, String versionComment, ProgressMonitorListener monitor, boolean overwriteAssociated) throws KettleException {
    try {
        if (monitor != null) {
            monitor.subTask(BaseMessages.getString(PKG, "TransMeta.Monitor.LockingRepository"));
        }
        repository.insertLogEntry("save transformation '" + transMeta.getName() + "'");
        // Clear attribute id cache
        // force repository lookup.
        repository.connectionDelegate.clearNextIDCounters();
        // Do we have a valid directory?
        if (transMeta.getRepositoryDirectory().getObjectId() == null) {
            throw new KettleException(BaseMessages.getString(PKG, "TransMeta.Exception.PlsSelectAValidDirectoryBeforeSavingTheTransformation"));
        }
        // One unit of work per database, note, step and hop, plus two for the bookkeeping at start and end.
        int nrWorks = 2 + transMeta.nrDatabases() + transMeta.nrNotes() + transMeta.nrSteps() + transMeta.nrTransHops();
        if (monitor != null) {
            monitor.beginTask(BaseMessages.getString(PKG, "TransMeta.Monitor.SavingTransformationTask.Title") + transMeta.getPathAndName(), nrWorks);
        }
        if (log.isDebug()) {
            log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.SavingOfTransformationStarted"));
        }
        // Same cancellation message as every later check, so the rollback log states the reason.
        abortIfSaveCanceled(monitor);
        if (monitor != null) {
            monitor.subTask(BaseMessages.getString(PKG, "TransMeta.Monitor.HandlingOldVersionTransformationTask.Title"));
        }
        // If no valid id is available in the database, assign one...
        if (transMeta.getObjectId() == null) {
            transMeta.setObjectId(repository.connectionDelegate.getNextTransformationID());
        } else {
            // An id already exists: remove the old version of this transformation from the database
            // before we put it back in...
            if (monitor != null) {
                monitor.subTask(BaseMessages.getString(PKG, "TransMeta.Monitor.DeletingOldVersionTransformationTask.Title"));
            }
            if (log.isDebug()) {
                log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.DeletingOldVersionTransformation"));
            }
            repository.deleteTransformation(transMeta.getObjectId());
            if (log.isDebug()) {
                log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.OldVersionOfTransformationRemoved"));
            }
        }
        if (monitor != null) {
            monitor.worked(1);
        }

        // Save the notes
        if (log.isDebug()) {
            log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.SavingNotes"));
        }
        for (int i = 0; i < transMeta.nrNotes(); i++) {
            abortIfSaveCanceled(monitor);
            NotePadMeta ni = transMeta.getNote(i);
            repository.saveNotePadMeta(ni, transMeta.getObjectId());
            // Only link the note to the transformation when saving gave it an id.
            if (ni.getObjectId() != null) {
                repository.insertTransNote(transMeta.getObjectId(), ni.getObjectId());
            }
            if (monitor != null) {
                monitor.worked(1);
            }
        }

        // Save the database connections
        if (log.isDebug()) {
            log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.SavingDatabaseConnections"));
        }
        for (int i = 0; i < transMeta.nrDatabases(); i++) {
            abortIfSaveCanceled(monitor);
            DatabaseMeta databaseMeta = transMeta.getDatabase(i);
            // Save the connection when overwriting is requested, when it has changed,
            // or when it has no object id yet.
            if (overwriteAssociated || databaseMeta.hasChanged() || databaseMeta.getObjectId() == null) {
                repository.save(databaseMeta, versionComment, monitor, overwriteAssociated);
            }
            if (monitor != null) {
                monitor.worked(1);
            }
        }

        // It is possible that we received another step through a plugin.
        if (log.isDebug()) {
            log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.CheckingStepTypes"));
        }
        repository.updateStepTypes();
        repository.updateDatabaseTypes();

        // Save the steps
        if (log.isDebug()) {
            log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.SavingSteps"));
        }
        for (int i = 0; i < transMeta.nrSteps(); i++) {
            abortIfSaveCanceled(monitor);
            StepMeta stepMeta = transMeta.getStep(i);
            repository.stepDelegate.saveStepMeta(stepMeta, transMeta.getObjectId());
            if (monitor != null) {
                monitor.worked(1);
            }
        }
        repository.connectionDelegate.closeStepAttributeInsertPreparedStatement();

        // Save the hops
        if (log.isDebug()) {
            log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.SavingHops"));
        }
        for (int i = 0; i < transMeta.nrTransHops(); i++) {
            abortIfSaveCanceled(monitor);
            TransHopMeta hi = transMeta.getTransHop(i);
            saveTransHopMeta(hi, transMeta.getObjectId());
            if (monitor != null) {
                monitor.worked(1);
            }
        }

        if (log.isDebug()) {
            log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.SavingTransformationInfo"));
        }
        // save the top level information for the transformation
        insertTransformation(transMeta);
        saveTransParameters(transMeta);
        repository.connectionDelegate.closeTransAttributeInsertPreparedStatement();

        // Save the partition schemas
        for (int i = 0; i < transMeta.getPartitionSchemas().size(); i++) {
            abortIfSaveCanceled(monitor);
            PartitionSchema partitionSchema = transMeta.getPartitionSchemas().get(i);
            // See if this transformation really is a consumer of this object
            // It might be simply loaded as a shared object from the repository
            //
            boolean isUsedByTransformation = transMeta.isUsingPartitionSchema(partitionSchema);
            repository.save(partitionSchema, versionComment, null, transMeta.getObjectId(), isUsedByTransformation, overwriteAssociated);
        }

        // Save the slave servers
        for (int i = 0; i < transMeta.getSlaveServers().size(); i++) {
            abortIfSaveCanceled(monitor);
            SlaveServer slaveServer = transMeta.getSlaveServers().get(i);
            boolean isUsedByTransformation = transMeta.isUsingSlaveServer(slaveServer);
            repository.save(slaveServer, versionComment, null, transMeta.getObjectId(), isUsedByTransformation, overwriteAssociated);
        }

        // Save the clustering schemas
        for (int i = 0; i < transMeta.getClusterSchemas().size(); i++) {
            abortIfSaveCanceled(monitor);
            ClusterSchema clusterSchema = transMeta.getClusterSchemas().get(i);
            boolean isUsedByTransformation = transMeta.isUsingClusterSchema(clusterSchema);
            repository.save(clusterSchema, versionComment, null, transMeta.getObjectId(), isUsedByTransformation, overwriteAssociated);
        }

        // Save the dependencies
        if (log.isDebug()) {
            log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.SavingDependencies"));
        }
        for (int i = 0; i < transMeta.nrDependencies(); i++) {
            abortIfSaveCanceled(monitor);
            TransDependency td = transMeta.getDependency(i);
            saveTransDependency(td, transMeta.getObjectId());
        }

        saveTransAttributesMap(transMeta.getObjectId(), transMeta.getAttributesMap());

        // Save the step error handling information as well!
        for (int i = 0; i < transMeta.nrSteps(); i++) {
            StepMeta stepMeta = transMeta.getStep(i);
            StepErrorMeta stepErrorMeta = stepMeta.getStepErrorMeta();
            if (stepErrorMeta != null) {
                repository.stepDelegate.saveStepErrorMeta(stepErrorMeta, transMeta.getObjectId(), stepMeta.getObjectId());
            }
        }
        repository.connectionDelegate.closeStepAttributeInsertPreparedStatement();

        if (log.isDebug()) {
            log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.SavingFinished"));
        }
        if (monitor != null) {
            monitor.subTask(BaseMessages.getString(PKG, "TransMeta.Monitor.UnlockingRepository"));
        }
        repository.unlockRepository();
        // Perform a commit!
        repository.commit();
        transMeta.clearChanged();
        if (monitor != null) {
            monitor.worked(1);
            monitor.done();
        }
    } catch (KettleDatabaseException dbe) {
        // Oops, roll back!
        repository.rollback();
        log.logError(BaseMessages.getString(PKG, "TransMeta.Log.ErrorSavingTransformationToRepository") + Const.CR + dbe.getMessage());
        throw new KettleException(BaseMessages.getString(PKG, "TransMeta.Log.ErrorSavingTransformationToRepository"), dbe);
    }
}

/**
 * Aborts a running save when a progress monitor is present and reports that the user cancelled.
 *
 * @param monitor
 *          the progress monitor to consult, may be null
 * @throws KettleDatabaseException
 *           carrying the "user cancelled" message if the monitor reports cancellation
 */
private void abortIfSaveCanceled(ProgressMonitorListener monitor) throws KettleDatabaseException {
    if (monitor != null && monitor.isCanceled()) {
        throw new KettleDatabaseException(BaseMessages.getString(PKG, "TransMeta.Log.UserCancelledTransSave"));
    }
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) PartitionSchema(org.pentaho.di.partition.PartitionSchema) KettleDatabaseException(org.pentaho.di.core.exception.KettleDatabaseException) StepErrorMeta(org.pentaho.di.trans.step.StepErrorMeta) TransDependency(org.pentaho.di.trans.TransDependency) SlaveServer(org.pentaho.di.cluster.SlaveServer) DatabaseMeta(org.pentaho.di.core.database.DatabaseMeta) StepMeta(org.pentaho.di.trans.step.StepMeta) NotePadMeta(org.pentaho.di.core.NotePadMeta) TransHopMeta(org.pentaho.di.trans.TransHopMeta) ClusterSchema(org.pentaho.di.cluster.ClusterSchema)

Example 14 with PartitionSchema

use of org.pentaho.di.partition.PartitionSchema in project pentaho-kettle by pentaho.

the class KettleDatabaseRepositoryPartitionSchemaDelegate method loadPartitionSchema.

/**
 * Builds a partition schema from the repository: its name, its partition IDs, the dynamic-definition flag and the
 * partitions-per-slave setting.
 *
 * @param id_partition_schema
 *          the repository ID of the partition schema to load
 * @return a new partition schema carrying the given ID and the values read from the repository
 * @throws KettleException
 *           declared by this method; presumably raised by the repository lookups it performs
 */
public PartitionSchema loadPartitionSchema(ObjectId id_partition_schema) throws KettleException {
    PartitionSchema schema = new PartitionSchema();
    schema.setObjectId(id_partition_schema);

    // The schema's own row holds the name and the dynamic partitioning settings.
    RowMetaAndData schemaRow = getPartitionSchema(id_partition_schema);
    String schemaName = schemaRow.getString("NAME", null);
    schema.setName(schemaName);

    // Every partition is stored in its own row; collect the IDs in repository order.
    for (ObjectId partitionRowId : repository.getPartitionIDs(id_partition_schema)) {
        String partitionId = getPartition(partitionRowId).getString("PARTITION_ID", null);
        schema.getPartitionIDs().add(partitionId);
    }

    boolean dynamic = schemaRow.getBoolean("DYNAMIC_DEFINITION", false);
    schema.setDynamicallyDefined(dynamic);
    String partitionsPerSlave = schemaRow.getString("PARTITIONS_PER_SLAVE", null);
    schema.setNumberOfPartitionsPerSlave(partitionsPerSlave);
    return schema;
}
Also used : RowMetaAndData(org.pentaho.di.core.RowMetaAndData) PartitionSchema(org.pentaho.di.partition.PartitionSchema) ObjectId(org.pentaho.di.repository.ObjectId)

Example 15 with PartitionSchema

use of org.pentaho.di.partition.PartitionSchema in project pentaho-kettle by pentaho.

the class KettleDatabaseRepositoryTransDelegate method loadTransHopMeta.

/**
 * Loads a transformation hop from the repository and connects it to the matching steps in the supplied list.
 * <p>
 * Each end of the hop is first looked up in {@code steps} by its repository ID. If that fails and the stored ID is
 * positive, the step is loaded from the repository just to obtain its name, and the lookup is retried by name.
 * Both resolved steps are flagged to be drawn.
 *
 * @param id_trans_hop
 *          the repository ID of the hop to load
 * @param steps
 *          the steps among which the source and target of the hop are searched
 * @return the loaded hop, or {@code null} if its source or target step cannot be determined (invalid hops are
 *         ignored, see PDI-2446)
 * @throws KettleException
 *           if a database error occurs while reading the hop
 */
public TransHopMeta loadTransHopMeta(ObjectId id_trans_hop, List<StepMeta> steps) throws KettleException {
    TransHopMeta hopTransMeta = new TransHopMeta();
    try {
        hopTransMeta.setObjectId(id_trans_hop);
        RowMetaAndData r = getTransHop(id_trans_hop);
        hopTransMeta.setEnabled(r.getBoolean("ENABLED", false));
        long id_step_from = r.getInteger("ID_STEP_FROM", 0);
        long id_step_to = r.getInteger("ID_STEP_TO", 0);

        // Resolve the source step: by ID first, then by name.
        StepMeta fromStep = StepMeta.findStep(steps, new LongObjectId(id_step_from));
        if (fromStep == null && id_step_from > 0) {
            // Simply load this, we only want the name, we don't care about the
            // rest...
            //
            StepMeta stepMeta = repository.stepDelegate.loadStepMeta(new LongObjectId(id_step_from), new ArrayList<DatabaseMeta>(), new ArrayList<PartitionSchema>());
            fromStep = StepMeta.findStep(steps, stepMeta.getName());
        }
        if (fromStep == null) {
            log.logError("Unable to determine source step of transformation hop with ID: " + id_trans_hop);
            // Invalid hop, simply ignore. See: PDI-2446
            return null;
        }
        hopTransMeta.setFromStep(fromStep);
        hopTransMeta.getFromStep().setDraw(true);

        // Resolve the target step: by ID first, then by name.
        hopTransMeta.setToStep(StepMeta.findStep(steps, new LongObjectId(id_step_to)));
        if (hopTransMeta.getToStep() == null && id_step_to > 0) {
            // Simply load this, we only want the name, we don't care about
            // the rest...
            StepMeta stepMeta = repository.stepDelegate.loadStepMeta(new LongObjectId(id_step_to), new ArrayList<DatabaseMeta>(), new ArrayList<PartitionSchema>());
            hopTransMeta.setToStep(StepMeta.findStep(steps, stepMeta.getName()));
        }
        // The source step is known to be non-null here, so it is the target step that must be checked;
        // without this guard an unresolved target would cause a NullPointerException on setDraw below.
        if (hopTransMeta.getToStep() == null) {
            log.logError("Unable to determine target step of transformation hop with ID: " + id_trans_hop);
            // Invalid hop, simply ignore, just like a hop without a source step.
            return null;
        }
        hopTransMeta.getToStep().setDraw(true);
        return hopTransMeta;
    } catch (KettleDatabaseException dbe) {
        throw new KettleException(BaseMessages.getString(PKG, "TransHopMeta.Exception.LoadTransformationHopInfo") + id_trans_hop, dbe);
    }
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) RowMetaAndData(org.pentaho.di.core.RowMetaAndData) PartitionSchema(org.pentaho.di.partition.PartitionSchema) KettleDatabaseException(org.pentaho.di.core.exception.KettleDatabaseException) TransHopMeta(org.pentaho.di.trans.TransHopMeta) LongObjectId(org.pentaho.di.repository.LongObjectId) StepMeta(org.pentaho.di.trans.step.StepMeta) DatabaseMeta(org.pentaho.di.core.database.DatabaseMeta)

Aggregations

PartitionSchema (org.pentaho.di.partition.PartitionSchema)74 KettleException (org.pentaho.di.core.exception.KettleException)26 TransMeta (org.pentaho.di.trans.TransMeta)19 StepMeta (org.pentaho.di.trans.step.StepMeta)19 ClusterSchema (org.pentaho.di.cluster.ClusterSchema)18 SlaveServer (org.pentaho.di.cluster.SlaveServer)18 DatabaseMeta (org.pentaho.di.core.database.DatabaseMeta)17 StepPartitioningMeta (org.pentaho.di.trans.step.StepPartitioningMeta)17 Test (org.junit.Test)16 ArrayList (java.util.ArrayList)15 ObjectId (org.pentaho.di.repository.ObjectId)14 ErrorDialog (org.pentaho.di.ui.core.dialog.ErrorDialog)10 List (java.util.List)8 KettleExtensionPoint (org.pentaho.di.core.extension.KettleExtensionPoint)8 KettleDatabaseException (org.pentaho.di.core.exception.KettleDatabaseException)6 StringObjectId (org.pentaho.di.repository.StringObjectId)6 PartitionSchemaDialog (org.pentaho.di.ui.partition.dialog.PartitionSchemaDialog)6 MessageBox (org.eclipse.swt.widgets.MessageBox)5 Point (org.pentaho.di.core.gui.Point)5 IOException (java.io.IOException)4