use of edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException in project twister2 by DSC-SPIDAL.
the class K8sWorkerUtils method initLogger.
/**
 * Initialize logging to a file for the given entity.
 * entityName can be "jobMaster", "mpiMaster", etc. It is used as the log file name.
 *
 * Nothing is set up when file logging is not requested, or when the logging storage type
 * is "persistent" but no persistent volume exists.
 *
 * @param cnfg configuration used to look up the logging storage type and logging directory
 * @param entityName name of the entity; used as the log file name
 * @param pvExists whether a persistent volume is available
 * @throws Twister2RuntimeException if the log directory does not exist and can not be created
 */
public static void initLogger(Config cnfg, String entityName, boolean pvExists) {
  // if logging to file is not requested, do nothing
  if (!LoggingContext.fileLoggingRequested()) {
    return;
  }
  if (!pvExists && "persistent".equalsIgnoreCase(LoggingContext.loggingStorageType(cnfg))) {
    LOG.warning("Persistent logging is requested but no PersistentVolume provided. " + "Not logging to file");
    return;
  }
  if (LoggingContext.redirectSysOutErr()) {
    LOG.warning("Redirecting System.out and System.err to the log file. " + "Check the log file for the upcoming log messages. ");
  }
  String logDirName = LoggingContext.loggingDir(cnfg);
  File logDir = new File(logDirName);
  // list the parent directory to refresh its cached view before checking for the log directory;
  // getParentFile() returns null when the path has no parent component, so guard against it
  File parentDir = logDir.getParentFile();
  if (parentDir != null) {
    parentDir.list();
  }
  // mkdirs() also returns false when the directory was created concurrently by someone else,
  // so only fail when the directory is really missing afterwards
  if (!logDir.exists() && !logDir.mkdirs() && !logDir.isDirectory()) {
    throw new Twister2RuntimeException("Failed to create the log directory: " + logDir);
  }
  String logFileName = entityName;
  LoggingHelper.setupLogging(cnfg, logDirName, logFileName);
  String logFileWithPath = logDirName + "/" + logFileName + ".log.0";
  LOG.info("Logging to file initialized: " + logFileWithPath);
}
use of edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException in project twister2 by DSC-SPIDAL.
the class Twister2Submitter method restartJob.
/**
 * Restart a Twister2 job.
 *
 * The job package of the previous submission is unpacked into a temp directory, the job
 * description and the previous configuration are loaded from it, and the job is resubmitted
 * with the restore flag set. The temp directory is removed once loading finishes, whether it
 * succeeded or not.
 *
 * @param jobID ID of the job to restart
 * @param config current configuration; used to locate the uploader job directory,
 *               the Twister2 home and the cluster type
 * @return a Twister2JobState constructed with false when the previous job package file
 *         does not exist, otherwise the value returned by ResourceAllocator.resubmitJob()
 * @throws Twister2RuntimeException if the temp directory can not be created or the
 *         previous job directory can not be converted to a URI
 */
public static Twister2JobState restartJob(String jobID, Config config) {
  // job package filename from failed submission
  String prevJobDir = FsContext.uploaderJobDirectory(config) + File.separator + jobID;
  String jobPackage = prevJobDir + File.separator + SchedulerContext.jobPackageFileName(config);
  Path jobPackageFile = Paths.get(jobPackage);
  if (Files.notExists(jobPackageFile)) {
    LOG.severe("Job Package File does not exist: " + jobPackage);
    return new Twister2JobState(false);
  }
  // unpack the previous job package to a temp directory
  Path tempDirPath;
  try {
    tempDirPath = Files.createTempDirectory(jobID);
  } catch (IOException e) {
    throw new Twister2RuntimeException("Can not create temp directory", e);
  }
  JobAPI.Job job;
  Config prevConfig;
  try {
    // todo: we can exclude user-job-file from being unpacked
    // usually that is the latest file, so we can be more efficient
    TarGzipPacker.unpack(jobPackageFile, tempDirPath);
    // load Job object
    String unpackedJobDir = tempDirPath + File.separator + Context.JOB_ARCHIVE_DIRECTORY;
    String jobFile = unpackedJobDir + File.separator + SchedulerContext.createJobDescriptionFileName(jobID);
    job = JobUtils.readJobFile(jobFile);
    // load previous configurations
    prevConfig = ConfigLoader.loadConfig(Context.twister2Home(config), unpackedJobDir, Context.clusterType(config));
  } finally {
    // delete temp directory even when unpacking or loading fails
    deleteTempDirectory(tempDirPath);
  }
  URI packageURI;
  try {
    packageURI = new URI(prevJobDir);
  } catch (URISyntaxException e) {
    throw new Twister2RuntimeException("Can not ceate URI for directory: " + prevJobDir, e);
  }
  // add restore parameter
  // local packages path
  prevConfig = Config.newBuilder()
      .putAll(prevConfig)
      .put(CheckpointingContext.CHECKPOINTING_RESTORE_JOB, true)
      .put(SchedulerContext.TEMPORARY_PACKAGES_PATH, prevJobDir)
      .put(SchedulerContext.USER_JOB_FILE, job.getJobFormat().getJobFile())
      .put(SchedulerContext.JOB_PACKAGE_URI, packageURI)
      .put(Context.TWISTER2_HOME.getKey(), Context.twister2Home(config))
      .put(Context.JOB_ID, jobID)
      .put(Context.TWISTER2_CLUSTER_TYPE, Context.clusterType(config))
      .build();
  writeJobIdToFile(jobID);
  printJobInfo(job, prevConfig);
  // launch the launcher
  ResourceAllocator resourceAllocator = new ResourceAllocator(prevConfig, job);
  return resourceAllocator.resubmitJob();
}

/**
 * Delete the given directory together with everything below it.
 * Files.delete() refuses to remove a non-empty directory, so the tree is walked and
 * children are removed before their parent. A failure is logged and not propagated.
 */
private static void deleteTempDirectory(Path tempDirPath) {
  try {
    // types are fully qualified so that the file's import list does not need to change
    Files.walkFileTree(tempDirPath, new java.nio.file.SimpleFileVisitor<Path>() {
      @Override
      public java.nio.file.FileVisitResult visitFile(
          Path file, java.nio.file.attribute.BasicFileAttributes attrs) throws IOException {
        Files.delete(file);
        return java.nio.file.FileVisitResult.CONTINUE;
      }

      @Override
      public java.nio.file.FileVisitResult postVisitDirectory(Path dir, IOException exc)
          throws IOException {
        if (exc != null) {
          throw exc;
        }
        Files.delete(dir);
        return java.nio.file.FileVisitResult.CONTINUE;
      }
    });
    LOG.info("Unpacked job directory deleted: " + tempDirPath);
  } catch (IOException e) {
    LOG.warning("Exception when deleting temp directory: " + tempDirPath + ": " + e);
  }
}
use of edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException in project twister2 by DSC-SPIDAL.
the class ZKMasterController method workerFailed.
/**
 * Publish a job event announcing that the worker with the given ID has failed.
 *
 * @param workerID ID of the failed worker
 * @throws Twister2RuntimeException wrapping the Twister2Exception raised while publishing
 */
@Override
public void workerFailed(int workerID) {
  // wrap the worker ID in a WorkerFailed message and embed it in a JobEvent
  JobMasterAPI.JobEvent failureEvent = JobMasterAPI.JobEvent.newBuilder()
      .setFailed(JobMasterAPI.WorkerFailed.newBuilder().setWorkerID(workerID).build())
      .build();
  try {
    ZKEventsManager.publishEvent(client, rootPath, jobID, failureEvent);
  } catch (Twister2Exception publishError) {
    // callers of this method do not handle the checked exception; rethrow unchecked
    throw new Twister2RuntimeException(publishError);
  }
}
use of edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException in project twister2 by DSC-SPIDAL.
the class ZKMasterController method jmRestarted.
/**
 * Publish a job event announcing that the job master has restarted.
 * The event carries the number of workers and the job master address held by this object.
 *
 * @throws Twister2RuntimeException wrapping the Twister2Exception raised while publishing
 */
public void jmRestarted() {
  // generate an event and inform all other workers
  JobMasterAPI.JobEvent restartEvent = JobMasterAPI.JobEvent.newBuilder()
      .setJmRestarted(
          JobMasterAPI.JobMasterRestarted.newBuilder()
              .setNumberOfWorkers(numberOfWorkers)
              .setJmAddress(jmAddress)
              .build())
      .build();
  try {
    ZKEventsManager.publishEvent(client, rootPath, jobID, restartEvent);
  } catch (Twister2Exception publishError) {
    throw new Twister2RuntimeException(publishError);
  }
}
use of edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException in project twister2 by DSC-SPIDAL.
the class AbstractPythonProcessor method initialValue.
/**
 * Obtain a Jep instance and load the serialized function into it under the name
 * held in objectId.
 *
 * @return the Jep instance with the function loaded
 * @throws Twister2RuntimeException if any Jep call fails
 */
@Override
protected Jep initialValue() {
  try {
    // get the jep instance for this thread
    final Jep interpreter = JepInstance.getInstance();

    // todo temporary workaround for JepArray issue.
    // This won't be a significant performance bottleneck though
    // the bytes are handed over as a Base64 string and decoded on the Python side
    final String encodedFunction = Base64.getEncoder().encodeToString(bytes);
    interpreter.set("func_bin", encodedFunction);

    final String loadStatement = this.objectId + " = cp.loads(base64.b64decode(func_bin))";
    interpreter.eval(loadStatement);

    // the temporary Python variable is no longer needed once the function is loaded
    interpreter.eval("del func_bin");
    return interpreter;
  } catch (JepException jepError) {
    throw new Twister2RuntimeException("Error in building lambda function", jepError);
  }
}
Aggregations