use of org.apache.commons.lang.mutable.MutableBoolean in project incubator-systemml by apache.
the class CacheableData method exportData.
/**
 * Exports this cacheable data object to the given file name in the given output format.
 *
 * Synchronized because there might be parallel threads (parfor local) that
 * access the same object (in case it was created before the loop).
 * If all threads export the same data object concurrently it results in errors
 * because they all write to the same file. Efficiency for loops and parallel threads
 * is achieved by checking if the in-memory block is dirty.
 *
 * NOTE: MB: we do not use dfs copy from local (evicted) to HDFS because this would ignore
 * the output format and most importantly would bypass reblocking during write (which affects the
 * potential degree of parallelism). However, we copy files on HDFS if certain criteria are given.
 *
 * Depending on the object state, one of four paths is taken:
 * (1) the object is dirty, or this is a write to a different file in a different format:
 *     the data is brought into memory and written via writeBlobToHDFS;
 * (2) write to a different file in the same format: the existing HDFS file is copied,
 *     or the RDD is written directly if it does not allow short-circuit reads;
 * (3) a pending RDD operation exists: the RDD is written to HDFS and marked as not pending;
 * (4) otherwise nothing is exported.
 *
 * @param fName target file name; if it equals the object's own HDFS file name, the call is
 *              treated as a regular export, otherwise as a persistent write
 * @param outputFormat output format
 * @param replication value handed to writeBlobToHDFS (only used in path 1); presumably the
 *                    HDFS replication factor - TODO confirm
 * @param formatProperties file format properties
 * @throws CacheException if the object is not available to read, if the GPU copies are in an
 *                        inconsistent state, or if reading or exporting the data fails
 */
public synchronized void exportData(String fName, String outputFormat, int replication, FileFormatProperties formatProperties) throws CacheException {
    if (LOG.isTraceEnabled())
        LOG.trace("Export data " + getVarName() + " " + fName);
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;

    //prevent concurrent modifications
    if (!isAvailableToRead())
        throw new CacheException("MatrixObject not available to read.");

    //guarded like the trace call above to avoid building the message when tracing is off
    if (LOG.isTraceEnabled())
        LOG.trace("Exporting " + this.getDebugName() + " to " + fName + " in format " + outputFormat);

    //TODO remove
    //NOTE(review): copiedFromGPU is overwritten (not OR-ed) per GPU object, so the
    //"dirty on more than 1 GPU" check only looks at the immediately preceding copy result - confirm intent
    boolean copiedFromGPU = false;
    for (Map.Entry<GPUContext, GPUObject> kv : _gpuObjects.entrySet()) {
        GPUObject gObj = kv.getValue();
        if (gObj != null && copiedFromGPU && gObj.isDirty()) {
            LOG.error("Inconsistent internal state - A copy of this CacheableData was dirty on more than 1 GPU");
            throw new CacheException("Internal Error : Inconsistent internal state, A copy of this CacheableData was dirty on more than 1 GPU");
        } else if (gObj != null) {
            copiedFromGPU = gObj.acquireHostRead();
            if (_data == null)
                getCache();
        }
    }

    //persistent write flag: true if export is called from "write" instruction,
    //i.e., the target differs from this object's own hdfs file
    boolean pWrite = !fName.equals(_hdfsFileName);
    if (!pWrite)
        setHDFSFileExists(true);

    //actual export (note: no direct transfer of local copy in order to ensure blocking (and hence, parallelism))
    if (isDirty() //use dirty for skipping parallel exports
        || (pWrite && !isEqualOutputFormat(outputFormat))) {
        //CASE 1: dirty in-memory data or write in a different output format
        // a) get the matrix
        if (isEmpty(true)) {
            //note: for large rdd outputs, we compile dedicated writespinstructions (no need to handle this here)
            try {
                if (getRDDHandle() == null || getRDDHandle().allowsShortCircuitRead())
                    _data = readBlobFromHDFS(_hdfsFileName);
                else
                    _data = readBlobFromRDD(getRDDHandle(), new MutableBoolean());
                setDirty(false);
            } catch (IOException e) {
                throw new CacheException("Reading of " + _hdfsFileName + " (" + getVarName() + ") failed.", e);
            }
        }
        //get object from cache
        if (_data == null)
            getCache();
        //incl. read matrix if evicted
        acquire(false, _data == null);

        // b) write the matrix
        try {
            writeMetaData(fName, outputFormat, formatProperties);
            writeBlobToHDFS(fName, outputFormat, replication, formatProperties);
            //only a regular export makes the in-memory data consistent with its own hdfs file
            if (!pWrite)
                setDirty(false);
        } catch (Exception e) {
            throw new CacheException("Export to " + fName + " failed.", e);
        } finally {
            release();
        }
    } else if (pWrite) { // pwrite with same output format
        //CASE 2: matrix already in same format but different file on hdfs (copy matrix to fname)
        try {
            MapReduceTool.deleteFileIfExistOnHDFS(fName);
            MapReduceTool.deleteFileIfExistOnHDFS(fName + ".mtd");
            if (getRDDHandle() == null || getRDDHandle().allowsShortCircuitRead())
                MapReduceTool.copyFileOnHDFS(_hdfsFileName, fName);
            else
                //write might trigger rdd operations and nnz maintenance
                writeBlobFromRDDtoHDFS(getRDDHandle(), fName, outputFormat);
            writeMetaData(fName, outputFormat, formatProperties);
        } catch (Exception e) {
            throw new CacheException("Export to " + fName + " failed.", e);
        }
    } else if (getRDDHandle() != null && getRDDHandle().isPending() && !getRDDHandle().isHDFSFile() && !getRDDHandle().allowsShortCircuitRead()) {
        //CASE 3: pending rdd operation (other than checkpoints)
        try {
            //write matrix or frame
            writeBlobFromRDDtoHDFS(getRDDHandle(), fName, outputFormat);
            writeMetaData(fName, outputFormat, formatProperties);
            //update rdd status
            getRDDHandle().setPending(false);
        } catch (Exception e) {
            throw new CacheException("Export to " + fName + " failed.", e);
        }
    } else {
        //CASE 4: data already in hdfs (do nothing, no need for export)
        if (LOG.isTraceEnabled())
            LOG.trace(this.getDebugName() + ": Skip export to hdfs since data already exists.");
    }

    if (DMLScript.STATISTICS) {
        long t1 = System.nanoTime();
        CacheStatistics.incrementExportTime(t1 - t0);
    }
}
use of org.apache.commons.lang.mutable.MutableBoolean in project incubator-systemml by apache.
the class CacheableData method acquireRead.
// *********************************************
// *** ***
// *** HIGH-LEVEL METHODS THAT SPECIFY ***
// *** THE LOCKING AND CACHING INTERFACE ***
// *** ***
// *********************************************
/**
 * Takes a shared "read-only" lock on this object and hands back the reference to its
 * cache block. If the block is not in memory it is first restored from the cache, copied
 * back from a GPU object, or read from HDFS / an RDD.
 *
 * The method is synchronized because parallel threads (parfor local) may access the
 * same object (in case it was created before the loop).
 *
 * In-Status: EMPTY, EVICTABLE, EVICTED, READ;
 * Out-Status: READ(+1).
 *
 * @return cacheable data
 * @throws CacheException if the object is not available to read, if the GPU copies are in
 *                        an inconsistent state, if no file name is set for an HDFS read,
 *                        or if reading the data fails
 */
public synchronized T acquireRead() throws CacheException {
    if (LOG.isTraceEnabled())
        LOG.trace("Acquire read " + getVarName());

    //start time is only taken when statistics are enabled
    long startNanos = 0;
    if (DMLScript.STATISTICS)
        startNanos = System.nanoTime();

    if (!isAvailableToRead())
        throw new CacheException("MatrixObject not available to read.");

    //try to obtain the block from the cache first
    if (_data == null)
        getCache();

    //request a host copy from every non-null gpu object
    //NOTE(review): the flag is overwritten (not OR-ed) per GPU object, so the consistency check
    //only looks at the result of the immediately preceding copy - confirm intent
    boolean hostCopyMade = false;
    for (Map.Entry<GPUContext, GPUObject> entry : _gpuObjects.entrySet()) {
        GPUObject gpuObj = entry.getValue();
        if (gpuObj == null)
            continue;
        if (hostCopyMade && gpuObj.isDirty()) {
            LOG.error("Inconsistent internal state - A copy of this CacheableData was dirty on more than 1 GPU");
            throw new CacheException("Internal Error : Inconsistent internal state, A copy of this CacheableData was dirty on more than 1 GPU");
        }
        hostCopyMade = gpuObj.acquireHostRead();
        if (_data == null)
            getCache();
    }

    //(probe data for cache_nowrite / jvm_reuse)
    if (isEmpty(true) && _data == null) {
        try {
            if (DMLScript.STATISTICS)
                CacheStatistics.incrementHDFSHits();

            if (getRDDHandle() != null && !getRDDHandle().allowsShortCircuitRead()) {
                //read from rdd (incl execute pending rdd operations)
                MutableBoolean rddWriteStatus = new MutableBoolean();
                _data = readBlobFromRDD(getRDDHandle(), rddWriteStatus);
                //mark for initial local write (prevent repeated execution of rdd operations)
                if (rddWriteStatus.booleanValue())
                    _requiresLocalWrite = CACHING_WRITE_CACHE_ON_READ;
                else
                    _requiresLocalWrite = true;
            } else {
                //a file name is mandatory for the hdfs read
                if (_hdfsFileName == null)
                    throw new CacheException("Cannot read matrix for empty filename.");
                _data = readBlobFromHDFS(_hdfsFileName);
                //mark for initial local write despite read operation
                _requiresLocalWrite = CACHING_WRITE_CACHE_ON_READ;
            }
            setDirty(false);
        } catch (IOException e) {
            throw new CacheException("Reading of " + _hdfsFileName + " (" + getVarName() + ") failed.", e);
        }
        _isAcquireFromEmpty = true;
    } else if (DMLScript.STATISTICS && _data != null) {
        CacheStatistics.incrementMemHits();
    }

    //cache status maintenance
    acquire(false, _data == null);
    updateStatusPinned(true);

    if (DMLScript.STATISTICS)
        CacheStatistics.incrementAcquireRTime(System.nanoTime() - startNanos);

    return _data;
}
use of org.apache.commons.lang.mutable.MutableBoolean in project voldemort by voldemort.
the class VersionedPutPruneJob method operate.
/**
 * Iterates over all keys of the store named {@code storeName} and, for each key, prunes
 * the stored versions via {@code pruneNonReplicaEntries} against the key's current
 * replica list. Values are written back (after version resolution) only when pruning
 * actually changed something; otherwise the key lock is simply released.
 *
 * Nothing is done when the store is not writable according to {@code isWritableStore}.
 *
 * @throws VoldemortException if no store definition exists for {@code storeName}
 * @throws Exception if scanning, pruning or writing back a key fails
 */
@Override
public void operate() throws Exception {
    StoreDefinition storeDef = StoreDefinitionUtils.getStoreDefinitionWithName(metadataStore.getStoreDefList(), storeName);
    if (storeDef == null) {
        throw new VoldemortException("Unknown store " + storeName);
    }
    if (!isWritableStore(storeDef)) {
        return;
    }

    // Lets generate routing strategy for this storage engine
    StoreRoutingPlan routingPlan = new StoreRoutingPlan(metadataStore.getCluster(), storeDef);
    logger.info("Pruning store " + storeDef.getName());
    StorageEngine<ByteArray, byte[], byte[]> engine = storeRepo.getStorageEngine(storeDef.getName());
    iterator = engine.keys();

    long itemsScanned = 0;
    long numPrunedKeys = 0;
    while (iterator.hasNext()) {
        ByteArray key = iterator.next();
        KeyLockHandle<byte[]> lockHandle = null;
        try {
            lockHandle = engine.getAndLock(key);
            List<Versioned<byte[]>> vals = lockHandle.getValues();
            List<Integer> keyReplicas = routingPlan.getReplicationNodeList(routingPlan.getMasterPartitionId(key.get()));
            MutableBoolean didPrune = new MutableBoolean(false);
            List<Versioned<byte[]>> prunedVals = pruneNonReplicaEntries(vals, keyReplicas, didPrune);

            // Only write something back if some pruning actually
            // happened. Optimization to reduce load on storage
            if (didPrune.booleanValue()) {
                List<Versioned<byte[]>> resolvedVals = VectorClockUtils.resolveVersions(prunedVals);
                // TODO this is only implemented for BDB for now
                lockHandle.setValues(resolvedVals);
                engine.putAndUnlock(key, lockHandle);
                numPrunedKeys = this.numKeysUpdatedThisRun.incrementAndGet();
            } else {
                // if we did not prune, still need to let go of the lock
                engine.releaseLock(lockHandle);
            }

            itemsScanned = this.numKeysScannedThisRun.incrementAndGet();
            throttler.maybeThrottle(1);
            if (itemsScanned % STAT_RECORDS_INTERVAL == 0) {
                logger.info("#Scanned:" + itemsScanned + " #Pruned:" + numPrunedKeys);
            }
        } finally {
            // safety net: release the handle if it is still open, e.g. because an
            // exception was thrown above; the exception itself propagates to the caller
            if (lockHandle != null && !lockHandle.isClosed()) {
                engine.releaseLock(lockHandle);
            }
        }
    }
    logger.info("Completed store " + storeDef.getName() + " #Scanned:" + itemsScanned + " #Pruned:" + numPrunedKeys);
}
use of org.apache.commons.lang.mutable.MutableBoolean in project voldemort by voldemort.
the class VersionedPutPruningTest method testPruningLogic.
/**
 * Runs four versions of the same value through
 * {@code VersionedPutPruneJob.pruneNonReplicaEntries} and checks the pruned clock at each
 * position, then checks that resolving the pruned versions yields exactly one winner
 * carrying the timestamp of the first input clock.
 */
@Test
public void testPruningLogic() {
    VectorClock clock1 = TestUtils.getClock(0, 2, 1, 3);
    VectorClock clock2 = TestUtils.getClock(9, 4);
    VectorClock clock3 = TestUtils.getClock(0, 1);
    VectorClock clock4 = TestUtils.getClock(8, 0);

    // one versioned value per input clock, in clock order
    List<Versioned<byte[]>> inputVersions = new ArrayList<Versioned<byte[]>>();
    for (VectorClock inputClock : new VectorClock[] { clock1, clock2, clock3, clock4 }) {
        inputVersions.add(new Versioned<byte[]>(key, inputClock));
    }

    MutableBoolean pruneFlag = new MutableBoolean(false);
    List<Versioned<byte[]>> afterPrune = VersionedPutPruneJob.pruneNonReplicaEntries(inputVersions, keyReplicas, pruneFlag);
    assertEquals("Must have pruned some versions", true, pruneFlag.booleanValue());

    // expected clock at each position of the pruned list
    VectorClock[] expectedClocks = {
        TestUtils.getClock(0, 1, 2),
        TestUtils.getClock(),
        TestUtils.getClock(0, 1),
        TestUtils.getClock(0)
    };
    for (int pos = 0; pos < expectedClocks.length; pos++) {
        assertEquals("Not pruned properly", expectedClocks[pos], afterPrune.get(pos).getVersion());
    }

    List<Versioned<byte[]>> winners = VectorClockUtils.resolveVersions(afterPrune);
    assertEquals("Must be exactly one winning version", 1, winners.size());

    Versioned<byte[]> winner = winners.get(0);
    assertEquals("Incorrect winning version", TestUtils.getClock(0, 1, 2), winner.getVersion());
    assertEquals("Incorrect winning version", clock1.getTimestamp(), ((VectorClock) winner.getVersion()).getTimestamp());
}
use of org.apache.commons.lang.mutable.MutableBoolean in project voldemort by voldemort.
the class VersionedPutPruningTest method testOnlineBehavior.
/**
 * Exercises {@code pruneAndResolve} for two situations: a key that holds both a fetched
 * and a conflicting online version, and a key that holds only the fetched version. In
 * the second case it also checks that a later online write does not conflict with the
 * pruned result.
 */
@Test
public void testOnlineBehavior() {
    long now = System.currentTimeMillis();

    // let's assume previous replicas are [4, 5, 0]
    VectorClock fetchedClock = TestUtils.getVersionedPutClock(now, 4, 4, 5, 0);
    VectorClock onlineClock = TestUtils.getVersionedPutClock(now, 0, 0, 2, 1);
    assertEquals("fetched and online versions should conflict", Occurred.CONCURRENTLY, VectorClockUtils.compare(fetchedClock, onlineClock));

    // case where key has received writes before the prune job
    List<Versioned<byte[]>> withOnlineWrite = new ArrayList<Versioned<byte[]>>();
    withOnlineWrite.add(new Versioned<byte[]>(key, fetchedClock));
    withOnlineWrite.add(new Versioned<byte[]>(key, onlineClock));
    MutableBoolean prunedWithWrite = new MutableBoolean();
    List<Versioned<byte[]>> resolvedWithWrite = pruneAndResolve(withOnlineWrite, prunedWithWrite);
    assertEquals("Must have pruned something", true, prunedWithWrite.booleanValue());
    assertEquals("Must have one winning version", 1, resolvedWithWrite.size());
    assertEquals("Must resolve to onlineClock", onlineClock, resolvedWithWrite.get(0).getVersion());

    // case where key has not received any writes before the prune job
    List<Versioned<byte[]>> fetchedOnly = new ArrayList<Versioned<byte[]>>();
    fetchedOnly.add(new Versioned<byte[]>(key, fetchedClock));
    MutableBoolean prunedFetchedOnly = new MutableBoolean();
    List<Versioned<byte[]>> resolvedFetchedOnly = pruneAndResolve(fetchedOnly, prunedFetchedOnly);
    assertEquals("Must have pruned something", true, prunedFetchedOnly.booleanValue());
    assertEquals("Must have one winning version", 1, resolvedFetchedOnly.size());
    assertEquals("Must resolve to [0:ts] clock", TestUtils.getVersionedPutClock(now, -1, 0), resolvedFetchedOnly.get(0).getVersion());

    // an online write one second later must not be concurrent with the pruned version
    VectorClock nextOnlineClock = TestUtils.getVersionedPutClock(now + Time.MS_PER_SECOND, 0, 0, 2, 1);
    assertFalse("Next online write would not result in conflict", VectorClockUtils.compare((VectorClock) resolvedFetchedOnly.get(0).getVersion(), nextOnlineClock) == Occurred.CONCURRENTLY);
}
Aggregations