Search in sources :

Example 11 with MutableBoolean

use of org.apache.commons.lang.mutable.MutableBoolean in project incubator-systemml by apache.

the class CacheableData method exportData.

/**
 * Exports this object's data to the given file, choosing between a full write,
 * an HDFS-side copy, an RDD write, or no action at all.
 *
 * Synchronized because there might be parallel threads (parfor local) that
 * access the same object (in case it was created before the loop).
 * If all threads export the same data object concurrently it results in errors
 * because they all write to the same file. Efficiency for loops and parallel threads
 * is achieved by checking if the in-memory block is dirty.
 *
 * NOTE: MB: we do not use dfs copy from local (evicted) to HDFS because this would ignore
 * the output format and most importantly would bypass reblocking during write (which affects the
 * potential degree of parallelism). However, we copy files on HDFS if certain criteria are given.
 *
 * The export distinguishes four cases:
 * <ol>
 *   <li>the object is dirty, or this is a write to a different file in a different
 *       format: the data is (re)loaded if necessary and written out;</li>
 *   <li>write to a different file in the same format: the existing file is copied
 *       on HDFS (or written from the RDD);</li>
 *   <li>a pending RDD operation exists: the RDD is written to the target file;</li>
 *   <li>otherwise nothing is exported.</li>
 * </ol>
 *
 * @param fName file name of the export target
 * @param outputFormat format
 * @param replication value passed through unchanged to {@code writeBlobToHDFS}
 *        (presumably the HDFS replication factor - confirm against that method)
 * @param formatProperties file format properties
 * @throws CacheException if the object is not available to read, if more than one GPU
 *         copy is dirty, or if reading or writing the data fails
 */
public synchronized void exportData(String fName, String outputFormat, int replication, FileFormatProperties formatProperties) throws CacheException {
    if (LOG.isTraceEnabled()) {
        LOG.trace("Export data " + getVarName() + " " + fName);
    }
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;

    //prevent concurrent modifications
    if (!isAvailableToRead()) {
        throw new CacheException("MatrixObject not available to read.");
    }

    //guarded like the trace call above so the message is only built when it is actually logged
    if (LOG.isTraceEnabled()) {
        LOG.trace("Exporting " + this.getDebugName() + " to " + fName + " in format " + outputFormat);
    }

    //pull the data back from any GPU copy before exporting
    //NOTE(review): copiedFromGPU is overwritten on every iteration, so the dirty check
    //only considers the result of the most recently visited GPU object - confirm intended
    boolean copiedFromGPU = false;
    for (Map.Entry<GPUContext, GPUObject> kv : _gpuObjects.entrySet()) {
        GPUObject gObj = kv.getValue();
        if (gObj != null && copiedFromGPU && gObj.isDirty()) {
            LOG.error("Inconsistent internal state - A copy of this CacheableData was dirty on more than 1 GPU");
            throw new CacheException("Internal Error : Inconsistent internal state, A copy of this CacheableData was dirty on more than 1 GPU");
        } else if (gObj != null) {
            copiedFromGPU = gObj.acquireHostRead();
            if (_data == null) {
                getCache();
            }
        }
    }

    //persistent write flag: true if export is called from a "write" instruction,
    //i.e., the target differs from the file backing this object
    boolean pWrite = !fName.equals(_hdfsFileName);
    if (!pWrite) {
        setHDFSFileExists(true);
    }

    //actual export (note: no direct transfer of local copy in order to ensure blocking (and hence, parallelism))
    //use dirty for skipping parallel exports
    if (isDirty() || (pWrite && !isEqualOutputFormat(outputFormat))) {
        //CASE 1: dirty in-memory data or format change
        // a) get the matrix
        if (isEmpty(true)) {
            //note: for large rdd outputs, we compile dedicated writespinstructions (no need to handle this here)
            try {
                if (getRDDHandle() == null || getRDDHandle().allowsShortCircuitRead()) {
                    _data = readBlobFromHDFS(_hdfsFileName);
                } else {
                    _data = readBlobFromRDD(getRDDHandle(), new MutableBoolean());
                }
                setDirty(false);
            } catch (IOException e) {
                throw new CacheException("Reading of " + _hdfsFileName + " (" + getVarName() + ") failed.", e);
            }
        }
        //get object from cache
        if (_data == null) {
            getCache();
        }
        //incl. read matrix if evicted
        acquire(false, _data == null);

        // b) write the matrix
        try {
            writeMetaData(fName, outputFormat, formatProperties);
            writeBlobToHDFS(fName, outputFormat, replication, formatProperties);
            if (!pWrite) {
                setDirty(false);
            }
        } catch (Exception e) {
            throw new CacheException("Export to " + fName + " failed.", e);
        } finally {
            //always undo the acquire above, even if the write failed
            release();
        }
    } else if (pWrite) {
        //CASE 2: matrix already in same format but different file on hdfs (copy matrix to fname)
        try {
            MapReduceTool.deleteFileIfExistOnHDFS(fName);
            MapReduceTool.deleteFileIfExistOnHDFS(fName + ".mtd");
            if (getRDDHandle() == null || getRDDHandle().allowsShortCircuitRead()) {
                MapReduceTool.copyFileOnHDFS(_hdfsFileName, fName);
            } else {
                //write might trigger rdd operations and nnz maintenance
                writeBlobFromRDDtoHDFS(getRDDHandle(), fName, outputFormat);
            }
            writeMetaData(fName, outputFormat, formatProperties);
        } catch (Exception e) {
            throw new CacheException("Export to " + fName + " failed.", e);
        }
    } else if (getRDDHandle() != null && getRDDHandle().isPending() && !getRDDHandle().isHDFSFile() && !getRDDHandle().allowsShortCircuitRead()) {
        //CASE 3: pending rdd operation (other than checkpoints)
        try {
            //write matrix or frame
            writeBlobFromRDDtoHDFS(getRDDHandle(), fName, outputFormat);
            writeMetaData(fName, outputFormat, formatProperties);
            //update rdd status
            getRDDHandle().setPending(false);
        } catch (Exception e) {
            throw new CacheException("Export to " + fName + " failed.", e);
        }
    } else {
        //CASE 4: data already in hdfs (do nothing, no need for export)
        if (LOG.isTraceEnabled()) {
            LOG.trace(this.getDebugName() + ": Skip export to hdfs since data already exists.");
        }
    }

    if (DMLScript.STATISTICS) {
        long t1 = System.nanoTime();
        CacheStatistics.incrementExportTime(t1 - t0);
    }
}
Also used : GPUContext(org.apache.sysml.runtime.instructions.gpu.context.GPUContext) MutableBoolean(org.apache.commons.lang.mutable.MutableBoolean) GPUObject(org.apache.sysml.runtime.instructions.gpu.context.GPUObject) IOException(java.io.IOException) HashMap(java.util.HashMap) Map(java.util.Map) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 12 with MutableBoolean

use of org.apache.commons.lang.mutable.MutableBoolean in project incubator-systemml by apache.

the class CacheableData method acquireRead.

// *********************************************
// ***                                       ***
// ***    HIGH-LEVEL METHODS THAT SPECIFY    ***
// ***   THE LOCKING AND CACHING INTERFACE   ***
// ***                                       ***
// *********************************************
/**
 * Takes a shared "read-only" lock on this object and hands back the cache block,
 * restoring it to main memory first and reading it from HDFS (or from the RDD)
 * when no in-memory copy is available.
 *
 * Synchronized because parallel threads (parfor local) may access the same
 * object when it was created before the loop.
 *
 * In-Status:  EMPTY, EVICTABLE, EVICTED, READ;
 * Out-Status: READ(+1).
 *
 * @return cacheable data
 * @throws CacheException if the object is not available to read, if more than one GPU
 *         copy is dirty, if the file name is missing, or if reading the data fails
 */
public synchronized T acquireRead() throws CacheException {
    if (LOG.isTraceEnabled()) {
        LOG.trace("Acquire read " + getVarName());
    }
    final long startNanos = DMLScript.STATISTICS ? System.nanoTime() : 0;

    if (!isAvailableToRead()) {
        throw new CacheException("MatrixObject not available to read.");
    }

    //get object from cache
    if (_data == null) {
        getCache();
    }

    //call acquireHostRead on every GPU object that is present
    boolean hostCopyPulled = false;
    for (Map.Entry<GPUContext, GPUObject> entry : _gpuObjects.entrySet()) {
        final GPUObject gpuObject = entry.getValue();
        if (gpuObject == null) {
            continue;
        }
        if (hostCopyPulled && gpuObject.isDirty()) {
            LOG.error("Inconsistent internal state - A copy of this CacheableData was dirty on more than 1 GPU");
            throw new CacheException("Internal Error : Inconsistent internal state, A copy of this CacheableData was dirty on more than 1 GPU");
        }
        hostCopyPulled = gpuObject.acquireHostRead();
        if (_data == null) {
            getCache();
        }
    }

    //(probe data for cache_nowrite / jvm_reuse)
    if (isEmpty(true) && _data == null) {
        try {
            if (DMLScript.STATISTICS) {
                CacheStatistics.incrementHDFSHits();
            }

            if (getRDDHandle() != null && !getRDDHandle().allowsShortCircuitRead()) {
                //read matrix from rdd (incl execute pending rdd operations)
                final MutableBoolean rddWriteStatus = new MutableBoolean();
                _data = readBlobFromRDD(getRDDHandle(), rddWriteStatus);

                //mark for initial local write (prevent repeated execution of rdd operations)
                _requiresLocalWrite = !rddWriteStatus.booleanValue() || CACHING_WRITE_CACHE_ON_READ;
            } else {
                //check filename
                if (_hdfsFileName == null) {
                    throw new CacheException("Cannot read matrix for empty filename.");
                }

                //read cacheable data from hdfs
                _data = readBlobFromHDFS(_hdfsFileName);

                //mark for initial local write despite read operation
                _requiresLocalWrite = CACHING_WRITE_CACHE_ON_READ;
            }

            setDirty(false);
        } catch (IOException e) {
            throw new CacheException("Reading of " + _hdfsFileName + " (" + getVarName() + ") failed.", e);
        }
        _isAcquireFromEmpty = true;
    } else if (DMLScript.STATISTICS && _data != null) {
        CacheStatistics.incrementMemHits();
    }

    //cache status maintenance
    acquire(false, _data == null);
    updateStatusPinned(true);

    if (DMLScript.STATISTICS) {
        final long endNanos = System.nanoTime();
        CacheStatistics.incrementAcquireRTime(endNanos - startNanos);
    }

    return _data;
}
Also used : GPUContext(org.apache.sysml.runtime.instructions.gpu.context.GPUContext) MutableBoolean(org.apache.commons.lang.mutable.MutableBoolean) GPUObject(org.apache.sysml.runtime.instructions.gpu.context.GPUObject) IOException(java.io.IOException) HashMap(java.util.HashMap) Map(java.util.Map)

Example 13 with MutableBoolean

use of org.apache.commons.lang.mutable.MutableBoolean in project voldemort by voldemort.

the class VersionedPutPruneJob method operate.

/**
 * Scans every key of the store named {@code storeName} and prunes its versions
 * via {@code pruneNonReplicaEntries}, using the replica list computed for the
 * key's master partition.
 *
 * For each key the values are fetched under a lock. If pruning changed anything,
 * the pruned versions are resolved and written back (which also unlocks the key);
 * otherwise the lock is simply released. Stores for which {@code isWritableStore}
 * returns false are skipped without any action.
 *
 * @throws VoldemortException if no store definition exists for {@code storeName}
 * @throws Exception if any storage engine operation fails; the key lock is
 *         released before the exception propagates
 */
@Override
public void operate() throws Exception {
    StoreDefinition storeDef = StoreDefinitionUtils.getStoreDefinitionWithName(metadataStore.getStoreDefList(), storeName);
    if (storeDef == null) {
        throw new VoldemortException("Unknown store " + storeName);
    }
    if (isWritableStore(storeDef)) {
        // Let's generate routing strategy for this storage engine
        StoreRoutingPlan routingPlan = new StoreRoutingPlan(metadataStore.getCluster(), storeDef);
        logger.info("Pruning store " + storeDef.getName());
        StorageEngine<ByteArray, byte[], byte[]> engine = storeRepo.getStorageEngine(storeDef.getName());
        iterator = engine.keys();
        long itemsScanned = 0;
        long numPrunedKeys = 0;
        while (iterator.hasNext()) {
            ByteArray key = iterator.next();
            KeyLockHandle<byte[]> lockHandle = null;
            try {
                lockHandle = engine.getAndLock(key);
                List<Versioned<byte[]>> vals = lockHandle.getValues();
                List<Integer> keyReplicas = routingPlan.getReplicationNodeList(routingPlan.getMasterPartitionId(key.get()));
                MutableBoolean didPrune = new MutableBoolean(false);
                List<Versioned<byte[]>> prunedVals = pruneNonReplicaEntries(vals, keyReplicas, didPrune);
                // Only write back when pruning actually happened.
                // Optimization to reduce load on storage
                if (didPrune.booleanValue()) {
                    List<Versioned<byte[]>> resolvedVals = VectorClockUtils.resolveVersions(prunedVals);
                    // TODO this is only implemented for BDB for now
                    lockHandle.setValues(resolvedVals);
                    engine.putAndUnlock(key, lockHandle);
                    numPrunedKeys = this.numKeysUpdatedThisRun.incrementAndGet();
                } else {
                    // if we did not prune, still need to let go of the lock
                    engine.releaseLock(lockHandle);
                }
                itemsScanned = this.numKeysScannedThisRun.incrementAndGet();
                throttler.maybeThrottle(1);
                if (itemsScanned % STAT_RECORDS_INTERVAL == 0) {
                    logger.info("#Scanned:" + itemsScanned + " #Pruned:" + numPrunedKeys);
                }
            } finally {
                // safety net: release the lock if a failure left the handle open
                if (lockHandle != null && !lockHandle.isClosed()) {
                    engine.releaseLock(lockHandle);
                }
            }
        }
        logger.info("Completed store " + storeDef.getName() + " #Scanned:" + itemsScanned + " #Pruned:" + numPrunedKeys);
    }
}
Also used : StoreRoutingPlan(voldemort.routing.StoreRoutingPlan) Versioned(voldemort.versioning.Versioned) MutableBoolean(org.apache.commons.lang.mutable.MutableBoolean) VoldemortException(voldemort.VoldemortException) StoreDefinition(voldemort.store.StoreDefinition) ByteArray(voldemort.utils.ByteArray)

Example 14 with MutableBoolean

use of org.apache.commons.lang.mutable.MutableBoolean in project voldemort by voldemort.

the class VersionedPutPruningTest method testPruningLogic.

/**
 * Checks that {@code pruneNonReplicaEntries} reports pruning for four versions
 * of the same key, that each pruned clock has the expected entries, and that
 * resolving the pruned versions leaves exactly one winner carrying the first
 * clock's timestamp.
 */
@Test
public void testPruningLogic() {
    List<Versioned<byte[]>> versions = new ArrayList<Versioned<byte[]>>();
    VectorClock firstClock = TestUtils.getClock(0, 2, 1, 3);
    VectorClock secondClock = TestUtils.getClock(9, 4);
    VectorClock thirdClock = TestUtils.getClock(0, 1);
    VectorClock fourthClock = TestUtils.getClock(8, 0);
    for (VectorClock clock : new VectorClock[] { firstClock, secondClock, thirdClock, fourthClock }) {
        versions.add(new Versioned<byte[]>(key, clock));
    }

    // prune and verify the flag plus every resulting clock, position by position
    MutableBoolean pruned = new MutableBoolean(false);
    List<Versioned<byte[]>> afterPrune = VersionedPutPruneJob.pruneNonReplicaEntries(versions, keyReplicas, pruned);
    assertEquals("Must have pruned some versions", true, pruned.booleanValue());
    assertEquals("Not pruned properly", TestUtils.getClock(0, 1, 2), afterPrune.get(0).getVersion());
    assertEquals("Not pruned properly", TestUtils.getClock(), afterPrune.get(1).getVersion());
    assertEquals("Not pruned properly", TestUtils.getClock(0, 1), afterPrune.get(2).getVersion());
    assertEquals("Not pruned properly", TestUtils.getClock(0), afterPrune.get(3).getVersion());

    // resolution must collapse the pruned versions into a single winner
    List<Versioned<byte[]>> winners = VectorClockUtils.resolveVersions(afterPrune);
    assertEquals("Must be exactly one winning version", 1, winners.size());
    assertEquals("Incorrect winning version", TestUtils.getClock(0, 1, 2), winners.get(0).getVersion());
    assertEquals("Incorrect winning version", firstClock.getTimestamp(), ((VectorClock) winners.get(0).getVersion()).getTimestamp());
}
Also used : Versioned(voldemort.versioning.Versioned) VectorClock(voldemort.versioning.VectorClock) MutableBoolean(org.apache.commons.lang.mutable.MutableBoolean) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 15 with MutableBoolean

use of org.apache.commons.lang.mutable.MutableBoolean in project voldemort by voldemort.

the class VersionedPutPruningTest method testOnlineBehavior.

/**
 * Exercises prune-and-resolve for a key in two situations: one where an online
 * write arrived before the prune job (the online clock must win), and one where
 * only the fetched version exists (it must resolve to a clock that a later
 * online write does not conflict with).
 */
@Test
public void testOnlineBehavior() {
    long now = System.currentTimeMillis();
    // let's assume previous replicas are [4, 5, 0]
    VectorClock fetchedClock = TestUtils.getVersionedPutClock(now, 4, 4, 5, 0);
    VectorClock onlineClock = TestUtils.getVersionedPutClock(now, 0, 0, 2, 1);
    assertEquals("fetched and online versions should conflict", Occurred.CONCURRENTLY, VectorClockUtils.compare(fetchedClock, onlineClock));

    // case where key has received writes before the prune job
    List<Versioned<byte[]>> withOnlineWrite = new ArrayList<Versioned<byte[]>>();
    withOnlineWrite.add(new Versioned<byte[]>(key, fetchedClock));
    withOnlineWrite.add(new Versioned<byte[]>(key, onlineClock));
    MutableBoolean prunedWithWrite = new MutableBoolean();
    List<Versioned<byte[]>> resolvedWithWrite = pruneAndResolve(withOnlineWrite, prunedWithWrite);
    assertEquals("Must have pruned something", true, prunedWithWrite.booleanValue());
    assertEquals("Must have one winning version", 1, resolvedWithWrite.size());
    assertEquals("Must resolve to onlineClock", onlineClock, resolvedWithWrite.get(0).getVersion());

    // case where key has not received any writes before the prune job
    List<Versioned<byte[]>> fetchedOnly = new ArrayList<Versioned<byte[]>>();
    fetchedOnly.add(new Versioned<byte[]>(key, fetchedClock));
    MutableBoolean prunedFetchedOnly = new MutableBoolean();
    List<Versioned<byte[]>> resolvedFetchedOnly = pruneAndResolve(fetchedOnly, prunedFetchedOnly);
    assertEquals("Must have pruned something", true, prunedFetchedOnly.booleanValue());
    assertEquals("Must have one winning version", 1, resolvedFetchedOnly.size());
    assertEquals("Must resolve to [0:ts] clock", TestUtils.getVersionedPutClock(now, -1, 0), resolvedFetchedOnly.get(0).getVersion());

    // a write arriving one second later must not be concurrent with the pruned version
    VectorClock nextOnlineClock = TestUtils.getVersionedPutClock(now + Time.MS_PER_SECOND, 0, 0, 2, 1);
    assertFalse("Next online write would not result in conflict", Occurred.CONCURRENTLY == VectorClockUtils.compare((VectorClock) resolvedFetchedOnly.get(0).getVersion(), nextOnlineClock));
}
Also used : Versioned(voldemort.versioning.Versioned) VectorClock(voldemort.versioning.VectorClock) MutableBoolean(org.apache.commons.lang.mutable.MutableBoolean) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Aggregations

MutableBoolean (org.apache.commons.lang.mutable.MutableBoolean)27 Test (org.junit.Test)13 ArrayList (java.util.ArrayList)6 IOException (java.io.IOException)5 Map (java.util.Map)5 ExtendedBlockId (org.apache.hadoop.hdfs.ExtendedBlockId)5 Versioned (voldemort.versioning.Versioned)5 HashMap (java.util.HashMap)4 CacheVisitor (org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.CacheVisitor)4 VectorClock (voldemort.versioning.VectorClock)4 Slot (org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.Slot)3 com.evolveum.midpoint.prism (com.evolveum.midpoint.prism)2 ItemPath (com.evolveum.midpoint.prism.path.ItemPath)2 PolyString (com.evolveum.midpoint.prism.polystring.PolyString)2 Source (com.evolveum.midpoint.repo.common.expression.Source)2 ObjectType (com.evolveum.midpoint.xml.ns._public.common.common_3.ObjectType)2 LinkedMap (org.apache.commons.collections.map.LinkedMap)2 Validate (org.apache.commons.lang.Validate)2 Configuration (org.apache.hadoop.conf.Configuration)2 DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem)2