Example 6 with MutableLong

use of org.apache.commons.lang.mutable.MutableLong in project apex-malhar by apache.

the class AbstractBlockReader method setup.

@Override
public void setup(Context.OperatorContext context) {
    operatorId = context.getId();
    LOG.debug("{}: partition keys {} mask {}", operatorId, partitionKeys, partitionMask);
    this.context = context;
    counters.setCounter(ReaderCounterKeys.BLOCKS, new MutableLong());
    counters.setCounter(ReaderCounterKeys.RECORDS, new MutableLong());
    counters.setCounter(ReaderCounterKeys.BYTES, new MutableLong());
    counters.setCounter(ReaderCounterKeys.TIME, new MutableLong());
    sleepTimeMillis = context.getValue(Context.OperatorContext.SPIN_MILLIS);
}
Also used : MutableLong(org.apache.commons.lang.mutable.MutableLong)
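
Each counter here is seeded with a zero-valued MutableLong so that downstream code can update it in place. A minimal sketch of the same pattern, with a plain EnumMap standing in for the library's BasicCounters (the enum and class names below are illustrative, not from apex-malhar):

import java.util.EnumMap;
import java.util.Map;

import org.apache.commons.lang.mutable.MutableLong;

public class CounterSketch {
    // Hypothetical stand-in for the operator's counter keys.
    enum ReaderCounterKeys { BLOCKS, RECORDS, BYTES, TIME }

    // MutableLong lets hot-path code bump a counter in place instead of
    // re-boxing a Long and re-inserting it into the map on every update.
    private final Map<ReaderCounterKeys, MutableLong> counters = new EnumMap<>(ReaderCounterKeys.class);

    public CounterSketch() {
        for (ReaderCounterKeys key : ReaderCounterKeys.values()) {
            counters.put(key, new MutableLong());
        }
    }

    void onRecord(int recordBytes) {
        counters.get(ReaderCounterKeys.RECORDS).increment();
        counters.get(ReaderCounterKeys.BYTES).add(recordBytes);
    }

    public static void main(String[] args) {
        CounterSketch sketch = new CounterSketch();
        sketch.onRecord(128);
        System.out.println(sketch.counters.get(ReaderCounterKeys.BYTES)); // prints 128
    }
}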

Example 7 with MutableLong

use of org.apache.commons.lang.mutable.MutableLong in project apex-malhar by apache.

the class AbstractFileInputOperator method definePartitions.

@Override
public Collection<Partition<AbstractFileInputOperator<T>>> definePartitions(Collection<Partition<AbstractFileInputOperator<T>>> partitions, PartitioningContext context) {
    lastRepartition = System.currentTimeMillis();
    int totalCount = getNewPartitionCount(partitions, context);
    LOG.debug("Computed new partitions: {}", totalCount);
    if (totalCount == partitions.size()) {
        return partitions;
    }
    AbstractFileInputOperator<T> tempOperator = partitions.iterator().next().getPartitionedInstance();
    MutableLong tempGlobalNumberOfRetries = tempOperator.globalNumberOfRetries;
    MutableLong tempGlobalNumberOfFailures = tempOperator.globalNumberOfFailures;
    /*
     * Build collective state from all instances of the operator.
     */
    Set<String> totalProcessedFiles = Sets.newHashSet();
    Set<FailedFile> currentFiles = Sets.newHashSet();
    List<DirectoryScanner> oldscanners = Lists.newLinkedList();
    List<FailedFile> totalFailedFiles = Lists.newLinkedList();
    List<String> totalPendingFiles = Lists.newLinkedList();
    Set<Integer> deletedOperators = Sets.newHashSet();
    for (Partition<AbstractFileInputOperator<T>> partition : partitions) {
        AbstractFileInputOperator<T> oper = partition.getPartitionedInstance();
        totalProcessedFiles.addAll(oper.processedFiles);
        totalFailedFiles.addAll(oper.failedFiles);
        totalPendingFiles.addAll(oper.pendingFiles);
        currentFiles.addAll(oper.unfinishedFiles);
        tempGlobalNumberOfRetries.add(oper.localNumberOfRetries);
        tempGlobalNumberOfFailures.add(oper.localNumberOfFailures);
        if (oper.currentFile != null) {
            currentFiles.add(new FailedFile(oper.currentFile, oper.offset));
        }
        oldscanners.add(oper.getScanner());
        deletedOperators.add(oper.operatorId);
    }
    /*
     * Create partitions of scanners, scanner's partition method will do state
     * transfer for DirectoryScanner objects.
     */
    List<DirectoryScanner> scanners = scanner.partition(totalCount, oldscanners);
    Collection<Partition<AbstractFileInputOperator<T>>> newPartitions = Lists.newArrayListWithExpectedSize(totalCount);
    List<WindowDataManager> newManagers = windowDataManager.partition(totalCount, deletedOperators);
    KryoCloneUtils<AbstractFileInputOperator<T>> cloneUtils = KryoCloneUtils.createCloneUtils(this);
    for (int i = 0; i < scanners.size(); i++) {
        @SuppressWarnings("unchecked") AbstractFileInputOperator<T> oper = cloneUtils.getClone();
        DirectoryScanner scn = scanners.get(i);
        oper.setScanner(scn);
        // Do state transfer for processed files.
        oper.processedFiles.addAll(totalProcessedFiles);
        oper.globalNumberOfRetries = tempGlobalNumberOfRetries;
        oper.localNumberOfRetries.setValue(0);
        oper.globalNumberOfFailures = tempGlobalNumberOfFailures;
        oper.localNumberOfFailures.setValue(0);
        /* redistribute unfinished files properly */
        oper.unfinishedFiles.clear();
        oper.currentFile = null;
        oper.offset = 0;
        Iterator<FailedFile> unfinishedIter = currentFiles.iterator();
        while (unfinishedIter.hasNext()) {
            FailedFile unfinishedFile = unfinishedIter.next();
            if (scn.acceptFile(unfinishedFile.path)) {
                oper.unfinishedFiles.add(unfinishedFile);
                unfinishedIter.remove();
            }
        }
        /* transfer failed files */
        oper.failedFiles.clear();
        Iterator<FailedFile> iter = totalFailedFiles.iterator();
        while (iter.hasNext()) {
            FailedFile ff = iter.next();
            if (scn.acceptFile(ff.path)) {
                oper.failedFiles.add(ff);
                iter.remove();
            }
        }
        /* redistribute pending files properly */
        oper.pendingFiles.clear();
        Iterator<String> pendingFilesIterator = totalPendingFiles.iterator();
        while (pendingFilesIterator.hasNext()) {
            String pathString = pendingFilesIterator.next();
            if (scn.acceptFile(pathString)) {
                oper.pendingFiles.add(pathString);
                pendingFilesIterator.remove();
            }
        }
        oper.setWindowDataManager(newManagers.get(i));
        newPartitions.add(new DefaultPartition<AbstractFileInputOperator<T>>(oper));
    }
    LOG.info("definePartitions called returning {} partitions", newPartitions.size());
    return newPartitions;
}
Also used : DefaultPartition(com.datatorrent.api.DefaultPartition) MutableLong(org.apache.commons.lang.mutable.MutableLong) WindowDataManager(org.apache.apex.malhar.lib.wal.WindowDataManager)
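
The three claim loops above share one pattern: each new partition walks a shared pool, keeps the entries its scanner accepts, and removes them so that no file is assigned twice. A standalone sketch of that pattern, with a hypothetical Scanner interface and a parity-based split standing in for DirectoryScanner.acceptFile:

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class RedistributeSketch {
    // Illustrative stand-in for DirectoryScanner.acceptFile.
    interface Scanner {
        boolean acceptsItem(String path);
    }

    static List<List<String>> redistribute(List<String> pool, List<Scanner> scanners) {
        List<List<String>> assignments = new ArrayList<>();
        for (Scanner scanner : scanners) {
            List<String> claimed = new ArrayList<>();
            Iterator<String> it = pool.iterator();
            while (it.hasNext()) {
                String path = it.next();
                if (scanner.acceptsItem(path)) {
                    claimed.add(path);
                    it.remove(); // claimed items leave the shared pool
                }
            }
            assignments.add(claimed);
        }
        return assignments;
    }

    public static void main(String[] args) {
        List<String> pool = new ArrayList<>(List.of("a.txt", "b.txt", "c.txt"));
        // Two partitions splitting on the parity of the first character.
        List<Scanner> scanners = List.of(
            p -> p.charAt(0) % 2 == 0,
            p -> p.charAt(0) % 2 != 0);
        System.out.println(redistribute(pool, scanners)); // [[b.txt], [a.txt, c.txt]]
    }
}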

Example 8 with MutableLong

use of org.apache.commons.lang.mutable.MutableLong in project apex-malhar by apache.

the class AbstractFileOutputOperator method setup.

@Override
public void setup(Context.OperatorContext context) {
    LOG.debug("setup initiated");
    if (expireStreamAfterAccessMillis == null) {
        expireStreamAfterAccessMillis = (long) (context.getValue(Context.DAGContext.STREAMING_WINDOW_SIZE_MILLIS) * context.getValue(Context.DAGContext.CHECKPOINT_WINDOW_COUNT));
    }
    rollingFile = (maxLength < Long.MAX_VALUE) || (rotationWindows > 0);
    // Getting required file system instance.
    try {
        fs = getFSInstance();
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
    if (replication <= 0) {
        replication = fs.getDefaultReplication(new Path(filePath));
    }
    LOG.debug("FS class {}", fs.getClass());
    // building cache
    RemovalListener<String, FSFilterStreamContext> removalListener = createCacheRemoveListener();
    CacheLoader<String, FSFilterStreamContext> loader = createCacheLoader();
    streamsCache = CacheBuilder.newBuilder()
        .maximumSize(maxOpenFiles)
        .expireAfterAccess(expireStreamAfterAccessMillis, TimeUnit.MILLISECONDS)
        .removalListener(removalListener)
        .build(loader);
    LOG.debug("File system class: {}", fs.getClass());
    LOG.debug("end-offsets {}", endOffsets);
    try {
        // Restore the files in case they were corrupted and the operator was re-deployed.
        Path writerPath = new Path(filePath);
        if (fs.exists(writerPath)) {
            for (String seenFileName : endOffsets.keySet()) {
                String seenFileNamePart = getPartFileNamePri(seenFileName);
                LOG.debug("seenFileNamePart: {}", seenFileNamePart);
                Path activeFilePath;
                if (alwaysWriteToTmp) {
                    String tmpFileName = fileNameToTmpName.get(seenFileNamePart);
                    activeFilePath = new Path(filePath + Path.SEPARATOR + tmpFileName);
                } else {
                    activeFilePath = new Path(filePath + Path.SEPARATOR + seenFileNamePart);
                }
                if (fs.exists(activeFilePath)) {
                    recoverFile(seenFileName, seenFileNamePart, activeFilePath);
                }
            }
        }
        if (rollingFile) {
            // delete the left over future rolling files produced from the previous crashed instance of this operator.
            for (String seenFileName : endOffsets.keySet()) {
                try {
                    Integer fileOpenPart = this.openPart.get(seenFileName).getValue();
                    int nextPart = fileOpenPart + 1;
                    String seenPartFileName;
                    while (true) {
                        seenPartFileName = getPartFileName(seenFileName, nextPart);
                        Path activePath = null;
                        if (alwaysWriteToTmp) {
                            String tmpFileName = fileNameToTmpName.get(seenPartFileName);
                            if (tmpFileName != null) {
                                activePath = new Path(filePath + Path.SEPARATOR + tmpFileName);
                            }
                        } else {
                            activePath = new Path(filePath + Path.SEPARATOR + seenPartFileName);
                        }
                        if (activePath == null || !fs.exists(activePath)) {
                            break;
                        }
                        fs.delete(activePath, true);
                        nextPart++;
                    }
                    seenPartFileName = getPartFileName(seenFileName, fileOpenPart);
                    Path activePath = null;
                    if (alwaysWriteToTmp) {
                        String tmpFileName = fileNameToTmpName.get(seenPartFileName);
                        if (tmpFileName != null) {
                            activePath = new Path(filePath + Path.SEPARATOR + tmpFileName);
                        }
                    } else {
                        activePath = new Path(filePath + Path.SEPARATOR + seenPartFileName);
                    }
                    if (activePath != null && fs.exists(activePath) && fs.getFileStatus(activePath).getLen() > maxLength) {
                        // Handle the case when restoring to a checkpoint where the current rolling file
                        // already has a length greater than max length.
                        LOG.debug("rotating file at setup.");
                        rotate(seenFileName);
                    }
                } catch (IOException | ExecutionException e) {
                    throw new RuntimeException(e);
                }
            }
        }
        LOG.debug("setup completed");
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    this.context = context;
    fileCounters.setCounter(Counters.TOTAL_BYTES_WRITTEN, new MutableLong());
    fileCounters.setCounter(Counters.TOTAL_TIME_WRITING_MILLISECONDS, new MutableLong());
}
Also used : Path(org.apache.hadoop.fs.Path) MutableLong(org.apache.commons.lang.mutable.MutableLong) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)
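
The streamsCache built in this setup() is a Guava LoadingCache: bounded by maximumSize, expiring idle entries, and releasing resources through a removal listener on eviction. A minimal sketch of that wiring, with String values standing in for FSFilterStreamContext and illustrative sizes and timeouts:

import java.util.concurrent.TimeUnit;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.cache.RemovalListener;

public class StreamCacheSketch {
    public static void main(String[] args) throws Exception {
        // Called when an entry is evicted; the operator closes the stream here.
        RemovalListener<String, String> removalListener =
            notification -> System.out.println("closing stream for " + notification.getKey());

        // Opens the resource on first access to a key.
        CacheLoader<String, String> loader = new CacheLoader<String, String>() {
            @Override
            public String load(String fileName) {
                System.out.println("opening stream for " + fileName);
                return "stream:" + fileName;
            }
        };

        LoadingCache<String, String> streamsCache = CacheBuilder.newBuilder()
            .maximumSize(100)                        // cap on concurrently open files
            .expireAfterAccess(30, TimeUnit.SECONDS) // close streams idle too long
            .removalListener(removalListener)
            .build(loader);

        System.out.println(streamsCache.get("part-0000")); // loads on first call,
        System.out.println(streamsCache.get("part-0000")); // hits the cache on the second
    }
}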

Example 9 with MutableLong

use of org.apache.commons.lang.mutable.MutableLong in project apex-malhar by apache.

the class AbstractFileOutputOperator method processTuple.

/**
 * This method processes received tuples.
 * Tuples are written out to the appropriate files as determined by the getFileName method.
 * If the output port is connected, incoming tuples are also converted and emitted on the appropriate output port.
 * @param tuple An incoming tuple which needs to be processed.
 */
protected void processTuple(INPUT tuple) {
    String fileName = getFileName(tuple);
    if (Strings.isNullOrEmpty(fileName)) {
        return;
    }
    try {
        FilterOutputStream fsOutput = streamsCache.get(fileName).getFilterStream();
        byte[] tupleBytes = getBytesForTuple(tuple);
        long start = System.currentTimeMillis();
        fsOutput.write(tupleBytes);
        totalWritingTime += System.currentTimeMillis() - start;
        totalBytesWritten += tupleBytes.length;
        MutableLong currentOffset = endOffsets.get(fileName);
        if (currentOffset == null) {
            currentOffset = new MutableLong(0);
            endOffsets.put(fileName, currentOffset);
        }
        currentOffset.add(tupleBytes.length);
        if (rotationWindows > 0) {
            getRotationState(fileName).notEmpty = true;
        }
        if (rollingFile && currentOffset.longValue() > maxLength) {
            LOG.debug("Rotating file {} {} {}", fileName, openPart.get(fileName), currentOffset.longValue());
            rotate(fileName);
        }
        MutableLong count = counts.get(fileName);
        if (count == null) {
            count = new MutableLong(0);
            counts.put(fileName, count);
        }
        count.add(1);
    } catch (IOException | ExecutionException ex) {
        throw new RuntimeException(ex);
    }
}
Also used : MutableLong(org.apache.commons.lang.mutable.MutableLong) IOException(java.io.IOException) FilterOutputStream(java.io.FilterOutputStream) ExecutionException(java.util.concurrent.ExecutionException)
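
The null-check-and-put around endOffsets and counts is the classic get-or-create pattern for MutableLong accumulators; on a Java 8+ Map it collapses into computeIfAbsent. A minimal sketch of that variant (class and method names are illustrative):

import java.util.HashMap;
import java.util.Map;

import org.apache.commons.lang.mutable.MutableLong;

public class OffsetSketch {
    private final Map<String, MutableLong> endOffsets = new HashMap<>();

    void recordWrite(String fileName, int bytesWritten) {
        // Create the accumulator on first sight of the file, then mutate in place.
        endOffsets.computeIfAbsent(fileName, k -> new MutableLong(0)).add(bytesWritten);
    }

    public static void main(String[] args) {
        OffsetSketch sketch = new OffsetSketch();
        sketch.recordWrite("out.txt", 10);
        sketch.recordWrite("out.txt", 5);
        System.out.println(sketch.endOffsets.get("out.txt").longValue()); // 15
    }
}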

Example 10 with MutableLong

use of org.apache.commons.lang.mutable.MutableLong in project apex-malhar by apache.

the class AbstractThroughputFileInputOperator method processStats.

@Override
@SuppressWarnings("unchecked")
public Response processStats(BatchedOperatorStats batchedOperatorStats) {
    BasicCounters<MutableLong> fileCounters = null;
    for (OperatorStats operatorStats : batchedOperatorStats.getLastWindowedStats()) {
        if (operatorStats.counters != null) {
            fileCounters = (BasicCounters<MutableLong>) operatorStats.counters;
        }
    }
    Response response = new Response();
    if ((fileCounters != null && fileCounters.getCounter(FileCounters.PENDING_FILES).longValue() > 0L) || System.currentTimeMillis() - repartitionInterval <= lastRepartition) {
        response.repartitionRequired = false;
        return response;
    }
    response.repartitionRequired = true;
    return response;
}
Also used : MutableLong(org.apache.commons.lang.mutable.MutableLong) OperatorStats(com.datatorrent.api.Stats.OperatorStats)
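
The repartition gate reads as: skip repartitioning while any files are still pending, or while the cool-down interval since the last repartition has not elapsed. A minimal sketch of that decision, with illustrative field and parameter names:

public class RepartitionGateSketch {
    // Returns true only when no work is pending and the cool-down has elapsed.
    static boolean repartitionRequired(long pendingFiles, long now, long lastRepartition, long repartitionIntervalMillis) {
        if (pendingFiles > 0L || now - repartitionIntervalMillis <= lastRepartition) {
            return false; // still busy, or repartitioned too recently
        }
        return true;
    }

    public static void main(String[] args) {
        long now = System.currentTimeMillis();
        System.out.println(repartitionRequired(0, now, now - 120_000, 60_000)); // true
        System.out.println(repartitionRequired(5, now, now - 120_000, 60_000)); // false
    }
}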

Aggregations

MutableLong (org.apache.commons.lang.mutable.MutableLong): 27
IOException (java.io.IOException): 8
Path (org.apache.hadoop.fs.Path): 5
ObjectMapperString (com.datatorrent.common.util.ObjectMapperString): 3
Map (java.util.Map): 3
BasicCounters (org.apache.apex.malhar.lib.counters.BasicCounters): 3
DefaultPartition (com.datatorrent.api.DefaultPartition): 2
IncompatibleVersionException (com.datatorrent.stram.client.WebServicesVersionConversion.IncompatibleVersionException): 2
TupleRecorder (com.datatorrent.stram.debug.TupleRecorder): 2
VCFHeader (htsjdk.variant.vcf.VCFHeader): 2
VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine): 2
BufferedReader (java.io.BufferedReader): 2
InputStreamReader (java.io.InputStreamReader): 2
HashMap (java.util.HashMap): 2
ExecutionException (java.util.concurrent.ExecutionException): 2
FileStatus (org.apache.hadoop.fs.FileStatus): 2
JSONException (org.codehaus.jettison.json.JSONException): 2
JSONObject (org.codehaus.jettison.json.JSONObject): 2
DataRow (org.knime.core.data.DataRow): 2
ColumnBufferedDataTableSorter (org.knime.core.data.sort.ColumnBufferedDataTableSorter): 2