Use of org.apache.nifi.util.StopWatch in project nifi by apache.
The class PutHDFS, method onTrigger:
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final FileSystem hdfs = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();
    if (configuration == null || hdfs == null || ugi == null) {
        getLogger().error("HDFS not configured properly");
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
        return;
    }
    ugi.doAs(new PrivilegedAction<Object>() {
        @Override
        public Object run() {
            Path tempDotCopyFile = null;
            FlowFile putFlowFile = flowFile;
            try {
                final String dirValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile).getValue();
                final Path configuredRootDirPath = new Path(dirValue);
                final String conflictResponse = context.getProperty(CONFLICT_RESOLUTION).getValue();
                final Double blockSizeProp = context.getProperty(BLOCK_SIZE).asDataSize(DataUnit.B);
                final long blockSize = blockSizeProp != null ? blockSizeProp.longValue() : hdfs.getDefaultBlockSize(configuredRootDirPath);
                final Double bufferSizeProp = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B);
                final int bufferSize = bufferSizeProp != null ? bufferSizeProp.intValue() : configuration.getInt(BUFFER_SIZE_KEY, BUFFER_SIZE_DEFAULT);
                final Integer replicationProp = context.getProperty(REPLICATION_FACTOR).asInteger();
                final short replication = replicationProp != null ? replicationProp.shortValue() : hdfs.getDefaultReplication(configuredRootDirPath);
                final CompressionCodec codec = getCompressionCodec(context, configuration);
                final String filename = codec != null ? putFlowFile.getAttribute(CoreAttributes.FILENAME.key()) + codec.getDefaultExtension() : putFlowFile.getAttribute(CoreAttributes.FILENAME.key());
                final Path tempCopyFile = new Path(configuredRootDirPath, "." + filename);
                final Path copyFile = new Path(configuredRootDirPath, filename);
                // Create destination directory if it does not exist
                try {
                    if (!hdfs.getFileStatus(configuredRootDirPath).isDirectory()) {
                        throw new IOException(configuredRootDirPath.toString() + " already exists and is not a directory");
                    }
                } catch (FileNotFoundException fe) {
                    if (!hdfs.mkdirs(configuredRootDirPath)) {
                        throw new IOException(configuredRootDirPath.toString() + " could not be created");
                    }
                    changeOwner(context, hdfs, configuredRootDirPath, flowFile);
                }
                final boolean destinationExists = hdfs.exists(copyFile);
                // If destination file already exists, resolve that based on processor configuration
                if (destinationExists) {
                    switch (conflictResponse) {
                        case REPLACE_RESOLUTION:
                            if (hdfs.delete(copyFile, false)) {
                                getLogger().info("deleted {} in order to replace with the contents of {}", new Object[] { copyFile, putFlowFile });
                            }
                            break;
                        case IGNORE_RESOLUTION:
                            session.transfer(putFlowFile, REL_SUCCESS);
                            getLogger().info("transferring {} to success because file with same name already exists", new Object[] { putFlowFile });
                            return null;
                        case FAIL_RESOLUTION:
                            session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                            getLogger().warn("penalizing {} and routing to failure because file with same name already exists", new Object[] { putFlowFile });
                            return null;
                        default:
                            break;
                    }
                }
                // Write FlowFile to temp file on HDFS
                final StopWatch stopWatch = new StopWatch(true);
                session.read(putFlowFile, new InputStreamCallback() {
                    @Override
                    public void process(InputStream in) throws IOException {
                        OutputStream fos = null;
                        Path createdFile = null;
                        try {
                            if (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && destinationExists) {
                                fos = hdfs.append(copyFile, bufferSize);
                            } else {
                                fos = hdfs.create(tempCopyFile, true, bufferSize, replication, blockSize);
                            }
                            if (codec != null) {
                                fos = codec.createOutputStream(fos);
                            }
                            createdFile = tempCopyFile;
                            BufferedInputStream bis = new BufferedInputStream(in);
                            StreamUtils.copy(bis, fos);
                            bis = null;
                            fos.flush();
                        } finally {
                            try {
                                if (fos != null) {
                                    fos.close();
                                }
                            } catch (RemoteException re) {
                                // when talking to remote HDFS clusters, we don't notice problems until fos.close()
                                if (createdFile != null) {
                                    try {
                                        hdfs.delete(createdFile, false);
                                    } catch (Throwable ignore) {
                                    }
                                }
                                throw re;
                            } catch (Throwable ignore) {
                            }
                            fos = null;
                        }
                    }
                });
                stopWatch.stop();
                final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
                final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
                tempDotCopyFile = tempCopyFile;
                if (!conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) || (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && !destinationExists)) {
                    boolean renamed = false;
                    for (int i = 0; i < 10; i++) {
                        // try to rename multiple times.
                        if (hdfs.rename(tempCopyFile, copyFile)) {
                            renamed = true;
                            // rename was successful
                            break;
                        }
                        // try waiting to let whatever might cause rename failure to resolve
                        Thread.sleep(200L);
                    }
                    if (!renamed) {
                        hdfs.delete(tempCopyFile, false);
                        throw new ProcessException("Copied file to HDFS but could not rename dot file " + tempCopyFile + " to its final filename");
                    }
                    changeOwner(context, hdfs, copyFile, flowFile);
                }
                getLogger().info("copied {} to HDFS at {} in {} milliseconds at a rate of {}", new Object[] { putFlowFile, copyFile, millis, dataRate });
                final String newFilename = copyFile.getName();
                final String hdfsPath = copyFile.getParent().toString();
                putFlowFile = session.putAttribute(putFlowFile, CoreAttributes.FILENAME.key(), newFilename);
                putFlowFile = session.putAttribute(putFlowFile, ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
                final Path qualifiedPath = copyFile.makeQualified(hdfs.getUri(), hdfs.getWorkingDirectory());
                session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString());
                session.transfer(putFlowFile, REL_SUCCESS);
            } catch (final Throwable t) {
                if (tempDotCopyFile != null) {
                    try {
                        hdfs.delete(tempDotCopyFile, false);
                    } catch (Exception e) {
                        getLogger().error("Unable to remove temporary file {} due to {}", new Object[] { tempDotCopyFile, e });
                    }
                }
                getLogger().error("Failed to write to HDFS due to {}", new Object[] { t });
                session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                context.yield();
            }
            return null;
        }
    });
}
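In this processor the StopWatch brackets only the session.read(...) call, so the reported duration and data rate reflect the HDFS write itself, not the later rename or attribute updates. The following is a minimal, self-contained sketch of that same timing pattern; the class name StopWatchDataRateSketch and the in-memory payload are hypothetical stand-ins for the FlowFile content, and StreamUtils is assumed to be the org.apache.nifi.stream.io.StreamUtils used above.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.concurrent.TimeUnit;
import org.apache.nifi.stream.io.StreamUtils;
import org.apache.nifi.util.StopWatch;

public class StopWatchDataRateSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical 8 MB payload standing in for the FlowFile content written to HDFS.
        final byte[] payload = new byte[8 * 1024 * 1024];

        // Passing true starts the watch immediately, matching new StopWatch(true) in onTrigger().
        final StopWatch stopWatch = new StopWatch(true);
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        StreamUtils.copy(new ByteArrayInputStream(payload), out); // the timed work
        stopWatch.stop();

        // Same reporting calls as onTrigger(): elapsed milliseconds plus a throughput string.
        final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
        final String dataRate = stopWatch.calculateDataRate(payload.length);
        System.out.println("copied " + payload.length + " bytes in " + millis + " ms at " + dataRate);
    }
}

The dot-file rename, conflict resolution, and changeOwner steps are deliberately omitted; the sketch isolates only the measurement and reporting logic.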
Use of org.apache.nifi.util.StopWatch in project nifi-minifi by apache.
The class MiNiFiPersistentProvenanceRepository, method purgeOldEvents:
/**
 * Purges old events from the repository
 *
 * @throws IOException if unable to purge old events due to an I/O problem
 */
synchronized void purgeOldEvents() throws IOException {
    while (!recoveryFinished.get()) {
        try {
            Thread.sleep(100L);
        } catch (final InterruptedException ie) {
        }
    }
    final List<File> toPurge = new ArrayList<>();
    final long timeCutoff = System.currentTimeMillis() - configuration.getMaxRecordLife(TimeUnit.MILLISECONDS);
    final List<File> sortedByBasename = getLogFiles();
    long bytesUsed = getSize(sortedByBasename, timeCutoff);
    for (final Path path : idToPathMap.get().values()) {
        final File file = path.toFile();
        final long lastModified = file.lastModified();
        if (lastModified > 0L && lastModified < timeCutoff) {
            toPurge.add(file);
        }
    }
    // This comparator sorts the data based on the "basename" of the files. I.e., the numeric portion.
    // We do this because the numeric portion represents the ID of the first event in the log file.
    // As a result, we are sorting based on time, since the ID is monotonically increasing. By doing this,
    // we are able to avoid hitting disk continually to check timestamps.
    final Comparator<File> sortByBasenameComparator = new Comparator<File>() {
        @Override
        public int compare(final File o1, final File o2) {
            final String baseName1 = LuceneUtil.substringBefore(o1.getName(), ".");
            final String baseName2 = LuceneUtil.substringBefore(o2.getName(), ".");
            Long id1 = null;
            Long id2 = null;
            try {
                id1 = Long.parseLong(baseName1);
            } catch (final NumberFormatException nfe) {
                id1 = null;
            }
            try {
                id2 = Long.parseLong(baseName2);
            } catch (final NumberFormatException nfe) {
                id2 = null;
            }
            if (id1 == null && id2 == null) {
                return 0;
            }
            if (id1 == null) {
                return 1;
            }
            if (id2 == null) {
                return -1;
            }
            return Long.compare(id1, id2);
        }
    };
    // If we have too much data (at least 90% of our max capacity), start aging it off
    if (bytesUsed > configuration.getMaxStorageCapacity() * 0.9) {
        Collections.sort(sortedByBasename, sortByBasenameComparator);
        for (final File file : sortedByBasename) {
            toPurge.add(file);
            bytesUsed -= file.length();
            if (bytesUsed < configuration.getMaxStorageCapacity()) {
                // we've shrunk the repo size down enough to stop
                break;
            }
        }
    }
    // Sort all of the files that we want to purge such that the oldest events are aged off first
    Collections.sort(toPurge, sortByBasenameComparator);
    logger.debug("Purging old event files: {}", toPurge);
    // Remove any duplicates that we may have.
    final Set<File> uniqueFilesToPurge = new LinkedHashSet<>(toPurge);
    // Age off the data.
    final Set<String> removed = new LinkedHashSet<>();
    for (File file : uniqueFilesToPurge) {
        final String baseName = LuceneUtil.substringBefore(file.getName(), ".");
        ExpirationAction currentAction = null;
        try {
            for (final ExpirationAction action : expirationActions) {
                currentAction = action;
                if (!action.hasBeenPerformed(file)) {
                    final File fileBeforeAction = file;
                    final StopWatch stopWatch = new StopWatch(true);
                    file = action.execute(file);
                    stopWatch.stop();
                    logger.info("Successfully performed Expiration Action {} on Provenance Event file {} in {}", action, fileBeforeAction, stopWatch.getDuration());
                }
            }
            removed.add(baseName);
        } catch (final FileNotFoundException fnf) {
            logger.warn("Failed to perform Expiration Action {} on Provenance Event file {} because the file no longer exists; will not " + "perform additional Expiration Actions on this file", currentAction, file);
            removed.add(baseName);
        } catch (final Throwable t) {
            logger.warn("Failed to perform Expiration Action {} on Provenance Event file {} due to {}; will not perform additional " + "Expiration Actions on this file at this time", currentAction, file, t.toString());
            logger.warn("", t);
            eventReporter.reportEvent(Severity.WARNING, EVENT_CATEGORY, "Failed to perform Expiration Action " + currentAction + " on Provenance Event file " + file + " due to " + t.toString() + "; will not perform additional Expiration Actions " + "on this file at this time");
        }
    }
    // Update the Map ID to Path map to not include the removed file
    // We cannot obtain the write lock here because there may be a need for the lock in the rollover method,
    // if we have 'backpressure applied'. This would result in a deadlock because the rollover method would be
    // waiting for purgeOldEvents, and purgeOldEvents would be waiting for the write lock held by rollover.
    boolean updated = false;
    while (!updated) {
        final SortedMap<Long, Path> existingPathMap = idToPathMap.get();
        final SortedMap<Long, Path> newPathMap = new TreeMap<>(new PathMapComparator());
        newPathMap.putAll(existingPathMap);
        final Iterator<Map.Entry<Long, Path>> itr = newPathMap.entrySet().iterator();
        while (itr.hasNext()) {
            final Map.Entry<Long, Path> entry = itr.next();
            final String filename = entry.getValue().toFile().getName();
            final String baseName = LuceneUtil.substringBefore(filename, ".");
            if (removed.contains(baseName)) {
                itr.remove();
            }
        }
        updated = idToPathMap.compareAndSet(existingPathMap, newPathMap);
        logger.debug("After expiration, path map: {}", newPathMap);
    }
    purgeExpiredIndexes();
}
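Here the StopWatch wraps each ExpirationAction.execute(file) call individually, and the log line passes the no-argument getDuration(), which, as that usage implies, yields a value that can be logged directly. A minimal sketch of that per-action timing pattern follows; the class name and the Thread.sleep are hypothetical stand-ins for the actual expiration work.

import org.apache.nifi.util.StopWatch;

public class StopWatchPerActionSketch {
    public static void main(String[] args) throws Exception {
        // Time one unit of work the same way purgeOldEvents() times each ExpirationAction.
        final StopWatch stopWatch = new StopWatch(true);
        Thread.sleep(250L); // hypothetical stand-in for action.execute(file)
        stopWatch.stop();

        // The no-argument getDuration() is the same call logged above; print it as-is.
        System.out.println("Expiration action completed in " + stopWatch.getDuration());
    }
}

Starting a fresh StopWatch inside the loop, as purgeOldEvents() does, keeps each action's measurement independent rather than accumulating across files.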