Search in sources:

Example 1 with TransferManagerWrapper

Use of com.amazonaws.services.neptune.util.TransferManagerWrapper in the project amazon-neptune-tools by awslabs.

In the class ExportToS3NeptuneExportEventHandler, method onExportComplete.

/**
 * Logs the total size of the exported files and, when an S3 output path is configured,
 * uploads the export directory and a completion file to S3.
 *
 * <p>The size calculation is best-effort: a failure there is logged at debug level and does
 * not prevent the upload. After the timed upload activity returns, the computed S3 location
 * is stored in {@code result}.
 *
 * @param directories supplies the root directory of the local export
 * @param stats       forwarded to the completion-file upload
 * @param cluster     not referenced by this handler
 * @param graphSchema forwarded to the completion-file upload
 * @throws Exception  propagated from creating/closing the transfer manager or from the
 *                    delete/upload steps
 */
@Override
public void onExportComplete(Directories directories, ExportStats stats, Cluster cluster, GraphSchema graphSchema) throws Exception {
    // Files.walk keeps directory handles open until the stream is closed, so scope it
    // with try-with-resources instead of leaving it to the garbage collector.
    try (java.util.stream.Stream<Path> exportedPaths = Files.walk(directories.rootDirectory())) {
        long size = exportedPaths.mapToLong(p -> p.toFile().length()).sum();
        logger.info("Total size of exported files: {}", FileUtils.byteCountToDisplaySize(size));
    } catch (Exception e) {
        // Size reporting is informational only; do not fail the export because of it.
        logger.debug("Unable to calculate total size of exported files", e);
    }
    if (StringUtils.isEmpty(outputS3Path)) {
        // Nothing to upload when no S3 destination was supplied.
        return;
    }
    logger.info("S3 upload params: {}", s3UploadParams);
    try (TransferManagerWrapper transferManager = new TransferManagerWrapper(s3Region)) {
        File outputDirectory = directories.rootDirectory().toFile();
        S3ObjectInfo outputS3ObjectInfo = calculateOutputS3Path(outputDirectory);
        Timer.timedActivity("uploading files to S3", (CheckedActivity.Runnable) () -> {
            deleteS3Directories(directories, outputS3ObjectInfo);
            uploadExportFilesToS3(transferManager.get(), outputDirectory, outputS3ObjectInfo);
            uploadCompletionFileToS3(transferManager.get(), outputDirectory, outputS3ObjectInfo, stats, graphSchema);
        });
        result.set(outputS3ObjectInfo);
    }
}
Also used : StringUtils(org.apache.commons.lang.StringUtils) Cluster(com.amazonaws.services.neptune.cluster.Cluster) S3ObjectInfo(com.amazonaws.services.neptune.util.S3ObjectInfo) LoggerFactory(org.slf4j.LoggerFactory) Directories(com.amazonaws.services.neptune.io.Directories) Timer(com.amazonaws.services.neptune.util.Timer) AtomicReference(java.util.concurrent.atomic.AtomicReference) ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) ProgressEvent(com.amazonaws.event.ProgressEvent) ArrayList(java.util.ArrayList) ProgressListener(com.amazonaws.event.ProgressListener) ObjectMetadata(com.amazonaws.services.s3.model.ObjectMetadata) TransferManagerWrapper(com.amazonaws.services.neptune.util.TransferManagerWrapper) com.amazonaws.services.s3.transfer(com.amazonaws.services.s3.transfer) ObjectTagging(com.amazonaws.services.s3.model.ObjectTagging) Path(java.nio.file.Path) ExportStats(com.amazonaws.services.neptune.propertygraph.ExportStats) ObjectWriter(com.fasterxml.jackson.databind.ObjectWriter) Files(java.nio.file.Files) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Collection(java.util.Collection) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) FileUtils(org.apache.commons.io.FileUtils) Tag(com.amazonaws.services.s3.model.Tag) UUID(java.util.UUID) PutObjectRequest(com.amazonaws.services.s3.model.PutObjectRequest) GraphSchema(com.amazonaws.services.neptune.propertygraph.schema.GraphSchema) List(java.util.List) java.io(java.io) NEPTUNE_EXPORT_TAGS(com.amazonaws.services.neptune.export.NeptuneExportService.NEPTUNE_EXPORT_TAGS) Paths(java.nio.file.Paths) JsonNodeFactory(com.fasterxml.jackson.databind.node.JsonNodeFactory) AmazonClientException(com.amazonaws.AmazonClientException) FilenameUtils(org.apache.commons.io.FilenameUtils) CheckedActivity(com.amazonaws.services.neptune.util.CheckedActivity) S3ObjectInfo(com.amazonaws.services.neptune.util.S3ObjectInfo) TransferManagerWrapper(com.amazonaws.services.neptune.util.TransferManagerWrapper) 
CheckedActivity(com.amazonaws.services.neptune.util.CheckedActivity) AmazonClientException(com.amazonaws.AmazonClientException)

Example 2 with TransferManagerWrapper

Use of com.amazonaws.services.neptune.util.TransferManagerWrapper in the project amazon-neptune-tools by awslabs.

In the class ExportToS3NeptuneExportEventHandler, method onError.

/**
 * Uploads whatever was written to the local output path to a "failed" location in S3,
 * together with the GC log, when {@code uploadToS3OnError} is enabled.
 *
 * <p>The target key is derived from the regular output path by replacing or appending
 * {@code /tmp} with {@code /failed} and adding a random UUID-based suffix. Any exception raised
 * while sizing or uploading is logged and not rethrown.
 */
@Override
public void onError() {
    if (!uploadToS3OnError) {
        return;
    }
    logger.warn("Uploading results of failed export to S3");
    if (StringUtils.isEmpty(outputS3Path)) {
        logger.warn("S3 output path is empty");
        return;
    }
    try {
        Path outputPath = Paths.get(localOutputPath);
        long size;
        // Files.walk keeps directory handles open until the stream is closed, so close it
        // before starting the upload.
        try (java.util.stream.Stream<Path> failedPaths = Files.walk(outputPath)) {
            size = failedPaths.mapToLong(p -> p.toFile().length()).sum();
        }
        logger.warn("Total size of failed export files: {}", FileUtils.byteCountToDisplaySize(size));
        try (TransferManagerWrapper transferManager = new TransferManagerWrapper(s3Region)) {
            // Random suffix with the dashes stripped from the UUID.
            String s3Suffix = UUID.randomUUID().toString().replace("-", "");
            File outputDirectory = outputPath.toFile();
            S3ObjectInfo outputS3ObjectInfo = calculateOutputS3Path(outputDirectory).replaceOrAppendKey("/tmp", "/failed").withNewKeySuffix(s3Suffix);
            Timer.timedActivity("uploading failed export files to S3", (CheckedActivity.Runnable) () -> {
                uploadExportFilesToS3(transferManager.get(), outputDirectory, outputS3ObjectInfo);
                uploadGcLogToS3(transferManager.get(), outputDirectory, outputS3ObjectInfo);
            });
            logger.warn("Failed export S3 location: {}", outputS3ObjectInfo.toString());
        }
    } catch (Exception e) {
        // Failures here are logged rather than rethrown.
        logger.error("Failed to upload failed export files to S3", e);
    }
}
Also used : Path(java.nio.file.Path) StringUtils(org.apache.commons.lang.StringUtils) Cluster(com.amazonaws.services.neptune.cluster.Cluster) S3ObjectInfo(com.amazonaws.services.neptune.util.S3ObjectInfo) LoggerFactory(org.slf4j.LoggerFactory) Directories(com.amazonaws.services.neptune.io.Directories) Timer(com.amazonaws.services.neptune.util.Timer) AtomicReference(java.util.concurrent.atomic.AtomicReference) ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) ProgressEvent(com.amazonaws.event.ProgressEvent) ArrayList(java.util.ArrayList) ProgressListener(com.amazonaws.event.ProgressListener) ObjectMetadata(com.amazonaws.services.s3.model.ObjectMetadata) TransferManagerWrapper(com.amazonaws.services.neptune.util.TransferManagerWrapper) com.amazonaws.services.s3.transfer(com.amazonaws.services.s3.transfer) ObjectTagging(com.amazonaws.services.s3.model.ObjectTagging) Path(java.nio.file.Path) ExportStats(com.amazonaws.services.neptune.propertygraph.ExportStats) ObjectWriter(com.fasterxml.jackson.databind.ObjectWriter) Files(java.nio.file.Files) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Collection(java.util.Collection) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) FileUtils(org.apache.commons.io.FileUtils) Tag(com.amazonaws.services.s3.model.Tag) UUID(java.util.UUID) PutObjectRequest(com.amazonaws.services.s3.model.PutObjectRequest) GraphSchema(com.amazonaws.services.neptune.propertygraph.schema.GraphSchema) List(java.util.List) java.io(java.io) NEPTUNE_EXPORT_TAGS(com.amazonaws.services.neptune.export.NeptuneExportService.NEPTUNE_EXPORT_TAGS) Paths(java.nio.file.Paths) JsonNodeFactory(com.fasterxml.jackson.databind.node.JsonNodeFactory) AmazonClientException(com.amazonaws.AmazonClientException) FilenameUtils(org.apache.commons.io.FilenameUtils) CheckedActivity(com.amazonaws.services.neptune.util.CheckedActivity) S3ObjectInfo(com.amazonaws.services.neptune.util.S3ObjectInfo) 
TransferManagerWrapper(com.amazonaws.services.neptune.util.TransferManagerWrapper) CheckedActivity(com.amazonaws.services.neptune.util.CheckedActivity) AmazonClientException(com.amazonaws.AmazonClientException)

Example 3 with TransferManagerWrapper

Use of com.amazonaws.services.neptune.util.TransferManagerWrapper in the project amazon-neptune-tools by awslabs.

In the class NeptuneExportService, method execute.

/**
 * Prepares the export command line, fetches any S3-hosted config/queries files, registers the
 * event handlers for the active profiles, runs the export and returns the S3 upload result.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Build {@code Args} from {@code cmd}; for export/config commands force the output
 *       directory to {@code <localOutputPath>/output} and add clone-cluster options.</li>
 *   <li>Optionally clear temp files, then download the config and queries files from S3 and
 *       point the corresponding options at the downloaded copies.</li>
 *   <li>Add the Neptune ML profile when requested via {@code additionalParams}.</li>
 *   <li>Unless a subdirectory is created or overwriting is allowed, check that the S3 output
 *       is empty.</li>
 *   <li>Register the S3 upload handler plus any profile-specific handlers and run the export.</li>
 * </ol>
 *
 * @return the result exposed by the S3 export event handler after the run
 * @throws IOException      declared by this method; presumably raised by the download or
 *                          S3 helper calls (their signatures are not visible here)
 * @throws RuntimeException wrapping any exception raised while preparing the arguments
 */
public S3ObjectInfo execute() throws IOException {
    Args args;
    try {
        args = new Args(cmd);
        // Options that will be supplied from S3 are dropped here and re-added after download.
        if (StringUtils.isNotEmpty(configFileS3Path)) {
            args.removeOptions("-c", "--config-file");
        }
        if (StringUtils.isNotEmpty(queriesFileS3Path)) {
            args.removeOptions("--queries");
        }
        if (args.contains("create-pg-config") || args.contains("export-pg") || args.contains("export-pg-from-config") || args.contains("export-pg-from-queries") || args.contains("export-rdf")) {
            // Always write to the service-controlled local output directory.
            args.removeOptions("-d", "--dir");
            args.addOption("-d", new File(localOutputPath, "output").getAbsolutePath());
            if (maxConcurrency > 0 && !args.contains("--clone-cluster-max-concurrency")) {
                args.addOption("--clone-cluster-max-concurrency", String.valueOf(maxConcurrency));
            }
            if (!args.contains("--clone-cluster-correlation-id")) {
                String correlationId = EnvironmentVariableUtils.getOptionalEnv("AWS_BATCH_JOB_ID", null);
                if (StringUtils.isNotEmpty(correlationId)) {
                    args.addOption("--clone-cluster-correlation-id", correlationId);
                }
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    try (TransferManagerWrapper transferManager = new TransferManagerWrapper(s3Region)) {
        if (cleanOutputPath) {
            clearTempFiles();
        }
        if (StringUtils.isNotEmpty(configFileS3Path)) {
            updateArgs(args, "--config-file", downloadFile(transferManager.get(), configFileS3Path));
        }
        if (StringUtils.isNotEmpty(queriesFileS3Path)) {
            updateArgs(args, "--queries", downloadFile(transferManager.get(), queriesFileS3Path));
        }
    }
    if (additionalParams.has(NEPTUNE_ML_PROFILE_NAME) && (!args.contains("--profile", NEPTUNE_ML_PROFILE_NAME))) {
        args.addOption("--profile", NEPTUNE_ML_PROFILE_NAME);
    }
    Collection<String> profiles = args.getOptionValues("--profile");
    if (!createExportSubdirectory && !overwriteExisting) {
        checkS3OutputIsEmpty();
    }
    EventHandlerCollection eventHandlerCollection = new EventHandlerCollection();
    Collection<CompletionFileWriter> completionFileWriters = new ArrayList<>();
    ExportToS3NeptuneExportEventHandler.S3UploadParams s3UploadParams = new ExportToS3NeptuneExportEventHandler.S3UploadParams().setCreateExportSubdirectory(createExportSubdirectory).setOverwriteExisting(overwriteExisting);
    ExportToS3NeptuneExportEventHandler exportToS3EventHandler = new ExportToS3NeptuneExportEventHandler(localOutputPath, outputS3Path, s3Region, completionFileS3Path, completionFilePayload, uploadToS3OnError, s3UploadParams, profiles, completionFileWriters);
    eventHandlerCollection.addHandler(exportToS3EventHandler);
    if (profiles.contains(NEPTUNE_ML_PROFILE_NAME)) {
        JsonNode neptuneMlNode = additionalParams.path(NEPTUNE_ML_PROFILE_NAME);
        // The V1 handler is chosen only for an explicit "v1." version; everything else uses V2.
        // asText() is used instead of textValue() so that a non-textual "version" value
        // (e.g. JSON null or a number) does not cause a NullPointerException.
        boolean useV1 = neptuneMlNode.has("version") && neptuneMlNode.get("version").asText().startsWith("v1.");
        if (useV1) {
            NeptuneMachineLearningExportEventHandlerV1 neptuneMlEventHandler = new NeptuneMachineLearningExportEventHandlerV1(outputS3Path, s3Region, createExportSubdirectory, additionalParams, args, profiles);
            eventHandlerCollection.addHandler(neptuneMlEventHandler);
        } else {
            NeptuneMachineLearningExportEventHandlerV2 neptuneMlEventHandler = new NeptuneMachineLearningExportEventHandlerV2(outputS3Path, s3Region, createExportSubdirectory, additionalParams, args, profiles);
            eventHandlerCollection.addHandler(neptuneMlEventHandler);
        }
    }
    if (profiles.contains(INCREMENTAL_EXPORT_PROFILE_NAME)) {
        IncrementalExportEventHandler incrementalExportEventHandler = new IncrementalExportEventHandler(additionalParams);
        // The incremental handler both writes to the completion file and receives export events.
        completionFileWriters.add(incrementalExportEventHandler);
        eventHandlerCollection.addHandler(incrementalExportEventHandler);
    }
    /*
     * We are removing a buffer of 1000 for maxFileDescriptorCount used at
     * com.amazonaws.services.neptune.propertygraph.io.LabelWriters#put
     * since the value received from neptune-export service is set as the `nofile` ulimit in the AWS Batch
     * container properties and there might be other processes on the container having open files.
     * This ensures we close the leastRecentlyAccessed files before exceeding the hard limit for `nofile` ulimit.
     * Math.max keeps the result from dropping below MAX_FILE_DESCRIPTOR_COUNT.
     */
    final int maxFileDescriptorCountAfterRemovingBuffer = Math.max(maxFileDescriptorCount - 1000, MAX_FILE_DESCRIPTOR_COUNT);
    eventHandlerCollection.onBeforeExport(args, s3UploadParams);
    logger.info("Args after service init: {}", String.join(" ", args.values()));
    new NeptuneExportRunner(args.values(), eventHandlerCollection, false, maxFileDescriptorCountAfterRemovingBuffer).run();
    return exportToS3EventHandler.result();
}
Also used : IncrementalExportEventHandler(com.amazonaws.services.neptune.profiles.incremental_export.IncrementalExportEventHandler) TransferManagerWrapper(com.amazonaws.services.neptune.util.TransferManagerWrapper) JsonNode(com.fasterxml.jackson.databind.JsonNode) IOException(java.io.IOException) NeptuneMachineLearningExportEventHandlerV2(com.amazonaws.services.neptune.profiles.neptune_ml.NeptuneMachineLearningExportEventHandlerV2) NeptuneMachineLearningExportEventHandlerV1(com.amazonaws.services.neptune.profiles.neptune_ml.NeptuneMachineLearningExportEventHandlerV1) File(java.io.File)

Aggregations

TransferManagerWrapper (com.amazonaws.services.neptune.util.TransferManagerWrapper)3 AmazonClientException (com.amazonaws.AmazonClientException)2 ProgressEvent (com.amazonaws.event.ProgressEvent)2 ProgressListener (com.amazonaws.event.ProgressListener)2 Cluster (com.amazonaws.services.neptune.cluster.Cluster)2 NEPTUNE_EXPORT_TAGS (com.amazonaws.services.neptune.export.NeptuneExportService.NEPTUNE_EXPORT_TAGS)2 Directories (com.amazonaws.services.neptune.io.Directories)2 ExportStats (com.amazonaws.services.neptune.propertygraph.ExportStats)2 GraphSchema (com.amazonaws.services.neptune.propertygraph.schema.GraphSchema)2 CheckedActivity (com.amazonaws.services.neptune.util.CheckedActivity)2 S3ObjectInfo (com.amazonaws.services.neptune.util.S3ObjectInfo)2 Timer (com.amazonaws.services.neptune.util.Timer)2 ObjectMetadata (com.amazonaws.services.s3.model.ObjectMetadata)2 ObjectTagging (com.amazonaws.services.s3.model.ObjectTagging)2 PutObjectRequest (com.amazonaws.services.s3.model.PutObjectRequest)2 Tag (com.amazonaws.services.s3.model.Tag)2 com.amazonaws.services.s3.transfer (com.amazonaws.services.s3.transfer)2 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)2 ObjectWriter (com.fasterxml.jackson.databind.ObjectWriter)2 JsonNodeFactory (com.fasterxml.jackson.databind.node.JsonNodeFactory)2