Search in sources :

Example 6 with BulkLoadFileModel

use of bio.terra.model.BulkLoadFileModel in project jade-data-repo by DataBiosphere.

the class FileLoadTest method makeBulkFileLoad.

/**
 * Writes a bulk load control file to the ingest bucket and builds the request that points at it.
 *
 * <p>The control file contains one JSON-serialized {@code BulkLoadFileModel} per line. The
 * entries are produced by cycling through {@code goodFileSource} a whole number of times, so
 * the number of lines written is {@code fileCount} rounded down to a multiple of
 * {@code goodFileSource.length} (zero lines when {@code fileCount} is smaller than that length).
 *
 * @param tagBase prefix of the load tag; a randomized test id is appended to it
 * @param fileCount requested number of file entries (see rounding note above)
 * @return a request with the profile id, the load tag, {@code maxFailedFileLoads} of 0 and the
 *     {@code gs://} path of the control file that was written
 * @throws AssertionError if the control file cannot be written; the underlying
 *     {@link IOException} is attached as the cause
 */
private BulkLoadRequestModel makeBulkFileLoad(String tagBase, int fileCount) {
    String testId = Names.randomizeName("test");
    String loadTag = tagBase + testId;
    String targetPath = "scratch/loadtest" + UUID.randomUUID().toString() + ".json";
    // track the file so it gets cleaned up
    connectedOperations.addScratchFile(targetPath);
    String gspath = "gs://" + testConfig.getIngestbucket() + "/" + targetPath;
    Storage storage = StorageOptions.getDefaultInstance().getService();
    try (GcsChannelWriter writer = new GcsChannelWriter(storage, testConfig.getIngestbucket(), targetPath)) {
        // length is 20
        // Integer division: any remainder of fileCount is intentionally dropped.
        int repeats = fileCount / goodFileSource.length;
        for (int r = 0; r < repeats; r++) {
            for (int i = 0; i < goodFileSource.length; i++) {
                BulkLoadFileModel fileModel = getFileModel(i, r, testId);
                String fileLine = objectMapper.writeValueAsString(fileModel) + "\n";
                writer.write(fileLine);
            }
        }
    } catch (IOException ex) {
        // Fail the test with the same message as before, but keep the I/O error as the cause so
        // the reason for the failed write shows up in the test report.
        throw new AssertionError(
            "Failed to write load file '" + targetPath + "' to bucket '" + testConfig.getIngestbucket() + "'", ex);
    }
    return new BulkLoadRequestModel()
        .profileId(profileModel.getId())
        .loadTag(loadTag)
        .maxFailedFileLoads(0)
        .loadControlFile(gspath);
}
Also used : Storage(com.google.cloud.storage.Storage) GcsChannelWriter(bio.terra.service.filedata.google.gcs.GcsChannelWriter) BulkLoadRequestModel(bio.terra.model.BulkLoadRequestModel) IOException(java.io.IOException) BulkLoadFileModel(bio.terra.model.BulkLoadFileModel)

Example 7 with BulkLoadFileModel

use of bio.terra.model.BulkLoadFileModel in project jade-data-repo by DataBiosphere.

the class FileOperationTest method arrayMultiFileLoadFailRetryTest.

/**
 * Loads an array of three files where the middle entry is expected to fail, verifies the
 * per-file results and the load history rows, then replaces the failing entry and re-runs the
 * same load (same load tag) expecting all three files to be reported as succeeded.
 */
@Test
public void arrayMultiFileLoadFailRetryTest() throws Exception {
    String testId = Names.randomizeName("test");
    String loadTag = "arrayMultiFileLoadFileRetryTest" + testId;
    BulkLoadArrayRequestModel arrayLoad = new BulkLoadArrayRequestModel().profileId(profileModel.getId()).loadTag(loadTag).maxFailedFileLoads(2);
    // NOTE(review): the boolean presumably selects a loadable (true) vs. broken (false) source
    // file - confirm against getFileModel.
    arrayLoad.addLoadArrayItem(getFileModel(true, 2, testId));
    arrayLoad.addLoadArrayItem(getFileModel(false, 3, testId));
    arrayLoad.addLoadArrayItem(getFileModel(true, 4, testId));
    BulkLoadArrayResultModel result = connectedOperations.ingestArraySuccess(datasetSummary.getId(), arrayLoad);
    // NOTE(review): numeric arguments look like total, succeeded, failed, not-tried - confirm
    // against checkLoadSummary.
    checkLoadSummary(result.getLoadSummary(), loadTag, 3, 2, 1, 0);
    // Index the per-file results by target path so they can be matched to the request entries.
    Map<String, BulkLoadFileResultModel> resultMap = new HashMap<>();
    for (BulkLoadFileResultModel fileResult : result.getLoadFileResults()) {
        resultMap.put(fileResult.getTargetPath(), fileResult);
    }
    // Query Big Query datarepo_load_history table - assert correctly reflects different
    // bulk load file states
    String columnsToQuery = "state, file_id, error";
    TableResult queryLoadHistoryTableResult = queryLoadHistoryTable(columnsToQuery);
    String succeededState = BulkLoadFileState.SUCCEEDED.toString();
    String failedState = BulkLoadFileState.FAILED.toString();
    for (FieldValueList item : queryLoadHistoryTableResult.getValues()) {
        // Column positions follow the order given in columnsToQuery.
        String state = item.get(0).getStringValue();
        assertTrue("state should either be succeeded or failed.", state.equals(succeededState) || state.equals(failedState));
        if (state.equals(succeededState)) {
            assertTrue("file_id should have value", item.get(1).getStringValue().length() > 0);
            assertTrue("Error column should be empty", item.get(2).getStringValue().length() == 0);
        } else if (state.equals(failedState)) {
            assertTrue("file_id should NOT have value", item.get(1).getStringValue().length() == 0);
            assertTrue("Error column should have value", item.get(2).getStringValue().length() > 0);
        }
    }
    List<BulkLoadFileModel> loadArray = arrayLoad.getLoadArray();
    BulkLoadFileResultModel fileResult = resultMap.get(loadArray.get(0).getTargetPath());
    checkFileResultSuccess(fileResult);
    fileResult = resultMap.get(loadArray.get(1).getTargetPath());
    checkFileResultFailed(fileResult);
    fileResult = resultMap.get(loadArray.get(2).getTargetPath());
    checkFileResultSuccess(fileResult);
    // fix the bad file and retry load
    loadArray.set(1, getFileModel(true, 3, testId));
    BulkLoadArrayResultModel result2 = connectedOperations.ingestArraySuccess(datasetSummary.getId(), arrayLoad);
    checkLoadSummary(result2.getLoadSummary(), loadTag, 3, 3, 0, 0);
}
Also used : BulkLoadArrayRequestModel(bio.terra.model.BulkLoadArrayRequestModel) TableResult(com.google.cloud.bigquery.TableResult) BulkLoadFileResultModel(bio.terra.model.BulkLoadFileResultModel) BulkLoadArrayResultModel(bio.terra.model.BulkLoadArrayResultModel) FieldValueList(com.google.cloud.bigquery.FieldValueList) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) BulkLoadFileModel(bio.terra.model.BulkLoadFileModel) SpringBootTest(org.springframework.boot.test.context.SpringBootTest) Test(org.junit.Test)

Example 8 with BulkLoadFileModel

use of bio.terra.model.BulkLoadFileModel in project jade-data-repo by DataBiosphere.

the class FileTest method longFileLoadTest.

// Long-running workload that is expected to complete successfully while pods are being
// deleted and recovering underneath it.
// Ignored during normal test runs.
@Ignore
@Test
public void longFileLoadTest() throws Exception {
    // TODO: this should run for about 5 minutes on 2 DRmanager instances. Non-local loads run at
    // roughly 2.5GB/minute, so with fixed 1GB files each instance handles about 2.5 files per
    // minute and two instances about 5 files per minute; 5 minutes therefore needs 25 files.
    // (The source directory holds 25 files; loading more would need a reuse scheme like the
    // one in fileLoadTest.)
    final int filesToLoad = 25;
    String loadTag = Names.randomizeName("longtest");
    // Allowing as many failures as there are files means the load does not stop on a failure.
    BulkLoadArrayRequestModel arrayLoad = new BulkLoadArrayRequestModel()
        .profileId(profileId)
        .loadTag(loadTag)
        .maxFailedFileLoads(filesToLoad);
    logger.info("longFileLoadTest loading " + filesToLoad + " files into dataset id " + datasetId);
    int fileIndex = 0;
    while (fileIndex < filesToLoad) {
        // Source and target share the same tail; the target is rooted under the load tag.
        String tailPath = String.format("/fileloadscaletest/file1GB-%02d.txt", fileIndex);
        String description = "bulk load file " + fileIndex;
        BulkLoadFileModel fileModel = new BulkLoadFileModel().mimeType("application/binary");
        fileModel
            .description(description)
            .sourcePath("gs://jade-testdata-uswestregion" + tailPath)
            .targetPath("/" + loadTag + tailPath);
        arrayLoad.addLoadArrayItem(fileModel);
        fileIndex++;
    }
    BulkLoadArrayResultModel loadResult = dataRepoFixtures.bulkLoadArray(steward(), datasetId, arrayLoad);
    // Only the summary counts are reported; this test makes no assertions on them.
    BulkLoadResultModel summary = loadResult.getLoadSummary();
    logger.info("Total files    : " + summary.getTotalFiles());
    logger.info("Succeeded files: " + summary.getSucceededFiles());
    logger.info("Failed files   : " + summary.getFailedFiles());
    logger.info("Not Tried files: " + summary.getNotTriedFiles());
}
Also used : BulkLoadArrayRequestModel(bio.terra.model.BulkLoadArrayRequestModel) BulkLoadArrayResultModel(bio.terra.model.BulkLoadArrayResultModel) BulkLoadResultModel(bio.terra.model.BulkLoadResultModel) BulkLoadFileModel(bio.terra.model.BulkLoadFileModel) Ignore(org.junit.Ignore) Test(org.junit.Test) SpringBootTest(org.springframework.boot.test.context.SpringBootTest)

Aggregations

BulkLoadFileModel (bio.terra.model.BulkLoadFileModel)8 BulkLoadArrayRequestModel (bio.terra.model.BulkLoadArrayRequestModel)3 BulkLoadArrayResultModel (bio.terra.model.BulkLoadArrayResultModel)3 BulkLoadRequestModel (bio.terra.model.BulkLoadRequestModel)3 GcsChannelWriter (bio.terra.service.filedata.google.gcs.GcsChannelWriter)3 Storage (com.google.cloud.storage.Storage)3 IOException (java.io.IOException)3 CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString)3 BulkLoadFileResultModel (bio.terra.model.BulkLoadFileResultModel)2 BufferedReader (java.io.BufferedReader)2 ArrayList (java.util.ArrayList)2 Test (org.junit.Test)2 SpringBootTest (org.springframework.boot.test.context.SpringBootTest)2 BulkLoadResultModel (bio.terra.model.BulkLoadResultModel)1 BulkLoadControlFileException (bio.terra.service.filedata.exception.BulkLoadControlFileException)1 GcsBufferedReader (bio.terra.service.filedata.google.gcs.GcsBufferedReader)1 FlightMap (bio.terra.stairway.FlightMap)1 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 Jdk8Module (com.fasterxml.jackson.datatype.jdk8.Jdk8Module)1 JavaTimeModule (com.fasterxml.jackson.datatype.jsr310.JavaTimeModule)1