use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class DataSetImportTest method testImportStatus.
/**
 * Test 'importing' status: the data set should remain in 'importing' state as long as the create operation isn't
 * completed.
 */
@Test
public void testImportStatus() throws Exception {
    // Create a data set (asynchronously)
    Runnable creation = () -> {
        try {
            dataSetId = given() //
                    .body(IOUtils.toString(DataSetImportTest.class.getResourceAsStream("tagada.csv"), UTF_8)) //
                    .queryParam("Content-Type", "text/csv") //
                    .when().post("/datasets").asString();
            LOGGER.debug("testImportStatus dataset created #{}", dataSetId);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    };
    Thread creationThread = new Thread(creation);
    creationThread.start();
    // Wait for the data set object to be created
    while (!dataSetMetadataRepository.list().findFirst().isPresent()) {
        TimeUnit.MILLISECONDS.sleep(20);
    }
    // The data set should show as importing
    final Iterator<DataSetMetadata> iterator = dataSetMetadataRepository.list().iterator();
    assertThat(iterator.hasNext(), is(true));
    final DataSetMetadata next = iterator.next();
    assertThat(next.getLifecycle().isImporting(), is(true));
    // Assert the lifecycle flags once the import is done.
    // Wait until creation is done (i.e. end of thread, since creation is a blocking operation).
    creationThread.join();
    assertThat(dataSetId, notNullValue());
    final DataSetMetadata metadata = dataSetMetadataRepository.get(dataSetId);
    assertThat(metadata.getLifecycle().isImporting(), is(false));
    assertThat(metadata.getLifecycle().schemaAnalyzed(), is(true));
    // TDP-283: Quality analysis should be synchronous
    assertThat(metadata.getLifecycle().qualityAnalyzed(), is(true));
}
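One sharp edge in the test above: its polling loop has no upper bound, so a failed creation would hang the build, whereas the next test caps its wait at 500 iterations. A bounded-wait helper would make that pattern reusable. The sketch below is hypothetical (not project code) and assumes the test class's dataSetMetadataRepository field plus a static import of org.junit.Assert.fail:

    // Hypothetical helper: polls the repository until a metadata entry appears,
    // failing the test once the timeout is exceeded.
    private void waitForFirstMetadata(long timeoutMillis) throws InterruptedException {
        final long deadline = System.currentTimeMillis() + timeoutMillis;
        while (!dataSetMetadataRepository.list().findFirst().isPresent()) {
            if (System.currentTimeMillis() > deadline) {
                fail("no data set metadata appeared within " + timeoutMillis + " ms");
            }
            TimeUnit.MILLISECONDS.sleep(20);
        }
    }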
use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class DataSetImportTest method testCannotOpenDataSetBeingImported.
/**
 * Test 'importing' status with get: the user is not allowed to get the data set content while it is still being
 * imported. In a real-life situation this kind of event is rather unlikely, since the UUID of the data set is only
 * returned once the creation completes (no access before this).
 */
@Test
public void testCannotOpenDataSetBeingImported() throws Exception {
    LOGGER.info("testCannotOpenDataSetBeingImported started");
    assertThat(dataSetMetadataRepository.size(), is(0));
    LOGGER.debug("dataSetMetadata repository is empty");
    // Create a data set (asynchronously)
    Runnable creation = () -> {
        try {
            dataSetId = given() //
                    .body(IOUtils.toString(DataSetImportTest.class.getResourceAsStream("tagada.csv"), UTF_8)) //
                    .queryParam(CONTENT_TYPE, "text/csv") //
                    .when().post("/datasets").asString();
            LOGGER.debug("testCannotOpenDataSetBeingImported dataset created #{}", dataSetId);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    };
    Thread creationThread = new Thread(creation);
    creationThread.start();
    // Wait for creation of the data set object, but give up after 500 iterations (~10 s)
    int iterations = 0;
    while (dataSetMetadataRepository.size() == 0) {
        TimeUnit.MILLISECONDS.sleep(20);
        if (iterations++ > 500) {
            fail();
        }
    }
    // Find the data set being imported...
    final Iterator<DataSetMetadata> iterator = dataSetMetadataRepository.list().iterator();
    assertThat(iterator.hasNext(), is(true));
    final DataSetMetadata next = iterator.next();
    LOGGER.info("found {}", next);
    assertThat(next.getLifecycle().isImporting(), is(true));
    // ... the get operation should *not* return the data set being imported but report an error ...
    int statusCode = when().get("/datasets/{id}/content", next.getId()).getStatusCode();
    assertThat(statusCode, is(400));
    // Assert the new data set is returned once creation completes.
    // Wait until creation is done (i.e. end of thread, since creation is a blocking operation).
    creationThread.join();
    assertThat(dataSetId, notNullValue());
    final DataSetMetadata metadata = dataSetMetadataRepository.get(dataSetId);
    statusCode = when().get("/datasets/{id}/content", metadata.getId()).getStatusCode();
    assertThat(statusCode, is(200));
}
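Both import tests carry the same asynchronous-creation boilerplate. A helper that starts the POST on a background thread and hands back the thread would factor it out. This is a hypothetical refactoring sketch, reusing the exact REST-assured chain and the dataSetId field from the tests above:

    // Hypothetical refactoring, not present in the original tests: runs the
    // dataset creation POST on a background thread so the test can observe the
    // 'importing' lifecycle state, then join() once done.
    private Thread startAsyncCsvCreation(String resourceName) {
        Thread creationThread = new Thread(() -> {
            try {
                dataSetId = given() //
                        .body(IOUtils.toString(DataSetImportTest.class.getResourceAsStream(resourceName), UTF_8)) //
                        .queryParam(CONTENT_TYPE, "text/csv") //
                        .when().post("/datasets").asString();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        });
        creationThread.start();
        return creationThread;
    }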
use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class DataSetServiceTest method delete.
@Test
public void delete() throws Exception {
    String expectedId = UUID.randomUUID().toString();
    DataSetMetadata dataSetMetadata = metadataBuilder.metadata() //
            .id(expectedId) //
            .formatFamilyId(new CSVFormatFamily().getBeanId()) //
            .build();
    dataSetMetadata.getContent().addParameter(CSVFormatFamily.SEPARATOR_PARAMETER, ";");
    dataSetMetadataRepository.save(dataSetMetadata);
    List<String> ids = from(when().get("/datasets").asString()).get("");
    assertThat(ids.size(), is(1));
    int before = dataSetMetadataRepository.size();
    when().delete("/datasets/{id}", expectedId).then().statusCode(OK.value());
    int after = dataSetMetadataRepository.size();
    logger.debug("delete before {} after {}", before, after);
    assertThat(before - after, is(1));
}
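A natural extra check after the delete would be that the removed id is no longer served. The error code for a missing data set is not shown anywhere in these snippets, so the 404 below is purely an assumption:

    // Assumption: the service answers 404 for an id that no longer exists; the
    // actual error code is not shown in the snippets on this page.
    when().get("/datasets/{id}/content", expectedId).then().statusCode(404);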
use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class DataSetServiceTest method shouldUpdateSeparatorWithoutHeader.
/**
 * see https://jira.talendforge.org/browse/TDP-1066
 */
@Test
public void shouldUpdateSeparatorWithoutHeader() throws Exception {
    // given
    String dataSetId = createCSVDataSet(this.getClass().getResourceAsStream("../tdp-1066_no_header.ssv"), "tdp-1066-2");
    InputStream metadataInput = when().get("/datasets/{id}/metadata", dataSetId).asInputStream();
    DataSet dataSet = mapper.readerFor(DataSet.class).readValue(metadataInput);
    DataSetMetadata metadata = dataSet.getMetadata();
    // then
    // ';' is guessed as separator ==> 2 columns
    assertThat(metadata.getRowMetadata().getColumns().size(), is(2));
    // when
    final Map<String, String> parameters = metadata.getContent().getParameters();
    parameters.put(CSVFormatFamily.SEPARATOR_PARAMETER, " ");
    parameters.remove(CSVFormatFamily.HEADER_COLUMNS_PARAMETER);
    final int statusCode = given() //
            .contentType(JSON) //
            .body(mapper.writer().writeValueAsString(metadata)) //
            .expect().statusCode(200).log().ifError() //
            .when().put("/datasets/{id}", dataSetId) //
            .getStatusCode();
    assertThat(statusCode, is(200));
    assertQueueMessages(dataSetId);
    // then
    InputStream datasetContent = given().when().get("/datasets/{id}/content?metadata=true", dataSetId).asInputStream();
    final DataSet actual = mapper.readerFor(DataSet.class).readValue(datasetContent);
    final DataSetMetadata actualMetadata = actual.getMetadata();
    // with ' ' as separator ==> 10 columns
    assertThat(actualMetadata.getRowMetadata().getColumns().size(), is(10));
}
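The serialize-then-PUT round trip above is the reusable part of this test. A hypothetical helper built from the same calls (the method name is an invention of this note, not project code):

    // Hypothetical helper mirroring the update above: serializes the edited
    // metadata, PUTs it back and fails fast if the service rejects it.
    private void putMetadata(String dataSetId, DataSetMetadata metadata) throws IOException {
        given().contentType(JSON) //
                .body(mapper.writer().writeValueAsString(metadata)) //
                .expect().statusCode(200).log().ifError() //
                .when().put("/datasets/{id}", dataSetId);
    }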
use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class DataSetServiceTest method should_list_filtered_datasets_properly.
@Test
public void should_list_filtered_datasets_properly() throws Exception {
    // create data sets
    final String dataSetId1 = createCSVDataSet(this.getClass().getResourceAsStream("../tagada3.csv"), "dataset1");
    final String dataSetId2 = createCSVDataSet(this.getClass().getResourceAsStream("../avengers.csv"), "dataset2");
    final String dataSetId3 = createCSVDataSet(this.getClass().getResourceAsStream("../tagada.csv"), "dataset3");
    createCSVDataSet(this.getClass().getResourceAsStream("../tagada2.csv"), "dataset4");
    // Certify dataset1 and make it more recent
    final DataSetMetadata dataSetMetadata1 = dataSetMetadataRepository.get(dataSetId1);
    dataSetMetadata1.getGovernance().setCertificationStep(Certification.CERTIFIED);
    dataSetMetadata1.setLastModificationDate((now().getEpochSecond() + 1) * 1_000);
    dataSetMetadataRepository.save(dataSetMetadata1);
    final DataSetMetadata dataSetMetadata2 = dataSetMetadataRepository.get(dataSetId2);
    dataSetMetadataRepository.save(dataSetMetadata2);
    // Certify dataset3
    final DataSetMetadata dataSetMetadata3 = dataSetMetadataRepository.get(dataSetId3);
    dataSetMetadata3.getGovernance().setCertificationStep(Certification.CERTIFIED);
    dataSetMetadataRepository.save(dataSetMetadata3);
    // Mark dataset1 and dataset2 as favorites of the current user
    UserData userData = new UserData();
    userData.setUserId(security.getUserId());
    userData.addFavoriteDataset(dataSetMetadata1.getId());
    userData.addFavoriteDataset(dataSetMetadata2.getId());
    userDataRepository.save(userData);
    // @formatter:off
    // certified, favorite and recent
    given().queryParam("favorite", "true").queryParam("certified", "true").queryParam("limit", "true")
        .when().get("/datasets")
        .then().statusCode(200).body("name", hasItem("dataset1")).body("name", hasSize(1));
    // certified, favorite and recent, with a name filter that matches nothing
    given().queryParam("favorite", "true").queryParam("certified", "true").queryParam("limit", "true").queryParam("name", "2")
        .when().get("/datasets")
        .then().statusCode(200).body("name", hasSize(0));
    // only names (the match is case-insensitive)
    given().queryParam("name", "ATAset2")
        .when().get("/datasets")
        .then().statusCode(200).body("name", hasItem("dataset2")).body("name", hasSize(1));
    // only favorites
    given().queryParam("favorite", "true")
        .when().get("/datasets")
        .then().statusCode(200).body("name", hasItems("dataset1", "dataset2")).body("name", hasSize(2));
    // only certified
    given().queryParam("certified", "true")
        .when().get("/datasets")
        .then().statusCode(200).body("name", hasItems("dataset1", "dataset3")).body("name", hasSize(2));
    // only recent
    given().queryParam("limit", "true").queryParam("sort", LAST_MODIFICATION_DATE.camelName())
        .when().get("/datasets")
        .then().statusCode(200).body("name", hasItems("dataset1", "dataset3", "dataset4")).body("name", hasSize(3));
    // all
    when().get("/datasets")
        .then().statusCode(200).body("name", hasItems("dataset1", "dataset2", "dataset3", "dataset4")).body("name", hasSize(4));
    // @formatter:on
}
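The chained queryParam calls can also be supplied as a single map, which is convenient when the filter combination is built dynamically. A minimal sketch using REST-assured's queryParams(Map), reproducing the "only favorites" query above:

    // Same query as the "only favorites" case, with the filters supplied as a
    // java.util.HashMap instead of chained queryParam calls.
    Map<String, String> filters = new HashMap<>();
    filters.put("favorite", "true");
    given().queryParams(filters)
        .when().get("/datasets")
        .then().statusCode(200).body("name", hasItems("dataset1", "dataset2")).body("name", hasSize(2));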