use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class DataSetServiceTest method compatibleDatasetsListDateOrder.
/**
 * Checks that the compatible-datasets listing honours {@code sort=creationDate&order=asc}.
 * <p>
 * Three data sets are created from the same CSV resource, then renamed so that alphabetical
 * order ("AAAA", "BBBB", "CCCC") is the reverse of creation order. The listing is requested
 * for the first data set and the returned names are expected to be "BBBB" then "AAAA"; the
 * expected result does not contain the requested data set itself.
 *
 * @throws Exception if the response body cannot be parsed as JSON, or a helper call fails
 */
@Test
public void compatibleDatasetsListDateOrder() throws Exception {
    // given: three data sets created in this order from the same CSV content
    String dataSetId = createCSVDataSet(this.getClass().getResourceAsStream(TAGADA_CSV), "ds-10");
    String dataSetId2 = createCSVDataSet(this.getClass().getResourceAsStream(TAGADA_CSV), "ds-9");
    String dataSetId3 = createCSVDataSet(this.getClass().getResourceAsStream(TAGADA_CSV), "ds-8");

    // rename them so that sorting by name would give the opposite order of sorting by creation date
    DataSetMetadata metadata1 = dataSetMetadataRepository.get(dataSetId);
    metadata1.setName("CCCC");
    dataSetMetadataRepository.save(metadata1);
    DataSetMetadata metadata2 = dataSetMetadataRepository.get(dataSetId2);
    metadata2.setName("BBBB");
    dataSetMetadataRepository.save(metadata2);
    DataSetMetadata metadata3 = dataSetMetadataRepository.get(dataSetId3);
    metadata3.setName("AAAA");
    dataSetMetadataRepository.save(metadata3);

    // when
    final String actual = when().get("/datasets/{id}/compatibledatasets?sort=creationDate&order=asc", dataSetId).asString();

    // then: names come back in ascending creation order (second-created first)
    final Iterator<JsonNode> elements = mapper.readTree(actual).elements();
    String[] expectedNames = new String[] { "BBBB", "AAAA" };
    int i = 0;
    while (elements.hasNext()) {
        assertThat(elements.next().get("name").asText(), is(expectedNames[i++]));
    }
    // without this check an empty (or too short) response would let the test pass vacuously
    assertThat(i, is(expectedNames.length));
}
use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class DataSetServiceTest method updateDatasetColumn_should_update_domain.
/**
 * Verifies that posting {@code {"domain": "JSO"}} to the column endpoint switches column
 * "0002" to the "JSO" semantic domain.
 * <p>
 * The "JSO" domain is first added to the column's semantic domains directly in the repository;
 * the column is expected to still report "FIRST_NAME" until the update request is sent.
 *
 * @throws Exception if the CSV resource cannot be read
 */
@Test
public void updateDatasetColumn_should_update_domain() throws Exception {
    // given: a data set created from the CSV resource
    final String csvContent = IOUtils.toString(this.getClass().getResourceAsStream(TAGADA_CSV), UTF_8);
    final String dataSetId = given().body(csvContent).queryParam(CONTENT_TYPE, "text/csv").when().post("/datasets").asString();

    // The repository update is done under the data set metadata lock: per the original author's
    // note, without it the added semantic domain would be erased by the analysis.
    final DistributedLock metadataLock = dataSetMetadataRepository.createDatasetMetadataLock(dataSetId);
    final ColumnMetadata columnBeforeUpdate;
    metadataLock.lock();
    try {
        final DataSetMetadata storedMetadata = dataSetMetadataRepository.get(dataSetId);
        assertNotNull(storedMetadata);
        final RowMetadata storedRow = storedMetadata.getRowMetadata();
        assertNotNull(storedRow);
        columnBeforeUpdate = storedRow.getById("0002");
        columnBeforeUpdate.getSemanticDomains().add(new SemanticDomain("JSO", "JSO label", 1.0F));
        dataSetMetadataRepository.save(storedMetadata);
    } finally {
        metadataLock.unlock();
    }

    // adding a candidate domain must not change the currently selected one
    assertThat(columnBeforeUpdate.getDomain(), is("FIRST_NAME"));
    assertThat(columnBeforeUpdate.getDomainLabel(), is("First Name"));
    assertThat(columnBeforeUpdate.getDomainFrequency(), is(100.0F));

    // when
    final Response updateResponse = given().body("{\"domain\": \"JSO\"}").when().contentType(JSON)
            .post("/datasets/{dataSetId}/column/{columnId}", dataSetId, "0002");

    // then
    updateResponse.then().statusCode(200);
    final DataSetMetadata reloadedMetadata = dataSetMetadataRepository.get(dataSetId);
    assertNotNull(reloadedMetadata);
    final RowMetadata reloadedRow = reloadedMetadata.getRowMetadata();
    assertNotNull(reloadedRow);
    final ColumnMetadata actual = reloadedRow.getById("0002");
    assertThat(actual.getDomain(), is("JSO"));
    assertThat(actual.getDomainLabel(), is("JSO label"));
    assertThat(actual.getDomainFrequency(), is(1.0F));
}
use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class DataSetServiceTest method updateDatasetColumn_should_update_type.
/**
 * Verifies that posting {@code {"type": "integer"}} to the column endpoint changes the type of
 * column "0002" from "string" to "integer" while its domain, domain label and domain frequency
 * keep their previous values.
 *
 * @throws Exception if the CSV resource cannot be read
 */
@Test
public void updateDatasetColumn_should_update_type() throws Exception {
    // given: a data set created from the CSV resource
    final String csvContent = IOUtils.toString(this.getClass().getResourceAsStream(TAGADA_CSV), UTF_8);
    final String dataSetId = given().body(csvContent).queryParam(CONTENT_TYPE, "text/csv").when().post("/datasets").asString();

    final DataSetMetadata initialMetadata = dataSetMetadataRepository.get(dataSetId);
    assertNotNull(initialMetadata);
    final RowMetadata initialRow = initialMetadata.getRowMetadata();
    assertNotNull(initialRow);

    // state of the column before the update request
    final ColumnMetadata initialColumn = initialRow.getById("0002");
    assertThat(initialColumn.getDomain(), is("FIRST_NAME"));
    assertThat(initialColumn.getDomainLabel(), is("First Name"));
    assertThat(initialColumn.getDomainFrequency(), is(100.0F));
    assertThat(initialColumn.getType(), is("string"));

    // when
    final Response updateResponse = given().body("{\"type\": \"integer\"}").when().contentType(JSON)
            .post("/datasets/{dataSetId}/column/{columnId}", dataSetId, "0002");

    // then
    updateResponse.then().statusCode(200);
    final DataSetMetadata reloadedMetadata = dataSetMetadataRepository.get(dataSetId);
    assertNotNull(reloadedMetadata);
    final RowMetadata reloadedRow = reloadedMetadata.getRowMetadata();
    assertNotNull(reloadedRow);

    // only the type is expected to differ from the initial state
    final ColumnMetadata actual = reloadedRow.getById("0002");
    assertThat(actual.getDomain(), is("FIRST_NAME"));
    assertThat(actual.getDomainLabel(), is("First Name"));
    assertThat(actual.getDomainFrequency(), is(100.0F));
    assertThat(actual.getType(), is("integer"));
}
use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class StatisticsAnalysisTest method testTDP_402.
/**
 * See <a href="https://jira.talendforge.org/browse/TDP-402">https://jira.talendforge.org/browse/TDP-402</a>.
 * <p>
 * Checks that column "0004" ("date-of-birth", type "date") of {@code dataset.csv} reports exactly
 * five patterns in its statistics, and that each of the five expected patterns is among them.
 *
 * @throws Exception if initializing the data set metadata fails
 */
@Test
public void testTDP_402() throws Exception {
    final DataSetMetadata metadata = initializeDataSetMetadata(this.getClass().getResourceAsStream("dataset.csv"));

    final ColumnMetadata birthDateColumn = metadata.getRowMetadata().getById("0004");
    assertThat(birthDateColumn.getName(), is("date-of-birth"));
    assertThat(birthDateColumn.getType(), is("date"));

    // keep only the pattern string of each pattern/frequency entry
    final List<PatternFrequency> frequencies = birthDateColumn.getStatistics().getPatternFrequencies();
    final List<String> detectedPatterns = frequencies.stream() //
            .map(PatternFrequency::getPattern) //
            .collect(Collectors.toList());

    assertThat(detectedPatterns.size(), is(5));
    final String[] expectedPatterns = { "MM/dd/yyyy", "dd/MM/yyyy", "aaaaa", "yyyy-MM-dd", "yyyy-M-d" };
    for (final String expectedPattern : expectedPatterns) {
        assertTrue(detectedPatterns.contains(expectedPattern));
    }
}
use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class StatisticsAnalysisTest method testTDP_2021.
/**
 * See <a href="https://jira.talendforge.org/browse/TDP-2120">TDP-2120</a>.
 * <p>
 * Checks that the statistics of column "0004" ("date-of-birth") of {@code dataset.csv} expose
 * numeric (non-NaN) minimum and maximum values equal to the expected bounds.
 * <p>
 * NOTE(review): the method name says 2021 whereas the linked issue is TDP-2120 — confirm which
 * one is intended.
 *
 * @throws Exception if initializing the data set metadata fails
 */
@Test
public void testTDP_2021() throws Exception {
    // expected bounds; presumably epoch milliseconds of the earliest/latest date — TODO confirm
    final double expectedMin = -924912000000D;
    final double expectedMax = -707529600000D;
    final double tolerance = 0.5;

    final DataSetMetadata metadata = initializeDataSetMetadata(this.getClass().getResourceAsStream("dataset.csv"));
    final ColumnMetadata birthDateColumn = metadata.getRowMetadata().getById("0004");
    assertThat(birthDateColumn.getName(), is("date-of-birth"));

    final double actualMin = birthDateColumn.getStatistics().getMin();
    final double actualMax = birthDateColumn.getStatistics().getMax();

    // both bounds must be real numbers before their values are compared
    assertFalse(Double.isNaN(actualMin));
    assertFalse(Double.isNaN(actualMax));
    assertEquals(expectedMin, actualMin, tolerance);
    assertEquals(expectedMax, actualMax, tolerance);
}
Aggregations