use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class QualityAnalysisTest method testAnalysis.
/**
 * Stores the "avengers.csv" resource as a new data set, runs the format, content and schema analyses followed by
 * the quality analysis, then checks the lifecycle flag, the record count and the quality counters of each column.
 */
@Test
public void testAnalysis() {
    // Register a fresh data set under a random id and attach the raw CSV content to it.
    final String dataSetId = UUID.randomUUID().toString();
    final DataSetMetadata stored = metadataBuilder.metadata().id(dataSetId).build();
    dataSetMetadataRepository.save(stored);
    contentStore.storeAsRaw(stored, DataSetServiceTest.class.getResourceAsStream("../avengers.csv"));

    // Analyses executed ahead of the one under test.
    formatAnalysis.analyze(dataSetId);
    contentAnalysis.analyze(dataSetId);
    schemaAnalysis.analyze(dataSetId);

    // Analysis under test.
    qualityAnalysis.analyze(dataSetId);

    final DataSetMetadata analyzed = dataSetMetadataRepository.get(dataSetId);
    assertThat(analyzed.getLifecycle().qualityAnalyzed(), is(true));
    assertThat(analyzed.getContent().getNbRecords(), is(5L));

    // Each column is expected to report 5 valid values and no invalid or empty ones.
    for (ColumnMetadata current : analyzed.getRowMetadata().getColumns()) {
        final Quality columnQuality = current.getQuality();
        assertThat(columnQuality.getValid(), is(5));
        assertThat(columnQuality.getInvalid(), is(0));
        assertThat(columnQuality.getEmpty(), is(0));
    }
}
use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class SchemaAnalysisTest method testTDP_1674.
/**
 * See <a href="https://jira.talendforge.org/browse/TDP-1674">TDP-1674_error_with_ipv6_addresses</a>.
 * <p>
 * Checks the schema detected for the "ipv6.csv" resource: number of columns, column names, column types and the
 * domain label of the third column.
 */
@Test
public void testTDP_1674() {
    final DataSetMetadata actual = initializeDataSetMetadata(DataSetServiceTest.class.getResourceAsStream("../ipv6.csv"));
    assertThat(actual.getLifecycle().schemaAnalyzed(), is(true));
    final String[] expectedNames = { "number", "description", "address" };
    final Type[] expectedTypes = { Type.INTEGER, Type.STRING, Type.STRING };
    // Fail with a readable message on a column count mismatch instead of an ArrayIndexOutOfBoundsException
    // (too many columns) or an IndexOutOfBoundsException on the domain label lookup below (too few columns).
    assertThat(actual.getRowMetadata().getColumns().size(), is(expectedNames.length));
    int i = 0;
    for (ColumnMetadata column : actual.getRowMetadata().getColumns()) {
        assertThat(column.getName(), is(expectedNames[i]));
        assertThat(column.getType(), is(expectedTypes[i].getName()));
        i++;
    }
    // Actual value first, expected value in the matcher, so a failure reports them the right way round.
    assertThat(actual.getRowMetadata().getColumns().get(2).getDomainLabel(), is("IPv6 Address"));
}
use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class SchemaAnalysisTest method testTDP_224.
/**
 * See <a href="https://jira.talendforge.org/browse/TDP-224">https://jira.talendforge.org/browse/TDP-224</a>.
 * <p>
 * Checks the schema detected for the "whatever.xls" resource: number of columns, column names and column types.
 */
@Test
public void testTDP_224() {
    final DataSetMetadata actual = initializeDataSetMetadata(DataSetServiceTest.class.getResourceAsStream("../whatever.xls"));
    assertThat(actual.getLifecycle().schemaAnalyzed(), is(true));
    // Not a typo: this is what QA provided as column name.
    final String[] expectedNames = { "whaterver" };
    final Type[] expectedTypes = { Type.STRING };
    // Without this check the loop below would pass vacuously if no column was detected at all.
    assertThat(actual.getRowMetadata().getColumns().size(), is(expectedNames.length));
    // A single index is enough: names and types are walked in lockstep.
    int i = 0;
    for (ColumnMetadata column : actual.getRowMetadata().getColumns()) {
        assertThat(column.getName(), is(expectedNames[i]));
        assertThat(column.getType(), is(expectedTypes[i].getName()));
        i++;
    }
}
use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class SchemaAnalysisTest method testTDP_226.
/**
 * See <a href="https://jira.talendforge.org/browse/TDP-226">https://jira.talendforge.org/browse/TDP-226</a>.
 * <p>
 * Checks the schema detected for the "empty_lines.csv" resource: number of columns, column names and column types.
 */
@Test
public void testTDP_226() {
    final DataSetMetadata actual = initializeDataSetMetadata(DataSetServiceTest.class.getResourceAsStream("../empty_lines.csv"));
    assertThat(actual.getLifecycle().schemaAnalyzed(), is(true));
    final String[] expectedNames = { "id", "firstname", "lastname", "age", "date-of-birth", "alive" };
    final Type[] expectedTypes = { Type.INTEGER, Type.STRING, Type.STRING, Type.INTEGER, Type.DATE, Type.BOOLEAN };
    // Without this check a partially detected schema (fewer columns than expected) would pass silently.
    assertThat(actual.getRowMetadata().getColumns().size(), is(expectedNames.length));
    int i = 0;
    for (ColumnMetadata column : actual.getRowMetadata().getColumns()) {
        assertThat(column.getName(), is(expectedNames[i]));
        assertThat(column.getType(), is(expectedTypes[i].getName()));
        i++;
    }
}
use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class SchemaAnalysisTest method testTDP_279.
/**
 * See <a href="https://jira.talendforge.org/browse/TDP-279">https://jira.talendforge.org/browse/TDP-279</a>.
 * <p>
 * Checks the schema detected for the "post_code.xls" resource: number of columns, column names, column types,
 * selected domain and the list of semantic domains attached to each column.
 */
@Test
public void testTDP_279() {
    final DataSetMetadata actual = initializeDataSetMetadata(DataSetServiceTest.class.getResourceAsStream("../post_code.xls"));
    assertThat(actual.getLifecycle().schemaAnalyzed(), is(true));
    final String[] expectedNames = { "zip" };
    final Type[] expectedTypes = { Type.INTEGER };
    final String[] expectedDomains = { "FR_POSTAL_CODE" };
    // Without this check the loop below would pass vacuously if no column was detected at all.
    assertThat(actual.getRowMetadata().getColumns().size(), is(expectedNames.length));
    int i = 0;
    for (ColumnMetadata column : actual.getRowMetadata().getColumns()) {
        assertThat(column.getName(), is(expectedNames[i]));
        assertThat(column.getType(), is(expectedTypes[i].getName()));
        assertThat(column.getDomain(), is(expectedDomains[i]));
        // Exactly four semantic domains are expected, including the four listed here.
        assertThat(column.getSemanticDomains()).isNotNull().isNotEmpty().hasSize(4).contains(//
                new SemanticDomain("FR_POSTAL_CODE", "FR Postal Code", (float) 58.33), //
                new SemanticDomain("FR_CODE_COMMUNE_INSEE", "FR Insee Code", (float) 58.33), //
                new SemanticDomain("DE_POSTAL_CODE", "DE Postal Code", (float) 58.33), //
                new SemanticDomain("US_POSTAL_CODE", "US Postal Code", (float) 58.33));
        // Advance explicitly rather than hiding the increment inside an assertion argument.
        i++;
    }
}
Aggregations