Search in sources :

Example 41 with DataSetMetadata

use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.

the class FormatAnalysisTest method testXLSXAnalysis.

/**
 * Runs the format analysis on the {@code ../tagada.xls} resource and checks the format information
 * written back to the stored data set metadata: format family, media type and (empty) parameters.
 *
 * NOTE(review): the method name mentions XLSX but the resource and the expected values are XLS ones —
 * confirm the naming is intentional.
 */
@Test
public void testXLSXAnalysis() {
    // given: a freshly saved data set whose raw content is the Excel resource
    final String dataSetId = UUID.randomUUID().toString();
    final DataSetMetadata saved = metadataBuilder.metadata().id(dataSetId).build();
    dataSetMetadataRepository.save(saved);
    contentStore.storeAsRaw(saved, DataSetServiceTest.class.getResourceAsStream("../tagada.xls"));

    // when
    formatAnalysis.analyze(dataSetId);

    // then: the metadata is read back from the repository, not taken from the local instance
    final DataSetMetadata actual = dataSetMetadataRepository.get(dataSetId);
    assertThat(actual, notNullValue());
    assertThat(actual.getContent().getFormatFamilyId(), is(XlsFormatFamily.BEAN_ID));
    assertThat(actual.getContent().getMediaType(), is("application/vnd.ms-excel"));
    assertThat(actual.getContent().getParameters().isEmpty(), is(true));
}
Also used : DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) Test(org.junit.Test) DataSetBaseTest(org.talend.dataprep.dataset.DataSetBaseTest) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest)

Example 42 with DataSetMetadata

use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.

the class FormatAnalysisTest method testUpdate.

/**
 * Analyzes the {@code ../avengers.csv} resource, then calls {@code formatAnalysis.update} with a modified
 * copy of the metadata (encoding set to windows-1252, SEPARATOR parameter set to ",") and checks the
 * format family, media type and encoding of the metadata read back from the repository.
 */
@Test
public void testUpdate() {
    // given: an analyzed CSV data set
    final String dataSetId = UUID.randomUUID().toString();
    final DataSetMetadata saved = metadataBuilder.metadata().id(dataSetId).build();
    dataSetMetadataRepository.save(saved);
    contentStore.storeAsRaw(saved, DataSetServiceTest.class.getResourceAsStream("../avengers.csv"));
    formatAnalysis.analyze(dataSetId);

    // two separate reads of the same data set: one is left untouched, the other one is edited
    final DataSetMetadata before = dataSetMetadataRepository.get(dataSetId);
    final DataSetMetadata edited = dataSetMetadataRepository.get(dataSetId);
    edited.setEncoding("windows-1252");
    edited.getContent().getParameters().put("SEPARATOR", ",");

    // when
    formatAnalysis.update(before, edited);

    // then
    final DataSetMetadata afterUpdate = dataSetMetadataRepository.get(dataSetId);
    assertNotNull(afterUpdate);
    assertThat(afterUpdate.getContent().getFormatFamilyId(), is(CSVFormatFamily.BEAN_ID));
    assertThat(afterUpdate.getContent().getMediaType(), is("text/csv"));
    assertThat(afterUpdate.getEncoding(), is("windows-1252"));
    // NOTE(review): the SEPARATOR parameter is currently not verified; the check below is disabled in
    // the original test — confirm the expected value before re-enabling it.
    // assertThat(updated.getContent().getParameters().get("SEPARATOR"), is(";"));
}
Also used : DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) Test(org.junit.Test) DataSetBaseTest(org.talend.dataprep.dataset.DataSetBaseTest) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest)

Example 43 with DataSetMetadata

use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.

the class QualityAnalysisTest method TDP_1150_string_must_be_detected_as_so_if_even_if_subtype_is_integer.

/**
 * Checks that the first column ({@code user_id}) of {@code ../valid_must_be_text1.csv} is detected as a
 * string, even if the sub type (integer) of the most frequent type (String) is used to detect invalids.
 *
 * See <a href="https://jira.talendforge.org/browse/TDP-1150">https://jira.talendforge.org/browse/TDP-1150</a>.
 */
@Test
public void TDP_1150_string_must_be_detected_as_so_if_even_if_subtype_is_integer() {
    // when
    final DataSetMetadata actual = initializeDataSetMetadata(
            DataSetServiceTest.class.getResourceAsStream("../valid_must_be_text1.csv"));

    // then
    assertThat(actual.getLifecycle().schemaAnalyzed(), is(true));
    final ColumnMetadata firstColumn = actual.getRowMetadata().getColumns().get(0);
    assertThat(firstColumn.getName(), is("user_id"));
    assertThat(firstColumn.getType(), is(Type.STRING.getName()));
}
Also used : Type(org.talend.dataprep.api.type.Type) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) Test(org.junit.Test) DataSetBaseTest(org.talend.dataprep.dataset.DataSetBaseTest) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest)

Example 44 with DataSetMetadata

use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.

the class QualityAnalysisTest method TDP_1150_full.

/**
 * Checks, column by column, the names and the detected types of the data set built from
 * {@code ../invalids_and_type_detection.csv}.
 *
 * See <a href="https://jira.talendforge.org/browse/TDP-1150">https://jira.talendforge.org/browse/TDP-1150</a>.
 */
@Test
public void TDP_1150_full() {
    // given: expected column names and types, index-aligned with the columns of the data set
    final String[] expectedNames = {
        "string_boolean",
        "double_integer",
        "string_integer",
        "string_double",
        "string_date",
        "type_mix",
        "boolean",
        "integer",
        "double",
        "date",
        "string",
        "empty"
    };
    final Type[] expectedTypes = {
        Type.BOOLEAN,
        Type.DOUBLE,
        Type.INTEGER,
        Type.DOUBLE,
        Type.DATE,
        Type.STRING,
        Type.BOOLEAN,
        Type.INTEGER,
        Type.DOUBLE,
        Type.DATE,
        Type.STRING,
        Type.STRING
    };

    // when
    final DataSetMetadata actual = initializeDataSetMetadata(
            DataSetServiceTest.class.getResourceAsStream("../invalids_and_type_detection.csv"));

    // then
    assertThat(actual.getLifecycle().schemaAnalyzed(), is(true));
    final int expectedCount = expectedTypes.length;
    for (int index = 0; index < expectedCount; index++) {
        final ColumnMetadata column = actual.getRowMetadata().getColumns().get(index);
        final Type expectedType = expectedTypes[index];

        assertThat(column.getName(), is(expectedNames[index]));

        // failure message names the column so a mismatch can be located without a debugger
        final String mismatch = "column '" + column.getName() + "' is expected to be detected as '" + expectedType
                + "' but was found as '" + column.getType() + "'";
        assertThat(mismatch, column.getType(), is(expectedType.getName()));
    }
}
Also used : Type(org.talend.dataprep.api.type.Type) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) Test(org.junit.Test) DataSetBaseTest(org.talend.dataprep.dataset.DataSetBaseTest) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest)

Example 45 with DataSetMetadata

use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.

the class QualityAnalysisTest method TDP_1150_integer_must_be_detected_as_so_even_if_sampling_detects_text.

/**
 * Checks that the first column ({@code user_id}) of {@code ../valid_must_be_integer.csv} is detected as
 * an integer.
 *
 * See <a href="https://jira.talendforge.org/browse/TDP-1150">https://jira.talendforge.org/browse/TDP-1150</a>.
 */
@Test
public void TDP_1150_integer_must_be_detected_as_so_even_if_sampling_detects_text() {
    // when
    final DataSetMetadata actual = initializeDataSetMetadata(
            DataSetServiceTest.class.getResourceAsStream("../valid_must_be_integer.csv"));

    // then
    assertThat(actual.getLifecycle().schemaAnalyzed(), is(true));
    final ColumnMetadata firstColumn = actual.getRowMetadata().getColumns().get(0);
    assertThat(firstColumn.getName(), is("user_id"));
    assertThat(firstColumn.getType(), is(Type.INTEGER.getName()));
}
Also used : Type(org.talend.dataprep.api.type.Type) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) Test(org.junit.Test) DataSetBaseTest(org.talend.dataprep.dataset.DataSetBaseTest) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest)

Aggregations

DataSetMetadata (org.talend.dataprep.api.dataset.DataSetMetadata)192 Test (org.junit.Test)126 DataSetBaseTest (org.talend.dataprep.dataset.DataSetBaseTest)63 ColumnMetadata (org.talend.dataprep.api.dataset.ColumnMetadata)48 InputStream (java.io.InputStream)45 Matchers.containsString (org.hamcrest.Matchers.containsString)28 Matchers.isEmptyString (org.hamcrest.Matchers.isEmptyString)28 TDPException (org.talend.dataprep.exception.TDPException)26 RowMetadata (org.talend.dataprep.api.dataset.RowMetadata)20 DataSetServiceTest (org.talend.dataprep.dataset.service.DataSetServiceTest)20 ApiOperation (io.swagger.annotations.ApiOperation)18 DataSet (org.talend.dataprep.api.dataset.DataSet)18 Type (org.talend.dataprep.api.type.Type)17 Timed (org.talend.dataprep.metrics.Timed)17 DistributedLock (org.talend.dataprep.lock.DistributedLock)16 Autowired (org.springframework.beans.factory.annotation.Autowired)14 DataSetRow (org.talend.dataprep.api.dataset.row.DataSetRow)14 IOException (java.io.IOException)13 RequestMapping (org.springframework.web.bind.annotation.RequestMapping)13 ArrayList (java.util.ArrayList)12