Search in sources :

Example 6 with Type

use of org.talend.dataprep.api.type.Type in project data-prep by Talend.

the class QualityAnalysisTest method TDP_1150_full.

/**
 * Ensures that the expected data type is detected for each column when performing a full analysis of
 * <code>invalids_and_type_detection.csv</code>.
 *
 * See <a href="https://jira.talendforge.org/browse/TDP-1150">https://jira.talendforge.org/browse/TDP-1150</a>.
 */
@Test
public void TDP_1150_full() {
    // given: expected column names and expected types, aligned by index
    String[] expectedNames = {
            "string_boolean",
            "double_integer",
            "string_integer",
            "string_double",
            "string_date",
            "type_mix",
            "boolean",
            "integer",
            "double",
            "date",
            "string",
            "empty" };
    Type[] expectedTypes = {
            Type.BOOLEAN,
            Type.DOUBLE,
            Type.INTEGER,
            Type.DOUBLE,
            Type.DATE,
            Type.STRING,
            Type.BOOLEAN,
            Type.INTEGER,
            Type.DOUBLE,
            Type.DATE,
            Type.STRING,
            Type.STRING };
    // when
    final DataSetMetadata actual = initializeDataSetMetadata(DataSetServiceTest.class.getResourceAsStream("../invalids_and_type_detection.csv"));
    // then
    assertThat(actual.getLifecycle().schemaAnalyzed(), is(true));
    for (int i = 0; i < expectedTypes.length; i++) {
        ColumnMetadata column = actual.getRowMetadata().getColumns().get(i);
        assertThat(column.getName(), is(expectedNames[i]));
        // The reason message names the column so a type mismatch can be located without a debugger.
        assertThat("column '" + column.getName() + "' is expected to be detected as '" + expectedTypes[i] + "' but was found as '" + column.getType() + "'",
                column.getType(),
                is(expectedTypes[i].getName()));
    }
}
Also used : Type(org.talend.dataprep.api.type.Type) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) Test(org.junit.Test) DataSetBaseTest(org.talend.dataprep.dataset.DataSetBaseTest) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest)

Example 7 with Type

use of org.talend.dataprep.api.type.Type in project data-prep by Talend.

the class QualityAnalysisTest method TDP_1150_integer_must_be_detected_as_so_even_if_sampling_detects_text.

/**
 * Checks that the first column of <code>valid_must_be_integer.csv</code> is named <code>user_id</code> and is
 * reported with the integer type once the schema has been analyzed.
 *
 * See <a href="https://jira.talendforge.org/browse/TDP-1150">https://jira.talendforge.org/browse/TDP-1150</a>.
 */
@Test
public void TDP_1150_integer_must_be_detected_as_so_even_if_sampling_detects_text() {
    // given
    String expectedName = "user_id";
    Type expectedType = Type.INTEGER;
    // when
    final DataSetMetadata actual = initializeDataSetMetadata(DataSetServiceTest.class.getResourceAsStream("../valid_must_be_integer.csv"));
    // then
    assertThat(actual.getLifecycle().schemaAnalyzed(), is(true));
    ColumnMetadata column = actual.getRowMetadata().getColumns().get(0);
    assertThat(column.getName(), is(expectedName));
    assertThat(column.getType(), is(expectedType.getName()));
}
Also used : Type(org.talend.dataprep.api.type.Type) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) Test(org.junit.Test) DataSetBaseTest(org.talend.dataprep.dataset.DataSetBaseTest) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest)

Example 8 with Type

use of org.talend.dataprep.api.type.Type in project data-prep by Talend.

the class SchemaAnalysisTest method testTDP_1674.

/**
 * Checks the column names and types detected for <code>ipv6.csv</code>, and the domain label of its third
 * column.
 *
 * See <a href="https://jira.talendforge.org/browse/TDP-1674">TDP-1674_error_with_ipv6_addresses</a>.
 */
@Test
public void testTDP_1674() {
    final DataSetMetadata actual = initializeDataSetMetadata(DataSetServiceTest.class.getResourceAsStream("../ipv6.csv"));
    assertThat(actual.getLifecycle().schemaAnalyzed(), is(true));
    String[] expectedNames = { "number", "description", "address" };
    Type[] expectedTypes = { Type.INTEGER, Type.STRING, Type.STRING };
    // Report a column count mismatch as an assertion failure instead of an index error further down.
    assertThat(actual.getRowMetadata().getColumns().size(), is(expectedNames.length));
    int i = 0;
    for (ColumnMetadata column : actual.getRowMetadata().getColumns()) {
        assertThat(column.getName(), is(expectedNames[i]));
        assertThat(column.getType(), is(expectedTypes[i].getName()));
        i++;
    }
    // Actual value first, matcher on the expected value second, so a failure is reported the right way round.
    assertThat(actual.getRowMetadata().getColumns().get(2).getDomainLabel(), is("IPv6 Address"));
}
Also used : Type(org.talend.dataprep.api.type.Type) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) Test(org.junit.Test) DataSetBaseTest(org.talend.dataprep.dataset.DataSetBaseTest) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest)

Example 9 with Type

use of org.talend.dataprep.api.type.Type in project data-prep by Talend.

the class SchemaAnalysisTest method testTDP_224.

/**
 * Checks the column names and types detected for <code>whatever.xls</code>.
 *
 * See <a href="https://jira.talendforge.org/browse/TDP-224">https://jira.talendforge.org/browse/TDP-224</a>.
 */
@Test
public void testTDP_224() {
    final DataSetMetadata actual = initializeDataSetMetadata(DataSetServiceTest.class.getResourceAsStream("../whatever.xls"));
    assertThat(actual.getLifecycle().schemaAnalyzed(), is(true));
    // Not a typo: this is what QA provided as column name.
    String[] expectedNames = { "whaterver" };
    Type[] expectedTypes = { Type.STRING };
    // A single index is enough: both expectation arrays are walked in lockstep with the columns.
    int i = 0;
    for (ColumnMetadata column : actual.getRowMetadata().getColumns()) {
        assertThat(column.getName(), is(expectedNames[i]));
        assertThat(column.getType(), is(expectedTypes[i].getName()));
        i++;
    }
}
Also used : Type(org.talend.dataprep.api.type.Type) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) Test(org.junit.Test) DataSetBaseTest(org.talend.dataprep.dataset.DataSetBaseTest) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest)

Example 10 with Type

use of org.talend.dataprep.api.type.Type in project data-prep by Talend.

the class SchemaAnalysisTest method testTDP_226.

/**
 * Checks the column names and types detected for <code>empty_lines.csv</code>.
 *
 * See <a href="https://jira.talendforge.org/browse/TDP-226">https://jira.talendforge.org/browse/TDP-226</a>.
 */
@Test
public void testTDP_226() {
    // given: expected names and types, aligned by column position
    String[] expectedNames = { "id", "firstname", "lastname", "age", "date-of-birth", "alive" };
    Type[] expectedTypes = { Type.INTEGER, Type.STRING, Type.STRING, Type.INTEGER, Type.DATE, Type.BOOLEAN };
    // when
    final DataSetMetadata actual = initializeDataSetMetadata(DataSetServiceTest.class.getResourceAsStream("../empty_lines.csv"));
    // then
    assertThat(actual.getLifecycle().schemaAnalyzed(), is(true));
    int position = 0;
    for (ColumnMetadata current : actual.getRowMetadata().getColumns()) {
        assertThat(current.getName(), is(expectedNames[position]));
        // Advance the position once both expectations for this column have been read.
        assertThat(current.getType(), is(expectedTypes[position++].getName()));
    }
}
Also used : Type(org.talend.dataprep.api.type.Type) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) Test(org.junit.Test) DataSetBaseTest(org.talend.dataprep.dataset.DataSetBaseTest) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest)

Aggregations

Type (org.talend.dataprep.api.type.Type)24 ColumnMetadata (org.talend.dataprep.api.dataset.ColumnMetadata)21 Test (org.junit.Test)17 DataSetMetadata (org.talend.dataprep.api.dataset.DataSetMetadata)14 DataSetBaseTest (org.talend.dataprep.dataset.DataSetBaseTest)13 DataSetServiceTest (org.talend.dataprep.dataset.service.DataSetServiceTest)12 Arrays (java.util.Arrays)4 List (java.util.List)4 Optional (java.util.Optional)3 StringUtils (org.apache.commons.lang.StringUtils)3 Assert.assertEquals (org.junit.Assert.assertEquals)3 Builder.column (org.talend.dataprep.api.dataset.ColumnMetadata.Builder.column)3 RowMetadata (org.talend.dataprep.api.dataset.RowMetadata)3 PatternFrequency (org.talend.dataprep.api.dataset.statistics.PatternFrequency)3 IOException (java.io.IOException)2 Collections (java.util.Collections)2 HashMap (java.util.HashMap)2 Locale (java.util.Locale)2 Map (java.util.Map)2 Collectors (java.util.stream.Collectors)2