Search in sources :

Example 11 with Type

use of org.talend.dataprep.api.type.Type in project data-prep by Talend.

the class SchemaAnalysisTest method testTDP_279.

/**
 * Runs schema analysis on {@code post_code.xls} and checks that its single expected column
 * ({@code zip}) gets the {@link Type#INTEGER} type, the {@code FR_POSTAL_CODE} domain and the
 * four expected semantic domain candidates.
 *
 * See <a href="https://jira.talendforge.org/browse/TDP-279">https://jira.talendforge.org/browse/TDP-279</a>.
 */
@Test
public void testTDP_279() {
    final DataSetMetadata actual = initializeDataSetMetadata(DataSetServiceTest.class.getResourceAsStream("../post_code.xls"));
    assertThat(actual.getLifecycle().schemaAnalyzed(), is(true));
    String[] expectedNames = { "zip" };
    Type[] expectedTypes = { Type.INTEGER };
    String[] expectedDomains = { "FR_POSTAL_CODE" };
    // Without this check the loop below is skipped when no column is detected and the test
    // would pass without verifying anything.
    assertThat(actual.getRowMetadata().getColumns().size(), is(expectedNames.length));
    int i = 0;
    for (ColumnMetadata column : actual.getRowMetadata().getColumns()) {
        assertThat(column.getName(), is(expectedNames[i]));
        assertThat(column.getType(), is(expectedTypes[i].getName()));
        assertThat(column.getDomain(), is(expectedDomains[i]));
        assertThat(column.getSemanticDomains()).isNotNull().isNotEmpty().hasSize(4).contains( //
                new SemanticDomain("FR_POSTAL_CODE", "FR Postal Code", (float) 58.33), //
                new SemanticDomain("FR_CODE_COMMUNE_INSEE", "FR Insee Code", (float) 58.33), //
                new SemanticDomain("DE_POSTAL_CODE", "DE Postal Code", (float) 58.33), //
                new SemanticDomain("US_POSTAL_CODE", "US Postal Code", (float) 58.33));
        // Advance explicitly rather than hiding the increment inside an assertion argument.
        i++;
    }
}
Also used : Type(org.talend.dataprep.api.type.Type) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest) SemanticDomain(org.talend.dataprep.api.dataset.statistics.SemanticDomain) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) Test(org.junit.Test) DataSetBaseTest(org.talend.dataprep.dataset.DataSetBaseTest)

Example 12 with Type

use of org.talend.dataprep.api.type.Type in project data-prep by Talend.

the class SchemaAnalysisTest method testTDP_471.

/**
 * Runs schema analysis on {@code semantic_type_threshold.csv} and checks that its single expected
 * column ({@code gender_column}) is reported with an empty domain while still listing the
 * {@code GENDER} and {@code CIVILITY} semantic domain candidates.
 *
 * See <a href="https://jira.talendforge.org/browse/TDP-471">https://jira.talendforge.org/browse/TDP-471</a>.
 */
@Test
public void testTDP_471() {
    final DataSetMetadata actual = initializeDataSetMetadata(DataSetServiceTest.class.getResourceAsStream("../semantic_type_threshold.csv"));
    assertThat(actual.getLifecycle().schemaAnalyzed(), is(true));
    String[] expectedNames = { "gender_column" };
    Type[] expectedTypes = { Type.INTEGER };
    String[] expectedDomains = { "" };
    // Without this check the loop below is skipped when no column is detected and the test
    // would pass without verifying anything.
    assertThat(actual.getRowMetadata().getColumns().size(), is(expectedNames.length));
    int i = 0;
    for (ColumnMetadata column : actual.getRowMetadata().getColumns()) {
        assertThat(column.getName(), is(expectedNames[i]));
        assertThat(column.getType(), is(expectedTypes[i].getName()));
        assertThat(column.getDomain(), is(expectedDomains[i]));
        assertThat(column.getSemanticDomains()).isNotNull().isNotEmpty().hasSize(2).contains( //
                new SemanticDomain("GENDER", "Gender", (float) 35), //
                new SemanticDomain("CIVILITY", "Civility", (float) 20.833334));
        // Advance explicitly rather than hiding the increment inside an assertion argument.
        i++;
    }
}
Also used : Type(org.talend.dataprep.api.type.Type) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest) SemanticDomain(org.talend.dataprep.api.dataset.statistics.SemanticDomain) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) Test(org.junit.Test) DataSetBaseTest(org.talend.dataprep.dataset.DataSetBaseTest)

Example 13 with Type

use of org.talend.dataprep.api.type.Type in project data-prep by Talend.

the class SchemaAnalysisTest method testTDP_855.

/**
 * Runs schema analysis on {@code TDP-855_movie_title_detected_as_city.csv} and checks that the
 * first column keeps its name, is typed as {@link Type#STRING} and has an empty domain label.
 *
 * See <a href="https://jira.talendforge.org/browse/TDP-855">TDP-855_movie_title_detected_as_city</a>.
 */
@Test
public void testTDP_855() {
    final DataSetMetadata actual = initializeDataSetMetadata(DataSetServiceTest.class.getResourceAsStream("../TDP-855_movie_title_detected_as_city.csv"));
    assertThat(actual.getLifecycle().schemaAnalyzed(), is(true));
    ColumnMetadata column = actual.getRowMetadata().getColumns().get(0);
    String expectedName = "Genre: (Movie, Program, show)";
    Type expectedType = Type.STRING;
    // Hamcrest's assertThat takes (actual, matcher-of-expected); keeping that order makes the
    // failure message report "Expected"/"but was" the right way round.
    assertThat(column.getName(), is(expectedName));
    assertThat(column.getType(), is(expectedType.getName()));
    assertThat(column.getDomainLabel(), is(""));
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) Type(org.talend.dataprep.api.type.Type) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) Test(org.junit.Test) DataSetBaseTest(org.talend.dataprep.dataset.DataSetBaseTest)

Example 14 with Type

use of org.talend.dataprep.api.type.Type in project data-prep by Talend.

the class SchemaAnalysisTest method testAnalysis.

/**
 * Runs schema analysis on {@code avengers.csv} and checks that the detected columns match, in
 * order, the expected names and types.
 */
@Test
public void testAnalysis() {
    final DataSetMetadata actual = initializeDataSetMetadata(DataSetServiceTest.class.getResourceAsStream("../avengers.csv"));
    assertThat(actual.getLifecycle().schemaAnalyzed(), is(true));
    String[] expectedNames = { "nickname", "secret firstname", "secret lastname", "date of birth", "city" };
    Type[] expectedTypes = { Type.STRING, Type.STRING, Type.STRING, Type.DATE, Type.STRING };
    // Without this check, missing columns would go unnoticed: the loop only verifies the
    // columns that were actually detected.
    assertThat(actual.getRowMetadata().getColumns().size(), is(expectedNames.length));
    // A single index is enough: names and types are always read at the same position.
    int i = 0;
    for (ColumnMetadata column : actual.getRowMetadata().getColumns()) {
        assertThat(column.getName(), is(expectedNames[i]));
        assertThat(column.getType(), is(expectedTypes[i].getName()));
        i++;
    }
}
Also used : Type(org.talend.dataprep.api.type.Type) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) DataSetServiceTest(org.talend.dataprep.dataset.service.DataSetServiceTest) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) Test(org.junit.Test) DataSetBaseTest(org.talend.dataprep.dataset.DataSetBaseTest)

Example 15 with Type

use of org.talend.dataprep.api.type.Type in project data-prep by Talend.

the class DomainChangeTest method should_not_accept_any_type_to_avoid_transformation_to_be_in_transfo_list.

/**
 * Builds one column per {@link Type} value and checks the result of
 * {@code DomainChange#acceptField} for each of them.
 */
@Test
public void should_not_accept_any_type_to_avoid_transformation_to_be_in_transfo_list() {
    // given
    final DomainChange action = new DomainChange();

    for (final Type candidate : Type.values()) {
        final ColumnMetadata column = ColumnMetadata.Builder.column() //
                .type(candidate) //
                .computedId("0002") //
                .domain("FR_BEER") //
                .domainFrequency(1) //
                .domainLabel("French Beer") //
                .build();

        // when / then
        // NOTE(review): the method name says "should not accept" while the assertion expects
        // true - confirm against DomainChange#acceptField which of the two is intended.
        assertThat(action.acceptField(column)).isTrue();
    }
}
Also used : Type(org.talend.dataprep.api.type.Type) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) Test(org.junit.Test) AbstractMetadataBaseTest(org.talend.dataprep.transformation.actions.AbstractMetadataBaseTest)

Aggregations

Type (org.talend.dataprep.api.type.Type)24 ColumnMetadata (org.talend.dataprep.api.dataset.ColumnMetadata)21 Test (org.junit.Test)17 DataSetMetadata (org.talend.dataprep.api.dataset.DataSetMetadata)14 DataSetBaseTest (org.talend.dataprep.dataset.DataSetBaseTest)13 DataSetServiceTest (org.talend.dataprep.dataset.service.DataSetServiceTest)12 Arrays (java.util.Arrays)4 List (java.util.List)4 Optional (java.util.Optional)3 StringUtils (org.apache.commons.lang.StringUtils)3 Assert.assertEquals (org.junit.Assert.assertEquals)3 Builder.column (org.talend.dataprep.api.dataset.ColumnMetadata.Builder.column)3 RowMetadata (org.talend.dataprep.api.dataset.RowMetadata)3 PatternFrequency (org.talend.dataprep.api.dataset.statistics.PatternFrequency)3 IOException (java.io.IOException)2 Collections (java.util.Collections)2 HashMap (java.util.HashMap)2 Locale (java.util.Locale)2 Map (java.util.Map)2 Collectors (java.util.stream.Collectors)2