use of org.talend.dataprep.schema.Format in project data-prep by Talend.
the class XlsDetectorTest method should_detect_new_xls_format.
/**
 * Runs the XLS detector on the {@code test_new.xlsx} classpath resource and checks that the detected
 * format belongs to {@link XlsFormatFamily} and reports {@code UTF-8} as its encoding.
 */
@Test
public void should_detect_new_xls_format() throws Exception {
    try (InputStream xlsxStream = getClass().getResourceAsStream("test_new.xlsx")) {
        final Format detected = xlsDetector.detect(xlsxStream);

        // The format family must be the XLS one.
        final boolean isXlsFamily = detected.getFormatFamily() instanceof XlsFormatFamily;
        assertTrue(isXlsFamily);

        // Null-safe comparison of the reported encoding.
        final boolean isUtf8 = StringUtils.equals("UTF-8", detected.getEncoding());
        assertTrue(isUtf8);
    }
}
use of org.talend.dataprep.schema.Format in project data-prep by Talend.
the class XlsSerializerTest method read_xls_TDP_143.
/**
 * Reads {@code state_table.xls} twice: once to check that the format detector identifies it as an XLS
 * format with the expected media type, and once to check that the schema parser finds 17 columns in the
 * first sheet.
 */
@Test
public void read_xls_TDP_143() throws Exception {
    final String resourceName = "state_table.xls";

    // First pass: format detection.
    try (InputStream detectionStream = getClass().getResourceAsStream(resourceName)) {
        final Format detected = formatDetector.detect(detectionStream);
        Assert.assertNotNull(detected);
        Assert.assertTrue(detected.getFormatFamily() instanceof XlsFormatFamily);
        Assert.assertEquals(XlsFormatFamily.MEDIA_TYPE, detected.getFormatFamily().getMediaType());
    }

    // Second pass: a fresh stream is opened for schema parsing.
    try (InputStream parsingStream = getClass().getResourceAsStream(resourceName)) {
        final List<ColumnMetadata> firstSheetColumns = xlsSchemaParser
                .parse(getRequest(parsingStream, "#852"))
                .getSheetContents()
                .get(0)
                .getColumnMetadatas();
        logger.debug("columnsMetadata: {}", firstSheetColumns);
        Assertions.assertThat(firstSheetColumns).isNotNull().isNotEmpty().hasSize(17);
    }
}
use of org.talend.dataprep.schema.Format in project data-prep by Talend.
the class XlsSerializerTest method read_xls_file_then_serialize.
/**
 * Parses the schema of {@code test.xls}, attaches the resulting columns to a data set metadata, reads the
 * file's values through {@code getValuesFromFile} and checks the four expected rows.
 */
@Test
public void read_xls_file_then_serialize() throws Exception {
    String fileName = "test.xls";
    DataSetMetadata dataSetMetadata = metadataBuilder.metadata().id("beer").build();
    assertFormat(fileName);

    // Parse the first sheet's columns and use them as the row metadata of the data set.
    try (InputStream inputStream = this.getClass().getResourceAsStream(fileName)) {
        List<ColumnMetadata> columnMetadatas = xlsSchemaParser //
                .parse(getRequest(inputStream, "#123")) //
                .getSheetContents() //
                .get(0) //
                .getColumnMetadatas();
        dataSetMetadata.getRowMetadata().setColumns(columnMetadatas);
    }

    List<Map<String, String>> values = getValuesFromFile(fileName, dataSetMetadata);
    logger.debug("values: {}", values);

    // Expected rows, keyed by column id. Judging by the asserted values:
    // 0000 = beer name, 0001 = country, 0002 = quality, 0003 = note.
    //   Little Creatures | Australie | Awesome | 10
    //   Heinekein        | France    | crappy  | (empty)
    //   Foo              | Australie | 10      | 6
    //   Bar              | France    | crappy  | 2
    Assertions.assertThat(values).isNotEmpty().hasSize(4);

    Assertions.assertThat(values.get(0)).contains( //
            entry("0000", "Little Creatures"), //
            entry("0001", "Australie"), //
            entry("0002", "Awesome"), //
            entry("0003", "10"));

    Assertions.assertThat(values.get(1)).contains( //
            entry("0000", "Heinekein"), //
            entry("0001", "France"), //
            entry("0002", "crappy"), //
            entry("0003", ""));

    Assertions.assertThat(values.get(2)).contains( //
            entry("0000", "Foo"), //
            entry("0001", "Australie"), //
            entry("0002", "10"), //
            entry("0003", "6"));

    Assertions.assertThat(values.get(3)).contains( //
            entry("0000", "Bar"), //
            entry("0001", "France"), //
            entry("0002", "crappy"), //
            entry("0003", "2"));
}
use of org.talend.dataprep.schema.Format in project data-prep by Talend.
the class XlsSerializerTest method testGeneralNumberFormat_TDP_222.
/**
 * <p>
 * See <a href="https://jira.talendforge.org/browse/TDP-222">https://jira.talendforge.org/browse/TDP-222</a>.
 * </p>
 * <p>
 * XlsSerializer should follow the data format as set in the Excel file. This test ensures that XlsSerializer
 * follows the data format as defined and does not directly use {@link Cell#getNumericCellValue()}.
 * </p>
 */
@Test
public void testGeneralNumberFormat_TDP_222() throws Exception {
    final DataSetMetadata metadata = metadataBuilder.metadata() //
            .id("1234") //
            .row(column().name("id").id(0).type(Type.INTEGER), column().name("value1").id(1).type(Type.INTEGER)) //
            .build();
    Format format = assertFormat("excel_numbers.xls");

    // Test number serialization in XLS type guess.
    // The resource stream is closed once the serialized content has been fully read.
    final String result;
    try (InputStream input = this.getClass().getResourceAsStream("excel_numbers.xls")) {
        result = IOUtils.toString(format.getFormatFamily().getSerializer().serialize(input, metadata, -1), UTF_8);
    }

    final String expected = "[{\"0000\":\"1\",\"0001\":\"123\"},{\"0000\":\"2\",\"0001\":\"123.1\"},{\"0000\":\"3\",\"0001\":\"209.9\"}]";
    assertThat(result, sameJSONAs(expected));
}
Aggregations