use of org.talend.dataprep.schema.Format in project data-prep by Talend.
the class XlsDetector method detect.
/**
 * Inspects the given stream and reports whether its content is an XLS document.
 *
 * <p>Contract for detectors: the stream must not be closed here; closing it is the caller's
 * responsibility. Implementations are expected to rely on the mark/reset support of
 * {@link TikaInputStream} so that the stream keeps returning the same bytes afterwards.
 *
 * <p>The parent detector is consulted first. When it yields no media type, or the unknown media
 * type, the zip container detector is tried with a fresh {@link Metadata} instance.
 *
 * @param metadata the specified TIKA {@link Metadata}
 * @param inputStream the specified input stream
 * @return an XLS {@link Format} when the detected media type starts with the XLS family media type
 * or equals the old XLS media type, {@code null} otherwise
 * @throws IOException if one of the underlying detectors fails while reading the stream
 */
@Override
public Format detect(Metadata metadata, TikaInputStream inputStream) throws IOException {
    MediaType detected = super.detect(inputStream, metadata);

    // Fall back to the zip container detector when the first pass was inconclusive.
    final boolean inconclusive =
            detected == null || StringUtils.equals(detected.toString(), FormatUtils.UNKNOWN_MEDIA_TYPE);
    if (inconclusive) {
        detected = zipContainerDetector.detect(inputStream, new Metadata());
    }

    if (detected == null) {
        return null;
    }

    final String detectedName = detected.toString();
    final boolean isXls = StringUtils.startsWith(detectedName, XlsFormatFamily.MEDIA_TYPE)
            || StringUtils.equals(detectedName, OLD_XLS_MEDIA_TYPE);
    return isXls ? new Format(xlsFormatFamily, FormatUtils.DEFAULT_ENCODING) : null;
}
use of org.talend.dataprep.schema.Format in project data-prep by Talend.
the class XlsDetectorTest method should_detect_old_xls_format.
/**
 * Runs the detector on the {@code test.xls} resource and expects an XLS format family with the
 * "UTF-8" encoding.
 */
@Test
public void should_detect_old_xls_format() throws Exception {
    try (InputStream inputStream = this.getClass().getResourceAsStream("test.xls")) {
        Format actual = xlsDetector.detect(inputStream);
        // The detector may return null; fail with an assertion instead of a NullPointerException below.
        Assert.assertNotNull(actual);
        assertTrue(actual.getFormatFamily() instanceof XlsFormatFamily);
        assertTrue(StringUtils.equals("UTF-8", actual.getEncoding()));
    }
}
use of org.talend.dataprep.schema.Format in project data-prep by Talend.
the class XlsDetectorTest method read_xls_that_can_be_parsed_as_csv_TDP_375.
/**
 * Runs the detector on the {@code TDP-375_xsl_read_as_csv.xls} resource and expects it to be
 * reported as XLS (family, media type and "UTF-8" encoding).
 */
@Test
public void read_xls_that_can_be_parsed_as_csv_TDP_375() throws Exception {
    try (InputStream xlsStream = this.getClass().getResourceAsStream("TDP-375_xsl_read_as_csv.xls")) {
        final Format detected = xlsDetector.detect(xlsStream);

        // A format must have been found before its details are inspected.
        Assert.assertNotNull(detected);

        assertTrue(detected.getFormatFamily() instanceof XlsFormatFamily);
        assertEquals(XlsFormatFamily.MEDIA_TYPE, detected.getFormatFamily().getMediaType());
        assertTrue(StringUtils.equals("UTF-8", detected.getEncoding()));
    }
}
use of org.talend.dataprep.schema.Format in project data-prep by Talend.
the class XlsSerializerTest method assertFormat.
/**
 * Detects the format of the given stream and asserts that it is an XLS format.
 *
 * @param inputStream the stream to run the format detector on.
 * @return the detected format, once the assertions have passed.
 */
private Format assertFormat(InputStream inputStream) throws Exception {
    final Format detected = formatDetector.detect(inputStream);

    // Detection must succeed and resolve to the XLS family with its media type.
    Assert.assertNotNull(detected);
    Assert.assertTrue(detected.getFormatFamily() instanceof XlsFormatFamily);
    Assert.assertEquals(XlsFormatFamily.MEDIA_TYPE, detected.getFormatFamily().getMediaType());

    return detected;
}
use of org.talend.dataprep.schema.Format in project data-prep by Talend.
the class XlsSerializerTest method checkExcelFile.
/**
 * Opens the named classpath resource and asserts that the format detector reports it as an
 * excel (XLS family) file.
 *
 * @param fileName the excel file name to open.
 * @throws IOException if detecting the format or closing the stream fails.
 */
private void checkExcelFile(String fileName) throws IOException {
    try (InputStream excelStream = this.getClass().getResourceAsStream(fileName)) {
        final Format detected = formatDetector.detect(excelStream);

        Assert.assertNotNull(detected);
        Assert.assertTrue(detected.getFormatFamily() instanceof XlsFormatFamily);
        Assert.assertEquals(XlsFormatFamily.MEDIA_TYPE, detected.getFormatFamily().getMediaType());
    }
}
Aggregations