use of com.alibaba.alink.operator.common.io.reader.HttpFileSplitReader in project Alink by alibaba.
the class FilePath method download.
/**
 * Copies one file out of {@code folder} into a newly created local temporary directory.
 *
 * <p>When the folder's file system is a {@link LocalFileSystem} nothing is copied and the folder's own
 * path string is returned. Otherwise a randomly named directory is created under {@code java.io.tmpdir}
 * and the file is fetched either through {@link HttpFileSplitReader} (http/https schemes) or through the
 * folder's file system (every other scheme).
 *
 * @param folder   folder that contains the file
 * @param fileName name of the file, resolved against {@code folder}
 * @return {@code folder.getPathStr()} for a local folder, otherwise the absolute path of the temporary
 *         directory that now holds the copy
 * @throws IOException      if reading or writing fails, or the HTTP stream ends before the announced
 *                          file length has been received
 * @throws RuntimeException if the temporary directory cannot be created
 */
public static String download(FilePath folder, String fileName) throws IOException {
    // local
    if (folder.getFileSystem() instanceof LocalFileSystem) {
        return folder.getPathStr();
    }
    File localConfDir = new File(System.getProperty("java.io.tmpdir"), FileUtils.getRandomFilename(""));
    String scheme = folder.getPath().toUri().getScheme();
    if (!localConfDir.mkdir()) {
        throw new RuntimeException("Could not create the dir " + localConfDir.getAbsolutePath());
    }
    try (FileOutputStream outputStream = new FileOutputStream(Paths.get(localConfDir.getPath(), fileName).toFile())) {
        // http
        if (scheme != null && (scheme.equalsIgnoreCase("http") || scheme.equalsIgnoreCase("https"))) {
            try (HttpFileSplitReader reader = new HttpFileSplitReader(new Path(folder.getPath(), fileName).toString())) {
                long fileLen = reader.getFileLength();
                reader.open(null, 0, fileLen);
                byte[] buffer = new byte[1024];
                // Progress is tracked separately from the buffer position: the buffer is reused from
                // index 0 on every round, so files larger than the buffer no longer overrun it.
                long copied = 0;
                while (copied < fileLen) {
                    int wanted = (int) Math.min(buffer.length, fileLen - copied);
                    int len = reader.read(buffer, 0, wanted);
                    if (len < 0) {
                        throw new IOException("Unexpected end of stream while downloading " + fileName
                            + ": received " + copied + " of " + fileLen + " bytes");
                    }
                    outputStream.write(buffer, 0, len);
                    copied += len;
                }
            }
        } else {
            // file system
            try (FSDataInputStream inputStream = folder.getFileSystem().open(new Path(folder.getPath(), fileName))) {
                IOUtils.copy(inputStream, outputStream);
            }
        }
        return localConfDir.getAbsolutePath();
    }
}
use of com.alibaba.alink.operator.common.io.reader.HttpFileSplitReader in project Alink by alibaba.
the class HiveCatalog method fileExists.
/**
 * Tells whether {@code file} exists inside {@code folder}.
 *
 * <p>For a non-local folder whose path scheme is http or https, the file is probed by opening it with an
 * {@link HttpFileSplitReader}; a {@link FileNotFoundException} raised by that probe means "does not
 * exist". In every other case the answer comes from the folder's file system.
 *
 * @param folder folder to look in
 * @param file   file name, resolved against {@code folder}
 * @return {@code true} if the file was found, {@code false} otherwise
 * @throws IOException if the check fails for a reason other than the file being absent
 */
public static boolean fileExists(FilePath folder, String file) throws IOException {
    // The scheme is only inspected for non-local folders.
    final boolean onLocalFs = folder.getFileSystem() instanceof LocalFileSystem;
    final String scheme = onLocalFs ? null : folder.getPath().toUri().getScheme();
    final boolean overHttp = "http".equalsIgnoreCase(scheme) || "https".equalsIgnoreCase(scheme);

    if (!overHttp) {
        // local and generic file systems answer the question themselves
        return folder.getFileSystem().exists(new Path(folder.getPath(), file));
    }

    try (HttpFileSplitReader probe = new HttpFileSplitReader(folder.getPathStr() + "/" + file)) {
        long probeLen = probe.getFileLength();
        probe.open(null, 0, probeLen);
    } catch (FileNotFoundException notFound) {
        return false;
    }
    return true;
}
use of com.alibaba.alink.operator.common.io.reader.HttpFileSplitReader in project Alink by alibaba.
the class HiveCatalog method readFile.
/**
 * Reads the whole file at {@code filePath} and decodes it as UTF-8 text.
 *
 * <p>Paths with an http or https scheme are fetched through {@link HttpFileSplitReader}; every other
 * path is opened through the path's own file system.
 *
 * @param filePath location of the file to read
 * @return the file content decoded as UTF-8; for http/https, if the stream ends before the announced
 *         length, only the bytes actually received are decoded
 * @throws IOException if the file cannot be read, or (http/https) its reported length is negative or
 *                     does not fit into a single byte array
 */
public static String readFile(FilePath filePath) throws IOException {
    String scheme = filePath.getPath().toUri().getScheme();
    if (scheme != null && (scheme.equalsIgnoreCase("http") || scheme.equalsIgnoreCase("https"))) {
        try (HttpFileSplitReader reader = new HttpFileSplitReader(filePath.toString())) {
            // The length is queried once and reused for both the range request and the buffer size.
            long fileLen = reader.getFileLength();
            if (fileLen < 0 || fileLen > Integer.MAX_VALUE) {
                throw new IOException("Cannot read " + filePath + " into memory: reported length is " + fileLen);
            }
            reader.open(null, 0, fileLen);
            int len = (int) fileLen;
            byte[] buffer = new byte[len];
            // A single read may deliver fewer bytes than requested, so keep reading until the buffer
            // is full or the stream reports its end.
            int filled = 0;
            while (filled < len) {
                int n = reader.read(buffer, filled, len - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }
            // Decode only what was received so a short stream does not yield trailing NUL characters.
            return new String(buffer, 0, filled, StandardCharsets.UTF_8);
        }
    } else {
        try (FSDataInputStream inputStream = filePath.getFileSystem().open(filePath.getPath())) {
            return IOUtils.toString(inputStream, StandardCharsets.UTF_8);
        }
    }
}
use of com.alibaba.alink.operator.common.io.reader.HttpFileSplitReader in project Alink by alibaba.
the class HiveCatalog method downloadFolder.
/**
 * Copies the named files out of {@code folder} into a newly created local temporary directory.
 *
 * <p>When the folder's file system is a {@link LocalFileSystem} nothing is copied and the folder's own
 * path string is returned. Otherwise a randomly named directory is created under {@code java.io.tmpdir}
 * and each file is fetched through {@link HttpFileSplitReader} (http/https schemes) or through the
 * folder's file system (every other scheme). Files that do not exist at the source are skipped.
 *
 * @param folder folder that contains the files
 * @param files  file names, each resolved against {@code folder}
 * @return {@code folder.getPathStr()} for a local folder, otherwise the absolute path of the temporary
 *         directory that holds the copies
 * @throws IOException      if reading or writing fails, or an HTTP stream ends before the announced
 *                          file length has been received
 * @throws RuntimeException if the temporary directory cannot be created
 */
public static String downloadFolder(FilePath folder, String... files) throws IOException {
    // local
    if (folder.getFileSystem() instanceof LocalFileSystem) {
        return folder.getPathStr();
    }
    File localConfDir = new File(System.getProperty("java.io.tmpdir"), FileUtils.getRandomFilename(""));
    String scheme = folder.getPath().toUri().getScheme();
    if (!localConfDir.mkdir()) {
        throw new RuntimeException("Could not create the dir " + localConfDir.getAbsolutePath());
    }
    if (scheme != null && (scheme.equalsIgnoreCase("http") || scheme.equalsIgnoreCase("https"))) {
        for (String path : files) {
            try (HttpFileSplitReader reader = new HttpFileSplitReader(folder.getPathStr() + "/" + path)) {
                long fileLen = reader.getFileLength();
                reader.open(null, 0, fileLen);
                byte[] buffer = new byte[1024];
                // The local file is only created once the remote file has been opened successfully.
                try (FileOutputStream outputStream = new FileOutputStream(Paths.get(localConfDir.getPath(), path).toFile())) {
                    // Progress is tracked separately from the buffer position: the buffer is reused
                    // from index 0 on every round, so files larger than the buffer no longer overrun it.
                    long copied = 0;
                    while (copied < fileLen) {
                        int wanted = (int) Math.min(buffer.length, fileLen - copied);
                        int len = reader.read(buffer, 0, wanted);
                        if (len < 0) {
                            throw new IOException("Unexpected end of stream while downloading " + path
                                + ": received " + copied + " of " + fileLen + " bytes");
                        }
                        outputStream.write(buffer, 0, len);
                        copied += len;
                    }
                }
            } catch (FileNotFoundException ignored) {
                // A missing remote file is skipped, mirroring the exists() check of the file-system branch.
                // NOTE(review): this also hides a FileNotFoundException raised while creating the local file.
            }
        }
    } else {
        for (String path : files) {
            // file system
            if (!folder.getFileSystem().exists(new Path(folder.getPath(), path))) {
                continue;
            }
            try (FSDataInputStream inputStream = folder.getFileSystem().open(new Path(folder.getPath(), path));
                 FileOutputStream outputStream = new FileOutputStream(Paths.get(localConfDir.getPath(), path).toFile())) {
                IOUtils.copy(inputStream, outputStream);
            }
        }
    }
    return localConfDir.getAbsolutePath();
}
use of com.alibaba.alink.operator.common.io.reader.HttpFileSplitReader in project Alink by alibaba.
the class InternalCsvSourceBatchOp method initializeDataSource.
/**
 * Builds the table for this CSV source.
 *
 * <p>The input is first read as a data set with a single string column {@code f1}: through a
 * {@link HttpFileSplitReader} when the file path parses as an http/https URL, otherwise through a
 * {@link RowCsvInputFormat} on the path's file system. The row delimiter is passed for both delimiter
 * arguments of the input format (presumably so each record stays one unparsed field). The resulting
 * rows are then run through {@code CsvUtil.ParseCsvFunc} and converted to a table using the column
 * names and types derived from the schema string.
 *
 * @return the table produced from the parsed rows
 */
@Override
public Table initializeDataSource() {
    final String path = getFilePath().getPathStr();
    final String schema = getSchemaStr();
    final String fieldSeparator = getFieldDelimiter();
    final String lineSeparator = getRowDelimiter();
    final Character quote = getQuoteChar();
    final boolean dropBlankLines = getSkipBlankLine();
    final boolean tolerant = getLenient();
    final String[] names = CsvUtil.getColNames(schema);
    final TypeInformation<?>[] types = CsvUtil.getColTypes(schema);
    final boolean skipHeader = getIgnoreFirstLine();

    // A path that does not parse as a URL is simply treated as a non-HTTP source.
    boolean overHttp = false;
    try {
        final String protocol = new URL(path).getProtocol();
        overHttp = protocol.equalsIgnoreCase("http") || protocol.equalsIgnoreCase("https");
    } catch (MalformedURLException ignored) {
    }

    final ExecutionEnvironment env = MLEnvironmentFactory.get(getMLEnvironmentId()).getExecutionEnvironment();
    final TableSchema singleColumn = new TableSchema(new String[] { "f1" }, new TypeInformation[] { Types.STRING });

    final DataSet<Row> lines;
    if (overHttp) {
        final HttpFileSplitReader httpReader = new HttpFileSplitReader(path);
        lines = env
            .createInput(
                new GenericCsvInputFormat(httpReader, singleColumn.getFieldTypes(), lineSeparator, lineSeparator, skipHeader),
                new RowTypeInfo(singleColumn.getFieldTypes(), singleColumn.getFieldNames()))
            .name("http_csv_source");
    } else {
        final RowCsvInputFormat fsFormat = new RowCsvInputFormat(new Path(path), singleColumn.getFieldTypes(),
            lineSeparator, lineSeparator, new int[] { 0 }, true, getFilePath().getFileSystem());
        fsFormat.setSkipFirstLineAsHeader(skipHeader);
        lines = env.createInput(fsFormat).name("csv_source");
    }

    final DataSet<Row> parsed = lines.flatMap(
        new CsvUtil.ParseCsvFunc(types, fieldSeparator, quote, dropBlankLines, tolerant));
    return DataSetConversionUtil.toTable(getMLEnvironmentId(), parsed, names, types);
}
Aggregations