Search in sources :

Example 1 with HadoopFileSystem

use of com.alibaba.alink.common.io.filesystem.HadoopFileSystem in project Alink by alibaba.

the class Chap03 method c_1_2_2.

/**
 * Copies {@code hello.txt} from HDFS to the local directory as {@code hello_1.txt},
 * copies that local file back to HDFS as {@code hello_2.txt}, and then prints the
 * URI, length and modification time of every entry in the HDFS temp directory.
 *
 * @throws Exception if any file system call or the {@code copy} helper fails
 */
static void c_1_2_2() throws Exception {
    LocalFileSystem localFs = new LocalFileSystem();
    HadoopFileSystem hadoopFs = new HadoopFileSystem(HADOOP_VERSION, HDFS_URI);

    // HDFS -> local
    String remoteSource = HDFS_URI + "user/yangxu/alink/data/temp/hello.txt";
    String localCopy = LOCAL_DIR + "hello_1.txt";
    copy(hadoopFs.open(remoteSource), localFs.create(localCopy, WriteMode.OVERWRITE));

    // local -> HDFS
    String localSource = LOCAL_DIR + "hello_1.txt";
    String remoteCopy = HDFS_URI + "user/yangxu/alink/data/temp/hello_2.txt";
    copy(localFs.open(localSource), hadoopFs.create(remoteCopy, WriteMode.OVERWRITE));

    // One line per entry: URI, length, modification time.
    String remoteDir = HDFS_URI + "user/yangxu/alink/data/temp/";
    for (FileStatus entry : hadoopFs.listStatus(remoteDir)) {
        String line = entry.getPath().toUri()
            + " \t" + entry.getLen()
            + " \t" + new Date(entry.getModificationTime());
        System.out.println(line);
    }
}
Also used : FileStatus(org.apache.flink.core.fs.FileStatus) LocalFileSystem(com.alibaba.alink.common.io.filesystem.LocalFileSystem) HadoopFileSystem(com.alibaba.alink.common.io.filesystem.HadoopFileSystem) Date(java.util.Date)

Example 2 with HadoopFileSystem

use of com.alibaba.alink.common.io.filesystem.HadoopFileSystem in project Alink by alibaba.

the class Chap03 method c_2_3_1.

/**
 * Writes the iris data set in Ak format to three targets (local, HDFS, OSS) and
 * reads it back, first with batch operators and then with stream operators.
 *
 * <p>Batch pass: for each target, links a CSV source to an Ak sink, executes, and
 * prints the row count read back from the target. Stream pass: for each target,
 * links a CSV stream source to an Ak stream sink, executes, then prints the rows
 * matching {@code sepal_length < 4.5} and executes again.
 *
 * @throws Exception if any operator execution fails
 */
static void c_2_3_1() throws Exception {
    HadoopFileSystem hadoopFs = new HadoopFileSystem(HADOOP_VERSION, HDFS_URI);
    OssFileSystem ossFs =
        new OssFileSystem(OSS_VERSION, OSS_END_POINT, OSS_BUCKET_NAME, OSS_ACCESS_ID, OSS_ACCESS_KEY);

    FilePath localTarget = new FilePath(LOCAL_DIR + "iris.ak");
    FilePath hdfsTarget = new FilePath(HDFS_URI + "user/yangxu/alink/data/temp/iris.ak", hadoopFs);
    FilePath ossTarget = new FilePath(OSS_PREFIX_URI + "alink/data/temp/iris.ak", ossFs);
    FilePath[] targets = { localTarget, hdfsTarget, ossTarget };

    // Batch pass: write, then count what was written.
    for (FilePath target : targets) {
        CsvSourceBatchOp csvSource = new CsvSourceBatchOp()
            .setFilePath(IRIS_HTTP_URL)
            .setSchemaStr(IRIS_SCHEMA_STR);
        AkSinkBatchOp akSink = new AkSinkBatchOp()
            .setFilePath(target)
            .setOverwriteSink(true);
        csvSource.link(akSink);
        BatchOperator.execute();

        AkSourceBatchOp akSource = new AkSourceBatchOp().setFilePath(target);
        System.out.println(akSource.count());
    }

    // Stream pass: write, then print a filtered read-back.
    for (FilePath target : targets) {
        new CsvSourceStreamOp()
            .setFilePath(IRIS_HTTP_URL)
            .setSchemaStr(IRIS_SCHEMA_STR)
            .link(
                new AkSinkStreamOp()
                    .setFilePath(target)
                    .setOverwriteSink(true)
            );
        StreamOperator.execute();

        new AkSourceStreamOp()
            .setFilePath(target)
            .filter("sepal_length < 4.5")
            .print();
        StreamOperator.execute();
    }
}
Also used : FilePath(com.alibaba.alink.common.io.filesystem.FilePath) AkSinkStreamOp(com.alibaba.alink.operator.stream.sink.AkSinkStreamOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) AkSourceStreamOp(com.alibaba.alink.operator.stream.source.AkSourceStreamOp) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) HadoopFileSystem(com.alibaba.alink.common.io.filesystem.HadoopFileSystem) CsvSourceStreamOp(com.alibaba.alink.operator.stream.source.CsvSourceStreamOp) OssFileSystem(com.alibaba.alink.common.io.filesystem.OssFileSystem) CsvSourceBatchOp(com.alibaba.alink.operator.batch.source.CsvSourceBatchOp)

Example 3 with HadoopFileSystem

use of com.alibaba.alink.common.io.filesystem.HadoopFileSystem in project Alink by alibaba.

the class AkExample method main.

/**
 * Moves the iris data set through three hops using Ak files:
 * CSV (HTTP) to OSS, OSS to HDFS, and finally HDFS to standard output
 * (first 10 rows).
 *
 * @param args command-line arguments (not read by this method)
 * @throws Exception if any batch execution fails
 */
public static void main(String[] args) throws Exception {
    String irisCsvUrl = "https://alink-release.oss-cn-beijing.aliyuncs.com/data-files/iris.csv";
    String irisSchema = "sepal_length double, sepal_width double, petal_length double, petal_width double, category string";

    // Note: replace the placeholder arguments below with your real OSS configuration.
    BaseFileSystem<?> ossFs = new OssFileSystem("OssVersion", "OssEndPoint", "OssBucket", "OssId", "OssKey");
    // Note: replace the placeholder arguments below with your real HDFS configuration.
    BaseFileSystem<?> hdfsFs = new HadoopFileSystem("HadoopVersion", "HdfsFileSystemUri");

    // Hop 1: CSV -> OSS
    new CsvSourceBatchOp()
        .setFilePath(irisCsvUrl)
        .setSchemaStr(irisSchema)
        .link(
            new AkSinkBatchOp()
                .setFilePath(new FilePath("iris", ossFs))
                .setOverwriteSink(true)
        );
    BatchOperator.execute();

    // Hop 2: OSS -> HDFS
    new AkSourceBatchOp()
        .setFilePath(new FilePath("iris", ossFs))
        .link(
            new AkSinkBatchOp()
                .setFilePath(new FilePath("iris", hdfsFs))
                .setOverwriteSink(true)
        );
    BatchOperator.execute();

    // Hop 3: HDFS -> stdout
    new AkSourceBatchOp()
        .setFilePath(new FilePath("iris", hdfsFs))
        .firstN(10)
        .print();
}
Also used : FilePath(com.alibaba.alink.common.io.filesystem.FilePath) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) HadoopFileSystem(com.alibaba.alink.common.io.filesystem.HadoopFileSystem) OssFileSystem(com.alibaba.alink.common.io.filesystem.OssFileSystem) CsvSourceBatchOp(com.alibaba.alink.operator.batch.source.CsvSourceBatchOp)

Example 4 with HadoopFileSystem

use of com.alibaba.alink.common.io.filesystem.HadoopFileSystem in project Alink by alibaba.

the class Chap03 method c_2_1_2.

/**
 * Writes the iris data set as CSV to three targets (local, HDFS, OSS) and reads it
 * back, first with batch operators and then with stream operators.
 *
 * <p>Batch pass: for each target, links a CSV source to a CSV sink, executes, and
 * prints the row count read back from the target. Stream pass: for each target,
 * links a CSV stream source to a CSV stream sink, executes, then prints the rows
 * matching {@code sepal_length < 4.5} and executes again.
 *
 * @throws Exception if any operator execution fails
 */
static void c_2_1_2() throws Exception {
    HadoopFileSystem hadoopFs = new HadoopFileSystem(HADOOP_VERSION, HDFS_URI);
    OssFileSystem ossFs =
        new OssFileSystem(OSS_VERSION, OSS_END_POINT, OSS_BUCKET_NAME, OSS_ACCESS_ID, OSS_ACCESS_KEY);

    FilePath localTarget = new FilePath(LOCAL_DIR + "iris.csv");
    FilePath hdfsTarget = new FilePath(HDFS_URI + "user/yangxu/alink/data/temp/iris.csv", hadoopFs);
    FilePath ossTarget = new FilePath(OSS_PREFIX_URI + "alink/data/temp/iris.csv", ossFs);
    FilePath[] targets = { localTarget, hdfsTarget, ossTarget };

    // Batch pass: write, then count what was written.
    for (FilePath target : targets) {
        CsvSourceBatchOp httpSource = new CsvSourceBatchOp()
            .setFilePath(IRIS_HTTP_URL)
            .setSchemaStr(IRIS_SCHEMA_STR);
        httpSource.link(
            new CsvSinkBatchOp()
                .setFilePath(target)
                .setOverwriteSink(true)
        );
        BatchOperator.execute();

        System.out.println(
            new CsvSourceBatchOp()
                .setFilePath(target)
                .setSchemaStr(IRIS_SCHEMA_STR)
                .count()
        );
    }

    // Stream pass: write, then print a filtered read-back.
    for (FilePath target : targets) {
        new CsvSourceStreamOp()
            .setFilePath(IRIS_HTTP_URL)
            .setSchemaStr(IRIS_SCHEMA_STR)
            .link(
                new CsvSinkStreamOp()
                    .setFilePath(target)
                    .setOverwriteSink(true)
            );
        StreamOperator.execute();

        new CsvSourceStreamOp()
            .setFilePath(target)
            .setSchemaStr(IRIS_SCHEMA_STR)
            .filter("sepal_length < 4.5")
            .print();
        StreamOperator.execute();
    }
}
Also used : FilePath(com.alibaba.alink.common.io.filesystem.FilePath) CsvSinkBatchOp(com.alibaba.alink.operator.batch.sink.CsvSinkBatchOp) CsvSinkStreamOp(com.alibaba.alink.operator.stream.sink.CsvSinkStreamOp) HadoopFileSystem(com.alibaba.alink.common.io.filesystem.HadoopFileSystem) CsvSourceStreamOp(com.alibaba.alink.operator.stream.source.CsvSourceStreamOp) OssFileSystem(com.alibaba.alink.common.io.filesystem.OssFileSystem) CsvSourceBatchOp(com.alibaba.alink.operator.batch.source.CsvSourceBatchOp)

Example 5 with HadoopFileSystem

use of com.alibaba.alink.common.io.filesystem.HadoopFileSystem in project Alink by alibaba.

the class Chap03 method c_1_2_1.

/**
 * Writes a short greeting to {@code hello.txt} in the HDFS temp directory and
 * prints it back.
 *
 * <p>Steps: print the file system kind, create the temp directory if it does not
 * exist, delete any existing {@code hello.txt}, write {@code "Hello Alink!"},
 * then read the file and print its content.
 *
 * <p>Both streams are opened in try-with-resources so they are closed even when
 * a write or read fails, and the text is encoded and decoded explicitly as UTF-8
 * rather than with the platform default charset.
 *
 * @throws Exception if any file system call, write or read fails
 */
static void c_1_2_1() throws Exception {
    HadoopFileSystem hdfs = new HadoopFileSystem(HADOOP_VERSION, HDFS_URI);
    final String hdfsDir = HDFS_URI + "user/yangxu/alink/data/temp/";
    System.out.println(hdfs.getKind());
    if (!hdfs.exists(hdfsDir)) {
        hdfs.mkdirs(hdfsDir);
    }
    String path = hdfsDir + "hello.txt";
    // NO_OVERWRITE is used below, so remove any previous file first.
    if (hdfs.exists(path)) {
        hdfs.delete(path, true);
    }
    try (OutputStream outputStream = hdfs.create(path, WriteMode.NO_OVERWRITE)) {
        outputStream.write("Hello Alink!".getBytes(java.nio.charset.StandardCharsets.UTF_8));
    }
    try (InputStream inputStream = hdfs.open(path)) {
        // NOTE(review): assumes IOUtils is org.apache.commons.io.IOUtils, which
        // provides toString(InputStream, Charset) — confirm against the file's imports.
        String readString = IOUtils.toString(inputStream, java.nio.charset.StandardCharsets.UTF_8);
        System.out.println(readString);
    }
}
Also used : InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) HadoopFileSystem(com.alibaba.alink.common.io.filesystem.HadoopFileSystem)

Aggregations

HadoopFileSystem (com.alibaba.alink.common.io.filesystem.HadoopFileSystem): 5, FilePath (com.alibaba.alink.common.io.filesystem.FilePath): 3, OssFileSystem (com.alibaba.alink.common.io.filesystem.OssFileSystem): 3, CsvSourceBatchOp (com.alibaba.alink.operator.batch.source.CsvSourceBatchOp): 3, AkSinkBatchOp (com.alibaba.alink.operator.batch.sink.AkSinkBatchOp): 2, AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp): 2, CsvSourceStreamOp (com.alibaba.alink.operator.stream.source.CsvSourceStreamOp): 2, LocalFileSystem (com.alibaba.alink.common.io.filesystem.LocalFileSystem): 1, CsvSinkBatchOp (com.alibaba.alink.operator.batch.sink.CsvSinkBatchOp): 1, AkSinkStreamOp (com.alibaba.alink.operator.stream.sink.AkSinkStreamOp): 1, CsvSinkStreamOp (com.alibaba.alink.operator.stream.sink.CsvSinkStreamOp): 1, AkSourceStreamOp (com.alibaba.alink.operator.stream.source.AkSourceStreamOp): 1, InputStream (java.io.InputStream): 1, OutputStream (java.io.OutputStream): 1, Date (java.util.Date): 1, FileStatus (org.apache.flink.core.fs.FileStatus): 1