Search in sources :

Example 16 with FilePath

use of com.alibaba.alink.common.io.filesystem.FilePath in project Alink by alibaba.

the class ModelStreamUtils method descModel.

/**
 * Describes the model identified by {@code timestamp} under the model stream rooted at {@code filePath}.
 *
 * <p>Reads the meta file {@code <filePath>/<MODEL_CONF>/<modelId>.log}, parses it as a
 * {@link ModelStreamMeta}, and checks that the model folder {@code <filePath>/<modelId>} exists.
 *
 * @param filePath  root of the model stream; supplies both the base path and the file system.
 * @param timestamp timestamp of the model; turned into the model id by {@code toStringPresentation}.
 * @return a tuple of the given timestamp, the {@code count} field of the parsed meta, and the
 * model folder bound to the same file system.
 * @throws IllegalStateException if the meta file cannot be read or is empty, if the existence
 *                               check fails, or if the model folder does not exist.
 */
public static Tuple3<Timestamp, Long, FilePath> descModel(FilePath filePath, Timestamp timestamp) {
    BaseFileSystem<?> fileSystem = filePath.getFileSystem();
    String modelId = toStringPresentation(timestamp);
    Path confPath = new Path(new Path(filePath.getPath(), FileModelStreamSink.MODEL_CONF), String.format("%s.log", modelId));
    ModelStreamMeta meta;
    // Reuse the file system fetched above instead of asking filePath for it a second time.
    try (FSDataInputStream fsDataInputStream = fileSystem.open(confPath)) {
        meta = JsonConverter.fromJson(IOUtils.toString(fsDataInputStream), ModelStreamMeta.class);
    } catch (IOException e) {
        throw new IllegalStateException("Failed to read model meta " + confPath.getPath() + ".", e);
    }
    if (meta == null) {
        // Guard against a meta file that parses to null, which would otherwise fail below with a bare NPE.
        throw new IllegalStateException("Model meta " + confPath.getPath() + " is empty.");
    }
    Path modelFolderPath = new Path(filePath.getPath(), modelId);
    boolean modelExists;
    try {
        modelExists = fileSystem.exists(modelFolderPath);
    } catch (IOException e) {
        throw new IllegalStateException("Failed to check model " + modelFolderPath.getPath() + ".", e);
    }
    if (!modelExists) {
        throw new IllegalStateException("Model " + modelFolderPath.getPath() + " does not exist.");
    }
    return Tuple3.of(timestamp, meta.count, new FilePath(modelFolderPath, fileSystem));
}
Also used : Path(org.apache.flink.core.fs.Path) FilePath(com.alibaba.alink.common.io.filesystem.FilePath) FilePath(com.alibaba.alink.common.io.filesystem.FilePath) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream) IOException(java.io.IOException)

Example 17 with FilePath

use of com.alibaba.alink.common.io.filesystem.FilePath in project Alink by alibaba.

the class PyCalcRunner method open.

/**
 * Start Python process if necessary and create the handle.
 *
 * <p>The virtual environment is resolved first: a configured compressed environment is downloaded
 * and decompressed into a temporary directory, a configured local file has its {@code file://}
 * prefix length stripped, and when nothing is configured the resource plugin directory is tried
 * before falling back to whatever the bridge uses by default.
 */
public void open() {
    String virtualEnv = config.get(BasePythonBridge.PY_VIRTUAL_ENV_KEY);
    if (null == virtualEnv) {
        // Nothing configured: look for a virtual env shipped as a resource plugin.
        final RegisterKey tf1RegisterKey = DLEnvConfig.getRegisterKey(Version.TF115);
        final RegisterKey tf2RegisterKey = DLEnvConfig.getRegisterKey(Version.TF231);
        FilePath pluginFilePath = null;
        try {
            pluginFilePath = ResourcePluginFactory.getResourcePluginPath(tf1RegisterKey, tf2RegisterKey);
        } catch (Exception e) {
            final String info = String.format("Cannot prepare plugin for %s-%s, and %s-%s, fallback to use system Python.", tf1RegisterKey.getName(), tf1RegisterKey.getVersion(), tf2RegisterKey.getName(), tf2RegisterKey.getVersion());
            LOG.info(info, e);
            if (AlinkGlobalConfiguration.isPrintProcessInfo()) {
                System.out.println(info + ": " + e);
            }
        }
        if (pluginFilePath != null) {
            final File pluginDirectory = new File(pluginFilePath.getPath().getPath());
            final File[] subDirs = pluginDirectory.listFiles(File::isDirectory);
            Preconditions.checkArgument(subDirs != null && subDirs.length == 1, String.format("There should be only 1 directory in plugin directory: %s.", pluginDirectory));
            virtualEnv = subDirs[0].getAbsolutePath();
            LOG.info("Use virtual env in {}", virtualEnv);
        }
    } else if (PythonFileUtils.isCompressedFile(virtualEnv)) {
        // Compressed env: unpack into a fresh temp dir and point at the extracted folder.
        final File workDir = new File(PythonFileUtils.createTempDir("python_env_").toString());
        ArchivesUtils.downloadDecompressToDirectory(virtualEnv, workDir);
        virtualEnv = new File(workDir, PythonFileUtils.getCompressedFileName(virtualEnv)).getAbsolutePath();
    } else if (PythonFileUtils.isLocalFile(virtualEnv)) {
        // Local env: drop the leading "file://" length to get a plain file-system path.
        virtualEnv = virtualEnv.substring("file://".length());
    }
    config.put(BasePythonBridge.PY_VIRTUAL_ENV_KEY, virtualEnv);
    bridge.open(getClass().getName(), config::getOrDefault, null);
    this.handle = bridge.app().newobj(pythonClassName);
}
Also used : FilePath(com.alibaba.alink.common.io.filesystem.FilePath) File(java.io.File) RegisterKey(com.alibaba.alink.common.io.plugin.RegisterKey) IOException(java.io.IOException)

Example 18 with FilePath

use of com.alibaba.alink.common.io.filesystem.FilePath in project Alink by alibaba.

the class PluginDownloader method downloadPluginSafely.

/**
 * Downloads the given plugin version into the local plugin root under a file lock.
 *
 * <p>The plugin is looked up first among the jar plugins, then among the resource plugins. Its
 * files are fetched from the matching remote root into {@code <pluginName>-<pluginVersion>}
 * below the matching local root.
 *
 * @param pluginName    name of the plugin to download.
 * @param pluginVersion version of the plugin to download.
 * @throws IOException               if loading the configuration or downloading fails.
 * @throws DistributePluginException if the plugin is unknown or the downloaded folder cannot be
 *                                   committed.
 */
public void downloadPluginSafely(String pluginName, String pluginVersion) throws IOException {
    loadPluginConfig();
    String pluginFolder = pluginName + "-" + pluginVersion;
    if (jarsPluginConfigs.containsKey(pluginName)) {
        final List<String> jars = getListOfJars(pluginName, pluginVersion);
        downloadFilesAndCommit(getRemoteFlinkRoot(), getLocalFlinkRoot(), pluginFolder, jars);
    } else if (resourcePluginConfigs.containsKey(pluginName)) {
        final List<String> resources = getListOfResource(pluginName, pluginVersion);
        downloadFilesAndCommit(getRemoteResourceRoot(), getLocalResourceRoot(), pluginFolder, resources);
    } else {
        throw new DistributePluginException("plugin [" + pluginName + "] not found!");
    }
}

/**
 * Downloads {@code fileNames} from {@code <remoteRoot>/<pluginFolder>} into a temporary folder and
 * renames that folder to its final name once every file has been fetched.
 *
 * <p>Shared by the jar and resource branches of {@code downloadPluginSafely}, which differ only
 * in their roots and file lists.
 */
private void downloadFilesAndCommit(FilePath remoteRoot, String localRoot, String pluginFolder, List<String> fileNames) throws IOException {
    final FilePath remotePath = new FilePath(new Path(remoteRoot.getPath(), pluginFolder), remoteRoot.getFileSystem());
    downloadFileLocked(localRoot, pluginFolder, (rawPath, path) -> {
        // Final folder already present: nothing left to do.
        if (new File(rawPath.getPath()).exists()) {
            return;
        }
        File f = new File(path.getPath());
        if (f.exists()) {
            // overwrite tmp file.
            LOG.info("Tmp file {} exists. Delete first.", f);
            try {
                FileUtils.forceDelete(f);
            } catch (IOException e) {
                // Best effort: keep going and let the download overwrite what is left.
                LOG.warn("Delete tmp file {} returns false.", f, e);
            }
        }
        for (String fileName : fileNames) {
            download(new FilePath(new Path(remotePath.getPath(), fileName), remotePath.getFileSystem()), new Path(path, fileName).getPath());
        }
        // Commit by renaming the temporary folder to its final name.
        if (!new File(path.getPath()).renameTo(new File(rawPath.getPath()))) {
            throw new DistributePluginException(String.format("Commit file: %s fail.", path.getPath()));
        }
    });
}
Also used : FilePath(com.alibaba.alink.common.io.filesystem.FilePath) Path(org.apache.flink.core.fs.Path) FilePath(com.alibaba.alink.common.io.filesystem.FilePath) LoggerFactory(org.slf4j.LoggerFactory) Files(org.apache.flink.shaded.guava18.com.google.common.io.Files) JsonConverter(com.alibaba.alink.common.utils.JsonConverter) FileLock(java.nio.channels.FileLock) ArrayList(java.util.ArrayList) Path(org.apache.flink.core.fs.Path) Map(java.util.Map) Logger(org.slf4j.Logger) ZipFileUtil(com.alibaba.alink.common.dl.utils.ZipFileUtil) FileOutputStream(java.io.FileOutputStream) FileUtils(org.apache.commons.io.FileUtils) IOException(java.io.IOException) AlinkGlobalConfiguration(com.alibaba.alink.common.AlinkGlobalConfiguration) TarFileUtil(com.alibaba.alink.common.pyrunner.TarFileUtil) File(java.io.File) StandardCharsets(java.nio.charset.StandardCharsets) BaseFileSystem(com.alibaba.alink.common.io.filesystem.BaseFileSystem) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) TypeReference(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.type.TypeReference) List(java.util.List) FilePath(com.alibaba.alink.common.io.filesystem.FilePath) DistributePluginException(com.alibaba.alink.common.exceptions.DistributePluginException) FileChannel(java.nio.channels.FileChannel) Collections(java.util.Collections) InputStream(java.io.InputStream) DistributePluginException(com.alibaba.alink.common.exceptions.DistributePluginException) ArrayList(java.util.ArrayList) List(java.util.List) IOException(java.io.IOException) File(java.io.File)

Example 19 with FilePath

use of com.alibaba.alink.common.io.filesystem.FilePath in project Alink by alibaba.

the class PluginDownloader method downloadFileLocked.

/**
 * Runs {@code doDownload} while holding a file lock dedicated to {@code root}/{@code filePath}.
 *
 * <p>The callback receives the final path {@code <root>/<filePath>} and the temporary path
 * {@code <root>/<filePath>.downloading}. The lock file is placed below {@code java.io.tmpdir};
 * its parent directories are created on a best-effort basis. Failures while releasing the lock
 * or closing the channel are logged and not rethrown.
 *
 * @param root       local root directory.
 * @param filePath   path of the download target relative to {@code root}.
 * @param doDownload callback performing the download.
 * @throws IOException if the lock cannot be acquired or the callback fails.
 */
private static synchronized void downloadFileLocked(String root, String filePath, DoDownload doDownload) throws IOException {
    final Path targetPath = new Path(root, filePath);
    final Path stagingPath = new Path(root, filePath + ".downloading");
    final String tmpDir = System.getProperty("java.io.tmpdir");
    final Path lockPath = new Path(tmpDir, new Path(root, filePath + ".lock").getPath());
    final File lockDir = new File(lockPath.getParent().getPath());
    lockDir.mkdirs();
    FileChannel lockChannel = null;
    FileLock fileLock = null;
    try {
        final FileOutputStream lockStream = new FileOutputStream(lockPath.getPath(), true);
        lockChannel = lockStream.getChannel();
        fileLock = lockChannel.lock();
        doDownload.download(targetPath, stagingPath);
    } finally {
        if (null != fileLock) {
            try {
                fileLock.release();
            } catch (IOException releaseFailure) {
                // Deliberately not rethrown: only log.
                LOG.warn("Release file lock fail.", releaseFailure);
            }
        }
        if (null != lockChannel) {
            try {
                lockChannel.close();
            } catch (IOException closeFailure) {
                // Deliberately not rethrown: only log.
                LOG.warn("Close channel fail.", closeFailure);
            }
        }
    }
}
Also used : Path(org.apache.flink.core.fs.Path) FilePath(com.alibaba.alink.common.io.filesystem.FilePath) FileChannel(java.nio.channels.FileChannel) FileOutputStream(java.io.FileOutputStream) FileLock(java.nio.channels.FileLock) IOException(java.io.IOException) File(java.io.File)

Example 20 with FilePath

use of com.alibaba.alink.common.io.filesystem.FilePath in project Alink by alibaba.

the class AppendModelStreamFileSinkBatchOp method sinkFrom.

/**
 * Writes the rows of {@code in} through a {@link FileModelStreamSink} and finalizes the model once
 * the total row count is known.
 *
 * <p>The sink is initialized globally up front. Rows are then written by a map stage running with
 * a parallelism of {@code getNumFiles()}, and a single-parallelism output stage passes the summed
 * per-partition count to {@code finalizeGlobal}.
 *
 * @param in the batch operator whose rows are written.
 * @return this operator.
 */
@Override
protected AppendModelStreamFileSinkBatchOp sinkFrom(BatchOperator<?> in) {
    final FilePath targetPath = getFilePath();
    final Timestamp modelTime = ModelStreamUtils.createStartTime(getModelTime());
    final int fileCount = getNumFiles();
    final int keepCount = getNumKeepModel();
    final TableSchema inputSchema = in.getSchema();
    final FileModelStreamSink sink = new FileModelStreamSink(targetPath, CsvUtil.schema2SchemaStr(inputSchema));
    try {
        sink.initializeGlobal();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // Writes every row through the sink and passes it on unchanged so it can be counted downstream.
    final RichMapFunction<Row, Row> rowWriter = new RichMapFunction<Row, Row>() {

        @Override
        public void open(Configuration parameters) throws Exception {
            sink.open(modelTime, getRuntimeContext().getIndexOfThisSubtask());
        }

        @Override
        public void close() throws Exception {
            sink.close();
        }

        @Override
        public Row map(Row value) throws Exception {
            sink.collect(value);
            return value;
        }
    };

    // Receives the summed row count and finalizes the model with it.
    final OutputFormat<Tuple2<Integer, Long>> finalizer = new OutputFormat<Tuple2<Integer, Long>>() {

        @Override
        public void configure(Configuration parameters) {
        // pass
        }

        @Override
        public void open(int taskNumber, int numTasks) throws IOException {
        // pass
        }

        @Override
        public void writeRecord(Tuple2<Integer, Long> record) throws IOException {
            sink.finalizeGlobal(modelTime, record.f1, fileCount, keepCount);
        }

        @Override
        public void close() throws IOException {
        // pass
        }
    };

    DataSet<Row> writtenRows = in.getDataSet().map(rowWriter).setParallelism(fileCount);
    DataSetUtils.countElementsPerPartition(writtenRows).sum(1).output(finalizer).setParallelism(1);
    return this;
}
Also used : FilePath(com.alibaba.alink.common.io.filesystem.FilePath) TableSchema(org.apache.flink.table.api.TableSchema) Configuration(org.apache.flink.configuration.Configuration) FileModelStreamSink(com.alibaba.alink.operator.common.stream.model.FileModelStreamSink) OutputFormat(org.apache.flink.api.common.io.OutputFormat) IOException(java.io.IOException) Timestamp(java.sql.Timestamp) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Row(org.apache.flink.types.Row)

Aggregations

FilePath (com.alibaba.alink.common.io.filesystem.FilePath) 36, Path (org.apache.flink.core.fs.Path) 22, IOException (java.io.IOException) 9, Test (org.junit.Test) 9, Row (org.apache.flink.types.Row) 8, File (java.io.File) 7, TableSchema (org.apache.flink.table.api.TableSchema) 6, AkSinkStreamOp (com.alibaba.alink.operator.stream.sink.AkSinkStreamOp) 5, AppendIdBatchOp (com.alibaba.alink.operator.batch.dataproc.AppendIdBatchOp) 4, CsvSourceBatchOp (com.alibaba.alink.operator.batch.source.CsvSourceBatchOp) 4, RandomTableSourceBatchOp (com.alibaba.alink.operator.batch.source.RandomTableSourceBatchOp) 4, SelectBatchOp (com.alibaba.alink.operator.batch.sql.SelectBatchOp) 4, HttpFileSplitReader (com.alibaba.alink.operator.common.io.reader.HttpFileSplitReader) 4, AppendIdStreamOp (com.alibaba.alink.operator.stream.dataproc.AppendIdStreamOp) 4, ArrayList (java.util.ArrayList) 4, List (java.util.List) 4, HadoopFileSystem (com.alibaba.alink.common.io.filesystem.HadoopFileSystem) 3, OssFileSystem (com.alibaba.alink.common.io.filesystem.OssFileSystem) 3, RandomTableSourceStreamOp (com.alibaba.alink.operator.stream.source.RandomTableSourceStreamOp) 3, SelectStreamOp (com.alibaba.alink.operator.stream.sql.SelectStreamOp) 3