Search in sources :

Example 46 with FileInfo

use of org.smartdata.model.FileInfo in project SSM by Intel-bigdata.

the class SmallFilePlugin method updateContainerFileInfoCache.

/**
 * Prunes the given container file info map and stores it in
 * {@code containerFileInfoCache} under the given rule.
 *
 * <p>When the map is not empty, two kinds of entries are removed:
 * <ul>
 *   <li>entries whose file length is greater than or equal to
 *       {@code containerFileSizeThreshold};</li>
 *   <li>entries whose key appears as the
 *       {@code SmallFileCompactAction.CONTAINER_FILE} argument of an action
 *       belonging to a cmdlet of this rule that is not in a terminal state.</li>
 * </ul>
 *
 * <p>If fetching cmdlet or action info fails with an {@link IOException},
 * the error is logged and the map is cached without the in-use pruning.
 *
 * @param ruleInfo the rule whose cmdlets are inspected and under which the
 *     map is cached
 * @param containerFileInfoMap container file infos keyed by path; modified
 *     in place
 */
private void updateContainerFileInfoCache(RuleInfo ruleInfo, Map<String, FileInfo> containerFileInfoMap) {
    if (!containerFileInfoMap.isEmpty()) {
        // Remove container files whose size has reached containerFileSizeThreshold.
        // The keys are collected first and removed afterwards: removing from the
        // map while iterating over its entry set throws
        // ConcurrentModificationException.
        List<String> oversizedFiles = new ArrayList<>();
        for (Map.Entry<String, FileInfo> entry : containerFileInfoMap.entrySet()) {
            if (entry.getValue().getLength() >= containerFileSizeThreshold) {
                oversizedFiles.add(entry.getKey());
            }
        }
        for (String oversizedFile : oversizedFiles) {
            containerFileInfoMap.remove(oversizedFile);
        }
        // Remove container files which are being used by unfinished cmdlets
        try {
            List<Long> aids = new ArrayList<>();
            List<CmdletInfo> list = cmdletManager.listCmdletsInfo(ruleInfo.getId());
            for (CmdletInfo cmdletInfo : list) {
                if (!CmdletState.isTerminalState(cmdletInfo.getState())) {
                    aids.addAll(cmdletInfo.getAids());
                }
            }
            List<ActionInfo> actionInfos = cmdletManager.getActions(aids);
            for (ActionInfo actionInfo : actionInfos) {
                Map<String, String> args = actionInfo.getArgs();
                if (args.containsKey(SmallFileCompactAction.CONTAINER_FILE)) {
                    containerFileInfoMap.remove(args.get(SmallFileCompactAction.CONTAINER_FILE));
                }
            }
        } catch (IOException e) {
            LOG.error("Failed to get cmdlet and action info.", e);
        }
    }
    containerFileInfoCache.put(ruleInfo, containerFileInfoMap);
}
Also used : ArrayList(java.util.ArrayList) ActionInfo(org.smartdata.model.ActionInfo) IOException(java.io.IOException) FileInfo(org.smartdata.model.FileInfo) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) CmdletInfo(org.smartdata.model.CmdletInfo)

Example 47 with FileInfo

use of org.smartdata.model.FileInfo in project SSM by Intel-bigdata.

the class SmallFilePlugin method getContainerFileInfos.

/**
 * Loads the file info of every container file known to the meta store.
 *
 * <p>The returned map is keyed by file path and iterates in ascending order
 * of file length. If the meta store reports no container files, or a
 * {@code MetaStoreException} is thrown (it is logged), the map is empty.
 *
 * @return container file infos keyed by path
 */
private Map<String, FileInfo> getContainerFileInfos() {
    Map<String, FileInfo> infoByPath = new LinkedHashMap<>();
    try {
        List<String> containerPaths = metaStore.getAllContainerFiles();
        if (containerPaths.isEmpty()) {
            return infoByPath;
        }
        List<FileInfo> containerInfos = metaStore.getFilesByPaths(containerPaths);
        // Order by length so that insertion into the LinkedHashMap keeps
        // the shortest files first.
        Comparator<FileInfo> byLength = new Comparator<FileInfo>() {

            @Override
            public int compare(FileInfo left, FileInfo right) {
                return Long.compare(left.getLength(), right.getLength());
            }
        };
        Collections.sort(containerInfos, byLength);
        for (FileInfo containerInfo : containerInfos) {
            infoByPath.put(containerInfo.getPath(), containerInfo);
        }
    } catch (MetaStoreException e) {
        LOG.error("Failed to get file info of all the container files.", e);
    }
    return infoByPath;
}
Also used : MetaStoreException(org.smartdata.metastore.MetaStoreException) FileInfo(org.smartdata.model.FileInfo) LinkedHashMap(java.util.LinkedHashMap)

Example 48 with FileInfo

use of org.smartdata.model.FileInfo in project SSM by Intel-bigdata.

the class SmallFilePlugin method preSubmitCmdlet.

/**
 * Groups the small files selected by a compact rule into JSON-encoded
 * batches before cmdlets are submitted.
 *
 * <p>If the rule text does not contain {@code COMPACT_ACTION_NAME}, or
 * {@code objects} is null or empty, {@code objects} is returned unchanged.
 * Otherwise each object is skipped when it looks like a container file
 * (its file name starts with {@code CONTAINER_FILE_PREFIX} or it is a key of
 * the container file info map), or when its file info / file state does not
 * qualify it for compaction. Remaining files are grouped by
 * {@code SmallFileStatus}, each group is cut into slices of at most
 * {@code batchSize} paths, and each slice is serialized to a JSON array.
 * The first file of every group is recorded in {@code firstFileInfoCache},
 * and the container file info cache is refreshed for the rule.
 *
 * @param ruleInfo the rule being processed
 * @param objects the file paths selected by the rule
 * @return JSON arrays of small file paths for a compact rule, otherwise
 *     {@code objects} itself
 */
@Override
public List<String> preSubmitCmdlet(final RuleInfo ruleInfo, List<String> objects) {
    if (!ruleInfo.getRuleText().contains(COMPACT_ACTION_NAME)) {
        return objects;
    }
    if (objects == null || objects.isEmpty()) {
        LOG.debug("Objects is null or empty.");
        return objects;
    }
    // Group the valid small files by their SmallFileStatus
    Map<String, FileInfo> containerFileInfoMap = getContainerFileInfos();
    Map<SmallFileStatus, List<String>> filesByStatus = new HashMap<>();
    for (String path : objects) {
        LOG.debug("Start handling the file: {}.", path);
        // Skip container files
        if (!path.endsWith("/")) {
            String baseName = path.substring(path.lastIndexOf("/") + 1);
            boolean isContainer = baseName.startsWith(CONTAINER_FILE_PREFIX)
                || containerFileInfoMap.containsKey(path);
            if (isContainer) {
                LOG.debug("{} is container file.", path);
                continue;
            }
        }
        // Validate file info and state
        try {
            FileInfo info = metaStore.getFile(path);
            FileState state = metaStore.getFileState(path);
            if (info == null
                || info.getLength() <= 0
                || info.getLength() >= containerFileSizeThreshold
                || !state.getFileType().equals(FileState.FileType.NORMAL)
                || !state.getFileStage().equals(FileState.FileStage.DONE)) {
                LOG.debug("Invalid file {} for small file compact.", path);
                continue;
            }
            SmallFileStatus status = new SmallFileStatus(info);
            List<String> group = filesByStatus.get(status);
            if (group != null) {
                group.add(path);
                continue;
            }
            // First file seen for this status: remember its info and open a new group
            firstFileInfoCache.put(path, info);
            group = new ArrayList<>();
            group.add(path);
            filesByStatus.put(status, group);
        } catch (MetaStoreException e) {
            LOG.error(String.format("Failed to get file info of %s.", path), e);
        }
    }
    // Cut every group into batches of at most batchSize files
    List<String> batches = new ArrayList<>();
    for (List<String> group : filesByStatus.values()) {
        int total = group.size();
        int from = 0;
        while (from < total) {
            int to = Math.min(from + batchSize, total);
            batches.add(new Gson().toJson(group.subList(from, to)));
            from += batchSize;
        }
    }
    // Update container file info cache for preSubmitCmdletDescriptor
    updateContainerFileInfoCache(ruleInfo, containerFileInfoMap);
    return batches;
}
Also used : FileState(org.smartdata.model.FileState) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ArrayList(java.util.ArrayList) Gson(com.google.gson.Gson) MetaStoreException(org.smartdata.metastore.MetaStoreException) FileInfo(org.smartdata.model.FileInfo) ArrayList(java.util.ArrayList) List(java.util.List)

Example 49 with FileInfo

use of org.smartdata.model.FileInfo in project SSM by Intel-bigdata.

the class MetaStore method listFileActions.

/**
 * Builds a detailed view of the actions returned by
 * {@code getActions(rid, size)}.
 *
 * <p>For each action the file named by its {@code -file} argument is looked
 * up; when it is not found, a placeholder {@code FileInfo} with zero length
 * is used instead. If the action name contains one of the storage keywords
 * checked below, the target is the action name and the source is the
 * storage policy name mapped from the file's storage policy id; otherwise
 * source and target come from the {@code -src} and {@code -dest} arguments.
 *
 * @param rid passed through to {@code getActions}
 * @param size passed through to {@code getActions}
 * @return one {@code DetailedFileAction} per action, in the same order
 * @throws MetaStoreException if a meta store lookup fails
 */
public List<DetailedFileAction> listFileActions(long rid, int size) throws MetaStoreException {
    if (mapStoragePolicyIdName == null) {
        updateCache();
    }
    List<ActionInfo> actions = getActions(rid, size);
    List<DetailedFileAction> result = new ArrayList<>();
    String[] storageKeywords = {"allssd", "onessd", "archive", "alldisk", "onedisk", "ramdisk"};
    for (ActionInfo action : actions) {
        DetailedFileAction detailed = new DetailedFileAction(action);
        String path = action.getArgs().get("-file");
        FileInfo info = getFile(path);
        if (info == null) {
            // The file is not in the file table; fall back to a mock fileInfo
            info = new FileInfo(path, 0L, 0L, false, (short) 0, 0L, 0L, 0L, (short) 0, "root", "root", (byte) 0, (byte) 0);
        }
        detailed.setFileLength(info.getLength());
        detailed.setFilePath(path);

        String actionName = action.getActionName();
        boolean isStorageAction = false;
        for (String keyword : storageKeywords) {
            if (actionName.contains(keyword)) {
                isStorageAction = true;
                break;
            }
        }
        if (isStorageAction) {
            detailed.setTarget(actionName);
            detailed.setSrc(mapStoragePolicyIdName.get((int) info.getStoragePolicy()));
        } else {
            detailed.setSrc(action.getArgs().get("-src"));
            detailed.setTarget(action.getArgs().get("-dest"));
        }
        result.add(detailed);
    }
    return result;
}
Also used : FileInfo(org.smartdata.model.FileInfo) DetailedFileAction(org.smartdata.model.DetailedFileAction) ArrayList(java.util.ArrayList) ActionInfo(org.smartdata.model.ActionInfo)

Example 50 with FileInfo

use of org.smartdata.model.FileInfo in project SSM by Intel-bigdata.

the class FileStatusIngester method run.

/**
 * Polls one batch from {@code IngestionTask} and inserts its file infos
 * through {@code dbAdapter}, then (for the consumer with id 0 only) logs
 * the persisted percentage at most once every 5000 ms.
 *
 * <p>When the batch array is only partially filled, just the first
 * {@code batch.actualSize()} elements are inserted. A
 * {@code MetaStoreException} raised by the insert is logged together with
 * its stack trace and does not propagate.
 */
@Override
public void run() {
    FileInfoBatch batch = IngestionTask.pollBatch();
    try {
        if (batch != null) {
            FileInfo[] statuses = batch.getFileInfos();
            if (statuses.length == batch.actualSize()) {
                // The array is completely filled, insert it as is
                this.dbAdapter.insertFiles(statuses);
                IngestionTask.numPersisted.addAndGet(statuses.length);
            } else {
                // Only the leading actualSize() slots are populated
                FileInfo[] actual = new FileInfo[batch.actualSize()];
                System.arraycopy(statuses, 0, actual, 0, batch.actualSize());
                this.dbAdapter.insertFiles(actual);
                IngestionTask.numPersisted.addAndGet(actual.length);
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("Consumer " + id + " " + batch.actualSize() + " files insert into table 'files'.");
            }
        }
    } catch (MetaStoreException e) {
        // TODO: handle this issue
        // Pass the exception as the last argument so the cause is not lost
        LOG.error("Consumer {} error", id, e);
    }
    if (id == 0) {
        long curr = System.currentTimeMillis();
        if (curr - lastUpdateTime >= 5000) {
            long total = IngestionTask.numDirectoriesFetched.get() + IngestionTask.numFilesFetched.get();
            if (total > 0) {
                LOG.info(String.format("%d sec, %d%% persisted into database", (curr - startTime) / 1000, IngestionTask.numPersisted.get() * 100 / total));
            } else {
                // Nothing fetched yet; avoid dividing by zero
                LOG.info(String.format("%d sec, 0%% persisted into database", (curr - startTime) / 1000));
            }
            lastUpdateTime = curr;
        }
    }
}
Also used : FileInfoBatch(org.smartdata.model.FileInfoBatch) MetaStoreException(org.smartdata.metastore.MetaStoreException) FileInfo(org.smartdata.model.FileInfo)

Aggregations

FileInfo (org.smartdata.model.FileInfo)51 Test (org.junit.Test)17 ArrayList (java.util.ArrayList)15 FileDiff (org.smartdata.model.FileDiff)12 AlluxioURI (alluxio.AlluxioURI)10 HashMap (java.util.HashMap)10 URIStatus (alluxio.client.file.URIStatus)9 MetaStoreException (org.smartdata.metastore.MetaStoreException)9 BackUpInfo (org.smartdata.model.BackUpInfo)7 FileSystem (alluxio.client.file.FileSystem)6 JournalEntry (alluxio.proto.journal.Journal.JournalEntry)6 AlluxioEntryApplier (org.smartdata.alluxio.metric.fetcher.AlluxioEntryApplier)6 SmartFilePermission (org.smartdata.SmartFilePermission)5 Gson (com.google.gson.Gson)4 IOException (java.io.IOException)4 LinkedHashMap (java.util.LinkedHashMap)4 ActionInfo (org.smartdata.model.ActionInfo)4 AlluxioException (alluxio.exception.AlluxioException)3 Map (java.util.Map)3 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)3