Use of org.apache.asterix.external.indexing.ExternalFile in project asterixdb by apache.
The class MetadataNode, method getExternalFile.
@Override
public ExternalFile getExternalFile(JobId jobId, String dataverseName, String datasetName, Integer fileNumber)
        throws MetadataException, RemoteException {
    try {
        ITupleReference searchKey = createExternalFileSearchTuple(dataverseName, datasetName, fileNumber);
        ExternalFileTupleTranslator tupleReaderWriter = tupleTranslatorProvider.getExternalFileTupleTranslator(false);
        IValueExtractor<ExternalFile> valueExtractor = new MetadataEntityValueExtractor<>(tupleReaderWriter);
        List<ExternalFile> results = new ArrayList<>();
        searchIndex(jobId, MetadataPrimaryIndexes.EXTERNAL_FILE_DATASET, searchKey, valueExtractor, results);
        if (results.isEmpty()) {
            return null;
        }
        return results.get(0);
    } catch (HyracksDataException e) {
        throw new MetadataException(e);
    }
}
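The lookup follows MetadataNode's usual pattern: build an exact-match search tuple, scan the metadata index through a value extractor, and report an empty result as null rather than as an error. A minimal caller sketch; metadataNode and jobId are assumptions standing in for the handles the metadata manager and the current metadata transaction would provide:

// Hypothetical caller: metadataNode and jobId are placeholders for handles
// obtained from the metadata manager and the current metadata transaction.
ExternalFile file = metadataNode.getExternalFile(jobId, "MyDataverse", "MyDataset", 0);
if (file == null) {
    // File number 0 is not registered for this dataset: absence, not an error.
} else {
    System.out.println(file.getFileName() + ", size = " + file.getSize());
}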
Use of org.apache.asterix.external.indexing.ExternalFile in project asterixdb by apache.
The class HDFSUtils, method getSplits.
/**
 * Instead of creating the splits using the input format, we do it manually.
 * This function returns file splits (one per HDFS file block) irrespective of the number of partitions,
 * and the produced splits cover only the intersection between the current files in HDFS and the files
 * stored internally in AsterixDB:
 * 1. NoOp means an appended file
 * 2. AddOp means a new file
 * 3. UpdateOp means the delta of a file
 *
 * @return the file splits, one per matching HDFS block
 * @throws IOException
 */
public static InputSplit[] getSplits(JobConf conf, List<ExternalFile> files) throws IOException {
    // Create file system object
    FileSystem fs = FileSystem.get(conf);
    ArrayList<FileSplit> fileSplits = new ArrayList<>();
    ArrayList<ExternalFile> orderedExternalFiles = new ArrayList<>();
    // Create file splits
    for (ExternalFile file : files) {
        Path filePath = new Path(file.getFileName());
        FileStatus fileStatus;
        try {
            fileStatus = fs.getFileStatus(filePath);
        } catch (FileNotFoundException e) {
            // The file was deleted at some point; skip to the next file
            continue;
        }
        if (file.getPendingOp() == ExternalFilePendingOp.ADD_OP
                && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
            // Get its information from the HDFS name node
            BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, file.getSize());
            // Create a split per block
            for (BlockLocation block : fileBlocks) {
                if (block.getOffset() < file.getSize()) {
                    fileSplits.add(new FileSplit(filePath, block.getOffset(),
                            (block.getLength() + block.getOffset()) < file.getSize() ? block.getLength()
                                    : (file.getSize() - block.getOffset()),
                            block.getHosts()));
                    orderedExternalFiles.add(file);
                }
            }
        } else if (file.getPendingOp() == ExternalFilePendingOp.NO_OP
                && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
            long oldSize = 0L;
            long newSize = file.getSize();
            for (int i = 0; i < files.size(); i++) {
                if (files.get(i).getFileName().equals(file.getFileName())
                        && files.get(i).getSize() != file.getSize()) {
                    newSize = files.get(i).getSize();
                    oldSize = file.getSize();
                    break;
                }
            }
            // Get its information from the HDFS name node
            BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, newSize);
            // Create a split per block
            for (BlockLocation block : fileBlocks) {
                if (block.getOffset() + block.getLength() > oldSize) {
                    if (block.getOffset() < newSize) {
                        // The block intersects the appended delta -> create a split
                        long startCut = (block.getOffset() > oldSize) ? 0L : oldSize - block.getOffset();
                        long endCut = (block.getOffset() + block.getLength() < newSize) ? 0L
                                : block.getOffset() + block.getLength() - newSize;
                        long splitLength = block.getLength() - startCut - endCut;
                        fileSplits.add(new FileSplit(filePath, block.getOffset() + startCut, splitLength,
                                block.getHosts()));
                        orderedExternalFiles.add(file);
                    }
                }
            }
        }
    }
    fs.close();
    files.clear();
    files.addAll(orderedExternalFiles);
    return fileSplits.toArray(new FileSplit[fileSplits.size()]);
}
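The NO_OP branch clips each HDFS block to the appended byte range between oldSize and newSize, so a refresh only rereads the delta. A self-contained sketch of that clipping arithmetic with made-up sizes (the 128 MB block size is just the common HDFS default, not something the method assumes):

// Worked example of the startCut/endCut clipping above, with invented sizes.
public class DeltaClipSketch {
    public static void main(String[] args) {
        long oldSize = 100L << 20;      // size when the file was last indexed: 100 MB
        long newSize = 300L << 20;      // current size on HDFS: 300 MB
        long blockOffset = 0L;          // first HDFS block of the file
        long blockLength = 128L << 20;  // 128 MB block
        if (blockOffset + blockLength > oldSize && blockOffset < newSize) {
            long startCut = (blockOffset > oldSize) ? 0L : oldSize - blockOffset;
            long endCut = (blockOffset + blockLength < newSize) ? 0L
                    : blockOffset + blockLength - newSize;
            long splitLength = blockLength - startCut - endCut;
            // Prints a split covering bytes [100 MB, 128 MB): only the appended part.
            System.out.println("offset=" + (blockOffset + startCut) + " length=" + splitLength);
        }
    }
}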
Use of org.apache.asterix.external.indexing.ExternalFile in project asterixdb by apache.
The class MetadataNode, method dropDataset.
@Override
public void dropDataset(JobId jobId, String dataverseName, String datasetName)
        throws MetadataException, RemoteException {
    Dataset dataset = getDataset(jobId, dataverseName, datasetName);
    if (dataset == null) {
        throw new MetadataException("Cannot drop dataset '" + datasetName + "' because it doesn't exist.");
    }
    try {
        // Delete entry from the 'datasets' dataset.
        ITupleReference searchKey = createTuple(dataverseName, datasetName);
        // Searches the index for the tuple to be deleted. Acquires an S
        // lock on the 'dataset' dataset.
        ITupleReference datasetTuple = null;
        try {
            datasetTuple = getTupleToBeDeleted(jobId, MetadataPrimaryIndexes.DATASET_DATASET, searchKey);
            // Delete entry(s) from the 'indexes' dataset.
            List<Index> datasetIndexes = getDatasetIndexes(jobId, dataverseName, datasetName);
            if (datasetIndexes != null) {
                for (Index index : datasetIndexes) {
                    dropIndex(jobId, dataverseName, datasetName, index.getIndexName());
                }
            }
            if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
                // Delete external files.
                // As a side effect, acquires an S lock on the 'ExternalFile' dataset
                // on behalf of txnId.
                List<ExternalFile> datasetFiles = getExternalFiles(jobId, dataset);
                if (datasetFiles != null && datasetFiles.size() > 0) {
                    // Drop all external files in this dataset.
                    for (ExternalFile file : datasetFiles) {
                        dropExternalFile(jobId, dataverseName, file.getDatasetName(), file.getFileNumber());
                    }
                }
            }
        } catch (HyracksDataException hde) {
            // Ignore a missing-key failure and continue deleting the remaining artifacts.
            if (!hde.getComponent().equals(ErrorCode.HYRACKS)
                    || hde.getErrorCode() != ErrorCode.UPDATE_OR_DELETE_NON_EXISTENT_KEY) {
                throw new MetadataException(hde);
            }
        } finally {
            deleteTupleFromIndex(jobId, MetadataPrimaryIndexes.DATASET_DATASET, datasetTuple);
        }
    } catch (HyracksDataException | ACIDException e) {
        throw new MetadataException(e);
    }
}
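The drop cascades in dependency order: secondary indexes first, then (for external datasets) every registered ExternalFile entry, and finally the dataset tuple itself in the finally block; a missing-key failure along the way is tolerated so a partially completed drop can be retried. A hypothetical caller, with metadataNode and jobId again standing in for real handles:

// Hypothetical caller: for an EXTERNAL dataset this also removes all of its
// ExternalFile metadata entries before deleting the dataset tuple itself.
try {
    metadataNode.dropDataset(jobId, "MyDataverse", "MyExternalDataset");
} catch (MetadataException e) {
    // Raised if the dataset does not exist or a metadata delete fails hard.
}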
Use of org.apache.asterix.external.indexing.ExternalFile in project asterixdb by apache.
The class ExternalFilesIndexModificationOperatorDescriptor, method createPushRuntime.
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions)
        throws HyracksDataException {
    return new AbstractOperatorNodePushable() {

        @Override
        public void initialize() throws HyracksDataException {
            final IIndexDataflowHelper indexHelper = dataflowHelperFactory.create(ctx, partition);
            FileIndexTupleTranslator filesTupleTranslator = new FileIndexTupleTranslator();
            // Open the index and get the index instance
            indexHelper.open();
            IIndex index = indexHelper.getIndexInstance();
            LSMTwoPCBTreeBulkLoader bulkLoader = null;
            try {
                bulkLoader = (LSMTwoPCBTreeBulkLoader) ((ExternalBTree) index)
                        .createTransactionBulkLoader(BTree.DEFAULT_FILL_FACTOR, false, files.size());
                // The files must be ordered according to their numbers
                for (ExternalFile file : files) {
                    switch (file.getPendingOp()) {
                        case ADD_OP:
                        case APPEND_OP:
                            bulkLoader.add(filesTupleTranslator.getTupleFromFile(file));
                            break;
                        case DROP_OP:
                            bulkLoader.delete(filesTupleTranslator.getTupleFromFile(file));
                            break;
                        case NO_OP:
                            break;
                        default:
                            throw HyracksDataException.create(ErrorCode.UNKNOWN_EXTERNAL_FILE_PENDING_OP,
                                    file.getPendingOp());
                    }
                }
                bulkLoader.end();
            } catch (Exception e) {
                if (bulkLoader != null) {
                    bulkLoader.abort();
                }
                throw HyracksDataException.create(e);
            } finally {
                indexHelper.close();
            }
        }

        @Override
        public void deinitialize() throws HyracksDataException {
        }

        @Override
        public int getInputArity() {
            return 0;
        }

        @Override
        public void setOutputFrameWriter(int index, IFrameWriter writer, RecordDescriptor recordDesc)
                throws HyracksDataException {
        }

        @Override
        public IFrameWriter getInputFrameWriter(int index) {
            return null;
        }
    };
}
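The switch is the whole contract between the metadata's pending-op flags and the two-phase-commit bulk loader: ADD_OP and APPEND_OP write the file's entry, DROP_OP deletes it, and NO_OP leaves it alone. A sketch pulling that mapping into a helper; the method name applyPendingOp is hypothetical, but every call in the body appears in the snippet above:

// Hypothetical helper: makes the pending-op to bulk-loader mapping explicit.
static void applyPendingOp(LSMTwoPCBTreeBulkLoader bulkLoader,
        FileIndexTupleTranslator translator, ExternalFile file) throws Exception {
    switch (file.getPendingOp()) {
        case ADD_OP:
        case APPEND_OP:
            bulkLoader.add(translator.getTupleFromFile(file));    // (re)write the entry
            break;
        case DROP_OP:
            bulkLoader.delete(translator.getTupleFromFile(file)); // remove the entry
            break;
        case NO_OP:
            break;                                                // file unchanged
        default:
            throw HyracksDataException.create(ErrorCode.UNKNOWN_EXTERNAL_FILE_PENDING_OP,
                    file.getPendingOp());
    }
}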
Use of org.apache.asterix.external.indexing.ExternalFile in project asterixdb by apache.
The class ExternalFilesIndexCreateOperatorDescriptor, method createPushRuntime.
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions)
        throws HyracksDataException {
    return new AbstractOperatorNodePushable() {

        @Override
        public void initialize() throws HyracksDataException {
            IIndexBuilder indexBuilder = indexBuilderFactory.create(ctx, partition);
            IIndexDataflowHelper indexHelper = dataflowHelperFactory.create(ctx, partition);
            FileIndexTupleTranslator filesTupleTranslator = new FileIndexTupleTranslator();
            // Build the index
            indexBuilder.build();
            // Open the index
            indexHelper.open();
            try {
                IIndex index = indexHelper.getIndexInstance();
                // Create bulk loader
                IIndexBulkLoader bulkLoader =
                        index.createBulkLoader(BTree.DEFAULT_FILL_FACTOR, false, files.size(), false);
                // Load files
                for (ExternalFile file : files) {
                    bulkLoader.add(filesTupleTranslator.getTupleFromFile(file));
                }
                bulkLoader.end();
            } finally {
                indexHelper.close();
            }
        }

        @Override
        public void deinitialize() throws HyracksDataException {
        }

        @Override
        public int getInputArity() {
            return 0;
        }

        @Override
        public void setOutputFrameWriter(int index, IFrameWriter writer, RecordDescriptor recordDesc)
                throws HyracksDataException {
        }

        @Override
        public IFrameWriter getInputFrameWriter(int index) {
            return null;
        }
    };
}
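Note the contrast with the modification operator above: initial creation uses the index's plain createBulkLoader, while a refresh goes through createTransactionBulkLoader so that changes stay invisible until end() and can be discarded by abort(). A side-by-side sketch of the two entry points, using only calls from the two snippets:

// Initial build: everything loaded becomes the files index's first component.
IIndexBulkLoader initialLoader =
        index.createBulkLoader(BTree.DEFAULT_FILL_FACTOR, false, files.size(), false);

// Refresh: a two-phase-commit bulk load over the same index; end() commits,
// abort() rolls back without touching the committed components.
LSMTwoPCBTreeBulkLoader refreshLoader =
        (LSMTwoPCBTreeBulkLoader) ((ExternalBTree) index)
                .createTransactionBulkLoader(BTree.DEFAULT_FILL_FACTOR, false, files.size());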