use of edu.iu.dsc.tws.data.fs.Path in project twister2 by DSC-SPIDAL.
the class BinaryInputFormatter method createInputSplits.
/**
 * Computes record-aligned input splits for the configured binary file.
 *
 * <p>The total number of records (total length divided by {@code recordLength}) is divided
 * evenly by {@code minNumSplits}. Records left over from that division are handed out one
 * per split to the first splits, so split sizes differ by at most one record.
 *
 * <p>Handling of a directory containing several files is incomplete (see the TODO below):
 * bytes at the end of a file that do not fill a whole split are not assigned to any split.
 *
 * @param minNumSplits The minimum desired number of file splits, must be at least 1.
 * @return The computed file splits.
 * @throws IllegalArgumentException if {@code minNumSplits} is less than 1
 * @throws IllegalStateException if the total length is not a multiple of the record length,
 * or if one of the files is empty
 * @throws IOException propagated from the file system calls
 */
@Override
public FileInputSplit[] createInputSplits(int minNumSplits) throws IOException {
  if (minNumSplits < 1) {
    throw new IllegalArgumentException("Number of input splits has to be at least 1.");
  }

  // TODO L2: The current implementation only handles a single binary file not a set of files
  // take the desired number of splits into account
  // NOTE(review): this value is only used as the initial capacity of the result list; the
  // split sizes below are derived from minNumSplits. Confirm whether this.numSplits is also
  // supposed to drive the split sizes.
  int curminNumSplits = Math.max(minNumSplits, this.numSplits);
  final Path path = this.filePath;
  final List<FileInputSplit> inputSplits = new ArrayList<>(curminNumSplits);

  // get all the files that are involved in the splits
  List<FileStatus> files = new ArrayList<>();
  long totalLength = 0;
  final FileSystem fs = path.getFileSystem();
  final FileStatus pathFile = fs.getFileStatus(path);
  if (pathFile.isDir()) {
    totalLength += sumFilesInDir(path, files, true);
  } else {
    // TODO L3: implement test for unsplittable
    // testForUnsplittable(pathFile);
    files.add(pathFile);
    totalLength += pathFile.getLen();
  }

  // A split must never cut a record in half, so the data has to be a whole number of records
  if (totalLength % this.recordLength != 0) {
    throw new IllegalStateException("The Binary file has a incomplete record");
  }

  // Odd records will be divided among the first splits so the max diff would be 1 record
  long numberOfRecords = totalLength / this.recordLength;
  long minRecordsForSplit = Math.floorDiv(numberOfRecords, minNumSplits);
  long oddRecords = numberOfRecords % minNumSplits;
  final long minSplitSize = minRecordsForSplit * this.recordLength;
  // offset handed to getBlockIndexForPosition; based on the base split size
  final long halfSplit = minSplitSize >>> 1;

  // Generate the splits
  int splitNum = 0;
  for (final FileStatus file : files) {
    final long len = file.getLen();
    if (len <= 0) {
      throw new IllegalStateException("The binary file " + file.getPath() + " is Empty");
    }

    // get the block locations and make sure they are in order with respect to their offset
    final BlockLocation[] blocks = fs.getFileBlockLocations(file, 0, len);
    Arrays.sort(blocks);

    long bytesUnassigned = len;
    long position = 0;
    int blockIndex = 0;
    while (bytesUnassigned > 0) {
      // The size is decided per split: only while odd records remain does a split carry one
      // extra record. Deciding it once per file would enlarge every split of that file and
      // leave the trailing records without a split.
      final boolean takesOddRecord = oddRecords > 0;
      final long currentSplitSize =
          takesOddRecord ? minSplitSize + this.recordLength : minSplitSize;

      // Stop when no progress is possible (zero-sized split) or when the rest of this file
      // is smaller than a full split.
      if (currentSplitSize <= 0 || currentSplitSize > bytesUnassigned) {
        break;
      }
      if (takesOddRecord) {
        oddRecords--;
      }

      // get the block containing the majority of the data
      blockIndex = getBlockIndexForPosition(blocks, position, halfSplit, blockIndex);

      // create a new split
      FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), position,
          currentSplitSize, blocks[blockIndex].getHosts());
      inputSplits.add(fis);

      // adjust the positions
      position += currentSplitSize;
      bytesUnassigned -= currentSplitSize;
    }
  }
  return inputSplits.toArray(new FileInputSplit[inputSplits.size()]);
}
use of edu.iu.dsc.tws.data.fs.Path in project twister2 by DSC-SPIDAL.
the class SimpleTaskQueueWithMM method init.
/**
 * Initialize the container.
 *
 * <p>Creates the task executor, builds the task plan and network, and registers a direct
 * dataflow operation from task 0 to task 1. It then exercises the LMDB backed memory manager:
 * container 0 stores a buffer under the key "temp", container 1 waits two seconds and then
 * reads, overwrites and appends to that key, printing the outcome to standard out.
 *
 * @param cfg the configuration used to build the task plan and the network
 * @param containerId the id of this container; selects the writer (0) or reader (1) role
 * @param plan the resource plan used to build the task plan
 */
public void init(Config cfg, int containerId, ResourcePlan plan) {
  LOG.log(Level.INFO, "Starting the example with container id: " + plan.getThisId());
  // Creates the task executor instance to be used in this container
  taskExecutor = new TaskExecutorFixedThread();
  this.status = Status.INIT;

  // lets create the task plan
  TaskPlan taskPlan = Utils.createTaskPlan(cfg, plan);
  // first get the communication config file
  TWSNetwork network = new TWSNetwork(cfg, taskPlan);
  TWSCommunication channel = network.getDataFlowTWSCommunication();

  // we are sending messages from 0th task to 1st task
  Set<Integer> sources = new HashSet<>();
  sources.add(0);
  int dests = 1;
  Map<String, Object> newCfg = new HashMap<>();

  LOG.info("Setting up reduce dataflow operation");
  Path dataPath = new Path("/home/pulasthi/work/twister2/lmdbdatabase");
  MemoryManager memoryManager = new LMDBMemoryManager(dataPath);

  // this method calls the init method
  // I think this is wrong
  // TODO: Does the task generate the communication or is it done by a controller for examples
  // the direct comm between task 0 and 1 is it done by the container or the task
  // TODO: if the task creates the dataflowop does the task progress it or the executor
  // TODO : FOR NOW the dataflowop is created at container and sent to task
  LinkedQueue<Message> pongQueue = new LinkedQueue<Message>();
  taskExecutor.registerQueue(0, pongQueue);
  direct = channel.direct(newCfg, MessageType.OBJECT, 0, sources, dests, new PingPongReceive());
  taskExecutor.initCommunication(channel, direct);

  // Memory Manager
  if (containerId == 0) {
    byte[] val = Longs.toByteArray(1231212121213L);
    byte[] val2 = Longs.toByteArray(22222222L);
    ByteBuffer valbuf = ByteBuffer.allocateDirect(8192);
    memoryManager.put(0, "temp", valbuf);
    // memoryManager.put(0, "temp", val);
    // memoryManager.put(0, "temp", val2);
    // the map thread where data is produced
    // LOG.log(Level.INFO, "Starting map thread");
    // SourceTask<Object> mapTask = new MapWorker(0, direct);
    // mapTask.setMemoryManager(memoryManager);
    // taskExecutor.registerTask(mapTask);
    // taskExecutor.submitTask(0);
    // taskExecutor.progres();
  } else if (containerId == 1) {
    byte[] val3 = Longs.toByteArray(3333333L);
    ByteBuffer val3buf = ByteBuffer.wrap(val3);

    // wait before reading the key that container 0 writes
    try {
      Thread.sleep(2000);
    } catch (InterruptedException e) {
      // Restore the interrupt status so code further up the stack can still observe it,
      // and report through the logger instead of dumping the stack trace to stderr.
      Thread.currentThread().interrupt();
      LOG.log(Level.WARNING, "Interrupted while waiting before reading from memory manager", e);
    }

    ByteBuffer results = memoryManager.get(0, "temp");
    if (results.limit() == 8192) {
      System.out.println("Correct " + results.limit());
    }

    // overwrite the key with a larger buffer and read it back
    ByteBuffer valbuf2 = ByteBuffer.allocateDirect(16192);
    memoryManager.put(0, "temp", valbuf2);
    results = memoryManager.get(0, "temp");
    if (results.limit() == 16192) {
      System.out.println("Correct " + results.limit());
    }

    ByteBuffer results2 = memoryManager.get(0, "temp");
    ByteBuffer results3 = memoryManager.get(0, "temp");
    if (results2 == null) {
      System.out.println("Missing key is null");
    }
    if (results3.getLong() == 1231212121213L) {
      System.out.println("Long value is correct");
    }

    // append 8 more bytes and print the first two longs of the stored value
    memoryManager.append(0, "temp", val3buf);
    ByteBuffer resultsappend = memoryManager.get(0, "temp");
    System.out.println("Long value 1 :" + resultsappend.getLong());
    System.out.println("Long value 1 :" + resultsappend.getLong());
    // ArrayList<Integer> inq = new ArrayList<>();
    // inq.add(0);
    // taskExecutor.setTaskMessageProcessLimit(10000);
    // SinkTask<Object> recTask = new RecieveWorker(1);
    // recTask.setMemoryManager(memoryManager);
    // taskExecutor.registerSinkTask(recTask, inq);
    // taskExecutor.progres();
  }
}
Aggregations