下面列出了怎么用org.apache.hadoop.util.QuickSort的API类实例代码及写法,或者点击链接到github查看源代码。
/**
* Find the split points for a given sample. The sample keys are sorted
* and down sampled to find even split points for the partitions. The
* returned keys should be the start of their respective partitions.
* @param numPartitions the desired number of partitions
* @return an array of size numPartitions - 1 that holds the split points
*/
Text[] createPartitions(int numPartitions) {
int numRecords = records.size();
System.out.println("Making " + numPartitions + " from " + numRecords +
" sampled records");
if (numPartitions > numRecords) {
throw new IllegalArgumentException
("Requested more partitions than input keys (" + numPartitions +
" > " + numRecords + ")");
}
new QuickSort().sort(this, 0, records.size());
float stepSize = numRecords / (float) numPartitions;
Text[] result = new Text[numPartitions-1];
for(int i=1; i < numPartitions; ++i) {
result[i-1] = records.get(Math.round(stepSize * i));
}
return result;
}
/**
* Find the split points for a given sample. The sample keys are sorted
* and down sampled to find even split points for the partitions. The
* returned keys should be the start of their respective partitions.
* @param numPartitions the desired number of partitions
* @return an array of size numPartitions - 1 that holds the split points
*/
Text[] createPartitions(int numPartitions) {
int numRecords = records.size();
System.out.println("Making " + numPartitions + " from " + numRecords +
" sampled records");
if (numPartitions > numRecords) {
throw new IllegalArgumentException
("Requested more partitions than input keys (" + numPartitions +
" > " + numRecords + ")");
}
new QuickSort().sort(this, 0, records.size());
float stepSize = numRecords / (float) numPartitions;
Text[] result = new Text[numPartitions-1];
for(int i=1; i < numPartitions; ++i) {
result[i-1] = records.get(Math.round(stepSize * i));
}
return result;
}
@Override
public SelectionVector4 getFinalSort(BufferAllocator allocator, int targetBatchSize){
Stopwatch watch = Stopwatch.createStarted();
intVector.setValueCount(totalCount);
QuickSort qs = new QuickSort();
if (totalCount > 0) {
qs.sort(this, 0, totalCount);
}
SelectionVector4 finalSortedSV4 = new SelectionVector4(allocator.buffer(totalCount * 4), totalCount, targetBatchSize);
for (int i = 0; i < totalCount; i++) {
finalSortedSV4.set(i, intVector.get(i));
}
logger.debug("Took {} us to final sort {} records in {} batches",
watch.elapsed(TimeUnit.MICROSECONDS), totalCount, hyperBatch.size());
return finalSortedSV4;
}
/**
* Find the split points for a given sample. The sample keys are sorted
* and down sampled to find even split points for the partitions. The
* returned keys should be the start of their respective partitions.
* @param numPartitions the desired number of partitions
* @return an array of size numPartitions - 1 that holds the split points
*/
Text[] createPartitions(int numPartitions) {
int numRecords = records.size();
System.out.println("Making " + numPartitions + " from " + numRecords +
" sampled records");
if (numPartitions > numRecords) {
throw new IllegalArgumentException
("Requested more partitions than input keys (" + numPartitions +
" > " + numRecords + ")");
}
new QuickSort().sort(this, 0, records.size());
float stepSize = numRecords / (float) numPartitions;
Text[] result = new Text[numPartitions-1];
for(int i=1; i < numPartitions; ++i) {
result[i-1] = records.get(Math.round(stepSize * i));
}
return result;
}
/**
* Find the split points for a given sample. The sample keys are sorted
* and down sampled to find even split points for the partitions. The
* returned keys should be the start of their respective partitions.
* @param numPartitions the desired number of partitions
* @return an array of size numPartitions - 1 that holds the split points
*/
Text[] createPartitions(int numPartitions) {
int numRecords = records.size();
System.out.println("Making " + numPartitions + " from " + numRecords +
" records");
if (numPartitions > numRecords) {
throw new IllegalArgumentException
("Requested more partitions than input keys (" + numPartitions +
" > " + numRecords + ")");
}
new QuickSort().sort(this, 0, records.size());
float stepSize = numRecords / (float) numPartitions;
System.out.println("Step size is " + stepSize);
Text[] result = new Text[numPartitions-1];
for(int i=1; i < numPartitions; ++i) {
result[i-1] = records.get(Math.round(stepSize * i));
}
return result;
}
/**
* Find the split points for a given sample. The sample keys are sorted
* and down sampled to find even split points for the partitions. The
* returned keys should be the start of their respective partitions.
* @param numPartitions the desired number of partitions
* @return an array of size numPartitions - 1 that holds the split points
*/
Text[] createPartitions(int numPartitions) {
int numRecords = records.size();
System.out.println("Making " + numPartitions + " from " + numRecords +
" sampled records");
if (numPartitions > numRecords) {
throw new IllegalArgumentException
("Requested more partitions than input keys (" + numPartitions +
" > " + numRecords + ")");
}
new QuickSort().sort(this, 0, records.size());
float stepSize = numRecords / (float) numPartitions;
Text[] result = new Text[numPartitions-1];
for(int i=1; i < numPartitions; ++i) {
result[i-1] = records.get(Math.round(stepSize * i));
}
return result;
}
/**
* Find the split points for a given sample. The sample keys are sorted
* and down sampled to find even split points for the partitions. The
* returned keys should be the start of their respective partitions.
*
* @param numPartitions the desired number of partitions
* @return an array of size numPartitions - 1 that holds the split
* points
*/
Text[] createPartitions(int numPartitions) {
int numRecords = records.size();
System.out.println("Making " + numPartitions + " from " + numRecords
+ " records");
if (numPartitions > numRecords) {
throw new IllegalArgumentException("Requested more partitions than input keys (" + numPartitions
+ " > " + numRecords + ")");
}
new QuickSort().sort(this, 0, records.size());
float stepSize = numRecords / (float) numPartitions;
System.out.println("Step size is " + stepSize);
Text[] result = new Text[numPartitions - 1];
for (int i = 1; i < numPartitions; ++i) {
result[i - 1] = records.get(Math.round(stepSize * i));
}
return result;
}
/**
* Find the split points for a given sample. The sample keys are sorted
* and down sampled to find even split points for the partitions. The
* returned keys should be the start of their respective partitions.
* @param numPartitions the desired number of partitions
* @return an array of size numPartitions - 1 that holds the split points
*/
Text[] createPartitions(int numPartitions) {
int numRecords = records.size();
System.out.println("Making " + numPartitions + " from " + numRecords +
" records");
if (numPartitions > numRecords) {
throw new IllegalArgumentException
("Requested more partitions than input keys (" + numPartitions +
" > " + numRecords + ")");
}
new QuickSort().sort(this, 0, records.size());
float stepSize = numRecords / (float) numPartitions;
System.out.println("Step size is " + stepSize);
Text[] result = new Text[numPartitions-1];
for(int i=1; i < numPartitions; ++i) {
result[i-1] = records.get(Math.round(stepSize * i));
}
return result;
}
@Override
public void sort(SelectionVector4 vector4, VectorContainer container){
Stopwatch watch = Stopwatch.createStarted();
QuickSort qs = new QuickSort();
qs.sort(this, 0, vector4.getTotalCount());
logger.debug("Took {} us to sort {} records", watch.elapsed(TimeUnit.MICROSECONDS), vector4.getTotalCount());
}
@Override
public void sort(SelectionVector2 vector2){
QuickSort qs = new QuickSort();
Stopwatch watch = Stopwatch.createStarted();
if (vector2.getCount() > 0) {
qs.sort(this, 0, vector2.getCount());
}
logger.debug("Took {} us to sort {} records", watch.elapsed(TimeUnit.MICROSECONDS), vector2.getCount());
}
@Override
public void sort(SelectionVector2 vector2){
QuickSort qs = new QuickSort();
Stopwatch watch = Stopwatch.createStarted();
if (vector2.getCount() > 0) {
qs.sort(this, 0, vector2.getCount());
}
logger.debug("Took {} us to sort {} records", watch.elapsed(TimeUnit.MICROSECONDS), vector2.getCount());
}
public long doSort(){
QuickSort qs = new QuickSort();
ByteSortable b = new ByteSortable();
long nano = System.nanoTime();
qs.sort(b, 0, RECORD_COUNT);
return System.nanoTime() - nano;
}
@Override
public Iterable<Tuple> sort() {
new QuickSort().sort(this, 0, mappings.length);
return new Iterable<Tuple>() {
@Override
public Iterator<Tuple> iterator() {
return new Iterator<Tuple>() {
int index;
public boolean hasNext() { return index < mappings.length; }
public Tuple next() { return tuples[mappings[index++]]; }
public void remove() { throw new TajoRuntimeException(new UnsupportedException()); }
};
}
};
}
protected void sortMemBlock(MemoryBlock memBlock) {
if (memBlock.currentPtr <= 0) {
return;
}
// quick sort the offsets
OffsetSortable sortableObj = new OffsetSortable(memBlock, kvbuffer);
QuickSort quickSort = new QuickSort();
quickSort.sort(sortableObj, 0, memBlock.currentPtr);
}
@Override
public void sort(SelectionVector2 vector2, VectorContainer container){
QuickSort qs = new QuickSort();
qs.sort(this, 0, vector2.getCount());
}
private void buildTable() throws IOException
{
QuickSort quickSort = new QuickSort();
long start, end;
/* Sort the offsets array */
start = System.currentTimeMillis();
if (offsetArr.length > 1)
{
quickSort.sort(sortable, 0, offsetArr.length);
}
end = System.currentTimeMillis();
print.f("LookUpTable: Sorted %d entries in %d ms", offsetArr.length, (end - start));
/* Fill in the HashCode array */
start = System.currentTimeMillis();
int prevHashCode = -1;
Tuple prevTuple = newTuple();
Tuple t = newTuple();
for (int i = 0; i < offsetArr.length; ++i)
{
t = store.getTuple(offsetArr[i], t);
int hashCode = tupleHashCode(t);
if (prevHashCode != hashCode)
{
hashCodeArr[hashCode] = i;
prevHashCode = hashCode;
}
if (i == 0 || !compareKeys(prevTuple, t))
{
offsetArr[i] = offsetArr[i] | SIGNBIT;
}
/* Object Reuse: Swap the tuples instead of creating new ones */
Tuple temp = t;
t = prevTuple;
prevTuple = temp;
}
end = System.currentTimeMillis();
print.f("LookUpTable: Created HashCode Array for %d entries in %d ms", offsetArr.length, (end - start));
}
@SuppressWarnings("unchecked")
public MapOutputBuffer(TaskUmbilicalProtocol umbilical, JobConf job,
TaskReporter reporter
) throws IOException, ClassNotFoundException {
this.job = job;
this.reporter = reporter;
localFs = FileSystem.getLocal(job);
partitions = job.getNumReduceTasks();
rfs = ((LocalFileSystem)localFs).getRaw();
indexCacheList = new ArrayList<SpillRecord>();
spillSortCounters = new MapSpillSortCounters(reporter);
//sanity checks
final float spillper = job.getFloat("io.sort.spill.percent",(float)0.8);
final float recper = job.getFloat("io.sort.record.percent",(float)0.05);
boolean localMode = job.get("mapred.job.tracker", "local").equals("local");
int sortmb = job.getInt("io.sort.mb", 100);
if (localMode) {
sortmb = job.getInt("io.sort.mb.localmode", 100);
}
if (spillper > (float)1.0 || spillper < (float)0.0) {
throw new IOException("Invalid \"io.sort.spill.percent\": " + spillper);
}
if (recper > (float)1.0 || recper < (float)0.01) {
throw new IOException("Invalid \"io.sort.record.percent\": " + recper);
}
if ((sortmb & 0x7FF) != sortmb) {
throw new IOException("Invalid \"io.sort.mb\": " + sortmb);
}
sorter = ReflectionUtils.newInstance(
job.getClass("map.sort.class", QuickSort.class, IndexedSorter.class), job);
LOG.info("io.sort.mb = " + sortmb);
// buffers and accounting
int maxMemUsage = sortmb << 20;
int recordCapacity = (int)(maxMemUsage * recper);
recordCapacity -= recordCapacity % RECSIZE;
kvbuffer = new byte[maxMemUsage - recordCapacity];
bufvoid = kvbuffer.length;
recordCapacity /= RECSIZE;
kvoffsets = new int[recordCapacity];
kvindices = new int[recordCapacity * ACCTSIZE];
softBufferLimit = (int)(kvbuffer.length * spillper);
softRecordLimit = (int)(kvoffsets.length * spillper);
LOG.info("data buffer = " + softBufferLimit + "/" + kvbuffer.length);
LOG.info("record buffer = " + softRecordLimit + "/" + kvoffsets.length);
// k/v serialization
comparator = job.getOutputKeyComparator();
keyClass = (Class<K>)job.getMapOutputKeyClass();
valClass = (Class<V>)job.getMapOutputValueClass();
serializationFactory = new SerializationFactory(job);
keySerializer = serializationFactory.getSerializer(keyClass);
keySerializer.open(bb);
valSerializer = serializationFactory.getSerializer(valClass);
valSerializer.open(bb);
// counters
mapOutputByteCounter = reporter.getCounter(MAP_OUTPUT_BYTES);
mapOutputRecordCounter = reporter.getCounter(MAP_OUTPUT_RECORDS);
Counters.Counter combineInputCounter =
reporter.getCounter(COMBINE_INPUT_RECORDS);
combineOutputCounter = reporter.getCounter(COMBINE_OUTPUT_RECORDS);
// compression
if (job.getCompressMapOutput()) {
Class<? extends CompressionCodec> codecClass =
job.getMapOutputCompressorClass(DefaultCodec.class);
codec = ReflectionUtils.newInstance(codecClass, job);
}
// combiner
combinerRunner = CombinerRunner.create(job, getTaskID(),
combineInputCounter,
reporter, null);
if (combinerRunner != null) {
combineCollector= new CombineOutputCollector<K,V>(combineOutputCounter);
} else {
combineCollector = null;
}
minSpillsForCombine = job.getInt("min.num.spills.for.combine", 3);
spillThread.setDaemon(true);
spillThread.setName("SpillThread");
spillLock.lock();
try {
spillThread.start();
while (!spillThreadRunning) {
spillDone.await();
}
} catch (InterruptedException e) {
throw (IOException)new IOException("Spill thread failed to initialize"
).initCause(sortSpillException);
} finally {
spillLock.unlock();
}
if (sortSpillException != null) {
throw (IOException)new IOException("Spill thread failed to initialize"
).initCause(sortSpillException);
}
}
public ExternalSorter(TezOutputContext outputContext, Configuration conf, int numOutputs,
long initialMemoryAvailable) throws IOException {
this.outputContext = outputContext;
this.conf = conf;
this.partitions = numOutputs;
rfs = ((LocalFileSystem)FileSystem.getLocal(this.conf)).getRaw();
int assignedMb = (int) (initialMemoryAvailable >> 20);
if (assignedMb <= 0) {
if (initialMemoryAvailable > 0) { // Rounded down to 0MB - may be > 0 && < 1MB
this.availableMemoryMb = 1;
LOG.warn("initialAvailableMemory: " + initialMemoryAvailable
+ " is too low. Rounding to 1 MB");
} else {
throw new RuntimeException("InitialMemoryAssigned is <= 0: " + initialMemoryAvailable);
}
} else {
this.availableMemoryMb = assignedMb;
}
// sorter
sorter = ReflectionUtils.newInstance(this.conf.getClass(
TezJobConfig.TEZ_RUNTIME_INTERNAL_SORTER_CLASS, QuickSort.class,
IndexedSorter.class), this.conf);
comparator = ConfigUtils.getIntermediateOutputKeyComparator(this.conf);
// k/v serialization
keyClass = ConfigUtils.getIntermediateOutputKeyClass(this.conf);
valClass = ConfigUtils.getIntermediateOutputValueClass(this.conf);
serializationFactory = new SerializationFactory(this.conf);
keySerializer = serializationFactory.getSerializer(keyClass);
valSerializer = serializationFactory.getSerializer(valClass);
// counters
mapOutputByteCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES);
mapOutputRecordCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_RECORDS);
outputBytesWithOverheadCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
fileOutputByteCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
spilledRecordsCounter = outputContext.getCounters().findCounter(TaskCounter.SPILLED_RECORDS);
additionalSpillBytesWritten = outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
additionalSpillBytesRead = outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
numAdditionalSpills = outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
// compression
if (ConfigUtils.shouldCompressIntermediateOutput(this.conf)) {
Class<? extends CompressionCodec> codecClass =
ConfigUtils.getIntermediateOutputCompressorClass(this.conf, DefaultCodec.class);
codec = ReflectionUtils.newInstance(codecClass, this.conf);
} else {
codec = null;
}
this.ifileReadAhead = this.conf.getBoolean(
TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD,
TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT);
if (this.ifileReadAhead) {
this.ifileReadAheadLength = conf.getInt(
TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES,
TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT);
} else {
this.ifileReadAheadLength = 0;
}
this.ifileBufferSize = conf.getInt("io.file.buffer.size",
TezJobConfig.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT);
// Task outputs
mapOutputFile = TezRuntimeUtils.instantiateTaskOutputManager(conf, outputContext);
LOG.info("Instantiating Partitioner: [" + conf.get(TezJobConfig.TEZ_RUNTIME_PARTITIONER_CLASS) + "]");
this.conf.setInt(TezRuntimeFrameworkConfigs.TEZ_RUNTIME_NUM_EXPECTED_PARTITIONS, this.partitions);
this.partitioner = TezRuntimeUtils.instantiatePartitioner(this.conf);
this.combiner = TezRuntimeUtils.instantiateCombiner(this.conf, outputContext);
}
@SuppressWarnings("unchecked")
public MapOutputBuffer(TaskUmbilicalProtocol umbilical, JobConf job,
TaskReporter reporter
) throws IOException, ClassNotFoundException {
this.job = job;
this.reporter = reporter;
localFs = FileSystem.getLocal(job);
partitions = job.getNumReduceTasks();
rfs = ((LocalFileSystem)localFs).getRaw();
indexCacheList = new ArrayList<SpillRecord>();
//sanity checks
final float spillper = job.getFloat("io.sort.spill.percent",(float)0.8);
final float recper = job.getFloat("io.sort.record.percent",(float)0.05);
final int sortmb = job.getInt("io.sort.mb", 100);
if (spillper > (float)1.0 || spillper < (float)0.0) {
throw new IOException("Invalid \"io.sort.spill.percent\": " + spillper);
}
if (recper > (float)1.0 || recper < (float)0.01) {
throw new IOException("Invalid \"io.sort.record.percent\": " + recper);
}
if ((sortmb & 0x7FF) != sortmb) {
throw new IOException("Invalid \"io.sort.mb\": " + sortmb);
}
sorter = ReflectionUtils.newInstance(
job.getClass("map.sort.class", QuickSort.class, IndexedSorter.class), job);
LOG.info("io.sort.mb = " + sortmb);
// buffers and accounting
int maxMemUsage = sortmb << 20;
int recordCapacity = (int)(maxMemUsage * recper);
recordCapacity -= recordCapacity % RECSIZE;
kvbuffer = new byte[maxMemUsage - recordCapacity];
bufvoid = kvbuffer.length;
recordCapacity /= RECSIZE;
kvoffsets = new int[recordCapacity];
kvindices = new int[recordCapacity * ACCTSIZE];
softBufferLimit = (int)(kvbuffer.length * spillper);
softRecordLimit = (int)(kvoffsets.length * spillper);
LOG.info("data buffer = " + softBufferLimit + "/" + kvbuffer.length);
LOG.info("record buffer = " + softRecordLimit + "/" + kvoffsets.length);
// k/v serialization
comparator = job.getOutputKeyComparator();
keyClass = (Class<K>)job.getMapOutputKeyClass();
valClass = (Class<V>)job.getMapOutputValueClass();
serializationFactory = new SerializationFactory(job);
keySerializer = serializationFactory.getSerializer(keyClass);
keySerializer.open(bb);
valSerializer = serializationFactory.getSerializer(valClass);
valSerializer.open(bb);
// counters
mapOutputByteCounter = reporter.getCounter(MAP_OUTPUT_BYTES);
mapOutputRecordCounter = reporter.getCounter(MAP_OUTPUT_RECORDS);
Counters.Counter combineInputCounter =
reporter.getCounter(COMBINE_INPUT_RECORDS);
combineOutputCounter = reporter.getCounter(COMBINE_OUTPUT_RECORDS);
// compression
if (job.getCompressMapOutput()) {
Class<? extends CompressionCodec> codecClass =
job.getMapOutputCompressorClass(DefaultCodec.class);
codec = ReflectionUtils.newInstance(codecClass, job);
}
// combiner
combinerRunner = CombinerRunner.create(job, getTaskID(),
combineInputCounter,
reporter, null);
if (combinerRunner != null) {
combineCollector= new CombineOutputCollector<K,V>(combineOutputCounter);
} else {
combineCollector = null;
}
minSpillsForCombine = job.getInt("min.num.spills.for.combine", 3);
spillThread.setDaemon(true);
spillThread.setName("SpillThread");
spillLock.lock();
try {
spillThread.start();
while (!spillThreadRunning) {
spillDone.await();
}
} catch (InterruptedException e) {
throw (IOException)new IOException("Spill thread failed to initialize"
).initCause(sortSpillException);
} finally {
spillLock.unlock();
}
if (sortSpillException != null) {
throw (IOException)new IOException("Spill thread failed to initialize"
).initCause(sortSpillException);
}
}