The following are example usages of org.apache.hadoop.io.BytesWritable#set(), drawn from open-source projects.
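As a primer, here is a minimal, self-contained sketch (my own, not taken from any of the projects below) of the two properties of set() that these examples rely on: it copies the given byte range into the writable's internal buffer, and the backing array returned by getBytes() may be longer than getLength(), so the logical length must always be honored.

import static java.nio.charset.StandardCharsets.UTF_8;

import org.apache.hadoop.io.BytesWritable;

public class BytesWritableSetDemo {
    public static void main(String[] args) {
        BytesWritable bw = new BytesWritable();

        byte[] first = "hello world".getBytes(UTF_8);
        bw.set(first, 0, first.length);   // copies 11 bytes into the writable

        byte[] second = "bye".getBytes(UTF_8);
        bw.set(second, 0, second.length); // reuses the buffer; logical length is now 3

        // getBytes() may return a larger backing array, so honor getLength():
        System.out.println(bw.getLength());                    // 3
        System.out.println(new String(bw.copyBytes(), UTF_8)); // bye
    }
}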
@Test
public void testReuse()
{
    BytesWritable value = new BytesWritable();
    byte[] first = "hello world".getBytes(UTF_8);
    value.set(first, 0, first.length);

    byte[] second = "bye".getBytes(UTF_8);
    value.set(second, 0, second.length);

    Type type = new TypeToken<Map<BytesWritable, Long>>() {}.getType();
    ObjectInspector inspector = getInspector(type);

    Block actual = getBlockObject(mapType(createUnboundedVarcharType(), BIGINT), ImmutableMap.of(value, 0L), inspector);
    Block expected = mapBlockOf(createUnboundedVarcharType(), BIGINT, "bye", 0L);

    assertBlockEquals(actual, expected);
}
/**
 * Main logic called by Hive when hashSeed is also passed in. Computes the
 * set of hash values present in the first sketch but absent from the second
 * (the A-not-B set operation); the two sketches may come from the same or
 * different columns.
 *
 * @param firstSketchBytes
 *          first sketch, whose entries are to be included.
 * @param secondSketchBytes
 *          second sketch, whose entries are to be excluded.
 * @param hashSeed
 *          only required if the input sketches were constructed using an update seed that was not the default.
 * @return resulting sketch of the exclusion.
 */
public BytesWritable evaluate(final BytesWritable firstSketchBytes,
        final BytesWritable secondSketchBytes, final long hashSeed) {
    Sketch firstSketch = null;
    if (firstSketchBytes != null && firstSketchBytes.getLength() > 0) {
        firstSketch = Sketch.wrap(BytesWritableHelper.wrapAsMemory(firstSketchBytes), hashSeed);
    }

    Sketch secondSketch = null;
    if (secondSketchBytes != null && secondSketchBytes.getLength() > 0) {
        secondSketch = Sketch.wrap(BytesWritableHelper.wrapAsMemory(secondSketchBytes), hashSeed);
    }

    final AnotB anotb = SetOperation.builder().setSeed(hashSeed).buildANotB();
    anotb.update(firstSketch, secondSketch);

    final byte[] excludeSketchBytes = anotb.getResult().toByteArray();
    final BytesWritable result = new BytesWritable();
    result.set(excludeSketchBytes, 0, excludeSketchBytes.length);
    return result;
}
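A hypothetical driver for the UDF above, sketched under the assumption of the DataSketches Theta API (UpdateSketch, compact()); the enclosing UDF class name, the variable names, and the item counts are illustrative, not from the snippet.

import org.apache.datasketches.theta.UpdateSketch;
import org.apache.hadoop.io.BytesWritable;

public static void exampleExclude(ExcludeSketchUDF udf) { // class name assumed
    UpdateSketch a = UpdateSketch.builder().build();
    UpdateSketch b = UpdateSketch.builder().build();
    for (long i = 0; i < 1000; i++) { a.update(i); }   // A ~ {0..999}
    for (long i = 500; i < 1000; i++) { b.update(i); } // B ~ {500..999}

    // Serialize the sketches the way Hive would hand them to the UDF.
    BytesWritable aBytes = new BytesWritable(a.compact().toByteArray());
    BytesWritable bBytes = new BytesWritable(b.compact().toByteArray());

    // 9001 is the DataSketches default update seed.
    BytesWritable diff = udf.evaluate(aBytes, bBytes, 9001L);
    // diff holds a serialized sketch whose estimate should be close to 500.
    System.out.println(diff.getLength() + " bytes");
}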
/**
 *
 * Reads the next block.
 *
 * @param key is a 32 byte array (parentHash)
 * @param value is a deserialized Java object of class EthereumBlock
 *
 * @return true if a next block is available, false if not
 */
@Override
public boolean next(BytesWritable key, EthereumBlock value) throws IOException {
    // read all the blocks, if necessary a block overlapping a split
    while (getFilePosition() <= getEnd()) { // have we gone beyond the split (remote), or is there no data left?
        EthereumBlock dataBlock = null;
        try {
            dataBlock = getEbr().readBlock();
        } catch (EthereumBlockReadException e) {
            LOG.error(e);
            throw new RuntimeException(e.toString());
        }
        if (dataBlock == null) {
            return false;
        }
        byte[] newKey = dataBlock.getEthereumBlockHeader().getParentHash();
        key.set(newKey, 0, newKey.length);
        value.set(dataBlock);
        return true;
    }
    return false;
}
/** Fills tmp with len bytes drawn from random dictionary words and points bw at the result. */
private void fillBuffer(Random rng, BytesWritable bw, byte[] tmp, int len) {
    int n = 0;
    while (n < len) {
        byte[] word = dictionary[rng.nextInt(dictionary.length)];
        int l = Math.min(word.length, len - n);
        System.arraycopy(word, 0, tmp, n, l);
        n += l;
    }
    bw.set(tmp, 0, len);
}
public void next(BytesWritable key, BytesWritable value, boolean dupKey) {
    if (dupKey) {
        key.set(lastKey);
    } else {
        fillKey(key);
    }
    fillValue(value);
}
private Object convertBinaryTypes(Object val, String javaColType) {
    byte[] bb = (byte[]) val;
    if (javaColType.equals(BYTESWRITABLE)) {
        BytesWritable bw = new BytesWritable();
        bw.set(bb, 0, bb.length);
        return bw;
    }
    return null;
}
@Override
public synchronized boolean next(LongWritable key, BytesWritable value)
        throws IOException {
    boolean dataRead = reader.nextKeyValue();
    if (dataRead) {
        LongWritable newKey = reader.getCurrentKey();
        BytesWritable newValue = reader.getCurrentValue();
        key.set(newKey.get());
        value.set(newValue);
    }
    return dataRead;
}
/**
 *
 * Reads the next block.
 *
 * @param key is a 64 byte array (hashMerkleRoot and hashPrevBlock)
 * @param value is a deserialized Java object of class BitcoinBlock
 *
 * @return true if a next block is available, false if not
 */
@Override
public boolean next(BytesWritable key, BitcoinBlock value) throws IOException {
    // read all the blocks, if necessary a block overlapping a split
    while (getFilePosition() <= getEnd()) { // have we gone beyond the split (remote), or is there no data left?
        BitcoinBlock dataBlock = null;
        try {
            dataBlock = getBbr().readBlock();
        } catch (BitcoinBlockReadException e) {
            LOG.error(e);
        }
        if (dataBlock == null) {
            return false;
        }
        // the key is the concatenation of hashMerkleRoot and hashPrevBlock
        byte[] hashMerkleRoot = dataBlock.getHashMerkleRoot();
        byte[] hashPrevBlock = dataBlock.getHashPrevBlock();
        byte[] newKey = new byte[hashMerkleRoot.length + hashPrevBlock.length];
        System.arraycopy(hashMerkleRoot, 0, newKey, 0, hashMerkleRoot.length);
        System.arraycopy(hashPrevBlock, 0, newKey, hashMerkleRoot.length, hashPrevBlock.length);
        key.set(newKey, 0, newKey.length);
        value.set(dataBlock);
        return true;
    }
    return false;
}
private void timeWrite(Path path, KVAppendable appendable, int baseKlen,
        int baseVlen, long fileSize) throws IOException {
    int maxKlen = baseKlen * 2;
    int maxVlen = baseVlen * 2;
    BytesWritable key = new BytesWritable();
    BytesWritable value = new BytesWritable();
    byte[] keyBuffer = new byte[maxKlen];
    byte[] valueBuffer = new byte[maxVlen];
    Random rng = new Random(options.seed);
    long totalBytes = 0;

    printlnWithTimestamp("Start writing: " + path.getName() + "...");
    startTime();

    for (long i = 0; true; ++i) {
        if (i % 1000 == 0) { // check the file size every 1000 rows
            if (fs.getFileStatus(path).getLen() >= fileSize) {
                break;
            }
        }
        // lengths are uniform in [baseKlen, 2*baseKlen) and [baseVlen, 2*baseVlen)
        int klen = rng.nextInt(baseKlen) + baseKlen;
        int vlen = rng.nextInt(baseVlen) + baseVlen;
        fillBuffer(rng, key, keyBuffer, klen);
        fillBuffer(rng, value, valueBuffer, vlen);
        // redundant (fillBuffer already called set with the same arguments), but harmless
        key.set(keyBuffer, 0, klen);
        value.set(valueBuffer, 0, vlen);
        appendable.append(key, value);
        totalBytes += klen;
        totalBytes += vlen;
    }

    stopTime();
    appendable.close();
    reportStats(path, totalBytes);
}
@Override
public boolean next(BytesWritable key, MapWritable value) throws IOException {
    if (!nextKeyValue()) {
        return false;
    }
    key.set(getCurrentKey());
    value.clear();
    value.putAll(getCurrentValue());
    return true;
}
/**
 *
 * Reads the next transaction.
 *
 * @param key is set to the transaction hash (the identifier usually used to link transactions)
 * @param value is a deserialized Java object of class BitcoinTransaction
 *
 * @return true if a next transaction is available, false if not
 */
@Override
public boolean next(BytesWritable key, BitcoinTransaction value) throws IOException {
    // read all the blocks, if necessary a block overlapping a split
    while (getFilePosition() <= getEnd()) { // have we gone beyond the split (remote), or is there no data left?
        if ((currentBitcoinBlock == null) || (currentBitcoinBlock.getTransactions().size() == currentTransactionCounterInBlock)) {
            try {
                currentBitcoinBlock = getBbr().readBlock();
                currentTransactionCounterInBlock = 0;
            } catch (BitcoinBlockReadException e) {
                LOG.error(e);
            }
        }
        if (currentBitcoinBlock == null) {
            return false;
        }
        BitcoinTransaction currentTransaction = currentBitcoinBlock.getTransactions().get(currentTransactionCounterInBlock);
        // the unique identifier that links transactions to one another is usually the transaction hash
        byte[] newKey = BitcoinUtil.getTransactionHash(currentTransaction);
        key.set(newKey, 0, newKey.length);
        value.set(currentTransaction);
        currentTransactionCounterInBlock++;
        return true;
    }
    return false;
}
@Override
public BytesWritable convert( ValueMetaInterface meta, Object obj ) throws TypeConversionException {
    try {
        BytesWritable result = new BytesWritable();
        byte[] binary = meta.getBinary( obj );
        result.set( binary, 0, binary.length );
        return result;
    } catch ( Exception ex ) {
        throw new TypeConversionException( BaseMessages
            .getString( TypeConverterFactory.class, "ErrorConverting", BytesWritable.class.getSimpleName(), obj ), ex );
    }
}
public void dump(String path) throws IOException {
    if (null == this.directoryClient) {
        return;
    }

    long nano = System.nanoTime();
    int gts = 0;
    long chunks = 0;
    long bytes = 0L;
    long datapoints = 0;

    Configuration conf = new Configuration();
    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    BytesWritable key = new BytesWritable();
    BytesWritable value = new BytesWritable();

    CompressionCodec codec = new DefaultCodec();
    SequenceFile.Writer.Option optPath = SequenceFile.Writer.file(new Path(path));
    SequenceFile.Writer.Option optKey = SequenceFile.Writer.keyClass(key.getClass());
    SequenceFile.Writer.Option optVal = SequenceFile.Writer.valueClass(value.getClass());
    SequenceFile.Writer.Option optCom = SequenceFile.Writer.compression(CompressionType.RECORD, codec);

    SequenceFile.Writer writer = SequenceFile.createWriter(conf, optPath, optKey, optVal, optCom);

    TSerializer serializer = new TSerializer(new TCompactProtocol.Factory());

    System.out.println("Dumping memory to '" + path + "'.");

    try {
        for (Entry<BigInteger,InMemoryChunkSet> entry: this.series.entrySet()) {
            gts++;
            Metadata metadata = this.directoryClient.getMetadataById(entry.getKey());
            List<GTSDecoder> decoders = entry.getValue().getDecoders();

            for (GTSDecoder decoder: decoders) {
                chunks++;
                GTSWrapper wrapper = new GTSWrapper();
                wrapper.setMetadata(metadata);
                wrapper.setBase(decoder.getBaseTimestamp());
                wrapper.setCount(decoder.getCount());
                datapoints += wrapper.getCount();

                byte[] data = serializer.serialize(wrapper);
                key.set(data, 0, data.length);

                // copy the decoder's buffer into a fresh, rewound ByteBuffer
                ByteBuffer bb = decoder.getBuffer();
                ByteBuffer rwbb = ByteBuffer.allocate(bb.remaining());
                rwbb.put(bb);
                rwbb.rewind();

                value.set(rwbb.array(), rwbb.arrayOffset(), rwbb.remaining());

                bytes += key.getLength() + value.getLength();
                writer.append(key, value);
            }
        }
    } catch (IOException ioe) {
        ioe.printStackTrace();
        throw ioe;
    } catch (Exception e) {
        e.printStackTrace();
        throw new IOException(e);
    }

    writer.close();

    nano = System.nanoTime() - nano;

    System.out.println("Dumped " + gts + " GTS (" + chunks + " chunks, " + datapoints + " datapoints, " + bytes + " bytes) in " + (nano / 1000000.0D) + " ms.");
}
public void dump(String path) throws IOException {
    long nano = System.nanoTime();
    int gts = 0;
    long bytes = 0L;

    Configuration conf = new Configuration();
    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    BytesWritable key = new BytesWritable();
    BytesWritable value = new BytesWritable();

    CompressionCodec codec = new DefaultCodec();
    SequenceFile.Writer.Option optPath = SequenceFile.Writer.file(new Path(path));
    SequenceFile.Writer.Option optKey = SequenceFile.Writer.keyClass(key.getClass());
    SequenceFile.Writer.Option optVal = SequenceFile.Writer.valueClass(value.getClass());
    SequenceFile.Writer.Option optCom = SequenceFile.Writer.compression(CompressionType.RECORD, codec);

    SequenceFile.Writer writer = SequenceFile.createWriter(conf, optPath, optKey, optVal, optCom);

    TSerializer serializer = new TSerializer(new TCompactProtocol.Factory());

    try {
        for (Entry<BigInteger,GTSEncoder> entry: this.series.entrySet()) {
            gts++;
            Metadata metadata = this.directoryClient.getMetadataById(entry.getKey());

            GTSWrapper wrapper = new GTSWrapper();
            wrapper.setMetadata(metadata);
            GTSEncoder encoder = entry.getValue();
            wrapper.setBase(encoder.getBaseTimestamp());
            wrapper.setCount(encoder.getCount());

            byte[] data = serializer.serialize(wrapper);
            key.set(data, 0, data.length);

            data = encoder.getBytes();
            value.set(data, 0, data.length);

            bytes += key.getLength() + value.getLength();
            writer.append(key, value);
        }
    } catch (IOException ioe) {
        ioe.printStackTrace();
        throw ioe;
    } catch (Exception e) {
        e.printStackTrace();
        throw new IOException(e);
    }

    writer.close();

    nano = System.nanoTime() - nano;

    System.out.println("Dumped " + gts + " GTS (" + bytes + " bytes) in " + (nano / 1000000.0D) + " ms.");
}
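For completeness, a minimal sketch of reading such a dump back (my own, assuming only the standard Hadoop SequenceFile.Reader API; not part of the project above). It shows the flip side of set(): next() overwrites the same two writables on every record, so no per-record allocation is needed.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;

public static void readDump(String path) throws IOException {
    Configuration conf = new Configuration();
    try (SequenceFile.Reader reader =
            new SequenceFile.Reader(conf, SequenceFile.Reader.file(new Path(path)))) {
        BytesWritable key = new BytesWritable();
        BytesWritable value = new BytesWritable();
        long records = 0;
        // next() fills the same writables on each call
        while (reader.next(key, value)) {
            records++;
        }
        System.out.println("Read " + records + " records.");
    }
}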
private static String textifyBytes(Text t) {
    BytesWritable b = new BytesWritable();
    b.set(t.getBytes(), 0, t.getLength());
    return b.toString();
}
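The helper above works because BytesWritable.toString() renders the contents as two-digit hex pairs separated by spaces. A quick illustration:

import static java.nio.charset.StandardCharsets.UTF_8;

import org.apache.hadoop.io.BytesWritable;

BytesWritable b = new BytesWritable("hello".getBytes(UTF_8));
System.out.println(b); // prints: 68 65 6c 6c 6f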