下面列出了 org.apache.hadoop.io.Text#encode() 的实例代码，您可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Filtering method.
 * <p>
 * Accepts a key when its MD5-derived hash code is an exact multiple of
 * {@code frequency}. {@code Text} and {@code BytesWritable} keys are hashed
 * directly; any other key is hashed through the bytes that
 * {@code Text.encode} produces for its {@code toString()} form.
 *
 * @param key the key to test
 * @return {@code true} if the hash code is a multiple of {@code frequency},
 *         {@code false} otherwise
 * @throws RuntimeException wrapping any exception raised while hashing or
 *         testing the key; the exception is logged at warn level first
 * @see Filter#accept(Object)
 */
public boolean accept(Object key) {
    try {
        final long digest;
        if (key instanceof Text) {
            digest = MD5Hashcode((Text) key);
        } else if (key instanceof BytesWritable) {
            digest = MD5Hashcode((BytesWritable) key);
        } else {
            // Any other key type is hashed via its string representation.
            final ByteBuffer encoded = Text.encode(key.toString());
            digest = MD5Hashcode(encoded.array(), 0, encoded.limit());
        }
        // Divisibility test: dividing and multiplying back reproduces the
        // digest only when nothing was truncated by the division
        // (assumes frequency is an integral type -- TODO confirm).
        return digest / frequency * frequency == digest;
    } catch (Exception e) {
        LOG.warn(e);
        throw new RuntimeException(e);
    }
}
/**
 * Filtering method.
 * <p>
 * Accepts a key when its MD5-derived hash code is an exact multiple of
 * {@code frequency}. {@code Text} and {@code BytesWritable} keys are hashed
 * directly; any other key is hashed through the bytes that
 * {@code Text.encode} produces for its {@code toString()} form.
 *
 * @param key the key to test
 * @return {@code true} if the hash code is a multiple of {@code frequency},
 *         {@code false} otherwise
 * @throws RuntimeException wrapping any exception raised while hashing or
 *         testing the key; the exception is logged at warn level first
 * @see Filter#accept(Object)
 */
public boolean accept(Object key) {
    try {
        final long digest;
        if (key instanceof Text) {
            digest = MD5Hashcode((Text) key);
        } else if (key instanceof BytesWritable) {
            digest = MD5Hashcode((BytesWritable) key);
        } else {
            // Any other key type is hashed via its string representation.
            final ByteBuffer encoded = Text.encode(key.toString());
            digest = MD5Hashcode(encoded.array(), 0, encoded.limit());
        }
        // Divisibility test: dividing and multiplying back reproduces the
        // digest only when nothing was truncated by the division
        // (assumes frequency is an integral type -- TODO confirm).
        return digest / frequency * frequency == digest;
    } catch (Exception e) {
        LOG.warn(e);
        throw new RuntimeException(e);
    }
}
/**
 * Filtering method.
 * <p>
 * Accepts a key when its MD5-derived hash code is an exact multiple of
 * {@code frequency}. {@code Text} and {@code BytesWritable} keys are hashed
 * directly; any other key is hashed through the bytes that
 * {@code Text.encode} produces for its {@code toString()} form.
 *
 * @param key the key to test
 * @return {@code true} if the hash code is a multiple of {@code frequency},
 *         {@code false} otherwise
 * @throws RuntimeException wrapping any exception raised while hashing or
 *         testing the key; the exception is logged at warn level first
 * @see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)
 */
public boolean accept(Object key) {
    try {
        final long digest;
        if (key instanceof Text) {
            digest = MD5Hashcode((Text) key);
        } else if (key instanceof BytesWritable) {
            digest = MD5Hashcode((BytesWritable) key);
        } else {
            // Any other key type is hashed via its string representation.
            final ByteBuffer encoded = Text.encode(key.toString());
            digest = MD5Hashcode(encoded.array(), 0, encoded.limit());
        }
        // Divisibility test: dividing and multiplying back reproduces the
        // digest only when nothing was truncated by the division
        // (assumes frequency is an integral type -- TODO confirm).
        return digest / frequency * frequency == digest;
    } catch (Exception e) {
        LOG.warn(e);
        throw new RuntimeException(e);
    }
}
/**
 * Filtering method.
 * <p>
 * Accepts a key when its MD5-derived hash code is an exact multiple of
 * {@code frequency}. {@code Text} and {@code BytesWritable} keys are hashed
 * directly; any other key is hashed through the bytes that
 * {@code Text.encode} produces for its {@code toString()} form.
 *
 * @param key the key to test
 * @return {@code true} if the hash code is a multiple of {@code frequency},
 *         {@code false} otherwise
 * @throws RuntimeException wrapping any exception raised while hashing or
 *         testing the key; the exception is logged at warn level first
 * @see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)
 */
public boolean accept(Object key) {
    try {
        final long digest;
        if (key instanceof Text) {
            digest = MD5Hashcode((Text) key);
        } else if (key instanceof BytesWritable) {
            digest = MD5Hashcode((BytesWritable) key);
        } else {
            // Any other key type is hashed via its string representation.
            final ByteBuffer encoded = Text.encode(key.toString());
            digest = MD5Hashcode(encoded.array(), 0, encoded.limit());
        }
        // Divisibility test: dividing and multiplying back reproduces the
        // digest only when nothing was truncated by the division
        // (assumes frequency is an integral type -- TODO confirm).
        return digest / frequency * frequency == digest;
    } catch (Exception e) {
        LOG.warn(e);
        throw new RuntimeException(e);
    }
}
/**
 * Encodes the given string to UTF-8 with {@code Text.encode} and appends the resulting bytes to the given {@link Text}.
 *
 * @param t
 *          the {@link Text} to append to; it is modified in place
 * @param s
 *          the string whose encoded bytes are appended
 * @param replaceBadChar
 *          passed straight through to {@code Text.encode(String, boolean)}
 * @throws IllegalArgumentException
 *           wrapping the {@link CharacterCodingException} if the string cannot be encoded
 */
public static void textAppendNoNull(Text t, String s, boolean replaceBadChar) {
    final ByteBuffer encoded;
    try {
        encoded = Text.encode(s, replaceBadChar);
    } catch (CharacterCodingException cce) {
        throw new IllegalArgumentException(cce);
    }
    // Append only the first limit() bytes of the backing array.
    t.append(encoded.array(), 0, encoded.limit());
}
/**
 * Converts the given string to its UTF-8 bytes using Hadoop's {@code Text.encode}. The original author describes this as much faster than
 * {@link String#getBytes(String)}; that claim is not verified here.
 *
 * @param string
 *          the string to convert
 * @return a newly allocated array holding the UTF-8 representation of the string
 * @throws IllegalArgumentException
 *           wrapping the {@link CharacterCodingException} if the string cannot be encoded
 */
public static byte[] toUtf8(String string) {
    try {
        final ByteBuffer encoded = Text.encode(string, false);
        // Copy just the first limit() bytes of the backing array into a fresh array.
        final byte[] utf8 = new byte[encoded.limit()];
        System.arraycopy(encoded.array(), 0, utf8, 0, utf8.length);
        return utf8;
    } catch (CharacterCodingException cce) {
        throw new IllegalArgumentException(cce);
    }
}
/**
 * Returns a new {@link Text} made of a copy of {@code prefix} followed by the bytes that {@code Text.encode} produces for {@code str}.
 * The {@code prefix} argument itself is not modified.
 *
 * @param prefix
 *          the text to copy and extend
 * @param str
 *          the string whose encoded bytes are appended to the copy
 * @return the newly created, extended {@link Text}
 * @throws IllegalArgumentException
 *           wrapping the {@link CharacterCodingException} if {@code str} cannot be encoded
 */
private static Text concat(Text prefix, String str) {
    // Copy first so the caller's prefix is left untouched.
    final Text joined = new Text(prefix);
    final ByteBuffer suffix;
    try {
        suffix = Text.encode(str, false);
    } catch (CharacterCodingException cce) {
        throw new IllegalArgumentException(cce);
    }
    joined.append(suffix.array(), 0, suffix.limit());
    return joined;
}
/**
 * Writes one fragment to {@code out} as a single delimiter-separated qseq record followed by {@code newLine}.
 * <p>
 * Fields are emitted in this order: instrument, run number, lane, tile, x position, y position,
 * index sequence, read number, sequence, quality, filter flag. A null field is written as an empty
 * string, with two exceptions: a null or empty index sequence is written as "0", and a null
 * filter flag is written as 1. Every 'N' in the index sequence and in the sequence is replaced
 * by '.'.
 * <p>
 * Quality handling depends on {@code baseQualityFormat}: with Sanger the quality string is written
 * unchanged; with Illumina every quality character is shifted up by 31 (64 - 33) before writing.
 * NOTE(review): this presumes the fragment holds Sanger-encoded qualities -- confirm against callers.
 *
 * @param ignored_key not used by this method
 * @param seq the fragment to serialize
 * @throws IOException propagated from writing to {@code out}
 * @throws RuntimeException if a recoded quality character exceeds 126, if {@code baseQualityFormat}
 *         is neither Sanger nor Illumina, or if the record cannot be encoded (the encoding
 *         failure is attached as the cause)
 */
public void write(Text ignored_key, SequencedFragment seq) throws IOException
{
    sBuilder.delete(0, sBuilder.length()); // clear the reused builder

    appendField(seq.getInstrument());
    appendField(seq.getRunNumber());
    appendField(seq.getLane());
    appendField(seq.getTile());
    appendField(seq.getXpos());
    appendField(seq.getYpos());

    String index;
    if (seq.getIndexSequence() == null || seq.getIndexSequence().isEmpty())
        index = "0";
    else
        index = seq.getIndexSequence().replace('N', '.');
    sBuilder.append(index).append(delim);

    appendField(seq.getRead());

    // here we also replace 'N' with '.'
    sBuilder.append(seq.getSequence() == null ? "" : seq.getSequence().toString().replace('N', '.')).append(delim);

    // quality may have to be re-coded; a null quality yields an empty field
    if (seq.getQuality() != null)
    {
        int startPos = sBuilder.length();
        sBuilder.append(seq.getQuality().toString());

        if (baseQualityFormat == BaseQualityEncoding.Illumina)
        {
            // recode the quality in-place
            for (int i = startPos; i < sBuilder.length(); ++i)
            {
                // cast to avoid warning about possible loss of precision for assigning a char from an int.
                char newValue = (char)(sBuilder.charAt(i) + 31); // 64 - 33 = 31: difference between illumina and sanger encoding
                if (newValue > 126)
                    throw new RuntimeException("output quality score over allowed range. Maybe you meant to write in Sanger format?");
                sBuilder.setCharAt(i, newValue);
            }
        }
        else if (baseQualityFormat != BaseQualityEncoding.Sanger)
            throw new RuntimeException("BUG! Unknown base quality format value " + baseQualityFormat + " in QseqRecordWriter");
        // Sanger: nothing to do, the appended string is already in the output encoding
    }
    sBuilder.append(delim);

    // a missing filter flag is treated as "passed"
    sBuilder.append((seq.getFilterPassed() == null || seq.getFilterPassed()) ? 1 : 0);

    try {
        ByteBuffer buf = Text.encode(sBuilder.toString());
        out.write(buf.array(), 0, buf.limit());
    } catch (java.nio.charset.CharacterCodingException e) {
        // Chain the original exception so the root cause of the encoding failure is not lost.
        throw new RuntimeException("Error encoding qseq record: " + seq, e);
    }
    out.write(newLine, 0, newLine.length);
}

/** Appends the string form of {@code value} (nothing when it is null) followed by the field delimiter. */
private void appendField(Object value)
{
    sBuilder.append(value == null ? "" : value.toString()).append(delim);
}