org.apache.hadoop.io.Text#encode ( )源码实例Demo

下面列出了 org.apache.hadoop.io.Text#encode() 的实例代码,您可以点击链接到 GitHub 查看源代码,也可以在右侧发表评论。

源代码1 项目: hadoop   文件: SequenceFileInputFilter.java

/** Filtering method.
 * Hashes the key with MD5Hashcode and accepts it when the hash code is an
 * exact multiple of frequency, i.e. MD5(key) % frequency == 0.
 * Text and BytesWritable keys are hashed directly; any other key is hashed
 * through the encoded bytes of its toString() value.
 * Any exception raised while hashing or testing is logged at warn level and
 * rethrown wrapped in a RuntimeException.
 * @see Filter#accept(Object)
 */
public boolean accept(Object key) {
  try {
    final long digest;
    if (key instanceof Text) {
      digest = MD5Hashcode((Text) key);
    } else if (key instanceof BytesWritable) {
      digest = MD5Hashcode((BytesWritable) key);
    } else {
      // Fall back to the encoded form of the key's string representation.
      final ByteBuffer encoded = Text.encode(key.toString());
      digest = MD5Hashcode(encoded.array(), 0, encoded.limit());
    }
    // Kept inside the try so a zero frequency is reported like any other failure.
    return digest % frequency == 0;
  } catch (Exception e) {
    LOG.warn(e);
    throw new RuntimeException(e);
  }
}
 
源代码2 项目: big-c   文件: SequenceFileInputFilter.java

/** Filtering method.
 * Returns true when MD5(key) % frequency == 0 and false otherwise.
 * The hash comes from MD5Hashcode: Text and BytesWritable keys are passed
 * to it directly, every other key is first converted with toString() and
 * encoded through Text.encode.
 * Failures are logged with LOG.warn and rethrown as a RuntimeException
 * wrapping the original exception.
 * @see Filter#accept(Object)
 */
public boolean accept(Object key) {
  try {
    long keyHash;
    if (key instanceof Text) {
      keyHash = MD5Hashcode((Text) key);
    } else if (key instanceof BytesWritable) {
      keyHash = MD5Hashcode((BytesWritable) key);
    } else {
      ByteBuffer keyBytes = Text.encode(key.toString());
      keyHash = MD5Hashcode(keyBytes.array(), 0, keyBytes.limit());
    }
    // A zero remainder means the hash is an exact multiple of frequency.
    boolean selected = (keyHash % frequency) == 0;
    return selected;
  } catch (Exception e) {
    LOG.warn(e);
    throw new RuntimeException(e);
  }
}
 
源代码3 项目: RDFS   文件: SequenceFileInputFilter.java

/** Filtering method.
 * Accepts a key only when its MD5-based hash code is divisible by frequency
 * (MD5(key) % frequency == 0).
 * Text and BytesWritable keys are hashed as they are; other keys are hashed
 * via the bytes produced by Text.encode(key.toString()).
 * Every exception is logged at warn level and rethrown inside a
 * RuntimeException.
 * @see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)
 */
public boolean accept(Object key) {
  try {
    final long md5;
    if (key instanceof Text) {
      md5 = MD5Hashcode((Text) key);
    } else if (key instanceof BytesWritable) {
      md5 = MD5Hashcode((BytesWritable) key);
    } else {
      final ByteBuffer utf8 = Text.encode(key.toString());
      md5 = MD5Hashcode(utf8.array(), 0, utf8.limit());
    }
    if (md5 % frequency != 0) {
      return false;
    }
    return true;
  } catch (Exception e) {
    LOG.warn(e);
    throw new RuntimeException(e);
  }
}
 

/** Filtering method.
 * Decides whether a key passes the filter: the key passes when
 * MD5(key) % frequency == 0.
 * The hash is obtained from MD5Hashcode, directly for Text and
 * BytesWritable keys and through Text.encode(key.toString()) for all
 * other key types.
 * Exceptions are logged via LOG.warn and then rethrown wrapped in a
 * RuntimeException.
 * @see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)
 */
public boolean accept(Object key) {
  try {
    final long hash;
    if (key instanceof Text) {
      hash = MD5Hashcode((Text) key);
    } else if (key instanceof BytesWritable) {
      hash = MD5Hashcode((BytesWritable) key);
    } else {
      // Generic keys: hash the encoded string form.
      final ByteBuffer buffer = Text.encode(key.toString());
      hash = MD5Hashcode(buffer.array(), 0, buffer.limit());
    }
    final long remainder = hash % frequency;
    return remainder == 0;
  } catch (Exception e) {
    LOG.warn(e);
    throw new RuntimeException(e);
  }
}
 
源代码5 项目: datawave   文件: TextUtil.java

/**
 * Encodes the given string with {@link Text#encode(String, boolean)} and appends the resulting bytes to the given {@link Text}.
 *
 * @param t
 *            the text to append to; modified in place
 * @param s
 *            the string whose encoded bytes are appended
 * @param replaceBadChar
 *            forwarded unchanged to {@code Text.encode}; presumably selects replacement of malformed input instead of failing (confirm against Hadoop docs)
 * @throws IllegalArgumentException
 *             wrapping the {@link CharacterCodingException} if the string cannot be encoded
 */
public static void textAppendNoNull(Text t, String s, boolean replaceBadChar) {
    final ByteBuffer encoded;
    try {
        encoded = Text.encode(s, replaceBadChar);
    } catch (CharacterCodingException cce) {
        throw new IllegalArgumentException(cce);
    }
    // Only the first limit() bytes of the backing array hold encoded data.
    t.append(encoded.array(), 0, encoded.limit());
}
 
源代码6 项目: datawave   文件: TextUtil.java

/**
 * Converts the given string to its UTF-8 bytes using Hadoop's {@link Text#encode(String, boolean)}. The original author notes that this is much faster
 * than calling {@link String#getBytes(String)}.
 *
 * @param string
 *            the string to convert
 * @return a newly allocated array holding the UTF-8 representation of the string
 * @throws IllegalArgumentException
 *             wrapping the {@link CharacterCodingException} if the string cannot be encoded
 */
public static byte[] toUtf8(String string) {
    try {
        ByteBuffer encoded = Text.encode(string, false);
        // The backing array may be larger than the payload, so copy exactly limit() bytes.
        int length = encoded.limit();
        byte[] utf8 = new byte[length];
        System.arraycopy(encoded.array(), 0, utf8, 0, length);
        return utf8;
    } catch (CharacterCodingException cce) {
        throw new IllegalArgumentException(cce);
    }
}
 
源代码7 项目: rya   文件: ColumnPrefixes.java

/**
 * Returns a new {@link Text} made of a copy of {@code prefix} followed by the
 * encoded bytes of {@code str}. The {@code prefix} argument is not modified.
 *
 * @param prefix the text copied as the start of the result
 * @param str the string encoded with {@code Text.encode(str, false)} and appended
 * @return a new text holding the prefix followed by the encoded string
 * @throws IllegalArgumentException wrapping the {@link CharacterCodingException}
 *             if the string cannot be encoded
 */
private static Text concat(Text prefix, String str) {
	// Copy first so the caller's prefix is left untouched.
	final Text combined = new Text(prefix);

	final ByteBuffer encoded;
	try {
		encoded = Text.encode(str, false);
	} catch (CharacterCodingException cce) {
		throw new IllegalArgumentException(cce);
	}

	combined.append(encoded.array(), 0, encoded.limit());
	return combined;
}
 
源代码8 项目: Hadoop-BAM   文件: QseqOutputFormat.java

/**
 * Formats one {@code SequencedFragment} as a single delimiter-separated qseq
 * record and writes it to {@code out}, followed by {@code newLine}.
 *
 * Fields are emitted in this order: instrument, run number, lane, tile,
 * x position, y position, index sequence, read, sequence, quality,
 * filter flag. A null field is written as an empty string, except that a
 * null or empty index sequence is written as "0" and a null filter flag is
 * written as 1. Every 'N' in the index sequence and in the sequence is
 * replaced with '.'.
 *
 * The quality string is written unchanged for Sanger encoding; for Illumina
 * encoding each character is shifted up by 31 (64 - 33).
 *
 * @param ignored_key not used by this method
 * @param seq the fragment to serialize
 * @throws IOException declared for the writes to {@code out}
 * @throws RuntimeException if a re-coded quality character exceeds 126, if
 *         {@code baseQualityFormat} is neither Sanger nor Illumina, or if
 *         the record cannot be encoded (the encoding failure is attached as
 *         the cause)
 */
public void write(Text ignored_key, SequencedFragment seq) throws IOException
{
	sBuilder.delete(0, sBuilder.length()); // clear

	sBuilder.append( seq.getInstrument() == null ? "" : seq.getInstrument() ).append(delim);
	sBuilder.append( seq.getRunNumber() == null ? "" : seq.getRunNumber().toString() ).append(delim);
	sBuilder.append( seq.getLane() == null ? "" : seq.getLane().toString() ).append(delim);
	sBuilder.append( seq.getTile() == null ? "" : seq.getTile().toString() ).append(delim);
	sBuilder.append( seq.getXpos() == null ? "" : seq.getXpos().toString() ).append(delim);
	sBuilder.append( seq.getYpos() == null ? "" : seq.getYpos().toString() ).append(delim);

	// a missing index sequence is written as "0"; otherwise 'N' becomes '.'
	String index;
	if (seq.getIndexSequence() == null || seq.getIndexSequence().isEmpty())
		index = "0";
	else
		index = seq.getIndexSequence().replace('N', '.');
	sBuilder.append( index ).append(delim);

	sBuilder.append( seq.getRead() == null ? "" : seq.getRead().toString() ).append(delim);
	// here we also replace 'N' with '.'
	sBuilder.append( seq.getSequence() == null ? "" : seq.getSequence().toString().replace('N', '.')).append(delim);

	//////// quality may have to be re-coded
	// a null quality contributes nothing to the record
	if (seq.getQuality() != null)
	{
		int startPos = sBuilder.length();
		sBuilder.append(seq.getQuality().toString());
		if (baseQualityFormat == BaseQualityEncoding.Sanger)
		{
			//  do nothing
		}
		else if (baseQualityFormat == BaseQualityEncoding.Illumina)
		{
			// recode the quality in-place
			for (int i = startPos; i < sBuilder.length(); ++i)
			{
				// cast to avoid warning about possible loss of precision for assigning a char from an int.
				char newValue = (char)(sBuilder.charAt(i) + 31); // 64 - 33 = 31: difference between illumina and sanger encoding
				if (newValue > 126)
					throw new RuntimeException("output quality score over allowed range.  Maybe you meant to write in Sanger format?");
				sBuilder.setCharAt(i, newValue);
			}
		}
		else
			throw new RuntimeException("BUG!  Unknown base quality format value " + baseQualityFormat + " in QseqRecordWriter");
	}
	sBuilder.append(delim);
	/////////
	// a null filter flag is treated as passed
	sBuilder.append((seq.getFilterPassed() == null || seq.getFilterPassed() ) ? 1 : 0);

	try {
		ByteBuffer buf = Text.encode(sBuilder.toString());
		out.write(buf.array(), 0, buf.limit());
	} catch (java.nio.charset.CharacterCodingException e) {
		// keep the original exception as the cause so the encoding failure stays diagnosable
		throw new RuntimeException("Error encoding qseq record: " + seq, e);
	}
	out.write(newLine, 0, newLine.length);
}