下面列出了 org.apache.hadoop.io.Text#decode() 的实例代码。您可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Converts raw bytes into a string value.
 * <p>
 * Unless the content is expected to be binary, the bytes are first passed through the configured mime decoder and the result is decoded with
 * {@code Text.decode} (UTF-8). If that attempt fails for any reason, or the content is expected to be binary, the bytes are decoded as ISO-8859-1, i.e.
 * one character per byte.
 *
 * @param bytes
 *            the raw bytes to convert
 * @param expectBinary
 *            {@code true} to skip the UTF-8 attempt and go straight to the one-byte-per-character decoding
 * @return the decoded value
 */
public static String decode(byte[] bytes, boolean expectBinary) {
    // Lazily create the shared mime decoder while holding the class lock.
    synchronized (NormalizedFieldAndValue.class) {
        if (null == mimeDecoder) {
            IngestConfiguration config = IngestConfigurationFactory.getIngestConfiguration();
            mimeDecoder = config.createMimeDecoder();
        }
    }
    String value = null;
    if (!expectBinary) {
        try {
            value = Text.decode(mimeDecoder.decode(bytes));
        } catch (Exception ignored) {
            // Deliberate best effort: the content could not be decoded as text, so fall through and treat it as binary.
        }
    }
    if (value == null) {
        // Using the charset constant instead of a charset name removes the checked UnsupportedEncodingException
        // and the unreachable handler that came with it.
        value = new String(bytes, java.nio.charset.StandardCharsets.ISO_8859_1);
    }
    return value;
}
/**
 * Splits the edge key bytes on {@code COL_SEPARATOR_BYTE} and stores each decoded piece in {@code parts}, advancing {@code pLen} for every piece stored.
 *
 * @param bytes
 *            byte array holding the parts of the edge key
 * @param bLen
 *            number of bytes to use (important: the byte array may be reused so its length may not be correct)
 * @throws RuntimeException
 *             if more pieces are found than {@code parts} can hold, or if a piece cannot be decoded
 */
private void getParts(byte[] bytes, int bLen) {
    try {
        int start = 0;
        for (int i = 0; i < bLen; i++) {
            ensureRoomForPart(bytes, bLen);
            if (bytes[i] == COL_SEPARATOR_BYTE) {
                parts[pLen++] = Text.decode(bytes, start, i - start);
                start = i + 1;
            }
        }
        // The trailing piece is stored unconditionally, so it needs the same capacity check as the pieces found
        // inside the loop; otherwise a key ending in a separator overruns the array.
        ensureRoomForPart(bytes, bLen);
        parts[pLen++] = Text.decode(bytes, start, bLen - start);
    } catch (CharacterCodingException e) {
        throw new RuntimeException("Edge key column encoding exception", e);
    }
}

/** Fails with a descriptive message when {@code parts} has no free slot left for another piece. */
private void ensureRoomForPart(byte[] bytes, int bLen) {
    if (pLen >= parts.length) {
        // Only the first bLen bytes belong to this key; the rest of the array may be left over from earlier use.
        throw new RuntimeException("Exceeded number of possible number of parts (" + parts.length + ")." + " bytes as String: "
                        + new String(bytes, 0, bLen, java.nio.charset.StandardCharsets.UTF_8) + " parts: " + Arrays.toString(parts));
    }
}
/**
 * Extracts the leading date portion of an edge column qualifier.
 * <p>
 * The date is everything before the first {@code COL_SEPARATOR_BYTE} found within the first {@code DATE_LEN + 1} bytes. When no separator is found
 * there, the first {@code DATE_LEN} bytes are used, or the whole qualifier if it is shorter than that.
 *
 * @param colQual
 *            the column qualifier to read the date from
 * @return the decoded date portion
 * @throws RuntimeException
 *             if the selected bytes cannot be decoded
 */
public static String getYYYYMMDD(Text colQual) {
    final byte[] raw = colQual.getBytes();
    final int length = colQual.getLength();
    final int scanLimit = Math.min(DATE_LEN + 1, length);

    // Defaults to DATE_LEN because the qualifier may not contain a separator at all.
    int dateEnd = DATE_LEN;
    int cursor = 0;
    while (cursor < scanLimit) {
        if (raw[cursor] == COL_SEPARATOR_BYTE) {
            dateEnd = cursor;
            break;
        }
        cursor++;
    }

    try {
        return Text.decode(raw, 0, Math.min(length, dateEnd));
    } catch (CharacterCodingException cce) {
        // same behavior as EdgeKey.getParts
        throw new RuntimeException("Edge key column encoding exception", cce);
    }
}
/**
 * Reads the next key/value pair from the input for processing.
 *
 * @param key
 *            key object handed to {@code scanFastaLine} to be filled in
 * @param value
 *            value object handed to {@code scanFastaLine} to be filled in
 * @return {@code true} if a line was read and scanned; {@code false} once the position has reached the end of the slice or no more bytes could be read
 * @throws IOException
 *             if reading the line fails
 * @throws RuntimeException
 *             if the line read is at least {@code MAX_LINE_LENGTH} bytes long
 */
public boolean next(Text key, ReferenceFragment value) throws IOException
{
    if (pos >= end) {
        return false; // past end of slice
    }

    int bytesRead = lineReader.readLine(buffer, MAX_LINE_LENGTH);
    pos += bytesRead;

    if (bytesRead >= MAX_LINE_LENGTH) {
        String line;
        try {
            line = Text.decode(buffer.getBytes(), 0, 500);
        } catch (java.nio.charset.CharacterCodingException e) {
            // The 500-byte preview may not be valid text (or may cut a multi-byte character in half); that must
            // not replace the real error reported below.
            line = "(line not convertible to printable format)";
        }
        throw new RuntimeException("found abnormally large line (length " + bytesRead + ") at " + makePositionMessage(pos - bytesRead) + ": " + line);
    } else if (bytesRead <= 0) {
        return false; // EOF
    } else {
        scanFastaLine(buffer, key, value);
        current_split_pos += bytesRead;
        return true;
    }
}
/**
 * Reads one line into {@code buffer}, advances {@code pos} by the number of bytes read and, when any bytes were read, passes the line to
 * {@code scanQseqLine}.
 *
 * @param key
 *            key object handed to {@code scanQseqLine}
 * @param value
 *            value object handed to {@code scanQseqLine}
 * @return the byte count reported by the line reader
 * @throws IOException
 *             if reading the line fails
 * @throws RuntimeException
 *             if the line read is at least {@code MAX_LINE_LENGTH} bytes long
 */
private int lowLevelQseqRead(Text key, SequencedFragment value) throws IOException
{
    final int numRead = lineReader.readLine(buffer, MAX_LINE_LENGTH);
    pos += numRead;

    // Normal case first: the line fits within the limit.
    if (numRead < MAX_LINE_LENGTH) {
        if (numRead > 0) {
            scanQseqLine(buffer, key, value);
        }
        return numRead;
    }

    // Oversized line: build a short preview for the error message, tolerating undecodable bytes.
    String preview;
    try {
        preview = Text.decode(buffer.getBytes(), 0, 500);
    } catch (java.nio.charset.CharacterCodingException undecodable) {
        preview = "(line not convertible to printable format)";
    }
    throw new RuntimeException("found abnormally large line (length " + numRead + ") at " +
            makePositionMessage(pos - numRead) + ": " + preview);
}
/**
 * Creates an attribute for the given field from the part of the key's column qualifier that follows the position reported by
 * {@code holder.find(Constants.NULL)}.
 *
 * @param fieldName
 *            name of the field the attribute is created for
 * @param k
 *            key whose column qualifier carries the value
 * @return the attribute produced by the attribute factory
 * @throws IllegalArgumentException
 *             if {@code Constants.NULL} is not found in the column qualifier, or the value bytes cannot be decoded
 */
public Attribute<?> getFieldValue(String fieldName, Key k) {
    k.getColumnQualifier(holder);

    final int separatorPos = holder.find(Constants.NULL);
    if (separatorPos < 0) {
        throw new IllegalArgumentException("Could not find null-byte contained in columnqualifier for key: " + k);
    }
    final int valueStart = separatorPos + 1;

    try {
        final String fieldData = Text.decode(holder.getBytes(), valueStart, holder.getLength() - valueStart);

        // NOTE(review): the result is not used below; the call is kept because getCV is defined elsewhere
        // and it is not visible here whether it has side effects.
        ColumnVisibility visibility = getCV(k);

        Attribute<?> created = this.attrFactory.create(fieldName, fieldData, k, (attrFilter == null || attrFilter.keep(k)));
        if (attrFilter != null) {
            created.setToKeep(attrFilter.keep(k));
        }

        if (log.isTraceEnabled()) {
            log.trace("Created " + created.getClass().getName() + " for " + fieldName);
        }
        return created;
    } catch (CharacterCodingException cce) {
        throw new IllegalArgumentException(cce);
    }
}
/**
 * Extracts the field name from the start of the key's column qualifier.
 * <p>
 * The name ends at the first zero byte or, when {@code includeGroupingContext} is {@code false}, at the first {@code '.'} if that comes earlier.
 *
 * @param k
 *            key whose column qualifier starts with the field name
 * @return the decoded field name
 * @throws IllegalArgumentException
 *             if no terminating byte is found in the column qualifier, or the name bytes cannot be decoded
 */
public String getFieldName(Key k) {
    ByteSequence sequence = k.getColumnQualifierData();
    byte[] arrayReference = sequence.getBackingArray();
    // The sequence may start part-way into its backing array, so every access is shifted by its offset.
    int offset = sequence.offset();
    int length = sequence.length();

    int index = -1;
    for (int i = 0; i < length; i++) {
        byte current = arrayReference[offset + i];
        if ((!includeGroupingContext && current == '.') || current == 0x00) {
            index = i;
            break;
        }
    }
    if (0 > index) {
        throw new IllegalArgumentException("Could not find null-byte contained in columnqualifier for key: " + k);
    }
    try {
        return Text.decode(arrayReference, offset, index);
    } catch (CharacterCodingException e) {
        throw new IllegalArgumentException(e);
    }
}
/**
 * Decodes a UTF-8 encoded byte array into a String using {@code Text.decode}.
 *
 * @param bytes
 *            the UTF-8 encoded bytes
 * @return the decoded string
 * @throws IllegalArgumentException
 *             if the bytes cannot be decoded; the underlying {@code CharacterCodingException} is attached as the cause
 */
public static String fromUtf8(byte[] bytes) {
    final String decoded;
    try {
        decoded = Text.decode(bytes);
    } catch (CharacterCodingException cce) {
        throw new IllegalArgumentException(cce);
    }
    return decoded;
}
/**
 * Decodes the leading date portion (at most 8 bytes) of the given text.
 *
 * @param date
 *            text whose first bytes hold the date
 * @return the first 8 bytes decoded as a string, or the whole content if it is shorter than 8 bytes
 * @throws CharacterCodingException
 *             if the bytes cannot be decoded
 */
public static String decodeDate(Text date) throws CharacterCodingException {
    // getBytes() exposes the backing array, which is only valid up to getLength(); never decode past that.
    return Text.decode(date.getBytes(), 0, Math.min(8, date.getLength()));
}
/**
 * Converts the string form of a map key into an object of the Java type matching the given primitive type.
 *
 * @param s
 *            the string to convert
 * @param mapKeyType
 *            type information selecting the conversion
 * @return the converted object
 * @throws IOException
 *             if the type is not handled here, or a binary value cannot be decoded
 */
@Nonnull
private static Object getObjectOfCorrespondingPrimitiveType(String s,
    PrimitiveTypeInfo mapKeyType) throws IOException {
    switch (Type.getPrimitiveHType(mapKeyType)) {
        case INT:
            return Integer.valueOf(s);
        case TINYINT:
            return Byte.valueOf(s);
        case SMALLINT:
            return Short.valueOf(s);
        case BIGINT:
            return Long.valueOf(s);
        case BOOLEAN:
            return (s.equalsIgnoreCase("true"));
        case FLOAT:
            return Float.valueOf(s);
        case DOUBLE:
            return Double.valueOf(s);
        case STRING:
            return s;
        case BINARY:
            try {
                // Text.decode expects UTF-8, so encode explicitly as UTF-8 instead of relying on the platform
                // default charset; the string is also encoded only once.
                byte[] utf8 = s.getBytes(java.nio.charset.StandardCharsets.UTF_8);
                String t = Text.decode(utf8, 0, utf8.length);
                return t.getBytes(java.nio.charset.StandardCharsets.UTF_8);
            } catch (CharacterCodingException e) {
                throw new IOException("Error generating json binary type from object.", e);
            }
        case DATE:
            return Date.valueOf(s);
        case TIMESTAMP:
            return Timestamp.valueOf(s);
        case DECIMAL:
            return HiveDecimal.create(s);
        case VARCHAR:
            return new HiveVarchar(s, ((BaseCharTypeInfo) mapKeyType).getLength());
        case CHAR:
            return new HiveChar(s, ((BaseCharTypeInfo) mapKeyType).getLength());
        default:
            throw new IOException(
                "Could not convert from string to map type " + mapKeyType.getTypeName());
    }
}
/**
 * Converts a string into an object matching the primitive type described by the given object inspector.
 * <p>
 * When {@code writeablePrimitives} is set, the conversion is delegated to a converter obtained from {@code ObjectInspectorConverters}; otherwise the
 * string is converted according to the inspector's primitive category.
 *
 * @param s
 *            the string to convert
 * @param oi
 *            object inspector describing the target type
 * @return the converted object; {@code null} if a binary value cannot be decoded
 * @throws IOException
 *             if the primitive category is not handled here
 */
private Object getObjectOfCorrespondingPrimitiveType(String s, PrimitiveObjectInspector oi)
    throws IOException {
    PrimitiveTypeInfo typeInfo = oi.getTypeInfo();
    if (writeablePrimitives) {
        Converter c = ObjectInspectorConverters.getConverter(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi);
        return c.convert(s);
    }
    switch (typeInfo.getPrimitiveCategory()) {
        case INT:
            return Integer.valueOf(s);
        case BYTE:
            return Byte.valueOf(s);
        case SHORT:
            return Short.valueOf(s);
        case LONG:
            return Long.valueOf(s);
        case BOOLEAN:
            return (s.equalsIgnoreCase("true"));
        case FLOAT:
            return Float.valueOf(s);
        case DOUBLE:
            return Double.valueOf(s);
        case STRING:
            return s;
        case BINARY:
            try {
                // Text.decode expects UTF-8, so encode explicitly as UTF-8 instead of relying on the platform
                // default charset; the string is also encoded only once.
                byte[] utf8 = s.getBytes(java.nio.charset.StandardCharsets.UTF_8);
                String t = Text.decode(utf8, 0, utf8.length);
                return t.getBytes(java.nio.charset.StandardCharsets.UTF_8);
            } catch (CharacterCodingException e) {
                // Best effort as before: log the failure and report the value as missing.
                LOG.warn("Error generating json binary type from object.", e);
                return null;
            }
        case DATE:
            return Date.valueOf(s);
        case TIMESTAMP:
            return Timestamp.valueOf(s);
        case DECIMAL:
            return HiveDecimal.create(s);
        case VARCHAR:
            return new HiveVarchar(s, ((BaseCharTypeInfo) typeInfo).getLength());
        case CHAR:
            return new HiveChar(s, ((BaseCharTypeInfo) typeInfo).getLength());
        default:
            throw new IOException(
                "Could not convert from string to " + typeInfo.getPrimitiveCategory());
    }
}
/**
 * Reads a String stored as a VInt n, followed by n bytes in Text format. A stored length of -1 stands for {@code null}.
 *
 * @param in
 *            The input stream.
 * @return The string, or {@code null} if the stored length is -1
 * @throws IOException
 *             if reading fails, the stored length is negative (other than -1), or the bytes cannot be decoded
 */
public static String readString(DataInput in) throws IOException {
    int length = readVInt(in);
    if (length == -1) {
        return null;
    }
    if (length < 0) {
        // Report a corrupt length as an I/O problem instead of failing with NegativeArraySizeException below.
        throw new IOException("Invalid negative string length: " + length);
    }
    byte[] buffer = new byte[length];
    in.readFully(buffer);
    return Text.decode(buffer);
}
/**
 * Reads a String stored as a VInt n, followed by n bytes in Text format. A stored length of -1 stands for {@code null}.
 *
 * @param in
 *            The input stream.
 * @return The string, or {@code null} if the stored length is -1
 * @throws IOException
 *             if reading fails, the stored length is negative (other than -1), or the bytes cannot be decoded
 */
public static String readString(DataInput in) throws IOException {
    int length = readVInt(in);
    if (length == -1) {
        return null;
    }
    if (length < 0) {
        // Report a corrupt length as an I/O problem instead of failing with NegativeArraySizeException below.
        throw new IOException("Invalid negative string length: " + length);
    }
    byte[] buffer = new byte[length];
    in.readFully(buffer);
    return Text.decode(buffer);
}
/**
 * Reads a String stored as a VInt n, followed by n bytes in Text format. A stored length of -1 stands for {@code null}.
 *
 * @param in
 *            The input stream.
 * @return The string, or {@code null} if the stored length is -1
 * @throws IOException
 *             if reading fails, the stored length is negative (other than -1), or the bytes cannot be decoded
 */
public static String readString(DataInput in) throws IOException {
    int length = readVInt(in);
    if (length == -1) {
        return null;
    }
    if (length < 0) {
        // Report a corrupt length as an I/O problem instead of failing with NegativeArraySizeException below.
        throw new IOException("Invalid negative string length: " + length);
    }
    byte[] buffer = new byte[length];
    in.readFully(buffer);
    return Text.decode(buffer);
}
/**
 * Reads a String stored as a VInt n, followed by n bytes in Text format. A stored length of -1 stands for {@code null}.
 *
 * @param in
 *            The input stream.
 * @return The string, or {@code null} if the stored length is -1
 * @throws IOException
 *             if reading fails, the stored length is negative (other than -1), or the bytes cannot be decoded
 */
public static String readString(DataInput in) throws IOException {
    int length = readVInt(in);
    if (length == -1) {
        return null;
    }
    if (length < 0) {
        // Report a corrupt length as an I/O problem instead of failing with NegativeArraySizeException below.
        throw new IOException("Invalid negative string length: " + length);
    }
    byte[] buffer = new byte[length];
    in.readFully(buffer);
    return Text.decode(buffer);
}