org.apache.hadoop.io.Text#decode() 源码实例 Demo

下面列出了 org.apache.hadoop.io.Text#decode() 的实例代码，或者点击链接到 GitHub 查看源代码，也可以在右侧发表评论。


/**
 * Converts bytes into a string value. Unless the content is expected to be binary, the bytes are first run through the configured MIME decoder and then
 * decoded with {@code Text.decode}. If that attempt fails for any reason, or the content is expected to be binary, the bytes are decoded one byte per
 * character (ISO-8859-1).
 * 
 * @param bytes
 *            the raw bytes to convert
 * @param expectBinary
 *            true to skip the text decoding attempt and go straight to the one-byte-per-character decoding
 * @return the value
 */
public static String decode(byte[] bytes, boolean expectBinary) {
    // create the shared decoder on first use, guarded by the class lock
    synchronized (NormalizedFieldAndValue.class) {
        if (null == mimeDecoder) {
            IngestConfiguration config = IngestConfigurationFactory.getIngestConfiguration();
            mimeDecoder = config.createMimeDecoder();
        }
    }
    if (!expectBinary) {
        try {
            String value = Text.decode(mimeDecoder.decode(bytes));
            if (value != null) {
                return value;
            }
        } catch (Exception ignored) {
            // ok, treat as binary: any decoding failure falls through to the byte-per-character path
        }
    }
    // Passing a Charset instead of a charset name removes the checked UnsupportedEncodingException, which could never be thrown for ISO-8859-1
    return new String(bytes, java.nio.charset.StandardCharsets.ISO_8859_1);
}
 
源代码2 项目: datawave   文件: EdgeKey.java

/**
 * Splits the first {@code bLen} bytes on {@code COL_SEPARATOR_BYTE} and appends each decoded piece to {@code parts}, advancing {@code pLen}.
 * 
 * @param bytes
 *            byte array holding the parts of the edge key
 * @param bLen
 *            number of bytes to use (important: the byte array may be reused so its length may not be correct)
 * @throws RuntimeException
 *             if more parts are found than {@code parts} can hold, or if a part cannot be decoded
 */
private void getParts(byte[] bytes, int bLen) {
    try {
        int start = 0;
        // i == bLen acts as a virtual trailing separator that closes the final part, so the capacity check below also guards that last store
        for (int i = 0; i <= bLen; i++) {
            if (pLen >= parts.length) {
                // only the first bLen bytes are valid; anything past that may be left over from a previous use of the array
                throw new RuntimeException("Exceeded number of possible number of parts (" + parts.length + ")." + "  bytes as String: "
                                + new String(bytes, 0, bLen, java.nio.charset.StandardCharsets.UTF_8) + " parts: " + Arrays.toString(parts));
            }
            if (i == bLen || bytes[i] == COL_SEPARATOR_BYTE) {
                parts[pLen++] = Text.decode(bytes, start, i - start);
                start = i + 1;
            }
        }
        
    } catch (CharacterCodingException e) {
        throw new RuntimeException("Edge key column encoding exception", e);
    }
}
 
源代码3 项目: datawave   文件: EdgeKeyDecoder.java

/**
 * Extracts the leading date portion of a column qualifier: everything before the first separator byte found within the first {@code DATE_LEN + 1} bytes,
 * or the first {@code DATE_LEN} bytes (capped at the qualifier length) when no separator is found in that window.
 * 
 * @param colQual
 *            the column qualifier to read from
 * @return the decoded leading portion
 * @throws RuntimeException
 *             if the bytes cannot be decoded
 */
public static String getYYYYMMDD(Text colQual) {
    final int qualifierLength = colQual.getLength();
    final byte[] raw = colQual.getBytes();
    final int scanLimit = Math.min(DATE_LEN + 1, qualifierLength);
    
    // advance to the first separator inside the scan window, if there is one
    int cursor = 0;
    while (cursor < scanLimit && raw[cursor] != COL_SEPARATOR_BYTE) {
        cursor++;
    }
    
    // there may not be a slash, in which case the date is assumed to span DATE_LEN bytes
    final int dateEnd = (cursor < scanLimit) ? cursor : DATE_LEN;
    
    try {
        return Text.decode(raw, 0, Math.min(qualifierLength, dateEnd));
    } catch (CharacterCodingException e) {
        // same behavior as EdgeKey.getParts
        throw new RuntimeException("Edge key column encoding exception", e);
    }
}
 
源代码4 项目: Hadoop-BAM   文件: FastaInputFormat.java

/**
 * Reads the next key/value pair from the input for processing.
 *
 * @param key   passed to scanFastaLine together with the line that was read
 * @param value passed to scanFastaLine together with the line that was read
 * @return true if a line was read and scanned; false once the end of the slice or EOF is reached
 * @throws IOException if reading the line fails
 * @throws RuntimeException if the line read is MAX_LINE_LENGTH bytes or longer
 */
public boolean next(Text key, ReferenceFragment value) throws IOException
{
	if (pos >= end)
		return false; // past end of slice

	int bytesRead = lineReader.readLine(buffer, MAX_LINE_LENGTH);
	pos += bytesRead;
	if (bytesRead >= MAX_LINE_LENGTH)
	{
		// Build a short preview for the error message. A decoding failure must not
		// replace the "abnormally large line" error, and the preview must not read
		// past the valid portion of the buffer.
		String line;
		try {
			line = Text.decode(buffer.getBytes(), 0, Math.min(500, buffer.getLength()));
		} catch (java.nio.charset.CharacterCodingException e) {
			line = "(line not convertible to printable format)";
		}
		throw new RuntimeException("found abnormally large line (length " + bytesRead + ") at " + makePositionMessage(pos - bytesRead) + ": " + line);
	}
	else if (bytesRead <= 0)
		return false; // EOF
	else
	{
		scanFastaLine(buffer, key, value);
		current_split_pos += bytesRead;
		return true;
	}
}
 
源代码5 项目: Hadoop-BAM   文件: QseqInputFormat.java

/**
 * Reads one line into the shared buffer, advances the position, and scans the
 * line into the given key/value when anything was read.
 *
 * @param key   passed to scanQseqLine together with the line that was read
 * @param value passed to scanQseqLine together with the line that was read
 * @return the number of bytes reported by the line reader
 * @throws IOException if reading the line fails
 * @throws RuntimeException if the line read is MAX_LINE_LENGTH bytes or longer
 */
private int lowLevelQseqRead(Text key, SequencedFragment value) throws IOException
{
	final int consumed = lineReader.readLine(buffer, MAX_LINE_LENGTH);
	pos += consumed;

	// Normal case first: the line fits within the limit.
	if (consumed < MAX_LINE_LENGTH)
	{
		if (consumed > 0)
			scanQseqLine(buffer, key, value);
		return consumed;
	}

	// Oversized line: include a preview of its start in the error message.
	String preview;
	try {
		preview = Text.decode(buffer.getBytes(), 0, 500);
	} catch (java.nio.charset.CharacterCodingException e) {
		preview = "(line not convertible to printable format)";
	}
	throw new RuntimeException("found abnormally large line (length " + consumed + ") at " +
	            makePositionMessage(pos - consumed) + ": " + preview);
}
 
源代码6 项目: datawave   文件: ValueToAttributes.java

/**
 * Builds an attribute for the given field from the part of the key's column qualifier that follows the first occurrence of {@code Constants.NULL}.
 * 
 * @param fieldName
 *            the field name handed to the attribute factory
 * @param k
 *            the key whose column qualifier carries the value
 * @return the attribute created by the attribute factory
 * @throws IllegalArgumentException
 *             if the column qualifier does not contain {@code Constants.NULL}, or the value bytes cannot be decoded
 */
public Attribute<?> getFieldValue(String fieldName, Key k) {
    k.getColumnQualifier(holder);
    final int separatorPos = holder.find(Constants.NULL);
    
    if (separatorPos < 0) {
        throw new IllegalArgumentException("Could not find null-byte contained in columnqualifier for key: " + k);
    }
    
    try {
        // the value is everything after the separator, up to the valid length of the holder
        final int valueStart = separatorPos + 1;
        final int valueLength = holder.getLength() - valueStart;
        String data = Text.decode(holder.getBytes(), valueStart, valueLength);
        
        // NOTE(review): the result is unused in this method; the call is kept in case getCV has side effects - confirm
        ColumnVisibility cv = getCV(k);
        
        final boolean keepInitially = (attrFilter == null || attrFilter.keep(k));
        Attribute<?> attr = this.attrFactory.create(fieldName, data, k, keepInitially);
        if (attrFilter != null) {
            attr.setToKeep(attrFilter.keep(k));
        }
        
        if (log.isTraceEnabled()) {
            log.trace("Created " + attr.getClass().getName() + " for " + fieldName);
        }
        
        return attr;
    } catch (CharacterCodingException e) {
        throw new IllegalArgumentException(e);
    }
}
 
源代码7 项目: datawave   文件: KeyToFieldName.java

/**
 * Extracts the field name from the start of the key's column qualifier. The name ends at the first null byte, or at the first {@code '.'} when
 * {@code includeGroupingContext} is false, whichever comes first.
 * 
 * @param k
 *            the key whose column qualifier starts with the field name
 * @return the decoded field name
 * @throws IllegalArgumentException
 *             if no terminating byte is found, or the name bytes cannot be decoded
 */
public String getFieldName(Key k) {
    
    ByteSequence sequence = k.getColumnQualifierData();
    
    // The sequence may be a window into a larger array, so every access is made relative to its offset
    byte[] arrayReference = sequence.getBackingArray();
    final int offset = sequence.offset();
    final int length = sequence.length();
    
    int index = -1;
    for (int i = 0; i < length; i++) {
        byte current = arrayReference[offset + i];
        if (current == 0x00 || (!includeGroupingContext && current == '.')) {
            index = i;
            break;
        }
    }
    
    if (0 > index) {
        throw new IllegalArgumentException("Could not find null-byte contained in columnqualifier for key: " + k);
    }
    
    try {
        return Text.decode(arrayReference, offset, index);
    } catch (CharacterCodingException e) {
        throw new IllegalArgumentException(e);
    }
}
 
源代码8 项目: datawave   文件: TextUtil.java

/**
 * Turns a UTF-8 encoded byte array back into a String by delegating to {@code Text.decode}.
 *
 * @param bytes
 *            the encoded bytes
 * @return string
 * @throws IllegalArgumentException
 *             wrapping the CharacterCodingException if decoding fails
 */
public static String fromUtf8(byte[] bytes) {
    final String decoded;
    try {
        decoded = Text.decode(bytes);
    } catch (CharacterCodingException e) {
        // surface the checked decoding failure as an unchecked argument error
        throw new IllegalArgumentException(e);
    }
    return decoded;
}
 
源代码9 项目: datawave   文件: EdgeKeyUtil.java

/**
 * Decodes the leading date portion of the given text: its first 8 bytes, or all of its valid bytes when it holds fewer than 8.
 * 
 * @param date
 *            the text whose leading bytes hold the date (presumably yyyyMMdd - confirm against callers)
 * @return the decoded leading bytes
 * @throws CharacterCodingException
 *             if the bytes cannot be decoded
 */
public static String decodeDate(Text date) throws CharacterCodingException {
    // Text.getBytes() exposes the backing array, which is only valid up to getLength(); never decode past that
    final int dateLength = Math.min(8, date.getLength());
    return Text.decode(date.getBytes(), 0, dateLength);
}
 
源代码10 项目: incubator-hivemall   文件: JsonSerdeUtils.java

/**
 * Converts a string into the Java object matching the given primitive type.
 *
 * @param s the string form of the value
 * @param mapKeyType the primitive type to convert to
 * @return the converted value; for BINARY, the UTF-8 bytes of the string
 * @throws IOException if the type is not supported or the BINARY value cannot be decoded
 */
@Nonnull
private static Object getObjectOfCorrespondingPrimitiveType(String s,
        PrimitiveTypeInfo mapKeyType) throws IOException {
    switch (Type.getPrimitiveHType(mapKeyType)) {
        case INT:
            return Integer.valueOf(s);
        case TINYINT:
            return Byte.valueOf(s);
        case SMALLINT:
            return Short.valueOf(s);
        case BIGINT:
            return Long.valueOf(s);
        case BOOLEAN:
            return (s.equalsIgnoreCase("true"));
        case FLOAT:
            return Float.valueOf(s);
        case DOUBLE:
            return Double.valueOf(s);
        case STRING:
            return s;
        case BINARY:
            try {
                // Use UTF-8 explicitly on both sides: the no-arg String.getBytes() follows the
                // platform default charset and would not match the UTF-8 decoding in between.
                byte[] utf8 = s.getBytes(java.nio.charset.StandardCharsets.UTF_8);
                String t = Text.decode(utf8, 0, utf8.length);
                return t.getBytes(java.nio.charset.StandardCharsets.UTF_8);
            } catch (CharacterCodingException e) {
                throw new IOException("Error generating json binary type from object.", e);
            }
        case DATE:
            return Date.valueOf(s);
        case TIMESTAMP:
            return Timestamp.valueOf(s);
        case DECIMAL:
            return HiveDecimal.create(s);
        case VARCHAR:
            return new HiveVarchar(s, ((BaseCharTypeInfo) mapKeyType).getLength());
        case CHAR:
            return new HiveChar(s, ((BaseCharTypeInfo) mapKeyType).getLength());
        default:
            throw new IOException(
                "Could not convert from string to map type " + mapKeyType.getTypeName());
    }
}
 

/**
 * Converts a string into the object matching the primitive type described by the inspector.
 * When {@code writeablePrimitives} is set, the conversion is delegated to a converter obtained
 * from {@code ObjectInspectorConverters}; otherwise the string is parsed per primitive category.
 *
 * @param s the string form of the value
 * @param oi the inspector describing the target primitive type
 * @return the converted value; for BINARY, the UTF-8 bytes of the string, or null if it cannot be decoded
 * @throws IOException if the primitive category is not supported
 */
private Object getObjectOfCorrespondingPrimitiveType(String s, PrimitiveObjectInspector oi)
        throws IOException {
    PrimitiveTypeInfo typeInfo = oi.getTypeInfo();
    if (writeablePrimitives) {
        Converter c = ObjectInspectorConverters.getConverter(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi);
        return c.convert(s);
    }

    switch (typeInfo.getPrimitiveCategory()) {
        case INT:
            return Integer.valueOf(s);
        case BYTE:
            return Byte.valueOf(s);
        case SHORT:
            return Short.valueOf(s);
        case LONG:
            return Long.valueOf(s);
        case BOOLEAN:
            return (s.equalsIgnoreCase("true"));
        case FLOAT:
            return Float.valueOf(s);
        case DOUBLE:
            return Double.valueOf(s);
        case STRING:
            return s;
        case BINARY:
            try {
                // Use UTF-8 explicitly on both sides: the no-arg String.getBytes() follows the
                // platform default charset and would not match the UTF-8 decoding in between.
                byte[] utf8 = s.getBytes(java.nio.charset.StandardCharsets.UTF_8);
                String t = Text.decode(utf8, 0, utf8.length);
                return t.getBytes(java.nio.charset.StandardCharsets.UTF_8);
            } catch (CharacterCodingException e) {
                // deliberate best effort: log and report the value as absent
                LOG.warn("Error generating json binary type from object.", e);
                return null;
            }
        case DATE:
            return Date.valueOf(s);
        case TIMESTAMP:
            return Timestamp.valueOf(s);
        case DECIMAL:
            return HiveDecimal.create(s);
        case VARCHAR:
            return new HiveVarchar(s, ((BaseCharTypeInfo) typeInfo).getLength());
        case CHAR:
            return new HiveChar(s, ((BaseCharTypeInfo) typeInfo).getLength());
        default:
            throw new IOException(
                "Could not convert from string to " + typeInfo.getPrimitiveCategory());
    }
}
 
源代码12 项目: hadoop   文件: Utils.java

/**
 * Read a String as a VInt n, followed by n Bytes in Text format.
 * 
 * @param in
 *          The input stream.
 * @return The string, or null when the encoded length is -1
 * @throws IOException
 *           if reading fails, or the encoded length is negative but not -1
 */
public static String readString(DataInput in) throws IOException {
  int length = readVInt(in);
  if (length == -1) return null;
  if (length < 0) {
    // any other negative length can only come from a corrupt or misaligned stream;
    // report it as an I/O error instead of letting the allocation below fail
    throw new IOException("Invalid string length: " + length);
  }
  byte[] buffer = new byte[length];
  in.readFully(buffer);
  return Text.decode(buffer);
}
 
源代码13 项目: big-c   文件: Utils.java

/**
 * Read a String as a VInt n, followed by n Bytes in Text format.
 * 
 * @param in
 *          The input stream.
 * @return The string, or null when the encoded length is -1
 * @throws IOException
 *           if reading fails, or the encoded length is negative but not -1
 */
public static String readString(DataInput in) throws IOException {
  int length = readVInt(in);
  if (length == -1) return null;
  if (length < 0) {
    // any other negative length can only come from a corrupt or misaligned stream;
    // report it as an I/O error instead of letting the allocation below fail
    throw new IOException("Invalid string length: " + length);
  }
  byte[] buffer = new byte[length];
  in.readFully(buffer);
  return Text.decode(buffer);
}
 
源代码14 项目: RDFS   文件: Utils.java

/**
 * Read a String as a VInt n, followed by n Bytes in Text format.
 * 
 * @param in
 *          The input stream.
 * @return The string, or null when the encoded length is -1
 * @throws IOException
 *           if reading fails, or the encoded length is negative but not -1
 */
public static String readString(DataInput in) throws IOException {
  int length = readVInt(in);
  if (length == -1) return null;
  if (length < 0) {
    // any other negative length can only come from a corrupt or misaligned stream;
    // report it as an I/O error instead of letting the allocation below fail
    throw new IOException("Invalid string length: " + length);
  }
  byte[] buffer = new byte[length];
  in.readFully(buffer);
  return Text.decode(buffer);
}
 
源代码15 项目: hadoop-gpu   文件: Utils.java

/**
 * Read a String as a VInt n, followed by n Bytes in Text format.
 * 
 * @param in
 *          The input stream.
 * @return The string, or null when the encoded length is -1
 * @throws IOException
 *           if reading fails, or the encoded length is negative but not -1
 */
public static String readString(DataInput in) throws IOException {
  int length = readVInt(in);
  if (length == -1) return null;
  if (length < 0) {
    // any other negative length can only come from a corrupt or misaligned stream;
    // report it as an I/O error instead of letting the allocation below fail
    throw new IOException("Invalid string length: " + length);
  }
  byte[] buffer = new byte[length];
  in.readFully(buffer);
  return Text.decode(buffer);
}