org.apache.hadoop.io.Text#append ( )源码实例Demo

下面列出了org.apache.hadoop.io.Text#append ( ) 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。

源代码1 项目: Hadoop-BAM   文件: FastaInputFormat.java
/**
 * Fills {@code key} and {@code fragment} from one FASTA sequence line.
 *
 * <p>The key is the current index-sequence description followed by the current
 * split position, with every tab replaced by a colon. The fragment receives the
 * current position, the current index sequence and the valid bytes of {@code line}.
 *
 * @param line     the sequence line; only its first {@code getLength()} bytes are copied
 * @param key      output key, cleared and rebuilt here
 * @param fragment output fragment, cleared and refilled here
 */
private void scanFastaLine(Text line, Text key, ReferenceFragment fragment)
{
    // Build the key.  We concatenate the chromosome/fragment description and
    // the start position of the FASTA sequence line, replacing the tabs with colons.
    key.clear();

    // Encode each part once instead of once for the data and once for the length.
    final byte[] indexSeqBytes = current_split_indexseq.getBytes(UTF8);
    key.append(indexSeqBytes, 0, indexSeqBytes.length);
    final byte[] positionBytes = Integer.toString(current_split_pos).getBytes(UTF8);
    key.append(positionBytes, 0, positionBytes.length);

    // replace tabs with :
    final byte[] keyBytes = key.getBytes();
    final int keyLength = key.getLength();
    for (int i = 0; i < keyLength; ++i) {
        if (keyBytes[i] == '\t') {
            keyBytes[i] = ':';
        }
    }

    fragment.clear();
    fragment.setPosition(current_split_pos);
    fragment.setIndexSequence(current_split_indexseq);
    // Text.getBytes() exposes the whole backing array, which can be longer than the
    // valid data; bound the copy by getLength() so stale bytes are never appended.
    fragment.getSequence().append(line.getBytes(), 0, line.getLength());
}
 
/**
 * Splits a {@code Text} key of the form {@code <name>_<rest>} at the first
 * underscore by copying directly out of the key's backing array.
 */
@Test
public void testSplit()
        throws Exception
{
    Text key = new Text("123_456789");

    // hard-split using array copy
    int i = key.find("_", 0);

    Text outputKey = new Text("");
    byte[] bytes = key.getBytes();
    // The backing array may be larger than the valid data, so the remaining
    // length is derived from getLength() rather than from bytes.length.
    outputKey.append(bytes, i + 1, key.getLength() - i - 1);

    // Decode as UTF-8 (Text's own encoding) instead of the platform charset.
    String fileName = Text.decode(bytes, 0, i);

    assertEquals("123", fileName);
    assertEquals("456789", outputKey.toString());
}
 
源代码3 项目: accumulo-examples   文件: ChunkInputStreamTest.java
/**
 * Appends one chunk entry to {@code data}. The column qualifier is the encoded
 * {@code chunkSize} followed by the first 4 bytes of the encoded {@code chunkCount}.
 */
private static void addData(List<Entry<Key,Value>> data, String row, String cf, int chunkSize,
    int chunkCount, String vis, String value) {
  final Text qualifier = new Text(FileDataIngest.intToBytes(chunkSize));
  final byte[] countBytes = FileDataIngest.intToBytes(chunkCount);
  qualifier.append(countBytes, 0, 4);

  final Key entryKey = new Key(new Text(row), new Text(cf), qualifier, new Text(vis));
  data.add(new KeyValue(entryKey, value.getBytes()));
}
 
源代码4 项目: hadoop   文件: TeraSort.java
/**
 * Recursively builds a trie over a sorted array of cut points so that the
 * partition for a key can be located quickly.
 * @param splits the list of cut points
 * @param lower the lower bound of partitions 0..numPartitions-1
 * @param upper the upper bound of partitions 0..numPartitions-1
 * @param prefix the prefix that we have already checked against
 * @param maxDepth the maximum depth we will build a trie for
 * @return the trie node that will divide the splits correctly
 */
private static TrieNode buildTrie(Text[] splits, int lower, int upper, 
                                  Text prefix, int maxDepth) {
  final int depth = prefix.getLength();
  final boolean atLeaf = depth >= maxDepth || lower == upper;
  if (atLeaf) {
    return new LeafTrieNode(depth, splits, lower, upper);
  }

  final InnerTrieNode node = new InnerTrieNode(depth);
  // Work on a copy of the prefix that is one byte longer; that extra byte is
  // overwritten for every child below.
  final Text probe = new Text(prefix);
  probe.append(new byte[1], 0, 1);

  int boundary = lower;
  for (int ch = 0; ch < 255; ++ch) {
    final int childLower = boundary;
    // Advance past every split that sorts before the next byte value.
    probe.getBytes()[depth] = (byte) (ch + 1);
    while (boundary < upper && splits[boundary].compareTo(probe) < 0) {
      boundary++;
    }
    // Put this child's own byte back before descending.
    probe.getBytes()[depth] = (byte) ch;
    node.child[ch] = buildTrie(splits, childLower, boundary, probe, maxDepth);
  }
  // Whatever is left belongs to the last child.
  probe.getBytes()[depth] = (byte) 255;
  node.child[255] = buildTrie(splits, boundary, upper, probe, maxDepth);
  return node;
}
 
源代码5 项目: accumulo-examples   文件: TeraSortIngest.java
/**
 * Renders the row id (narrowed to {@code int}) as decimal text in a new
 * {@code Text}. Ids shorter than ten bytes are preceded by filler bytes taken
 * from {@code spaces}; at most the first ten bytes of the id are kept.
 */
private Text getRowIdString(long rowId) {
  final byte[] digits = Integer.toString((int) rowId).getBytes();
  final int padding = 10 - digits.length;

  final Text padded = new Text();
  if (padding > 0) {
    padded.append(spaces, 0, padding);
  }
  final int copyLength = digits.length < 10 ? digits.length : 10;
  padded.append(digits, 0, copyLength);
  return padded;
}
 
源代码6 项目: pravega-samples   文件: TeraSort.java
/**
 * Builds, from an ordered set of cut points, the trie used to look up the
 * matching partition quickly.
 * @param splits the list of cut points
 * @param lower the lower bound of partitions 0..numPartitions-1
 * @param upper the upper bound of partitions 0..numPartitions-1
 * @param prefix the prefix that we have already checked against
 * @param maxDepth the maximum depth we will build a trie for
 * @return the trie node that will divide the splits correctly
 */
private static TrieNode buildTrie(Text[] splits, int lower, int upper,
                                  Text prefix, int maxDepth) {
  int depth = prefix.getLength();
  if (depth < maxDepth && lower != upper) {
    InnerTrieNode inner = new InnerTrieNode(depth);
    // candidate = prefix plus one trailing byte that is rewritten per child
    Text candidate = new Text(prefix);
    candidate.append(new byte[1], 0, 1);

    int cursor = lower;
    int ch = 0;
    while (ch < 255) {
      int from = cursor;
      // upper limit for this child is the next byte value
      candidate.getBytes()[depth] = (byte) (ch + 1);
      for (; cursor < upper; cursor += 1) {
        if (splits[cursor].compareTo(candidate) >= 0) {
          break;
        }
      }
      // descend with the child's own byte value in place
      candidate.getBytes()[depth] = (byte) ch;
      inner.child[ch] = buildTrie(splits, from, cursor, candidate, maxDepth);
      ++ch;
    }

    // the final child takes every remaining split
    candidate.getBytes()[depth] = (byte) 255;
    inner.child[255] = buildTrie(splits, cursor, upper, candidate, maxDepth);
    return inner;
  }
  // depth limit reached, or nothing left to divide
  return new LeafTrieNode(depth, splits, lower, upper);
}
 
源代码7 项目: RDFS   文件: TeraSort.java
/**
 * Given a sorted set of cut points, build a trie that will find the correct
 * partition quickly.
 * @param splits the list of cut points
 * @param lower the lower bound of partitions 0..numPartitions-1
 * @param upper the upper bound of partitions 0..numPartitions-1
 * @param prefix the prefix that we have already checked against
 * @param maxDepth the maximum depth we will build a trie for
 * @return the trie node that will divide the splits correctly
 */
private static TrieNode buildTrie(Text[] splits, int lower, int upper, 
                                  Text prefix, int maxDepth) {
  int depth = prefix.getLength();
  if (depth >= maxDepth || lower == upper) {
    return new LeafTrieNode(depth, splits, lower, upper);
  }
  InnerTrieNode result = new InnerTrieNode(depth);
  Text trial = new Text(prefix);
  // append an extra byte on to the prefix
  trial.append(new byte[1], 0, 1);
  int currentBound = lower;
  for(int ch = 0; ch < 255; ++ch) {
    // use the next byte value as the exclusive upper limit for child ch
    trial.getBytes()[depth] = (byte) (ch + 1);
    lower = currentBound;
    while (currentBound < upper) {
      if (splits[currentBound].compareTo(trial) >= 0) {
        break;
      }
      currentBound += 1;
    }
    // restore child ch's own byte so the recursion sees the right prefix
    trial.getBytes()[depth] = (byte) ch;
    result.child[ch] = buildTrie(splits, lower, currentBound, trial, 
                                 maxDepth);
  }
  // pick up the rest: the prefix byte must match the child index (255).
  // The previous value 127 gave the subtree of child[255] a prefix that
  // sorts below all of its splits, so deeper levels could not divide them.
  trial.getBytes()[depth] = (byte) 255;
  result.child[255] = buildTrie(splits, currentBound, upper, trial,
                                maxDepth);
  return result;
}
 
源代码8 项目: accumulo-examples   文件: ChunkInputStreamIT.java
/**
 * Adds a single chunk key/value pair to {@code data}; the column qualifier is
 * built from the encoded {@code chunkSize} plus 4 bytes of the encoded {@code chunkCount}.
 */
static void addData(List<Entry<Key,Value>> data, String row, String cf, int chunkSize,
    int chunkCount, String vis, String value) {
  Text cq = new Text(FileDataIngest.intToBytes(chunkSize));
  cq.append(FileDataIngest.intToBytes(chunkCount), 0, 4);

  Text rowText = new Text(row);
  Text familyText = new Text(cf);
  Text visibilityText = new Text(vis);
  Key chunkKey = new Key(rowText, familyText, cq, visibilityText);
  byte[] payload = value.getBytes();
  data.add(new KeyValue(chunkKey, payload));
}
 
源代码9 项目: rya   文件: ColumnPrefixes.java
/**
 * Returns a new {@code Text} holding a copy of {@code prefix} followed by the
 * encoded bytes of {@code str}.
 *
 * @throws IllegalArgumentException if {@code str} cannot be encoded
 */
private static Text concat(Text prefix, String str) {
	final Text combined = new Text(prefix);

	final ByteBuffer encoded;
	try {
		encoded = Text.encode(str, false);
	} catch (CharacterCodingException cce) {
		throw new IllegalArgumentException(cce);
	}

	// only the bytes up to limit() belong to the encoded string
	combined.append(encoded.array(), 0, encoded.limit());
	return combined;
}
 
源代码10 项目: hadoop-gpu   文件: TeraSort.java
/**
 * Given a sorted set of cut points, build a trie that will find the correct
 * partition quickly.
 * @param splits the list of cut points
 * @param lower the lower bound of partitions 0..numPartitions-1
 * @param upper the upper bound of partitions 0..numPartitions-1
 * @param prefix the prefix that we have already checked against
 * @param maxDepth the maximum depth we will build a trie for
 * @return the trie node that will divide the splits correctly
 */
private static TrieNode buildTrie(Text[] splits, int lower, int upper, 
                                  Text prefix, int maxDepth) {
  int depth = prefix.getLength();
  if (depth >= maxDepth || lower == upper) {
    return new LeafTrieNode(depth, splits, lower, upper);
  }
  InnerTrieNode result = new InnerTrieNode(depth);
  Text trial = new Text(prefix);
  // append an extra byte on to the prefix
  trial.append(new byte[1], 0, 1);
  int currentBound = lower;
  for(int ch = 0; ch < 255; ++ch) {
    // the next byte value bounds the splits that belong to child ch
    trial.getBytes()[depth] = (byte) (ch + 1);
    lower = currentBound;
    while (currentBound < upper) {
      if (splits[currentBound].compareTo(trial) >= 0) {
        break;
      }
      currentBound += 1;
    }
    // recurse with child ch's own byte as the last prefix byte
    trial.getBytes()[depth] = (byte) ch;
    result.child[ch] = buildTrie(splits, lower, currentBound, trial, 
                                 maxDepth);
  }
  // pick up the rest: child[255] must be built with prefix byte 255, not 127,
  // otherwise its subtree compares the remaining splits against the wrong
  // prefix and cannot subdivide them at deeper levels.
  trial.getBytes()[depth] = (byte) 255;
  result.child[255] = buildTrie(splits, currentBound, upper, trial,
                                maxDepth);
  return result;
}
 
源代码11 项目: accumulo-examples   文件: QueryUtil.java
/**
 * Given a path, construct an accumulo row prepended with the {@link #FORWARD_PREFIX} for the
 * index table. Only the part of the path after the last {@code /} is used, encoded with the
 * platform default charset.
 *
 * @param path
 *          the full path of a file or directory
 * @return the accumulo row associated with this path, or {@code null} when nothing follows the
 *         last {@code /}
 */
public static Text getForwardIndex(String path) {
  String part = path.substring(path.lastIndexOf("/") + 1);
  if (part.length() == 0)
    return null;
  Text row = new Text(FORWARD_PREFIX);
  // Append by encoded byte count: String.length() counts chars and would
  // truncate names that contain multi-byte characters.
  byte[] partBytes = part.getBytes();
  row.append(partBytes, 0, partBytes.length);
  return row;
}
 
源代码12 项目: accumulo-examples   文件: QueryUtil.java
/**
 * Given a path, construct an accumulo row prepended with the {@link #REVERSE_PREFIX} with the
 * path reversed for the index table. Only the part of the path after the last {@code /} is
 * used; its bytes (platform default charset) are reversed byte by byte.
 *
 * @param path
 *          the full path of a file or directory
 * @return the accumulo row associated with this path, or {@code null} when nothing follows the
 *         last {@code /}
 */
public static Text getReverseIndex(String path) {
  String part = path.substring(path.lastIndexOf("/") + 1);
  if (part.length() == 0)
    return null;
  // Size the reversed array by the encoded byte count, not by part.length():
  // with multi-byte characters the char count is smaller and the copy loop
  // would run off the front of the array.
  byte[] partBytes = part.getBytes();
  byte[] rev = new byte[partBytes.length];
  int i = partBytes.length - 1;
  for (byte b : partBytes)
    rev[i--] = b;
  Text row = new Text(REVERSE_PREFIX);
  row.append(rev, 0, rev.length);
  return row;
}
 
源代码13 项目: datawave   文件: LfLineReader.java
/**
 * Reads a single line ending in '\n' (LF) from the InputStream into the given Text. Reaching EOF also ends a line that has no terminator.
 * 
 * @param str
 *            receives the line; the LF itself is stored only when isNewLineIncluded() returns true
 * @param maxLineLength
 *            the maximum number of bytes to store into str; the rest of the line is silently discarded.
 * @param maxBytesToConsume
 *            the maximum number of bytes to consume in this call. This is only a hint: the limit is checked after each pass over the buffer, so the
 *            call can overshoot potentially by as much as one buffer length.
 * 
 * @return the number of bytes read including the newline found.
 * 
 * @throws IOException
 *             if the underlying stream throws, or if more than Integer.MAX_VALUE bytes were consumed
 */
public int readLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    /*
     * The head of the stream may already sit in buffer, which leaves two cases: (1) buffer holds no LF, so all of it is copied and another buffer is
     * read from the stream; (2) buffer holds a complete line, which is copied to str.
     */
    str.clear();
    int storedLength = 0; // mirrors str.getLength() without asking str each time
    int terminatorLength = 0; // becomes 1 once an LF has been seen
    long consumed = 0;
    do {
        int chunkStart = bufferPosn; // resume where the previous call stopped
        if (bufferPosn >= bufferLength) {
            // buffer exhausted: refill from the stream and restart at its head
            bufferPosn = 0;
            chunkStart = 0;
            bufferLength = in.read(buffer);
            if (bufferLength <= 0) {
                break; // EOF
            }
        }
        // scan forward for an LF, leaving bufferPosn just past it when found
        while (bufferPosn < bufferLength) {
            if (buffer[bufferPosn++] == LF) {
                terminatorLength = 1;
                break;
            }
        }
        final int chunkLength = bufferPosn - chunkStart;
        consumed += chunkLength;
        int toAppend = isNewLineIncluded() ? chunkLength : chunkLength - terminatorLength;
        // never store more than maxLineLength bytes in total
        final int room = maxLineLength - storedLength;
        if (toAppend > room) {
            toAppend = room;
        }
        if (toAppend > 0) {
            str.append(buffer, chunkStart, toAppend);
            storedLength += toAppend;
        }
    } while (terminatorLength == 0 && consumed < maxBytesToConsume);

    if (consumed > Integer.MAX_VALUE) {
        throw new IOException("Too many bytes before newline: " + consumed);
    }
    return (int) consumed;
}
 
源代码14 项目: datawave   文件: TextUtil.java
/**
 * Appends the first byte of {@code nullByte} to {@code t}, followed by the first
 * 8 bytes of the fixed-length encoding of {@code s}.
 */
public static void textAppend(Text t, long s) {
    t.append(nullByte, 0, 1);
    final byte[] encodedValue = LongCombiner.FIXED_LEN_ENCODER.encode(s);
    t.append(encodedValue, 0, 8);
}
 
源代码15 项目: incubator-tajo   文件: LineReader.java
/**
 * Read a line terminated by one of CR, LF, or CRLF.
 *
 * @param str the Text that receives the line (cleared first); the terminator
 *        bytes are not stored
 * @param maxLineLength the maximum number of bytes stored into str; further
 *        bytes of the line are still consumed but not stored
 * @param maxBytesToConsume checked after each pass over the buffer; once this
 *        many bytes have been consumed no further pass is started
 * @return the number of bytes consumed, including the line terminator
 * @throws IOException if more than Integer.MAX_VALUE bytes were consumed;
 *         presumably also propagated from fillBuffer (not visible here)
 */
private int readDefaultLine(Text str, int maxLineLength, int maxBytesToConsume)
    throws IOException {
  /* We're reading data from in, but the head of the stream may be
   * already buffered in buffer, so we have several cases:
   * 1. No newline characters are in the buffer, so we need to copy
   *    everything and read another buffer from the stream.
   * 2. An unambiguously terminated line is in buffer, so we just
   *    copy to str.
   * 3. Ambiguously terminated line is in buffer, i.e. buffer ends
   *    in CR.  In this case we copy everything up to CR to str, but
   *    we also need to see what follows CR: if it's LF, then we
   *    need consume LF as well, so next call to readLine will read
   *    from after that.
   * We use a flag prevCharCR to signal if previous character was CR
   * and, if it happens to be at the end of the buffer, delay
   * consuming it until we have a chance to look at the char that
   * follows.
   */
  str.clear();
  int txtLength = 0; //tracks str.getLength(), as an optimization
  int newlineLength = 0; //length of terminating newline
  boolean prevCharCR = false; //true if prev char was CR
  long bytesConsumed = 0;
  do {
    int startPosn = bufferPosn; //starting from where we left off the last time
    if (bufferPosn >= bufferLength) {
      // buffer exhausted: restart at its head and refill it
      startPosn = bufferPosn = 0;
      if (prevCharCR) {
        ++bytesConsumed; //account for CR from previous read
      }
      bufferLength = fillBuffer(in, buffer, prevCharCR);
      if (bufferLength <= 0) {
        break; // EOF
      }
    }
    for (; bufferPosn < bufferLength; ++bufferPosn) { //search for newline
      if (buffer[bufferPosn] == LF) {
        // LF ends the line; it counts as CRLF (2 bytes) when the previous byte was CR
        newlineLength = (prevCharCR) ? 2 : 1;
        ++bufferPosn; // at next invocation proceed from following byte
        break;
      }
      if (prevCharCR) { //CR + notLF, we are at notLF
        // the lone CR was the terminator; bufferPosn stays on the current byte
        newlineLength = 1;
        break;
      }
      prevCharCR = (buffer[bufferPosn] == CR);
    }
    int readLength = bufferPosn - startPosn;
    if (prevCharCR && newlineLength == 0) {
      --readLength; //CR at the end of the buffer
    }
    bytesConsumed += readLength;
    // exclude the terminator, then clamp to the room left under maxLineLength
    int appendLength = readLength - newlineLength;
    if (appendLength > maxLineLength - txtLength) {
      appendLength = maxLineLength - txtLength;
    }
    if (appendLength > 0) {
      str.append(buffer, startPosn, appendLength);
      txtLength += appendLength;
    }
  } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);

  // the result is returned as an int, so larger counts cannot be reported
  if (bytesConsumed > (long) Integer.MAX_VALUE) {
    throw new IOException("Too many bytes before newline: " + bytesConsumed);
  }
  return (int) bytesConsumed;
}
 
源代码16 项目: Hadoop-BAM   文件: QseqInputFormat.java
/**
 * Parses one QSEQ line into {@code key} and {@code fragment}, using the field
 * offsets and lengths prepared by {@code setFieldPositionsAndLengths}.
 *
 * @throws FormatException if a field cannot be decoded as text
 */
private void scanQseqLine(Text line, Text key, SequencedFragment fragment)
{
	setFieldPositionsAndLengths(line);

	// Key layout: fields 0 through 5 (machine to y-pos) with tabs turned into
	// colons, then a colon and the read number (field 7).
	key.clear();
	// everything up to and including field[5]
	key.append(line.getBytes(), 0, fieldPositions[5] + fieldLengths[5]);

	// turn each tab copied so far into ':'
	final byte[] keyBytes = key.getBytes();
	final int headLength = key.getLength();
	int idx = 0;
	while (idx < headLength)
	{
		if (keyBytes[idx] == '\t')
		{
			keyBytes[idx] = ':';
		}
		++idx;
	}

	// Copy the read number together with the tab in front of it (hence -1 / +1),
	// then overwrite that tab, which lands at index headLength, with ':'.
	key.append(line.getBytes(), fieldPositions[7] - 1, fieldLengths[7] + 1);
	key.getBytes()[headLength] = ':';

	// now the fragment
	try
	{
		fragment.clear();

		final String instrument = Text.decode(line.getBytes(), fieldPositions[0], fieldLengths[0]);
		fragment.setInstrument(instrument);

		final String runNumber = Text.decode(line.getBytes(), fieldPositions[1], fieldLengths[1]);
		fragment.setRunNumber(Integer.parseInt(runNumber));
		//fragment.setFlowcellId();

		final String lane = Text.decode(line.getBytes(), fieldPositions[2], fieldLengths[2]);
		fragment.setLane(Integer.parseInt(lane));

		final String tile = Text.decode(line.getBytes(), fieldPositions[3], fieldLengths[3]);
		fragment.setTile(Integer.parseInt(tile));

		final String xpos = Text.decode(line.getBytes(), fieldPositions[4], fieldLengths[4]);
		fragment.setXpos(Integer.parseInt(xpos));

		final String ypos = Text.decode(line.getBytes(), fieldPositions[5], fieldLengths[5]);
		fragment.setYpos(Integer.parseInt(ypos));

		final String read = Text.decode(line.getBytes(), fieldPositions[7], fieldLengths[7]);
		fragment.setRead(Integer.parseInt(read));

		// the filter passes unless field 10 starts with '0'
		fragment.setFilterPassed(line.getBytes()[fieldPositions[10]] != '0');
		//fragment.setControlNumber();

		// a non-empty field 6 starting with '0' is a null index sequence
		final boolean nullIndex = fieldLengths[6] > 0 && line.getBytes()[fieldPositions[6]] == '0';
		if (nullIndex)
		{
			fragment.setIndexSequence(null);
		}
		else
		{
			final String indexSequence = Text.decode(line.getBytes(), fieldPositions[6], fieldLengths[6]);
			fragment.setIndexSequence(indexSequence.replace('.', 'N'));
		}
	}
	catch (CharacterCodingException e)
	{
		throw new FormatException("Invalid character format at " + makePositionMessage(this.pos - line.getLength()) + "; line: " + line);
	}

	// fields 8 and 9 go into the fragment's sequence and quality
	fragment.getSequence().append(line.getBytes(), fieldPositions[8], fieldLengths[8]);
	fragment.getQuality().append(line.getBytes(), fieldPositions[9], fieldLengths[9]);
}
 
源代码17 项目: big-c   文件: LineReader.java
/**
 * Read a line terminated by a custom delimiter.
 *
 * @param str the Text that receives the record (cleared first); the
 *        delimiter bytes are not stored
 * @param maxLineLength the maximum number of buffer bytes stored into str
 * @param maxBytesToConsume checked after each pass over the buffer; once this
 *        many bytes have been consumed no further pass is started
 * @return the number of bytes consumed
 * @throws IOException if more than Integer.MAX_VALUE bytes were consumed;
 *         presumably also propagated from fillBuffer (not visible here)
 */
private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume)
    throws IOException {
 /* We're reading data from inputStream, but the head of the stream may be
  *  already captured in the previous buffer, so we have several cases:
  * 
  * 1. The buffer tail does not contain any character sequence which
  *    matches with the head of delimiter. We count it as a 
  *    ambiguous byte count = 0
  *    
  * 2. The buffer tail contains a X number of characters,
  *    that forms a sequence, which matches with the
  *    head of delimiter. We count ambiguous byte count = X
  *    
  *    // ***  eg: A segment of input file is as follows
  *    
  *    " record 1792: I found this bug very interesting and
  *     I have completely read about it. record 1793: This bug
  *     can be solved easily record 1794: This ." 
  *    
  *    delimiter = "record";
  *        
  *    supposing:- String at the end of buffer =
  *    "I found this bug very interesting and I have completely re"
  *    Therefore next buffer = "ad about it. record 179       ...."
  *     
  *     The matching characters in the input
  *     buffer tail and delimiter head = "re" 
  *     Therefore, ambiguous byte count = 2 ****   //
  *     
  *     2.1 If the following bytes are the remaining characters of
  *         the delimiter, then we have to capture only up to the starting 
  *         position of delimiter. That means, we need not include the 
  *         ambiguous characters in str.
  *     
  *     2.2 If the following bytes are not the remaining characters of
  *         the delimiter ( as mentioned in the example ), 
  *         then we have to include the ambiguous characters in str. 
  */
  str.clear();
  int txtLength = 0; // tracks str.getLength(), as an optimization
  long bytesConsumed = 0;
  int delPosn = 0; // number of delimiter bytes matched so far
  int ambiguousByteCount=0; // To capture the ambiguous characters count
  do {
    int startPosn = bufferPosn; // Start from previous end position
    if (bufferPosn >= bufferLength) {
      // buffer exhausted: restart at its head and refill it
      startPosn = bufferPosn = 0;
      bufferLength = fillBuffer(in, buffer, ambiguousByteCount > 0);
      if (bufferLength <= 0) {
        // EOF: a pending partial delimiter match was ordinary data after all
        // NOTE(review): these bytes are appended to str but not added back to
        // bytesConsumed (they were subtracted below when the buffer ended) -
        // confirm whether callers rely on the returned count here.
        str.append(recordDelimiterBytes, 0, ambiguousByteCount);
        break; // EOF
      }
    }
    for (; bufferPosn < bufferLength; ++bufferPosn) {
      if (buffer[bufferPosn] == recordDelimiterBytes[delPosn]) {
        delPosn++;
        if (delPosn >= recordDelimiterBytes.length) {
          // whole delimiter matched: step past its last byte and stop scanning
          bufferPosn++;
          break;
        }
      } else if (delPosn != 0) {
        // partial match failed: step back so the loop increment re-examines
        // this byte against the first delimiter byte
        bufferPosn--;
        delPosn = 0;
      }
    }
    int readLength = bufferPosn - startPosn;
    bytesConsumed += readLength;
    // leave out the (possibly partial) delimiter match at the end of this chunk
    int appendLength = readLength - delPosn;
    if (appendLength > maxLineLength - txtLength) {
      appendLength = maxLineLength - txtLength;
    }
    if (appendLength > 0) {
      if (ambiguousByteCount > 0) {
        str.append(recordDelimiterBytes, 0, ambiguousByteCount);
        //appending the ambiguous characters (refer case 2.2)
        bytesConsumed += ambiguousByteCount;
        ambiguousByteCount=0;
      }
      str.append(buffer, startPosn, appendLength);
      txtLength += appendLength;
    }
    if (bufferPosn >= bufferLength) {
      // buffer ended inside a partial delimiter match: remember how many
      // bytes are undecided until the next buffer is examined
      if (delPosn > 0 && delPosn < recordDelimiterBytes.length) {
        ambiguousByteCount = delPosn;
        bytesConsumed -= ambiguousByteCount; //to be consumed in next
      }
    }
  } while (delPosn < recordDelimiterBytes.length 
      && bytesConsumed < maxBytesToConsume);
  // the result is returned as an int, so larger counts cannot be reported
  if (bytesConsumed > Integer.MAX_VALUE) {
    throw new IOException("Too many bytes before delimiter: " + bytesConsumed);
  }
  return (int) bytesConsumed; 
}
 
源代码18 项目: rya   文件: KeyParts.java
/**
 * Appends a delimiter byte and then an MD5 hash of the serialized statement to the key.
 * Identical statements therefore produce identical keys, while different statements
 * that occur at the same time are very unlikely to collide.
 * If the application stores a very large number of statements at the exact same time,
 * the md5 value might be upgraded to, for example, sha-1 to avoid collisions.
 * @param statement the statement whose serialized form is hashed
 * @param keyText the key that the delimiter and hash are appended to
 */
public static void appendUniqueness(final Statement statement, final Text keyText) {
    // delimiter between the existing key content and the hash
    keyText.append(HASH_PREFIX, 0, 1);

    final String serialized = StatementSerializer.writeStatement(statement);
    final byte[] serializedUtf8 = StringUtils.getBytesUtf8(serialized);
    final byte[] digest = Md5Hash.md5Binary(new Value(serializedUtf8));
    keyText.append(digest, 0, digest.length);
}
 
源代码19 项目: datawave   文件: TextUtil.java
/**
 * Appends the whole content of {@code nullByte} to the end of the given text.
 *
 * @param text
 *            the text that is extended in place
 */
public static void appendNullByte(Text text) {
    final int count = nullByte.length;
    text.append(nullByte, 0, count);
}
 
源代码20 项目: accumulo-examples   文件: QueryUtil.java
/**
 * Given a path, construct an accumulo row prepended with the path's depth for the directory
 * table. The depth is formatted as at least three zero-padded digits and the path is encoded
 * with the platform default charset.
 *
 * @param path
 *          the full path of a file or directory
 * @return the accumulo row associated with this path
 */
public static Text getRow(String path) {
  Text row = new Text(String.format("%03d", getDepth(path)));
  // Append by encoded byte count: String.length() counts chars and would
  // truncate paths that contain multi-byte characters.
  byte[] pathBytes = path.getBytes();
  row.append(pathBytes, 0, pathBytes.length);
  return row;
}