下面列出了 org.apache.hadoop.io.Text#charAt() 的实例代码，或者点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Decides, from the column family alone, whether a key is treated as an event key.
 * <p>
 * The column family must be non-empty and contain a NUL character. It is then rejected if it
 * begins with {@code 'f'}, {@code 'i'} and a NUL, if it is the single character {@code 'd'},
 * or if it is the two characters {@code "tf"}.
 *
 * @param k the key whose column family is inspected
 * @return {@code true} when the column family passes every check above
 */
protected boolean isEventKey(Key k) {
    final Text family = k.getColumnFamily();
    final int length = family.getLength();

    // Nothing to classify without a NUL somewhere in a non-empty family.
    if (length <= 0 || family.find("\u0000") == -1) {
        return false;
    }

    // 'f', 'i', NUL prefix.
    if (family.charAt(0) == 'f' && family.charAt(1) == 'i' && family.charAt(2) == 0) {
        return false;
    }
    // Exactly "d".
    if (length == 1 && family.charAt(0) == 'd') {
        return false;
    }
    // Exactly "tf".
    if (length == 2 && family.charAt(0) == 't' && family.charAt(1) == 'f') {
        return false;
    }
    return true;
}
/**
 * Tests whether the column family of {@code k} marks it as an event key.
 * <p>
 * A key qualifies when its column family is non-empty, contains a NUL character, and is none
 * of: a family starting with {@code 'f'}, {@code 'i'}, NUL; the one-character family
 * {@code "d"}; the two-character family {@code "tf"}.
 *
 * @param k the key to classify
 * @return {@code true} if the key qualifies as described, {@code false} otherwise
 */
boolean isEventKey(Key k) {
    Text columnFamily = k.getColumnFamily();

    boolean containsNull = columnFamily.getLength() > 0 && columnFamily.find("\u0000") != -1;
    if (!containsNull) {
        return false;
    }

    // Each exclusion is negated individually (De Morgan on the original "not any of" test).
    return !(columnFamily.charAt(0) == 'f' && columnFamily.charAt(1) == 'i' && columnFamily.charAt(2) == 0)
            && !(columnFamily.getLength() == 1 && columnFamily.charAt(0) == 'd')
            && !(columnFamily.getLength() == 2 && columnFamily.charAt(0) == 't' && columnFamily.charAt(1) == 'f');
}
/**
 * Checks whether {@code nextRow} starts with the bytes held in {@code prevMetricBytes} and has
 * a NUL at the position immediately following that prefix.
 * <p>
 * {@code prevMetricBytes} is a field declared outside this method; it is only used here through
 * {@code getLength()} and {@code compareTo(byte[], int, int)}.
 *
 * @param nextRow the row to compare against the previously recorded metric bytes
 * @return {@code true} if {@code nextRow} is longer than the prefix, its leading bytes compare
 *         equal to the prefix, and the following character is {@code 0x00}
 */
private boolean isNextMetricTheSame(Text nextRow) {
    final int prefixLength = prevMetricBytes.getLength();

    // Text.getBytes() returns the backing array, which can be larger than the valid data; only
    // the first getLength() bytes are meaningful. Compare against the valid length so that stale
    // bytes past the end of the row can never be mistaken for a match.
    if (nextRow.getLength() <= prefixLength) {
        return false;
    }

    return 0 == prevMetricBytes.compareTo(nextRow.getBytes(), 0, prefixLength)
            && nextRow.charAt(prefixLength) == 0x00;
}
/**
 * Joins the values of one key that originate from two inputs.
 * <p>
 * Each value carries a one-character tag in front of its payload: values whose first character
 * is {@code '1'} go to the first group, all others to the second group. One record is emitted
 * (with a {@code null} output key) for every pairing of a first-group payload with a
 * second-group payload. If the second group is empty, each first-group payload is emitted once
 * with empty trailing columns. If the first group is empty, nothing is emitted.
 *
 * @param key the key shared by all values
 * @param iter the tagged values for {@code key}
 * @param oc collector receiving the joined records
 * @param reporter receives an {@code "OK"} status after each value and once after the last
 * @throws IOException declared by the signature; the body itself only calls {@code oc.collect}
 *         and {@code reporter.setStatus}
 */
public void reduce(
        Text key,
        Iterator<Text> iter,
        OutputCollector<Text, Text> oc,
        Reporter reporter) throws IOException {
    final List<String> leftPayloads = new ArrayList<String>();
    final List<String> rightPayloads = new ArrayList<String>();

    // Split the values by their leading tag character, dropping the tag from the payload.
    while (iter.hasNext()) {
        final Text value = iter.next();
        final boolean fromFirstInput = value.charAt(0) == '1';
        final String payload = value.toString().substring(1);
        if (fromFirstInput) {
            leftPayloads.add(payload);
        } else {
            rightPayloads.add(payload);
        }
        reporter.setStatus("OK");
    }
    reporter.setStatus("OK");

    if (leftPayloads.isEmpty()) {
        return;
    }

    final String keyText = String.valueOf(key);

    // No partner on the right side: emit the left record padded with empty columns.
    if (rightPayloads.isEmpty()) {
        for (String left : leftPayloads) {
            oc.collect(null, new Text(keyText + "\t" + left + "\t\t"));
        }
        return;
    }

    // Cross product of both sides, left side varying slowest.
    for (String left : leftPayloads) {
        for (String right : rightPayloads) {
            oc.collect(null, new Text(keyText + "\t" + left + "\t" + keyText + "\t" + right));
        }
    }
}
/**
 * Parses a {@link Text} as a signed {@code long} in the given radix without converting it to a
 * {@code String} first.
 * <p>
 * The accepted form is an optional leading {@code '-'} followed by one or more digits that
 * {@link Character#digit(int, int)} recognises for {@code radix}. A leading {@code '+'} is not
 * accepted. Characters are read with {@code text.charAt(i)} for {@code i} from {@code 0} up to
 * {@code text.getLength() - 1}.
 *
 * @param text the text to parse; must not be {@code null} or empty
 * @param radix the radix to use, between {@link Character#MIN_RADIX} and
 *        {@link Character#MAX_RADIX} inclusive
 * @return the parsed value
 * @throws NumberFormatException if {@code text} is {@code null}, empty, only {@code "-"},
 *         contains a character that is not a digit in {@code radix}, if the value does not
 *         fit in a {@code long}, or if {@code radix} is out of range
 */
public static long parseLong(Text text, int radix) throws NumberFormatException {
    if (text == null) {
        throw new NumberFormatException("null");
    }
    if (radix < Character.MIN_RADIX) {
        throw new NumberFormatException("radix " + radix + " less than Character.MIN_RADIX");
    }
    if (radix > Character.MAX_RADIX) {
        throw new NumberFormatException("radix " + radix + " greater than Character.MAX_RADIX");
    }

    final int max = text.getLength();
    if (max <= 0) {
        throw new NumberFormatException("Cannot parse an empty Text as a long");
    }

    final boolean negative = text.charAt(0) == '-';
    final int firstDigit = negative ? 1 : 0;
    if (firstDigit >= max) {
        throw new NumberFormatException("Cannot parse \"-\" as a long: no digits after the sign");
    }

    // The value is accumulated as a non-positive number so that Long.MIN_VALUE, whose magnitude
    // exceeds Long.MAX_VALUE, can be represented while parsing.
    final long limit = negative ? Long.MIN_VALUE : -Long.MAX_VALUE;
    final long multmin = limit / radix;

    long result = 0;
    for (int i = firstDigit; i < max; i++) {
        final int digit = Character.digit(text.charAt(i), radix);
        if (digit < 0) {
            throw new NumberFormatException("Invalid digit at offset " + i + " for radix " + radix);
        }
        // Multiplying by the radix would overflow.
        if (result < multmin) {
            throw new NumberFormatException("Value out of range for a long at offset " + i + " (radix " + radix + ")");
        }
        result *= radix;
        // Subtracting the digit would overflow.
        if (result < limit + digit) {
            throw new NumberFormatException("Value out of range for a long at offset " + i + " (radix " + radix + ")");
        }
        result -= digit;
    }

    return negative ? result : -result;
}
/**
 * Tells whether the key's column family begins with the characters {@code 'f'}, {@code 'i'}
 * and a NUL.
 *
 * @param key the key whose column family is inspected
 * @return {@code true} if the column family has at least three positions and starts with that
 *         three-character prefix
 */
private boolean isFieldIndexKey(Key key) {
    final Text family = key.getColumnFamily();
    if (family.getLength() < 3) {
        return false;
    }
    return family.charAt(0) == 'f'
            && family.charAt(1) == 'i'
            && family.charAt(2) == '\0';
}
/**
 * Advances the underlying source until a key is accepted as the next document or the source
 * has no more keys, and records the accepted key in {@code topKey}.
 * <p>
 * {@code topKey} is reset to {@code null} first. While it is still {@code null} and the source
 * has a top key, the current key is classified:
 * <ul>
 * <li>If {@code isEventKey} accepts it and {@code dataTypeFilter} applies, it becomes
 * {@code topKey}. If the filter rejects it, the source is re-sought starting at
 * {@code nextStartKey(...)} (inclusive).</li>
 * <li>Otherwise, if its column family starts with {@code "fi"} + NUL, or is exactly {@code "d"}
 * or {@code "tf"}, the source is re-sought to just past that section of the same row (start
 * key exclusive).</li>
 * <li>Any other key is skipped with {@code source.next()}.</li>
 * </ul>
 * Every seek ends at the end key of {@code totalRange}.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised while seeking or advancing
 */
protected void findNextDocument() {
    topKey = null;
    try {
        // Scratch buffer that receives the column family of the key being inspected.
        Text cf = new Text();

        while (topKey == null && source.hasTop()) {
            Key k = source.getTopKey();
            if (log.isTraceEnabled()) {
                log.trace("Sought to " + k);
            }
            k.getColumnFamily(cf);

            if (isEventKey(k)) {
                if (dataTypeFilter.apply(source.getTopKey())) {
                    this.topKey = source.getTopKey();
                } else {
                    Range nextCF = new Range(nextStartKey(source.getTopKey()), true, totalRange.getEndKey(), totalRange.isEndKeyInclusive());
                    source.seek(nextCF, columnFamilies, inclusive);
                }
                continue;
            }

            // Work out which non-event section (if any) this key belongs to, and the column
            // family to seek to in order to jump over the whole section.
            String section = null;
            String skipTo = null;
            if (cf.find("fi\0") == 0) {
                // Field index
                section = "fi";
                skipTo = "fi\1";
            } else if (cf.getLength() == 1 && cf.charAt(0) == 'd') {
                // Raw documents
                section = "d";
                skipTo = "d\0";
            } else if (cf.getLength() == 2 && cf.charAt(0) == 't' && cf.charAt(1) == 'f') {
                // Term frequencies
                section = "tf";
                skipTo = "tf\0";
            }

            if (skipTo == null) {
                if (log.isDebugEnabled()) {
                    log.debug("Next()'ing over the current key");
                }
                source.next();
            } else {
                if (log.isDebugEnabled()) {
                    log.debug("Seeking over '" + section + "'");
                }
                // Try to do an optimized jump over the section instead of stepping key by key.
                cf.set(skipTo);
                source.seek(new Range(new Key(source.getTopKey().getRow(), cf), false, totalRange.getEndKey(), totalRange.isEndKeyInclusive()),
                        columnFamilies, inclusive);
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Could not seek in findNextDocument", e);
    }
}
/**
 * Reports whether a key's column family starts with {@code 'f'}, {@code 'i'} and a NUL
 * character.
 *
 * @param key the key to check
 * @return {@code true} only if the column family length is at least three and its first three
 *         characters are {@code 'f'}, {@code 'i'}, NUL in that order
 */
private boolean isFieldIndexKey(Key key) {
    Text columnFamily = key.getColumnFamily();
    boolean longEnough = columnFamily.getLength() >= 3;
    if (longEnough && columnFamily.charAt(0) == 'f') {
        if (columnFamily.charAt(1) == 'i') {
            return columnFamily.charAt(2) == '\0';
        }
    }
    return false;
}