org.apache.hadoop.io.Text#getBytes() Code Examples

The following examples show how to use org.apache.hadoop.io.Text#getBytes(). Each snippet comes from an open-source project; follow the project link to view the full source on GitHub.
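One contract runs through all of these snippets: getBytes() exposes the Text object's internal backing array, which may be longer than the encoded content, so its result is only meaningful together with getLength(). A minimal standalone sketch of the contract (not taken from any of the projects below):

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

public class GetBytesContractDemo {
    public static void main(String[] args) {
        Text text = new Text("hello world");
        // Shrinking the content keeps the larger backing buffer around
        text.set("hi".getBytes(StandardCharsets.UTF_8), 0, 2);
        byte[] raw = text.getBytes();
        // Wrong: raw.length can exceed getLength(), so stale bytes get decoded
        String wrong = new String(raw, StandardCharsets.UTF_8);
        // Right: restrict decoding to the valid region
        String right = new String(raw, 0, text.getLength(), StandardCharsets.UTF_8);
        System.out.println(wrong + " vs " + right);   // e.g. "hillo world" vs "hi"
    }
}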

Example 1   Project: hbase   File: TsvImporterTextMapper.java
/**
 * Convert a line of TSV text into an HBase table row.
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
  try {
    Pair<Integer,Integer> rowKeyOffsets = parser.parseRowKey(value.getBytes(), value.getLength());
    ImmutableBytesWritable rowKey = new ImmutableBytesWritable(
        value.getBytes(), rowKeyOffsets.getFirst(), rowKeyOffsets.getSecond());
    context.write(rowKey, value);
  } catch (ImportTsv.TsvParser.BadTsvLineException|IllegalArgumentException badLine) {
    if (logBadLines) {
      System.err.println(value);
    }
    System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
    if (skipBadLines) {
      incrementBadLineCount(1);
      return;
    }
    throw new IOException(badLine);
  } catch (InterruptedException e) {
    LOG.error("Interrupted while emitting TSV text", e);
    Thread.currentThread().interrupt();
  }
}
 
Example 2   Project: incubator-hivemall   File: DecisionPathUDF.java
@Nonnull
public List<String> evaluate(@Nonnull final String modelId, @Nonnull final Text script,
        @Nonnull final Vector features) throws HiveException {
    if (!modelId.equals(prevModelId)) {
        this.prevModelId = modelId;
        int length = script.getLength();
        byte[] b = script.getBytes();
        b = Base91.decode(b, 0, length);
        this.rNode = RegressionTree.deserialize(b, b.length, true);
    }
    Preconditions.checkNotNull(rNode);

    handler.init();
    rNode.predict(features, handler);
    return handler.getResult();
}
 
Example 3   Project: mrgeo   File: AccumuloUtils.java
/**
 * Convert a Text object holding a tileId back to a long.
 *
 * @param rowId Text object to convert.
 * @return the long value from the Text object.
 */
public static long toLong(Text rowId)
{

  byte[] inB = rowId.getBytes();
  int len = rowId.getLength();   // only the first len bytes of inB are valid
  byte[] outB = new byte[8];
  for (int x = 0; x < outB.length; x++)
  {
    outB[x] = (x < len) ? inB[x] : 0x0;
  }

  return ByteBuffer.wrap(outB).getLong();
}
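
For reference, the inverse conversion just wraps the big-endian encoding of the long in a Text. A hypothetical helper in the same style (a sketch only, not taken from mrgeo):

public static Text toText(long tileId)
{
  // ByteBuffer is big-endian by default, matching toLong() above
  byte[] outB = ByteBuffer.allocate(8).putLong(tileId).array();
  return new Text(outB);
}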
 
Example 4   Project: incubator-hivemall   File: TreePredictUDF.java
@Nonnull
public Object[] evaluate(@Nonnull final String modelId, @Nonnull final Text script,
        @Nonnull final Vector features) throws HiveException {
    if (!modelId.equals(prevModelId)) {
        this.prevModelId = modelId;
        int length = script.getLength();
        byte[] b = script.getBytes();
        b = Base91.decode(b, 0, length);
        this.cNode = DecisionTree.deserialize(b, b.length, true);
    }

    Arrays.fill(result, null);
    Preconditions.checkNotNull(cNode);
    cNode.predict(features, new PredictionHandler() {
        public void visitLeaf(int output, double[] posteriori) {
            result[0] = new IntWritable(output);
            result[1] = WritableUtils.toWritableList(posteriori);
        }
    });

    return result;
}
 
Example 5   Project: kylin   File: SparkFactDistinct.java
@Override
public int getPartition(Object o) {
    if (!initialized) {
        synchronized (SparkFactDistinct.class) {
            if (!initialized) {
                init();
            }
        }
    }

    SelfDefineSortableKey skey = (SelfDefineSortableKey) o;
    Text key = skey.getText();
    if (key.getBytes()[0] == FactDistinctColumnsReducerMapping.MARK_FOR_HLL_COUNTER) {
        Long cuboidId = Bytes.toLong(key.getBytes(), 1, Bytes.SIZEOF_LONG);
        return reducerMapping.getReducerIdForCuboidRowCount(cuboidId);
    } else {
        return BytesUtil.readUnsigned(key.getBytes(), 0, 1);
    }
}
 
Example 6   Project: datawave   File: TextUtilTest.java
@Test
public void testAppend_multiByteChars() throws UnsupportedEncodingException {
    String prefix = "prefix\u6C34";
    int prefixLength = prefix.getBytes("UTF-8").length;
    Text text = new Text(prefix.getBytes("UTF-8"));
    // Multi-byte sample: 'z' (1 byte), U+6C34 "water" (3 bytes), and U+1D11E
    // MUSICAL SYMBOL G CLEF (4 bytes in UTF-8, a surrogate pair in the String)
    String multiByteCharString = "\u007A\u6C34\uD834\uDD1E";
    TextUtil.textAppend(text, multiByteCharString);
    byte[] stringBytes = multiByteCharString.getBytes("UTF-8");
    Assert.assertEquals("Length was wrong", 1 + stringBytes.length, text.getLength() - prefixLength);
    byte[] textBytes = text.getBytes();
    Assert.assertEquals("First byte was wrong", (byte) 0, textBytes[prefixLength]);
    byte[] restOfText = new byte[text.getLength() - prefixLength - 1];
    System.arraycopy(textBytes, 1 + prefixLength, restOfText, 0, restOfText.length);
    Assert.assertArrayEquals("Contents were wrong", stringBytes, restOfText);
}
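
Judging from the assertions, TextUtil.textAppend writes a single 0x00 separator byte followed by the UTF-8 encoding of its argument. A minimal equivalent built on Text's own append (an inference from this test, not the actual datawave source):

static void textAppend(Text text, String s) throws UnsupportedEncodingException {
    // one null byte as separator, then the raw UTF-8 bytes
    text.append(new byte[] {0}, 0, 1);
    byte[] utf8 = s.getBytes("UTF-8");
    text.append(utf8, 0, utf8.length);
}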
 
Example 7   Project: hbase   File: TsvImporterCustomTestMapper.java
/**
 * Convert a line of TSV text into an HBase table row after transforming the
 * values by multiplying them by 3.
 */
@Override
public void map(LongWritable offset, Text value, Context context)
      throws IOException {
  byte[] family = Bytes.toBytes("FAM");
  final byte[][] qualifiers = { Bytes.toBytes("A"), Bytes.toBytes("B") };

  // do some basic line parsing; decode only the valid region, since getBytes()
  // returns the backing buffer, which may be longer than the content
  byte[] lineBytes = value.getBytes();
  String[] valueTokens = new String(lineBytes, 0, value.getLength(), StandardCharsets.UTF_8).split("\u001b");

  // create the rowKey and Put
  ImmutableBytesWritable rowKey =
    new ImmutableBytesWritable(Bytes.toBytes(valueTokens[0]));
  Put put = new Put(rowKey.copyBytes());
  put.setDurability(Durability.SKIP_WAL);

  //The value should look like this: VALUE1 or VALUE2. Let's multiply
  //the integer by 3
  for(int i = 1; i < valueTokens.length; i++) {
    String prefix = valueTokens[i].substring(0, "VALUE".length());
    String suffix = valueTokens[i].substring("VALUE".length());
    String newValue = prefix + Integer.parseInt(suffix) * 3;

    KeyValue kv = new KeyValue(rowKey.copyBytes(), family,
        qualifiers[i-1], Bytes.toBytes(newValue));
    put.add(kv);
  }

  try {
    context.write(rowKey, put);
  } catch (InterruptedException e) {
    // restore the interrupt flag rather than swallowing the exception
    Thread.currentThread().interrupt();
    throw new IOException("Interrupted while writing row", e);
  }
}
 
Example 8   Project: Kylin   File: BaseCuboidMapperTest.java
@Test
public void testMapperWithNull() throws Exception {
    String cubeName = "test_kylin_cube_with_slr_1_new_segment";
    String segmentName = "20130331080000_20131212080000";
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
    // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL,
    // metadata);
    mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrances\\NAuction15123456789\\N"));
    List<Pair<Text, Text>> result = mapDriver.run();

    CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
    CubeInstance cube = cubeMgr.getCube(cubeName);

    assertEquals(1, result.size());
    Text rowkey = result.get(0).getFirst();
    byte[] key = rowkey.getBytes();
    byte[] header = Bytes.head(key, 26);
    byte[] sellerId = Bytes.tail(header, 18);
    byte[] cuboidId = Bytes.head(header, 8);
    byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26);

    RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
    decoder.decode(key);
    assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, null, Auction, 0, 15]", decoder.getValues().toString());

    assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
    assertEquals(511, Bytes.toLong(cuboidId));
    assertEquals(22, restKey.length);

    verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "0", "0", "0");
}
 
Example 9   Project: hadoop-gpu   File: Utils.java
/**
 * Write a String as a VInt n, followed by n bytes in Text (UTF-8) format.
 *
 * @param out the output to write to
 * @param s the string to write; may be null
 * @throws IOException if writing to the underlying output fails
 */
public static void writeString(DataOutput out, String s) throws IOException {
  if (s != null) {
    Text text = new Text(s);
    byte[] buffer = text.getBytes();
    int len = text.getLength();
    writeVInt(out, len);
    out.write(buffer, 0, len);
  } else {
    writeVInt(out, -1);
  }
}
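
The symmetric reader decodes the same framing; a sketch of what it would look like (the project's actual readString is not shown on this page):

public static String readString(DataInput in) throws IOException {
  // assumes the matching readVInt helper from the same Utils class
  int len = readVInt(in);
  if (len == -1) {
    return null;
  }
  byte[] buffer = new byte[len];
  in.readFully(buffer);
  return Text.decode(buffer);
}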
 
Example 10   Project: dremio-oss   File: HiveFieldConverter.java
@Override
public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector outputVV, int outputIndex) {
  final Text value = ((HiveVarcharObjectInspector)oi).getPrimitiveWritableObject(hiveFieldValue).getTextValue();
  final int valueLen = value.getLength();
  checkSizeLimit(valueLen);
  final byte[] valueBytes = value.getBytes();
  ((VarCharVector) outputVV).setSafe(outputIndex, valueBytes, 0, valueLen);
}
 
Example 11   Project: dremio-oss   File: HiveFieldConverter.java
@Override
public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector outputVV, int outputIndex) {
  final Text value = ((StringObjectInspector)oi).getPrimitiveWritableObject(hiveFieldValue);
  final int len = value.getLength();
  checkSizeLimit(len);
  final byte[] valueBytes = value.getBytes();
  ((VarCharVector) outputVV).setSafe(outputIndex, valueBytes, 0, len);
}
 
Example 12   Project: rya   File: JoinSelectProspectOutputTest.java
@Test
public void testOutput() throws InterruptedException, IOException {

    String s = "urn:gem:etype#1234";
    String p = "urn:gem#pred";

    String ts = "798497748386999999";
    
    Text t1 = new Text(TripleValueType.subject.name() + DELIM + s + DELIM + 1);
    Text t2 = new Text(TripleValueType.predicate.name() + DELIM + p + DELIM + 2);
    Text t3 = new Text(TripleValueType.subjectpredicate.name() + DELIM + s + DELIM + p + DELIM + ts);

    byte[] b = new byte[0];
    byte[] c = "25".getBytes();
    byte[] d = "47".getBytes();
    byte[] e = "15".getBytes();

    // Text#getBytes() hands back the internal buffer, which can be longer than
    // the encoded content; production code should pass getLength() alongside it
    Key key1 = new Key(t1.getBytes(), b, b, b, 1);
    Key key2 = new Key(t2.getBytes(), b, b, b, 1);
    Key key3 = new Key(t3.getBytes(), b, b, b, 1);
    Value val1 = new Value(c);
    Value val2 = new Value(d);
    Value val3 = new Value(e);

    new MapDriver<Key, Value, CompositeType, TripleCard>()
            .withMapper(new JoinSelectProspectOutput.CardinalityMapper())
            .withInput(key1, val1)
            .withInput(key2, val2)
            .withInput(key3, val3)
            .withOutput(new CompositeType(s, 1), new TripleCard(new CardinalityType(25, "subject", 1)))
            .withOutput(new CompositeType(p, 1), new TripleCard(new CardinalityType(47, "predicate", 2)))
            .withOutput(new CompositeType(s + DELIM + p, 1),
                    new TripleCard(new CardinalityType(15, "subjectpredicate", Long.parseLong(ts)))).runTest();

}
 
Example 13   Project: datawave   File: AgeOffFilterBase.java
@Override
public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
    super.init(source, options, env);
    if (options == null)
        throw new IllegalArgumentException("ttl must be set for DateBasedAgeOffFilter");
    
    String ttl = options.get("ttl");
    if (ttl == null)
        throw new IllegalArgumentException("ttl must be set for DateBasedAgeOffFilter");
    
    int thresholdDays = Integer.parseInt(ttl);
    Text cutoffDateText = new Text(DateHelper.format(getCutoffDate(thresholdDays)));
    // cache the backing array together with its valid length; the array alone
    // may contain unused trailing bytes
    cutoffDate = cutoffDateText.getBytes();
    cutoffDateLen = cutoffDateText.getLength();
}
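
Caching the byte array together with its valid length lets accept() compare dates without allocating per entry. An illustrative comparison helper (a sketch under the assumption that DateHelper.format yields fixed-width, lexicographically sortable date strings; not the datawave implementation):

static boolean onOrAfterCutoff(byte[] field, int fieldLen, byte[] cutoff, int cutoffLen) {
    // unsigned lexicographic compare over the shared prefix
    int n = Math.min(fieldLen, cutoffLen);
    for (int i = 0; i < n; i++) {
        int diff = (field[i] & 0xFF) - (cutoff[i] & 0xFF);
        if (diff != 0) {
            return diff > 0;
        }
    }
    return fieldLen >= cutoffLen;
}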
 
Example 14   Project: dremio-oss   File: HiveFieldConverter.java
@Override
public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector outputVV, int outputIndex) {
  final Text value = ((HiveCharObjectInspector)oi).getPrimitiveWritableObject(hiveFieldValue).getStrippedValue();
  final int valueLen = value.getLength();
  checkSizeLimit(valueLen);
  final byte[] valueBytes = value.getBytes();
  ((VarCharVector) outputVV).setSafe(outputIndex, valueBytes, 0, valueLen);
}
 
Example 15   Project: recsys-offline   File: PrepareClusterJob.java
public void reduce(Text key, Iterable<Text> values, Context context)
		throws IOException, InterruptedException {
	Text text = values.iterator().next();
	String[] columns = text.toString().split(split_str);
	// key.getBytes() may include stale bytes past key.getLength(), so copy
	// only the valid region for the row key
	Put put = new Put(Arrays.copyOfRange(key.getBytes(), 0, key.getLength()));
	put.add(Bytes.toBytes(Constants.hbase_column_family),
			Bytes.toBytes(Constants.hbase_column_yearrate),
			Bytes.toBytes(columns[0]));
	put.add(Bytes.toBytes(Constants.hbase_column_family),
			Bytes.toBytes(Constants.hbase_column_repaylimittime),
			Bytes.toBytes(columns[1]));
	context.write(NullWritable.get(), put);
}
 
Example 16   Project: Kylin   File: BaseCuboidMapperTest.java
@Test
public void testMapperWithHeader() throws Exception {
    String cubeName = "test_kylin_cube_with_slr_1_new_segment";
    String segmentName = "20130331080000_20131212080000";
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
    // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL,
    // metadata);
    mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrancesWomenAuction15123456789132.33"));
    List<Pair<Text, Text>> result = mapDriver.run();

    CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
    CubeInstance cube = cubeMgr.getCube(cubeName);

    assertEquals(1, result.size());
    Text rowkey = result.get(0).getFirst();
    byte[] key = rowkey.getBytes();
    byte[] header = Bytes.head(key, 26);
    byte[] sellerId = Bytes.tail(header, 18);
    byte[] cuboidId = Bytes.head(header, 8);
    byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26);

    RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
    decoder.decode(key);
    assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, Women, Auction, 0, 15]", decoder.getValues().toString());

    assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
    assertEquals(511, Bytes.toLong(cuboidId));
    assertEquals(22, restKey.length);

    verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "132.33", "132.33", "132.33");
}
 
Example 17   Project: incubator-hivemall   File: TreePredictUDFv1.java
private DoubleWritable evaluateRegression(@Nonnull String modelId, boolean compressed,
        @Nonnull Text script, double[] features) throws HiveException {
    if (!modelId.equals(prevModelId)) {
        this.prevModelId = modelId;
        int length = script.getLength();
        byte[] b = script.getBytes();
        b = Base91.decode(b, 0, length);
        this.rNode = deserializeRegressionTree(b, b.length, compressed);
    }
    assert (rNode != null);
    double result = rNode.predict(features);
    return new DoubleWritable(result);
}
 
Example 18   Project: spork   File: CSVLoader.java
@Override
public Tuple getNext() throws IOException {
    mProtoTuple = new ArrayList<Object>();

    boolean inField = false;
    boolean inQuotedField = false;
    boolean evenQuotesSeen = true;
    
    if (!mRequiredColumnsInitialized) {
        if (signature != null) {
            Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
            mRequiredColumns = (boolean[])ObjectSerializer.deserialize(p.getProperty(signature));
        }
        mRequiredColumnsInitialized = true;
    }
    try {
        if (!in.nextKeyValue()) {
            return null;
        }                                                                                           
        Text value = (Text) in.getCurrentValue();
        byte[] buf = value.getBytes();
        int len = value.getLength();
        int fieldID = 0;

        ByteBuffer fieldBuffer = ByteBuffer.allocate(len);

        for (int i = 0; i < len; i++) {
            byte b = buf[i];
            inField = true;
            if (inQuotedField) {
                if (b == DOUBLE_QUOTE) {
                    evenQuotesSeen = !evenQuotesSeen;
                    if (evenQuotesSeen) {
                        fieldBuffer.put(DOUBLE_QUOTE);
                    }
                } else if (!evenQuotesSeen &&
                        (b == FIELD_DEL || b == RECORD_DEL)) {
                    inQuotedField = false;
                    inField = false;
                    readField(fieldBuffer, fieldID++);
                } else {
                    fieldBuffer.put(b);
                }
            } else if (b == DOUBLE_QUOTE) {
                inQuotedField = true;
                evenQuotesSeen = true;
            } else if (b == FIELD_DEL) {
                inField = false;
                readField(fieldBuffer, fieldID++); // end of the field
            } else {
                evenQuotesSeen = true;
                fieldBuffer.put(b);
            }
        }
        if (inField) readField(fieldBuffer, fieldID++);
    } catch (InterruptedException e) {
        int errCode = 6018;
        String errMsg = "Error while reading input";
        throw new ExecException(errMsg, errCode, 
                PigException.REMOTE_ENVIRONMENT, e);
    }

    Tuple t =  mTupleFactory.newTupleNoCopy(mProtoTuple);
    return t;
}
 
Example 19   Project: kylin   File: MergeDictReducer.java
@Override
protected void doReduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    String col = key.toString();
    logger.info("merge dictionary for column:{}", col);
    TblColRef tblColRef = colNeedDictMap.get(col);

    if (tblColRef == null) {
        logger.warn("column:{} not found in the columns need dictionary map: {}", col, colNeedDictMap.keySet());
        return;
    }

    DataType dataType = tblColRef.getType();
    List<Dictionary<String>> dicts = Lists.newLinkedList();
    for (Text value : values) {
        // caution: getBytes() returns the backing buffer, which may extend past
        // getLength(); the deserializer must not depend on the array length
        ByteArray byteArray = new ByteArray(value.getBytes());
        Dictionary<String> dict = (Dictionary<String>) DictionarySerializer.deserialize(byteArray);
        dicts.add(dict);
    }
    Dictionary mergedDict;
    if (dicts.size() > 1) {
        MultipleDictionaryValueEnumerator multipleDictionaryValueEnumerator = new MultipleDictionaryValueEnumerator(
                dataType, dicts);
        mergedDict = DictionaryGenerator.buildDictionary(dataType, multipleDictionaryValueEnumerator);
    } else if (dicts.size() == 1) {
        mergedDict = dicts.get(0);
    } else {
        throw new IllegalArgumentException("Dictionary missing for column " + col);
    }
    if (mergedDict == null) {
        throw new IllegalArgumentException("Merge dictionaries error for column " + col);
    }

    TableDesc tableDesc = tblColRef.getColumnDesc().getTable();
    IReadableTable.TableSignature signature = new IReadableTable.TableSignature();
    signature.setLastModifiedTime(System.currentTimeMillis());
    signature.setPath(tableDesc.getResourcePath());

    //TODO: Table signature size?
    //        signature.setSize(mergedDict.getSize());

    DictionaryInfo dictionaryInfo = new DictionaryInfo(tblColRef.getTable(), tblColRef.getName(), tblColRef
            .getColumnDesc().getZeroBasedIndex(), tblColRef.getDatatype(), signature);
    dictionaryInfo.setDictionaryObject(mergedDict);
    dictionaryInfo.setDictionaryClass(mergedDict.getClass().getName());
    dictionaryInfo.setCardinality(mergedDict.getSize());

    ByteArrayOutputStream fulBuf = new ByteArrayOutputStream();
    DataOutputStream fulDout = new DataOutputStream(fulBuf);
    DictionaryInfoSerializer.FULL_SERIALIZER.serialize(dictionaryInfo, fulDout);

    Text outValue = new Text(fulBuf.toByteArray());
    context.write(key, outValue);
    logger.debug("output dict info of column {} to path: {}", col,
            context.getConfiguration().get(FileOutputFormat.OUTDIR));
}
 
Example 20   Project: spork   File: PigStorage.java
@Override
public Tuple getNext() throws IOException {
    mProtoTuple = new ArrayList<Object>();
    if (!mRequiredColumnsInitialized) {
        if (signature!=null) {
            Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
            mRequiredColumns = (boolean[])ObjectSerializer.deserialize(p.getProperty(signature));
        }
        mRequiredColumnsInitialized = true;
    }
    //Prepend input source path if source tagging is enabled
    if(tagFile) {
        mProtoTuple.add(new DataByteArray(sourcePath.getName()));
    } else if (tagPath) {
        mProtoTuple.add(new DataByteArray(sourcePath.toString()));
    }

    try {
        boolean notDone = in.nextKeyValue();
        if (!notDone) {
            return null;
        }
        Text value = (Text) in.getCurrentValue();
        byte[] buf = value.getBytes();
        int len = value.getLength();
        int start = 0;
        int fieldID = 0;
        for (int i = 0; i < len; i++) {
            if (buf[i] == fieldDel) {
                if (mRequiredColumns==null || (mRequiredColumns.length>fieldID && mRequiredColumns[fieldID]))
                    addTupleValue(mProtoTuple, buf, start, i);
                start = i + 1;
                fieldID++;
            }
        }
        // pick up the last field
        if (start <= len && (mRequiredColumns==null || (mRequiredColumns.length>fieldID && mRequiredColumns[fieldID]))) {
            addTupleValue(mProtoTuple, buf, start, len);
        }
        Tuple t =  mTupleFactory.newTupleNoCopy(mProtoTuple);

        return dontLoadSchema ? t : applySchema(t);
    } catch (InterruptedException e) {
        int errCode = 6018;
        String errMsg = "Error while reading input";
        throw new ExecException(errMsg, errCode,
                PigException.REMOTE_ENVIRONMENT, e);
    }
}