下面列出了 org.junit.experimental.theories.FromDataPoints#org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector 的实例代码，您可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Checks the arguments passed to {@code str_contains} and stores their inspectors.
 *
 * <p>The first argument is resolved through {@code HiveUtils.asStringOI}, the second must
 * satisfy {@code HiveUtils.isStringListOI}, and an optional third argument is resolved through
 * {@code HiveUtils.asBooleanOI}.
 *
 * @param argOIs inspectors of the call arguments (two or three entries)
 * @return the Java boolean object inspector
 * @throws UDFArgumentException if the argument count or the second argument's type is rejected
 */
@Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    final int numArgs = argOIs.length;
    if (numArgs < 2 || numArgs > 3) {
        throw new UDFArgumentLengthException("str_contains expects two or three arguments");
    }

    this.queryOI = HiveUtils.asStringOI(argOIs, 0);

    final ObjectInspector termsOI = argOIs[1];
    if (!HiveUtils.isStringListOI(termsOI)) {
        throw new UDFArgumentTypeException(1,
            "Expected array<string> for the second argument but got " + termsOI.getTypeName());
    }
    this.searchTermsOI = HiveUtils.asListOI(argOIs, 1);

    // The third argument is optional; its inspector is only captured when it is supplied.
    if (numArgs == 3) {
        this.orQueryOI = HiveUtils.asBooleanOI(argOIs, 2);
    }

    return PrimitiveObjectInspectorFactory.javaBooleanObjectInspector;
}
/**
 * Checks that {@link VectorizeFeaturesUDF} produces one {@code name:value} entry per feature
 * when two feature values are passed together with a constant list of two feature names.
 */
@Test
public void testTwoArguments() throws HiveException, IOException {
    VectorizeFeaturesUDF udf = new VectorizeFeaturesUDF();

    // Argument 0 is a constant list of feature names; arguments 1 and 2 are the feature values.
    List<String> featureNames = Arrays.asList("a", "b");
    ObjectInspector[] argOIs = new ObjectInspector[3];
    argOIs[0] = ObjectInspectorFactory.getStandardConstantListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, featureNames);
    argOIs[1] = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
    argOIs[2] = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    udf.initialize(argOIs);

    // Slot 0 stays null: this test only populates the value slots.
    DeferredObject[] arguments = new DeferredObject[3];
    // Double.valueOf replaces the deprecated Double(double) constructor.
    arguments[1] = new DeferredJavaObject(Double.valueOf(0.1));
    arguments[2] = new DeferredJavaObject("1.1");

    List<Text> actuals = udf.evaluate(arguments);
    List<Text> expected = WritableUtils.val("a:0.1", "b:1.1");
    Assert.assertEquals(expected, actuals);

    udf.close();
}
/**
 * Serializes the given regression tree, feeds it to a fresh {@link TreePredictUDF} together
 * with the feature vector {@code x}, and returns the predicted value.
 *
 * <p>The UDF is closed in a {@code finally} block so that it is released even when
 * {@code initialize} or {@code evaluate} throws.
 *
 * @param tree the tree to serialize and evaluate
 * @param x the feature vector passed to the UDF as a list
 * @return the double value produced by the UDF
 * @throws HiveException if the UDF fails to initialize or evaluate
 * @throws IOException if closing the UDF fails
 */
private static double evalPredict(RegressionTree tree, double[] x)
        throws HiveException, IOException {
    // The model is passed to the UDF as Base91-encoded text.
    byte[] serialized = tree.serialize(true);
    Text model = new Text(Base91.encode(serialized));

    TreePredictUDF udf = new TreePredictUDF();
    try {
        udf.initialize(
            new ObjectInspector[] {PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                    PrimitiveObjectInspectorFactory.writableStringObjectInspector,
                    ObjectInspectorFactory.getStandardListObjectInspector(
                        PrimitiveObjectInspectorFactory.javaDoubleObjectInspector),
                    ObjectInspectorUtils.getConstantObjectInspector(
                        PrimitiveObjectInspectorFactory.javaBooleanObjectInspector, false)});

        DeferredObject[] arguments = new DeferredObject[] {new DeferredJavaObject("model_id#1"),
                new DeferredJavaObject(model), new DeferredJavaObject(ArrayUtils.toList(x)),
                new DeferredJavaObject(false)};

        DoubleWritable result = (DoubleWritable) udf.evaluate(arguments);
        return result.get();
    } finally {
        udf.close();
    }
}
/**
 * Runs the union evaluator in PARTIAL1 mode with an explicit k and checks the partial result.
 *
 * <p>NOTE(review): the method name spells "partia1" with the digit one; it is kept unchanged
 * here so the test's identity does not change.
 */
@Test
public void partia1ModelGivenK() throws Exception {
    final int k = 256;
    ObjectInspector[] inspectors = { binaryInspector, intInspector };
    GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
    try (GenericUDAFEvaluator eval = new UnionDoublesSketchUDAF().getEvaluator(info)) {
        ObjectInspector resultInspector = eval.init(Mode.PARTIAL1, inspectors);
        DataToDoublesSketchUDAFTest.checkResultInspector(resultInspector);

        DoublesUnionState state = (DoublesUnionState) eval.getNewAggregationBuffer();

        // Feed two single-value sketches (1.0 and 2.0) into the union.
        for (double value : new double[] {1.0, 2.0}) {
            UpdateDoublesSketch input = DoublesSketch.builder().setK(k).build();
            input.update(value);
            eval.iterate(state, new Object[] { new BytesWritable(input.toByteArray()), new IntWritable(k) });
        }

        BytesWritable bytes = (BytesWritable) eval.terminatePartial(state);
        DoublesSketch resultSketch = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(bytes));
        Assert.assertEquals(resultSketch.getK(), k);
        Assert.assertEquals(resultSketch.getRetainedItems(), 2);
        Assert.assertEquals(resultSketch.getMinValue(), 1.0);
        Assert.assertEquals(resultSketch.getMaxValue(), 2.0);
    }
}
/**
 * Aggregates three strings with the options {@code -k 2 -reverse} and expects
 * "apple" followed by "banana".
 */
@Test
public void testReverseTopK() throws Exception {
    // reverse top-k = tail-k
    ObjectInspector optionsOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-k 2 -reverse");
    ObjectInspector[] inputOIs =
            {PrimitiveObjectInspectorFactory.javaStringObjectInspector, optionsOI};

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);

    final String[] values = {"banana", "apple", "candy"};
    for (String value : values) {
        evaluator.iterate(agg, new Object[] {value});
    }

    @SuppressWarnings("unchecked")
    List<Object> topK = (List<Object>) evaluator.terminate(agg);

    Assert.assertEquals(2, topK.size());
    Assert.assertEquals("apple", topK.get(0));
    Assert.assertEquals("banana", topK.get(1));
}
/**
 * Validates the call arguments and returns the evaluator for the intersect sketch UDAF.
 *
 * <p>One or two arguments are accepted: the first is validated as BINARY, the optional
 * second as an integral parameter.
 *
 * @param info the parameter info carrying the argument inspectors
 * @return a new {@code IntersectSketchUDAFEvaluator}
 * @throws SemanticException if the argument count or an argument type is rejected
 */
@Override
public GenericUDAFEvaluator getEvaluator(final GenericUDAFParameterInfo info)
        throws SemanticException {
    final ObjectInspector[] argInspectors = info.getParameterObjectInspectors();
    final int argCount = argInspectors.length;

    if (argCount < 1) {
        throw new UDFArgumentException("Please specify at least 1 argument");
    }
    if (argCount > 2) {
        // Report the position of the last (surplus) argument.
        throw new UDFArgumentTypeException(argCount - 1,
            "Please specify no more than 2 arguments");
    }

    ObjectInspectorValidator.validateGivenPrimitiveCategory(argInspectors[0], 0,
        PrimitiveCategory.BINARY);

    if (argCount == 2) {
        ObjectInspectorValidator.validateIntegralParameter(argInspectors[1], 1);
    }

    return new IntersectSketchUDAFEvaluator();
}
/**
 * Checks that {@link VectorizeFeaturesUDF} produces a single {@code name:value} entry when
 * one feature value is passed together with a constant list holding one feature name.
 */
@Test
public void testOneArgument() throws HiveException, IOException {
    VectorizeFeaturesUDF udf = new VectorizeFeaturesUDF();

    // Argument 0 is a constant list of feature names; argument 1 is the feature value.
    List<String> featureNames = Arrays.asList("a");
    ObjectInspector[] argOIs = new ObjectInspector[2];
    argOIs[0] = ObjectInspectorFactory.getStandardConstantListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, featureNames);
    argOIs[1] = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
    udf.initialize(argOIs);

    // Slot 0 stays null: this test only populates the value slot.
    DeferredObject[] arguments = new DeferredObject[2];
    // Double.valueOf replaces the deprecated Double(double) constructor.
    arguments[1] = new DeferredJavaObject(Double.valueOf(0.1));

    List<Text> actuals = udf.evaluate(arguments);
    // The explicit array keeps the call bound to the array-accepting form of val(...).
    List<Text> expected = WritableUtils.val(new String[] {"a:0.1"});
    Assert.assertEquals(expected, actuals);

    udf.close();
}
/**
 * Converts a list of raw features into an array of {@link FeatureValue}.
 *
 * <p>When {@code parseFeature} is set, each element goes through {@code FeatureValue.parse};
 * otherwise the element is copied to a standard object and paired with the weight
 * {@code 1.f}. Null elements leave a null slot at the same index.
 *
 * @param features the raw feature list
 * @return the converted features, or {@code null} when the list is empty
 */
@Nullable
protected final FeatureValue[] parseFeatures(@Nonnull final List<?> features) {
    final int numFeatures = features.size();
    if (numFeatures == 0) {
        return null;
    }

    final ObjectInspector elemOI = featureListOI.getListElementObjectInspector();
    final FeatureValue[] parsed = new FeatureValue[numFeatures];
    for (int idx = 0; idx < numFeatures; idx++) {
        final Object raw = features.get(idx);
        if (raw != null) {
            parsed[idx] = parseFeature
                    ? FeatureValue.parse(raw)
                    : new FeatureValue(ObjectInspectorUtils.copyToStandardObject(raw, elemOI), 1.f);
        }
    }
    return parsed;
}
/**
 * Generates a series from 5 to 1 with step -2 and expects the values 5, 3, 1.
 */
@Test
public void testNegativeStepInt() throws HiveException {
    final List<IntWritable> collected = new ArrayList<>();

    GenerateSeriesUDTF udtf = new GenerateSeriesUDTF();
    ObjectInspector[] argOIs = {PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.writableIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaLongObjectInspector};
    udtf.initialize(argOIs);

    udtf.setCollector(new Collector() {
        @Override
        public void collect(Object args) throws HiveException {
            // Store a copy of the first column of each forwarded row.
            Object[] forwarded = (Object[]) args;
            IntWritable first = (IntWritable) forwarded[0];
            collected.add(new IntWritable(first.get()));
        }
    });

    udtf.process(new Object[] {5, new IntWritable(1), -2L});

    List<IntWritable> expected =
            Arrays.asList(new IntWritable(5), new IntWritable(3), new IntWritable(1));
    Assert.assertEquals(expected, collected);
}
/**
 * Prepares the evaluator for the given aggregation mode.
 *
 * <p>In PARTIAL1/COMPLETE the single argument is resolved as a long-compatible inspector;
 * in the other modes it is resolved as a list inspector with a primitive element inspector.
 *
 * @param mode the aggregation mode
 * @param argOIs inspectors of the incoming arguments (exactly one is expected)
 * @return a standard list inspector over writable longs
 * @throws HiveException if the superclass or an inspector conversion fails
 */
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] argOIs) throws HiveException {
    assert (argOIs.length == 1);
    super.init(mode, argOIs);

    // input side
    final boolean fromOriginalData = (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE);
    if (fromOriginalData) {
        this.inputOI = HiveUtils.asLongCompatibleOI(argOIs[0]);
    } else {
        // merging partial aggregations
        this.mergeOI = HiveUtils.asListOI(argOIs[0]);
        this.mergeListElemOI =
                HiveUtils.asPrimitiveObjectInspector(mergeOI.getListElementObjectInspector());
    }

    // output side: terminatePartial and terminate both yield the same inspector
    return ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.writableLongObjectInspector);
}
/**
 * Serializes a two-column row whose second value is null, first with null serialization
 * disabled (the null column is expected to be absent) and then enabled (the column is
 * expected as a NULL attribute).
 */
@Test
public void testNull() throws SerDeException {
    List<String> names = PRIMITIVE_FIELDS.subList(0, 2);
    List<ObjectInspector> inspectors = PRIMITIVE_OIS.subList(0, 2);

    // Copy the sample data so the second value can be nulled out.
    List<String> values = Lists.newArrayList(PRIMITIVE_STRING_DATA.subList(0, 2));
    values.set(1, null);

    List<Object> row = Lists.newArrayList();
    row.addAll(values);

    Map<String, AttributeValue> expected = Maps.newHashMap();
    expected.put(names.get(0), new AttributeValue(values.get(0)));

    // no null serialization
    Map<String, AttributeValue> actual = getSerializedItem(names, inspectors, row, false);
    assertEquals(expected, actual);

    // with null serialization
    expected.put(names.get(1), new AttributeValue().withNULL(true));
    actual = getSerializedItem(names, inspectors, row, true);
    assertEquals(expected, actual);
}
/**
 * Builds one ObjectInspector per argument from the given argument values and their types.
 *
 * <p>A null argument value yields the standard Java inspector for its type; a non-null
 * value yields a primitive Java constant inspector wrapping that value.
 *
 * @param args the argument values; entries may be null
 * @param argTypes the type of each argument, parallel to {@code args}
 * @return the inspectors, in argument order
 */
public static ObjectInspector[] toInspectors(Object[] args, DataType[] argTypes) {
    assert args.length == argTypes.length;

    final int count = argTypes.length;
    final ObjectInspector[] inspectors = new ObjectInspector[count];
    for (int idx = 0; idx < count; idx++) {
        final Object literal = args[idx];
        inspectors[idx] = (literal == null)
                ? TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(
                    HiveTypeUtil.toHiveTypeInfo(argTypes[idx]))
                : HiveInspectors.getPrimitiveJavaConstantObjectInspector(
                    (PrimitiveTypeInfo) HiveTypeUtil.toHiveTypeInfo(argTypes[idx]), literal);
    }
    return inspectors;
}
/**
 * Builds a comma-separated string with one letter per argument: {@code Q} when
 * {@code HiveUtils.isNumberOI} accepts the argument's inspector, {@code C} otherwise.
 *
 * @param argOIs inspectors of the call arguments
 * @return a constant string inspector holding the resulting string
 * @throws UDFArgumentException declared by the overridden method
 */
@Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    final int numArgs = argOIs.length;
    final StringBuilder attrTypes = new StringBuilder(128);
    for (int i = 0; i < numArgs; i++) {
        if (i > 0) {
            attrTypes.append(',');
        }
        // 'Q' = quantitative, 'C' = categorical
        attrTypes.append(HiveUtils.isNumberOI(argOIs[i]) ? 'Q' : 'C');
    }
    return ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, attrTypes.toString());
}
/**
 * Passes parallel key and value arrays to {@link UDFMapBuild} and expects an
 * equivalent map of key1..key3 to value1..value3.
 */
@Test
public void testMapBuild() throws Exception {
    UDFMapBuild udf = new UDFMapBuild();

    ObjectInspector keysOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    ObjectInspector valuesOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    udf.initialize(new ObjectInspector[] {keysOI, valuesOI});

    List<String> keys = ImmutableList.of("key1", "key2", "key3");
    List<String> values = ImmutableList.of("value1", "value2", "value3");
    DeferredObject[] args = {new DeferredJavaObject(keys), new DeferredJavaObject(values)};

    LinkedHashMap<String, String> output = (LinkedHashMap<String, String>) udf.evaluate(args);

    LinkedHashMap<String, String> expect = Maps.newLinkedHashMap();
    expect.put("key1", "value1");
    expect.put("key2", "value2");
    expect.put("key3", "value3");

    Assert.assertEquals("map_build() test", true, MapUtils.mapEquals(output, expect));
}
/**
 * Aggregates six (value, key) pairs, one of them a duplicate, with the options
 * {@code -k 4 -dedup -kv_map} and expects a four-entry map for the keys 5, 4, 3 and 2.
 */
@Test
public void testTop4Dedup() throws Exception {
    ObjectInspector optionsOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-k 4 -dedup -kv_map");
    ObjectInspector[] inputOIs = {PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaIntObjectInspector, optionsOI};

    // The pair 4:candy appears twice.
    final int[] keys = {5, 3, 4, 1, 2, 4};
    final String[] values = {"apple", "banana", "candy", "donut", "egg", "candy"};

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);
    for (int idx = 0; idx < values.length; idx++) {
        evaluator.iterate(agg, new Object[] {values[idx], keys[idx]});
    }

    Object result = evaluator.terminate(agg);
    Assert.assertEquals(LinkedHashMap.class, result.getClass());

    Map<?, ?> topK = (Map<?, ?>) result;
    Assert.assertEquals(4, topK.size());
    Assert.assertEquals("apple", topK.get(5));
    Assert.assertEquals("candy", topK.get(4));
    Assert.assertEquals("banana", topK.get(3));
    Assert.assertEquals("egg", topK.get(2));
    Assert.assertNull(topK.get(1));
}
/**
 * Creates a tree writer for a list column.
 *
 * Sets up a single child writer for the list's element inspector and an integer
 * writer backed by the column's LENGTH stream, then records the initial position.
 *
 * @param columnId id of the column this writer handles
 * @param inspector inspector for the column; cast to ListObjectInspector below
 * @param writer stream factory used to create the underlying streams
 * @param nullable passed through to the superclass constructor
 * @throws IOException declared by this constructor
 */
ListTreeWriter(int columnId,
ObjectInspector inspector,
StreamFactory writer,
boolean nullable) throws IOException {
super(columnId, inspector, writer, nullable);
// The write-format flag is derived from the stream factory.
this.isDirectV2 = isNewWriteFormat(writer);
ListObjectInspector listObjectInspector = (ListObjectInspector) inspector;
// A list has exactly one child: the writer for its elements (created with nullable = true).
childrenWriters = new TreeWriter[1];
childrenWriters[0] =
createTreeWriter(listObjectInspector.getListElementObjectInspector(),
writer, true);
// NOTE(review): the meaning of the literal `false` argument is not visible here -- confirm against createIntegerWriter.
lengths = createIntegerWriter(writer.createStream(columnId,
OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
recordPosition(rowIndexPosition);
}
/**
 * Expects {@code getEvaluator} to reject a binary inspector in the third argument position.
 */
@Test(expectedExceptions = { UDFArgumentTypeException.class })
public void initInvalidTypeArg3() throws SemanticException {
    ObjectInspector[] inspectors = { binaryInspector, intInspector, binaryInspector };
    GenericUDAFParameterInfo params =
            new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
    new UnionSketchUDAF().getEvaluator(params);
}
/**
 * Runs the HLL union evaluator in PARTIAL1 mode with explicit lgK and target type, feeds two
 * single-item sketches, and checks the three-element intermediate result.
 */
@Test
public void partial1ModeExplicitParams() throws Exception {
    final int lgK = 10;
    final TgtHllType hllType = TgtHllType.HLL_6;

    ObjectInspector[] inspectors = { binaryInspector, intConstantInspector, stringConstantInspector };
    GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
    try (GenericUDAFEvaluator eval = new UnionSketchUDAF().getEvaluator(info)) {
        ObjectInspector resultInspector = eval.init(Mode.PARTIAL1, inspectors);
        DataToSketchUDAFTest.checkIntermediateResultInspector(resultInspector);

        State state = (State) eval.getNewAggregationBuffer();

        // Two input sketches holding the items 1 and 2 respectively.
        for (int item = 1; item <= 2; item++) {
            HllSketch input = new HllSketch(lgK, hllType);
            input.update(item);
            eval.iterate(state, new Object[] {new BytesWritable(input.toCompactByteArray()),
                    new IntWritable(lgK), new Text(hllType.toString())});
        }

        Object result = eval.terminatePartial(state);
        Assert.assertNotNull(result);
        Assert.assertTrue(result instanceof List);

        // Intermediate layout: [lgK, target type name, serialized sketch]
        List<?> fields = (List<?>) result;
        Assert.assertEquals(fields.size(), 3);
        Assert.assertEquals(((IntWritable) fields.get(0)).get(), lgK);
        Assert.assertEquals(((Text) fields.get(1)).toString(), hllType.toString());

        HllSketch resultSketch =
                HllSketch.heapify(BytesWritableHelper.wrapAsMemory((BytesWritable) fields.get(2)));
        Assert.assertEquals(resultSketch.getLgConfigK(), lgK);
        Assert.assertEquals(resultSketch.getTgtHllType(), hllType);
        Assert.assertEquals(resultSketch.getEstimate(), 2.0, 0.01);
    }
}
/**
 * Checks the number and types of the arguments and returns the evaluator for this UDAF.
 *
 * <p>Between one and three arguments are accepted: a BINARY sketch, an optional constant
 * integral value, and an optional constant STRING. The class-level description refers to
 * the latter two as lgK and the target HLL type.
 *
 * @see org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver
 * #getEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo)
 *
 * @param info The parameter info to validate
 * @return The GenericUDAFEvaluator to use to compute the function.
 * @throws SemanticException if the argument count or any argument is rejected
 */
@Override
public GenericUDAFEvaluator getEvaluator(final GenericUDAFParameterInfo info) throws SemanticException {
    final ObjectInspector[] argInspectors = info.getParameterObjectInspectors();
    final int argCount = argInspectors.length;

    if (argCount < 1) {
        throw new UDFArgumentException("Please specify at least 1 argument");
    }
    if (argCount > 3) {
        throw new UDFArgumentTypeException(argCount - 1, "Please specify no more than 3 arguments");
    }

    // First argument: the sketch itself.
    ObjectInspectorValidator.validateGivenPrimitiveCategory(argInspectors[0], 0, PrimitiveCategory.BINARY);

    // Optional second argument: a constant integral value.
    if (argCount >= 2) {
        final ObjectInspector second = argInspectors[1];
        ObjectInspectorValidator.validateIntegralParameter(second, 1);
        if (!ObjectInspectorUtils.isConstantObjectInspector(second)) {
            throw new UDFArgumentTypeException(1, "The second argument must be a constant");
        }
    }

    // Optional third argument: a constant string.
    if (argCount >= 3) {
        final ObjectInspector third = argInspectors[2];
        ObjectInspectorValidator.validateGivenPrimitiveCategory(third, 2, PrimitiveCategory.STRING);
        if (!ObjectInspectorUtils.isConstantObjectInspector(third)) {
            throw new UDFArgumentTypeException(2, "The third argument must be a constant");
        }
    }

    return new UnionSketchUDAFEvaluator();
}
/**
 * Validates the argument count for the raise_error UDF, which accepts zero or one argument.
 *
 * @param argOIs inspectors of the call arguments
 * @return the writable boolean object inspector
 * @throws UDFArgumentException if more than one argument is supplied
 */
@Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    if (argOIs.length != 0 && argOIs.length != 1) {
        // The message now matches the check above (it previously claimed "one or two").
        throw new UDFArgumentLengthException(
            "Expected zero or one argument for raise_error UDF: " + argOIs.length);
    }
    return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
}
/**
 * Tells whether the given inspector describes a primitive category.
 *
 * @param oi the inspector to check
 * @return {@code true} only when the inspector's category is {@code Category.PRIMITIVE}
 */
public static boolean canCreate(ObjectInspector oi) {
    return oi.getCategory() == Category.PRIMITIVE;
}
/**
 * Serializes one Hive row to JSON text.
 *
 * <p>The row is first converted by {@code deparseRow} using the supplied ObjectInspector;
 * the converted object is then written as a JSON string by a Jackson {@code ObjectMapper}
 * and wrapped in a {@code Text}.
 *
 * @param obj the row object from Hive
 * @param oi the inspector describing {@code obj}
 * @return a {@code Text} holding the JSON string
 * @throws SerDeException wrapping any exception raised while producing the JSON text
 */
@Override
public Writable serialize(final Object obj, final ObjectInspector oi)
        throws SerDeException {
    final Object jsonReady = deparseRow(obj, oi);
    final ObjectMapper jsonMapper = new ObjectMapper();
    try {
        // Jackson does the actual JSON encoding.
        final String json = jsonMapper.writeValueAsString(jsonReady);
        return new Text(json);
    } catch (final Exception e) {
        throw new SerDeException(e);
    }
}
/**
 * Expects initialization to fail with {@link UDFArgumentException} when the constant
 * second argument is "kr".
 */
@Test(expected = UDFArgumentException.class)
public void testTwoArgumentsUnsupportedLang() throws IOException, HiveException {
    StoptagsExcludeUDF udf = new StoptagsExcludeUDF();

    ObjectInspector tagsOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    ObjectInspector langOI = HiveUtils.getConstStringObjectInspector("kr");

    udf.initialize(new ObjectInspector[] {tagsOI, langOI});
    udf.close();
}
/**
 * Runs the ArrayOfDoubles union evaluator in PARTIAL1 mode with default parameters, feeds two
 * single-key sketches, and checks the three-element intermediate result.
 */
@Test
public void partial1ModeDefaultParams() throws Exception {
    ObjectInspector[] inspectors = { binaryInspector };
    GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
    try (GenericUDAFEvaluator eval = new UnionArrayOfDoublesSketchUDAF().getEvaluator(info)) {
        ObjectInspector resultInspector = eval.init(Mode.PARTIAL1, inspectors);
        DataToArrayOfDoublesSketchUDAFTest.checkIntermediateResultInspector(resultInspector);

        ArrayOfDoublesState state = (ArrayOfDoublesState) eval.getNewAggregationBuffer();

        // Two input sketches holding the keys 1 and 2, each with the single value 1.
        for (long key = 1; key <= 2; key++) {
            ArrayOfDoublesUpdatableSketch input = new ArrayOfDoublesUpdatableSketchBuilder().build();
            input.update(key, new double[] {1});
            eval.iterate(state, new Object[] {new BytesWritable(input.compact().toByteArray())});
        }

        Object result = eval.terminatePartial(state);
        Assert.assertNotNull(result);
        Assert.assertTrue(result instanceof List);

        List<?> fields = (List<?>) result;
        Assert.assertEquals(fields.size(), 3);
        Assert.assertEquals(((IntWritable) fields.get(0)).get(), DEFAULT_NOMINAL_ENTRIES);
        Assert.assertEquals(((IntWritable) fields.get(1)).get(), 1);

        ArrayOfDoublesSketch resultSketch = ArrayOfDoublesSketches.wrapSketch(
            BytesWritableHelper.wrapAsMemory((BytesWritable) fields.get(2)));
        Assert.assertEquals(resultSketch.getEstimate(), 2.0);
    }
}
/**
 * Passes {@link XGBoostTrainUDTF} through the generic UDTF serialization check with a
 * feature list, a double target, and the constant option string {@code -objective reg:linear}.
 */
@Test
public void testSerialization() throws HiveException {
    ObjectInspector featuresOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    ObjectInspector targetOI = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
    ObjectInspector optionsOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-objective reg:linear");

    Object[][] rows = {{Arrays.asList("1:-2", "2:-1"), 0.d}};

    TestUtils.testGenericUDTFSerialization(XGBoostTrainUDTF.class,
        new ObjectInspector[] {featuresOI, targetOI, optionsOI}, rows);
}
/**
 * Validates the arguments of {@code array_to_str(array, string sep)} and stores their
 * inspectors. The separator argument is optional.
 *
 * @param argOIs inspectors of the call arguments (one or two entries)
 * @return the Java string object inspector
 * @throws UDFArgumentException if the argument count or an argument type is rejected
 */
@Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    final int numArgs = argOIs.length;
    if (numArgs < 1 || numArgs > 2) {
        throw new UDFArgumentLengthException(
            "array_to_str(array, string sep) expects one or two arguments: " + numArgs);
    }

    this.listOI = HiveUtils.asListOI(argOIs[0]);
    // The separator inspector is only captured when a second argument is supplied.
    if (numArgs == 2) {
        this.sepOI = HiveUtils.asStringOI(argOIs[1]);
    }

    return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
}
/**
 * Validates that exactly two arguments are given, resolves the first as a double-compatible
 * inspector and the second as an integer inspector, and allocates the result holder.
 *
 * @param argOIs inspectors of the call arguments (exactly two entries)
 * @return the writable int object inspector
 * @throws UDFArgumentException if the argument count or an argument type is rejected
 */
@Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    final int numArgs = argOIs.length;
    if (numArgs != 2) {
        throw new UDFArgumentException("_FUNC_ takes exactly 2 arguments: " + numArgs);
    }

    this.latOI = HiveUtils.asDoubleCompatibleOI(argOIs[0]);
    this.zoomOI = HiveUtils.asIntegerOI(argOIs[1]);

    this.result = new IntWritable();
    return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
}
/**
 * Prepares the evaluator for the given aggregation mode.
 *
 * <p>In PARTIAL1/COMPLETE the first two parameters are taken as list inspectors and an
 * optional third as an integer inspector. In the other modes the single parameter is a
 * struct inspector from which the "count" and "sum" fields are looked up.
 *
 * @param mode the aggregation mode
 * @param parameters inspectors of the incoming parameters (one to three entries)
 * @return the partial-result inspector for PARTIAL1/PARTIAL2, otherwise the writable
 *         double inspector
 * @throws HiveException if the superclass or an inspector conversion fails
 */
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
    assert (parameters.length >= 1 && parameters.length <= 3) : parameters.length;
    super.init(mode, parameters);

    // input side
    final boolean fromOriginalData = (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE);
    if (fromOriginalData) {
        this.recommendListOI = (ListObjectInspector) parameters[0];
        this.truthListOI = (ListObjectInspector) parameters[1];
        if (parameters.length == 3) {
            this.recommendSizeOI = HiveUtils.asIntegerOI(parameters[2]);
        }
    } else {
        // merging partial aggregations
        final StructObjectInspector partialOI = (StructObjectInspector) parameters[0];
        this.internalMergeOI = partialOI;
        this.countField = partialOI.getStructFieldRef("count");
        this.sumField = partialOI.getStructFieldRef("sum");
    }

    // output side
    final boolean emitsPartial = (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2);
    if (emitsPartial) {
        // terminatePartial
        return internalMergeOI();
    }
    // terminate
    return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
}
/**
 * Trains {@link MatrixFactorizationSGDUDTF} with only {@code -factor 3} specified, checks
 * that the rank initialization scheme is {@code RankInitScheme.random}, and then checks
 * that every predicted rating is within 0.2 of the training rating.
 */
@Test
public void testDefaultInit() throws HiveException {
    // The banner names this test (it previously printed "testGaussian()").
    println("--------------------------\n testDefaultInit()");

    OnlineMatrixFactorizationUDTF mf = new MatrixFactorizationSGDUDTF();
    ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    ObjectInspector floatOI = PrimitiveObjectInspectorFactory.javaFloatObjectInspector;
    ObjectInspector param = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-factor 3");
    ObjectInspector[] argOIs = new ObjectInspector[] {intOI, intOI, floatOI, param};
    mf.initialize(argOIs);
    Assert.assertTrue(mf.rankInit == RankInitScheme.random);

    float[][] rating = {{5, 3, 0, 1}, {4, 0, 0, 1}, {1, 1, 0, 5}, {1, 0, 0, 4}, {0, 1, 5, 4}};

    // Feed every (row, col, rating) cell to the trainer, repeated for a fixed number of passes.
    // The same args array is reused for each call.
    Object[] args = new Object[3];
    final int num_iters = 100;
    for (int iter = 0; iter < num_iters; iter++) {
        for (int row = 0; row < rating.length; row++) {
            for (int col = 0, size = rating[row].length; col < size; col++) {
                args[0] = row;
                args[1] = col;
                args[2] = rating[row][col];
                mf.process(args);
            }
        }
    }

    // Compare each prediction against the training rating.
    for (int row = 0; row < rating.length; row++) {
        for (int col = 0, size = rating[row].length; col < size; col++) {
            double predicted = mf.predict(row, col);
            print(rating[row][col] + "[" + predicted + "]\t");
            Assert.assertEquals(rating[row][col], predicted, 0.2d);
        }
        println();
    }
}
/**
 * Serializes a Hive row by writing it through {@code command} into {@code scratchPad}
 * and handing that content to {@code result}.
 *
 * The same {@code scratchPad}, {@code hiveType} and {@code result} members are reused on
 * every call, and the returned object is the {@code result} member itself.
 *
 * @param data the row object to serialize
 * @param objInspector the inspector describing {@code data}
 * @return the {@code result} member after its content has been set from {@code scratchPad}
 * @throws SerDeException declared by the overridden method
 */
@Override
public Writable serialize(Object data, ObjectInspector objInspector) throws SerDeException {
lazyInitializeWrite();
// serialize the type directly to json (to avoid converting to Writable and then serializing)
// Clear whatever the previous call left in the buffer.
scratchPad.reset();
// Point the reusable holder at this row and its inspector.
hiveType.setObjectInspector(objInspector);
hiveType.setObject(data);
// We use the command directly instead of the bulk entry writer since there is no close() method on SerDes.
// See FileSinkOperator#process() for more info of how this is used with the output format.
command.write(hiveType).copyTo(scratchPad);
result.setContent(scratchPad);
return result;
}