The following are example usages of org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit collected from open-source projects, paired with various org.apache.hadoop.mapreduce.RecordReader implementations (such as org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader).
@SuppressWarnings("unchecked")
public void prepareToRead(RecordReader reader, PigSplit split)
throws IOException {
this.reader = reader;
// Get the schema string from the UDFContext object.
UDFContext udfc = UDFContext.getUDFContext();
Properties p =
udfc.getUDFProperties(this.getClass(), new String[]{udfcSignature});
String strSchema = p.getProperty(SCHEMA_SIGNATURE);
if (strSchema == null) {
throw new IOException("Could not find schema in UDF context");
}
// Parse the schema from the string stored in the properties object.
schema = new ResourceSchema(Utils.getSchemaFromString(strSchema));
jsonFactory = new JsonFactory();
}
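This is the back-end half of a common handshake: the schema string must already have been stored under SCHEMA_SIGNATURE on the front end. A minimal sketch of that front-end counterpart, assuming the same udfcSignature field and SCHEMA_SIGNATURE constant (the fixed schema string is purely illustrative):
@Override
public void setUDFContextSignature(String signature) {
    // Pig passes the same signature to the front end and the back end;
    // that is what ties this property write to the read in prepareToRead().
    udfcSignature = signature;
}
public ResourceSchema getSchema(String location, Job job) throws IOException {
    // A real loader would derive the schema from the data or constructor args.
    ResourceSchema s =
        new ResourceSchema(Utils.getSchemaFromString("a:chararray, b:int"));
    UDFContext udfc = UDFContext.getUDFContext();
    Properties p =
        udfc.getUDFProperties(this.getClass(), new String[]{udfcSignature});
    p.setProperty(SCHEMA_SIGNATURE, s.toString());
    return s;
}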
@SuppressWarnings("rawtypes")
@Override
public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
super.prepareToRead(reader, split);
numRowsSampled = 0;
avgTupleMemSz = 0;
rowNum = 0;
skipInterval = -1;
memToSkipPerSample = 0;
numRowSplTupleReturned = false;
newSample = null;
Configuration conf = split.getConf();
sampleRate = conf.getInt(PigConfiguration.PIG_POISSON_SAMPLER_SAMPLE_RATE, DEFAULT_SAMPLE_RATE);
heapPerc = conf.getFloat(PigConfiguration.PIG_SKEWEDJOIN_REDUCE_MEMUSAGE,
PartitionSkewedKeys.DEFAULT_PERCENT_MEMUSAGE);
}
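The fields reset here drive the Poisson sampling used for skewed joins. A sketch of how they typically interact, modeled on Pig's PoissonSampleLoader (updateSkipInterval is an assumed helper name; Tuple.getMemorySize() is the real Pig API):
private void updateSkipInterval(Tuple t) {
    if (skipInterval == -1) {
        // First sampled tuple: budget the reducer heap, then divide it by the
        // sample rate to get the memory each sample is meant to represent.
        long availRedMem = (long) (Runtime.getRuntime().maxMemory() * heapPerc);
        memToSkipPerSample = availRedMem / sampleRate;
    }
    // Keep a running average of tuple size, and skip enough rows between
    // samples to cover memToSkipPerSample bytes.
    avgTupleMemSz =
        ((avgTupleMemSz * numRowsSampled) + t.getMemorySize()) / (numRowsSampled + 1);
    skipInterval = memToSkipPerSample / avgTupleMemSz;
    numRowsSampled++;
}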
private boolean initializeReader() throws IOException,
InterruptedException {
if(curSplitIndex > inpSplits.size() - 1) {
// past the last split, we are done
return false;
}
if(reader != null){
reader.close();
}
InputSplit curSplit = inpSplits.get(curSplitIndex);
TaskAttemptContext tAContext = HadoopShims.createTaskAttemptContext(conf,
new TaskAttemptID());
reader = inputFormat.createRecordReader(curSplit, tAContext);
reader.initialize(curSplit, tAContext);
// Create a dummy PigSplit; other than the actual split, the remaining
// params are not needed here, since we are just reading the input
// completely.
PigSplit pigSplit = new PigSplit(new InputSplit[] {curSplit}, -1,
new ArrayList<OperatorKey>(), -1);
// Set the conf object so that if the wrappedLoadFunc uses it,
// it won't be null
pigSplit.setConf(conf);
wrappedLoadFunc.prepareToRead(reader, pigSplit);
return true;
}
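A plausible driver for initializeReader(), reading every split to the end before advancing (names taken from the snippet above; the control flow is a sketch modeled on Pig's ReadToEndLoader, not the verbatim source):
public Tuple getNext() throws IOException {
    try {
        // Lazily open the first split.
        if (reader == null && !initializeReader()) {
            return null;
        }
        Tuple t = wrappedLoadFunc.getNext();
        while (t == null) {
            // Current split is exhausted; move to the next one.
            curSplitIndex++;
            if (!initializeReader()) {
                return null; // past the last split
            }
            t = wrappedLoadFunc.getNext();
        }
        return t;
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}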
@Override
public void attachInputs(Map<String, LogicalInput> inputs,
Configuration conf)
throws ExecException {
this.conf = conf;
LogicalInput logInput = inputs.get(inputKey);
if (logInput == null || !(logInput instanceof MRInput)) {
throw new ExecException("POSimpleTezLoad only accepts MRInputs");
}
input = (MRInput) logInput;
try {
reader = input.getReader();
// Set the split index; MergeCoGroup needs it, and this input is the
// only input of the MergeCoGroup vertex.
if (reader instanceof MRReader) {
int splitIndex = ((PigSplit)((MRReader)reader).getSplit()).getSplitIndex();
PigMapReduce.sJobContext.getConfiguration().setInt(PigImplConstants.PIG_SPLIT_INDEX, splitIndex);
}
} catch (IOException e) {
throw new ExecException(e);
}
}
@Override
public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
// Save reader to use in getNext()
this.reader = reader;
splitIndex = split.getSplitIndex();
// Get schema from front-end
UDFContext udfc = UDFContext.getUDFContext();
Properties p = udfc.getUDFProperties(this.getClass(), new String[] { udfContextSignature });
String strSchema = p.getProperty(SCHEMA_SIGNATURE);
if (strSchema == null) {
throw new IOException("Could not find schema in UDF context");
}
schema = new ResourceSchema(Utils.getSchemaFromString(strSchema));
requiredFields = (boolean[]) ObjectSerializer.deserialize(p.getProperty(REQUIRED_FIELDS_SIGNATURE));
if (requiredFields != null) {
numRequiredFields = 0;
for (int i = 0; i < requiredFields.length; i++) {
if (requiredFields[i])
numRequiredFields++;
}
}
}
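The REQUIRED_FIELDS_SIGNATURE property consumed here would normally be written by a pushProjection() implementation on the front end. A hedged sketch of that counterpart, assuming the same udfContextSignature field and REQUIRED_FIELDS_SIGNATURE constant:
@Override
public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList)
        throws FrontendException {
    // Mark the requested columns in a boolean[] indexed by column position.
    int max = -1;
    for (RequiredField f : requiredFieldList.getFields()) {
        max = Math.max(max, f.getIndex());
    }
    boolean[] needed = new boolean[max + 1];
    for (RequiredField f : requiredFieldList.getFields()) {
        needed[f.getIndex()] = true;
    }
    Properties p = UDFContext.getUDFContext()
            .getUDFProperties(this.getClass(), new String[] { udfContextSignature });
    try {
        p.setProperty(REQUIRED_FIELDS_SIGNATURE, ObjectSerializer.serialize(needed));
    } catch (IOException e) {
        throw new FrontendException(e);
    }
    return new RequiredFieldResponse(true);
}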
@Test
public void testGetNext() throws Exception {
setUpJob();
List<Pair<String, MetricFeature>> mocks = new ArrayList<Pair<String, MetricFeature>>();
mocks.add(new Pair<String, MetricFeature>("", metric));
MockRecordReader<String, MetricFeature> mockRecordReader = new MockRecordReader<String, MetricFeature>(mocks);
MetricFeatureLoader loader = new MetricFeatureLoader();
setLocation(loader, new Job(), "mockInst", "mockZk",accumuloMiniClusterDriver.getRootPassword());
loader.prepareToRead(mockRecordReader, new PigSplit());
org.apache.pig.data.Tuple t;
while((t = loader.getNext()) != null) {
assertEquals(metric.getTimestamp(), t.get(0));
assertEquals(TimeUnit.MINUTES.toString(), t.get(1));
assertEquals(metric.getGroup(), t.get(2));
assertEquals(metric.getType(), t.get(3));
assertEquals(metric.getName(), t.get(4));
assertEquals(metric.getVisibility(), t.get(5));
assertEquals(metric.getVector().getSum(), t.get(6));
}
}
@SuppressWarnings("unchecked")
@Override
public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
this.reader = reader;
final String resourceSchemaAsStr = getValueFromUDFContext(this.contextSignature, RESOURCE_SCHEMA_SIGNATURE);
if (resourceSchemaAsStr == null) {
throw new IOException("Could not find schema in UDF context");
}
schema = (ResourceSchema)ObjectSerializer.deserialize(resourceSchemaAsStr);
}
@SuppressWarnings("rawtypes")
@Override
public void prepareToRead(RecordReader reader, PigSplit split)
throws IOException {
AvroStorageLog.funcCall("prepareToRead");
this.reader = (PigAvroRecordReader) reader;
}
public void prepareToRead(RecordReader reader, PigSplit split)
{
this.reader = reader;
if (reader instanceof CqlRecordReader) {
nativeProtocolVersion = ((CqlRecordReader) reader).getNativeProtocolVersion();
}
}
@SuppressWarnings("unchecked")
@Override
public void prepareToRead(@SuppressWarnings("rawtypes") RecordReader reader, PigSplit split)
throws IOException {
LOG.debug("LoadFunc.prepareToRead({}, {})", reader, split);
this.reader = reader;
}
@Override
public void prepareToRead(RecordReader reader, PigSplit split) {
in = reader;
if (tagFile || tagPath) {
sourcePath = ((FileSplit)split.getWrappedSplit()).getPath();
}
}
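A sketch of how sourcePath is typically consumed in getNext(), modeled on PigStorage's -tagFile/-tagPath options (applyTag is a hypothetical helper; the real code inlines this when building the tuple):
private void applyTag(List<Object> fields) {
    if (tagFile) {
        // Prepend just the file name as the first field.
        fields.add(0, new DataByteArray(sourcePath.getName()));
    } else if (tagPath) {
        // Prepend the full path instead.
        fields.add(0, new DataByteArray(sourcePath.toString()));
    }
}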
@SuppressWarnings("rawtypes")
@Override
public final void prepareToRead(final RecordReader r, final PigSplit s)
throws IOException {
reader = r;
split = s;
}
private void writeDebugHeader() {
processError("===== Task Information Header =====" );
processError("\nCommand: " + command);
processError("\nStart time: " + new Date(System.currentTimeMillis()));
if (job.getBoolean(MRConfiguration.TASK_IS_MAP, false)) {
MapContext context = (MapContext)PigMapReduce.sJobContext;
PigSplit pigSplit = (PigSplit)context.getInputSplit();
int numPaths = pigSplit.getNumPaths();
processError("\nPigSplit contains " + numPaths + " wrappedSplits.");
StringBuilder sb = new StringBuilder();
for(int i = 0; i < numPaths; i++) {
InputSplit wrappedSplit = pigSplit.getWrappedSplit(i);
if (wrappedSplit instanceof FileSplit) {
FileSplit mapInputFileSplit = (FileSplit)wrappedSplit;
sb.append("\nInput-split: file=");
sb.append(mapInputFileSplit.getPath());
sb.append(" start-offset=");
sb.append(Long.toString(mapInputFileSplit.getStart()));
sb.append(" length=");
sb.append(Long.toString(mapInputFileSplit.getLength()));
processError(sb.toString());
sb.setLength(0);
}
}
}
processError("\n===== * * * =====\n");
}
@Test
public void test1() throws IOException, InterruptedException {
ArrayList<InputSplit> rawSplits = new ArrayList<InputSplit>();
rawSplits.add(new DummyInputSplit(500, new String[] {
"l1", "l2", "l3"
}));
rawSplits.add(new DummyInputSplit(400, new String[] {
"l1", "l2", "l3"
}));
rawSplits.add(new DummyInputSplit(400, new String[] {
"l1", "l4", "l5"
}));
List<InputSplit> result = pigInputFormat.getPigSplits(rawSplits, 0, ok,
null, true, conf);
Assert.assertEquals(2, result.size());
int index = 0;
for (InputSplit split : result) {
PigSplit pigSplit = (PigSplit) split;
int len = pigSplit.getNumPaths();
if (index == 0) {
Assert.assertEquals(2, len);
checkLocations(pigSplit.getLocations(), new String[] {
"l1", "l2", "l3"
});
Assert.assertEquals(500, pigSplit.getLength(0));
Assert.assertEquals(400, pigSplit.getLength(1));
}
else {
Assert.assertEquals(1, len);
checkLocations(pigSplit.getLocations(), new String[] {
"l1", "l4", "l5"
});
Assert.assertEquals(400, pigSplit.getLength(0));
}
index++;
}
}
@Test
public void test3() throws IOException, InterruptedException {
ArrayList<InputSplit> rawSplits = new ArrayList<InputSplit>();
rawSplits.add(new DummyInputSplit(500, new String[] {
"l1", "l2", "l3"
}));
rawSplits.add(new DummyInputSplit(200, new String[] {
"l1", "l2", "l3"
}));
rawSplits.add(new DummyInputSplit(100, new String[] {
"l1", "l4", "l5"
}));
List<InputSplit> result = pigInputFormat.getPigSplits(rawSplits, 0, ok,
null, true, conf);
Assert.assertEquals(1, result.size());
for (InputSplit split : result) {
PigSplit pigSplit = (PigSplit) split;
int len = pigSplit.getNumPaths();
Assert.assertEquals(3, len);
checkLocations(pigSplit.getLocations(), new String[] {
"l1", "l2", "l3", "l4", "l5"
});
Assert.assertEquals(500, pigSplit.getLength(0));
Assert.assertEquals(200, pigSplit.getLength(1));
Assert.assertEquals(100, pigSplit.getLength(2));
}
}
@Test
public void test8() throws IOException, InterruptedException {
ArrayList<InputSplit> rawSplits = new ArrayList<InputSplit>();
rawSplits.add(new DummyInputSplit(100, new String[] {
"l1", "l2", "l3"
}));
rawSplits.add(new DummyInputSplit(100, new String[] {
"l1", "l2", "l3"
}));
rawSplits.add(new DummyInputSplit(200, new String[] {
"l1", "l4", "l5"
}));
List<InputSplit> result = pigInputFormat.getPigSplits(rawSplits, 0, ok,
null, true, conf);
Assert.assertEquals(1, result.size());
int index = 0;
for (InputSplit split : result) {
PigSplit pigSplit = (PigSplit) split;
int len = pigSplit.getNumPaths();
Assert.assertEquals(3, len);
checkLocations(pigSplit.getLocations(), new String[] {
"l1", "l2", "l3", "l4", "l5"
});
Assert.assertEquals(200, pigSplit.getLength(0));
Assert.assertEquals(100, pigSplit.getLength(1));
Assert.assertEquals(100, pigSplit.getLength(2));
index++;
}
}
@Test
public void test9() throws IOException, InterruptedException {
// verify locations in order
ArrayList<InputSplit> rawSplits = new ArrayList<InputSplit>();
rawSplits.add(new DummyInputSplit(100, new String[] {
"l1", "l2", "l3"
}));
rawSplits.add(new DummyInputSplit(200, new String[] {
"l3", "l4", "l5"
}));
rawSplits.add(new DummyInputSplit(400, new String[] {
"l5", "l6", "l1"
}));
List<InputSplit> result = pigInputFormat.getPigSplits(rawSplits, 0, ok,
null, true, conf);
Assert.assertEquals(1, result.size());
int index = 0;
for (InputSplit split : result) {
PigSplit pigSplit = (PigSplit) split;
int len = pigSplit.getNumPaths();
Assert.assertEquals(3, len);
// only 5 locations are in list: refer to PIG-1648 for more details
checkLocationOrdering(pigSplit.getLocations(), new String[] {
"l5", "l1", "l6", "l3", "l4"
});
Assert.assertEquals(400, pigSplit.getLength(0));
Assert.assertEquals(200, pigSplit.getLength(1));
Assert.assertEquals(100, pigSplit.getLength(2));
index++;
}
}
@SuppressWarnings("rawtypes")
@Override
public void prepareToRead(RecordReader reader, PigSplit split)
throws IOException {
AvroStorageLog.funcCall("prepareToRead");
this.reader = (PigAvroRecordReader) reader;
}
@Override
public void prepareToRead(@SuppressWarnings("rawtypes") RecordReader reader, PigSplit split) {
in = reader;
splitIndex = split.getSplitIndex();
if (headerTreatment == Headers.DEFAULT) {
headerTreatment = Headers.READ_INPUT_HEADER;
}
}
@SuppressWarnings("rawtypes")
@Override
public void prepareToRead(RecordReader reader, PigSplit arg1)
throws IOException {
this.reader = (ResourceRecordReader) reader;
// FileSplit fSplit = (FileSplit) arg1.getWrappedSplit();
// System.err.format("Prepare to read(%s) (%d-%d)\n",
// fSplit.getPath().toUri().toASCIIString(),
// fSplit.getStart(),fSplit.getLength());
}
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
this.reader = reader;
aliasesTupleNames = StringUtils.tokenize(getUDFProperties().getProperty(
InternalConfigurationOptions.INTERNAL_ES_TARGET_FIELDS));
}
@Test
public void testGetNext() throws Exception {
setUpJob();
List<Pair<String, EntityWritable>> mocks = new ArrayList<Pair<String, EntityWritable>>();
mocks.add(new Pair<String, EntityWritable>("", new EntityWritable(entity)));
MockRecordReader<String, EntityWritable> mockRecordReader = new MockRecordReader<String, EntityWritable>(mocks);
EntityLoader loader = new EntityLoader("q.eq('key','val')");
loader.prepareToRead(mockRecordReader, new PigSplit());
org.apache.pig.data.Tuple t;
int count = 0;
Iterator<org.calrissian.mango.domain.Attribute> attributes = entity.getAttributes().iterator();
while((t = loader.getNext()) != null) {
org.calrissian.mango.domain.Attribute attribute = attributes.next();
count++;
if(count == 1) {
assertEquals(entity.getType(), t.get(0));
assertEquals(entity.getId(), t.get(1));
assertEquals(attribute.getKey(), t.get(2));
assertEquals(loader.registry.getAlias(attribute.getValue()), t.get(3));
assertEquals(loader.registry.encode(attribute.getValue()), t.get(4));
} else if(count == 2) {
assertEquals(entity.getType(), t.get(0));
assertEquals(entity.getId(), t.get(1));
assertEquals(attribute.getKey(), t.get(2));
assertEquals(loader.registry.getAlias(attribute.getValue()), t.get(3));
assertEquals(loader.registry.encode(attribute.getValue()), t.get(4));
}
}
assertEquals(2, count);
}
@Test
public void testGetNext() throws Exception {
setUpJob();
List<Pair<String, EventWritable>> mocks = new ArrayList<Pair<String, EventWritable>>();
mocks.add(new Pair<String, EventWritable>("", new EventWritable(event)));
MockRecordReader<String, EventWritable> mockRecordReader = new MockRecordReader<String, EventWritable>(mocks);
EventLoader loader = new EventLoader("q.eq('key','val')");
loader.prepareToRead(mockRecordReader, new PigSplit());
org.apache.pig.data.Tuple t;
int count = 0;
Iterator<org.calrissian.mango.domain.Attribute> attributes = event.getAttributes().iterator();
while((t = loader.getNext()) != null) {
org.calrissian.mango.domain.Attribute attribute = attributes.next();
count++;
if(count == 1) {
assertEquals(event.getId(), t.get(0));
assertEquals(event.getTimestamp(), t.get(1));
assertEquals(attribute.getKey(), t.get(2));
assertEquals(loader.registry.getAlias(attribute.getValue()), t.get(3));
assertEquals(loader.registry.encode(attribute.getValue()), t.get(4));
} else if(count == 2) {
assertEquals(event.getId(), t.get(0));
assertEquals(event.getTimestamp(), t.get(1));
assertEquals(attribute.getKey(), t.get(2));
assertEquals(loader.registry.getAlias(attribute.getValue()), t.get(3));
assertEquals(loader.registry.encode(attribute.getValue()), t.get(4));
}
}
assertEquals(2, count);
}
@Override
public void prepareToRead(@SuppressWarnings("rawtypes") RecordReader newReader, PigSplit pigSplit) {
// Note that for this Loader, we don't care about the PigSplit.
if (newReader instanceof ApacheHttpdLogfileRecordReader) {
this.reader = (ApacheHttpdLogfileRecordReader) newReader;
} else {
throw new IncorrectRecordReaderException();
}
}
@Override
public void prepareToRead(RecordReader newReader, PigSplit split) {
LOG.info("[{}]: prepareToRead() -> {}", signature, split);
this.reader = (IcebergRecordReader) newReader;
}
@Override
public void prepareToRead(RecordReader reader, PigSplit split) {
LOG.info(format("[%s]: prepareToRead() -> %s", signature, split));
this.reader = (IcebergRecordReader) reader;
}
@Override
public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
recordReader = (VespaSimpleJsonInputFormat.VespaJsonRecordReader) reader;
}
@Override
public void prepareToRead(final RecordReader reader, final PigSplit split) {
this.reader = reader;
}
public void prepareToRead(RecordReader reader, PigSplit split)
{
this.reader = reader;
}
@SuppressWarnings("unchecked")
@Override
public void prepareToRead(RecordReader reader, PigSplit split)
throws IOException {
this.reader = (SequenceFileRecordReader) reader;
}
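For completeness, a hedged sketch of the getNext() that usually accompanies a prepareToRead() like the last one: pull the next key/value pair from the wrapped SequenceFileRecordReader and wrap it in a tuple (a real loader would convert the Writables to Pig datatypes first):
@Override
public Tuple getNext() throws IOException {
    try {
        if (!reader.nextKeyValue()) {
            return null; // end of this split
        }
        Object key = reader.getCurrentKey();
        Object value = reader.getCurrentValue();
        // A real loader maps the Writables to Pig types here.
        return TupleFactory.getInstance().newTuple(Arrays.asList(key, value));
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}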