001/*
002 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.flow.tez.stream.element;
022
023import java.io.IOException;
024
025import cascading.cascade.CascadeException;
026import cascading.flow.FlowProcess;
027import cascading.flow.FlowProcessWrapper;
028import cascading.flow.stream.element.SourceStage;
029import cascading.flow.tez.Hadoop2TezFlowProcess;
030import cascading.flow.tez.util.TezUtil;
031import cascading.tap.Tap;
032import org.apache.hadoop.mapred.RecordReader;
033import org.apache.tez.dag.api.TezConfiguration;
034import org.apache.tez.mapreduce.input.MRInput;
035import org.apache.tez.mapreduce.lib.MRReader;
036import org.apache.tez.runtime.api.LogicalInput;
037import org.slf4j.Logger;
038import org.slf4j.LoggerFactory;
039
040/**
041 *
042 */
043public class TezSourceStage extends SourceStage
044  {
045  private static final Logger LOG = LoggerFactory.getLogger( TezSourceStage.class );
046
047  private final MRInput logicalInput;
048  private MRReader reader;
049
050  public TezSourceStage( FlowProcess flowProcess, Tap source, LogicalInput logicalInput )
051    {
052    super( flowProcess, source );
053
054    if( logicalInput == null )
055      throw new IllegalArgumentException( "input must not be null" );
056
057    this.logicalInput = (MRInput) logicalInput;
058    }
059
060  @Override
061  public void prepare()
062    {
063    LOG.info( "calling {}#start() on: {}", logicalInput.getClass().getSimpleName(), getSource() );
064
065    logicalInput.start();
066
067    Hadoop2TezFlowProcess tezFlowProcess = (Hadoop2TezFlowProcess) FlowProcessWrapper.undelegate( flowProcess );
068
069    TezConfiguration configuration = tezFlowProcess.getConfiguration();
070
071    try
072      {
073      reader = (MRReader) logicalInput.getReader();
074      }
075    catch( IOException exception )
076      {
077      throw new CascadeException( "unable to get reader", exception );
078      }
079
080    // set the cascading.source.path property for the current split
081    // if a TezGroupedSplit, currently won't set
082    TezUtil.setSourcePathForSplit( logicalInput, reader, configuration );
083    }
084
085  @Override
086  public void run( Object input ) throws Throwable
087    {
088    RecordReader oldRecordReader = (RecordReader) ( reader ).getRecordReader();
089
090    super.run( oldRecordReader );
091    }
092  }