001/* 002 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved. 003 * 004 * Project and contact information: http://www.cascading.org/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021package cascading.flow.tez.stream.element; 022 023import java.io.IOException; 024 025import cascading.cascade.CascadeException; 026import cascading.flow.FlowProcess; 027import cascading.flow.FlowProcessWrapper; 028import cascading.flow.stream.element.SourceStage; 029import cascading.flow.tez.Hadoop2TezFlowProcess; 030import cascading.flow.tez.util.TezUtil; 031import cascading.tap.Tap; 032import org.apache.hadoop.mapred.RecordReader; 033import org.apache.tez.dag.api.TezConfiguration; 034import org.apache.tez.mapreduce.input.MRInput; 035import org.apache.tez.mapreduce.lib.MRReader; 036import org.apache.tez.runtime.api.LogicalInput; 037import org.slf4j.Logger; 038import org.slf4j.LoggerFactory; 039 040/** 041 * 042 */ 043public class TezSourceStage extends SourceStage 044 { 045 private static final Logger LOG = LoggerFactory.getLogger( TezSourceStage.class ); 046 047 private final MRInput logicalInput; 048 private MRReader reader; 049 050 public TezSourceStage( FlowProcess flowProcess, Tap source, LogicalInput logicalInput ) 051 { 052 super( flowProcess, source ); 053 054 if( logicalInput == null ) 055 throw new IllegalArgumentException( "input must not be null" ); 056 057 this.logicalInput = (MRInput) logicalInput; 058 } 059 060 @Override 061 public void prepare() 062 { 063 LOG.info( "calling {}#start() on: {}", logicalInput.getClass().getSimpleName(), getSource() ); 064 065 logicalInput.start(); 066 067 Hadoop2TezFlowProcess tezFlowProcess = (Hadoop2TezFlowProcess) FlowProcessWrapper.undelegate( flowProcess ); 068 069 TezConfiguration configuration = tezFlowProcess.getConfiguration(); 070 071 try 072 { 073 reader = (MRReader) logicalInput.getReader(); 074 } 075 catch( IOException exception ) 076 { 077 throw new CascadeException( "unable to get reader", exception ); 078 } 079 080 // set the cascading.source.path property for the current split 081 // if a TezGroupedSplit, currently won't set 082 TezUtil.setSourcePathForSplit( logicalInput, reader, configuration ); 083 } 084 085 @Override 086 public void run( Object input ) throws Throwable 087 { 088 RecordReader oldRecordReader = (RecordReader) ( reader ).getRecordReader(); 089 090 super.run( oldRecordReader ); 091 } 092 }