001 /* 002 * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved. 003 * 004 * Project and contact information: http://www.cascading.org/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021 package cascading.flow.hadoop.stream; 022 023 import cascading.CascadingException; 024 import cascading.flow.FlowProcess; 025 import cascading.flow.SliceCounters; 026 import cascading.flow.hadoop.HadoopGroupByClosure; 027 import cascading.flow.stream.Duct; 028 import cascading.flow.stream.DuctException; 029 import cascading.pipe.GroupBy; 030 import cascading.tuple.Tuple; 031 import cascading.tuple.TupleEntry; 032 import cascading.tuple.io.TuplePair; 033 034 /** 035 * 036 */ 037 public class HadoopGroupByGate extends HadoopGroupGate 038 { 039 public HadoopGroupByGate( FlowProcess flowProcess, GroupBy groupBy, Role role ) 040 { 041 super( flowProcess, groupBy, role ); 042 } 043 044 @Override 045 public void prepare() 046 { 047 super.prepare(); 048 049 if( role != Role.sink ) 050 closure = new HadoopGroupByClosure( flowProcess, keyFields, valuesFields ); 051 052 if( grouping != null && splice.getJoinDeclaredFields() != null && splice.getJoinDeclaredFields().isNone() ) 053 grouping.joinerClosure = closure; 054 } 055 056 @Override 057 public void receive( Duct previous, TupleEntry incomingEntry ) 058 { 059 // always use pos == 0 since all key/value/sort fields are guaranteed to be the same 060 Tuple groupTuple = keyBuilder[ 0 ].makeResult( incomingEntry.getTuple(), null ); 061 Tuple sortTuple = sortFields == null ? null : sortBuilder[ 0 ].makeResult( incomingEntry.getTuple(), null ); 062 Tuple valuesTuple = valuesBuilder[ 0 ].makeResult( incomingEntry.getTuple(), null ); // nulls out the dupe values 063 064 Tuple groupKey = sortTuple == null ? groupTuple : new TuplePair( groupTuple, sortTuple ); 065 066 try 067 { 068 collector.collect( groupKey, valuesTuple ); 069 flowProcess.increment( SliceCounters.Tuples_Written, 1 ); 070 } 071 catch( OutOfMemoryError error ) 072 { 073 handleReThrowableException( "out of memory, try increasing task memory allocation", error ); 074 } 075 catch( CascadingException exception ) 076 { 077 handleException( exception, incomingEntry ); 078 } 079 catch( Throwable throwable ) 080 { 081 handleException( new DuctException( "internal error: " + incomingEntry.getTuple().print(), throwable ), incomingEntry ); 082 } 083 } 084 085 @Override 086 protected Tuple unwrapGrouping( Tuple key ) 087 { 088 return sortFields == null ? key : ( (TuplePair) key ).getLhs(); 089 } 090 }