001    /*
002     * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.flow.hadoop.stream;
022    
023    import cascading.CascadingException;
024    import cascading.flow.FlowProcess;
025    import cascading.flow.SliceCounters;
026    import cascading.flow.hadoop.HadoopGroupByClosure;
027    import cascading.flow.stream.Duct;
028    import cascading.flow.stream.DuctException;
029    import cascading.pipe.GroupBy;
030    import cascading.tuple.Tuple;
031    import cascading.tuple.TupleEntry;
032    import cascading.tuple.io.TuplePair;
033    
034    /**
035     *
036     */
037    public class HadoopGroupByGate extends HadoopGroupGate
038      {
039      public HadoopGroupByGate( FlowProcess flowProcess, GroupBy groupBy, Role role )
040        {
041        super( flowProcess, groupBy, role );
042        }
043    
044      @Override
045      public void prepare()
046        {
047        super.prepare();
048    
049        if( role != Role.sink )
050          closure = new HadoopGroupByClosure( flowProcess, keyFields, valuesFields );
051    
052        if( grouping != null && splice.getJoinDeclaredFields() != null && splice.getJoinDeclaredFields().isNone() )
053          grouping.joinerClosure = closure;
054        }
055    
056      @Override
057      public void receive( Duct previous, TupleEntry incomingEntry )
058        {
059        // always use pos == 0 since all key/value/sort fields are guaranteed to be the same
060        Tuple groupTuple = keyBuilder[ 0 ].makeResult( incomingEntry.getTuple(), null );
061        Tuple sortTuple = sortFields == null ? null : sortBuilder[ 0 ].makeResult( incomingEntry.getTuple(), null );
062        Tuple valuesTuple = valuesBuilder[ 0 ].makeResult( incomingEntry.getTuple(), null ); // nulls out the dupe values
063    
064        Tuple groupKey = sortTuple == null ? groupTuple : new TuplePair( groupTuple, sortTuple );
065    
066        try
067          {
068          collector.collect( groupKey, valuesTuple );
069          flowProcess.increment( SliceCounters.Tuples_Written, 1 );
070          }
071        catch( OutOfMemoryError error )
072          {
073          handleReThrowableException( "out of memory, try increasing task memory allocation", error );
074          }
075        catch( CascadingException exception )
076          {
077          handleException( exception, incomingEntry );
078          }
079        catch( Throwable throwable )
080          {
081          handleException( new DuctException( "internal error: " + incomingEntry.getTuple().print(), throwable ), incomingEntry );
082          }
083        }
084    
085      @Override
086      protected Tuple unwrapGrouping( Tuple key )
087        {
088        return sortFields == null ? key : ( (TuplePair) key ).getLhs();
089        }
090      }