001    /*
002     * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.tuple.hadoop.collect;
022    
023    import java.util.Collection;
024    
025    import cascading.flow.FlowProcess;
026    import cascading.tuple.Tuple;
027    import cascading.tuple.collect.TupleCollectionFactory;
028    import cascading.tuple.hadoop.TupleSerialization;
029    import org.apache.hadoop.io.compress.CompressionCodec;
030    import org.apache.hadoop.mapred.JobConf;
031    
032    import static cascading.tuple.collect.SpillableProps.defaultListThreshold;
033    import static cascading.tuple.collect.SpillableTupleList.getThreshold;
034    
035    /**
036     *
037     */
038    public class HadoopTupleCollectionFactory implements TupleCollectionFactory<JobConf>
039      {
040      private int spillThreshold;
041      private CompressionCodec codec;
042      private TupleSerialization tupleSerialization;
043    
044      @Override
045      public void initialize( FlowProcess<JobConf> flowProcess )
046        {
047        this.spillThreshold = getThreshold( flowProcess, defaultListThreshold );
048        this.codec = HadoopSpillableTupleList.getCodec( flowProcess, HadoopSpillableTupleList.defaultCodecs );
049    
050        this.tupleSerialization = new TupleSerialization( flowProcess );
051        }
052    
053      @Override
054      public Collection<Tuple> create( FlowProcess<JobConf> flowProcess )
055        {
056        return new HadoopSpillableTupleList( spillThreshold, tupleSerialization, codec );
057        }
058      }