001    /*
002     * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.tuple.collect;
022    
023    import java.util.Collection;
024    import java.util.Map;
025    
026    import cascading.provider.CascadingFactory;
027    import cascading.tuple.Tuple;
028    
029    /**
030     * Interface TupleMapFactory allows developers to plug in alternative implementations of a "tuple map"
031     * used to back in-memory "join" and "co-group" operations. Typically these implementations are
032     * "spillable", in that to prevent using up all memory in the JVM, after some threshold is met or event
033     * is triggered, values are persisted to disk.
034     * <p>
035     * The {@link Map} classes returned must take a {@link cascading.tuple.Tuple} as a key, and a {@link Collection} of Tuples as
036     * a value. Further, {@link Map#get(Object)} must never return {@code null}, but on the first call to get() on the map
037     * an empty Collection must be created and stored.
038     * <p>
039     * That is, {@link Map#put(Object, Object)} is never called on the map instance internally,
040     * only {@code map.get(groupTuple).add(valuesTuple)}.
041     * <p>
042     * Using the {@link TupleCollectionFactory} to create the underlying Tuple Collections would allow that aspect
043     * to be pluggable as well.
044     * <p>
045     * If the Map implementation implements the {@link Spillable} interface, it will receive a {@link Spillable.SpillListener}
046     * instance that calls back to the appropriate logging mechanism for the platform. This instance should be passed
047     * down to any child Spillable types, namely an implementation of {@link SpillableTupleList}.
048     * <p>
049     * The default implementation for the Hadoop platform is the {@link cascading.tuple.hadoop.collect.HadoopTupleMapFactory}
050     * which creates a {@link cascading.tuple.hadoop.collect.HadoopSpillableTupleMap} instance.
051     * <p>
052     * The class {@link SpillableTupleMap} may be used as a base class.
053     *
054     * @see SpillableTupleMap
055     * @see cascading.tuple.hadoop.collect.HadoopTupleMapFactory
056     * @see TupleCollectionFactory
057     * @see cascading.tuple.hadoop.collect.HadoopTupleCollectionFactory
058     */
059    public interface TupleMapFactory<Config> extends CascadingFactory<Config, Map<Tuple, Collection<Tuple>>>
060      {
061      String TUPLE_MAP_FACTORY = "cascading.factory.tuple.map.classname"; // NOTE(review): presumably the property key naming the TupleMapFactory implementation class -- confirm against the factory lookup
062      }