001    /*
002     * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.tap.local;
022    
023    import java.beans.ConstructorProperties;
024    import java.io.IOException;
025    import java.io.OutputStream;
026    import java.util.Properties;
027    
028    import cascading.flow.FlowProcess;
029    import cascading.tap.BaseTemplateTap;
030    import cascading.tap.SinkMode;
031    import cascading.tap.Tap;
032    import cascading.tap.local.io.TapFileOutputStream;
033    import cascading.tuple.Fields;
034    import cascading.tuple.TupleEntrySchemeCollector;
035    
036    /**
037     * Class TemplateTap can be used to write tuple streams out to files and sub-directories based on the values in the {@link cascading.tuple.Tuple}
038     * instance.
039     * <p/>
040     * The constructor takes a {@link FileTap} {@link cascading.tap.Tap} and a {@link java.util.Formatter} format syntax String. This allows
041     * Tuple values at given positions to be used as directory names.
042     * <p/>
043     * {@code openTapsThreshold} limits the number of open files to be output to. This value defaults to 300 files.
044     * Each time the threshold is exceeded, 10% of the least recently used open files will be closed.
045     * <p/>
046     * TemplateTap will populate a given {@code pathTemplate} without regard to case of the values being used. Thus
047     * the resulting paths {@code 2012/June/} and {@code 2012/june/} will likely result in two open files into the same
048     * location. Forcing the case to be consistent with an upstream {@link cascading.operation.Function} is recommended, see
049     * {@link cascading.operation.expression.ExpressionFunction}.
050     *
051     * @deprecated see {@link cascading.tap.local.PartitionTap}
052     */
053    @Deprecated
054    public class TemplateTap extends BaseTemplateTap<Properties, OutputStream>
055      {
056      /**
057       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
058       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
059       *
060       * @param parent       of type Tap
061       * @param pathTemplate of type String
062       */
063      @ConstructorProperties({"parent", "pathTemplate"})
064      public TemplateTap( FileTap parent, String pathTemplate )
065        {
066        this( parent, pathTemplate, OPEN_TAPS_THRESHOLD_DEFAULT );
067        }
068    
069      /**
070       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
071       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
072       * <p/>
073       * {@code openTapsThreshold} limits the number of open files to be output to.
074       *
075       * @param parent            of type Hfs
076       * @param pathTemplate      of type String
077       * @param openTapsThreshold of type int
078       */
079      @ConstructorProperties({"parent", "pathTemplate", "openTapsThreshold"})
080      public TemplateTap( FileTap parent, String pathTemplate, int openTapsThreshold )
081        {
082        super( parent, pathTemplate, openTapsThreshold );
083        }
084    
085      /**
086       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
087       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
088       *
089       * @param parent       of type Tap
090       * @param pathTemplate of type String
091       * @param sinkMode     of type SinkMode
092       */
093      @ConstructorProperties({"parent", "pathTemplate", "sinkMode"})
094      public TemplateTap( FileTap parent, String pathTemplate, SinkMode sinkMode )
095        {
096        super( parent, pathTemplate, sinkMode );
097        }
098    
099      /**
100       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
101       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
102       * <p/>
103       * {@code keepParentOnDelete}, when set to true, prevents the parent Tap from being deleted when {@link #deleteResource(Object)}
104       * is called, typically an issue when used inside a {@link cascading.cascade.Cascade}.
105       *
106       * @param parent             of type Tap
107       * @param pathTemplate       of type String
108       * @param sinkMode           of type SinkMode
109       * @param keepParentOnDelete of type boolean
110       */
111      @ConstructorProperties({"parent", "pathTemplate", "sinkMode", "keepParentOnDelete"})
112      public TemplateTap( FileTap parent, String pathTemplate, SinkMode sinkMode, boolean keepParentOnDelete )
113        {
114        this( parent, pathTemplate, sinkMode, keepParentOnDelete, OPEN_TAPS_THRESHOLD_DEFAULT );
115        }
116    
117      /**
118       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
119       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
120       * <p/>
121       * {@code keepParentOnDelete}, when set to true, prevents the parent Tap from being deleted when {@link #deleteResource(Object)}
122       * is called, typically an issue when used inside a {@link cascading.cascade.Cascade}.
123       * <p/>
124       * {@code openTapsThreshold} limits the number of open files to be output to.
125       *
126       * @param parent             of type Tap
127       * @param pathTemplate       of type String
128       * @param sinkMode           of type SinkMode
129       * @param keepParentOnDelete of type boolean
130       * @param openTapsThreshold  of type int
131       */
132      @ConstructorProperties({"parent", "pathTemplate", "sinkMode", "keepParentOnDelete", "openTapsThreshold"})
133      public TemplateTap( FileTap parent, String pathTemplate, SinkMode sinkMode, boolean keepParentOnDelete, int openTapsThreshold )
134        {
135        super( parent, pathTemplate, sinkMode, keepParentOnDelete, openTapsThreshold );
136        }
137    
138      /**
139       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
140       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
141       * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate.
142       * <p/>
143       * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing
144       * data not in the result file to be used in the template path name.
145       *
146       * @param parent       of type Tap
147       * @param pathTemplate of type String
148       * @param pathFields   of type Fields
149       */
150      @ConstructorProperties({"parent", "pathTemplate", "pathFields"})
151      public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields )
152        {
153        this( parent, pathTemplate, pathFields, OPEN_TAPS_THRESHOLD_DEFAULT );
154        }
155    
156      /**
157       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
158       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
159       * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate.
160       * <p/>
161       * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing
162       * data not in the result file to be used in the template path name.
163       * <p/>
164       * {@code openTapsThreshold} limits the number of open files to be output to.
165       *
166       * @param parent            of type Hfs
167       * @param pathTemplate      of type String
168       * @param pathFields        of type Fields
169       * @param openTapsThreshold of type int
170       */
171      @ConstructorProperties({"parent", "pathTemplate", "pathFields", "openTapsThreshold"})
172      public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields, int openTapsThreshold )
173        {
174        super( parent, pathTemplate, pathFields, openTapsThreshold );
175        }
176    
177      /**
178       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
179       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
180       * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate.
181       * <p/>
182       * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing
183       * data not in the result file to be used in the template path name.
184       *
185       * @param parent       of type Tap
186       * @param pathTemplate of type String
187       * @param pathFields   of type Fields
188       * @param sinkMode     of type SinkMode
189       */
190      @ConstructorProperties({"parent", "pathTemplate", "pathFields", "sinkMode"})
191      public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields, SinkMode sinkMode )
192        {
193        super( parent, pathTemplate, pathFields, sinkMode );
194        }
195    
196      /**
197       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
198       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
199       * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate.
200       * <p/>
201       * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing
202       * data not in the result file to be used in the template path name.
203       * <p/>
204       * {@code keepParentOnDelete}, when set to true, prevents the parent Tap from being deleted when {@link #deleteResource(Object)}
205       * is called, typically an issue when used inside a {@link cascading.cascade.Cascade}.
206       *
207       * @param parent             of type Tap
208       * @param pathTemplate       of type String
209       * @param pathFields         of type Fields
210       * @param sinkMode           of type SinkMode
211       * @param keepParentOnDelete of type boolean
212       */
213      @ConstructorProperties({"parent", "pathTemplate", "pathFields", "sinkMode", "keepParentOnDelete"})
214      public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields, SinkMode sinkMode, boolean keepParentOnDelete )
215        {
216        this( parent, pathTemplate, pathFields, sinkMode, keepParentOnDelete, OPEN_TAPS_THRESHOLD_DEFAULT );
217        }
218    
219      /**
220       * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
221       * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter} format String.
222       * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate.
223       * <p/>
224       * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing
225       * data not in the result file to be used in the template path name.
226       * <p/>
227       * {@code keepParentOnDelete}, when set to true, prevents the parent Tap from being deleted when {@link #deleteResource(Object)}
228       * is called, typically an issue when used inside a {@link cascading.cascade.Cascade}.
229       * <p/>
230       * {@code openTapsThreshold} limits the number of open files to be output to.
231       *
232       * @param parent             of type Hfs
233       * @param pathTemplate       of type String
234       * @param pathFields         of type Fields
235       * @param sinkMode           of type SinkMode
236       * @param keepParentOnDelete of type boolean
237       * @param openTapsThreshold  of type int
238       */
239      @ConstructorProperties({"parent", "pathTemplate", "pathFields", "sinkMode", "keepParentOnDelete",
240                              "openTapsThreshold"})
241      public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields, SinkMode sinkMode, boolean keepParentOnDelete, int openTapsThreshold )
242        {
243        super( parent, pathTemplate, pathFields, sinkMode, keepParentOnDelete, openTapsThreshold );
244        }
245    
246      @Override
247      protected TupleEntrySchemeCollector createTupleEntrySchemeCollector( FlowProcess<Properties> flowProcess, Tap parent, String path ) throws IOException
248        {
249        TapFileOutputStream output = new TapFileOutputStream( parent, path, isUpdate() ); // append if we are in update mode
250    
251        return new TupleEntrySchemeCollector<Properties, OutputStream>( flowProcess, parent, output );
252        }
253      }