/*
 * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.tap.local;

import java.beans.ConstructorProperties;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Properties;

import cascading.flow.FlowProcess;
import cascading.tap.BaseTemplateTap;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tap.local.io.TapFileOutputStream;
import cascading.tuple.Fields;
import cascading.tuple.TupleEntrySchemeCollector;

/**
 * Class TemplateTap can be used to write tuple streams out to files and sub-directories based on the values in the
 * {@link cascading.tuple.Tuple} instance.
 * <p/>
 * The constructor takes a {@link FileTap} {@link cascading.tap.Tap} and a {@link java.util.Formatter} format syntax
 * String. This allows Tuple values at given positions to be used as directory names.
 * <p/>
 * {@code openTapsThreshold} limits the number of open files to be output to. This value defaults to 300 files.
 * Each time the threshold is exceeded, 10% of the least recently used open files will be closed.
 * <p/>
 * TemplateTap will populate a given {@code pathTemplate} without regard to case of the values being used. Thus
 * the resulting paths {@code 2012/June/} and {@code 2012/june/} will likely result in two open files into the same
 * location. Forcing the case to be consistent with an upstream {@link cascading.operation.Function} is recommended, see
 * {@link cascading.operation.expression.ExpressionFunction}.
 *
 * @deprecated see {@link cascading.tap.local.PartitionTap}
 */
@Deprecated
public class TemplateTap extends BaseTemplateTap<Properties, OutputStream>
  {
  /**
   * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
   * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter}
   * format String.
   *
   * @param parent       of type FileTap
   * @param pathTemplate of type String
   */
  @ConstructorProperties({"parent", "pathTemplate"})
  public TemplateTap( FileTap parent, String pathTemplate )
    {
    this( parent, pathTemplate, OPEN_TAPS_THRESHOLD_DEFAULT );
    }

  /**
   * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
   * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter}
   * format String.
   * <p/>
   * {@code openTapsThreshold} limits the number of open files to be output to.
   *
   * @param parent            of type FileTap
   * @param pathTemplate      of type String
   * @param openTapsThreshold of type int
   */
  @ConstructorProperties({"parent", "pathTemplate", "openTapsThreshold"})
  public TemplateTap( FileTap parent, String pathTemplate, int openTapsThreshold )
    {
    super( parent, pathTemplate, openTapsThreshold );
    }

  /**
   * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
   * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter}
   * format String.
   *
   * @param parent       of type FileTap
   * @param pathTemplate of type String
   * @param sinkMode     of type SinkMode
   */
  @ConstructorProperties({"parent", "pathTemplate", "sinkMode"})
  public TemplateTap( FileTap parent, String pathTemplate, SinkMode sinkMode )
    {
    super( parent, pathTemplate, sinkMode );
    }

  /**
   * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
   * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter}
   * format String.
   * <p/>
   * {@code keepParentOnDelete}, when set to true, prevents the parent Tap from being deleted when
   * {@link #deleteResource(Object)} is called, typically an issue when used inside a {@link cascading.cascade.Cascade}.
   *
   * @param parent             of type FileTap
   * @param pathTemplate       of type String
   * @param sinkMode           of type SinkMode
   * @param keepParentOnDelete of type boolean
   */
  @ConstructorProperties({"parent", "pathTemplate", "sinkMode", "keepParentOnDelete"})
  public TemplateTap( FileTap parent, String pathTemplate, SinkMode sinkMode, boolean keepParentOnDelete )
    {
    this( parent, pathTemplate, sinkMode, keepParentOnDelete, OPEN_TAPS_THRESHOLD_DEFAULT );
    }

  /**
   * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
   * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter}
   * format String.
   * <p/>
   * {@code keepParentOnDelete}, when set to true, prevents the parent Tap from being deleted when
   * {@link #deleteResource(Object)} is called, typically an issue when used inside a {@link cascading.cascade.Cascade}.
   * <p/>
   * {@code openTapsThreshold} limits the number of open files to be output to.
   *
   * @param parent             of type FileTap
   * @param pathTemplate       of type String
   * @param sinkMode           of type SinkMode
   * @param keepParentOnDelete of type boolean
   * @param openTapsThreshold  of type int
   */
  @ConstructorProperties({"parent", "pathTemplate", "sinkMode", "keepParentOnDelete", "openTapsThreshold"})
  public TemplateTap( FileTap parent, String pathTemplate, SinkMode sinkMode, boolean keepParentOnDelete, int openTapsThreshold )
    {
    super( parent, pathTemplate, sinkMode, keepParentOnDelete, openTapsThreshold );
    }

  /**
   * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
   * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter}
   * format String.
   * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate.
   * <p/>
   * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing
   * data not in the result file to be used in the template path name.
   *
   * @param parent       of type FileTap
   * @param pathTemplate of type String
   * @param pathFields   of type Fields
   */
  @ConstructorProperties({"parent", "pathTemplate", "pathFields"})
  public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields )
    {
    this( parent, pathTemplate, pathFields, OPEN_TAPS_THRESHOLD_DEFAULT );
    }

  /**
   * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
   * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter}
   * format String.
   * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate.
   * <p/>
   * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing
   * data not in the result file to be used in the template path name.
   * <p/>
   * {@code openTapsThreshold} limits the number of open files to be output to.
   *
   * @param parent            of type FileTap
   * @param pathTemplate      of type String
   * @param pathFields        of type Fields
   * @param openTapsThreshold of type int
   */
  @ConstructorProperties({"parent", "pathTemplate", "pathFields", "openTapsThreshold"})
  public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields, int openTapsThreshold )
    {
    super( parent, pathTemplate, pathFields, openTapsThreshold );
    }

  /**
   * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
   * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter}
   * format String.
   * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate.
   * <p/>
   * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing
   * data not in the result file to be used in the template path name.
   *
   * @param parent       of type FileTap
   * @param pathTemplate of type String
   * @param pathFields   of type Fields
   * @param sinkMode     of type SinkMode
   */
  @ConstructorProperties({"parent", "pathTemplate", "pathFields", "sinkMode"})
  public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields, SinkMode sinkMode )
    {
    super( parent, pathTemplate, pathFields, sinkMode );
    }

  /**
   * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
   * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter}
   * format String.
   * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate.
   * <p/>
   * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing
   * data not in the result file to be used in the template path name.
   * <p/>
   * {@code keepParentOnDelete}, when set to true, prevents the parent Tap from being deleted when
   * {@link #deleteResource(Object)} is called, typically an issue when used inside a {@link cascading.cascade.Cascade}.
   *
   * @param parent             of type FileTap
   * @param pathTemplate       of type String
   * @param pathFields         of type Fields
   * @param sinkMode           of type SinkMode
   * @param keepParentOnDelete of type boolean
   */
  @ConstructorProperties({"parent", "pathTemplate", "pathFields", "sinkMode", "keepParentOnDelete"})
  public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields, SinkMode sinkMode, boolean keepParentOnDelete )
    {
    this( parent, pathTemplate, pathFields, sinkMode, keepParentOnDelete, OPEN_TAPS_THRESHOLD_DEFAULT );
    }

  /**
   * Constructor TemplateTap creates a new TemplateTap instance using the given parent {@link FileTap} Tap as the
   * base path and default {@link cascading.scheme.Scheme}, and the pathTemplate as the {@link java.util.Formatter}
   * format String.
   * The pathFields is a selector that selects and orders the fields to be used in the given pathTemplate.
   * <p/>
   * This constructor also allows the sinkFields of the parent Tap to be independent of the pathFields. Thus allowing
   * data not in the result file to be used in the template path name.
   * <p/>
   * {@code keepParentOnDelete}, when set to true, prevents the parent Tap from being deleted when
   * {@link #deleteResource(Object)} is called, typically an issue when used inside a {@link cascading.cascade.Cascade}.
   * <p/>
   * {@code openTapsThreshold} limits the number of open files to be output to.
   *
   * @param parent             of type FileTap
   * @param pathTemplate       of type String
   * @param pathFields         of type Fields
   * @param sinkMode           of type SinkMode
   * @param keepParentOnDelete of type boolean
   * @param openTapsThreshold  of type int
   */
  @ConstructorProperties({"parent", "pathTemplate", "pathFields", "sinkMode", "keepParentOnDelete",
                          "openTapsThreshold"})
  public TemplateTap( FileTap parent, String pathTemplate, Fields pathFields, SinkMode sinkMode, boolean keepParentOnDelete, int openTapsThreshold )
    {
    super( parent, pathTemplate, pathFields, sinkMode, keepParentOnDelete, openTapsThreshold );
    }

  /**
   * Method createTupleEntrySchemeCollector opens a {@link TapFileOutputStream} for the given path relative to the
   * given parent Tap and wraps it in a new {@link TupleEntrySchemeCollector}. The stream is opened for append when
   * {@code isUpdate()} returns true.
   * <p/>
   * If the collector cannot be created, the stream opened here is closed before the failure is propagated so the
   * underlying file handle is not left open.
   *
   * @param flowProcess of type FlowProcess
   * @param parent      of type Tap
   * @param path        of type String
   * @return a new TupleEntrySchemeCollector writing to the opened stream
   * @throws IOException if the output for the given path cannot be opened
   */
  @Override
  protected TupleEntrySchemeCollector createTupleEntrySchemeCollector( FlowProcess<Properties> flowProcess, Tap parent, String path ) throws IOException
    {
    TapFileOutputStream output = new TapFileOutputStream( parent, path, isUpdate() ); // append if we are in update mode
    boolean created = false;

    try
      {
      TupleEntrySchemeCollector collector = new TupleEntrySchemeCollector<Properties, OutputStream>( flowProcess, parent, output );

      created = true;

      return collector;
      }
    finally
      {
      // nothing owns the stream yet if construction failed, so release it here
      if( !created )
        {
        try
          {
          output.close();
          }
        catch( IOException ignored )
          {
          // the failure that prevented the collector from being created is the one worth reporting
          }
        }
      }
    }
  }