001 /* 002 * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved. 003 * 004 * Project and contact information: http://www.cascading.org/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021 package cascading.operation.regex; 022 023 import java.beans.ConstructorProperties; 024 import java.util.regex.Pattern; 025 026 import cascading.flow.FlowProcess; 027 import cascading.operation.Function; 028 import cascading.operation.FunctionCall; 029 import cascading.operation.OperationCall; 030 import cascading.tuple.Fields; 031 import cascading.tuple.Tuple; 032 import cascading.util.Pair; 033 034 /** Class RegexSplitter will split an incoming argument value by the given regex delimiter patternString. */ 035 public class RegexSplitter extends RegexOperation<Pair<Pattern, Tuple>> implements Function<Pair<Pattern, Tuple>> 036 { 037 private final int length; 038 039 /** 040 * Constructor RegexSplitter creates a new RegexSplitter instance. 041 * 042 * @param patternString of type String 043 */ 044 @ConstructorProperties({"patternString"}) 045 public RegexSplitter( String patternString ) 046 { 047 super( 1, patternString ); 048 length = fieldDeclaration.isUnknown() ? -1 : fieldDeclaration.size(); 049 } 050 051 /** 052 * Constructor RegexOperation creates a new RegexOperation instance, where the delimiter is the tab character. 053 * 054 * @param fieldDeclaration of type Fields 055 */ 056 @ConstructorProperties({"fieldDeclaration"}) 057 public RegexSplitter( Fields fieldDeclaration ) 058 { 059 super( 1, fieldDeclaration, "\t" ); 060 length = fieldDeclaration.isUnknown() ? -1 : fieldDeclaration.size(); 061 } 062 063 /** 064 * Constructor RegexSplitter creates a new RegexSplitter instance. 065 * 066 * @param fieldDeclaration of type Fields 067 * @param patternString of type String 068 */ 069 @ConstructorProperties({"fieldDeclaration", "patternString"}) 070 public RegexSplitter( Fields fieldDeclaration, String patternString ) 071 { 072 super( 1, fieldDeclaration, patternString ); 073 length = fieldDeclaration.isUnknown() ? -1 : fieldDeclaration.size(); 074 } 075 076 @Override 077 public void prepare( FlowProcess flowProcess, OperationCall<Pair<Pattern, Tuple>> operationCall ) 078 { 079 operationCall.setContext( new Pair<Pattern, Tuple>( getPattern(), new Tuple() ) ); 080 } 081 082 @Override 083 public void operate( FlowProcess flowProcess, FunctionCall<Pair<Pattern, Tuple>> functionCall ) 084 { 085 String value = functionCall.getArguments().getString( 0 ); 086 087 if( value == null ) 088 value = ""; 089 090 Tuple output = functionCall.getContext().getRhs(); 091 092 output.clear(); 093 094 String[] split = functionCall.getContext().getLhs().split( value, length ); 095 096 for( int i = 0; i < split.length; i++ ) 097 output.add( split[ i ] ); 098 099 functionCall.getOutputCollector().add( output ); 100 } 101 }