001 /* 002 * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 003 * 004 * Project and contact information: http://www.cascading.org/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021 package cascading.operation.regex; 022 023 import java.beans.ConstructorProperties; 024 import java.util.regex.Matcher; 025 026 import cascading.flow.FlowProcess; 027 import cascading.operation.Filter; 028 import cascading.operation.FilterCall; 029 030 /** 031 * Class RegexFilter will apply the regex patternString against every input Tuple value and filter 032 * the Tuple stream accordingly. 033 * <p/> 034 * By default, Tuples that match the given pattern are kept, and Tuples that do not 035 * match are filtered out. This can be changed by setting removeMatch to true. 036 * <p/> 037 * Also, by default, the whole Tuple is matched against the given patternString (tab delimited). If matchEachElement 038 * is set to true, the pattern is applied to each Tuple value individually. 039 * <p/> 040 * This operation uses {@link java.util.regex.Matcher} internally, specifically the method {@link java.util.regex.Matcher#find()}. 041 * <p/> 042 * Note a {@code null} valued argument passed to the parser will be converted to an empty string ({@code ""}) before 043 * the regex is applied. 044 * <p/> 045 * Any Object value will be coerced to a String type via its {@code toString()} method. 046 * 047 * @see java.util.regex.Matcher 048 * @see java.util.regex.Pattern 049 */ 050 public class RegexFilter extends RegexMatcher implements Filter<Matcher> 051 { 052 /** Field matchEachElement */ 053 protected final boolean matchEachElement; 054 055 /** 056 * Constructor RegexFilter creates a new RegexFilter instance. 057 * 058 * @param patternString of type String 059 */ 060 @ConstructorProperties({"patternString"}) 061 public RegexFilter( String patternString ) 062 { 063 super( patternString ); 064 this.matchEachElement = false; 065 } 066 067 /** 068 * Constructor RegexFilter creates a new RegexFilter instance. 069 * 070 * @param patternString of type String 071 * @param removeMatch of type boolean 072 */ 073 @ConstructorProperties({"patternString", "removeMatch"}) 074 public RegexFilter( String patternString, boolean removeMatch ) 075 { 076 super( patternString, removeMatch ); 077 this.matchEachElement = false; 078 079 } 080 081 /** 082 * @param patternString of type String 083 * @param removeMatch of type boolean, set to true if a match should be filtered 084 * @param matchEachElement of type boolean, set to true if each element should be matched individually 085 */ 086 @ConstructorProperties({"patternString", "removeMatch", "matchEachElement"}) 087 public RegexFilter( String patternString, boolean removeMatch, boolean matchEachElement ) 088 { 089 super( patternString, removeMatch ); 090 this.matchEachElement = matchEachElement; 091 } 092 093 public boolean isMatchEachElement() 094 { 095 return matchEachElement; 096 } 097 098 @Override 099 public boolean isRemove( FlowProcess flowProcess, FilterCall<Matcher> filterCall ) 100 { 101 if( matchEachElement ) 102 return matchEachElement( filterCall.getContext(), filterCall.getArguments() ); 103 else 104 return matchWholeTuple( filterCall.getContext(), filterCall.getArguments() ); 105 } 106 107 @Override 108 public boolean equals( Object object ) 109 { 110 if( this == object ) 111 return true; 112 if( !( object instanceof RegexFilter ) ) 113 return false; 114 if( !super.equals( object ) ) 115 return false; 116 117 RegexFilter that = (RegexFilter) object; 118 119 if( matchEachElement != that.matchEachElement ) 120 return false; 121 122 return true; 123 } 124 125 @Override 126 public int hashCode() 127 { 128 int result = super.hashCode(); 129 result = 31 * result + ( matchEachElement ? 1 : 0 ); 130 return result; 131 } 132 }