001/* 002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved. 003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved. 004 * 005 * Project and contact information: http://www.cascading.org/ 006 * 007 * This file is part of the Cascading project. 008 * 009 * Licensed under the Apache License, Version 2.0 (the "License"); 010 * you may not use this file except in compliance with the License. 011 * You may obtain a copy of the License at 012 * 013 * http://www.apache.org/licenses/LICENSE-2.0 014 * 015 * Unless required by applicable law or agreed to in writing, software 016 * distributed under the License is distributed on an "AS IS" BASIS, 017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 018 * See the License for the specific language governing permissions and 019 * limitations under the License. 020 */ 021 022package cascading.operation.regex; 023 024import java.beans.ConstructorProperties; 025import java.util.regex.Matcher; 026 027import cascading.flow.FlowProcess; 028import cascading.operation.Filter; 029import cascading.operation.FilterCall; 030 031/** 032 * Class RegexFilter will apply the regex patternString against every input Tuple value and filter 033 * the Tuple stream accordingly. 034 * <p> 035 * By default, Tuples that match the given pattern are kept, and Tuples that do not 036 * match are filtered out. This can be changed by setting removeMatch to true. 037 * <p> 038 * Also, by default, the whole Tuple is matched against the given patternString (tab delimited, unless otherwise 039 * specified). If matchEachElement is set to true, the pattern is applied to each Tuple value individually. 040 * <p> 041 * This operation uses {@link java.util.regex.Matcher} internally, specifically the method 042 * {@link java.util.regex.Matcher#find()}. 043 * <p> 044 * Note a {@code null} valued argument passed to the parser will be converted to an empty string ({@code ""}) before 045 * the regex is applied. 046 * <p> 047 * Any Object value will be coerced to a String type via any provided {@link cascading.tuple.type.CoercibleType} on 048 * the argument selector or via its {@code toString()} method. 049 * 050 * @see java.util.regex.Matcher 051 * @see java.util.regex.Pattern 052 */ 053public class RegexFilter extends RegexMatcher implements Filter<Matcher> 054 { 055 /** Field matchEachElement */ 056 protected final boolean matchEachElement; 057 058 /** 059 * Constructor RegexFilter creates a new RegexFilter instance. 060 * 061 * @param patternString of type String 062 */ 063 @ConstructorProperties({"patternString"}) 064 public RegexFilter( String patternString ) 065 { 066 super( patternString ); 067 this.matchEachElement = false; 068 } 069 070 /** 071 * Constructor RegexFilter creates a new RegexFilter instance. 072 * 073 * @param patternString of type String 074 * @param delimiter of type String 075 */ 076 @ConstructorProperties({"patternString", "delimiter"}) 077 public RegexFilter( String patternString, String delimiter ) 078 { 079 super( patternString, delimiter ); 080 this.matchEachElement = false; 081 } 082 083 /** 084 * Constructor RegexFilter creates a new RegexFilter instance. 085 * 086 * @param patternString of type String 087 * @param removeMatch of type boolean 088 */ 089 @ConstructorProperties({"patternString", "removeMatch"}) 090 public RegexFilter( String patternString, boolean removeMatch ) 091 { 092 super( patternString, removeMatch ); 093 this.matchEachElement = false; 094 } 095 096 /** 097 * Constructor RegexFilter creates a new RegexFilter instance. 098 * 099 * @param patternString of type String 100 * @param removeMatch of type boolean 101 * @param delimiter of type String 102 */ 103 @ConstructorProperties({"patternString", "removeMatch", "delimiter"}) 104 public RegexFilter( String patternString, boolean removeMatch, String delimiter ) 105 { 106 super( patternString, removeMatch, delimiter ); 107 this.matchEachElement = false; 108 109 } 110 111 /** 112 * @param patternString of type String 113 * @param removeMatch of type boolean, set to true if a match should be filtered 114 * @param matchEachElement of type boolean, set to true if each element should be matched individually 115 */ 116 @ConstructorProperties({"patternString", "removeMatch", "matchEachElement"}) 117 public RegexFilter( String patternString, boolean removeMatch, boolean matchEachElement ) 118 { 119 super( patternString, removeMatch ); 120 this.matchEachElement = matchEachElement; 121 } 122 123 /** 124 * @param patternString of type String 125 * @param removeMatch of type boolean, set to true if a match should be filtered 126 * @param matchEachElement of type boolean, set to true if each element should be matched individually 127 * @param delimiter of type String 128 */ 129 @ConstructorProperties({"patternString", "removeMatch", "matchEachElement", "delimiter"}) 130 public RegexFilter( String patternString, boolean removeMatch, boolean matchEachElement, String delimiter ) 131 { 132 super( patternString, removeMatch, delimiter ); 133 this.matchEachElement = matchEachElement; 134 } 135 136 public boolean isMatchEachElement() 137 { 138 return matchEachElement; 139 } 140 141 @Override 142 public boolean isRemove( FlowProcess flowProcess, FilterCall<Matcher> filterCall ) 143 { 144 if( matchEachElement ) 145 return matchEachElement( filterCall.getContext(), filterCall.getArguments() ); 146 else 147 return matchWholeTuple( filterCall.getContext(), filterCall.getArguments() ); 148 } 149 150 @Override 151 public boolean equals( Object object ) 152 { 153 if( this == object ) 154 return true; 155 if( !( object instanceof RegexFilter ) ) 156 return false; 157 if( !super.equals( object ) ) 158 return false; 159 160 RegexFilter that = (RegexFilter) object; 161 162 if( matchEachElement != that.matchEachElement ) 163 return false; 164 165 return true; 166 } 167 168 @Override 169 public int hashCode() 170 { 171 int result = super.hashCode(); 172 result = 31 * result + ( matchEachElement ? 1 : 0 ); 173 return result; 174 } 175 }