/*
 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.operation.regex;

import java.beans.ConstructorProperties;
import java.util.regex.Matcher;

import cascading.flow.FlowProcess;
import cascading.operation.OperationCall;
import cascading.tuple.Fields;
import cascading.tuple.TupleEntry;
import cascading.util.Util;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Class RegexMatcher is the base class for common regular expression operations.
 * <p>
 * This operation uses {@link java.util.regex.Matcher} internally, specifically the method {@link java.util.regex.Matcher#find()}.
 *
 * @see java.util.regex.Matcher
 * @see java.util.regex.Pattern
 */
public class RegexMatcher extends RegexOperation<Matcher>
  {
  /** Field LOG */
  private static final Logger LOG = LoggerFactory.getLogger( RegexMatcher.class );

  /** Delimiter used to join tuple values when a constructor is not given one explicitly. */
  public static final String DEFAULT_DELIM = "\t";

  /** Flag compared against the result of {@link Matcher#find()} by the match methods of this class. */
  protected final boolean negateMatch;
  /** Delimiter placed between tuple values by {@link #matchWholeTuple(Matcher, TupleEntry)}; never null after construction. */
  protected final String delimiter;

  /**
   * Creates a matcher with {@code negateMatch} set to false and the {@link #DEFAULT_DELIM} delimiter.
   *
   * @param patternString the regular expression, handed to the super class
   */
  @ConstructorProperties({"patternString"})
  protected RegexMatcher( String patternString )
    {
    this( patternString, false, DEFAULT_DELIM );
    }

  /**
   * Creates a matcher with {@code negateMatch} set to false and the given delimiter.
   *
   * @param patternString the regular expression, handed to the super class
   * @param delimiter     the delimiter used when joining tuple values
   * @throws IllegalArgumentException if delimiter is null
   */
  @ConstructorProperties({"patternString", "delimiter"})
  protected RegexMatcher( String patternString, String delimiter )
    {
    this( patternString, false, delimiter );
    }

  /**
   * Creates a matcher with the given {@code negateMatch} flag and the {@link #DEFAULT_DELIM} delimiter.
   *
   * @param patternString the regular expression, handed to the super class
   * @param negateMatch   the value the find result is compared against
   */
  @ConstructorProperties({"patternString", "negateMatch"})
  protected RegexMatcher( String patternString, boolean negateMatch )
    {
    this( patternString, negateMatch, DEFAULT_DELIM );
    }

  /**
   * Creates a matcher with the given {@code negateMatch} flag and delimiter. The other
   * constructors that take no field declaration delegate here.
   *
   * @param patternString the regular expression, handed to the super class
   * @param negateMatch   the value the find result is compared against
   * @param delimiter     the delimiter used when joining tuple values
   * @throws IllegalArgumentException if delimiter is null
   */
  @ConstructorProperties({"patternString", "negateMatch", "delimiter"})
  protected RegexMatcher( String patternString, boolean negateMatch, String delimiter )
    {
    super( patternString );
    this.negateMatch = negateMatch;
    this.delimiter = requireDelimiter( delimiter );
    }

  /**
   * Creates a matcher declaring a single result field, with {@code negateMatch} set to false
   * and the {@link #DEFAULT_DELIM} delimiter.
   *
   * @param fieldDeclaration the declared fields, must contain exactly one field
   * @param patternString    the regular expression, handed to the super class
   * @throws IllegalArgumentException if fieldDeclaration does not have exactly one field
   */
  @ConstructorProperties({"fieldDeclaration", "patternString"})
  protected RegexMatcher( Fields fieldDeclaration, String patternString )
    {
    this( fieldDeclaration, patternString, false, DEFAULT_DELIM );
    }

  /**
   * Creates a matcher declaring a single result field, with {@code negateMatch} set to false
   * and the given delimiter.
   *
   * @param fieldDeclaration the declared fields, must contain exactly one field
   * @param patternString    the regular expression, handed to the super class
   * @param delimiter        the delimiter used when joining tuple values
   * @throws IllegalArgumentException if delimiter is null or fieldDeclaration does not have exactly one field
   */
  @ConstructorProperties({"fieldDeclaration", "patternString", "delimiter"})
  protected RegexMatcher( Fields fieldDeclaration, String patternString, String delimiter )
    {
    this( fieldDeclaration, patternString, false, delimiter );
    }

  /**
   * Creates a matcher declaring a single result field, with the given {@code negateMatch} flag
   * and the {@link #DEFAULT_DELIM} delimiter.
   *
   * @param fieldDeclaration the declared fields, must contain exactly one field
   * @param patternString    the regular expression, handed to the super class
   * @param negateMatch      the value the find result is compared against
   * @throws IllegalArgumentException if fieldDeclaration does not have exactly one field
   */
  @ConstructorProperties({"fieldDeclaration", "patternString", "negateMatch"})
  protected RegexMatcher( Fields fieldDeclaration, String patternString, boolean negateMatch )
    {
    this( fieldDeclaration, patternString, negateMatch, DEFAULT_DELIM );
    }

  /**
   * Creates a matcher declaring a single result field, with the given {@code negateMatch} flag
   * and delimiter. The other constructors that take a field declaration delegate here.
   *
   * @param fieldDeclaration the declared fields, must contain exactly one field
   * @param patternString    the regular expression, handed to the super class
   * @param negateMatch      the value the find result is compared against
   * @param delimiter        the delimiter used when joining tuple values
   * @throws IllegalArgumentException if delimiter is null or fieldDeclaration does not have exactly one field
   */
  @ConstructorProperties({"fieldDeclaration", "patternString", "negateMatch", "delimiter"})
  protected RegexMatcher( Fields fieldDeclaration, String patternString, boolean negateMatch, String delimiter )
    {
    // ANY is inherited; presumably it declares that any number of arguments is accepted -- confirm in the super class
    super( ANY, fieldDeclaration, patternString );
    this.negateMatch = negateMatch;
    this.delimiter = requireDelimiter( delimiter );

    // the delimiter is validated first so a null delimiter is reported before a bad field declaration
    verify();
    }

  /**
   * Rejects a null delimiter.
   *
   * @param delimiter the candidate delimiter
   * @return the given delimiter, unchanged
   * @throws IllegalArgumentException if delimiter is null
   */
  private static String requireDelimiter( String delimiter )
    {
    if( delimiter == null )
      throw new IllegalArgumentException( "delimiter may not be null" );

    return delimiter;
    }

  /**
   * Returns the negateMatch flag this instance was constructed with.
   *
   * @return the negateMatch flag
   */
  public final boolean isNegateMatch()
    {
    return negateMatch;
    }

  /**
   * Returns the delimiter this instance was constructed with.
   *
   * @return the delimiter
   */
  public final String getDelimiter()
    {
    return delimiter;
    }

  /**
   * Ensures the inherited field declaration holds exactly one field.
   *
   * @throws IllegalArgumentException if the number of declared fields is not one
   */
  private void verify()
    {
    if( fieldDeclaration.size() != 1 )
      throw new IllegalArgumentException( "num fields in fieldDeclaration must be one, found: " + fieldDeclaration.printVerbose() );
    }

  /**
   * Stores a {@link Matcher} built from this operation's pattern as the call context.
   * The matcher is created over an empty string; the match methods of this class reset
   * the matcher they are handed before every use.
   *
   * @param flowProcess   the current flow process, not used here
   * @param operationCall the call whose context receives the matcher
   */
  @Override
  public void prepare( FlowProcess flowProcess, OperationCall<Matcher> operationCall )
    {
    operationCall.setContext( getPattern().matcher( "" ) );
    }

  /**
   * Joins all values of the given entry into one string, separated by the delimiter,
   * and runs {@link Matcher#find()} against it.
   * <p>
   * Note the return value is the comparison {@code matchFound == negateMatch}, not the raw
   * find result: with negateMatch false this is true when the pattern was NOT found.
   *
   * @param matcher the matcher to reset and use
   * @param input   the entry whose values are joined and matched
   * @return true if the find result equals the negateMatch flag
   */
  protected boolean matchWholeTuple( Matcher matcher, TupleEntry input )
    {
    Iterable<String> values = input.asIterableOf( String.class );
    // NOTE(review): the meaning of the trailing 'false' is defined by Util.join -- confirm there
    String joined = Util.join( values, delimiter, false );

    matcher.reset( joined );

    boolean matchFound = matcher.find();

    if( LOG.isDebugEnabled() )
      LOG.debug( "pattern: {}, matches: {}", getPatternString(), matchFound );

    return matchFound == negateMatch;
    }

  /**
   * Tests every value of the given entry individually.
   *
   * @param matcher the matcher to reset and use
   * @param input   the entry whose values are matched one by one
   * @return true if {@link #matchEachElementPos(Matcher, TupleEntry)} reports a position other than -1
   */
  protected boolean matchEachElement( Matcher matcher, TupleEntry input )
    {
    return matchEachElementPos( matcher, input ) != -1;
    }

  /**
   * Runs {@link Matcher#find()} against each value of the given entry in position order and
   * reports the first position whose find result equals the negateMatch flag. Null values
   * are matched as the empty string.
   *
   * @param matcher the matcher to reset and use
   * @param input   the entry whose values are matched one by one
   * @return the zero based position of the first such value, or -1 if there is none
   */
  protected int matchEachElementPos( Matcher matcher, TupleEntry input )
    {
    for( int pos = 0; pos < input.size(); pos++ )
      {
      String value = input.getString( pos );

      if( value == null )
        value = "";

      matcher.reset( value );

      boolean matchFound = matcher.find();

      if( LOG.isDebugEnabled() )
        LOG.debug( "pattern: {}, matches: {}, element: '{}'", getPatternString(), matchFound, value );

      if( matchFound == negateMatch )
        return pos;
      }

    return -1;
    }

  /**
   * Two instances are equal when the super class considers them equal and both the
   * negateMatch flag and the delimiter agree.
   *
   * @param object the object to compare with
   * @return true if the given object is an equal RegexMatcher
   */
  @Override
  public boolean equals( Object object )
    {
    if( this == object )
      return true;
    if( !( object instanceof RegexMatcher ) )
      return false;
    if( !super.equals( object ) )
      return false;

    RegexMatcher that = (RegexMatcher) object;

    if( negateMatch != that.negateMatch )
      return false;

    // null-safe comparison, kept even though the constructors reject a null delimiter
    return delimiter == null ? that.delimiter == null : delimiter.equals( that.delimiter );
    }

  /**
   * Combines the super class hash with the negateMatch flag and the delimiter, the same
   * state {@link #equals(Object)} compares.
   *
   * @return the hash code
   */
  @Override
  public int hashCode()
    {
    int result = super.hashCode();
    result = 31 * result + ( negateMatch ? 1 : 0 );
    result = 31 * result + ( delimiter != null ? delimiter.hashCode() : 0 );
    return result;
    }
  }