001/*
002 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.operation.regex;
022
023import java.beans.ConstructorProperties;
024import java.util.regex.Matcher;
025
026import cascading.flow.FlowProcess;
027import cascading.operation.OperationCall;
028import cascading.tuple.Fields;
029import cascading.tuple.TupleEntry;
030import cascading.util.Util;
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033
034/**
035 * Class RegexMatcher is the base class for common regular expression operations.
036 * <p/>
037 * This operation uses {@link java.util.regex.Matcher} internally, specifically the method {@link java.util.regex.Matcher#find()}.
038 *
039 * @see java.util.regex.Matcher
040 * @see java.util.regex.Pattern
041 */
042public class RegexMatcher extends RegexOperation<Matcher>
043  {
044  /** Field LOG */
045  private static final Logger LOG = LoggerFactory.getLogger( RegexMatcher.class );
046
047  public static final String DEFAULT_DELIM = "\t";
048
049  protected final boolean negateMatch;
050  protected final String delimiter;
051
052  @ConstructorProperties({"patternString"})
053  protected RegexMatcher( String patternString )
054    {
055    super( patternString );
056    this.negateMatch = false;
057    this.delimiter = DEFAULT_DELIM;
058    }
059
060  @ConstructorProperties({"patternString", "delimiter"})
061  protected RegexMatcher( String patternString, String delimiter )
062    {
063    super( patternString );
064    this.negateMatch = false;
065    this.delimiter = delimiter;
066
067    if( this.delimiter == null )
068      throw new IllegalArgumentException( "delimiter may not be null" );
069    }
070
071  @ConstructorProperties({"patternString", "negateMatch"})
072  protected RegexMatcher( String patternString, boolean negateMatch )
073    {
074    super( patternString );
075    this.negateMatch = negateMatch;
076    this.delimiter = DEFAULT_DELIM;
077    }
078
079  @ConstructorProperties({"patternString", "negateMatch", "delimiter"})
080  protected RegexMatcher( String patternString, boolean negateMatch, String delimiter )
081    {
082    super( patternString );
083    this.negateMatch = negateMatch;
084    this.delimiter = delimiter;
085
086    if( this.delimiter == null )
087      throw new IllegalArgumentException( "delimiter may not be null" );
088    }
089
090  @ConstructorProperties({"fieldDeclaration", "patternString"})
091  protected RegexMatcher( Fields fieldDeclaration, String patternString )
092    {
093    super( ANY, fieldDeclaration, patternString );
094    this.negateMatch = false;
095    this.delimiter = DEFAULT_DELIM;
096
097    verify();
098    }
099
100  @ConstructorProperties({"fieldDeclaration", "patternString", "delimiter"})
101  protected RegexMatcher( Fields fieldDeclaration, String patternString, String delimiter )
102    {
103    super( ANY, fieldDeclaration, patternString );
104    this.negateMatch = false;
105    this.delimiter = delimiter;
106
107    if( this.delimiter == null )
108      throw new IllegalArgumentException( "delimiter may not be null" );
109
110    verify();
111    }
112
113  @ConstructorProperties({"fieldDeclaration", "patternString", "negateMatch"})
114  protected RegexMatcher( Fields fieldDeclaration, String patternString, boolean negateMatch )
115    {
116    super( ANY, fieldDeclaration, patternString );
117    this.negateMatch = negateMatch;
118    this.delimiter = DEFAULT_DELIM;
119
120    verify();
121    }
122
123  @ConstructorProperties({"fieldDeclaration", "patternString", "negateMatch", "delimiter"})
124  protected RegexMatcher( Fields fieldDeclaration, String patternString, boolean negateMatch, String delimiter )
125    {
126    super( ANY, fieldDeclaration, patternString );
127    this.negateMatch = negateMatch;
128    this.delimiter = delimiter;
129
130    if( this.delimiter == null )
131      throw new IllegalArgumentException( "delimiter may not be null" );
132
133    verify();
134    }
135
136  public final boolean isNegateMatch()
137    {
138    return negateMatch;
139    }
140
141  public final String getDelimiter()
142    {
143    return delimiter;
144    }
145
146  private void verify()
147    {
148    if( fieldDeclaration.size() != 1 )
149      throw new IllegalArgumentException( "num fields in fieldDeclaration must be one, found: " + fieldDeclaration.printVerbose() );
150    }
151
152  @Override
153  public void prepare( FlowProcess flowProcess, OperationCall<Matcher> operationCall )
154    {
155    operationCall.setContext( getPattern().matcher( "" ) );
156    }
157
158  protected boolean matchWholeTuple( Matcher matcher, TupleEntry input )
159    {
160    Iterable<String> iterable = input.asIterableOf( String.class );
161    String join = Util.join( iterable, delimiter, false );
162
163    matcher.reset( join );
164
165    boolean matchFound = matcher.find();
166
167    if( LOG.isDebugEnabled() )
168      LOG.debug( "pattern: {}, matches: {}", getPatternString(), matchFound );
169
170    return matchFound == negateMatch;
171    }
172
173  protected boolean matchEachElement( Matcher matcher, TupleEntry input )
174    {
175    return matchEachElementPos( matcher, input ) != -1;
176    }
177
178  protected int matchEachElementPos( Matcher matcher, TupleEntry input )
179    {
180    int pos = 0;
181
182    for( int i = 0; i < input.size(); i++ )
183      {
184      String value = input.getString( i );
185
186      if( value == null )
187        value = "";
188
189      matcher.reset( value );
190
191      boolean matchFound = matcher.find();
192
193      if( LOG.isDebugEnabled() )
194        LOG.debug( "pattern: " + getPatternString() + ", matches: " + matchFound + ", element: '" + value + "'" );
195
196      if( matchFound == negateMatch )
197        return pos;
198
199      pos++;
200      }
201
202    return -1;
203    }
204
205  @Override
206  public boolean equals( Object object )
207    {
208    if( this == object )
209      return true;
210    if( !( object instanceof RegexMatcher ) )
211      return false;
212    if( !super.equals( object ) )
213      return false;
214
215    RegexMatcher that = (RegexMatcher) object;
216
217    if( negateMatch != that.negateMatch )
218      return false;
219
220    return !( delimiter != null ? !delimiter.equals( that.delimiter ) : that.delimiter != null );
221    }
222
223  @Override
224  public int hashCode()
225    {
226    int result = super.hashCode();
227    result = 31 * result + ( negateMatch ? 1 : 0 );
228    result = 31 * result + ( delimiter != null ? delimiter.hashCode() : 0 );
229    return result;
230    }
231  }