001/*
002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.operation.regex;
023
024import java.beans.ConstructorProperties;
025import java.util.regex.Matcher;
026
027import cascading.flow.FlowProcess;
028import cascading.operation.OperationCall;
029import cascading.tuple.Fields;
030import cascading.tuple.TupleEntry;
031import cascading.util.Util;
032import org.slf4j.Logger;
033import org.slf4j.LoggerFactory;
034
035/**
036 * Class RegexMatcher is the base class for common regular expression operations.
037 * <p>
038 * This operation uses {@link java.util.regex.Matcher} internally, specifically the method {@link java.util.regex.Matcher#find()}.
039 *
040 * @see java.util.regex.Matcher
041 * @see java.util.regex.Pattern
042 */
043public class RegexMatcher extends RegexOperation<Matcher>
044  {
045  /** Field LOG */
046  private static final Logger LOG = LoggerFactory.getLogger( RegexMatcher.class );
047
048  public static final String DEFAULT_DELIM = "\t";
049
050  protected final boolean negateMatch;
051  protected final String delimiter;
052
053  @ConstructorProperties({"patternString"})
054  protected RegexMatcher( String patternString )
055    {
056    super( patternString );
057    this.negateMatch = false;
058    this.delimiter = DEFAULT_DELIM;
059    }
060
061  @ConstructorProperties({"patternString", "delimiter"})
062  protected RegexMatcher( String patternString, String delimiter )
063    {
064    super( patternString );
065    this.negateMatch = false;
066    this.delimiter = delimiter;
067
068    if( this.delimiter == null )
069      throw new IllegalArgumentException( "delimiter may not be null" );
070    }
071
072  @ConstructorProperties({"patternString", "negateMatch"})
073  protected RegexMatcher( String patternString, boolean negateMatch )
074    {
075    super( patternString );
076    this.negateMatch = negateMatch;
077    this.delimiter = DEFAULT_DELIM;
078    }
079
080  @ConstructorProperties({"patternString", "negateMatch", "delimiter"})
081  protected RegexMatcher( String patternString, boolean negateMatch, String delimiter )
082    {
083    super( patternString );
084    this.negateMatch = negateMatch;
085    this.delimiter = delimiter;
086
087    if( this.delimiter == null )
088      throw new IllegalArgumentException( "delimiter may not be null" );
089    }
090
091  @ConstructorProperties({"fieldDeclaration", "patternString"})
092  protected RegexMatcher( Fields fieldDeclaration, String patternString )
093    {
094    super( ANY, fieldDeclaration, patternString );
095    this.negateMatch = false;
096    this.delimiter = DEFAULT_DELIM;
097
098    verify();
099    }
100
101  @ConstructorProperties({"fieldDeclaration", "patternString", "delimiter"})
102  protected RegexMatcher( Fields fieldDeclaration, String patternString, String delimiter )
103    {
104    super( ANY, fieldDeclaration, patternString );
105    this.negateMatch = false;
106    this.delimiter = delimiter;
107
108    if( this.delimiter == null )
109      throw new IllegalArgumentException( "delimiter may not be null" );
110
111    verify();
112    }
113
114  @ConstructorProperties({"fieldDeclaration", "patternString", "negateMatch"})
115  protected RegexMatcher( Fields fieldDeclaration, String patternString, boolean negateMatch )
116    {
117    super( ANY, fieldDeclaration, patternString );
118    this.negateMatch = negateMatch;
119    this.delimiter = DEFAULT_DELIM;
120
121    verify();
122    }
123
124  @ConstructorProperties({"fieldDeclaration", "patternString", "negateMatch", "delimiter"})
125  protected RegexMatcher( Fields fieldDeclaration, String patternString, boolean negateMatch, String delimiter )
126    {
127    super( ANY, fieldDeclaration, patternString );
128    this.negateMatch = negateMatch;
129    this.delimiter = delimiter;
130
131    if( this.delimiter == null )
132      throw new IllegalArgumentException( "delimiter may not be null" );
133
134    verify();
135    }
136
137  public final boolean isNegateMatch()
138    {
139    return negateMatch;
140    }
141
142  public final String getDelimiter()
143    {
144    return delimiter;
145    }
146
147  private void verify()
148    {
149    if( fieldDeclaration.size() != 1 )
150      throw new IllegalArgumentException( "num fields in fieldDeclaration must be one, found: " + fieldDeclaration.printVerbose() );
151    }
152
153  @Override
154  public void prepare( FlowProcess flowProcess, OperationCall<Matcher> operationCall )
155    {
156    operationCall.setContext( getPattern().matcher( "" ) );
157    }
158
159  protected boolean matchWholeTuple( Matcher matcher, TupleEntry input )
160    {
161    Iterable<String> iterable = input.asIterableOf( String.class );
162    String join = Util.join( iterable, delimiter, false );
163
164    matcher.reset( join );
165
166    boolean matchFound = matcher.find();
167
168    if( LOG.isDebugEnabled() )
169      LOG.debug( "pattern: {}, matches: {}", getPatternString(), matchFound );
170
171    return matchFound == negateMatch;
172    }
173
174  protected boolean matchEachElement( Matcher matcher, TupleEntry input )
175    {
176    return matchEachElementPos( matcher, input ) != -1;
177    }
178
179  protected int matchEachElementPos( Matcher matcher, TupleEntry input )
180    {
181    int pos = 0;
182
183    for( int i = 0; i < input.size(); i++ )
184      {
185      String value = input.getString( i );
186
187      if( value == null )
188        value = "";
189
190      matcher.reset( value );
191
192      boolean matchFound = matcher.find();
193
194      if( LOG.isDebugEnabled() )
195        LOG.debug( "pattern: " + getPatternString() + ", matches: " + matchFound + ", element: '" + value + "'" );
196
197      if( matchFound == negateMatch )
198        return pos;
199
200      pos++;
201      }
202
203    return -1;
204    }
205
206  @Override
207  public boolean equals( Object object )
208    {
209    if( this == object )
210      return true;
211    if( !( object instanceof RegexMatcher ) )
212      return false;
213    if( !super.equals( object ) )
214      return false;
215
216    RegexMatcher that = (RegexMatcher) object;
217
218    if( negateMatch != that.negateMatch )
219      return false;
220
221    return !( delimiter != null ? !delimiter.equals( that.delimiter ) : that.delimiter != null );
222    }
223
224  @Override
225  public int hashCode()
226    {
227    int result = super.hashCode();
228    result = 31 * result + ( negateMatch ? 1 : 0 );
229    result = 31 * result + ( delimiter != null ? delimiter.hashCode() : 0 );
230    return result;
231    }
232  }