001/*
002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.operation.regex;
023
024import java.beans.ConstructorProperties;
025import java.util.regex.Matcher;
026
027import cascading.flow.FlowProcess;
028import cascading.operation.Filter;
029import cascading.operation.FilterCall;
030
031/**
032 * Class RegexFilter will apply the regex patternString against every input Tuple value and filter
033 * the Tuple stream accordingly.
034 * <p>
035 * By default, Tuples that match the given pattern are kept, and Tuples that do not
036 * match are filtered out. This can be changed by setting removeMatch to true.
037 * <p>
038 * Also, by default, the whole Tuple is matched against the given patternString (tab delimited, unless otherwise
039 * specified). If matchEachElement is set to true, the pattern is applied to each Tuple value individually.
040 * <p>
041 * This operation uses {@link java.util.regex.Matcher} internally, specifically the method
042 * {@link java.util.regex.Matcher#find()}.
043 * <p>
044 * Note a {@code null} valued argument passed to this filter will be converted to an empty string ({@code ""}) before
045 * the regex is applied.
046 * <p>
047 * Any Object value will be coerced to a String type via any provided {@link cascading.tuple.type.CoercibleType} on
048 * the argument selector or via its {@code toString()} method.
049 *
050 * @see java.util.regex.Matcher
051 * @see java.util.regex.Pattern
052 */
053public class RegexFilter extends RegexMatcher implements Filter<Matcher>
054  {
055  /** Field matchEachElement */
056  protected final boolean matchEachElement;
057
058  /**
059   * Constructor RegexFilter creates a new RegexFilter instance.
060   *
061   * @param patternString of type String
062   */
063  @ConstructorProperties({"patternString"})
064  public RegexFilter( String patternString )
065    {
066    super( patternString );
067    this.matchEachElement = false;
068    }
069
070  /**
071   * Constructor RegexFilter creates a new RegexFilter instance.
072   *
073   * @param patternString of type String
074   * @param delimiter     of type String
075   */
076  @ConstructorProperties({"patternString", "delimiter"})
077  public RegexFilter( String patternString, String delimiter )
078    {
079    super( patternString, delimiter );
080    this.matchEachElement = false;
081    }
082
083  /**
084   * Constructor RegexFilter creates a new RegexFilter instance.
085   *
086   * @param patternString of type String
087   * @param removeMatch   of type boolean
088   */
089  @ConstructorProperties({"patternString", "removeMatch"})
090  public RegexFilter( String patternString, boolean removeMatch )
091    {
092    super( patternString, removeMatch );
093    this.matchEachElement = false;
094    }
095
096  /**
097   * Constructor RegexFilter creates a new RegexFilter instance.
098   *
099   * @param patternString of type String
100   * @param removeMatch   of type boolean
101   * @param delimiter     of type String
102   */
103  @ConstructorProperties({"patternString", "removeMatch", "delimiter"})
104  public RegexFilter( String patternString, boolean removeMatch, String delimiter )
105    {
106    super( patternString, removeMatch, delimiter );
107    this.matchEachElement = false;
108
109    }
110
111  /**
112   * @param patternString    of type String
113   * @param removeMatch      of type boolean, set to true if a match should be filtered
114   * @param matchEachElement of type boolean, set to true if each element should be matched individually
115   */
116  @ConstructorProperties({"patternString", "removeMatch", "matchEachElement"})
117  public RegexFilter( String patternString, boolean removeMatch, boolean matchEachElement )
118    {
119    super( patternString, removeMatch );
120    this.matchEachElement = matchEachElement;
121    }
122
123  /**
124   * @param patternString    of type String
125   * @param removeMatch      of type boolean, set to true if a match should be filtered
126   * @param matchEachElement of type boolean, set to true if each element should be matched individually
127   * @param delimiter        of type String
128   */
129  @ConstructorProperties({"patternString", "removeMatch", "matchEachElement", "delimiter"})
130  public RegexFilter( String patternString, boolean removeMatch, boolean matchEachElement, String delimiter )
131    {
132    super( patternString, removeMatch, delimiter );
133    this.matchEachElement = matchEachElement;
134    }
135
136  public boolean isMatchEachElement()
137    {
138    return matchEachElement;
139    }
140
141  @Override
142  public boolean isRemove( FlowProcess flowProcess, FilterCall<Matcher> filterCall )
143    {
144    if( matchEachElement )
145      return matchEachElement( filterCall.getContext(), filterCall.getArguments() );
146    else
147      return matchWholeTuple( filterCall.getContext(), filterCall.getArguments() );
148    }
149
150  @Override
151  public boolean equals( Object object )
152    {
153    if( this == object )
154      return true;
155    if( !( object instanceof RegexFilter ) )
156      return false;
157    if( !super.equals( object ) )
158      return false;
159
160    RegexFilter that = (RegexFilter) object;
161
162    if( matchEachElement != that.matchEachElement )
163      return false;
164
165    return true;
166    }
167
168  @Override
169  public int hashCode()
170    {
171    int result = super.hashCode();
172    result = 31 * result + ( matchEachElement ? 1 : 0 );
173    return result;
174    }
175  }