/*
 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
021
022package cascading.management.annotation;
023
024import java.net.URI;
025import java.util.Collections;
026import java.util.Set;
027import java.util.TreeSet;
028
029import cascading.util.Util;
030import org.slf4j.Logger;
031import org.slf4j.LoggerFactory;
032
033/**
034 * URISanitizer is an implementation of the Sanitizer interface to sanitize URIs of different kinds
035 * (file, HTTP, HDFS, JDBC etc.) Depending on the visibility, the Sanitizer will return different values:
036 * <p>
037 * For hierarchical URIs (jdbc://...):
038 * <ul>
039 * <li>PUBLIC: Only return the path of the URI</li>
040 * <li>PROTECTED: Same as PUBLIC + query parameters</li>
041 * <li>PRIVATE: Same as PROTECTED + URI scheme and authority (host/port)</li>
042 * </ul>
043 * <p>
044 * For opaque URIs (mailto:someone@email.com):
045 * <ul>
046 * <li>PUBLIC: Only return the scheme of the URI, 'mailto:' etc</li>
047 * <li>PROTECTED: Same as PUBLIC</li>
048 * <li>PRIVATE: The whole URI</li>
049 * </ul>
050 * <p>
051 * Parameters containing sensitive information like user-names, passwords, API-keys etc. can be filtered out by setting
052 * the {@link cascading.management.annotation.URISanitizer#PARAMETER_FILTER_PROPERTY} System property to a comma separated
053 * list of names that should never show up in the {@link cascading.management.DocumentService}. Some systems may use
054 * non-standard URIs, which cannot be parsed by {@link java.net.URI}.
055 * <p>
056 * If the sanitizer encounters one of those URIs it
057 * will catch the Exception and return an empty String. This can be overruled by setting the
058 * {@link cascading.management.annotation.URISanitizer#FAILURE_MODE_PASS_THROUGH} System property to {@code true},
059 * which will cause the actual value being returned. <b>Note</b> that this might leak sensitive information to the
060 * {@link cascading.management.DocumentService}.
061 */
062public class URISanitizer implements Sanitizer
063  {
064  /**
065   * Logger.
066   */
067  private static final Logger LOG = LoggerFactory.getLogger( URISanitizer.class );
068
069  /**
070   * System property for listing URI parameters to be filtered out (usernames, passwords etc.)
071   * <p>
072   * Value cases are ignored, thus {@code UserName} will be equivalent to {@code username}.
073   */
074  public static final String PARAMETER_FILTER_PROPERTY = "cascading.management.annotation.urisanitizer.parameternames";
075
076  /** System property to allow values to pass through a parse exception. */
077  public static final String FAILURE_MODE_PASS_THROUGH = "cascading.management.annotation.urisanitizer.failurepassthrough";
078
079  private Set<String> parametersToFilter;
080
081  public URISanitizer()
082    {
083    String parameterProperty = System.getProperty( PARAMETER_FILTER_PROPERTY );
084
085    if( Util.isEmpty( parameterProperty ) )
086      {
087      parametersToFilter = Collections.emptySet();
088      }
089    else
090      {
091      // treat "UserName" equal to "username"
092      parametersToFilter = new TreeSet<String>( String.CASE_INSENSITIVE_ORDER );
093
094      String[] parameterNames = parameterProperty.split( "," );
095
096      for( String parameterName : parameterNames )
097        {
098        if( parameterName != null )
099          parameterName = parameterName.trim();
100
101        if( !Util.isEmpty( parameterName ) )
102          parametersToFilter.add( parameterName );
103        }
104      }
105    }
106
107  @Override
108  public String apply( Visibility visibility, Object value )
109    {
110    if( value == null )
111      return null;
112
113    URI uri;
114
115    if( value instanceof URI )
116      {
117      uri = (URI) value;
118      }
119    else
120      {
121      try
122        {
123        uri = URI.create( encode( value.toString() ) );
124        }
125      catch( IllegalArgumentException exception )
126        {
127        LOG.warn( "failed to parse uri: {}, message: {}", value, exception.getMessage() );
128        LOG.debug( "failed to parse uri: {}", value, exception );
129
130        if( Boolean.parseBoolean( System.getProperty( FAILURE_MODE_PASS_THROUGH ) ) )
131          {
132          LOG.warn( "ignoring uri sanitizer failures, returning unsanitized value, property '{}' set to true", FAILURE_MODE_PASS_THROUGH );
133          return value.toString();
134          }
135
136        // return an empty string, to avoid the leakage of sensitive information.
137        LOG.info( "set property: '{}', to true to return unsanitized value, returning empty string", FAILURE_MODE_PASS_THROUGH );
138        return "";
139        }
140      }
141
142    if( uri.isOpaque() )
143      {
144      switch( visibility )
145        {
146        case PRIVATE:
147          return value.toString();
148        case PROTECTED:
149        case PUBLIC:
150          return uri.getScheme() + ":";
151        }
152      }
153
154    StringBuilder buffer = new StringBuilder();
155
156    if( uri.getPath() != null ) // can happen according to the javadoc
157      buffer.append( uri.getPath() );
158
159    if( ( visibility == Visibility.PROTECTED || visibility == Visibility.PRIVATE ) && uri.getQuery() != null )
160      buffer.append( "?" ).append( sanitizeQuery( uri.getQuery() ) );
161
162    if( visibility == Visibility.PRIVATE )
163      {
164      String currentString = buffer.toString(); // preserve before creating a new instance
165      buffer = new StringBuilder();
166
167      if( uri.getScheme() != null )
168        buffer.append( uri.getScheme() ).append( "://" );
169
170      if( uri.getAuthority() != null )
171        buffer.append( uri.getAuthority() );
172
173      buffer.append( currentString );
174      }
175
176    return buffer.toString();
177    }
178
179  private String encode( String input )
180    {
181    String[] parts = input.split( "://", 2 );
182    String protocol = "";
183    String rest;
184
185    if( parts.length == 2 )
186      protocol = parts[ 0 ];
187
188    rest = parts[ parts.length - 1 ];
189
190    rest = rest.replaceAll( "\\[", "%5B" );
191    rest = rest.replaceAll( "\\]", "%5D" );
192    rest = rest.replaceAll( "\\{", "%7B" );
193    rest = rest.replaceAll( "\\}", "%7D" );
194    rest = rest.replaceAll( "\\\\", "/" );
195    rest = rest.replaceAll( ";", "%3B" );
196    rest = rest.replaceAll( ",", "%2C" );
197
198    StringBuilder builder = new StringBuilder();
199
200    if( !protocol.isEmpty() )
201      builder.append( protocol ).append( "://" );
202
203    builder.append( rest );
204
205    return builder.toString();
206    }
207
208  private String sanitizeQuery( String query )
209    {
210    StringBuilder buffer = new StringBuilder();
211    String[] parts = query.split( "&" );
212
213    for( String part : parts )
214      {
215      String[] keyValuePair = part.split( "=" );
216      String key = keyValuePair[ 0 ];
217
218      if( parametersToFilter.contains( key ) )
219        continue;
220
221      buffer.append( part ).append( "&" );
222      }
223
224    return buffer.toString();
225    }
226  }