001/*
002 * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.management.annotation;
022
023import java.net.URI;
024import java.util.Collections;
025import java.util.Set;
026import java.util.TreeSet;
027
028import cascading.util.Util;
029import org.slf4j.Logger;
030import org.slf4j.LoggerFactory;
031
032/**
033 * URISanitizer is an implementation of the Sanitizer interface to sanitize URIs of different kinds
034 * (file, HTTP, HDFS, JDBC etc.) Depending on the visibility, the Sanitizer will return different values:
035 * <ul>
036 * <li>PUBLIC: Only return the path of the URI</li>
037 * <li>PROTECTED: Same as PUBLIC + query parameters</li>
038 * <li>PRIVATE: Same as PROTECTED + URI scheme and authority (host/port)</li>
039 * </ul>
040 * <p/>
041 * <p>Parameters containing sensitive information like user-names, passwords, API-keys etc. can be filtered out by setting
042 * the {@link cascading.management.annotation.URISanitizer#PARAMETER_FILTER_PROPERTY} System property to a comma separated
043 * list of names that should never show up in the {@link cascading.management.DocumentService}. Some systems may use
044 * non-standard URIs, which cannot be parsed by {@link java.net.URI}.</p>
045 * <p/>
046 * <p>If the sanitizer encounters one of those URIs it
047 * will catch the Exception and return an empty String. This can be overruled by setting the
048 * {@link cascading.management.annotation.URISanitizer#FAILURE_MODE_PASS_THROUGH} System property to <code>true</code>,
 * which will cause the actual value to be returned. <b>Note</b> that this might leak sensitive information to the
050 * {@link cascading.management.DocumentService}.</p>
051 */
052public class URISanitizer implements Sanitizer
053  {
054  /**
055   * Logger.
056   */
057  private static final Logger LOG = LoggerFactory.getLogger( URISanitizer.class );
058
059  /**
060   * System property for listing URI parameters to be filtered out (usernames, passwords etc.)
061   * <p/>
062   * Value cases are ignored, thus {@code UserName} will be equivalent to {@code username}.
063   */
064  public static final String PARAMETER_FILTER_PROPERTY = "cascading.management.annotation.urisanitizer.parameternames";
065
066  /** System property to allow values to pass through a parse exception. */
067  public static final String FAILURE_MODE_PASS_THROUGH = "cascading.management.annotation.urisanitizer.failurepassthrough";
068
069  private Set<String> parametersToFilter;
070
071  public URISanitizer()
072    {
073    String parameterProperty = System.getProperty( PARAMETER_FILTER_PROPERTY );
074
075    if( Util.isEmpty( parameterProperty ) )
076      {
077      parametersToFilter = Collections.emptySet();
078      }
079    else
080      {
081      // treat "UserName" equal to "username"
082      parametersToFilter = new TreeSet<String>( String.CASE_INSENSITIVE_ORDER );
083
084      String[] parameterNames = parameterProperty.split( "," );
085
086      for( String parameterName : parameterNames )
087        {
088        if( parameterName != null )
089          parameterName = parameterName.trim();
090
091        if( !Util.isEmpty( parameterName ) )
092          parametersToFilter.add( parameterName );
093        }
094      }
095    }
096
097  @Override
098  public String apply( Visibility visibility, Object value )
099    {
100    if( value == null )
101      return null;
102
103    URI uri;
104
105    if( value instanceof URI )
106      {
107      uri = (URI) value;
108      }
109    else
110      {
111      try
112        {
113        uri = URI.create( encode( value.toString() ) );
114        }
115      catch( IllegalArgumentException exception )
116        {
117        LOG.warn( "failed to parse uri: {}, message: {}", value, exception.getMessage() );
118        LOG.debug( "failed to parse uri: {}", value, exception );
119
120        if( Boolean.parseBoolean( System.getProperty( FAILURE_MODE_PASS_THROUGH ) ) )
121          {
122          LOG.warn( "ignoring uri sanitizer failures, returning unsanitized value, property '{}' set to true", FAILURE_MODE_PASS_THROUGH );
123          return value.toString();
124          }
125
126        // return an empty string, to avoid the leakage of sensitive information.
127        LOG.info( "set property: '{}', to true to return unsanitized value, returning empty string", FAILURE_MODE_PASS_THROUGH );
128        return "";
129        }
130      }
131
132    StringBuilder buffer = new StringBuilder();
133
134    if( uri.getPath() != null ) // can happen according to the javadoc
135      buffer.append( uri.getPath() );
136
137    if( ( visibility == Visibility.PROTECTED || visibility == Visibility.PRIVATE ) && uri.getQuery() != null )
138      buffer.append( "?" ).append( sanitizeQuery( uri.getQuery() ) );
139
140    if( visibility == Visibility.PRIVATE )
141      {
142      String currentString = buffer.toString(); // preserve before creating a new instance
143      buffer = new StringBuilder();
144
145      if( uri.getScheme() != null )
146        buffer.append( uri.getScheme() ).append( "://" );
147
148      if( uri.getAuthority() != null )
149        buffer.append( uri.getAuthority() );
150
151      buffer.append( currentString );
152      }
153
154    return buffer.toString();
155    }
156
157  private String encode( String input )
158    {
159    input = input.replaceAll( "\\[", "%5B" );
160    input = input.replaceAll( "\\]", "%5D" );
161    input = input.replaceAll( "\\{", "%7B" );
162    input = input.replaceAll( "\\}", "%7D" );
163
164    return input;
165    }
166
167  private String sanitizeQuery( String query )
168    {
169    StringBuilder buffer = new StringBuilder();
170    String[] parts = query.split( "&" );
171
172    for( String part : parts )
173      {
174      String[] keyValuePair = part.split( "=" );
175      String key = keyValuePair[ 0 ];
176
177      if( parametersToFilter.contains( key ) )
178        continue;
179
180      buffer.append( part ).append( "&" );
181      }
182
183    return buffer.toString();
184    }
185  }