/*
 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.management.annotation;

import java.net.URI;
import java.util.Collections;
import java.util.Set;
import java.util.TreeSet;

import cascading.util.Util;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * URISanitizer is an implementation of the Sanitizer interface to sanitize URIs of different kinds
 * (file, HTTP, HDFS, JDBC etc.) Depending on the visibility, the Sanitizer will return different values:
 * <p>
 * For hierarchical URIs (jdbc://...):
 * <ul>
 * <li>PUBLIC: Only return the path of the URI</li>
 * <li>PROTECTED: Same as PUBLIC + query parameters</li>
 * <li>PRIVATE: Same as PROTECTED + URI scheme and authority (host/port)</li>
 * </ul>
 * <p>
 * For opaque URIs (mailto:someone@email.com):
 * <ul>
 * <li>PUBLIC: Only return the scheme of the URI, 'mailto:' etc</li>
 * <li>PROTECTED: Same as PUBLIC</li>
 * <li>PRIVATE: The whole URI</li>
 * </ul>
 * <p>
 * Parameters containing sensitive information like user-names, passwords, API-keys etc. can be filtered out by
 * setting the {@link cascading.management.annotation.URISanitizer#PARAMETER_FILTER_PROPERTY} System property to a
 * comma separated list of names that should never show up in the {@link cascading.management.DocumentService}.
 * Retained query parameters are joined with a single {@code &} (no trailing separator), and the {@code ?} is
 * omitted altogether when no parameter is retained.
 * <p>
 * Some systems may use non-standard URIs, which cannot be parsed by {@link java.net.URI}. If the sanitizer
 * encounters one of those URIs it will catch the Exception and return an empty String. This can be overruled by
 * setting the {@link cascading.management.annotation.URISanitizer#FAILURE_MODE_PASS_THROUGH} System property to
 * <code>true</code>, which will cause the actual value being returned. <b>Note</b> that this might leak sensitive
 * information to the {@link cascading.management.DocumentService}.
 */
public class URISanitizer implements Sanitizer
  {
  /** Logger. */
  private static final Logger LOG = LoggerFactory.getLogger( URISanitizer.class );

  /**
   * System property for listing URI parameters to be filtered out (usernames, passwords etc.)
   * <p>
   * Value cases are ignored, thus {@code UserName} will be equivalent to {@code username}.
   */
  public static final String PARAMETER_FILTER_PROPERTY = "cascading.management.annotation.urisanitizer.parameternames";

  /** System property to allow values to pass through a parse exception. */
  public static final String FAILURE_MODE_PASS_THROUGH = "cascading.management.annotation.urisanitizer.failurepassthrough";

  /** Names of the query parameters that are removed from sanitized URIs. */
  private final Set<String> parametersToFilter;

  /**
   * Creates a sanitizer whose parameter filter is read once, at construction time, from the
   * {@link #PARAMETER_FILTER_PROPERTY} System property. Blank entries in the comma separated list are ignored.
   */
  public URISanitizer()
    {
    String parameterProperty = System.getProperty( PARAMETER_FILTER_PROPERTY );

    if( Util.isEmpty( parameterProperty ) )
      {
      parametersToFilter = Collections.emptySet();
      return;
      }

    // treat "UserName" equal to "username"
    Set<String> names = new TreeSet<String>( String.CASE_INSENSITIVE_ORDER );

    for( String parameterName : parameterProperty.split( "," ) )
      {
      String trimmed = parameterName.trim();

      if( !Util.isEmpty( trimmed ) )
        names.add( trimmed );
      }

    parametersToFilter = names;
    }

  /**
   * Sanitizes the given value according to the requested visibility.
   *
   * @param visibility the visibility level deciding how much of the URI is returned
   * @param value      a {@link URI}, or any object whose {@code toString()} is parsed as a URI; may be null
   * @return null if {@code value} is null; the sanitized URI otherwise. If the value cannot be parsed, an empty
   * String is returned unless {@link #FAILURE_MODE_PASS_THROUGH} is set to {@code true}, in which case the
   * unsanitized value is returned.
   */
  @Override
  public String apply( Visibility visibility, Object value )
    {
    if( value == null )
      return null;

    URI uri;

    if( value instanceof URI )
      {
      uri = (URI) value;
      }
    else
      {
      try
        {
        uri = URI.create( encode( value.toString() ) );
        }
      catch( IllegalArgumentException exception )
        {
        // note: the raw value is written to the log here, before any sanitizing could happen
        LOG.warn( "failed to parse uri: {}, message: {}", value, exception.getMessage() );
        LOG.debug( "failed to parse uri: {}", value, exception );

        if( Boolean.parseBoolean( System.getProperty( FAILURE_MODE_PASS_THROUGH ) ) )
          {
          LOG.warn( "ignoring uri sanitizer failures, returning unsanitized value, property '{}' set to true", FAILURE_MODE_PASS_THROUGH );
          return value.toString();
          }

        // return an empty string, to avoid the leakage of sensitive information.
        LOG.info( "set property: '{}', to true to return unsanitized value, returning empty string", FAILURE_MODE_PASS_THROUGH );
        return "";
        }
      }

    if( uri.isOpaque() )
      {
      switch( visibility )
        {
        case PRIVATE:
          return value.toString();
        case PROTECTED:
        case PUBLIC:
          return uri.getScheme() + ":";
        }
      }

    StringBuilder pathAndQuery = new StringBuilder();

    if( uri.getPath() != null ) // can happen according to the javadoc
      pathAndQuery.append( uri.getPath() );

    boolean includeQuery = visibility == Visibility.PROTECTED || visibility == Visibility.PRIVATE;

    if( includeQuery && uri.getQuery() != null )
      {
      String sanitizedQuery = sanitizeQuery( uri.getQuery() );

      // do not emit a dangling '?' if every parameter was filtered out
      if( !sanitizedQuery.isEmpty() )
        pathAndQuery.append( "?" ).append( sanitizedQuery );
      }

    if( visibility != Visibility.PRIVATE )
      return pathAndQuery.toString();

    // PRIVATE additionally exposes scheme and authority in front of path and query
    StringBuilder buffer = new StringBuilder();

    if( uri.getScheme() != null )
      buffer.append( uri.getScheme() ).append( "://" );

    if( uri.getAuthority() != null )
      buffer.append( uri.getAuthority() );

    return buffer.append( pathAndQuery ).toString();
    }

  /**
   * Replaces characters {@link URI#create(String)} rejects ({@code [ ] { }}), as well as {@code ;} and
   * {@code ,}, with their percent-encoded form and turns backslashes into forward slashes. Only the part
   * following the first {@code ://} is touched; anything before it is kept verbatim.
   *
   * @param input the raw URI string
   * @return the encoded URI string
   */
  private String encode( String input )
    {
    String[] parts = input.split( "://", 2 );
    String protocol = parts.length == 2 ? parts[ 0 ] : "";
    String rest = parts[ parts.length - 1 ];

    // literal replacements, no regular expressions required
    rest = rest.replace( "[", "%5B" )
      .replace( "]", "%5D" )
      .replace( "{", "%7B" )
      .replace( "}", "%7D" )
      .replace( '\\', '/' )
      .replace( ";", "%3B" )
      .replace( ",", "%2C" );

    // an empty protocol (input starting with "://") is dropped together with its separator
    if( protocol.isEmpty() )
      return rest;

    return protocol + "://" + rest;
    }

  /**
   * Removes every parameter whose name is listed in {@link #parametersToFilter} from the given query.
   * <p>
   * The remaining {@code key=value} segments are joined with {@code &}, without a trailing separator. Empty
   * segments (as in {@code a=1&&b=2}) are skipped.
   *
   * @param query the query part of a URI, without the leading {@code ?}
   * @return the filtered query, or an empty String if nothing is left
   */
  private String sanitizeQuery( String query )
    {
    StringBuilder buffer = new StringBuilder();

    for( String part : query.split( "&" ) )
      {
      if( part.isEmpty() )
        continue;

      // indexOf instead of split( "=" ): splitting a lone "=" yields an empty array
      int separator = part.indexOf( '=' );
      String key = separator < 0 ? part : part.substring( 0, separator );

      if( parametersToFilter.contains( key ) )
        continue;

      if( buffer.length() > 0 )
        buffer.append( "&" );

      buffer.append( part );
      }

    return buffer.toString();
    }
  }