001/* 002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved. 003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved. 004 * 005 * Project and contact information: http://www.cascading.org/ 006 * 007 * This file is part of the Cascading project. 008 * 009 * Licensed under the Apache License, Version 2.0 (the "License"); 010 * you may not use this file except in compliance with the License. 011 * You may obtain a copy of the License at 012 * 013 * http://www.apache.org/licenses/LICENSE-2.0 014 * 015 * Unless required by applicable law or agreed to in writing, software 016 * distributed under the License is distributed on an "AS IS" BASIS, 017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 018 * See the License for the specific language governing permissions and 019 * limitations under the License. 020 */ 021 022package cascading.management.annotation; 023 024import java.net.URI; 025import java.util.Collections; 026import java.util.Set; 027import java.util.TreeSet; 028 029import cascading.util.Util; 030import org.slf4j.Logger; 031import org.slf4j.LoggerFactory; 032 033/** 034 * URISanitizer is an implementation of the Sanitizer interface to sanitize URIs of different kinds 035 * (file, HTTP, HDFS, JDBC etc.) Depending on the visibility, the Sanitizer will return different values: 036 * <p> 037 * For hierarchical URIs (jdbc://...): 038 * <ul> 039 * <li>PUBLIC: Only return the path of the URI</li> 040 * <li>PROTECTED: Same as PUBLIC + query parameters</li> 041 * <li>PRIVATE: Same as PROTECTED + URI scheme and authority (host/port)</li> 042 * </ul> 043 * <p> 044 * For opaque URIs (mailto:someone@email.com): 045 * <ul> 046 * <li>PUBLIC: Only return the scheme of the URI, 'mailto:' etc</li> 047 * <li>PROTECTED: Same as PUBLIC</li> 048 * <li>PRIVATE: The whole URI</li> 049 * </ul> 050 * <p> 051 * Parameters containing sensitive information like user-names, passwords, API-keys etc. can be filtered out by setting 052 * the {@link cascading.management.annotation.URISanitizer#PARAMETER_FILTER_PROPERTY} System property to a comma separated 053 * list of names that should never show up in the {@link cascading.management.DocumentService}. Some systems may use 054 * non-standard URIs, which cannot be parsed by {@link java.net.URI}. 055 * <p> 056 * If the sanitizer encounters one of those URIs it 057 * will catch the Exception and return an empty String. This can be overruled by setting the 058 * {@link cascading.management.annotation.URISanitizer#FAILURE_MODE_PASS_THROUGH} System property to {@code true}, 059 * which will cause the actual value being returned. <b>Note</b> that this might leak sensitive information to the 060 * {@link cascading.management.DocumentService}. 061 */ 062public class URISanitizer implements Sanitizer 063 { 064 /** 065 * Logger. 066 */ 067 private static final Logger LOG = LoggerFactory.getLogger( URISanitizer.class ); 068 069 /** 070 * System property for listing URI parameters to be filtered out (usernames, passwords etc.) 071 * <p> 072 * Value cases are ignored, thus {@code UserName} will be equivalent to {@code username}. 073 */ 074 public static final String PARAMETER_FILTER_PROPERTY = "cascading.management.annotation.urisanitizer.parameternames"; 075 076 /** System property to allow values to pass through a parse exception. */ 077 public static final String FAILURE_MODE_PASS_THROUGH = "cascading.management.annotation.urisanitizer.failurepassthrough"; 078 079 private Set<String> parametersToFilter; 080 081 public URISanitizer() 082 { 083 String parameterProperty = System.getProperty( PARAMETER_FILTER_PROPERTY ); 084 085 if( Util.isEmpty( parameterProperty ) ) 086 { 087 parametersToFilter = Collections.emptySet(); 088 } 089 else 090 { 091 // treat "UserName" equal to "username" 092 parametersToFilter = new TreeSet<String>( String.CASE_INSENSITIVE_ORDER ); 093 094 String[] parameterNames = parameterProperty.split( "," ); 095 096 for( String parameterName : parameterNames ) 097 { 098 if( parameterName != null ) 099 parameterName = parameterName.trim(); 100 101 if( !Util.isEmpty( parameterName ) ) 102 parametersToFilter.add( parameterName ); 103 } 104 } 105 } 106 107 @Override 108 public String apply( Visibility visibility, Object value ) 109 { 110 if( value == null ) 111 return null; 112 113 URI uri; 114 115 if( value instanceof URI ) 116 { 117 uri = (URI) value; 118 } 119 else 120 { 121 try 122 { 123 uri = URI.create( encode( value.toString() ) ); 124 } 125 catch( IllegalArgumentException exception ) 126 { 127 LOG.warn( "failed to parse uri: {}, message: {}", value, exception.getMessage() ); 128 LOG.debug( "failed to parse uri: {}", value, exception ); 129 130 if( Boolean.parseBoolean( System.getProperty( FAILURE_MODE_PASS_THROUGH ) ) ) 131 { 132 LOG.warn( "ignoring uri sanitizer failures, returning unsanitized value, property '{}' set to true", FAILURE_MODE_PASS_THROUGH ); 133 return value.toString(); 134 } 135 136 // return an empty string, to avoid the leakage of sensitive information. 137 LOG.info( "set property: '{}', to true to return unsanitized value, returning empty string", FAILURE_MODE_PASS_THROUGH ); 138 return ""; 139 } 140 } 141 142 if( uri.isOpaque() ) 143 { 144 switch( visibility ) 145 { 146 case PRIVATE: 147 return value.toString(); 148 case PROTECTED: 149 case PUBLIC: 150 return uri.getScheme() + ":"; 151 } 152 } 153 154 StringBuilder buffer = new StringBuilder(); 155 156 if( uri.getPath() != null ) // can happen according to the javadoc 157 buffer.append( uri.getPath() ); 158 159 if( ( visibility == Visibility.PROTECTED || visibility == Visibility.PRIVATE ) && uri.getQuery() != null ) 160 buffer.append( "?" ).append( sanitizeQuery( uri.getQuery() ) ); 161 162 if( visibility == Visibility.PRIVATE ) 163 { 164 String currentString = buffer.toString(); // preserve before creating a new instance 165 buffer = new StringBuilder(); 166 167 if( uri.getScheme() != null ) 168 buffer.append( uri.getScheme() ).append( "://" ); 169 170 if( uri.getAuthority() != null ) 171 buffer.append( uri.getAuthority() ); 172 173 buffer.append( currentString ); 174 } 175 176 return buffer.toString(); 177 } 178 179 private String encode( String input ) 180 { 181 String[] parts = input.split( "://", 2 ); 182 String protocol = ""; 183 String rest; 184 185 if( parts.length == 2 ) 186 protocol = parts[ 0 ]; 187 188 rest = parts[ parts.length - 1 ]; 189 190 rest = rest.replaceAll( "\\[", "%5B" ); 191 rest = rest.replaceAll( "\\]", "%5D" ); 192 rest = rest.replaceAll( "\\{", "%7B" ); 193 rest = rest.replaceAll( "\\}", "%7D" ); 194 rest = rest.replaceAll( "\\\\", "/" ); 195 rest = rest.replaceAll( ";", "%3B" ); 196 rest = rest.replaceAll( ",", "%2C" ); 197 198 StringBuilder builder = new StringBuilder(); 199 200 if( !protocol.isEmpty() ) 201 builder.append( protocol ).append( "://" ); 202 203 builder.append( rest ); 204 205 return builder.toString(); 206 } 207 208 private String sanitizeQuery( String query ) 209 { 210 StringBuilder buffer = new StringBuilder(); 211 String[] parts = query.split( "&" ); 212 213 for( String part : parts ) 214 { 215 String[] keyValuePair = part.split( "=" ); 216 String key = keyValuePair[ 0 ]; 217 218 if( parametersToFilter.contains( key ) ) 219 continue; 220 221 buffer.append( part ).append( "&" ); 222 } 223 224 return buffer.toString(); 225 } 226 }