/*
 * Copyright (c) 2007-2016 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading;

import java.io.File;
import java.io.IOException;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.io.filefilter.RegexFileFilter;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.junit.internal.runners.SuiteMethod;
import org.junit.runner.RunWith;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Builds a dynamic JUnit suite that compares the files found under the "local" output root
 * with the files at the same relative paths under the "hadoop" and "hadoop2-mr1" output roots.
 * <p>
 * The roots are read from the comma separated {@code test.output.roots} system property. When the
 * property is not set, an empty suite is returned so the class is a no-op.
 * <p>
 * One {@link CompareTestCase} is registered for every file that exists on both sides and whose path
 * does not end with {@link #NONDETERMINISTIC}.
 */
@RunWith(SuiteMethod.class)
public class ComparePlatformsTest extends CascadingTestCase
  {
  private static final Logger LOG = LoggerFactory.getLogger( ComparePlatformsTest.class );

  /** Path suffix marking output that is excluded from comparison. */
  public static final String NONDETERMINISTIC = "-nondeterministic";

  /**
   * Creates the suite of file comparisons.
   *
   * @return a suite holding one test per comparable file, or an empty suite if
   *         {@code test.output.roots} is not set
   * @throws IllegalStateException if one of the expected platform roots is missing from the property
   */
  public static Test suite() throws Exception
    {
    String root = System.getProperty( "test.output.roots" );

    if( root == null )
      return new TestSuite();

    LOG.info( "output roots: {}", root );

    String[] roots = root.split( "," );

    File localRoot = new File( find( roots, "/cascading-local/" ), "local" );
    File hadoopRoot = new File( find( roots, "/cascading-hadoop/" ), "hadoop" );
    File hadoop2Root = new File( find( roots, "/cascading-hadoop2-mr1/" ), "hadoop2-mr1" );

    LOG.info( "local path: {}", localRoot );
    LOG.info( "hadoop path: {}", hadoopRoot );
    LOG.info( "hadoop2 path: {}", hadoop2Root );

    TestSuite suite = new TestSuite();

    createComparisons( "local~hadoop", localRoot, hadoopRoot, suite );
    createComparisons( "local~hadoop2-mr1", localRoot, hadoop2Root, suite );

    return suite;
    }

  /**
   * Adds one {@link CompareTestCase} to {@code suite} for every file below {@code lhsRoot} that
   * also exists at the same relative path below {@code rhsRoot}.
   * <p>
   * Files whose path ends with {@link #NONDETERMINISTIC}, and files without a counterpart on the
   * right hand side, are skipped.
   *
   * @param comparison label describing the pair of platforms being compared
   * @param lhsRoot    directory that is walked recursively to discover files
   * @param rhsRoot    directory the discovered relative paths are resolved against
   * @param suite      suite receiving the created test cases
   */
  private static void createComparisons( String comparison, File lhsRoot, File rhsRoot, TestSuite suite )
    {
    LOG.info( "comparing directory: {}, with: {}", lhsRoot, rhsRoot );

    Collection<File> lhsFiles = FileUtils.listFiles( lhsRoot, new RegexFileFilter( "^[\\w-]+" ), TrueFileFilter.INSTANCE );

    LOG.info( "found lhs files: {}", lhsFiles.size() );

    // +1 skips the path separator that follows the root in every discovered path
    int rootLength = lhsRoot.toString().length() + 1;

    // collect first so the number of comparisons can be logged before they are registered
    List<CompareTestCase> comparisons = new ArrayList<CompareTestCase>();

    for( File lhsFile : lhsFiles )
      {
      String lhsPath = lhsFile.toString();

      if( lhsPath.endsWith( NONDETERMINISTIC ) )
        continue;

      File rhsFile = new File( rhsRoot, lhsPath.substring( rootLength ) );

      if( rhsFile.exists() )
        comparisons.add( new CompareTestCase( comparison, lhsFile, rhsFile ) );
      }

    LOG.info( "running {} comparisons", comparisons.size() );

    for( CompareTestCase testCase : comparisons )
      suite.addTest( testCase );
    }

  /**
   * Returns the first entry of {@code roots} that contains {@code string}.
   *
   * @param roots  candidate root paths
   * @param string fragment that must occur in the returned root
   * @return the first matching root
   * @throws IllegalStateException if no root contains {@code string}
   */
  private static String find( String[] roots, String string )
    {
    for( String root : roots )
      {
      if( root.contains( string ) )
        return root;
      }

    throw new IllegalStateException( "not found in roots: " + string );
    }

  /**
   * Compares the lines of a single pair of result files (or directories of {@code part-*} files).
   */
  public static class CompareTestCase extends TestCase
    {
    /** Matches a line starting with digits and one whitespace; group 1 is the number, group 2 the remainder. */
    private static final Pattern LINE_NUMBER_PREFIX = Pattern.compile( "^(\\d+)\\s(.*)$" );

    File localFile;
    File hadoopFile;

    /**
     * @param comparison label of the platform pair; currently unused, see the note in the body
     * @param localFile  file or directory written by the left hand side platform
     * @param hadoopFile file or directory written by the right hand side platform
     */
    public CompareTestCase( String comparison, File localFile, File hadoopFile )
      {
      super( "testFiles" );

      this.localFile = localFile;
      this.hadoopFile = hadoopFile;

      // NOTE: a friendlier display name was attempted via setName(), but it breaks JUnit --
      // presumably because the name given to super() identifies the test method to run.
      // No alternative for setting a display name is known, so the default is kept:
      // setName( String.format( "%s..%s", comparison, localFile.getName() ) ); // relevant bits have same file name
      }

    /**
     * Asserts that both sides hold the same number of lines and that every local line also occurs
     * on the hadoop side. If every local line starts with a distinct number, that leading number
     * is stripped from both sides before the lines are compared.
     */
    @org.junit.Test
    public void testFiles() throws IOException
      {
      LinkedList<String> localLines = getLines( localFile );
      LinkedList<String> hadoopLines = getLines( hadoopFile );

      assertEquals( localFile + " != " + hadoopFile, localLines.size(), hadoopLines.size() );

      if( localLines.isEmpty() )
        return;

      // sorting keeps the order in which a mismatch is reported stable
      Collections.sort( localLines );
      Collections.sort( hadoopLines );

      if( hasLineNumbers( localLines ) )
        {
        trimLineNumbers( localLines );
        trimLineNumbers( hadoopLines );
        }

      // hash lookup instead of a linear LinkedList.contains() scan per line
      Set<String> hadoopLineSet = new HashSet<String>( hadoopLines );

      for( String localLine : localLines )
        assertTrue( localFile + " - not in hadoop lines: " + localLine, hadoopLineSet.contains( localLine ) );
      }

    /**
     * Replaces every line in place with the text following its leading number and whitespace
     * character. Lines that do not start with such a prefix are left unchanged.
     */
    private void trimLineNumbers( LinkedList<String> lines )
      {
      ListIterator<String> iterator = lines.listIterator();

      while( iterator.hasNext() )
        iterator.set( LINE_NUMBER_PREFIX.matcher( iterator.next() ).replaceFirst( "$2" ) );
      }

    /**
     * Returns true if every line starts with digits followed by a whitespace character and no two
     * lines carry the same numeric value.
     * <p>
     * Numbers are parsed as {@link BigInteger} so digit runs larger than an {@code int}
     * do not raise a {@link NumberFormatException}.
     */
    private boolean hasLineNumbers( List<String> lines )
      {
      Set<BigInteger> seen = new HashSet<BigInteger>();

      for( String line : lines )
        {
        Matcher matcher = LINE_NUMBER_PREFIX.matcher( line );

        if( !matcher.matches() )
          return false;

        // add() returns false on a repeated value, which means the prefix is not a unique line number
        if( !seen.add( new BigInteger( matcher.group( 1 ) ) ) )
          return false;
        }

      return true;
      }

    /**
     * Reads all lines of {@code localFile}. If it is a directory, the lines of every direct child
     * file whose name starts with {@code part-} are concatenated instead.
     */
    private LinkedList<String> getLines( File localFile ) throws IOException
      {
      LinkedList<String> lines = new LinkedList<String>();

      if( !localFile.isDirectory() )
        return populate( localFile, lines );

      // null directory filter: do not descend into sub-directories
      Collection<File> subFiles = FileUtils.listFiles( localFile, new RegexFileFilter( "^part-.*" ), null );

      for( File subFile : subFiles )
        populate( subFile, lines );

      return lines;
      }

    /**
     * Appends every line of {@code localFile}, read as UTF-8, to {@code lines}.
     *
     * @return the given {@code lines} list
     */
    private LinkedList<String> populate( File localFile, LinkedList<String> lines ) throws IOException
      {
      LineIterator iterator = FileUtils.lineIterator( localFile, "UTF-8" );

      try
        {
        while( iterator.hasNext() )
          lines.add( iterator.nextLine() );
        }
      finally
        {
        // the iterator holds an open reader on the file; release it even if reading fails
        iterator.close();
        }

      return lines;
      }
    }
  }