001/*
002 * Copyright (c) 2007-2016 Concurrent, Inc. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading;
022
import java.io.File;
import java.io.IOException;
import java.math.BigInteger;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.io.filefilter.RegexFileFilter;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.junit.internal.runners.SuiteMethod;
import org.junit.runner.RunWith;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
042
043/**
044 *
045 */
046@RunWith(SuiteMethod.class)
047public class ComparePlatformsTest extends CascadingTestCase
048  {
049  private static final Logger LOG = LoggerFactory.getLogger( ComparePlatformsTest.class );
050  public static final String NONDETERMINISTIC = "-nondeterministic";
051
052  public static Test suite() throws Exception
053    {
054    String root = System.getProperty( "test.output.roots" );
055
056    if( root == null )
057      return new TestSuite();
058
059    LOG.info( "output roots: {}", root );
060
061    String[] roots = root.split( "," );
062
063    File localRoot = new File( find( roots, "/cascading-local/" ), "local" );
064    File hadoopRoot = new File( find( roots, "/cascading-hadoop/" ), "hadoop" );
065    File hadoop2Root = new File( find( roots, "/cascading-hadoop2-mr1/" ), "hadoop2-mr1" );
066
067    LOG.info( "local path: {}", localRoot );
068    LOG.info( "hadoop path: {}", hadoopRoot );
069    LOG.info( "hadoop2 path: {}", hadoop2Root );
070
071    TestSuite suite = new TestSuite();
072
073    createComparisons( "local~hadoop", localRoot, hadoopRoot, suite );
074    createComparisons( "local~hadoop2-mr1", localRoot, hadoop2Root, suite );
075
076    return suite;
077    }
078
079  private static void createComparisons( String comparison, File lhsRoot, File rhsRoot, TestSuite suite )
080    {
081    LOG.info( "comparing directory: {}, with: {}", lhsRoot, rhsRoot );
082
083    LinkedList<File> lhsFiles = new LinkedList<File>( FileUtils.listFiles( lhsRoot, new RegexFileFilter( "^[\\w-]+" ), TrueFileFilter.INSTANCE ) );
084    LinkedList<File> rhsFiles = new LinkedList<File>();
085
086    LOG.info( "found lhs files: {}", lhsFiles.size() );
087
088    int rootLength = lhsRoot.toString().length() + 1;
089
090    ListIterator<File> iterator = lhsFiles.listIterator();
091    while( iterator.hasNext() )
092      {
093      File localFile = iterator.next();
094      File file = new File( rhsRoot, localFile.toString().substring( rootLength ) );
095
096      if( localFile.toString().endsWith( NONDETERMINISTIC ) )
097        iterator.remove();
098      else if( file.exists() )
099        rhsFiles.add( file );
100      else
101        iterator.remove();
102      }
103
104    LOG.info( "running {} comparisons", lhsFiles.size() );
105
106    for( int i = 0; i < lhsFiles.size(); i++ )
107      {
108      File localFile = lhsFiles.get( i );
109      File hadoopFile = rhsFiles.get( i );
110
111      suite.addTest( new CompareTestCase( comparison, localFile, hadoopFile ) );
112      }
113    }
114
115  private static String find( String[] roots, String string )
116    {
117    for( String root : roots )
118      {
119      if( root.contains( string ) )
120        return root;
121      }
122
123    throw new IllegalStateException( "not found in roots: " + string );
124    }
125
126  public static class CompareTestCase extends TestCase
127    {
128    File localFile;
129    File hadoopFile;
130
131    public CompareTestCase( String comparison, File localFile, File hadoopFile )
132      {
133      super( "testFiles" );
134
135      this.localFile = localFile;
136      this.hadoopFile = hadoopFile;
137
138      // craps out junit, unsure how to set display name
139//      setName( String.format( "%s..%s", comparison, localFile.getName() ) ); // relevant bits have same file name
140      }
141
142    @org.junit.Test
143    public void testFiles() throws IOException
144      {
145      LinkedList<String> localLines = getLines( localFile );
146      LinkedList<String> hadoopLines = getLines( hadoopFile );
147
148      assertEquals( localFile + " != " + hadoopFile, localLines.size(), hadoopLines.size() );
149
150      if( localLines.size() == 0 )
151        return;
152
153      Collections.sort( localLines );
154      Collections.sort( hadoopLines );
155
156      if( hasLineNumbers( localLines ) )
157        {
158        trimLineNumbers( localLines );
159        trimLineNumbers( hadoopLines );
160        }
161
162      for( int i = 0; i < localLines.size(); i++ )
163        {
164        String localLine = localLines.get( i );
165
166        assertTrue( localFile + " - not in hadoop lines: " + localLine, hadoopLines.contains( localLine ) );
167        }
168      }
169
170    private void trimLineNumbers( LinkedList<String> lines )
171      {
172      ListIterator<String> iterator = lines.listIterator();
173
174      while( iterator.hasNext() )
175        iterator.set( iterator.next().replaceFirst( "^\\d+\\s(.*)$", "$1" ) );
176      }
177
178    private boolean hasLineNumbers( List<String> lines )
179      {
180      List<Integer> values = new LinkedList<Integer>();
181
182      for( String line : lines )
183        {
184        if( !line.matches( "^\\d+\\s.*$" ) )
185          return false;
186
187        String value = line.replaceFirst( "^(\\d+)\\s.*$", "$1" );
188
189        if( value == null || value.isEmpty() )
190          return false;
191
192        values.add( Integer.parseInt( value ) );
193        }
194
195      Collections.sort( values );
196
197      int last = -1;
198      for( Integer value : values )
199        {
200        if( last >= value )
201          return false;
202
203        last = value;
204        }
205
206      return true;
207      }
208
209    private LinkedList<String> getLines( File localFile ) throws IOException
210      {
211      LinkedList<String> lines = new LinkedList<String>();
212
213      if( !localFile.isDirectory() )
214        return populate( localFile, lines );
215
216      Collection<File> subFiles = FileUtils.listFiles( localFile, new RegexFileFilter( "^part-.*" ), null );
217
218      for( File subFile : subFiles )
219        populate( subFile, lines );
220
221      return lines;
222      }
223
224    private LinkedList<String> populate( File localFile, LinkedList<String> lines ) throws IOException
225      {
226      LineIterator iterator = FileUtils.lineIterator( localFile, "UTF-8" );
227
228      while( iterator.hasNext() )
229        lines.add( iterator.next() );
230
231      return lines;
232      }
233    }
234  }