Source code

001/*
002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.tuple;
023
024import java.io.InputStream;
025
026/**
027 * The StreamComparator interface allows for two {@link InputStream} instances to be compared, typically bit-wise.
028 * This is most useful when defining custom types to be stored in {@link Tuple} instances and the values will need
029 * to be grouped on and/or secondary sorted (via {@link cascading.pipe.GroupBy} and/or {@link cascading.pipe.CoGroup})
030 * and the underlying serialization implementation enables a useful bit-wise comparison without deserializing the custom
031 * type into memory.
032 * <p>
033 * Typically this interface is used to mark a {@link java.util.Comparator} as additionally
034 * supporting the ability to compare raw streams in tandem with comparing Object instances.
035 * Thus concrete implementations should implement this interface and the Comparator interface when being used
036 * as a "grouping" or "sorting" field Comparator.
037 * <p>
038 * When used with Hadoop, a {@link cascading.tuple.hadoop.io.BufferedInputStream} is passed into the
039 * {@link #compare(java.io.InputStream, java.io.InputStream)}
040 * method. This class gives access to the underlying byte[] array so individual bytes do not need to be
041 * fetched one at a time via {@link java.io.InputStream#read()}.
042 * So it is useful to declare an implementation as
043 * {@code public class YourCustomComparator implements StreamComparator<BufferedInputStream>, Comparator<YourCustomType>, Serializable}
044 * <p>
045 * Note the method {@link cascading.tuple.hadoop.io.BufferedInputStream#skip(long)} will need to be called with the number
046 * of bytes read from the underlying byte buffer before the compare() method returns.
047 *
048 * @param <T> the {@link InputStream} subtype whose instances this comparator compares
049 */
050public interface StreamComparator<T extends InputStream>
051  {
052  int compare( T lhsStream, T rhsStream ); // NOTE(review): result presumably follows the java.util.Comparator sign convention -- confirm
053  }