001/* 002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved. 003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved. 004 * 005 * Project and contact information: http://www.cascading.org/ 006 * 007 * This file is part of the Cascading project. 008 * 009 * Licensed under the Apache License, Version 2.0 (the "License"); 010 * you may not use this file except in compliance with the License. 011 * You may obtain a copy of the License at 012 * 013 * http://www.apache.org/licenses/LICENSE-2.0 014 * 015 * Unless required by applicable law or agreed to in writing, software 016 * distributed under the License is distributed on an "AS IS" BASIS, 017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 018 * See the License for the specific language governing permissions and 019 * limitations under the License. 020 */ 021 022package cascading.operation; 023 024import cascading.flow.FlowProcess; 025import cascading.flow.planner.DeclaresResults; 026import cascading.tuple.Fields; 027 028/** 029 * Interface Operation is the base interface for all functions applied to {@link cascading.tuple.Tuple} streams. 030 * <p> 031 * Specifically {@link Function}, {@link Filter}, {@link Aggregator}, {@link Buffer}, and {@link Assertion}. 032 * <p> 033 * Use {@link BaseOperation} for a convenient way to create new Operation types. 034 * 035 * @see cascading.operation.BaseOperation 036 * @see Function 037 * @see Filter 038 * @see Aggregator 039 * @see Buffer 040 * @see Assertion 041 */ 042public interface Operation<Context> extends DeclaresResults 043 { 044 /** Field ANY denotes that a given Operation will take any number of argument values */ 045 int ANY = Integer.MAX_VALUE; 046 047 /** 048 * The prepare method is called immediately before the current Operation instance is put into play processing Tuples. 049 * This method should initialize any resources that can be shutdown or released in the 050 * {@link #cleanup(cascading.flow.FlowProcess, OperationCall)} method. 051 * <p> 052 * Any resources created should be stored in the {@code Context}, not as instance fields on the class. 053 * <p> 054 * This method may be called more than once during the life of this instance. But it will never be called multiple times 055 * without a cleanup invocation immediately before subsequent invocations. 056 * <p> 057 * If the Flow this Operation instance belongs will execute on a remote cluster, this method will be called 058 * cluster side, not client side. 059 * 060 * @param flowProcess 061 * @param operationCall 062 */ 063 void prepare( FlowProcess flowProcess, OperationCall<Context> operationCall ); 064 065 /** 066 * The flush method is called when an Operation that is caching values must empty the cache. It is called before 067 * {@link #cleanup(cascading.flow.FlowProcess, OperationCall)} is invoked. 068 * <p> 069 * It is safe to cast the {@link cascading.operation.OperationCall} to a {@link FunctionCall}, or equivalent, and 070 * get its {@link cascading.operation.FunctionCall#getOutputCollector()}. 071 * 072 * @param flowProcess 073 * @param operationCall 074 */ 075 void flush( FlowProcess flowProcess, OperationCall<Context> operationCall ); 076 077 /** 078 * The cleanup method is called immediately after the current Operation instance is taken out of play processing Tuples. 079 * This method should shutdown any resources created or initialized during the 080 * {@link #prepare(cascading.flow.FlowProcess, OperationCall)} method. 081 * <p> 082 * This method may be called more than once during the life of this instance. But it will never be called multiple times 083 * without a prepare invocation before. 084 * <p> 085 * If the Flow this Operation instance belongs will execute on a remote cluster, this method will be called 086 * cluster side, not client side. 087 * 088 * @param flowProcess 089 * @param operationCall 090 */ 091 void cleanup( FlowProcess flowProcess, OperationCall<Context> operationCall ); 092 093 /** 094 * Returns the fields created by this Operation instance. If this instance is a {@link Filter}, it should always 095 * return {@link Fields#ALL}. 096 * 097 * @return a Fields instance 098 */ 099 Fields getFieldDeclaration(); 100 101 /** 102 * The minimum number of arguments this Operation expects from the calling {@link cascading.pipe.Each} or 103 * {@link cascading.pipe.Every} Operator. 104 * <p> 105 * Operations should be willing to receive more arguments than expected, but should ignore them if they are unused, 106 * instead of failing. 107 * 108 * @return an int 109 */ 110 int getNumArgs(); 111 112 /** 113 * Returns {@code true} if this Operation instance can safely execute on the same 'record' multiple 114 * times, {@code false} otherwise. 115 * <p> 116 * That is, this Operation is safe if it has no side-effects, or if it does, they are idempotent. 117 * <p> 118 * If seeing the same 'record' more than once can cause errors (internally or externally), 119 * this method must return {@code false}. 120 * 121 * @return a boolean 122 */ 123 boolean isSafe(); 124 }