/*
 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.flow;

import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;

import cascading.flow.planner.Scope;
import cascading.flow.planner.graph.ElementGraph;
import cascading.flow.planner.process.ProcessModel;
import cascading.stats.FlowNodeStats;
import cascading.tap.Tap;

/**
 * Class FlowNode represents the smallest parallelizable unit of work. It is a child to a
 * {@link cascading.flow.FlowStep} and may have many siblings within the FlowStep.
 * <p>
 * A FlowNode is commonly executed as one or more slices, where a slice is a JVM executing against a portion
 * of data.
 * <p>
 * Most slices within a FlowNode are identical, except for the sub-set of data they will be processing against.
 * <p>
 * But on some platforms, like MapReduce, a slice is executing a single flow pipeline. Thus a FlowNode may consist of
 * some set of pipelines (or pipeline graph). One pipeline per 'streamed' input source Tap.
 * <p>
 * In a MapReduce model (like Apache Hadoop MapReduce) a FlowNode can be the Map or Reduce side of a job (where a job
 * is a FlowStep).
 * <p>
 * In a DAG model (like Apache Tez), a FlowNode is a 'vertex', and the 'DAG' is a FlowStep.
 */
public interface FlowNode extends ProcessModel
  {
  /**
   * String constant {@code "cascading.flow.node"}.
   * <p>
   * NOTE(review): presumably a property key under which a FlowNode is published to a platform
   * configuration -- confirm against the code that reads or writes it.
   */
  String CASCADING_FLOW_NODE = "cascading.flow.node";

  /**
   * Returns the identifier of this FlowNode.
   * <p>
   * NOTE(review): the scope within which the identifier is unique is not specified here.
   *
   * @return String
   */
  String getID();

  /**
   * Returns an immutable map of properties giving more details about the FlowNode object.
   * <p>
   * FlowNode descriptions provide meta-data to monitoring systems describing the workload a given FlowNode represents.
   * For known description types, see {@link FlowNodeDescriptors}.
   *
   * @return Map
   */
  Map<String, String> getFlowNodeDescriptor();

  /**
   * Returns the {@link FlowNodeStats} instance associated with this FlowNode.
   *
   * @return FlowNodeStats
   */
  FlowNodeStats getFlowNodeStats();

  /**
   * Returns the {@link FlowStep} this FlowNode is a child of.
   *
   * @return FlowStep
   */
  FlowStep getFlowStep();

  /**
   * Returns the {@link FlowElement} instances associated with the given annotation.
   * <p>
   * NOTE(review): which Enum types are meaningful as annotations is decided by the implementation
   * and is not visible from this interface.
   *
   * @param annotation the Enum value to look elements up by
   * @return Collection of FlowElement instances
   */
  Collection<? extends FlowElement> getFlowElementsFor( Enum annotation );

  /**
   * Returns the source {@link FlowElement} instances associated with the given annotation.
   *
   * @param annotation the Enum value to look source elements up by
   * @return Set of FlowElement instances
   */
  Set<? extends FlowElement> getSourceElements( Enum annotation );

  /**
   * Returns the sink {@link FlowElement} instances associated with the given annotation.
   *
   * @param annotation the Enum value to look sink elements up by
   * @return Set of FlowElement instances
   */
  Set<? extends FlowElement> getSinkElements( Enum annotation );

  /**
   * Returns the names of the source elements of this FlowNode.
   *
   * @return Set of String names
   */
  Set<String> getSourceElementNames();

  /**
   * Returns the names of the sink elements of this FlowNode.
   *
   * @return Set of String names
   */
  Set<String> getSinkElementNames();

  /**
   * Returns the names associated with the given {@link Tap} in its role as a source.
   * <p>
   * NOTE(review): behavior for a Tap that is not a source of this FlowNode is not specified here.
   *
   * @param flowElement the Tap to return source names for
   * @return Set of String names
   */
  Set<String> getSourceTapNames( Tap flowElement );

  /**
   * Returns the names associated with the given {@link Tap} in its role as a sink.
   * <p>
   * NOTE(review): behavior for a Tap that is not a sink of this FlowNode is not specified here.
   *
   * @param flowElement the Tap to return sink names for
   * @return Set of String names
   */
  Set<String> getSinkTapNames( Tap flowElement );

  /**
   * Returns the trap {@link Tap} for the given branch name.
   * <p>
   * NOTE(review): the result when no trap exists for the branch (for example {@code null}) is not
   * specified here -- confirm against the implementations.
   *
   * @param branchName the branch name to look the trap up by
   * @return Tap
   */
  Tap getTrap( String branchName );

  /**
   * Returns all trap {@link Tap} instances of this FlowNode.
   *
   * @return Collection of Tap instances
   */
  Collection<? extends Tap> getTraps();

  /**
   * Returns the {@link Scope} instances preceding the given {@link FlowElement}.
   * <p>
   * NOTE(review): presumably the incoming edges of the element in this node's element graph -- confirm.
   *
   * @param flowElement the element to return previous scopes for
   * @return Collection of Scope instances
   */
  Collection<? extends Scope> getPreviousScopes( FlowElement flowElement );

  /**
   * Returns the {@link Scope} instances following the given {@link FlowElement}.
   * <p>
   * NOTE(review): presumably the outgoing edges of the element in this node's element graph -- confirm.
   *
   * @param flowElement the element to return next scopes for
   * @return Collection of Scope instances
   */
  Collection<? extends Scope> getNextScopes( FlowElement flowElement );

  /**
   * Returns the pipeline graphs this FlowNode consists of.
   *
   * @return List of ElementGraph instances
   */
  List<? extends ElementGraph> getPipelineGraphs();

  /**
   * Returns the pipeline graph for the given 'streamed' source element.
   * <p>
   * NOTE(review): the result for an element that is not a streamed source of this FlowNode is not
   * specified here.
   *
   * @param streamedSource the streamed source element to return the pipeline graph for
   * @return ElementGraph
   */
  ElementGraph getPipelineGraphFor( FlowElement streamedSource );
  }