001    /*
002     * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.operation.xml;
022    
023    import javax.xml.parsers.DocumentBuilder;
024    import javax.xml.xpath.XPathConstants;
025    import javax.xml.xpath.XPathExpressionException;
026    
027    import cascading.flow.FlowProcess;
028    import cascading.operation.Function;
029    import cascading.operation.FunctionCall;
030    import cascading.operation.OperationException;
031    import cascading.tuple.Fields;
032    import cascading.tuple.Tuple;
033    import cascading.tuple.TupleEntry;
034    import cascading.util.Pair;
035    import org.slf4j.Logger;
036    import org.slf4j.LoggerFactory;
037    import org.w3c.dom.Document;
038    import org.w3c.dom.NodeList;
039    
040    /**
041     * XPathGenerator is a generator {@link Function} that evaluates every configured XPath expression against
042     * the XML document found in the first argument value, emitting a new Tuple for every Node an expression
043     * returns.
044     * <p>
045     * Argument values that are null, not a String, or a zero length String result in no output.
046     */
047    public class XPathGenerator extends XPathOperation implements Function<Pair<DocumentBuilder, Tuple>>
048      {
049      /** Logger used for the per-expression debug message. */
050      private static final Logger LOG = LoggerFactory.getLogger( XPathGenerator.class );
051    
052      /**
053       * Constructor XPathGenerator creates a new XPathGenerator instance.
054       *
055       * @param fieldDeclaration of type Fields, must declare exactly one field
056       * @param namespaces       of type String[][]
057       * @param paths            of type String...
058       * @throws IllegalArgumentException when fieldDeclaration does not declare exactly one field
059       */
060      public XPathGenerator( Fields fieldDeclaration, String[][] namespaces, String... paths )
061        {
062        super( 1, fieldDeclaration, namespaces, paths );
063    
064        if( fieldDeclaration.size() == 1 )
065          return;
066    
067        throw new IllegalArgumentException( "only one field can be declared: " + fieldDeclaration.print() );
068        }
069    
070      /**
071       * Method operate parses the first argument value into a Document, then walks the configured expressions in
072       * order, adding one Tuple to the output collector for every Node in each resulting NodeList.
073       * <p>
074       * The Tuple held on the right hand side of the context Pair is reused for every emitted result.
075       *
076       * @param flowProcess  of type FlowProcess, not read by this method
077       * @param functionCall of type FunctionCall, provides the arguments, the context Pair and the output collector
078       * @throws OperationException when an expression fails to evaluate against the parsed Document
079       */
080      @Override
081      public void operate( FlowProcess flowProcess, FunctionCall<Pair<DocumentBuilder, Tuple>> functionCall )
082        {
083        TupleEntry arguments = functionCall.getArguments();
084    
085        // instanceof is false for null, so a null argument is skipped here as well
086        if( !( arguments.getObject( 0 ) instanceof String ) )
087          return;
088    
089        String xml = arguments.getString( 0 );
090    
091        if( xml.isEmpty() ) // intentionally not trim()ing this value
092          return;
093    
094        Pair<DocumentBuilder, Tuple> context = functionCall.getContext();
095        Tuple result = context.getRhs();
096        Document document = parseDocument( context.getLhs(), xml );
097    
098        for( int index = 0; index < getExpressions().size(); index++ )
099          {
100          try
101            {
102            NodeList nodes = (NodeList) getExpressions().get( index ).evaluate( document, XPathConstants.NODESET );
103    
104            if( LOG.isDebugEnabled() )
105              {
106              boolean matched = nodes != null && nodes.getLength() != 0;
107    
108              LOG.debug( "xpath: {} was: {}", paths[ index ], matched );
109              }
110    
111            // an expression that yields no NodeList contributes nothing
112            if( nodes != null )
113              {
114              for( int item = 0; item < nodes.getLength(); item++ )
115                {
116                result.set( 0, writeAsXML( nodes.item( item ) ) );
117                functionCall.getOutputCollector().add( result );
118                }
119              }
120            }
121          catch( XPathExpressionException exception )
122            {
123            throw new OperationException( "could not evaluate xpath expression: " + paths[ index ], exception );
124            }
125          }
126        }
127      }