Home » pdfbox-1.1.0-src » org.apache.pdfbox.filter » [javadoc | source]

    1   /*
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    *
    9    *      http://www.apache.org/licenses/LICENSE-2.0
   10    *
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   package org.apache.pdfbox.filter;
   18   
   19   import java.io.ByteArrayOutputStream;
   20   import java.io.IOException;
   21   import java.io.InputStream;
   22   import java.io.OutputStream;
   23   import java.io.PushbackInputStream;
   24   import java.io.StreamCorruptedException;
   25   
   26   import org.apache.pdfbox.cos.COSDictionary;
   27   
   28   import org.apache.pdfbox.io.NBitInputStream;
   29   import org.apache.pdfbox.io.NBitOutputStream;
   30   
   31   /**
   32    * This is the used for the LZWDecode filter.
   33    *
   34    * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
   35    * @version $Revision: 1.15 $
   36    */
   37   public class LZWFilter implements Filter
   38   {
   39   
   40       /**
   41        * The LZW clear table code.
   42        */
   43       public static final long CLEAR_TABLE = 256;
   44       /**
   45        * The LZW end of data code.
   46        */
   47       public static final long EOD = 257;
   48   
   49       /**
   50        * {@inheritDoc}
   51        */
   52       public void decode( InputStream compressedData, OutputStream result, COSDictionary options, int filterIndex ) 
   53           throws IOException
   54       {
   55           //log.debug("decode( )");
   56           NBitInputStream in = null;
   57           in = new NBitInputStream( compressedData );
   58           in.setBitsInChunk( 9 );
   59           LZWDictionary dic = new LZWDictionary();
   60           byte firstByte = 0;
   61           long nextCommand = 0;
   62           while( (nextCommand = in.read() ) != EOD )
   63           {
   64               // log.debug( "decode - nextCommand=" + nextCommand + ", bitsInChunk: " + in.getBitsInChunk());
   65   
   66               if( nextCommand == CLEAR_TABLE )
   67               {
   68                   in.setBitsInChunk( 9 );
   69                   dic = new LZWDictionary();
   70               }
   71               else
   72               {
   73                   byte[] data = dic.getData( nextCommand );
   74                   if( data == null )
   75                   {
   76                       dic.visit( firstByte );
   77                       data = dic.getData( nextCommand );
   78                       dic.clear();
   79                   }
   80                   if( data == null )
   81                   {
   82                       throw new StreamCorruptedException( "Error: data is null" );
   83                   }
   84                   dic.visit(data);
   85   
   86                   //log.debug( "decode - dic.getNextCode(): " + dic.getNextCode());
   87   
   88                   if( dic.getNextCode() >= 2047 )
   89                   {
   90                       in.setBitsInChunk( 12 );
   91                   }
   92                   else if( dic.getNextCode() >= 1023 )
   93                   {
   94                       in.setBitsInChunk( 11 );
   95                   }
   96                   else if( dic.getNextCode() >= 511 )
   97                   {
   98                       in.setBitsInChunk( 10 );
   99                   }
  100                   else
  101                   {
  102                       in.setBitsInChunk( 9 );
  103                   }
  104                   /**
  105                   if( in.getBitsInChunk() != dic.getCodeSize() )
  106                   {
  107                       in.unread( nextCommand );
  108                       in.setBitsInChunk( dic.getCodeSize() );
  109                       System.out.print( "Switching " + nextCommand + " to " );
  110                       nextCommand = in.read();
  111                       System.out.println( "" +  nextCommand );
  112                       data = dic.getData( nextCommand );
  113                   }**/
  114                   firstByte = data[0];
  115                   result.write( data );
  116               }
  117           }
  118           result.flush();
  119       }
  120   
  121   
  122       /**
  123        * {@inheritDoc}
  124        */
  125       public void encode( InputStream rawData, OutputStream result, COSDictionary options, int filterIndex ) 
  126           throws IOException
  127       {
  128           //log.debug("encode( )");
  129           PushbackInputStream input = new PushbackInputStream( rawData, 4096 );
  130           LZWDictionary dic = new LZWDictionary();
  131           NBitOutputStream out = new NBitOutputStream( result );
  132           out.setBitsInChunk( 9 ); //initially nine
  133           out.write( CLEAR_TABLE );
  134           ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  135           int byteRead = 0;
  136           for( int i=0; (byteRead = input.read()) != -1; i++ )
  137           {
  138               //log.debug( "byteRead = '" + (char)byteRead + "' (0x" + Integer.toHexString(byteRead) + "), i=" + i);
  139               buffer.write( byteRead );
  140               dic.visit( (byte)byteRead );
  141               out.setBitsInChunk( dic.getCodeSize() );
  142   
  143               //log.debug( "Getting node '" + new String( buffer.toByteArray() ) + "', buffer.size = " + buffer.size() );
  144               LZWNode node = dic.getNode( buffer.toByteArray() );
  145               int nextByte = input.read();
  146               if( nextByte != -1 )
  147               {
  148                   //log.debug( "nextByte = '" + (char)nextByte + "' (0x" + Integer.toHexString(nextByte) + ")");
  149                   LZWNode next = node.getNode( (byte)nextByte );
  150                   if( next == null )
  151                   {
  152                       //log.debug("encode - No next node, writing node and resetting buffer (" +
  153                       //          " node.getCode: " + node.getCode() + ")" +
  154                       //          " bitsInChunk: " + out.getBitsInChunk() +
  155                       //          ")");
  156                       out.write( node.getCode() );
  157                       buffer.reset();
  158                   }
  159   
  160                   input.unread( nextByte );
  161               }
  162               else
  163               {
  164                   //log.debug("encode - EOF on lookahead: writing node, resetting buffer, and terminating read loop (" +
  165                   //          " node.getCode: " + node.getCode() + ")" +
  166                   //          " bitsInChunk: " + out.getBitsInChunk() +
  167                   //          ")");
  168                   out.write( node.getCode() );
  169                   buffer.reset();
  170                   break;
  171               }
  172   
  173               if( dic.getNextCode() == 4096 )
  174               {
  175                   //log.debug("encode - Clearing dictionary and unreading pending buffer data (" +
  176                   //          " bitsInChunk: " + out.getBitsInChunk() +
  177                   //          ")");
  178                   out.write( CLEAR_TABLE );
  179                   dic = new LZWDictionary();
  180                   input.unread( buffer.toByteArray() );
  181                   buffer.reset();
  182               }
  183           }
  184   
  185           // Fix the code size based on the fact that we are writing the EOD
  186           //
  187           if( dic.getNextCode() >= 2047 )
  188           {
  189               out.setBitsInChunk( 12 );
  190           }
  191           else if( dic.getNextCode() >= 1023 )
  192           {
  193               out.setBitsInChunk( 11 );
  194           }
  195           else if( dic.getNextCode() >= 511 )
  196           {
  197               out.setBitsInChunk( 10 );
  198           }
  199           else
  200           {
  201               out.setBitsInChunk( 9 );
  202           }
  203   
  204           //log.debug("encode - Writing EOD (" +
  205           //          " bitsInChunk: " + out.getBitsInChunk() +
  206           //          ")");
  207           out.write( EOD );
  208           out.close();
  209           result.flush();
  210       }
  211   }

Home » pdfbox-1.1.0-src » org.apache.pdfbox.filter » [javadoc | source]