1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.pdfbox.filter; 18 19 import java.io.ByteArrayOutputStream; 20 import java.io.IOException; 21 import java.io.InputStream; 22 import java.io.OutputStream; 23 import java.io.PushbackInputStream; 24 import java.io.StreamCorruptedException; 25 26 import org.apache.pdfbox.cos.COSDictionary; 27 28 import org.apache.pdfbox.io.NBitInputStream; 29 import org.apache.pdfbox.io.NBitOutputStream; 30 31 /** 32 * This is the used for the LZWDecode filter. 33 * 34 * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a> 35 * @version $Revision: 1.15 $ 36 */ 37 public class LZWFilter implements Filter 38 { 39 40 /** 41 * The LZW clear table code. 42 */ 43 public static final long CLEAR_TABLE = 256; 44 /** 45 * The LZW end of data code. 46 */ 47 public static final long EOD = 257; 48 49 /** 50 * {@inheritDoc} 51 */ 52 public void decode( InputStream compressedData, OutputStream result, COSDictionary options, int filterIndex ) 53 throws IOException 54 { 55 //log.debug("decode( )"); 56 NBitInputStream in = null; 57 in = new NBitInputStream( compressedData ); 58 in.setBitsInChunk( 9 ); 59 LZWDictionary dic = new LZWDictionary(); 60 byte firstByte = 0; 61 long nextCommand = 0; 62 while( (nextCommand = in.read() ) != EOD ) 63 { 64 // log.debug( "decode - nextCommand=" + nextCommand + ", bitsInChunk: " + in.getBitsInChunk()); 65 66 if( nextCommand == CLEAR_TABLE ) 67 { 68 in.setBitsInChunk( 9 ); 69 dic = new LZWDictionary(); 70 } 71 else 72 { 73 byte[] data = dic.getData( nextCommand ); 74 if( data == null ) 75 { 76 dic.visit( firstByte ); 77 data = dic.getData( nextCommand ); 78 dic.clear(); 79 } 80 if( data == null ) 81 { 82 throw new StreamCorruptedException( "Error: data is null" ); 83 } 84 dic.visit(data); 85 86 //log.debug( "decode - dic.getNextCode(): " + dic.getNextCode()); 87 88 if( dic.getNextCode() >= 2047 ) 89 { 90 in.setBitsInChunk( 12 ); 91 } 92 else if( dic.getNextCode() >= 1023 ) 93 { 94 in.setBitsInChunk( 11 ); 95 } 96 else if( dic.getNextCode() >= 511 ) 97 { 98 in.setBitsInChunk( 10 ); 99 } 100 else 101 { 102 in.setBitsInChunk( 9 ); 103 } 104 /** 105 if( in.getBitsInChunk() != dic.getCodeSize() ) 106 { 107 in.unread( nextCommand ); 108 in.setBitsInChunk( dic.getCodeSize() ); 109 System.out.print( "Switching " + nextCommand + " to " ); 110 nextCommand = in.read(); 111 System.out.println( "" + nextCommand ); 112 data = dic.getData( nextCommand ); 113 }**/ 114 firstByte = data[0]; 115 result.write( data ); 116 } 117 } 118 result.flush(); 119 } 120 121 122 /** 123 * {@inheritDoc} 124 */ 125 public void encode( InputStream rawData, OutputStream result, COSDictionary options, int filterIndex ) 126 throws IOException 127 { 128 //log.debug("encode( )"); 129 PushbackInputStream input = new PushbackInputStream( rawData, 4096 ); 130 LZWDictionary dic = new LZWDictionary(); 131 NBitOutputStream out = new NBitOutputStream( result ); 132 out.setBitsInChunk( 9 ); //initially nine 133 out.write( CLEAR_TABLE ); 134 ByteArrayOutputStream buffer = new ByteArrayOutputStream(); 135 int byteRead = 0; 136 for( int i=0; (byteRead = input.read()) != -1; i++ ) 137 { 138 //log.debug( "byteRead = '" + (char)byteRead + "' (0x" + Integer.toHexString(byteRead) + "), i=" + i); 139 buffer.write( byteRead ); 140 dic.visit( (byte)byteRead ); 141 out.setBitsInChunk( dic.getCodeSize() ); 142 143 //log.debug( "Getting node '" + new String( buffer.toByteArray() ) + "', buffer.size = " + buffer.size() ); 144 LZWNode node = dic.getNode( buffer.toByteArray() ); 145 int nextByte = input.read(); 146 if( nextByte != -1 ) 147 { 148 //log.debug( "nextByte = '" + (char)nextByte + "' (0x" + Integer.toHexString(nextByte) + ")"); 149 LZWNode next = node.getNode( (byte)nextByte ); 150 if( next == null ) 151 { 152 //log.debug("encode - No next node, writing node and resetting buffer (" + 153 // " node.getCode: " + node.getCode() + ")" + 154 // " bitsInChunk: " + out.getBitsInChunk() + 155 // ")"); 156 out.write( node.getCode() ); 157 buffer.reset(); 158 } 159 160 input.unread( nextByte ); 161 } 162 else 163 { 164 //log.debug("encode - EOF on lookahead: writing node, resetting buffer, and terminating read loop (" + 165 // " node.getCode: " + node.getCode() + ")" + 166 // " bitsInChunk: " + out.getBitsInChunk() + 167 // ")"); 168 out.write( node.getCode() ); 169 buffer.reset(); 170 break; 171 } 172 173 if( dic.getNextCode() == 4096 ) 174 { 175 //log.debug("encode - Clearing dictionary and unreading pending buffer data (" + 176 // " bitsInChunk: " + out.getBitsInChunk() + 177 // ")"); 178 out.write( CLEAR_TABLE ); 179 dic = new LZWDictionary(); 180 input.unread( buffer.toByteArray() ); 181 buffer.reset(); 182 } 183 } 184 185 // Fix the code size based on the fact that we are writing the EOD 186 // 187 if( dic.getNextCode() >= 2047 ) 188 { 189 out.setBitsInChunk( 12 ); 190 } 191 else if( dic.getNextCode() >= 1023 ) 192 { 193 out.setBitsInChunk( 11 ); 194 } 195 else if( dic.getNextCode() >= 511 ) 196 { 197 out.setBitsInChunk( 10 ); 198 } 199 else 200 { 201 out.setBitsInChunk( 9 ); 202 } 203 204 //log.debug("encode - Writing EOD (" + 205 // " bitsInChunk: " + out.getBitsInChunk() + 206 // ")"); 207 out.write( EOD ); 208 out.close(); 209 result.flush(); 210 } 211 }