1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.pdfbox.encoding.conversion; 19 20 import org.apache.fontbox.cmap.CMap; 21 import java.io.UnsupportedEncodingException; 22 23 24 /** 25 * CJKConverter converts encodings defined in CJKEncodings. 26 * 27 * @author Pin Xue (http://www.pinxue.net), Holly Lee (holly.lee (at) gmail.com) 28 * @version $Revision: 1.0 $ 29 */ 30 public class CJKConverter implements EncodingConverter 31 { 32 // The encoding 33 private String encodingName = null; 34 // The java charset name 35 private String charsetName = null; 36 37 38 /** 39 * Constructs a CJKConverter from a PDF encoding name. 40 * 41 * @param encoding the encoding to be used 42 */ 43 public CJKConverter(String encoding) 44 { 45 encodingName = encoding; 46 charsetName = CJKEncodings.getCharset(encoding); 47 } 48 49 /** 50 * Convert a string. It occurs when a cmap lookup returned 51 * converted bytes successfully, but we still need to convert its 52 * encoding. The parameter s is constructs as one byte or a UTF-16BE 53 * encoded string. 54 * 55 * Note: pdfbox set string to UTF-16BE charset before calling into 56 * this. 57 * 58 * {@inheritDoc} 59 */ 60 public String convertString(String s) 61 { 62 if ( s.length() == 1 ) 63 { 64 return s; 65 } 66 67 if ( charsetName.equalsIgnoreCase("UTF-16BE") ) 68 { 69 return s; 70 } 71 72 try 73 { 74 return new String(s.getBytes("UTF-16BE"), charsetName); 75 } 76 catch ( UnsupportedEncodingException uee ) 77 { 78 return s; 79 } 80 } 81 82 /** 83 * Convert bytes to a string. We just convert bytes within 84 * coderange defined in CMap. 85 * 86 * {@inheritDoc} 87 */ 88 public String convertBytes(byte [] c, int offset, int length, CMap cmap) 89 { 90 if ( cmap != null ) 91 { 92 try 93 { 94 if ( cmap.isInCodeSpaceRanges(c, offset, length) ) 95 { 96 return new String(c, offset, length, charsetName); 97 } 98 else 99 { 100 return null; 101 } 102 103 } 104 catch ( UnsupportedEncodingException uee ) 105 { 106 return new String(c, offset, length); 107 } 108 } 109 // No cmap? 110 return null; 111 } 112 113 }