Home » pdfbox-1.1.0-src » org.apache.pdfbox.encoding.conversion » [javadoc | source]

    1   /*
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    *
    9    *      http://www.apache.org/licenses/LICENSE-2.0
   10    *
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   
   18   package org.apache.pdfbox.encoding.conversion;
   19   
   20   import org.apache.fontbox.cmap.CMap;
   21   import java.io.UnsupportedEncodingException;
   22   
   23   
   24   /**
   25    *  CJKConverter converts encodings defined in CJKEncodings.
   26    *
   27    *  @author  Pin Xue (http://www.pinxue.net), Holly Lee (holly.lee (at) gmail.com)
   28    *  @version $Revision: 1.0 $
   29    */
   30   public class CJKConverter implements EncodingConverter
   31   {
   32       // The encoding
   33       private String encodingName = null;    
   34       // The java charset name
   35       private String charsetName = null;
   36   
   37   
   38       /**
   39        *  Constructs a CJKConverter from a PDF encoding name.
   40        *  
   41        *  @param encoding the encoding to be used
   42        */
   43       public CJKConverter(String encoding)
   44       {
   45           encodingName = encoding;
   46           charsetName = CJKEncodings.getCharset(encoding);
   47       }
   48   
   49      /**
   50       *  Convert a string. It occurs when a cmap lookup returned
   51       *  converted bytes successfully, but we still need to convert its
   52       *  encoding. The parameter s is constructs as one byte or a UTF-16BE
   53       *  encoded string.
   54       *
   55       *  Note: pdfbox set string to UTF-16BE charset before calling into
   56       *  this.
   57       *  
   58       *  {@inheritDoc}
   59       */
   60       public String convertString(String s)
   61       {
   62           if ( s.length() == 1 )
   63           {
   64               return s;
   65           }
   66   
   67           if ( charsetName.equalsIgnoreCase("UTF-16BE") )
   68           {
   69               return s;
   70           }
   71   
   72           try 
   73           {
   74               return new String(s.getBytes("UTF-16BE"), charsetName);
   75           }
   76           catch ( UnsupportedEncodingException uee ) 
   77           {
   78               return s;   
   79           }
   80       }
   81   
   82      /**
   83       *  Convert bytes to a string. We just convert bytes within
   84       *  coderange defined in CMap.
   85       *
   86       *  {@inheritDoc}
   87       */
   88       public String convertBytes(byte [] c, int offset, int length, CMap cmap)
   89       {
   90           if ( cmap != null ) 
   91           {
   92               try 
   93               {
   94                   if ( cmap.isInCodeSpaceRanges(c, offset, length) )
   95                   {
   96                       return new String(c, offset, length, charsetName);
   97                   }
   98                   else
   99                   {
  100                       return null;
  101                   }
  102   
  103               }
  104               catch ( UnsupportedEncodingException uee ) 
  105               {
  106                   return new String(c, offset, length);
  107               }
  108           }
  109           // No cmap?
  110           return null;
  111       }
  112   
  113   }

Home » pdfbox-1.1.0-src » org.apache.pdfbox.encoding.conversion » [javadoc | source]