/*******************************************************************************
Scriba EBook Maker
Copyright (C) 2011 Senato della Repubblica (http://www.senato.it/)
Offices:
Ufficio Stampa e internet [1]
Servizio dell'Informatica [2]
Contributors:
Roberto Battistoni (2, roberto.battistoni@senato.it): software engineer and developer
Carlo Marchetti (2, carlo.marchetti@senato.it): project manager
Marco Tagliavini (1, marco.tagliavini@senato.it): project visionary
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*******************************************************************************/
package it.senato.areatesti.ebook.ebookmaker.plugin.defaultplugin;
import it.senato.areatesti.ebook.ebookmaker.Context;
import it.senato.areatesti.ebook.ebookmaker.misc.pdf.PdfEncryption;
import it.senato.areatesti.ebook.ebookmaker.misc.pdf.PdfTexter;
import it.senato.areatesti.ebook.ebookmaker.plugin.base.AbstractPlugin;
import it.senato.areatesti.ebook.ebookmaker.scf.bean.ContentItem;
import it.senato.areatesti.ebook.ebookmaker.scf.bean.base.IItem;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.CharacterCodingException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.pdfbox.exceptions.CryptographyException;
import org.apache.pdfbox.exceptions.InvalidPasswordException;
/**
 * Plugin that converts a PDF file into a single XHTML {@link ContentItem}.
 * <p>
 * The conversion itself is delegated to {@link PdfTexter}; this class only
 * wires the pieces together and builds the resulting content item.
 */
public class PdfToHtmlPlugin extends AbstractPlugin
{
    /**
     * Returns the given content item untouched, wrapped in a freshly
     * created list.
     * <p>
     * NOTE(review): the comment previously attached to this method read
     * "The encoding is determined at run-time and not set before". Nothing
     * in this method deals with encodings -- confirm what it referred to.
     *
     * @param content      the item to hand back
     * @param metadataList ignored by this implementation
     * @return a new list whose only element is {@code content}
     * @throws IOException never thrown by this implementation
     */
    @Override
    public List elaborateContent(ContentItem content,
            ArrayList metadataList) throws IOException
    {
        ArrayList ciList = new ArrayList();
        ciList.add(content);
        return ciList;
    }

    /**
     * Converts the PDF named by {@code fileNamePdf} to HTML and wraps the
     * result in a new XHTML {@link ContentItem}.
     * <p>
     * The new item reuses the package path and the cover / spine / tidy /
     * xsl flags of {@code contentItemOfPdfRef}; its package file name is the
     * original one with the extension replaced by
     * {@code ".out" + Context.XHTML_EXT}.
     * <p>
     * A {@link CryptographyException} or {@link InvalidPasswordException}
     * is logged and not rethrown; in that case the returned list is empty.
     *
     * @param contentItemOfPdfRef the content item describing the PDF
     * @param fileNamePdf         the PDF file name handed to {@link PdfTexter}
     * @return a list holding the generated XHTML item, or an empty list
     *         when one of the two exceptions above was caught
     * @throws IOException propagated from the conversion steps
     */
    @Override
    public ArrayList makesHtmlFromPdf(
            ContentItem contentItemOfPdfRef, String fileNamePdf)
            throws IOException
    {
        // The same encoding is given to the converter and used to decode
        // the bytes it produces.
        String textPdfEncoding = Context.WINDOWS_CP1252_LATIN1;
        ArrayList ciList = new ArrayList();

        try
        {
            // NOTE(review): null is passed to decryptDocument; what that
            // argument stands for is not visible from this file -- confirm.
            PdfTexter pdfTexter = new PdfTexter(
                    PdfEncryption.decryptDocument(null),
                    textPdfEncoding, fileNamePdf);

            ByteArrayOutputStream bo = pdfTexter.convertToHtml();
            String text = bo.toString(textPdfEncoding);
            String htmlContent = adjustConvertedHtml(text);

            String packageFile = FilenameUtils.removeExtension(
                    contentItemOfPdfRef.getPackageFile())
                    + ".out" + Context.XHTML_EXT;

            // inherits attributes from the original ContentItem
            ContentItem ciText = new ContentItem(
                    contentItemOfPdfRef.getPackagePath(), packageFile,
                    Context.ID_OPF_PREFIX + packageFile,
                    null, null, Context.XHTML_MIMETYPE, null,
                    contentItemOfPdfRef.isCover(),
                    contentItemOfPdfRef.isInSpine(),
                    contentItemOfPdfRef.isNeededTidy(),
                    contentItemOfPdfRef.isNeededXsl(), false);
            ciText.setStringContent(htmlContent);
            ciList.add(ciText);
        }
        catch (CryptographyException e)
        {
            logStackTrace(e);
        }
        catch (InvalidPasswordException e)
        {
            logStackTrace(e);
        }

        return ciList;
    }

    /**
     * Hook applied to the converted HTML text; this implementation returns
     * its argument unchanged.
     *
     * @param htmlContent the HTML text produced by the conversion
     * @return {@code htmlContent} itself
     */
    @Override
    public String adjustConvertedHtml(String htmlContent)
    {
        return htmlContent;
    }

    /**
     * Not supported by this plugin.
     *
     * @param byteContent    unused
     * @param outputEncoding unused
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    @Override
    public String convertEncoding(byte[] byteContent, String outputEncoding)
            throws UnsupportedEncodingException, CharacterCodingException,
            IOException
    {
        // UnsupportedOperationException is a RuntimeException, so callers
        // that caught the previous bare RuntimeException keep working.
        throw new UnsupportedOperationException("Not implemented!");
    }

    /** Writes the full stack trace of {@code e} to the application logger at error level. */
    private void logStackTrace(Throwable e)
    {
        Context.getInstance().getLogger()
                .error(ExceptionUtils.getStackTrace(e));
    }
}