Hi,
The scenario is File to Proxy. I have to read a PDF file's content (all of its text). I have written the following code:
import java.io.IOException;
import java.io.FileReader;
import java.io.BufferedReader;
import java.io.*;
import org.apache.pdfbox.util.*;
import org.apache.pdfbox.pdmodel.*;
/**
 * Stand-alone example: loads "original.pdf", prints its page count and
 * encryption flag, saves a copy as "CopyOfOriginal.pdf", and writes the text
 * of page 1 to "SampleText.txt".
 */
class ReadPdf
{
    /**
     * Runs the extraction. All file names are fixed and resolved against the
     * current working directory; command-line arguments are ignored.
     *
     * @param args unused
     */
    public static void main(String args[])
    {
        File input = new File("original.pdf"); // The PDF file from where you would like to extract
        File output = new File("SampleText.txt"); // The text file where you are going to store the extracted data

        // Declared outside the try so the finally block can release them
        // even when loading, saving or extraction fails part-way through.
        PDDocument pd = null;
        BufferedWriter wr = null;
        try {
            pd = PDDocument.load(input);
            System.out.println(pd.getNumberOfPages()); // prints number of pages
            System.out.println(pd.isEncrypted()); // prints whether the document is encrypted
            pd.save("CopyOfOriginal.pdf"); // Creates a copy called "CopyOfOriginal.pdf"

            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setStartPage(1); // Start extracting from page 1
            stripper.setEndPage(1); // Extract till page 1

            // Explicit charset: without it the output encoding depends on the
            // platform default and differs from machine to machine.
            wr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output), "UTF-8"));
            stripper.writeText(pd, wr);
            // Flush inside the try so a write failure is reported with the
            // other errors instead of surfacing only during close().
            wr.flush();
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        finally
        {
            if (wr != null) {
                try {
                    wr.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (pd != null) {
                try {
                    pd.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
It works. I have modified it to work as a Java mapping, as follows:
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Map;
import java.util.HashMap;
import java.io.IOException;
import java.io.FileReader;
import java.io.BufferedReader;
import java.io.*;
import org.apache.pdfbox.util.*;
import org.apache.pdfbox.pdmodel.*;
import com.sap.aii.mapping.api.AbstractTransformation;
import com.sap.aii.mapping.api.StreamTransformationException;
import com.sap.aii.mapping.api.TransformationInput;
import com.sap.aii.mapping.api.TransformationOutput;
/**
 * Java mapping that reads a PDF from the input payload, extracts the text of
 * page 1, and writes an {@code MTPdf} XML document whose {@code field1} ..
 * {@code field4} elements hold the first four extracted lines.
 */
public class PdftoXml extends AbstractTransformation
{
    /** Number of {@code fieldN} elements written to the target message. */
    private static final int FIELD_COUNT = 4;

    /**
     * Converts the PDF input payload to the {@code MTPdf} XML message.
     *
     * <p>If page 1 yields fewer than four lines, the remaining fields are
     * written as empty elements.
     *
     * @param in  mapping input; its payload stream is read as a PDF document
     * @param out mapping output; receives the UTF-8 encoded XML
     * @throws StreamTransformationException if the PDF cannot be read or the
     *         result cannot be written; the original exception is the cause
     */
    public void transform(TransformationInput in, TransformationOutput out) throws StreamTransformationException
    {
        PDDocument pd = null;
        try {
            // Read the PDF straight from the mapping's input payload stream.
            pd = PDDocument.load(in.getInputPayload().getInputStream());

            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setStartPage(1); // Start extracting from page 1
            stripper.setEndPage(1); // Extract till page 1

            // Accept both LF and CRLF so no stray '\r' ends up in a field value.
            String[] content = stripper.getText(pd).split("\r?\n");

            StringBuilder result = new StringBuilder();
            result.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
            result.append("<ns0:MTPdf xmlns:ns0=\"urn:mmm-com:pi:Vinay:10\">");
            for (int i = 0; i < FIELD_COUNT; i++) {
                // Guard against pages with fewer lines than fields.
                String value = i < content.length ? content[i] : "";
                String tag = "field" + (i + 1);
                result.append('<').append(tag).append('>')
                      .append(escapeXml(value))
                      .append("</").append(tag).append('>');
            }
            result.append("</ns0:MTPdf>");

            out.getOutputPayload().getOutputStream().write(result.toString().getBytes("UTF-8")); // writing to output
        }
        catch (Exception e)
        {
            // Propagate instead of swallowing, so the failure reaches the
            // caller rather than producing an empty target payload.
            throw new StreamTransformationException("PDF to XML conversion failed: " + e.getMessage(), e);
        }
        finally
        {
            if (pd != null) {
                try {
                    pd.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done here; the mapping outcome
                    // has already been decided by the try block.
                }
            }
        }
    }

    /** Escapes the characters that are markup-significant in XML element content. */
    private static String escapeXml(String text)
    {
        StringBuilder escaped = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }
}
I am using the third-party Apache API "PDFBox". Where should I import this API in the ESR for my Java mapping to work?