Skip to Content

Parsing PDF Files using SAP PI Adapter Module

Jun 22, 2017 at 11:07 AM


avatar image
Former Member

I want to parse PDF files using an Adapter Module. I have built the Adapter Module using the PDFBox API; the file passes through the Adapter Module, but it is written to the output unconverted.

I cannot tell where the problem is, in the Adapter Module or in the sender channel, because it looks like the payload is not being recognized, as it is binary PDF data.

package com.xxxxxxxx.pi; import; import; import; import; import; import; import; import javax.annotation.PostConstruct; import javax.annotation.PreDestroy; import javax.ejb.Stateless; import javax.ejb.Local; import javax.ejb.LocalHome; import javax.ejb.Remote; import javax.ejb.RemoteHome; import javax.ejb.Stateless; import javax.xml.bind.DatatypeConverter; import com.qoppa.pdfText.PDFText; import org.w3c.dom.*; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.text.PDFTextStripper; import; import; import; import; import; import; import; import; import; import; import; import; import; import; import; @Stateless(name="PDFBean") @Local(value={ModuleLocal.class}) @Remote(value={ModuleRemote.class}) @LocalHome(value=ModuleLocalHome.class) @RemoteHome(value=ModuleHome.class) /** * Session Bean implementation class PDFExtractor */ public class PDFExtractor implements Module { private AuditAccess audit=null; private MessageKey mk = null; public ModuleData process(ModuleContext moduleContext, ModuleData inputModuleData) throws ModuleException { Object obj = null; try{ obj = inputModuleData.getPrincipalData(); String s = inputModuleData.contentToString(); ByteArrayOutputStream baos = new ByteArrayOutputStream(); ObjectOutputStream oos = new ObjectOutputStream(baos); oos.writeObject(obj); File f = new File("/pinfs/inbound/File.txt"); BufferedWriter bw = null; FileWriter fw = new FileWriter(f); bw = new BufferedWriter(fw); InputStream is = new ByteArrayInputStream(baos.toByteArray()); Message msg = (Message) obj; mk = msg.getMessageKey(); audit.addAuditLogEntry(mk,AuditLogStatus.SUCCESS, "Executing Module PDF Extractor"); bw.write(s); bw.close(); // Get the text for the document audit.addAuditLogEntry(mk, AuditLogStatus.SUCCESS,"Stage 1 crossed"); PDFText pdfText = new PDFText(is, null); audit.addAuditLogEntry(mk, AuditLogStatus.SUCCESS,"Stage 2 crossed"); String text = pdfText.getText(); audit.addAuditLogEntry(mk, AuditLogStatus.SUCCESS,"Stage 3 crossed"); 
audit.addAuditLogEntry(mk, AuditLogStatus.SUCCESS,"Stage 4 crossed"); String msgType = "MT_PDFReader"; String nameSpace = "urn://"; String str=" \n"+ " "; audit.addAuditLogEntry(mk, AuditLogStatus.SUCCESS,"Stage 5 crossed"); inputModuleData.setPrincipalData(str+text+" "); audit.addAuditLogEntry(mk, AuditLogStatus.SUCCESS,"Stage 6 crossed"); }catch(Exception e){ audit.addAuditLogEntry(mk, AuditLogStatus.SUCCESS,"AO: Module Exception Caught ."); audit.addAuditLogEntry(mk, AuditLogStatus.ERROR,e.toString()); } return inputModuleData; } @PostConstruct public void initialResources() { try { audit = PublicAPIAccessFactory.getPublicAPIAccess().getAuditAccess(); } catch (Exception e) { throw new RuntimeException("error in initialiseResources():"+e.getMessage()); } } @PreDestroy public void ReleaseResources() { } /** * Default constructor. */ public PDFExtractor() { // TODO Auto-generated constructor stub } }

10 |10000 characters needed characters left characters exceeded
* Please Login or Register to Answer, Follow or Comment.

0 Answers