HTML-to-DOCX conversion
As a further development of the HTML-to-RTF conversion feature, we are pleased to introduce the ability to create DOCX documents from styled HTML templates using PD4ML.
PD4ML can convert the following elements from HTML to DOCX:
- Page margins
- Text styles and fonts
- Page, text, paragraph and table cell backgrounds
- Text indentation
- Ordered and unordered lists (right-to-left Arabic and Hebrew direction support is coming soon)
- Tables (with correct table nesting), including col- and row-spans, table and cell backgrounds, cell paddings and border styles
- Images
- Hyperlinks (external and internal)
- Complex headers / footers (i.e. including tables). An individual header and footer can be defined for the title page.
- Forced page breaks
The HTML to DOCX conversion can be triggered by the following API calls:
// read and parse HTML
pd4ml.readHTML(inputStream);
pd4ml.writeDOCX(outputStream);
pd4ml.outputFormat(PD4Constants.DOCX); pd4ml.render(inputStream, outputStream);
The equivalents in JSP taglib:
<pd4tl:transform ... outputFormat="docx"> ... </pd4tl:transform>
<pd4ml:transform ... outputFormat="docx"> ... </pd4ml:transform>
java -Xmx512m -Djava.awt.headless=true -jar ./pd4ml.jar <URL> 1200 -out doc.docx -outformat docx
java -Xmx512m -Djava.awt.headless=true -cp ./pd4ml.jar Pd4Cmd <URL> 1200 -out doc.docx -outformat docx rtfwmf
Full converter Java application examples:
package samples; import java.awt.Insets; import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.security.InvalidParameterException; import com.pd4ml.Dimensions.Units; import com.pd4ml.PD4ML; import com.pd4ml.PageMargins; import com.pd4ml.PageSize; public class GettingStarted2 { protected int topValue = 10; protected int leftValue = 20; protected int rightValue = 10; protected int bottomValue = 10; protected int userSpaceWidth = 1300; public static void main(String[] args) { try { GettingStarted2 jt = new GettingStarted2(); jt.doConversion("https://pd4ml.com/i/rtf/demo.htm", "c:/invoice.docx"); } catch (Exception e) { e.printStackTrace(); } } public void doConversion( String url, String outputPath ) throws InvalidParameterException, MalformedURLException, IOException { File output = new File(outputPath); java.io.FileOutputStream fos = new java.io.FileOutputStream(output); PD4ML pd4ml = new PD4ML(); pd4ml.setHtmlWidth(userSpaceWidth); // set frame width of "virtual web browser" // choose target paper format and "rotate" it to landscape orientation pd4ml.setPageSize(PageSize.A4.rotate()); // define PDF page margins pd4ml.setPageMargins(new PageMargins(topValue, leftValue, bottomValue, rightValue, Units.MM)); // read and parse HTML pd4ml.readHTML(new URL(url)); pd4ml.writeDOCX(fos); // actual document conversion from URL to DOCX file fos.close(); System.out.println( outputPath + "\ndone." ); } }
package samples; import java.awt.Insets; import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.security.InvalidParameterException; import org.zefer.pd4ml.PD4Constants; import org.zefer.pd4ml.PD4ML; public class GettingStarted2 { protected int topValue = 10; protected int leftValue = 20; protected int rightValue = 10; protected int bottomValue = 10; protected int userSpaceWidth = 1300; public static void main(String[] args) { try { GettingStarted2 jt = new GettingStarted2(); jt.doConversion("https://pd4ml.com/i/rtf/demo.htm", "c:/invoice.docx"); } catch (Exception e) { e.printStackTrace(); } } public void doConversion( String url, String outputPath ) throws InvalidParameterException, MalformedURLException, IOException { File output = new File(outputPath); java.io.FileOutputStream fos = new java.io.FileOutputStream(output); PD4ML pd4ml = new PD4ML(); pd4ml.setHtmlWidth(userSpaceWidth); // set frame width of "virtual web browser" // choose target paper format and "rotate" it to landscape orientation pd4ml.setPageSize(pd4ml.changePageOrientation(PD4Constants.A4)); // define PDF page margins pd4ml.setPageInsetsMM(new Insets(topValue, leftValue, bottomValue, rightValue)); // Force generate DOCX instead of PDF pd4ml.outputFormat(PD4Constants.DOCX); pd4ml.render(new URL(url), fos); // actual document conversion from URL to RTF file fos.close(); System.out.println( outputPath + "\ndone." ); } }
DOCX conversion samples:
- Source HTML
- DOCX conversion result
- PDF document, generated from the same source HTML