XPATH 語法
XSLT 基礎
這邊主要是針對XML to HTML做處理, 還有在XSLT裡面使用java method , javabean 來達到一些效果。
原始的XML檔案大概長這樣。實作EDRM的mail XML檔會長得...非常難看：
<?xml version='1.0' encoding='UTF-8' ?>
<Root caseId="Case1" description="Test Case" locale="US" majorVersion="1" minorVersion="2" rootFilePath="/temp/">
<batch>
<Document MimeType="multipart/mixed" DocType="Header" DocID="1">
<FieldValues>
<contentType>multipart/mixed;
boundary="------------010709010402030905080005"</contentType>
<mailAuthor>N2&lt;tamino2@tsai.james&gt;</mailAuthor>
<mailCC>tamino0@tsai.james</mailCC>
<mailSubject>2011/06/14-test01</mailSubject>
<mailTo>tamino1@tsai.james,tamino0@tsai.james</mailTo>
<messageID>&lt;4DF6BDD9.7090905@tsai.james&gt;</messageID>
</FieldValues >
</Document>
<Document MimeType="multipart/alternative" DocType="MIME Entity" DocID="2">
<FieldValues>
<contentType>multipart/alternative;
boundary="------------080407070307020309080702"</contentType>
</FieldValues>
</Document>
<Document MimeType="text/plain" DocType="MIME Entity" DocID="3">
<FieldValues>
<content>aabbccd
*eeffggy*
myImg1
myImg2
</content>
<contentType>text/plain; charset=UTF-8; format=flowed</contentType>
</FieldValues>
</Document>
<Document MimeType="multipart/related" DocType="MIME Entity" DocID="4">
<FieldValues>
<contentType>multipart/related;
boundary="------------000300090903000901010201"</contentType>
</FieldValues>
</Document>
<Document MimeType="text/html" DocType="MIME Entity" DocID="5">
<FieldValues>
<content><![CDATA[<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
</head>
<body text="#000000" bgcolor="#ffffff">
aabbccd<br>
<b><big><big><big>eeffggy</big></big></big></b><br>
myImg1<br>
<img src="cid:part1.03050007.07010308@tsai.james" alt=""><br>
<br>
myImg2<br>
<img src="cid:part2.01030500.03060103@tsai.james" alt=""><br>
</body>
</html>
]]></content>
<contentType>text/html; charset=UTF-8</contentType>
</FieldValues>
</Document>
<Document MimeType="image/jpeg" DocType="MIME Entity" DocID="6">
<FieldValues>
<content>/9j/4AAQSkZJRgABAQEAYABgAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0a
HBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIy
MjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAAaAVADASIA
</content>
<contentID>&lt;part1.03050007.07010308@tsai.james&gt;</contentID>
<contentTransferEncoding>base64</contentTransferEncoding>
<contentType>image/jpeg;
name="new_pagingtoolbar.JPG"</contentType>
</FieldValues>
</Document>
<Document MimeType="image/jpeg" DocType="MIME Entity" DocID="7">
<FieldValues>
<content>/9j/4AAQSkZJRgABAQEAYABgAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0a
HBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIy
</content>
<contentID>&lt;part2.01030500.03060103@tsai.james&gt;</contentID>
<contentTransferEncoding>base64</contentTransferEncoding>
<contentType>image/jpeg;
name="SaveAs_SubMenu.JPG"</contentType>
</FieldValues>
</Document>
<Document MimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document" DocType="Attach File" DocID="8">
<FieldValues>
<contentTransferEncoding>7bit</contentTransferEncoding>
<contentType>application/vnd.openxmlformats-officedocument.wordprocessingml.document;
name="temp.docx"</contentType>
</FieldValues>
</Document>
<Document MimeType="application/octet-stream" DocType="Attach File" DocID="9">
<FieldValues>
<contentTransferEncoding>base64</contentTransferEncoding>
<contentType>application/octet-stream;
name="test.rar"</contentType>
</FieldValues>
</Document>
</batch>
<fields/>
</Root>
為了閱讀方便，我把file content的base64 encode內容刪掉了一部份，所以上面的XML直接拿來用是會有問題的。一般使用XSLT做XML轉換的時候，如果XML定義檔很單純，XSL檔的設計就會簡單很多；碰到上面這種比較複雜的XML，處理上就會麻煩一點。先來看看轉出來的結果長什麼樣子：轉成HTML檔，或者是回傳HTML String，大概像這樣。說穿了，只是把原始的E-Mail內容轉換成EDRM XML，然後會有一堆XML檔，再透過XSLT轉換成HTML View。所以在XSL的設計上，就要把相關的屬性取出來，然後變成HTML。下面是實作出來的XSL檔：
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:cu="com.mypackage.util.XsltCommonUtils"
xmlns:obj="com.mypackage.xsltObject.XsltDataObject"
exclude-result-prefixes="cu obj" >
<xsl:output method="html" encoding="UTF-8" />
<!-- Global header values. Each "//FieldValues/..." path searches the whole
     input document for the named field. -->

<!-- NOTE(review): the sample input's root element is Root, so "/Documents"
     selects nothing there and this variable is empty. It is not referenced
     anywhere in the visible stylesheet. Confirm before relying on it. -->
<xsl:variable name="Documents">
    <xsl:value-of select="/Documents" />
</xsl:variable>

<!-- Sender, passed through the Java helper processEscapingString.
     disable-output-escaping is deliberately NOT set here: this fragment is
     later converted to a string, and XSLT 1.0 section 16.4 makes that an error
     when escaping was disabled inside the fragment. The template that prints
     $From decides about escaping itself. -->
<xsl:variable name="From">
    <xsl:value-of select="cu:processEscapingString(//FieldValues/mailAuthor)"/>
</xsl:variable>

<!-- Recipient lists, as stored in the input. -->
<xsl:variable name="To">
    <xsl:value-of select="//FieldValues/mailTo" />
</xsl:variable>
<xsl:variable name="Cc">
    <xsl:value-of select="//FieldValues/mailCC" />
</xsl:variable>
<xsl:variable name="Bcc">
    <xsl:value-of select="//FieldValues/mailBCC" />
</xsl:variable>

<!-- Sent date, subject and message id. -->
<xsl:variable name="SendDate">
    <xsl:value-of select="//FieldValues/mailSentDate" />
</xsl:variable>
<xsl:variable name="Subject">
    <xsl:value-of select="//FieldValues/mailSubject" />
</xsl:variable>
<xsl:variable name="uid">
    <xsl:value-of select="//FieldValues/messageID" />
</xsl:variable>
<!-- A Java object created through the constructor of the class bound to the
     "obj" prefix. The variable binding itself never changes, but the extension
     functions below mutate the object's state. -->
<xsl:variable name="bodyObj" select="obj:new()"/>

<!-- Evaluated for its side effects only: for every Document whose MimeType
     contains 'text/html', the content is stored in $bodyObj and the object is
     flagged as HTML. If several documents match, each call overwrites the
     previously stored body.
     NOTE(review): nothing references $htmlBody, so this relies on the processor
     evaluating top-level params eagerly and in document order. Confirm for the
     XSLT processor in use. -->
<xsl:param name="htmlBody">
    <xsl:for-each select="//Document[contains(@MimeType,'text/html')]">
        <xsl:value-of select="cu:setXlstDataObject($bodyObj,FieldValues/content)" disable-output-escaping="yes"/>
        <xsl:value-of select="cu:setXlsDataObjectIsHtml($bodyObj)"/>
    </xsl:for-each>
</xsl:param>
<!-- Plain-text body, used only when $bodyObj has not been flagged as HTML.
     Collects the content of every Document whose MimeType contains
     'text/plain' and whose DocType contains 'MIME Entity'.
     The content path is relative to the current Document. The former absolute
     path "//FieldValues/content" ignored the loop context and always read from
     the whole input document, regardless of which Document was being visited. -->
<xsl:variable name="textBody">
    <xsl:if test="not(cu:getXlsDataObjectIsHtml($bodyObj))">
        <xsl:for-each select="//Document[contains(@MimeType,'text/plain')][contains(@DocType,'MIME Entity')]">
            <xsl:value-of select="FieldValues/content" />
        </xsl:for-each>
    </xsl:if>
</xsl:variable>
<!-- Root template: writes the complete HTML page.
     Layout: a header table (logo, sender, sent date, To/cc/bcc/Subject fields)
     followed by the mail body.
     The header fields are written as literal result elements with attribute
     value templates, so the serializer quotes and escapes the values; a quote
     character in a recipient list or subject can no longer break the markup. -->
<xsl:template match="/">
    <html>
        <head>
            <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
            <style type="text/css">
                BODY, TD {
                font-family: Arial;
                color: black;
                font-size: 9pt;
                font-weight: normal;
                text-decoration: none; }
                * {margin:0; padding:0;}
                .header{
                background-image: url(img/bg_line.jpg);
                background-repeat: repeat-x;
                background-position: left top;
                height: 94px;
                width: 100%;
                }
                .headerbg{
                background-image: url(img/bg.jpg);
                background-repeat: no-repeat;
                background-position: left top;
                height: 94px;
                width: 100%;
                }
                .main{
                margin-right: 5px;
                padding-top: 15px;
                width: 200px;
                }
                .right{
                float: right;
                margin-right: 5px;
                margin-left: 3px;
                padding-top: 4px;
                text-align: right;
                }
                .left{
                float: left;
                margin-right: 5px;
                margin-left: 3px;
                padding-top: 4px;
                text-align: left;
                }
                .logoimg{
                width: 50px;
                padding-right: 5px;
                padding-left: 10px;
                padding-top: 15px;
                }
                .input_a{
                border: 1px solid #333;
                font-family: Arial;
                font-size: 9pt;
                font-weight: normal;
                color: #000;
                height: 18px;
                width: 312px;
                background-image: url(img/usericon.jpg);
                background-repeat: no-repeat;
                background-color: #FFF;
                padding-left: 17px;
                background-position: 2px;
                }
                .input_b{
                border: 1px solid #333;
                font-family: Arial;
                font-size: 9pt;
                font-weight: normal;
                color: #000;
                height: 18px;
                width: 500px;
                background-color: #FFF;
                padding-left: 0px;
                background-position: 2px;
                }
                TABLE.tablestyle1 TD, TABLE.tablestyle1
                TD { border: 1 solid black;
                }
                TABLE.tablestyle1 { border-collapse:
                collapse; border-spacing: 0px
                0px; empty-cells: show }
            </style>
        </head>
        <body>
            <table width="100%" border="0" cellspacing="0" cellpadding="0">
                <tr>
                    <td align="left" valign="top" class="headerbg">
                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
                            <tr>
                                <td align="left" valign="top" class="logoimg">
                                    <img src="img/logo.gif" width="60" height="60" alt="" />
                                </td>
                                <td align="left" valign="top" class="main">
                                    <b>
                                        <!-- $From was prepared by the Java helper
                                             processEscapingString and is written as is. -->
                                        <xsl:value-of select="$From"
                                            disable-output-escaping="yes" />
                                    </b>
                                    <br />
                                    <xsl:value-of select="$SendDate" />
                                </td>
                                <td align="left" valign="top" class="left">
                                    <table width="50px" border="0" cellspacing="1"
                                        cellpadding="0">
                                        <tr>
                                            <td align="right" valign="top">To：</td>
                                            <td width="20%" align="right" valign="top">
                                                <input type="text" size="50" class="input_b" readonly="true" value="{$To}" />
                                            </td>
                                        </tr>
                                        <tr>
                                            <td align="right" valign="top">cc：</td>
                                            <td align="right" valign="top">
                                                <input type="text" size="50" class="input_b" readonly="true" value="{$Cc}" />
                                            </td>
                                        </tr>
                                        <tr>
                                            <td align="right" valign="top">bcc：</td>
                                            <td align="right" valign="top">
                                                <input type="text" size="50" class="input_b" readonly="true" value="{$Bcc}" />
                                            </td>
                                        </tr>
                                        <tr>
                                            <td align="right" valign="top">Subject：</td>
                                            <td align="right" valign="top">
                                                <input type="text" size="50" class="input_b" readonly="true" value="{$Subject}" />
                                            </td>
                                        </tr>
                                    </table>
                                </td>
                            </tr>
                        </table>
                    </td>
                </tr>
            </table>
            <br />
            <table width="100%">
                <tr>
                    <td>
                        <!--
                        <xsl:for-each select="//item[@name='Body']/itemdata[@type='19']">
                        <pd4ml-include encoding="qp" debug="false">
                        <xsl:copy-of select="text()" />
                        </pd4ml-include>
                        </xsl:for-each>
                        <xsl:for-each select="//item/rawitemdata[@type='19']">
                        <table border="0" width="100%">
                        <tr>
                        <td style="word-wrap: break-word">
                        <pd4ml-include xencoding="qp" debug="false">
                        <xsl:copy-of select="text()" />
                        </pd4ml-include>
                        </td>
                        </tr>
                        </table>
                        </xsl:for-each>
                        -->
                        <!--
                        <xsl:for-each
                        select="//item[@name='$FILE']/object/file[@name='mime.htm']/filedata">
                        <pd4ml-include debug="false">
                        <xsl:attribute name="encoding"><xsl:value-of
                        select="../@encoding" /></xsl:attribute>
                        <xsl:copy-of select="text()" />
                        </pd4ml-include>
                        </xsl:for-each>
                        <xsl:apply-templates select="item" />
                        -->
                        <!-- commented out because file[contains(@name,'mime.gif')] causes
                        java.lang.ArrayIndexOutOfBoundsException in Xalan under some conditions xsl:for-each
                        select="//item[@name='$FILE']/object/file[contains(@name,'mime.gif')]/filedata">
                        <img> <xsl:attribute name="src">data:image/gif;base64,<xsl:value-of select="normalize-space(.)"
                        disable-output-escaping="yes"/></xsl:attribute> <xsl:attribute name="border">0</xsl:attribute>
                        </img> </xsl:for-each -->
                    </td>
                </tr>
            </table>
            <!--
            <xsl:for-each select="//Document">
            <xsl:choose>
            <xsl:when test="contains(@DocType,'MIME Entity')">
            <xsl:value-of select="Document/FieldValues/ContentID" disable-output-escaping="yes"/>
            <xsl:value-of select="cu:testNodeSet(Document/FieldValues)" />
            </xsl:when>
            </xsl:choose>
            </xsl:for-each>
            -->
            <!-- to get body string with inline images.
                 Order matters: the Document template rewrites the body held in
                 $bodyObj, so it has to run before the body is read back below. -->
            <xsl:apply-templates select="//Document" />
            <!-- The plain-text body is text, not markup, so it is written with
                 normal escaping. -->
            <xsl:value-of select="$textBody" />
            <!-- The HTML body is markup and is written unescaped on purpose. -->
            <xsl:value-of select="cu:getXlstDataObject($bodyObj)" disable-output-escaping="yes"/>
        </body>
    </html>
</xsl:template>
<!-- Per-Document template.
     For a Document whose MimeType contains 'image', the content id, content
     and file attributes are read and handed to the Java helper
     setDataObjectBodyByInlineImageUrl together with $bodyObj. Nothing is
     written to the output; the call is made for its effect on $bodyObj.
     Documents whose DocType contains 'Attach File' are matched but not handled.
     disable-output-escaping was removed from the variable bodies: these
     fragments are used as strings, where XSLT 1.0 section 16.4 treats the
     attribute as an error that is at best ignored. -->
<xsl:template match="Document">
    <xsl:choose>
        <xsl:when test="contains(@MimeType,'image')">
            <xsl:variable name="contentId">
                <xsl:value-of select="FieldValues/contentID"/>
            </xsl:variable>
            <xsl:variable name="content">
                <xsl:value-of select="FieldValues/content"/>
            </xsl:variable>
            <!-- NOTE(review): the sample input shown with this stylesheet has no
                 Files element, so the three values below are empty for it.
                 Confirm where the image media type should come from. -->
            <xsl:variable name="fileType">
                <xsl:value-of select="Files/File/@FileType"/>
            </xsl:variable>
            <xsl:variable name="fileName">
                <xsl:value-of select="Files/File/ExternalFile/@FileName"/>
            </xsl:variable>
            <xsl:variable name="hashMd5">
                <xsl:value-of select="Files/File/ExternalFile/@Hash"/>
            </xsl:variable>
            <xsl:value-of select="cu:setDataObjectBodyByInlineImageUrl($bodyObj,$contentId,$fileType,$content,$fileName,$hashMd5)"/>
        </xsl:when>
        <xsl:when test="contains(@DocType,'Attach File')">
            <!-- do something -->
        </xsl:when>
    </xsl:choose>
</xsl:template>
</xsl:stylesheet>
還有相關的Java Class
XsltDataObject.java
package com.mypackage.xsltObject;
/**
 * Mutable holder for a body string and a flag saying whether that body is HTML.
 * A new instance has no body ({@code null}) and is not flagged as HTML.
 */
public class XsltDataObject {

    /** Whether the stored body is HTML; {@code false} until set. */
    private boolean isHtml = false;

    /** The stored body text; {@code null} until set. */
    private String bodyString;

    /**
     * @return the stored body text, or {@code null} if none has been set
     */
    public String getBodyString() {
        return this.bodyString;
    }

    /**
     * @param bodyString the body text to store
     */
    public void setBodyString(String bodyString) {
        this.bodyString = bodyString;
    }

    /**
     * @return {@code true} if the body has been flagged as HTML
     */
    public boolean isHtml() {
        return this.isHtml;
    }

    /**
     * @param isHtml the new value of the HTML flag
     */
    public void setHtml(boolean isHtml) {
        this.isHtml = isHtml;
    }
}
還有XsltCommonUtils.java
package com.mypackage.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import com.mypackage.xsltObject.XsltDataObject;
public class XsltCommonUtils {
public static void setXlstDataObject(XsltDataObject object,String bodyString){
String bodyStr = bodyString;
bodyStr = bodyStr.replaceAll("cid:", "");
object.setBodyString(bodyStr);
}
public static String getXlstDataObject(XsltDataObject object){
return object.getBodyString();
}
public static void setXlsDataObjectIsHtml(XsltDataObject object){
System.out.println("setXlsDataObjectIsHtml to true ");
object.setHtml(true);
}
public static boolean getXlsDataObjectIsHtml(XsltDataObject object){
System.out.println("getXlsDataObjectIsHtml = " object.isHtml());
return object.isHtml();
}
public static void setDataObjectBodyByInlineImageUrl(XsltDataObject inObject,
String contentId, String fileType, String content,
String fileName, String hashMd5) {
//for debug
System.out.println("contentId = " contentId);
System.out.println("fileType = " fileType);
System.out.println("fileName = " fileName);
String contentIdStr = replaceContentId(contentId.trim());
System.out.println("contentIdStr =" contentIdStr);
StringBuffer resultString = new StringBuffer();
String bodyString = inObject.getBodyString();
resultString.append("data:");
resultString.append(fileType);
resultString.append(";base64,");
//will be change to get file name rule.
/*
String fullFileName = filePath "/" contentIdStr;
System.out.println("fullFileName = " fullFileName);
fullFileName = fullFileName.replaceAll("\\\\", "/");
File f = new File(fullFileName);
BufferedReader reader = null;
StringBuffer contents = new StringBuffer();
if(f.exists()){
try {
reader = new BufferedReader(new FileReader(f));
String text = null;
while ((text = reader.readLine()) != null) {
contents.append(text);
}
reader.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
*/
// System.out.println("contents = " contents.toString());
resultString.append(content);
bodyString = bodyString.replaceAll(contentIdStr, resultString.toString());
// System.out.println("Result bodyString = " bodyString);
inObject.setBodyString(bodyString);
}
private static String replaceContentId(String contentId){
String result = contentId;
result = result.replaceAll("<", "");
result = result.replaceAll(">", "");
result = result.replaceAll("<", "");
result = result.replaceAll(">", "");
return result;
}
public static String processEscapingString(String inputString){
System.out.println("processEscapingString = " inputString);
String result = inputString;
result = result.replaceAll("<", "< ");
result = result.replaceAll(">", " >");
result = result.replaceAll("<", "< ");
result = result.replaceAll(">", " >");
System.out.println("processEscapingString result= " result);
return result;
}
}
可以先別理會Java Class在做什麼事, 在原始的XSL 檔裡面可以注意下面這個
<xsl:variable name="bodyObj" select="obj:new()"/>
這一行會把variable bodyObj設定為一個新建立的Java物件。XSLT的variable一旦設定後就不可變更，但是我們仍然可以透過Java method去修改這個Java Object內部的值。
看看下面的param設定
<!-- For every Document whose MimeType contains 'text/html', store its content
     in $bodyObj and flag $bodyObj as HTML. The two value-of instructions are
     used only to invoke the Java extension functions. -->
<xsl:param name="htmlBody">
<xsl:for-each select="//Document">
<xsl:choose>
<xsl:when test="contains(@MimeType,'text/html')">
<xsl:value-of select="cu:setXlstDataObject($bodyObj,FieldValues/content)" disable-output-escaping="yes"/>
<xsl:value-of select="cu:setXlsDataObjectIsHtml($bodyObj)"/>
</xsl:when>
</xsl:choose>
</xsl:for-each>
</xsl:param>
cu:setXlstDataObject , cu是我們指定的xmlns name = cu , 指向java class = com.mypackage.util.XsltCommonUtils, 所以當你在XSLT裡調用
cu:setXlstDataObject($bodyObj,FieldValues/content)會呼叫XsltCommonUtils的setXlstDataObject method,
/**
 * Stores bodyString on the given object after removing every "cid:" from it.
 *
 * @param object     holder that receives the body
 * @param bodyString body text to clean and store
 */
public static void setXlstDataObject(XsltDataObject object,String bodyString){
String bodyStr = bodyString;
// replaceAll takes a regular expression; "cid:" has no special characters.
bodyStr = bodyStr.replaceAll("cid:", "");
object.setBodyString(bodyStr);
}
注意：在XSL裡面傳入的$bodyObj，會自動對應到Java method setXlstDataObject的第一個參數（型別為XsltDataObject）。所以在XsltCommonUtils裡面就可以寫一些針對JavaBean的處理動作，對XSL裡面的$bodyObj再做操作。至於XSL跟Java之間其他的物件型態轉換規則，可以自行參考文件。
所以我們的XSL檔跟XML都有了之後就是要透過TransformerFactory轉換成HTML
XmlToXLSTtest.java
package com;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
/**
 * Command-line driver: applies the stylesheet {@code test.xsl} to
 * {@code EDRM_Sample-2.xml} and writes the result to {@code EDRM.html}.
 * All three paths are relative to the working directory.
 */
public class XmlToXLSTtest {
    /**
     * Runs the transformation. Failures are printed to standard error and do
     * not propagate.
     *
     * @param args not used
     */
    public static void main(String[] args) {
        TransformerFactory tFactory = TransformerFactory.newInstance();
        FileOutputStream out = null;
        try {
            Transformer transformer = tFactory.newTransformer(new StreamSource("test.xsl"));
            out = new FileOutputStream("EDRM.html");
            transformer.transform(new StreamSource("EDRM_Sample-2.xml"), new StreamResult(out));
            System.out.println("transformer finished.");
        } catch (TransformerConfigurationException e) {
            e.printStackTrace();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (TransformerException e) {
            e.printStackTrace();
        } finally {
            // The stream was opened here, so it is closed here, including
            // when the transformation fails.
            if (out != null) {
                try {
                    out.close();
                } catch (java.io.IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
Reference:
Java and XSLT
XSLT Wiki
Processing XML with Java
XSL Transformations (XSLT) Version 2.0 (W3c)
XPATH 語法
XSLT 基礎
简单的 Xalan 扩展函数

沒有留言:
張貼留言