Transform SVN log diff using Java XPath

Sometimes the output of a version control system’s tools or command options must be transformed for further use. For example, you generate a log report and must then import that into a spreadsheet or database.

Options
One option is to use text-based tools and scripting. Another is to use the XML output of the version control system, if available. With Subversion, XML output is available for the log and diff reports, so you can apply the various XML tools, such as XQuery, XPath, XSLT, and so forth.

DVCS?
Out of the box, Git and Mercurial, two popular Distributed Version Control Systems (DVCS), do not produce XML as easily as SVN does with its --xml option. To get XML from them, you have to define a 'formatter' or 'pretty-print' template for them to use.

XPath
Below I use Java and XPath to transform the SVN output into Comma-separated values (CSV) files. One good reason for using an imperative approach is that this gives the opportunity to perform more complex transformations. A case in point is that in SVN the XML output format is only available for summary reports, and thus to create more comprehensive results or send to complex destinations, more processing would be required.

Source
The source is just a simple approach with minimal error handling and limited to CSV output. Originally I had two methods with internal loops that were almost identical, so I made the loop handling into a call-back that gets invoked by an Anonymous class. Kind of a simple Visitor or Strategy Pattern? Closures, as available in scripting languages like Groovy, would have been a simpler solution.

Of course a more comprehensive approach would abstract the output format or use other methods like POI to create a spreadsheet. Note that this approach requires loading the full XML into memory. For very large logs or diffs, a streaming approach would be needed.

Source also available here.

Example source: SVN diff and log transform to CSV

/**
 * 
 */
package com.octodecillion.utils;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.Writer;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

/**
 * Transforms Subversion XML reports (diff summary and log) into CSV files.
 * 
 * <p>
 * Every report starts with the same seven-column header
 * ({@code Dev,Revision,Date,Action,Kind,Path,Message}) and every row has
 * exactly seven fields, so diff and log reports can be loaded into the same
 * spreadsheet layout. Fields are escaped following the usual CSV convention:
 * a field containing a comma, a double quote or a line break is wrapped in
 * double quotes and embedded double quotes are doubled.
 * 
 * <p>
 * The whole XML document is loaded into memory (DOM), so very large reports
 * need a streaming approach instead. Instances keep the active writer and
 * XPath evaluator in instance fields while a report is being generated and
 * are therefore not safe for concurrent use.
 */
@SuppressWarnings("javadoc")
public class SvnOutputTransform {

	/** Header row shared by both report types; rows must supply one value per column. */
	private static final String HEADER_COLUMN = "Dev,Revision,Date,Action,Kind,Path,Message";
	/** Charset of the generated CSV; fixed so the output does not depend on the platform default. */
	private static final String OUTPUT_ENCODING = "UTF-8";
	private static final String ITEM_ATTR_NAME = "@item";
	private static final String KIND_ATTR_NAME = "@kind";
	private static final String ACTION_ATTR_NAME = "@action";
	private static final String REVISION_ATTR_NAME = "@revision";
	private static final String MSG_NODE = "msg";
	private static final String DATE_NODE = "date";
	private static final String AUTHOR_NODE = "author";
	/** Value of the Dev column in diff reports, which carry no author information. */
	private static final String DIFFUSER = "DIFF";

	/** Destination of the report currently being generated; {@code null} between reports. */
	private PrintWriter reportOut;
	/** XPath evaluator shared with the row call-backs; {@code null} between reports. */
	private XPath xp;

	/**
	 * Example run: converts the sample diff and log files.
	 * 
	 * @param args not used
	 */
	public static void main(final String[] args) {
		try {
			SvnOutputTransform svnTransform = new SvnOutputTransform();

			svnTransform.diffSummaryToCsv("data/diff-summary.xml",
					"bin/SvnDiff.csv");

			svnTransform.logSummaryToCsv("data/log-summary.xml",
					"bin/SvnLog.csv");

		} catch (Exception e) {
			e.printStackTrace();
		}

	}

	/** Call-back that turns the selected XML nodes into report rows. */
	interface RowProcess {
		/**
		 * Writes the rows for the given nodes.
		 * 
		 * @param nodeList nodes selected by the XPath expression given to
		 *            {@link SvnOutputTransform#generateReport}
		 * @throws Exception on any evaluation or output failure
		 */
		public void doRows(NodeList nodeList) throws Exception;
	}

	/**
	 * Parses the XML file, writes the CSV header and hands the nodes selected
	 * by {@code nodeString} to {@code processRows}.
	 * 
	 * @param dataFilePath path of the XML input file
	 * @param reportFilePath path of the CSV file to create (overwritten if present)
	 * @param nodeString XPath expression selecting the nodes to report on
	 * @param processRows call-back that writes the rows
	 * @throws SvnTransformException wrapping any failure while reading,
	 *             evaluating or writing
	 */
	public void generateReport(final String dataFilePath,
			final String reportFilePath, final String nodeString,
			final RowProcess processRows) {

		try {
			NodeList nodeList = setup(dataFilePath, reportFilePath, nodeString);
			reportOut.println(HEADER_COLUMN);
			processRows.doRows(nodeList);

			// PrintWriter never throws on write; checkError() flushes and
			// reports whether any earlier write failed.
			if (reportOut.checkError()) {
				throw new IOException("Error writing " + reportFilePath);
			}
		} catch (Exception e) {
			throw new SvnTransformException(String.format(
					"Unable to transform '%s' into '%s'", dataFilePath,
					reportFilePath), e);
		} finally {
			releaseResources();
		}
	}

	/**
	 * Loads the XML document, selects the report nodes and opens the report
	 * writer. The writer is opened last so that an unreadable input or an
	 * invalid XPath expression does not truncate an existing report file.
	 * 
	 * @param dataFilePath path of the XML input file
	 * @param reportFilePath path of the CSV file to create
	 * @param xpathString XPath expression selecting the nodes to report on
	 * @return the selected nodes
	 * @throws Exception if the input cannot be read or parsed, the expression
	 *             is invalid or the report file cannot be opened
	 */
	private NodeList setup(final String dataFilePath,
			final String reportFilePath, final String xpathString) throws Exception {

		DocumentBuilder builder = DocumentBuilderFactory.newInstance()
				.newDocumentBuilder();

		// Parse from a byte stream so the parser honours the encoding named
		// in the XML declaration; a Reader would force the platform charset.
		Document dom;
		InputStream in = new FileInputStream(dataFilePath);
		try {
			dom = builder.parse(new InputSource(in));
		} finally {
			try {
				in.close();
			} catch (IOException ignored) {
				// failing to close a stream that was only read loses no data
			}
		}

		xp = XPathFactory.newInstance().newXPath();
		NodeList nodeList = (NodeList) xp.evaluate(xpathString, dom,
				XPathConstants.NODESET);

		reportOut = new PrintWriter(new File(reportFilePath), OUTPUT_ENCODING);

		return nodeList;
	}

	/**
	 * Convert SVN generated diff summary xml file to CSV.
	 * 
	 * The format of the input XML is:
	 * 
	 * <pre>
	 * &lt;diff&gt;
	 *   &lt;paths&gt;
	 *     &lt;path props="none" kind="file" item="modified"&gt;
	 *       full path of resource
	 *     &lt;/path&gt;
	 *   &lt;/paths&gt;
	 * &lt;/diff&gt;
	 * </pre>
	 * 
	 * One row is written per {@code path} element. The Revision, Date and
	 * Message columns are left empty because the diff summary does not
	 * contain them.
	 * 
	 * @param dataFilePath xml file generated by svn diff --xml --summarize ....
	 * @param reportFilePath destination of CSV file
	 * @throws IllegalArgumentException if either path is {@code null}
	 * @throws SvnTransformException if the report cannot be generated
	 */
	public void diffSummaryToCsv(final String dataFilePath,
			final String reportFilePath) {

		preconditionCheck(dataFilePath, reportFilePath);

		generateReport(dataFilePath, reportFilePath, "//diff/paths/path",
				new RowProcess() {
					@SuppressWarnings("synthetic-access")
					@Override
					public void doRows(final NodeList nodeList) throws Exception {
						for (int i = 0; i < nodeList.getLength(); i++) {
							Node node = nodeList.item(i);
							String kind = xp.evaluate(KIND_ATTR_NAME, node);
							String item = xp.evaluate(ITEM_ATTR_NAME, node);
							// trim: pretty-printed XML surrounds the path with whitespace
							String pathEntry = xp.evaluate("text()", node).trim();

							writeRow(DIFFUSER, "", "", csvField(item),
									csvField(kind), csvField(pathEntry), "");
						}
					}
				});
	}

	/**
	 * Convert SVN generated log xml file to CSV.
	 * 
	 * The format of the input XML is:
	 * 
	 * <pre>
	 * &lt;log&gt;
	 *   &lt;logentry revision="10879"&gt;
	 *     &lt;author&gt;jdoe&lt;/author&gt;
	 *     &lt;date&gt;2013-03-15T18:10:07.264531Z&lt;/date&gt;
	 *     &lt;paths&gt;
	 *       &lt;path kind="file" action="A"&gt;/project/trunk/file.txt&lt;/path&gt;
	 *     &lt;/paths&gt;
	 *     &lt;msg&gt;commit message&lt;/msg&gt;
	 *   &lt;/logentry&gt;
	 * &lt;/log&gt;
	 * </pre>
	 * 
	 * One row is written per {@code path} element; a {@code logentry}
	 * without paths produces no rows. Only the date part of the time stamp is
	 * reported, known action codes are replaced by their names, and the
	 * commit message is always quoted.
	 * 
	 * @param dataFilePath xml file generated by svn log --xml ....
	 * @param reportFilePath destination of CSV file
	 * @throws IllegalArgumentException if either path is {@code null}
	 * @throws SvnTransformException if the report cannot be generated
	 */
	public void logSummaryToCsv(final String dataFilePath,
			final String reportFilePath) {
		preconditionCheck(dataFilePath, reportFilePath);

		generateReport(dataFilePath, reportFilePath, "//log/logentry",
				new RowProcess() {
					@SuppressWarnings("synthetic-access")
					@Override
					public void doRows(final NodeList nodeList) throws Exception {
						for (int i = 0; i < nodeList.getLength(); i++) {
							Node entry = nodeList.item(i);
							String author = csvField(xp.evaluate(AUTHOR_NODE, entry));
							String day = csvField(dateOnly(xp.evaluate(DATE_NODE, entry)));
							String revision = csvField(xp.evaluate(REVISION_ATTR_NAME, entry));
							String message = csvQuoted(xp.evaluate(MSG_NODE, entry));

							NodeList paths = (NodeList) xp.evaluate(
									"paths/path", entry, XPathConstants.NODESET);
							if (paths == null) {
								continue;
							}

							for (int k = 0; k < paths.getLength(); k++) {
								Node aPath = paths.item(k);
								String action = actionToName(xp.evaluate(
										ACTION_ATTR_NAME, aPath));
								String kind = xp.evaluate(KIND_ATTR_NAME, aPath);
								String filePath = xp.evaluate("text()", aPath).trim();

								writeRow(author, revision, day,
										csvField(action), csvField(kind),
										csvField(filePath), message);
							}
						} // end each logentry
					}
				});

	} // end logToCsv

	/**
	 * Writes one CSV row to the current report.
	 * 
	 * @param cells already escaped field values, in column order
	 */
	private void writeRow(final String... cells) {
		StringBuilder row = new StringBuilder();
		for (int i = 0; i < cells.length; i++) {
			if (i > 0) {
				row.append(',');
			}
			row.append(cells[i]);
		}
		reportOut.println(row.toString());
	}

	/**
	 * Escapes a value for use as a CSV field, quoting it only when it
	 * contains a comma, a double quote or a line break.
	 * 
	 * @param value raw value, may be {@code null}
	 * @return the escaped field; empty for {@code null}
	 */
	static String csvField(final String value) {
		if (value == null) {
			return "";
		}
		boolean needsQuotes = (value.indexOf(',') >= 0)
				|| (value.indexOf('"') >= 0) || (value.indexOf('\n') >= 0)
				|| (value.indexOf('\r') >= 0);
		return needsQuotes ? csvQuoted(value) : value;
	}

	/**
	 * Wraps a value in double quotes, doubling any embedded double quote.
	 * 
	 * @param value raw value, may be {@code null}
	 * @return the quoted field; {@code ""} (two quote characters) for
	 *         {@code null} or empty input
	 */
	static String csvQuoted(final String value) {
		String text = (value == null) ? "" : value;
		return "\"" + text.replace("\"", "\"\"") + "\"";
	}

	/**
	 * Returns the part of a time stamp before the first {@code 'T'}.
	 * 
	 * @param timestamp time stamp such as {@code 2013-03-15T18:10:07.264531Z}
	 * @return the date part, or the input unchanged when it has no {@code 'T'}
	 */
	static String dateOnly(final String timestamp) {
		int separator = timestamp.indexOf('T');
		return (separator < 0) ? timestamp : timestamp.substring(0, separator);
	}

	/**
	 * Maps an SVN action code to its name.
	 * 
	 * @param n action code, e.g. {@code "A"}
	 * @return the name defined by {@link ACTION}, or {@code n} itself when
	 *         the code is unknown or {@code null}
	 */
	private static String actionToName(final String n) {
		if (n == null) {
			return n;
		}
		try {
			return ACTION.valueOf(n).getName();
		} catch (IllegalArgumentException unknownCode) {
			// not one of the known codes: report it as found in the XML
			return n;
		}
	}

	/** Closes the report writer, if any, and clears the per-report state. */
	private void releaseResources() {
		if (reportOut != null) {
			// PrintWriter.close() flushes first and does not throw
			reportOut.close();
			reportOut = null;
		}
		xp = null;
	}

	/**
	 * Rejects {@code null} file paths.
	 * 
	 * @param dataFilePath path of the XML input file
	 * @param reportFilePath path of the CSV output file
	 * @throws IllegalArgumentException if either argument is {@code null}
	 */
	private void preconditionCheck(final String dataFilePath,
			final String reportFilePath) throws IllegalArgumentException {
		if ((dataFilePath == null) || (reportFilePath == null)) {
			throw new IllegalArgumentException(String.format(
					"dataFilePath='%s',reportFilePath='%s'", dataFilePath,
					reportFilePath));
		}
	}

	/**
	 * SVN action codes and the names reported for them.
	 * 
	 */
	enum ACTION {
		A("add"), M("modify"), D("delete"), R("replace"), Z("z");

		private final String name;

		private ACTION(final String name) {
			this.name = name;
		}

		public String getName() {
			return name;
		}
	}

	/** Svn transform runtime exception */
	static public class SvnTransformException extends RuntimeException {
		private static final long serialVersionUID = 1L;

		/**
		 * @param message description of the failed transformation
		 * @param cause the underlying failure
		 */
		public SvnTransformException(final String message, final Throwable cause) {
			super(message, cause);
		}
	}

}

 

Updates

Links

Similar Posts:

Creative Commons License
This work is licensed under a Creative Commons Attribution-NonCommercial-NoDerivs 3.0 Unported License.

Leave a Reply

Your email address will not be published. Required fields are marked *

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>