In a previous blog post I mentioned a Databricks (Scala) function for generating an XML file. Here it is.
/**
 * Exports a DataFrame as one XML document under the `/mnt/export` mount point.
 *
 * The XML data source can only write a flat root/row structure, so the main data is first
 * written to a temporary directory, read back as plain text, wrapped with the caller-supplied
 * declaration, enclosing element and header, and finally saved as plain text to
 * `parDestinationFilePath + parDestinationFileName + ".xml"`.
 *
 * Failures are reported on the console and are not rethrown to the caller.
 *
 * @param parInputDataDataframe   dataframe with the main data; column names become element names
 * @param parXmlDeclaration       XML declaration (version, encoding, etc.)
 * @param parXmlHeader            XML header holding any elements that precede the main data part
 * @param parXmlMainElementStart  opening tag of the enclosing (outermost) element
 * @param parXmlMainElementEnd    closing tag of the enclosing (outermost) element
 * @param parDestinationFileName  XML file name only (the ".xml" suffix is appended here)
 * @param parDestinationFilePath  path to the XML file, without the file name, ending with "/"
 * @param parXmlRootTag           root tag of the main data part
 * @param parXmlRowTag            tag used for each row of the main data
 */
def createXMLFile(
  parInputDataDataframe: DataFrame,
  parXmlDeclaration: String,
  parXmlHeader: String,
  parXmlMainElementStart: String,
  parXmlMainElementEnd: String,
  parDestinationFileName: String,
  parDestinationFilePath: String,
  parXmlRootTag: String,
  parXmlRowTag: String
): Unit = {
  val mountPoint = "/mnt/export"
  // Relative location shared by the temporary directory and the final ".xml" file.
  val exportPath = parDestinationFilePath + parDestinationFileName
  // Same temporary directory seen through the Spark (dbfs:) and the local-file (/dbfs) APIs.
  val tempDirDbfs = "dbfs:/mnt/export/" + exportPath
  val tempDirLocal = "/dbfs/mnt/export/" + exportPath

  try {
    // Mount the ADLS storage that receives the XML document.
    mountDestinationStorage(mountPoint)

    try {
      // Only "part-00000" is read back below, so force a single output partition;
      // otherwise rows written to other part files would be silently dropped.
      parInputDataDataframe
        .coalesce(1)
        .write
        .format("xml")
        .option("rootTag", parXmlRootTag)
        .option("rowTag", parXmlRowTag)
        .mode("overwrite")
        .save(tempDirDbfs)

      // Read the generated XML back as plain text, always releasing the file handle.
      val partFile = Source.fromFile(tempDirLocal + "/part-00000")
      val fileContents =
        try partFile.mkString
        finally partFile.close()

      // Compose the complete document around the generated main data part.
      val txtFinal =
        parXmlDeclaration + parXmlMainElementStart + parXmlHeader + "\r\n" + fileContents + parXmlMainElementEnd

      // Save the final file as plain text (without the XML library), overwriting any previous one.
      dbutils.fs.put(tempDirDbfs + ".xml", txtFinal, true)
    } finally {
      // Remove the temporary directory even when the export failed half-way.
      new Directory(new File(tempDirLocal + "/")).deleteRecursively()
    }

    // Unmount the ADLS storage after a successful export.
    unmountDestinationStorage(mountPoint)
  } catch {
    case _: java.rmi.RemoteException =>
      println("Directory is Already Mounted")
      dbutils.fs.unmount("/mnt/export")
    case e: Exception =>
      println("There was some other error.")
      // Surface the cause instead of discarding it.
      e.printStackTrace()
      // Best-effort unmount so a failed run does not leave the mount point occupied, which
      // would send the next call into the "already mounted" branch without exporting
      // anything. Presumably harmless when nothing is mounted; any failure here is ignored.
      scala.util.Try(unmountDestinationStorage(mountPoint))
  }
}