Categories
Azure Databricks

Databricks function for generating XML file

In a previous blog post I mentioned a Databricks (Scala) function for generating an XML file. Here it is.

/** Builds a complete XML document from a DataFrame and stores it under the `/mnt/export` mount.
  *
  * The rows are first written with the "xml" data source into a temporary directory named
  * after the destination file. The generated part file is then read back as plain text,
  * wrapped with the supplied declaration, outer element and header, and saved as
  * `<parDestinationFilePath><parDestinationFileName>.xml`. The temporary directory is removed
  * and the storage is unmounted afterwards, whether or not the export succeeded.
  *
  * Exceptions raised by the export steps are caught and reported with `println`.
  *
  * @param parInputDataDataframe   dataframe with the main data; column names become element names
  * @param parXmlDeclaration       XML declaration (version, encoding, etc.)
  * @param parXmlHeader            XML header with any elements that precede the main data part
  * @param parXmlMainElementStart  opening tag of the outer (document) element
  * @param parXmlMainElementEnd    closing tag of the outer (document) element
  * @param parDestinationFileName  XML file name only (the ".xml" suffix is appended here)
  * @param parDestinationFilePath  path to the XML file, without the file name, ending with "/"
  * @param parXmlRootTag           root tag for the main data part
  * @param parXmlRowTag            tag for each row of the main data
  */
def createXMLFile(
  parInputDataDataframe: DataFrame,
  parXmlDeclaration: String,
  parXmlHeader: String,
  parXmlMainElementStart: String,
  parXmlMainElementEnd: String,
  parDestinationFileName: String,
  parDestinationFilePath: String,
  parXmlRootTag: String,
  parXmlRowTag: String
): Unit = {
  val mountPoint = "/mnt/export"
  val relativeTarget = parDestinationFilePath + parDestinationFileName
  // The same location addressed through the DBFS URI scheme and through the local /dbfs path.
  val targetDbfs = "dbfs:" + mountPoint + "/" + relativeTarget
  val targetLocal = "/dbfs" + mountPoint + "/" + relativeTarget

  try {
    // Mount the ADLS storage that receives the XML document.
    mountDestinationStorage(mountPoint)

    try {
      // The XML library cannot produce the full document layout, so it only writes the main
      // data into a temporary directory. Only "part-00000" is read back below, therefore the
      // data is collapsed into a single partition so that no rows end up in other part files.
      parInputDataDataframe
        .coalesce(1)
        .write
        .format("xml")
        .option("rootTag", parXmlRootTag)
        .option("rowTag", parXmlRowTag)
        .mode("overwrite")
        .save(targetDbfs)

      // Read the generated part file as plain text and always release the file handle.
      val partFile = Source.fromFile(targetLocal + "/part-00000")
      val fileContents =
        try {
          partFile.mkString
        } finally {
          partFile.close()
        }

      // Compose the whole final XML document around the generated data part.
      val txtFinal =
        parXmlDeclaration + parXmlMainElementStart + parXmlHeader + "\r\n" + fileContents + parXmlMainElementEnd

      // Save the final document as plain text (overwriting any existing file).
      dbutils.fs.put(targetDbfs + ".xml", txtFinal, true)
    } catch {
      // Failures after a successful mount are reported here, with details, so that they are
      // not mistaken for the mount conflict handled by the outer catch.
      case e: Exception =>
        println("There was some other error. " + e.getClass.getName + ": " + e.getMessage)
    } finally {
      // Clean up even when the export failed: drop the temp directory, then unmount.
      try {
        new Directory(new File(targetLocal + "/")).deleteRecursively()
      } finally {
        unmountDestinationStorage(mountPoint)
      }
    }
  } catch {
    case e: java.rmi.RemoteException =>
      println("Directory is Already Mounted")
      dbutils.fs.unmount(mountPoint)
    case e: Exception =>
      println("There was some other error. " + e.getClass.getName + ": " + e.getMessage)
  }
}