import java.io.*; /* Strip all library data UTF-16 XML files in the data directory and convert them into a single merged ASCII file that are easier to parse. The XML files can be anywhere beneath the data directory. The converted file will be in the data directory itself. The converted file will be named "splxmldata.txt". */ void setup() { /* legitimate fields are: itemNumber bibNumber ckodate ckotime ckidate ckitime collcode itemtype barcode title callNumber deweyClass subject Each of the above, except for "subject" will always return a single entry. There may be multiple "subject" fields for any single transaction. Thus if you need the "subject" field it should be listed last in the array of keepers so that you will be able to keep track of the indexing. For example, if keepers contains {"itemNumber", "title"} then a converted xml file will contain lines looking something like this: 2818704,God is dead 3106563,Bella the bunny fairy etc... and you can easily map index 0 to the indexNumber field and index 1 to the title field if keepers contains {"itemNumber", "title", "subject"} then a converted xml file will look something like this: 2818704,God is dead,Political fiction,God Fiction,Darfur Sudan Fiction 3106563,Bella the bunny fairy,Pets Fiction,Fairies Fiction with the same indexing for 0 and 1, and where you can iterate from index 2 through the index length to get the subjects. In some cases there may be no subjects at all for particular transactions. */ String[] keepers = new String[] { // "itemNumber", "bibNumber", "ckodate", "ckotime", "ckidate", "ckitime", "collcode", "itemtype", "barcode", "title", "callNumber", "deweyClass", "subject" "ckitime", "deweyClass" }; println("about to strip files in data directory..."); filterData(keepers); println("...done stripping files in data directory!"); System.exit(1); } void filterData(String[] keepers) { ArrayList allFiles = listFilesRecursive(dataPath("")); try { String writeTo = "splxmldata.txt"; FileOutputStream fos = new FileOutputStream(dataPath(writeTo)); Writer out = new OutputStreamWriter(fos, "ASCII"); //StringBuilder transaction; //transaction = new StringBuilder(); //""; for (int fn = 0; fn < allFiles.size(); fn++) { File f = (File) allFiles.get(fn); if (!f.getName().endsWith(".xml")) { continue; } println("\treading \"" + f.getName()); //println("abs : " + f.getAbsolutePath()); //String writeTo = (f.getName().substring(0, f.getName().length() - 4) + ".txt"); InputStreamReader reader = new InputStreamReader(new FileInputStream(dataPath(f.getAbsolutePath())), "UTF16"); BufferedReader in = new BufferedReader(reader); //FileOutputStream fos = new FileOutputStream(dataPath(writeTo)); //Writer out = new OutputStreamWriter(fos, "ASCII"); String line; String transaction = ""; while ((line = in.readLine()) != null) { //transaction = ""; //transaction = new StringBuilder(); //""; line = line.trim(); if (line.indexOf("") >= 0) { transaction = ""; } for (int i = 0; i < keepers.length; i++) { if (line.indexOf("<" + keepers[i] + ">") >= 0) { int startIdx = keepers[i].length() + 2; int endIdx = line.indexOf(""); line = line.substring(startIdx, endIdx); if (i != 0) { //transaction.append(","); //transaction.append(line); transaction += "," + line; } else { transaction += line; //transaction.append(line); } } } if (line.indexOf("") >= 0) { //println(transaction); //transaction.append("\n"); transaction += "\n"; out.write(transaction.toString()); } } in.close(); } println(".....wrote \"" + writeTo + "\""); //out.write(transaction.toString()); out.close(); } catch(IOException e) { e.printStackTrace(); } } // This function returns all the files in a directory as an array of File objects // This is useful if you want more info about the file File[] listFiles(String dir) { File file = new File(dir); if (file.isDirectory()) { File[] files = file.listFiles(); return files; } else { // If it's not a directory return null; } } // Function to get a list of all files in a directory and all subdirectories ArrayList listFilesRecursive(String dir) { ArrayList fileList = new ArrayList(); recurseDir(fileList,dir); return fileList; } // Recursive function to traverse subdirectories void recurseDir(ArrayList a, String dir) { File file = new File(dir); if (file.isDirectory()) { // If you want to include directories in the list a.add(file); File[] subfiles = file.listFiles(); for (int i = 0; i < subfiles.length; i++) { // Call this function on all files in this directory recurseDir(a,subfiles[i].getAbsolutePath()); } } else { a.add(file); } } //we are not drawing anything! void draw() { frame.setVisible(false); }