// Darren Hardy
// MAT 259 - Project One
// 10 January 2009
//
// Offline preprocessing sketch: reads data/summarized.txt and writes
//   data/histogram.txt  - one row per date with the fraction of items in each
//                         of the nCategory categories, plus the daily total
//   data/mediacount.txt - overall item count per media key
// then exits without displaying a window.
//
import java.text.SimpleDateFormat;

// global constants
final int nCategory = 4;
final DateFormat dfParseable = new SimpleDateFormat("yyyy-MM-dd");

// Media keys grouped by category; row i is reported as category i+1
// in mediacount.txt.
final String mediaKeys[][] = {
  {"Book", "Music", "Kit", "Folder", "Record", "Pamphlet", "Art", "Poster", "Newspaper"},
  {"AudioTape", "Magazine", "Map", "Slides", "Microfiche", "Photo" },
  {"Video", "Diskette"},
  {"DVD", "CD", "CD-ROM", "Web"}
};

// global variables
SortedMap summaryMap = new TreeMap();   // Date -> float[nCategory] of per-category fractions
SortedMap mediaCounts = new TreeMap();  // ItemType.getMedia() value -> Integer running total

// Runs both generation passes once and terminates the sketch.
void setup() {
  setVisible(false);
  generateHistogram();
  generateCounts();
  exit();
}

// example summarized.txt data:
// 2005-05-06,jccd,153
// 2005-05-06,jccdrom,5
// 2005-05-06,jcdvd,396
// 2005-05-06,jckit,27
// 2005-05-06,jcvhs,420
// 2005-05-06,jrbk,1
// 2005-05-06,ucflpdr,1
// 2005-05-06,ucfold,27
// 2005-05-06,ucunkn,1
// 2005-05-06,ucunknj,3

// Reads summarized.txt, writes one histogram row per date to histogram.txt,
// records each row in summaryMap, and accumulates per-media totals in
// mediaCounts.
//
// Rows are emitted whenever the date changes, so records for the same date
// must be contiguous in the input. Records whose ItemType reports a null
// media are ignored entirely.
private void generateHistogram() {
  ArrayList parseList = loadSummaryRecords();
  println("loaded " + parseList.size()/3 + " data points");

  PrintWriter hOut = createWriter(dataPath("histogram.txt"));
  try {
    hOut.println("# date,h1,h2,h3,h4,total");

    Date currentDate = null;
    // ItemType -> Integer for the date currently being accumulated.
    // NOTE(review): relies on ItemType being usable as a HashMap key
    // (equals/hashCode are defined outside this file) - confirm.
    Map dataPoints = new HashMap();

    for (Iterator i = parseList.iterator(); i.hasNext();) {
      // load record (the list is a flat sequence of date, type, count triples)
      Date dt = (Date)i.next();
      ItemType it = (ItemType)i.next();
      Integer n = (Integer)i.next();

      if (it.getMedia() == null) {
        continue;
      }
      if (currentDate == null) {
        currentDate = dt;
      }

      // date changed: emit the finished day and start a new one
      if (!currentDate.equals(dt)) {
        writeHistogramRow(hOut, currentDate, dataPoints);
        dataPoints.clear();
        currentDate = dt;
      }
      dataPoints.put(it, n);

      // compute total counts by media
      Integer x = (Integer)mediaCounts.get(it.getMedia());
      if (x == null) x = new Integer(0);
      mediaCounts.put(it.getMedia(), new Integer(x.intValue() + n.intValue()));
    }

    // The loop only emits a row when it sees the *next* date, so the last
    // accumulated day has to be written explicitly here.
    if (currentDate != null) {
      writeHistogramRow(hOut, currentDate, dataPoints);
    }
  } finally {
    hOut.close();
  }
}

// Loads summarized.txt and returns a flat list of (Date, ItemType, Integer)
// triples for every well-formed "date,type,count" line dated 2005 or later.
// Empty lines, lines starting with '#', and lines without exactly three
// fields are skipped; lines with an unparseable date or count are reported
// with println and skipped. Returns an empty list if the file cannot be read.
private ArrayList loadSummaryRecords() {
  ArrayList parseList = new ArrayList();
  String fn = dataPath("summarized.txt");
  println("loading summarized data from " + fn);

  String[] lines = loadStrings(fn);
  if (lines == null) {
    // loadStrings() returns null when the file is unavailable
    println("could not read " + fn);
    return parseList;
  }

  for (int i = 0; i < lines.length; i++) {
    if (lines[i].length() == 0 || lines[i].charAt(0) == '#') {
      continue;
    }
    try {
      String data[] = splitTokens(lines[i], WHITESPACE + ",");
      if (data.length == 3) {
        Date dt = dfParseable.parse(data[0]);
        // Parse the count before appending anything so that a bad line can
        // never leave a partial triple in the list.
        Integer n = new Integer(data[2]);
        if ((dt.getYear()+1900) >= 2005) {
          parseList.add(dt);
          parseList.add(new ItemType(data[1]));
          parseList.add(n);
        }
      }
    }
    catch (ParseException e) {
      println(e);
    }
    catch (NumberFormatException e) {
      println(e);
    }
  }
  return parseList;
}

// Writes one "date,f1,...,fN,total" row for the given date to hOut and
// stores the fraction array in summaryMap under that date.
// NOTE: if every count for the date is zero, total is 0 and the fractions
// are written as NaN (float division by zero).
private void writeHistogramRow(PrintWriter hOut, Date date, Map dataPoints) {
  int counts[] = computeModernityCounts(dataPoints);

  // compute total across categories
  int total = 0;
  for (int j = 0; j < nCategory; j++) {
    total += counts[j];
  }

  float histogram[] = new float[nCategory];
  hOut.print(dfParseable.format(date));
  for (int j = 0; j < nCategory; j++) {
    histogram[j] = float(counts[j]) / total;
    hOut.print(",");
    hOut.print(histogram[j]);
  }
  hOut.print(",");
  hOut.println(total);

  // register the histogram
  summaryMap.put(date, histogram);
}

// Writes mediacount.txt: one "category,mediaKey,count" line per entry of
// mediaKeys, where category is the 1-based row index and count is looked up
// in mediaCounts.
// NOTE: a media key that never appeared in the input has no entry in
// mediaCounts and is written with the literal text "null" as its count.
private void generateCounts() {
  PrintWriter cOut = createWriter(dataPath("mediacount.txt"));
  try {
    for (int i = 0; i < nCategory; i++) {
      for (int j = 0; j < mediaKeys[i].length; j++) {
        String k = mediaKeys[i][j];
        Integer v = (Integer)mediaCounts.get(k);
        cOut.println(i+1 + "," + k + "," + v);
      }
    }
  } finally {
    cOut.close();
  }
}

// Sums the counts in dataPoints (ItemType -> Integer) into nCategory buckets
// indexed by ItemType.getModern() - 1. Entries whose getModern() value is
// outside 1..nCategory are reported with println and left out of the sums.
private int[] computeModernityCounts(Map dataPoints) {
  int counts[] = new int[nCategory];
  Iterator i = dataPoints.keySet().iterator();
  while (i.hasNext()) {
    ItemType it = (ItemType)i.next();
    Integer n = (Integer)dataPoints.get(it);
    int m = it.getModern();
    if (m >= 1 && m <= nCategory) {
      counts[m-1] += n.intValue();
    }
    else {
      println("Unknown media type: " + it);
    }
  }
  return counts;
}