package com.webfoot.prefuse; //TODO @@@ How can I prevent people from modifying the table? Can I mark //the columns as read-only? //I'm afraid of doing too much to prevent setting, as the constructor needs //to set things. Can I set things by doing super.set() etc? //TODO @@@ do I want to allow using predicates on this table? Probably... import java.util.ArrayList; import java.util.Enumeration; import java.util.Hashtable; import java.util.Iterator; import java.lang.Math; import java.util.HashMap; import prefuse.data.column.Column; import prefuse.data.event.ColumnListener; import prefuse.data.parser.IntParser; import prefuse.data.parser.StringParser; import prefuse.data.tuple.TupleManager; import prefuse.data.util.RowManager; import prefuse.util.collections.CopyOnWriteArrayList; import prefuse.data.Table; import prefuse.data.tuple.TableTuple; import prefuse.data.parser.DoubleParser; import prefuse.data.Tuple; /** *

A HistogramTable is a subclass of (@link prefuse.data.Table), but * that has been histogramized: one column of the original * (@link prefuse.data.Table) gets counted and slotted into a Table. * The first (@link prefuse.data.column.Column) is the ranges of the data, * with the value of the cell corresponding to the minimum value of the range. * The second column holds the number of each element in the original Table * that falls within the data range represented by the cell in column 1. * * NOTE: This only works with numeric and string fields. * It has not been tested with booleans or derived fields. * Booleans will probably be treated as strings. * * Known bug: See the HistogramFrame class comments about an axis bug. * * @author Kaitlin Duck Sherwood * @author jeffrey heer */ public class HistogramTable extends Table implements ColumnListener { // m_bin{Min, Max} are the min and max of the data column BUT for Strings, min is 1 // and max is the number of unique strings. protected Hashtable m_binMin = new Hashtable(); protected Hashtable m_binMax = new Hashtable(); protected Hashtable m_countMins = new Hashtable(); protected Hashtablem_countMaxes = new Hashtable(); protected double m_binWidth; protected int m_binCount; static final int DEFAULT_BIN_COUNT = 15; public HistogramTable(Table aTable) { this(aTable, DEFAULT_BIN_COUNT); } /** * @param aTable a Prefuse Table with data values (i.e. non-histogrammized) in it * @param aBinCount how many bins the data's range should be split into */ public HistogramTable(Table aTable, int aBinCount) { super(); String[] fieldNames = getFieldNames(aTable); m_binCount = aBinCount; initializeHistogramTable(fieldNames, m_binCount); for(int fieldIndex = 0; fieldIndex < fieldNames.length; fieldIndex++) { String field = fieldNames[fieldIndex]; Column dataColumn = aTable.getColumn(field); if(dataColumn == null) { // @@@ TODO do I want to throw an exception here? System.out.println("column not found for field "+field); System.exit(-23); } if ( aBinCount <= 0 ) { System.out.println("Uh-oh, HistogramTable can't cope with negative bin counts."); System.exit(23); } if(dataColumn.canGetDouble()) { initializeNumericColumn(field, dataColumn); } else if (dataColumn.canGetString()) { initializeStringColumn(field, dataColumn); } else { // TODO @@@ maybe someday throw an exception System.err.println("Hmm, column "+field+ " is not a number and not a string."+ "I don't know what to do with this column."); continue; } } } /** * @param aTable a HistogramTable or Prefuse Table * @return fieldNames a list of the names of the columns in the table. * Note that a HistogramTable will have all the same column names as * are in its (Prefuse Table) data table's, but will also have an additional * set of columns that have the counts. See getCountField(). * TODO It might be interesting to have a method getNonCountFieldNames which * strips out the count fields. */ public static String[] getFieldNames(Table aTable) { int columnCount = aTable.getColumnCount(); String[] fieldNames = new String[columnCount]; for(int columnIndex = 0; columnIndex values = new Hashtable(); int count = 0; String key; for (int rowIndex=0; rowIndex keys = values.keys(); while(keys.hasMoreElements() && rowIndex < m_binCount) { key = (String)keys.nextElement(); { binColumn.setString(key, rowIndex); countColumn.setInt(values.get(key), rowIndex++); } } // insert dummy values if there are fewer unique strings than bins for(int i = rowIndex; i= 0.0 : "m_binWidth < 0!"; } /** * Fill in the histogram table. * @param fields the names of all the fields * @param rowCount the number of rows */ private void initializeHistogramTable(String[] fields, int rowCount) { int columnCount = 2 * fields.length; m_listeners = new CopyOnWriteArrayList(); m_columns = new ArrayList(columnCount); m_names = new ArrayList(columnCount); m_rows = new RowManager(this); m_entries = new HashMap(columnCount+5); m_tuples = new TupleManager(this, null, TableTuple.class); addRows(rowCount); } /** * Initialize the bin column. The bin columns have information on * the range of values that the count columns have counts for. For * example, you can say "there are 17 elements between the value of * 2 and 14". 17 would be the value in the count field, and 2-14 * would be represented by the bin field. Note that the way that * bin fields are represented, the value in the bin field is the low * end of the range. In the example, the bin field would have a 2 * in it. * @param field the name of the dataColumn to histogrammize */ private void initializeNumericBinColumn(String field) { double dataColumnMin = m_binMin.get(field); for (int binIndex = 0; binIndex < m_binCount; binIndex++) { set(binIndex, field,dataColumnMin + binIndex*m_binWidth); } } /** * Initialize the column with the counts of elements in them. * @param field the name of the dataColumn to histogrammize * @param dataColumn the column in the original (@link prefuse.data.Table) * to be histogramized. */ private void initializeCountColumn(String field, Column dataColumn) { int binSlot; int currentCount; // separate var just for debugging ease String countField = getCountField(field); // initialize everything to 0 before starting to count for (int binIndex = 0; binIndex < m_binCount; binIndex++) { set(binIndex, countField, 0); } double dataColumnMin = m_binMin.get(field); double cellValue; for(int dataRowIndex = 0; dataRowIndex tuplesIterator = tuples(); tuplesIterator.hasNext();) { t = tuplesIterator.next(); System.out.println(t.toString()); } } public double getBinMin(String field) { return m_binMin.get(field); } public double getBinMax(String field) { return m_binMax.get(field); } public double getBinCount(String field) { return m_binCount; } /** * @param aColumn the column to get min/max of * @return min and max (in an array) of aColumn */ private double[] getNumericColumnMinMax(Column aColumn) { double oldMin = aColumn.getDouble(0); double oldMax = oldMin; double[] minMax = new double[2]; if(aColumn.canGetDouble()) { double currentValue; for(int rowIndex = 1; rowIndex < aColumn.getRowCount(); rowIndex++) { currentValue = aColumn.getDouble(rowIndex); oldMin = Math.min(oldMin, currentValue); oldMax = Math.max(oldMax, currentValue); } } minMax[0] = oldMin; minMax[1] = oldMax; return minMax; } /** * @param field the name of the histogramColumn * @return the minimum and maximum values of the associated * count column in an array. */ private double[] getNumericColumnMinMax(String field) { Column col = getColumn(field); return getNumericColumnMinMax(col); } /** * @param field the name of the histogramColumn * @return the minimum value of the associated count column * (In other words, if you ask for getCountMin("A"), you * will get the min of the column "A counts".) */ public double getCountMin(String field) { String countField = getCountField(field); double[] minMax = new double[2]; if(null == m_countMaxes.get(countField)) { minMax = getNumericColumnMinMax(countField); m_countMins.put(countField, (int)minMax[0]); m_countMaxes.put(countField, (int)minMax[1]); } return m_countMins.get(countField); } /** * @param field the name of the histogramColumn * @return the max value of the associated count column * (In other words, if you ask for getCountMin("A"), you * will get the max of the column "A counts".) */ public double getCountMax(String field) { String countField = getCountField(field); if(null == m_countMaxes.get(countField)) { getCountMin(field); // sets both } return m_countMaxes.get(countField); } public static String getCountField(String field) { return field+" count"; } } // end of class HistogramTable