package com.webfoot.prefuse;
//TODO @@@ How can I prevent people from modifying the table? Can I mark
//the columns as read-only?
//I'm afraid of doing too much to prevent setting, as the constructor needs
//to set things. Can I set things by doing super.set() etc?
//TODO @@@ do I want to allow using predicates on this table? Probably...
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Iterator;
import java.lang.Math;
import java.util.HashMap;
import prefuse.data.column.Column;
import prefuse.data.event.ColumnListener;
import prefuse.data.parser.IntParser;
import prefuse.data.parser.StringParser;
import prefuse.data.tuple.TupleManager;
import prefuse.data.util.RowManager;
import prefuse.util.collections.CopyOnWriteArrayList;
import prefuse.data.Table;
import prefuse.data.tuple.TableTuple;
import prefuse.data.parser.DoubleParser;
import prefuse.data.Tuple;
/**
*
* A HistogramTable is a subclass of {@link prefuse.data.Table} whose contents
* have been histogramized: the values of a column of the original
* {@link prefuse.data.Table} are counted and slotted into bins.
* The first {@link prefuse.data.column.Column} holds the ranges of the data,
* with the value of each cell being the minimum value of its range.
* The second column holds the number of elements in the original Table
* that fall within the data range represented by the cell in column 1.
*
* NOTE: This only works with numeric and string fields.
* It has not been tested with booleans or derived fields.
* Booleans will probably be treated as strings.
*
* Known bug: See the HistogramFrame class comments about an axis bug.
*
* @author Kaitlin Duck Sherwood
* @author jeffrey heer
*/
public class HistogramTable extends Table implements ColumnListener {
// m_bin{Min, Max} are the min and max of the data column BUT for Strings, min is 1
// and max is the number of unique strings.
protected Hashtable m_binMin = new Hashtable();
protected Hashtable m_binMax = new Hashtable();
protected Hashtable m_countMins = new Hashtable();
protected Hashtablem_countMaxes = new Hashtable();
protected double m_binWidth;
protected int m_binCount;
static final int DEFAULT_BIN_COUNT = 15;
/**
 * Builds a histogram table from the given data table, splitting the data
 * into DEFAULT_BIN_COUNT bins.
 * @param aTable a Prefuse Table with data values (i.e. non-histogrammized) in it
 */
public HistogramTable(Table aTable) {
    this(aTable, DEFAULT_BIN_COUNT);
}
/**
 * Builds a histogram table from the given data table. Every column of
 * aTable that can supply doubles is binned numerically; every other column
 * that can supply strings is binned by string value. Columns that can do
 * neither are reported on System.err and skipped.
 * @param aTable a Prefuse Table with data values (i.e. non-histogrammized) in it
 * @param aBinCount how many bins the data's range should be split into;
 * must be positive
 * @throws IllegalArgumentException if aBinCount is not positive, or if
 * aTable reports a field name for which it has no column
 */
public HistogramTable(Table aTable, int aBinCount)
{
    super();
    // Reject a bad bin count before any rows are added with it; this also
    // covers tables that have no fields at all.
    if (aBinCount <= 0) {
        throw new IllegalArgumentException(
            "HistogramTable needs a positive bin count, got " + aBinCount);
    }
    String[] fieldNames = getFieldNames(aTable);
    m_binCount = aBinCount;
    initializeHistogramTable(fieldNames, m_binCount);
    for (String field : fieldNames) {
        Column dataColumn = aTable.getColumn(field);
        if (dataColumn == null) {
            // Fail with an exception the caller can handle rather than
            // terminating the whole JVM.
            throw new IllegalArgumentException("column not found for field " + field);
        }
        if (dataColumn.canGetDouble()) {
            initializeNumericColumn(field, dataColumn);
        } else if (dataColumn.canGetString()) {
            initializeStringColumn(field, dataColumn);
        } else {
            // TODO @@@ maybe someday throw an exception
            System.err.println("Hmm, column "+field+
                " is not a number and not a string."+
                "I don't know what to do with this column.");
        }
    }
}
/**
* Collects the column names of a table into an array sized by the table's
* column count.
* @param aTable a HistogramTable or Prefuse Table
* @return fieldNames a list of the names of the columns in the table.
* Note that a HistogramTable will have all the same column names as
* are in its (Prefuse Table) data table's, but will also have an additional
* set of columns that have the counts. See getCountField().
* TODO It might be interesting to have a method getNonCountFieldNames which
* strips out the count fields.
*/
public static String[] getFieldNames(Table aTable) {
int columnCount = aTable.getColumnCount();
String[] fieldNames = new String[columnCount];
// NOTE(review): the loop header on the next line stops mid-condition and runs
// straight into a Hashtable declaration that does not belong to this method.
// It looks as if everything between a less-than sign and a later greater-than
// sign was dropped from this copy of the file (the rest of getFieldNames and
// the start of at least one other method). Restore from version control
// before editing this region.
for(int columnIndex = 0; columnIndex values = new Hashtable();
int count = 0;
String key;
// NOTE(review): this loop header is truncated in the same way.
for (int rowIndex=0; rowIndex keys = values.keys();
// Walk the keys of `values`, writing each key to binColumn and its stored
// value to countColumn, stopping once m_binCount rows have been written.
while(keys.hasMoreElements() && rowIndex < m_binCount) {
key = (String)keys.nextElement();
{
binColumn.setString(key, rowIndex);
countColumn.setInt(values.get(key), rowIndex++);
}
}
// insert dummy values if there are fewer unique strings than bins
// NOTE(review): the line below is truncated too; its tail is the end of an
// assert on m_binWidth that presumably belongs to yet another method.
for(int i = rowIndex; i= 0.0 : "m_binWidth < 0!";
}
/**
 * Resets this table's internal bookkeeping (listeners, columns, names,
 * row manager, entries and tuple manager) and then adds the requested
 * number of rows. Storage is sized for two columns per field.
 * @param fields the names of all the fields
 * @param rowCount the number of rows
 */
private void initializeHistogramTable(String[] fields, int rowCount)
{
    // Twice the field count: sized for a pair of columns per field.
    final int capacity = fields.length * 2;

    m_listeners = new CopyOnWriteArrayList();
    m_columns = new ArrayList(capacity);
    m_names = new ArrayList(capacity);
    m_rows = new RowManager(this);
    m_entries = new HashMap(capacity + 5);
    m_tuples = new TupleManager(this, null, TableTuple.class);

    // Rows are added last, once the structures above exist.
    addRows(rowCount);
}
/**
 * Fills the bin column for a numeric field. Each row of the bin column
 * receives the low end of the range that row stands for: the field's
 * recorded minimum plus the row index times the bin width. For example,
 * if a bin covers the values 2 to 14, its bin cell holds 2, and the
 * matching count cell holds how many elements fell in that range.
 * @param field the name of the dataColumn to histogrammize
 */
private void initializeNumericBinColumn(String field)
{
    final double lowest = m_binMin.get(field);
    int bin = 0;
    while (bin < m_binCount) {
        // Low edge of this bin, computed directly from the index rather
        // than accumulated, so rounding error does not build up.
        final double lowEdge = lowest + bin * m_binWidth;
        set(bin, field, lowEdge);
        bin++;
    }
}
/**
* Initialize the column with the counts of elements in them.
* The visible part zeroes every bin of the field's count column and reads
* the field's recorded minimum before counting begins.
* @param field the name of the dataColumn to histogrammize
* @param dataColumn the column in the original {@link prefuse.data.Table}
* to be histogramized.
*/
private void initializeCountColumn(String field, Column dataColumn)
{
int binSlot;
int currentCount; // separate var just for debugging ease
// Counts live in a sibling column whose name is derived from the field name.
String countField = getCountField(field);
// initialize everything to 0 before starting to count
for (int binIndex = 0; binIndex < m_binCount; binIndex++)
{
set(binIndex, countField, 0);
}
double dataColumnMin = m_binMin.get(field);
double cellValue;
// NOTE(review): the loop header on the next line stops mid-condition and runs
// into an iterator loop that prints every tuple, which presumably belongs to
// a separate debugging method. The counting logic itself, and the start of
// that other method, appear to be missing from this copy of the file.
// Restore from version control before editing this region.
for(int dataRowIndex = 0; dataRowIndex tuplesIterator = tuples(); tuplesIterator.hasNext();) {
t = tuplesIterator.next();
System.out.println(t.toString());
}
}
/**
 * Looks up the minimum recorded for a field in m_binMin.
 * @param field the name of the data column
 * @return the value stored for field in m_binMin
 */
public double getBinMin(String field) {
    final double lowest = m_binMin.get(field);
    return lowest;
}
/**
 * Looks up the maximum recorded for a field in m_binMax.
 * @param field the name of the data column
 * @return the value stored for field in m_binMax
 */
public double getBinMax(String field) {
    final double highest = m_binMax.get(field);
    return highest;
}
/**
 * Reports how many bins this table uses. The same bin count applies to
 * every field, so the argument does not affect the result.
 * @param field the name of the data column (not consulted)
 * @return the bin count, widened to a double
 */
public double getBinCount(String field) {
    final double binCount = m_binCount;
    return binCount;
}
/**
 * Scans a numeric column for its smallest and largest values.
 * @param aColumn the column to get min/max of; it must be able to supply
 * doubles
 * @return min and max (in an array) of aColumn: index 0 is the min,
 * index 1 is the max
 * @throws IllegalArgumentException if aColumn cannot supply double values
 */
private double[] getNumericColumnMinMax(Column aColumn) {
    // Check the capability before the first read, so a non-numeric column
    // fails here with a clear message.
    if (!aColumn.canGetDouble()) {
        throw new IllegalArgumentException(
            "cannot compute min/max: column does not supply double values");
    }
    final int rowCount = aColumn.getRowCount();
    // Row 0 seeds both extremes.
    // NOTE(review): an empty column is handed straight to getDouble(0);
    // confirm what Column does in that case.
    double min = aColumn.getDouble(0);
    double max = min;
    for (int row = 1; row < rowCount; row++) {
        final double value = aColumn.getDouble(row);
        min = Math.min(min, value);
        max = Math.max(max, value);
    }
    return new double[] { min, max };
}
/**
 * Name-based convenience form: looks up the column of this table with the
 * given name and returns its extremes.
 * @param field the name of the histogramColumn
 * @return the minimum and maximum values of that column in an array
 */
private double[] getNumericColumnMinMax(String field) {
    return getNumericColumnMinMax(getColumn(field));
}
/**
 * Returns the smallest count in the count column that belongs to a field.
 * The column's min and max are computed on first request and cached.
 * @param field the name of the histogramColumn
 * @return the minimum value of the associated count column
 * (In other words, if you ask for getCountMin("A"), you
 * will get the min of the column "A count".)
 */
public double getCountMin(String field) {
    final String countField = getCountField(field);
    // Min and max are always cached as a pair, so a missing max means
    // neither has been computed yet.
    if (m_countMaxes.get(countField) == null) {
        final double[] extremes = getNumericColumnMinMax(countField);
        m_countMins.put(countField, (int) extremes[0]);
        m_countMaxes.put(countField, (int) extremes[1]);
    }
    return m_countMins.get(countField);
}
/**
 * Returns the largest count in the count column that belongs to a field.
 * @param field the name of the histogramColumn
 * @return the max value of the associated count column
 * (In other words, if you ask for getCountMax("A"), you
 * will get the max of the column "A count".)
 */
public double getCountMax(String field) {
    final String countField = getCountField(field);
    final boolean cached = m_countMaxes.get(countField) != null;
    if (!cached) {
        // getCountMin computes and stores both the min and the max.
        getCountMin(field);
    }
    return m_countMaxes.get(countField);
}
/**
 * Derives the name of the count column that accompanies a data field.
 * @param field the name of the data field
 * @return field followed by " count"
 */
public static String getCountField(String field)
{
    final String suffix = " count";
    return field + suffix;
}
} // end of class HistogramTable