/**
 * pdfXtk-Extras - PDF Extraction Toolkit Extras
 * Copyright (c) by the authors/contributors.  All rights reserved.
 * This project includes code from PDFBox and TouchGraph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the names pdfXtk or PDF Extraction Toolkit; nor the names of its
 *    contributors may be used to endorse or promote products derived from this
 *    software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * http://pdfxtk.sourceforge.net
 *
 */
package at.ac.tuwien.dbai.pdfwrap.table;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;

import at.ac.tuwien.dbai.pdfwrap.analysis.CandidateCluster;
import at.ac.tuwien.dbai.pdfwrap.analysis.PageSegmenter;
import at.ac.tuwien.dbai.pdfwrap.comparators.ColumnComparator;
import at.ac.tuwien.dbai.pdfwrap.comparators.XComparator;
import at.ac.tuwien.dbai.pdfwrap.comparators.YComparator;
import at.ac.tuwien.dbai.pdfwrap.model.document.CompositeSegment;
import at.ac.tuwien.dbai.pdfwrap.model.document.GenericSegment;
import at.ac.tuwien.dbai.pdfwrap.model.document.LineSegment;
import at.ac.tuwien.dbai.pdfwrap.model.document.TextBlock;
import at.ac.tuwien.dbai.pdfwrap.model.document.TextSegment;
import at.ac.tuwien.dbai.pdfwrap.model.graph.AdjacencyGraph;
import at.ac.tuwien.dbai.pdfwrap.util.ExtraUtils;
import at.ac.tuwien.dbai.pdfwrap.utils.ListUtils;
import at.ac.tuwien.dbai.pdfwrap.utils.SegmentUtils;
import at.ac.tuwien.dbai.pdfwrap.utils.Utils;


/**
 * Table-finding algorithms
 *
 * @author Tamir Hassan, hassan@dbai.tuwien.ac.at
 * @version GraphWrap Beta 1
 */
public class TableFinder
{
	/*
    //NeighbourhoodGraph ng;
    AdjacencyGraph ng;
    EdgeList edges;
    
    public TableFinder(
        AdjacencyGraph ng)
    {
		this.ng = ng;
        edges = ng.getEdges();
    }
    */
	
	/**
	 * Clusters the text blocks of the given adjacency graph and wraps each
	 * resulting cluster in a {@link CandidateColumn}.
	 * <p>
	 * The clustering itself is delegated to
	 * {@code PageSegmenter.orderedEdgeCluster} using a fresh
	 * {@code CandColSegmentationRules} instance.  Every cluster is converted
	 * into a column that copies the cluster's coordinates, text, font name and
	 * font size, and receives the cluster's items (each cast to
	 * {@link TextBlock}).
	 *
	 * @param textblockAG   adjacency graph of the text blocks to cluster
	 * @param maxIterations passed through unchanged to the clustering call
	 * @param clustHash     passed through unchanged to the clustering call
	 * @return one candidate column per cluster, in the order the clusters
	 *         were returned by the clustering call
	 */
	public static List<CandidateColumn> clusterTextBlocksIntoCandidateColumns
		(AdjacencyGraph<TextBlock> textblockAG, int maxIterations, 
		HashMap<GenericSegment, CandidateCluster> clustHash)
		{
			List<CandidateColumn> columns = new ArrayList<CandidateColumn>();
			List<CandidateCluster> clusters = PageSegmenter.orderedEdgeCluster(
				textblockAG, new CandColSegmentationRules(), maxIterations, clustHash);
			
			for (CandidateCluster cluster : clusters)
			{
				// the column starts out as a copy of the cluster's geometry and font data
				CandidateColumn column = new CandidateColumn(
					cluster.getX1(), cluster.getX2(),
					cluster.getY1(), cluster.getY2(),
					cluster.getText(),
					cluster.getFontName(),
					cluster.getFontSize());
				
				// cluster items are handed over as TextBlocks
				for (TextSegment item : cluster.getItems())
				{
					column.getItems().add((TextBlock)item);
				}
				columns.add(column);
			}
			return columns;
		}
	
    /*
    // note: it was once assumed that the table need not have a correct bBox
    // because it would be found here.  That is not the case: the table locator
    // finds the bbox anyway (as it needs to fill it with its contents), so
    // there is no point re-finding it here.
    public OrderedTable understandTable(CandidateTable pt, boolean preserveCarriageReturns)
    {
    	//pt.findBoundingBox();
    	
    	float threshold = 6.0f;
    	OrderedTable retVal = new OrderedTable();
    	List<Float> horizPositions = new ArrayList<Float>();
    	List<Float> vertPositions = new ArrayList<Float>();
    	
    	List<GenericSegment> items = pt.getItems();
    	List<LineSegment> horizLines = ListUtils.getHorizLineSegments(items);
    	List<LineSegment> vertLines = ListUtils.getVertLineSegments(items);
    	
    	//System.out.println("vertLines: " + vertLines.size());
    	
    	Collections.sort(horizLines, new YComparator());
    	Collections.sort(vertLines, new XComparator());
    	
    	//System.out.println("horizLines:\n" + horizLines);
    	//System.out.println("vertLines:\n" + vertLines);
    	
    	// this works from left to right
    	Iterator vLineIter = vertLines.iterator();
    	horizPositions.add(new Float(pt.getX1()));
    	float prevPos = pt.getX1();
    	while(vLineIter.hasNext())
    	{
    		LineSegment l = (LineSegment)vLineIter.next();
    		float pos = l.getXmid();
    		if (!Utils.within(pos, prevPos, threshold))
    			horizPositions.add(new Float(pos));
    		prevPos = pos;
    	}
    	if (!Utils.within(pt.getX2(), prevPos, threshold))
			horizPositions.add(new Float(pt.getX2()));
    	
    	// this works from the top of the page downwards
    	Iterator hLineIter = horizLines.iterator();
    	vertPositions.add(new Float(pt.getY2()));
    	prevPos = pt.getY2();
    	while(hLineIter.hasNext())
    	{
    		LineSegment l = (LineSegment)hLineIter.next();
    		float pos = l.getYmid();
    		if (!Utils.within(pos, prevPos, threshold))
    			vertPositions.add(new Float(pos));
    		prevPos = pos;
    	}
    	if (!Utils.within(pt.getY1(), prevPos, threshold))
			vertPositions.add(new Float(pt.getY1()));
    	
    	// first, we need to find the row- and colspans
    	int noCols = horizPositions.size() - 1;
    	int noRows = vertPositions.size() - 1;
    	
    	// here we go through each position
    	// first across all the columns to see if we need to 'merge'
    	// then do the same down the rows
    	// we could even look at the text at this point, but we
    	// won't do that yet
    	// though we should create the smallest permissible
    	// TextCell objects now, considering all cells independently
    	
    	// go across the columns first
    	
    	ListUtils textItems = items.getTextSegments();
    	TextSegment hiddenCell = new TextSegment();
    	hiddenCell.setSegText("hidden-cell");
    	
    	// rows is a list of lists
    	//ArrayList rows = new ArrayList(noRows);
    	ArrayList rows = new ArrayList();
    	//SegmentList rows = new SegmentList();
    	for (int r = 0; r < noRows; r ++)
    	{
    		//CompoundTextSegment theCol = new CompoundTextSegment();
    		//SegmentList cols = new SegmentList(noCols);
    		// forcing noCols crashes with 0-item col (though
    		// (this is a non-table anyway...)
    		ListUtils cols = new ListUtils();
    		//SegmentList cols = theCol.getItems();
    		float tcy2 = ((Float)vertPositions.get(r)).floatValue();
    		float tcy1 = ((Float)vertPositions.get(r + 1)).floatValue();
    		float tcmid = (tcy1 + tcy2) * 0.5f;
    		for (int c = 0; c < noCols; c ++)
    		{
    			// row r column c
    			// find out raw co-ords of table cell from positions
    			float tcx1 = ((Float)horizPositions.get(c)).floatValue();
    			float tcx2 = ((Float)horizPositions.get(c + 1)).floatValue();
    			int colspan = 1;
    			
    			// try expanding right, i.e. check if any intersecting
    			//   vertical line on THIS ROW above certain threshold height
    			//   for ruled tables, no line = colspan; no ifs or buts
    			//   when successful expansion, mark the next col as blank
    			boolean expand = true;
    			int x = c;
    			//System.out.println("tcx1: " + tcx1 + " tcx2: " + tcx2 +
    			//	" tcy1: " + tcy1 + " tcy2: " + tcy2);
    			//System.out.println("vertLines: " + vertLines.size());
    			//System.out.println("vertLines: " + vertLines);
    			//System.out.println("lines: " + items.getLines());
    			Collections.sort(vertLines, new XComparator());
    			//System.out.println("before loop: c=" + c);
    			for (x = c; x < noCols && expand; x ++)
    			{
    				// position to check is tcx2
    				tcx2 = ((Float)horizPositions.get(x + 1)).floatValue();
    				
    				//System.out.println("\nx = " + x + " tcx2 = " + tcx2);
    				//boolean expand = true;
    				Iterator vlIter = vertLines.iterator();
    				while(vlIter.hasNext())
    				{
    					LineSegment l = (LineSegment)vlIter.next();
    					//System.out.println("checking with l: " + l);
    					if (Utils.within(tcx2, l.getXmid(), threshold) &&
    						Utils.between(tcmid, l.getY1(), l.getY2()) &&
    						l.getSegHeight() >= ((tcy2 - tcy1) * 0.75f))
    					{
    						// line must intersect tc-midpoint and
    						// must be at least 0.75 of the tc-height.
    						//System.out.println("setting false!!!");
    						expand = false;
    					}
    				}
    			}
    			// so... tc?? are the co-ordinates
    			// and x - c is the colspan - 1
    			// if x > c, need to set the next cells to blank...
    			
    			TableCell tc = new TableCell(tcx1, tcx2, tcy1, tcy2);
    			//cols.set(c, tc);
    			//System.out.println("c: " + c + " cols.size():" + cols.size());
    			cols.add(tc);
    			//System.out.println("x: " + x);
    			for (int e = (c + 1); e < x; e ++)
    			{
    				//System.out.println("e: " + e);
    				tc.setColspan(tc.getColspan() + 1);
    				//System.out.println("colspan increased to: " + tc.getColspan());
    				tcx2 = ((Float)horizPositions.get(e + 1)).floatValue();
    				tc.setX2(tcx2);
    				// add null cell -- prob unnecessary
    				// it's necessary all right
    				//System.out.println("adding hidden cell & incrementing c");
    				cols.add(hiddenCell);
    				c ++;
    			}
    			// swallow the items that fit within the final co-ordinates
    			//SegmentList cellItems = textItems.getElementsWithinBBox
    			//	(tc);
    			ListUtils cellItems = textItems.
    				getElementsWithCentresWithinBBox(tc);
    			tc.setItems(cellItems);
    			tc.findText(preserveCarriageReturns);
    		}
    		//System.out.println("cols: \n" + cols);
    		rows.add(cols);
    		//System.out.println("cols: " + cols);
    		//theCol.findAvgFontSize();
    		//theCol.findBoundingBox();
    		//theCol.findText(false);
    		//rows.add(theCol);
    	}
    	
    	// once we have done this, we work across neighbouring rows
    	// and then across neighbouring columns merging them together
    	// heuristically/probabilistically (more likely to help us
    	// with multi-row stuff).
    	// perhaps even by looking at the gaps between neighbours/
    	// Y-positions of text.
    	
    	
    	
    	// now we have the col and row positions, we need
    	// to add the text in the right place
    	
    	// todo: we only want textlines?  or fragments?  we need to think about it.
    	
    	//SegmentList textItems = items.getTextSegments();
    	
    	ListUtils rvItems = retVal.getItems();
    	// now create the new table object, removing all null cells
    	for (int r = 0; r < rows.size(); r ++)
    	{
    		ListUtils cols = (ListUtils)rows.get(r);
    		CompositeSegment row = new CompositeSegment();
    		ListUtils rowItems = row.getItems();
    		for (int c = 0; c < cols.size(); c ++)
    		{
    			GenericSegment s = (GenericSegment)cols.get(c);
    			if (s instanceof TableCell) rowItems.add(s);
    		}
    		row.findAvgFontSize();
    		row.findFont();
    		row.findBoundingBox();
    		row.findText(preserveCarriageReturns);
    		rvItems.add(row);
    	}
    	
    	
    	//retVal.setItems(rows);
    	retVal.findBoundingBox();
    	retVal.findFont();
    	retVal.findText(preserveCarriageReturns);
    	return retVal;
    }
    */
    
    /**
     * Finds tables that are delimited by ruling lines.
     * <p>
     * The lines are split into horizontal and vertical ones and sorted
     * (vertical lines with {@link XComparator}, horizontal lines with
     * {@link YComparator}).  The method then repeats the following steps:
     * <ol>
     * <li>pick the horizontal line that is crossed by the most vertical
     *     lines;</li>
     * <li>walk through the following and then the preceding horizontal lines
     *     (in sort order), each time keeping only the vertical lines that
     *     also cross that neighbour, and stop in a direction as soon as no
     *     common vertical line is left;</li>
     * <li>take the bounding box of the picked line plus the surviving
     *     vertical lines and collect every line of {@code theLines} that
     *     intersects it;</li>
     * <li>remove the collected lines from the working lists, build a table
     *     from them with {@link #understandRuledTable} and keep it if
     *     {@link #validateRuledTable} accepts it.</li>
     * </ol>
     * The search ends when no horizontal line is crossed by any vertical
     * line, when no line intersects the bounding box, or when an iteration
     * did not remove any line from the working lists (which would otherwise
     * repeat the same iteration forever).
     *
     * @param theClusters text blocks used to fill the cells of found tables
     * @param theLines    all ruling lines to examine; this list itself is
     *                    not modified
     * @return the validated tables, in the order in which they were found
     */
    public static List<OrderedTable> findRuledTables
    	(List<TextBlock> theClusters, List<LineSegment> theLines)
    {
    	List<OrderedTable> retVal = new ArrayList<OrderedTable>();
    	
    	// separate into horiz and vert lists
    	List<LineSegment> horizLines = new ArrayList<LineSegment>();
    	List<LineSegment> vertLines = new ArrayList<LineSegment>();
    	
    	for (LineSegment ls : theLines)
    	{
    		if (ls.getDirection() == LineSegment.DIR_HORIZ)
    			horizLines.add(ls);
    		else if (ls.getDirection() == LineSegment.DIR_VERT)
    			vertLines.add(ls);
    	}
    	
    	Collections.sort(vertLines, new XComparator());
    	Collections.sort(horizLines, new YComparator());
    	
    	boolean loop = true;
    	while (loop)
    	{
    		// generate lists of intersecting lines for each hLine and
    		// remember the hLine with the most intersections
    		List<List<LineSegment>> hIntersections = 
    			new ArrayList<List<LineSegment>>();
    		int maxIntersections = 0;
    		int maxIndex = -1;
    		for (int n = 0; n < horizLines.size(); n ++)
    		{
    			LineSegment hls = horizLines.get(n);
    			List<LineSegment> intersections = new ArrayList<LineSegment>();
    			for (LineSegment vls : vertLines)
    			{
    				if (SegmentUtils.intersects(vls, hls))
    					intersections.add(vls);
    			}
    			// strict comparison: the first line with the highest count wins
    			if (intersections.size() > maxIntersections)
    			{
    				maxIntersections = intersections.size();
    				maxIndex = n;
    			}
    			hIntersections.add(intersections);
    		}
    		
    		// no horizontal line is crossed by a vertical line: nothing left to find
    		if (maxIndex < 0)
    			break;
    		
    		// horiz line with the most vert intersections
    		LineSegment thisLine = horizLines.get(maxIndex);
    		
    		// clone the list of remaining intersections
    		List<LineSegment> remainingIntersections = 
    			new ArrayList<LineSegment>(hIntersections.get(maxIndex));
    		
    		// expand below until no original intersecting line remains
    		for (int n = maxIndex + 1; n < horizLines.size(); n ++)
    		{
    			List<LineSegment> common = 
    				intersectLineLists(remainingIntersections, hIntersections.get(n));
    			if (common.isEmpty())
    				break;
    			remainingIntersections = common;
    		}
    		
    		// expand above in the same way
    		for (int n = maxIndex - 1; n >= 0; n --)
    		{
    			List<LineSegment> common = 
    				intersectLineLists(remainingIntersections, hIntersections.get(n));
    			if (common.isEmpty())
    				break;
    			remainingIntersections = common;
    		}
    		
    		// find all lines intersecting bbox
    		CompositeSegment<LineSegment> potTable = 
    			new CompositeSegment<LineSegment>();
    		potTable.getItems().add(thisLine);
    		potTable.getItems().addAll(remainingIntersections);
    		potTable.findBoundingBox();
    		
    		List<GenericSegment> tableLines = // only contains LineSegments
    			ListUtils.findElementsIntersectingBBox(theLines, potTable);
    		potTable.getItems().clear();
    		for (GenericSegment gs : tableLines)
    			potTable.getItems().add((LineSegment)gs);
    		
    		// remove from horizLines & vertLines, remembering whether anything
    		// was actually removed
    		boolean removedHoriz = horizLines.removeAll(tableLines);
    		boolean removedVert = vertLines.removeAll(tableLines);
    		
    		// if no line found, then stop
    		if (tableLines.size() == 0)
    		{
    			loop = false;
    		}
    		else
    		{
    			OrderedTable t = understandRuledTable(theClusters, potTable.getItems());
    			
    			if (validateRuledTable(t)) retVal.add(t);
    			
    			// Without progress the next iteration would see exactly the
    			// same working lists and never terminate.
    			if (!removedHoriz && !removedVert)
    				loop = false;
    		}
    	}
    	
    	return retVal;
    }
    
    /**
     * Returns the result of {@code ListUtils.intersection} for the two lists
     * as a new list of line segments.
     *
     * @param first  first list of line segments
     * @param second second list of line segments
     * @return a new list holding the elements returned by the intersection
     *         call, each cast to {@link LineSegment}
     */
    private static List<LineSegment> intersectLineLists
    	(List<LineSegment> first, List<LineSegment> second)
    {
    	List<LineSegment> common = new ArrayList<LineSegment>();
    	// cast all GenericSegments back to LineSegments
    	for (GenericSegment gs : ListUtils.intersection(first, second))
    		common.add((LineSegment)gs);
    	return common;
    }
    
    /**
     * Decides whether a table built from ruling lines is acceptable.
     * <p>
     * A table with fewer than two rows is rejected.  Otherwise the rows are
     * examined in order: a row whose height is 250 or more rejects the table
     * immediately, and the first row holding at least two items accepts it.
     * Rows after that accepting row are not examined.  If no row holds at
     * least two items the table is rejected.
     *
     * @param t the table to check
     * @return {@code true} if the table passes the checks described above
     */
    public static boolean validateRuledTable(OrderedTable t)
    {
    	List<TableRow> rows = t.getItems();
    	
    	// a table needs at least 2 rows
    	if (rows.size() < 2)
    	{
    		return false;
    	}
    	
    	for (TableRow row : rows)
    	{
    		boolean hasSeveralItems = row.getItems().size() >= 2;
    		
    		// a row that is too high rules the table out, even if it has several items
    		if (row.getHeight() >= 250)
    		{
    			return false;
    		}
    		
    		// one row with >= 2 items is enough
    		if (hasSeveralItems)
    		{
    			return true;
    		}
    	}
    	
    	// TODO: check for tables whose rows/columns are much too large (newspapers, etc.)
    	// (the row height check above is a first step)
    	
    	return false;
    }
    
    /**
     * Builds an {@link OrderedTable} from a set of ruling lines and fills its
     * cells with the items of the text blocks lying inside them.
     * <p>
     * The lines are split by direction and sorted locally (vertical lines with
     * {@link XComparator}, horizontal lines with {@link YComparator}), so the
     * caller does not have to pass them in any particular order; the list
     * passed in is not modified.  Each pair of consecutive horizontal lines
     * forms a row.  Within a row, the vertical lines intersecting both the
     * upper and the lower line divide the row into cells.  A row is only added
     * to the table if it contains at least one cell.
     * <p>
     * The colspan of a cell is the number of distinct column positions (x
     * mid-points of the vertical lines, merged when within 10 units of each
     * other) that lie to the right of the cell's left dividing line, up to and
     * including its right dividing line.  It is never less than 1.
     *
     * @param theClusters text blocks; the items of every block found by
     *                    {@code ListUtils.findElementsWithCentresWithinBBox}
     *                    for a cell are added to that cell
     * @param theLines    ruling lines of the table
     * @return the table, with its bounding box calculated
     */
    public static OrderedTable understandRuledTable(List<TextBlock> theClusters, List<LineSegment> theLines)
    {
    	// tolerance used when comparing x positions of vertical lines
    	final float tolerance = 10.0f;
    	
    	OrderedTable retVal = new OrderedTable();
    	
    	// separate into horiz and vert lists
    	List<LineSegment> horizLines = new ArrayList<LineSegment>();
    	List<LineSegment> vertLines = new ArrayList<LineSegment>();
    	
    	for (LineSegment ls : theLines)
    	{
    		if (ls.getDirection() == LineSegment.DIR_HORIZ)
    			horizLines.add(ls);
    		else if (ls.getDirection() == LineSegment.DIR_VERT)
    			vertLines.add(ls);
    	}
    	
    	// Rows are formed from consecutive horizontal lines and cells from
    	// consecutive dividing lines, so both lists must be ordered.  Sort the
    	// local copies here instead of relying on the caller.
    	Collections.sort(vertLines, new XComparator());
    	Collections.sort(horizLines, new YComparator());
    	
    	// find x values for columns
    	List<Float> xPositions = new ArrayList<Float>();
    	for (LineSegment ls : vertLines)
    	{
    		float xMid = ls.getXmid();
    		
    		// check if this position is already there
    		boolean addThisPosition = true;
    		for (Float thisPos : xPositions)
    		{
    			if (Utils.within(xMid, thisPos, tolerance))
    			{
    				addThisPosition = false;
    				break;
    			}
    		}
    		
    		if (addThisPosition)
    			xPositions.add(Float.valueOf(xMid));
    	}
    	
    	// iterate through the rows
    	for (int h = 0; h < horizLines.size() - 1; h ++)
    	{
    		TableRow thisRow = new TableRow();
    		
    		LineSegment upperLine = horizLines.get(h);
    		LineSegment lowerLine = horizLines.get(h + 1);
    		float rowY2 = upperLine.getYmid();
    		float rowY1 = lowerLine.getYmid();
    		
    		// generate list of column dividing lines for this row
    		List<LineSegment> dividingLines = new ArrayList<LineSegment>();
    		for (LineSegment ls : vertLines)
    		{
    			if (SegmentUtils.intersects(ls, upperLine) &&
    				SegmentUtils.intersects(ls, lowerLine))
    			{
    				dividingLines.add(ls);
    			}
    		}
    		
    		// now iterate through the columns
    		for (int v = 0; v < dividingLines.size() - 1; v ++)
    		{
    			LineSegment leftLine = dividingLines.get(v);
    			LineSegment rightLine = dividingLines.get(v + 1);
    			float cellX1 = leftLine.getXmid();
    			float cellX2 = rightLine.getXmid();
    			
    			// work out colspan: count the column positions after the left
    			// dividing line up to and including the right dividing line
    			int colspan = 0;
    			for (Float pos : xPositions)
    			{
    				float x = pos.floatValue();
    				if (x > cellX1 + tolerance && x <= cellX2 + tolerance)
    					colspan ++;
    			}
    			// dividing lines closer together than the tolerance can
    			// share one column position; such a cell still spans one column
    			if (colspan < 1)
    				colspan = 1;
    			
    			TableCell thisCell = new TableCell();
    			thisCell.setRowspan(1);
    			thisCell.setColspan(colspan);
    			float[] cellBBox = {cellX1, cellX2, rowY1, rowY2};
    			thisCell.setBoundingBox(cellBBox);
    			
    			// add relevant items to thisCell
    			thisCell.getItems().clear();
    			List<GenericSegment> textBlocks = // items are TextBlocks
    				ListUtils.findElementsWithCentresWithinBBox(theClusters, thisCell);
    			for (GenericSegment gs : textBlocks)
    			{
    				TextBlock tb = (TextBlock)gs;
    				thisCell.getItems().addAll(tb.getItems());
    			}
    			
    			// calculate text (bBox already set above)
    			//thisCell.setCalculatedFields(); if blank, then problem!
    			thisCell.findText();//(false);
    			
    			// and finally add the cell to the row
    			thisRow.getItems().add(thisCell);
    		}
    		if (thisRow.getItems().size() > 0)
    		{
    			thisRow.setCalculatedFields();
    			retVal.getItems().add(thisRow);
    		}
    	}
    	
//    	2.08.10: this breaks Table.addAsXHTML
//    	retVal.getItems().addAll(theLines);
    	
    	retVal.findBoundingBox();
    	return retVal;
    }
    
    public static List<CandidateTable> findNonRuledTables//(SegmentList theSegments, SegmentList candColSegs, 
    	(List<CandidateColumn> candCols, List<LineSegment> theLines)
    {
//    	System.out.println("entering findTables");
    	
    	//System.out.println("theSegments: " + theSegments.toExtendedString());
    	//System.out.println("candcolsegs: " + candColSegs.toExtendedString());
//    	System.out.println("candcols: " + candCols.toExtendedString());
//    	System.out.println("thelines: " + theLines.toExtendedString());
    	
    	List<CandidateTable> retVal = new ArrayList<CandidateTable>();
    	
    	// rate each of these candidate columns
    	// -- basically, bigger is better
    	// (but first, exclude all 'heading' columns)
    	
    	Collections.sort(candCols, new ColumnComparator());
    	
//    	System.out.println("candcols: ");
//    	ListUtils.printList(candCols);
    	
    	// shallowcopy candCols list
    	List<CandidateColumn> candColsDup = new ArrayList<CandidateColumn>();
    	candColsDup.addAll(candCols);
    	
    	int loopCounter = 0;
    	
    	int maxLoopCount = Integer.MAX_VALUE;
//    	int maxLoopCount = 15;
    	
    	// loop through the candidate cols
    	while (candCols.size() > 0 && loopCounter < maxLoopCount)
    	{
    		int nextTask = 3;
    		boolean swallow = true;
    		//Table tableToAdd = new Table();
    		CandidateTable tableToAdd = new CandidateTable();
    		tableToAdd.getItems().add(candCols.remove(0));
//    		System.out.println("\nremovedFirst; remaining: " + candCols.size());
    		//System.out.println(tableToAdd.toExtendedString());
//    		System.out.println("Sub-items: " + tableToAdd.getItems().toExtendedString());
    		
    		tableToAdd.findBoundingBox();
    		tableToAdd.findFontName();
    		
    		List<GenericSegment> lastValidTableItems = new ArrayList<GenericSegment>();
//    		ListUtils lastValidTableItems = 
//    			(ListUtils)tableToAdd.getItems().clone();//new SegmentList();
    		for (GenericSegment tc : tableToAdd.getItems())
    			lastValidTableItems.add(tc);
    		
    		int lastValidTask = 0;
    		
    		boolean loop = true;
    		boolean horizRuled = false;
    		
    		while (loop == true)
    		{
//BB
    			tableToAdd.findBoundingBox();
    			// ***** SWALLOW
    			
    			//boolean changeMade = false;
    			int oldSize = tableToAdd.getItems().size();
    			
    			if (swallow)
    			{
    				// HACK -- to be replaced by a separate pre-processing step
    				// that joins together-lines too
	    			// see if any lines intersect the bounding box; if so, add them
	    			//tableToAdd.addAllItems
	    				//(theLines.getElementsIntersectingBBox(tableToAdd));
	    			for (int n = 0; n < theLines.size(); n ++)
	    			{
	    				LineSegment l = (LineSegment)theLines.get(n);
	    				//System.out.println("examining tc: " + tc);
	    				if (SegmentUtils.intersects(l, tableToAdd))
	    				{
	    					boolean longEnough = true;
	    					for (int p = 0; p < tableToAdd.getItems().size(); p ++)
	    					{
	    						Object o = tableToAdd.getItems().get(p);
	    						if (o instanceof CandidateColumn)
	    						{
	    							if (((CandidateColumn)o).getWidth() >
	    								(l.getWidth() * 0.9f))
	    								longEnough = false;
	    						}
	    					}
	    					if (longEnough) tableToAdd.getItems().add(l);
	    				}
	    			}
	    			
	    			// and do the same for other candidate cols
	    			for (int n = 0; n < candCols.size(); n ++)
	    			{
	    				CandidateColumn tc = candCols.get(n);
//	    				tc.findBoundingBox(); // unnecc.
//	    				tableToAdd.findBoundingBox(); // necessary! but done above now
//	    				System.out.println("examining tc: " + tc);
//	    				System.out.println("examining tableToAdd: " + tableToAdd);
	    				if (SegmentUtils.intersects(tc, tableToAdd))
	    				{
	    					//tableToAdd.getItems().addAll(tc.getItems());
//	    					System.out.println("intersection with tc: " + tc.toExtendedString());
	    					tableToAdd.getItems().add(tc);
	    					candCols.remove(tc);
	    				}
	    			}
	    			
	    			/*
	    			// and do the same for segments -- ONLY if not part of a col? ???
	    			tableToAdd.getItems().addAll
	    				(candColSegs.getElementsIntersectingBBox(tableToAdd));
	    			*/
	    			
//	    			tableToAdd.getItems().removeDuplicates();
	    			ListUtils.removeDuplicates(tableToAdd.getItems());
	    			
	    			// for safety, we findBoundingBox regardless
	    			// instead size is same but items are different
	    			// as duplicates have been removed
	    			// though this prob. shouldn't happen AT ALL in practice!
//BB
	    			//tableToAdd.findBoundingBox();
    			}
    			
    			if (tableToAdd.getItems().size() != oldSize)
    			{
    				//System.out.println(tableToAdd.toExtendedString());
//    				System.out.println("swallowed; swallow set to true");
    				swallow = true;
    				//nextTask = 1;
    			}
    			else
    			{
    				//System.out.println(tableToAdd.toExtendedString());
//    				System.out.println("swallow was: " + swallow);
//    				System.out.println("didn't swallow; swallow set to false");
    				swallow = false;
    				//if (nextTask == 1) nextTask = 3;
    				//System.out.println(" and is now " + nextTask);
    			}
    			
    			// nextTask == 1 means there's still stuff to swallow...
    			if (!swallow)
    			{
    				long start = System.currentTimeMillis();
    				// ***** EVALUATE TABLE
        			if (tableToAdd.isTable())
        			{
//        				System.out.println("total time for evaluation: " + (System.currentTimeMillis() - start));
//        				System.out.println("passed evaluation; lastValidTableItems set");
//        				lastValidTableItems = (ListUtils)tableToAdd.getItems().clone();
        				lastValidTableItems.clear();
        				for (GenericSegment tc : tableToAdd.getItems())
        	    			lastValidTableItems.add(tc);
        				lastValidTask = nextTask;
        			}
        			else if (ExtraUtils.getCandidateColumns(tableToAdd.getItems()).size() > 1)
        			{
//        				System.out.println("total time for evaluation: " + (System.currentTimeMillis() - start));
//        				tableToAdd.getItems().findText(false);
//        				TODO: IS FINDTEXT REALLY NECESSARY HERE?
        				for (GenericSegment gs : tableToAdd.getItems())
        				{
        					if (gs instanceof CompositeSegment<?>)
        					{
        						CompositeSegment<?> cs = (CompositeSegment<?>)gs;
        						cs.findText();
        					}
        				}
//        				System.out.println("table rejected: " + tableToAdd.toExtendedString());
//        				System.out.print("failed valuation; nextTask set to ");
        				Iterator itemIter;
        				switch (nextTask)
        				{
        					case 3:
        						// return items to pool
        						for (GenericSegment i : tableToAdd.getItems())
        						{
        							if (!(lastValidTableItems.contains(i)))
        							{
        								if (i instanceof CandidateColumn)
        									candCols.add((CandidateColumn)i);
        							}
        						}
        						
        						// set tableToAdd = lastValid
        						tableToAdd.setItems(lastValidTableItems);
        						
        						// set nextTask == 4
        						nextTask = 4;
//        						System.out.println("4");
        						break;
        					case 4:
        						// return items to pool
        						for (GenericSegment i : tableToAdd.getItems())
        						{
        							if (!(lastValidTableItems.contains(i)))
        							{
        								if (i instanceof CandidateColumn)
        									candCols.add((CandidateColumn)i);
        							}
        						}
        						
        						// set tableToAdd = lastValid
        						tableToAdd.setItems(lastValidTableItems);
        						// set nextTask == 0
        						nextTask = 0;
//        						System.out.println("0");
        						break;
        					default: // case 0??
        						// return items to pool
        						for (GenericSegment i : tableToAdd.getItems())
        						{
        							if (!(lastValidTableItems.contains(i)))
        							{
        								if (i instanceof CandidateColumn)
        									candCols.add((CandidateColumn)i);
        							}
        						}
        						
        						// set tableToAdd = lastValid
        						tableToAdd.setItems(lastValidTableItems);
//        						System.out.println("N/A");
        						break;
        				}
        			}
        			else
    				{
//    					System.out.println("only one col; validation academic");
    				}
        			
	    			// ***** CHECK IF HORIZ-RULED
	    			List<LineSegment> horizLines = ListUtils.selectHorizLineSegments(tableToAdd.getItems());
	    			for (LineSegment thisLine : horizLines)
	    			{
	    				if (thisLine.getDirection() == LineSegment.DIR_HORIZ)
	    				{
	    					//boolean longerThanAllTextSegments = true;
	    					boolean longerThanAllColumns = true;
	    					Iterator itemIter = tableToAdd.getItems().iterator();
	    					while(itemIter.hasNext())
	    					{
	    						Object o = itemIter.next();
	    						if (o instanceof CandidateColumn)
	    						{
	    							CandidateColumn tc = (CandidateColumn)o;
	    							
	    							//System.out.println("thisLine: " + thisLine);
	    							//System.out.println("tc: " + tc);
	    							
	    							if (tc.getWidth() > (thisLine.getWidth() * 0.9f))
	    							{
	    								longerThanAllColumns = false;
	    								//System.out.println("FALSE!!");
	    							}
	    							/*
	    							Iterator clusterIter = tc.getItems().iterator();
	    							while(clusterIter.hasNext())
	    							{
	    								TextSegment c = (TextSegment)clusterIter.next();
	    								// foo
	    							}
	    							*/
	    						}
	    					}
	    					if (longerThanAllColumns) horizRuled = true;
	    				}
	    			}
	    			/*  debugging code!!
	    			if (tableToAdd.getItems().getHorizLines().size() > 0)
	    			{
	    				System.out.println("contains horiz lines");
	    				System.out.println(tableToAdd.toExtendedString());
	    				System.out.println();
	    			}
	    			System.out.println("horizRuled: " + horizRuled);
	    			*/

// BB
	    			//tableToAdd.findBoundingBox();
	    			
	    			if (horizRuled)
	    			{
//	    				System.out.println("horizRuled is true; nextTask set to 0");
	    				nextTask = 0;
	    			}
	    			else
	    			{
//	    				System.out.println("horizRuled is false");
	    				//nextTask = 3;
	    			}
	    			
	    			// candCols.findBoundingBoxes();
	    			AdjacencyGraph<CandidateColumn> colng = new AdjacencyGraph<CandidateColumn>();
	    			colng.addList(candColsDup);
//	    			colng.generateEdges();
	    			// 19.10.10 fixed Alexandra_Weisz.pdf! by using GenerateEdgesMultiple...
	    			colng.generateEdgesMultiple();
		    		if (nextTask == 3)
		    		{
		    			//System.out.println("f00edges: " + ng.getEdges());
		    			//retVal.addAll(colng.getEdges().toSegmentList());
		    			
		    			// try expanding left till either fail or no more neighbours
		    			//loop = (tableToAdd.isTable() || tableToAdd.getColumns().size() == 1);
		    			
		    			// try expanding left; if fail or no neighbours set expandRight = true
		    			
		    			GenericSegment leftmostSegment = 
//		    				(GenericSegment)tableToAdd.getLeftMostSegmentUsingBBox();
		    				ListUtils.findLeftMostSegment(tableToAdd.getItems());
		    			if (leftmostSegment != null && leftmostSegment instanceof CandidateColumn)
		    			{
		    				CandidateColumn leftmostColumn = (CandidateColumn)leftmostSegment;
		    				List<CandidateColumn> neighboursLeft = colng.returnNeighboursLeft(leftmostColumn);
		    				if (neighboursLeft.size() > 0)
		    				{
		    					CandidateColumn newCol = neighboursLeft.get(0);
		    					if (candCols.contains(newCol))
		    					{
//			    					System.out.println("added a new column");
			    					tableToAdd.getItems().add(newCol);
			    					candCols.remove(newCol);
			    					swallow = true;
		    					}
		    					else
		    					{
//		    						System.out.println("column not a candidate; nextTask set to 4");
			    					nextTask = 4; // column not a candidate
		    					}
		    				}
		    				else
		    				{
//		    					System.out.println("no neighbours left; nextTask set to 4");
		    					nextTask = 4; // no neighbours left...
		    				}
		    			}
		    			else
		    			{
//		    				System.out.println("blank table?; nextTask set to 4");
		    				nextTask = 4; // shouldn't happen unless table is blank...
		    			}
		    		}
		    		else if (nextTask == 4)
		    		{
		    			// try expanding right till either fail or no more neighbours
		    			
	    				GenericSegment rightmostSegment = 
//		    				(GenericSegment)tableToAdd.getRightMostSegmentUsingBBox();
	    					ListUtils.findRightMostSegment(tableToAdd.getItems());
//	    				System.out.println("rightmostseg: " + rightmostSegment);
		    			if (rightmostSegment != null && rightmostSegment instanceof CandidateColumn)
		    			{
		    				CandidateColumn rightmostColumn = (CandidateColumn)rightmostSegment;
		    				List<CandidateColumn> neighboursRight = colng.returnNeighboursRight(rightmostColumn);
		    				if (neighboursRight.size() > 0)
		    				{
		    					CandidateColumn newCol = neighboursRight.get(0);
		    					if (candCols.contains(newCol))
		    					{
//			    					System.out.println("added a new column");
			    					tableToAdd.getItems().add(newCol);
			    					candCols.remove(newCol);
			    					swallow = true;
		    					}
		    					else
		    					{
//		    						System.out.println("column not a candidate; nextTask set to 0");
			    					nextTask = 0; // column not a candidate
		    					}
		    				}
		    				else
		    				{
//		    					System.out.println("no neighbours right; nextTask set to 0");
		    					//System.out.println("seg edges: " + colng.getEdges(rightmostColumn));
		    					nextTask = 0; // no neighbours right...
		    				}
		    			}
		    			else
		    			{
//		    				System.out.println("blank table?; nextTask set to 0");
		    				nextTask = 0; // shouldn't happen unless table is blank...
		    			}
		    		}
	    		}
    			if (nextTask == 0)
    			{
//    				System.out.println("in nextTask == 0; loop is false");
    				loop = false;
    			}
    		}
    		
    		long start = System.currentTimeMillis();
    		if (tableToAdd.isTable())
    		{
//    			System.out.println("total time for evaluation: " + (System.currentTimeMillis() - start));
//	    		System.out.println("adding table: " + tableToAdd.toExtendedString());
	    		//tableToAdd.getItems().findText(false); //debug
//	    		System.out.println("items: " + tableToAdd.getItems().toExtendedString());
    			
    			// clone table items
	    		List<GenericSegment> tabItems = new ArrayList<GenericSegment>();
	    		for (GenericSegment gs : tableToAdd.getItems())
	    			tabItems.add(gs);
	    		
	    		tableToAdd.setItems(tabItems);
	    		tableToAdd.findBoundingBox();
	    		tableToAdd.findFontName();
	    		retVal.add(tableToAdd);
	    		
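	    		// Early exit after the first accepted table is currently disabled: the candCols.clear() below is commented out, so only loopCounter is advanced here.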
	    		loopCounter ++;
	    		//if (loopCounter >= 0)
	    		//	candCols.clear();
    		}
    		else
    		{
//    			System.out.println("total time for evaluation: " + (System.currentTimeMillis() - start));
//    			tableToAdd.getItems().findText(false);
//    			TODO: IS FINDTEXT REALLY NECESSARY HERE?
				for (GenericSegment gs : tableToAdd.getItems())
				{
					if (gs instanceof CompositeSegment<?>)
					{
						CompositeSegment<?> cs = (CompositeSegment<?>)gs;
						cs.findText();
					}
				}
    			
//    			System.out.println("table rejected: " + tableToAdd.toExtendedString());
    			//tableToAdd.getItems().findText(false); //debug
//    			System.out.println("items: " + tableToAdd.getItems().toExtendedString());
    			candCols.remove(tableToAdd.getItems().get(0));
    			
    			// clone table items
	    		List<GenericSegment> tabItems = new ArrayList<GenericSegment>();
	    		for (GenericSegment gs : tableToAdd.getItems())
	    			tabItems.add(gs);
	    		
	    		tableToAdd.setItems(tabItems);
    			//retVal.add(tableToAdd);
    			
    			loopCounter ++;
    			// Early exit after a rejected table is currently disabled: the candCols.clear() below is commented out, so nothing is cleared here.
    			//if (loopCounter >= 2)
	    		//	candCols.clear();
    		}
    		
//    		System.out.println("loopCounter is: " + loopCounter);
    		
    	}
    	
//    	System.out.println("retValAtEnd: ");// + retVal.toExtendedString());
    	
    	Iterator rvIter = retVal.iterator();
    	while(rvIter.hasNext())
    	{
    		CandidateTable pt = (CandidateTable)rvIter.next();
//    		System.out.println("pt: " + pt);
//    		System.out.println("pt.items: " + pt.getItems().toExtendedString());
    	}
    	
    	return retVal;
    	// for each (unused) candidate col
    		// add any _directly_ neighbouring lines
    		// swallow
    		//	and carry on swallowing till no extra objects are added
    		// if horizontal lines are present, we already know the width of the table
    	
    		// todo later: (see how this one works first:)
    		// extending to neighbouring candidate columns in all four directions
    		// (two if we are certain about width)
    		// 
    }
}
