class ITableGranulator(Interface):
    """Interface for splitting a document into its tables."""

    def getTableItemList(file):
        """Return the list of table IDs, each entry in the form (id, title)."""

    def getColumnItemList(file, table_id):
        """Return the list of columns, each entry in the form (id, title).

        NOTE(review): presumably limited to the table identified by
        ``table_id`` -- confirm against an implementation.
        """

    def getLineItemList(file, table_id):
        """Return the lines of the given table as (key, value) pairs."""
class IImageGranulator(Interface):
    """Interface for splitting a document into its images."""

    def getImageItemList(file):
        """Return the list of images, each entry in the form (id, title)."""

    def getImage(file, image_id, format=None, resolution=None, **kw):
        """Return the given image.

        NOTE(review): the exact meaning of ``format``, ``resolution`` and the
        extra keyword arguments is not specified in this declaration --
        confirm against an implementation.
        """
The IGranulator interface provides APIs to extract tables, images, and paragraphs from a document.
Implementation is normally based on an analysis of a base format (e.g. ODT, HTML). An initial conversion to that base format may thus be required. The output of granulation is provided as standard XML-RPC output. For images, it is provided in any image format and can then be converted by a conversion handler.