diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a18c16..233b605 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 012 (December 2023) +* Add new worksheet class +* Add worksheet items attribute to workbook class + ## 011 (November 2022) * Remove extraneous debug print statements diff --git a/docs/docs/api-ref.md b/docs/docs/api-ref.md index e6491f2..ae70c87 100644 --- a/docs/docs/api-ref.md +++ b/docs/docs/api-ref.md @@ -34,12 +34,35 @@ Saves any changes to the workbook to a new file specified by the `new_file` para `self.worksheets:` Returns a list of worksheets found in the workbook. +`self.worksheet_items:` Returns a list of Worksheet objects found in the workbook. + `self.datasources:` Returns a list of Datasource objects found in the workbook. `self.filename:` Returns the filename of the workbook. `self.shapes` Returns a list of strings with the names of shapes found in the workbook. +## Worksheets +```python +class Worksheet(wsxml): +``` + +The Worksheet class represents the worksheets found in a Tableau Workbook. The library will access key attributes of each worksheet it finds. + +**Properties:** + +`self.name`: Returns the name of the worksheet. + +`self.datasources`: Returns list of the Datasource objects that are used in the worksheet. + +`self.fields`: Returns list of Field objects that are used somewhere within the sheet. + +`self.rows`: Returns list of Field objects present on the rows shelf. Certain items will return a string value, such as "Measure Names" which is not a field. + +`self.cols`: Returns list of Field objects present on the columns shelf. Certain items will return a string value, such as "Measure Names" which is not a field. + +`self.filter_fields`: Returns list of Field objects that are present on the Filter pane. Certain items will return a string value, such as Filter Actions. + ## Datasources ```python class Datasource(dsxml, filename=None) diff --git a/setup.py b/setup.py index 90668ae..e06da6a 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='tableaudocumentapi', - version='0.11', + version='0.12', author='Tableau', author_email='github@tableau.com', url='https://github.com/tableau/document-api-python', diff --git a/tableaudocumentapi/workbook.py b/tableaudocumentapi/workbook.py index 4c425da..3c0ed1d 100644 --- a/tableaudocumentapi/workbook.py +++ b/tableaudocumentapi/workbook.py @@ -1,9 +1,157 @@ import weakref +import re -from tableaudocumentapi import Datasource, xfile +from tableaudocumentapi import Datasource, Field, xfile from tableaudocumentapi.xfile import xml_open, TableauInvalidFileException +def _remove_brackets(text): + return text.lstrip("[").rstrip("]") + +def _clean_columns(marks): + """ + Extract rows/cols data that is stored such as [datasource].[column] + We use a regex to find multiple marks and another regex to extract the field name + + We return a dictionary of datasource: [fields] so we can map them to field items + """ + if marks is None: + return None + # find all [datasource].[column] strings by positive lookahead of ), space, or string end + # some will have three parts so we need to use a lookahead to ensure we capture the entire object + matching_marks = re.findall(r"(\[.*?\])(?=\)|\s|$)",str(marks)) + datasource_fields = {} + for mark in matching_marks: + # split column into datasource and field display + column = mark.split("].[") + datasource = _remove_brackets(column[0]) + # initialize dictionary entry + if datasource not in datasource_fields: + datasource_fields[datasource] = [] + # the field is always the last item in the list + field_display = _remove_brackets(column[-1]) + # use ordinal (ok), quantitative (qk), nominal (nk), or string end as lookahead + field_match = re.match(r".*?(?<=:)([^:]+)(?=:ok|:qk|:nk|$)", field_display) + # if no match, eg. Measure Names, just return the string + if field_match: + field = field_match.groups(1)[0] + else: + field = field_display + datasource_fields[datasource].append(field) + return datasource_fields + +def _ds_fields_to_tems(ds_fields, ds_index): + fields = [] + for ds, field_ids in ds_fields.items(): + fields_dict = ds_index[ds].fields + for field_id in field_ids: + # many field ids include brackets, so we need to check for these as well + field_id_brackets = f"[{field_id}]" + if field_id in fields_dict: + field = fields_dict.get(field_id) + elif field_id_brackets in fields_dict: + field = fields_dict.get(field_id_brackets) + else: + field = field_id + fields.append(field) + return fields + +class Worksheet(object): + """ + A class to parse key attributes of a worksheet. + """ + + def __init__(self, worksheet_element, ds_index): + self._worksheetRoot = worksheet_element + self.name = worksheet_element.attrib['name'] + self._datasource_index = ds_index + self._datasources = self._prepare_datasources(self._worksheetRoot, self._datasource_index) + self._fields = self._prepare_datasource_dependencies(self._worksheetRoot) + self._rows = self._prepare_rows(self._worksheetRoot, self._datasource_index) + self._cols = self._prepare_cols(self._worksheetRoot, self._datasource_index) + self._filter_fields = self._prepare_filter_fields(self._worksheetRoot, self._datasource_index) + + def __repr__(self): + name = self.name + datasources = ", ".join([ds.caption or ds.name for ds in self._datasources]) + fields = ", ".join([f.name for f in self._fields]) + return f"name: {name}, datasources: {datasources}, fields: {fields}" + + def __iter__(self): + keys = self.__dict__.keys() + filtered_keys = [key for key in keys if key != "_worksheetRoot"] + for key in filtered_keys: + yield key.lstrip("_"), getattr(self, key) + @staticmethod + def _prepare_filter_fields(worksheet_element, ds_index): + filters = [] + slices_list = worksheet_element.find(".//slices") + if slices_list is None: + return filters + slices = [column.text for column in slices_list] + # combine slices into single string to use same function as rows/cols + ds_fields = _clean_columns(" ".join(slices)) + if ds_fields == None or len(ds_fields) == 0: + return None + fields = _ds_fields_to_tems(ds_fields, ds_index) + return fields + + @staticmethod + def _prepare_datasources(worksheet_element, ds_index): + worksheet_datasources = worksheet_element.find(".//datasources") + datasource_names = [ds.attrib["name"] for ds in worksheet_datasources] + datasource_list = [ds_index[name] for name in datasource_names] + return datasource_list + + @property + def datasources(self): + return self._datasources + + @staticmethod + def _prepare_datasource_dependencies(worksheet_element): + dependencies = worksheet_element.findall('.//datasource-dependencies') + for dependency in dependencies: + columns = dependency.findall('.//column') + return [Field.from_column_xml(column) for column in columns] + + @property + def fields(self): + return self._prepare_datasource_dependencies + + @property + def fields_list(self): + return [field.caption for field in self._fields] + + @staticmethod + def _prepare_rows(worksheet_element, ds_index): + rows = worksheet_element.find('.//rows') + ds_fields = _clean_columns(rows.text) + if ds_fields == None or len(ds_fields) == 0: + return None + fields = _ds_fields_to_tems(ds_fields, ds_index) + return fields + + @staticmethod + def _prepare_cols(worksheet_element, ds_index): + cols = worksheet_element.find('.//cols') + ds_fields = _clean_columns(cols.text) + if ds_fields == None or len(ds_fields) == 0: + return None + fields = _ds_fields_to_tems(ds_fields, ds_index) + return fields + + @property + def rows(self): + return self._rows + + @property + def cols(self): + return self._cols + + @property + def filter_fields(self): + return self._filter_fields + class Workbook(object): """A class for writing Tableau workbook files.""" @@ -31,6 +179,8 @@ def __init__(self, filename): self._worksheets = self._prepare_worksheets( self._workbookRoot, self._datasource_index) + + self._worksheet_items = self._prepare_worksheet_items(self._workbookRoot, self._datasource_index) self._shapes = self._prepare_shapes(self._workbookRoot) @@ -45,6 +195,10 @@ def datasources(self): @property def worksheets(self): return self._worksheets + + @property + def worksheet_items(self): + return self._worksheet_items @property def filename(self): @@ -142,7 +296,16 @@ def _prepare_worksheets(xml_root, ds_index): datasource.fields[column_name].add_used_in(worksheet_name) return worksheets - + + @staticmethod + def _prepare_worksheet_items(xml_root, ds_index): + worksheets = [] + worksheets_element = xml_root.find('.//worksheets') + if worksheets_element is None: + return worksheets + worksheets = [Worksheet(worksheet_element, ds_index) for worksheet_element in worksheets_element] + return worksheets + @staticmethod def _prepare_shapes(xml_root): shapes = [] diff --git a/test/test_workbook.py b/test/test_workbook.py index 41501db..3cc082c 100644 --- a/test/test_workbook.py +++ b/test/test_workbook.py @@ -49,3 +49,11 @@ def test_dashboards_setup(self): wb = Workbook(DASHBOARDS_FILE) self.assertIsNotNone(wb) self.assertEqual(wb.dashboards, ['setTest']) + +class Worksheets(unittest.TestCase): + def test_worksheets_setup(self): + wb = Workbook(DASHBOARDS_FILE) + self.assertEqual(len(wb.worksheet_items), 2) + worksheet_names = [ws.name for ws in wb.worksheet_items] + worksheet_names.sort() + self.assertEqual(worksheet_names[0], 'Sheet 1') \ No newline at end of file
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: