diff --git a/.gitignore b/.gitignore index 3fc52ff..ee250af 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,4 @@ target/ #Other things .DS_Store +.idea diff --git a/.travis.yml b/.travis.yml index db3656e..75674b6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,10 +12,11 @@ install: # command to run tests script: # Tests - - python test.py + - python setup.py test # pep8 - - pep8 --ignore=E501 . + - pep8 . # Examples - (cd "Examples/Replicate Workbook" && python replicateWorkbook.py) - (cd "Examples/List TDS Info" && python listTDSInfo.py) + - (cd "Examples/GetFields" && python show_fields.py) diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..33b164c --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,11 @@ +## 0.2 (22 July 2016) + +* Added support for loading twbx and tdsx files (#43, #44) +* Added Fields property to datasource (#45) +* Added Example for using the Fields Property (#51) +* Added Ability to get fields used by a specific sheet (#54) +* Code clean up and test reorganization + +## 0.1 (29 June 2016) + +* Initial Release to the world diff --git a/Examples/GetFields/World.tds b/Examples/GetFields/World.tds new file mode 120000 index 0000000..397f696 --- /dev/null +++ b/Examples/GetFields/World.tds @@ -0,0 +1 @@ +../List TDS Info/World.tds \ No newline at end of file diff --git a/Examples/GetFields/show_fields.py b/Examples/GetFields/show_fields.py new file mode 100644 index 0000000..ee45f87 --- /dev/null +++ b/Examples/GetFields/show_fields.py @@ -0,0 +1,29 @@ +############################################################ +# Step 1) Use Datasource object from the Document API +############################################################ +from tableaudocumentapi import Datasource + +############################################################ +# Step 2) Open the .tds we want to inspect +############################################################ +sourceTDS = Datasource.from_file('World.tds') + 
+############################################################ +# Step 3) Print out all of the fields and what type they are +############################################################ +print('----------------------------------------------------------') +print('--- {} total fields in this datasource'.format(len(sourceTDS.fields))) +print('----------------------------------------------------------') +for count, field in enumerate(sourceTDS.fields.values()): + print('{:>4}: {} is a {}'.format(count+1, field.name, field.datatype)) + blank_line = False + if field.calculation: + print(' the formula is {}'.format(field.calculation)) + blank_line = True + if field.default_aggregation: + print(' the default aggregation is {}'.format(field.default_aggregation)) + blank_line = True + + if blank_line: + print('') +print('----------------------------------------------------------') diff --git a/README.md b/README.md index 2fb0bdd..a6534d2 100644 --- a/README.md +++ b/README.md @@ -6,15 +6,26 @@ This repo contains Python source and example files for the Tableau Document API. Document API --------------- -The Document API provides a supported way to programmatically make updates to Tableau workbook (`.twb`) and datasource (`.tds`) files. If you've been making changes to these file types by directly updating the XML--that is, by XML hacking--this SDK is for you :) - -Currently only the following operations are supported: - -- Modify database server -- Modify database name -- Modify database user - -We don't yet support creating files from scratch. In addition, support for `.twbx` and `.tdsx` files is coming. +The Document API provides a supported way to programmatically make updates to Tableau workbook and data source files. 
If you've been making changes to these file types by directly updating the XML--that is, by XML hacking--this SDK is for you :) + +Features include: +- Support for 9.X and 10.X workbook and data source files + - Including TDSX and TWBX files +- Getting connection information from data sources and workbooks + - Server Name + - Username + - Database Name + - Authentication Type + - Connection Type +- Updating connection information in workbooks and data sources + - Server Name + - Username + - Database Name +- Getting Field information from data sources and workbooks + - Get all fields in a data source + - Get all fields in use by certain sheets in a workbook + +We don't yet support creating files from scratch, adding extracts into workbooks or data sources, or updating field information ###Getting Started @@ -34,8 +45,19 @@ Download the `.zip` file that contains the SDK. Unzip the file and then run the pip install -e ``` -We plan on putting the package in PyPi to make installation easier. +#### Installing the Development Version From Git + +*Only do this if you know you want the development version, no guarantee that we won't break APIs during development* + +```text +pip install git+https://github.com/tableau/document-api-python.git@development +``` + +If you go this route, but want to switch back to the non-development version, you need to run the following command before installing the stable version: +```text +pip uninstall tableaudocumentapi +``` ###Basics The following example shows the basic syntax for using the Document API to update a workbook: @@ -52,7 +74,7 @@ sourceWB.datasources[0].connections[0].username = "benl" sourceWB.save() ``` -With Data Integration in Tableau 10, a datasource can have multiple connections. To access the connections simply index them like you would datasources +With Data Integration in Tableau 10, a data source can have multiple connections. 
To access the connections simply index them like you would datasources ```python from tableaudocumentapi import Workbook @@ -75,13 +97,13 @@ sourceWB.save() **Notes** - Import the `Workbook` object from the `tableaudocumentapi` module. -- To open a workbook, instantiate a `Workbook` object and pass the `.twb` file name in the constructor. -- The `Workbook` object exposes a `datasources` collection. -- Each datasource object has a `connection` object that supports a `server`, `dbname`, and `username` property. +- To open a workbook, instantiate a `Workbook` object and pass the file name as the first argument. +- The `Workbook` object exposes a list of `datasources` in the workbook +- Each data source object has a `connection` object that supports a `server`, `dbname`, and `username` property. - Save changes to the workbook by calling the `save` or `save_as` method. ###Examples -The downloadable package contains an example named `replicateWorkbook.py` (in the folder `\Examples\Replicate Workbook`). This example reads an existing workbook and reads a .csv file that contains a list of servers, database names, and users. For each new user in the .csv file, the code copies the original workbook, updates the `server`, `dbname`, and `username` properties, and saves the workbook under a new name. +The downloadable package contains several example scripts that show more detailed usage of the Document API diff --git a/contributing.md b/contributing.md new file mode 100644 index 0000000..15fc5f8 --- /dev/null +++ b/contributing.md @@ -0,0 +1,33 @@ +# Contributing + +We welcome contributions to this project! 
+ +Contributions can include, but are not limited to, any of the following: + +* File an Issue +* Request a Feature +* Implement a Requested Feature +* Fix an Issue/Bug +* Add/Fix documentation + +Contributions must follow the guidelines outlined on the [Tableau Organization](http://tableau.github.io/) page, though filing an issue or requesting +a feature does not require the CLA. + +## Issues and Feature Requests + +To submit an issue/bug report, or to request a feature, please submit a [github issue](https://github.com/tableau/document-api-python/issues) to the repo. + +If you are submitting a bug report, please provide as much information as you can, including clear and concise repro steps, attaching any necessary +files to assist in the repro. **Be sure to scrub the files of any potentially sensitive information. Issues are public.** + +For a feature request, please try to describe the scenario you are trying to accomplish that requires the feature. This will help us understand +the limitations that you are running into, and provide us with a use case to know if we've satisfied your request. + +## Fixes, Implementations, and Documentation + +For all other things, please submit a PR that includes the fix, documentation, or new code that you are trying to contribute. More information on +creating a PR can be found in the [github documentation](https://help.github.com/articles/creating-a-pull-request/). + +If the feature is complex or has multiple solutions that could be equally appropriate approaches, it would be helpful to file an issue to discuss the +design trade-offs of each solution before implementing, to allow us to collectively arrive at the best solution, which most likely exists in the middle +somewhere. 
diff --git a/publish.sh b/publish.sh new file mode 100755 index 0000000..99a3115 --- /dev/null +++ b/publish.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -e + +rm -rf dist +python setup.py sdist +python setup.py bdist_wheel +python3 setup.py bdist_wheel +twine upload dist/* diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..066edef --- /dev/null +++ b/setup.cfg @@ -0,0 +1,10 @@ +[wheel] +universal = 1 + +[pycodestyle] +select = +max_line_length = 120 + +[pep8] +max_line_length = 120 + diff --git a/setup.py b/setup.py index c184acf..e047b34 100644 --- a/setup.py +++ b/setup.py @@ -5,11 +5,12 @@ setup( name='tableaudocumentapi', - version='0.0.1', + version='0.2', author='Tableau Software', author_email='github@tableau.com', url='https://github.com/tableau/document-api-python', packages=['tableaudocumentapi'], license='MIT', - description='A Python module for working with Tableau files.' + description='A Python module for working with Tableau files.', + test_suite='test' ) diff --git a/tableaudocumentapi/__init__.py b/tableaudocumentapi/__init__.py index 2ee7df1..6a10f6f 100644 --- a/tableaudocumentapi/__init__.py +++ b/tableaudocumentapi/__init__.py @@ -1,5 +1,7 @@ +from .field import Field from .connection import Connection from .datasource import Datasource, ConnectionParser from .workbook import Workbook + __version__ = '0.0.1' __VERSION__ = __version__ diff --git a/tableaudocumentapi/datasource.py b/tableaudocumentapi/datasource.py index 617004a..3f64145 100644 --- a/tableaudocumentapi/datasource.py +++ b/tableaudocumentapi/datasource.py @@ -3,12 +3,67 @@ # Datasource - A class for writing datasources to Tableau files # ############################################################################### +import collections +import itertools import xml.etree.ElementTree as ET -from tableaudocumentapi import Connection +import xml.sax.saxutils as sax +from tableaudocumentapi import Connection, xfile +from tableaudocumentapi import Field +from 
tableaudocumentapi.multilookup_dict import MultiLookupDict +from tableaudocumentapi.xfile import xml_open -class ConnectionParser(object): +######## +# This is needed in order to determine if something is a string or not. It is necessary because +# of differences between python2 (basestring) and python3 (str). If python2 support is every +# dropped, remove this and change the basestring references below to str +try: + basestring +except NameError: + basestring = str +######## + +_ColumnObjectReturnTuple = collections.namedtuple('_ColumnObjectReturnTupleType', ['id', 'object']) + + +def _get_metadata_xml_for_field(root_xml, field_name): + if "'" in field_name: + field_name = sax.escape(field_name, {"'": "'"}) + xpath = ".//metadata-record[@class='column'][local-name='{}']".format(field_name) + return root_xml.find(xpath) + + +def _is_used_by_worksheet(names, field): + return any((y for y in names if y in field.worksheets)) + + +class FieldDictionary(MultiLookupDict): + def used_by_sheet(self, name): + # If we pass in a string, no need to get complicated, just check to see if name is in + # the field's list of worksheets + if isinstance(name, basestring): + return [x for x in self.values() if name in x.worksheets] + + # if we pass in a list, we need to check to see if any of the names in the list are in + # the field's list of worksheets + return [x for x in self.values() if _is_used_by_worksheet(name, x)] + + +def _column_object_from_column_xml(root_xml, column_xml): + field_object = Field.from_column_xml(column_xml) + local_name = field_object.id + metadata_record = _get_metadata_xml_for_field(root_xml, local_name) + if metadata_record is not None: + field_object.apply_metadata(metadata_record) + return _ColumnObjectReturnTuple(field_object.id, field_object) + +def _column_object_from_metadata_xml(metadata_xml): + field_object = Field.from_metadata_xml(metadata_xml) + return _ColumnObjectReturnTuple(field_object.id, field_object) + + +class 
ConnectionParser(object): def __init__(self, datasource_xml, version): self._dsxml = datasource_xml self._dsversion = version @@ -52,11 +107,13 @@ def __init__(self, dsxml, filename=None): self._connection_parser = ConnectionParser( self._datasourceXML, version=self._version) self._connections = self._connection_parser.get_connections() + self._fields = None @classmethod def from_file(cls, filename): - "Initialize datasource from file (.tds)" - dsxml = ET.parse(filename).getroot() + """Initialize datasource from file (.tds)""" + + dsxml = xml_open(filename).getroot() return cls(dsxml, filename) def save(self): @@ -72,7 +129,8 @@ def save(self): """ # save the file - self._datasourceTree.write(self._filename, encoding="utf-8", xml_declaration=True) + + xfile._save_file(self._filename, self._datasourceTree) def save_as(self, new_filename): """ @@ -85,7 +143,7 @@ def save_as(self, new_filename): Nothing. """ - self._datasourceTree.write(new_filename, encoding="utf-8", xml_declaration=True) + xfile._save_file(self._filename, self._datasourceTree, new_filename) ########### # name @@ -107,3 +165,28 @@ def version(self): @property def connections(self): return self._connections + + ########### + # fields + ########### + @property + def fields(self): + if not self._fields: + self._fields = self._get_all_fields() + return self._fields + + def _get_all_fields(self): + column_field_objects = self._get_column_objects() + existing_column_fields = [x.id for x in column_field_objects] + metadata_only_field_objects = (x for x in self._get_metadata_objects() if x.id not in existing_column_fields) + field_objects = itertools.chain(column_field_objects, metadata_only_field_objects) + + return FieldDictionary({k: v for k, v in field_objects}) + + def _get_metadata_objects(self): + return (_column_object_from_metadata_xml(x) + for x in self._datasourceTree.findall(".//metadata-record[@class='column']")) + + def _get_column_objects(self): + return 
[_column_object_from_column_xml(self._datasourceTree, xml) + for xml in self._datasourceTree.findall('.//column')] diff --git a/tableaudocumentapi/field.py b/tableaudocumentapi/field.py new file mode 100644 index 0000000..4af648f --- /dev/null +++ b/tableaudocumentapi/field.py @@ -0,0 +1,184 @@ +import functools + +_ATTRIBUTES = [ + 'id', # Name of the field as specified in the file, usually surrounded by [ ] + 'caption', # Name of the field as displayed in Tableau unless an aliases is defined + 'datatype', # Type of the field within Tableau (string, integer, etc) + 'role', # Dimension or Measure + 'type', # three possible values: quantitative, ordinal, or nominal + 'alias', # Name of the field as displayed in Tableau if the default name isn't wanted + 'calculation', # If this field is a calculated field, this will be the formula +] + +_METADATA_ATTRIBUTES = [ + 'aggregation', # The type of aggregation on the field (e.g Sum, Avg) +] + +_METADATA_TO_FIELD_MAP = [ + ('local-name', 'id'), + ('local-type', 'datatype'), + ('remote-alias', 'alias') +] + + +def _find_metadata_record(record, attrib): + element = record.find('.//{}'.format(attrib)) + if element is None: + return None + return element.text + + +class Field(object): + """ Represents a field in a datasource """ + + def __init__(self, column_xml=None, metadata_xml=None): + + # Initialize all the possible attributes + for attrib in _ATTRIBUTES: + setattr(self, '_{}'.format(attrib), None) + for attrib in _METADATA_ATTRIBUTES: + setattr(self, '_{}'.format(attrib), None) + self._worksheets = set() + + if column_xml is not None: + self._initialize_from_column_xml(column_xml) + if metadata_xml is not None: + self.apply_metadata(metadata_xml) + + elif metadata_xml is not None: + self._initialize_from_metadata_xml(metadata_xml) + + else: + raise AttributeError('column_xml or metadata_xml needed to initialize field') + + def _initialize_from_column_xml(self, xmldata): + for attrib in _ATTRIBUTES: + 
self._apply_attribute(xmldata, attrib, lambda x: xmldata.attrib.get(x, None)) + + def _initialize_from_metadata_xml(self, xmldata): + for metadata_name, field_name in _METADATA_TO_FIELD_MAP: + self._apply_attribute(xmldata, field_name, lambda x: xmldata.find('.//{}'.format(metadata_name)).text, + read_name=metadata_name) + self.apply_metadata(xmldata) + + ######################################## + # Special Case methods for construction fields from various sources + # not intended for client use + ######################################## + def apply_metadata(self, metadata_record): + for attrib in _METADATA_ATTRIBUTES: + self._apply_attribute(metadata_record, attrib, functools.partial(_find_metadata_record, metadata_record)) + + def add_used_in(self, name): + self._worksheets.add(name) + + @classmethod + def from_column_xml(cls, xmldata): + return cls(column_xml=xmldata) + + @classmethod + def from_metadata_xml(cls, xmldata): + return cls(metadata_xml=xmldata) + + def _apply_attribute(self, xmldata, attrib, default_func, read_name=None): + if read_name is None: + read_name = attrib + if hasattr(self, '_read_{}'.format(read_name)): + value = getattr(self, '_read_{}'.format(read_name))(xmldata) + else: + value = default_func(attrib) + + setattr(self, '_{}'.format(attrib), value) + + @property + def name(self): + """ Provides a nice name for the field which is derived from the alias, caption, or the id. + + The name resolves as either the alias if it's defined, or the caption if alias is not defined, + and finally the id which is the underlying name if neither of the fields exist. 
""" + alias = getattr(self, 'alias', None) + if alias: + return alias + + caption = getattr(self, 'caption', None) + if caption: + return caption + + return self.id + + @property + def id(self): + """ Name of the field as specified in the file, usually surrounded by [ ] """ + return self._id + + @property + def caption(self): + """ Name of the field as displayed in Tableau unless an aliases is defined """ + return self._caption + + @property + def alias(self): + """ Name of the field as displayed in Tableau if the default name isn't wanted """ + return self._alias + + @property + def datatype(self): + """ Type of the field within Tableau (string, integer, etc) """ + return self._datatype + + @property + def role(self): + """ Dimension or Measure """ + return self._role + + @property + def is_quantitative(self): + """ A dependent value, usually a measure of something + + e.g. Profit, Gross Sales """ + return self._type == 'quantitative' + + @property + def is_ordinal(self): + """ Is this field a categorical field that has a specific order + + e.g. How do you feel? 1 - awful, 2 - ok, 3 - fantastic """ + return self._type == 'ordinal' + + @property + def is_nominal(self): + """ Is this field a categorical field that does not have a specific order + + e.g. What color is your hair? 
""" + return self._type == 'nominal' + + @property + def calculation(self): + """ If this field is a calculated field, this will be the formula """ + return self._calculation + + @property + def default_aggregation(self): + """ The default type of aggregation on the field (e.g Sum, Avg)""" + return self._aggregation + + @property + def worksheets(self): + return list(self._worksheets) + + ###################################### + # Special Case handling methods for reading the values from the XML + ###################################### + @staticmethod + def _read_id(xmldata): + # ID is actually the name of the field, but to provide a nice name, we call this ID + return xmldata.attrib.get('name', None) + + @staticmethod + def _read_calculation(xmldata): + # The formula for a calculation is stored in a child element, so we need to pull it out separately. + calc = xmldata.find('.//calculation') + if calc is None: + return None + + return calc.attrib.get('formula', None) diff --git a/tableaudocumentapi/multilookup_dict.py b/tableaudocumentapi/multilookup_dict.py new file mode 100644 index 0000000..64b742a --- /dev/null +++ b/tableaudocumentapi/multilookup_dict.py @@ -0,0 +1,66 @@ +import weakref + + +_no_default_value = object() + + +def _resolve_value(key, value): + retval = None + try: + if hasattr(value, 'get'): + retval = value.get(key, None) + + if retval is None: + retval = getattr(value, key, None) + except AttributeError: + retval = None + return retval + + +def _build_index(key, d): + return {_resolve_value(key, v): k + for k, v in d.items() + if _resolve_value(key, v) is not None} + + +# TODO: Improve this to be more generic +class MultiLookupDict(dict): + def __init__(self, args=None): + if args is None: + args = {} + super(MultiLookupDict, self).__init__(args) + self._indexes = { + 'alias': weakref.WeakValueDictionary(), + 'caption': weakref.WeakValueDictionary() + } + self._populate_indexes() + + def _populate_indexes(self): + self._indexes['alias'] = 
_build_index('alias', self) + self._indexes['caption'] = _build_index('caption', self) + + def __setitem__(self, key, value): + alias = _resolve_value('alias', value) + caption = _resolve_value('caption', value) + if alias is not None: + self._indexes['alias'][alias] = key + if caption is not None: + self._indexes['caption'][caption] = key + + dict.__setitem__(self, key, value) + + def get(self, key, default_value=_no_default_value): + try: + return self[key] + except KeyError: + if default_value is not _no_default_value: + return default_value + raise + + def __getitem__(self, key): + if key in self._indexes['alias']: + key = self._indexes['alias'][key] + elif key in self._indexes['caption']: + key = self._indexes['caption'][key] + + return dict.__getitem__(self, key) diff --git a/tableaudocumentapi/workbook.py b/tableaudocumentapi/workbook.py index 889f746..28ddd03 100644 --- a/tableaudocumentapi/workbook.py +++ b/tableaudocumentapi/workbook.py @@ -4,8 +4,13 @@ # ############################################################################### import os +import zipfile +import weakref + import xml.etree.ElementTree as ET -from tableaudocumentapi import Datasource + +from tableaudocumentapi import Datasource, xfile +from tableaudocumentapi.xfile import xml_open class Workbook(object): @@ -24,30 +29,21 @@ def __init__(self, filename): Constructor. """ - # We have a valid type of input file - if self._is_valid_file(filename): - # set our filename, open .twb, initialize things - self._filename = filename - self._workbookTree = ET.parse(filename) - self._workbookRoot = self._workbookTree.getroot() - - # prepare our datasource objects - self._datasources = self._prepare_datasources( - self._workbookRoot) # self.workbookRoot.find('datasources') - else: - print('Invalid file type. 
Must be .twb or .tds.') - raise Exception() - - @classmethod - def from_file(cls, filename): - "Initialize datasource from file (.tds)" - if self._is_valid_file(filename): - self._filename = filename - dsxml = ET.parse(filename).getroot() - return cls(dsxml) - else: - print('Invalid file type. Must be .twb or .tds.') - raise Exception() + + self._filename = filename + + self._workbookTree = xml_open(self._filename) + + self._workbookRoot = self._workbookTree.getroot() + # prepare our datasource objects + self._datasources = self._prepare_datasources( + self._workbookRoot) # self.workbookRoot.find('datasources') + + self._datasource_index = self._prepare_datasource_index(self._datasources) + + self._worksheets = self._prepare_worksheets( + self._workbookRoot, self._datasource_index + ) ########### # datasources @@ -56,6 +52,13 @@ def from_file(cls, filename): def datasources(self): return self._datasources + ########### + # worksheets + ########### + @property + def worksheets(self): + return self._worksheets + ########### # filename ########### @@ -76,7 +79,7 @@ def save(self): """ # save the file - self._workbookTree.write(self._filename, encoding="utf-8", xml_declaration=True) + xfile._save_file(self._filename, self._workbookTree) def save_as(self, new_filename): """ @@ -89,25 +92,56 @@ def save_as(self, new_filename): Nothing. """ - - self._workbookTree.write(new_filename, encoding="utf-8", xml_declaration=True) + xfile._save_file( + self._filename, self._workbookTree, new_filename) ########################################################################### # # Private API. 
# ########################################################################### - def _prepare_datasources(self, xmlRoot): + @staticmethod + def _prepare_datasource_index(datasources): + retval = weakref.WeakValueDictionary() + for datasource in datasources: + retval[datasource.name] = datasource + + return retval + + @staticmethod + def _prepare_datasources(xml_root): datasources = [] # loop through our datasources and append - for datasource in xmlRoot.find('datasources'): + datasource_elements = xml_root.find('datasources') + if datasource_elements is None: + return [] + + for datasource in datasource_elements: ds = Datasource(datasource) datasources.append(ds) return datasources @staticmethod - def _is_valid_file(filename): - fileExtension = os.path.splitext(filename)[-1].lower() - return fileExtension in ('.twb', '.tds') + def _prepare_worksheets(xml_root, ds_index): + worksheets = [] + worksheets_element = xml_root.find('.//worksheets') + if worksheets_element is None: + return worksheets + + for worksheet_element in worksheets_element: + worksheet_name = worksheet_element.attrib['name'] + worksheets.append(worksheet_name) # TODO: A real worksheet object, for now, only name + + dependencies = worksheet_element.findall('.//datasource-dependencies') + + for dependency in dependencies: + datasource_name = dependency.attrib['datasource'] + datasource = ds_index[datasource_name] + for column in dependency.findall('.//column'): + column_name = column.attrib['name'] + if column_name in datasource.fields: + datasource.fields[column_name].add_used_in(worksheet_name) + + return worksheets diff --git a/tableaudocumentapi/xfile.py b/tableaudocumentapi/xfile.py new file mode 100644 index 0000000..a0cd62e --- /dev/null +++ b/tableaudocumentapi/xfile.py @@ -0,0 +1,98 @@ +import contextlib +import os +import shutil +import tempfile +import zipfile +import xml.etree.ElementTree as ET + +try: + from distutils2.version import NormalizedVersion as Version +except ImportError: + 
from distutils.version import LooseVersion as Version + +MIN_SUPPORTED_VERSION = Version("9.0") + + +class TableauVersionNotSupportedException(Exception): + pass + + +def xml_open(filename): + # Determine if this is a twb or twbx and get the xml root + if zipfile.is_zipfile(filename): + tree = get_xml_from_archive(filename) + else: + tree = ET.parse(filename) + file_version = Version(tree.getroot().attrib.get('version', '0.0')) + if file_version < MIN_SUPPORTED_VERSION: + raise TableauVersionNotSupportedException(file_version) + return tree + + +@contextlib.contextmanager +def temporary_directory(*args, **kwargs): + d = tempfile.mkdtemp(*args, **kwargs) + try: + yield d + finally: + shutil.rmtree(d) + + +def find_file_in_zip(zip_file): + for filename in zip_file.namelist(): + try: + with zip_file.open(filename) as xml_candidate: + ET.parse(xml_candidate).getroot().tag in ( + 'workbook', 'datasource') + return filename + except ET.ParseError: + # That's not an XML file by gosh + pass + + +def get_xml_from_archive(filename): + with zipfile.ZipFile(filename) as zf: + with zf.open(find_file_in_zip(zf)) as xml_file: + xml_tree = ET.parse(xml_file) + + return xml_tree + + +def build_archive_file(archive_contents, zip_file): + for root_dir, _, files in os.walk(archive_contents): + relative_dir = os.path.relpath(root_dir, archive_contents) + for f in files: + temp_file_full_path = os.path.join( + archive_contents, relative_dir, f) + zipname = os.path.join(relative_dir, f) + zip_file.write(temp_file_full_path, arcname=zipname) + + +def save_into_archive(xml_tree, filename, new_filename=None): + # Saving a archive means extracting the contents into a temp folder, + # saving the changes over the twb/tds in that folder, and then + # packaging it back up into a specifically formatted zip with the correct + # relative file paths + + if new_filename is None: + new_filename = filename + + # Extract to temp directory + with temporary_directory() as temp_path: + with 
zipfile.ZipFile(filename) as zf: + xml_file = find_file_in_zip(zf) + zf.extractall(temp_path) + # Write the new version of the file to the temp directory + xml_tree.write(os.path.join( + temp_path, xml_file), encoding="utf-8", xml_declaration=True) + + # Write the new archive with the contents of the temp folder + with zipfile.ZipFile(new_filename, "w", compression=zipfile.ZIP_DEFLATED) as new_archive: + build_archive_file(temp_path, new_archive) + + +def _save_file(container_file, xml_tree, new_filename=None): + if zipfile.is_zipfile(container_file): + save_into_archive(xml_tree, container_file, new_filename) + else: + xml_tree.write(container_file, encoding="utf-8", xml_declaration=True) diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..c715da8 --- /dev/null +++ b/test/__init__.py @@ -0,0 +1,2 @@ +from . import bvt +from . import test_datasource diff --git a/test/assets/CONNECTION.xml b/test/assets/CONNECTION.xml new file mode 100644 index 0000000..392d112 --- /dev/null +++ b/test/assets/CONNECTION.xml @@ -0,0 +1 @@ + diff --git a/test/assets/TABLEAU_10_TDS.tds b/test/assets/TABLEAU_10_TDS.tds new file mode 100644 index 0000000..7a81784 --- /dev/null +++ b/test/assets/TABLEAU_10_TDS.tds @@ -0,0 +1 @@ + diff --git a/test/assets/TABLEAU_10_TDSX.tdsx b/test/assets/TABLEAU_10_TDSX.tdsx new file mode 100644 index 0000000..f94b678 Binary files /dev/null and b/test/assets/TABLEAU_10_TDSX.tdsx differ diff --git a/test/assets/TABLEAU_10_TWB.twb b/test/assets/TABLEAU_10_TWB.twb new file mode 100644 index 0000000..aa0207f --- /dev/null +++ b/test/assets/TABLEAU_10_TWB.twb @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + diff --git a/test/assets/TABLEAU_10_TWBX.twbx b/test/assets/TABLEAU_10_TWBX.twbx new file mode 100644 index 0000000..ef8f910 Binary files /dev/null and b/test/assets/TABLEAU_10_TWBX.twbx differ diff --git a/test/assets/TABLEAU_93_TDS.tds b/test/assets/TABLEAU_93_TDS.tds new file mode 100644 index 0000000..2afa3ea --- 
/dev/null +++ b/test/assets/TABLEAU_93_TDS.tds @@ -0,0 +1 @@ + diff --git a/test/assets/TABLEAU_93_TWB.twb b/test/assets/TABLEAU_93_TWB.twb new file mode 100644 index 0000000..cdb6484 --- /dev/null +++ b/test/assets/TABLEAU_93_TWB.twb @@ -0,0 +1 @@ + diff --git a/test/assets/datasource_test.tds b/test/assets/datasource_test.tds new file mode 100644 index 0000000..a1e78a8 --- /dev/null +++ b/test/assets/datasource_test.tds @@ -0,0 +1,86 @@ + + + + + + + a + 130 + [a] + [xy] + a + 1 + string + Count + 255 + true + + "SQL_WVARCHAR" + "SQL_C_WCHAR" + "true" + + + + Today's Date + 130 + [Today's Date] + [xy] + a + 1 + string + Count + 255 + true + + "SQL_WVARCHAR" + "SQL_C_WCHAR" + "true" + + + + x + 3 + [x] + [xy] + x + 2 + integer + Sum + 10 + true + + "SQL_INTEGER" + "SQL_C_SLONG" + + + + y + 3 + [y] + [xy] + y + 3 + integer + Sum + 10 + true + + "SQL_INTEGER" + "SQL_C_SLONG" + + + + + + + + + + + + + + + + + diff --git a/test/assets/datasource_test.twb b/test/assets/datasource_test.twb new file mode 100644 index 0000000..af87659 --- /dev/null +++ b/test/assets/datasource_test.twb @@ -0,0 +1,172 @@ + + + + + + + + + + + + + + + a + 130 + [a] + [xy] + a + 1 + string + Count + 255 + true + + "SQL_WVARCHAR" + "SQL_C_WCHAR" + "true" + + + + x + 3 + [x] + [xy] + x + 2 + integer + Sum + 10 + true + + "SQL_INTEGER" + "SQL_C_SLONG" + + + + y + 3 + [y] + [xy] + y + 3 + integer + Sum + 10 + true + + "SQL_INTEGER" + "SQL_C_SLONG" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +