From 6663c876a5795b9aa56cff3935a09e9bace39cbf Mon Sep 17 00:00:00 2001 From: T8y8 Date: Tue, 24 May 2016 10:58:58 -0700 Subject: [PATCH 01/26] Here's an intermediate stab at twbx support. Does not include TDSX yet. I'm not sure where a lot of the zip stuff should go. --- tableaudocumentapi/workbook.py | 115 +++++++++++++++++++++++++-------- test.py | 52 +++++++++++++-- 2 files changed, 137 insertions(+), 30 deletions(-) diff --git a/tableaudocumentapi/workbook.py b/tableaudocumentapi/workbook.py index 67dbc32..80449f1 100644 --- a/tableaudocumentapi/workbook.py +++ b/tableaudocumentapi/workbook.py @@ -3,10 +3,57 @@ # Workbook - A class for writing Tableau workbook files # ############################################################################### +import contextlib import os +import shutil +import tempfile +import zipfile + import xml.etree.ElementTree as ET + from tableaudocumentapi import Datasource +########################################################################### +# +# Utility Functions +# +########################################################################### + + +@contextlib.contextmanager +def temporary_directory(*args, **kwargs): + d = tempfile.mkdtemp(*args, **kwargs) + try: + yield d + finally: + shutil.rmtree(d) + + +def find_twb_in_zip(zip): + for filename in zip.namelist(): + if os.path.splitext(filename)[-1].lower() == '.twb': + return filename + + +def get_twb_xml_from_twbx(filename): + with temporary_directory() as temp: + with zipfile.ZipFile(filename) as zf: + zf.extractall(temp) + twb_file = find_twb_in_zip(zf) + twb_xml = ET.parse(os.path.join(temp, twb_file)) + + return twb_xml + + +def build_twbx_file(twbx_contents, zip): + for root_dir, _, files in os.walk(twbx_contents): + relative_dir = os.path.relpath(root_dir, twbx_contents) + for f in files: + temp_file_full_path = os.path.join( + twbx_contents, relative_dir, f) + zipname = os.path.join(relative_dir, f) + zip.write(temp_file_full_path, arcname=zipname) + class 
Workbook(object): """ @@ -24,30 +71,18 @@ def __init__(self, filename): Constructor. """ - # We have a valid type of input file - if self._is_valid_file(filename): - # set our filename, open .twb, initialize things - self._filename = filename - self._workbookTree = ET.parse(filename) - self._workbookRoot = self._workbookTree.getroot() - - # prepare our datasource objects - self._datasources = self._prepare_datasources( - self._workbookRoot) # self.workbookRoot.find('datasources') - else: - print('Invalid file type. Must be .twb or .tds.') - raise Exception() - - @classmethod - def from_file(cls, filename): - "Initialize datasource from file (.tds)" - if self._is_valid_file(filename): - self._filename = filename - dsxml = ET.parse(filename).getroot() - return cls(dsxml) + self._filename = filename + + # Determine if this is a twb or twbx and get the xml root + if zipfile.is_zipfile(self._filename): + self._workbookTree = get_twb_xml_from_twbx(self._filename) else: - print('Invalid file type. Must be .twb or .tds.') - raise Exception() + self._workbookTree = ET.parse(self._filename) + + self._workbookRoot = self._workbookTree.getroot() + # prepare our datasource objects + self._datasources = self._prepare_datasources( + self._workbookRoot) # self.workbookRoot.find('datasources') ########### # datasources @@ -76,7 +111,11 @@ def save(self): """ # save the file - self._workbookTree.write(self._filename) + + if zipfile.is_zipfile(self._filename): + self._save_into_twbx(self._filename) + else: + self._workbookTree.write(self._filename) def save_as(self, new_filename): """ @@ -89,8 +128,10 @@ def save_as(self, new_filename): Nothing. 
""" - - self._workbookTree.write(new_filename) + if zipfile.is_zipfile(self._filename): + self._save_into_twbx(new_filename) + else: + self._workbookTree.write(new_filename) ########################################################################### # @@ -107,6 +148,28 @@ def _prepare_datasources(self, xmlRoot): return datasources + def _save_into_twbx(self, filename=None): + # Save reuses existing filename, 'save as' takes a new one + if filename is None: + filename = self._filename + + # Saving a twbx means extracting the contents into a temp folder, + # saving the changes over the twb in that folder, and then + # packaging it back up into a specifically formatted zip with the correct + # relative file paths + + # Extract to temp directory + with temporary_directory() as temp_path: + with zipfile.ZipFile(self._filename) as zf: + twb_file = find_twb_in_zip(zf) + zf.extractall(temp_path) + # Write the new version of the twb to the temp directory + self._workbookTree.write(os.path.join(temp_path, twb_file)) + + # Write the new twbx with the contents of the temp folder + with zipfile.ZipFile(filename, "w", compression=zipfile.ZIP_DEFLATED) as new_twbx: + build_twbx_file(temp_path, new_twbx) + @staticmethod def _is_valid_file(filename): fileExtension = os.path.splitext(filename)[-1].lower() diff --git a/test.py b/test.py index fd7d1bd..56364e2 100644 --- a/test.py +++ b/test.py @@ -1,6 +1,8 @@ -import unittest +import base64 import io import os +import unittest + import xml.etree.ElementTree as ET from tableaudocumentapi import Workbook, Datasource, Connection, ConnectionParser @@ -17,6 +19,9 @@ TABLEAU_CONNECTION_XML = ET.fromstring( '''''') +TABLEAU_10_TWBX = '' + + class HelperMethodTests(unittest.TestCase): def test_is_valid_file_with_valid_inputs(self): @@ -39,7 +44,6 @@ def test_can_extract_legacy_connection(self): self.assertIsInstance(connections[0], Connection) self.assertEqual(connections[0].dbname, 'TestV1') - def 
test_can_extract_federated_connections(self): parser = ConnectionParser(ET.fromstring(TABLEAU_10_TDS), '10.0') connections = parser.get_connections() @@ -122,7 +126,8 @@ def test_can_update_datasource_connection_and_save(self): original_wb.save() new_wb = Workbook(self.workbook_file.name) - self.assertEqual(new_wb.datasources[0].connections[0].dbname, 'newdb.test.tsi.lan') + self.assertEqual(new_wb.datasources[0].connections[ + 0].dbname, 'newdb.test.tsi.lan') class WorkbookModelV10Tests(unittest.TestCase): @@ -152,7 +157,46 @@ def test_can_update_datasource_connection_and_saveV10(self): original_wb.save() new_wb = Workbook(self.workbook_file.name) - self.assertEqual(new_wb.datasources[0].connections[0].dbname, 'newdb.test.tsi.lan') + self.assertEqual(new_wb.datasources[0].connections[ + 0].dbname, 'newdb.test.tsi.lan') + + +class WorkbookModelV10TWBXTests(unittest.TestCase): + + def setUp(self): + self.workbook_file = io.FileIO('testtwbx.twbx', 'wb') + self.workbook_file.write(base64.b64decode(TABLEAU_10_TWBX)) + self.workbook_file.seek(0) + + def tearDown(self): + self.workbook_file.close() + os.unlink(self.workbook_file.name) + + def test_can_open_twbx(self): + wb = Workbook(self.workbook_file.name) + self.assertTrue(wb.datasources) + self.assertTrue(wb.datasources[0].connections) + + def test_can_open_twbx_and_save_changes(self): + original_wb = Workbook(self.workbook_file.name) + original_wb.datasources[0].connections[0].server = 'newdb.test.tsi.lan' + original_wb.save() + + new_wb = Workbook(self.workbook_file.name) + self.assertEqual(new_wb.datasources[0].connections[ + 0].server, 'newdb.test.tsi.lan') + + def test_can_open_twbx_and_save_as_changes(self): + new_twbx_filename = self.workbook_file.name + "_TEST_SAVE_AS" + original_wb = Workbook(self.workbook_file.name) + original_wb.datasources[0].connections[0].server = 'newdb.test.tsi.lan' + original_wb.save_as(new_twbx_filename) + + new_wb = Workbook(new_twbx_filename) + 
self.assertEqual(new_wb.datasources[0].connections[ + 0].server, 'newdb.test.tsi.lan') + + os.unlink(new_twbx_filename) if __name__ == '__main__': unittest.main() From 8f037a9a0fd65976fae32f6329970306cf7a04bf Mon Sep 17 00:00:00 2001 From: Russell Hay Date: Tue, 28 Jun 2016 15:03:40 -0700 Subject: [PATCH 02/26] Adding override for pycodestyle/pep8 and moving test files around --- .gitignore | 1 + setup.cfg | 9 +++++++++ setup.py | 6 +++--- test/__init__.py | 0 test.py => test/bvt.py | 0 5 files changed, 13 insertions(+), 3 deletions(-) create mode 100644 setup.cfg create mode 100644 test/__init__.py rename test.py => test/bvt.py (100%) diff --git a/.gitignore b/.gitignore index 3fc52ff..ee250af 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,4 @@ target/ #Other things .DS_Store +.idea diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..bfa3e53 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,9 @@ +[wheel] +universal = 1 + +[pycodestyle] +select = +max_line_length = 120 + +[pep8] +max_line_length = 120 \ No newline at end of file diff --git a/setup.py b/setup.py index 8925444..f693919 100644 --- a/setup.py +++ b/setup.py @@ -5,12 +5,12 @@ setup( name='tableaudocumentapi', - version='0.0.1', - summary='A Python module for working with Tableau files.', + version='0.1.0-dev', author='Tableau Software', author_email='github@tableau.com', url='https://github.com/tableau/document-api-python', py_modules=['tableaudocumentapi'], license='MIT', - description='A Python module for working with Tableau files.' 
+ description='A Python module for working with Tableau files.', + test_suite='test' ) diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test.py b/test/bvt.py similarity index 100% rename from test.py rename to test/bvt.py From 83abb63b9698cbea72e7e065699db5356c8b1a5a Mon Sep 17 00:00:00 2001 From: Russell Hay Date: Tue, 28 Jun 2016 15:12:24 -0700 Subject: [PATCH 03/26] Improve the call of pep8 and move to using setup.py's test feature --- .travis.yml | 4 ++-- test/bvt.py | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index db3656e..2480df6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,9 +12,9 @@ install: # command to run tests script: # Tests - - python test.py + - python setup.py test # pep8 - - pep8 --ignore=E501 . + - pep8 . # Examples - (cd "Examples/Replicate Workbook" && python replicateWorkbook.py) - (cd "Examples/List TDS Info" && python listTDSInfo.py) diff --git a/test/bvt.py b/test/bvt.py index 3172673..f521465 100644 --- a/test/bvt.py +++ b/test/bvt.py @@ -5,17 +5,19 @@ from tableaudocumentapi import Workbook, Datasource, Connection, ConnectionParser +# Disable the 120 line limit because of the embedded XML on these lines +# TODO: Move the XML into external files and load them when needed -TABLEAU_93_WORKBOOK = '''''' +TABLEAU_93_WORKBOOK = '''''' # noqa -TABLEAU_93_TDS = '''''' +TABLEAU_93_TDS = '''''' # noqa -TABLEAU_10_TDS = '''''' +TABLEAU_10_TDS = '''''' # noqa -TABLEAU_10_WORKBOOK = '''''' +TABLEAU_10_WORKBOOK = '''''' # noqa TABLEAU_CONNECTION_XML = ET.fromstring( - '''''') + '''''') # noqa class HelperMethodTests(unittest.TestCase): From 69aadaef5be5d6e0b185d2daa58383d735057670 Mon Sep 17 00:00:00 2001 From: Russell Hay Date: Tue, 28 Jun 2016 15:14:51 -0700 Subject: [PATCH 04/26] Adding EOF newline --- setup.cfg | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index bfa3e53..066edef 
100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,4 +6,5 @@ select = max_line_length = 120 [pep8] -max_line_length = 120 \ No newline at end of file +max_line_length = 120 + From 26d5116c022d995486892edf8ede8602e2e581e9 Mon Sep 17 00:00:00 2001 From: Russell Hay Date: Wed, 29 Jun 2016 10:01:06 -0700 Subject: [PATCH 05/26] script for publishing versions to pypi --- publish.sh | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100755 publish.sh diff --git a/publish.sh b/publish.sh new file mode 100755 index 0000000..99a3115 --- /dev/null +++ b/publish.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -e + +rm -rf dist +python setup.py sdist +python setup.py bdist_wheel +python3 setup.py bdist_wheel +twine upload dist/* From c70cb5fda4bc07d51948cf53eb78ff19987938e2 Mon Sep 17 00:00:00 2001 From: T8y8 Date: Wed, 29 Jun 2016 15:16:40 -0700 Subject: [PATCH 06/26] Update the tests to use external files --- test/assets/CONNECTION.xml | 1 + test/assets/TABLEAU_10_TDS.TDS | 1 + test/assets/TABLEAU_10_TWB.twb | 1 + test/assets/TABLEAU_10_TWBX.twbx | Bin 0 -> 12146 bytes test/assets/TABLEAU_93_TDS.tds | 1 + test/assets/TABLEAU_93_TWB.twb | 1 + test/bvt.py | 51 ++++++++++++++----------------- 7 files changed, 28 insertions(+), 28 deletions(-) create mode 100644 test/assets/CONNECTION.xml create mode 100644 test/assets/TABLEAU_10_TDS.TDS create mode 100644 test/assets/TABLEAU_10_TWB.twb create mode 100644 test/assets/TABLEAU_10_TWBX.twbx create mode 100644 test/assets/TABLEAU_93_TDS.tds create mode 100644 test/assets/TABLEAU_93_TWB.twb diff --git a/test/assets/CONNECTION.xml b/test/assets/CONNECTION.xml new file mode 100644 index 0000000..392d112 --- /dev/null +++ b/test/assets/CONNECTION.xml @@ -0,0 +1 @@ + diff --git a/test/assets/TABLEAU_10_TDS.TDS b/test/assets/TABLEAU_10_TDS.TDS new file mode 100644 index 0000000..7a81784 --- /dev/null +++ b/test/assets/TABLEAU_10_TDS.TDS @@ -0,0 +1 @@ + diff --git a/test/assets/TABLEAU_10_TWB.twb b/test/assets/TABLEAU_10_TWB.twb new file 
mode 100644 index 0000000..c116bdf --- /dev/null +++ b/test/assets/TABLEAU_10_TWB.twb @@ -0,0 +1 @@ + diff --git a/test/assets/TABLEAU_10_TWBX.twbx b/test/assets/TABLEAU_10_TWBX.twbx new file mode 100644 index 0000000000000000000000000000000000000000..ef8f910d02036fbaeb6d9948f46df02078a45b39 GIT binary patch literal 12146 zcmd_Qbxa&i6fR0jf#RhU*Wylbik4E`iWO&ZclRw;+T!l+x=_5ZELw_FY;oA)x{KSg zd;7b|y~(}F%lqs7^=6WpoXLDMbLPxB$(ei~)i9sE!obFOj$xqJq5Ny6L{^Ug0|Q?Q z0|OTW1LM8ByR!hFkH6K4iKF|9I5kS97m0HcW8~Y|$TQ+^FxjgyqdCo%&!(}gsr{-& zW5vm*L&7RJ>`~^GLUxL+q;|fWeG^}js+4&T`$lBi)yj--nk02p8~jRNepWv^h#BY- zJljL9lxl3CA0~N!S6K-Bfzqz!99Z=G^R@S^5kN9725k$2f3#;@geOA#lE5+u_V4*O<+nCKiMz0oJn%lk@~BH=v{aw-{PT| z|4p>?+K2oJ;om_@3sL>@9_OIDy0$Fa`B(=->z5>xr34_~R6(W-j^qWea;ZSIf&E_? zwJuXl(nX|L0{XhE>^|z4FaI=E5XS&^*Q(vtMF$#fU%I%t zSE$799jAa@SSidm&T`B(zA9|b|B5j~z!HDdFjYK9oBxMWls8Xg6<$PH##xeDUbCJ) z^2|f;;>|_E!frUj9rMOPEjM+^SZV8TKS#pZ2d+zDInu+ z_=k@RoZe784i{#Uk8HI)IV7#Qp4AyV1~n;^YJf*^`uoC`#IbpH;rd(4*7976KuInYZ6IOE-xomr|rZ^psCH6pz$8p1Z7T%|7(%nGl8I%j~<^{ zTnG2w@|DE^SwM#Nhl<(HnxxtcbPr7wM!ucZ7(tearAx_6PDZ{c!~K|Q#l?Yv916)v zk`=+apq~0SmYTn;vjUXO`o}rd_gsW2D&+R7gEVQM##m8LXg04nhz0lm0_2;A^mO;! zhuDVo1p50WLxtHwV?S6-zWD-E{?vD6wO08i)FveIoW1)MLTmp`fjL{v1^L?HA6dw+ zuuFKY7)WM=fG-iqV0w<-s9UESnCdSaA_1EAMq!yH`22^`}kU$@lVEH zu|7YgbHb&26`{x?gCmiKMjilQkh{OvL}{7WapJIqUG)X;oCm-XQUSN;-R>Q!hG z$jZNxiun!Y5|5yy;3bQ?@&+Dq+~icc?yc@sx5V~D{dbjovu^27&-;Bg->bu`p;!j3 zBZF){2F;^vrIflN#=s>50Z=Fq-ulb8a$!K~CK!Zve%vrU5<~U|0hIqn2;=_tv*-Rp zMa`sd1A~dje^(%niqBOFo*_>;{OU2rz;ETRj5$=ukty}{!ywfy5wcxPg%aoM+idy$ zT7WgVXKyDnfdLu0vaEt0oO#9K%m#2}MbTA8Yhe8KQL*WHY~XqdTVzfDjPm8{`E+?g zMOMuAiZ`r`W3AXVAJkAnSy4#)YdlQIj>)x)K=2w3Wr;+G|Z7tJ;`HtsuFBDv};Z<7cFtml?W|#%t4H0X8fw92u&dL|VX!o4 z1Z8Es!+JN$QUv3f-S?Sq=~RR#U2}XrbWZK{cP3qtOHnJK8nbhCk9rYbuHg7>iz0&! z`BVe;#(7mx;Q6ubee-i?Wj2S;s+gdd##QmSuaY2`JBEm527C)X%1C z^I3Ya8Nr8(w1|mlj!#wOdrszqBV3LcIROXp@eY}!T6!N;#>&(VNbN1a7Ce09}lO0F|!_xu{}9*p0wG}Xtpte@%G|lNBtM`r|?9~ zrypcO4IN@W@Ks;_-nX3*$(_IT*BlnDZKT(^|`Fqq? 
z(vx1!p%jMdU!5guOIcG+E}QnfbxK|WM_KtIBFY@CJGL{fGsDB>Cc~Msr@-#-h)Fa{ zI*10;=|%5v($bsE$ui^K@EWiv+Wi`U`?_{evhRYOwJR^cfg2`>jOz6AAw4Gb1B?VA z;?Q6>$a#7K-}4E1ij zcEeuG?}j-vUwlF|j7r>m{FcN^S`s%XoOsk%8pD~)ki@Cc=Ff8<08@Q+Y~Z32G!*}+ z9hrXh_M;BPqDNS##m?(*=*h@h`9Ut{@tZ*|lc`2}+S;;fFSNx@ai^>hUngbb>jbW3 z#odtD@2%Wc92zJ>u!%?{?*uBBI^j_0a+KC+p;M@U9O>v3XWx zC8(YH9K$>!NbQKq^#EFpjYPcOS`db~o(f=ni_qw*P1?h!+aNOtmH!*ckBI(=L30!- zp8#jTb1^$}*V8d9G+|g&gG)KcMrE2?_YAAKOM9bX-NJr73`;8h#3se=$W57_CAEu( zr8FAg`BTwgJE((L*WzEIqmqdNOFzl4m^t}Gw7x^nOXn2YTjqq?gX{gqgeAy~nkN); z$s$|f`{}v61&qwL79YKiZ__dSloQS;`(*3 z8JTV>HDdGm1#Gec|A3D7D0<*njV$A=L`_2Lh2FWOl=HQv9vFkPa-Tb9*8(;;#JxJL znd6m}W|>(eFj$Q^Sp)<;#q}Ff+jWXKVPhJx$$ZxScXSxz!ZZ&dzhr|)Nk|}vOS8^M znU7&bVdOQtC?eW9R5P~vYb_{bI_)=iI5!wSaGCxq-jKaIa-g_p`AsykPC@X!MPXm0 zTvGYT-^&*iP95(8>VtGq%qi%n10`FW8m&XcsmeU*UP&jS-qGbs9bNMa7hnrg%*^%f z+yTY2dbNVYyVo6}uV5=pYmr$YqF>(JV5)mPwagkLe94+DJ5`EDW=fh8`e}NEc$9ez zgt>-Esva!vQz4}w9k%$Hj+07CRm~5qGTAE z-pcXbKE8l+QZ0&zO9f2n6xE>aay`)u&BTA)iLI9Hh5tPa@(Yq4ntdBAO+`yAvx&Ah zh^L6%Fd931u(yY=H%}sXQQZ<|c|@yse=VS`uNs0e_cD&q9;ITx^gUBGBnJuCqNWvS7=IL-gBqhn_JNRwlnms z*k@e+{+U!@e9SoiBq@R@oF!u!HjZvYhAfrjx)n> zG6hi)6-9L zs$yN5%c;)Uji#A>*}x&~Q*|^#LWgVceTyd&)0EmVhKO)<-u|TE(S4>pnU5l6{fs>$ zG&Kz8K!!F2)gDP_e;P=~JAvy{&sQi~ULuvO3T<5wtAHA0w(n81O>t`F`MWtDtlr!3 zWxKjg&6U3-tk2G{;mfQk%+qWus~MxwxvEyz>DT3Qa?V}etpincdnt-adK(L_>1`|t z^O`g@vCirhIH-$>Rj`Kg%@%NJ8Cs^UO_pm}Ew_-b`b)woPxUKbdJ7>0vMzynRHkV3ePSnm`2HQ*TBjecvJu}TiW^HKR zxLxg7{FCa-pg%=Y+5l$}_`bTjVfN_mCoRcf)rNHr{r1=MDRq2+x#TgHBd;CYc9vYj zM5;TT_iU^=GYR04I;XQ;O^CWf)QTdJ(aw}v6Jt4lJgfaM+G+C)`GdCs%A81 zhqYspx<==*N9vxqTl}@|))JY<{0T!dA4LS~C#;OrxUyZRzPgHT>7R`FG-wYByQNuI zc-5qwbS{iIef$Y#O|9|f=5hE@FLd~JP}MqFd$tuj{wT$@;(g87o*JRQbK!(-nSeic z;j2O;+anqi9Y^n9H9rp)GJg5|c)JTa^3bxlo%x-S-8RCJWxWRNNq*AVjFe{ae6@y5>e?(p>xH+?_7i+sMgAXT z;wPpgy@M2XE)TqxT^!bgjcC`*GgA+tLaWrd>7=U0a%2RrVbp3Ee!$^J;(Ue2L-9X_Ia1 zwiEKnk@p%#a28e#orGhSYn9ulFvV|F*tLZl9lKXC(BYgIv%KWj3d8ppl zOsnh)HQFFPUQqeWB&9y?)H4H=3{g)oaR}*kyi!PuWYI+DCwL?7hqE2WL+3_pJ?@@P 
zLue{?Sc?SVIj}S8&_ujv3W!) z>B&BAV0dL5yLy?6xsjcF@3(r0w8a_{-HS%~#r>8PG(P~Btg(k5pm(!L&D)j+cpe{Z zU4yN?8wAYsSF*T8DEK>*9W?cMI|4>(%Ez2C0H)1WCrB)AenYUs_isMBS=yiUU26%z zDF-w6dh$QOZ|(V;^*d*X8UG4%zu;q*6ddLHuM(rRyYJ3_2M*IHsWD35vH}N z1)JKOf`#4U-|`??kz_~_Bq5R)Nr#k{!a!r7pQE3l3D8fwHUThiggXL`U_cZiAe;Xx zOzw6A5qK+$kVU+(d9%6y(RCApM&LQo7kLX@DO{px_5qy$JX&l3M$EuJTL*B_KGFJL zhNdM15DfQb_BJ8PJ!nIy)O#}rgf_W1vk^Sv^ce%V2p4gZo|l>tsjye#WS@E+8((V1 z2yLp1<4=C^-hA7L*Mm11J&!OApwGkE8@PZy?Rdc#0r~<8kA-0&-UpDFKF#d6MOYbP z0`M~j$UIn0u?z;FeQKNK9)uy%-2~=DpwG@QiL59={l9L$jr0{3n5M*Hy^4#^Kv5Ho zSQVDsE|CU(1mpd*E-^GPw#6rsFuPmq0Fe;pZtNahw{Z6_(Ej$`_Wq;ID}*cJ1VQJ) z9m3xIqzAu;rU%C@5*{tqf76${Ie{P#;O-{uVQ`CK_}o4~-2czIn?$#k0qn@<-S}az z)(K^?;o%H1H~kNr?3?VcM})2iJ5o3VBMe*Sxhy_IL}6st<{oSSL2}E1d>TR!MsoTE z5CQlC2u~aU^|Nn6H=!`G%~V7#g7cR5*7cUI11F3kj5v$|g&{*&7%m$X3`^XUxwXC} zz16)Hy7lg0Ks}Yg2jT*;fP_FyAU+TWNKD)-+bi2I+b25!>E*jMxTQtXASIDFNOmOu zE&r{1zzdXE2tA4zNi6m3>I<;<-3pvADst08{8`4kntOu`tTp{|`O6gTS1?dD9u@)$z0;s&>=qRw4km`_(_!j>+sB zgOPdgn&PB3k}vcHWknErNUvhhSdwqb=tGRR%s8@m)?zID+e!tFhp|h3`UJKBre^iZ|KH- zp7+lQi`FI36ml^wVxT}NGPrB(m{K)~U6N43J(?`v@y+74c zTVRO9AEF>Ak>+i!KT2XX>}W91`jOT@&$s*G$YUk^Dq<_GiIjGaEVcIHSKm?T=uz#4 zA_WzN^PT5#rsqk#iQDvPDKCW64m>fGDAI_Fy@K{js0Bn`lhr~wLQ765BEn=$rHR{7 zrI+)54CR0{bP1B&g<{yG^FGAc8f?p`k)q(S)GkOLZCemO&bf3K$zQ+wgv&NgtFinTT;<!BAv0Y?|+zKSV=H=-p0t&PFQ8cEWe>o(6)UfS-^{h^~^3m z-H&?cO=Ip4G`vW=r#czx^75oX>(Bf>_>|kTC zmOUR(Ax!8Dj_V!pWzr8dH5eHqinJT+JoQ@SjHJj7&I@YnL z{LM{kR`Hq}OD44P{&eX)(rm>t%yp4Klz!QEs^|j`^c7SuD^obs?@x1|$P1)zE%aM{ z18M}1*4TGBnSLTDPtBgxF~Zu&7|+65(_e207ET?_iJsJ_(HUv`YVS$G`%@V4ra?NF zv);XKjI80tb5=F(gT3vLj#-1M|IflTS9QBXQ5K;(2`6Pyd-)8yZ+S_T{;ogRy8r0= z8}A&?)YaK)>FY12D+_V&rrFu*r}CJs&lwi3d>oROQO#wfV$&~hB_EHq*TF@;MBPqN zY9o;DG-2zKkm-$UWi)!vZ|#6z@OTckC$Pn6IuD?Y>$aYFz5i!}N6rm+##-x!Iw!?htLqE!KdKumL6Yk;6t(bDGC&Kpp!>EQ6hzC9I=cvoV}jhUzt1hE_WjvIIm>v65$|H<0&JJ`?C#ix+M5~|^hcM_cT zt)7owSV}X1kyJ&N(NytPW#+{GuhUb@^wdqc0q?;|T7H>JbR zw@sh=KCV8yq#2Ej#vR<4JY$XW1s7+ue>uN=FutjhMv!iUJ~SNq^1?W}_?XSdaF?$z 
z&b0l~5%1(ra*AQky9x-?D4-3-+fI*bS{;Uxi;5Dh{EI{dugxDtQgE2HQTJN!%%=fD zzZrHw!e@sd8WCC;D1ERN<@tE^;%=@1NRseFZR2HtNK3-wQBbv0~A0rQt20(?U82Y|7;X@d0rtT&Fmte zsLD^oS-ii&bMR)NVdZz_iG?i8>SXimU0|Qz$Not5E2vbzHY(@CzZEW_WP?g~M{IxR z0mP2t{Qv`;(byHD>lCdM>LGF7(T2$XG@M{X3+)H8Z8h3=Hk7}mr&U^i(ZS~=6Dn)? z(VWan@g1F(TY1RUiYql+egZpa?@46>x3fy+Rryt+Jdn@;`&TB!I)rKtgn1_y+-gdf z2u;x(oA-2?J}p8@T8~0rGoqa?6H`Eb6)Nq`%3#NP#>lHcF;skw!8~`3xqcyby%uu4 z7{+vE@-1bv!$ld$ zI(GJ5AQ`V-N^OV$l7^P!E1>6THCn5Fua4Oh^d)tITNtz@BThKnfG^wW96QIV(fJ?8 z46s-9SHsxdx^$#3!Qm#%m(xDR8+~C10j~DLYQXYG)kYxq!YQ@YH8a@NKPX=}G-!qB zp$j;BU9JZ%SuVd9azb8XJQgI#(?dNrCCaxFoy8lrFw!B>7jjoMtaW=k&kmJll@4Yr z9!0v%2T1PqUV!c$?z}PYS1G6W41%P01JY!;;6YdTJJodUM`zC;ee#WyD!J42{;U^oflFll$Z-;(Nf!N61ip%=$8kOZ#DasTqS)WgGlI3GOufr-Arn z@3Q-&Byx)1PYAS;%$c`=PSiae_nl1D7w7$P$ol%|`*;wQ8L{q(DcjlZ%?Q^jII63E zs7`CfS5{P^tL~#?(mc1fvTuQR!*TiAl&Oj{&|tJK&5`23mx=ZD!9)&q^HUCXpj~uA zyXtRMSC2&MP<86m{(Wt3pvDg(`-Hl>Y7efR_CI!CdyMQW_8k`pL{xri-pOht=zo>w zO@ZKo$CnwJnYejpT&P&B06H>XS{pSDxeD`?NAYmJ=Y=!5H->Q8m?hl-I17_c*In#? 
z3Yqw1u~F-fhJ6e6Nf=Z8CID0YM#F)|8MFUFTm8z_el+>O%jRbk_D?1I-v!A|0SQRi z;cV8siLmnRY)((bLmpfz$L1X&rSqxeG)&fs?8&ElR`%bTRl{POvvs$4GC#&Q_ZuCJ zXJ(~OVof^Toz!y*=sM;PX@H#5e<_YadW|^n4@!K5n|2KlE^3?^nrbu@oN%V5D;@jy z$pN52an2L|dNQr*Wd*2m{M?AwACK%gwU(F*-D>y06$ZSMUoZQd{`9wGDomD9I!XbC zG+ZCa_*ES-5$Yv$P%bK#mx3;tzq30_xI z8^`5X#gV2n^EFKlrjfEa^Oh|2dA^^D=O!*`-AKJ5XBJtolc!>tEy`S26^B25uU18X zB?$FB)3|lQUA8aGE5v-H&roZ?>Y$}18SC7I|L{i9ke`|!q@zTkJl`V-;Vb#~+2}KI za@WHCwq}URE4l!l#}6G*NCx-)cRQe8YV|vb-;!LVT*81N>g`DLqCGw1$;IbJ*8~h3 z)^+wKXFk`*{27|9-_Lu{wDISavhkIc-E+oaO#N=~kOD*DXam3QmIZmwnVJlg=u4^G z$5w{83*n=;=@~-(_O%65D8JHSsZLlEm%HA(f`Z~O5KVT_YU+wE-VtQ~m2uFtGwS-~ zWsL0PY{`Eczw4~zOw~eF4FU(9q{cN$q3nc99WTELxmQrK-BevalI_ybu)1-uySG)P z2RmN)kz7cQJBl3x*+*DLmUzxuy<~+a-kO|(AK^>_H-E#bVeYH~?yqozaef<9U1duc zGcwy>xDj8R5?s*76`wcYmulqFicsgaC;EJ~v)fo5%Wb!^8GQOMw#(cFgsq~;f6{eg zB^m?Ld@J<NC1H&5plOPeBhkU9CoR z`w>~`e29JHE_en#$%{bRtut}8L!=4Qg7{OldSbqGD$Sw#ULL09?5Cym0hk$f>Hnnd zKGaBN4cBxwc*xWOH{5>&p;Gkw3N;hL25_q%V07#aWl8XR`ttTXL-|!W9u2S^wzmeQ zxL-lici%ZB7`fAkw5Q6V%R~;uFK#ubs3`$ouIm$Y`)-{0qU^waY4mqnOo2BiMhgYs z8UxHhtB>1npHJK$S9wMHt0XjcEoXES{~B`*#u|3awfA<9_VjuXE_C;1h>{MJM*d{K zzlc8zebYmrl}IWCUq4B+r9XePAAew-18<@HZ+|gsjYb|G`;O`1%%9bLS048L3HrC11A%$15|irj=z>-L zzUDSgh_6SY1M&!j$V z#U-CQ;?8XQh}HVILjKq1X67{D;#2VDOG9LcVTThcWq^(+RjlkH&iq^bj<0{PQ$iOi z2tTOMFSm671aU-Hzy)V%?z9#k3(RApCf@M0PPM1$Vs)h_(|JD}64RhhS=J(7-#Y?R zfxw?ueSa0y_MkkFj*5qZMwx82pY9~0GF3IMuCAJ_v*lOSUu=TC-Ohj6cn8;TK_A&H z2G7|xO)K&#o>1T=)WkSmXG+DJSA2Mo`P+Yd2PQd!HC>$EP%OkqzYK*qAzNcY%wFHk z1MVK10ic2Im@x{Ho%HF44rJXOy})&vYx^g71nRmP^=7XPINLY^DGZs!alr$k8rw62 z-LKlNk{LHTU*A2*pw3MI$Ds>Yj>O)t$dgEQOq4tr$vL_kd!VqrU^o6cejr_Ls7WUM z+O<|sLlSam1<3G=BW#SwI!`A{dPA%N;4>9NY|Jl3^mvjDvK;e{LriSLnE*?4qVck5 zkGlf77^!%{jX)9iS^nfo=gd*12b)w#L`bas`3?N;Bl0e0!#*?^rOkM-m`~QX$8Z~X z6N7g&hDHtmWltfHs}0Ks&~jJjg6Xz`xYy*>HVOC|5a#J zFBW($cR|2Zv)aGM5X4F;uSLq=o?yF|?5x4>6`TVvGYEQly?s@cTz~CSo;Px48xVB- z8vZ08h?xG;P8oTk^pN-fGzMSL=*P?4I?zaY&>OvoiHe_g0fuTO9Ka|FbWWo2$E+GMn7Y{XI 
z{ChWGmx7_wD;#hk-XGb3HE)oAnYWD7$@L1&UQj0EMqU!~(HA`H*xKEdhE#eyB$CvI zRwi#WWdX-@ArFdkb5jz?pog{SE9(Y#y9BNP*L_FS5KM{^Gj{LlKJ~w_s}Xz-FzDX} zTuS9AOU6!ar8md4;1p#O<7SYGqDx!@2xPc3w1Srd%K)U@g_V0mhufC*xei>?Y?1qB#^@UhE>2GzeK;K52*2pWSb4 zN+B#>5`;@=)n5FNK#w{nZa)D0wnQY)UFUG-zQ&I4BYHL>b!&qA@i!Pg#zDY%R}@HQlnvY7#^6Q5f? z$jlnB^tkX15P`ZE$3%10KIi6VsjFkJ>&|IR8D>d1*ZiB#jmwSeH?Y4FU0@<(OZ;%9 z(NW8B*-kM5V&+SSKijjpR)AEBbHVQt{QC!GWY|+ zyvDMpl$+Liis-m%$;d$qaxFVl2@Hv^aBr5ClN+w1IG=dTlKXPzPZJh)IU9{yxMT2! zn;{>_P+d2oio7P4rnu{i9&}q@ClD>lbP+uNxHc2Gj$P14@G7`%&rUQWY)5vbsM{YkSx^j6|#0JC_{^91%eNJTf_X!b=?`YFHGBb67# zV{uktabs+$8^-GMe24f6LR~*_pS=zC!MuDpLjS!AqwyPG z(Kkdt`hXJ!9u*$19k0jOnYZJlX4O4BOK(3x7ZFr zXh&l={nTF1I?cX8h@ow>$0fkQS%7?OHy@2emk!dIwS3TiXzyL>;l}h??P8 zpO!y58Yl;>Pq0OA@$}5kn95#-sad%qrN&?;}(b^AMXQ1&vsy%s%f${%LZ2w0v y!Fcjt^FPb~rn&#$u>YHs^1lEW7{UKo^8arXm5*vz*#9A({;SdddMMw2(*FSGkDJQ? literal 0 HcmV?d00001 diff --git a/test/assets/TABLEAU_93_TDS.tds b/test/assets/TABLEAU_93_TDS.tds new file mode 100644 index 0000000..2afa3ea --- /dev/null +++ b/test/assets/TABLEAU_93_TDS.tds @@ -0,0 +1 @@ + diff --git a/test/assets/TABLEAU_93_TWB.twb b/test/assets/TABLEAU_93_TWB.twb new file mode 100644 index 0000000..cdb6484 --- /dev/null +++ b/test/assets/TABLEAU_93_TWB.twb @@ -0,0 +1 @@ + diff --git a/test/bvt.py b/test/bvt.py index 589a9ee..779fd7b 100644 --- a/test/bvt.py +++ b/test/bvt.py @@ -1,5 +1,3 @@ -import base64 -import io import os import unittest @@ -7,21 +5,18 @@ from tableaudocumentapi import Workbook, Datasource, Connection, ConnectionParser -# Disable the 120 line limit because of the embedded XML on these lines -# TODO: Move the XML into external files and load them when needed -TABLEAU_93_WORKBOOK = '''''' # noqa +TABLEAU_93_TWB = 'test/assets/TABLEAU_93_TWB.twb' -TABLEAU_93_TDS = '''''' # noqa +TABLEAU_93_TDS = 'test/assets/TABLEAU_93_TDS.tds' -TABLEAU_10_TDS = '''''' # noqa +TABLEAU_10_TDS = 'test/assets/TABLEAU_10_TDS.tds' -TABLEAU_10_WORKBOOK = '''''' # noqa +TABLEAU_10_TWB = 
'test/assets/TABLEAU_10_TWB.twb' -TABLEAU_CONNECTION_XML = ET.fromstring( - '''''') # noqa +TABLEAU_CONNECTION_XML = ET.parse('test/assets/CONNECTION.xml').getroot() -TABLEAU_10_TWBX = '' +TABLEAU_10_TWBX = 'test/assets/TABLEAU_10_TWBX.twbx' class HelperMethodTests(unittest.TestCase): @@ -40,14 +35,14 @@ def test_is_valid_file_with_invalid_inputs(self): class ConnectionParserTests(unittest.TestCase): def test_can_extract_legacy_connection(self): - parser = ConnectionParser(ET.fromstring(TABLEAU_93_TDS), '9.2') + parser = ConnectionParser(ET.parse(TABLEAU_93_TDS), '9.2') connections = parser.get_connections() self.assertIsInstance(connections, list) self.assertIsInstance(connections[0], Connection) self.assertEqual(connections[0].dbname, 'TestV1') def test_can_extract_federated_connections(self): - parser = ConnectionParser(ET.fromstring(TABLEAU_10_TDS), '10.0') + parser = ConnectionParser(ET.parse(TABLEAU_10_TDS), '10.0') connections = parser.get_connections() self.assertIsInstance(connections, list) self.assertIsInstance(connections[0], Connection) @@ -80,9 +75,9 @@ def test_can_write_attributes_to_connection(self): class DatasourceModelTests(unittest.TestCase): def setUp(self): - self.tds_file = io.FileIO('test.tds', 'w') - self.tds_file.write(TABLEAU_93_TDS.encode('utf8')) - self.tds_file.seek(0) + with open(TABLEAU_93_TDS, 'rb') as in_file, open('test.tds', 'wb') as out_file: + out_file.write(in_file.read()) + self.tds_file = out_file def tearDown(self): self.tds_file.close() @@ -121,9 +116,9 @@ def test_save_has_xml_declaration(self): class DatasourceModelV10Tests(unittest.TestCase): def setUp(self): - self.tds_file = io.FileIO('test10.tds', 'w') - self.tds_file.write(TABLEAU_10_TDS.encode('utf8')) - self.tds_file.seek(0) + with open(TABLEAU_10_TDS, 'rb') as in_file, open('test.twb', 'wb') as out_file: + out_file.write(in_file.read()) + self.tds_file = out_file def tearDown(self): self.tds_file.close() @@ -151,9 +146,9 @@ def test_can_save_tds(self): class 
WorkbookModelTests(unittest.TestCase): def setUp(self): - self.workbook_file = io.FileIO('test.twb', 'w') - self.workbook_file.write(TABLEAU_93_WORKBOOK.encode('utf8')) - self.workbook_file.seek(0) + with open(TABLEAU_93_TWB, 'rb') as in_file, open('test.twb', 'wb') as out_file: + out_file.write(in_file.read()) + self.workbook_file = out_file def tearDown(self): self.workbook_file.close() @@ -179,9 +174,9 @@ def test_can_update_datasource_connection_and_save(self): class WorkbookModelV10Tests(unittest.TestCase): def setUp(self): - self.workbook_file = io.FileIO('testv10.twb', 'w') - self.workbook_file.write(TABLEAU_10_WORKBOOK.encode('utf8')) - self.workbook_file.seek(0) + with open(TABLEAU_10_TWB, 'rb') as in_file, open('test.twb', 'wb') as out_file: + out_file.write(in_file.read()) + self.workbook_file = out_file def tearDown(self): self.workbook_file.close() @@ -221,9 +216,9 @@ def test_save_has_xml_declaration(self): class WorkbookModelV10TWBXTests(unittest.TestCase): def setUp(self): - self.workbook_file = io.FileIO('testtwbx.twbx', 'wb') - self.workbook_file.write(base64.b64decode(TABLEAU_10_TWBX)) - self.workbook_file.seek(0) + with open(TABLEAU_10_TWBX, 'rb') as in_file, open('test.twbx', 'wb') as out_file: + out_file.write(in_file.read()) + self.workbook_file = out_file def tearDown(self): self.workbook_file.close() From 955e418d3235db7a28fe6209f3048b7b8441d81f Mon Sep 17 00:00:00 2001 From: T8y8 Date: Wed, 29 Jun 2016 15:37:27 -0700 Subject: [PATCH 07/26] Fix case sensitivity --- test/assets/{TABLEAU_10_TDS.TDS => TABLEAU_10_TDS.tds} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/assets/{TABLEAU_10_TDS.TDS => TABLEAU_10_TDS.tds} (100%) diff --git a/test/assets/TABLEAU_10_TDS.TDS b/test/assets/TABLEAU_10_TDS.tds similarity index 100% rename from test/assets/TABLEAU_10_TDS.TDS rename to test/assets/TABLEAU_10_TDS.tds From 92668e0b065ea4a3d7bf70146c0c918af3c3ba30 Mon Sep 17 00:00:00 2001 From: T8y8 Date: Wed, 29 Jun 2016 16:00:54 -0700 
Subject: [PATCH 08/26] Use more flexible paths for test assets --- test/bvt.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/test/bvt.py b/test/bvt.py index 779fd7b..aa4a247 100644 --- a/test/bvt.py +++ b/test/bvt.py @@ -5,18 +5,19 @@ from tableaudocumentapi import Workbook, Datasource, Connection, ConnectionParser +TEST_DIR = os.path.dirname(__file__) -TABLEAU_93_TWB = 'test/assets/TABLEAU_93_TWB.twb' +TABLEAU_93_TWB = os.path.join(TEST_DIR, 'assets', 'TABLEAU_93_TWB.twb') -TABLEAU_93_TDS = 'test/assets/TABLEAU_93_TDS.tds' +TABLEAU_93_TDS = os.path.join(TEST_DIR, 'assets', 'TABLEAU_93_TDS.tds') -TABLEAU_10_TDS = 'test/assets/TABLEAU_10_TDS.tds' +TABLEAU_10_TDS = os.path.join(TEST_DIR, 'assets', 'TABLEAU_10_TDS.tds') -TABLEAU_10_TWB = 'test/assets/TABLEAU_10_TWB.twb' +TABLEAU_10_TWB = os.path.join(TEST_DIR, 'assets', 'TABLEAU_10_TWB.twb') -TABLEAU_CONNECTION_XML = ET.parse('test/assets/CONNECTION.xml').getroot() +TABLEAU_CONNECTION_XML = ET.parse(os.path.join(TEST_DIR, 'assets', 'CONNECTION.xml')).getroot() -TABLEAU_10_TWBX = 'test/assets/TABLEAU_10_TWBX.twbx' +TABLEAU_10_TWBX = os.path.join(TEST_DIR, 'assets', 'TABLEAU_10_TWBX.twbx') class HelperMethodTests(unittest.TestCase): From 9a8f0d165a313996a05be31a9fcc8c4a4561d24f Mon Sep 17 00:00:00 2001 From: T8y8 Date: Wed, 29 Jun 2016 21:15:20 -0700 Subject: [PATCH 09/26] Initial TDSX support. 
Lots of duplicated code with TWBX's --- tableaudocumentapi/datasource.py | 83 +++++++++++++++++++++++++++++-- test/assets/TABLEAU_10_TDSX.tdsx | Bin 0 -> 1866 bytes test/bvt.py | 43 +++++++++++++++- 3 files changed, 122 insertions(+), 4 deletions(-) create mode 100644 test/assets/TABLEAU_10_TDSX.tdsx diff --git a/tableaudocumentapi/datasource.py b/tableaudocumentapi/datasource.py index 617004a..3d0a419 100644 --- a/tableaudocumentapi/datasource.py +++ b/tableaudocumentapi/datasource.py @@ -3,10 +3,51 @@ # Datasource - A class for writing datasources to Tableau files # ############################################################################### +import contextlib +import os +import shutil +import tempfile +import zipfile + import xml.etree.ElementTree as ET from tableaudocumentapi import Connection +@contextlib.contextmanager +def temporary_directory(*args, **kwargs): + d = tempfile.mkdtemp(*args, **kwargs) + try: + yield d + finally: + shutil.rmtree(d) + + +def find_tds_in_zip(zip): + for filename in zip.namelist(): + if os.path.splitext(filename)[-1].lower() == '.tds': + return filename + + +def get_tds_xml_from_tdsx(filename): + with temporary_directory() as temp: + with zipfile.ZipFile(filename) as zf: + zf.extractall(temp) + tds_file = find_tds_in_zip(zf) + tds_xml = ET.parse(os.path.join(temp, tds_file)) + + return tds_xml + + +def build_tdsx_file(tdsx_contents, zip): + for root_dir, _, files in os.walk(tdsx_contents): + relative_dir = os.path.relpath(root_dir, tdsx_contents) + for f in files: + temp_file_full_path = os.path.join( + tdsx_contents, relative_dir, f) + zipname = os.path.join(relative_dir, f) + zip.write(temp_file_full_path, arcname=zipname) + + class ConnectionParser(object): def __init__(self, datasource_xml, version): @@ -56,9 +97,36 @@ def __init__(self, dsxml, filename=None): @classmethod def from_file(cls, filename): "Initialize datasource from file (.tds)" - dsxml = ET.parse(filename).getroot() + + if zipfile.is_zipfile(filename): + 
dsxml = get_tds_xml_from_tdsx(filename).getroot() + else: + dsxml = ET.parse(filename).getroot() return cls(dsxml, filename) + def _save_into_tdsx(self, filename=None): + # Save reuses existing filename, 'save as' takes a new one + if filename is None: + filename = self._filename + + # Saving a tdsx means extracting the contents into a temp folder, + # saving the changes over the tds in that folder, and then + # packaging it back up into a specifically formatted zip with the correct + # relative file paths + + # Extract to temp directory + with temporary_directory() as temp_path: + with zipfile.ZipFile(self._filename) as zf: + tds_file = find_tds_in_zip(zf) + zf.extractall(temp_path) + # Write the new version of the tds to the temp directory + self._datasourceTree.write(os.path.join( + temp_path, tds_file), encoding="utf-8", xml_declaration=True) + + # Write the new tdsx with the contents of the temp folder + with zipfile.ZipFile(filename, "w", compression=zipfile.ZIP_DEFLATED) as new_tdsx: + build_tdsx_file(temp_path, new_tdsx) + def save(self): """ Call finalization code and save file. @@ -72,7 +140,12 @@ def save(self): """ # save the file - self._datasourceTree.write(self._filename, encoding="utf-8", xml_declaration=True) + + if zipfile.is_zipfile(self._filename): + self._save_into_tdsx(self._filename) + else: + self._datasourceTree.write( + self._filename, encoding="utf-8", xml_declaration=True) def save_as(self, new_filename): """ @@ -85,7 +158,11 @@ def save_as(self, new_filename): Nothing. 
""" - self._datasourceTree.write(new_filename, encoding="utf-8", xml_declaration=True) + if zipfile.is_zipfile(self._filename): + self._save_into_tdsx(new_filename) + else: + self._datasourceTree.write( + new_filename, encoding="utf-8", xml_declaration=True) ########### # name diff --git a/test/assets/TABLEAU_10_TDSX.tdsx b/test/assets/TABLEAU_10_TDSX.tdsx new file mode 100644 index 0000000000000000000000000000000000000000..f94b678fbdbf718b81ff4353b4014424cad97a55 GIT binary patch literal 1866 zcma)-cRbq(1IB+r)yP>f+R`)57MmE2J!;jcJB_18XsFd95=6|@sB$aco zo#Rkdi4~g?xuPhC_U+yK_xt(0&mYh8dH#5wf4}zD%q;u>E5HV@S1(yK*gsdwW(5Eq zV*ro`008yC`$32&`hmBSYA`lT3xiTo*YL(@X{jP%4q5|i zHV!8@Gb?Nf10GGchV)6uGFg=edWLbD4TZglPU@Vt-9$s=KFt2% zXmfX9U^V~1;wbaWm)dPBZi2b(3U@{oI>R(dq48#nzM(2{G|3{;AnJUJA$+OjoI6ep z4jdgwZ}8Qtd`CYL2>j|{7rADa+eZlUN+gL@31*@|$0}Zfx%j}6Lxfkn9NWC>Nky}#lRSoV^Z151-~x#+qr{k6vMOBSK0;FCd?d$)93*>fZuef} zTFxEIbO@2dj*yf_`^B|bRY)Ib3`|RXzr`TGpw81&ykuohtbQ+mEB7t)DR3a0I2{qY zruT3CwvM>rEP}>CZ)e#2G@-njr9oh9~E$niG^{$yW5U&YhMz>zJ8LoMSc1iS}GF z(vGzT@jK@BgP@ziJq`%V=1L5yQ6`NgPm zr@RJ-iAgDWvbpIjBz)T z_oHk_;d3jimOSS+_lBhVHl~~AkTqc*{YUO)-0`tS$fw>0m?iN^p3Y91G?=R?&}7AY zZ~%@d54a$qR{5yZX9RU26U?69`Q_z6H|gAv1c<{8_IoCQ586ud-wyqyrFadD~7MHL-(|Fw%qU*8!$4KHYmT|Trd z)*MMQ8QlWQV6|FpdKtxLl)Rrgf8UtnJ5>ryP=eMR71Re2+?r;6hBu`8+;|_tbp%+& z$4elYiyn}JcGecP8(j^;X;(8U=RE=vRL>Kk$nE?-ZSn94N30+=jNs+hoD#bTxLnV| zH3(;Afaa3}erT>rz96oMDj?!8YvfA%TkJVIidqy?XnkMZv%$-1iMI#W^FMLimvw)V zll|vRKjh$1$Y1c!CoaAI2o9~*X3VjXPC#Ks8+RIWOlMajB5?HFLN|sD2(H9A{(QMJO;|4OO7i;|{9!2>znk&>#@FXb zq02FkPkw`k<)u&fK@<9ZhK$~0asHDQL=4BiBB<;}p->u!<2JE~J4-U$JPU*8wNf+) zY4|k`sga5K@7Lg~%D)=SmmG?mu`?F40%A^%WIgY(7@u}^tlzL%nlq){SAx#LQ~P3} zxS1gmHc2QvIjjbIykb9mDpotU@O>LY1UEvO)I_;8&a&XlGT!e6PA+27yUio`&ktw( zH9`@5O3cOfJ?XQmP-}cHKYFo)O_S{AXQN(b;&}i+u`#ankiO&iLfyAK zrWY<0DH^v62L#|F28PjlaWwIJhIFwX+tyrrigrJ!ALv=pm;wC$RveRzFQy}Z#y#Kq zrI8FWOQia{ztJWBN^QDKEL5X$$!pMoNfkCBX>K0Oz!ey~MO@$d6ofDQ4(LP5_R3QF 
zGy}_x+m|McPYO^ryXlmZE_`4EXZ)F2W3~}R*ah4sAM?bh?YM)>Il4ER`uP_fgZHST9zug#6rJU&}y!*c5HlSeFH)-IBFQu$Zb-`HD& km;m5^)9oAJ0MI}7ul|45*;`-v#>juRAOP_#7yoVl0Xp7+R{#J2 literal 0 HcmV?d00001 diff --git a/test/bvt.py b/test/bvt.py index aa4a247..81b49b9 100644 --- a/test/bvt.py +++ b/test/bvt.py @@ -15,10 +15,13 @@ TABLEAU_10_TWB = os.path.join(TEST_DIR, 'assets', 'TABLEAU_10_TWB.twb') -TABLEAU_CONNECTION_XML = ET.parse(os.path.join(TEST_DIR, 'assets', 'CONNECTION.xml')).getroot() +TABLEAU_CONNECTION_XML = ET.parse(os.path.join( + TEST_DIR, 'assets', 'CONNECTION.xml')).getroot() TABLEAU_10_TWBX = os.path.join(TEST_DIR, 'assets', 'TABLEAU_10_TWBX.twbx') +TABLEAU_10_TDSX = os.path.join(TEST_DIR, 'assets', 'TABLEAU_10_TDSX.tdsx') + class HelperMethodTests(unittest.TestCase): @@ -144,6 +147,44 @@ def test_can_save_tds(self): self.assertEqual(new_tds.connections[0].dbname, 'newdb.test.tsi.lan') +class DatasourceModelV10TDSXTests(unittest.TestCase): + + def setUp(self): + with open(TABLEAU_10_TDSX, 'rb') as in_file, open('test.tdsx', 'wb') as out_file: + out_file.write(in_file.read()) + self.tdsx_file = out_file + + def tearDown(self): + self.tdsx_file.close() + os.unlink(self.tdsx_file.name) + + def test_can_open_tdsx(self): + ds = Datasource.from_file(self.tdsx_file.name) + self.assertTrue(ds.connections) + self.assertTrue(ds.name) + + def test_can_open_tdsx_and_save_changes(self): + original_tdsx = Datasource.from_file(self.tdsx_file.name) + original_tdsx.connections[0].server = 'newdb.test.tsi.lan' + original_tdsx.save() + + new_tdsx = Datasource.from_file(self.tdsx_file.name) + self.assertEqual(new_tdsx.connections[ + 0].server, 'newdb.test.tsi.lan') + + def test_can_open_tdsx_and_save_as_changes(self): + new_tdsx_filename = self.tdsx_file.name + "_TEST_SAVE_AS" + original_wb = Datasource.from_file(self.tdsx_file.name) + original_wb.connections[0].server = 'newdb.test.tsi.lan' + original_wb.save_as(new_tdsx_filename) + + new_wb = Datasource.from_file(new_tdsx_filename) + 
self.assertEqual(new_wb.connections[ + 0].server, 'newdb.test.tsi.lan') + + os.unlink(new_tdsx_filename) + + class WorkbookModelTests(unittest.TestCase): def setUp(self): From bd6d3c9b04c489f99fc7ee8e98c6304edbce836b Mon Sep 17 00:00:00 2001 From: T8y8 Date: Wed, 29 Jun 2016 22:18:42 -0700 Subject: [PATCH 10/26] Refactor all the archive manipulation logic into one module --- tableaudocumentapi/archivefile.py | 67 ++++++++++++++++++++++++++++ tableaudocumentapi/datasource.py | 70 +++--------------------------- tableaudocumentapi/workbook.py | 72 +++---------------------------- test/bvt.py | 5 +-- 4 files changed, 81 insertions(+), 133 deletions(-) create mode 100644 tableaudocumentapi/archivefile.py diff --git a/tableaudocumentapi/archivefile.py b/tableaudocumentapi/archivefile.py new file mode 100644 index 0000000..5b28a15 --- /dev/null +++ b/tableaudocumentapi/archivefile.py @@ -0,0 +1,67 @@ +import contextlib +import os +import shutil +import tempfile +import zipfile + +import xml.etree.ElementTree as ET + + +@contextlib.contextmanager +def temporary_directory(*args, **kwargs): + d = tempfile.mkdtemp(*args, **kwargs) + try: + yield d + finally: + shutil.rmtree(d) + + +def find_file_in_zip(zip, ext): + for filename in zip.namelist(): + if os.path.splitext(filename)[-1].lower() == ext[:-1]: + return filename + + +def get_xml_from_archive(filename): + file_type = os.path.splitext(filename)[-1].lower() + with temporary_directory() as temp: + with zipfile.ZipFile(filename) as zf: + zf.extractall(temp) + xml_file = find_file_in_zip(zf, file_type) + xml_tree = ET.parse(os.path.join(temp, xml_file)) + + return xml_tree + + +def build_archive_file(archive_contents, zip): + for root_dir, _, files in os.walk(archive_contents): + relative_dir = os.path.relpath(root_dir, archive_contents) + for f in files: + temp_file_full_path = os.path.join( + archive_contents, relative_dir, f) + zipname = os.path.join(relative_dir, f) + zip.write(temp_file_full_path, arcname=zipname) + + 
+def save_into_archive(xml_tree, filename, new_filename=None): + # Saving a archive means extracting the contents into a temp folder, + # saving the changes over the twb in that folder, and then + # packaging it back up into a specifically formatted zip with the correct + # relative file paths + + if new_filename is None: + new_filename = filename + + # Extract to temp directory + with temporary_directory() as temp_path: + file_type = os.path.splitext(filename)[-1].lower() + with zipfile.ZipFile(filename) as zf: + twb_file = find_file_in_zip(zf, file_type) + zf.extractall(temp_path) + # Write the new version of the twb to the temp directory + xml_tree.write(os.path.join( + temp_path, twb_file), encoding="utf-8", xml_declaration=True) + + # Write the new archive with the contents of the temp folder + with zipfile.ZipFile(new_filename, "w", compression=zipfile.ZIP_DEFLATED) as new_archive: + build_archive_file(temp_path, new_archive) diff --git a/tableaudocumentapi/datasource.py b/tableaudocumentapi/datasource.py index 3d0a419..3d23412 100644 --- a/tableaudocumentapi/datasource.py +++ b/tableaudocumentapi/datasource.py @@ -3,49 +3,11 @@ # Datasource - A class for writing datasources to Tableau files # ############################################################################### -import contextlib import os -import shutil -import tempfile import zipfile import xml.etree.ElementTree as ET -from tableaudocumentapi import Connection - - -@contextlib.contextmanager -def temporary_directory(*args, **kwargs): - d = tempfile.mkdtemp(*args, **kwargs) - try: - yield d - finally: - shutil.rmtree(d) - - -def find_tds_in_zip(zip): - for filename in zip.namelist(): - if os.path.splitext(filename)[-1].lower() == '.tds': - return filename - - -def get_tds_xml_from_tdsx(filename): - with temporary_directory() as temp: - with zipfile.ZipFile(filename) as zf: - zf.extractall(temp) - tds_file = find_tds_in_zip(zf) - tds_xml = ET.parse(os.path.join(temp, tds_file)) - - return tds_xml - 
- -def build_tdsx_file(tdsx_contents, zip): - for root_dir, _, files in os.walk(tdsx_contents): - relative_dir = os.path.relpath(root_dir, tdsx_contents) - for f in files: - temp_file_full_path = os.path.join( - tdsx_contents, relative_dir, f) - zipname = os.path.join(relative_dir, f) - zip.write(temp_file_full_path, arcname=zipname) +from tableaudocumentapi import Connection, archivefile class ConnectionParser(object): @@ -99,34 +61,11 @@ def from_file(cls, filename): "Initialize datasource from file (.tds)" if zipfile.is_zipfile(filename): - dsxml = get_tds_xml_from_tdsx(filename).getroot() + dsxml = archivefile.get_xml_from_archive(filename).getroot() else: dsxml = ET.parse(filename).getroot() return cls(dsxml, filename) - def _save_into_tdsx(self, filename=None): - # Save reuses existing filename, 'save as' takes a new one - if filename is None: - filename = self._filename - - # Saving a tdsx means extracting the contents into a temp folder, - # saving the changes over the tds in that folder, and then - # packaging it back up into a specifically formatted zip with the correct - # relative file paths - - # Extract to temp directory - with temporary_directory() as temp_path: - with zipfile.ZipFile(self._filename) as zf: - tds_file = find_tds_in_zip(zf) - zf.extractall(temp_path) - # Write the new version of the tds to the temp directory - self._datasourceTree.write(os.path.join( - temp_path, tds_file), encoding="utf-8", xml_declaration=True) - - # Write the new tdsx with the contents of the temp folder - with zipfile.ZipFile(filename, "w", compression=zipfile.ZIP_DEFLATED) as new_tdsx: - build_tdsx_file(temp_path, new_tdsx) - def save(self): """ Call finalization code and save file. 
@@ -142,7 +81,7 @@ def save(self): # save the file if zipfile.is_zipfile(self._filename): - self._save_into_tdsx(self._filename) + archivefile.save_into_archive(self._datasourceTree, self._filename) else: self._datasourceTree.write( self._filename, encoding="utf-8", xml_declaration=True) @@ -159,7 +98,8 @@ def save_as(self, new_filename): """ if zipfile.is_zipfile(self._filename): - self._save_into_tdsx(new_filename) + archivefile.save_into_archive( + self._datasourceTree, self._filename, new_filename) else: self._datasourceTree.write( new_filename, encoding="utf-8", xml_declaration=True) diff --git a/tableaudocumentapi/workbook.py b/tableaudocumentapi/workbook.py index 0da1827..fb2c824 100644 --- a/tableaudocumentapi/workbook.py +++ b/tableaudocumentapi/workbook.py @@ -3,15 +3,12 @@ # Workbook - A class for writing Tableau workbook files # ############################################################################### -import contextlib import os -import shutil -import tempfile import zipfile import xml.etree.ElementTree as ET -from tableaudocumentapi import Datasource +from tableaudocumentapi import Datasource, archivefile ########################################################################### # @@ -20,41 +17,6 @@ ########################################################################### -@contextlib.contextmanager -def temporary_directory(*args, **kwargs): - d = tempfile.mkdtemp(*args, **kwargs) - try: - yield d - finally: - shutil.rmtree(d) - - -def find_twb_in_zip(zip): - for filename in zip.namelist(): - if os.path.splitext(filename)[-1].lower() == '.twb': - return filename - - -def get_twb_xml_from_twbx(filename): - with temporary_directory() as temp: - with zipfile.ZipFile(filename) as zf: - zf.extractall(temp) - twb_file = find_twb_in_zip(zf) - twb_xml = ET.parse(os.path.join(temp, twb_file)) - - return twb_xml - - -def build_twbx_file(twbx_contents, zip): - for root_dir, _, files in os.walk(twbx_contents): - relative_dir = os.path.relpath(root_dir, 
twbx_contents) - for f in files: - temp_file_full_path = os.path.join( - twbx_contents, relative_dir, f) - zipname = os.path.join(relative_dir, f) - zip.write(temp_file_full_path, arcname=zipname) - - class Workbook(object): """ A class for writing Tableau workbook files. @@ -75,7 +37,8 @@ def __init__(self, filename): # Determine if this is a twb or twbx and get the xml root if zipfile.is_zipfile(self._filename): - self._workbookTree = get_twb_xml_from_twbx(self._filename) + self._workbookTree = archivefile.get_xml_from_archive( + self._filename) else: self._workbookTree = ET.parse(self._filename) @@ -113,7 +76,8 @@ def save(self): # save the file if zipfile.is_zipfile(self._filename): - self._save_into_twbx(self._filename) + archivefile.save_into_archive( + self._workbookTree, filename=self._filename) else: self._workbookTree.write( self._filename, encoding="utf-8", xml_declaration=True) @@ -131,7 +95,8 @@ def save_as(self, new_filename): """ if zipfile.is_zipfile(self._filename): - self._save_into_twbx(new_filename) + archivefile.save_into_archive( + self._workbookTree, self._filename, new_filename) else: self._workbookTree.write( new_filename, encoding="utf-8", xml_declaration=True) @@ -151,29 +116,6 @@ def _prepare_datasources(self, xmlRoot): return datasources - def _save_into_twbx(self, filename=None): - # Save reuses existing filename, 'save as' takes a new one - if filename is None: - filename = self._filename - - # Saving a twbx means extracting the contents into a temp folder, - # saving the changes over the twb in that folder, and then - # packaging it back up into a specifically formatted zip with the correct - # relative file paths - - # Extract to temp directory - with temporary_directory() as temp_path: - with zipfile.ZipFile(self._filename) as zf: - twb_file = find_twb_in_zip(zf) - zf.extractall(temp_path) - # Write the new version of the twb to the temp directory - self._workbookTree.write(os.path.join( - temp_path, twb_file), encoding="utf-8", 
xml_declaration=True) - - # Write the new twbx with the contents of the temp folder - with zipfile.ZipFile(filename, "w", compression=zipfile.ZIP_DEFLATED) as new_twbx: - build_twbx_file(temp_path, new_twbx) - @staticmethod def _is_valid_file(filename): fileExtension = os.path.splitext(filename)[-1].lower() diff --git a/test/bvt.py b/test/bvt.py index 81b49b9..49393b3 100644 --- a/test/bvt.py +++ b/test/bvt.py @@ -173,7 +173,7 @@ def test_can_open_tdsx_and_save_changes(self): 0].server, 'newdb.test.tsi.lan') def test_can_open_tdsx_and_save_as_changes(self): - new_tdsx_filename = self.tdsx_file.name + "_TEST_SAVE_AS" + new_tdsx_filename = 'newtdsx.tdsx' original_wb = Datasource.from_file(self.tdsx_file.name) original_wb.connections[0].server = 'newdb.test.tsi.lan' original_wb.save_as(new_tdsx_filename) @@ -181,7 +181,6 @@ def test_can_open_tdsx_and_save_as_changes(self): new_wb = Datasource.from_file(new_tdsx_filename) self.assertEqual(new_wb.connections[ 0].server, 'newdb.test.tsi.lan') - os.unlink(new_tdsx_filename) @@ -281,7 +280,7 @@ def test_can_open_twbx_and_save_changes(self): 0].server, 'newdb.test.tsi.lan') def test_can_open_twbx_and_save_as_changes(self): - new_twbx_filename = self.workbook_file.name + "_TEST_SAVE_AS" + new_twbx_filename = 'newtwbx.twbx' original_wb = Workbook(self.workbook_file.name) original_wb.datasources[0].connections[0].server = 'newdb.test.tsi.lan' original_wb.save_as(new_twbx_filename) From 8385b5c92065f97c23fc882f719c86b0875e4956 Mon Sep 17 00:00:00 2001 From: T8y8 Date: Thu, 30 Jun 2016 09:33:30 -0700 Subject: [PATCH 11/26] Move save logic into helper function. 
Remove unused tests and methods from Workbook class --- .../{archivefile.py => containerfile.py} | 18 ++++++++++--- tableaudocumentapi/datasource.py | 17 +++--------- tableaudocumentapi/workbook.py | 26 ++++--------------- test/bvt.py | 13 ---------- 4 files changed, 23 insertions(+), 51 deletions(-) rename tableaudocumentapi/{archivefile.py => containerfile.py} (79%) diff --git a/tableaudocumentapi/archivefile.py b/tableaudocumentapi/containerfile.py similarity index 79% rename from tableaudocumentapi/archivefile.py rename to tableaudocumentapi/containerfile.py index 5b28a15..a4a7930 100644 --- a/tableaudocumentapi/archivefile.py +++ b/tableaudocumentapi/containerfile.py @@ -45,7 +45,7 @@ def build_archive_file(archive_contents, zip): def save_into_archive(xml_tree, filename, new_filename=None): # Saving a archive means extracting the contents into a temp folder, - # saving the changes over the twb in that folder, and then + # saving the changes over the twb/tds in that folder, and then # packaging it back up into a specifically formatted zip with the correct # relative file paths @@ -56,12 +56,22 @@ def save_into_archive(xml_tree, filename, new_filename=None): with temporary_directory() as temp_path: file_type = os.path.splitext(filename)[-1].lower() with zipfile.ZipFile(filename) as zf: - twb_file = find_file_in_zip(zf, file_type) + xml_file = find_file_in_zip(zf, file_type) zf.extractall(temp_path) - # Write the new version of the twb to the temp directory + # Write the new version of the file to the temp directory xml_tree.write(os.path.join( - temp_path, twb_file), encoding="utf-8", xml_declaration=True) + temp_path, xml_file), encoding="utf-8", xml_declaration=True) # Write the new archive with the contents of the temp folder with zipfile.ZipFile(new_filename, "w", compression=zipfile.ZIP_DEFLATED) as new_archive: build_archive_file(temp_path, new_archive) + + +def _save_file(container_file, xml_tree, new_filename=None): + if 
zipfile.is_zipfile(container_file): + save_into_archive(xml_tree, container_file, new_filename) + else: + xml_tree.write(container_file, encoding="utf-8", xml_declaration=True) + + + diff --git a/tableaudocumentapi/datasource.py b/tableaudocumentapi/datasource.py index 3d23412..33c0c4f 100644 --- a/tableaudocumentapi/datasource.py +++ b/tableaudocumentapi/datasource.py @@ -7,7 +7,7 @@ import zipfile import xml.etree.ElementTree as ET -from tableaudocumentapi import Connection, archivefile +from tableaudocumentapi import Connection, containerfile class ConnectionParser(object): @@ -61,7 +61,7 @@ def from_file(cls, filename): "Initialize datasource from file (.tds)" if zipfile.is_zipfile(filename): - dsxml = archivefile.get_xml_from_archive(filename).getroot() + dsxml = containerfile.get_xml_from_archive(filename).getroot() else: dsxml = ET.parse(filename).getroot() return cls(dsxml, filename) @@ -80,11 +80,7 @@ def save(self): # save the file - if zipfile.is_zipfile(self._filename): - archivefile.save_into_archive(self._datasourceTree, self._filename) - else: - self._datasourceTree.write( - self._filename, encoding="utf-8", xml_declaration=True) + containerfile._save_file(self._filename, self._datasourceTree) def save_as(self, new_filename): """ @@ -97,12 +93,7 @@ def save_as(self, new_filename): Nothing. 
""" - if zipfile.is_zipfile(self._filename): - archivefile.save_into_archive( - self._datasourceTree, self._filename, new_filename) - else: - self._datasourceTree.write( - new_filename, encoding="utf-8", xml_declaration=True) + containerfile._save_file(self._filename, self._datasourceTree, new_filename) ########### # name diff --git a/tableaudocumentapi/workbook.py b/tableaudocumentapi/workbook.py index fb2c824..ecfc13d 100644 --- a/tableaudocumentapi/workbook.py +++ b/tableaudocumentapi/workbook.py @@ -8,7 +8,7 @@ import xml.etree.ElementTree as ET -from tableaudocumentapi import Datasource, archivefile +from tableaudocumentapi import Datasource, containerfile ########################################################################### # @@ -37,7 +37,7 @@ def __init__(self, filename): # Determine if this is a twb or twbx and get the xml root if zipfile.is_zipfile(self._filename): - self._workbookTree = archivefile.get_xml_from_archive( + self._workbookTree = containerfile.get_xml_from_archive( self._filename) else: self._workbookTree = ET.parse(self._filename) @@ -74,13 +74,7 @@ def save(self): """ # save the file - - if zipfile.is_zipfile(self._filename): - archivefile.save_into_archive( - self._workbookTree, filename=self._filename) - else: - self._workbookTree.write( - self._filename, encoding="utf-8", xml_declaration=True) + containerfile._save_file(self._filename, self._workbookTree) def save_as(self, new_filename): """ @@ -93,13 +87,8 @@ def save_as(self, new_filename): Nothing. 
""" - - if zipfile.is_zipfile(self._filename): - archivefile.save_into_archive( - self._workbookTree, self._filename, new_filename) - else: - self._workbookTree.write( - new_filename, encoding="utf-8", xml_declaration=True) + containerfile._save_file( + self._filename, self._workbookTree, new_filename) ########################################################################### # @@ -115,8 +104,3 @@ def _prepare_datasources(self, xmlRoot): datasources.append(ds) return datasources - - @staticmethod - def _is_valid_file(filename): - fileExtension = os.path.splitext(filename)[-1].lower() - return fileExtension in ('.twb', '.tds') diff --git a/test/bvt.py b/test/bvt.py index 49393b3..1dedd57 100644 --- a/test/bvt.py +++ b/test/bvt.py @@ -23,19 +23,6 @@ TABLEAU_10_TDSX = os.path.join(TEST_DIR, 'assets', 'TABLEAU_10_TDSX.tdsx') -class HelperMethodTests(unittest.TestCase): - - def test_is_valid_file_with_valid_inputs(self): - self.assertTrue(Workbook._is_valid_file('file1.tds')) - self.assertTrue(Workbook._is_valid_file('file2.twb')) - self.assertTrue(Workbook._is_valid_file('tds.twb')) - - def test_is_valid_file_with_invalid_inputs(self): - self.assertFalse(Workbook._is_valid_file('')) - self.assertFalse(Workbook._is_valid_file('file1.tds2')) - self.assertFalse(Workbook._is_valid_file('file2.twb3')) - - class ConnectionParserTests(unittest.TestCase): def test_can_extract_legacy_connection(self): From cbcda8f0baa691ff0b7c1e0533df85f186cd3841 Mon Sep 17 00:00:00 2001 From: T8y8 Date: Thu, 30 Jun 2016 11:33:54 -0700 Subject: [PATCH 12/26] Now find the file by checking if it's a workbook or datasource, and open the file directly from the zip instead of extracting multiple times --- tableaudocumentapi/containerfile.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/tableaudocumentapi/containerfile.py b/tableaudocumentapi/containerfile.py index a4a7930..30b389f 100644 --- a/tableaudocumentapi/containerfile.py +++ 
b/tableaudocumentapi/containerfile.py @@ -16,19 +16,21 @@ def temporary_directory(*args, **kwargs): shutil.rmtree(d) -def find_file_in_zip(zip, ext): +def find_file_in_zip(zip): for filename in zip.namelist(): - if os.path.splitext(filename)[-1].lower() == ext[:-1]: + try: + ET.parse(zip.open(filename)).getroot().tag in ( + 'workbook', 'datasource') return filename + except ET.ParseError: + # That's not an XML file by gosh + pass def get_xml_from_archive(filename): - file_type = os.path.splitext(filename)[-1].lower() - with temporary_directory() as temp: - with zipfile.ZipFile(filename) as zf: - zf.extractall(temp) - xml_file = find_file_in_zip(zf, file_type) - xml_tree = ET.parse(os.path.join(temp, xml_file)) + with zipfile.ZipFile(filename) as zf: + xml_file = zf.open(find_file_in_zip(zf)) + xml_tree = ET.parse(xml_file) return xml_tree @@ -54,9 +56,8 @@ def save_into_archive(xml_tree, filename, new_filename=None): # Extract to temp directory with temporary_directory() as temp_path: - file_type = os.path.splitext(filename)[-1].lower() with zipfile.ZipFile(filename) as zf: - xml_file = find_file_in_zip(zf, file_type) + xml_file = find_file_in_zip(zf) zf.extractall(temp_path) # Write the new version of the file to the temp directory xml_tree.write(os.path.join( @@ -72,6 +73,3 @@ def _save_file(container_file, xml_tree, new_filename=None): save_into_archive(xml_tree, container_file, new_filename) else: xml_tree.write(container_file, encoding="utf-8", xml_declaration=True) - - - From 813e07921912f10b19ea7c822d8cea7c3dba9c88 Mon Sep 17 00:00:00 2001 From: T8y8 Date: Thu, 30 Jun 2016 12:12:21 -0700 Subject: [PATCH 13/26] ZipFile.open should be a context manager --- tableaudocumentapi/containerfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tableaudocumentapi/containerfile.py b/tableaudocumentapi/containerfile.py index 30b389f..87cef69 100644 --- a/tableaudocumentapi/containerfile.py +++ b/tableaudocumentapi/containerfile.py @@ -29,8 +29,8 
@@ def find_file_in_zip(zip): def get_xml_from_archive(filename): with zipfile.ZipFile(filename) as zf: - xml_file = zf.open(find_file_in_zip(zf)) - xml_tree = ET.parse(xml_file) + with zf.open(find_file_in_zip(zf)) as xml_file: + xml_tree = ET.parse(xml_file) return xml_tree From be4c9c8ecc5d1047b1908dbb288d6d68a0a361d2 Mon Sep 17 00:00:00 2001 From: T8y8 Date: Thu, 30 Jun 2016 12:14:44 -0700 Subject: [PATCH 14/26] Missed a spot with the context manager --- tableaudocumentapi/containerfile.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tableaudocumentapi/containerfile.py b/tableaudocumentapi/containerfile.py index 87cef69..13e08c7 100644 --- a/tableaudocumentapi/containerfile.py +++ b/tableaudocumentapi/containerfile.py @@ -19,9 +19,10 @@ def temporary_directory(*args, **kwargs): def find_file_in_zip(zip): for filename in zip.namelist(): try: - ET.parse(zip.open(filename)).getroot().tag in ( - 'workbook', 'datasource') - return filename + with zip.open(filename) as xml_candidate: + ET.parse(xml_candidate).getroot().tag in ( + 'workbook', 'datasource') + return filename except ET.ParseError: # That's not an XML file by gosh pass From a885f159e239c29fadb5071ad570ee3d31c3cc14 Mon Sep 17 00:00:00 2001 From: T8y8 Date: Thu, 30 Jun 2016 14:02:57 -0700 Subject: [PATCH 15/26] The truth is out there... 
--- tableaudocumentapi/datasource.py | 8 ++++---- tableaudocumentapi/workbook.py | 8 ++++---- tableaudocumentapi/{containerfile.py => xfile.py} | 0 3 files changed, 8 insertions(+), 8 deletions(-) rename tableaudocumentapi/{containerfile.py => xfile.py} (100%) diff --git a/tableaudocumentapi/datasource.py b/tableaudocumentapi/datasource.py index 33c0c4f..b4fb8ed 100644 --- a/tableaudocumentapi/datasource.py +++ b/tableaudocumentapi/datasource.py @@ -7,7 +7,7 @@ import zipfile import xml.etree.ElementTree as ET -from tableaudocumentapi import Connection, containerfile +from tableaudocumentapi import Connection, xfile class ConnectionParser(object): @@ -61,7 +61,7 @@ def from_file(cls, filename): "Initialize datasource from file (.tds)" if zipfile.is_zipfile(filename): - dsxml = containerfile.get_xml_from_archive(filename).getroot() + dsxml = xfile.get_xml_from_archive(filename).getroot() else: dsxml = ET.parse(filename).getroot() return cls(dsxml, filename) @@ -80,7 +80,7 @@ def save(self): # save the file - containerfile._save_file(self._filename, self._datasourceTree) + xfile._save_file(self._filename, self._datasourceTree) def save_as(self, new_filename): """ @@ -93,7 +93,7 @@ def save_as(self, new_filename): Nothing. 
""" - containerfile._save_file(self._filename, self._datasourceTree, new_filename) + xfile._save_file(self._filename, self._datasourceTree, new_filename) ########### # name diff --git a/tableaudocumentapi/workbook.py b/tableaudocumentapi/workbook.py index ecfc13d..9e29973 100644 --- a/tableaudocumentapi/workbook.py +++ b/tableaudocumentapi/workbook.py @@ -8,7 +8,7 @@ import xml.etree.ElementTree as ET -from tableaudocumentapi import Datasource, containerfile +from tableaudocumentapi import Datasource, xfile ########################################################################### # @@ -37,7 +37,7 @@ def __init__(self, filename): # Determine if this is a twb or twbx and get the xml root if zipfile.is_zipfile(self._filename): - self._workbookTree = containerfile.get_xml_from_archive( + self._workbookTree = xfile.get_xml_from_archive( self._filename) else: self._workbookTree = ET.parse(self._filename) @@ -74,7 +74,7 @@ def save(self): """ # save the file - containerfile._save_file(self._filename, self._workbookTree) + xfile._save_file(self._filename, self._workbookTree) def save_as(self, new_filename): """ @@ -87,7 +87,7 @@ def save_as(self, new_filename): Nothing. 
""" - containerfile._save_file( + xfile._save_file( self._filename, self._workbookTree, new_filename) ########################################################################### diff --git a/tableaudocumentapi/containerfile.py b/tableaudocumentapi/xfile.py similarity index 100% rename from tableaudocumentapi/containerfile.py rename to tableaudocumentapi/xfile.py From 481f38c1eb10777b39109c3e009f9fa070d1cf8d Mon Sep 17 00:00:00 2001 From: Russell Hay Date: Fri, 1 Jul 2016 16:35:13 -0700 Subject: [PATCH 16/26] Initial attempt at enabling reading the columns from the datasource (#45) Fixes #42 #46 * Initial attempt at enabling reading the columns from the datasource * Fixing pep8 errors for EOFEOL * Changing to OrderedDict for getting columns * Add documentation for the various column attributes * rename column to field * Fixed #46 encode apostrophes in field names * Enable multilook up for Fields * Rename properties on the field based on feedback given in #45 --- setup.py | 2 +- tableaudocumentapi/__init__.py | 2 + tableaudocumentapi/datasource.py | 35 +++++++++- tableaudocumentapi/field.py | 89 ++++++++++++++++++++++++++ tableaudocumentapi/multilookup_dict.py | 49 ++++++++++++++ test/__init__.py | 2 + test/assets/datasource_test.tds | 86 +++++++++++++++++++++++++ test/test_datasource.py | 29 +++++++++ test/test_multidict.py | 47 ++++++++++++++ 9 files changed, 337 insertions(+), 4 deletions(-) create mode 100644 tableaudocumentapi/field.py create mode 100644 tableaudocumentapi/multilookup_dict.py create mode 100644 test/assets/datasource_test.tds create mode 100644 test/test_datasource.py create mode 100644 test/test_multidict.py diff --git a/setup.py b/setup.py index 5ef3e85..96d8659 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tableaudocumentapi', - version='0.1.0-dev', + version='0.1.0.dev0', author='Tableau Software', author_email='github@tableau.com', url='https://github.com/tableau/document-api-python', diff --git 
a/tableaudocumentapi/__init__.py b/tableaudocumentapi/__init__.py index 2ee7df1..6a10f6f 100644 --- a/tableaudocumentapi/__init__.py +++ b/tableaudocumentapi/__init__.py @@ -1,5 +1,7 @@ +from .field import Field from .connection import Connection from .datasource import Datasource, ConnectionParser from .workbook import Workbook + __version__ = '0.0.1' __VERSION__ = __version__ diff --git a/tableaudocumentapi/datasource.py b/tableaudocumentapi/datasource.py index b4fb8ed..924575d 100644 --- a/tableaudocumentapi/datasource.py +++ b/tableaudocumentapi/datasource.py @@ -3,15 +3,29 @@ # Datasource - A class for writing datasources to Tableau files # ############################################################################### -import os +import collections +import xml.etree.ElementTree as ET +import xml.sax.saxutils as sax import zipfile -import xml.etree.ElementTree as ET from tableaudocumentapi import Connection, xfile +from tableaudocumentapi import Field +from tableaudocumentapi.multilookup_dict import MultiLookupDict -class ConnectionParser(object): +def _mapping_from_xml(root_xml, column_xml): + retval = Field.from_xml(column_xml) + local_name = retval.id + if "'" in local_name: + local_name = sax.escape(local_name, {"'": "'"}) + xpath = ".//metadata-record[@class='column'][local-name='{}']".format(local_name) + metadata_record = root_xml.find(xpath) + if metadata_record is not None: + retval.apply_metadata(metadata_record) + return retval.id, retval + +class ConnectionParser(object): def __init__(self, datasource_xml, version): self._dsxml = datasource_xml self._dsversion = version @@ -55,6 +69,7 @@ def __init__(self, dsxml, filename=None): self._connection_parser = ConnectionParser( self._datasourceXML, version=self._version) self._connections = self._connection_parser.get_connections() + self._fields = None @classmethod def from_file(cls, filename): @@ -115,3 +130,17 @@ def version(self): @property def connections(self): return self._connections + + 
########### + # fields + ########### + @property + def fields(self): + if not self._fields: + self._fields = self._get_all_fields() + return self._fields + + def _get_all_fields(self): + column_objects = (_mapping_from_xml(self._datasourceTree, xml) + for xml in self._datasourceTree.findall('.//column')) + return MultiLookupDict({k: v for k, v in column_objects}) diff --git a/tableaudocumentapi/field.py b/tableaudocumentapi/field.py new file mode 100644 index 0000000..1eb68ef --- /dev/null +++ b/tableaudocumentapi/field.py @@ -0,0 +1,89 @@ +import functools + +_ATTRIBUTES = [ + 'id', # Name of the field as specified in the file, usually surrounded by [ ] + 'caption', # Name of the field as displayed in Tableau unless an aliases is defined + 'datatype', # Type of the field within Tableau (string, integer, etc) + 'role', # Dimension or Measure + 'type', # three possible values: quantitative, ordinal, or nominal + 'alias', # Name of the field as displayed in Tableau if the default name isn't wanted + 'calculation', # If this field is a calculated field, this will be the formula +] + +_METADATA_ATTRIBUTES = [ + 'aggregation', # The type of aggregation on the field (e.g Sum, Avg) +] + + +def _find_metadata_record(record, attrib): + element = record.find('.//{}'.format(attrib)) + if element is None: + return None + return element.text + + +class Field(object): + """ Represents a field in a datasource """ + + def __init__(self, xmldata): + for attrib in _ATTRIBUTES: + self._apply_attribute(xmldata, attrib, lambda x: xmldata.attrib.get(x, None)) + + # All metadata attributes begin at None + for attrib in _METADATA_ATTRIBUTES: + setattr(self, '_{}'.format(attrib), None) + + def apply_metadata(self, metadata_record): + for attrib in _METADATA_ATTRIBUTES: + self._apply_attribute(metadata_record, attrib, functools.partial(_find_metadata_record, metadata_record)) + + @classmethod + def from_xml(cls, xmldata): + return cls(xmldata) + + def __getattr__(self, item): + private_name 
= '_{}'.format(item) + if item in _ATTRIBUTES or item in _METADATA_ATTRIBUTES: + return getattr(self, private_name) + raise AttributeError(item) + + def _apply_attribute(self, xmldata, attrib, default_func): + if hasattr(self, '_read_{}'.format(attrib)): + value = getattr(self, '_read_{}'.format(attrib))(xmldata) + else: + value = default_func(attrib) + + setattr(self, '_{}'.format(attrib), value) + + @property + def name(self): + """ Provides a nice name for the field which is derived from the alias, caption, or the id. + + The name resolves as either the alias if it's defined, or the caption if alias is not defined, + and finally the id which is the underlying name if neither of the fields exist. """ + alias = getattr(self, 'alias', None) + if alias: + return alias + + caption = getattr(self, 'caption', None) + if caption: + return caption + + return self.id + + ###################################### + # Special Case handling methods for reading the values from the XML + ###################################### + @staticmethod + def _read_id(xmldata): + # ID is actually the name of the field, but to provide a nice name, we call this ID + return xmldata.attrib.get('name', None) + + @staticmethod + def _read_calculation(xmldata): + # The formula for a calculation is stored in a child element, so we need to pull it out separately. 
+ calc = xmldata.find('.//calculation') + if calc is None: + return None + + return calc.attrib.get('formula', None) diff --git a/tableaudocumentapi/multilookup_dict.py b/tableaudocumentapi/multilookup_dict.py new file mode 100644 index 0000000..39c92c6 --- /dev/null +++ b/tableaudocumentapi/multilookup_dict.py @@ -0,0 +1,49 @@ +def _resolve_value(key, value): + try: + retval = value.get(key, None) + if retval is None: + retval = value.getattr(key, None) + except AttributeError: + retval = None + return retval + + +def _build_index(key, d): + return {_resolve_value(key, v): k + for k, v in d.items() + if _resolve_value(key, v) is not None} + + +# TODO: Improve this to be more generic +class MultiLookupDict(dict): + def __init__(self, args=None): + if args is None: + args = {} + super(MultiLookupDict, self).__init__(args) + self._indexes = { + 'alias': {}, + 'caption': {} + } + self._populate_indexes() + + def _populate_indexes(self): + self._indexes['alias'] = _build_index('alias', self) + self._indexes['caption'] = _build_index('caption', self) + + def __setitem__(self, key, value): + alias = _resolve_value('alias', value) + caption = _resolve_value('caption', value) + if alias is not None: + self._indexes['alias'][alias] = key + if caption is not None: + self._indexes['caption'][caption] = key + + dict.__setitem__(self, key, value) + + def __getitem__(self, key): + if key in self._indexes['alias']: + key = self._indexes['alias'][key] + elif key in self._indexes['caption']: + key = self._indexes['caption'][key] + + return dict.__getitem__(self, key) diff --git a/test/__init__.py b/test/__init__.py index e69de29..c715da8 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -0,0 +1,2 @@ +from . import bvt +from . 
import test_datasource diff --git a/test/assets/datasource_test.tds b/test/assets/datasource_test.tds new file mode 100644 index 0000000..227e006 --- /dev/null +++ b/test/assets/datasource_test.tds @@ -0,0 +1,86 @@ + + + + + + + a + 130 + [a] + [xy] + a + 1 + string + Count + 255 + true + + "SQL_WVARCHAR" + "SQL_C_WCHAR" + "true" + + + + Today's Date + 130 + [Today's Date] + [xy] + a + 1 + string + Count + 255 + true + + "SQL_WVARCHAR" + "SQL_C_WCHAR" + "true" + + + + x + 3 + [x] + [xy] + x + 2 + integer + Sum + 10 + true + + "SQL_INTEGER" + "SQL_C_SLONG" + + + + y + 3 + [y] + [xy] + y + 3 + integer + Sum + 10 + true + + "SQL_INTEGER" + "SQL_C_SLONG" + + + + + + + + + + + + + + + + + diff --git a/test/test_datasource.py b/test/test_datasource.py new file mode 100644 index 0000000..da956ee --- /dev/null +++ b/test/test_datasource.py @@ -0,0 +1,29 @@ +import unittest +import os.path +import functools + +from tableaudocumentapi import Datasource + +TEST_TDS_FILE = os.path.join( + os.path.dirname(__file__), + 'assets', + 'datasource_test.tds' +) + + +class DataSourceFields(unittest.TestCase): + def setUp(self): + self.ds = Datasource.from_file(TEST_TDS_FILE) + + def test_datasource_returns_correct_fields(self): + self.assertIsNotNone(self.ds.fields) + self.assertIsNotNone(self.ds.fields.get('[Number of Records]', None)) + + def test_datasource_returns_calculation_from_fields(self): + self.assertEqual('1', self.ds.fields['[Number of Records]'].calculation) + + def test_datasource_uses_metadata_record(self): + self.assertEqual('Sum', self.ds.fields['[x]'].aggregation) + + def test_datasource_column_name_contains_apostrophy(self): + self.assertIsNotNone(self.ds.fields.get("[Today's Date]", None)) diff --git a/test/test_multidict.py b/test/test_multidict.py new file mode 100644 index 0000000..abb01c5 --- /dev/null +++ b/test/test_multidict.py @@ -0,0 +1,47 @@ +import unittest +import os.path +import functools + +from tableaudocumentapi.multilookup_dict import 
MultiLookupDict + + +class MLDTests(unittest.TestCase): + def setUp(self): + self.mld = MultiLookupDict({ + '[foo]': { + 'alias': 'bar', + 'caption': 'baz', + 'value': 1 + }, + '[bar]': { + 'caption': 'foo', + 'value': 2 + }, + '[baz]': { + 'value': 3 + } + }) + + def test_multilookupdict_name_only(self): + actual = self.mld['[baz]'] + self.assertEqual(3, actual['value']) + + def test_multilookupdict_alias_overrides_everything(self): + actual = self.mld['bar'] + self.assertEqual(1, actual['value']) + + def test_mutlilookupdict_caption_overrides_id(self): + actual = self.mld['foo'] + self.assertEqual(2, actual['value']) + + def test_mutlilookupdict_can_still_find_id_even_with_alias(self): + actual = self.mld['[foo]'] + self.assertEqual(1, actual['value']) + + def test_mutlilookupdict_can_still_find_caption_even_with_alias(self): + actual = self.mld['baz'] + self.assertEqual(1, actual['value']) + + def test_mutlilookupdict_can_still_find_id_even_with_caption(self): + actual = self.mld['[bar]'] + self.assertEqual(2, actual['value']) From d9f90f3b31c56cc282a2fbd395c5d0914aec74fe Mon Sep 17 00:00:00 2001 From: Russell Hay Date: Mon, 11 Jul 2016 13:09:33 -0700 Subject: [PATCH 17/26] Adding contributing.md first draft (#48) * Adding contributing.md first draft * Adding information about Issues, Feature requests, and PRs Based on feedback from @lbrendanl * Adding additional detail to the the feature PR section based on feedback from @benlower * fixing typo --- contributing.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 contributing.md diff --git a/contributing.md b/contributing.md new file mode 100644 index 0000000..15fc5f8 --- /dev/null +++ b/contributing.md @@ -0,0 +1,33 @@ +# Contributing + +We welcome contributions to this project! 
+ +Contributions can include, but are not limited to, any of the following: + +* File an Issue +* Request a Feature +* Implement a Requested Feature +* Fix an Issue/Bug +* Add/Fix documentation + +Contributions must follow the guidelines outlined on the [Tableau Organization](http://tableau.github.io/) page, though filing an issue or requesting +a feature does not require the CLA. + +## Issues and Feature Requests + +To submit an issue/bug report, or to request a feature, please submit a [GitHub issue](https://github.com/tableau/document-api-python/issues) to the repo. + +If you are submitting a bug report, please provide as much information as you can, including clear and concise repro steps, attaching any necessary +files to assist in the repro. **Be sure to scrub the files of any potentially sensitive information. Issues are public.** + +For a feature request, please try to describe the scenario you are trying to accomplish that requires the feature. This will help us understand +the limitations that you are running into, and provide us with a use case to know if we've satisfied your request. + +## Fixes, Implementations, and Documentation + +For all other things, please submit a PR that includes the fix, documentation, or new code that you are trying to contribute. More information on +creating a PR can be found in the [GitHub documentation](https://help.github.com/articles/creating-a-pull-request/). + +If the feature is complex or has multiple solutions that could be equally appropriate approaches, it would be helpful to file an issue to discuss the +design trade-offs of each solution before implementing, to allow us to collectively arrive at the best solution, which most likely exists in the middle +somewhere.
From c8eb27e77fea823cf4f48490d7a4383d61bd645c Mon Sep 17 00:00:00 2001 From: Russell Hay Date: Mon, 11 Jul 2016 15:24:05 -0700 Subject: [PATCH 18/26] Get Fields Example Code (#51) * Get Fields Example Code * Adding field count and numbering * updating comment to be accurate * Adding GetFields to travis run * Updating to use enumerate to keep track of count --- .travis.yml | 1 + Examples/GetFields/World.tds | 1 + Examples/GetFields/show_fields.py | 29 +++++++++++++++++++++++++++++ 3 files changed, 31 insertions(+) create mode 120000 Examples/GetFields/World.tds create mode 100644 Examples/GetFields/show_fields.py diff --git a/.travis.yml b/.travis.yml index 2480df6..75674b6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,4 +18,5 @@ script: # Examples - (cd "Examples/Replicate Workbook" && python replicateWorkbook.py) - (cd "Examples/List TDS Info" && python listTDSInfo.py) + - (cd "Examples/GetFields" && python show_fields.py) diff --git a/Examples/GetFields/World.tds b/Examples/GetFields/World.tds new file mode 120000 index 0000000..397f696 --- /dev/null +++ b/Examples/GetFields/World.tds @@ -0,0 +1 @@ +../List TDS Info/World.tds \ No newline at end of file diff --git a/Examples/GetFields/show_fields.py b/Examples/GetFields/show_fields.py new file mode 100644 index 0000000..b04a056 --- /dev/null +++ b/Examples/GetFields/show_fields.py @@ -0,0 +1,29 @@ +############################################################ +# Step 1) Use Datasource object from the Document API +############################################################ +from tableaudocumentapi import Datasource + +############################################################ +# Step 2) Open the .tds we want to inspect +############################################################ +sourceTDS = Datasource.from_file('World.tds') + +############################################################ +# Step 3) Print out all of the fields and what type they are 
+############################################################ +print('----------------------------------------------------------') +print('--- {} total fields in this datasource'.format(len(sourceTDS.fields))) +print('----------------------------------------------------------') +for count, field in enumerate(sourceTDS.fields.values()): + print('{:>4}: {} is a {}'.format(count+1, field.name, field.datatype)) + blank_line = False + if field.calculation: + print(' the formula is {}'.format(field.calculation)) + blank_line = True + if field.aggregation: + print(' the default aggregation is {}'.format(field.aggregation)) + blank_line = True + + if blank_line: + print('') +print('----------------------------------------------------------') From 00a9649cf057b28e4fc79f837433ee2b6aa46e38 Mon Sep 17 00:00:00 2001 From: Russell Hay Date: Wed, 13 Jul 2016 09:43:10 -0700 Subject: [PATCH 19/26] convert the indices to be weak ref value dictionaries (#53) --- tableaudocumentapi/multilookup_dict.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tableaudocumentapi/multilookup_dict.py b/tableaudocumentapi/multilookup_dict.py index 39c92c6..21e2736 100644 --- a/tableaudocumentapi/multilookup_dict.py +++ b/tableaudocumentapi/multilookup_dict.py @@ -1,3 +1,6 @@ +import weakref + + def _resolve_value(key, value): try: retval = value.get(key, None) @@ -21,8 +24,8 @@ def __init__(self, args=None): args = {} super(MultiLookupDict, self).__init__(args) self._indexes = { - 'alias': {}, - 'caption': {} + 'alias': weakref.WeakValueDictionary(), + 'caption': weakref.WeakValueDictionary() } self._populate_indexes() From f46f3d96bc9af1653662ee3b7da383cfc662b34d Mon Sep 17 00:00:00 2001 From: Russell Hay Date: Thu, 14 Jul 2016 11:12:51 -0700 Subject: [PATCH 20/26] Fixes #50 - materialize the properties for completion (#52) * Fixes #50 - materialize the properties for completion This also fixes a bug where captions and aliases weren't being populated into the multidict 
correctly due to insufficient testing on my part originally * fixing example to be correct * really fix it this time * Add docstrings for is_* properties * Cleaner version of get for mld * Move to is not instead of standard equality and fixed py3 --- Examples/GetFields/show_fields.py | 4 +- tableaudocumentapi/field.py | 62 +++++++++++++++++++++++--- tableaudocumentapi/multilookup_dict.py | 18 +++++++- test/assets/datasource_test.tds | 2 +- test/test_datasource.py | 19 +++++++- test/test_multidict.py | 16 +++++++ 6 files changed, 108 insertions(+), 13 deletions(-) diff --git a/Examples/GetFields/show_fields.py b/Examples/GetFields/show_fields.py index b04a056..ee45f87 100644 --- a/Examples/GetFields/show_fields.py +++ b/Examples/GetFields/show_fields.py @@ -20,8 +20,8 @@ if field.calculation: print(' the formula is {}'.format(field.calculation)) blank_line = True - if field.aggregation: - print(' the default aggregation is {}'.format(field.aggregation)) + if field.default_aggregation: + print(' the default aggregation is {}'.format(field.default_aggregation)) blank_line = True if blank_line: diff --git a/tableaudocumentapi/field.py b/tableaudocumentapi/field.py index 1eb68ef..8162cdb 100644 --- a/tableaudocumentapi/field.py +++ b/tableaudocumentapi/field.py @@ -41,12 +41,6 @@ def apply_metadata(self, metadata_record): def from_xml(cls, xmldata): return cls(xmldata) - def __getattr__(self, item): - private_name = '_{}'.format(item) - if item in _ATTRIBUTES or item in _METADATA_ATTRIBUTES: - return getattr(self, private_name) - raise AttributeError(item) - def _apply_attribute(self, xmldata, attrib, default_func): if hasattr(self, '_read_{}'.format(attrib)): value = getattr(self, '_read_{}'.format(attrib))(xmldata) @@ -71,6 +65,62 @@ def name(self): return self.id + @property + def id(self): + """ Name of the field as specified in the file, usually surrounded by [ ] """ + return self._id + + @property + def caption(self): + """ Name of the field as displayed in 
Tableau unless an aliases is defined """ + return self._caption + + @property + def alias(self): + """ Name of the field as displayed in Tableau if the default name isn't wanted """ + return self._alias + + @property + def datatype(self): + """ Type of the field within Tableau (string, integer, etc) """ + return self._datatype + + @property + def role(self): + """ Dimension or Measure """ + return self._role + + @property + def is_quantitative(self): + """ A dependent value, usually a measure of something + + e.g. Profit, Gross Sales """ + return self._type == 'quantitative' + + @property + def is_ordinal(self): + """ Is this field a categorical field that has a specific order + + e.g. How do you feel? 1 - awful, 2 - ok, 3 - fantastic """ + return self._type == 'ordinal' + + @property + def is_nominal(self): + """ Is this field a categorical field that does not have a specific order + + e.g. What color is your hair? """ + return self._type == 'nominal' + + @property + def calculation(self): + """ If this field is a calculated field, this will be the formula """ + return self._calculation + + @property + def default_aggregation(self): + """ The default type of aggregation on the field (e.g Sum, Avg)""" + return self._aggregation + ###################################### # Special Case handling methods for reading the values from the XML ###################################### diff --git a/tableaudocumentapi/multilookup_dict.py b/tableaudocumentapi/multilookup_dict.py index 21e2736..64b742a 100644 --- a/tableaudocumentapi/multilookup_dict.py +++ b/tableaudocumentapi/multilookup_dict.py @@ -1,11 +1,17 @@ import weakref +_no_default_value = object() + + def _resolve_value(key, value): + retval = None try: - retval = value.get(key, None) + if hasattr(value, 'get'): + retval = value.get(key, None) + if retval is None: - retval = value.getattr(key, None) + retval = getattr(value, key, None) except AttributeError: retval = None return retval @@ -43,6 +49,14 @@ def 
__setitem__(self, key, value): dict.__setitem__(self, key, value) + def get(self, key, default_value=_no_default_value): + try: + return self[key] + except KeyError: + if default_value is not _no_default_value: + return default_value + raise + def __getitem__(self, key): if key in self._indexes['alias']: key = self._indexes['alias'][key] diff --git a/test/assets/datasource_test.tds b/test/assets/datasource_test.tds index 227e006..a1e78a8 100644 --- a/test/assets/datasource_test.tds +++ b/test/assets/datasource_test.tds @@ -77,7 +77,7 @@ - + diff --git a/test/test_datasource.py b/test/test_datasource.py index da956ee..0a2457e 100644 --- a/test/test_datasource.py +++ b/test/test_datasource.py @@ -1,6 +1,5 @@ import unittest import os.path -import functools from tableaudocumentapi import Datasource @@ -23,7 +22,23 @@ def test_datasource_returns_calculation_from_fields(self): self.assertEqual('1', self.ds.fields['[Number of Records]'].calculation) def test_datasource_uses_metadata_record(self): - self.assertEqual('Sum', self.ds.fields['[x]'].aggregation) + self.assertEqual('Sum', self.ds.fields['[x]'].default_aggregation) def test_datasource_column_name_contains_apostrophy(self): self.assertIsNotNone(self.ds.fields.get("[Today's Date]", None)) + + def test_datasource_field_can_get_caption(self): + self.assertEqual(self.ds.fields['[a]'].caption, 'A') + self.assertEqual(getattr(self.ds.fields['[a]'], 'caption', None), 'A') + + def test_datasource_field_caption_can_be_used_to_query(self): + self.assertIsNotNone(self.ds.fields.get('A', None)) + + def test_datasource_field_is_nominal(self): + self.assertTrue(self.ds.fields['[a]'].is_nominal) + + def test_datasource_field_is_quantitative(self): + self.assertTrue(self.ds.fields['[y]'].is_quantitative) + + def test_datasource_field_is_ordinal(self): + self.assertTrue(self.ds.fields['[x]'].is_ordinal) diff --git a/test/test_multidict.py b/test/test_multidict.py index abb01c5..0a78e9d 100644 --- a/test/test_multidict.py +++ 
b/test/test_multidict.py @@ -45,3 +45,19 @@ def test_mutlilookupdict_can_still_find_caption_even_with_alias(self): def test_mutlilookupdict_can_still_find_id_even_with_caption(self): actual = self.mld['[bar]'] self.assertEqual(2, actual['value']) + + def test_multilookupdict_gives_key_error_on_invalid_key(self): + try: + self.mld.get('foobar') + self.fail('should have thrown key error') + except KeyError as ex: + self.assertEqual(str(ex), "'foobar'") + + def test_multilookupdict_get_returns_default_value(self): + default_value = ('default', 'return', 'value') + actual = self.mld.get('foobar', default_value) + self.assertEqual(actual, default_value) + + def test_multilookupdict_get_returns_value(self): + actual = self.mld.get('baz') + self.assertEqual(1, actual['value']) From 3757adaeb62877bb66c051fee6f64992d08556bb Mon Sep 17 00:00:00 2001 From: Russell Hay Date: Wed, 20 Jul 2016 20:57:15 -0700 Subject: [PATCH 21/26] fixes#47 Implement ability to query fields used on a worksheet (#54) * first stab at an API, not correct in retrospect * Initial revision of api based on API discussions * adding additional testing and enabling lists to be passed in * removing left over stuff from previous attempt * Some fields are not listed in tags, so we need to construct from * removing thumbnail * move _column_object_from* to use a named tuple * removing cruft from previous implementation * renaming retval to something more useful * cleaning up _is_used_by_worksheet * reformatting import statements * removing PredicateDictionary * A workbook doc without any content should still load * found_in -> used_by_sheet --- tableaudocumentapi/datasource.py | 73 ++++++++++--- tableaudocumentapi/field.py | 63 +++++++++-- tableaudocumentapi/workbook.py | 60 +++++++++-- test/assets/TABLEAU_10_TWB.twb | 23 ++++- test/assets/datasource_test.twb | 172 +++++++++++++++++++++++++++++++ test/assets/empty_workbook.twb | 3 + test/bvt.py | 9 ++ test/test_datasource.py | 60 ++++++++++- 8 files changed, 
428 insertions(+), 35 deletions(-) create mode 100644 test/assets/datasource_test.twb create mode 100644 test/assets/empty_workbook.twb diff --git a/tableaudocumentapi/datasource.py b/tableaudocumentapi/datasource.py index 924575d..0fdc3fb 100644 --- a/tableaudocumentapi/datasource.py +++ b/tableaudocumentapi/datasource.py @@ -12,17 +12,54 @@ from tableaudocumentapi import Field from tableaudocumentapi.multilookup_dict import MultiLookupDict +######## +# This is needed in order to determine if something is a string or not. It is necessary because +# of differences between python2 (basestring) and python3 (str). If python2 support is every +# dropped, remove this and change the basestring references below to str +try: + basestring +except NameError: + basestring = str +######## -def _mapping_from_xml(root_xml, column_xml): - retval = Field.from_xml(column_xml) - local_name = retval.id - if "'" in local_name: - local_name = sax.escape(local_name, {"'": "'"}) - xpath = ".//metadata-record[@class='column'][local-name='{}']".format(local_name) - metadata_record = root_xml.find(xpath) +_ColumnObjectReturnTuple = collections.namedtuple('_ColumnObjectReturnTupleType', ['id', 'object']) + + +def _get_metadata_xml_for_field(root_xml, field_name): + if "'" in field_name: + field_name = sax.escape(field_name, {"'": "'"}) + xpath = ".//metadata-record[@class='column'][local-name='{}']".format(field_name) + return root_xml.find(xpath) + + +def _is_used_by_worksheet(names, field): + return any((y for y in names if y in field.worksheets)) + + +class FieldDictionary(MultiLookupDict): + def used_by_sheet(self, name): + # If we pass in a string, no need to get complicated, just check to see if name is in + # the field's list of worksheets + if isinstance(name, basestring): + return [x for x in self.values() if name in x.worksheets] + + # if we pass in a list, we need to check to see if any of the names in the list are in + # the field's list of worksheets + return [x for x in 
self.values() if _is_used_by_worksheet(name, x)] + + +def _column_object_from_column_xml(root_xml, column_xml): + field_object = Field.from_column_xml(column_xml) + local_name = field_object.id + metadata_record = _get_metadata_xml_for_field(root_xml, local_name) if metadata_record is not None: - retval.apply_metadata(metadata_record) - return retval.id, retval + field_object.apply_metadata(metadata_record) + return _ColumnObjectReturnTuple(field_object.id, field_object) + + +def _column_object_from_metadata_xml(metadata_xml): + field_object = Field.from_metadata_xml(metadata_xml) + return _ColumnObjectReturnTuple(field_object.id, field_object) class ConnectionParser(object): @@ -73,7 +110,7 @@ def __init__(self, dsxml, filename=None): @classmethod def from_file(cls, filename): - "Initialize datasource from file (.tds)" + """Initialize datasource from file (.tds)""" if zipfile.is_zipfile(filename): dsxml = xfile.get_xml_from_archive(filename).getroot() @@ -141,6 +178,16 @@ def fields(self): return self._fields def _get_all_fields(self): - column_objects = (_mapping_from_xml(self._datasourceTree, xml) - for xml in self._datasourceTree.findall('.//column')) - return MultiLookupDict({k: v for k, v in column_objects}) + column_objects = [_column_object_from_column_xml(self._datasourceTree, xml) + for xml in self._datasourceTree.findall('.//column')] + existing_fields = [x.id for x in column_objects] + metadata_fields = (x.text + for x in self._datasourceTree.findall(".//metadata-record[@class='column']/local-name")) + + missing_fields = (x for x in metadata_fields if x not in existing_fields) + column_objects.extend(( + _column_object_from_metadata_xml(_get_metadata_xml_for_field(self._datasourceTree, field_name)) + for field_name in missing_fields + )) + + return FieldDictionary({k: v for k, v in column_objects}) diff --git a/tableaudocumentapi/field.py b/tableaudocumentapi/field.py index 8162cdb..4af648f 100644 --- a/tableaudocumentapi/field.py +++ 
b/tableaudocumentapi/field.py @@ -14,6 +14,12 @@ 'aggregation', # The type of aggregation on the field (e.g Sum, Avg) ] +_METADATA_TO_FIELD_MAP = [ + ('local-name', 'id'), + ('local-type', 'datatype'), + ('remote-alias', 'alias') +] + def _find_metadata_record(record, attrib): element = record.find('.//{}'.format(attrib)) @@ -25,25 +31,60 @@ def _find_metadata_record(record, attrib): class Field(object): """ Represents a field in a datasource """ - def __init__(self, xmldata): - for attrib in _ATTRIBUTES: - self._apply_attribute(xmldata, attrib, lambda x: xmldata.attrib.get(x, None)) + def __init__(self, column_xml=None, metadata_xml=None): - # All metadata attributes begin at None + # Initialize all the possible attributes + for attrib in _ATTRIBUTES: + setattr(self, '_{}'.format(attrib), None) for attrib in _METADATA_ATTRIBUTES: setattr(self, '_{}'.format(attrib), None) + self._worksheets = set() + + if column_xml is not None: + self._initialize_from_column_xml(column_xml) + if metadata_xml is not None: + self.apply_metadata(metadata_xml) + + elif metadata_xml is not None: + self._initialize_from_metadata_xml(metadata_xml) + + else: + raise AttributeError('column_xml or metadata_xml needed to initialize field') + + def _initialize_from_column_xml(self, xmldata): + for attrib in _ATTRIBUTES: + self._apply_attribute(xmldata, attrib, lambda x: xmldata.attrib.get(x, None)) + + def _initialize_from_metadata_xml(self, xmldata): + for metadata_name, field_name in _METADATA_TO_FIELD_MAP: + self._apply_attribute(xmldata, field_name, lambda x: xmldata.find('.//{}'.format(metadata_name)).text, + read_name=metadata_name) + self.apply_metadata(xmldata) + ######################################## + # Special Case methods for construction fields from various sources + # not intended for client use + ######################################## def apply_metadata(self, metadata_record): for attrib in _METADATA_ATTRIBUTES: self._apply_attribute(metadata_record, attrib, 
functools.partial(_find_metadata_record, metadata_record)) + def add_used_in(self, name): + self._worksheets.add(name) + @classmethod - def from_xml(cls, xmldata): - return cls(xmldata) + def from_column_xml(cls, xmldata): + return cls(column_xml=xmldata) - def _apply_attribute(self, xmldata, attrib, default_func): - if hasattr(self, '_read_{}'.format(attrib)): - value = getattr(self, '_read_{}'.format(attrib))(xmldata) + @classmethod + def from_metadata_xml(cls, xmldata): + return cls(metadata_xml=xmldata) + + def _apply_attribute(self, xmldata, attrib, default_func, read_name=None): + if read_name is None: + read_name = attrib + if hasattr(self, '_read_{}'.format(read_name)): + value = getattr(self, '_read_{}'.format(read_name))(xmldata) else: value = default_func(attrib) @@ -121,6 +162,10 @@ def default_aggregation(self): """ The default type of aggregation on the field (e.g Sum, Avg)""" return self._aggregation + @property + def worksheets(self): + return list(self._worksheets) + ###################################### # Special Case handling methods for reading the values from the XML ###################################### diff --git a/tableaudocumentapi/workbook.py b/tableaudocumentapi/workbook.py index 9e29973..fd85b3c 100644 --- a/tableaudocumentapi/workbook.py +++ b/tableaudocumentapi/workbook.py @@ -5,17 +5,12 @@ ############################################################################### import os import zipfile +import weakref import xml.etree.ElementTree as ET from tableaudocumentapi import Datasource, xfile -########################################################################### -# -# Utility Functions -# -########################################################################### - class Workbook(object): """ @@ -33,6 +28,7 @@ def __init__(self, filename): Constructor. 
""" + self._filename = filename # Determine if this is a twb or twbx and get the xml root @@ -47,6 +43,12 @@ def __init__(self, filename): self._datasources = self._prepare_datasources( self._workbookRoot) # self.workbookRoot.find('datasources') + self._datasource_index = self._prepare_datasource_index(self._datasources) + + self._worksheets = self._prepare_worksheets( + self._workbookRoot, self._datasource_index + ) + ########### # datasources ########### @@ -54,6 +56,13 @@ def __init__(self, filename): def datasources(self): return self._datasources + ########### + # worksheets + ########### + @property + def worksheets(self): + return self._worksheets + ########### # filename ########### @@ -95,12 +104,47 @@ def save_as(self, new_filename): # Private API. # ########################################################################### - def _prepare_datasources(self, xmlRoot): + @staticmethod + def _prepare_datasource_index(datasources): + retval = weakref.WeakValueDictionary() + for datasource in datasources: + retval[datasource.name] = datasource + + return retval + + @staticmethod + def _prepare_datasources(xml_root): datasources = [] # loop through our datasources and append - for datasource in xmlRoot.find('datasources'): + datasource_elements = xml_root.find('datasources') + if datasource_elements is None: + return [] + + for datasource in datasource_elements: ds = Datasource(datasource) datasources.append(ds) return datasources + + @staticmethod + def _prepare_worksheets(xml_root, ds_index): + worksheets = [] + worksheets_element = xml_root.find('.//worksheets') + if worksheets_element is None: + return worksheets + + for worksheet_element in worksheets_element: + worksheet_name = worksheet_element.attrib['name'] + worksheets.append(worksheet_name) # TODO: A real worksheet object, for now, only name + + dependencies = worksheet_element.findall('.//datasource-dependencies') + + for dependency in dependencies: + datasource_name = 
dependency.attrib['datasource'] + datasource = ds_index[datasource_name] + for column in dependency.findall('.//column'): + column_name = column.attrib['name'] + datasource.fields[column_name].add_used_in(worksheet_name) + + return worksheets diff --git a/test/assets/TABLEAU_10_TWB.twb b/test/assets/TABLEAU_10_TWB.twb index c116bdf..aa0207f 100644 --- a/test/assets/TABLEAU_10_TWB.twb +++ b/test/assets/TABLEAU_10_TWB.twb @@ -1 +1,22 @@ - + + + + + + + + + + + + + + + + + diff --git a/test/assets/datasource_test.twb b/test/assets/datasource_test.twb new file mode 100644 index 0000000..af87659 --- /dev/null +++ b/test/assets/datasource_test.twb @@ -0,0 +1,172 @@ + + + + + + + + + + + + + + + a + 130 + [a] + [xy] + a + 1 + string + Count + 255 + true + + "SQL_WVARCHAR" + "SQL_C_WCHAR" + "true" + + + + x + 3 + [x] + [xy] + x + 2 + integer + Sum + 10 + true + + "SQL_INTEGER" + "SQL_C_SLONG" + + + + y + 3 + [y] + [xy] + y + 3 + integer + Sum + 10 + true + + "SQL_INTEGER" + "SQL_C_SLONG" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +