Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions tableaudocumentapi/archivefile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import contextlib
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'archivefile' is obviously a terrible name

Copy link
Copy Markdown
Contributor

@graysonarts graysonarts Jun 30, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hah, containerfile? It's only a zip archive because of implementation details. Ultimately, it's a container file format.
(also, I'm horrible at naming things)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

containerfile works for me.

<Insert joke about naming things is hard here / >

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what exactly are we trying to convey here? just that there is some kind of container (i.e. TDSX or TWBX) vs. a single file (TDS or TWB)?

if these containers will always end in "x" then let's call this xfile.py :)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh damn, @benlower with the winning name! we should go with xfiles

import os
import shutil
import tempfile
import zipfile

import xml.etree.ElementTree as ET


@contextlib.contextmanager
def temporary_directory(*args, **kwargs):
    """Context manager yielding the path of a freshly created temp directory.

    All positional and keyword arguments are passed straight through to
    ``tempfile.mkdtemp``. The directory and everything inside it is removed
    when the ``with`` block exits, whether normally or through an exception.
    """
    scratch_dir = tempfile.mkdtemp(*args, **kwargs)
    try:
        yield scratch_dir
    finally:
        # Runs on both normal exit and error, so the directory is not leaked.
        shutil.rmtree(scratch_dir)


def find_file_in_zip(zip, ext):
for filename in zip.namelist():
if os.path.splitext(filename)[-1].lower() == ext[:-1]:
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fairly hacky way to determine the right content type (TWB or TDS) we're looking for.

Copy link
Copy Markdown
Contributor

@graysonarts graysonarts Jun 30, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't there a way to get the file stream directly from the zipfile without having to extract it? It might be better to inspect the first tag of the file rather than rely on file extension. Maybe I'll just open an issue for investigating that as an option.

There is! zipfile.open allows you to open the file without extracting the zip file.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call, I'll look into it, along with your later suggestion of when to extract vs open directly.

Originally my brain didn't want to wrap itself around how to deal with the read-only stream and writing to files, but now that I've had coffee...

Copy link
Copy Markdown
Contributor Author

@t8y8 t8y8 Jun 30, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I came up with this, and it works, should protect against a random XML file and bad file extensions:

def find_file_in_zip(zip, ext):
    """Return the name of the first Tableau XML document inside ``zip``.

    Each archive member is parsed as XML and its root tag inspected, so the
    result depends on file content rather than on file extensions.

    Args:
        zip: An open ``zipfile.ZipFile`` to search.
        ext: Unused; kept so the signature matches the extension-based
            version of this helper.

    Returns:
        The member name whose root element is ``workbook`` or
        ``datasource``, or ``None`` if no member qualifies.
    """
    for filename in zip.namelist():
        try:
            # Read straight from the archive; close the stream when done.
            with zip.open(filename) as member:
                root_tag = ET.parse(member).getroot().tag
        except ET.ParseError:
            # Not XML at all (images, extracts, ...), so keep looking.
            continue
        # Well-formed XML is not enough: only accept Tableau documents.
        if root_tag in ('workbook', 'datasource'):
            return filename
    return None

Not sure how much better than checking the ext it is though

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer this because it's based on the actual content of the file rather than the filename/extension which I always find to be better by some definition of better.

return filename


def get_xml_from_archive(filename):
file_type = os.path.splitext(filename)[-1].lower()
with temporary_directory() as temp:
with zipfile.ZipFile(filename) as zf:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we reextract this when saving the changes, why not move this to use zipfile.open on the archive file directly, avoiding having multiple tempdirs and not needing to extract the extract (oi) unless the user is saving the file.

zf.extractall(temp)
xml_file = find_file_in_zip(zf, file_type)
xml_tree = ET.parse(os.path.join(temp, xml_file))

return xml_tree


def build_archive_file(archive_contents, zip):
    """Add every file found under ``archive_contents`` to an open zip archive.

    Args:
        archive_contents: Path of the directory tree to package.
        zip: A writable ``zipfile.ZipFile``; each file is written with an
            archive name relative to ``archive_contents``.
    """
    for current_dir, _subdirs, filenames in os.walk(archive_contents):
        # Location of the directory being visited, relative to the tree root.
        rel_dir = os.path.relpath(current_dir, archive_contents)
        for name in filenames:
            member_name = os.path.join(rel_dir, name)
            source_path = os.path.join(archive_contents, rel_dir, name)
            zip.write(source_path, arcname=member_name)


def save_into_archive(xml_tree, filename, new_filename=None):
# Saving an archive means extracting the contents into a temp folder,
# saving the changes over the twb/tds file in that folder, and then
# packaging it back up into a specifically formatted zip with the correct
# relative file paths

if new_filename is None:
new_filename = filename

# Extract to temp directory
with temporary_directory() as temp_path:
file_type = os.path.splitext(filename)[-1].lower()
with zipfile.ZipFile(filename) as zf:
twb_file = find_file_in_zip(zf, file_type)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

twb_file should be "tableau_file" or "xml_file" or something.

zf.extractall(temp_path)
# Write the new version of the twb to the temp directory
xml_tree.write(os.path.join(
temp_path, twb_file), encoding="utf-8", xml_declaration=True)

# Write the new archive with the contents of the temp folder
with zipfile.ZipFile(new_filename, "w", compression=zipfile.ZIP_DEFLATED) as new_archive:
build_archive_file(temp_path, new_archive)
70 changes: 5 additions & 65 deletions tableaudocumentapi/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,49 +3,11 @@
# Datasource - A class for writing datasources to Tableau files
#
###############################################################################
import contextlib
import os
import shutil
import tempfile
import zipfile

import xml.etree.ElementTree as ET
from tableaudocumentapi import Connection


@contextlib.contextmanager
def temporary_directory(*args, **kwargs):
d = tempfile.mkdtemp(*args, **kwargs)
try:
yield d
finally:
shutil.rmtree(d)


def find_tds_in_zip(zip):
for filename in zip.namelist():
if os.path.splitext(filename)[-1].lower() == '.tds':
return filename


def get_tds_xml_from_tdsx(filename):
with temporary_directory() as temp:
with zipfile.ZipFile(filename) as zf:
zf.extractall(temp)
tds_file = find_tds_in_zip(zf)
tds_xml = ET.parse(os.path.join(temp, tds_file))

return tds_xml


def build_tdsx_file(tdsx_contents, zip):
for root_dir, _, files in os.walk(tdsx_contents):
relative_dir = os.path.relpath(root_dir, tdsx_contents)
for f in files:
temp_file_full_path = os.path.join(
tdsx_contents, relative_dir, f)
zipname = os.path.join(relative_dir, f)
zip.write(temp_file_full_path, arcname=zipname)
from tableaudocumentapi import Connection, archivefile


class ConnectionParser(object):
Expand Down Expand Up @@ -99,34 +61,11 @@ def from_file(cls, filename):
"Initialize datasource from file (.tds)"

if zipfile.is_zipfile(filename):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this pattern repeated? Seems like this could be a good candidate for a helper function.

dsxml = get_tds_xml_from_tdsx(filename).getroot()
dsxml = archivefile.get_xml_from_archive(filename).getroot()
else:
dsxml = ET.parse(filename).getroot()
return cls(dsxml, filename)

def _save_into_tdsx(self, filename=None):
# Save reuses existing filename, 'save as' takes a new one
if filename is None:
filename = self._filename

# Saving a tdsx means extracting the contents into a temp folder,
# saving the changes over the tds in that folder, and then
# packaging it back up into a specifically formatted zip with the correct
# relative file paths

# Extract to temp directory
with temporary_directory() as temp_path:
with zipfile.ZipFile(self._filename) as zf:
tds_file = find_tds_in_zip(zf)
zf.extractall(temp_path)
# Write the new version of the tds to the temp directory
self._datasourceTree.write(os.path.join(
temp_path, tds_file), encoding="utf-8", xml_declaration=True)

# Write the new tdsx with the contents of the temp folder
with zipfile.ZipFile(filename, "w", compression=zipfile.ZIP_DEFLATED) as new_tdsx:
build_tdsx_file(temp_path, new_tdsx)

def save(self):
"""
Call finalization code and save file.
Expand All @@ -142,7 +81,7 @@ def save(self):
# save the file

if zipfile.is_zipfile(self._filename):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto, this seems like it would be ripe for a helper function.

self._save_into_tdsx(self._filename)
archivefile.save_into_archive(self._datasourceTree, self._filename)
else:
self._datasourceTree.write(
self._filename, encoding="utf-8", xml_declaration=True)
Expand All @@ -159,7 +98,8 @@ def save_as(self, new_filename):

"""
if zipfile.is_zipfile(self._filename):
self._save_into_tdsx(new_filename)
archivefile.save_into_archive(
self._datasourceTree, self._filename, new_filename)
else:
self._datasourceTree.write(
new_filename, encoding="utf-8", xml_declaration=True)
Expand Down
72 changes: 7 additions & 65 deletions tableaudocumentapi/workbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,12 @@
# Workbook - A class for writing Tableau workbook files
#
###############################################################################
import contextlib
import os
import shutil
import tempfile
import zipfile

import xml.etree.ElementTree as ET

from tableaudocumentapi import Datasource
from tableaudocumentapi import Datasource, archivefile

###########################################################################
#
Expand All @@ -20,41 +17,6 @@
###########################################################################


@contextlib.contextmanager
def temporary_directory(*args, **kwargs):
d = tempfile.mkdtemp(*args, **kwargs)
try:
yield d
finally:
shutil.rmtree(d)


def find_twb_in_zip(zip):
for filename in zip.namelist():
if os.path.splitext(filename)[-1].lower() == '.twb':
return filename


def get_twb_xml_from_twbx(filename):
with temporary_directory() as temp:
with zipfile.ZipFile(filename) as zf:
zf.extractall(temp)
twb_file = find_twb_in_zip(zf)
twb_xml = ET.parse(os.path.join(temp, twb_file))

return twb_xml


def build_twbx_file(twbx_contents, zip):
for root_dir, _, files in os.walk(twbx_contents):
relative_dir = os.path.relpath(root_dir, twbx_contents)
for f in files:
temp_file_full_path = os.path.join(
twbx_contents, relative_dir, f)
zipname = os.path.join(relative_dir, f)
zip.write(temp_file_full_path, arcname=zipname)


class Workbook(object):
"""
A class for writing Tableau workbook files.
Expand All @@ -75,7 +37,8 @@ def __init__(self, filename):

# Determine if this is a twb or twbx and get the xml root
if zipfile.is_zipfile(self._filename):
self._workbookTree = get_twb_xml_from_twbx(self._filename)
self._workbookTree = archivefile.get_xml_from_archive(
self._filename)
else:
self._workbookTree = ET.parse(self._filename)

Expand Down Expand Up @@ -113,7 +76,8 @@ def save(self):
# save the file

if zipfile.is_zipfile(self._filename):
self._save_into_twbx(self._filename)
archivefile.save_into_archive(
self._workbookTree, filename=self._filename)
else:
self._workbookTree.write(
self._filename, encoding="utf-8", xml_declaration=True)
Expand All @@ -131,7 +95,8 @@ def save_as(self, new_filename):
"""

if zipfile.is_zipfile(self._filename):
self._save_into_twbx(new_filename)
archivefile.save_into_archive(
self._workbookTree, self._filename, new_filename)
else:
self._workbookTree.write(
new_filename, encoding="utf-8", xml_declaration=True)
Expand All @@ -151,29 +116,6 @@ def _prepare_datasources(self, xmlRoot):

return datasources

def _save_into_twbx(self, filename=None):
# Save reuses existing filename, 'save as' takes a new one
if filename is None:
filename = self._filename

# Saving a twbx means extracting the contents into a temp folder,
# saving the changes over the twb in that folder, and then
# packaging it back up into a specifically formatted zip with the correct
# relative file paths

# Extract to temp directory
with temporary_directory() as temp_path:
with zipfile.ZipFile(self._filename) as zf:
twb_file = find_twb_in_zip(zf)
zf.extractall(temp_path)
# Write the new version of the twb to the temp directory
self._workbookTree.write(os.path.join(
temp_path, twb_file), encoding="utf-8", xml_declaration=True)

# Write the new twbx with the contents of the temp folder
with zipfile.ZipFile(filename, "w", compression=zipfile.ZIP_DEFLATED) as new_twbx:
build_twbx_file(temp_path, new_twbx)

@staticmethod
def _is_valid_file(filename):
fileExtension = os.path.splitext(filename)[-1].lower()
Expand Down
5 changes: 2 additions & 3 deletions test/bvt.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,15 +173,14 @@ def test_can_open_tdsx_and_save_changes(self):
0].server, 'newdb.test.tsi.lan')

def test_can_open_tdsx_and_save_as_changes(self):
new_tdsx_filename = self.tdsx_file.name + "_TEST_SAVE_AS"
new_tdsx_filename = 'newtdsx.tdsx'
original_wb = Datasource.from_file(self.tdsx_file.name)
original_wb.connections[0].server = 'newdb.test.tsi.lan'
original_wb.save_as(new_tdsx_filename)

new_wb = Datasource.from_file(new_tdsx_filename)
self.assertEqual(new_wb.connections[
0].server, 'newdb.test.tsi.lan')

os.unlink(new_tdsx_filename)


Expand Down Expand Up @@ -281,7 +280,7 @@ def test_can_open_twbx_and_save_changes(self):
0].server, 'newdb.test.tsi.lan')

def test_can_open_twbx_and_save_as_changes(self):
new_twbx_filename = self.workbook_file.name + "_TEST_SAVE_AS"
new_twbx_filename = 'newtwbx.twbx'
original_wb = Workbook(self.workbook_file.name)
original_wb.datasources[0].connections[0].server = 'newdb.test.tsi.lan'
original_wb.save_as(new_twbx_filename)
Expand Down