"""This package provides classes for reading and writing Maya's IFF_
inspired :ref:`binary file format <binary_anatomy>`.

.. _IFF: http://en.wikipedia.org/wiki/Interchange_File_Format
"""
import array
import functools
import itertools
import struct
import string
# Predicate: True for printable, non-whitespace characters.
_is_printable = set(string.printable).difference(string.whitespace).__contains__


class Encoder(object):
    """The base class for encoding/decoding packed data to/from native types."""

    def split(self, encoded, size_hint):
        """Return an iterator over a split version of the encoded data.

        :param str encoded: The packed data to split.
        :param int size_hint: The suggested split size. Feel free to ignore
            this if your data has an implicit size of its own.
        :raises ValueError: if ``size_hint`` is not positive (raised when the
            iterator is first advanced, since this is a generator).
        """
        if size_hint <= 0:
            raise ValueError('size_hint must be positive, got %r' % (size_hint,))
        # Walk the data with a plain counter so that no index list is ever
        # materialised, whatever the interpreter version.
        total = len(encoded)
        start = 0
        while start < total:
            yield encoded[start:start + size_hint]
            start += size_hint

    def repr_chunk(self, chunk):
        """Create string representation of a chunk returned from :meth:`split`.

        Printable, non-whitespace characters are kept; everything else is
        shown as ``'.'``.
        """
        return ''.join(c if _is_printable(c) else '.' for c in chunk)
class StructEncoder(Encoder):
    """Encoder for arrays of a single big-endian :mod:`struct` type.

    :param str format_char: A :mod:`struct` format character (e.g. ``'f'``
        or ``'L'``); it is always interpreted with the ``'>'`` prefix.
    """

    def __init__(self, format_char):
        self.format_char = format_char
        #: Size in bytes of a single packed element.
        self.size = struct.Struct('>' + format_char).size

    def split(self, encoded, size_hint):
        """Split on element boundaries, using at most ``size_hint`` bytes.

        The hint is rounded down to a whole number of elements, but never
        below one element: a hint smaller than ``self.size`` would otherwise
        round to zero and make the base splitter fail.
        """
        size_hint = max(self.size, size_hint - (size_hint % self.size))
        return super(StructEncoder, self).split(encoded, size_hint)

    def unpack(self, encoded):
        """Unpack ``encoded`` into a tuple of native values.

        :raises ValueError: if the data is not a whole number of elements.
        """
        count, rem = divmod(len(encoded), self.size)
        if rem:
            raise ValueError('encoded length %d is not multiple of %d; %d remains' % (len(encoded), self.size, rem))
        return struct.unpack('>%d%s' % (count, self.format_char), encoded)

    def repr_chunk(self, encoded):
        """Return the space-separated ``repr`` of every unpacked element."""
        return ' '.join(repr(x) for x in self.unpack(encoded))
class StringEncoder(Encoder):
    """Encoder for data holding one or more ``NULL`` terminated strings."""

    def split(self, encoded, size_hint):
        """Return the list of strings in ``encoded``; ``size_hint`` is unused.

        Trailing ``NULL`` bytes are dropped first so that the final
        terminator does not produce an extra empty string.
        """
        without_terminator = encoded.rstrip('\0')
        pieces = without_terminator.split('\0')
        return pieces

    def repr_chunk(self, chunk):
        """Return the ``repr`` of one string produced by :meth:`split`."""
        text = repr(chunk)
        return text
# Map encoding names to the object which exposes the Encoder interface.
encoders = {}


def register_encoder(names, encoder=None):
    """Register an :class:`Encoder` for the given type names.

    These types are a concept of this module, and have no parallel in the file
    format itself. These are what we use to unpack the raw binary data into
    standard Python types.

    Types that are registered upon import include:

    * ``"float"``;
    * ``"uint"`` (32-bit big-endian integer);
    * ``"string"`` (``NULL`` terminated).

    :param names: A string, or iterable of strings.
    :param encoder: The :class:`Encoder` to use for this type.
    :returns: ``encoder`` itself, or a decorator when ``encoder`` is omitted.

    This function can operate as a decorator as well::

        @register_encoder('attr')
        class AttributeEncoder(mayatools.binary.Encoder):
            pass

    Note that whatever object is passed (or decorated) is stored as-is; in
    the decorator form above that is the class, not an instance of it.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # A lone name is shorthand for a one-element list of names.
    if isinstance(names, string_types):
        names = [names]

    # Decorator form: defer registration until the encoder is supplied.
    if encoder is None:
        return functools.partial(register_encoder, names)

    for name in names:
        encoders[name] = encoder
    return encoder
# Encodings available as soon as this module is imported. Both struct based
# encoders unpack big-endian data (StructEncoder always prefixes '>').
register_encoder('float', StructEncoder('f'))
register_encoder('uint', StructEncoder('L'))
register_encoder('string', StringEncoder())
#: Map tag names to the name of an encoding. Add to this dict to interpret tags
#: as certain types.
#:
#: Each value is looked up in ``encoders`` by :func:`get_encoder`; a tag that is
#: not listed here falls back to the ``'raw'`` encoding, which (unless someone
#: registers an encoder under that name) means the base :class:`Encoder`.
tag_encoding = {
    # Maya headers.
    'VERS': 'string', # app version
    'UVER': 'string',
    'MADE': 'string',
    'CHNG': 'string', # timestamp
    'ICON': 'string',
    'INFO': 'string',
    'OBJN': 'string',
    'INCL': 'string',
    'LUNI': 'string', # linear unit
    'TUNI': 'string', # time unit
    'AUNI': 'string', # angle unit
    'FINF': 'string', # file info
    # Generic.
    'SIZE': 'uint',
    # DAG
    'CREA': 'string', # create node
    'STR ': 'string', # string attribute
    # Cache headers.
    'VRSN': 'string', # cache version
    'STIM': 'uint', # cache start time
    'ETIM': 'uint', # cache end time
    # Cache channels.
    'CHNM': 'string', # channel name
    # Cache data.
    'FBCA': 'float', # floating cache array
}
def get_encoder(tag):
    """Get an :class:`Encoder` for the given tag.

    :param str tag: The 4 character node "tag".
    :returns: The encoder registered for the tag's encoding. When the tag
        has no entry in ``tag_encoding``, or its encoding has no registered
        encoder, a fresh base :class:`Encoder` is returned instead; this
        function never returns ``None``.
    """
    # Unknown tags are looked up under the 'raw' encoding name.
    encoding = tag_encoding.get(tag, 'raw')
    registered = encoders.get(encoding)
    return registered or Encoder()
def hexdump(*args, **kwargs):
    """Return a hex dump as one string.

    All arguments are forwarded unchanged to :func:`_hexdump`, whose yielded
    fragments are concatenated.
    """
    fragments = _hexdump(*args, **kwargs)
    return ''.join(fragments)
def _hexdump(raw, initial_offset=0, chunk=4, line=16, indent='', tag=None):
    """Yield the string fragments that make up a hex dump of ``raw``.

    :param str raw: The packed data to dump.
    :param initial_offset: Offset printed for the first line; ``None`` is
        treated as ``0``.
    :param int chunk: Number of bytes per space-separated hex group.
    :param int line: Number of bytes per output line (passed to the encoder
        as its split size hint).
    :param str indent: Prefix emitted at the start of every line.
    :param tag: Tag used to pick the encoder via :func:`get_encoder`.
    """
    # Widths measured in hex digits rather than bytes.
    hex_group = 2 * chunk
    hex_width = 2 * line

    encoder = get_encoder(tag)

    # Chunk.pprint forwards Chunk.offset, which defaults to None; without this
    # guard the '%04x' formatting below raises TypeError.
    offset = initial_offset or 0

    for encoded_chunk in encoder.split(raw, line):

        if not encoded_chunk:
            continue

        yield indent
        yield '%04x: ' % offset
        # NOTE(review): for encoders that drop separators while splitting
        # (e.g. the NULL bytes removed by StringEncoder) this undercounts.
        offset += len(encoded_chunk)

        # Encode the chunk to hex, pad it, and chunk it further.
        hex_chunk = encoded_chunk.encode('hex')
        hex_chunk += ' ' * (hex_width - len(hex_chunk))
        for i in xrange(0, len(hex_chunk), hex_group):
            yield '%s ' % hex_chunk[i:i + hex_group]

        yield encoder.repr_chunk(encoded_chunk)
        yield '\n'
_group_tags = set()
_tag_alignments = {}
for base in ('FORM', 'CAT ', 'LIST', 'PROP'):
for char, alignment in (('', 2), ('4', 4), ('8', 8)):
tag = base[:-len(char)] + char if char else base
_group_tags.add(tag)
_tag_alignments[tag] = alignment
def _get_tag_alignment(tag):
    """Return the byte alignment for a group tag, or 2 for unknown tags."""
    try:
        return _tag_alignments[tag]
    except KeyError:
        return 2
def _get_padding(size, alignment):
if size % alignment == 0:
return 0
else:
return alignment - size % alignment
class Node(object):
    """Base class for group nodes in, and the root node of a Maya file graph."""

    def __init__(self):
        #: The node this one was added to (set by :meth:`add_child`), or
        #: ``None`` for a root or unattached node.
        self.parent = None
        #: The children of this node.
        self.children = []

    def add_child(self, child):
        """Append ``child``, make this node its parent, and return it."""
        self.children.append(child)
        child.parent = self
        return child

    def add_group(self, *args, **kwargs):
        """Create a :class:`Group` from the arguments and add it as a child."""
        return self.add_child(Group(*args, **kwargs))

    def add_chunk(self, *args, **kwargs):
        """Create a :class:`Chunk` from the arguments and add it as a child."""
        return self.add_child(Chunk(*args, **kwargs))

    def find(self, tag):
        """Iterate across all descendants of this node with a given tag.

        Matches are yielded depth-first: a matching child comes before any
        matches among its own descendants.
        """
        for child in self.children:
            if child.tag == tag:
                yield child
            # Only Node instances have children of their own to search.
            if isinstance(child, Node):
                for x in child.find(tag):
                    yield x

    def find_one(self, tag, *args):
        """Find the first descendant of this node with a given tag.

        :param str tag: The tag to find.
        :param default: What to return if we can't find a node.
        :raises KeyError: if we can't find a tag and no default is given.
        """
        for child in self.find(tag):
            return child
        # The default is taken positionally so that ``None`` is a usable value.
        if args:
            return args[0]
        raise KeyError(tag)

    def dumps_iter(self):
        """Iterate chunks of the packed version of this node and its children.

        To write to a file::

            with open(path, 'wb') as fh:
                for chunk in node.dumps_iter():
                    fh.write(chunk)

        """
        for child in self.children:
            for x in child.dumps_iter():
                yield x
class Group(Node):
    """A group node in a Maya file graph.

    :param str tag: The data type of the group.
    :param str type_: The group type tag; it determines the alignment.
    :param int size: Size of the group in bytes.
    :param int start: Offset at which the group starts.
    """

    def __init__(self, tag, type_='FOR4', size=0, start=0):
        super(Group, self).__init__()

        #: The data type.
        self.tag = tag

        #: The group type (e.g. ``FORM``, ``LIST``, ``PROP``, ``CAT``).
        self.type = type_

        self.size = size
        self.start = start

        # The end offset includes whatever padding the alignment demands.
        self.alignment = _get_tag_alignment(type_)
        trailing = _get_padding(size, self.alignment)
        self.end = start + size + trailing

    def pprint(self, _indent=0):
        """Print a structured representation of the group to stdout."""
        summary = '%s group (%s); %d bytes for %d children:' % (
            self.tag, self.type, self.size, len(self.children))
        print(_indent * ' ' + summary)
        deeper = _indent + 1
        for child in self.children:
            child.pprint(_indent=deeper)

    def dumps_iter(self):
        """Iterate the packed group: type, size, tag, then every child.

        The children are serialised up front because their combined length
        (plus 4 bytes for the group's own tag) forms the size field.
        """
        payload = [piece
                   for child in self.children
                   for piece in child.dumps_iter()]
        payload_size = sum(len(piece) for piece in payload)

        yield self.type
        yield struct.pack(">L", payload_size + 4)
        yield self.tag
        for piece in payload:
            yield piece
class Chunk(object):
    """A data node of a Maya file graph: a tag plus its raw binary payload.

    :param str tag: The data type.
    :param str data: Raw binary data.
    :param offset: Offset shown for the data when pretty-printing; ``None``
        is displayed as ``0``.
    :param kwargs: Any extra keyword arguments are set as attributes.
    """

    def __init__(self, tag, data='', offset=None, **kwargs):
        #: Set by :meth:`Node.add_child`.
        self.parent = None
        #: The data type.
        self.tag = tag
        #: Raw binary data.
        self.data = data
        self.offset = offset
        for k, v in kwargs.items():
            setattr(self, k, v)

    def pprint(self, _indent=0):
        """Print a structured representation of the node to stdout."""
        encoding = tag_encoding.get(self.tag)
        if encoding:
            header = '%d bytes as %s(s)' % (len(self.data), encoding)
        else:
            header = '%d raw bytes' % len(self.data)
        print(_indent * ' ' + ('%s; %s' % (self.tag, header)))
        # ``offset`` defaults to None, which the hex dump cannot format.
        print(hexdump(self.data, self.offset or 0, tag=self.tag, indent=(_indent + 1) * ' ').rstrip())

    def __repr__(self):
        return '<%s %s; %d bytes>' % (self.__class__.__name__, self.tag, len(self.data))

    def dumps_iter(self):
        """Iterate the packed chunk: tag, size, data, then alignment padding."""
        yield self.tag
        yield struct.pack(">L", len(self.data))
        yield self.data
        # An unattached chunk (or one whose parent carries no alignment) uses
        # 2, the same fallback the module applies to unknown group tags.
        # NOTE(review): confirm 2 is the desired alignment for such chunks.
        alignment = getattr(self.parent, 'alignment', 2)
        padding = _get_padding(len(self.data), alignment)
        if padding:
            yield '\0' * padding

    def _unpack(self, format_char):
        """Unpack the data as a big-endian array of ``format_char`` elements.

        :raises ValueError: if the data is not a whole number of elements.
        """
        element_size = struct.calcsize('>' + format_char)
        count, remainder = divmod(len(self.data), element_size)
        if remainder:
            raise ValueError('%s is not multiple of %d for %r format' % (len(self.data), element_size, format_char))
        unpacked = struct.unpack('>%d%s' % (count, format_char), self.data)
        return array.array(format_char, unpacked)

    def _pack(self, format_char, values):
        """Replace the data with ``values`` packed big-endian."""
        # Materialise first so that any iterable (not only sized ones) works.
        values = tuple(values)
        self.data = struct.pack('>%d%s' % (len(values), format_char), *values)

    @property
    def ints(self):
        """Binary data interpreted as array of unsigned integers.

        This is settable to an iterable of integers."""
        return self._unpack('L')

    @ints.setter
    def ints(self, values):
        self._pack('L', values)

    @property
    def floats(self):
        """Binary data interpreted as array of floats.

        This is settable to an iterable of floats."""
        return self._unpack('f')

    @floats.setter
    def floats(self, values):
        self._pack('f', values)

    @property
    def string(self):
        """Binary data interpreted as a string.

        This is settable with a string."""
        return self.data.rstrip('\0')

    @string.setter
    def string(self, v):
        # Normalise to exactly one trailing NULL terminator.
        self.data = str(v).rstrip('\0') + '\0'
class Parser(Node):
    """Maya binary file parser.

    :param file: The file-like object to parse from; must support ``read(size)``
        and ``tell()``.

    """

    def __init__(self, file):
        # Node.__init__ already creates the (empty) list of children.
        super(Parser, self).__init__()
        self._file = file
        # Groups that are currently open, innermost last.
        self._group_stack = []

    def close(self):
        """Close the underlying file object."""
        self._file.close()

    def pprint(self, _indent=-1):
        """Print a structured representation of the file to stdout."""
        for child in self.children:
            child.pprint(_indent=_indent + 1)

    def parse_next(self):
        """Parse to the next :class:`Group` or :class:`Chunk`, returning it.

        This is useful when you want to read the headers of a file without
        loading its entire contents into memory.

        :returns: The parsed node, or ``None`` once the file is exhausted.
        :raises AssertionError: if a data chunk appears outside of any group.
        """

        # Clean the group stack: drop every group we have read past.
        while self._group_stack and self._group_stack[-1].end <= self._file.tell():
            self._group_stack.pop(-1)

        # Read a tag and size from the file.
        tag = self._file.read(4)
        if not tag:
            return None
        size = struct.unpack(">L", self._file.read(4))[0]

        if tag in _group_tags:
            offset = self._file.tell()
            group_tag = self._file.read(4)
            group = Group(group_tag, tag, size, offset)

            # Add it as a child of the current group.
            group_head = self._group_stack[-1] if self._group_stack else self
            group_head.add_child(group)

            self._group_stack.append(group)
            return group

        offset = self._file.tell()
        data = self._file.read(size)
        chunk = Chunk(tag, data, offset)

        # Raised explicitly (rather than via ``assert``) so that the check
        # still runs when Python is started with -O.
        if not self._group_stack:
            raise AssertionError('Data chunk outside of group.')
        enclosing = self._group_stack[-1]
        enclosing.add_child(chunk)

        # Cleanup padding.
        padding = _get_padding(size, enclosing.alignment)
        if padding:
            self._file.read(padding)

        return chunk

    def parse_all(self):
        """Parse the entire (remaining) file."""
        while self.parse_next() is not None:
            pass
if __name__ == '__main__':

    # Command line use: pretty-print (or plainly hex dump) the given files.
    import sys
    from optparse import OptionParser

    opt_parser = OptionParser()
    opt_parser.add_option('-t', '--type', action='append', default=[],
        help='NAME[,NAME...]:type to set the encoding of tags, or just '
             'NAME[,NAME...] to forget their encoding')
    opt_parser.add_option('-n', '--no-types', action='store_true',
        help='forget every known tag encoding first')
    opt_parser.add_option('-x', '--hex', action='store_true',
        help='hex dump the raw files instead of parsing them')
    opts, args = opt_parser.parse_args()

    if opts.hex:
        for arg in args:
            # The format is binary: read it as such, and close the file.
            with open(arg, 'rb') as fh:
                print(hexdump(fh.read()))
        sys.exit()

    if opts.no_types:
        tag_encoding.clear()

    for type_spec in opts.type:
        type_spec = type_spec.split(':')
        names = type_spec[0].split(',')
        if len(type_spec) == 1:
            # No type given: drop whatever encoding the tags had.
            for name in names:
                tag_encoding.pop(name, None)
        elif len(type_spec) == 2:
            for name in names:
                tag_encoding[name] = type_spec[1]
        else:
            raise ValueError('type spec should look like NAME:type')

    for arg in args:
        parser = Parser(open(arg, 'rb'))
        try:
            parser.parse_all()
            parser.pprint()
        finally:
            # Release the file even if parsing or printing fails.
            parser.close()