Source code for mayatools.binary

"""This packages provides classes for reading and writing Maya's IFF_
inspired :ref:`binary file format <binary_anatomy>`.

.. _IFF: http://en.wikipedia.org/wiki/Interchange_File_Format

"""

import array
import functools
import itertools
import struct
import string


_is_printable = set(string.printable).difference(string.whitespace).__contains__


class Encoder(object):

    """The base class for encoding/decoding packed data to/from native types."""

    def split(self, encoded, size_hint):
        """Return an iterator over a split version of the encoded data.

        :param str encoded: The packed data to split.
        :param int size_hint: The suggested split size. Feel free to ignore
            this if your data has an implicit size of its own.

        """
        for i in xrange(0, len(encoded), size_hint):
            yield encoded[i:i + size_hint]

    def repr_chunk(self, chunk):
        """Create a string representation of a chunk returned from :meth:`split`."""
        return ''.join(c if _is_printable(c) else '.' for c in chunk)

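
# A small sketch of how the base encoder behaves; the byte strings below are
# purely illustrative::
#
#     enc = Encoder()
#     list(enc.split('MAYAFOR4', 4))    # -> ['MAYA', 'FOR4']
#     enc.repr_chunk('FOR4\x00\x01')    # -> 'FOR4..'
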
class StructEncoder(Encoder):

    def __init__(self, format_char):
        self.format_char = format_char
        self.size = struct.Struct('>' + format_char).size

    def split(self, encoded, size_hint):
        size_hint = size_hint - (size_hint % self.size)
        return super(StructEncoder, self).split(encoded, size_hint)

    def unpack(self, encoded):
        count, rem = divmod(len(encoded), self.size)
        if rem:
            raise ValueError('encoded length %d is not a multiple of %d; %d remains' % (
                len(encoded), self.size, rem))
        return struct.unpack('>%d%s' % (count, self.format_char), encoded)

    def repr_chunk(self, encoded):
        return ' '.join(repr(x) for x in self.unpack(encoded))


class StringEncoder(Encoder):

    def split(self, encoded, size_hint):
        return encoded.rstrip('\0').split('\0')

    def repr_chunk(self, chunk):
        return repr(chunk)


# Map encoding names to the object which exposes the Encoder interface.
encoders = {}

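
# A quick sketch of the two concrete encoders; the packed byte strings are
# only illustrative::
#
#     StructEncoder('L').unpack('\x00\x00\x00\x2a')        # -> (42,)
#     list(StringEncoder().split('persp\x00top\x00', 16))  # -> ['persp', 'top']
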
def register_encoder(names, encoder=None):
    """Register an :class:`Encoder` for the given type names.

    These types are a concept of this module, and have no parallel in the
    file format itself. They are what we use to unpack the raw binary data
    into standard Python types. Types that are registered upon import
    include:

    * ``"float"``;
    * ``"uint"`` (32-bit big-endian integer);
    * ``"string"`` (``NULL`` terminated).

    :param names: A string, or iterable of strings.
    :param encoder: The :class:`Encoder` to use for these types.

    This function can operate as a decorator as well::

        @register_encoder('attr')
        class AttributeEncoder(mayatools.binary.Encoder):
            pass

    """
    if isinstance(names, basestring):
        names = [names]
    if encoder is None:
        return functools.partial(register_encoder, names)
    else:
        for name in names:
            encoders[name] = encoder
        return encoder

register_encoder('float', StructEncoder('f'))
register_encoder('uint', StructEncoder('L'))
register_encoder('string', StringEncoder())


#: Map tag names to the name of an encoding. Add to this dict to interpret
#: tags as certain types.
tag_encoding = {

    # Maya headers.
    'VERS': 'string',  # app version
    'UVER': 'string',
    'MADE': 'string',
    'CHNG': 'string',  # timestamp
    'ICON': 'string',
    'INFO': 'string',
    'OBJN': 'string',
    'INCL': 'string',
    'LUNI': 'string',  # linear unit
    'TUNI': 'string',  # time unit
    'AUNI': 'string',  # angle unit
    'FINF': 'string',  # file info

    # Generic.
    'SIZE': 'uint',

    # DAG.
    'CREA': 'string',  # create node
    'STR ': 'string',  # string attribute

    # Cache headers.
    'VRSN': 'string',  # cache version
    'STIM': 'uint',  # cache start time
    'ETIM': 'uint',  # cache end time

    # Cache channels.
    'CHNM': 'string',  # channel name

    # Cache data.
    'FBCA': 'float',  # floating cache array

}

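
# For instance (``DBLA`` is a made-up tag used purely for illustration), a tag
# holding packed floats could be pretty-printed as such with::
#
#     tag_encoding['DBLA'] = 'float'
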
def get_encoder(tag):
    """Get an :class:`Encoder` for the given tag.

    :param str tag: The 4 character node "tag".
    :returns: The appropriate :class:`Encoder`, falling back onto a raw
        :class:`Encoder` for unregistered tags.

    """
    encoding = tag_encoding.get(tag, 'raw')
    return encoders.get(encoding) or Encoder()

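
# A sketch of the fallback behaviour; ``XXXX`` is a made-up, unregistered tag::
#
#     get_encoder('FBCA')    # -> the StructEncoder registered for 'float'
#     get_encoder('XXXX')    # -> a plain raw Encoder
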
def hexdump(*args, **kwargs):
    return ''.join(_hexdump(*args, **kwargs))


def _hexdump(raw, initial_offset=0, chunk=4, line=16, indent='', tag=None):

    chunk2 = 2 * chunk
    line2 = 2 * line

    encoder = get_encoder(tag)

    offset = initial_offset
    for encoded_chunk in encoder.split(raw, line):

        if not encoded_chunk:
            continue

        yield indent
        yield '%04x: ' % offset
        offset += len(encoded_chunk)

        # Encode the chunk to hex, pad it, and chunk it further.
        hex_chunk = encoded_chunk.encode('hex')
        hex_chunk += ' ' * (line2 - len(hex_chunk))
        for i in xrange(0, len(hex_chunk), chunk2):
            yield '%s ' % hex_chunk[i:i + chunk2]

        yield encoder.repr_chunk(encoded_chunk)
        yield '\n'


_group_tags = set()
_tag_alignments = {}
for base in ('FORM', 'CAT ', 'LIST', 'PROP'):
    for char, alignment in (('', 2), ('4', 4), ('8', 8)):
        tag = base[:-len(char)] + char if char else base
        _group_tags.add(tag)
        _tag_alignments[tag] = alignment


def _get_tag_alignment(tag):
    return _tag_alignments.get(tag, 2)


def _get_padding(size, alignment):
    if size % alignment == 0:
        return 0
    else:
        return alignment - size % alignment

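
# A quick sketch of how the alignment helpers play out: chunks inside a
# ``FOR4`` group, for example, are padded to 4-byte boundaries::
#
#     _get_tag_alignment('FOR4')    # -> 4
#     _get_padding(10, 4)           # -> 2 (bytes of padding required)
#     _get_padding(8, 4)            # -> 0
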
class Node(object):

    """Base class for group nodes in, and the root node of, a Maya file graph."""

    def __init__(self):

        #: The children of this node.
        self.children = []

    def add_child(self, child):
        self.children.append(child)
        child.parent = self
        return child

    def add_group(self, *args, **kwargs):
        return self.add_child(Group(*args, **kwargs))

    def add_chunk(self, *args, **kwargs):
        return self.add_child(Chunk(*args, **kwargs))

    def find(self, tag):
        """Iterate across all descendants of this node with a given tag."""
        for child in self.children:
            if child.tag == tag:
                yield child
            if isinstance(child, Node):
                for x in child.find(tag):
                    yield x

    def find_one(self, tag, *args):
        """Find the first descendant of this node with a given tag.

        :param str tag: The tag to find.
        :param default: What to return if we can't find a node.
        :raises KeyError: if we can't find a tag and no default is given.

        """
        for child in self.find(tag):
            return child
        if args:
            return args[0]
        raise KeyError(tag)

    def dumps_iter(self):
        """Iterate chunks of the packed version of this node and its children.

        To write to a file::

            with open(path, 'wb') as fh:
                for chunk in node.dumps_iter():
                    fh.write(chunk)

        """
        for child in self.children:
            for x in child.dumps_iter():
                yield x

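
# A minimal sketch of building a graph by hand; the tags and data used here
# are only illustrative::
#
#     root = Node()
#     form = root.add_group('CACH', 'FOR4')
#     form.add_chunk('VRSN', '0.1\x00')
#     root.find_one('VRSN').data    # -> '0.1\x00'
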
class Group(Node):

    """A group node in a Maya file graph."""

    def __init__(self, tag, type_='FOR4', size=0, start=0):
        super(Group, self).__init__()

        #: The group type (e.g. ``FORM``, ``LIST``, ``PROP``, ``CAT``).
        self.type = type_

        self.size = size
        self.start = start

        #: The data type.
        self.tag = tag

        self.alignment = _get_tag_alignment(self.type)
        self.end = self.start + self.size + _get_padding(self.size, self.alignment)

    def pprint(self, _indent=0):
        """Print a structured representation of the group to stdout."""
        print _indent * ' ' + ('%s group (%s); %d bytes for %d children:' % (
            self.tag, self.type, self.size, len(self.children)))
        for child in self.children:
            child.pprint(_indent=_indent + 1)

    def dumps_iter(self):
        output = []
        for child in self.children:
            output.extend(child.dumps_iter())
        yield self.type
        yield struct.pack(">L", sum(len(x) for x in output) + 4)
        yield self.tag
        for x in output:
            yield x

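
# The packed form of a group is its 4-byte type, a big-endian size covering
# the group tag plus all child data, and then the tag itself; for an empty
# (illustrative) group::
#
#     ''.join(Group('CACH', 'FOR4').dumps_iter())
#     # -> 'FOR4\x00\x00\x00\x04CACH'
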
class Chunk(object):

    def __init__(self, tag, data='', offset=None, **kwargs):

        self.parent = None

        #: The data type.
        self.tag = tag

        #: Raw binary data.
        self.data = data

        self.offset = offset

        for k, v in kwargs.iteritems():
            setattr(self, k, v)

    def pprint(self, _indent):
        """Print a structured representation of the chunk to stdout."""
        encoding = tag_encoding.get(self.tag)
        if encoding:
            header = '%d bytes as %s(s)' % (len(self.data), encoding)
        else:
            header = '%d raw bytes' % len(self.data)
        print _indent * ' ' + ('%s; %s' % (self.tag, header))
        print hexdump(self.data, self.offset, tag=self.tag, indent=(_indent + 1) * ' ').rstrip()

    def __repr__(self):
        return '<%s %s; %d bytes>' % (self.__class__.__name__, self.tag, len(self.data))

    def dumps_iter(self):
        yield self.tag
        yield struct.pack(">L", len(self.data))
        yield self.data
        padding = _get_padding(len(self.data), self.parent.alignment)
        if padding:
            yield '\0' * padding

    def _unpack(self, format_char):
        element_size = struct.calcsize('>' + format_char)
        if len(self.data) % element_size:
            raise ValueError('%s is not a multiple of %d for %r format' % (
                len(self.data), element_size, format_char))
        format_string = '>%d%s' % (len(self.data) / element_size, format_char)
        unpacked = struct.unpack(format_string, self.data)
        return array.array(format_char, unpacked)

    def _pack(self, format_char, values):
        self.data = struct.pack('>%d%s' % (len(values), format_char), *values)

    @property
    def ints(self):
        """Binary data interpreted as an array of unsigned integers.

        This is settable to an iterable of integers.

        """
        return self._unpack('L')

    @ints.setter
    def ints(self, values):
        self._pack('L', values)

    @property
    def floats(self):
        """Binary data interpreted as an array of floats.

        This is settable to an iterable of floats.

        """
        return self._unpack('f')

    @floats.setter
    def floats(self, values):
        self._pack('f', values)

    @property
    def string(self):
        """Binary data interpreted as a string.

        This is settable with a string.

        """
        return self.data.rstrip('\0')

    @string.setter
    def string(self, v):
        self.data = str(v).rstrip('\0') + '\0'

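
# A sketch of the typed accessors; the tags and values are only illustrative::
#
#     vers = Chunk('VRSN')
#     vers.string = 'mayaBinary'
#     vers.data     # -> 'mayaBinary\x00'
#
#     stim = Chunk('STIM')
#     stim.ints = [1, 250]
#     stim.ints     # -> array('L', [1L, 250L]); data holds 8 big-endian bytes
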
class Parser(Node):

    """Maya binary file parser.

    :param file: The file-like object to parse from; must support
        ``read(size)`` and ``tell()``.

    """

    def __init__(self, file):
        super(Parser, self).__init__()
        self._file = file
        self._group_stack = []
        self.children = []

    def close(self):
        self._file.close()

    def pprint(self, _indent=-1):
        """Print a structured representation of the file to stdout."""
        for child in self.children:
            child.pprint(_indent=_indent + 1)

    def parse_next(self):
        """Parse to the next :class:`Group` or :class:`Chunk`, returning it.

        This is useful when you want to read the headers of a file without
        loading its entire contents into memory.

        """

        # Clean the group stack.
        while self._group_stack and self._group_stack[-1].end <= self._file.tell():
            self._group_stack.pop(-1)

        # Read a tag and size from the file.
        tag = self._file.read(4)
        if not tag:
            return
        size = struct.unpack(">L", self._file.read(4))[0]

        if tag in _group_tags:

            offset = self._file.tell()
            group_tag = self._file.read(4)
            group = Group(group_tag, tag, size, offset)

            # Add it as a child of the current group.
            group_head = self._group_stack[-1] if self._group_stack else self
            group_head.add_child(group)

            self._group_stack.append(group)
            return group

        else:

            offset = self._file.tell()
            data = self._file.read(size)
            chunk = Chunk(tag, data, offset)

            assert self._group_stack, 'Data chunk outside of group.'
            self._group_stack[-1].add_child(chunk)

            # Skip the padding.
            padding = _get_padding(size, self._group_stack[-1].alignment)
            if padding:
                self._file.read(padding)

            return chunk

    def parse_all(self):
        """Parse the entire (remaining) file."""
        while self.parse_next() is not None:
            pass

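
# A minimal read sketch, assuming ``scene.mb`` is an existing Maya binary
# scene on disk::
#
#     parser = Parser(open('scene.mb', 'rb'))
#     parser.parse_all()
#     parser.pprint()
#     print parser.find_one('VERS').string
#
# Alternatively, call ``parse_next()`` in a loop to stream through the file
# one node at a time and stop as soon as you have the headers you need.
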
if __name__ == '__main__':

    import sys
    from optparse import OptionParser

    opt_parser = OptionParser()
    opt_parser.add_option('-t', '--type', action='append', default=[])
    opt_parser.add_option('-n', '--no-types', action='store_true')
    opt_parser.add_option('-x', '--hex', action='store_true')
    opts, args = opt_parser.parse_args()

    if opts.hex:
        for arg in args:
            print hexdump(open(arg).read())
        exit()

    if opts.no_types:
        tag_encoding.clear()

    for type_spec in opts.type:
        type_spec = type_spec.split(':')
        names = type_spec[0].split(',')
        if len(type_spec) == 1:
            for name in names:
                tag_encoding.pop(name, None)
        elif len(type_spec) == 2:
            for name in names:
                tag_encoding[name] = type_spec[1]
        else:
            raise ValueError('type spec should look like NAME:type')

    for arg in args:
        parser = Parser(open(arg))
        parser.parse_all()
        parser.pprint()
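
# Command-line sketch; assuming the package is importable and the file names
# below exist, invocations might look like::
#
#     python -m mayatools.binary scene.mb          # structured dump
#     python -m mayatools.binary -x scene.mb       # plain hex dump
#     python -m mayatools.binary -n -t FBCA:float cache.mc   # only decode FBCA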