Source code for ilpm.raw

#!/usr/bin/env python
import numpy as np
import os
import zipfile

def load_raw(fn, size=None, dtype=None):
    '''Load a raw binary file.

    The size and data type of the array will be determined automatically if
    specified correctly in the name of the file, for example:
    ``my_array.float64.100x100x200.bin``,
    which corresponds to an 100x100x200 array of double precision floats.  Note
    that the index ordering is "C" style; the fast axis is last.

    The file name is only parsed when *both* ``size`` and ``dtype`` are left
    as ``None``.  Anything that cannot be determined falls back to a 1D array
    of ``float64``.

    Optionally, the binary file may be zipped, in which case the extension
    should be ``".zip"``.  Only the first member of the archive is read.

    Parameters
    ----------
    fn : string
        Path of the file to load.
    size : int or tuple (optional)
    dtype : string (optional)
        The size and data type (numpy format).  Should be specified if the file
        name is not in the required format.

    Returns
    -------
    array : numpy array
        A writable array holding a copy of the file contents.

    Raises
    ------
    ValueError
        If ``dtype`` is not a valid numpy data type, if the zip archive has
        no members, or if the raw data does not match the requested type and
        size.
    '''

    stem, ext = os.path.splitext(fn)
    ext = ext.lower()

    if size is None and dtype is None:
        # Parse only the file name itself, so that dots in directory names
        # are never mistaken for the dtype/size fields.
        parts = os.path.basename(stem).split('.')
        if parts[-1] == 'bin':
            parts.pop()

        if len(parts) >= 3:
            dt, s = parts[-2:]

            # Long names are case-normalized so that e.g. "Complex128" is
            # read as "complex128"; short codes (two characters or fewer)
            # are left untouched.
            if len(dt) > 2:
                dt = dt.lower()

            try:
                dtype = np.dtype(dt)
            except (TypeError, ValueError):
                print("load_raw warning: '%s': didn't understand raw data type '%s', assuming 'float64'." % (fn, dt))

            try:
                # Build a real tuple: a lazy ``map`` object would be consumed
                # by the reshape and be empty by the time an error message
                # needs it.
                size = tuple(int(n) for n in s.lower().split('x'))
            except ValueError:
                print("load_raw warning: '%s': couldn't turn '%s' into a size, making 1D array." % (fn, s))

    if size is None:
        size = (-1,)
    if dtype is None:
        dtype = 'float64'

    # Accept a bare integer as shorthand for a 1D shape.
    if not hasattr(size, "__iter__"):
        size = (size,)
    size = tuple(size)

    try:
        dtype = np.dtype(dtype)
    except (TypeError, ValueError):
        raise ValueError("%s is not a valid numpy data type." % (dtype,))

    if ext == ".zip":
        with zipfile.ZipFile(fn, 'r') as archive:
            names = archive.namelist()
            if not names:
                raise ValueError("'%s' is an empty zip archive." % fn)
            raw = archive.read(names[0])
    else:
        with open(fn, 'rb') as f:
            raw = f.read()

    try:
        # frombuffer gives a read-only view of the bytes; copy so the caller
        # receives an ordinary writable array.
        return np.frombuffer(raw, dtype=dtype).reshape(size).copy()
    except ValueError:
        raise ValueError("Specified type (%s) and size (%s) do not have same size as raw data (%d != %d)" % (dtype.name, 'x'.join(map(str, size)), dtype.itemsize * np.prod(size), len(raw)))
    
    
    
def save_raw(fn, x, append_info=True, compress=False):
    '''Write a raw binary file.

    Parameters
    ----------
    fn : string
        The filename base.  If append_info is True, any existing extension is
        stripped before the info is appended.  If append_info is False, it is
        recommended not to include an extension.
    x : array-like
        The data to save.  Converted with ``numpy.asarray`` and written in
        "C" order.
    append_info : bool (default: True)
        If true, appends the data type, shape, and ``".bin"`` to the file
        name, e.g. ``name.float64.100x100x200.bin``.
    compress : bool (default: False)
        Puts the resulting file in a zip archive.  Not recommended unless data
        is expected to compress well (e.g., a sparse array).  Appends ``".zip"``
        to the final file name.

    Returns
    -------
    filename : string
        The complete file name, including added info.
    '''

    x = np.asarray(x)
    # tobytes() is the supported spelling of the deprecated tostring().
    raw = x.tobytes()

    if append_info:
        fn = os.path.splitext(fn)[0]
        fn = fn + '.%s.%s.bin' % (x.dtype.name, 'x'.join(map(str, x.shape)))

    if compress:
        # Store the member under its plain file name so the archive does not
        # embed whatever directory path the caller happened to pass in.
        member = os.path.basename(fn)
        fn = fn + '.zip'
        with zipfile.ZipFile(fn, 'w', compression=zipfile.ZIP_DEFLATED) as zf:
            zf.writestr(member, raw)
    else:
        with open(fn, 'wb') as f:
            f.write(raw)

    return fn


if __name__ == '__main__':
    # Self-test: round-trip a mostly-zero array through save_raw/load_raw,
    # once uncompressed and once zipped, reporting timings and equality.
    # The output files are written to the current directory and left there.
    import time

    test = np.zeros((100, 20, 150), dtype='d')

    # Set 1/50th as many random entries as the array has elements; repeated
    # coordinates are possible, so the non-zero fraction is at most 2%.
    for _ in range(int(np.prod(test.shape)) // 50):
        coord = tuple(map(np.random.randint, test.shape))
        test[coord] = np.random.rand()

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Array: %s, %0.1f%% non-zero" % (' x '.join(map(str, test.shape)), (test != 0).sum()/float(np.prod(test.shape)) * 100))

    print("")

    start = time.time()
    fn1 = save_raw('test1', test)
    print("Saved to: %s (%.3fs)" % (fn1, time.time()-start))
    start = time.time()
    test1 = load_raw(fn1)
    print("Loaded from: %s (%.3fs)\n   Data equal? %s" % (fn1, time.time()-start, (test1 == test).all()))

    print("")
    start = time.time()
    fn2 = save_raw('test2', test, compress=True)
    print("Saved to: %s (%.3fs)" % (fn2, time.time()-start))
    start = time.time()
    test2 = load_raw(fn2)
    print("Loaded from: %s (%.3fs)\n   Data equal? %s" % (fn2, time.time()-start, (test2 == test).all()))