Source code for ilpm.raw

#!/usr/bin/env python
import numpy as np
import os
import zipfile

def load_raw(fn, size=None, dtype=None):
    '''Load a raw binary file.

    The size and data type of the array will be determined automatically if
    specified correctly in the name of the file, for example:
    ``my_array.float64.100x100x200.bin``, which corresponds to an
    100x100x200 array of double precision floats.  Note that the index
    ordering is "C" style; the fast axis is last.

    Optionally, the binary file may be zipped, in which case the extension
    should be ``".zip"``; the first member of the archive is read.

    Parameters
    ----------
    fn : string
        Path of the file to read.
    size : tuple or int (optional)
    dtype : string (optional)
        The size and data type (numpy format).  Should be specified if the
        file name is not in the required format.  The file name is only
        inspected when *both* are left as ``None``.  A missing size yields a
        1D array; a missing data type defaults to ``'float64'``.

    Returns
    -------
    array : numpy array
        A writeable array holding a copy of the file contents.

    Raises
    ------
    ValueError
        If ``dtype`` is not a valid numpy data type, or if the amount of raw
        data does not match the requested type and size.
    '''
    # Only the file name carries type/shape info.  Drop the directory part
    # first so that dots in directory names are never mistaken for fields.
    basename, ext = os.path.splitext(os.path.basename(fn))
    ext = ext.lower()

    if size is None and dtype is None:
        parts = basename.split('.')
        # For "name.dtype.shape.bin.zip" the ".bin" is still attached here.
        if parts[-1] == 'bin':
            parts.pop(-1)

        if len(parts) >= 3:
            dt, s = parts[-2:]
            # Complex128 is (and shouldn't be) interpreted different than
            # complex128; short codes (one or two characters) keep their
            # case.
            if len(dt) > 2:
                dt = dt.lower()

            try:
                dtype = np.dtype(dt)
            except TypeError:
                print("open_raw warning: '%s': didn't understand raw data type '%s', assuming 'float64'." % (fn, dt))

            try:
                size = [int(n) for n in s.lower().split('x')]
            except ValueError:
                print("open_raw warning: '%s': couldn't turn '%s' into a size, making 1D array." % (fn, s))

    if size is None:
        size = (-1,)
    if dtype is None:
        dtype = 'float64'

    # Allow a bare integer as the size of a 1D array.
    if not hasattr(size, "__iter__"):
        size = (size, )
    size = tuple(size)

    try:
        dtype = np.dtype(dtype)
    except Exception:
        # Any failure to build the dtype is reported uniformly, but system
        # exits and keyboard interrupts are no longer swallowed.
        raise ValueError("%s is not a valid numpy data type." % dtype)

    if ext == ".zip":
        with zipfile.ZipFile(fn, 'r') as zf:
            raw = zf.read(zf.namelist()[0])
    else:
        with open(fn, 'rb') as f:
            raw = f.read()

    try:
        # frombuffer returns a read-only view onto the bytes object; copy so
        # that callers get an independent, writeable array.
        return np.frombuffer(raw, dtype=dtype).copy().reshape(size)
    except ValueError:
        raise ValueError("Specified type (%s) and size (%s) do not have same size as raw data (%d != %d)" % (dtype.name, 'x'.join(map(str, size)), dtype.itemsize * np.prod(size), len(raw)))
def save_raw(fn, x, append_info=True, compress=False):
    '''Write a raw binary file.

    Parameters
    ----------
    fn : string
        The filename base.  If append_info is False, it is recommended not
        to include an extension.
    x : numpy array
        The data to save.  Anything accepted by ``numpy.asarray`` works; the
        bytes are written in "C" index order.
    append_info : bool (default: True)
        If true, strips any extension from ``fn`` and appends the data type,
        shape, and ``".bin"`` to the file name.
    compress : bool (default: False)
        Puts the resulting file in a zip archive.  Not recommended unless
        data is expected to compress well (e.g., a sparse array).  Appends
        ``".zip"`` to the final file name.

    Returns
    -------
    filename : string
        The complete file name, including added info.
    '''
    x = np.asarray(x)
    raw = x.tobytes()

    if append_info:
        stem = os.path.splitext(fn)[0]
        shape = 'x'.join(map(str, x.shape))
        fn = stem + '.%s.%s.bin' % (x.dtype.name, shape)

    if compress:
        # Store the member under its base name only, so the archive does not
        # embed the directory layout of the machine that wrote it.
        member = os.path.basename(fn)
        fn = fn + '.zip'
        with zipfile.ZipFile(fn, 'w', compression=zipfile.ZIP_DEFLATED) as zf:
            zf.writestr(member, raw)
    else:
        with open(fn, 'wb') as f:
            f.write(raw)

    return fn
if __name__ == '__main__':
    # Self-test: build a sparse random array, then time a save/load round
    # trip both uncompressed and zipped.  The files written (test1.*,
    # test2.*) are left in the current directory.
    import time

    test = np.zeros((100, 20, 150), dtype='d')

    # Fill roughly 2% of the entries with random values; random coordinates
    # may collide, so the final non-zero fraction can be slightly lower.
    for n in range(np.prod(test.shape)//50):
        coord = tuple(map(np.random.randint, test.shape))
        test[coord] = np.random.rand()

    # Each print call receives a single pre-formatted string, so the output
    # is identical under Python 2 and Python 3.
    print("Array: %s, %0.1f%% non-zero" % (' x '.join(map(str, test.shape)), (test != 0).sum()/float(np.prod(test.shape)) * 100))

    for base, compress in (('test1', False), ('test2', True)):
        print("")

        start = time.time()
        saved = save_raw(base, test, compress=compress)
        print("Saved to: %s (%.3fs)" % (saved, time.time()-start))

        start = time.time()
        loaded = load_raw(saved)
        print("Loaded from: %s (%.3fs)\n Data equal? %s" % (saved, time.time()-start, (loaded == test).all()))