#!/usr/bin/env python
import numpy as np
import os
import zipfile
def load_raw(fn, size=None, dtype=None):
    '''Load a raw binary file.

    The size and data type of the array will be determined automatically if
    specified correctly in the name of the file, for example:
    ``my_array.float64.100x100x200.bin``,
    which corresponds to a 100x100x200 array of double precision floats.  Note
    that the index ordering is "C" style; the fast axis is last.

    Optionally, the binary file may be zipped, in which case the extension
    should be ``".zip"``.  Only the first member of the archive is read.

    Parameters
    ----------
    fn : string
        Path of the file to read.
    size : int or tuple (optional)
    dtype : string (optional)
        The size and data type (numpy format).  Should be specified if the
        file name is not in the required format.  The file name is only
        inspected when *both* are omitted; otherwise a missing size defaults
        to a 1D array and a missing dtype defaults to ``'float64'``.

    Returns
    -------
    array : numpy array
        A writable array that owns its own memory.

    Raises
    ------
    ValueError
        If `dtype` is not a valid numpy data type, if a zip archive has no
        members, or if the raw data cannot be reshaped to `size`.
    '''
    basename, ext = os.path.splitext(fn)
    ext = ext.lower()

    if size is None and dtype is None:
        # Only the final path component carries the dtype/shape fields; dots
        # in directory names must not be mistaken for them.
        parts = os.path.basename(basename).split('.')
        if parts[-1] == 'bin':
            parts.pop()

        if len(parts) >= 3:
            dt, s = parts[-2:]

            # Long type names are lowercased so that e.g. 'Complex128' is read
            # as 'complex128'; short codes (two characters or fewer) are left
            # untouched.
            if len(dt) > 2:
                dt = dt.lower()

            try:
                dtype = np.dtype(dt)
            except (TypeError, ValueError):
                print("load_raw warning: '%s': didn't understand raw data type '%s', assuming 'float64'." % (fn, dt))

            try:
                # A tuple (not a lazy map object) so the size can be used for
                # both the reshape and the error message below.
                size = tuple(int(n) for n in s.lower().split('x'))
            except ValueError:
                print("load_raw warning: '%s': couldn't turn '%s' into a size, making 1D array." % (fn, s))

    if size is None:
        size = (-1,)
    if dtype is None:
        dtype = 'float64'
    if not hasattr(size, "__iter__"):
        size = (size,)
    size = tuple(size)

    try:
        dtype = np.dtype(dtype)
    except (TypeError, ValueError):
        raise ValueError("%s is not a valid numpy data type." % (dtype,))

    if ext == ".zip":
        with zipfile.ZipFile(fn, 'r') as zf:
            members = zf.namelist()
            if not members:
                raise ValueError("'%s' is an empty zip archive." % fn)
            raw = zf.read(members[0])
    else:
        with open(fn, 'rb') as f:
            raw = f.read()

    try:
        # frombuffer gives a read-only view onto the immutable byte string;
        # copy so the caller receives a writable array.
        return np.frombuffer(raw, dtype=dtype).reshape(size).copy()
    except ValueError:
        raise ValueError("Specified type (%s) and size (%s) do not have same size as raw data (%d != %d)" % (dtype.name, 'x'.join(map(str, size)), dtype.itemsize * np.prod(size), len(raw)))
def save_raw(fn, x, append_info=True, compress=False):
    '''Write a raw binary file.

    The data is written in "C" order (fast axis last), with no header.

    Parameters
    ----------
    fn : string
        The filename base.  If append_info is True, any extension on `fn` is
        stripped before the info is appended.  If append_info is False, it is
        recommended not to include an extension.
    x : numpy array
        The data to save; converted with ``numpy.asarray`` first.
    append_info : bool (default: True)
        If true, appends the data type, shape, and ``".bin"`` to the file
        name.
    compress : bool (default: False)
        Puts the resulting file in a zip archive.  Not recommended unless data
        is expected to compress well (e.g., a sparse array).  Appends
        ``".zip"`` to the final file name.

    Returns
    -------
    filename : string
        The complete file name, including added info.
    '''
    x = np.asarray(x)
    raw = x.tobytes()

    if append_info:
        shape = 'x'.join(str(n) for n in x.shape)
        fn = '%s.%s.%s.bin' % (os.path.splitext(fn)[0], x.dtype.name, shape)

    if compress:
        # Name the archive member after the file only, so the archive does
        # not embed the directory part of the output path.
        member = os.path.basename(fn)
        fn = fn + '.zip'
        with zipfile.ZipFile(fn, 'w', compression=zipfile.ZIP_DEFLATED) as zf:
            zf.writestr(member, raw)
    else:
        with open(fn, 'wb') as f:
            f.write(raw)

    return fn
if __name__ == '__main__':
    # Self-test: round-trip a sparse random array through save_raw/load_raw,
    # uncompressed and compressed, and report timings.  The output files
    # (test1.*.bin and test2.*.bin.zip) are left in the working directory.
    # print is always called with a single parenthesised argument so the
    # script runs unchanged on both Python 2 and Python 3.
    import time

    test = np.zeros((100, 20, 150), dtype='d')

    # Set roughly 2% of the entries (random coordinates may repeat).
    for _ in range(test.size // 50):
        coord = tuple(np.random.randint(n) for n in test.shape)
        test[coord] = np.random.rand()

    print("Array: %s, %0.1f%% non-zero" % (' x '.join(map(str, test.shape)), (test != 0).sum() / float(test.size) * 100))
    print("")

    start = time.time()
    fn1 = save_raw('test1', test)
    print("Saved to: %s (%.3fs)" % (fn1, time.time() - start))

    start = time.time()
    test1 = load_raw(fn1)
    print("Loaded from: %s (%.3fs)\n Data equal? %s" % (fn1, time.time() - start, (test1 == test).all()))
    print("")

    start = time.time()
    fn2 = save_raw('test2', test, compress=True)
    print("Saved to: %s (%.3fs)" % (fn2, time.time() - start))

    start = time.time()
    test2 = load_raw(fn2)
    print("Loaded from: %s (%.3fs)\n Data equal? %s" % (fn2, time.time() - start, (test2 == test).all()))