# Source code for ilpm.misc

#!/usr/bin/env python
import numpy as np
import time
import sys

# Prefer the package-relative import; fall back to a plain import so the
# module also works when run directly as a script (not as part of ilpm).
try:
    from . import vector
except ImportError:
    import vector

def fmt_time(t):
    '''Format an elapsed time in seconds as an H:MM:SS string.'''
    # Truncate (not round) each field; minutes/seconds wrap at 60.
    return f'{int(t / 3600.0):d}:{int(t / 60.0) % 60:02d}:{int(t) % 60:02d}'

#def fmt_time(t):
#    return '%d:%02d' % (int(t/60.), int(t)%60)

#------------------------------------------------------------------------------
# Class for printing pretty status counters
#------------------------------------------------------------------------------

[docs]class StatusCounter(object): '''A class for printing status messages for long computations. Parameters ---------- count : int or iterable object The number of items *or* the items over which to iterate. msg : string The status message min_time : float, optional (default: 1./30) The minimum time before a new mssage is printed. Used to prevent very fast updates from slowing down computations (print to stderr can be slow!) ''' def __init__(self, count, msg='Calculating...', min_time=1./30): if hasattr(count, '__len__'): self.data = count self.count = len(count) elif hasattr(count, '__iter__'): self.data = list(count) self.count = len(self.data) else: self.count = count self.msg = msg self.current = -1 self.start = time.time() self.last_time = time.time() self.max_len = 0 self.min_time = min_time self.extra_msg = '' if not hasattr(self, 'data'): self.update()
[docs] def message(self, msg): '''Change the print message during a computation.''' self.extra_msg = msg
[docs] def print_msg(self, msg, final=False): '''Print the message (usually handled automatically.)''' if len(msg) > self.max_len: self.max_len = len(msg) msg = ('%%-%ds' % self.max_len) % msg if final: print '\r' + msg else: print '\r' + msg, sys.stdout.flush()
[docs] def update(self, extra_msg=''): '''Increment the counter manually (usually handlded automatically.)''' self.current += 1 t = time.time() if self.current < self.count and (t - self.last_time) < self.min_time: return None self.last_time = t percent = int(100 * self.current / self.count + 0.5) if not extra_msg: extra_msg = self.extra_msg if extra_msg: extra_msg = ' [%s]' % extra_msg elapsed = t - self.start if self.current == self.count: ft = fmt_time(elapsed) if elapsed < 60: ft += '.%02d' % round((elapsed%1)*100) self.print_msg(self.msg + ' done in %s. ' % ft + extra_msg, final=True) elif self.current <= 0: self.print_msg(self.msg + ' %2d%% ' % percent + extra_msg) elif self.current < self.count: est = elapsed / self.current * (self.count - self.current) self.print_msg(self.msg + ' %2d%% (%s remaining) ' % (percent, fmt_time(est)) + extra_msg)
def __iter__(self): return self def next(self): self.update() if self.current < self.count: if hasattr(self, 'data'): return self.data[self.current] else: return self.current else: raise StopIteration
class GroupById(object):
    '''Create an object which returns the values of objects with a given id.

    After it is created, behaves like a dictionary object which returns
    numpy arrays of values with varying length, where missing values return
    the value specified in ``missing``.

    Items may also be accessed with lists or arrays of ids.

    * Lists of keys return lists of numpy arrays
    * Arrays of keys return a concatenated array of the values

    Example usage:

    .. code-block:: python

        from ilpm import misc, vector

        #Create some random points
        X = (np.random.rand(100, 3) - 0.5) * 10

        #Our id is going to be numbered by radial shell, e.g. a point with radius
        # r=5.2... will have id 5.
        ids = vector.mag(X).astype('i')

        #Make the group
        X_by_r = misc.GroupById(ids, X)

        #Lets see whats in there.
        for ir in sorted(X_by_r.keys()):
            X = X_by_r[ir]
            r = vector.mag(X)
            print('key: %d, %2d values ranging from r = %.2f - %.2f' % (ir, len(r), r.min(), r.max()))

        #What if we wanted all particles with radii from 2-5?
        ids = np.arange(2, 5)
        X = X_by_r[ids]
        r = vector.mag(X)
        print('keys: %s, %2d values ranging from r = %.2f - %.2f' % (repr(ids), len(r), r.min(), r.max()))

    Sample output::

        key: 1,  2 values ranging from r = 1.45 - 1.65
        key: 2,  2 values ranging from r = 2.35 - 2.96
        key: 3, 17 values ranging from r = 3.00 - 3.93
        key: 4, 23 values ranging from r = 4.11 - 4.99
        key: 5, 32 values ranging from r = 5.03 - 5.90
        key: 6, 18 values ranging from r = 6.05 - 6.97
        key: 7,  6 values ranging from r = 7.13 - 7.75
        keys: array([2, 3, 4]), 42 values ranging from r = 2.35 - 4.99

    Parameters
    ----------
    ids : 1D numpy array
        A list of id's.
    values : (len(id), ...) numpy array, optional
        The value which correspond to each id.  If not specified, indices
        are returned instead of values (i.e. ``value = arange(len(id))``)
    missing : any object, optional (default: empty numpy array)
        The default value to return if there are no objects with this id.
        If not specified, returns an empty numpy array with the same data
        type as values.
    '''

    def __init__(self, ids, values=None, missing=None):
        ids = np.asarray(ids)
        if ids.ndim != 1:
            raise ValueError('ids should be 1D')
        if values is None:
            # BUG FIX: was np.arange(len(id)) -- referenced the builtin
            # `id` instead of the `ids` parameter.
            values = np.arange(len(ids))
        # Accept any array-like for values (fancy indexing below needs an
        # ndarray).
        values = np.asarray(values)
        # The [:0] may seem odd, but it keeps the extra dimensions and data
        # type of values, allowing it to be concatenated with the actual
        # values without raising errors.
        if missing is None:
            missing = np.empty_like(values[:0])

        self.groups = {}
        self.missing = missing

        if not len(ids):
            # Nothing to group; avoid indexing into empty arrays below.
            return

        # Sort by id so that equal ids are contiguous, then find the
        # boundaries between runs of equal ids.
        si = np.argsort(ids)
        ids_s = ids[si]
        vals_s = values[si]
        breaks = np.where(ids_s[:-1] != ids_s[1:])[0] + 1

        starts = np.concatenate([[0], breaks])
        ends = np.concatenate([breaks, [len(ids)]])
        for s, e in zip(starts, ends):
            self.groups[ids_s[s]] = vals_s[s:e]

    def __contains__(self, id):
        return id in self.groups

    def keys(self):
        '''Return the keys which have items in them.'''
        # Return a list (not a dict view) for backwards compatibility.
        return list(self.groups.keys())

    def __getitem__(self, ids):
        if isinstance(ids, slice):
            raise TypeError('slice indexing not supported for GroupById objects')
        if hasattr(ids, '__iter__'):
            # List comprehension (not map) so non-array iterables get a
            # real list back.
            vals = [self.groups.get(id, self.missing) for id in ids]
            if isinstance(ids, np.ndarray):
                # Arrays of keys return one concatenated value array.
                vals = np.concatenate(vals)
            return vals
        else:
            return self.groups.get(ids, self.missing)
# 20-bit mask used to pack three integer cell coordinates into a single
# 64-bit cell id (20 bits per axis; see NeighborList._get_id).
BIT_MASK = (1<<20)-1
class NeighborList(object):
    '''Generate an object for finding point neighbors quickly.

    *Note: using this class is only faster than brute force for > 1000
    points.  However, for very large numbers of points (> 10,000) it is
    quite fast.*

    Parameters
    ----------
    cutoff : float
        The default radius to consider a particle a "neighbor".  Although
        different radii may be requested later, this sets the box width for
        the cell-list, so optimally it is the average required radius.
        *Note: the cutoff should be more than 2**20 ~ 1E6 times the total
        separation.*
    points : vector array
        Points to put in the list
    '''

    def __init__(self, cutoff, points):
        # Axis-aligned bounding box of the input points.
        self.P0 = points.min(0)
        self.P1 = points.max(0)
        # Cell count per axis, clipped to at least 1.  NOTE(review): cell
        # ids are computed by masking in _get_id, not from self.N.
        self.N = np.clip(np.ceil((self.P1 - self.P0)/cutoff), 1, np.inf).astype('i8')
        # Inverse cell width: position * self.k gives a (fractional) cell
        # coordinate.
        self.k = 1./cutoff
        self.default_r = cutoff
        self.points = points.copy()
        # Maps packed cell id -> indices of the points in that cell.
        self.cell_list = GroupById(self._get_id(self.points), np.arange(len(self)))

    def _get_id(self, X):
        # Pack the three integer cell coordinates of each point into one
        # 64-bit id, 20 bits per axis (see BIT_MASK).
        # NOTE(review): negative coordinates rely on the unsigned
        # wrap-around of astype('u8'); this is why the docstring limits the
        # cutoff relative to the total separation -- confirm on the target
        # numpy version.
        i = (X * self.k).astype('u8')
        return (i[:, 0]&BIT_MASK) + ((i[:, 1]&BIT_MASK)<<20) + ((i[:, 2]&BIT_MASK)<<40)

    def __len__(self):
        # Number of stored points.
        return len(self.points)

    def indices_within_r(self, P0, r=None):
        '''Return the indices of all points within a specified radius.

        Arguments
        ---------
        P0 : vector
        r : float, optional (default: cutoff specified on object creation)
        '''
        if r is None:
            r = self.default_r
        # Number of cells to search in each direction along each axis.
        ns = int(np.ceil(r * self.k))
        i0, j0, k0 = (P0 * self.k).astype('u8')
        # Packed ids of every cell in the (2*ns+1)^3 neighborhood around
        # P0; the reshapes broadcast the three axes against each other.
        i = np.arange(i0-ns, i0+ns+1, dtype='u8').reshape(1, 1, -1)
        j = np.arange(j0-ns, j0+ns+1, dtype='u8').reshape(1, -1, 1)
        k = np.arange(k0-ns, k0+ns+1, dtype='u8').reshape(-1, 1, 1)
        ids = (i&BIT_MASK) + ((j&BIT_MASK)<<20) + ((k&BIT_MASK)<<40)
        # Gather candidate point indices from all neighboring cells, then
        # keep only those within the true Euclidean distance r.
        i = self.cell_list[ids.reshape(-1)]
        i = i[np.where(vector.mag(self.points[i] - P0) < r)]
        return i
if __name__ == '__main__':
    # Self-test / demo of the classes in this module.
    print()
    print('-'*80)
    print(' testing GroupById')
    print('-'*80)
    print()

    # Create some random points
    X = (np.random.rand(100, 3) - 0.5) * 10

    # Our id is going to be numbered by radial shell, e.g. a point with radius
    # r=5.2... will have id 5.
    ids = vector.mag(X).astype('i')

    # Make the group
    X_by_r = GroupById(ids, X)

    # Lets see whats in there.
    for ir in sorted(X_by_r.keys()):
        X = X_by_r[ir]
        r = vector.mag(X)
        print('key: %d, %2d values ranging from r = %.2f - %.2f' % (ir, len(r), r.min(), r.max()))

    # What if we wanted all particles with radii from 2-5?
    ids = np.arange(2, 5)
    X = X_by_r[ids]
    r = vector.mag(X)
    print('keys: %s, %2d values ranging from r = %.2f - %.2f' % (repr(ids), len(r), r.min(), r.max()))

    print()
    print('-'*80)
    print(' testing StatusCounter')
    print('-'*80)
    print()

    # These are the values over which we are going to compute stuff.
    values = np.linspace(0, 1.0, 10000)

    # Use the status counter as an iterator -- this automatically prints the
    # status message as we loop!
    for val in StatusCounter(values, 'Squaring stuff...'):
        square = val**2            # Do something
        time.sleep(1E-3)           # This something is too fast, so lets slow it.
        if val > 0.5:
            break

    # Create it as a named object so we can change the print message
    stat = StatusCounter(values, 'Squaring stuff...')
    for val in stat:
        square = val**2            # Do something
        stat.message('%.3f -> %.3f' % (val, square))  # Print the result as we compute.
        time.sleep(1E-3)           # This something is too fast, so lets slow it.
    print()