Source code for pyec.distribution.boltzmann.rbm

from numpy import *
from pyec.distribution.basic import Distribution
from pyec.util.TernaryString import TernaryString
from time import time
from sample import *   # sibling module providing the RBM samplers (RBMSimulatedTempering, RBMGibbsSampler)
import numpy.linalg
import gc

class rbm(Distribution):
    """ a binary rbm """
    center = 0.5
    scale = 0.5

    def __init__(self, vsize, hsize, lr=0.01, mo=0.9):
        self.vsize = vsize
        self.hsize = hsize
        self.dim = vsize + hsize
        self.rate = lr
        self.momentum = mo
        # weights and biases initialized from a heavy-tailed Cauchy,
        # scaled down by the layer sizes
        self.w = random.standard_cauchy((vsize, hsize)) / vsize / hsize
        self.bv = random.standard_cauchy(vsize) / vsize / hsize
        self.bh = random.standard_cauchy(hsize) / hsize / vsize
        # momentum buffers for the gradient updates
        self.wc = zeros((vsize, hsize))
        self.bvc = zeros(vsize)
        self.bhc = zeros(hsize)
        # alternate (generative) parameters and their momentum buffers
        self.wg = self.w.copy()
        self.bvg = self.bv.copy()
        self.bhg = self.bh.copy()
        self.wcg = zeros((vsize, hsize))
        self.bvcg = zeros(vsize)
        self.bhcg = zeros(hsize)
        self.sampler = RBMSimulatedTempering(1000)
        self.samplerAlt = RBMGibbsSampler(1000, 100)
        self.epoch = 0
        self.batchSize = 100

    def __call__(self, x):
        return -self.energy(x)
    def energy(self, x, useAlternate=False):
        """ compute the energy function E(v,h) = -(v'Wh + bv'v + bh'h) """
        if useAlternate:
            return self.energy2(x)
        v = x[:self.vsize].toArray(self.vsize)
        h = x[self.vsize:self.vsize + self.hsize].toArray(self.hsize)
        ret = -(dot(v, dot(self.w, h)) + dot(v, self.bv) + dot(h, self.bh))
        return ret
    def energy2(self, x):
        """ compute the energy function """
        v = x[:self.vsize].toArray(self.vsize)
        h = x[self.vsize:self.vsize + self.hsize].toArray(self.hsize)
        ret = -(dot(v, dot(self.wg, h)) + dot(v, self.bvg) + dot(h, self.bhg))
        return ret
    def partition(self):
        """ compute the partition function -- only feasible for small dimensions """
        total = 0
        vsize = self.vsize
        hsize = self.hsize
        # mask marking all vsize + hsize bits as determined
        all = (1L << (vsize + hsize)) - 1L
        for i in xrange(1 << (vsize + hsize)):
            total += exp(self.__call__(TernaryString(long(i), all)))
        return total
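    # Example (hypothetical usage, not in the original source): on a machine
    # small enough to enumerate, and assuming the samplers built in __init__
    # construct cleanly, partition() gives the normalizer Z, so that
    # exp(self(x)) / Z is a proper probability:
    #
    #    r = rbm(3, 2)
    #    Z = r.partition()
    #    mask = (1L << 5) - 1L
    #    probs = [exp(r(TernaryString(long(i), mask))) / Z
    #             for i in xrange(1 << 5)]
    #    assert abs(sum(probs) - 1.0) < 1e-6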
    def scoreSample(self, sample, Z=1.0):
        # the probability of x is exp(-E(x)) / Z = exp(self(x)) / Z
        return [(x, exp(self.__call__(x)) / Z) for x in sample]

    def batch(self, size):
        return self.sampler(self, size)
    def bucket(self, sample, Z=1.0):
        """ build a dictionary containing a histogram """
        d = {}
        size = len(sample)
        incr = 1.0 / size
        for x in sample:
            y = str(x)
            if y in d:
                d[y][0] += incr
            else:
                # pair the empirical frequency with the model probability
                d[y] = [incr, exp(self.__call__(x)) / Z]
        return d
    def complete(self, data, sample=True):
        """ fill in the hidden units given visible data """
        completed = []
        for v in data:
            x = zeros(self.vsize + self.hsize)
            x[:self.vsize] = v
            h = dot(v, self.w) + self.bh
            if sample:
                x[self.vsize:] = random.binomial(1, 1. / (1. + exp(-h)), self.hsize)
                completed.append(TernaryString.fromArray(x))
            else:
                x[self.vsize:] = 1. / (1. + exp(-h))
                completed.append(x)
        return completed

    def completeV(self, data, sample=True):
        """ fill in the visible units given hidden data """
        completed = []
        for h in data:
            h2 = h.toArray(self.bh.size)
            x = zeros(self.vsize + self.hsize)
            x[self.vsize:] = h2
            v = dot(self.w, h2) + self.bv
            if sample:
                x[:self.vsize] = random.binomial(1, 1. / (1. + exp(-v)), self.vsize)
                completed.append(TernaryString.fromArray(x))
            else:
                x[:self.vsize] = 1. / (1. + exp(-v))
                completed.append(x)
        return completed

    def complete2(self, data):
        """ fill in the hidden units using the alternate parameters """
        completed = []
        for v in data:
            x = zeros(self.vsize + self.hsize)
            x[:self.vsize] = v
            h = dot(v, self.wg) + self.bhg
            x[self.vsize:] = 1. / (1. + exp(-h))
            completed.append(TernaryString.fromArray(x))
        return completed

    def logistic(self, x):
        # clip the activation to avoid overflow in exp
        x = minimum(maximum(x, -10.), 10.)
        return 1. / (1. + exp(-x))

    def correlate(self, data):
        """ average correlations <vh'>, <v>, <h> over the data """
        ws = zeros((self.vsize, self.hsize))
        vs = zeros(self.vsize)
        hs = zeros(self.hsize)
        for d in data:
            x = d.toArray(self.vsize + self.hsize)
            v = x[:self.vsize]
            h = x[self.vsize:]
            ws += outer(v, h) / len(data)
            vs += v / len(data)
            hs += h / len(data)
        return ws, vs, hs

    def train(self, n):
        # expects self.data to have been assigned by the caller
        from pyec.util.partitions import ScoreTree, Partition, Point
        from pyec.trainer import RunStats
        stats = RunStats()
        stats.recording = False
        numBatches = len(self.data) / self.batchSize
        lr = 0.001 / ((n / numBatches + 1.) ** 2)
        current = self.epoch % numBatches
        start = current * self.batchSize
        end = start + self.batchSize
        data = self.data[start:end]
        completed = self.complete(data)
        energy = sum([self.energy(d) for d in completed]) / len(completed)
        print "Energy of data: ", energy
        sampled = self.sampler.batch(self.batchSize)
        energys = sum([self.energy(d) for d in sampled]) / len(sampled)
        print "Energy of sample: ", energys
        for point in completed:
            gp = Point(point=None, bayes=None, binary=point,
                       score=-self.energy(point), count=1,
                       segment=self.sampler.selectors[-1].segment)
            gp.save()
            try:
                Partition.objects.separate(gp, self.sampler.config, stats)
                ScoreTree.objects.insert(gp, self.sampler.config, stats)
            except:
                gp.alive = False
                gp.save()
        # contrastive divergence step with momentum
        dw, dv, dh = self.correlate(completed)
        mw, mv, mh = self.correlate(sampled)
        diffw = dw - mw
        diffv = dv - mv
        diffh = dh - mh
        self.wc += (1 - self.momentum) * lr * diffw
        self.bvc += (1 - self.momentum) * lr * diffv
        self.bhc += (1 - self.momentum) * lr * diffh
        self.w += self.wc
        self.bv += self.bvc
        self.bh += self.bhc
        self.wc *= self.momentum
        self.bhc *= self.momentum
        self.bvc *= self.momentum
        print "scale of deriv: ", average(diffw), average(diffv), average(diffh)
        # alternate (generative) parameter update, currently disabled:
        # gw, gv, gh = self.correlate(g)
        # dw, dv, dh = self.correlate(c2)
        # self.wcg += (1 - self.momentum) * self.rate * (dw - gw)
        # self.bvcg += (1 - self.momentum) * self.rate * (dv - gv)
        # self.bhcg += (1 - self.momentum) * self.rate * (dh - gh)
        # self.wg += self.wc
        # self.bvg += self.bvc
        # self.bhg += self.bhc
        # self.wcg *= self.momentum
        # self.bhcg *= self.momentum
        # self.bvcg *= self.momentum
        self.sampler.completeTraining(self, n)

    def meanFieldUp(self, n, vs):
        """ mean-field activation of the hidden layer from visible data """
        wt = self.w.transpose()
        hs = random.random_sample((n, self.hsize))
        for j in xrange(n):
            for i in xrange(25):
                hs[j] = self.logistic(dot(wt, vs[j]) + self.bh)
        gc.collect()
        return hs

    def meanFieldDown(self, n, hs):
        """ mean-field activation of the visible layer from hidden data """
        vs = random.random_sample((n, self.vsize))
        for j in xrange(n):
            for i in xrange(25):
                vs[j] = self.logistic(dot(self.w, hs[j]) + self.bv)
        gc.collect()
        return vs

    def updateChains(self, nchains, vchains, hchains, nsteps):
        """ advance the persistent Gibbs chains by nsteps of block sampling """
        wt = self.w.transpose()
        for i in xrange(nchains):
            for j in xrange(nsteps):
                nvp = self.bv.copy()
                nvp += dot(self.w, hchains[i])
                nvp = self.logistic(nvp)
                vchains[i] = random.binomial(1, nvp, self.vsize)
                nhp = self.bh.copy()
                nhp += dot(wt, vchains[i])
                nhp = self.logistic(nhp)
                hchains[i] = random.binomial(1, nhp, self.hsize)
        return vchains, hchains

    def postTrain(self, vs, hs):
        pass
    def trainAutonomous(self, data, epochs, nchains=100, nsteps=1):
        """
        Train with persistent contrastive divergence.

        data is 3-d:
           d1 = batch num
           d2 = example num
           d3 = input index
        """
        # initialize and burn in the persistent chains
        hchains = random.random_sample((nchains, self.hsize)).round()
        vchains = random.random_sample((nchains, self.vsize)).round()
        vchains, hchains = self.updateChains(nchains, vchains, hchains, 1000)
        mo = 0.5
        for i in xrange(epochs):
            if i > 5:
                mo = 0.9
            lr = self.rate / (i + 1.)
            err = 0.0
            for k, vs in enumerate(data):
                n = shape(vs)[0]
                hs = self.meanFieldUp(n, vs)
                # gvs = self.meanFieldDown(n, hs)
                # err += sqrt((abs(vs - gvs) ** 2).sum(axis=1)).sum() / len(data)
                sn = sqrt(n)
                vsn = vs / sn
                hsn = hs / sn
                # compute correlation matrices
                ws = tensordot(vsn, hsn, axes=(0, 0))
                bvs = vs.sum(axis=0) / n
                bhs = hs.sum(axis=0) / n
                # update the gibbs chains
                vchains, hchains = self.updateChains(nchains, vchains, hchains, nsteps)
                # compute sample correlations
                snchains = sqrt(nchains)
                vchainsn = vchains / snchains
                hchainsn = hchains / snchains
                ws2 = tensordot(vchainsn, hchainsn, axes=(0, 0))
                bvs2 = vchains.sum(axis=0) / nchains
                bhs2 = hchains.sum(axis=0) / nchains
                # compute gradient and take a momentum step
                dw = ws - ws2
                self.wc += lr * dw
                self.bvc += lr * (bvs - bvs2)
                self.bhc += lr * (bhs - bhs2)
                self.w += self.wc
                self.bv += self.bvc
                self.bh += self.bhc
                # decay the momentum buffers
                self.wc *= mo
                self.bvc *= mo
                self.bhc *= mo
                err += (abs(dw).sum() / self.vsize / self.hsize) / 3.
                self.postTrain(vs, hs)
                gc.collect()
                print "\tBatch: ", i, k, ": ", err / (k + 1.)
            print "Epoch ", i, ": ", err / len(data)
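# Example (a sketch, not part of the original module): persistent contrastive
# divergence on random binary batches. Assumes the samplers built in
# rbm.__init__ import cleanly from the sibling `sample` module; the
# hyperparameters are arbitrary.
#
#    data = [random.binomial(1, 0.5, (100, 20)).astype(float)
#            for b in xrange(10)]
#    r = rbm(20, 10, lr=0.1, mo=0.9)
#    r.trainAutonomous(data, epochs=10, nchains=100, nsteps=1)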
class rbmGL(rbm):
    """ an rbm with Gaussian-linear visible units and binary hidden units """

    def __init__(self, vsize, hsize):
        super(rbmGL, self).__init__(vsize, hsize)
        self.vsig = 0.5 * ones(vsize)

    def energy(self, x, useAlternate=False):
        """ compute the energy function """
        v = x[:self.vsize].toArray(self.vsize)
        h = x[self.vsize:self.vsize + self.hsize].toArray(self.hsize)
        extra = ((self.vsig * v) ** 2).sum()
        ret = -(extra + dot(v, dot(self.w, h)) + dot(v, self.bv) + dot(h, self.bh))
        return ret

    def meanFieldDown(self, n, hs):
        vs = random.random_sample((n, self.vsize))
        for j in xrange(n):
            for i in xrange(25):
                vs[j] = dot(self.w, hs[j]) + self.bv
                vs[j] = vs[j] * (self.vsig ** 2) + random.randn(self.vsize) * self.vsig
        gc.collect()
        return vs

    def updateChains(self, nchains, vchains, hchains, nsteps):
        wt = self.w.transpose()
        for i in xrange(nchains):
            for j in xrange(nsteps):
                # visible units are Gaussian with mean (bv + Wh) * vsig ** 2
                nvp = self.bv.copy()
                nvp += dot(self.w, hchains[i])
                nvp *= self.vsig ** 2
                vchains[i] = nvp + random.randn(self.vsize) * self.vsig
                nhp = self.bh.copy()
                nhp += dot(wt, vchains[i])
                nhp = self.logistic(nhp)
                hchains[i] = random.binomial(1, nhp, self.hsize)
        return vchains, hchains

    def postTrain(self, vs, hs):
        # update the running estimate of the visible variance
        gvs = self.meanFieldDown(shape(vs)[0], hs)
        var = sqrt(((vs - gvs) ** 2).sum(axis=0) / shape(vs)[0])
        self.vsig = 0.95 * self.vsig + 0.05 * var


class DeepBeliefNet(object):
    def __init__(self, sizes):
        self.sizes = sizes
        self.stack = []

    def wrap(self, data):
        """ propagate data up through the layers trained so far """
        wrapped = []
        for vs in data:
            for i, r in enumerate(self.stack):
                nvs = r.meanFieldUp(shape(vs)[0], vs)
                if i != 0:
                    del vs
                vs = nvs
            wrapped.append(vs)
        return wrapped

    def train(self, depth, data, epochs, nchains=100, nsteps=5):
        while len(self.stack) < depth:
            print "Training at depth ", len(self.stack)
            vsize = self.sizes[len(self.stack)]
            hsize = self.sizes[len(self.stack) + 1]
            if len(self.stack) > 0:
                lsize = self.sizes[len(self.stack) - 1]
                r = rbm(vsize, hsize)
                # need to invert (lsize, vsize) to (vsize, hsize);
                # either remove columns or pad with 0s
                r.bv = self.stack[-1].bh
                if lsize < hsize:
                    r.w = zeros((vsize, hsize))
                    r.bh = zeros(hsize)
                    r.w[:, :lsize] = self.stack[-1].w.transpose()
                    r.bh[:lsize] = self.stack[-1].bv
                else:
                    # take the middle columns
                    low = (lsize - hsize) / 2
                    high = low + hsize
                    r.w = self.stack[-1].w.transpose()[:, low:high]
                    r.bh = self.stack[-1].bv[low:high]
            else:
                r = rbm(vsize, hsize)
            wrapped = self.wrap(data)
            r.trainAutonomous(wrapped, epochs, nchains, nsteps)
            del wrapped
            gc.collect()
            self.stack.append(r)
        print "Trained ", depth, " levels"
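
if __name__ == '__main__':
    # A minimal smoke test (a sketch, not part of the original library):
    # greedy layer-wise training of a small deep belief net on random
    # binary data. The layer sizes and hyperparameters are arbitrary, and
    # this assumes the samplers constructed in rbm.__init__ work without
    # further configuration.
    data = [random.binomial(1, 0.5, (50, 16)).astype(float)
            for b in xrange(4)]
    dbn = DeepBeliefNet([16, 12, 8])
    dbn.train(2, data, epochs=2, nchains=20, nsteps=1)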