You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

random.py 22KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663
  1. """Random variable generators.
  2. integers
  3. --------
  4. uniform within range
  5. sequences
  6. ---------
  7. pick random element
  8. generate random permutation
  9. distributions on the real line:
  10. ------------------------------
  11. uniform
  12. normal (Gaussian)
  13. lognormal
  14. negative exponential
  15. gamma
  16. beta
  17. distributions on the circle (angles 0 to 2pi)
  18. ---------------------------------------------
  19. circular uniform
  20. von Mises
  21. Translated from anonymously contributed C/C++ source.
  22. Multi-threading note: the random number generator used here is not thread-
  23. safe; it is possible that two calls return the same random value. However,
  24. you can instantiate a different instance of Random() in each thread to get
  25. generators that don't share state, then use .setstate() and .jumpahead() to
  26. move the generators to disjoint segments of the full period. For example,
  27. def create_generators(num, delta, firstseed=None):
  28. ""\"Return list of num distinct generators.
  29. Each generator has its own unique segment of delta elements from
  30. Random.random()'s full period.
  31. Seed the first generator with optional arg firstseed (default is
  32. None, to seed from current time).
  33. ""\"
  34. from random import Random
  35. g = Random(firstseed)
  36. result = [g]
  37. for i in range(num - 1):
  38. laststate = g.getstate()
  39. g = Random()
  40. g.setstate(laststate)
  41. g.jumpahead(delta)
  42. result.append(g)
  43. return result
  44. gens = create_generators(10, 1000000)
  45. That creates 10 distinct generators, which can be passed out to 10 distinct
  46. threads. The generators don't share state so can be called safely in
  47. parallel. So long as no thread calls its g.random() more than a million
  48. times (the second argument to create_generators), the sequences seen by
  49. each thread will not overlap.
  50. The period of the underlying Wichmann-Hill generator is 6,953,607,871,644,
  51. and that limits how far this technique can be pushed.
  52. Just for fun, note that since we know the period, .jumpahead() can also be
  53. used to "move backward in time":
  54. >>> g = Random(42) # arbitrary
  55. >>> g.random()
  56. 0.25420336316883324
  57. >>> g.jumpahead(6953607871644L - 1) # move *back* one
  58. >>> g.random()
  59. 0.25420336316883324
  60. """
  61. # XXX The docstring sucks.
  62. from math import log as _log, exp as _exp, pi as _pi, e as _e
  63. from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin
  64. __all__ = ["Random","seed","random","uniform","randint","choice",
  65. "randrange","shuffle","normalvariate","lognormvariate",
  66. "cunifvariate","expovariate","vonmisesvariate","gammavariate",
  67. "stdgamma","gauss","betavariate","paretovariate","weibullvariate",
  68. "getstate","setstate","jumpahead","whseed"]
  69. def _verify(name, computed, expected):
  70. if abs(computed - expected) > 1e-7:
  71. raise ValueError(
  72. "computed value for %s deviates too much "
  73. "(computed %g, expected %g)" % (name, computed, expected))
  74. NV_MAGICCONST = 4 * _exp(-0.5)/_sqrt(2.0)
  75. _verify('NV_MAGICCONST', NV_MAGICCONST, 1.71552776992141)
  76. TWOPI = 2.0*_pi
  77. _verify('TWOPI', TWOPI, 6.28318530718)
  78. LOG4 = _log(4.0)
  79. _verify('LOG4', LOG4, 1.38629436111989)
  80. SG_MAGICCONST = 1.0 + _log(4.5)
  81. _verify('SG_MAGICCONST', SG_MAGICCONST, 2.50407739677627)
  82. del _verify
  83. # Translated by Guido van Rossum from C source provided by
  84. # Adrian Baddeley.
  85. class Random:
  86. VERSION = 1 # used by getstate/setstate
  87. def __init__(self, x=None):
  88. """Initialize an instance.
  89. Optional argument x controls seeding, as for Random.seed().
  90. """
  91. self.seed(x)
  92. self.gauss_next = None
  93. ## -------------------- core generator -------------------
  94. # Specific to Wichmann-Hill generator. Subclasses wishing to use a
  95. # different core generator should override the seed(), random(),
  96. # getstate(), setstate() and jumpahead() methods.
  97. def seed(self, a=None):
  98. """Initialize internal state from hashable object.
  99. None or no argument seeds from current time.
  100. If a is not None or an int or long, hash(a) is used instead.
  101. If a is an int or long, a is used directly. Distinct values between
  102. 0 and 27814431486575L inclusive are guaranteed to yield distinct
  103. internal states (this guarantee is specific to the default
  104. Wichmann-Hill generator).
  105. """
  106. if a is None:
  107. # Initialize from current time
  108. import time
  109. a = long(time.time() * 256)
  110. if type(a) not in (type(3), type(3L)):
  111. a = hash(a)
  112. a, x = divmod(a, 30268)
  113. a, y = divmod(a, 30306)
  114. a, z = divmod(a, 30322)
  115. self._seed = int(x)+1, int(y)+1, int(z)+1
  116. def random(self):
  117. """Get the next random number in the range [0.0, 1.0)."""
  118. # Wichman-Hill random number generator.
  119. #
  120. # Wichmann, B. A. & Hill, I. D. (1982)
  121. # Algorithm AS 183:
  122. # An efficient and portable pseudo-random number generator
  123. # Applied Statistics 31 (1982) 188-190
  124. #
  125. # see also:
  126. # Correction to Algorithm AS 183
  127. # Applied Statistics 33 (1984) 123
  128. #
  129. # McLeod, A. I. (1985)
  130. # A remark on Algorithm AS 183
  131. # Applied Statistics 34 (1985),198-200
  132. # This part is thread-unsafe:
  133. # BEGIN CRITICAL SECTION
  134. x, y, z = self._seed
  135. x = (171 * x) % 30269
  136. y = (172 * y) % 30307
  137. z = (170 * z) % 30323
  138. self._seed = x, y, z
  139. # END CRITICAL SECTION
  140. # Note: on a platform using IEEE-754 double arithmetic, this can
  141. # never return 0.0 (asserted by Tim; proof too long for a comment).
  142. return (x/30269.0 + y/30307.0 + z/30323.0) % 1.0
  143. def getstate(self):
  144. """Return internal state; can be passed to setstate() later."""
  145. return self.VERSION, self._seed, self.gauss_next
  146. def setstate(self, state):
  147. """Restore internal state from object returned by getstate()."""
  148. version = state[0]
  149. if version == 1:
  150. version, self._seed, self.gauss_next = state
  151. else:
  152. raise ValueError("state with version %s passed to "
  153. "Random.setstate() of version %s" %
  154. (version, self.VERSION))
  155. def jumpahead(self, n):
  156. """Act as if n calls to random() were made, but quickly.
  157. n is an int, greater than or equal to 0.
  158. Example use: If you have 2 threads and know that each will
  159. consume no more than a million random numbers, create two Random
  160. objects r1 and r2, then do
  161. r2.setstate(r1.getstate())
  162. r2.jumpahead(1000000)
  163. Then r1 and r2 will use guaranteed-disjoint segments of the full
  164. period.
  165. """
  166. if not n >= 0:
  167. raise ValueError("n must be >= 0")
  168. x, y, z = self._seed
  169. x = int(x * pow(171, n, 30269)) % 30269
  170. y = int(y * pow(172, n, 30307)) % 30307
  171. z = int(z * pow(170, n, 30323)) % 30323
  172. self._seed = x, y, z
  173. def __whseed(self, x=0, y=0, z=0):
  174. """Set the Wichmann-Hill seed from (x, y, z).
  175. These must be integers in the range [0, 256).
  176. """
  177. if not type(x) == type(y) == type(z) == type(0):
  178. raise TypeError('seeds must be integers')
  179. if not (0 <= x < 256 and 0 <= y < 256 and 0 <= z < 256):
  180. raise ValueError('seeds must be in range(0, 256)')
  181. if 0 == x == y == z:
  182. # Initialize from current time
  183. import time
  184. t = long(time.time() * 256)
  185. t = int((t&0xffffff) ^ (t>>24))
  186. t, x = divmod(t, 256)
  187. t, y = divmod(t, 256)
  188. t, z = divmod(t, 256)
  189. # Zero is a poor seed, so substitute 1
  190. self._seed = (x or 1, y or 1, z or 1)
  191. def whseed(self, a=None):
  192. """Seed from hashable object's hash code.
  193. None or no argument seeds from current time. It is not guaranteed
  194. that objects with distinct hash codes lead to distinct internal
  195. states.
  196. This is obsolete, provided for compatibility with the seed routine
  197. used prior to Python 2.1. Use the .seed() method instead.
  198. """
  199. if a is None:
  200. self.__whseed()
  201. return
  202. a = hash(a)
  203. a, x = divmod(a, 256)
  204. a, y = divmod(a, 256)
  205. a, z = divmod(a, 256)
  206. x = (x + a) % 256 or 1
  207. y = (y + a) % 256 or 1
  208. z = (z + a) % 256 or 1
  209. self.__whseed(x, y, z)
  210. ## ---- Methods below this point do not need to be overridden when
  211. ## ---- subclassing for the purpose of using a different core generator.
  212. ## -------------------- pickle support -------------------
  213. def __getstate__(self): # for pickle
  214. return self.getstate()
  215. def __setstate__(self, state): # for pickle
  216. self.setstate(state)
  217. ## -------------------- integer methods -------------------
  218. def randrange(self, start, stop=None, step=1, int=int, default=None):
  219. """Choose a random item from range(start, stop[, step]).
  220. This fixes the problem with randint() which includes the
  221. endpoint; in Python this is usually not what you want.
  222. Do not supply the 'int' and 'default' arguments.
  223. """
  224. # This code is a bit messy to make it fast for the
  225. # common case while still doing adequate error checking
  226. istart = int(start)
  227. if istart != start:
  228. raise ValueError, "non-integer arg 1 for randrange()"
  229. if stop is default:
  230. if istart > 0:
  231. return int(self.random() * istart)
  232. raise ValueError, "empty range for randrange()"
  233. istop = int(stop)
  234. if istop != stop:
  235. raise ValueError, "non-integer stop for randrange()"
  236. if step == 1:
  237. if istart < istop:
  238. return istart + int(self.random() *
  239. (istop - istart))
  240. raise ValueError, "empty range for randrange()"
  241. istep = int(step)
  242. if istep != step:
  243. raise ValueError, "non-integer step for randrange()"
  244. if istep > 0:
  245. n = (istop - istart + istep - 1) / istep
  246. elif istep < 0:
  247. n = (istop - istart + istep + 1) / istep
  248. else:
  249. raise ValueError, "zero step for randrange()"
  250. if n <= 0:
  251. raise ValueError, "empty range for randrange()"
  252. return istart + istep*int(self.random() * n)
  253. def randint(self, a, b):
  254. """Return random integer in range [a, b], including both end points.
  255. (Deprecated; use randrange(a, b+1).)
  256. """
  257. return self.randrange(a, b+1)
  258. ## -------------------- sequence methods -------------------
  259. def choice(self, seq):
  260. """Choose a random element from a non-empty sequence."""
  261. return seq[int(self.random() * len(seq))]
  262. def shuffle(self, x, random=None, int=int):
  263. """x, random=random.random -> shuffle list x in place; return None.
  264. Optional arg random is a 0-argument function returning a random
  265. float in [0.0, 1.0); by default, the standard random.random.
  266. Note that for even rather small len(x), the total number of
  267. permutations of x is larger than the period of most random number
  268. generators; this implies that "most" permutations of a long
  269. sequence can never be generated.
  270. """
  271. if random is None:
  272. random = self.random
  273. for i in xrange(len(x)-1, 0, -1):
  274. # pick an element in x[:i+1] with which to exchange x[i]
  275. j = int(random() * (i+1))
  276. x[i], x[j] = x[j], x[i]
  277. ## -------------------- real-valued distributions -------------------
  278. ## -------------------- uniform distribution -------------------
  279. def uniform(self, a, b):
  280. """Get a random number in the range [a, b)."""
  281. return a + (b-a) * self.random()
  282. ## -------------------- normal distribution --------------------
  283. def normalvariate(self, mu, sigma):
  284. # mu = mean, sigma = standard deviation
  285. # Uses Kinderman and Monahan method. Reference: Kinderman,
  286. # A.J. and Monahan, J.F., "Computer generation of random
  287. # variables using the ratio of uniform deviates", ACM Trans
  288. # Math Software, 3, (1977), pp257-260.
  289. random = self.random
  290. while 1:
  291. u1 = random()
  292. u2 = random()
  293. z = NV_MAGICCONST*(u1-0.5)/u2
  294. zz = z*z/4.0
  295. if zz <= -_log(u2):
  296. break
  297. return mu + z*sigma
  298. ## -------------------- lognormal distribution --------------------
  299. def lognormvariate(self, mu, sigma):
  300. return _exp(self.normalvariate(mu, sigma))
  301. ## -------------------- circular uniform --------------------
  302. def cunifvariate(self, mean, arc):
  303. # mean: mean angle (in radians between 0 and pi)
  304. # arc: range of distribution (in radians between 0 and pi)
  305. return (mean + arc * (self.random() - 0.5)) % _pi
  306. ## -------------------- exponential distribution --------------------
  307. def expovariate(self, lambd):
  308. # lambd: rate lambd = 1/mean
  309. # ('lambda' is a Python reserved word)
  310. random = self.random
  311. u = random()
  312. while u <= 1e-7:
  313. u = random()
  314. return -_log(u)/lambd
  315. ## -------------------- von Mises distribution --------------------
  316. def vonmisesvariate(self, mu, kappa):
  317. # mu: mean angle (in radians between 0 and 2*pi)
  318. # kappa: concentration parameter kappa (>= 0)
  319. # if kappa = 0 generate uniform random angle
  320. # Based upon an algorithm published in: Fisher, N.I.,
  321. # "Statistical Analysis of Circular Data", Cambridge
  322. # University Press, 1993.
  323. # Thanks to Magnus Kessler for a correction to the
  324. # implementation of step 4.
  325. random = self.random
  326. if kappa <= 1e-6:
  327. return TWOPI * random()
  328. a = 1.0 + _sqrt(1.0 + 4.0 * kappa * kappa)
  329. b = (a - _sqrt(2.0 * a))/(2.0 * kappa)
  330. r = (1.0 + b * b)/(2.0 * b)
  331. while 1:
  332. u1 = random()
  333. z = _cos(_pi * u1)
  334. f = (1.0 + r * z)/(r + z)
  335. c = kappa * (r - f)
  336. u2 = random()
  337. if not (u2 >= c * (2.0 - c) and u2 > c * _exp(1.0 - c)):
  338. break
  339. u3 = random()
  340. if u3 > 0.5:
  341. theta = (mu % TWOPI) + _acos(f)
  342. else:
  343. theta = (mu % TWOPI) - _acos(f)
  344. return theta
  345. ## -------------------- gamma distribution --------------------
  346. def gammavariate(self, alpha, beta):
  347. # beta times standard gamma
  348. ainv = _sqrt(2.0 * alpha - 1.0)
  349. return beta * self.stdgamma(alpha, ainv, alpha - LOG4, alpha + ainv)
  350. def stdgamma(self, alpha, ainv, bbb, ccc):
  351. # ainv = sqrt(2 * alpha - 1)
  352. # bbb = alpha - log(4)
  353. # ccc = alpha + ainv
  354. random = self.random
  355. if alpha <= 0.0:
  356. raise ValueError, 'stdgamma: alpha must be > 0.0'
  357. if alpha > 1.0:
  358. # Uses R.C.H. Cheng, "The generation of Gamma
  359. # variables with non-integral shape parameters",
  360. # Applied Statistics, (1977), 26, No. 1, p71-74
  361. while 1:
  362. u1 = random()
  363. u2 = random()
  364. v = _log(u1/(1.0-u1))/ainv
  365. x = alpha*_exp(v)
  366. z = u1*u1*u2
  367. r = bbb+ccc*v-x
  368. if r + SG_MAGICCONST - 4.5*z >= 0.0 or r >= _log(z):
  369. return x
  370. elif alpha == 1.0:
  371. # expovariate(1)
  372. u = random()
  373. while u <= 1e-7:
  374. u = random()
  375. return -_log(u)
  376. else: # alpha is between 0 and 1 (exclusive)
  377. # Uses ALGORITHM GS of Statistical Computing - Kennedy & Gentle
  378. while 1:
  379. u = random()
  380. b = (_e + alpha)/_e
  381. p = b*u
  382. if p <= 1.0:
  383. x = pow(p, 1.0/alpha)
  384. else:
  385. # p > 1
  386. x = -_log((b-p)/alpha)
  387. u1 = random()
  388. if not (((p <= 1.0) and (u1 > _exp(-x))) or
  389. ((p > 1) and (u1 > pow(x, alpha - 1.0)))):
  390. break
  391. return x
  392. ## -------------------- Gauss (faster alternative) --------------------
  393. def gauss(self, mu, sigma):
  394. # When x and y are two variables from [0, 1), uniformly
  395. # distributed, then
  396. #
  397. # cos(2*pi*x)*sqrt(-2*log(1-y))
  398. # sin(2*pi*x)*sqrt(-2*log(1-y))
  399. #
  400. # are two *independent* variables with normal distribution
  401. # (mu = 0, sigma = 1).
  402. # (Lambert Meertens)
  403. # (corrected version; bug discovered by Mike Miller, fixed by LM)
  404. # Multithreading note: When two threads call this function
  405. # simultaneously, it is possible that they will receive the
  406. # same return value. The window is very small though. To
  407. # avoid this, you have to use a lock around all calls. (I
  408. # didn't want to slow this down in the serial case by using a
  409. # lock here.)
  410. random = self.random
  411. z = self.gauss_next
  412. self.gauss_next = None
  413. if z is None:
  414. x2pi = random() * TWOPI
  415. g2rad = _sqrt(-2.0 * _log(1.0 - random()))
  416. z = _cos(x2pi) * g2rad
  417. self.gauss_next = _sin(x2pi) * g2rad
  418. return mu + z*sigma
  419. ## -------------------- beta --------------------
  420. ## See
  421. ## http://sourceforge.net/bugs/?func=detailbug&bug_id=130030&group_id=5470
  422. ## for Ivan Frohne's insightful analysis of why the original implementation:
  423. ##
  424. ## def betavariate(self, alpha, beta):
  425. ## # Discrete Event Simulation in C, pp 87-88.
  426. ##
  427. ## y = self.expovariate(alpha)
  428. ## z = self.expovariate(1.0/beta)
  429. ## return z/(y+z)
  430. ##
  431. ## was dead wrong, and how it probably got that way.
  432. def betavariate(self, alpha, beta):
  433. # This version due to Janne Sinkkonen, and matches all the std
  434. # texts (e.g., Knuth Vol 2 Ed 3 pg 134 "the beta distribution").
  435. y = self.gammavariate(alpha, 1.)
  436. if y == 0:
  437. return 0.0
  438. else:
  439. return y / (y + self.gammavariate(beta, 1.))
  440. ## -------------------- Pareto --------------------
  441. def paretovariate(self, alpha):
  442. # Jain, pg. 495
  443. u = self.random()
  444. return 1.0 / pow(u, 1.0/alpha)
  445. ## -------------------- Weibull --------------------
  446. def weibullvariate(self, alpha, beta):
  447. # Jain, pg. 499; bug fix courtesy Bill Arms
  448. u = self.random()
  449. return alpha * pow(-_log(u), 1.0/beta)
  450. ## -------------------- test program --------------------
  451. def _test_generator(n, funccall):
  452. import time
  453. print n, 'times', funccall
  454. code = compile(funccall, funccall, 'eval')
  455. sum = 0.0
  456. sqsum = 0.0
  457. smallest = 1e10
  458. largest = -1e10
  459. t0 = time.time()
  460. for i in range(n):
  461. x = eval(code)
  462. sum = sum + x
  463. sqsum = sqsum + x*x
  464. smallest = min(x, smallest)
  465. largest = max(x, largest)
  466. t1 = time.time()
  467. print round(t1-t0, 3), 'sec,',
  468. avg = sum/n
  469. stddev = _sqrt(sqsum/n - avg*avg)
  470. print 'avg %g, stddev %g, min %g, max %g' % \
  471. (avg, stddev, smallest, largest)
  472. def _test(N=200):
  473. print 'TWOPI =', TWOPI
  474. print 'LOG4 =', LOG4
  475. print 'NV_MAGICCONST =', NV_MAGICCONST
  476. print 'SG_MAGICCONST =', SG_MAGICCONST
  477. _test_generator(N, 'random()')
  478. _test_generator(N, 'normalvariate(0.0, 1.0)')
  479. _test_generator(N, 'lognormvariate(0.0, 1.0)')
  480. _test_generator(N, 'cunifvariate(0.0, 1.0)')
  481. _test_generator(N, 'expovariate(1.0)')
  482. _test_generator(N, 'vonmisesvariate(0.0, 1.0)')
  483. _test_generator(N, 'gammavariate(0.5, 1.0)')
  484. _test_generator(N, 'gammavariate(0.9, 1.0)')
  485. _test_generator(N, 'gammavariate(1.0, 1.0)')
  486. _test_generator(N, 'gammavariate(2.0, 1.0)')
  487. _test_generator(N, 'gammavariate(20.0, 1.0)')
  488. _test_generator(N, 'gammavariate(200.0, 1.0)')
  489. _test_generator(N, 'gauss(0.0, 1.0)')
  490. _test_generator(N, 'betavariate(3.0, 3.0)')
  491. _test_generator(N, 'paretovariate(1.0)')
  492. _test_generator(N, 'weibullvariate(1.0, 1.0)')
  493. # Test jumpahead.
  494. s = getstate()
  495. jumpahead(N)
  496. r1 = random()
  497. # now do it the slow way
  498. setstate(s)
  499. for i in range(N):
  500. random()
  501. r2 = random()
  502. if r1 != r2:
  503. raise ValueError("jumpahead test failed " + `(N, r1, r2)`)
  504. # Create one instance, seeded from current time, and export its methods
  505. # as module-level functions. The functions are not threadsafe, and state
  506. # is shared across all uses (both in the user's code and in the Python
  507. # libraries), but that's fine for most programs and is easier for the
  508. # casual user than making them instantiate their own Random() instance.
  509. _inst = Random()
  510. seed = _inst.seed
  511. random = _inst.random
  512. uniform = _inst.uniform
  513. randint = _inst.randint
  514. choice = _inst.choice
  515. randrange = _inst.randrange
  516. shuffle = _inst.shuffle
  517. normalvariate = _inst.normalvariate
  518. lognormvariate = _inst.lognormvariate
  519. cunifvariate = _inst.cunifvariate
  520. expovariate = _inst.expovariate
  521. vonmisesvariate = _inst.vonmisesvariate
  522. gammavariate = _inst.gammavariate
  523. stdgamma = _inst.stdgamma
  524. gauss = _inst.gauss
  525. betavariate = _inst.betavariate
  526. paretovariate = _inst.paretovariate
  527. weibullvariate = _inst.weibullvariate
  528. getstate = _inst.getstate
  529. setstate = _inst.setstate
  530. jumpahead = _inst.jumpahead
  531. whseed = _inst.whseed
  532. if __name__ == '__main__':
  533. _test()