Ticket #778: fecparams.py

File fecparams.py, 3.0 KB (added by swillden, at 2009-08-19T16:30:03Z)

Implementation of proposed k_e, m_e selection algorithm, per comment 32

Line 
1import math
2
3def compute_fec_params(k, m, n):
4    """
5    Returns the FEC encoding parameters k_e and m_e, where k_e is the
6    number of shares required to reconstruct the file, and m_e is the
7    number of shares to be deployed to the servers.  'k' is the numer
8    of servers that must be available to reconstruct the file, 'm' is
9    the maximum number of servers to which shares should be deployed,
10    and 'n' is the number of servers available.
11
12    In the nominal case, n >= k, and in that case this function
13    returns parameters that ensure each server gets at least one share
14    (to maximize parallelism available for retrieval), that any
15    k-server subset of n has sufficient shares to reconstruct the file
16    and that FEC expansion is minimal, consistent with the retrieval
17    performance and availability goals.
18
19    If n < k, this function ensures that enough shares are deployed
20    that the file can be retrieved, providing as much redundancy as
21    possible, but without much more FEC expansion than is allowed by
22    the user's choice of k and m.
23    """
24    assert k <= m
25    assert n <= m
26
27    if n <= k:
28        return k, min(k * n, k * int(math.ceil(float(m) / k)))
29   
30    if n % k == 0:
31        return n, n * n / k
32    else:
33        return n, n - k + 1 + n * (n // k)
34
35def compute_p_failure(k_e, m_e, n, p):
36    """
37    Compute the probability that the file is lost given that 'm_e'
38    shares are deployed to 'n' servers, that 'k_e' shares are required
39    for recovery and that any given server fails with probability 'p'
40    """
41    # Allocate shares to servers.
42    share_lst = [ m_e // n ] * n
43    m_e -= m_e // n * n
44    for i in range(0, n):
45        if m_e == 0:
46            break
47        share_lst[i] += 1
48        m_e -= 1
49   
50    # Construct per-server share survival probability mass functions
51    server_pmfs = []
52    for shares in share_lst:
53        server_pmf = [ p ] + [ 0 ] * (shares - 1) + [ 1 - p ]
54        server_pmfs.append(server_pmf)
55
56    # Convolve per-server PMFs to produce aggregate share survival PMF
57    pmf = reduce(convolve, server_pmfs)
58
59    # Probability of file survival is the probability that at least
60    # k_e shares survive.
61    return 1 - sum(pmf[k_e:])
62
63def convolve(list_a, list_b):
64    """
65    Returns the discrete convolution of two lists.
66
67    Given two random variables X and Y, the convolution of their
68    probability mass functions Pr(X) and Pr(Y) is equal to the
69    Pr(X+Y).
70    """
71    n = len(list_a)
72    m = len(list_b)
73
74    result = []
75    for i in range(n + m - 1):
76        sum = 0.0
77
78        lower = max(0, i - n + 1)
79        upper = min(m - 1, i)
80
81        for j in range(lower, upper+1):
82            sum += list_a[i-j] * list_b[j]
83
84        result.append(sum)
85
86    return result
87
88if __name__ == "__main__":
89
90    for n in range(1, 11):
91        k_e, m_e = compute_fec_params(3, 10, n)
92        p_fail = compute_p_failure(k_e, m_e, n, 0.001)
93        s = "%2d server(s), %2d shares, %2d needed, %2.2f expansion, %0.2e p_fail" 
94        print s % (n, m_e, k_e, float(m_e) / float(k_e), p_fail)