# Ticket #778: fecparams.py

File fecparams.py, 3.0 KB (added by swillden, at 2009-08-19T16:30:03Z) |
---|

Line | |
---|---|

1 | import math |

2 | |

3 | def compute_fec_params(k, m, n): |

4 | """ |

5 | Returns the FEC encoding parameters k_e and m_e, where k_e is the |

6 | number of shares required to reconstruct the file, and m_e is the |

7 | number of shares to be deployed to the servers. 'k' is the numer |

8 | of servers that must be available to reconstruct the file, 'm' is |

9 | the maximum number of servers to which shares should be deployed, |

10 | and 'n' is the number of servers available. |

11 | |

12 | In the nominal case, n >= k, and in that case this function |

13 | returns parameters that ensure each server gets at least one share |

14 | (to maximize parallelism available for retrieval), that any |

15 | k-server subset of n has sufficient shares to reconstruct the file |

16 | and that FEC expansion is minimal, consistent with the retrieval |

17 | performance and availability goals. |

18 | |

19 | If n < k, this function ensures that enough shares are deployed |

20 | that the file can be retrieved, providing as much redundancy as |

21 | possible, but without much more FEC expansion than is allowed by |

22 | the user's choice of k and m. |

23 | """ |

24 | assert k <= m |

25 | assert n <= m |

26 | |

27 | if n <= k: |

28 | return k, min(k * n, k * int(math.ceil(float(m) / k))) |

29 | |

30 | if n % k == 0: |

31 | return n, n * n / k |

32 | else: |

33 | return n, n - k + 1 + n * (n // k) |

34 | |

35 | def compute_p_failure(k_e, m_e, n, p): |

36 | """ |

37 | Compute the probability that the file is lost given that 'm_e' |

38 | shares are deployed to 'n' servers, that 'k_e' shares are required |

39 | for recovery and that any given server fails with probability 'p' |

40 | """ |

41 | # Allocate shares to servers. |

42 | share_lst = [ m_e // n ] * n |

43 | m_e -= m_e // n * n |

44 | for i in range(0, n): |

45 | if m_e == 0: |

46 | break |

47 | share_lst[i] += 1 |

48 | m_e -= 1 |

49 | |

50 | # Construct per-server share survival probability mass functions |

51 | server_pmfs = [] |

52 | for shares in share_lst: |

53 | server_pmf = [ p ] + [ 0 ] * (shares - 1) + [ 1 - p ] |

54 | server_pmfs.append(server_pmf) |

55 | |

56 | # Convolve per-server PMFs to produce aggregate share survival PMF |

57 | pmf = reduce(convolve, server_pmfs) |

58 | |

59 | # Probability of file survival is the probability that at least |

60 | # k_e shares survive. |

61 | return 1 - sum(pmf[k_e:]) |

62 | |

63 | def convolve(list_a, list_b): |

64 | """ |

65 | Returns the discrete convolution of two lists. |

66 | |

67 | Given two random variables X and Y, the convolution of their |

68 | probability mass functions Pr(X) and Pr(Y) is equal to the |

69 | Pr(X+Y). |

70 | """ |

71 | n = len(list_a) |

72 | m = len(list_b) |

73 | |

74 | result = [] |

75 | for i in range(n + m - 1): |

76 | sum = 0.0 |

77 | |

78 | lower = max(0, i - n + 1) |

79 | upper = min(m - 1, i) |

80 | |

81 | for j in range(lower, upper+1): |

82 | sum += list_a[i-j] * list_b[j] |

83 | |

84 | result.append(sum) |

85 | |

86 | return result |

87 | |

88 | if __name__ == "__main__": |

89 | |

90 | for n in range(1, 11): |

91 | k_e, m_e = compute_fec_params(3, 10, n) |

92 | p_fail = compute_p_failure(k_e, m_e, n, 0.001) |

93 | s = "%2d server(s), %2d shares, %2d needed, %2.2f expansion, %0.2e p_fail" |

94 | print s % (n, m_e, k_e, float(m_e) / float(k_e), p_fail) |