source: trunk/src/allmydata/mutable/layout.py

Last change on this file was 1cfe843d, checked in by Alexandre Detiste <alexandre.detiste@…>, at 2024-02-22T23:40:25Z

more python2 removal

  • Property mode set to 100644
File size: 69.7 KB
Line 
1"""
2Ported to Python 3.
3"""
4
5from past.utils import old_div
6
7import struct
8from allmydata.mutable.common import NeedMoreDataError, UnknownVersionError, \
9     BadShareError
10from allmydata.interfaces import HASH_SIZE, SALT_SIZE, SDMF_VERSION, \
11                                 MDMF_VERSION, IMutableSlotWriter
12from allmydata.util import mathutil
13from twisted.python import failure
14from twisted.internet import defer
15from zope.interface import implementer
16
17
18# These strings describe the format of the packed structs they help process.
19# Here's what they mean:
20#
21#  PREFIX:
22#    >: Big-endian byte order; the most significant byte is first (leftmost).
23#    B: The container version information; stored as an unsigned 8-bit integer.
24#       This is currently either SDMF_VERSION or MDMF_VERSION.
25#    Q: The sequence number; this is sort of like a revision history for
26#       mutable files; they start at 1 and increase as they are changed after
27#       being uploaded. Stored as an unsigned 64-bit integer.
28#  32s: The root hash of the share hash tree. We use sha-256d, so we use 32
29#       bytes to store the value.
30#  16s: The salt for the readkey. This is a 16-byte random value.
31#
32#  SIGNED_PREFIX additions, things that are covered by the signature:
33#    B: The "k" encoding parameter. We store this as an unsigned 8-bit
34#       integer, since our erasure coding scheme cannot encode to more than
35#       255 pieces.
36#    B: The "N" encoding parameter. Stored as an unsigned 8-bit integer for
37#       the same reason as above.
38#    Q: The segment size of the uploaded file. This is an unsigned 64-bit
39#       integer, to allow handling large segments and files. For SDMF the
40#       segment size is the data length plus padding; for MDMF it can be
41#       smaller.
42#    Q: The data length of the uploaded file. Like the segment size field,
43#       it is an unsigned 64-bit integer.
44#
45#   HEADER additions:
46#     L: The offset of the signature. An unsigned 32-bit integer.
47#     L: The offset of the share hash chain. An unsigned 32-bit integer.
48#     L: The offset of the block hash tree. An unsigned 32-bit integer.
49#     L: The offset of the share data. An unsigned 32-bit integer.
50#     Q: The offset of the encrypted private key. An unsigned 64-bit integer,
51#        to account for the possibility of a lot of share data.
52#     Q: The offset of the EOF. An unsigned 64-bit integer, to account for
53#        the possibility of a lot of share data.
54#
55#  After all of these, we have the following:
56#    - The verification key: Occupies the space between the end of the header
#      and the start of the signature (i.e.: data[HEADER_LENGTH:o['signature']]).
58#    - The signature, which goes from the signature offset to the share hash
59#      chain offset.
60#    - The share hash chain, which goes from the share hash chain offset to
61#      the block hash tree offset.
62#    - The share data, which goes from the share data offset to the encrypted
63#      private key offset.
#    - The encrypted private key, which goes from its offset until the end of the file.
65#
66#  The block hash tree in this encoding has only one share, so the offset of
67#  the share data will be 32 bits more than the offset of the block hash tree.
68#  Given this, we may need to check to see how many bytes a reasonably sized
69#  block hash tree will take up.
70
71PREFIX = ">BQ32s16s" # each version may have a different prefix
72SIGNED_PREFIX = ">BQ32s16s BBQQ" # this is covered by the signature
73SIGNED_PREFIX_LENGTH = struct.calcsize(SIGNED_PREFIX)
74HEADER = ">BQ32s16s BBQQ LLLLQQ" # includes offsets
75HEADER_LENGTH = struct.calcsize(HEADER)
76OFFSETS = ">LLLLQQ"
77OFFSETS_LENGTH = struct.calcsize(OFFSETS)
78
79MAX_MUTABLE_SHARE_SIZE = 69105*1000*1000*1000*1000 # 69105 TB, kind of arbitrary
80
81
82# These are still used for some tests of SDMF files.
def unpack_header(data):
    """
    Unpack an SDMF share header.

    Returns the eight prefix fields (version, seqnum, root_hash, IV, k,
    N, segsize, datalen) followed by a dict mapping section names to
    their byte offsets within the share.
    """
    fields = struct.unpack(HEADER, data[:HEADER_LENGTH])
    version, seqnum, root_hash, IV, k, N, segsize, datalen = fields[:8]
    offsets = dict(zip(("signature",
                        "share_hash_chain",
                        "block_hash_tree",
                        "share_data",
                        "enc_privkey",
                        "EOF"),
                       fields[8:]))
    return (version, seqnum, root_hash, IV, k, N, segsize, datalen, offsets)
97
def unpack_share(data):
    """
    Parse a complete version-0 (SDMF) mutable share into its fields.

    Raises UnknownVersionError for a non-zero version byte,
    NeedMoreDataError when the buffer is shorter than the EOF offset,
    and BadShareError when the share hash chain or block hash tree has
    a malformed length.
    """
    assert len(data) >= HEADER_LENGTH
    o = {}
    header_fields = struct.unpack(HEADER, data[:HEADER_LENGTH])
    (version, seqnum, root_hash, IV, k, N, segsize, datalen) = header_fields[:8]
    (o['signature'],
     o['share_hash_chain'],
     o['block_hash_tree'],
     o['share_data'],
     o['enc_privkey'],
     o['EOF']) = header_fields[8:]

    if version != 0:
        raise UnknownVersionError("got mutable share version %d, but I only understand version 0" % version)

    if len(data) < o['EOF']:
        raise NeedMoreDataError(o['EOF'],
                                o['enc_privkey'], o['EOF']-o['enc_privkey'])

    # The verification key sits between the header and the signature.
    pubkey = data[HEADER_LENGTH:o['signature']]
    signature = data[o['signature']:o['share_hash_chain']]

    # The share hash chain is a sequence of (node-number, hash) records.
    share_hash_chain_s = data[o['share_hash_chain']:o['block_hash_tree']]
    share_hash_format = ">H32s"
    hsize = struct.calcsize(share_hash_format)
    if len(share_hash_chain_s) % hsize != 0:
        raise BadShareError("hash chain is %d bytes, not multiple of %d"
                            % (len(share_hash_chain_s), hsize))
    share_hash_chain = dict(
        struct.unpack(share_hash_format, share_hash_chain_s[off:off+hsize])
        for off in range(0, len(share_hash_chain_s), hsize))

    # The block hash tree is a flat sequence of 32-byte hashes.
    block_hash_tree_s = data[o['block_hash_tree']:o['share_data']]
    if len(block_hash_tree_s) % 32 != 0:
        raise BadShareError("block_hash_tree is %d bytes, not multiple of %d"
                            % (len(block_hash_tree_s), 32))
    block_hash_tree = [block_hash_tree_s[off:off+32]
                       for off in range(0, len(block_hash_tree_s), 32)]

    share_data = data[o['share_data']:o['enc_privkey']]
    enc_privkey = data[o['enc_privkey']:o['EOF']]

    return (seqnum, root_hash, IV, k, N, segsize, datalen,
            pubkey, signature, share_hash_chain, block_hash_tree,
            share_data, enc_privkey)
148
def get_version_from_checkstring(checkstring):
    """Return the share-format version byte from a packed checkstring."""
    return struct.unpack(">B", checkstring[:1])[0]
152
def unpack_sdmf_checkstring(checkstring):
    """
    Unpack an SDMF checkstring into (seqnum, root_hash, IV).

    Asserts that the leading version byte is SDMF_VERSION.
    """
    version, seqnum, root_hash, IV = struct.unpack(
        PREFIX, checkstring[:struct.calcsize(PREFIX)])
    assert version == SDMF_VERSION, version
    return (seqnum, root_hash, IV)
158
def unpack_mdmf_checkstring(checkstring):
    """
    Unpack an MDMF checkstring into (seqnum, root_hash).

    Asserts that the leading version byte is MDMF_VERSION.
    """
    version, seqnum, root_hash = struct.unpack(
        MDMFCHECKSTRING, checkstring[:struct.calcsize(MDMFCHECKSTRING)])
    assert version == MDMF_VERSION, version
    return (seqnum, root_hash)
164
def pack_offsets(verification_key_length, signature_length,
                 share_hash_chain_length, block_hash_tree_length,
                 share_data_length, encprivkey_length):
    """
    Pack the SDMF offset table from the lengths of the share sections.

    Each section begins where the previous one ends, starting right
    after the fixed-size header; the result is the ">LLLLQQ" offsets
    struct that follows the signed prefix on disk.
    """
    signature_o = HEADER_LENGTH + verification_key_length
    share_hash_chain_o = signature_o + signature_length
    block_hash_tree_o = share_hash_chain_o + share_hash_chain_length
    share_data_o = block_hash_tree_o + block_hash_tree_length
    enc_privkey_o = share_data_o + share_data_length
    eof_o = enc_privkey_o + encprivkey_length

    return struct.pack(">LLLLQQ",
                       signature_o,
                       share_hash_chain_o,
                       block_hash_tree_o,
                       share_data_o,
                       enc_privkey_o,
                       eof_o)
184
def pack_share(prefix, verification_key, signature,
               share_hash_chain, block_hash_tree,
               share_data, encprivkey):
    """
    Assemble a complete SDMF share from its already-serialized pieces.

    share_hash_chain is a dict mapping node numbers to 32-byte hashes;
    block_hash_tree is a list of 32-byte hashes.
    """
    # Serialize the hash chain as sorted (node-number, hash) records.
    share_hash_chain_s = b"".join(
        struct.pack(">H32s", node, share_hash_chain[node])
        for node in sorted(share_hash_chain))

    for h in block_hash_tree:
        assert len(h) == 32
    block_hash_tree_s = b"".join(block_hash_tree)

    offsets = pack_offsets(len(verification_key),
                           len(signature),
                           len(share_hash_chain_s),
                           len(block_hash_tree_s),
                           len(share_data),
                           len(encprivkey))
    return b"".join([prefix,
                     offsets,
                     verification_key,
                     signature,
                     share_hash_chain_s,
                     block_hash_tree_s,
                     share_data,
                     encprivkey])
209
def pack_prefix(seqnum, root_hash, IV,
                required_shares, total_shares,
                segment_size, data_length):
    """Pack the signed SDMF prefix (version 0) for the given parameters."""
    return struct.pack(SIGNED_PREFIX,
                       0,  # version
                       seqnum,
                       root_hash,
                       IV,
                       required_shares,
                       total_shares,
                       segment_size,
                       data_length)
224
225
@implementer(IMutableSlotWriter)
class SDMFSlotWriteProxy(object):
    """
    I represent a remote write slot for an SDMF mutable file. I build a
    share in memory, and then write it in one piece to the remote
    server. This mimics how SDMF shares were built before MDMF (and the
    new MDMF uploader), but provides that functionality in a way that
    allows the MDMF uploader to be built without much special-casing for
    file format, which makes the uploader code more readable.
    """
    def __init__(self,
                 shnum,
                 storage_server, # an IStorageServer
                 storage_index,
                 secrets, # (write_enabler, renew_secret, cancel_secret)
                 seqnum, # the sequence number of the mutable file
                 required_shares,
                 total_shares,
                 segment_size,
                 data_length): # the length of the original file
        self.shnum = shnum
        self._storage_server = storage_server
        self._storage_index = storage_index
        self._secrets = secrets
        self._seqnum = seqnum
        self._required_shares = required_shares
        self._total_shares = total_shares
        self._segment_size = segment_size
        self._data_length = data_length

        # This is an SDMF file, so it should have only one segment, so,
        # modulo padding of the data length, the segment size and the
        # data length should be the same.
        expected_segment_size = mathutil.next_multiple(data_length,
                                                       self._required_shares)
        assert expected_segment_size == segment_size

        # Exact integer division: the assertion above guarantees that
        # the segment size is a multiple of required_shares.
        self._block_size = self._segment_size // self._required_shares

        # This is meant to mimic how SDMF files were built before MDMF
        # entered the picture: we generate each share in its entirety,
        # then push it off to the storage server in one write. When
        # callers call set_*, they are just populating this dict.
        # finish_publishing will stitch these pieces together into a
        # coherent share, and then write the coherent share to the
        # storage server.
        self._share_pieces = {}

        # This tells the write logic what checkstring to use when
        # writing remote shares.
        self._testvs = []

        # Read back the prefix when we write, to help diagnose failures.
        self._readvs = [(0, struct.calcsize(PREFIX))]


    def set_checkstring(self, checkstring_or_seqnum,
                              root_hash=None,
                              salt=None):
        """
        Set the checkstring that I will pass to the remote server when
        writing.

            @param checkstring_or_seqnum: A packed checkstring to use,
                   or a sequence number. I will treat this as a packed
                   checkstring unless I am also given root_hash and
                   salt, in which case I will pack a new checkstring
                   from the three constituents.

        Note that implementations can differ in which semantics they
        wish to support for set_checkstring -- they can, for example,
        build the checkstring themselves from its constituents, or
        some other thing.
        """
        if root_hash and salt:
            checkstring = struct.pack(PREFIX,
                                      0,
                                      checkstring_or_seqnum,
                                      root_hash,
                                      salt)
        else:
            checkstring = checkstring_or_seqnum
        self._testvs = [(0, len(checkstring), checkstring)]


    def get_checkstring(self):
        """
        Get the checkstring that I think currently exists on the remote
        server.
        """
        if self._testvs:
            return self._testvs[0][2]
        return b""


    def put_block(self, data, segnum, salt):
        """
        Add a block and salt to the share.
        """
        # SDMF files have only one segment
        assert segnum == 0
        assert len(data) == self._block_size
        assert len(salt) == SALT_SIZE

        self._share_pieces['sharedata'] = data
        self._share_pieces['salt'] = salt

        # TODO: Figure out something intelligent to return.
        return defer.succeed(None)


    def put_encprivkey(self, encprivkey):
        """
        Add the encrypted private key to the share.
        """
        self._share_pieces['encprivkey'] = encprivkey

        return defer.succeed(None)


    def put_blockhashes(self, blockhashes):
        """
        Add the block hash tree to the share.

        blockhashes is a list of HASH_SIZE-byte hashes.
        """
        assert isinstance(blockhashes, list)
        for h in blockhashes:
            assert len(h) == HASH_SIZE

        # serialize the blockhashes, then set them.
        blockhashes_s = b"".join(blockhashes)
        self._share_pieces['block_hash_tree'] = blockhashes_s

        return defer.succeed(None)


    def put_sharehashes(self, sharehashes):
        """
        Add the share hash chain to the share.

        sharehashes is a dict mapping node numbers to HASH_SIZE-byte
        hashes; it is serialized as sorted (node-number, hash) records.
        """
        assert isinstance(sharehashes, dict)
        for h in sharehashes.values():
            assert len(h) == HASH_SIZE

        # serialize the sharehashes, then set them.
        sharehashes_s = b"".join([struct.pack(">H32s", i, sharehashes[i])
                                  for i in sorted(sharehashes.keys())])
        self._share_pieces['share_hash_chain'] = sharehashes_s

        return defer.succeed(None)


    def put_root_hash(self, root_hash):
        """
        Add the root hash to the share.
        """
        assert len(root_hash) == HASH_SIZE

        self._share_pieces['root_hash'] = root_hash

        return defer.succeed(None)


    def put_salt(self, salt):
        """
        Add a salt to an empty SDMF file.
        """
        assert len(salt) == SALT_SIZE

        self._share_pieces['salt'] = salt
        self._share_pieces['sharedata'] = b""

        # Return a Deferred for consistency with the other put_* methods.
        return defer.succeed(None)


    def get_signable(self):
        """
        Return the part of the share that needs to be signed.

        SDMF writers need to sign the packed representation of the
        first eight fields of the remote share, that is:
            - version number (0)
            - sequence number
            - root of the share hash tree
            - salt
            - k
            - n
            - segsize
            - datalen

        This method is responsible for returning that to callers.
        """
        return struct.pack(SIGNED_PREFIX,
                           0,
                           self._seqnum,
                           self._share_pieces['root_hash'],
                           self._share_pieces['salt'],
                           self._required_shares,
                           self._total_shares,
                           self._segment_size,
                           self._data_length)


    def put_signature(self, signature):
        """
        Add the signature to the share.
        """
        self._share_pieces['signature'] = signature

        return defer.succeed(None)


    def put_verification_key(self, verification_key):
        """
        Add the verification key to the share.
        """
        self._share_pieces['verification_key'] = verification_key

        return defer.succeed(None)


    def get_verinfo(self):
        """
        I return my verinfo tuple. This is used by the ServermapUpdater
        to keep track of versions of mutable files.

        The verinfo tuple for MDMF files contains:
            - seqnum
            - root hash
            - a blank (nothing)
            - segsize
            - datalen
            - k
            - n
            - prefix (the thing that you sign)
            - a tuple of offsets

        We include the nonce in MDMF to simplify processing of version
        information tuples.

        The verinfo tuple for SDMF files is the same, but contains a
        16-byte IV instead of a hash of salts.
        """
        return (self._seqnum,
                self._share_pieces['root_hash'],
                self._share_pieces['salt'],
                self._segment_size,
                self._data_length,
                self._required_shares,
                self._total_shares,
                self.get_signable(),
                self._get_offsets_tuple())

    def _get_offsets_dict(self):
        # Compute the offset of each share section from the lengths of
        # the pieces collected so far; each section starts where the
        # previous one ends, immediately after the fixed-size header.
        post_offset = HEADER_LENGTH
        offsets = {}

        verification_key_length = len(self._share_pieces['verification_key'])
        o1 = offsets['signature'] = post_offset + verification_key_length

        signature_length = len(self._share_pieces['signature'])
        o2 = offsets['share_hash_chain'] = o1 + signature_length

        share_hash_chain_length = len(self._share_pieces['share_hash_chain'])
        o3 = offsets['block_hash_tree'] = o2 + share_hash_chain_length

        block_hash_tree_length = len(self._share_pieces['block_hash_tree'])
        o4 = offsets['share_data'] = o3 + block_hash_tree_length

        share_data_length = len(self._share_pieces['sharedata'])
        o5 = offsets['enc_privkey'] = o4 + share_data_length

        encprivkey_length = len(self._share_pieces['encprivkey'])
        offsets['EOF'] = o5 + encprivkey_length
        return offsets


    def _get_offsets_tuple(self):
        # An immutable (and hashable) rendering of the offsets dict.
        offsets = self._get_offsets_dict()
        return tuple([(key, value) for key, value in offsets.items()])


    def _pack_offsets(self):
        # Serialize the offsets table into its on-disk ">LLLLQQ" form.
        offsets = self._get_offsets_dict()
        return struct.pack(">LLLLQQ",
                           offsets['signature'],
                           offsets['share_hash_chain'],
                           offsets['block_hash_tree'],
                           offsets['share_data'],
                           offsets['enc_privkey'],
                           offsets['EOF'])


    def finish_publishing(self):
        """
        Do anything necessary to finish writing the share to a remote
        server. I require that no further publishing needs to take place
        after this method has been called.
        """
        for k in ["sharedata", "encprivkey", "signature", "verification_key",
                  "share_hash_chain", "block_hash_tree"]:
            assert k in self._share_pieces, (self.shnum, k, self._share_pieces.keys())
        # This is the only method that actually writes something to the
        # remote server.
        # First, we need to pack the share into data that we can write
        # to the remote server in one write.
        offsets = self._pack_offsets()
        prefix = self.get_signable()
        final_share = b"".join([prefix,
                                offsets,
                                self._share_pieces['verification_key'],
                                self._share_pieces['signature'],
                                self._share_pieces['share_hash_chain'],
                                self._share_pieces['block_hash_tree'],
                                self._share_pieces['sharedata'],
                                self._share_pieces['encprivkey']])

        # Our only data vector is going to be writing the final share,
        # in its entirely.
        datavs = [(0, final_share)]

        if not self._testvs:
            # Our caller has not provided us with another checkstring
            # yet, so we assume that we are writing a new share, and set
            # a test vector that will only allow a new share to be written.
            self._testvs = [(0, 1, b"")]

        tw_vectors = {}
        tw_vectors[self.shnum] = (self._testvs, datavs, None)
        return self._storage_server.slot_testv_and_readv_and_writev(
            self._storage_index,
            self._secrets,
            tw_vectors,
            # TODO is it useful to read something?
            self._readvs,
        )
556
557
# MDMF on-disk layout format strings (all big-endian).
MDMFHEADER = ">BQ32sBBQQ QQQQQQQQ"       # signable prefix plus eight 64-bit offsets
MDMFHEADERWITHOUTOFFSETS = ">BQ32sBBQQ"  # version, seqnum, root hash, k, N, segsize, datalen
MDMFHEADERSIZE = struct.calcsize(MDMFHEADER)
MDMFHEADERWITHOUTOFFSETSSIZE = struct.calcsize(MDMFHEADERWITHOUTOFFSETS)
MDMFCHECKSTRING = ">BQ32s"               # version, sequence number, root hash
MDMFSIGNABLEHEADER = ">BQ32sBBQQ"        # the header fields covered by the signature
MDMFOFFSETS = ">QQQQQQQQ"                # the eight 64-bit section offsets
MDMFOFFSETS_LENGTH = struct.calcsize(MDMFOFFSETS)

# Byte sizes budgeted for these fields when laying out an MDMF share;
# presumably used by the MDMF writer (below, not fully shown here) to
# compute section offsets up front -- TODO confirm against that code.
PRIVATE_KEY_SIZE = 1220
SIGNATURE_SIZE = 260
VERIFICATION_KEY_SIZE = 292
# We know we won't have more than 256 shares, and we know that we won't need
# to store more than ln2(256) hash-chain nodes to validate, so that's our
# bound. Each node requires 2 bytes of node-number plus 32 bytes of hash.
SHARE_HASH_CHAIN_SIZE = (2+HASH_SIZE)*mathutil.log_ceil(256, 2)
574
575@implementer(IMutableSlotWriter)
576class MDMFSlotWriteProxy(object):
577
578    """
579    I represent a remote write slot for an MDMF mutable file.
580
581    I abstract away from my caller the details of block and salt
582    management, and the implementation of the on-disk format for MDMF
583    shares.
584    """
585    # Expected layout, MDMF:
586    # offset:     size:       name:
587    #-- signed part --
588    # 0           1           version number (01)
589    # 1           8           sequence number
590    # 9           32          share tree root hash
591    # 41          1           The "k" encoding parameter
592    # 42          1           The "N" encoding parameter
593    # 43          8           The segment size of the uploaded file
594    # 51          8           The data length of the original plaintext
595    #-- end signed part --
596    # 59          8           The offset of the encrypted private key
597    # 67          8           The offset of the share hash chain
598    # 75          8           The offset of the signature
599    # 83          8           The offset of the verification key
600    # 91          8           The offset of the end of the v. key.
601    # 99          8           The offset of the share data
602    # 107         8           The offset of the block hash tree
603    # 115         8           The offset of EOF
604    # 123         var         encrypted private key
605    # var         var         share hash chain
606    # var         var         signature
607    # var         var         verification key
608    # var         large       share data
609    # var         var         block hash tree
610    #
611    # We order the fields that way to make smart downloaders -- downloaders
    # which preemptively read a big part of the share -- possible.
613    #
614    # The checkstring is the first three fields -- the version number,
    # sequence number, and root hash. This is consistent
616    # in meaning to what we have with SDMF files, except now instead of
617    # using the literal salt, we use a value derived from all of the
618    # salts -- the share hash root.
619    #
620    # The salt is stored before the block for each segment. The block
621    # hash tree is computed over the combination of block and salt for
622    # each segment. In this way, we get integrity checking for both
623    # block and salt with the current block hash tree arrangement.
624    #
625    # The ordering of the offsets is different to reflect the dependencies
626    # that we'll run into with an MDMF file. The expected write flow is
627    # something like this:
628    #
629    #   0: Initialize with the sequence number, encoding parameters and
630    #      data length. From this, we can deduce the number of segments,
    #      and where they should go. We can also figure out where the
632    #      encrypted private key should go, because we can figure out how
633    #      big the share data will be.
634    #
635    #   1: Encrypt, encode, and upload the file in chunks. Do something
636    #      like
637    #
638    #       put_block(data, segnum, salt)
639    #
640    #      to write a block and a salt to the disk. We can do both of
641    #      these operations now because we have enough of the offsets to
642    #      know where to put them.
643    #
644    #   2: Put the encrypted private key. Use:
645    #
646    #        put_encprivkey(encprivkey)
647    #
648    #      Now that we know the length of the private key, we can fill
649    #      in the offset for the block hash tree.
650    #
651    #   3: We're now in a position to upload the block hash tree for
652    #      a share. Put that using something like:
653    #
654    #        put_blockhashes(block_hash_tree)
655    #
656    #      Note that block_hash_tree is a list of hashes -- we'll take
657    #      care of the details of serializing that appropriately. When
658    #      we get the block hash tree, we are also in a position to
659    #      calculate the offset for the share hash chain, and fill that
660    #      into the offsets table.
661    #
662    #   4: We're now in a position to upload the share hash chain for
663    #      a share. Do that with something like:
664    #
665    #        put_sharehashes(share_hash_chain)
666    #
667    #      share_hash_chain should be a dictionary mapping shnums to
668    #      32-byte hashes -- the wrapper handles serialization.
669    #      We'll know where to put the signature at this point, also.
670    #      The root of this tree will be put explicitly in the next
671    #      step.
672    #
673    #   5: Before putting the signature, we must first put the
674    #      root_hash. Do this with:
675    #
676    #        put_root_hash(root_hash).
677    #
678    #      In terms of knowing where to put this value, it was always
679    #      possible to place it, but it makes sense semantically to
680    #      place it after the share hash tree, so that's why you do it
681    #      in this order.
682    #
683    #   6: With the root hash put, we can now sign the header. Use:
684    #
685    #        get_signable()
686    #
687    #      to get the part of the header that you want to sign, and use:
688    #
689    #        put_signature(signature)
690    #
691    #      to write your signature to the remote server.
692    #
693    #   6: Add the verification key, and finish. Do:
694    #
695    #        put_verification_key(key)
696    #
697    #      and
698    #
699    #        finish_publish()
700    #
701    # Checkstring management:
702    #
703    # To write to a mutable slot, we have to provide test vectors to ensure
704    # that we are writing to the same data that we think we are. These
705    # vectors allow us to detect uncoordinated writes; that is, writes
706    # where both we and some other shareholder are writing to the
707    # mutable slot, and to report those back to the parts of the program
708    # doing the writing.
709    #
710    # With SDMF, this was easy -- all of the share data was written in
711    # one go, so it was easy to detect uncoordinated writes, and we only
712    # had to do it once. With MDMF, not all of the file is written at
713    # once.
714    #
715    # If a share is new, we write out as much of the header as we can
716    # before writing out anything else. This gives other writers a
717    # canary that they can use to detect uncoordinated writes, and, if
    # they do the same thing, gives us the same canary. We then update
719    # the share. We won't be able to write out two fields of the header
720    # -- the share tree hash and the salt hash -- until we finish
721    # writing out the share. We only require the writer to provide the
722    # initial checkstring, and keep track of what it should be after
723    # updates ourselves.
724    #
725    # If we haven't written anything yet, then on the first write (which
726    # will probably be a block + salt of a share), we'll also write out
727    # the header. On subsequent passes, we'll expect to see the header.
728    # This changes in two places:
729    #
730    #   - When we write out the salt hash
731    #   - When we write out the root of the share hash tree
732    #
733    # since these values will change the header. It is possible that we
734    # can just make those be written in one operation to minimize
735    # disruption.
    def __init__(self,
                 shnum,
                 storage_server, # a remote reference to a storage server
                 storage_index,
                 secrets, # (write_enabler, renew_secret, cancel_secret)
                 seqnum, # the sequence number of the mutable file
                 required_shares,
                 total_shares,
                 segment_size,
                 data_length): # the length of the original file
        """
        Prepare to write share number `shnum` of an MDMF mutable file.

        I precompute the offsets of the fixed-position fields of the
        MDMF share layout; offsets that depend on variable-length
        fields are filled in as those fields are queued. Write vectors
        accumulate in self._writevs until finish_publishing sends them
        to the storage server in a single call.
        """
        self.shnum = shnum
        self._storage_server = storage_server
        self._storage_index = storage_index
        self._seqnum = seqnum
        self._required_shares = required_shares
        assert self.shnum >= 0 and self.shnum < total_shares
        self._total_shares = total_shares
        # We build up the offset table as we write things. It is the
        # last thing we write to the remote server.
        self._offsets = {}
        self._testvs = []
        # This is a list of write vectors that will be sent to our
        # remote server once we are directed to write things there.
        self._writevs = []
        self._secrets = secrets
        # The segment size needs to be a multiple of the k parameter --
        # any padding should have been carried out by the publisher
        # already.
        assert segment_size % required_shares == 0
        self._segment_size = segment_size
        self._data_length = data_length

        # These are set later -- we define them here so that we can
        # check for their existence easily

        # This is the root of the share hash tree -- the Merkle tree
        # over the roots of the block hash trees computed for shares in
        # this upload.
        self._root_hash = None

        # We haven't yet written anything to the remote bucket. By
        # setting this, we tell the _write method as much. The write
        # method will then know that it also needs to add a write vector
        # for the checkstring (or what we have of it) to the first write
        # request. We'll then record that value for future use.  If
        # we're expecting something to be there already, we need to call
        # set_checkstring before we write anything to tell the first
        # write about that.
        self._written = False

        # When writing data to the storage servers, we get a read vector
        # for free. We'll read the checkstring, which will help us
        # figure out what's gone wrong if a write fails.
        self._readv = [(0, struct.calcsize(MDMFCHECKSTRING))]

        # We calculate the number of segments because it tells us
        # where the salt part of the file ends/share segment begins,
        # and also because it provides a useful amount of bounds checking.
        self._num_segments = mathutil.div_ceil(self._data_length,
                                               self._segment_size)
        self._block_size = old_div(self._segment_size, self._required_shares)
        # We also calculate the share size, to help us with block
        # constraints later. The tail segment may be shorter than the
        # others; its block size is the tail size rounded up to a
        # multiple of k, then split k ways.
        tail_size = self._data_length % self._segment_size
        if not tail_size:
            self._tail_block_size = self._block_size
        else:
            self._tail_block_size = mathutil.next_multiple(tail_size,
                                                           self._required_shares)
            self._tail_block_size = old_div(self._tail_block_size, self._required_shares)

        # We already know where the sharedata starts; right after the end
        # of the header (which is defined as the signable part + the offsets)
        # We can also calculate where the encrypted private key begins
        # from what we know now.
        self._actual_block_size = self._block_size + SALT_SIZE
        data_size = self._actual_block_size * (self._num_segments - 1)
        data_size += self._tail_block_size
        data_size += SALT_SIZE
        self._offsets['enc_privkey'] = MDMFHEADERSIZE

        # We don't define offsets for these because we want them to be
        # tightly packed -- this allows us to ignore the responsibility
        # of padding individual values, and of removing that padding
        # later. So nonconstant_start is where we start writing
        # nonconstant data.
        nonconstant_start = self._offsets['enc_privkey']
        nonconstant_start += PRIVATE_KEY_SIZE
        nonconstant_start += SIGNATURE_SIZE
        nonconstant_start += VERIFICATION_KEY_SIZE
        nonconstant_start += SHARE_HASH_CHAIN_SIZE

        self._offsets['share_data'] = nonconstant_start

        # Finally, we know how big the share data will be, so we can
        # figure out where the block hash tree needs to go.
        # XXX: But this will go away if Zooko wants to make it so that
        # you don't need to know the size of the file before you start
        # uploading it.
        self._offsets['block_hash_tree'] = self._offsets['share_data'] + \
                    data_size

        # Done. We can now start writing.
839
840
841    def set_checkstring(self,
842                        seqnum_or_checkstring,
843                        root_hash=None,
844                        salt=None):
845        """
846        Set checkstring checkstring for the given shnum.
847
848        This can be invoked in one of two ways.
849
850        With one argument, I assume that you are giving me a literal
851        checkstring -- e.g., the output of get_checkstring. I will then
852        set that checkstring as it is. This form is used by unit tests.
853
854        With two arguments, I assume that you are giving me a sequence
855        number and root hash to make a checkstring from. In that case, I
856        will build a checkstring and set it for you. This form is used
857        by the publisher.
858
859        By default, I assume that I am writing new shares to the grid.
860        If you don't explcitly set your own checkstring, I will use
861        one that requires that the remote share not exist. You will want
862        to use this method if you are updating a share in-place;
863        otherwise, writes will fail.
864        """
865        # You're allowed to overwrite checkstrings with this method;
866        # I assume that users know what they are doing when they call
867        # it.
868        if root_hash:
869            checkstring = struct.pack(MDMFCHECKSTRING,
870                                      1,
871                                      seqnum_or_checkstring,
872                                      root_hash)
873        else:
874            checkstring = seqnum_or_checkstring
875
876        if checkstring == b"":
877            # We special-case this, since len("") = 0, but we need
878            # length of 1 for the case of an empty share to work on the
879            # storage server, which is what a checkstring that is the
880            # empty string means.
881            self._testvs = []
882        else:
883            self._testvs = []
884            self._testvs.append((0, len(checkstring), checkstring))
885
886
887    def __repr__(self):
888        return "MDMFSlotWriteProxy for share %d" % self.shnum
889
890
891    def get_checkstring(self):
892        """
893        Given a share number, I return a representation of what the
894        checkstring for that share on the server will look like.
895
896        I am mostly used for tests.
897        """
898        if self._root_hash:
899            roothash = self._root_hash
900        else:
901            roothash = b"\x00" * 32
902        return struct.pack(MDMFCHECKSTRING,
903                           1,
904                           self._seqnum,
905                           roothash)
906
907
908    def put_block(self, data, segnum, salt):
909        """
910        I queue a write vector for the data, salt, and segment number
911        provided to me. I return None, as I do not actually cause
912        anything to be written yet.
913        """
914        if segnum >= self._num_segments:
915            raise LayoutInvalid("I won't overwrite the block hash tree")
916        if len(salt) != SALT_SIZE:
917            raise LayoutInvalid("I was given a salt of size %d, but "
918                                "I wanted a salt of size %d")
919        if segnum + 1 == self._num_segments:
920            if len(data) != self._tail_block_size:
921                raise LayoutInvalid("I was given the wrong size block to write")
922        elif len(data) != self._block_size:
923            raise LayoutInvalid("I was given the wrong size block to write")
924
925        # We want to write at len(MDMFHEADER) + segnum * block_size.
926        offset = self._offsets['share_data'] + \
927            (self._actual_block_size * segnum)
928        data = salt + data
929
930        self._writevs.append(tuple([offset, data]))
931
932
933    def put_encprivkey(self, encprivkey):
934        """
935        I queue a write vector for the encrypted private key provided to
936        me.
937        """
938        assert self._offsets
939        assert self._offsets['enc_privkey']
940        # You shouldn't re-write the encprivkey after the block hash
941        # tree is written, since that could cause the private key to run
942        # into the block hash tree. Before it writes the block hash
943        # tree, the block hash tree writing method writes the offset of
944        # the share hash chain. So that's a good indicator of whether or
945        # not the block hash tree has been written.
946        if "signature" in self._offsets:
947            raise LayoutInvalid("You can't put the encrypted private key "
948                                "after putting the share hash chain")
949
950        self._offsets['share_hash_chain'] = self._offsets['enc_privkey'] + \
951                len(encprivkey)
952
953        self._writevs.append(tuple([self._offsets['enc_privkey'], encprivkey]))
954
955
956    def put_blockhashes(self, blockhashes):
957        """
958        I queue a write vector to put the block hash tree in blockhashes
959        onto the remote server.
960
961        The encrypted private key must be queued before the block hash
962        tree, since we need to know how large it is to know where the
963        block hash tree should go. The block hash tree must be put
964        before the share hash chain, since its size determines the
965        offset of the share hash chain.
966        """
967        assert self._offsets
968        assert "block_hash_tree" in self._offsets
969
970        assert isinstance(blockhashes, list)
971
972        blockhashes_s = b"".join(blockhashes)
973        self._offsets['EOF'] = self._offsets['block_hash_tree'] + len(blockhashes_s)
974
975        self._writevs.append(tuple([self._offsets['block_hash_tree'],
976                                  blockhashes_s]))
977
978
979    def put_sharehashes(self, sharehashes):
980        """
981        I queue a write vector to put the share hash chain in my
982        argument onto the remote server.
983
984        The block hash tree must be queued before the share hash chain,
985        since we need to know where the block hash tree ends before we
986        can know where the share hash chain starts. The share hash chain
987        must be put before the signature, since the length of the packed
988        share hash chain determines the offset of the signature. Also,
989        semantically, you must know what the root of the block hash tree
990        is before you can generate a valid signature.
991        """
992        assert isinstance(sharehashes, dict)
993        assert self._offsets
994        if "share_hash_chain" not in self._offsets:
995            raise LayoutInvalid("You must put the block hash tree before "
996                                "putting the share hash chain")
997
998        # The signature comes after the share hash chain. If the
999        # signature has already been written, we must not write another
1000        # share hash chain. The signature writes the verification key
1001        # offset when it gets sent to the remote server, so we look for
1002        # that.
1003        if "verification_key" in self._offsets:
1004            raise LayoutInvalid("You must write the share hash chain "
1005                                "before you write the signature")
1006        sharehashes_s = b"".join([struct.pack(">H32s", i, sharehashes[i])
1007                                  for i in sorted(sharehashes.keys())])
1008        self._offsets['signature'] = self._offsets['share_hash_chain'] + \
1009            len(sharehashes_s)
1010        self._writevs.append(tuple([self._offsets['share_hash_chain'],
1011                            sharehashes_s]))
1012
1013
1014    def put_root_hash(self, roothash):
1015        """
1016        Put the root hash (the root of the share hash tree) in the
1017        remote slot.
1018        """
1019        # It does not make sense to be able to put the root
1020        # hash without first putting the share hashes, since you need
1021        # the share hashes to generate the root hash.
1022        #
1023        # Signature is defined by the routine that places the share hash
1024        # chain, so it's a good thing to look for in finding out whether
1025        # or not the share hash chain exists on the remote server.
1026        if len(roothash) != HASH_SIZE:
1027            raise LayoutInvalid("hashes and salts must be exactly %d bytes"
1028                                 % HASH_SIZE)
1029        self._root_hash = roothash
1030        # To write both of these values, we update the checkstring on
1031        # the remote server, which includes them
1032        checkstring = self.get_checkstring()
1033        self._writevs.append(tuple([0, checkstring]))
1034        # This write, if successful, changes the checkstring, so we need
1035        # to update our internal checkstring to be consistent with the
1036        # one on the server.
1037
1038
1039    def get_signable(self):
1040        """
1041        Get the first seven fields of the mutable file; the parts that
1042        are signed.
1043        """
1044        if not self._root_hash:
1045            raise LayoutInvalid("You need to set the root hash "
1046                                "before getting something to "
1047                                "sign")
1048        return struct.pack(MDMFSIGNABLEHEADER,
1049                           1,
1050                           self._seqnum,
1051                           self._root_hash,
1052                           self._required_shares,
1053                           self._total_shares,
1054                           self._segment_size,
1055                           self._data_length)
1056
1057
1058    def put_signature(self, signature):
1059        """
1060        I queue a write vector for the signature of the MDMF share.
1061
1062        I require that the root hash and share hash chain have been put
1063        to the grid before I will write the signature to the grid.
1064        """
1065        if "signature" not in self._offsets:
1066            raise LayoutInvalid("You must put the share hash chain "
1067        # It does not make sense to put a signature without first
1068        # putting the root hash and the salt hash (since otherwise
1069        # the signature would be incomplete), so we don't allow that.
1070                       "before putting the signature")
1071        if not self._root_hash:
1072            raise LayoutInvalid("You must complete the signed prefix "
1073                                "before computing a signature")
1074        # If we put the signature after we put the verification key, we
1075        # could end up running into the verification key, and will
1076        # probably screw up the offsets as well. So we don't allow that.
1077        if "verification_key_end" in self._offsets:
1078            raise LayoutInvalid("You can't put the signature after the "
1079                                "verification key")
1080        # The method that writes the verification key defines the EOF
1081        # offset before writing the verification key, so look for that.
1082        self._offsets['verification_key'] = self._offsets['signature'] +\
1083            len(signature)
1084        self._writevs.append(tuple([self._offsets['signature'], signature]))
1085
1086
1087    def put_verification_key(self, verification_key):
1088        """
1089        I queue a write vector for the verification key.
1090
1091        I require that the signature have been written to the storage
1092        server before I allow the verification key to be written to the
1093        remote server.
1094        """
1095        if "verification_key" not in self._offsets:
1096            raise LayoutInvalid("You must put the signature before you "
1097                                "can put the verification key")
1098
1099        self._offsets['verification_key_end'] = \
1100            self._offsets['verification_key'] + len(verification_key)
1101        assert self._offsets['verification_key_end'] <= self._offsets['share_data']
1102        self._writevs.append(tuple([self._offsets['verification_key'],
1103                            verification_key]))
1104
1105
1106    def _get_offsets_tuple(self):
1107        return tuple([(key, value) for key, value in self._offsets.items()])
1108
1109
1110    def get_verinfo(self):
1111        return (self._seqnum,
1112                self._root_hash,
1113                None,
1114                self._segment_size,
1115                self._data_length,
1116                self._required_shares,
1117                self._total_shares,
1118                self.get_signable(),
1119                self._get_offsets_tuple())
1120
1121
1122    def finish_publishing(self):
1123        """
1124        I add a write vector for the offsets table, and then cause all
1125        of the write vectors that I've dealt with so far to be published
1126        to the remote server, ending the write process.
1127        """
1128        if "verification_key_end" not in self._offsets:
1129            raise LayoutInvalid("You must put the verification key before "
1130                                "you can publish the offsets")
1131        offsets_offset = struct.calcsize(MDMFHEADERWITHOUTOFFSETS)
1132        offsets = struct.pack(MDMFOFFSETS,
1133                              self._offsets['enc_privkey'],
1134                              self._offsets['share_hash_chain'],
1135                              self._offsets['signature'],
1136                              self._offsets['verification_key'],
1137                              self._offsets['verification_key_end'],
1138                              self._offsets['share_data'],
1139                              self._offsets['block_hash_tree'],
1140                              self._offsets['EOF'])
1141        self._writevs.append(tuple([offsets_offset, offsets]))
1142        encoding_parameters_offset = struct.calcsize(MDMFCHECKSTRING)
1143        params = struct.pack(">BBQQ",
1144                             self._required_shares,
1145                             self._total_shares,
1146                             self._segment_size,
1147                             self._data_length)
1148        self._writevs.append(tuple([encoding_parameters_offset, params]))
1149        return self._write(self._writevs)
1150
1151
    def _write(self, datavs, on_failure=None, on_success=None):
        """
        I write the data vectors in datavs to the remote slot.

        :param datavs: a list of (offset, data) write vectors.
        :param on_failure: optional callable invoked (with no args) if
            the test vectors do not match or the call returns a Failure.
        :param on_success: optional callable invoked on success; note
            that on the very first write it is replaced by internal
            bookkeeping that records the newly-written checkstring.
        :return: a Deferred firing with the server's result (a Failure
            is passed through to the caller unchanged).
        """
        tw_vectors = {}
        if not self._testvs:
            # Make sure we will only successfully write if the share didn't
            # previously exist.
            self._testvs = []
            self._testvs.append(tuple([0, 1, b""]))
        if not self._written:
            # Write a new checkstring to the share when we write it, so
            # that we have something to check later.
            new_checkstring = self.get_checkstring()
            datavs.append((0, new_checkstring))
            def _first_write():
                # Record that the slot now exists and what checkstring
                # it holds, so subsequent writes test against that.
                self._written = True
                self._testvs = [(0, len(new_checkstring), new_checkstring)]
            on_success = _first_write
        tw_vectors[self.shnum] = (self._testvs, datavs, None)
        d = self._storage_server.slot_testv_and_readv_and_writev(
            self._storage_index,
            self._secrets,
            tw_vectors,
            self._readv,
        )
        def _result(results):
            # results[0] is the success flag of the test-and-write; a
            # Failure instance means the call itself went wrong.
            if isinstance(results, failure.Failure) or not results[0]:
                # Do nothing; the write was unsuccessful.
                if on_failure: on_failure()
            else:
                if on_success: on_success()
            return results
        d.addBoth(_result)
        return d
1185
def _handle_bad_struct(f):
    # A struct.unpack failure means the server handed back less data
    # than the share format requires, so the share is bad. Any other
    # exception type propagates unchanged (trap re-raises it).
    f.trap(struct.error)
    message = f.value.args[0]
    raise BadShareError(message)
1191
1192class MDMFSlotReadProxy(object):
1193    """
1194    I read from a mutable slot filled with data written in the MDMF data
1195    format (which is described above).
1196
1197    I can be initialized with some amount of data, which I will use (if
1198    it is valid) to eliminate some of the need to fetch it from servers.
1199    """
1200    def __init__(self,
1201                 storage_server,
1202                 storage_index,
1203                 shnum,
1204                 data=b"",
1205                 data_is_everything=False):
1206        # Start the initialization process.
1207        self._storage_server = storage_server
1208        self._storage_index = storage_index
1209        self.shnum = shnum
1210
1211        # Before doing anything, the reader is probably going to want to
1212        # verify that the signature is correct. To do that, they'll need
1213        # the verification key, and the signature. To get those, we'll
1214        # need the offset table. So fetch the offset table on the
1215        # assumption that that will be the first thing that a reader is
1216        # going to do.
1217
1218        # The fact that these encoding parameters are None tells us
1219        # that we haven't yet fetched them from the remote share, so we
1220        # should. We could just not set them, but the checks will be
1221        # easier to read if we don't have to use hasattr.
1222        self._version_number = None
1223        self._sequence_number = None
1224        self._root_hash = None
1225        # Filled in if we're dealing with an SDMF file. Unused
1226        # otherwise.
1227        self._salt = None
1228        self._required_shares = None
1229        self._total_shares = None
1230        self._segment_size = None
1231        self._data_length = None
1232        self._offsets = None
1233
1234        # If the user has chosen to initialize us with some data, we'll
1235        # try to satisfy subsequent data requests with that data before
1236        # asking the storage server for it.
1237        self._data = data
1238
1239        # If the provided data is known to be complete, then we know there's
1240        # nothing to be gained by querying the server, so we should just
1241        # partially satisfy requests with what we have.
1242        self._data_is_everything = data_is_everything
1243
1244        # The way callers interact with cache in the filenode returns
1245        # None if there isn't any cached data, but the way we index the
1246        # cached data requires a string, so convert None to "".
1247        if self._data == None:
1248            self._data = b""
1249
1250
    def _maybe_fetch_offsets_and_header(self, force_remote=False):
        """
        I fetch the offset table and the header from the remote slot if
        I don't already have them. If I do have them, I do nothing and
        return an empty Deferred.

        :param force_remote: if True, bypass any locally-cached data
            and read from the server.
        """
        if self._offsets:
            return defer.succeed(None)
        # At this point, we may be either SDMF or MDMF. Fetching 123
        # bytes is enough to get the header and the offset table for
        # both versions; the SDMF header+offsets are smaller, so for
        # SDMF we end up with a few more bytes than we need. This is
        # probably less expensive than the cost of a second roundtrip.
        # (The stale comment claiming 107 bytes predated the current
        # read size.)
        readvs = [(0, 123)]
        d = self._read(readvs, force_remote)
        d.addCallback(self._process_encoding_parameters)
        d.addCallback(self._process_offsets)
        # A short read will make struct.unpack fail in the callbacks;
        # translate that into BadShareError.
        d.addErrback(_handle_bad_struct)
        return d
1270
1271
    def _process_encoding_parameters(self, encoding_parameters):
        """
        Parse the header fields out of the bytes read from the remote
        share and cache them on self.

        :param encoding_parameters: the readv result dict mapping shnum
            to a list of data strings.
        :return: the raw header bytes, so that the next callback can
            parse the offset table from the same read.
        :raises BadShareError: if our shnum is absent from the results.
        :raises UnknownVersionError: if the version byte is neither
            SDMF_VERSION nor MDMF_VERSION.
        """
        if self.shnum not in encoding_parameters:
            raise BadShareError("no data for shnum %d" % self.shnum)
        encoding_parameters = encoding_parameters[self.shnum][0]
        # The first byte is the version number. It will tell us what
        # to do next.
        (verno,) = struct.unpack(">B", encoding_parameters[:1])
        if verno == MDMF_VERSION:
            read_size = MDMFHEADERWITHOUTOFFSETSSIZE
            (verno,
             seqnum,
             root_hash,
             k,
             n,
             segsize,
             datalen) = struct.unpack(MDMFHEADERWITHOUTOFFSETS,
                                      encoding_parameters[:read_size])
            if segsize == 0 and datalen == 0:
                # Empty file, no segments.
                self._num_segments = 0
            else:
                self._num_segments = mathutil.div_ceil(datalen, segsize)

        elif verno == SDMF_VERSION:
            read_size = SIGNED_PREFIX_LENGTH
            (verno,
             seqnum,
             root_hash,
             salt,
             k,
             n,
             segsize,
             datalen) = struct.unpack(">BQ32s16s BBQQ",
                                encoding_parameters[:SIGNED_PREFIX_LENGTH])
            # SDMF has one per-file salt (MDMF stores salts per segment
            # alongside the blocks instead).
            self._salt = salt
            if segsize == 0 and datalen == 0:
                # empty file
                self._num_segments = 0
            else:
                # non-empty SDMF files have one segment.
                self._num_segments = 1
        else:
            raise UnknownVersionError("You asked me to read mutable file "
                                      "version %d, but I only understand "
                                      "%d and %d" % (verno, SDMF_VERSION,
                                                     MDMF_VERSION))

        # Cache everything we just parsed.
        self._version_number = verno
        self._sequence_number = seqnum
        self._root_hash = root_hash
        self._required_shares = k
        self._total_shares = n
        self._segment_size = segsize
        self._data_length = datalen

        # Each share block is a k-th of a segment.
        self._block_size = old_div(self._segment_size, self._required_shares)
        # We can upload empty files, and need to account for this fact
        # so as to avoid zero-division and zero-modulo errors.
        if datalen > 0:
            tail_size = self._data_length % self._segment_size
        else:
            tail_size = 0
        if not tail_size:
            self._tail_block_size = self._block_size
        else:
            # The tail block is the tail size rounded up to a multiple
            # of k, then split k ways.
            self._tail_block_size = mathutil.next_multiple(tail_size,
                                                    self._required_shares)
            self._tail_block_size = old_div(self._tail_block_size, self._required_shares)

        return encoding_parameters
1342
1343
    def _process_offsets(self, offsets):
        """
        Parse the offset table that follows the header and cache it in
        self._offsets. The table layout and field order differ between
        SDMF (version 0) and MDMF (version 1); the version number was
        cached by _process_encoding_parameters, which must run first.
        """
        if self._version_number == 0:
            # SDMF: four 32-bit offsets and two 64-bit offsets, placed
            # directly after the signed prefix.
            read_size = OFFSETS_LENGTH
            read_offset = SIGNED_PREFIX_LENGTH
            end = read_size + read_offset
            (signature,
             share_hash_chain,
             block_hash_tree,
             share_data,
             enc_privkey,
             EOF) = struct.unpack(">LLLLQQ",
                                  offsets[read_offset:end])
            self._offsets = {}
            self._offsets['signature'] = signature
            self._offsets['share_data'] = share_data
            self._offsets['block_hash_tree'] = block_hash_tree
            self._offsets['share_hash_chain'] = share_hash_chain
            self._offsets['enc_privkey'] = enc_privkey
            self._offsets['EOF'] = EOF

        elif self._version_number == 1:
            # MDMF: the offset table follows the fixed-size portion of
            # the header.
            read_offset = MDMFHEADERWITHOUTOFFSETSSIZE
            read_length = MDMFOFFSETS_LENGTH
            end = read_offset + read_length
            (encprivkey,
             sharehashes,
             signature,
             verification_key,
             verification_key_end,
             sharedata,
             blockhashes,
             eof) = struct.unpack(MDMFOFFSETS,
                                  offsets[read_offset:end])
            self._offsets = {}
            self._offsets['enc_privkey'] = encprivkey
            self._offsets['block_hash_tree'] = blockhashes
            self._offsets['share_hash_chain'] = sharehashes
            self._offsets['signature'] = signature
            self._offsets['verification_key'] = verification_key
            self._offsets['verification_key_end']= \
                verification_key_end
            self._offsets['EOF'] = eof
            self._offsets['share_data'] = sharedata
1387
1388
    def get_block_and_salt(self, segnum):
        """
        I return (block, salt), where block is the block data and
        salt is the salt used to encrypt that segment.

        :param segnum: the segment whose block to fetch; must be less
            than the number of segments.
        :return: a Deferred firing with (data, salt). For SDMF the salt
            was already fetched with the header; for MDMF it is stored
            at the front of the block and split off here.
        :raises LayoutInvalid: (inside the Deferred) for an
            out-of-range segment number.
        :raises BadShareError: if the server returns no data for this
            shnum, or (SDMF) an unexpected number of read vectors.
        """
        d = self._maybe_fetch_offsets_and_header()
        def _then(ignored):
            base_share_offset = self._offsets['share_data']

            if segnum + 1 > self._num_segments:
                raise LayoutInvalid("Not a valid segment number")

            # SDMF blocks are packed bare; MDMF blocks are each
            # preceded by their SALT_SIZE-byte salt.
            if self._version_number == 0:
                share_offset = base_share_offset + self._block_size * segnum
            else:
                share_offset = base_share_offset + (self._block_size + \
                                                    SALT_SIZE) * segnum
            # The final segment's block may be shorter than the rest.
            if segnum + 1 == self._num_segments:
                data = self._tail_block_size
            else:
                data = self._block_size

            # For MDMF, also read the salt stored with the block.
            if self._version_number == 1:
                data += SALT_SIZE

            readvs = [(share_offset, data)]
            return readvs
        d.addCallback(_then)
        d.addCallback(lambda readvs: self._read(readvs))
        def _process_results(results):
            if self.shnum not in results:
                raise BadShareError("no data for shnum %d" % self.shnum)
            if self._version_number == 0:
                # We only read the share data, but we know the salt from
                # when we fetched the header
                data = results[self.shnum]
                if not data:
                    data = b""
                else:
                    if len(data) != 1:
                        raise BadShareError("got %d vectors, not 1" % len(data))
                    data = data[0]
                salt = self._salt
            else:
                # MDMF: the salt is the first SALT_SIZE bytes of what
                # we read; the block data follows it.
                data = results[self.shnum]
                if not data:
                    salt = data = b""
                else:
                    salt_and_data = results[self.shnum][0]
                    salt = salt_and_data[:SALT_SIZE]
                    data = salt_and_data[SALT_SIZE:]
            return data, salt
        d.addCallback(_process_results)
        return d
1443
1444
1445    def get_blockhashes(self, needed=None, force_remote=False):
1446        """
1447        I return the block hash tree
1448
1449        I take an optional argument, needed, which is a set of indices
1450        correspond to hashes that I should fetch. If this argument is
1451        missing, I will fetch the entire block hash tree; otherwise, I
1452        may attempt to fetch fewer hashes, based on what needed says
1453        that I should do. Note that I may fetch as many hashes as I
1454        want, so long as the set of hashes that I do fetch is a superset
1455        of the ones that I am asked for, so callers should be prepared
1456        to tolerate additional hashes.
1457        """
1458        # TODO: Return only the parts of the block hash tree necessary
1459        # to validate the blocknum provided?
1460        # This is a good idea, but it is hard to implement correctly. It
1461        # is bad to fetch any one block hash more than once, so we
1462        # probably just want to fetch the whole thing at once and then
1463        # serve it.
1464        if needed == set([]):
1465            return defer.succeed([])
1466        d = self._maybe_fetch_offsets_and_header()
1467        def _then(ignored):
1468            blockhashes_offset = self._offsets['block_hash_tree']
1469            if self._version_number == 1:
1470                blockhashes_length = self._offsets['EOF'] - blockhashes_offset
1471            else:
1472                blockhashes_length = self._offsets['share_data'] - blockhashes_offset
1473            readvs = [(blockhashes_offset, blockhashes_length)]
1474            return readvs
1475        d.addCallback(_then)
1476        d.addCallback(lambda readvs:
1477            self._read(readvs, force_remote=force_remote))
1478        def _build_block_hash_tree(results):
1479            if self.shnum not in results:
1480                raise BadShareError("no data for shnum %d" % self.shnum)
1481
1482            rawhashes = results[self.shnum][0]
1483            results = [rawhashes[i:i+HASH_SIZE]
1484                       for i in range(0, len(rawhashes), HASH_SIZE)]
1485            return results
1486        d.addCallback(_build_block_hash_tree)
1487        return d
1488
1489
1490    def get_sharehashes(self, needed=None, force_remote=False):
1491        """
1492        I return the part of the share hash chain placed to validate
1493        this share.
1494
1495        I take an optional argument, needed. Needed is a set of indices
1496        that correspond to the hashes that I should fetch. If needed is
1497        not present, I will fetch and return the entire share hash
1498        chain. Otherwise, I may fetch and return any part of the share
1499        hash chain that is a superset of the part that I am asked to
1500        fetch. Callers should be prepared to deal with more hashes than
1501        they've asked for.
1502        """
1503        if needed == set([]):
1504            return defer.succeed([])
1505        d = self._maybe_fetch_offsets_and_header()
1506
1507        def _make_readvs(ignored):
1508            sharehashes_offset = self._offsets['share_hash_chain']
1509            if self._version_number == 0:
1510                sharehashes_length = self._offsets['block_hash_tree'] - sharehashes_offset
1511            else:
1512                sharehashes_length = self._offsets['signature'] - sharehashes_offset
1513            readvs = [(sharehashes_offset, sharehashes_length)]
1514            return readvs
1515        d.addCallback(_make_readvs)
1516        d.addCallback(lambda readvs:
1517            self._read(readvs, force_remote=force_remote))
1518        def _build_share_hash_chain(results):
1519            if self.shnum not in results:
1520                raise BadShareError("no data for shnum %d" % self.shnum)
1521
1522            sharehashes = results[self.shnum][0]
1523            results = [sharehashes[i:i+(HASH_SIZE + 2)]
1524                       for i in range(0, len(sharehashes), HASH_SIZE + 2)]
1525            results = dict([struct.unpack(">H32s", data)
1526                            for data in results])
1527            return results
1528        d.addCallback(_build_share_hash_chain)
1529        d.addErrback(_handle_bad_struct)
1530        return d
1531
1532
1533    def get_encprivkey(self):
1534        """
1535        I return the encrypted private key.
1536        """
1537        d = self._maybe_fetch_offsets_and_header()
1538
1539        def _make_readvs(ignored):
1540            privkey_offset = self._offsets['enc_privkey']
1541            if self._version_number == 0:
1542                privkey_length = self._offsets['EOF'] - privkey_offset
1543            else:
1544                privkey_length = self._offsets['share_hash_chain'] - privkey_offset
1545            readvs = [(privkey_offset, privkey_length)]
1546            return readvs
1547        d.addCallback(_make_readvs)
1548        d.addCallback(lambda readvs: self._read(readvs))
1549        def _process_results(results):
1550            if self.shnum not in results:
1551                raise BadShareError("no data for shnum %d" % self.shnum)
1552            privkey = results[self.shnum][0]
1553            return privkey
1554        d.addCallback(_process_results)
1555        return d
1556
1557
1558    def get_signature(self):
1559        """
1560        I return the signature of my share.
1561        """
1562        d = self._maybe_fetch_offsets_and_header()
1563
1564        def _make_readvs(ignored):
1565            signature_offset = self._offsets['signature']
1566            if self._version_number == 1:
1567                signature_length = self._offsets['verification_key'] - signature_offset
1568            else:
1569                signature_length = self._offsets['share_hash_chain'] - signature_offset
1570            readvs = [(signature_offset, signature_length)]
1571            return readvs
1572        d.addCallback(_make_readvs)
1573        d.addCallback(lambda readvs: self._read(readvs))
1574        def _process_results(results):
1575            if self.shnum not in results:
1576                raise BadShareError("no data for shnum %d" % self.shnum)
1577            signature = results[self.shnum][0]
1578            return signature
1579        d.addCallback(_process_results)
1580        return d
1581
1582
1583    def get_verification_key(self):
1584        """
1585        I return the verification key.
1586        """
1587        d = self._maybe_fetch_offsets_and_header()
1588
1589        def _make_readvs(ignored):
1590            if self._version_number == 1:
1591                vk_offset = self._offsets['verification_key']
1592                vk_length = self._offsets['verification_key_end'] - vk_offset
1593            else:
1594                vk_offset = struct.calcsize(">BQ32s16sBBQQLLLLQQ")
1595                vk_length = self._offsets['signature'] - vk_offset
1596            readvs = [(vk_offset, vk_length)]
1597            return readvs
1598        d.addCallback(_make_readvs)
1599        d.addCallback(lambda readvs: self._read(readvs))
1600        def _process_results(results):
1601            if self.shnum not in results:
1602                raise BadShareError("no data for shnum %d" % self.shnum)
1603            verification_key = results[self.shnum][0]
1604            return verification_key
1605        d.addCallback(_process_results)
1606        return d
1607
1608
1609    def get_encoding_parameters(self):
1610        """
1611        I return (k, n, segsize, datalen)
1612        """
1613        d = self._maybe_fetch_offsets_and_header()
1614        d.addCallback(lambda ignored:
1615            (self._required_shares,
1616             self._total_shares,
1617             self._segment_size,
1618             self._data_length))
1619        return d
1620
1621
1622    def get_seqnum(self):
1623        """
1624        I return the sequence number for this share.
1625        """
1626        d = self._maybe_fetch_offsets_and_header()
1627        d.addCallback(lambda ignored:
1628            self._sequence_number)
1629        return d
1630
1631
1632    def get_root_hash(self):
1633        """
1634        I return the root of the block hash tree
1635        """
1636        d = self._maybe_fetch_offsets_and_header()
1637        d.addCallback(lambda ignored: self._root_hash)
1638        return d
1639
1640
1641    def get_checkstring(self):
1642        """
1643        I return the packed representation of the following:
1644
1645            - version number
1646            - sequence number
1647            - root hash
1648            - salt hash
1649
1650        which my users use as a checkstring to detect other writers.
1651        """
1652        d = self._maybe_fetch_offsets_and_header()
1653        def _build_checkstring(ignored):
1654            if self._salt:
1655                checkstring = struct.pack(PREFIX,
1656                                          self._version_number,
1657                                          self._sequence_number,
1658                                          self._root_hash,
1659                                          self._salt)
1660            else:
1661                checkstring = struct.pack(MDMFCHECKSTRING,
1662                                          self._version_number,
1663                                          self._sequence_number,
1664                                          self._root_hash)
1665
1666            return checkstring
1667        d.addCallback(_build_checkstring)
1668        return d
1669
1670
1671    def get_prefix(self, force_remote):
1672        d = self._maybe_fetch_offsets_and_header(force_remote)
1673        d.addCallback(lambda ignored:
1674            self._build_prefix())
1675        return d
1676
1677
1678    def _build_prefix(self):
1679        # The prefix is another name for the part of the remote share
1680        # that gets signed. It consists of everything up to and
1681        # including the datalength, packed by struct.
1682        if self._version_number == SDMF_VERSION:
1683            return struct.pack(SIGNED_PREFIX,
1684                           self._version_number,
1685                           self._sequence_number,
1686                           self._root_hash,
1687                           self._salt,
1688                           self._required_shares,
1689                           self._total_shares,
1690                           self._segment_size,
1691                           self._data_length)
1692
1693        else:
1694            return struct.pack(MDMFSIGNABLEHEADER,
1695                           self._version_number,
1696                           self._sequence_number,
1697                           self._root_hash,
1698                           self._required_shares,
1699                           self._total_shares,
1700                           self._segment_size,
1701                           self._data_length)
1702
1703
1704    def _get_offsets_tuple(self):
1705        # The offsets tuple is another component of the version
1706        # information tuple. It is basically our offsets dictionary,
1707        # itemized and in a tuple.
1708        return self._offsets.copy()
1709
1710
1711    def get_verinfo(self):
1712        """
1713        I return my verinfo tuple. This is used by the ServermapUpdater
1714        to keep track of versions of mutable files.
1715
1716        The verinfo tuple for MDMF files contains:
1717            - seqnum
1718            - root hash
1719            - a blank (nothing)
1720            - segsize
1721            - datalen
1722            - k
1723            - n
1724            - prefix (the thing that you sign)
1725            - a tuple of offsets
1726
1727        We include the nonce in MDMF to simplify processing of version
1728        information tuples.
1729
1730        The verinfo tuple for SDMF files is the same, but contains a
1731        16-byte IV instead of a hash of salts.
1732        """
1733        d = self._maybe_fetch_offsets_and_header()
1734        def _build_verinfo(ignored):
1735            if self._version_number == SDMF_VERSION:
1736                salt_to_use = self._salt
1737            else:
1738                salt_to_use = None
1739            return (self._sequence_number,
1740                    self._root_hash,
1741                    salt_to_use,
1742                    self._segment_size,
1743                    self._data_length,
1744                    self._required_shares,
1745                    self._total_shares,
1746                    self._build_prefix(),
1747                    self._get_offsets_tuple())
1748        d.addCallback(_build_verinfo)
1749        return d
1750
1751
1752    def _read(self, readvs, force_remote=False):
1753        unsatisfiable = [x for x in readvs if x[0] + x[1] > len(self._data)]
1754        # TODO: It's entirely possible to tweak this so that it just
1755        # fulfills the requests that it can, and not demand that all
1756        # requests are satisfiable before running it.
1757
1758        if not unsatisfiable or self._data_is_everything:
1759            results = [self._data[offset:offset+length]
1760                       for (offset, length) in readvs]
1761            results = {self.shnum: results}
1762            return defer.succeed(results)
1763        else:
1764            return self._storage_server.slot_readv(
1765                self._storage_index,
1766                [self.shnum],
1767                readvs,
1768            )
1769
1770
1771    def is_sdmf(self):
1772        """I tell my caller whether or not my remote file is SDMF or MDMF
1773        """
1774        d = self._maybe_fetch_offsets_and_header()
1775        d.addCallback(lambda ignored:
1776            self._version_number == 0)
1777        return d
1778
1779
class LayoutInvalid(BadShareError):
    """
    Raised when a share does not hold a valid MDMF mutable file.
    """
Note: See TracBrowser for help on using the repository browser.