--- old-tahoe/docs/architecture.txt 2010-01-14 03:46:11.969000000 +0000
+++ new-tahoe/docs/architecture.txt 2010-01-14 03:46:12.222000000 +0000
@@ -5,14 +5,15 @@

OVERVIEW

-At a high-level this system consists of three layers: the grid, the
-filesystem, and the application.
+At a high-level this system consists of three layers: the key-value store,
+the filesystem, and the application.

-The lowest layer is the "grid", a key-value store mapping from capabilities to
-data. The capabilities are relatively short ascii strings, each used as a
-reference to an arbitrary-length sequence of data bytes, and are like a URI
-for that data. This data is encrypted and distributed across a number of
-nodes, such that it will survive the loss of most of the nodes.
+The lowest layer is the key-value store, which is a distributed hashtable
+mapping from capabilities to data. The capabilities are relatively short
+ASCII strings, each used as a reference to an arbitrary-length sequence of
+data bytes, and are like a URI for that data. This data is encrypted and
+distributed across a number of nodes, such that it will survive the loss of
+most of the nodes.

The middle layer is the decentralized filesystem: a directed graph in which
the intermediate nodes are directories and the leaf nodes are files. The leaf
@@ -31,19 +32,21 @@

THE GRID OF STORAGE SERVERS

-The grid is composed of peer nodes -- processes running on computers. They
-establish TCP connections to each other using Foolscap, a secure remote
-message passing library.
+A key-value store is implemented by a collection of peer nodes -- processes
+running on computers -- called a "grid". (The term "grid" is also used loosely
+for the filesystem supported by these nodes.) The nodes in a grid establish
+TCP connections to each other using Foolscap, a secure remote-message-passing
+library.

-Each peer offers certain services to the others. The primary service is that
+Each node offers certain services to the others. The primary service is that
of the storage server, which holds data in the form of "shares". Shares are
encoded pieces of files. There are a configurable number of shares for each
file, 10 by default. Normally, each share is stored on a separate server, but
a single server can hold multiple shares for a single file.

-Peers learn about each other through an "introducer". Each peer connects to a
-central introducer at startup, and receives a list of all other peers from
-it. Each peer then connects to all other peers, creating a fully-connected
+Nodes learn about each other through an "introducer". Each node connects to a
+central introducer at startup, and receives a list of all other nodes from
+it. Each node then connects to all other nodes, creating a fully-connected
topology. In the current release, nodes behind NAT boxes will connect to all
nodes that they can open connections to, but they cannot open connections to
other nodes behind NAT boxes. Therefore, the more nodes behind NAT boxes, the
@@ -62,16 +65,17 @@
"gossip-based" introduction, simply knowing how to contact any one node will
be enough to contact all of them.

+
FILE ENCODING

-When a peer stores a file on the grid, it first encrypts the file, using a key
+When a node stores a file on its grid, it first encrypts the file, using a key
that is optionally derived from the hash of the file itself. It then segments
the encrypted file into small pieces, in order to reduce the memory footprint,
and to decrease the lag between initiating a download and receiving the first
part of the file; for example the lag between hitting "play" and a movie
actually starting.

-The peer then erasure-codes each segment, producing blocks such that only a
+The node then erasure-codes each segment, producing blocks such that only a
subset of them are needed to reconstruct the segment. It sends one block from
each segment to a given server. The set of blocks on a given server
constitutes a "share". Only a subset of the shares (3 out of 10, by default)
@@ -79,7 +83,7 @@

A tagged hash of the encryption key is used to form the "storage index", which
is used for both server selection (described below) and to index shares within
-the Storage Servers on the selected peers.
+the Storage Servers on the selected nodes.

Hashes are computed while the shares are being produced, to validate the
ciphertext and the shares themselves. Merkle hash trees are used to enable
@@ -144,49 +148,49 @@
to retrieve a set of bytes, and then you can use it to validate ("identify")
that these potential bytes are indeed the ones that you were looking for.

-The "grid" layer is insufficient to provide a virtual drive: an actual
-filesystem requires human-meaningful names. Capabilities sit on the
-"global+secure" edge of Zooko's Triangle[1]. They are self-authenticating,
-meaning that nobody can trick you into using a file that doesn't match the
-capability you used to refer to that file.
+The "key-value store" layer is insufficient to provide a usable filesystem,
+which requires human-meaningful names. Capabilities sit on the "global+secure"
+edge of Zooko's Triangle[1]. They are self-authenticating, meaning that
+nobody can trick you into using a file that doesn't match the capability
+you used to refer to that file.


SERVER SELECTION

-When a file is uploaded, the encoded shares are sent to other peers. But to
+When a file is uploaded, the encoded shares are sent to other nodes. But to
which ones? The "server selection" algorithm is used to make this choice.

In the current version, the storage index is used to consistently-permute the
-set of all peers (by sorting the peers by HASH(storage_index+peerid)). Each
-file gets a different permutation, which (on average) will evenly distribute
+set of all peer nodes (by sorting the peer nodes by HASH(storage_index+peerid)).
+Each file gets a different permutation, which (on average) will evenly distribute
shares among the grid and avoid hotspots.

-We use this permuted list of peers to ask each peer, in turn, if it will hold
+We use this permuted list of nodes to ask each node, in turn, if it will hold
a share for us, by sending an 'allocate_buckets() query' to each one. Some
-will say yes, others (those who are full) will say no: when a peer refuses our
-request, we just take that share to the next peer on the list. We keep going
+will say yes, others (those who are full) will say no: when a node refuses our
+request, we just take that share to the next node on the list. We keep going
until we run out of shares to place. At the end of the process, we'll have a
-table that maps each share number to a peer, and then we can begin the
+table that maps each share number to a node, and then we can begin the
encode+push phase, using the table to decide where each share should be sent.

-Most of the time, this will result in one share per peer, which gives us
+Most of the time, this will result in one share per node, which gives us
maximum reliability (since it disperses the failures as widely as possible).
-If there are fewer useable peers than there are shares, we'll be forced to
-loop around, eventually giving multiple shares to a single peer. This reduces
+If there are fewer useable nodes than there are shares, we'll be forced to
+loop around, eventually giving multiple shares to a single node. This reduces
reliability, so it isn't the sort of thing we want to happen all the time, and
either indicates that the default encoding parameters are set incorrectly
-(creating more shares than you have peers), or that the grid does not have
-enough space (many peers are full). But apart from that, it doesn't hurt. If
-we have to loop through the peer list a second time, we accelerate the query
-process, by asking each peer to hold multiple shares on the second pass. In
+(creating more shares than you have nodes), or that the grid does not have
+enough space (many nodes are full). But apart from that, it doesn't hurt. If
+we have to loop through the node list a second time, we accelerate the query
+process, by asking each node to hold multiple shares on the second pass. In
most cases, this means we'll never send more than two queries to any given
-peer.
+node.

-If a peer is unreachable, or has an error, or refuses to accept any of our
+If a node is unreachable, or has an error, or refuses to accept any of our
shares, we remove them from the permuted list, so we won't query them a second
-time for this file. If a peer already has shares for the file we're uploading
+time for this file. If a node already has shares for the file we're uploading
(or if someone else is currently sending them shares), we add that information
-to the share-to-peer table. This lets us do less work for files which have
+to the share-to-peer-node table. This lets us do less work for files which have
been uploaded once before, while making sure we still wind up with as many
shares as we desire.

@@ -197,10 +201,10 @@
The current defaults use k=3, shares_of_happiness=7, and N=10, meaning that
we'll try to place 10 shares, we'll be happy if we can place 7, and we need to
get back any 3 to recover the file. This results in a 3.3x expansion
-factor. In general, you should set N about equal to the number of peers in
+factor. In general, you should set N about equal to the number of nodes in
your grid, then set N/k to achieve your desired availability goals.

-When downloading a file, the current release just asks all known peers for any
+When downloading a file, the current release just asks all known nodes for any
shares they might have, chooses the minimal necessary subset, then starts
downloading and processing those shares. A later release will use the full
algorithm to reduce the number of queries that must be sent out. This
@@ -209,26 +213,26 @@
queries that must be sent before downloading can begin.

The actual number of queries is directly related to the availability of the
-peers and the degree of overlap between the peerlist used at upload and at
+nodes and the degree of overlap between the node list used at upload and at
download. For stable grids, this overlap is very high, and usually the first k
queries will result in shares. The number of queries grows as the stability
decreases. Some limits may be imposed in large grids to avoid querying a
-million peers; this provides a tradeoff between the work spent to discover
+million nodes; this provides a tradeoff between the work spent to discover
that a file is unrecoverable and the probability that a retrieval will fail
when it could have succeeded if we had just tried a little bit harder. The
appropriate value of this tradeoff will depend upon the size of the grid, and
will change over time.

-Other peer selection algorithms are possible. One earlier version (known as
-"tahoe 3") used the permutation to place the peers around a large ring,
+Other peer-node selection algorithms are possible. One earlier version (known
+as "Tahoe 3") used the permutation to place the nodes around a large ring,
distributed shares evenly around the same ring, then walks clockwise from 0
with a basket: each time we encounter a share, put it in the basket, each time
-we encounter a peer, give them as many shares from our basket as they'll
+we encounter a node, give them as many shares from our basket as they'll
accept. This reduced the number of queries (usually to 1) for small grids
-(where N is larger than the number of peers), but resulted in extremely
+(where N is larger than the number of nodes), but resulted in extremely
non-uniform share distribution, which significantly hurt reliability
(sometimes the permutation resulted in most of the shares being dumped on a
-single peer).
+single node).

Another algorithm (known as "denver airport"[2]) uses the permuted hash to
decide on an approximate target for each share, then sends lease requests via
@@ -243,12 +247,12 @@
SWARMING DOWNLOAD, TRICKLING UPLOAD

Because the shares being downloaded are distributed across a large number of
-peers, the download process will pull from many of them at the same time. The
+nodes, the download process will pull from many of them at the same time. The
current encoding parameters require 3 shares to be retrieved for each segment,
-which means that up to 3 peers will be used simultaneously. For larger
-networks, 8-of-22 encoding could be used, meaning 8 peers can be used
+which means that up to 3 nodes will be used simultaneously. For larger
+networks, 8-of-22 encoding could be used, meaning 8 nodes can be used
simultaneously. This allows the download process to use the sum of the
-available peers' upload bandwidths, resulting in downloads that take full
+available nodes' upload bandwidths, resulting in downloads that take full
advantage of the common 8x disparity between download and upload bandwith on
modern ADSL lines.

@@ -301,105 +305,25 @@
that are globally visible.


-LEASES, REFRESHING, GARBAGE COLLECTION, QUOTAS
+LEASES, REFRESHING, GARBAGE COLLECTION
+
+When a file or directory in the virtual filesystem is no longer referenced,
+the space that its shares occupied on each storage server can be freed,
+making room for other shares. Tahoe currently uses a garbage collection
+("GC") mechanism to implement this space-reclamation process. Each share has
+one or more "leases", which are managed by clients who want the
+file/directory to be retained. The storage server accepts each share for a
+pre-defined period of time, and is allowed to delete the share if all of the
+leases are cancelled or allowed to expire.
+
+Garbage collection is not enabled by default: storage servers will not delete
+shares without being explicitly configured to do so. When GC is enabled,
+clients are responsible for renewing their leases on a periodic basis at
+least frequently enough to prevent any of the leases from expiring before the
+next renewal pass.

-THIS SECTION IS OUT OF DATE. Since we wrote this we've changed our minds
-about how we intend to implement these features. Neither the old design,
-documented below, nor the new one, documented on the tahoe-dev mailing list
-and the wiki and the issue tracker, have actually been implemented yet.
-
-Shares are uploaded to a storage server, but they do not necessarily stay
-there forever. We are anticipating three main share-lifetime management modes
-for Tahoe: 1) per-share leases which expire, 2) per-account timers which
-expire and cancel all leases for the account, and 3) centralized account
-management without expiration timers.
-
-To be clear, none of these have been implemented yet. The
-http://allmydata.org/trac/tahoe/wiki/QuotaManagement "Quota Management" wiki
-page describes some of our plans for managing data lifetime and limited-space
-user accounts.
-
-Multiple clients may be interested in a given share, for example if two
-clients uploaded the same file, or if two clients are sharing a directory and
-both want to make sure the files therein remain available. Consequently, each
-share (technically each "bucket", which may contain multiple shares for a
-single storage index) has a set of leases, one per client. One way to
-visualize this is with a large table, with shares (i.e. buckets, or storage
-indices, or files) as the rows, and accounts as columns. Each square of this
-table might hold a lease.
-
-Using limited-duration leases reduces the storage consumed by clients who have
-(for whatever reason) forgotten about the share they once cared about.
-Clients are supposed to explicitly cancel leases for every file that they
-remove from their vdrive, and when the last lease is removed on a share, the
-storage server deletes that share. However, the storage server might be
-offline when the client deletes the file, or the client might experience a bug
-or a race condition that results in forgetting about the file. Using leases
-that expire unless otherwise renewed ensures that these lost files will not
-consume storage space forever. On the other hand, they require periodic
-maintenance, which can become prohibitively expensive for large grids. In
-addition, clients who go offline for a while are then obligated to get someone
-else to keep their files alive for them.
-
-
-In the first mode, each client holds a limited-duration lease on each share
-(typically one month), and clients are obligated to periodically renew these
-leases to keep them from expiring (typically once a week). In this mode, the
-storage server does not know anything about which client is which: it only
-knows about leases.
-
-In the second mode, each server maintains a list of clients and which leases
-they hold. This is called the "account list", and each time a client wants to
-upload a share or establish a lease, it provides credentials to allow the
-server to know which Account it will be using. Rather than putting individual
-timers on each lease, the server puts a timer on the Account. When the account
-expires, all of the associated leases are cancelled.
-
-In this mode, clients are obligated to renew the Account periodically, but not
-the (thousands of) individual share leases. Clients which forget about files
-are still incurring a storage cost for those files. An occasional
-reconcilliation process (in which the client presents the storage server with
-a list of all the files it cares about, and the server removes leases for
-anything that isn't on the list) can be used to free this storage, but the
-effort involved is large, so reconcilliation must be done very infrequently.
-
-Our plan is to have the clients create their own Accounts, based upon the
-possession of a private key. Clients can create as many accounts as they wish,
-but they are responsible for their own maintenance. Servers can add up all the
-leases for each account and present a report of usage, in bytes per
-account. This is intended for friendnet scenarios where it would be nice to
-know how much space your friends are consuming on your disk.
-
-In the third mode, the Account objects are centrally managed, and are not
-expired by the storage servers. In this mode, the client presents credentials
-that are issued by a central authority, such as a signed message which the
-storage server can verify. The storage used by this account is not freed
-unless and until the central account manager says so.
-
-This mode is more appropriate for a commercial offering, in which use of the
-storage servers is contingent upon a monthly fee, or other membership
-criteria. Being able to ask the storage usage for each account (or establish
-limits on it) helps to enforce whatever kind of membership policy is desired.
-
-
-Each lease is created with a pair of secrets: the "renew secret" and the
-"cancel secret". These are just random-looking strings, derived by hashing
-other higher-level secrets, starting with a per-client master secret. Anyone
-who knows the secret is allowed to restart the expiration timer, or cancel the
-lease altogether. Having these be individual values allows the original
-uploading node to delegate these capabilities to others.
-
-In the current release, clients provide lease secrets to the storage server,
-and each lease contains an expiration time, but there is no facility to
-actually expire leases, nor are there explicit owners (the "ownerid" field of
-each lease is always set to zero). In addition, many features have not been
-implemented yet: the client should claim leases on files which are added to
-the vdrive by linking (as opposed to uploading), and the client should cancel
-leases on files which are removed from the vdrive, but neither has been
-written yet. This means that shares are not ever deleted in this
-release. (Note, however, that if read-cap to a file is deleted then it will no
-longer be possible to decrypt that file, even if the shares which contain the
-erasure-coded ciphertext still exist.)
+See docs/garbage-collection.txt for further information, and how to configure
+garbage collection.


FILE REPAIRER
@@ -423,10 +347,10 @@
The repairer process does not get the full capability of the file to be
maintained: it merely gets the "repairer capability" subset, which does not
include the decryption key. The File Verifier uses that data to find out which
-peers ought to hold shares for this file, and to see if those peers are still
+nodes ought to hold shares for this file, and to see if those nodes are still
around and willing to provide the data. If the file is not healthy enough, the
File Repairer is invoked to download the ciphertext, regenerate any missing
-shares, and upload them to new peers. The goal of the File Repairer is to
+shares, and upload them to new nodes. The goal of the File Repairer is to
finish up with a full set of "N" shares.

There are a number of engineering issues to be resolved here. The bandwidth,
@@ -439,13 +363,13 @@
performed at the same time, and repair of files can be delegated off to other
nodes.

-The security model we are currently using assumes that peers who claim to hold
+The security model we are currently using assumes that nodes who claim to hold
a share will actually provide it when asked. (We validate the data they
-provide before using it in any way, but if enough peers claim to hold the data
+provide before using it in any way, but if enough nodes claim to hold the data
and are wrong, the file will not be repaired, and may decay beyond
recoverability). There are several interesting approaches to mitigate this
threat, ranging from challenges to provide a keyed hash of the allegedly-held
-data (using "buddy nodes", in which two peers hold the same block, and check
+data (using "buddy nodes", in which two nodes hold the same block, and check
up on each other), to reputation systems, or even the original Mojo Nation
economic model.

@@ -475,20 +399,20 @@
technique used to generate shares.

Many of these security properties depend upon the usual cryptographic
-assumptions: the resistance of AES and RSA to attack, the resistance of SHA256
+assumptions: the resistance of AES and RSA to attack, the resistance of SHA-256
to pre-image attacks, and upon the proximity of 2^-128 and 2^-256 to zero. A
break in AES would allow a confidentiality violation, a pre-image break in
-SHA256 would allow a consistency violation, and a break in RSA would allow a
-mutability violation. The discovery of a collision in SHA256 is unlikely to
+SHA-256 would allow a consistency violation, and a break in RSA would allow a
+mutability violation. The discovery of a collision in SHA-256 is unlikely to
allow much, but could conceivably allow a consistency violation in data that
-was uploaded by the attacker. If SHA256 is threatened, further analysis will
+was uploaded by the attacker. If SHA-256 is threatened, further analysis will
be warranted.

There is no attempt made to provide anonymity, neither of the origin of a
piece of data nor the identity of the subsequent downloaders. In general,
anyone who already knows the contents of a file will be in a strong position
to determine who else is uploading or downloading it. Also, it is quite easy
-for a sufficiently-large coalition of nodes to correlate the set of peers who
+for a sufficiently large coalition of nodes to correlate the set of nodes who
are all uploading or downloading the same file, even if the attacker does not
know the contents of the file in question.

@@ -522,18 +446,18 @@

RELIABILITY

-File encoding and peer selection parameters can be adjusted to achieve
+File encoding and peer-node selection parameters can be adjusted to achieve
different goals. Each choice results in a number of properties; there are many
tradeoffs.

First, some terms: the erasure-coding algorithm is described as K-out-of-N
(for this release, the default values are K=3 and N=10). Each grid will have
-some number of peers; this number will rise and fall over time as peers join,
+some number of nodes; this number will rise and fall over time as nodes join,
drop out, come back, and leave forever. Files are of various sizes, some are
-popular, others are rare. Peers have various capacities, variable
+popular, others are rare. Nodes have various capacities, variable
upload/download bandwidths, and network latency. Most of the mathematical
-models that look at peer failure assume some average (and independent)
-probability 'P' of a given peer being available: this can be high (servers
+models that look at node failure assume some average (and independent)
+probability 'P' of a given node being available: this can be high (servers
tend to be online and available >90% of the time) or low (laptops tend to be
turned on for an hour then disappear for several days). Files are encoded in
segments of a given maximum size, which affects memory usage.
@@ -549,24 +473,24 @@
roughly 10^50 times better), because there are more shares that can be lost
without losing the file.

-Likewise, the total number of peers in the network affects the same
-granularity: having only one peer means a single point of failure, no matter
-how many copies of the file you make. Independent peers (with uncorrelated
+Likewise, the total number of nodes in the network affects the same
+granularity: having only one node means a single point of failure, no matter
+how many copies of the file you make. Independent nodes (with uncorrelated
failures) are necessary to hit the mathematical ideals: if you have 100 nodes
but they are all in the same office building, then a single power failure will
take out all of them at once. The "Sybil Attack" is where a single attacker
convinces you that they are actually multiple servers, so that you think you
-are using a large number of independent peers, but in fact you have a single
+are using a large number of independent nodes, but in fact you have a single
point of failure (where the attacker turns off all their machines at
-once). Large grids, with lots of truly-independent peers, will enable the use
+once). Large grids, with lots of truly independent nodes, will enable the use
of lower expansion factors to achieve the same reliability, but will increase
-overhead because each peer needs to know something about every other, and the
-rate at which peers come and go will be higher (requiring network maintenance
+overhead because each node needs to know something about every other, and the
+rate at which nodes come and go will be higher (requiring network maintenance
traffic). Also, the File Repairer work will increase with larger grids,
-although then the job can be distributed out to more peers.
+although then the job can be distributed out to more nodes.

Higher values of N increase overhead: more shares means more Merkle hashes
-that must be included with the data, and more peers to contact to retrieve the
+that must be included with the data, and more nodes to contact to retrieve the
shares. Smaller segment sizes reduce memory usage (since each segment must be
held in memory while erasure coding runs) and improves "alacrity" (since
downloading can validate a smaller piece of data faster, delivering it to the
@@ -592,9 +516,9 @@

[2]: all of these names are derived from the location where they were
     concocted, in this case in a car ride from Boulder to DEN. To be
-     precise, "tahoe 1" was an unworkable scheme in which everyone who holds
+     precise, "Tahoe 1" was an unworkable scheme in which everyone who holds
     shares for a given file would form a sort of cabal which kept track of
-     all the others, "tahoe 2" is the first-100-peers in the permuted hash
-     described in this document, and "tahoe 3" (or perhaps "potrero hill 1")
+     all the others, "Tahoe 2" is the first-100-nodes in the permuted hash
+     described in this document, and "Tahoe 3" (or perhaps "Potrero hill 1")
     was the abandoned ring-with-many-hands approach.

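As an illustration of the SERVER SELECTION text above: the permutation it
describes is just "sort the nodes by HASH(storage_index + peerid)". The
sketch below is not the Tahoe implementation; SHA-256 stands in for HASH and
the node IDs are made up.

    import hashlib

    def permute_nodes(storage_index, nodeids):
        # Each storage index yields a different, but stable, ordering of the
        # same set of nodes, which is what spreads shares evenly over the grid.
        return sorted(nodeids,
                      key=lambda nid: hashlib.sha256(storage_index + nid).digest())

    nodes = [("node%02d" % i).encode() for i in range(10)]
    print(permute_nodes(b"storage-index-1", nodes))
    print(permute_nodes(b"storage-index-2", nodes))

With the default 3-of-10 encoding, shares are then offered to nodes in this
permuted order until all 10 are placed, giving the 10/3 = 3.3x expansion
factor mentioned above.
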
--- old-tahoe/src/allmydata/scripts/cli.py 2010-01-14 03:46:11.986000000 +0000
+++ new-tahoe/src/allmydata/scripts/cli.py 2010-01-14 03:46:12.233000000 +0000
@@ -69,10 +69,10 @@
    def getSynopsis(self):
        return "%s create-alias ALIAS" % (os.path.basename(sys.argv[0]),)

-    longdesc = """Creates a new directory and adds an alias for it."""
+    longdesc = """Create a new directory and add an alias for it."""

class ListAliasOptions(VDriveOptions):
-    longdesc = """Displays a table of all configured aliases."""
+    longdesc = """Display a table of all configured aliases."""

class ListOptions(VDriveOptions):
    optFlags = [
@@ -85,7 +85,7 @@
    def parseArgs(self, where=""):
        self.where = where

-    longdesc = """List the contents of some portion of the virtual drive."""
+    longdesc = """List the contents of some portion of the grid."""

class GetOptions(VDriveOptions):
    def parseArgs(self, arg1, arg2=None):
@@ -100,11 +100,12 @@
            self.to_file = None

    def getSynopsis(self):
-        return "%s get VDRIVE_FILE LOCAL_FILE" % (os.path.basename(sys.argv[0]),)
+        return "%s get REMOTE_FILE LOCAL_FILE" % (os.path.basename(sys.argv[0]),)

-    longdesc = """Retrieve a file from the virtual drive and write it to the
-    local filesystem. If LOCAL_FILE is omitted or '-', the contents of the file
-    will be written to stdout."""
+    longdesc = """
+    Retrieve a file from the grid and write it to the local filesystem. If
+    LOCAL_FILE is omitted or '-', the contents of the file will be written to
+    stdout."""

    def getUsage(self, width=None):
        t = VDriveOptions.getUsage(self, width)
@@ -123,12 +124,7 @@
        ]

    def parseArgs(self, arg1=None, arg2=None):
-        # cat FILE | tahoe put           # create unlinked file from stdin
-        # cat FILE | tahoe put -         # same
-        # tahoe put bar                  # create unlinked file from local 'bar'
-        # cat FILE | tahoe put - FOO     # create tahoe:FOO from stdin
-        # tahoe put bar FOO              # copy local 'bar' to tahoe:FOO
-        # tahoe put bar tahoe:FOO        # same
+        # see Examples below

        if arg1 is not None and arg2 is not None:
            self.from_file = arg1
@@ -143,13 +139,14 @@
            self.from_file = None

    def getSynopsis(self):
-        return "%s put LOCAL_FILE VDRIVE_FILE" % (os.path.basename(sys.argv[0]),)
+        return "%s put LOCAL_FILE REMOTE_FILE" % (os.path.basename(sys.argv[0]),)

-    longdesc = """Put a file into the virtual drive (copying the file's
-    contents from the local filesystem). If VDRIVE_FILE is missing, upload
-    the file but do not link it into a directory: prints the new filecap to
-    stdout. If LOCAL_FILE is missing or '-', data will be copied from stdin.
-    VDRIVE_FILE is assumed to start with tahoe: unless otherwise specified."""
+    longdesc = """
+    Put a file into the grid, copying its contents from the local filesystem.
+    If REMOTE_FILE is missing, upload the file but do not link it into a directory;
+    also print the new filecap to stdout. If LOCAL_FILE is missing or '-', data
+    will be copied from stdin. REMOTE_FILE is assumed to start with tahoe: unless
+    otherwise specified."""

    def getUsage(self, width=None):
        t = VDriveOptions.getUsage(self, width)
@@ -171,7 +168,7 @@
        ("verbose", "v", "Be noisy about what is happening."),
        ("caps-only", None,
         "When copying to local files, write out filecaps instead of actual "
-         "data. (only useful for debugging and tree-comparison purposes)"),
+         "data (only useful for debugging and tree-comparison purposes)."),
        ]
    def parseArgs(self, *args):
        if len(args) < 2:
@@ -181,12 +178,12 @@
    def getSynopsis(self):
        return "Usage: tahoe [options] cp FROM.. TO"
    longdesc = """
-    Use 'tahoe cp' to copy files between a local filesystem and a Tahoe
-    virtual filesystem. Any FROM/TO arguments that begin with an alias
-    indicate Tahoe-side files, and arguments which do not indicate local
-    files. Directories will be copied recursively. New Tahoe-side directories
-    will be created when necessary. Assuming that you have previously set up
-    an alias 'home' with 'tahoe create-alias home', here are some examples:
+    Use 'tahoe cp' to copy files between a local filesystem and a Tahoe grid.
+    Any FROM/TO arguments that begin with an alias indicate Tahoe-side
+    files or non-file arguments. Directories will be copied recursively.
+    New Tahoe-side directories will be created when necessary. Assuming that
+    you have previously set up an alias 'home' with 'tahoe create-alias home',
+    here are some examples:

    tahoe cp ~/foo.txt home:  # creates tahoe-side home:foo.txt

@@ -210,7 +207,7 @@
        self.where = where

    def getSynopsis(self):
-        return "%s rm VDRIVE_FILE" % (os.path.basename(sys.argv[0]),)
+        return "%s rm REMOTE_FILE" % (os.path.basename(sys.argv[0]),)

class MvOptions(VDriveOptions):
    def parseArgs(self, frompath, topath):
@@ -220,11 +217,15 @@
    def getSynopsis(self):
        return "%s mv FROM TO" % (os.path.basename(sys.argv[0]),)
    longdesc = """
-    Use 'tahoe mv' to move files that are already on the grid elsewhere on the grid, e.g., 'tahoe mv alias:some_file alias:new_file'.
+    Use 'tahoe mv' to move files that are already on the grid elsewhere on the
+    grid, e.g., 'tahoe mv alias:some_file alias:new_file'.

-    If moving a remote file into a remote directory, you'll need to append a '/' to the name of the remote directory, e.g., 'tahoe mv tahoe:file1 tahoe:dir/', not 'tahoe mv tahoe:file1 tahoe:dir'.
+    If moving a remote file into a remote directory, you'll need to append a '/'
+    to the name of the remote directory, e.g., 'tahoe mv tahoe:file1 tahoe:dir/',
+    not 'tahoe mv tahoe:file1 tahoe:dir'.

-    Note that it is not possible to use this command to move local files to the grid -- use 'tahoe cp' for that.
+    Note that it is not possible to use this command to move local files to the
+    grid -- use 'tahoe cp' for that.
    """

class LnOptions(VDriveOptions):
@@ -241,7 +242,7 @@
class BackupOptions(VDriveOptions):
    optFlags = [
        ("verbose", "v", "Be noisy about what is happening."),
-        ("ignore-timestamps", None, "Do not use backupdb timestamps to decide if a local file is unchanged."),
+        ("ignore-timestamps", None, "Do not use backupdb timestamps to decide whether a local file is unchanged."),
        ]

    vcs_patterns = ('CVS', 'RCS', 'SCCS', '.git', '.gitignore', '.cvsignore', '.svn',
@@ -298,7 +299,12 @@
        else:
            yield filename

-    longdesc = """Add a versioned backup of the local FROM directory to a timestamped subdir of the (tahoe) TO/Archives directory, sharing as many files and directories as possible with the previous backup. Creates TO/Latest as a reference to the latest backup. Behaves somewhat like 'rsync -a --link-dest=TO/Archives/(previous) FROM TO/Archives/(new); ln -sf TO/Archives/(new) TO/Latest'."""
+    longdesc = """
+    Add a versioned backup of the local FROM directory to a timestamped
+    subdirectory of the TO/Archives directory on the grid, sharing as many
+    files and directories as possible with the previous backup. Create
+    TO/Latest as a reference to the latest backup. Behaves somewhat like
+    'rsync -a --link-dest=TO/Archives/(previous) FROM TO/Archives/(new); ln -sf TO/Archives/(new) TO/Latest'."""

class WebopenOptions(VDriveOptions):
    def parseArgs(self, where=''):
@@ -307,7 +313,7 @@
    def getSynopsis(self):
        return "%s webopen [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)

-    longdesc = """Opens a webbrowser to the contents of some portion of the virtual drive. When called without arguments, opens to the Welcome page."""
+    longdesc = """Open a web browser to the contents of some file or directory on the grid."""

class ManifestOptions(VDriveOptions):
    optFlags = [
@@ -322,7 +328,7 @@
    def getSynopsis(self):
        return "%s manifest [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)

-    longdesc = """Print a list of all files/directories reachable from the given starting point."""
+    longdesc = """Print a list of all files and directories reachable from the given starting point."""

class StatsOptions(VDriveOptions):
    optFlags = [
@@ -334,7 +340,7 @@
    def getSynopsis(self):
        return "%s stats [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)

-    longdesc = """Print statistics about of all files/directories reachable from the given starting point."""
+    longdesc = """Print statistics about all files and directories reachable from the given starting point."""

class CheckOptions(VDriveOptions):
    optFlags = [
@@ -349,7 +355,9 @@
    def getSynopsis(self):
        return "%s check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)

-    longdesc = """Check a single file or directory: count how many shares are available, verify their hashes. Optionally repair the file if any problems were found."""
+    longdesc = """
+    Check a single file or directory: count how many shares are available and
+    verify their hashes. Optionally repair the file if any problems were found."""

class DeepCheckOptions(VDriveOptions):
    optFlags = [
@@ -365,7 +373,10 @@
    def getSynopsis(self):
        return "%s deep-check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)

-    longdesc = """Check all files/directories reachable from the given starting point (which must be a directory), like 'tahoe check' but for multiple files. Optionally repair any problems found."""
+    longdesc = """
+    Check all files and directories reachable from the given starting point
+    (which must be a directory), like 'tahoe check' but for multiple files.
+    Optionally repair any problems found."""

subCommands = [
    ["mkdir", None, MakeDirectoryOptions, "Create a new directory"],
@@ -373,16 +384,16 @@
    ["create-alias", None, CreateAliasOptions, "Create a new alias cap"],
    ["list-aliases", None, ListAliasOptions, "List all alias caps"],
    ["ls", None, ListOptions, "List a directory"],
-    ["get", None, GetOptions, "Retrieve a file from the virtual drive."],
-    ["put", None, PutOptions, "Upload a file into the virtual drive."],
+    ["get", None, GetOptions, "Retrieve a file from the grid."],
+    ["put", None, PutOptions, "Upload a file into the grid."],
    ["cp", None, CpOptions, "Copy one or more files."],
-    ["rm", None, RmOptions, "Unlink a file or directory in the virtual drive."],
-    ["mv", None, MvOptions, "Move a file within the virtual drive."],
+    ["rm", None, RmOptions, "Unlink a file or directory on the grid."],
+    ["mv", None, MvOptions, "Move a file within the grid."],
    ["ln", None, LnOptions, "Make an additional link to an existing file."],
    ["backup", None, BackupOptions, "Make target dir look like local dir."],
-    ["webopen", None, WebopenOptions, "Open a webbrowser to the root_dir"],
-    ["manifest", None, ManifestOptions, "List all files/dirs in a subtree"],
-    ["stats", None, StatsOptions, "Print statistics about all files/dirs in a subtree"],
+    ["webopen", None, WebopenOptions, "Open a web browser to a grid file or directory."],
+    ["manifest", None, ManifestOptions, "List all files/directories in a subtree"],
+    ["stats", None, StatsOptions, "Print statistics about all files/directories in a subtree"],
    ["check", None, CheckOptions, "Check a single file or directory"],
    ["deep-check", None, DeepCheckOptions, "Check all files/directories reachable from a starting point"],
    ]

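The cli.py hunks above only reword longdesc/getSynopsis strings; those strings
are what twisted.python.usage renders for '--help', and what test_cli.py
checks via str(cli.GetOptions()) further down. A minimal stand-alone sketch of
the pattern (assuming Twisted is installed; the class below is illustrative
and does not inherit from the real VDriveOptions):

    import os, sys
    from twisted.python import usage

    class ExampleGetOptions(usage.Options):
        # Stand-in for allmydata.scripts.cli.GetOptions shown above.
        def parseArgs(self, remote_file, local_file="-"):
            self.remote_file = remote_file
            self.local_file = local_file

        def getSynopsis(self):
            return "%s get REMOTE_FILE LOCAL_FILE" % (os.path.basename(sys.argv[0]),)

        longdesc = """
        Retrieve a file from the grid and write it to the local filesystem. If
        LOCAL_FILE is omitted or '-', the contents of the file will be written to
        stdout."""

    print(str(ExampleGetOptions()))   # roughly what 'tahoe get --help' prints
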
--- old-tahoe/src/allmydata/provisioning.py 2010-01-14 03:46:11.998000000 +0000
+++ new-tahoe/src/allmydata/provisioning.py 2010-01-14 03:46:12.237000000 +0000
@@ -128,7 +128,7 @@
                                       files_per_user_counts,
                                       1000)
        add_input("Users",
-                  "How many files in each user's vdrive? (avg)",
+                  "How many files for each user? (avg)",
                  i_files_per_user)

        space_per_user_sizes = [(1e6, "1MB"),
@@ -147,7 +147,7 @@
                                       space_per_user_sizes,
                                       200e6)
        add_input("Users",
-                  "How much data is in each user's vdrive? (avg)",
+                  "How much data for each user? (avg)",
                  i_space_per_user)

        sharing_ratios = [(1.0, "1.0x"),

723 | |
---|
724 | --- old-tahoe/src/allmydata/test/check_load.py 2010-01-14 03:46:12.013000000 +0000 |
---|
725 | +++ new-tahoe/src/allmydata/test/check_load.py 2010-01-14 03:46:12.253000000 +0000 |
---|
726 | @@ -97,12 +97,12 @@ |
---|
727 | directories_read = 0 |
---|
728 | directories_written = 0 |
---|
729 | |
---|
730 | -def listdir(nodeurl, root, vdrive_pathname): |
---|
731 | +def listdir(nodeurl, root, remote_pathname): |
---|
732 | if nodeurl[-1] != "/": |
---|
733 | nodeurl += "/" |
---|
734 | url = nodeurl + "uri/%s/" % urllib.quote(root) |
---|
735 | - if vdrive_pathname: |
---|
736 | - url += urllib.quote(vdrive_pathname) |
---|
737 | + if remote_pathname: |
---|
738 | + url += urllib.quote(remote_pathname) |
---|
739 | url += "?t=json" |
---|
740 | data = urllib.urlopen(url).read() |
---|
741 | try: |
---|
742 | @@ -203,11 +203,11 @@ |
---|
743 | path = "/" |
---|
744 | return scheme, host, port, path |
---|
745 | |
---|
746 | -def generate_and_put(nodeurl, root, vdrive_fname, size): |
---|
747 | +def generate_and_put(nodeurl, root, remote_filename, size): |
---|
748 | if nodeurl[-1] != "/": |
---|
749 | nodeurl += "/" |
---|
750 | url = nodeurl + "uri/%s/" % urllib.quote(root) |
---|
751 | - url += urllib.quote(vdrive_fname) |
---|
752 | + url += urllib.quote(remote_filename) |
---|
753 | |
---|
754 | scheme, host, port, path = parse_url(url) |
---|
755 | if scheme == "http": |
---|
756 | |
---|
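The listdir() and generate_and_put() helpers above talk to the node's web API
by building URLs of the form <nodeurl>uri/<cap>/<path>?t=json. A self-contained
sketch of the same directory-listing request, in the same Python 2 style as
check_load.py (the node URL and directory cap are placeholders, and the stdlib
json module stands in for whatever parser the script actually uses):

    import urllib, json

    def list_remote_directory(nodeurl, rootcap, remote_pathname=""):
        # Same URL shape as listdir() above: uri/<cap>/<path>?t=json
        if nodeurl[-1] != "/":
            nodeurl += "/"
        url = nodeurl + "uri/%s/" % urllib.quote(rootcap)
        if remote_pathname:
            url += urllib.quote(remote_pathname)
        url += "?t=json"
        return json.loads(urllib.urlopen(url).read())

    # list_remote_directory("http://127.0.0.1:3456/", "URI:DIR2:placeholdercap")
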
--- old-tahoe/src/allmydata/test/test_system.py 2010-01-14 03:46:12.046000000 +0000
+++ new-tahoe/src/allmydata/test/test_system.py 2010-01-14 03:46:12.269000000 +0000
@@ -28,7 +28,7 @@
from allmydata.test.common import SystemTestMixin

LARGE_DATA = """
-This is some data to publish to the virtual drive, which needs to be large
+This is some data to publish to the remote grid, which needs to be large
enough to not fit inside a LIT uri.
"""

@@ -698,8 +698,8 @@
        # the key, which should cause the download to fail the post-download
        # plaintext_hash check.

-    def test_vdrive(self):
-        self.basedir = "system/SystemTest/test_vdrive"
+    def test_filesystem(self):
+        self.basedir = "system/SystemTest/test_filesystem"
        self.data = LARGE_DATA
        d = self.set_up_nodes(use_stats_gatherer=True)
        d.addCallback(self._test_introweb)

--- old-tahoe/src/allmydata/test/test_client.py 2010-01-14 03:46:12.062000000 +0000
+++ new-tahoe/src/allmydata/test/test_client.py 2010-01-14 03:46:12.273000000 +0000
@@ -32,20 +32,12 @@
        basedir = "test_client.Basic.test_loadable"
        os.mkdir(basedir)
        open(os.path.join(basedir, "introducer.furl"), "w").write("")
-        open(os.path.join(basedir, "vdrive.furl"), "w").write("")
-        c = client.Client(basedir)
-
-    def test_loadable_without_vdrive(self):
-        basedir = "test_client.Basic.test_loadable_without_vdrive"
-        os.mkdir(basedir)
-        open(os.path.join(basedir, "introducer.furl"), "w").write("")
        c = client.Client(basedir)

    def test_loadable_old_config_bits(self):
        basedir = "test_client.Basic.test_loadable_old_config_bits"
        os.mkdir(basedir)
        open(os.path.join(basedir, "introducer.furl"), "w").write("")
-        open(os.path.join(basedir, "vdrive.furl"), "w").write("")
        open(os.path.join(basedir, "no_storage"), "w").write("")
        open(os.path.join(basedir, "readonly_storage"), "w").write("")
        open(os.path.join(basedir, "debug_discard_storage"), "w").write("")
@@ -60,7 +52,6 @@
        basedir = "test_client.Basic.test_loadable_old_storage_config_bits"
        os.mkdir(basedir)
        open(os.path.join(basedir, "introducer.furl"), "w").write("")
-        open(os.path.join(basedir, "vdrive.furl"), "w").write("")
        open(os.path.join(basedir, "readonly_storage"), "w").write("")
        open(os.path.join(basedir, "debug_discard_storage"), "w").write("")
        c = client.Client(basedir)
@@ -72,7 +63,6 @@
        basedir = "test_client.Basic.test_secrets"
        os.mkdir(basedir)
        open(os.path.join(basedir, "introducer.furl"), "w").write("")
-        open(os.path.join(basedir, "vdrive.furl"), "w").write("")
        c = client.Client(basedir)
        secret_fname = os.path.join(basedir, "private", "secret")
        self.failUnless(os.path.exists(secret_fname), secret_fname)
@@ -161,7 +151,6 @@
        basedir = "test_client.Basic.test_versions"
        os.mkdir(basedir)
        open(os.path.join(basedir, "introducer.furl"), "w").write("")
-        open(os.path.join(basedir, "vdrive.furl"), "w").write("")
        c = client.Client(basedir)
        ss = c.getServiceNamed("storage")
        verdict = ss.remote_get_version()

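The test_client.py hunks above remove every vdrive.furl line: after this
change a client basedir needs only introducer.furl for these tests to build a
Client. A tiny sketch of the layout the updated tests create (the furl value
is a placeholder, not a real introducer):

    import os

    basedir = "example-client-basedir"
    os.mkdir(basedir)
    open(os.path.join(basedir, "introducer.furl"), "w").write("pb://placeholder-introducer-furl")
    # from allmydata import client
    # c = client.Client(basedir)      # what the tests construct next
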
--- old-tahoe/src/allmydata/test/test_cli.py 2010-01-14 03:46:12.078000000 +0000
+++ new-tahoe/src/allmydata/test/test_cli.py 2010-01-14 03:46:12.279000000 +0000
@@ -376,17 +376,17 @@

    def test_get(self):
        help = str(cli.GetOptions())
-        self.failUnless("get VDRIVE_FILE LOCAL_FILE" in help, help)
+        self.failUnless("get REMOTE_FILE LOCAL_FILE" in help, help)
        self.failUnless("% tahoe get FOO |less" in help, help)

    def test_put(self):
        help = str(cli.PutOptions())
-        self.failUnless("put LOCAL_FILE VDRIVE_FILE" in help, help)
+        self.failUnless("put LOCAL_FILE REMOTE_FILE" in help, help)
        self.failUnless("% cat FILE | tahoe put" in help, help)

    def test_rm(self):
        help = str(cli.RmOptions())
-        self.failUnless("rm VDRIVE_FILE" in help, help)
+        self.failUnless("rm REMOTE_FILE" in help, help)

    def test_mv(self):
        help = str(cli.MvOptions())

--- old-tahoe/src/allmydata/scripts/tahoe_put.py 2010-01-14 03:46:12.176000000 +0000
+++ new-tahoe/src/allmydata/scripts/tahoe_put.py 2010-01-14 03:46:12.353000000 +0000
@@ -34,6 +34,7 @@
    #  /oops/subdir/foo       : DISALLOWED
    #  ALIAS:foo              : aliases[ALIAS]/foo
    #  ALIAS:subdir/foo       : aliases[ALIAS]/subdir/foo
+
    #  ALIAS:/oops/subdir/foo : DISALLOWED
    #  DIRCAP:./foo           : DIRCAP/foo
    #  DIRCAP:./subdir/foo    : DIRCAP/subdir/foo
@@ -45,7 +46,7 @@
    rootcap, path = get_alias(aliases, to_file, DEFAULT_ALIAS)
    if path.startswith("/"):
        suggestion = to_file.replace("/", "", 1)
-        print >>stderr, "ERROR: The VDRIVE filename must not start with a slash"
+        print >>stderr, "ERROR: The remote filename must not start with a slash"
        print >>stderr, "Please try again, perhaps with:", suggestion
        return 1
    url = nodeurl + "uri/%s/" % urllib.quote(rootcap)
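
The comment table in the tahoe_put.py hunk above documents how command-line
paths expand through aliases; get_alias() does the real work. A rough sketch
of just the ALIAS:path rule, ignoring the DIRCAP: and leading-slash cases the
real helper also handles (the alias dict and default name are illustrative):

    def expand_alias_path(aliases, where, default_alias="tahoe"):
        # "ALIAS:subdir/foo" -> (aliases["ALIAS"], "subdir/foo");
        # a bare "subdir/foo" falls back to the default alias.
        if ":" in where:
            alias, path = where.split(":", 1)
        else:
            alias, path = default_alias, where
        return aliases[alias], path

    # expand_alias_path({"tahoe": "URI:DIR2:placeholdercap"}, "tahoe:subdir/foo")
    # -> ("URI:DIR2:placeholdercap", "subdir/foo")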
---|