Ticket #534: fsencode.3.py

File fsencode.3.py, 2.8 KB (added by zooko, at 2009-05-01T05:09:31Z)
Line 
1# A wrapper around the Python Standard Library's filename access functions to
2# provide a uniform API for all platforms and to prevent lossy en/de-coding.
3
class Fname:
    """ A filename together with a record of how it was decoded.

    Instances are plain value holders: the constructor stores its three
    arguments unchanged and performs no validation or conversion.
    """

    def __init__(self, name, failed_decode=False, alleged_encoding=None):
        # Encoding the name was decoded with, when the creator chose to
        # record one; None otherwise.
        self.alleged_encoding = alleged_encoding
        # True when .name does not hold properly decoded characters but
        # the original bytes smuggled into a unicode object.
        self.failed_decode = failed_decode
        # The filename itself.  The functions in this file require it to
        # be a unicode object.
        self.name = name
9
10if platform.system() in ('Linux', 'Solaris'):
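11    # on byte-oriented filesystems, such as Linux and Solaris.  NOTE(review): platform.system() reports Solaris as 'SunOS', so the 'Solaris' entry in the test above looks like it never matches -- confirm.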
12
13    def unicode_to_fs(fn):
14        """ Encode an unicode object to bytes. """
15        precondition(isinstance(fn, Fname), fn)
16        precondition(isinstance(fn.name, unicode), fn.name)
17
18        if fn.failed_decode:
19            # This means that the unicode string in .name is not
20            # actually the result of a successful decoding with a
21            # suggested codec, but is instead the result of stuffing the
22            # bytes into a unicode by dint of the utf-8b trick.  This
23            # means that on a byte-oriented system, you shouldn't treat
24            # the .name as a unicode string containing chars, but
25            # instead you should get the original bytes back out of it.
26            return fn.name.encode('utf-8', 'python-escape')
27        else:
28            try:
29                return fn.name.encode(sys.getfilesystemencoding(), 'strict')
30            except UnicodeEncodeError:
31                raise usage.UsageError("Filename '%s' cannot be \
32encoded using the current encoding of your filesystem (%s). Please \
33configure your locale correctly or rename this file." %
34                                       (s, sys.getfilesystemencoding()))
35
36    def fs_to_unicode(bytesfn):
37        """ Decode bytes from the filesystem to a unicode object. """
38        precondition(isinstance(bytesfn, str), str)
39
40        alleged_encoding = sys.getfilesystemencoding()
41        if alleged_encoding in (None, '', 'ascii'):
42            alleged_encoding = 'utf-8'
43           
44        try:
45            unicodefn = bytesfn.decode(alleged_encoding, 'strict')
46        except UnicodeDecodeError:
47            unicodefn = bytesfn.decode('utf-8b', 'python-escape')
48            return Fname(unicodefn, failed_decode=True)
49        else:
50            unicodefn = unicodedata.normalize('NFC', unicodefn)
51            if alleged_encoding == 'utf-8':
52                return Fname(unicodefn)
53            else:
54                return Fname(unicodefn, alleged_encoding)
55
56    def listdir(fn):
57        assert isinstance(fn, Fname), fn
58        assert isinstance(fn.name, unicode), fn.name
59        bytesfn = unicode_to_fs(fn.name)
60        res = os.listdir(bytesfn)
61        return([fs_to_unicode(fn) for fn in res])
62
63else:
64    # on unicode-oriented filesystems, such as Mac and Windows
65    def listdir(fn):
66        assert isinstance(fn, Fname), fn
67        assert isinstance(fn.name, unicode), fn.name
68        return [Fname(n) for n in os.listdir(fn.name)]