aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/ipython/py2/IPython/utils/encoding.py
blob: ba8ca095344cfd09112be9aa8df19bb1aeff5e0a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# coding: utf-8 
""" 
Utilities for dealing with text encodings 
""" 
 
#----------------------------------------------------------------------------- 
#  Copyright (C) 2008-2012  The IPython Development Team 
# 
#  Distributed under the terms of the BSD License.  The full license is in 
#  the file COPYING, distributed as part of this software. 
#----------------------------------------------------------------------------- 
 
#----------------------------------------------------------------------------- 
# Imports 
#----------------------------------------------------------------------------- 
import sys 
import locale 
import warnings 
 
# to deal with the possibility of sys.std* not being a stream at all 
def get_stream_enc(stream, default=None): 
    """Return the given stream's encoding or a default. 
 
    There are cases where ``sys.std*`` might not actually be a stream, so 
    check for the encoding attribute prior to returning it, and return 
    a default if it doesn't exist or evaluates as False. ``default`` 
    is None if not provided. 
    """ 
    if not hasattr(stream, 'encoding') or not stream.encoding: 
        return default 
    else: 
        return stream.encoding 
 
# Less conservative replacement for sys.getdefaultencoding, that will try 
# to match the environment. 
# Defined here as central function, so if we find better choices, we 
# won't need to make changes all over IPython. 
def getdefaultencoding(prefer_stream=True): 
    """Return IPython's guess for the default encoding for bytes as text. 
     
    If prefer_stream is True (default), asks for stdin.encoding first, 
    to match the calling Terminal, but that is often None for subprocesses. 
     
    Then fall back on locale.getpreferredencoding(), 
    which should be a sensible platform default (that respects LANG environment), 
    and finally to sys.getdefaultencoding() which is the most conservative option, 
    and usually ASCII on Python 2 or UTF8 on Python 3. 
    """ 
    enc = None 
    if prefer_stream: 
        enc = get_stream_enc(sys.stdin) 
    if not enc or enc=='ascii': 
        try: 
            # There are reports of getpreferredencoding raising errors 
            # in some cases, which may well be fixed, but let's be conservative here. 
            enc = locale.getpreferredencoding() 
        except Exception: 
            pass 
    enc = enc or sys.getdefaultencoding() 
    # On windows `cp0` can be returned to indicate that there is no code page. 
    # Since cp0 is an invalid encoding return instead cp1252 which is the 
    # Western European default. 
    if enc == 'cp0': 
        warnings.warn( 
            "Invalid code page cp0 detected - using cp1252 instead." 
            "If cp1252 is incorrect please ensure a valid code page " 
            "is defined for the process.", RuntimeWarning) 
        return 'cp1252' 
    return enc 
 
DEFAULT_ENCODING = getdefaultencoding()