/tags/pyragua-0.2.5/pyragua/pyclbr.py – Pyragua

root/tags/pyragua-0.2.5/pyragua/pyclbr.py

Revision 390, 13.1 KB (checked in by ark, 2 years ago)

directorio faltante

Line 
"""Parse a Python module and describe its classes and methods.

Parse enough of a Python file to recognize imports and class and
method definitions, and to find out the superclasses of a class.

The interface consists of a single function:
        readmodule_ex(module [, path])
where module is the name of a Python module, and path is an optional
list of directories where the module is to be searched.  If present,
path is prepended to the system search path sys.path.  The return value
is a dictionary.  The keys of the dictionary are the names of the
classes and top-level functions defined in the module (including ones
that are defined via the from XXX import YYY construct).  The values are
instances of the classes Class and Function defined here.  One special
key/value pair is present for packages: the key '__path__' has as its
value a list which contains the package search path.

(For compatibility, a function readmodule is also defined: it works just
like readmodule_ex, but the dictionary it returns has only key/value
pairs whose value is an instance of class Class, _not_ ones for which it
would be an instance of Function, nor the special key/value pair for key
'__path__' as described in the previous paragraph).

A class is described by the class Class in this module.  Instances
of this class have the following instance variables:
        module -- the module name
        name -- the name of the class
        super -- a list of super classes (Class instances)
        methods -- a dictionary of methods
        file -- the file in which the class was defined
        lineno -- the line in the file on which the class statement occurred
The dictionary of methods uses the method names as keys and the line
numbers on which the method was defined as values.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class.  Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.

A function is described by the class Function in this module.
Instances of this class have the following instance variables:
        module -- the module name
        name -- the name of the function
        file -- the file in which the function was defined
        lineno -- the line in the file on which the def statement occurred
"""
47
import sys
import imp
import tokenize # Python tokenizer
from token import NAME, DEDENT, NEWLINE

# Names exported by "from pyclbr import *".
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

# Cache of modules we've seen, keyed by module name (dotted for
# submodules); each value is the dictionary built by _readmodule().
_modules = {}
56
# Each Python class found by the scanner is represented by an instance
# of this class.
class Class:
    '''Class to represent a Python class.

    Instance variables:
        module -- the module name
        name -- the name of the class
        super -- list of super classes (Class instances, or plain
                 strings for super classes that were not recognized)
        methods -- dictionary mapping method name to the line number
                   of its def statement
        file -- the file in which the class was defined
        lineno -- the line on which the class statement occurred
    '''
    def __init__(self, module, name, super, file, lineno):
        self.module = module
        self.name = name
        # None means "no super class list was given"; every instance then
        # gets its own fresh list rather than a shared one.
        if super is None:
            super = []
        self.super = super
        self.methods = {}
        self.file = file
        self.lineno = lineno

    def _addmethod(self, name, lineno):
        '''Record that method NAME is defined on line LINENO.

        A later definition with the same name replaces the earlier entry.
        '''
        self.methods[name] = lineno
72
class Function:
    '''Class to represent a top-level Python function.

    Instance variables:
        module -- the module name
        name -- the name of the function
        file -- the file in which the function was defined
        lineno -- the line on which the def statement occurred
    '''
    def __init__(self, module, name, file, lineno):
        self.module = module
        self.name = name
        self.file = file
        self.lineno = lineno
80
def readmodule(module, path=None):
    '''Backwards compatible interface.

    Scan MODULE the same way readmodule_ex() does, then keep only the
    Class objects from the resulting dictionary: Function entries and
    the '__path__' entry of packages are dropped.

    PATH is an optional list of directories to search before sys.path;
    None (the default) means no extra directories.
    '''
    res = {}
    # "path or []" keeps the shared helper working with a real list while
    # avoiding a mutable default argument.
    for key, value in _readmodule(module, path or []).items():
        if isinstance(value, Class):
            res[key] = value
    return res
93
def readmodule_ex(module, path=None):
    '''Read a module file and return a dictionary describing it.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return the dictionary built by _readmodule() for it:
    one entry for each class and top-level function found, plus a
    '__path__' entry when MODULE is a package.

    PATH is an optional list of directories that is searched before
    sys.path; None (the default) means no extra directories.
    '''
    # "path or []" hands the helper a real list without using a mutable
    # default argument.
    return _readmodule(module, path or [])
107
def _close_scopes(stack, indent):
    '''Pop every (object, indent) entry indented at least as far as INDENT.'''
    while stack and stack[-1][1] >= indent:
        del stack[-1]

def _resolve_super(name, known):
    '''Return the object recorded for super class NAME, or NAME itself.

    KNOWN is the dictionary of the module being scanned.  For a dotted
    name the last two components are taken as module and class and are
    looked up in the module cache.
    '''
    if name in known:
        # we know this super class
        return known[name]
    parts = name.split('.')
    if len(parts) > 1:
        # super class is of the form module.class: look in module for class
        scanned = _modules.get(parts[-2])
        if scanned is not None and parts[-1] in scanned:
            return scanned[parts[-1]]
    return name

def _readmodule(module, path, inpackage=None):
    '''Do the hard work for readmodule[_ex].

    MODULE is the (possibly dotted) module name and PATH a list of
    directories.  If INPACKAGE is true, it must be the dotted name of
    the package in which we are searching for a submodule, and PATH is
    used as the package search path; otherwise PATH is combined with
    sys.path.

    Return the dictionary of names found in the module.  The dictionary
    is stored in the module cache before the source is scanned, so a
    later request for the same module gets the same object.  Errors
    raised while locating the module with imp.find_module() are not
    caught here.
    '''
    # Compute the full module name (prepending inpackage if set)
    if inpackage:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dict for this module's contents
    tree = {}

    # Check if it is a built-in module; we don't do much for these
    if module in sys.builtin_module_names and not inpackage:
        _modules[module] = tree
        return tree

    # Check for a dotted module name
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage:
            package = "%s.%s" % (inpackage, package)
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module
    if inpackage:
        f, fname, (_suff, _mode, ftype) = imp.find_module(module, path)
    else:
        f, fname, (_suff, _mode, ftype) = imp.find_module(module,
                                                          path + sys.path)
    if ftype == imp.PKG_DIRECTORY:
        tree['__path__'] = [fname]
        path = [fname] + path
        f, fname, (_suff, _mode, ftype) = imp.find_module('__init__', [fname])
    _modules[fullmodule] = tree
    if ftype != imp.PY_SOURCE:
        # not Python source, can't do anything with this module
        if f is not None:
            # there is not always an open file to close here
            f.close()
        return tree

    stack = [] # stack of (class, indent) pairs

    g = tokenize.generate_tokens(f.readline)
    try:
        for tokentype, token, start, _end, _line in g:
            if tokentype == DEDENT:
                # close nested classes and defs
                _close_scopes(stack, start[1])
            elif token == 'def':
                lineno, thisindent = start
                # close previous nested classes and defs
                _close_scopes(stack, thisindent)
                tokentype, meth_name = next(g)[0:2]
                if tokentype != NAME:
                    continue # Syntax error
                if stack:
                    cur_class = stack[-1][0]
                    if isinstance(cur_class, Class):
                        # it's a method
                        cur_class._addmethod(meth_name, lineno)
                    # else it's a nested def
                else:
                    # it's a function
                    tree[meth_name] = Function(module, meth_name,
                                               fname, lineno)
                stack.append((None, thisindent)) # Marker for nested fns
            elif token == 'class':
                lineno, thisindent = start
                # close previous nested classes and defs
                _close_scopes(stack, thisindent)
                tokentype, class_name = next(g)[0:2]
                if tokentype != NAME:
                    continue # Syntax error
                # parse what follows the class name
                token = next(g)[1]
                inherit = None
                if token == '(':
                    names = [] # List of superclasses
                    # there's a list of superclasses
                    level = 1
                    pieces = [] # Tokens making up current superclass
                    while True:
                        token = next(g)[1]
                        if token in (')', ',') and level == 1:
                            n = "".join(pieces)
                            # "class C():" and a trailing comma give an
                            # empty name; that is not a super class.
                            if n:
                                names.append(_resolve_super(n, tree))
                            pieces = []
                        if token == '(':
                            level += 1
                        elif token == ')':
                            level -= 1
                            if level == 0:
                                break
                        elif token == ',' and level == 1:
                            pass
                        else:
                            pieces.append(token)
                    inherit = names
                cur_class = Class(module, class_name, inherit, fname, lineno)
                if not stack:
                    tree[class_name] = cur_class
                stack.append((cur_class, thisindent))
            elif token == 'import' and start[1] == 0:
                for mod, _mod2 in _getnamelist(g):
                    try:
                        # Recursively read the imported module
                        if not inpackage:
                            _readmodule(mod, path)
                        else:
                            try:
                                _readmodule(mod, path, inpackage)
                            except ImportError:
                                _readmodule(mod, [])
                    except Exception:
                        # If we can't find or parse the imported module,
                        # too bad -- don't die here.  (KeyboardInterrupt
                        # and SystemExit still get through.)
                        pass
            elif token == 'from' and start[1] == 0:
                mod, token = _getname(g)
                if not mod or token != "import":
                    continue
                names = _getnamelist(g)
                try:
                    # Recursively read the imported module
                    d = _readmodule(mod, path, inpackage)
                except Exception:
                    # If we can't find or parse the imported module,
                    # too bad -- don't die here.
                    continue
                # add any classes that were defined in the imported module
                # to our name space if they were mentioned in the list
                for n, n2 in names:
                    if n in d:
                        tree[n2 or n] = d[n]
                    elif n == '*':
                        # don't add names that start with _
                        for key in d:
                            if key[0] != '_':
                                tree[key] = d[key]
    except StopIteration:
        # the token stream ended in the middle of a statement
        pass
    finally:
        # close the source file even if tokenizing raised
        f.close()

    return tree
277
def _getnamelist(g):
    '''Helper to get a comma-separated list of dotted names plus 'as'
    clauses.

    G is the token generator, positioned just before the first name.
    Return a list of pairs (name, name2) where name2 is the 'as' name,
    or None if there is no 'as' clause.  Reading stops at the first
    item that does not start with a name, or once a token containing a
    newline has been reached.
    '''
    names = []
    while True:
        name, token = _getname(g)
        if not name:
            break
        if token == 'as':
            name2, token = _getname(g)
        else:
            name2 = None
        names.append((name, name2))
        # skip whatever else precedes the next comma or the end of line
        while token != "," and "\n" not in token:
            # next(g) rather than g.next(): works on Python 2.6+ and 3
            token = next(g)[1]
        if token != ",":
            break
    return names
297
def _getname(g):
    '''Helper to get a dotted name from token generator G.

    Return a pair (name, token) where name is the dotted name, or None
    if there was no dotted name, and token is the text of the token at
    which reading stopped.  A lone '*' is accepted in place of a name.
    '''
    parts = []
    # next(g) rather than g.next(): works on Python 2.6+ and 3
    tokentype, token = next(g)[0:2]
    if tokentype != NAME and token != '*':
        return (None, token)
    parts.append(token)
    while True:
        token = next(g)[1]
        if token != '.':
            break
        tokentype, token = next(g)[0:2]
        if tokentype != NAME:
            break
        parts.append(token)
    return (".".join(parts), token)
316
def _main():
    '''Main program for testing.

    Takes one command line argument: a module name, or the path of an
    existing file.  Prints the classes (with their methods) and the
    top-level functions found, ordered by line number.
    '''
    import os
    if len(sys.argv) < 2:
        sys.exit("usage: %s module-or-file" % sys.argv[0])
    mod = sys.argv[1]
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    contents = readmodule_ex(mod, path)
    # Entries without a line number (such as a package's '__path__'
    # list) sort first.
    objs = sorted(contents.values(), key=lambda o: getattr(o, 'lineno', 0))
    for obj in objs:
        if isinstance(obj, Class):
            # single pre-formatted strings give the same output whether
            # print is a statement or a function
            print("class %s %s %s" % (obj.name, obj.super, obj.lineno))
            methods = sorted(obj.methods.items(), key=lambda item: item[1])
            for name, lineno in methods:
                if name != "__path__":
                    print("  def %s %s" % (name, lineno))
        elif isinstance(obj, Function):
            print("def %s %s" % (obj.name, obj.lineno))

if __name__ == "__main__":
    _main()
Note: See TracBrowser for help on using the browser.