-
Notifications
You must be signed in to change notification settings - Fork 4
/
hostlookup-trace
executable file
·253 lines (218 loc) · 8.96 KB
/
hostlookup-trace
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
#!/usr/bin/gdb --python
# vim: set filetype=python:
'''Trace hostname lookups through gdb.
Before running this script, be sure to install debuginfo for glibc
e.g. debuginfo-install glibc
usage: hostlookup-trace [OPTIONS] [EXE ARGS...]
e.g.
$ hostlookup-trace curl http://www.google.com
<...>
getaddrinfo() called
name = 0x632760 "www.google.com"
service = 0x7ffff1b8cd80 "80"
hints = 0x632728
pai = 0x7ffff1b8cd38
$ hostlookup-trace python -c "import socket; socket.gethostbyaddr('www.google.com')"
<...>
getaddrinfo() called
name = 0x702a64 "www.google.com"
service = 0x0
hints = 0x7fffffff9180
pai = 0x7fffffff9178
gethostbyaddr_r("74.125.31.106") called
addr = 0x7fffffff9234
len = 4
type = 2
resbuf = 0x7fffffff9210
buffer = 0x7fffffff92b0 "x\312p"
buflen = 16383
result = 0x7fffffff9208
h_errnop = 0x7fffffff91fc
getnameinfo("74.125.31.106") called
sa = 0x7fffffff9170
addrlen = 16
host = 0x7fffffff8d40 "H\307\312\360\377\177"
hostlen = 1025
serv = 0x0
servlen = 0
flags = 1
$ hostlookup-trace -p $(pgrep sshd)
See "hostlookup-trace --help" for the "--backtrace" and "--python-backtrace" options.
As written, hostlookup-trace only works for programs that use the standard gethostbyX(), get{name,addr}info() functions.
There are other ways to look up hostnames, e.g. glibc provides a rarely used getaddrinfo_a() interface.
Apps could use the c-ares async DNS library[1] or the bind derived libresolv or other custom DNS code.
nslookup and dig fall into the "uses libresolv directly and thus doesn't work" category. (They're shipped with bind)
On the other hand, programs that don't use glibc to resolve hostnames wouldn't respect /etc/hosts or work with setups like Active Directory / LDAP anyway[2].
[1]: http://c-ares.haxx.se/
[2]: http://en.wikipedia.org/wiki/Name_Service_Switch
'''
import argparse
import os
import re
import sys

import gdb
def gdb_fix_argv():
    '''
    Rebuild sys.argv from /proc/self/cmdline when running under "gdb --python".

    gdb breaks sys.argv[0] in the way it embeds Python
    ./gdb-python-script -> sys.argv: ['']
    when argv should be: [ './gdb-python-script' ]
    ./gdb-python-script 0 1 2 -> sys.argv: ['0', '1', '2']
    when argv should be: [ './gdb-python-script', '0', '1', '2' ]

    sys.argv is left untouched when /proc/self/cmdline cannot be opened or
    read, or when the command line does not look like
    "<...>/gdb --python ..." (or "<...>-gdb --python ...").
    '''
    try:
        # read() instead of readline(): an argument may itself contain a
        # newline, and readline() would silently drop everything after it.
        with open('/proc/self/cmdline') as f:
            raw = f.read()
    except IOError:
        return
    cmdline = raw.split('\x00')
    # /proc/X/cmdline normally ends with '\x00', which leaves one empty
    # trailing element after the split
    if cmdline and cmdline[-1] == '':
        cmdline.pop()
    # Need at least the gdb executable and the '--python' switch
    if len(cmdline) < 2:
        return
    c = cmdline[0]
    # Only mess with sys.argv if we're running 'gdb --python'
    if not c.endswith(('/gdb', '-gdb')):
        return
    if cmdline[1] != '--python':
        return
    sys.argv = cmdline[2:]
class _BreakpointWithCallback(gdb.Breakpoint):
    '''gdb breakpoint whose stop() result is delegated to a Python callable.'''

    def __init__(self, location, callback):
        '''
        location -- breakpoint location string handed to gdb.Breakpoint
        callback -- callable invoked as callback(breakpoint) from stop()
        '''
        # The callback is stored before the base class initialiser runs,
        # exactly as the code has always done.
        self.callback = callback
        super(_BreakpointWithCallback, self).__init__(location)

    def stop(self):
        '''Return whatever the stored callback returns for this breakpoint.'''
        handler = self.callback
        return handler(self)
def breakp(location, condition=None, callback=None):
    '''
    breakp(location, condition=None, callback=None)

    Create a silent breakpoint at `location` and return it.

    location  -- breakpoint location string passed on to gdb
    condition -- must be None; conditional breakpoints are not supported yet
    callback  -- optional callable; when given, the breakpoint is a
                 _BreakpointWithCallback and the callable receives the
                 breakpoint object each time stop() is called
    '''
    # FIXME: need to figure out how to do conditional breakpoints in Python
    assert condition is None
    if callback is not None:
        bp = _BreakpointWithCallback(location, callback)
    else:
        bp = gdb.Breakpoint(location)
    bp.silent = True
    return bp
def _inet_ntop(af_expr, addr_expr):
    '''
    Call inet_ntop() inside the inferior and return the resulting text.

    af_expr   -- C expression (string) for the address family argument
    addr_expr -- C expression (string) for the pointer to the binary address

    Both expressions are pasted into an expression evaluated by gdb.
    A 4096 byte scratch buffer is malloc()ed in the inferior for the result
    and is always free()d again, even when the conversion fails.

    Raises AssertionError if malloc() or inet_ntop() returns NULL.
    '''
    buf = gdb.parse_and_eval('(char*)malloc(4096)')
    # int() instead of long(): works with both Python 2 and Python 3 builds of gdb
    buf_addr = int(buf)
    if buf_addr == 0:
        raise AssertionError('malloc(4096) failed in the inferior')
    try:
        r = gdb.parse_and_eval(
            '(void*)inet_ntop(%s, %s, %d, 4096)' % (af_expr, addr_expr, buf_addr))
        if int(r) == 0:
            raise AssertionError(
                'inet_ntop(%s, %s) failed in the inferior' % (af_expr, addr_expr))
        return buf.string()
    finally:
        # Release the scratch buffer on every path so a failed conversion
        # does not leak memory in the traced process.
        gdb.parse_and_eval('free(%d)' % (buf_addr,))
# Address family numbers; AF_INET is compared against sa->sa_family in
# getnameinfo_decode().
# NOTE(review): 2 and 10 look like the Linux values of AF_INET / AF_INET6 --
# confirm before using this script on another OS.
AF_INET = 2
AF_INET6 = 10
def gethostbyaddr_decode():
    '''Write a 'gethostbyaddr("<address>") called' line to stderr.'''
    # 'type', 'addr' etc must match libc source code
    printable = _inet_ntop('type', 'addr')
    sys.stderr.write('gethostbyaddr("%s") called\n' % (printable,))
def gethostbyaddr_r_decode():
    '''Write a 'gethostbyaddr_r("<address>") called' line to stderr.'''
    # The 'type' and 'addr' expressions are the same ones used for gethostbyaddr()
    printable = _inet_ntop('type', 'addr')
    sys.stderr.write('gethostbyaddr_r("%s") called\n' % (printable,))
def getnameinfo_decode():
    '''
    Write a 'getnameinfo("<address>") called' line to stderr.

    The address is taken from the 'sa' argument of the current frame:
    sin_addr for AF_INET and sin6_addr for AF_INET6.  For any other address
    family the address cannot be rendered with inet_ntop(), so only the
    numeric family is reported instead of failing inside the breakpoint
    callback.
    '''
    af = int(gdb.parse_and_eval('sa->sa_family'))
    if af == AF_INET:
        addr = '&(((struct sockaddr_in*)sa)->sin_addr)'
    elif af == AF_INET6:
        addr = '&(((struct sockaddr_in6*)sa)->sin6_addr)'
    else:
        # e.g. a unix domain socket: inet_ntop() would return NULL here
        sys.stderr.write('getnameinfo(<address family %d>) called\n' % (af,))
        return
    sys.stderr.write('getnameinfo("%s") called\n' % (_inet_ntop('sa->sa_family', addr),))
# NOTE: With glibc 2.15 and gdb 7.4,
# breaking on 'gethostbyaddr_r' doesn't do what we want.
# It breaks on 'gethostbyaddr_r@plt' in the caller's PLT instead of the real implementation in glibc and "info args" doesn't work.
# Thus we have to break on the glibc internal function name '__gethostbyaddr_r' instead.
# i.e. gdb doesn't know that 'gethostbyaddr_r' would be resolved to 'gethostbyaddr_r@@GLIBC_2.2.5'
# according to the GNU ELF symbol versioning rules.
# This applies to versioned symbols in general.
# The list of breakpoints is generated by
# nm /lib64/libc-2.15.so | grep 'gethostby' | sort -n
# and matching up the symbol addresses manually
# Obviously you need glibc debuginfo for 'nm' to work.
# Table of functions to trace.  Each entry is a tuple of
#   - the symbol the breakpoint is set on,
#   - the name to print when it is hit (None: print the symbol itself),
#   - a function that prints a decoded "called" line (None: print the
#     generic '<name>() called' line).
hostname_lookup_funs = [
    # glibc internal name, external name, handler function
    ('getaddrinfo', None, None),
    ('getnameinfo', None, getnameinfo_decode),
    ('gethostbyname', None, None),
    # was 'gethostbynamer', which is not a glibc symbol and never resolved
    ('gethostbyname2', None, None),
    ('__gethostbyname2_r', 'gethostbyname2_r', None),
    ('__gethostbyname_r', 'gethostbyname_r', None),
    ('gethostbyaddr', None, gethostbyaddr_decode),
    ('__gethostbyaddr_r', 'gethostbyaddr_r', gethostbyaddr_r_decode),
    # On i686, glibc also provides gethostbyaddr_r@GLIBC_2.0, gethostbyname2_r@GLIBC_2.0 and gethostbyname_r@GLIBC_2.0
    ('__old_gethostbyaddr_r', 'gethostbyaddr_r', None),
    ('__old_gethostbyname2_r', 'gethostbyname2_r', None),
    ('__old_gethostbyname_r', 'gethostbyname_r', None),
]
def dump_function_gen(name, decode_func, options):
    '''
    Build the callback that reports one traced function call on stderr.

    name        -- function name used in the generic '<name>() called' line
    decode_func -- optional callable that prints its own "called" line;
                   used instead of the generic line when truthy
    options     -- object with 'python_backtrace' and 'backtrace' attributes
                   selecting the extra backtraces to run

    Returns a function taking the breakpoint object (which it ignores).
    '''
    def dump(bp):
        if not decode_func:
            sys.stderr.write('%s() called\n' % (name,))
        else:
            decode_func()

        # "info args" may fail; in that case the argument listing is
        # simply left out.
        try:
            listing = gdb.execute('info args', to_string=True)
        except gdb.error:
            listing = None
        if listing is not None:
            sys.stderr.writelines('\t%s\n' % (line,) for line in listing.split('\n'))

        if options.python_backtrace:
            gdb.execute('py-bt')
        if options.backtrace:
            gdb.execute('backtrace')

    return dump
# shelx_quote() copied from Python 3.3's shlex.quote(), see: http://hg.python.org/cpython/file/default/Lib/shlex.py#l279
def shelx_quote(s):
    """Return a shell-escaped version of the string *s*."""
    if not s:
        return "''"
    # Anything outside word characters and @%+=:,./- forces quoting
    needs_quoting = re.search(r'[^\w@%+=:,./-]', s) is not None
    if not needs_quoting:
        return s
    # use single quotes, and put single quotes into double quotes
    # the string $'b is then quoted as '$'"'"'b'
    pieces = s.split("'")
    return "'" + "'\"'\"'".join(pieces) + "'"
def program_name():
    '''
    Return the name this script was invoked as, for use in usage messages.

    This is the basename of sys.argv[0].  When sys.argv is empty or its
    first element has no basename (gdb leaves sys.argv as [''] unless
    gdb_fix_argv() repaired it), 'hostlookup-trace' is returned instead.
    '''
    script = sys.argv[0] if sys.argv else ''
    return os.path.basename(script) or 'hostlookup-trace'
def print_usage_exit():
    '''Write the one-line usage summary to stderr and exit with status 2.'''
    usage = 'usage: %s [OPTIONS] [EXE ARGS...]\n' % (program_name(),)
    sys.stderr.write(usage)
    raise SystemExit(2)
def gdb_multi_process_setup():
    '''Turn off detach-on-fork and turn on non-stop mode in gdb.'''
    # see http://tromey.com/blog/?p=734
    for setting in ('set detach-on-fork off', 'set non-stop on'):
        gdb.execute(setting)
    # NOTE: gdb 7.4 allows the inferior to dump core with unhandled "int3"'s on x86-64 with target-async
    # gdb.execute('set target-async on')
def main(args):
    '''
    Parse the command line, set the lookup breakpoints and run the target.

    args -- command line arguments without the program name, i.e.
            [OPTIONS] [EXE ARGS...]

    With --attach PID gdb attaches to that process and continues it.
    Otherwise EXE is loaded, started with ARGS, and every inferior is killed
    once the 'run' command returns or raises.
    print_usage_exit() is called when neither a PID nor a program is given.
    '''
    p = argparse.ArgumentParser()
    p.add_argument('--attach', '-p', type=int, default=None, metavar='PID', help='attach to running process')
    p.add_argument('--backtrace', '--bt', action='store_true', help='do backtrace when a hostname lookup function is called')
    p.add_argument('--python-backtrace', '--py-bt', action='store_true', help=(
        'like "--backtrace" but do a Python backtrace. '
        'Only useful if the program being traced is running on the CPython interpreter'))
    p.add_argument('args', nargs=argparse.REMAINDER, help='program to be traced')
    options = p.parse_args(args)
    args = options.args
    if not options.attach and len(args) < 1:
        print_usage_exit()

    gdb.execute('set python print-stack full')
    gdb_multi_process_setup()
    if not options.attach:
        exe = args[0]
        args = args[1:]
        # Quote the path as well as the arguments, so that an executable
        # whose path contains spaces or quotes is passed to gdb as one word.
        gdb.execute('file %s' % (shelx_quote(exe),))
        gdb.execute('set args %s' % (' '.join(shelx_quote(x) for x in args)))

    # The traced functions live in shared libraries that may not be loaded
    # yet, so let gdb keep unresolved breakpoints pending.
    gdb.execute('set breakpoint pending on')
    for (symbol, real_name, decode_func) in hostname_lookup_funs:
        if real_name is None:
            real_name = symbol
        breakp("'%s'" % (symbol,), callback=dump_function_gen(real_name, decode_func, options))

    try:
        if options.attach:
            gdb.execute('attach %d' % (options.attach,))
            gdb.execute('continue')
        else:
            gdb.execute('run')
    finally:
        # Only kill processes this script started itself; an attached
        # process is left alone.
        if not options.attach:
            for inferior in gdb.inferiors():
                gdb.execute('kill inferior %d' % (inferior.num,))
# Script entry point.
if __name__ == '__main__':
    # gdb_fix_argv() may replace sys.argv, so it has to run before
    # sys.argv is sliced for main().
    gdb_fix_argv()
    main(sys.argv[1:])