/
elisp-reader.el
650 lines (590 loc) · 23 KB
/
elisp-reader.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
;;; -*- lexical-binding: t -*-
;;; elisp-reader.el --- A customizable Lisp reader for Emacs
;; Copyright (C) 2016 Mihai Bazon
;; Author: Mihai Bazon <mihai.bazon@gmail.com>
;; Keywords: languages
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;; This replaces Elisp's `read' function, which is implemented in C,
;; with one implemented in Elisp which supports customizable syntax.
;; The way it does this is quite unorthodox -- it implements a rather
;; complete Elisp reader, but we do fall back to the original reader
;; for certain cases (like literal strings, characters, and other
;; syntax which the original reader supports but appears to be
;; internal to Emacs itself, such as byte-compiled code).
;;
;; It works nicely, if a bit slow. To make it much faster you should
;; byte-compile this file:
;;
;; emacs --batch --eval '(byte-compile-file "elisp-reader.el")'
;;
;; After loading this file, everything you eval with C-M-x in an Emacs
;; Lisp buffer, or via M-: or via M-x `eval-region' or in the REPL,
;; and even .el files loaded with `load', will be fed into our reader,
;; which will produce the actual AST to be evaluated.
;;
;; REGEXPS, PEOPLE!
;; ----------------
;;
;; One syntax extension it has built-in is for "literal regexps". For
;; example, in the Elisp REPL (M-x `ielm'):
;;
;; ELISP> #/(foo|bar)/
;; "\\(foo\\|bar\\)"
;;
;; You get back a string with the Elisp regexp, nicely backslashed by
;; the rules that everybody and their uncle hate. More examples:
;;
;; ELISP> #/foo\nbar/
;; "foo\nbar" ;; literal newline
;; ELISP> #/foo\(bar\)/
;; "foo(bar)" ;; match literal parens
;; ELISP> #/\.elc$/
;; "\\.elc$" ;; a single \ to quote the dot
;;
;; Note that these are still Emacs (not Perl) regexes, with this small
;; twist: (1) grouping characters need not be backslashed and (2) you
;; write the regexp "literally", as opposed to inside a string, so you
;; don't need a ridiculous number of backslashes and guesswork to get
;; it right. See `er-read-regexp'.
;;
;; DEFINING CUSTOM READER SYNTAX
;; -----------------------------
;;
;; Example of defining custom syntax (note this function uses
;; `er-read-list', which see, to get a list of Lisp datums):
;;
;; (def-reader-syntax ?{
;; (lambda (in ch)
;; (let ((list (er-read-list in ?} t)))
;; `(list ,@(cl-loop for (key val) on list by #'cddr
;; collect `(cons ,key ,val))))))
;;
;; and now you can type into the REPL:
;;
;; ELISP> { :foo 1 :bar "string" :baz (+ 2 3) }
;; ((:foo . 1)
;; (:bar . "string")
;; (:baz . 5))
;;
;; That's a less parenthesized way to write an alist. Just an
;; example, it's not that I'd recommend that. In fact, the famous
;; words come to mind: "if you ever actually do this, then.. WAT!"
;;
;; The problem with writing your own syntax extensions is that for the
;; time being, they're GLOBAL. That's even worse than unprefixed
;; global variables. Until we figure out how to/and implement
;; something similar to Common Lisp's "named-readtables" [1], YOU
;; SHOULD NOT USE THIS PACKAGE, except perhaps for the regexp syntax.
;;
;; [1] https://common-lisp.net/project/named-readtables/
;;
;; FILE-LOCAL SYMBOLS
;; ------------------
;;
;; A `local' macro is provided which allows you to declare a list of
;; names to be kept "internal" to the current file. Example:
;;
;; (local "my-package" ("foo" "bar"))
;;
;; After this declaration, you can use freely foo and bar in the
;; current file:
;;
;; (defun foo () ...)
;; (defvar bar ...)
;;
;; In other files, or in the REPL, they are accessible as
;; my-package-foo and my-package-bar. But if you place the same
;; `local' declaration in another file, they'd be auto-prefixed as
;; well so you can refer to them just as foo and bar. This is a poor
;; man's package system.
;;
;; See the docstring of `local' for more information.
;;
;; Another idea I was thinking about (did not do it but it's trivial)
;; is to support some special character, say $ — if some symbol is
;; prefixed with $ then make sure it's "local" by default (by giving
;; it some name that can't be easily accessed from other file). That
;; would make it possible for two different libraries to use a symbol
;; named $foo without conflict.
;;; Code:
(defvar *er-orig-read* (symbol-function #'read)
"Remember the original `read' function, because we'll have to
use it in some situations that can't be handled from Lisp code.")
(defvar *er-macro-chars* (make-hash-table :test 'eq)
"Custom read functions. A hash that maps character to a
function of two arguments, stream (as a function) and character.
This function should return the AST that has been read. See
usage of `def-reader-syntax' later on.")
(defvar *er-read-filename* nil
"This dynamic variable will be bound by our read functions
while parsing is in progress. It'll contain the value of
`load-file-name', or the name of the current buffer if it doesn't
have an associated file.")
(defun er-make-stream (in)
"Given an input stream (which can be a buffer, a marker, a
string, a function, or t or nil--see Elisp docs) this returns the
stream as a function of one optional argument. When called with
no arguments, this function should return the next character from
the stream. When called with a non-nil argument (character),
this function should arrange that character to be returned on
next invokation with no arguments.
The Elisp docs aren't entirely clear about this, but the closures
returned by this function will be able to push back multiple
characters. Also, when the input argument is a string, the
produced function will support a keyword :pos argument, which
when passed it will return the current (zero-based) position of
the stream. Example:
(let ((stream (er-make-stream \"foo\")))
(message \"%c%c\" (funcall stream) (funcall stream)) ;; fo
(message \"%d\" (funcall stream :pos))) ;; 2
This helps us implement `read-from-string', which has to return
the position of the stream."
(let ((unget nil))
(when (symbolp in)
(setq in (symbol-function in)))
(cond
((bufferp in) (lambda (&optional ch)
(with-current-buffer in
(cond
(ch (push ch unget))
(unget (pop unget))
(t
(when (not (eobp))
(prog1 (char-after)
(forward-char 1))))))))
((markerp in) (lambda (&optional ch)
(with-current-buffer (marker-buffer in)
(cond
(ch (push ch unget))
(unget (pop unget))
(t
(when (< (marker-position in) (point-max))
(prog1 (char-after in)
(move-marker in
(1+ (marker-position in))
(marker-buffer in)))))))))
((stringp in) (let ((pos 0))
(lambda (&optional ch)
(cond
((eq ch :pos)
(if (< pos (length in))
(- pos 1)
pos))
(ch (push ch unget))
(unget (pop unget))
((< pos (length in))
(prog1 (aref in pos)
(setq pos (1+ pos))))))))
((functionp in) (lambda (&optional ch)
(cond
(ch (push ch unget))
(unget (pop unget))
(t (funcall in)))))
(t
(read-string "Lisp expression:")))))
(defun er-orig-read ()
"Calls the original (low-level C) `read'. This function should
be invoked only within the dynamic extent of some `read' or
`read-from-string' execution."
(funcall *er-orig-read* '%er-reader-insym))
(defun er-peek (in)
"Given a stream function, return the next character without
dropping it from the stream."
(let ((ch (funcall in)))
(funcall in ch)
ch))
(defun er-next (in)
"Given a stream function, return and discard the next
character."
(funcall in))
(defun er-read-while (in pred)
"Read and return a string from the input stream, as long as the
predicate--which will be called for each character--returns
true."
(let ((chars (list)) ch)
(while (and (setq ch (er-peek in))
(funcall pred ch))
(push (er-next in) chars))
(apply #'string (nreverse chars))))
(defun er-croak (msg &rest args)
"Error out in case of parse error."
(if args
(apply #'error msg args)
(error "%s" msg)))
(defun er-read-string ()
"Read a string from the current stream. It defers to
`er-orig-read' and thus this should only be called within the
dynamic extent of some `read' function."
(er-orig-read))
(defun er-read-char ()
"Read a character from the current stream. It defers to
`er-orig-read' and thus this should only be called within the
dynamic extent of some `read' function."
(er-orig-read))
(defun er-letter? (ch)
"Tests whether the given character is a Unicode letter."
(memq (get-char-code-property ch 'general-category)
'(Ll Lu Lo Lt Lm Mn Mc Me Nl)))
(defun er-whitespace? (ch)
"Tests if the given character is whitespace (XXX actually not
all Unicode whitespace chars are handled; I'm not even sure that
would be correct)."
(memq ch '(? ?\t ?\n ?\f ?\r #xa0)))
(defun er-digit? (ch)
"Tests if the given character is a plain digit."
(<= ?0 ch ?9))
(defun er-number? (str)
"Tests if the given string should be interpreted as number."
(string-match "^[-+]?\\(?:\\(?:[0-9]+\\|[0-9]*\\.[0-9]+\\)\\(?:[E|e][+|-]?[0-9]+\\)?\\)$" str))
(defun er-skip-whitespace (in)
"Skip whitespace in the given stream."
(er-read-while in #'er-whitespace?))
(defun er-read-symbol-name (in)
"Read and return the name of a symbol."
(er-read-while in (lambda (ch)
(cond
((eq ch ?\\)
(er-next in)
(if (er-peek in) t (er-croak "Unterminated input")))
(t
(or (er-letter? ch)
(er-digit? ch)
(memq ch '(?- ?+ ?= ?* ?/ ?_ ?~ ?! ?@ ?. ?\|
?$ ?% ?^ ?& ?: ?< ?> ?{ ?} ?\?))))))))
(defun er-read-integer (in)
"Read and return an integer (NIL if there is no integer at
current position in stream)."
(let ((num (er-read-while in #'er-digit?)))
(when (< 0 (length num))
(string-to-number num))))
(defun er-skip-comment (in)
"Skip over a comment (move to end-of-line)."
(er-read-while in (lambda (ch)
(not (eq ch ?\n)))))
(defun er-read-symbol (in)
"Reads a symbol or a number. If what follows in the stream
looks like a number, a number will be returned (via the original
reader). If a symbol, it might be auto-prefixed if declared
`local' in the current file."
(let ((name (er-read-symbol-name in)))
(cond
((er-number? name)
(funcall *er-orig-read* name))
((zerop (length name))
'##)
(t
(intern (er-maybe-prefixed name))))))
(defvar *er-prefixed-symbols* (make-hash-table :test #'equal))
(defun er-maybe-prefixed (name &optional filename)
(unless filename (setq filename (er-get-filename)))
(let* ((f (gethash filename *er-prefixed-symbols*))
prefix)
(cond
((not f)
;; nothing special with this file, return unchanged name
name)
((and (setq prefix (gethash name f))
(zerop (length prefix)))
;; if defined but empty prefix for this name, this is actually
;; an "exported" symbol -- leave as is.
name)
(prefix
;; if we have a prefix, join it with a dash
(format "%s-%s" prefix name))
((intern-soft name)
;; no prefix found and the symbol is already interned, so leave
;; as is.
name)
((setq prefix (gethash "" f))
;; "global" prefix registered for this file, and the symbol is
;; uninterned -- let's join them
(format "%s-%s" prefix name))
(t
;; when none of the above, leave the name as is
name))))
(defun er-make-prefixed (name &optional prefix filename)
(unless filename (setq filename (er-get-filename)))
(unless prefix (setq prefix filename))
(let ((f (gethash filename *er-prefixed-symbols*)))
(unless f
(setq f (make-hash-table :test #'equal))
(puthash filename f *er-prefixed-symbols*))
(puthash name prefix f)
nil))
(defun er-read-list (in end &optional no-dot)
"Read a list of elements from the input stream, until the end
character has been observed. If `no-dot' is nil then it will
support a dot character before the last element, producing an
\"improper\" list. If `no-dot' is true, then if a single dot
character is encountered this will produce an error."
(let ((ret nil) (p nil) ch)
(catch 'exit
(while t
(er-skip-whitespace in)
(setq ch (er-peek in))
(cond
((not ch)
(er-croak "Unterminated list"))
((eq ch end)
(er-next in)
(throw 'exit ret))
((eq ch ?\;)
(er-skip-comment in))
(t
(let ((x (er-read-datum in)))
(cond
((eq x '\.)
(cond
(no-dot (er-croak "Dot in wrong context"))
(t
(rplacd p (er-read-datum in))
(er-skip-whitespace in)
(setq ch (er-next in))
(unless (eq ch end)
(er-croak "Dot in wrong context"))
(throw 'exit ret))))
(t
(let ((cell (cons x nil)))
(setq p (if ret
(rplacd p cell)
(setq ret cell)))))))))))))
(defun er-read-datum (in)
"Read and return a Lisp datum from the input stream."
(er-skip-whitespace in)
(let ((ch (er-peek in)) macrochar)
(cond
((not ch)
(er-croak "End of file during parsing"))
((eq ch ?\;)
(er-skip-comment in)
(er-read-datum in))
((eq ch ?\")
(er-read-string))
((eq ch ?\?)
(er-read-char))
((eq ch ?\()
(er-next in)
(er-read-list in ?\)))
((eq ch ?\[)
(er-next in)
(apply #'vector (er-read-list in ?\] t)))
((eq ch ?\')
(er-next in)
(list 'quote (er-read-datum in)))
((eq ch ?\`)
(er-next in)
(list '\` (er-read-datum in)))
((eq ch ?\,)
(er-next in)
(cond
((eq (er-peek in) ?\@)
(er-next in)
(list '\,@ (er-read-datum in)))
(t
(list '\, (er-read-datum in)))))
((setq macrochar (gethash ch *er-macro-chars*))
(er-next in)
(funcall macrochar in ch))
(t
(er-read-symbol in)))))
(defvar *er-substitutions*)
(defun er-read-internal (in)
;; HACK: calling the original reader with (funcall *er-orig-read*
;; in) will not work. After digging the C code (lread.c) my
;; conclusion is that `read' does not support a (uncompiled) lambda
;; expression as input stream. This contradicts the documentation
;; so I'd call it a bug in Emacs. Any case, a fbound symbol works
;; so we use %er-reader-insym for that.
;;
;; Update: it is a bug and it was confirmed [1] and fixed [2] within
;; hours! Emacs is ALIVE!
;;
;; [1] https://debbugs.gnu.org/cgi/bugreport.cgi?bug=22961
;; [2] http://git.savannah.gnu.org/cgit/emacs.git/commit/?id=711ca36
(fset '%er-reader-insym in)
(unwind-protect
(let ((*er-substitutions* (list)))
(er-read-datum in))
(fset '%er-reader-insym nil)))
(defun def-reader-syntax (ch reader)
(puthash ch reader *er-macro-chars*))
(defun er-index (elt lst)
(let ((index 0))
(catch 'exit
(while lst
(if (eq elt (car lst))
(throw 'exit index)
(setq index (1+ index))
(setq lst (cdr lst)))))))
(defun er-substitute (orig cell)
(cl-labels ((subst-in (thing)
(cond
((eq thing cell)
orig)
((consp thing)
(subst-in-list thing)
thing)
((stringp thing)
thing)
((arrayp thing)
(subst-in-array thing)
thing)
(t
thing)))
(subst-in-list (lst)
(rplaca lst (subst-in (car lst)))
(rplacd lst (subst-in (cdr lst))))
(subst-in-array (array)
(cl-loop for el across array
for i upfrom 0
do (aset array i (subst-in el)))))
(subst-in orig)))
(defconst *er-all-digits* '(?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9
?a ?b ?c ?d ?e ?f ?g ?h ?i ?j
?k ?l ?m ?n ?o ?p ?q ?r ?s ?t
?u ?v ?w ?x ?y ?z))
(def-reader-syntax ?#
(lambda (in ch)
(let ((x (funcall in)))
(cond
((er-digit? x)
(funcall in x)
(let ((num (er-read-integer in)))
(setq x (er-peek in))
(cond
((and read-circle (eq x ?=))
;; #1=...
(er-next in)
(let* ((placeholder (cons nil nil))
(cell (cons num placeholder)))
(setq *er-substitutions* (cons cell *er-substitutions*))
(let ((tok (er-read-datum in)))
(er-substitute tok placeholder)
(rplacd cell tok))))
((and read-circle (eq x ?#))
;; #1#
(er-next in)
(let ((x (assq num *er-substitutions*)))
(if (consp x)
(cdr x)
(er-croak "Cannot find substitution for #%d#" num))))
((and (<= num 36)
(or (eq x ?r) (eq x ?R)))
;; #16rFF
(er-next in)
(let* ((base num)
(digits (cl-subseq *er-all-digits* 0 base))
(num 0)
(negative? (cond ((eq ?- (er-peek in))
(er-next in)
t)
((eq ?+ (er-peek in))
(er-next in)
nil))))
(er-read-while in (lambda (ch)
(let ((v (er-index (downcase ch) digits)))
(when v
(setq num (+ v (* num base)))))))
(if negative? (- num) num)))
(t (er-croak "Unsupported #%d%c syntax" num x)))))
((memq x '(?s ?^ ?& ?\[ ?\( ?@ ?! ?$ ?: ?#
?x ?X ?o ?O ?b ?B))
;; let the original reader to deal with these.
(funcall in x)
(funcall in ch)
(er-orig-read))
((eq x ?\')
(list 'function (er-read-datum in)))
((eq x ?\/)
(er-read-regexp in))
(t
(er-croak "Unsupported #%c syntax" x))))))
(defun er-read-regexp (in)
(let ((ret (list)))
(catch 'exit
(while t
(let ((ch (funcall in)))
(cond
((eq ch ?\\)
(let ((next (funcall in)))
(cond
((memq next '(?\\ ?/ ?\) ?\( ?\| ?\{ ?\}))
(push next ret))
((eq next ?n)
(push ?\n ret))
((eq next ?f)
(push ?\f ret))
((eq next ?r)
(push ?\r ret))
((eq next ?t)
(push ?\t ret))
(t
(when (memq next '(?\* ?\+ ?\. ?\? ?\[ ?\] ?\^ ?\$ ?\\))
(push ?\\ ret))
(funcall in next)))))
((memq ch '(?\) ?\( ?\| ?\{ ?\}))
(push ?\\ ret)
(push ch ret))
((eq ch ?/)
(throw 'exit nil))
((not ch)
(er-croak "Unterminated regexp"))
(t
(push ch ret))))))
(apply #'string (nreverse ret))))
(defun er-read (&optional in)
(if (and load-file-name
(string-match "\\.elc$" load-file-name))
(funcall *er-orig-read* in)
(let ((*er-read-filename* (er-get-filename)))
(er-read-internal (er-make-stream in)))))
(defun er-read-from-string (str &optional start end)
(let ((*er-read-filename* (er-get-filename)))
(let* ((stream (er-make-stream
(substring-no-properties str start end)))
(token (er-read-internal stream)))
(cons token (+ (or start 0)
(funcall stream :pos))))))
(defun er-get-filename ()
(or *er-read-filename*
load-file-name
(and (boundp 'byte-compile-current-file) byte-compile-current-file)
(and (boundp 'byte-compile-dest-file) byte-compile-dest-file)
(buffer-file-name (current-buffer))
(buffer-name (current-buffer))))
(defmacro local (prefix &optional names)
"Declare that the given names (list of strings) should be
auto-prefixed with the given prefix (symbol, string or nil).
This makes it possible to write:
(local \"my-library-name\" (\"foo\" \"bar\"))
(defun foo (...) ...)
(defun bar (...) ...)
;; you can refer to foo and bar freely in the current file
;; but from other files, the symbols will be accessed
;; as my-library-name-foo and my-library-name-bar
If the prefix is nil, it will default to the current file/buffer
name and those symbols will be rather inaccessible from other
files (or it'll be really hard to type them manually). Note that
without a prefix, the full file name is recorded in the elc file
during byte compilation."
(unless names (setq names '("")))
`(eval-when-compile
,@(mapcar (lambda (name)
`(er-make-prefixed ,name ,prefix)) names)))
;; install in a prog, so they're read all at once with the original
;; reader
(progn
(fset 'read (symbol-function 'er-read))
(fset 'read-from-string (symbol-function 'er-read-from-string))
(setq load-read-function (symbol-function 'er-read)))
(provide 'elisp-reader)