-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathref-man-core.el
4708 lines (4286 loc) · 216 KB
/
ref-man-core.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
;;; ref-man-core.el --- Core Components for `ref-man'. ;;; -*- lexical-binding: t; -*-
;; Copyright (C) 2018,2019,2020,2021,2022,2023
;; Akshay Badola
;; Author: Akshay Badola <akshay.badola.cs@gmail.com>
;; Maintainer: Akshay Badola <akshay.badola.cs@gmail.com>
;; Time-stamp: <Tuesday 11 July 2023 13:01:08 PM IST>
;; Keywords: pdfs, references, bibtex, org, eww
;; This file is *NOT* part of GNU Emacs.
;; This program is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;; This program is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
;; more details.
;; You should have received a copy of the GNU General Public License along with
;; GNU Emacs; see the file COPYING. If not, write to the Free Software
;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
;;; Commentary:
;;
;; Core components include data structures and functions and commands for
;; `org', `bibtex', `science-parse' and `python'. A python flask server
;; interface is used as an interface to arxiv, dblp and semanticscholar. There
;; are some file and pdf functions also and functions specific for gscholar
;; also.
;;
;; Perhaps I'll add functions for markdown also (to convert to manuscript)
;; though `org' export can also be used, though I'll have to reconfigure my
;; settings for that and I'm not sure if it'll be as capable as pandoc.
;;
;; Actually `org-ref' has a lot of useful features and I feel in my zeal I've
;; reinvented the wheel a bit, but some of the features here I didn't have
;; so...not sure. Especially the `eww' ones. semanticscholar also I won't find I
;; think easily.
;;
;; TODO: I have to separate these according to:
;; - ref-man-bibtex
;; - ref-man-org
;; - ref-man-pdf
;; - ref-man-dblp
;; - ref-man-ss (for semanticscholar)
;; - ref-man-gscholar (for gscholar utils)
;; python utils are in any case separate
;;
;; NOTE: I think I'll keep the python interface, the processes, the data
;; structures all here and move org to a new file next.
;;
;; TODO: Also the code is very messy and very little documentation. I have to
;; add them
;;
;; TODO: (if (eq major-mode 'org-mode) ,body (message "not in org mode") nil)
;; Should be a macro I think or an advice
;;; Code:
(require 'a)
(require 'async)
(require 'biblio-core)
(require 'bibtex) ; Primary function I use from 'bibtex is 'bibtex-parse-entry
(require 'bind-key)
(require 'cl-lib)
(require 'citeproc)
(require 'dash)
(require 'eww)
(require 'f)
(require 'gscholar-bibtex) ; NOTE: Maybe remove this eventually
(require 'json)
(require 'org)
(require 'org-element)
(require 'ov)
(require 'pos-tip)
(require 'seq)
(require 'shr)
(require 'subr-x)
(require 'thingatpt)
(require 'time-stamp)
(require 'url)
(require 'xml)
(require 'util)
(require 'util/org "util-org")
(require 'ref-man-util)
(require 'ref-man-files)
(require 'ref-man-url)
(require 'ref-man-web)
(require 'ref-man-py)
(require 'ref-man-ss)
;; Top-level customization group of the package.  The parent has to be an
;; already existing group: listing `ref-man' as its own parent makes the
;; group a member of itself and leaves it unreachable from the Customize
;; group tree.
(defgroup ref-man nil
  "Bibliography Manager."
  :prefix "ref-man-"
  :group 'applications)
(defcustom ref-man-data-root-dir (expand-file-name "~/.ref-man")
  "Root directory where ref-man data is stored."
  :type 'directory
  :group 'ref-man)

(defcustom ref-man-org-links-file-path (expand-file-name "~/.ref-man/.temp-org-links.org")
  "Temporary org file to hold URLs and metadata."
  :type 'file
  :group 'ref-man)

;; The elements are file names, so use the `file' widget (file name
;; completion in Customize) instead of plain strings.  The stored value is
;; still a list of strings.
(defcustom ref-man-bib-files nil
  "List of `bibtex' files to search for references while generating articles."
  :type '(repeat file)
  :group 'ref-man)

(defcustom ref-man-temp-bib-file-path (expand-file-name "~/.ref-man/.temp.bib")
  "Temporary bib file to which any extracted bibtex info is appended."
  :type 'file
  :group 'ref-man)

(defcustom ref-man-org-store-dir (expand-file-name "~/.ref-man/org/")
  "Directory where the org files corresponding to documents will be stored."
  :type 'directory
  :group 'ref-man)

(defcustom ref-man-update-pdf-url-when-download nil
  "When non-nil insert/update PDF_URL property of heading when fetching pdf."
  :type 'boolean
  :group 'ref-man)
;; Prefer whatever `pandoc' is found on `exec-path'; fall back to the
;; historical hard-coded location so that the default never becomes nil.
(defcustom ref-man-pandoc-executable (or (executable-find "pandoc") "/usr/bin/pandoc")
  "`pandoc' executable to use.
Defaults to the `pandoc' found on `exec-path', or to
\"/usr/bin/pandoc\" when none is found."
  :type 'file
  :group 'ref-man)

(defcustom ref-man-always-update-heading-if-different nil
  "Always update entry heading if different from fetched data."
  :type 'boolean
  :group 'ref-man)
(defvar ref-man-org-ss-search-functions '(ref-man-ss-search
ref-man-org-default-ss-search-func)
"Ordered list of SS (Semantic Scholar) search functions to use.")
(defvar ref-man-key-list
'(authors title venue volume number pages year doi ee)
"Fields extracted from a DBLP result.
`ref-man--dblp-clean-helper' and `ref-man--dblp-clean-vector' map
over this list and keep only these fields of a result.")
(defvar ref-man-bibtex-save-ring
nil
"List to store parsed bibtex entries when they're not killed.")
(defvar ref-man-org-entry-post-update-hook '(ref-man-org-find-duplicate-headings)
"Hook to run after a `ref-man-org' entry is updated.
The functions in the hook are called with no arguments.")
;; NOTE: External functions
;; Both are declared as living in the file "ref-man"; the declarations only
;; tell the byte compiler where the definitions come from.
(declare-function ref-man-try-start-science-parse-server "ref-man")
(declare-function ref-man-kill-science-parse-process "ref-man")
;; Disabled sample configuration, kept for reference:
;; (setq ref-man-org-links-file-path (expand-file-name "~/.temp-org-links.org"))
;; (setq ref-man-documents-dir (expand-file-name "~/org/pdfs/"))
;; ;; (setq ref-man-temp-bib-file-path (expand-file-name "~/lib/docprocess/all.bib"))
;; (setq ref-man-temp-bib-file-path (expand-file-name "~/.temp.bib"))
;; (setq ref-man-org-store-dir (expand-file-name "~/org/pubs_org/"))
;; Internal global variables
;; FIXME: ref-man--org-gscholar-launch-buffer etc. are still being
;; used causing confusion
;; (setq ref-man--org-gscholar-launch-buffer nil)
;; (setq ref-man--org-gscholar-launch-point nil)
;; NOTE: External variables
;; Value-less `defvar' forms: they only mark the variables as special for
;; the compiler, the actual definitions live in the files named below.
;; from `ref-man'
(defvar ref-man-home-dir)
(defvar ref-man-science-parse-server-port)
(defvar ref-man-py-data-dir) ; from `ref-man-py'
(defvar ref-man-py-server-port) ; from `ref-man-py'
(defvar ref-man-public-links-cache) ; from `ref-man-remote'
(defvar ref-man-public-links-cache-file) ; from `ref-man-remote'
;; (declare-function 'string-match-p "subr")
;; NOTE: Internal variables
(defvar ref-man--org-gscholar-launch-buffer nil)
(defvar ref-man--org-gscholar-launch-point nil)
(defvar ref-man--eww-import-link nil)
(defvar ref-man--subtree-list nil)
(defvar ref-man--current-org-buffer nil)
;; Parsed Science Parse response, set by `ref-man-get-references'.
(defvar ref-man--science-parse-data nil)
;; Last JSON value decoded by `ref-man--parse-json-callback'.
(defvar ref-man--json-data nil)
;; Title of the parsed document, set by `ref-man-get-references' and used by
;; `ref-man--create-org-buffer' to name the results buffer.
(defvar ref-man--document-title nil)
;; File name of the pdf last handled by `ref-man-get-references'.
(defvar ref-man--current-pdf-file-name nil)
(defvar ref-man--biblio-callback-buf nil)
(defvar ref-man--subtree-num-entries nil)
;; Matches "[[A][B]]" (group 1 = A, group 2 = B) as well as "[[B]]"
;; (group 2 only); presumably meant for org links -- confirm at the callers.
(defvar ref-man-org-file-link-re "\\[\\(?:\\[\\(.+?\\)]\\)?\\[\\(.+?\\)]]")
;; Fallback definition of `shr-map'.
;; NOTE(review): `shr' is required above, so the variable is presumably
;; already bound when this form is evaluated and the `defvar' then leaves
;; it untouched -- confirm whether this block is still needed.
;; No docstring is given on purpose: `defvar' would install it even when
;; the value itself is left alone.
(defvar shr-map
  (let ((map (make-sparse-keymap)))
    (define-key map "a" 'shr-show-alt-text)
    (define-key map "i" 'shr-browse-image)
    (define-key map "z" 'shr-zoom-image)
    (define-key map [?\t] 'shr-next-link)
    (define-key map [?\M-\t] 'shr-previous-link)
    (define-key map [follow-link] 'mouse-face)
    (define-key map [mouse-2] 'shr-browse-url)
    (define-key map "I" 'shr-insert-image)
    (define-key map "w" 'shr-copy-url)
    (define-key map "u" 'shr-copy-url)
    ;; The string "RET" is the three-key sequence R E T and would turn "R"
    ;; into a prefix key; `kbd' yields the Return key itself (same as "\r").
    (define-key map (kbd "RET") 'shr-browse-url)
    (define-key map "o" 'shr-save-contents)
    map))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; START ref-man constants ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defun ref-man-pandoc-version ()
  "Return the version of pandoc being used as a string.
The version is the second whitespace separated word printed by
`ref-man-pandoc-executable' when run with \"--version\".  Return
nil if the executable cannot be found or exits with an error."
  ;; Run the program directly instead of going through a shell: the path
  ;; needs no quoting then, and a missing binary yields nil rather than a
  ;; word picked out of the shell's error message.
  (let ((pandoc (executable-find ref-man-pandoc-executable)))
    (when pandoc
      (with-temp-buffer
        (when (eq 0 (call-process pandoc nil t nil "--version"))
          (cadr (split-string (buffer-string))))))))
;;
;; Constants. perhaps can name them better
;; Also should be shifted to defcustom
;;
(defvar ref-man-venue-priorities
  ;; The first venue gets the largest number (the number of venues), the
  ;; last one gets 1.
  (let ((names '("icml" "nips" "iccv" "cvpr" "ijcai" "aaai" "eccv")))
    (cl-loop for name in names
             for rank downfrom (length names)
             collect (cons name rank)))
  "Venue priority list from high to low.")
(defvar ref-man-venues
  '(("nips" . "Advances in Neural Information Processing Systems")
    ("neurips" . "Advances in Neural Information Processing Systems")
    ("iccv" . "IEEE International Conference on Computer Vision")
    ;; The conference is abbreviated WACV.  The transposed "wavc" key is
    ;; kept so that existing lookups with the old spelling keep working.
    ("wacv" . "IEEE Winter Conference on Applications of Computer Vision")
    ("wavc" . "IEEE Winter Conference on Applications of Computer Vision")
    ("eccv" . "European Conference on Computer Vision")
    ("cvpr" . "IEEE Conference on Computer Vision and Pattern Recognition")
    ("iclr" . "International Conference on Learning Representations")
    ("bmvc" . "British Machine Vision Conference")
    ("aistats" . "International Conference on Artificial Intelligence and Statistics")
    ("uai" . "Conference on Uncertainty in Artificial Intelligence")
    ("ijcv" . "International Journal of Computer Vision")
    ("icml" . "International Conference on Machine Learning")
    ("pami" . "IEEE Transactions on Pattern Analysis and Machine Intelligence")
    ("tpami" . "IEEE Transactions on Pattern Analysis and Machine Intelligence")
    ("jair" . "Journal of Artificial Intelligence Research")
    ("jmlr" . "Journal of Machine Learning Research"))
  "Alist mapping venue abbreviations to full venue names.
The abbreviations are lower case strings.  These are primarily
Machine Learning venues and one can populate the alist according
to whichever ones they prefer.")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; END ref-man constants ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; START ref-man string utility functions ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FIXME: Ugly hack? Maybe change with an alist and transcribe
(defun ref-man-bibtex-transcribe (key)
"Transcribe non-ascii characters in bibtex KEY to ASCII lookalikes.
The work is delegated to `ref-man--transcribe', which is called
with KEY and `bibtex-autokey-transcriptions'; its result is
returned as is."
(ref-man--transcribe key bibtex-autokey-transcriptions))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; END ref-man string utility functions ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; START Bib entry utility functions ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FIXME: All these bib functions are a huge mess
(defun ref-man--preferred-venue (results)
  "Return the venue priorities for the xml RESULTS.
If RESULTS has exactly one element return 0.  Otherwise return a
list with one element per result: the priority of the result's
venue as given in `ref-man-venue-priorities', or nil when the
result has no venue text or the venue is not listed there.  The
caller picks the preferred result from these priorities."
  (if (= 1 (length results))
      0
    (mapcar (lambda (result)
              ;; The venue name is the last element of the `venue' child.
              (let ((venue (car (last (gscholar-bibtex--xml-get-child
                                       result 'venue)))))
                ;; A result without a venue used to signal an error in
                ;; `downcase'; give it no priority instead, like
                ;; `ref-man--preferred-venue-vector' does.
                (when (stringp venue)
                  (a-get ref-man-venue-priorities (downcase venue)))))
            results)))
(defun ref-man--preferred-venue-vector (results)
  "Return the venue priorities for RESULTS, a vector of alists.
Vector counterpart of `ref-man--preferred-venue'.  For a single
result return 0.  Otherwise return a list holding, for each
result, the priority of its `venue' entry according to
`ref-man-venue-priorities', or nil if there is none."
  (if (= 1 (length results))
      0
    (mapcar (lambda (entry)
              (let* ((raw (cdr (assoc 'venue entry)))
                     ;; The venue is either a string or a vector whose
                     ;; first element is used; anything else counts as
                     ;; no venue.
                     (name (cond ((vectorp raw) (downcase (aref raw 0)))
                                 ((stringp raw) (downcase raw)))))
                (cdr (assoc name ref-man-venue-priorities))))
            results)))
;; CHECK: What does this do exactly?
(defun ref-man--validate-author (author)
  "Strip a trailing number or roman numeral from AUTHOR.
AUTHOR is a list of the words of one author name.  If the last
word contains a digit or is one of i, ii, iii or iv (in any
case), it is dropped when the name has more than two words and
replaced with the empty string otherwise.  Any other name is
returned unchanged.  If AUTHOR cannot be processed, a message is
shown and the list (\"author invalid\") is returned."
  (condition-case nil
      (let* ((suffix (car (last author)))
             (spurious (or (string-match-p "[0-9]+" suffix)
                           (string-match-p "^i$\\|^ii$\\|^iii$\\|^iv$"
                                           (downcase suffix)))))
        (cond ((not spurious) author)
              ((> (length author) 2) (butlast author))
              (t (append (butlast author) (list "")))))
    (error
     (message "Invalid author %s" author)
     '("author invalid"))))
(defun ref-man--dblp-clean-helper (result)
  "Reduce the xml RESULT to the fields of `ref-man-key-list'.
Subroutine of `ref-man-dblp-clean'.  Return a list with one
element per field found in RESULT: the `authors' element is a
list of \"authors\" and the comma separated author names, every
other element is a list of the field name and its value."
  (let (fields)
    (dolist (key ref-man-key-list)
      (let ((node (gscholar-bibtex--xml-get-child result key)))
        (push (if (eq key 'authors)
                  ;; Skip the first two elements of the node; each
                  ;; remaining child carries one name as its last element.
                  (list "authors"
                        (mapconcat (lambda (author) (car (last author)))
                                   (-drop 2 node)
                                   ", "))
                (cons (symbol-name (car node)) (last node)))
              fields)))
    ;; A field missing from RESULT has a nil node and therefore shows up
    ;; as ("nil"); drop those.
    (remove '("nil") (nreverse fields))))
;;
;; Clean the xml entry and keep the relevant items.  Uses gscholar-bibtex.
;;
(defun ref-man-dblp-clean (results &optional all)
  "Clean the xml entries and keep relevant items from `ref-man-key-list'.
RESULTS are results obtained from parsing xml from dblp.
Optional ALL specifies to process all results and return a list
of cleaned results.  If ALL is nil only the result preferred
according to `ref-man-venue-priorities' is cleaned and returned.
Return nil if RESULTS is empty.

Uses `gscholar-bibtex'."
  (cond
   ;; Nothing to choose from.  Previously an empty priority list was
   ;; handed to `max-ind' in this case.
   ((null results) nil)
   (all (mapcar #'ref-man--dblp-clean-helper results))
   (t (let ((best (nth (max-ind (ref-man--preferred-venue results)) results)))
        (when best
          (ref-man--dblp-clean-helper best))))))
;; NOTE: Only used by `ref-man--dblp-fetch-python-process-results'
(defun ref-man--dblp-clean-vector (result)
  "Clean the dblp entries and keep relevant items from `ref-man-key-list'.
RESULT is a vector of alists with symbol keys.  Only the element
preferred according to `ref-man-venue-priorities' is processed.
Return a list with one (NAME VALUE) list per key in
`ref-man-key-list', where NAME is the key name as a string.  The
`authors' value is the author names joined with \", \"."
  ;; The priorities are computed once; they used to be computed twice with
  ;; one of the results left unused.
  (let* ((priorities (ref-man--preferred-venue-vector result))
         (best (aref result (max-ind priorities))))
    ;; TODO: handle this later
    (when best
      (remove '("nil")
              (mapcar
               (lambda (key)
                 (let ((value (cdr (assoc key best))))
                   (if (eq key 'authors)
                       ;; NOTE: The names are only joined here.  Converting
                       ;; them to "Last, First" here as well messed up the
                       ;; names, as "bib-author" was then built twice.
                       (list "authors" (mapconcat #'identity value ", "))
                     (list (symbol-name key) value))))
               ref-man-key-list)))))
;; FIXME: Check for errors in case something is nil.
;; `ref-man-parse-properties-for-bib-key' throws error because of this
;; subroutine as TITLE here evals to nil.
(defun ref-man--build-bib-key-from-plist (str-plist &optional additional-word)
  "Build a key with the format [author year first-title-word].
Entry STR-PLIST is a plist with :author, :year and :title.  The
key is made of the last name of the first author, the year and
the first word of the title that is not a stop word.  With
non-nil ADDITIONAL-WORD the second such title word is appended
too.  A missing title or a title with too few words contributes
the empty string instead of signalling an error."
  (let* ((first-author-str (car (split-string (ref-man--trim-and-unquote
                                               (plist-get str-plist :author))
                                              ",")))
         (first-author (ref-man--validate-author (split-string first-author-str " " t)))
         (last-name (car (last first-author)))
         (year-pub (ref-man--trim-and-unquote (plist-get str-plist :year)))
         (raw-title (plist-get str-plist :title))
         ;; Only clean the title when there is one.
         (title-str (if raw-title (ref-man--trim-and-unquote raw-title) ""))
         (title-words (-remove 'ref-man--stop-word-p
                               (mapcar #'ref-man--remove-punc
                                       (split-string (downcase title-str) " "))))
         ;; Only the part before the first hyphen of a word is used.  The
         ;; `or' guards cover titles with no (or just one) usable word.
         (title-first (car (split-string (or (car title-words) "") "-")))
         (title-second (if additional-word
                           (car (split-string (or (nth 1 title-words) "") "-"))
                         ""))
         (key (ref-man--replace-non-ascii
               (mapconcat 'downcase
                          (list last-name year-pub title-first title-second)
                          "")))
         (key (ref-man-bibtex-transcribe (ref-man--remove-punc key))))
    key))
;;
;; TODO: crossref and dblp insert URL as dx.doi.org something which
;; redirects to the real url or may not even in some cases. If a pdf
;; url exists, don't mess with it and insert it as doi.
;;
;;
(defun ref-man--build-bib-key (key-str &optional na)
  "Build a key with the format [author year first-title-word].
KEY-STR is an alist with string keys \"authors\", \"year\" and
\"title\" whose values are one element lists.  The key consists
of the last name of the first author, the year and the first
title word that is not a stop word.  With non-nil NA the key is
prefixed with \"na_\", marking it as non-authoritative."
  (cl-flet ((field (name)
              (ref-man--trim-and-unquote (cadr (assoc name key-str)))))
    (let* ((first-author (ref-man--validate-author
                          (split-string (car (split-string (field "authors") ","))
                                        " " t)))
           (last-name (car (last first-author)))
           (year-pub (field "year"))
           (title-words (-remove 'ref-man--stop-word-p
                                 (mapcar #'ref-man--remove-punc
                                         (split-string (downcase (field "title")) " "))))
           ;; Only the part before the first hyphen of the word is used.
           (title-first (car (split-string (car title-words) "-")))
           (raw-key (mapconcat 'downcase (list last-name year-pub title-first) ""))
           (key (ref-man-bibtex-transcribe
                 (ref-man--remove-punc
                  (ref-man--replace-non-ascii raw-key)))))
      (if na (concat "na_" key) key))))
(defun ref-man--build-bib-key-from-parsed-org-bibtex (bib-alist)
  "Build a key with the format [author year first-title-word].
BIB-ALIST is an alist of a parsed bibtex entry with the keyword
keys :author, :year and :title.  Return the key as a string made
of the last name of the first author, the year and the first
title word that is not a stop word."
  (cl-flet ((field (name)
              (ref-man--trim-and-unquote (alist-get name bib-alist))))
    (let* ((first-author (ref-man--validate-author
                          (split-string (car (split-string (field :author) ","))
                                        " " t)))
           (last-name (car (last first-author)))
           (year-pub (field :year))
           (title-words (-remove 'ref-man--stop-word-p
                                 (mapcar #'ref-man--remove-punc
                                         (split-string (downcase (field :title)) " "))))
           ;; Only the part before the first hyphen of the word is used.
           (title-first (car (split-string (car title-words) "-")))
           (raw-key (mapconcat 'downcase (list last-name year-pub title-first) "")))
      (ref-man-bibtex-transcribe
       (ref-man--remove-punc
        (ref-man--replace-non-ascii raw-key))))))
(defun ref-man--build-bib-key-from-parsed-bibtex (bib-alist)
  "Build a key with the format [author year first-title-word].
BIB-ALIST is an alist with the string keys \"author\", \"year\"
and \"title\".  The author value is expected in bibtex format,
i.e., names separated by \" and \" with the last name before
\", \".  The strings are assumed to be validated already."
  (let* ((authors (ref-man--fix-curly (cdr (assoc "author" bib-alist))))
         (first-author (car (split-string authors " and ")))
         (last-name (car (split-string first-author ", ")))
         (year-pub (cdr (assoc "year" bib-alist)))
         (title-str (ref-man--fix-curly (downcase (cdr (assoc "title" bib-alist)))))
         (title-words (-remove 'ref-man--stop-word-p (split-string title-str " ")))
         ;; Only the part before the first hyphen of the word is used.
         (title-first (car (split-string (car title-words) "-"))))
    (thread-last (mapconcat 'downcase (list last-name year-pub title-first) "")
      (ref-man--replace-non-ascii)
      (ref-man--remove-punc)
      (ref-man-bibtex-transcribe))))
;; TODO: Rename this
;; CHECK: It's not even used anywhere
(defun ref-man--build-bib-assoc-from-parsed-org-bibtex (bib-alist)
  "Build the string alist of a bib entry from the symbol BIB-ALIST.
BIB-ALIST is an alist with keyword keys like :author and :title.
Return a list (KEY FIELDS), where KEY is the bibtex key built
with `ref-man--build-bib-key-from-parsed-org-bibtex' and FIELDS
is an alist with string keys containing only the fields that
have a value.

The \"url\" field is taken from :ee or, failing that, from :url.
The \"venue\" field is the first available of :journal,
:booktitle and :venue.  If :howpublished has the form
\"\\url{...}\", the address inside the braces is stored as the
\"howpublished\" field.

Can be used to build both the bib entry and org entry."
  (cl-flet ((raw (field) (cdr (assoc field bib-alist)))
            (clean (field) (ref-man--trim-and-unquote (cdr (assoc field bib-alist)))))
    (let* ((key (ref-man--build-bib-key-from-parsed-org-bibtex bib-alist))
           (author (cons "author" (clean :author)))
           (title (cons "title" (clean :title)))
           (year (cons "year" (clean :year)))
           (doi (cons "doi" (raw :doi)))
           (volume (cons "volume" (raw :volume)))
           (number (cons "number" (raw :number)))
           (pages (cons "pages" (raw :pages)))
           (publisher (cons "publisher" (raw :publisher)))
           (abstract (cons "abstract" (raw :abstract)))
           ;; The fallback has to be decided on the value: the cons cell
           ;; itself is never nil, so testing it made :url unreachable.
           (url (cons "url" (or (raw :ee) (raw :url))))
           ;; TODO: expand venue
           (venue (cons "venue" (or (raw :journal) (raw :booktitle) (raw :venue))))
           (howpublished-parts (let ((value (raw :howpublished)))
                                 (and value (split-string value "{"))))
           ;; There must be text after a "{", i.e., more than one part (the
           ;; comparison used to be inverted and never held).  The result
           ;; is a cons like every other field so that `-filter' below can
           ;; take its `cdr'.
           (howpublished
            (cons "howpublished"
                  (when (and (> (length howpublished-parts) 1)
                             (string-match-p "url" (nth 0 howpublished-parts)))
                    (car (split-string (nth 1 howpublished-parts) "}"))))))
      (list key (-filter 'cdr (list abstract author title year doi
                                    volume number pages url venue
                                    publisher howpublished))))))
(defun ref-man--build-bib-author (author-str)
  "Return bibtex format author from string AUTHOR-STR.
AUTHOR-STR holds the author names separated by commas.  A
trailing period or comma is removed and every name is passed
through `ref-man--validate-author'.  The last word of each name
is then moved to the front followed by a comma, and the names are
joined with \" and \".  For example \"Samy Bengio, Oriol Vinyals\"
results in \"Bengio, Samy and Vinyals, Oriol\".  A name of a
single word is kept as it is."
  (let* ((ascii (ref-man--replace-non-ascii author-str))
         (trimmed (replace-in-string (replace-in-string ascii "\\.$" "") ",$" ""))
         (names (split-string trimmed "," t)))
    (mapconcat
     (lambda (name)
       (let ((words (ref-man--validate-author (split-string name " " t))))
         (if (= 1 (length words))
             (car words)
           (concat (car (last words))
                   ", "
                   (string-join (butlast words) " ")))))
     names
     " and ")))
(defun ref-man--build-vernacular-author (author-str)
  "Build a common spoken English author string from AUTHOR-STR.
AUTHOR-STR is assumed to be in bibtex author format, with the
names separated by \" and \" and written as \"Last, First\".  A
trailing period or comma is removed, the parts of each name
around \", \" are put in reverse order and the names are joined
with \" and \" again.  For example \"Bengio, Samy and Vinyals,
Oriol\" results in \"Samy Bengio and Oriol Vinyals\".  The result
is passed through `ref-man--invert-accents'."
  (let* ((trimmed (replace-in-string (replace-in-string author-str "\\.$" "") ",$" ""))
         (spoken (mapcar (lambda (name)
                           (string-join (reverse (split-string name ", ")) " "))
                         (split-string trimmed " and " t "[ ]+"))))
    (ref-man--invert-accents (string-join spoken " and "))))
(defun ref-man--build-bib-assoc (key-str &optional na)
  "Return a bibtex key and an alist of fields for DBLP entry KEY-STR.
KEY-STR is an alist with string keys whose values are one element
lists.  The result is a list (KEY FIELDS).  KEY is generated by
`ref-man--build-bib-key' and is prefixed with \"na_\" when NA is
non-nil.  FIELDS is an alist of string conses which holds only
the fields with a non-nil value."
  (cl-flet ((value-of (name) (cadr (assoc name key-str))))
    (let* ((bib-key (ref-man--build-bib-key key-str na))
           (page-range (value-of "pages"))
           (fields
            (list (cons "author" (ref-man--build-bib-author (value-of "authors")))
                  (cons "title" (value-of "title"))
                  (cons "year" (value-of "year"))
                  (cons "doi" (value-of "doi"))
                  (cons "volume" (value-of "volume"))
                  (cons "number" (value-of "number"))
                  ;; Page ranges use a double dash and no spaces.
                  (cons "pages" (and page-range
                                     (replace-in-string
                                      (replace-in-string page-range "-" "--")
                                      " " "")))
                  (cons "url" (value-of "ee"))
                  (cons "venue" (value-of "venue")))))
      (list bib-key (-filter 'cdr fields)))))
(make-obsolete 'ref-man--build-bib-assoc nil "ref-man 0.3.0")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; END Bib entry utility functions ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; START Some experimental CSL functions ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Looked up by `ref-man-bibtex-to-csl' with the downcased BibTeX field
;; name; fields not listed here are handled by that function itself.
(defvar ref-man-bibtex-to-csl-alist
'(("=key=" . id)
("address" . publisher-place)
("venue" . container-title)
("booktitle" . container-title)
("journal" . container-title)
("chapter" . title)
("location" . event-place)
("series" . collection-title)
("keywords" . keyword)
("institution" . publisher)
("school" . publisher)
("pages" . page)
("organization" . publisher)
("url" . URL)
("doi" . DOI)
("pmid" . PMID)
("pmcid" . PMCID))
"Alist mapping BibTeX keys to CSL keys with different names.
The BibTeX keys are lower case strings, the CSL keys are symbols.")
(defun ref-man-citeproc-bib-to-csl-date (year month &optional pandoc-compat)
  "Return date in CSL format.
YEAR and MONTH are the values of the corresponding BibTeX fields,
MONTH might be nil.  Derived from `citeproc-bt--to-csl-date'.
See that function for details.

The year is the first run of digits in YEAR.  MONTH is looked up
in `citeproc-bt--mon-to-num-alist'.  If YEAR contains no digits
the date is empty.

Return an alist with a single `date-parts' element holding the
year and, if known, the month.  When PANDOC-COMPAT is non-nil
return a string like \"2020-3\" instead, for use as the value of
`issued'."
  (let* ((csl-year (and (stringp year)
                        ;; A YEAR without digits used to signal an error.
                        (save-match-data
                          (and (string-match "[[:digit:]]+" year)
                               (string-to-number (match-string 0 year))))))
         (csl-month (when month
                      (assoc-default (downcase month)
                                     citeproc-bt--mon-to-num-alist)))
         ;; The month is only meaningful together with a year.
         (date (delq nil (list csl-year (and csl-year csl-month)))))
    (if pandoc-compat
        (string-join (mapcar #'number-to-string date) "-")
      ;; NOTE(review): `citeproc-bt--to-csl-date' may nest the parts one
      ;; level deeper; the flat shape is kept as it was -- confirm.
      `((date-parts . ,date)))))
(defun ref-man-bibtex-to-csl (bib &optional pandoc-compat)
  "Return a CSL form of normalized parsed BibTeX entry BIB.
BIB is an alist of field names and values that includes the
\"=type=\" pseudo field.  Like `citeproc-bt-entry-to-csl' but
the field names are converted with `ref-man-bibtex-to-csl-alist'
and the date is built by `ref-man-citeproc-bib-to-csl-date',
which receives PANDOC-COMPAT to optionally make the result
compatible with `pandoc'."
  (let ((type (assoc-default (downcase (assoc-default "=type=" bib))
                             citeproc-bt--to-csl-types-alist))
        (result nil)
        (year nil)
        (month nil))
    (dolist (field bib)
      (let* ((key (downcase (car field)))
             (value (citeproc-bt--to-csl (cdr field)))
             (csl-key (assoc-default key ref-man-bibtex-to-csl-alist)))
        (cond
         ;; Vars mapped simply to a differently named CSL var
         (csl-key
          (push (cons csl-key value) result))
         ;; Name vars
         ((member key '("author" "editor"))
          (push (cons (intern key) (citeproc-bt--to-csl-names value)) result))
         ((string= key "=type=")
          (push (cons 'type type) result))
         ((string= key "number")
          (push (cons (if (string= type "article-journal") 'issue 'number)
                      value)
                result))
         ;; Date vars that need further processing below
         ((string= key "year") (setq year value))
         ((string= key "month") (setq month value))
         ;; Remaining keys are mapped without change
         (t (push (cons (intern key) value) result)))))
    (when year
      (push (cons 'issued
                  (ref-man-citeproc-bib-to-csl-date year month pandoc-compat))
            result))
    result))
(defun ref-man-bibtex-csl-to-yaml (maybe-bib)
  "Convert the bibtex entry MAYBE-BIB to CSL and return it as yaml.
If MAYBE-BIB is a string, the bibtex entry is parsed from it.  If
it is a number, as is the case when called interactively, the
entry is parsed from the current buffer at point and the result
is also put in the kill ring.

The CSL data is sent to the \"get_yaml\" endpoint of the python
server and the body of the response is returned, followed by a
newline."
  (interactive "p")
  (let* ((from-buffer (numberp maybe-bib))
         (bib (cond (from-buffer
                     (ref-man-bibtex-to-csl (bibtex-parse-entry) t))
                    ((stringp maybe-bib)
                     (with-temp-buffer
                       (insert maybe-bib)
                       (goto-char (point-min))
                       (ref-man-bibtex-to-csl (bibtex-parse-entry) t)))))
         (response (ref-man--post-json-synchronous (ref-man-py-url "get_yaml") bib))
         (csl (with-current-buffer response
                (goto-char (point-min))
                ;; The body begins after the first empty line.
                (re-search-forward "\r?\n\r?\n")
                (concat (buffer-substring-no-properties (point) (point-max))
                        "\n"))))
    (when from-buffer
      (kill-new csl))
    csl))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; END Some experimental CSL functions ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; START Org generation and insertion stuff ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; TODO: What if science-parse is called on multiple pdf files?
;; For that I'll have to hold all the data in a list
;; TODO: Add async callback with message "DONE" when done
(defun ref-man-get-references ()
  "Extract references for the current pdf document.
Uses Science Parse server to extract the references and DBLP to
verify the bibliography entries.  The results are extracted to an
org buffer generated by `ref-man--generate-buffer-and-fetch-if-required'.

Each such org buffer is given a unique name which is the same as
the bibtex key for that publication.  In case the file already
exists on disk in `ref-man-org-store-dir', that is opened instead
of fetching from DBLP again.

For entries which aren't identified by DBLP, an \"na_\" is
prefixed to the name where \"na\" stands for \"non authoritative\".
For example for a publication if the bibtex key
would be \"name1995something\" but DBLP can't find it in the
database, the bibtex key (and file) becomes
\"na_name1995something\"(.org).

The current buffer has to be in `pdf-view-mode' and visiting a
file.  The file is sent to the Science Parse server with curl.
If the server did not give a title the user is asked for one.
Return nil if there is nothing to parse."
  (interactive)
  (let ((status (ref-man-try-start-science-parse-server)))
    (cond
     ((not status)
      (ref-man-try-start-science-parse-server))
     ((eq status 'waiting)
      (message "[ref-man] Waiting for Science Parse server to become ready."))
     (t
      (let* ((pdf-file-name (and buffer-file-name
                                 (expand-file-name buffer-file-name)))
             (response
              (cond
               ((not (eq major-mode 'pdf-view-mode))
                (message "[ref-man] not pdf-view-mode")
                nil)
               ;; A buffer without a file used to signal an error in
               ;; `expand-file-name'.
               ((not pdf-file-name)
                (message "[ref-man] Buffer is not visiting a file")
                nil)
               (t
                ;; Every argument is quoted on its own, so file names with
                ;; spaces or shell meta characters are passed on intact.
                ;; The arguments are also separated explicitly instead of
                ;; by the indentation of continuation lines inside the
                ;; string.
                (shell-command-to-string
                 (format "curl -s -H %s --data-binary %s %s"
                         (shell-quote-argument "content-type: application/pdf")
                         (shell-quote-argument (concat "@" pdf-file-name))
                         (shell-quote-argument
                          (format "http://localhost:%s/v1"
                                  ref-man-science-parse-server-port)))))))
             ;; Decode objects as hash tables with string keys and arrays
             ;; as lists.
             (json-object-type 'hash-table)
             (json-key-type 'string)
             (json-array-type 'list)
             (parsed (and response (json-read-from-string response)))
             ;; concats title and authors for each ref entry for easy lookup
             (refs-list (and parsed
                             (mapcar (lambda (ref)
                                       (cons (concat (gethash "title" ref) " "
                                                     (string-join (gethash "authors" ref) " "))
                                             ref))
                                     (gethash "references" parsed)))))
        (if (not parsed)
            (progn (message "[ref-man] Empty PDF parse") nil)
          (setq ref-man--current-pdf-file-name pdf-file-name)
          (setq ref-man--science-parse-data parsed)
          ;; `puthash' returns the stored value, so a title entered by the
          ;; user ends up both in the parsed data and in the variable.
          (setq ref-man--document-title
                (or (gethash "title" parsed)
                    (puthash "title"
                             (read-from-minibuffer "ENTER TITLE (could not infer): ")
                             parsed)))
          (ref-man--generate-buffer-and-fetch-if-required refs-list)))))))
(defun ref-man--create-org-buffer (&optional visiting-filename)
  "Create and return an org buffer for the fetch results.
The buffer is named VISITING-FILENAME if that is given and
`ref-man--document-title' followed by \"_org\" otherwise.  It is
displayed in the window returned by
`ref-man--get-or-create-window-on-side' and put in `org-mode'."
  (let* ((name (or visiting-filename
                   (concat ref-man--document-title "_org")))
         (org-buf (get-buffer-create name))
         (side-win (ref-man--get-or-create-window-on-side)))
    (set-window-buffer side-win org-buf)
    (with-current-buffer org-buf
      (org-mode))
    org-buf))
(defun ref-man--generate-org-buffer-content (org-buf refs-list bib-assoc visiting-filename)
  "Write the publication skeleton into ORG-BUF and start fetching references.
BIB-ASSOC describes the publication itself and is written as the
top level heading.  A \"Refs\" child heading and an empty heading
below it are then inserted, and REFS-LIST is handed to
`ref-man--dblp-fetch-python' together with ORG-BUF.  Finally
ORG-BUF is made to visit VISITING-FILENAME."
  (with-current-buffer org-buf
    (let ((add-child-heading
           ;; Insert a new heading after the current one and demote it.
           (lambda ()
             (org-insert-heading-after-current)
             (org-demote-subtree))))
      (ref-man--org-bibtex-write-top-heading-from-assoc bib-assoc)
      (funcall add-child-heading)
      (insert "Refs")
      (funcall add-child-heading)
      (end-of-line)
      (message "[ref-man] Fetching references from DBLP")
      ;; TODO: Maybe set buffer read only until fetched?
      (ref-man--dblp-fetch-python refs-list org-buf)
      (set-visited-file-name visiting-filename))))
;; CHECK: Does the callback really need URL?  It might help with debugging,
;; but nothing here uses it.  STATUS is always passed by `url-retrieve'.
;;
;; CHECK: This should perhaps be named differently.
(defun ref-man--parse-json-callback (_status _url callback)
  "Read JSON from the current response buffer and pass it to CALLBACK.
_STATUS and _URL are accepted but not used.  Point is moved to
the start of the buffer and then past the first paragraph
\(presumably the HTTP headers) before the JSON is read.  The
decoded data is stored in `ref-man--json-data' and CALLBACK is
called with it as its only argument."
  (goto-char (point-min))
  (forward-paragraph)
  (let ((parsed (json-read)))
    (setq ref-man--json-data parsed)
    (funcall callback parsed)))
(defun ref-man--post-json (url data callback)
  "Send an HTTP POST request to URL with DATA encoded as JSON.
DATA is encoded with `json-encode' and sent as UTF-8 with
content-type application/json.

The response is handled by `ref-man--parse-json-callback', which
decodes the JSON reply to elisp structures and then calls
CALLBACK on the decoded data."
  (let* ((payload (encode-coding-string (json-encode data) 'utf-8))
         (url-request-method "POST")
         (url-request-extra-headers
          (list (cons "Content-Type" "application/json")))
         (url-request-data payload))
    (url-retrieve url
                  #'ref-man--parse-json-callback
                  (list url callback))))
(defun ref-man--post-json-new (url encode-func args callback)
  "Send an HTTP POST request to URL with a caller-encoded JSON body.
A more general variant of `ref-man--post-json': the request body
is whatever ENCODE-FUNC returns when called with ARGS, which may
be of any type.  ENCODE-FUNC must return a string; it is sent as
UTF-8 with content-type application/json.

CALLBACK is used as in `ref-man--post-json', i.e. it is called by
`ref-man--parse-json-callback' with the decoded response."
  (let* ((payload (encode-coding-string (funcall encode-func args) 'utf-8))
         (url-request-method "POST")
         (url-request-extra-headers
          (list (cons "Content-Type" "application/json")))
         (url-request-data payload))
    (url-retrieve url
                  #'ref-man--parse-json-callback
                  (list url callback))))
(defun ref-man--dblp-no-result-p (value)
  "Return non-nil if VALUE, one result from the python server, is a miss.
VALUE counts as a miss when it is empty, or when it is a vector
or list whose first element is a string such as \"NO_RESULT\"."
  (cond ((null value) t)
        ((vectorp value)
         (or (zerop (length value)) (stringp (aref value 0))))
        ((consp value) (stringp (car value)))
        (t nil)))

(defun ref-man--dblp-fetch-python-process-results (refs-list org-buf results)
  "Insert RESULTS returned by the python server into ORG-BUF.
It's passed as an argument to `ref-man--post-json' but as a
partial application with list of queries REFS-LIST and target
buffer ORG-BUF fixed.  `ref-man--parse-json-callback' processes
the HTTP response buffer, converts JSON data to elisp and then
calls the partial function with sole argument RESULTS.

RESULTS maps each query to its result and may be either an alist
or a hash table.  Queries for which DBLP returned data are
written first with `ref-man--org-bibtex-write-ref-from-assoc'.
Queries without a usable result (see `ref-man--dblp-no-result-p')
are written afterwards as non authoritative entries, using the
data stored for that query in REFS-LIST.  RESULTS itself is not
modified.

If ORG-BUF has been killed in the meantime nothing is inserted."
  ;; NOTE: Sometimes result is hash-table and sometime alist, so bring
  ;; both to a list of (QUERY . VALUE) pairs first.
  (let ((entries (cond ((hash-table-p results)
                        (let (pairs)
                          (maphash (lambda (key value)
                                     (push (cons key value) pairs))
                                   results)
                          (nreverse pairs)))
                       ((listp results) results)))
        found na-results)
    (if (not (buffer-live-p org-buf))
        (message "[ref-man] Target buffer was killed, discarding DBLP results")
      ;; Classify every entry exactly once instead of deleting from
      ;; RESULTS while iterating over it.
      (dolist (entry entries)
        ;; Alist keys from `json-read' may be symbols while the keys of
        ;; REFS-LIST are strings, hence the conversion.
        (let ((key (format "%s" (car entry)))
              (value (cdr entry)))
          (cond ((not (ref-man--dblp-no-result-p value))
                 (push value found))
                ((not (member key na-results))
                 (push key na-results)))))
      (setq found (nreverse found)
            na-results (nreverse na-results))
      ;; NOTE: First write headings which are retrieved
      ;; NOTE(review): values are passed on unchanged; for hash-table
      ;; RESULTS confirm `ref-man--dblp-clean-vector' accepts them.
      (dolist (value found)
        (with-current-buffer org-buf
          (ref-man--org-bibtex-write-ref-from-assoc
           (ref-man--build-bib-assoc (ref-man--dblp-clean-vector value)))))
      ;; NOTE: Then non authoritative headings
      (dolist (key na-results)
        (with-current-buffer org-buf
          (ref-man--org-bibtex-write-ref-NA-from-keyhash
           (cdr (assoc key refs-list)))))
      ;; Remove the line following the parent heading, then save.
      (with-current-buffer org-buf
        (outline-up-heading 1)
        (forward-line)
        (kill-line)
        (delete-blank-lines)
        (save-buffer))
      ;; Report what was actually written rather than deriving the DBLP
      ;; count from the length of REFS-LIST.
      (message "Inserted %s references from DBLP, %s from SP"
               (length found) (length na-results)))))
;; TODO: Need another function to fetch a search string at prompt and copy to kill ring
(defun ref-man--dblp-fetch-python (refs-list org-buf)
  "Fetch the publications in REFS-LIST from DBLP via the python server.
The queries are the `car's of REFS-LIST.  They are posted as JSON
to the \"dblp\" URL of the python server, which parallelizes the
queries and sends back the results.  ORG-BUF is the target buffer
where the results are inserted once they arrive.

There was an implementation with `async' initially, but the
buffer would hang at the sync step while waiting for the result
and the entire process was very messy.  Parallel network calls in
python are much easier and cleaner."
  (let* ((queries (mapcar #'car refs-list))
         (url (ref-man-py-url "dblp"))
         ;; NOTE: Closure over REFS-LIST and ORG-BUF which will process
         ;; the results eventually.
         (process-results
          (lambda (results)
            (ref-man--dblp-fetch-python-process-results refs-list org-buf results))))
    ;; NOTE: `ref-man--post-json-new' would additionally need an
    ;; encode function; not used for now.
    (ref-man--post-json url queries process-results)))
;;
;; Called by ref-man--generate-buffer-and-fetch-if-required
;; NOTE: I was thinking to make it async but it's still useful
;;
(defun ref-man-dblp-fetch-serial (query &optional all)
  "Fetch the dblp data synchronously for QUERY.
QUERY should be the title string of a publication or a
combination of title and author strings.  When called from
`ref-man--generate-buffer-and-fetch-if-required', it's used to
insert the top level heading.

Each space separated word of QUERY is percent-encoded and the
words are joined with \"+\", so characters such as \"&\", \"#\"
or \"+\" inside a title cannot change the request.

By default returns only the top result.  With non-nil ALL,
returns all results.  Returns nil when the request fails or the
response has no body.  The response buffer is killed before
returning."
  (message "[ref-man] Fetching from DBLP synchronously.")
  (let* ((encoded (mapconcat #'url-hexify-string (split-string query " ") "+"))
         (query-url (format "https://dblp.uni-trier.de/search/publ/api?q=%s&format=xml"
                            encoded))
         (buf (url-retrieve-synchronously query-url)))
    (if (not (buffer-live-p buf))
        ;; `url-retrieve-synchronously' can return nil, e.g. without network.
        (progn (message "[ref-man] DBLP request failed for query: %s" query)
               nil)
      (unwind-protect
          (let ((beg (with-current-buffer buf
                       (set-buffer-multibyte t)
                       (goto-char (point-min))
                       ;; The body starts after the blank line ending the headers.
                       (and (re-search-forward "\r?\n\r?\n" nil t)
                            (point)))))
            (when beg
              (pcase-let ((`(,(and result `(result . ,_)))
                           (xml-parse-region beg nil buf)))
                (remove nil (ref-man-dblp-clean
                             (mapcar (lambda (hit)
                                       (gscholar-bibtex--xml-get-child hit 'info))
                                     (xml-get-children
                                      (gscholar-bibtex--xml-get-child result 'hits)
                                      'hit))
                             all)))))
        ;; Don't leave one response buffer behind per query.
        (let ((kill-buffer-query-functions nil))
          (kill-buffer buf))))))
;; NOTE: Changed add-to-list to push
;; TODO: Change to
(defun ref-man--generate-key-str-from-science-parse ()
  "Generate a string alist from Science Parse data.
The data is read from the hash table `ref-man--science-parse-data'.
Each element of the returned alist has the form (KEY VALUE) where
both are strings; only keys present in the data are included.
The \"authors\" value is the \", \" separated list of author
names and \"year\" is formatted as a string.  Entries appear in
the order \"venue\", \"title\", \"year\", \"authors\".

Return nil if `ref-man--science-parse-data' is not a hash table."
  ;; NOTE: Not declared `pure': the result depends on a global variable
  ;; rather than on the (non-existent) arguments, so calls must not be
  ;; evaluated at compile time.
  (declare (side-effect-free t))
  (let ((data ref-man--science-parse-data)
        (key-str nil))
    (when (hash-table-p data)
      (let ((authors (gethash "authors" data))
            (year (gethash "year" data))
            (title (gethash "title" data))
            (venue (gethash "venue" data)))
        (when authors
          (push (cons "authors"
                      (list (mapconcat (lambda (x) (gethash "name" x))
                                       authors ", ")))
                key-str))
        (when year
          (push (cons "year" (list (format "%s" year))) key-str))
        (when title
          (push (cons "title" (list title)) key-str))
        (when venue
          (push (cons "venue" (list venue)) key-str))))
    key-str))
(make-obsolete 'ref-man--generate-key-str-from-science-parse nil "ref-man 0.3.0")
(defun ref-man--generate-buffer-and-fetch-if-required (refs-list)
  "Generate the Org buffer with publication details and references.
REFS-LIST is the list of references to fetch and insert into the
Org buffer.

The title and author names in `ref-man--science-parse-data' are
queried with `ref-man-dblp-fetch-serial' to get authoritative
information.  If DBLP returns nothing, the Science Parse data
itself is used and the entry is marked as non authoritative.  A
bibtex key is built from that with `ref-man--build-bib-assoc';
the Org file name is that key, without any \"na_\" prefix, +
\".org\" in `ref-man-org-store-dir'.

If a buffer visiting that file already has content, or the file
exists on disk and is not empty, the existing content is shown
and nothing is fetched.  Otherwise the publication heading is
written and REFS-LIST is fetched into the buffer with
`ref-man--generate-org-buffer-content'.

If no key could be generated only a message is shown."
  (let* ((query (concat
                 ;; Remove everything outside printable ASCII and whitespace
                 (replace-regexp-in-string "[^\t\n\r\f -~]" ""
                                           (gethash "title" ref-man--science-parse-data))
                 " "
                 (string-join (mapcar (lambda (x) (gethash "name" x))
                                      (gethash "authors" ref-man--science-parse-data))
                              " ")))
         (result (ref-man-dblp-fetch-serial query))
         (na (not result))
         (result (or result (ref-man--generate-key-str-from-science-parse)))
         (entry-alist (ref-man--build-bib-assoc result na))
         (key (car entry-alist)))
    (if (not key)
        (message "[ref-man] filename could not be generated!")
      (let* ((filename (string-remove-prefix "na_" key)) ; always remove na_ from filename
             (visiting-filename
              (path-join ref-man-org-store-dir (concat filename ".org")))
             (buf (find-buffer-visiting visiting-filename))
             ;; Reuse the buffer already visiting the file, whatever its name
             (buf-name (if buf (buffer-name buf) (concat filename ".org"))))
        (if (and buf (> (buffer-size buf) 0))
            (progn
              (message "[ref-man] File is already opened and not empty. Switching...")
              (ref-man--create-org-buffer buf-name))
          (let ((org-buf (ref-man--create-org-buffer buf-name))
                (have-content nil))
            (cond (buf
                   (message "[ref-man] Buffer is opened but is empty."))
                  ((file-exists-p visiting-filename)
                   (message "[ref-man] File already exists. Opening...")
                   (with-current-buffer org-buf
                     ;; Visiting a file is only allowed in an empty buffer
                     (when (zerop (buffer-size))
                       (insert-file-contents visiting-filename t)))
                   (if (zerop (buffer-size org-buf))
                       (message "[ref-man] Opened buffer but is empty.")
                     (setq have-content t))))
            ;; Fetch only if there is nothing to show yet
            (if have-content
                org-buf
              (ref-man--generate-org-buffer-content
               org-buf refs-list entry-alist visiting-filename))))))))
(defun ref-man-org-find-duplicate-headings ()
"Find duplicate headings for current heading or link under point.
Display the entries if any found in a helm buffer."
(interactive)
(pcase-let* ((link (util/org-link-get-target-for-internal))
(`(,buf ,pt) (if link
(list (find-file-noselect (plist-get link :file))
(plist-get link :point))
(list (current-buffer)
(save-excursion
(org-back-to-heading t)
(beginning-of-line)
(point)))))
(`(,heading ,cid ,buf-name) (with-current-buffer buf
(save-excursion
(goto-char pt)
(list (org-get-heading t t t t)
(org-entry-get (point) "CUSTOM_ID")
(buffer-name)))))
(headings (-keep (lambda (x)
(unless (and (= pt (nth 4 x)) (string= (nth 2 x) buf-name))
`(,(car x) . ,(list (nth 2 x) (nth 4 x)))))
(ref-man-org-check-for-duplicate-pub heading cid))))
(if headings