This repository has been archived by the owner on Apr 30, 2021. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 61
/
pandoc-citeproc.hs
221 lines (202 loc) · 8.77 KB
/
pandoc-citeproc.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
{-# LANGUAGE CPP #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Main where
import Prelude
import Control.Applicative (many, (<|>))
import Control.Exception as E
import Control.Monad
import Data.Aeson.Encode.Pretty (Config (..), Indent (Spaces),
NumberFormat (Generic),
defConfig, encodePretty')
import Data.Attoparsec.ByteString.Char8 as Attoparsec
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as BL
import Data.Char (chr, toLower)
import Data.List (group, sort)
import qualified Data.Text as T
import Data.Text.Encoding (encodeUtf8, decodeUtf8)
import Data.Version (showVersion)
import Data.Yaml.Builder (toByteStringWith, setWidth)
import Text.Libyaml (defaultFormatOptions)
import Paths_pandoc_citeproc (version)
import System.Console.GetOpt
import System.Environment (getArgs)
import System.Exit
import System.FilePath (takeExtension)
import System.IO
import Text.CSL.Data (getLicense, getManPage)
import Text.CSL.Exception
import Text.CSL.Input.Bibutils (BibFormat (..),
readBiblioString)
import Text.CSL.Pandoc (processCites')
import Safe (readMay)
import Text.CSL.Reference (Literal (..),
Reference (refId))
import Text.Pandoc.JSON hiding (Format)
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Walk
main :: IO ()
main = do
argv <- getArgs
let (flags, args, errs) = getOpt Permute options argv
let header = "Usage: pandoc-citeproc [options] [file..]"
unless (null errs) $ do
UTF8.hPutStrLn stderr $ usageInfo (unlines $ errs ++ [header]) options
exitWith $ ExitFailure 1
when (Version `elem` flags) $ do
UTF8.putStrLn $ "pandoc-citeproc " ++ showVersion version
exitSuccess
when (Help `elem` flags) $ do
UTF8.putStrLn $ usageInfo header options
exitSuccess
when (Man `elem` flags) $ do
getManPage >>= BL.putStr
exitSuccess
when (License `elem` flags) $ do
getLicense >>= BL.putStr
exitSuccess
E.handle
(\(e :: CiteprocException) -> do
UTF8.hPutStrLn stderr $ renderError e
exitWith (ExitFailure 1)) $
if Bib2YAML `elem` flags || Bib2JSON `elem` flags
then do
colwidth <- case Prelude.take 1 $
reverse [readMay n | Columns n <- flags] of
[] -> return $ Just 80
[Just 0] -> return $ Nothing
[Just x] -> return $ Just x
_ -> do
UTF8.hPutStrLn stderr $
"--columns must be followed by a number"
exitWith $ ExitFailure 7
let fmtOpts = setWidth colwidth defaultFormatOptions
let mbformat = case [f | Format f <- flags] of
[x] -> readFormat x
_ -> Nothing
bibformat <- case mbformat <|>
msum (map formatFromExtension args) of
Just f -> return f
Nothing -> do
UTF8.hPutStrLn stderr $ usageInfo
("Unknown format\n" ++ header) options
exitWith $ ExitFailure 4
bibstring <- case args of
[] -> B.getContents
xs -> mconcat <$> mapM B.readFile xs
readBiblioString (const True) bibformat (decodeUtf8 bibstring) >>=
(if Quiet `elem` flags then return else warnDuplicateKeys) >>=
if Bib2YAML `elem` flags
then outputYamlBlock .
B8.intercalate (B.singleton 10) .
map (unescapeTags . toByteStringWith fmtOpts . (:[]))
else B8.putStrLn . unescapeUnicode . B.concat . BL.toChunks .
encodePretty' defConfig{ confIndent = Spaces 2
, confCompare = compare
, confNumFormat = Generic }
else toJSONFilter (doCites (Quiet `elem` flags))
formatFromExtension :: FilePath -> Maybe BibFormat
formatFromExtension = readFormat . dropWhile (=='.') . takeExtension
readFormat :: String -> Maybe BibFormat
readFormat = go . map toLower
where go "biblatex" = Just BibLatex
go "bib" = Just BibLatex
go "bibtex" = Just Bibtex
go "json" = Just Json
go "yaml" = Just Yaml
#ifdef USE_BIBUTILS
go "ris" = Just Ris
go "endnote" = Just Endnote
go "enl" = Just Endnote
go "endnotexml" = Just EndnotXml
go "xml" = Just EndnotXml
go "wos" = Just Isi
go "isi" = Just Isi
go "medline" = Just Medline
go "copac" = Just Copac
go "mods" = Just Mods
go "nbib" = Just Nbib
#endif
go _ = Nothing
doCites :: Bool -> Pandoc -> IO Pandoc
doCites beQuiet doc = do
doc' <- processCites' doc
let warnings = query findWarnings doc'
unless beQuiet $ mapM_ (UTF8.hPutStrLn stderr) warnings
return doc'
findWarnings :: Inline -> [String]
findWarnings (Span (_,["citeproc-not-found"],[("data-reference-id",ref)]) _) =
["pandoc-citeproc: reference " ++ T.unpack ref ++ " not found" | ref /= "*"]
findWarnings (Span (_,["citeproc-no-output"],_) _) =
["pandoc-citeproc: reference with no printed form"]
findWarnings _ = []
data Option =
Help
| Man
| License
| Version
| Convert
| Format String
| Columns String
| Bib2YAML
| Bib2JSON
| Quiet
deriving (Ord, Eq, Show)
options :: [OptDescr Option]
options =
[ Option ['h'] ["help"] (NoArg Help) "show usage information"
, Option [] ["man"] (NoArg Man) "print man page to stdout"
, Option [] ["license"] (NoArg License) "print license to stdout"
, Option ['V'] ["version"] (NoArg Version) "show program version"
, Option ['y'] ["bib2yaml"] (NoArg Bib2YAML) "convert bibliography to YAML"
, Option ['j'] ["bib2json"] (NoArg Bib2JSON) "convert bibliography to JSON"
, Option ['q'] ["quiet"] (NoArg Quiet) "silence all warnings"
, Option ['f'] ["format"] (ReqArg Format "FORMAT") "bibliography format"
, Option ['c'] ["columns"] (ReqArg Columns "NUMBER") "column width (or 0)"
]
warnDuplicateKeys :: [Reference] -> IO [Reference]
warnDuplicateKeys refs = mapM_ warnDup dupKeys >> return refs
where warnDup k = UTF8.hPutStrLn stderr $
"biblio2yaml: duplicate key " ++ T.unpack k
allKeys = map (unLiteral . refId) refs
dupKeys = [x | (x:_:_) <- group (sort allKeys)]
outputYamlBlock :: B.ByteString -> IO ()
outputYamlBlock contents = do
UTF8.putStrLn "---\nreferences:"
B.putStr contents
UTF8.putStrLn "..."
-- turn
-- id: ! "\u043F\u0443\u043D\u043A\u04423"
-- into
-- id: пункт3
unescapeTags :: B.ByteString -> B.ByteString
unescapeTags bs = case parseOnly (many $ tag <|> other) bs of
Left e -> error e
Right r -> B.concat r
unescapeUnicode :: B.ByteString -> B.ByteString
unescapeUnicode bs = case parseOnly (many other) bs of
Left e -> error e
Right r -> B.concat r
tag :: Attoparsec.Parser B.ByteString
tag = do
_ <- string $ B8.pack ": ! "
c <- char '\'' <|> char '"'
cs <- manyTill (escaped c <|> other) (char c)
return $ B8.pack ": " <> B8.singleton c <> B.concat cs <> B8.singleton c
escaped :: Char -> Attoparsec.Parser B.ByteString
escaped c = string $ B8.pack ['\\',c]
other :: Attoparsec.Parser B.ByteString
other = uchar <|> Attoparsec.takeWhile1 notspecial <|> regchar
where notspecial = not . inClass ":!\\\"'"
uchar :: Attoparsec.Parser B.ByteString
uchar = do
_ <- char '\\'
num <- (2 <$ char 'x') <|> (4 <$ char 'u') <|> (8 <$ char 'U')
cs <- count num $ satisfy $ inClass "0-9a-fA-F"
let n = read ('0':'x':cs)
return $ encodeUtf8 $ T.pack [chr n]
regchar :: Attoparsec.Parser B.ByteString
regchar = B8.singleton <$> anyChar