-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscanner
executable file
·206 lines (174 loc) · 6.16 KB
/
scanner
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#!/bin/bash
# Utility for making quick, sensible scans. Photographs are automatically
# separated, cropped and derotated, whereas documents are monochrome and
# heavily compressed, combined into a single OCR'ed PDF.
# `fmwconcepts.com/imagemagick/multicrop` was an inspiration for the photo
# portion, but this is written from scratch since multicrop is non-free.
#
# See also:
# - https://askubuntu.com/questions/106769/scanning-from-terminal
# - https://manpages.ubuntu.com/manpages/bionic/en/man1/scanimage.1.html
# - https://imagemagick.org/
# Abort on any failing command, on use of an unset variable, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Private scratch directory for intermediate files. It is created below
# ${TMPDIR} (falling back to /tmp) and carries this script's name as prefix.
TMP="$(mktemp -d "${TMPDIR:-/tmp}/$(basename "$0").XXXXX")"

# Delete the scratch directory together with everything inside it.
finalize() {
  rm -r -f "${TMP}"
}

# Run the clean-up whenever the script exits.
trap 'finalize' EXIT
# SETTING PARAMETERS #########################################################
DEFAULT_DPI=150
DEFAULT_FUZZ=10
DEFAULT_GRID=8
DEFAULT_SCAN_TYPE=document
DEFAULT_EXT=png
DEFAULT_QUALITY=90
USAGE="$(basename "$0") [-d|--dpi N] [-f|--fuzz N] [-g|--grid N]
[-i|--input FILE] [--document|--photo] [-t TITLE] [FILE]...
where:
-t|--title T Title for the produced file
-d|--dpi N Density resolution of scan
-f|--fuzz N Tolerance for non-exact color matches (default: ${DEFAULT_FUZZ})
-g|--grid N Grid lines for detection of individual photos (default: ${DEFAULT_GRID})
-q|--quality N Quality of JPEG output (default: ${DEFAULT_QUALITY})
--document Interpret scan as a document
--photo Interpret scan as one or more photographs
--raw Produce scan without further processing
FILE When FILEs are given, skip scanning, only process given files"
INPUTS=()

# Parse the command line into the global settings.
# Globals written: DPI, TITLE, QUALITY, FUZZ, GRID, SCAN_TYPE, EXT,
#                  DEFAULT_DPI (raised to 300 by --raw/--photo), and
#                  INPUTS (appended with every FILE and -i|--input value).
# Arguments:       the script's command-line arguments ("$@").
# Exits:           0 after printing the usage text for -h|--help,
#                  1 on an unknown option or an option missing its value.
function parse_args {
  while [[ $# -gt 0 ]]; do
    case "${1}" in
      -h|--help)
        echo "${USAGE}" >&2; exit 0
        ;;
      -d|--dpi|-t|--title|-q|--quality|-f|--fuzz|-g|--grid|-i|--input)
        # All of these take exactly one value. Check that it is present so
        # that `set -u` does not abort with a cryptic "unbound variable".
        if [[ $# -lt 2 ]]; then
          echo "Option \"${1}\" requires an argument." >&2; exit 1
        fi
        case "${1}" in
          -d|--dpi)     DPI="${2}" ;;
          -t|--title)   TITLE="${2}" ;;
          -q|--quality) QUALITY="${2}" ;;
          -f|--fuzz)    FUZZ="${2}" ;;
          -g|--grid)    GRID="${2}" ;;
          -i|--input)   INPUTS+=("${2}") ;;
        esac
        shift 2
        ;;
      --raw)
        SCAN_TYPE=raw
        EXT=jpg
        DEFAULT_DPI=300
        shift
        ;;
      --photo)
        SCAN_TYPE=photo
        EXT=jpg
        DEFAULT_DPI=300
        shift
        ;;
      --document)
        SCAN_TYPE=document
        EXT=png
        DEFAULT_DPI=150
        shift
        ;;
      -*)
        echo "Unknown option \"${1}\"." >&2; exit 1
        ;;
      *)
        INPUTS+=("$1")
        shift
        ;;
    esac
  done

  # Anything not chosen explicitly falls back to its default. The title
  # defaults to the scan type, but an explicit -t|--title wins.
  DPI="${DPI:-${DEFAULT_DPI}}"
  SCAN_TYPE="${SCAN_TYPE:-${DEFAULT_SCAN_TYPE}}"
  TITLE="${TITLE:-${SCAN_TYPE}}"
  EXT="${EXT:-${DEFAULT_EXT}}"
  FUZZ="${FUZZ:-${DEFAULT_FUZZ}}"
  GRID="${GRID:-${DEFAULT_GRID}}"
  QUALITY="${QUALITY:-${DEFAULT_QUALITY}}"
}
parse_args "$@"
# Print the next free sequence number for files named TITLE-NUMBER.EXT.
# If there are already files called `${TITLE}-${NUMBER}.${EXT}`, the result is
# one higher than the largest NUMBER found; otherwise it is 1.
# Arguments: $1 - title (may contain a directory part), $2 - extension.
# Outputs:   the number on stdout.
function next_number {
  local title="${1}" ext="${2}"
  local highest=0 candidate digits
  # Title and extension are quoted inside the glob so they match literally,
  # whatever characters they contain.
  for candidate in "${title}"-*."${ext}"; do
    digits=${candidate#"${title}-"}
    digits=${digits%".${ext}"}
    # Skips files whose middle part is not purely numeric, and also the
    # unexpanded pattern itself when nothing matched.
    [[ "${digits}" =~ ^[0-9]+$ ]] || continue
    # Force base 10: a zero-padded number such as "08" would otherwise be
    # rejected as invalid octal.
    if (( 10#${digits} > highest )); then
      highest=$((10#${digits}))
    fi
  done
  echo $((highest + 1))
}
NUMBER=$(next_number "${TITLE}" "${EXT}")
# SCANNING ##################################################################
# Scan an image, but only if the ${INPUTS} array is empty. Afterwards
# ${INPUTS} lists every file to process and ${INPUT} names the first one.

# Backward compatibility: when no FILE was given on the command line, an
# INPUT value inherited from the environment is still honoured.
if [[ ${#INPUTS[@]} -eq 0 && -n "${INPUT:-}" ]]; then
  INPUTS=("${INPUT}")
fi

if [[ ${#INPUTS[@]} -eq 0 ]]; then
  INPUT="${TMP}/scan.pnm"
  echo "Starting up scanner..." >&2
  # -l/-t/-x/-y select the scan area (offset and size).
  # NOTE(review): the values look like an A4 page minus a small margin in
  # millimetres - confirm against the scanner in use.
  if scanimage \
      --progress \
      --resolution "${DPI}" \
      -l 2 -t 2 -x 213 -y 295 \
      --format pnm > "${INPUT}"; then
    echo "Scan finished." >&2
  else
    echo "Scan failed. Exiting..." >&2
    exit 1
  fi
  INPUTS=("${INPUT}")
else
  # Refuse to continue if any of the given files is missing.
  for INPUT in "${INPUTS[@]}"; do
    if [[ ! -f "${INPUT}" ]]; then
      echo "File \"${INPUT}\" does not exist." >&2
      exit 1
    fi
  done
  INPUT="${INPUTS[0]}"
fi
# IMAGE PROCESSING ###########################################################
# Each helper below turns one input file into "${TITLE}-${NUMBER}.${EXT}" and
# then increments ${NUMBER}, so that successive outputs never overwrite each
# other.

# Convert one file to the output format without any further processing.
# Arguments: $1 - input image.
function process_raw {
  local input="${1}"
  local output="${TITLE}-${NUMBER}.${EXT}"
  convert "${input}" -quality "${QUALITY}%" "${output}"
  echo "Wrote \"${output}\"..." >&2
  NUMBER=$((NUMBER + 1))
}

# Reduce one file to a small grayscale, posterized 8-bit PNG and optimize it.
# Arguments: $1 - input image.
function process_document {
  local input="${1}"
  local output="${TITLE}-${NUMBER}.${EXT}"
  echo "Compressing image..." >&2
  convert \
    -strip \
    -colorspace gray \
    -sigmoidal-contrast 10,50% \
    -resize 1000x \
    -posterize 8 \
    "${input}" PNG8:"${output}"
  optipng "${output}"
  echo "Wrote \"${output}\"..." >&2
  NUMBER=$((NUMBER + 1))
}

# Split one scan into its individual photographs; each one is cropped,
# de-rotated and written to its own output file.
# Arguments: $1 - input image.
function process_photo {
  local input="${1}"
  local lmask="${TMP}/local-mask.mpc"
  local gmask="${TMP}/global-mask.mpc"
  local dims width height xd yd x y color output
  echo "Separating, cropping, de-rotating and compressing images..." >&2

  # Make a mask such that the background of the scan is transparent and
  # everything else is plain red
  convert "${input}" -fuzz "${FUZZ}%" \
    -fill none -draw "matte 0,0 floodfill" \
    -fill red +opaque none \
    "${gmask}"

  # Now we try to find each individual photo by looping over a grid
  dims=$(convert "${gmask}" -format \
    "%w %h %[fx:trunc(w/${GRID})] %[fx:trunc(h/${GRID})]" info:)
  IFS=' ' read -r width height xd yd <<< "${dims}"
  # An image narrower or shorter than ${GRID} pixels yields a step of 0,
  # which would make the loops below spin forever.
  if (( xd < 1 )); then xd=1; fi
  if (( yd < 1 )); then yd=1; fi

  for ((x = xd; x < width; x += xd)); do
    for ((y = yd; y < height; y += yd)); do
      # If the color at this position is not transparent, we found a picture!
      color=$(convert "${gmask}" -format "%[fx:u.p{${x},${y}}==none]" info:)
      if [ "${color}" -eq 0 ]; then
        # Create local mask by keeping only the current region opaque
        convert "${gmask}" -fuzz "${FUZZ}%" \
          -fill white -draw "color ${x},${y} floodfill" \
          +transparent white \
          "${lmask}"
        # Compose the local mask with input, then trim, de-rotate & write
        output="${TITLE}-${NUMBER}.${EXT}"
        convert "${input}" -alpha on \
          "${lmask}" -compose Dst_In -composite \
          -background white -alpha remove \
          -trim -deskew 40% -trim +repage \
          -quality "${QUALITY}%" \
          "${output}"
        echo "Wrote \"${output}\"..." >&2
        # Remove current region from global mask so it won't be refound
        convert "${gmask}" -fuzz "${FUZZ}%" \
          -fill none -draw "matte ${x},${y} floodfill" "${gmask}"
        NUMBER=$((NUMBER + 1))
      fi
    done
  done
}

# Process every file collected in ${INPUTS}; when that array is empty, fall
# back to the single image named by ${INPUT}.
if [[ ${#INPUTS[@]} -eq 0 ]]; then
  INPUTS=("${INPUT}")
fi
for INPUT in "${INPUTS[@]}"; do
  case "${SCAN_TYPE}" in
    raw)
      process_raw "${INPUT}"
      ;;
    document)
      process_document "${INPUT}"
      ;;
    photo)
      process_photo "${INPUT}"
      ;;
    *)
      echo "Unknown scan type ${SCAN_TYPE}." >&2
      exit 1
      ;;
  esac
done