word2stresses

#!/bin/bash

# Given a word, print out its stress pattern(s) using ., - and ~.

# . is no stress
# - is primary stress
# ~ is secondary stress

# If a word has multiple stress patterns, then all of them will be
# shown, one per line.

# For example
#
#     ./word2stresses rocard
#     ROCARD -.
#     ROCARD -~
#     ROCARD ~-
#     ROCARD .-

# The word is matched literally and case-insensitively: regular
# expression metacharacters in it (such as the dots in "a.m.") are
# escaped before the dictionary is searched.

# Exit status: 0 after a lookup (even if the word was not found),
# 1 if the dictionary cannot be read, 2 if no word was given.


# BUGS:

# * What is the right output for words with secondary stress, like
#   INQUISITION IH2 N K W AH0 Z IH1 SH AH0 N? Currently using ~.


# NOTES:

# * Duplicate stress patterns are removed even when they are not
#   adjacent, keeping the first occurrence of each. The output is not
#   sorted, because the CMU dict is believed to list the most common
#   usage first, and that order should be kept.
#
#     $ ./word2stresses whitening
#     WHITENING -..
#     WHITENING -.


# Dictionary searched by default, relative to the current directory.
readonly DEFAULT_DICT="cmudict-0.7b"

# Print a short usage message on stderr.
usage() {
  printf 'Usage: %s [-f|--full] WORD\n' "${0##*/}" >&2
}

# Print $1 with every ERE metacharacter backslash-escaped, so that
# grep -E treats the word as literal text.
escape_ere() {
  printf '%s' "$1" | sed 's/[][\\.^$*+?(){}|]/\\&/g'
}

# Filter: read dictionary entries on stdin and write "WORD pattern"
# lines on stdout.
to_stress_patterns() {
  # 1. Drop the "(N)" alternate-pronunciation marker.
  # 2. Drop the letters of every phoneme, leaving only stress digits.
  # 3. Map the digits 0/1/2 to ./-/~.
  # awk then removes repeated lines without reordering them (uniq
  # would only catch adjacent repeats).
  sed -E \
      -e 's/\([0-9]\)//' \
      -e 's/ [[:alpha:]]+//g' \
      -e 'y/012/.-~/' \
    | awk '!seen[$0]++'
}

# Print the stress patterns of a word.
# Arguments: $1 - word to look up (case-insensitive, literal)
#            $2 - dictionary file (optional, default $DEFAULT_DICT)
# Outputs:   one "WORD pattern" line per distinct pattern on stdout
# Returns:   1 if the dictionary is not readable, otherwise the status
#            of the final filter stage
lookup_stresses() {
  local word="$1"
  local dict="${2:-$DEFAULT_DICT}"
  local pattern

  if [[ ! -r "$dict" ]]; then
    printf '%s: cannot read dictionary: %s\n' "${0##*/}" "$dict" >&2
    return 1
  fi

  # An entry is the word, an optional "(N)" marker, then a space.
  pattern="^$(escape_ere "$word")(\([0-9]\))? "

  grep -E -i -w -e "$pattern" -- "$dict" | to_stress_patterns
}

main() {
  # The full CMU dictionary is always used for looking up words.
  # Ignore -f, if given. That's for stress2words.
  if [[ "${1:-}" == "-f" || "${1:-}" == "--full" ]]; then shift; fi

  if (( $# < 1 )) || [[ -z "$1" ]]; then
    usage
    return 2
  fi

  lookup_stresses "$1"
}

main "$@"