# A Praat script to assist manual coding of variables

## This script is used for coding assimilated endings, but it could be adapted to any other task that involves
## a human evaluation of a sound extract.
##
## The script searches adjectives and verbs (i.e. words that are coded as one of these parts of speech)
## for words ending in a certain string of letters. If the string is found, the corresponding
## sound is extracted and played for the coder. When the coder has decided on the proper code, he or she
## enters it, and it is written to the TextGrid. The coder can listen to the extract any number of times,
## and cancel the task if in doubt.
## There is no "undo".
##
## A list of excluded words can be given in the txt file "ExcludedWords.txt"
##
## The script is written December 2011 by jthoegersen@hum.ku.dk and is GNU/GPL
##


# Start-up form. The two fields become the script variables search_string$ (the word ending to look for)
# and coder$ (the coder's initials). The initials are used to generate unique output tier names and
# output files for each coder.

form Coding of assimilations
    comment Search criteria
    word Search_string
    comment Coder initials (in capitals)
    word Coder
endform



# The script works on the TextGrid that is selected when it is started. The sound file is expected
# to have the same name as the TextGrid.

filename$ = selected$ ("TextGrid")


# A list of words that are to be omitted from the analysis can be generated as a txt file (one word per line).
# Reading it creates the object "Strings ExcludedWords".

Read Strings from raw text file... C:\tmp\ExcludedWords.txt

Open long sound file... c:\tmp\'filename$'.wav


select TextGrid 'filename$'
tiers = Get number of tiers


# The script checks whether the output tier already exists (as the last tier) to avoid working on files
# that have already been coded once.

lastTier$ = Get tier name... tiers

if lastTier$ = "Assimilation_'coder$'"
    beginPause ("Error!")
    comment ("The output tier already exists. Delete tier?")
    delete = endPause ("Yes", 0)

    if delete = 1
        # The coder may have clicked in the Objects window while the pause window was open,
        # so the TextGrid is selected again before it is modified.
        select TextGrid 'filename$'
        Remove tier... tiers

        # One tier fewer now: refresh the count so that the new tier is inserted at a valid position.
        tiers = Get number of tiers
    endif

endif


# The output tier is (re)generated as the last tier; "tiers" then holds the number of the output tier.

Insert interval tier... tiers+1 Assimilation_'coder$'

tiers = Get number of tiers


# "Intervals" are the intervals that are being analysed. Change the tier number if they are not in tier 1

intervals = Get number of intervals... 1


# "Trials" is a counter of the number of times the coder listens before making a choice (i.e. a certainty check)

trials = 1


# Here is the search procedure proper.
#
# Every interval of the word tier (tier 1) is visited. A word is handed to the procedure "assimilations"
# when it ends in the search string given in the form and is not on the exclusion list.


# The exclusion list does not change while the script runs, so its size is read once.

select Strings ExcludedWords
excludedCount = Get number of strings

for interval to intervals

    select TextGrid 'filename$'
    label$ = Get label of interval... 1 interval

    # The ending is compared with the value of the form field (a plain string, no regular expression).

    if endsWith (label$, search_string$)

        # Look in the exclusion list for words that are omitted

        exclude = 0

        select Strings ExcludedWords

        for excludedIndex to excludedCount

            excludedWord$ = Get string... excludedIndex

            if label$ = excludedWord$
                exclude = 1
            endif
        endfor

        # "assimilations" queries the TextGrid, so it has to be the selected object again.

        select TextGrid 'filename$'

        if exclude = 0

            # "stem" is the entire word minus the ending. The ending is cut off by length, so characters
            # that are special in regular expressions (".", "+", "(" ...) can safely occur in the search string.

            stem$ = left$ (label$, length (label$) - length (search_string$))

            call assimilations

        endif
    endif
endfor

###

# Cleanup after all intervals have been checked: the two helper objects (the sound file and the
# exclusion list) are removed in one go, and the coded TextGrid is written to the output folder.

select LongSound 'filename$'
plus Strings ExcludedWords
Remove

select TextGrid 'filename$'
Save as text file... c:\tmp\output\'filename$'_'search_string$'_'coder$'.TextGrid


#################################

procedure assimilations

    # Plays one candidate word to the coder and writes the chosen code to the output tier.
    #
    # The TextGrid 'filename$' must be the selected object when the procedure is called.
    # These script-level variables are read:
    #     interval   index of the candidate word in tier 1
    #     tiers      number of the output tier
    #     stem$      the word without its ending (shown in the prompt)
    #     trials     how many times the current extract has been played so far
    # When a code has been written, "trials" is reset to 1 and the TextGrid is the selected object.


    # Part-of-speech is included as an additional search criterion. The POS label is read from tier 5
    # using the index of the word interval, so tier 5 is assumed to be segmented exactly like tier 1.

    pos_label$ = Get label of interval... 5 interval

    if index_regex (pos_label$, "(ADJ|V_PRES|V_PAST|V_PARTC_PAST)") > 0


        # Timecodes for extraction of sound (including a 25 ms buffer) and for the placement of the
        # output interval in the output tier

        start = Get starting point... 1 interval
        end = Get end point... 1 interval
        startCut = start - 0.025
        endCut = end + 0.025


        # The boundaries of the output interval are generated. Words are visited in time order, so the last
        # interval of the output tier is always the still-uncoded remainder of the file. Praat refuses to
        # insert a boundary where one already exists, so a boundary is only inserted if it is not already
        # there: the start boundary may coincide with the end of the previously coded word or with the
        # beginning of the file, and the end boundary may coincide with the end of the file.

        stringIntervals = Get number of intervals... tiers
        remainderStart = Get starting point... tiers stringIntervals
        remainderEnd = Get end point... tiers stringIntervals

        if remainderStart <> start
            Insert boundary... tiers start
        endif

        if remainderEnd <> end
            Insert boundary... tiers end
        endif


        # The sound is extracted once and addressed by its ID, so that other Sound objects with the
        # same name cannot be mixed up with it.

        select LongSound 'filename$'
        Extract part... startCut endCut no
        extractID = selected ("Sound")


        # A pause prompt asks for the coder's input. In this case, the coder has to choose between two
        # different orthographical forms distinguished by the ending, but coders could equally well be asked
        # to hear "assimilated" vs. "non-assimilated", "stop" vs. "fricative" etc.
        # The "Error!" button is used if the script proposes a candidate word that shouldn't be analysed.
        # (The script editor should search the output tier for "N/A"s and potentially add the words to the
        # "ExcludedWords" list.)
        # If the coder wishes to listen again, 1 is added to the number of listens and the extract is
        # played once more; nothing has been written to the output tier at that point.

        repeat
            select extractID
            Play

            beginPause ("What do you hear?")
            comment ("Is the word 'stem$'et or 'stem$'ede?")
            endelse = endPause("-et", "-ede", "Listen again", "Error!", 3)

            if endelse = 3
                trials = trials + 1
            endif
        until endelse <> 3

        select extractID
        Remove


        # The output label, of course, has to be adjusted to the individual task. Potentially one could
        # just use "1", "2" and "3". The number of listens is written after the code, separated by a tab.

        select TextGrid 'filename$'

        tttInterval = Get interval at time... tiers start

        if endelse = 1
            Set interval text... tiers tttInterval \dh'tab$''trials'
        elsif endelse = 2
            Set interval text... tiers tttInterval \dh\sw'tab$''trials'
        elsif endelse = 4
            Set interval text... tiers tttInterval N/A'tab$''trials'
        endif

        # The listen counter starts afresh for the next candidate.

        trials = 1

    endif

endproc