# Insert glottal stop before initial vowels gst-ins1: ^([aeiyou@]) ? $1 # after negation prefix too! (conflicts with foreign words?! see v-ins2) gst-ins2: ^n e ([aeiyou@]) n e ? $1 # Deletion of initial o (okolo) o-del: ^o (.) $1 # words consisting of a single consonant get a schwa appended # does not work for voiced ones: transc makes them unvoiced :-( schwa-ins: ^([ptkbdgfvmnNsSzZjJhcx]\?(_[sSzZ])?)$ $1 @ # Insert prothetic v before initial o (short only? no o_u?) # okno, oko, opice v-ins1: ^o( |$) v o$1 # after negation prefix too (conflict with the Latin prefix neo-! overgenerating!) v-ins2: ^n e o n e v o # Delete initial j before consonants # jsme, jméno, jde jsmd-del: ^j ([smd]) $1 # Půjde > pude pud-del: p u: j d p u d # DNI/TNI - assimilation/palatalization of d and t dni-pal: d J i J\ J i dJe-pal: d J e J\ J e tni-pal: t J i c J i tJe-pal: t J e c J e # ts/tS/dz/dZ - often form affricates # největší tS-repl: t S t_S # odshora ts-repl: t s t_s dz-repl: d z d_z dZ-repl: d Z d_Z # ohrádce, otce tts-del: t t_s t_s ttS-del: t t_S t_S # tt-deletion (someone forgot this one in the transc rules?) # odtamtud tt-del: t t t # tst - deletion at the end of words # patnáct tst-del: t_s t$ t_s # říkat > řikat rikat-short: P\ i: k a P\ i k a rikej-short: P\ i: k e j P\ i k e j # dělat - l-deletion (more general in intervocalic position?) # udě(l)áme, dě(l)at, nedě(l)ej? # (sometimes overgenerating?: viděla, děla) dela-del: J\ e l a J\ e a delaa-del: J\ e l a: J\ a: # l-deletion in -ila (maybe also -ilo, -ili/y? or general l-deletion?) # koupi(l)a, nakresli(l)a ila-del: i l a$ i a # hledat - l-deletion (maybe more general deletion?) # h(l)edám, h(l)edáš hledat-del: ^h\ l e d a h\ e d a # malinký - l-deletion malinky-del: m a l i N k m a i N k # tady - d-deletion (maybe more general in intervocalic position?) # tady(hletěm) > tai(hletěm) tady-del: ^t a d i t a i # Alternative variants of the numeral 4 (often in compounds!)
# čtyři > štyři / čtyry / štyry 4-var1: t_S t i P\ t_S t i r 4-var2: t_S t i P\ S t i r 4-var3: t_S t i P\ S t i P\ # l-deletion in sl (dangerous?) # myslim > mysim, nakres(l)ila, pos(l)ední sl-del: ([aeiyou]:?) s l ([aeiyou]:?) $1 s $2 # l-deletion in čl (dangerous?) # č(l)ověk # only proper pronunciation in my recordings! tSl-del: t_S l t_S # Verbal endings in 3p.pl. (dangerous! overgenerating! e.g. "vykolejí") # dělají > dělaj aji-del: a j i:$ a j # sázejí > sázej eji-del: e j i:$ e j # Verbal endings shortening (dangerous! overgenerating!) # musím > musim im-short: i: m$ i m # jdeme > jdem eme-del: e m e$ e m # Verbal past tense -l deletion (test and improve!?) # (řek(l), rozhod(l), pomoh(l), všim(l)) kl-del: k l$ k dl-del: d l$ t hl-del: h\ l$ x ml-del: m l$ m # zh -deletion (maybe "sh" =[sx] too?) # rozhodl, shora zh-del: z h\ z # vl-deletion (two variants: keep v, or keep l) #(vlasy, vlastně) vl-del1: ^v l v vl-del2: ^v l l # vS/vZ-deletion #(všude, všiml) vS-del1: ^f S S vZ-del: ^v Z Z # KTN -deletion # perfek(t)ní ktn-del: k t n k n ktJ-del: k t J k J # STN -deletion # vlas(t)ně stn-del: s t n s n stJ-del: s t J s J # zvláš(t)ní Stn-del: S t n S n StJ-del: S t J S J # RDL -deletion # štokr(d)le rdl-del: r d l r l # ZVL -deletion # z(v)láštní zvl-del: z v l z l # tStv -deletions # čtvrtina - štvrtina - čvrtina tStv-del1: t_S t v S t v tStv-del2: t_S t v t_S v # hn - deletion (maybe overgenerating?) # pře(h)nutej hn-del: h\ n n # Stv - deletion # I have no example in my recordings, but I think this one is common too: # štvrt > štrt Stvr-del: S t v r S t r # vS - deletion (same as vS-del1, but matching a voiced initial v) # všechen, však vS-del2: ^v S S # Delete initial j before e # useful in continuous speech rather than in single words, no gst preceding! # (does not include (j)eště, (j)es(tl)i because those can also start by gst!) # "to je jedno" > "to e jedno" > "to e edno" je-del: ^j e e # Delete t after initial k (alternative to kt-del2!)
# který > kerý kt-del1: ^k t k # Delete initial k before consonants (který, kdo) (alternative to kt-del1!) # kterej > terej kt-del2: ^k t t # kde > de kd-del: ^g d d # Deletion of v / j / h in intervocalic position # (the only tendency mentioned by Czech linguists so far?) vjh-del: ([aeiyou]:?) (v|j|h|h\) ([aeiyou]:?) $1 $3 # nějak > ňák (keep BEHIND vjh-del!) nejak-del: J e a k J a: k # timhle - shortening in compounds too! tim-short: ^c i: m c i m # compound pronouns with -hle- (find better rules!) # (keep BEHIND vjh-del to avoid too crazy reductions like "takovejhle">"kee"?) # tenhle- > tehle- ten-del: ^([tc]) e n h\ l e $1 e h\ l e # -hle- > -hlenc- (take care of recursivity! the rule can apply to itself too) hlenc-ins: ^([tc]) (.*) h\ l e( [^n]|$) $1 $2 h\ l e n t_S$3 # -hlenc- > -hlens- hlens-del: h\ l e n t_S h\ l e n s # -hlens- > -hlen- hlen-del: h\ l e n s h\ l e n # -hle- > -dle- (for k+d we have to apply assimilation > gd (takdle)) hle-repl1: ^([tc])(.*)? ([^k]) h\ l e $1$2 $3 d l e hle-repl2: ^([tc])(.*) k h\ l e $1 $2 g d l e # -hle- > -le- hle-del1: ^([tc]) (.*) h\ l e $1 $2 l e # -hle- > -e- (general l-deletion?) hle-del2: ^([tc]) (.*) h\ l e $1 $2 e # jak-deletion (dangerous!?) (keep BEHIND -hle-rules!) # jak > k; jakýho > kýho jak-del1: ^j a k k jak-del2: ^j a k j k # tak-deletion (dangerous!?) (keep BEHIND -hle-rules!) # tak > k; takovej > kovej tak-del1: ^t a k k tak-del2: ^t a k t k # čč > č # actually the only example in the recording is really pronounced with double č! tStS-del: t_S t_S t_S # Reduction of o to schwa # forget it, we do not need to go as far now: # keep only deletions here, sound models should be already trained for reduced sounds #o-red: o @ # after deletions: short+long vowel > long vowel # seems to be overgenerating..., only applies to special words... # dělám > děám > ďám, takový > takoý > taký? #vow-del: ([^aeiyou:]) [aeiyou] ([aeiyou]): $1 $2: # oe > e (?) - only applies to special words?
# takovej > takoej > takej #oe-del: o e e