[header]
language = CAH

[data]
#
# Siri Additions
#
# <rdar://problem/16536631> [zh_HK][Sports][TTS] Incorrect pronounciation of '14' in the dialog 2014世界盃會喺14年6月12日中午12點十四分至到14年7月13日中午12點十四分 喺巴西舉行。
# Two-digit year directly in front of a full N年N月N日 date: put a space between
# its two digits (e.g. "14年6月12日" -> "1 4年6月12日"). The lookbehind rejects a
# leading digit that is itself preceded by a digit, so four-digit years such as
# "2014年6月12日" are not split.
/(?<!\d)(\d)(\d年\d{1,2}月\d{1,2}日)/ --> "$1 $2"
# Full date with a four-digit year followed by 下午 and a time written either as
# H時MM分 or as H:MM: re-emit it with a space after the year, the literal marker
# "/+#/+" after 日 and a space after 下午.
# NOTE(review): "/+#/+" is presumably an engine control sequence -- confirm.
/(\d{4})年(\d{1,2})月(\d{1,2})日下午(\d{1,2})時(\d{2})分/ --> "$1 年$2月$3日/+#/+下午 $4時$5分"
/(\d{4})年(\d{1,2})月(\d{1,2})日下午(\d{1,2})\:(\d{2})/ --> "$1 年$2月$3日/+#/+下午 $4:$5"

# <rdar://problem/11519421> [clock] Mispronunciation of @{fn#currentTime}
# Clock time H:MM or HH:MM (hours 0-24, minutes 00-59) becomes
# " <hours> dim2 <minutes> fan1 ", padded with a space on both sides.
# NOTE(review): dim2 / fan1 look like Jyutping spellings (點 / 分) that the
# engine reads phonetically -- confirm.
/\b([0-1]?[0-9]|2[0-4])\:([0-5][0-9])\b/ --> " $1 dim2 $2 fan1 "

# <rdar://problem/34827886> [zh_HK][TTS] Siri pronounces S. T. instead of Saint in "St. Petersburg"
# "st" or "st." (any case) is expanded to "saint" only when it is followed by a
# space and one of the listed names; the lookahead leaves the name itself in place.
/\bst\.?(?= (james|john|petersburg|louis)\b)/i --> "saint"

#
# Tokenization issues
#
# <rdar://problem/21762510> [zh-HK][Pronunciation][Portfolio][Premium][Female] Mispronunciation of Year when asking about movies
# 過 (or simplified 过) directly before a four-digit year: emit 過 followed by
# "'" and a space, then the year. The simplified form is replaced by 過 as well.
# NOTE(review): "'" appears to act as a token separator for the engine -- confirm.
/(?:過|过)(\d{4})年/ --> "過' $1年"

# <rdar://problem/17914355> [zh_HK][Weather][TTS] Incorrect pronounciation of ‘間’
# Insert "' " between 期間 / 睇睇 and a directly following 中華人民.
/(期間|睇睇)(中華人民)/ --> "$1' $2"

# <rdar://problem/22754528> [zh-HK][Pronunciation][Portfolio][Premium][Female] Mispronunciation of 分(minutes) when set up the timer
# A Chinese-numeral minutes expression (...分) followed by a Chinese-numeral
# seconds expression (...秒): any whitespace between the two is replaced by "' ".
/((?:一|二|三|四|五|六|七|八|九|十|百|千)+分)\s*((?:一|二|三|四|五|六|七|八|九|十|百|千)+秒)/ --> "$1' $2"

# <rdar://problem/16208581> ☂[zh_HK][Clock, Alarm and Timer][TTS] Incorrect speech for '2分30秒'
# A lone "2" here means a 2 that is not preceded by a digit or ".".
# Lone 2 + 分, followed by (半)鐘 / (半)钟 or by a numeric seconds value:
# emit "兩 fan1 " and keep what followed 分.
/(?<![0-9.])2\s*分(半?鐘|半?钟|\s*[0-9.]+\s*秒)/ --> "兩 fan1 $1"
# 分 directly before 之間 / 之前 / 之後 is replaced by " fan1 ".
/分(之(?:間|前|後))/ --> " fan1 $1"
# Lone 2 + 秒 becomes 兩秒.
/(?<![0-9.])2\s*秒/ --> "兩秒"
# 2 + 日 becomes 兩日 unless the 2 is preceded by a digit, ".", or a 月 that is
# not itself preceded by 個. So "6月2日" is left alone while "個月2日" is rewritten.
/(?<!([0-9.]|(?<!個)月))2\s*日/ --> "兩日"

# <rdar://problem/33615862> Cantonese Siri reads 英里 as 英裡, should be 英理
# A 2 that is not preceded by a digit or "." and is followed by 公里 / 英里
# becomes 兩; whitespace between the number and the unit is dropped.
/(?<![0-9.])2\s*(公里|英里)/ --> "兩$1"

# <rdar://problem/22794645> [zh-HK][Pronunciation][Portfolio][Premium][Female] Mispronunciation of standing (名) when asking for the club standing
# 名 directly after a count is replaced by a space plus 明. The first rule
# handles Chinese numerals, the second Arabic digits.
# NOTE(review): 明 is presumably a stand-in chosen for its reading -- confirm.
/((?:零|一|二|兩|三|四|五|六|七|八|九|十|百)+)名/ --> "$1 明"
/([0-9]+)名/ --> "$1 明"

# <rdar://problem/21197997> [zh-HK][Pronunciation][Portfolio][Premium][Female] Mispronunciation of interjection "呢"(Ne)
# 呢 in front of a classifier is replaced by " nei1 ". Three shapes are covered:
#   呢 + classifier                     (the list here does not include 個)
#   呢 + Chinese numeral + classifier   (list includes 個)
#   呢 + Arabic digits + classifier     (list includes 個)
# In the numeral forms, optional whitespace is normalised to single spaces.
/呢(間|位|張|條|隻|則|架|盞|粒|次|份|首|封|部)/ --> " nei1 $1 "
/呢\s*((?:零|一|二|兩|三|四|五|六|七|八|九|十|百)+)\s*(個|間|位|張|條|隻|則|架|盞|粒|次|份|首|封|部)/ --> " nei1 $1 $2 "
/呢\s*([0-9]+)\s*(個|間|位|張|條|隻|則|架|盞|粒|次|份|首|封|部)/ --> " nei1 $1 $2 "

# <rdar://problem/15999842> 11D151: VoiceOver does not speak Chinese number in Notes
# Split numbers if it's too long
# Every run of 12 consecutive Chinese digits (一..十) that is followed by at
# least one more such digit gets " ' " appended. The lookahead does not consume
# the following digit.
/((?:一|二|三|四|五|六|七|八|九|十){12})(?=一|二|三|四|五|六|七|八|九|十)/ --> "$1 ' "

# <rdar://problem/28628532> [yue_CN][Pronunciation][Portfolio][Compact][Female] Missing the pronunciation of ℃.
# A number followed by ℃ / °C (or ℉ / °F) is rewritten as "攝氏 N 度" ("華氏 N 度").
# An existing 攝氏/摄氏 (華氏/华氏) prefix is consumed by the optional group, so
# the output carries the prefix once, in Traditional form.
/(?:攝氏\s*|摄氏\s*)?([0-9.]+)\s*(?:℃|°C)/i --> "攝氏 $1 度"
/(?:華氏\s*|华氏\s*)?([0-9.]+)\s*(?:℉|°F)/i --> "華氏 $1 度"

# <rdar://problem/14269701> [VoiceOver] Innsbruck11A408: Pronunciation of NT$, HK$, S$
# <rdar://problem/33465618> Voiceover cannot read "CN￥1.11" in Warsaw/Oslo payment using CNY
# Every rule captures the same amount shape: digits, optional ",ddd" groups and
# an optional decimal fraction. Half-width and full-width currency signs are
# both accepted. Prefixed forms (HK$, NT$, US$, S$, CN¥, JP¥ ...) are listed
# before the bare-symbol rule on the last line.
# NOTE(review): "mncmonetaryunit" is presumably an engine token that is spoken
# as the currency unit -- confirm.
/(?:HK|HKD)[$＄]\s*([0-9]+(?:,[0-9]{3}(?![0-9]))*(?:\.[0-9]+)?)/i --> "港幣 $1 mncmonetaryunit "
/(?:NT|TWD)[$＄]\s*([0-9]+(?:,[0-9]{3}(?![0-9]))*(?:\.[0-9]+)?)/i --> "新台幣 $1 mncmonetaryunit "
/(?:US|USD)[$＄]\s*([0-9]+(?:,[0-9]{3}(?![0-9]))*(?:\.[0-9]+)?)/i --> " $1 美元 "
/(?:S|SGD)[$＄]\s*([0-9]+(?:,[0-9]{3}(?![0-9]))*(?:\.[0-9]+)?)/i --> "新加坡幣 $1 mncmonetaryunit "
/(?:CN|CNY|RMB)[$＄¥￥]\s*([0-9]+(?:,[0-9]{3}(?![0-9]))*(?:\.[0-9]+)?)/i --> "人民幣 $1 mncmonetaryunit "
/(?:JP|JPY)[¥￥]\s*([0-9]+(?:,[0-9]{3}(?![0-9]))*(?:\.[0-9]+)?)/i --> " $1 日圓 "
/[$＄¥￥]\s*([0-9]+(?:,[0-9]{3}(?![0-9]))*(?:\.[0-9]+)?)/i --> "$1 mncmonetaryunit "

# <rdar://problem/29381181> [Siri Guide][Fuji][zh-HK][Pronunciation][Custom][Premium][Female] Multiple TTS issues - Mispronunciation of the OS version info
# ' is used to prevent reading out each digit after OS e.g. "iOS 一零"
# Three shapes, longest first: major.minor.patch, major.minor, major only.
# Each "." between version parts is replaced by 點. The patch part is a single digit.
/OS (\d{1,2})\.(\d{1,2})\.(\d)/ --> "OS' $1 點 $2 點 $3"
/OS (\d{1,2})\.(\d{1,2})/ --> "OS' $1 點 $2"
/OS (\d{1,2})/ --> "OS' $1"

# <rdar://problem/33871725> [zh_HK][Lobo][Siri Guide][TTS] Mispronunciation of the OS version info.
# A token made of 1-2 digits, one capital letter and 1-4 digits (presumably an
# OS build number such as "15A372") is split into its three parts, joined by
# hyphens and padded with spaces.
/\b(\d{1,2})([A-Z])(\d{1,4})\b/ --> " $1-$2-$3 "

# <rdar://problem/33870491> I heard Maps said 二英铝 for 2 miles
# Basically this is to fix freeway numbers like US-101 CA-85 I-280
# 3-digit numbers are spelled-out:              101 = 一零一
# 1 or 2-digit numbers are spoken as cardinal:   85 = 八十五
# Two capital letters, an optional "-" or space, then digits: the letters are
# joined by "-", and a 3-digit number has its digits separated by "'".
/\b([A-Z])([A-Z])[- ]?(\d)(\d)(\d)\b/ --> " $1-$2' $3'$4'$5 "
/\b([A-Z])([A-Z])[- ]?(\d{1,2})\b/ --> " $1-$2' $3 "
# "I-<number>": the letter I is respelled "aye"; the hyphen is required here.
/\bI-(\d)(\d)(\d)\b/ --> " aye $1'$2'$3 "
/\bI-(\d{1,2})\b/ --> " aye $1 "

# <rdar://problem/17463250> [VoiceOver] HK/TA: Okemo12A312: Number of mails spoken is in incorrect format
# Remove brackets
# A purely numeric value enclosed in ( ), [ ] or full-width （ ） is replaced by
# the number padded with spaces. The opening and closing characters are not
# required to be of the same kind.
/[(\[（]([0-9]+)[)\]）]/ --> " $1 "

# <rdar://problem/34182154> [WeChat TF][zh_HK][Synapse][TTS][15A-10.48] Siri will spell english letter instead of speak the word if there is some specific fullwidth punctuation before a english word
# For /(，|：|；)(\w+)/, (\w+) is spelt out
# Added a space as a workaround
# The space is inserted between the full-width punctuation mark and the word
# characters that follow it.
/(，|：|；)(\w+)/ --> "$1 $2"

# <rdar://problem/31782216> [zh-HK][Pronunciation][Portfolio][Premium][Female] Mispronunciation of ‘.’ in the ticker symbol(e.g.0700.HK）
# 1-6 digits followed by ".HK" (any case): the dot is replaced by " 點 " and the
# suffix is emitted as upper-case "HK".
/([0-9]{1,6})\.HK/i --> "$1 點 HK"

# <rdar://problem/32468018> [zh_HK] [Due 06/04] TTS support for new terms
# A size of 1-2 digits (optionally with one decimal place) followed by a straight
# double quote or U+201D and then one of the listed product words: the quote
# mark is replaced by " 吋 ".
/(\d{1,2}(?:\.\d)?)["\x{201D}]\s*(ipad|imac|mac|macbook|retina)\b/i --> "$1 吋 $2"

# A superscript ² / ³, or a plain 2 / 3 ending at a word boundary, directly
# after "beats solo" or "powerbeats" (any case) is read as the English word.
/(?<=beats solo|powerbeats)(²|2\b)/i     --> " two "
/(?<=beats solo|powerbeats)(³|3\b)/i     --> " three "

# "Watch Series 1/2/3": the series number is read as an English word. The
# lookbehind accepts a regular whitespace character or a non-breaking space
# (U+00A0) between the words.
/(?<=\bwatch[\s\x{00A0}]series[\s\x{00A0}])(1\b)/i      --> " one "
/(?<=\bwatch[\s\x{00A0}]series[\s\x{00A0}])(2\b)/i      --> " two "
/(?<=\bwatch[\s\x{00A0}]series[\s\x{00A0}])(3\b)/i      --> " three "
# "4K" / "4 K" / "4-K" directly after "tv" plus one space. The hyphen is placed
# last in the character class so that it is unambiguously a literal (same form
# as the [\s-] classes used by other rules in this file); written between \s and
# \x{00A0} it reads as a range starting at a class escape, which regex engines
# do not treat consistently.
/(?<=tv[\s\x{00A0}])4[\s\x{00A0}-]?k\b/i               --> " four-k " # says "Four-K" when preceded by "TV"

# English brand names, product names and abbreviations, each replaced by a
# respelling or a hyphen-joined form. Patterns using [\s\x{00A0}] accept a
# regular whitespace character or a non-breaking space between the words.
/\b7[\s-]?eleven\b/i    --> " seven eleven "
/\bapple[\s\x{00A0}]+tv\b/i    --> "apple-tv"
/\bapple[\s\x{00A0}]+watch\b/i --> "apple-watch"
/\bbig bang\b/i         --> " bicbang "
/\bc[\s-]?3po\b/i       --> " see three pee oh "
/group/i                --> "groop" # to match googlegroups, grouping, etc.
/\biphone[\s\x{00A0}]*3g\b/i   --> " iPhone three g " # match non-breaking space
/\biphone[\s\x{00A0}]*3gs\b/i  --> " iPhone three g s "
/\biphone[\s\x{00A0}]*4\b/i    --> " iPhone four "
/\biphone[\s\x{00A0}]*4s\b/i   --> " iPhone four s "
/\biphone[\s\x{00A0}]*5\b/i    --> " iPhone five "
/\biphone[\s\x{00A0}]*5s\b/i   --> " iPhone five s "
/\biphone[\s\x{00A0}]*5c\b/i   --> " iPhone five c "
/\biphone[\s\x{00A0}]*6\b/i    --> " iPhone six "
/\biphone[\s\x{00A0}]*6s\b/i   --> " iPhone six s "
/\biphone[\s\x{00A0}]*7\b/i    --> " iPhone seven "
/\biphone[\s\x{00A0}]*8\b/i    --> " iPhone eight "
/\biphone[\s\x{00A0}]*se\b/i   --> " iPhone s ee "
/\biphone[\s\x{00A0}]*x\b/i    --> " iPhone ten "
/\bles mis[eé]rables?/i --> " lay miserable "
# NOTE(review): if rules are applied top-to-bottom, "Mac OS X" is consumed by
# the next rule (giving " mahk O S " + " X"), so the "OS X" rule further down
# no longer sees a contiguous "OS X" -- confirm whether that is intended.
/\b[mM]ac\s*OS\b/       --> " mahk O S "
/\b[nN]g\b/             --> " m6 "
/\bNike\+/i             --> " Nyki plus "
/\bOS[\s\x{00A0}]*X\b/         --> " O S ten "
# NOTE(review): the PM rules are case-sensitive. If rules are applied
# top-to-bottom, the two-capital-letter freeway rule earlier in this file
# ([A-Z][A-Z][- ]?\d{1,2}) also matches "PM 10" and the "PM 2" of "PM 2.5" and
# would rewrite them before these rules are reached -- confirm.
/\bPM\s*2\.5\b/         --> " pee em 2.5 "
/\bPM\s*10\b/           --> " pee em 10 "
/\bsci[\s-]?fi\b/i      --> " sy fy "
/\b[tT]v\s*OS\b/        --> " t v O S "
/\b[wW]atch\s*OS\b/     --> " watch O S "
/\bwi-?fi\b/i           --> " wyfye "

# <rdar://problem/29349417> [zh_HK] [LocTask] [TTS] Add “\x{F8FF}” (the Apple logo glyph, private-use U+F8FF) in all language rulesets
# Also refer to: https://gitlab.sd.apple.com/heywood/voiceservicescustomdictionaries/merge_requests/643
# The Apple logo is written as the escape \x{F8FF} rather than as a raw
# private-use character, so it cannot be dropped by editors or tools that strip
# such characters. Without it the first pattern is a bare lookahead that inserts
# " apple " before every music/pay/pencil/tv/watch, and the second is an empty
# pattern.
# Logo directly (or after one space) before a product word: read it as "apple".
/\x{F8FF}(?= ?(music|pay|pencil|tv|watch)\b)/i --> " apple "
# Any other Apple logo: read it as 蘋果.
/\x{F8FF}/ --> "蘋果"

# <rdar://problem/15868452> [VoiceOver] HK Sochi11D139: Unclear pronunciaton for 'email@outlook.com'
# Every "@" is replaced by " at " (padded with spaces), wherever it occurs.
/@/ --> " at "

#
# Non-standard characters
#
# Homographs
# 校 is rewritten to 較 in the contexts below. All rules except the last skip a
# 校 that is preceded by 學, 聯 or 夜 (i.e. 學校 / 聯校 / 夜校 are left alone).
# The lookaheads and lookbehinds only test the context; only 校 itself is replaced.
# Followed by 高/低 or 大/細 plus a degree or aspect word (d matches either case):
/(?<!學|聯|夜)校(?=(高|低)(音|d|啲|咗|左|過|你))/i --> "較"
/(?<!學|聯|夜)校(?=(大|細)(聲|d|啲|咗|左|過|你))/i --> "較"
# Followed by 到/番/返 plus a target value, setting name or digit:
/(?<!學|聯|夜)校(?=(到|番|返)(去|最|咁|個|光|暗|高|低|大|細|中等|中間|適中|自動|原本|準|鬧鐘|音量|計時器|鈴聲|溫度|温度|\d))/ --> "較"
# Followed by 去 plus a day or time-of-day word, or a digit:
/(?<!學|聯|夜)校(?=去(今日|聽日|凌晨|清晨|早上|上午|中午|正午|下晝|下午|黃昏|晚上|夜晚|午夜|\d))/ --> "較"
/(?<!學|聯|夜)校(?=唔(到|番|返))/ --> "較"
/(?<!學|聯|夜)校(?=光|暗|凍|熱|早|晏|好咗|咗|幾點)/ --> "較"
/(?<!學|聯|夜)校(?=低電量模式|震機)/ --> "較"
# One of the setting names appears within the next 10 characters:
/(?<!學|聯|夜)校(?=.{0,10}(鬧鐘|音量|計時器|鈴聲|溫度|温度))/ --> "較"
/(?<!學|聯|夜)校(?=(多|邊|嗰)(一)?個)/ --> "較"
# Preceded by a setting name or by 調 / 幫我 / 同我 / 點 / 點樣 (no 學/聯/夜 check here):
/(?<=鬧鐘|音量|計時器|鈴聲|溫度|温度|調|幫我|同我|點|點樣)校/ --> "較"

# 當 is replaced by " dong3 " when it is followed by an optional 咗/左, then
# 我/你/佢 and 係, and is not preceded by 每.
/(?<!每)當(?=(咗|左)?(我|你|佢)係)/ --> " dong3 "

# One-to-many Simplified Chinese characters -- need lookbehind and lookahead to tell them apart
# 占 becomes 佔 after 约/霸/侵, or before 地/据/有/领/中 or a digit.
# (The lookahead previously listed 地 twice; the duplicate alternative is
# removed, the set of matches is unchanged.)
/(?<=约|霸|侵)占|占(?=地|据|有|领|中|\d)/ --> "佔"
# 几 becomes 機 after 茶/炕 or before 淨.
/(?<=茶|炕)几|几(?=淨)/ --> "機"
# 里 becomes 裏 when followed by 面.
/里(?=面)/ --> "裏"

# Normalised to Traditional Chinese to apply the fixes in user-dict
# Unconditional single-character replacements: each listed character is
# replaced wherever it occurs. Most map a variant or Simplified form to another
# character. Two entries differ: 𡃁 is replaced by the spelling " leng1 " and
# ⋯ by a full-width comma.
# NOTE(review): 几 is also handled by a contextual rule (-> 機) in the
# one-to-many section; the 几 -> 幾 entry here presumably only sees the
# occurrences that rule left behind -- confirm rule ordering.
/㖞/ --> "喎"
/㶶/ --> "燶"
/吿/ --> "告"
/𥔵/ --> "磁"
/乐/ --> "樂"
/撑/ --> "撐"
/画/ --> "畫"
/紧/ --> "緊"
/电/ --> "電"
/话/ --> "話"
/请/ --> "請"
/庆/ --> "慶"
/赢/ --> "贏"
/经/ --> "經"
/几/ --> "幾"
/𡃁/ --> " leng1 "
/哂/ --> "晒"
/嗶/ --> "咇"
/沪/ --> "滬"
/尔/ --> "爾"
/駅/ --> "驛"
/⋯/ --> "，"

# Special surnames
# 單 / 區 are replaced by a romanised reading when they directly follow the
# sequence ESC (0x1B) + "\tn=name\", or when they are followed by one of the
# listed given names.
# NOTE(review): ESC "\tn=name\" is presumably the engine's name-mode control
# tag; the tag itself is consumed by the match -- confirm that is intended.
/\x1b\\tn=name\\單|單(?=立文)/ --> " sin6 "
/\x1b\\tn=name\\區|區(?=瑞強|文詩|錦新|宗傑|俊濤|瑞華|淑貞|劍雄|詠芷|諾軒|永權)/ --> " ngau1 "
# 費 before 禕 is replaced by " bei3 ".
/費(?=禕)/ --> " bei3 "
# The full name 單仲偕 is replaced by a single romanised string.
/單仲偕/ --> " sin6zung6gaai1 "
