1use alloc::format;
2use alloc::string::String;
3use alloc::vec::Vec;
4use core::ops::Range;
5
// Unicode code-point ranges used to assemble per-locale character sets.
//
// Every constant is a half-open `Range<u32>`: `start` is the first code point
// covered and `end` is one past the last code point covered.
//
// NOTE(review): apart from `BASIC_LATIN`, the `end` bounds below equal the last
// code point of the corresponding Unicode block (e.g. U+00FF, U+9FFF). Because
// `end` is exclusive, that final code point is not part of the range as
// written. Confirm whether this is intended before relying on full block
// coverage.

// ---- Latin -----------------------------------------------------------------

/// Printable ASCII, U+0020 (space) up to but not including U+007F.
pub const BASIC_LATIN: Range<u32> = 0x0020..0x007F;
/// Latin-1 Supplement, from U+00A0 (exclusive end U+00FF).
pub const LATIN_1_SUPPLEMENT: Range<u32> = 0x00A0..0x00FF;
/// Latin Extended-A, from U+0100 (exclusive end U+017F).
pub const LATIN_EXTENDED_A: Range<u32> = 0x0100..0x017F;
/// Latin Extended-B, from U+0180 (exclusive end U+024F).
pub const LATIN_EXTENDED_B: Range<u32> = 0x0180..0x024F;
/// Latin Extended Additional, from U+1E00 (exclusive end U+1EFF).
pub const LATIN_EXTENDED_ADDITIONAL: Range<u32> = 0x1E00..0x1EFF;
/// Vietnamese letters, from U+1EA0 (exclusive end U+1EFF).
///
/// This is a sub-range of [`LATIN_EXTENDED_ADDITIONAL`].
pub const VIETNAMESE_EXTENSIONS: Range<u32> = 0x1EA0..0x1EFF;

// ---- Greek -----------------------------------------------------------------

/// Greek, from U+0370 (exclusive end U+03FF).
pub const GREEK: Range<u32> = 0x0370..0x03FF;
/// Greek Extended, from U+1F00 (exclusive end U+1FFF).
pub const GREEK_EXTENDED: Range<u32> = 0x1F00..0x1FFF;

// ---- Cyrillic --------------------------------------------------------------

/// Cyrillic, from U+0400 (exclusive end U+04FF).
pub const CYRILLIC: Range<u32> = 0x0400..0x04FF;
/// Cyrillic Supplement, from U+0500 (exclusive end U+052F).
pub const CYRILLIC_SUPPLEMENT: Range<u32> = 0x0500..0x052F;

// ---- Hebrew and Arabic -----------------------------------------------------

/// Hebrew, from U+0590 (exclusive end U+05FF).
pub const HEBREW: Range<u32> = 0x0590..0x05FF;
/// Arabic, from U+0600 (exclusive end U+06FF).
pub const ARABIC: Range<u32> = 0x0600..0x06FF;
/// Arabic Supplement, from U+0750 (exclusive end U+077F).
pub const ARABIC_SUPPLEMENT: Range<u32> = 0x0750..0x077F;

// ---- South and South-East Asian --------------------------------------------

/// Devanagari, from U+0900 (exclusive end U+097F).
pub const DEVANAGARI: Range<u32> = 0x0900..0x097F;
/// Thai, from U+0E00 (exclusive end U+0E7F).
pub const THAI: Range<u32> = 0x0E00..0x0E7F;

// ---- East Asian ------------------------------------------------------------

/// Hiragana, from U+3040 (exclusive end U+309F).
pub const HIRAGANA: Range<u32> = 0x3040..0x309F;
/// Katakana, from U+30A0 (exclusive end U+30FF).
pub const KATAKANA: Range<u32> = 0x30A0..0x30FF;
/// CJK Unified Ideographs, from U+4E00 (exclusive end U+9FFF).
pub const CJK_UNIFIED_IDEOGRAPHS: Range<u32> = 0x4E00..0x9FFF;
/// Hangul Syllables, from U+AC00 (exclusive end U+D7AF).
pub const HANGUL_SYLLABLES: Range<u32> = 0xAC00..0xD7AF;
25
26pub fn get_locale_ranges(locale: &str) -> Option<&'static [Range<u32>]> {
27 let ranges: &[Range<u32>] = match locale {
28 "en" => &[BASIC_LATIN],
30
31 "fr" => &[
33 BASIC_LATIN,
34 LATIN_1_SUPPLEMENT,
35 LATIN_EXTENDED_A,
36 LATIN_EXTENDED_B,
37 ],
38
39 "de" => &[BASIC_LATIN, LATIN_1_SUPPLEMENT, LATIN_EXTENDED_A],
41
42 "es" => &[BASIC_LATIN, LATIN_1_SUPPLEMENT],
44
45 "it" => &[BASIC_LATIN, LATIN_1_SUPPLEMENT],
47
48 "pt" => &[BASIC_LATIN, LATIN_1_SUPPLEMENT, LATIN_EXTENDED_A],
50
51 "nl" => &[BASIC_LATIN, LATIN_1_SUPPLEMENT],
53
54 "sv" | "no" | "da" => &[BASIC_LATIN, LATIN_1_SUPPLEMENT],
56
57 "pl" => &[BASIC_LATIN, LATIN_1_SUPPLEMENT, LATIN_EXTENDED_A],
59
60 "cs" | "sk" => &[BASIC_LATIN, LATIN_1_SUPPLEMENT, LATIN_EXTENDED_A],
62
63 "hu" => &[BASIC_LATIN, LATIN_1_SUPPLEMENT, LATIN_EXTENDED_A],
65
66 "ro" => &[BASIC_LATIN, LATIN_1_SUPPLEMENT, LATIN_EXTENDED_A],
68
69 "tr" => &[BASIC_LATIN, LATIN_1_SUPPLEMENT, LATIN_EXTENDED_A],
71
72 "ru" | "uk" | "be" => &[BASIC_LATIN, CYRILLIC, CYRILLIC_SUPPLEMENT],
74
75 "el" => &[BASIC_LATIN, GREEK, GREEK_EXTENDED],
77
78 "ar" => &[BASIC_LATIN, ARABIC, ARABIC_SUPPLEMENT],
80
81 "he" => &[BASIC_LATIN, HEBREW],
83
84 "ja" => &[BASIC_LATIN, HIRAGANA, KATAKANA, CJK_UNIFIED_IDEOGRAPHS],
86
87 "zh" | "zh-CN" | "zh-TW" => &[BASIC_LATIN, CJK_UNIFIED_IDEOGRAPHS],
89
90 "ko" => &[BASIC_LATIN, HANGUL_SYLLABLES, CJK_UNIFIED_IDEOGRAPHS],
92
93 "hi" => &[BASIC_LATIN, DEVANAGARI],
95
96 "th" => &[BASIC_LATIN, THAI],
98
99 "vi" => &[
101 BASIC_LATIN,
102 LATIN_1_SUPPLEMENT,
103 LATIN_EXTENDED_A,
104 LATIN_EXTENDED_ADDITIONAL,
105 VIETNAMESE_EXTENSIONS,
106 ],
107 _ => return None,
108 };
109
110 Some(ranges)
111}
112
/// Collapses overlapping or touching ranges into the smallest equivalent list.
///
/// The input is sorted by `start`; a range is folded into its predecessor
/// whenever its `start` is less than or equal to the predecessor's `end`
/// (so `a..b` followed by `b..c` becomes `a..c`). The result is ordered by
/// `start`. An empty input yields an empty output.
pub fn merge_contiguous_ranges(ranges: Vec<Range<u32>>) -> Vec<Range<u32>> {
    let mut by_start = ranges;
    by_start.sort_by_key(|span| span.start);

    let mut out: Vec<Range<u32>> = Vec::new();
    for span in by_start {
        // Try to grow the most recently emitted range first.
        if let Some(tail) = out.last_mut() {
            if span.start <= tail.end {
                tail.end = tail.end.max(span.end);
                continue;
            }
        }
        // Either this is the first range or there is a gap before it.
        out.push(span);
    }

    out
}
137
/// Formats a half-open code-point range as decimal text.
///
/// * A range covering exactly one code point is written as that number,
///   e.g. `65..66` becomes `"65"`.
/// * A longer range is written as `first-last` with an inclusive last value,
///   e.g. `32..127` becomes `"32-126"`.
/// * An empty or inverted range (`start >= end`) covers no code points and is
///   written as an empty string.
///
/// The empty case is checked first so that the `end - 1` below can never
/// underflow (previously `0..0` overflowed, panicking in debug builds) and no
/// `start + 1` is needed (which overflowed for `start == u32::MAX`).
pub fn format_range(range: &Range<u32>) -> String {
    if range.start >= range.end {
        return String::new();
    }

    // Non-empty, so `end >= 1` and the subtraction is safe.
    let last = range.end - 1;
    if last == range.start {
        format!("{}", range.start)
    } else {
        format!("{}-{}", range.start, last)
    }
}
145
146pub fn format_ranges<'a>(ranges: impl IntoIterator<Item = &'a Range<u32>>) -> String {
147 ranges
148 .into_iter()
149 .map(format_range)
150 .collect::<Vec<String>>()
151 .join(",")
152}
153
// Unit tests for the locale lookup, the range merger and the range formatters.
#[cfg(test)]
mod tests {
    use super::*;

    use alloc::vec;

    // ---- get_locale_ranges ---------------------------------------------------

    #[test]
    fn test_get_locale_ranges_english() {
        let ranges = get_locale_ranges("en").unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0], BASIC_LATIN);
    }

    #[test]
    fn test_get_locale_ranges_french() {
        let ranges = get_locale_ranges("fr").unwrap();
        assert_eq!(ranges.len(), 4);
        assert!(ranges.contains(&BASIC_LATIN));
        assert!(ranges.contains(&LATIN_1_SUPPLEMENT));
    }

    #[test]
    fn test_get_locale_ranges_russian() {
        let ranges = get_locale_ranges("ru").unwrap();
        assert_eq!(ranges.len(), 3);
        assert!(ranges.contains(&CYRILLIC));
    }

    #[test]
    fn test_get_locale_ranges_japanese() {
        let ranges = get_locale_ranges("ja").unwrap();
        assert_eq!(ranges.len(), 4);
        assert!(ranges.contains(&HIRAGANA));
        assert!(ranges.contains(&KATAKANA));
        assert!(ranges.contains(&CJK_UNIFIED_IDEOGRAPHS));
    }

    #[test]
    fn test_get_locale_ranges_invalid() {
        assert!(get_locale_ranges("invalid").is_none());
        assert!(get_locale_ranges("xx").is_none());
    }

    #[test]
    fn test_get_locale_ranges_aliases() {
        // Chinese variants share one arm.
        assert!(get_locale_ranges("zh").is_some());
        assert!(get_locale_ranges("zh-CN").is_some());
        assert!(get_locale_ranges("zh-TW").is_some());

        // Scandinavian languages share one arm.
        assert!(get_locale_ranges("sv").is_some());
        assert!(get_locale_ranges("no").is_some());
        assert!(get_locale_ranges("da").is_some());
    }

    // ---- merge_contiguous_ranges ---------------------------------------------

    #[test]
    fn test_merge_contiguous_ranges_empty() {
        let ranges = vec![];
        let merged = merge_contiguous_ranges(ranges);
        assert_eq!(merged.len(), 0);
    }

    #[test]
    fn test_merge_contiguous_ranges_single() {
        let ranges = vec![0x20..0x7F];
        let merged = merge_contiguous_ranges(ranges);
        assert_eq!(merged.len(), 1);
        assert_eq!(merged[0], 0x20..0x7F);
    }

    #[test]
    fn test_merge_contiguous_ranges_adjacent() {
        let ranges = vec![0x20..0x7F, 0x7F..0xFF];
        let merged = merge_contiguous_ranges(ranges);
        assert_eq!(merged.len(), 1);
        assert_eq!(merged[0], 0x20..0xFF);
    }

    #[test]
    fn test_merge_contiguous_ranges_overlapping() {
        let ranges = vec![0x20..0x80, 0x50..0xFF];
        let merged = merge_contiguous_ranges(ranges);
        assert_eq!(merged.len(), 1);
        assert_eq!(merged[0], 0x20..0xFF);
    }

    #[test]
    fn test_merge_contiguous_ranges_non_overlapping() {
        let ranges = vec![0x20..0x7F, 0x100..0x17F];
        let merged = merge_contiguous_ranges(ranges);
        assert_eq!(merged.len(), 2);
        assert_eq!(merged[0], 0x20..0x7F);
        assert_eq!(merged[1], 0x100..0x17F);
    }

    #[test]
    fn test_merge_contiguous_ranges_unsorted() {
        let ranges = vec![0x100..0x17F, 0x20..0x7F, 0x7F..0xFF];
        let merged = merge_contiguous_ranges(ranges);
        assert_eq!(merged.len(), 2);
        assert_eq!(merged[0], 0x20..0xFF);
        assert_eq!(merged[1], 0x100..0x17F);
    }

    #[test]
    fn test_merge_contiguous_ranges_multiple_groups() {
        let ranges = vec![0x20..0x7F, 0x7F..0xFF, 0x200..0x2FF, 0x2FF..0x3FF];
        let merged = merge_contiguous_ranges(ranges);
        assert_eq!(merged.len(), 2);
        assert_eq!(merged[0], 0x20..0xFF);
        assert_eq!(merged[1], 0x200..0x3FF);
    }

    // ---- range constants -----------------------------------------------------

    #[test]
    fn test_unicode_range_boundaries() {
        assert_eq!(BASIC_LATIN.start, 0x20);
        assert_eq!(BASIC_LATIN.end, 0x7F);
        assert_eq!(CJK_UNIFIED_IDEOGRAPHS.start, 0x4E00);
        assert_eq!(CJK_UNIFIED_IDEOGRAPHS.end, 0x9FFF);
    }

    // ---- format_range / format_ranges ----------------------------------------

    #[test]
    fn test_format_range_single_code_point() {
        // A one-element range prints just the code point, in decimal.
        assert_eq!(format_range(&(0x41..0x42)), "65");
    }

    #[test]
    fn test_format_range_span() {
        // The exclusive end is converted to an inclusive last value.
        assert_eq!(format_range(&BASIC_LATIN), "32-126");
    }

    #[test]
    fn test_format_ranges_joins_with_commas() {
        let ranges = vec![BASIC_LATIN, LATIN_1_SUPPLEMENT, 0x41..0x42];
        assert_eq!(format_ranges(ranges.iter()), "32-126,160-254,65");
    }

    #[test]
    fn test_format_ranges_empty_input() {
        let ranges: Vec<Range<u32>> = vec![];
        assert_eq!(format_ranges(ranges.iter()), "");
    }
}