Context Navigation

udf_osta.c@ 39916d6

Visit:

Last change on this file since 39916d6 was d7f7a4a, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 3 years ago

Replace some license headers with SPDX identifier

Headers are replaced using tools/transorm-copyright.sh only
when it can be matched verbatim with the license header used
throughout most of the codebase.

Property mode set to 100644

File size: 6.9 KB

Line
1	/*
2	* SPDX-FileCopyrightText: 2012 Julia Medvedeva
3	* Copyright 1995 Micro Design International, Inc.
4	*
5	* SPDX-License-Identifier: BSD-3-Clause
6	*/
7
8	/*
9	* OSTA compliant Unicode compression, uncompression routines,
10	* file name translation routine for OS/2, Windows 95, Windows NT,
11	* Macintosh and UNIX.
12	* Written by Jason M. Rinn.
13	* Micro Design International gives permission for the free use of the
14	* following source code.
15	*/
16
17	/** @addtogroup udf
18	* @{
19	*/
20	/**
21	* @file udf_osta.c
22	* @brief OSTA compliant functions.
23	*/
24
25	#include <stdlib.h>
26	#include <str.h>
27	#include <macros.h>
28	#include <errno.h>
29	#include "udf_osta.h"
30	#include "udf_cksum.h"
31
/** Check whether a Unicode character is legal in a UNIX file name.
 *
 * The only characters UNIX forbids in a file name are NUL (0x0000)
 * and slash (0x002F).
 *
 * @param[in] ch Unicode character to check
 *
 * @return True if the character is legal, false otherwise.
 *
 */
static bool legal_check(uint16_t ch)
{
	return (ch != 0x0000) && (ch != 0x002F);
}
42
/** Convert OSTA CS0 compressed Unicode name to Unicode.
 *
 * The first input byte is the compression ID: 8 means one byte per
 * character, 16 means two bytes per character (high byte first).
 * The Unicode output will be in the byte order that the local compiler
 * uses for 16-bit values.
 *
 * The routine validates the compression ID and never reads beyond
 * number_of_bytes input bytes nor writes beyond unicode_max_len output
 * characters. No terminator is appended to the output. If a 16-bit
 * encoded input ends with a lone high byte, the missing low byte is
 * treated as zero.
 *
 * @param[in]  number_of_bytes Number of bytes read from media
 * @param[in]  udf_compressed  Bytes read from media
 * @param[out] unicode         Uncompressed unicode characters
 * @param[in]  unicode_max_len Size of output array (in characters)
 *
 * @return Number of Unicode characters which were uncompressed
 *         (zero for empty input or an unknown compression ID).
 *
 */
static size_t udf_uncompress_unicode(size_t number_of_bytes,
    const uint8_t *udf_compressed, uint16_t *unicode, size_t unicode_max_len)
{
	/* An empty field has no compression ID byte; do not read past it. */
	if ((number_of_bytes == 0) || (udf_compressed == NULL) ||
	    (unicode == NULL))
		return 0;

	/* First check for valid compID. */
	uint8_t comp_id = udf_compressed[0];
	if ((comp_id != 8) && (comp_id != 16))
		return 0;

	size_t unicode_idx = 0;
	size_t byte_idx = 1;

	/* Loop through all the bytes. */
	while ((byte_idx < number_of_bytes) && (unicode_idx < unicode_max_len)) {
		uint16_t ch = 0;

		if (comp_id == 16) {
			/* The first byte forms the high bits of the character. */
			ch = (uint16_t) (udf_compressed[byte_idx++] << 8);
		}

		if (byte_idx < number_of_bytes) {
			/* Then the next byte forms the low bits. */
			ch |= udf_compressed[byte_idx++];
		}

		unicode[unicode_idx++] = ch;
	}

	return unicode_idx;
}
94
95	/** Translate a long file name
96	*
97	* Translate a long file name to one using a MAXLEN and an illegal char set
98	* in accord with the OSTA requirements. Assumes the name has already been
99	* translated to Unicode.
100	*
101	* @param[out] new_name Translated name. Must be of length MAXLEN
102	* @param[in] udf_name Name from UDF volume
103	* @param[in] udf_len Length of UDF Name
104	*
105	* @return Number of Unicode characters in translated name.
106	*
107	*/
108	size_t udf_translate_name(uint16_t new_name, uint16_t udf_name,
109	size_t udf_len)
110	{
111	bool needs_crc = false;
112	bool has_ext = false;
113	size_t ext_idx = 0;
114	size_t new_idx = 0;
115	size_t new_ext_idx = 0;
116
117	for (size_t idx = 0; idx < udf_len; idx++) {
118	uint16_t current = udf_name[idx];
119
120	if ((!legal_check(current)) \|\| (!ascii_check(current))) {
121	needs_crc = true;
122
123	/*
124	* Replace Illegal and non-displayable chars with
125	* underscore.
126	*/
127	current = ILLEGAL_CHAR_MARK;
128
129	/*
130	* Skip any other illegal or non-displayable
131	* characters.
132	*/
133	while ((idx + 1 < udf_len) &&
134	(!legal_check(udf_name[idx + 1]) \|\|
135	(!ascii_check(udf_name[idx + 1]))))
136	idx++;
137	}
138
139	/* Record position of extension, if one is found. */
140	if ((current == PERIOD) && ((udf_len - idx - 1) <= EXT_SIZE)) {
141	if (udf_len == idx + 1) {
142	/* A trailing period is NOT an extension. */
143	has_ext = false;
144	} else {
145	has_ext = true;
146	ext_idx = idx;
147	new_ext_idx = new_idx;
148	}
149	}
150
151	if (new_idx < MAXLEN)
152	new_name[new_idx++] = current;
153	else
154	needs_crc = true;
155	}
156
157	if (needs_crc) {
158	uint16_t ext[EXT_SIZE];
159	size_t local_ext_idx = 0;
160
161	if (has_ext) {
162	size_t max_filename_len;
163
164	/* Translate extension, and store it in ext. */
165	for (size_t idx = 0; (idx < EXT_SIZE) &&
166	(ext_idx + idx + 1 < udf_len); idx++) {
167	uint16_t current = udf_name[ext_idx + idx + 1];
168
169	if ((!legal_check(current)) \|\| (!ascii_check(current))) {
170	needs_crc = true;
171
172	/*
173	* Replace Illegal and non-displayable
174	* chars with underscore.
175	*/
176	current = ILLEGAL_CHAR_MARK;
177
178	/*
179	* Skip any other illegal or
180	* non-displayable characters.
181	*/
182	while ((idx + 1 < EXT_SIZE) &&
183	((!legal_check(udf_name[ext_idx + idx + 2])) \|\|
184	(!ascii_check(udf_name[ext_idx + idx + 2]))))
185	idx++;
186	}
187
188	ext[local_ext_idx++] = current;
189	}
190
191	/*
192	* Truncate filename to leave room for extension and
193	* CRC.
194	*/
195	max_filename_len = ((MAXLEN - 5) - local_ext_idx - 1);
196	if (new_idx > max_filename_len)
197	new_idx = max_filename_len;
198	else
199	new_idx = new_ext_idx;
200	} else if (new_idx > MAXLEN - 5) {
201	/* If no extension, make sure to leave room for CRC. */
202	new_idx = MAXLEN - 5;
203	}
204
205	/* Add mark for CRC. */
206	new_name[new_idx++] = CRC_MARK;
207
208	/* Calculate CRC from original filename. */
209	uint16_t value_crc = udf_unicode_cksum(udf_name, udf_len);
210
211	/* Convert 16-bits of CRC to hex characters. */
212	const char hex_char[] = "0123456789ABCDEF";
213
214	new_name[new_idx++] = hex_char[(value_crc & 0xf000) >> 12];
215	new_name[new_idx++] = hex_char[(value_crc & 0x0f00) >> 8];
216	new_name[new_idx++] = hex_char[(value_crc & 0x00f0) >> 4];
217	new_name[new_idx++] = hex_char[(value_crc & 0x000f)];
218
219	/* Place a translated extension at end, if found. */
220	if (has_ext) {
221	new_name[new_idx++] = PERIOD;
222
223	for (size_t idx = 0; idx < local_ext_idx; idx++)
224	new_name[new_idx++] = ext[idx];
225	}
226	}
227
228	return new_idx;
229	}
230
231	/** Decode from dchar to utf8
232	*
233	* @param result Returned value - utf8 string
234	* @param result_len Length of output string
235	* @param id Input string
236	* @param len Length of input string
237	* @param chsp Decode method
238	*
239	*/
240	void udf_to_unix_name(char result, size_t result_len, char id, size_t len,
241	udf_charspec_t *chsp)
242	{
243	const char *osta_id = "OSTA Compressed Unicode";
244	size_t ucode_chars, nice_uchars;
245
246	uint16_t raw_name = malloc(MAX_BUF sizeof(uint16_t));
247	uint16_t unix_name = malloc(MAX_BUF sizeof(uint16_t));
248
249	// FIXME: Check for malloc returning NULL
250
251	bool is_osta_typ0 = (chsp->type == 0) &&
252	(str_cmp((char *) chsp->info, osta_id) == 0);
253
254	if (is_osta_typ0) {
255	*raw_name = 0;
256	*unix_name = 0;
257
258	ucode_chars =
259	udf_uncompress_unicode(len, (uint8_t *) id, raw_name, MAX_BUF);
260	ucode_chars = min(ucode_chars, utf16_wsize(raw_name));
261	nice_uchars =
262	udf_translate_name(unix_name, raw_name, ucode_chars);
263
264	/* Output UTF-8 */
265	unix_name[nice_uchars] = 0;
266	utf16_to_str(result, result_len, unix_name);
267	} else {
268	/* Assume 8 bit char length byte Latin-1 */
269	str_ncpy(result, result_len, (char *) (id + 1),
270	str_size((char *) (id + 1)));
271	}
272
273	free(raw_name);
274	free(unix_name);
275	}
276
277	/**
278	* @}
279	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/uspace/srv/fs/udf/udf_osta.c@ 39916d6

Download in other formats: