Context Navigation

source: mainline/uspace/srv/fs/udf/udf_osta.c@ 48e3190

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 48e3190 was 48e3190, checked in by Martin Decky <martin@…>, 13 years ago
cherrypick UDF file system driver implementation (originally by Julia Medvedeva) with coding style improvements and minor changes
Property mode set to `100644`
File size: 10.1 KB

Line
1	/*
2	* Copyright (c) 2012 Julia Medvedeva
3	* All rights reserved.
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	*
9	* - Redistributions of source code must retain the above copyright
10	* notice, this list of conditions and the following disclaimer.
11	* - Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	* - The name of the author may not be used to endorse or promote products
15	* derived from this software without specific prior written permission.
16	*
17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	/*
30	* OSTA compliant Unicode compression, uncompression routines,
31	* file name translation routine for OS/2, Windows 95, Windows NT,
32	* Macintosh and UNIX.
33	* Copyright 1995 Micro Design International, Inc.
34	* Written by Jason M. Rinn.
35	* Micro Design International gives permission for the free use of the
36	* following source code.
37	*/
38
39	/** @addtogroup fs
40	* @{
41	*/
42	/**
43	* @file udf_osta.c
44	* @brief OSTA compliant functions.
45	*/
46
47	#include <malloc.h>
48	#include <str.h>
49	#include <macros.h>
50	#include <errno.h>
51	#include "udf_osta.h"
52	#include "udf_cksum.h"
53
/** Count the 16-bit code units in a zero-terminated UTF-16 string.
 *
 * The result is the number of uint16_t elements that precede the first
 * zero element. It is NOT the number of Unicode code points: UTF-16 is
 * a variable-width encoding and a surrogate pair counts as two units.
 *
 * @param[in] string Zero-terminated array of 16-bit code units. The
 *                   caller must guarantee that a terminating zero is
 *                   present, the scan is otherwise unbounded.
 *
 * @return Number of code units before the terminator.
 *
 */
static size_t utf16_length(const uint16_t *string)
{
	size_t len = 0;
	
	while (string[len] != 0)
		len++;
	
	return len;
}
69
/** Check whether a character may appear in a UNIX file name.
 *
 * The only characters rejected are NUL (0x0000) and slash (0x002F).
 *
 * @param[in] ch 16-bit character to test.
 *
 * @return True if the character is legal, false otherwise.
 *
 */
static bool legal_check(uint16_t ch)
{
	return (ch != 0x0000) && (ch != 0x002F);
}
80
/** Convert OSTA CS0 compressed Unicode name to Unicode.
 *
 * The first input byte is the compression ID. With ID 8 every following
 * byte becomes one 16-bit character; with ID 16 every following pair of
 * bytes (high byte first) becomes one 16-bit character. The output is
 * stored as native uint16_t values.
 *
 * NOTE: The only validation performed is on the input length and on
 * the compression ID. A trailing unpaired byte in a 16-bit stream is
 * stored as the high byte of a final character whose low byte is zero.
 *
 * @param[in]  number_of_bytes Number of bytes read from media
 * @param[in]  udf_compressed  Bytes read from media
 * @param[out] unicode         Uncompressed unicode characters
 * @param[in]  unicode_max_len Size of output array (in characters)
 *
 * @return Number of Unicode characters which were uncompressed,
 *         0 if the input is empty or the compression ID is invalid.
 *
 */
size_t udf_uncompress_unicode(size_t number_of_bytes, uint8_t *udf_compressed,
    uint16_t *unicode, size_t unicode_max_len)
{
	/* An empty input has no compression ID byte to inspect. */
	if (number_of_bytes == 0)
		return 0;
	
	uint8_t comp_id = udf_compressed[0];
	
	/* First check for valid compID. */
	if ((comp_id != 8) && (comp_id != 16))
		return 0;
	
	size_t unicode_idx = 0;
	size_t byte_idx = 1;
	
	/* Loop through all the bytes. */
	while ((byte_idx < number_of_bytes) && (unicode_idx < unicode_max_len)) {
		if (comp_id == 16) {
			/*
			 * Move the first byte to the high bits of the
			 * Unicode char.
			 */
			unicode[unicode_idx] =
			    (uint16_t) (udf_compressed[byte_idx++] << 8);
		} else
			unicode[unicode_idx] = 0;
		
		if (byte_idx < number_of_bytes) {
			/* Then the next byte to the low bits. */
			unicode[unicode_idx] |= udf_compressed[byte_idx++];
		}
		
		unicode_idx++;
	}
	
	return unicode_idx;
}
132
/** Convert Unicode wide characters to OSTA CS0 compressed Unicode string.
 *
 * The first output byte is the compression ID. With ID 8 only the low
 * byte of every character is emitted; with ID 16 the high byte is
 * emitted first, followed by the low byte. The input characters are
 * read as native uint16_t values.
 *
 * NOTE: This routine assumes the implementation already knows,
 * by the local environment, how many bits are appropriate and therefore
 * does no checking to test if the input characters fit into that number
 * of bits or not. The output buffer must be large enough for
 * 1 + number_of_chars bytes (ID 8) or 1 + 2 * number_of_chars bytes
 * (ID 16); no bound is checked here.
 *
 * @param[in]  number_of_chars Number of unicode characters.
 * @param[in]  comp_id         Compression ID to be used (8 or 16).
 * @param[in]  unicode         Unicode characters to compress.
 * @param[out] udf_compressed  Compressed string, as bytes.
 *
 * @return The total number of bytes in the compressed OSTA CS0 string,
 *         including the compression ID, or 0 if the compression ID
 *         is invalid (nothing is written in that case).
 *
 */
size_t udf_compress_unicode(size_t number_of_chars, uint8_t comp_id,
    uint16_t *unicode, uint8_t *udf_compressed)
{
	if ((comp_id != 8) && (comp_id != 16))
		return 0;
	
	/* Place compression code in first byte. */
	udf_compressed[0] = comp_id;
	
	size_t byte_idx = 1;
	
	for (size_t unicode_idx = 0; unicode_idx < number_of_chars;
	    unicode_idx++) {
		uint16_t current = unicode[unicode_idx];
		
		if (comp_id == 16) {
			/*
			 * First, place the high bits of the char
			 * into the byte stream.
			 */
			udf_compressed[byte_idx++] = (uint8_t) (current >> 8);
		}
		
		/* Then place the low bits into the stream. */
		udf_compressed[byte_idx++] = (uint8_t) (current & 0x00FF);
	}
	
	return byte_idx;
}
182
183	/** Translate a long file name
184	*
185	* Translate a long file name to one using a MAXLEN and an illegal char set
186	* in accord with the OSTA requirements. Assumes the name has already been
187	* translated to Unicode.
188	*
189	* @param[out] new_name Translated name. Must be of length MAXLEN
190	* @param[in] udf_name Name from UDF volume
191	* @param[in] udf_len Length of UDF Name
192	*
193	* @return Number of Unicode characters in translated name.
194	*
195	*/
196	size_t udf_translate_name(uint16_t new_name, uint16_t udf_name,
197	size_t udf_len)
198	{
199	bool needs_crc = false;
200	bool has_ext = false;
201	size_t ext_idx = 0;
202	size_t new_idx = 0;
203	size_t new_ext_idx = 0;
204
205	for (size_t idx = 0; idx < udf_len; idx++) {
206	uint16_t current = udf_name[idx];
207
208	if ((!legal_check(current)) \|\| (!ascii_check(current))) {
209	needs_crc = true;
210
211	/*
212	* Replace Illegal and non-displayable chars with
213	* underscore.
214	*/
215	current = ILLEGAL_CHAR_MARK;
216
217	/*
218	* Skip any other illegal or non-displayable
219	* characters.
220	*/
221	while ((idx + 1 < udf_len) &&
222	(!legal_check(udf_name[idx + 1]) \|\|
223	(!ascii_check(udf_name[idx + 1]))))
224	idx++;
225	}
226
227	/* Record position of extension, if one is found. */
228	if ((current == PERIOD) && ((udf_len - idx - 1) <= EXT_SIZE)) {
229	if (udf_len == idx + 1) {
230	/* A trailing period is NOT an extension. */
231	has_ext = false;
232	} else {
233	has_ext = true;
234	ext_idx = idx;
235	new_ext_idx = new_idx;
236	}
237	}
238
239	if (new_idx < MAXLEN)
240	new_name[new_idx++] = current;
241	else
242	needs_crc = true;
243	}
244
245	if (needs_crc) {
246	uint16_t ext[EXT_SIZE];
247	size_t local_ext_idx = 0;
248
249	if (has_ext) {
250	size_t max_filename_len;
251
252	/* Translate extension, and store it in ext. */
253	for (size_t idx = 0; (idx < EXT_SIZE) &&
254	(ext_idx + idx + 1 < udf_len); idx++) {
255	uint16_t current = udf_name[ext_idx + idx + 1];
256
257	if ((!legal_check(current)) \|\| (!ascii_check(current))) {
258	needs_crc = true;
259
260	/*
261	* Replace Illegal and non-displayable
262	* chars with underscore.
263	*/
264	current = ILLEGAL_CHAR_MARK;
265
266	/*
267	* Skip any other illegal or
268	* non-displayable characters.
269	*/
270	while ((idx + 1 < EXT_SIZE) &&
271	((!legal_check(udf_name[ext_idx + idx + 2])) \|\|
272	(!ascii_check(udf_name[ext_idx + idx + 2]))))
273	idx++;
274	}
275
276	ext[local_ext_idx++] = current;
277	}
278
279	/*
280	* Truncate filename to leave room for extension and
281	* CRC.
282	*/
283	max_filename_len = ((MAXLEN - 5) - local_ext_idx - 1);
284	if (new_idx > max_filename_len)
285	new_idx = max_filename_len;
286	else
287	new_idx = new_ext_idx;
288	} else if (new_idx > MAXLEN - 5) {
289	/* If no extension, make sure to leave room for CRC. */
290	new_idx = MAXLEN - 5;
291	}
292
293	/* Add mark for CRC. */
294	new_name[new_idx++] = CRC_MARK;
295
296	/* Calculate CRC from original filename. */
297	uint16_t value_crc = udf_unicode_cksum(udf_name, udf_len);
298
299	/* Convert 16-bits of CRC to hex characters. */
300	const char hex_char[] = "0123456789ABCDEF";
301
302	new_name[new_idx++] = hex_char[(value_crc & 0xf000) >> 12];
303	new_name[new_idx++] = hex_char[(value_crc & 0x0f00) >> 8];
304	new_name[new_idx++] = hex_char[(value_crc & 0x00f0) >> 4];
305	new_name[new_idx++] = hex_char[(value_crc & 0x000f)];
306
307	/* Place a translated extension at end, if found. */
308	if (has_ext) {
309	new_name[new_idx++] = PERIOD;
310
311	for (size_t idx = 0; idx < local_ext_idx; idx++)
312	new_name[new_idx++] = ext[idx];
313	}
314	}
315
316	return new_idx;
317	}
318
319	/** Decode from dchar to utf8
320	*
321	* @param result Returned value - utf8 string
322	* @param result_len Length of output string
323	* @param id Input string
324	* @param len Length of input string
325	* @param chsp Decode method
326	*
327	*/
328	void udf_to_unix_name(char result, size_t result_len, char id, size_t len,
329	udf_charspec_t *chsp)
330	{
331	const char *osta_id = "OSTA Compressed Unicode";
332	size_t ucode_chars, nice_uchars;
333
334	uint16_t raw_name = malloc(MAX_BUF sizeof(uint16_t));
335	uint16_t unix_name = malloc(MAX_BUF sizeof(uint16_t));
336
337	// FIXME: Check for malloc returning NULL
338
339	bool is_osta_typ0 = (chsp->type == 0) &&
340	(str_cmp((char *) chsp->info, osta_id) == 0);
341
342	if (is_osta_typ0) {
343	*raw_name = 0;
344	*unix_name = 0;
345
346	ucode_chars =
347	udf_uncompress_unicode(len, (uint8_t *) id, raw_name, MAX_BUF);
348	ucode_chars = min(ucode_chars, utf16_length(raw_name));
349	nice_uchars =
350	udf_translate_name(unix_name, raw_name, ucode_chars);
351
352	/* Output UTF-8 */
353	unix_name[nice_uchars] = 0;
354	utf16_to_str(result, result_len, unix_name);
355	} else {
356	/* Assume 8 bit char length byte Latin-1 */
357	str_ncpy(result, result_len, (char *) (id + 1),
358	str_size((char *) (id + 1)));
359	}
360
361	free(raw_name);
362	free(unix_name);
363	}
364
365	/**
366	* @}
367	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: