Context Navigation

source: mainline/uspace/srv/fs/udf/udf_osta.c@ ac31040

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since ac31040 was 5c702a8, checked in by Martin Decky <martin@…>, 13 years ago
dead and unfinished code elimination
Property mode set to `100644`
File size: 8.6 KB

Line
1	/*
2	* Copyright (c) 2012 Julia Medvedeva
3	* All rights reserved.
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	*
9	* - Redistributions of source code must retain the above copyright
10	* notice, this list of conditions and the following disclaimer.
11	* - Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	* - The name of the author may not be used to endorse or promote products
15	* derived from this software without specific prior written permission.
16	*
17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	/*
30	* OSTA compliant Unicode compression, uncompression routines,
31	* file name translation routine for OS/2, Windows 95, Windows NT,
32	* Macintosh and UNIX.
33	* Copyright 1995 Micro Design International, Inc.
34	* Written by Jason M. Rinn.
35	* Micro Design International gives permission for the free use of the
36	* following source code.
37	*/
38
39	/** @addtogroup fs
40	* @{
41	*/
42	/**
43	* @file udf_osta.c
44	* @brief OSTA compliant functions.
45	*/
46
47	#include <malloc.h>
48	#include <str.h>
49	#include <macros.h>
50	#include <errno.h>
51	#include "udf_osta.h"
52	#include "udf_cksum.h"
53
/** Count the 16-bit code units of a zero-terminated UTF-16 string.
 *
 * FIXME: The result is a number of 16-bit code units, not a number of
 *        characters. UTF-16 is a variable-width encoding, so a
 *        surrogate pair is counted as two.
 *
 * @param[in] string Zero-terminated array of 16-bit code units.
 *                   NULL is treated as an empty string.
 *
 * @return Number of code units preceding the terminating zero.
 *
 */
static size_t utf16_length(const uint16_t *string)
{
	if (string == NULL)
		return 0;

	size_t len = 0;

	while (string[len] != 0)
		len++;

	return len;
}
69
/** Check whether a character may appear in a UNIX file name.
 *
 * The only illegal UNIX file name characters are NUL (0x0000) and
 * slash (0x002F).
 *
 * @param[in] ch 16-bit character to examine.
 *
 * @return True if the character is legal, false if it is NUL or slash.
 *
 */
static bool legal_check(uint16_t ch)
{
	return (ch != 0x0000) && (ch != 0x002F);
}
80
/** Convert OSTA CS0 compressed Unicode name to Unicode.
 *
 * The first input byte is the compression ID. With ID 8 every following
 * byte becomes one 16-bit character; with ID 16 every following pair of
 * bytes (high byte first) becomes one 16-bit character. A trailing
 * unpaired byte of a 16-bit name is kept as the high byte of a final
 * character whose low byte is zero.
 *
 * The Unicode output will be in the byte order that the local compiler
 * uses for 16-bit values. The output is NOT zero-terminated.
 *
 * NOTE: Apart from rejecting empty input, NULL buffers and unknown
 *       compression IDs, this routine does not validate the compressed
 *       name.
 *
 * @param[in]  number_of_bytes Number of bytes read from media
 * @param[in]  udf_compressed  Bytes read from media
 * @param[out] unicode         Uncompressed unicode characters
 * @param[in]  unicode_max_len Size of output array (in characters)
 *
 * @return Number of Unicode characters which were uncompressed
 *         (zero if the input is empty or the compression ID is
 *         neither 8 nor 16).
 *
 */
static size_t udf_uncompress_unicode(size_t number_of_bytes,
    const uint8_t *udf_compressed, uint16_t *unicode, size_t unicode_max_len)
{
	/* Without at least one byte there is no compression ID to read. */
	if ((number_of_bytes == 0) || (udf_compressed == NULL) ||
	    (unicode == NULL))
		return 0;

	uint8_t comp_id = udf_compressed[0];

	/* First check for valid compID. */
	if ((comp_id != 8) && (comp_id != 16))
		return 0;

	size_t unicode_idx = 0;
	size_t byte_idx = 1;

	/* Loop through all the bytes. */
	while ((byte_idx < number_of_bytes) && (unicode_idx < unicode_max_len)) {
		uint16_t ch = 0;

		/* 16-bit names store the high byte first. */
		if (comp_id == 16)
			ch = (uint16_t) (udf_compressed[byte_idx++] << 8);

		/* Then the next byte (if any is left) to the low bits. */
		if (byte_idx < number_of_bytes)
			ch |= udf_compressed[byte_idx++];

		unicode[unicode_idx++] = ch;
	}

	return unicode_idx;
}
132
133	/** Translate a long file name
134	*
135	* Translate a long file name to one using a MAXLEN and an illegal char set
136	* in accord with the OSTA requirements. Assumes the name has already been
137	* translated to Unicode.
138	*
139	* @param[out] new_name Translated name. Must be of length MAXLEN
140	* @param[in] udf_name Name from UDF volume
141	* @param[in] udf_len Length of UDF Name
142	*
143	* @return Number of Unicode characters in translated name.
144	*
145	*/
146	size_t udf_translate_name(uint16_t new_name, uint16_t udf_name,
147	size_t udf_len)
148	{
149	bool needs_crc = false;
150	bool has_ext = false;
151	size_t ext_idx = 0;
152	size_t new_idx = 0;
153	size_t new_ext_idx = 0;
154
155	for (size_t idx = 0; idx < udf_len; idx++) {
156	uint16_t current = udf_name[idx];
157
158	if ((!legal_check(current)) \|\| (!ascii_check(current))) {
159	needs_crc = true;
160
161	/*
162	* Replace Illegal and non-displayable chars with
163	* underscore.
164	*/
165	current = ILLEGAL_CHAR_MARK;
166
167	/*
168	* Skip any other illegal or non-displayable
169	* characters.
170	*/
171	while ((idx + 1 < udf_len) &&
172	(!legal_check(udf_name[idx + 1]) \|\|
173	(!ascii_check(udf_name[idx + 1]))))
174	idx++;
175	}
176
177	/* Record position of extension, if one is found. */
178	if ((current == PERIOD) && ((udf_len - idx - 1) <= EXT_SIZE)) {
179	if (udf_len == idx + 1) {
180	/* A trailing period is NOT an extension. */
181	has_ext = false;
182	} else {
183	has_ext = true;
184	ext_idx = idx;
185	new_ext_idx = new_idx;
186	}
187	}
188
189	if (new_idx < MAXLEN)
190	new_name[new_idx++] = current;
191	else
192	needs_crc = true;
193	}
194
195	if (needs_crc) {
196	uint16_t ext[EXT_SIZE];
197	size_t local_ext_idx = 0;
198
199	if (has_ext) {
200	size_t max_filename_len;
201
202	/* Translate extension, and store it in ext. */
203	for (size_t idx = 0; (idx < EXT_SIZE) &&
204	(ext_idx + idx + 1 < udf_len); idx++) {
205	uint16_t current = udf_name[ext_idx + idx + 1];
206
207	if ((!legal_check(current)) \|\| (!ascii_check(current))) {
208	needs_crc = true;
209
210	/*
211	* Replace Illegal and non-displayable
212	* chars with underscore.
213	*/
214	current = ILLEGAL_CHAR_MARK;
215
216	/*
217	* Skip any other illegal or
218	* non-displayable characters.
219	*/
220	while ((idx + 1 < EXT_SIZE) &&
221	((!legal_check(udf_name[ext_idx + idx + 2])) \|\|
222	(!ascii_check(udf_name[ext_idx + idx + 2]))))
223	idx++;
224	}
225
226	ext[local_ext_idx++] = current;
227	}
228
229	/*
230	* Truncate filename to leave room for extension and
231	* CRC.
232	*/
233	max_filename_len = ((MAXLEN - 5) - local_ext_idx - 1);
234	if (new_idx > max_filename_len)
235	new_idx = max_filename_len;
236	else
237	new_idx = new_ext_idx;
238	} else if (new_idx > MAXLEN - 5) {
239	/* If no extension, make sure to leave room for CRC. */
240	new_idx = MAXLEN - 5;
241	}
242
243	/* Add mark for CRC. */
244	new_name[new_idx++] = CRC_MARK;
245
246	/* Calculate CRC from original filename. */
247	uint16_t value_crc = udf_unicode_cksum(udf_name, udf_len);
248
249	/* Convert 16-bits of CRC to hex characters. */
250	const char hex_char[] = "0123456789ABCDEF";
251
252	new_name[new_idx++] = hex_char[(value_crc & 0xf000) >> 12];
253	new_name[new_idx++] = hex_char[(value_crc & 0x0f00) >> 8];
254	new_name[new_idx++] = hex_char[(value_crc & 0x00f0) >> 4];
255	new_name[new_idx++] = hex_char[(value_crc & 0x000f)];
256
257	/* Place a translated extension at end, if found. */
258	if (has_ext) {
259	new_name[new_idx++] = PERIOD;
260
261	for (size_t idx = 0; idx < local_ext_idx; idx++)
262	new_name[new_idx++] = ext[idx];
263	}
264	}
265
266	return new_idx;
267	}
268
269	/** Decode from dchar to utf8
270	*
271	* @param result Returned value - utf8 string
272	* @param result_len Length of output string
273	* @param id Input string
274	* @param len Length of input string
275	* @param chsp Decode method
276	*
277	*/
278	void udf_to_unix_name(char result, size_t result_len, char id, size_t len,
279	udf_charspec_t *chsp)
280	{
281	const char *osta_id = "OSTA Compressed Unicode";
282	size_t ucode_chars, nice_uchars;
283
284	uint16_t raw_name = malloc(MAX_BUF sizeof(uint16_t));
285	uint16_t unix_name = malloc(MAX_BUF sizeof(uint16_t));
286
287	// FIXME: Check for malloc returning NULL
288
289	bool is_osta_typ0 = (chsp->type == 0) &&
290	(str_cmp((char *) chsp->info, osta_id) == 0);
291
292	if (is_osta_typ0) {
293	*raw_name = 0;
294	*unix_name = 0;
295
296	ucode_chars =
297	udf_uncompress_unicode(len, (uint8_t *) id, raw_name, MAX_BUF);
298	ucode_chars = min(ucode_chars, utf16_length(raw_name));
299	nice_uchars =
300	udf_translate_name(unix_name, raw_name, ucode_chars);
301
302	/* Output UTF-8 */
303	unix_name[nice_uchars] = 0;
304	utf16_to_str(result, result_len, unix_name);
305	} else {
306	/* Assume 8 bit char length byte Latin-1 */
307	str_ncpy(result, result_len, (char *) (id + 1),
308	str_size((char *) (id + 1)));
309	}
310
311	free(raw_name);
312	free(unix_name);
313	}
314
315	/**
316	* @}
317	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: