source: mainline/uspace/srv/fs/udf/udf_osta.c@ ac31040

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since ac31040 was 5c702a8, checked in by Martin Decky <martin@…>, 13 years ago

dead and unfinished code elimination

  • Property mode set to 100644
File size: 8.6 KB
Line 
1/*
2 * Copyright (c) 2012 Julia Medvedeva
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * OSTA compliant Unicode compression, uncompression routines,
31 * file name translation routine for OS/2, Windows 95, Windows NT,
32 * Macintosh and UNIX.
33 * Copyright 1995 Micro Design International, Inc.
34 * Written by Jason M. Rinn.
35 * Micro Design International gives permission for the free use of the
36 * following source code.
37 */
38
39/** @addtogroup fs
40 * @{
41 */
42/**
43 * @file udf_osta.c
44 * @brief OSTA compliant functions.
45 */
46
47#include <malloc.h>
48#include <str.h>
49#include <macros.h>
50#include <errno.h>
51#include "udf_osta.h"
52#include "udf_cksum.h"
53
/** Count the 16-bit code units of a zero-terminated UTF-16 string
 *
 * The terminating zero code unit is not included in the result.
 *
 * FIXME: The result is a number of code units, not a number of
 *        characters. UTF-16 is a variable-width encoding, so a
 *        surrogate pair contributes two units to the result.
 *
 * @param string Zero-terminated sequence of 16-bit code units
 *
 * @return Number of code units preceding the terminator.
 *
 */
static size_t utf16_length(uint16_t *string)
{
	uint16_t *cursor = string;

	/* Advance to the terminating zero code unit. */
	while (*cursor != 0)
		cursor++;

	return (size_t) (cursor - string);
}
69
/** Check whether a character may appear in a UNIX file name
 *
 * The only characters rejected are NUL (0x0000) and the
 * slash (0x002F).
 *
 * @param ch 16-bit character to examine
 *
 * @return True if the character is legal, false otherwise.
 *
 */
static bool legal_check(uint16_t ch)
{
	return (ch != 0x0000) && (ch != 0x002F);
}
80
/** Convert OSTA CS0 compressed Unicode name to Unicode.
 *
 * The first input byte is the compression ID: 8 means that every
 * following byte is one character, 16 means that every following pair
 * of bytes is one character with the high byte stored first. If a
 * 16-bit name ends with a lone byte, that byte becomes the high half
 * of the last character and the low half is zero.
 *
 * The Unicode output will be in the byte order that the local compiler
 * uses for 16-bit values. The output is NOT zero-terminated.
 *
 * NOTE: Apart from rejecting empty input, this routine only performs
 * error checking on the compression ID. At most unicode_max_len
 * characters are written to the output.
 *
 * @param[in]  number_of_bytes Number of bytes read from media
 * @param[in]  udf_compressed  Bytes read from media
 * @param[out] unicode         Uncompressed unicode characters
 * @param[in]  unicode_max_len Size of output array (in characters)
 *
 * @return Number of Unicode characters which were uncompressed.
 *         Zero if the input is empty or the compression ID is
 *         neither 8 nor 16.
 *
 */
static size_t udf_uncompress_unicode(size_t number_of_bytes,
    const uint8_t *udf_compressed, uint16_t *unicode, size_t unicode_max_len)
{
	/*
	 * Without at least one byte there is no compression ID to
	 * inspect; reading udf_compressed[0] would be out of bounds.
	 */
	if ((number_of_bytes == 0) || (udf_compressed == NULL))
		return 0;

	/* First check for valid compID. */
	uint8_t comp_id = udf_compressed[0];
	if ((comp_id != 8) && (comp_id != 16))
		return 0;

	size_t unicode_idx = 0;
	size_t byte_idx = 1;

	/* Loop through all the bytes. */
	while ((byte_idx < number_of_bytes) && (unicode_idx < unicode_max_len)) {
		uint16_t ch = 0;

		if (comp_id == 16) {
			/*
			 * Move the first byte to the high bits of the
			 * Unicode char.
			 */
			ch = (uint16_t) (udf_compressed[byte_idx++] << 8);
		}

		if (byte_idx < number_of_bytes) {
			/* Then the next byte to the low bits. */
			ch |= udf_compressed[byte_idx++];
		}

		unicode[unicode_idx++] = ch;
	}

	return unicode_idx;
}
132
133/** Translate a long file name
134 *
135 * Translate a long file name to one using a MAXLEN and an illegal char set
136 * in accord with the OSTA requirements. Assumes the name has already been
137 * translated to Unicode.
138 *
139 * @param[out] new_name Translated name. Must be of length MAXLEN
140 * @param[in] udf_name Name from UDF volume
141 * @param[in] udf_len Length of UDF Name
142 *
143 * @return Number of Unicode characters in translated name.
144 *
145 */
146size_t udf_translate_name(uint16_t *new_name, uint16_t *udf_name,
147 size_t udf_len)
148{
149 bool needs_crc = false;
150 bool has_ext = false;
151 size_t ext_idx = 0;
152 size_t new_idx = 0;
153 size_t new_ext_idx = 0;
154
155 for (size_t idx = 0; idx < udf_len; idx++) {
156 uint16_t current = udf_name[idx];
157
158 if ((!legal_check(current)) || (!ascii_check(current))) {
159 needs_crc = true;
160
161 /*
162 * Replace Illegal and non-displayable chars with
163 * underscore.
164 */
165 current = ILLEGAL_CHAR_MARK;
166
167 /*
168 * Skip any other illegal or non-displayable
169 * characters.
170 */
171 while ((idx + 1 < udf_len) &&
172 (!legal_check(udf_name[idx + 1]) ||
173 (!ascii_check(udf_name[idx + 1]))))
174 idx++;
175 }
176
177 /* Record position of extension, if one is found. */
178 if ((current == PERIOD) && ((udf_len - idx - 1) <= EXT_SIZE)) {
179 if (udf_len == idx + 1) {
180 /* A trailing period is NOT an extension. */
181 has_ext = false;
182 } else {
183 has_ext = true;
184 ext_idx = idx;
185 new_ext_idx = new_idx;
186 }
187 }
188
189 if (new_idx < MAXLEN)
190 new_name[new_idx++] = current;
191 else
192 needs_crc = true;
193 }
194
195 if (needs_crc) {
196 uint16_t ext[EXT_SIZE];
197 size_t local_ext_idx = 0;
198
199 if (has_ext) {
200 size_t max_filename_len;
201
202 /* Translate extension, and store it in ext. */
203 for (size_t idx = 0; (idx < EXT_SIZE) &&
204 (ext_idx + idx + 1 < udf_len); idx++) {
205 uint16_t current = udf_name[ext_idx + idx + 1];
206
207 if ((!legal_check(current)) || (!ascii_check(current))) {
208 needs_crc = true;
209
210 /*
211 * Replace Illegal and non-displayable
212 * chars with underscore.
213 */
214 current = ILLEGAL_CHAR_MARK;
215
216 /*
217 * Skip any other illegal or
218 * non-displayable characters.
219 */
220 while ((idx + 1 < EXT_SIZE) &&
221 ((!legal_check(udf_name[ext_idx + idx + 2])) ||
222 (!ascii_check(udf_name[ext_idx + idx + 2]))))
223 idx++;
224 }
225
226 ext[local_ext_idx++] = current;
227 }
228
229 /*
230 * Truncate filename to leave room for extension and
231 * CRC.
232 */
233 max_filename_len = ((MAXLEN - 5) - local_ext_idx - 1);
234 if (new_idx > max_filename_len)
235 new_idx = max_filename_len;
236 else
237 new_idx = new_ext_idx;
238 } else if (new_idx > MAXLEN - 5) {
239 /* If no extension, make sure to leave room for CRC. */
240 new_idx = MAXLEN - 5;
241 }
242
243 /* Add mark for CRC. */
244 new_name[new_idx++] = CRC_MARK;
245
246 /* Calculate CRC from original filename. */
247 uint16_t value_crc = udf_unicode_cksum(udf_name, udf_len);
248
249 /* Convert 16-bits of CRC to hex characters. */
250 const char hex_char[] = "0123456789ABCDEF";
251
252 new_name[new_idx++] = hex_char[(value_crc & 0xf000) >> 12];
253 new_name[new_idx++] = hex_char[(value_crc & 0x0f00) >> 8];
254 new_name[new_idx++] = hex_char[(value_crc & 0x00f0) >> 4];
255 new_name[new_idx++] = hex_char[(value_crc & 0x000f)];
256
257 /* Place a translated extension at end, if found. */
258 if (has_ext) {
259 new_name[new_idx++] = PERIOD;
260
261 for (size_t idx = 0; idx < local_ext_idx; idx++)
262 new_name[new_idx++] = ext[idx];
263 }
264 }
265
266 return new_idx;
267}
268
269/** Decode from dchar to utf8
270 *
271 * @param result Returned value - utf8 string
272 * @param result_len Length of output string
273 * @param id Input string
274 * @param len Length of input string
275 * @param chsp Decode method
276 *
277 */
278void udf_to_unix_name(char *result, size_t result_len, char *id, size_t len,
279 udf_charspec_t *chsp)
280{
281 const char *osta_id = "OSTA Compressed Unicode";
282 size_t ucode_chars, nice_uchars;
283
284 uint16_t *raw_name = malloc(MAX_BUF * sizeof(uint16_t));
285 uint16_t *unix_name = malloc(MAX_BUF * sizeof(uint16_t));
286
287 // FIXME: Check for malloc returning NULL
288
289 bool is_osta_typ0 = (chsp->type == 0) &&
290 (str_cmp((char *) chsp->info, osta_id) == 0);
291
292 if (is_osta_typ0) {
293 *raw_name = 0;
294 *unix_name = 0;
295
296 ucode_chars =
297 udf_uncompress_unicode(len, (uint8_t *) id, raw_name, MAX_BUF);
298 ucode_chars = min(ucode_chars, utf16_length(raw_name));
299 nice_uchars =
300 udf_translate_name(unix_name, raw_name, ucode_chars);
301
302 /* Output UTF-8 */
303 unix_name[nice_uchars] = 0;
304 utf16_to_str(result, result_len, unix_name);
305 } else {
306 /* Assume 8 bit char length byte Latin-1 */
307 str_ncpy(result, result_len, (char *) (id + 1),
308 str_size((char *) (id + 1)));
309 }
310
311 free(raw_name);
312 free(unix_name);
313}
314
315/**
316 * @}
317 */
Note: See TracBrowser for help on using the repository browser.