source: mainline/uspace/srv/fs/udf/udf_osta.c@ 48e3190

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 48e3190 was 48e3190, checked in by Martin Decky <martin@…>, 13 years ago

cherrypick UDF file system driver implementation (originally by Julia Medvedeva)
with coding style improvements and minor changes

  • Property mode set to 100644
File size: 10.1 KB
Line 
1/*
2 * Copyright (c) 2012 Julia Medvedeva
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * OSTA compliant Unicode compression, uncompression routines,
31 * file name translation routine for OS/2, Windows 95, Windows NT,
32 * Macintosh and UNIX.
33 * Copyright 1995 Micro Design International, Inc.
34 * Written by Jason M. Rinn.
35 * Micro Design International gives permission for the free use of the
36 * following source code.
37 */
38
39/** @addtogroup fs
40 * @{
41 */
42/**
43 * @file udf_osta.c
44 * @brief OSTA compliant functions.
45 */
46
47#include <malloc.h>
48#include <str.h>
49#include <macros.h>
50#include <errno.h>
51#include "udf_osta.h"
52#include "udf_cksum.h"
53
/** Count the 16-bit code units that precede the terminating zero.
 *
 * FIXME: This counts code units, not characters. UTF-16 is a
 *        variable-width encoding, so a surrogate pair is counted
 *        as two.
 *
 * @param[in] string Zero-terminated sequence of 16-bit code units.
 *
 * @return Number of code units in front of the terminator.
 *
 */
static size_t utf16_length(uint16_t *string)
{
	size_t count = 0;

	for (; string[count] != 0; count++) {
		/* Nothing to do, the loop header does the counting. */
	}

	return count;
}
69
/** Tell whether a character may appear in a UNIX file name.
 *
 * The only characters rejected are NUL (0x0000) and the slash (0x002F).
 *
 * @param[in] ch Character to examine.
 *
 * @return True if the character is allowed, false otherwise.
 *
 */
static bool legal_check(uint16_t ch)
{
	return (ch != 0x0000) && (ch != 0x002F);
}
80
/** Convert OSTA CS0 compressed Unicode name to Unicode.
 *
 * The first input byte is the compression ID: with 8 every following
 * byte is one character, with 16 every character is stored as two
 * bytes, high byte first. The Unicode output will be in the byte order
 * that the local compiler uses for 16-bit values.
 *
 * Decoding stops as soon as the input is exhausted or unicode_max_len
 * characters have been written, whichever happens first. No terminating
 * zero is appended to the output. If a 16-bit stream ends right after
 * the high byte of a character, that character is emitted with its low
 * byte set to zero.
 *
 * NOTE: Apart from the compression ID and the two lengths, the
 * compressed data is not validated in any way.
 *
 * @param[in]  number_of_bytes Number of bytes read from media
 * @param[in]  udf_compressed  Bytes read from media
 * @param[out] unicode         Uncompressed unicode characters
 * @param[in]  unicode_max_len Size of output array (in characters)
 *
 * @return Number of Unicode characters which were uncompressed,
 *         0 if the input is empty or the compression ID is invalid.
 *
 */
size_t udf_uncompress_unicode(size_t number_of_bytes, uint8_t *udf_compressed,
    uint16_t *unicode, size_t unicode_max_len)
{
	/* An empty input does not even contain a compression ID. */
	if (number_of_bytes == 0)
		return 0;

	uint8_t comp_id = udf_compressed[0];

	/* First check for valid compID. */
	if ((comp_id != 8) && (comp_id != 16))
		return 0;

	size_t unicode_idx = 0;
	size_t byte_idx = 1;

	/* Loop through all the bytes. */
	while ((byte_idx < number_of_bytes) && (unicode_idx < unicode_max_len)) {
		if (comp_id == 16) {
			/*
			 * Move the first byte to the high bits of the
			 * Unicode char.
			 */
			unicode[unicode_idx] =
			    (uint16_t) (udf_compressed[byte_idx++] << 8);
		} else
			unicode[unicode_idx] = 0;

		/*
		 * The high byte may have been the last one available,
		 * hence the repeated bounds check.
		 */
		if (byte_idx < number_of_bytes) {
			/* Then the next byte to the low bits. */
			unicode[unicode_idx] |= udf_compressed[byte_idx++];
		}

		unicode_idx++;
	}

	return unicode_idx;
}
132
/** Convert Unicode wide characters to OSTA CS0 compressed Unicode string.
 *
 * The output starts with the compression ID. With ID 8 each character
 * contributes its low byte only, with ID 16 each character contributes
 * its high byte followed by its low byte. The Unicode input MUST be in
 * the byte order of the compiler in order to obtain correct results.
 *
 * NOTE: No check is made that the characters actually fit into the
 * chosen number of bits, and the size of the output buffer is not
 * checked either. The caller has to provide room for
 * 1 + number_of_chars bytes (ID 8) or 1 + 2 * number_of_chars
 * bytes (ID 16).
 *
 * @param[in]  number_of_chars Number of unicode characters.
 * @param[in]  comp_id         Compression ID to be used.
 * @param[in]  unicode         Unicode characters to compress.
 * @param[out] udf_compressed  Compressed string, as bytes.
 *
 * @return The total number of bytes in the compressed OSTA CS0 string,
 *         including the compression ID, or 0 if the compression ID
 *         is invalid (nothing is written in that case).
 *
 */
size_t udf_compress_unicode(size_t number_of_chars, uint8_t comp_id,
    uint16_t *unicode, uint8_t *udf_compressed)
{
	if ((comp_id != 8) && (comp_id != 16))
		return 0;

	uint8_t *out = udf_compressed;

	/* The stream always begins with the compression code. */
	*out++ = comp_id;

	for (size_t i = 0; i < number_of_chars; i++) {
		uint16_t ch = unicode[i];

		/* Two-byte characters go out high byte first. */
		if (comp_id == 16)
			*out++ = (uint8_t) (ch >> 8);

		*out++ = (uint8_t) (ch & 0x00FF);
	}

	return (size_t) (out - udf_compressed);
}
182
183/** Translate a long file name
184 *
185 * Translate a long file name to one using a MAXLEN and an illegal char set
186 * in accord with the OSTA requirements. Assumes the name has already been
187 * translated to Unicode.
188 *
189 * @param[out] new_name Translated name. Must be of length MAXLEN
190 * @param[in] udf_name Name from UDF volume
191 * @param[in] udf_len Length of UDF Name
192 *
193 * @return Number of Unicode characters in translated name.
194 *
195 */
196size_t udf_translate_name(uint16_t *new_name, uint16_t *udf_name,
197 size_t udf_len)
198{
199 bool needs_crc = false;
200 bool has_ext = false;
201 size_t ext_idx = 0;
202 size_t new_idx = 0;
203 size_t new_ext_idx = 0;
204
205 for (size_t idx = 0; idx < udf_len; idx++) {
206 uint16_t current = udf_name[idx];
207
208 if ((!legal_check(current)) || (!ascii_check(current))) {
209 needs_crc = true;
210
211 /*
212 * Replace Illegal and non-displayable chars with
213 * underscore.
214 */
215 current = ILLEGAL_CHAR_MARK;
216
217 /*
218 * Skip any other illegal or non-displayable
219 * characters.
220 */
221 while ((idx + 1 < udf_len) &&
222 (!legal_check(udf_name[idx + 1]) ||
223 (!ascii_check(udf_name[idx + 1]))))
224 idx++;
225 }
226
227 /* Record position of extension, if one is found. */
228 if ((current == PERIOD) && ((udf_len - idx - 1) <= EXT_SIZE)) {
229 if (udf_len == idx + 1) {
230 /* A trailing period is NOT an extension. */
231 has_ext = false;
232 } else {
233 has_ext = true;
234 ext_idx = idx;
235 new_ext_idx = new_idx;
236 }
237 }
238
239 if (new_idx < MAXLEN)
240 new_name[new_idx++] = current;
241 else
242 needs_crc = true;
243 }
244
245 if (needs_crc) {
246 uint16_t ext[EXT_SIZE];
247 size_t local_ext_idx = 0;
248
249 if (has_ext) {
250 size_t max_filename_len;
251
252 /* Translate extension, and store it in ext. */
253 for (size_t idx = 0; (idx < EXT_SIZE) &&
254 (ext_idx + idx + 1 < udf_len); idx++) {
255 uint16_t current = udf_name[ext_idx + idx + 1];
256
257 if ((!legal_check(current)) || (!ascii_check(current))) {
258 needs_crc = true;
259
260 /*
261 * Replace Illegal and non-displayable
262 * chars with underscore.
263 */
264 current = ILLEGAL_CHAR_MARK;
265
266 /*
267 * Skip any other illegal or
268 * non-displayable characters.
269 */
270 while ((idx + 1 < EXT_SIZE) &&
271 ((!legal_check(udf_name[ext_idx + idx + 2])) ||
272 (!ascii_check(udf_name[ext_idx + idx + 2]))))
273 idx++;
274 }
275
276 ext[local_ext_idx++] = current;
277 }
278
279 /*
280 * Truncate filename to leave room for extension and
281 * CRC.
282 */
283 max_filename_len = ((MAXLEN - 5) - local_ext_idx - 1);
284 if (new_idx > max_filename_len)
285 new_idx = max_filename_len;
286 else
287 new_idx = new_ext_idx;
288 } else if (new_idx > MAXLEN - 5) {
289 /* If no extension, make sure to leave room for CRC. */
290 new_idx = MAXLEN - 5;
291 }
292
293 /* Add mark for CRC. */
294 new_name[new_idx++] = CRC_MARK;
295
296 /* Calculate CRC from original filename. */
297 uint16_t value_crc = udf_unicode_cksum(udf_name, udf_len);
298
299 /* Convert 16-bits of CRC to hex characters. */
300 const char hex_char[] = "0123456789ABCDEF";
301
302 new_name[new_idx++] = hex_char[(value_crc & 0xf000) >> 12];
303 new_name[new_idx++] = hex_char[(value_crc & 0x0f00) >> 8];
304 new_name[new_idx++] = hex_char[(value_crc & 0x00f0) >> 4];
305 new_name[new_idx++] = hex_char[(value_crc & 0x000f)];
306
307 /* Place a translated extension at end, if found. */
308 if (has_ext) {
309 new_name[new_idx++] = PERIOD;
310
311 for (size_t idx = 0; idx < local_ext_idx; idx++)
312 new_name[new_idx++] = ext[idx];
313 }
314 }
315
316 return new_idx;
317}
318
319/** Decode from dchar to utf8
320 *
321 * @param result Returned value - utf8 string
322 * @param result_len Length of output string
323 * @param id Input string
324 * @param len Length of input string
325 * @param chsp Decode method
326 *
327 */
328void udf_to_unix_name(char *result, size_t result_len, char *id, size_t len,
329 udf_charspec_t *chsp)
330{
331 const char *osta_id = "OSTA Compressed Unicode";
332 size_t ucode_chars, nice_uchars;
333
334 uint16_t *raw_name = malloc(MAX_BUF * sizeof(uint16_t));
335 uint16_t *unix_name = malloc(MAX_BUF * sizeof(uint16_t));
336
337 // FIXME: Check for malloc returning NULL
338
339 bool is_osta_typ0 = (chsp->type == 0) &&
340 (str_cmp((char *) chsp->info, osta_id) == 0);
341
342 if (is_osta_typ0) {
343 *raw_name = 0;
344 *unix_name = 0;
345
346 ucode_chars =
347 udf_uncompress_unicode(len, (uint8_t *) id, raw_name, MAX_BUF);
348 ucode_chars = min(ucode_chars, utf16_length(raw_name));
349 nice_uchars =
350 udf_translate_name(unix_name, raw_name, ucode_chars);
351
352 /* Output UTF-8 */
353 unix_name[nice_uchars] = 0;
354 utf16_to_str(result, result_len, unix_name);
355 } else {
356 /* Assume 8 bit char length byte Latin-1 */
357 str_ncpy(result, result_len, (char *) (id + 1),
358 str_size((char *) (id + 1)));
359 }
360
361 free(raw_name);
362 free(unix_name);
363}
364
365/**
366 * @}
367 */
Note: See TracBrowser for help on using the repository browser.