source: mainline/uspace/srv/fs/udf/udf_osta.c

Last change on this file was b1834a01, checked in by Jakub Jermar <jakub@…>, 7 years ago

Categorize the remaining orphan doxygroups

  • Property mode set to 100644
File size: 8.3 KB
RevLine 
/*
 * Copyright (c) 2012 Julia Medvedeva
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
28
/*
 * OSTA compliant Unicode compression, uncompression routines,
 * file name translation routine for OS/2, Windows 95, Windows NT,
 * Macintosh and UNIX.
 * Copyright 1995 Micro Design International, Inc.
 * Written by Jason M. Rinn.
 * Micro Design International gives permission for the free use of the
 * following source code.
 */
38
/** @addtogroup udf
 * @{
 */
/**
 * @file udf_osta.c
 * @brief OSTA compliant functions.
 */
46
[38d150e]47#include <stdlib.h>
[48e3190]48#include <str.h>
49#include <macros.h>
50#include <errno.h>
51#include "udf_osta.h"
52#include "udf_cksum.h"
53
/** Check whether a character may appear in a UNIX file name.
 *
 * The only characters rejected are NUL (0x0000) and slash (0x002F).
 *
 * @param[in] ch 16-bit character to test
 *
 * @return True if the character is legal, false if it is NUL or slash.
 *
 */
static bool legal_check(uint16_t ch)
{
	return (ch != 0x0000) && (ch != 0x002F);
}
64
/** Convert OSTA CS0 compressed Unicode name to Unicode.
 *
 * The first input byte is the compression ID. With ID 8 every following
 * byte becomes one 16-bit character; with ID 16 every following pair of
 * bytes (high byte first) becomes one 16-bit character. If the input ends
 * in the middle of a 16-bit pair, the last character carries only the
 * high byte.
 *
 * The Unicode output will be in the byte order that the local compiler
 * uses for 16-bit values. The output is NOT null-terminated.
 *
 * NOTE: Apart from rejecting empty input, this routine only performs
 * error checking on the compression ID. It is up to the caller to ensure
 * that the compressed Unicode name is correct.
 *
 * @param[in]  number_of_bytes Number of bytes read from media
 * @param[in]  udf_compressed  Bytes read from media
 * @param[out] unicode         Uncompressed unicode characters
 * @param[in]  unicode_max_len Size of output array (in characters)
 *
 * @return Number of Unicode characters which were uncompressed,
 *         0 for empty input or an unknown compression ID.
 *
 */
static size_t udf_uncompress_unicode(size_t number_of_bytes,
    uint8_t *udf_compressed, uint16_t *unicode, size_t unicode_max_len)
{
	/* Without at least one byte there is no compression ID to read. */
	if ((number_of_bytes == 0) || (udf_compressed == NULL) ||
	    (unicode == NULL))
		return 0;

	uint8_t comp_id = udf_compressed[0];

	/* Only 8-bit and 16-bit compression are defined. */
	if ((comp_id != 8) && (comp_id != 16))
		return 0;

	size_t unicode_idx = 0;
	size_t byte_idx = 1;

	/* Loop through all the bytes. */
	while ((byte_idx < number_of_bytes) && (unicode_idx < unicode_max_len)) {
		uint16_t ch = 0;

		/* For 16-bit compression the first byte holds the high bits. */
		if (comp_id == 16)
			ch = (uint16_t) (udf_compressed[byte_idx++] << 8);

		/* The next byte (if there is one) holds the low bits. */
		if (byte_idx < number_of_bytes)
			ch |= udf_compressed[byte_idx++];

		unicode[unicode_idx++] = ch;
	}

	return unicode_idx;
}
116
117/** Translate a long file name
118 *
119 * Translate a long file name to one using a MAXLEN and an illegal char set
120 * in accord with the OSTA requirements. Assumes the name has already been
121 * translated to Unicode.
122 *
123 * @param[out] new_name Translated name. Must be of length MAXLEN
124 * @param[in] udf_name Name from UDF volume
125 * @param[in] udf_len Length of UDF Name
126 *
127 * @return Number of Unicode characters in translated name.
128 *
129 */
130size_t udf_translate_name(uint16_t *new_name, uint16_t *udf_name,
131 size_t udf_len)
132{
133 bool needs_crc = false;
134 bool has_ext = false;
135 size_t ext_idx = 0;
136 size_t new_idx = 0;
137 size_t new_ext_idx = 0;
[a35b458]138
[48e3190]139 for (size_t idx = 0; idx < udf_len; idx++) {
140 uint16_t current = udf_name[idx];
[a35b458]141
[48e3190]142 if ((!legal_check(current)) || (!ascii_check(current))) {
143 needs_crc = true;
[a35b458]144
[48e3190]145 /*
146 * Replace Illegal and non-displayable chars with
147 * underscore.
148 */
149 current = ILLEGAL_CHAR_MARK;
[a35b458]150
[48e3190]151 /*
152 * Skip any other illegal or non-displayable
153 * characters.
154 */
155 while ((idx + 1 < udf_len) &&
156 (!legal_check(udf_name[idx + 1]) ||
157 (!ascii_check(udf_name[idx + 1]))))
158 idx++;
159 }
[a35b458]160
[48e3190]161 /* Record position of extension, if one is found. */
162 if ((current == PERIOD) && ((udf_len - idx - 1) <= EXT_SIZE)) {
163 if (udf_len == idx + 1) {
164 /* A trailing period is NOT an extension. */
165 has_ext = false;
166 } else {
167 has_ext = true;
168 ext_idx = idx;
169 new_ext_idx = new_idx;
170 }
171 }
[a35b458]172
[48e3190]173 if (new_idx < MAXLEN)
174 new_name[new_idx++] = current;
175 else
176 needs_crc = true;
177 }
[a35b458]178
[48e3190]179 if (needs_crc) {
180 uint16_t ext[EXT_SIZE];
181 size_t local_ext_idx = 0;
[a35b458]182
[48e3190]183 if (has_ext) {
184 size_t max_filename_len;
[a35b458]185
[48e3190]186 /* Translate extension, and store it in ext. */
187 for (size_t idx = 0; (idx < EXT_SIZE) &&
188 (ext_idx + idx + 1 < udf_len); idx++) {
189 uint16_t current = udf_name[ext_idx + idx + 1];
[a35b458]190
[48e3190]191 if ((!legal_check(current)) || (!ascii_check(current))) {
192 needs_crc = true;
[a35b458]193
[48e3190]194 /*
195 * Replace Illegal and non-displayable
196 * chars with underscore.
197 */
198 current = ILLEGAL_CHAR_MARK;
[a35b458]199
[48e3190]200 /*
201 * Skip any other illegal or
202 * non-displayable characters.
203 */
204 while ((idx + 1 < EXT_SIZE) &&
205 ((!legal_check(udf_name[ext_idx + idx + 2])) ||
206 (!ascii_check(udf_name[ext_idx + idx + 2]))))
207 idx++;
208 }
[a35b458]209
[48e3190]210 ext[local_ext_idx++] = current;
211 }
[a35b458]212
[48e3190]213 /*
214 * Truncate filename to leave room for extension and
215 * CRC.
216 */
217 max_filename_len = ((MAXLEN - 5) - local_ext_idx - 1);
218 if (new_idx > max_filename_len)
219 new_idx = max_filename_len;
220 else
221 new_idx = new_ext_idx;
222 } else if (new_idx > MAXLEN - 5) {
223 /* If no extension, make sure to leave room for CRC. */
224 new_idx = MAXLEN - 5;
225 }
[a35b458]226
[48e3190]227 /* Add mark for CRC. */
228 new_name[new_idx++] = CRC_MARK;
[a35b458]229
[48e3190]230 /* Calculate CRC from original filename. */
231 uint16_t value_crc = udf_unicode_cksum(udf_name, udf_len);
[a35b458]232
[48e3190]233 /* Convert 16-bits of CRC to hex characters. */
234 const char hex_char[] = "0123456789ABCDEF";
[a35b458]235
[48e3190]236 new_name[new_idx++] = hex_char[(value_crc & 0xf000) >> 12];
237 new_name[new_idx++] = hex_char[(value_crc & 0x0f00) >> 8];
238 new_name[new_idx++] = hex_char[(value_crc & 0x00f0) >> 4];
239 new_name[new_idx++] = hex_char[(value_crc & 0x000f)];
[a35b458]240
[48e3190]241 /* Place a translated extension at end, if found. */
242 if (has_ext) {
243 new_name[new_idx++] = PERIOD;
[a35b458]244
[48e3190]245 for (size_t idx = 0; idx < local_ext_idx; idx++)
246 new_name[new_idx++] = ext[idx];
247 }
248 }
[a35b458]249
[48e3190]250 return new_idx;
251}
252
253/** Decode from dchar to utf8
254 *
255 * @param result Returned value - utf8 string
256 * @param result_len Length of output string
257 * @param id Input string
258 * @param len Length of input string
259 * @param chsp Decode method
260 *
261 */
262void udf_to_unix_name(char *result, size_t result_len, char *id, size_t len,
263 udf_charspec_t *chsp)
264{
265 const char *osta_id = "OSTA Compressed Unicode";
266 size_t ucode_chars, nice_uchars;
[a35b458]267
[48e3190]268 uint16_t *raw_name = malloc(MAX_BUF * sizeof(uint16_t));
269 uint16_t *unix_name = malloc(MAX_BUF * sizeof(uint16_t));
[a35b458]270
[48e3190]271 // FIXME: Check for malloc returning NULL
[a35b458]272
[48e3190]273 bool is_osta_typ0 = (chsp->type == 0) &&
274 (str_cmp((char *) chsp->info, osta_id) == 0);
[a35b458]275
[48e3190]276 if (is_osta_typ0) {
277 *raw_name = 0;
278 *unix_name = 0;
[a35b458]279
[48e3190]280 ucode_chars =
281 udf_uncompress_unicode(len, (uint8_t *) id, raw_name, MAX_BUF);
[b2906c0]282 ucode_chars = min(ucode_chars, utf16_wsize(raw_name));
[48e3190]283 nice_uchars =
284 udf_translate_name(unix_name, raw_name, ucode_chars);
[a35b458]285
[48e3190]286 /* Output UTF-8 */
287 unix_name[nice_uchars] = 0;
288 utf16_to_str(result, result_len, unix_name);
289 } else {
290 /* Assume 8 bit char length byte Latin-1 */
291 str_ncpy(result, result_len, (char *) (id + 1),
292 str_size((char *) (id + 1)));
293 }
[a35b458]294
[48e3190]295 free(raw_name);
296 free(unix_name);
297}
298
/**
 * @}
 */
Note: See TracBrowser for help on using the repository browser.