source: mainline/uspace/srv/fs/udf/udf_osta.c@ 39916d6

Last change on this file since 39916d6 was d7f7a4a, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 3 years ago

Replace some license headers with SPDX identifier

Headers are replaced using tools/transorm-copyright.sh only
when it can be matched verbatim with the license header used
throughout most of the codebase.

  • Property mode set to 100644
File size: 6.9 KB
Line 
1/*
2 * SPDX-FileCopyrightText: 2012 Julia Medvedeva
3 * Copyright 1995 Micro Design International, Inc.
4 *
5 * SPDX-License-Identifier: BSD-3-Clause
6 */
7
8/*
9 * OSTA compliant Unicode compression, uncompression routines,
10 * file name translation routine for OS/2, Windows 95, Windows NT,
11 * Macintosh and UNIX.
12 * Written by Jason M. Rinn.
13 * Micro Design International gives permission for the free use of the
14 * following source code.
15 */
16
17/** @addtogroup udf
18 * @{
19 */
20/**
21 * @file udf_osta.c
22 * @brief OSTA compliant functions.
23 */
24
25#include <stdlib.h>
26#include <str.h>
27#include <macros.h>
28#include <errno.h>
29#include "udf_osta.h"
30#include "udf_cksum.h"
31
/** Check whether a character may appear in a UNIX file name.
 *
 * The only characters rejected are NUL (0x0000) and slash (0x002F).
 *
 * @param[in] ch 16-bit character to examine
 *
 * @return True if the character is legal, false otherwise.
 *
 */
static bool legal_check(uint16_t ch)
{
	return (ch != 0x0000) && (ch != 0x002F);
}
42
/** Convert OSTA CS0 compressed Unicode name to Unicode.
 *
 * The first input byte is the compression ID: 8 means one byte per
 * character, 16 means two bytes per character (high byte first).
 * The Unicode output will be in the byte order that the local compiler
 * uses for 16-bit values.
 *
 * NOTE: Apart from rejecting empty input and unknown compression IDs,
 * this routine does not validate the compressed data. A 16-bit name
 * with an odd number of payload bytes yields a final character that
 * has only its high byte set. The output is not zero-terminated.
 *
 * @param[in]  number_of_bytes Number of bytes read from media
 * @param[in]  udf_compressed  Bytes read from media
 * @param[out] unicode         Uncompressed unicode characters
 * @param[in]  unicode_max_len Size of output array
 *
 * @return Number of Unicode characters which were uncompressed
 *         (zero for empty input or an invalid compression ID).
 *
 */
static size_t udf_uncompress_unicode(size_t number_of_bytes,
    uint8_t *udf_compressed, uint16_t *unicode, size_t unicode_max_len)
{
	/* An empty buffer does not even hold the compression ID. */
	if ((number_of_bytes == 0) || (udf_compressed == NULL))
		return 0;

	uint8_t comp_id = udf_compressed[0];

	/* First check for valid compID. */
	if ((comp_id != 8) && (comp_id != 16))
		return 0;

	size_t unicode_idx = 0;
	size_t byte_idx = 1;

	/* Loop through all the bytes. */
	while ((byte_idx < number_of_bytes) && (unicode_idx < unicode_max_len)) {
		uint16_t ch = 0;

		if (comp_id == 16) {
			/*
			 * Move the first byte to the high bits of the
			 * Unicode char.
			 */
			ch = (uint16_t) (udf_compressed[byte_idx++] << 8);
		}

		if (byte_idx < number_of_bytes) {
			/* Then the next byte to the low bits. */
			ch |= udf_compressed[byte_idx++];
		}

		unicode[unicode_idx++] = ch;
	}

	return unicode_idx;
}
94
95/** Translate a long file name
96 *
97 * Translate a long file name to one using a MAXLEN and an illegal char set
98 * in accord with the OSTA requirements. Assumes the name has already been
99 * translated to Unicode.
100 *
101 * @param[out] new_name Translated name. Must be of length MAXLEN
102 * @param[in] udf_name Name from UDF volume
103 * @param[in] udf_len Length of UDF Name
104 *
105 * @return Number of Unicode characters in translated name.
106 *
107 */
108size_t udf_translate_name(uint16_t *new_name, uint16_t *udf_name,
109 size_t udf_len)
110{
111 bool needs_crc = false;
112 bool has_ext = false;
113 size_t ext_idx = 0;
114 size_t new_idx = 0;
115 size_t new_ext_idx = 0;
116
117 for (size_t idx = 0; idx < udf_len; idx++) {
118 uint16_t current = udf_name[idx];
119
120 if ((!legal_check(current)) || (!ascii_check(current))) {
121 needs_crc = true;
122
123 /*
124 * Replace Illegal and non-displayable chars with
125 * underscore.
126 */
127 current = ILLEGAL_CHAR_MARK;
128
129 /*
130 * Skip any other illegal or non-displayable
131 * characters.
132 */
133 while ((idx + 1 < udf_len) &&
134 (!legal_check(udf_name[idx + 1]) ||
135 (!ascii_check(udf_name[idx + 1]))))
136 idx++;
137 }
138
139 /* Record position of extension, if one is found. */
140 if ((current == PERIOD) && ((udf_len - idx - 1) <= EXT_SIZE)) {
141 if (udf_len == idx + 1) {
142 /* A trailing period is NOT an extension. */
143 has_ext = false;
144 } else {
145 has_ext = true;
146 ext_idx = idx;
147 new_ext_idx = new_idx;
148 }
149 }
150
151 if (new_idx < MAXLEN)
152 new_name[new_idx++] = current;
153 else
154 needs_crc = true;
155 }
156
157 if (needs_crc) {
158 uint16_t ext[EXT_SIZE];
159 size_t local_ext_idx = 0;
160
161 if (has_ext) {
162 size_t max_filename_len;
163
164 /* Translate extension, and store it in ext. */
165 for (size_t idx = 0; (idx < EXT_SIZE) &&
166 (ext_idx + idx + 1 < udf_len); idx++) {
167 uint16_t current = udf_name[ext_idx + idx + 1];
168
169 if ((!legal_check(current)) || (!ascii_check(current))) {
170 needs_crc = true;
171
172 /*
173 * Replace Illegal and non-displayable
174 * chars with underscore.
175 */
176 current = ILLEGAL_CHAR_MARK;
177
178 /*
179 * Skip any other illegal or
180 * non-displayable characters.
181 */
182 while ((idx + 1 < EXT_SIZE) &&
183 ((!legal_check(udf_name[ext_idx + idx + 2])) ||
184 (!ascii_check(udf_name[ext_idx + idx + 2]))))
185 idx++;
186 }
187
188 ext[local_ext_idx++] = current;
189 }
190
191 /*
192 * Truncate filename to leave room for extension and
193 * CRC.
194 */
195 max_filename_len = ((MAXLEN - 5) - local_ext_idx - 1);
196 if (new_idx > max_filename_len)
197 new_idx = max_filename_len;
198 else
199 new_idx = new_ext_idx;
200 } else if (new_idx > MAXLEN - 5) {
201 /* If no extension, make sure to leave room for CRC. */
202 new_idx = MAXLEN - 5;
203 }
204
205 /* Add mark for CRC. */
206 new_name[new_idx++] = CRC_MARK;
207
208 /* Calculate CRC from original filename. */
209 uint16_t value_crc = udf_unicode_cksum(udf_name, udf_len);
210
211 /* Convert 16-bits of CRC to hex characters. */
212 const char hex_char[] = "0123456789ABCDEF";
213
214 new_name[new_idx++] = hex_char[(value_crc & 0xf000) >> 12];
215 new_name[new_idx++] = hex_char[(value_crc & 0x0f00) >> 8];
216 new_name[new_idx++] = hex_char[(value_crc & 0x00f0) >> 4];
217 new_name[new_idx++] = hex_char[(value_crc & 0x000f)];
218
219 /* Place a translated extension at end, if found. */
220 if (has_ext) {
221 new_name[new_idx++] = PERIOD;
222
223 for (size_t idx = 0; idx < local_ext_idx; idx++)
224 new_name[new_idx++] = ext[idx];
225 }
226 }
227
228 return new_idx;
229}
230
231/** Decode from dchar to utf8
232 *
233 * @param result Returned value - utf8 string
234 * @param result_len Length of output string
235 * @param id Input string
236 * @param len Length of input string
237 * @param chsp Decode method
238 *
239 */
240void udf_to_unix_name(char *result, size_t result_len, char *id, size_t len,
241 udf_charspec_t *chsp)
242{
243 const char *osta_id = "OSTA Compressed Unicode";
244 size_t ucode_chars, nice_uchars;
245
246 uint16_t *raw_name = malloc(MAX_BUF * sizeof(uint16_t));
247 uint16_t *unix_name = malloc(MAX_BUF * sizeof(uint16_t));
248
249 // FIXME: Check for malloc returning NULL
250
251 bool is_osta_typ0 = (chsp->type == 0) &&
252 (str_cmp((char *) chsp->info, osta_id) == 0);
253
254 if (is_osta_typ0) {
255 *raw_name = 0;
256 *unix_name = 0;
257
258 ucode_chars =
259 udf_uncompress_unicode(len, (uint8_t *) id, raw_name, MAX_BUF);
260 ucode_chars = min(ucode_chars, utf16_wsize(raw_name));
261 nice_uchars =
262 udf_translate_name(unix_name, raw_name, ucode_chars);
263
264 /* Output UTF-8 */
265 unix_name[nice_uchars] = 0;
266 utf16_to_str(result, result_len, unix_name);
267 } else {
268 /* Assume 8 bit char length byte Latin-1 */
269 str_ncpy(result, result_len, (char *) (id + 1),
270 str_size((char *) (id + 1)));
271 }
272
273 free(raw_name);
274 free(unix_name);
275}
276
277/**
278 * @}
279 */
Note: See TracBrowser for help on using the repository browser.