source: mainline/uspace/srv/fs/udf/udf_osta.c@ a63966d

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since a63966d was a35b458, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 7 years ago

style: Remove trailing whitespace on _all_ lines, including empty ones, for particular file types.

Command used: tools/srepl '\s\+$' '' -- *.c *.h *.py *.sh *.s *.S *.ag

Currently, whitespace on empty lines is very inconsistent.
There are two basic choices: Either remove the whitespace, or keep empty lines
indented to the level of surrounding code. The former is AFAICT more common,
and also much easier to do automatically.

Alternatively, we could write a script for automatic indentation, and use that
instead. However, if such a script exists, it's possible to use the indented
style locally, by having the editor apply relevant conversions on load/save,
without affecting remote repository. IMO, it makes more sense to adopt
the simpler rule.

  • Property mode set to 100644
File size: 8.2 KB
Line 
1/*
2 * Copyright (c) 2012 Julia Medvedeva
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * OSTA compliant Unicode compression, uncompression routines,
31 * file name translation routine for OS/2, Windows 95, Windows NT,
32 * Macintosh and UNIX.
33 * Copyright 1995 Micro Design International, Inc.
34 * Written by Jason M. Rinn.
35 * Micro Design International gives permission for the free use of the
36 * following source code.
37 */
38
39/** @addtogroup fs
40 * @{
41 */
42/**
43 * @file udf_osta.c
44 * @brief OSTA compliant functions.
45 */
46
47#include <stdlib.h>
48#include <str.h>
49#include <macros.h>
50#include <errno.h>
51#include "udf_osta.h"
52#include "udf_cksum.h"
53
/** Tell whether a character may appear in a UNIX file name.
 *
 * Only two code units are rejected: NUL (0x0000) and slash (0x002F).
 *
 * @param ch 16-bit character to examine.
 *
 * @return True if the character is legal, false otherwise.
 *
 */
static bool legal_check(uint16_t ch)
{
	switch (ch) {
	case 0x0000:
		/* NUL */
	case 0x002F:
		/* Slash */
		return false;
	default:
		return true;
	}
}
64
/** Convert OSTA CS0 compressed Unicode name to Unicode.
 *
 * The first input byte is the compression ID. An ID of 8 means every
 * following byte is one character; an ID of 16 means every character is
 * stored as two bytes, high byte first. Any other ID is rejected.
 *
 * The Unicode output will be in the byte order that the local compiler
 * uses for 16-bit values.
 *
 * An empty input yields zero characters and the input buffer is not read.
 * If a 16-bit name ends in the middle of a character, the last character
 * is emitted with only its high byte set.
 *
 * NOTE: Apart from the empty-input and comp_id checks, this routine does
 * not validate the compressed name. Output is limited to unicode_max_len
 * characters.
 *
 * @param[in]  number_of_bytes Number of bytes read from media
 * @param[in]  udf_compressed  Bytes read from media
 * @param[out] unicode         Uncompressed unicode characters
 * @param[in]  unicode_max_len Size of output array (in characters)
 *
 * @return Number of Unicode characters which were uncompressed
 *         (0 for empty input or an unknown compression ID).
 *
 */
static size_t udf_uncompress_unicode(size_t number_of_bytes,
    uint8_t *udf_compressed, uint16_t *unicode, size_t unicode_max_len)
{
	/* Nothing to decode; do not even look at the compression ID. */
	if ((number_of_bytes == 0) || (udf_compressed == NULL))
		return 0;

	/* The first byte selects 8-bit or 16-bit characters. */
	uint8_t comp_id = udf_compressed[0];

	/* First check for valid compID. */
	if ((comp_id != 8) && (comp_id != 16))
		return 0;

	size_t unicode_idx = 0;
	size_t byte_idx = 1;

	/* Loop through all the bytes. */
	while ((byte_idx < number_of_bytes) && (unicode_idx < unicode_max_len)) {
		uint16_t ch = 0;

		if (comp_id == 16) {
			/*
			 * Move the first byte to the high bits of the
			 * Unicode char.
			 */
			ch = (uint16_t) (udf_compressed[byte_idx++] << 8);
		}

		if (byte_idx < number_of_bytes) {
			/* Then the next byte to the low bits. */
			ch |= udf_compressed[byte_idx++];
		}

		unicode[unicode_idx++] = ch;
	}

	return unicode_idx;
}
116
117/** Translate a long file name
118 *
119 * Translate a long file name to one using a MAXLEN and an illegal char set
120 * in accord with the OSTA requirements. Assumes the name has already been
121 * translated to Unicode.
122 *
123 * @param[out] new_name Translated name. Must be of length MAXLEN
124 * @param[in] udf_name Name from UDF volume
125 * @param[in] udf_len Length of UDF Name
126 *
127 * @return Number of Unicode characters in translated name.
128 *
129 */
130size_t udf_translate_name(uint16_t *new_name, uint16_t *udf_name,
131 size_t udf_len)
132{
133 bool needs_crc = false;
134 bool has_ext = false;
135 size_t ext_idx = 0;
136 size_t new_idx = 0;
137 size_t new_ext_idx = 0;
138
139 for (size_t idx = 0; idx < udf_len; idx++) {
140 uint16_t current = udf_name[idx];
141
142 if ((!legal_check(current)) || (!ascii_check(current))) {
143 needs_crc = true;
144
145 /*
146 * Replace Illegal and non-displayable chars with
147 * underscore.
148 */
149 current = ILLEGAL_CHAR_MARK;
150
151 /*
152 * Skip any other illegal or non-displayable
153 * characters.
154 */
155 while ((idx + 1 < udf_len) &&
156 (!legal_check(udf_name[idx + 1]) ||
157 (!ascii_check(udf_name[idx + 1]))))
158 idx++;
159 }
160
161 /* Record position of extension, if one is found. */
162 if ((current == PERIOD) && ((udf_len - idx - 1) <= EXT_SIZE)) {
163 if (udf_len == idx + 1) {
164 /* A trailing period is NOT an extension. */
165 has_ext = false;
166 } else {
167 has_ext = true;
168 ext_idx = idx;
169 new_ext_idx = new_idx;
170 }
171 }
172
173 if (new_idx < MAXLEN)
174 new_name[new_idx++] = current;
175 else
176 needs_crc = true;
177 }
178
179 if (needs_crc) {
180 uint16_t ext[EXT_SIZE];
181 size_t local_ext_idx = 0;
182
183 if (has_ext) {
184 size_t max_filename_len;
185
186 /* Translate extension, and store it in ext. */
187 for (size_t idx = 0; (idx < EXT_SIZE) &&
188 (ext_idx + idx + 1 < udf_len); idx++) {
189 uint16_t current = udf_name[ext_idx + idx + 1];
190
191 if ((!legal_check(current)) || (!ascii_check(current))) {
192 needs_crc = true;
193
194 /*
195 * Replace Illegal and non-displayable
196 * chars with underscore.
197 */
198 current = ILLEGAL_CHAR_MARK;
199
200 /*
201 * Skip any other illegal or
202 * non-displayable characters.
203 */
204 while ((idx + 1 < EXT_SIZE) &&
205 ((!legal_check(udf_name[ext_idx + idx + 2])) ||
206 (!ascii_check(udf_name[ext_idx + idx + 2]))))
207 idx++;
208 }
209
210 ext[local_ext_idx++] = current;
211 }
212
213 /*
214 * Truncate filename to leave room for extension and
215 * CRC.
216 */
217 max_filename_len = ((MAXLEN - 5) - local_ext_idx - 1);
218 if (new_idx > max_filename_len)
219 new_idx = max_filename_len;
220 else
221 new_idx = new_ext_idx;
222 } else if (new_idx > MAXLEN - 5) {
223 /* If no extension, make sure to leave room for CRC. */
224 new_idx = MAXLEN - 5;
225 }
226
227 /* Add mark for CRC. */
228 new_name[new_idx++] = CRC_MARK;
229
230 /* Calculate CRC from original filename. */
231 uint16_t value_crc = udf_unicode_cksum(udf_name, udf_len);
232
233 /* Convert 16-bits of CRC to hex characters. */
234 const char hex_char[] = "0123456789ABCDEF";
235
236 new_name[new_idx++] = hex_char[(value_crc & 0xf000) >> 12];
237 new_name[new_idx++] = hex_char[(value_crc & 0x0f00) >> 8];
238 new_name[new_idx++] = hex_char[(value_crc & 0x00f0) >> 4];
239 new_name[new_idx++] = hex_char[(value_crc & 0x000f)];
240
241 /* Place a translated extension at end, if found. */
242 if (has_ext) {
243 new_name[new_idx++] = PERIOD;
244
245 for (size_t idx = 0; idx < local_ext_idx; idx++)
246 new_name[new_idx++] = ext[idx];
247 }
248 }
249
250 return new_idx;
251}
252
253/** Decode from dchar to utf8
254 *
255 * @param result Returned value - utf8 string
256 * @param result_len Length of output string
257 * @param id Input string
258 * @param len Length of input string
259 * @param chsp Decode method
260 *
261 */
262void udf_to_unix_name(char *result, size_t result_len, char *id, size_t len,
263 udf_charspec_t *chsp)
264{
265 const char *osta_id = "OSTA Compressed Unicode";
266 size_t ucode_chars, nice_uchars;
267
268 uint16_t *raw_name = malloc(MAX_BUF * sizeof(uint16_t));
269 uint16_t *unix_name = malloc(MAX_BUF * sizeof(uint16_t));
270
271 // FIXME: Check for malloc returning NULL
272
273 bool is_osta_typ0 = (chsp->type == 0) &&
274 (str_cmp((char *) chsp->info, osta_id) == 0);
275
276 if (is_osta_typ0) {
277 *raw_name = 0;
278 *unix_name = 0;
279
280 ucode_chars =
281 udf_uncompress_unicode(len, (uint8_t *) id, raw_name, MAX_BUF);
282 ucode_chars = min(ucode_chars, utf16_wsize(raw_name));
283 nice_uchars =
284 udf_translate_name(unix_name, raw_name, ucode_chars);
285
286 /* Output UTF-8 */
287 unix_name[nice_uchars] = 0;
288 utf16_to_str(result, result_len, unix_name);
289 } else {
290 /* Assume 8 bit char length byte Latin-1 */
291 str_ncpy(result, result_len, (char *) (id + 1),
292 str_size((char *) (id + 1)));
293 }
294
295 free(raw_name);
296 free(unix_name);
297}
298
299/**
300 * @}
301 */
Note: See TracBrowser for help on using the repository browser.