Context Navigation

source: mainline/uspace/srv/fs/udf/udf_osta.c@ 48e3190

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 48e3190 was 48e3190, checked in by Martin Decky <martin@…>, 13 years ago
cherrypick UDF file system driver implementation (originally by Julia Medvedeva) with coding style improvements and minor changes
Property mode set to `100644`
File size: 10.1 KB

Rev	Line
[48e3190]	1	/*
	2	* Copyright (c) 2012 Julia Medvedeva
	3	* All rights reserved.
	4	*
	5	* Redistribution and use in source and binary forms, with or without
	6	* modification, are permitted provided that the following conditions
	7	* are met:
	8	*
	9	* - Redistributions of source code must retain the above copyright
	10	* notice, this list of conditions and the following disclaimer.
	11	* - Redistributions in binary form must reproduce the above copyright
	12	* notice, this list of conditions and the following disclaimer in the
	13	* documentation and/or other materials provided with the distribution.
	14	* - The name of the author may not be used to endorse or promote products
	15	* derived from this software without specific prior written permission.
	16	*
	17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	27	*/
	28
	29	/*
	30	* OSTA compliant Unicode compression, uncompression routines,
	31	* file name translation routine for OS/2, Windows 95, Windows NT,
	32	* Macintosh and UNIX.
	33	* Copyright 1995 Micro Design International, Inc.
	34	* Written by Jason M. Rinn.
	35	* Micro Design International gives permission for the free use of the
	36	* following source code.
	37	*/
	38
	39	/** @addtogroup fs
	40	* @{
	41	*/
	42	/**
	43	* @file udf_osta.c
	44	* @brief OSTA compliant functions.
	45	*/
	46
	47	#include <malloc.h>
	48	#include <str.h>
	49	#include <macros.h>
	50	#include <errno.h>
	51	#include "udf_osta.h"
	52	#include "udf_cksum.h"
	53
	/** Count the 16-bit code units of a zero-terminated UTF-16 string.
	 *
	 * The result is the number of uint16_t elements that precede the
	 * terminating zero element. It is NOT the number of Unicode code
	 * points: UTF-16 is a variable-width encoding and a surrogate pair
	 * contributes two to the count. The caller in this file uses the
	 * value as an element count, for which code units are the right
	 * measure.
	 *
	 * @param[in] string Zero-terminated array of 16-bit code units.
	 *
	 * @return Number of code units before the terminator.
	 *
	 */
	static size_t utf16_length(const uint16_t *string)
	{
		size_t len = 0;

		while (string[len] != 0)
			len++;

		return len;
	}
	69
	/** Check whether a character is legal in a UNIX file name.
	 *
	 * The only characters rejected are NUL (0x0000) and slash (0x002F).
	 *
	 * @param[in] ch 16-bit character to test.
	 *
	 * @return True if the character is legal, false otherwise.
	 *
	 */
	static bool legal_check(uint16_t ch)
	{
		return (ch != 0x0000) && (ch != 0x002F);
	}
	80
	/** Convert OSTA CS0 compressed Unicode name to Unicode.
	 *
	 * The first input byte is the compression ID: 8 means one byte per
	 * character, 16 means two bytes per character with the high byte
	 * first. The Unicode output will be in the byte order that the local
	 * compiler uses for 16-bit values.
	 *
	 * NOTE: Apart from rejecting an empty input and an unknown compression
	 * ID, this routine performs no validation of the compressed data. With
	 * compression ID 16 and an odd number of payload bytes, the final
	 * character is produced from the lone high byte with a zero low byte.
	 *
	 * @param[in]  number_of_bytes Number of bytes read from media
	 *                             (including the compression ID byte)
	 * @param[in]  udf_compressed  Bytes read from media
	 * @param[out] unicode         Uncompressed unicode characters
	 * @param[in]  unicode_max_len Size of output array (in characters)
	 *
	 * @return Number of Unicode characters which were uncompressed,
	 *         0 if the input is empty or the compression ID is invalid.
	 *
	 */
	size_t udf_uncompress_unicode(size_t number_of_bytes, uint8_t *udf_compressed,
	    uint16_t *unicode, size_t unicode_max_len)
	{
		/* Without at least one byte there is no compression ID to read. */
		if (number_of_bytes == 0)
			return 0;

		uint8_t comp_id = udf_compressed[0];

		/* Only 8-bit and 16-bit compression are defined. */
		if ((comp_id != 8) && (comp_id != 16))
			return 0;

		size_t unicode_idx = 0;
		size_t byte_idx = 1;

		/* Loop through all the bytes. */
		while ((byte_idx < number_of_bytes) && (unicode_idx < unicode_max_len)) {
			uint16_t ch = 0;

			if (comp_id == 16) {
				/* The first byte forms the high bits of the character. */
				ch = (uint16_t) (udf_compressed[byte_idx++] << 8);
			}

			if (byte_idx < number_of_bytes) {
				/* The next byte forms the low bits. */
				ch |= udf_compressed[byte_idx++];
			}

			unicode[unicode_idx++] = ch;
		}

		return unicode_idx;
	}
	132
	/** Convert Unicode wide characters to OSTA CS0 compressed Unicode string.
	 *
	 * The Unicode MUST be in the byte order of the compiler in order
	 * to obtain correct results. The output starts with the compression
	 * ID byte; each character then contributes one byte (ID 8, low byte
	 * only) or two bytes (ID 16, high byte first).
	 *
	 * NOTE: This routine assumes the implementation already knows,
	 * by the local environment, how many bits are appropriate and therefore
	 * does no checking to test if the input characters fit into that number
	 * of bits or not. With compression ID 8 the high byte of every character
	 * is silently dropped. The output buffer size is not checked either:
	 * the caller must provide 1 + number_of_chars bytes for ID 8 and
	 * 1 + 2 * number_of_chars bytes for ID 16.
	 *
	 * @param[in]  number_of_chars Number of unicode characters.
	 * @param[in]  comp_id         Compression ID to be used (8 or 16).
	 * @param[in]  unicode         Unicode characters to compress.
	 * @param[out] udf_compressed  Compressed string, as bytes.
	 *
	 * @return The total number of bytes in the compressed OSTA CS0 string,
	 *         including the compression ID, or 0 if the compression ID
	 *         is invalid.
	 *
	 */
	size_t udf_compress_unicode(size_t number_of_chars, uint8_t comp_id,
	    uint16_t *unicode, uint8_t *udf_compressed)
	{
		if ((comp_id != 8) && (comp_id != 16))
			return 0;

		/* Place compression code in first byte. */
		udf_compressed[0] = comp_id;

		size_t byte_idx = 1;

		for (size_t unicode_idx = 0; unicode_idx < number_of_chars; unicode_idx++) {
			uint16_t ch = unicode[unicode_idx];

			if (comp_id == 16) {
				/* First, place the high bits of the char into the stream. */
				udf_compressed[byte_idx++] = (uint8_t) (ch >> 8);
			}

			/* Then place the low bits into the stream. */
			udf_compressed[byte_idx++] = (uint8_t) (ch & 0x00FF);
		}

		return byte_idx;
	}
	182
	183	/** Translate a long file name
	184	*
	185	* Translate a long file name to one using a MAXLEN and an illegal char set
	186	* in accord with the OSTA requirements. Assumes the name has already been
	187	* translated to Unicode.
	188	*
	189	* @param[out] new_name Translated name. Must be of length MAXLEN
	190	* @param[in] udf_name Name from UDF volume
	191	* @param[in] udf_len Length of UDF Name
	192	*
	193	* @return Number of Unicode characters in translated name.
	194	*
	195	*/
	196	size_t udf_translate_name(uint16_t new_name, uint16_t udf_name,
	197	size_t udf_len)
	198	{
	199	bool needs_crc = false;
	200	bool has_ext = false;
	201	size_t ext_idx = 0;
	202	size_t new_idx = 0;
	203	size_t new_ext_idx = 0;
	204
	205	for (size_t idx = 0; idx < udf_len; idx++) {
	206	uint16_t current = udf_name[idx];
	207
	208	if ((!legal_check(current)) \|\| (!ascii_check(current))) {
	209	needs_crc = true;
	210
	211	/*
	212	* Replace Illegal and non-displayable chars with
	213	* underscore.
	214	*/
	215	current = ILLEGAL_CHAR_MARK;
	216
	217	/*
	218	* Skip any other illegal or non-displayable
	219	* characters.
	220	*/
	221	while ((idx + 1 < udf_len) &&
	222	(!legal_check(udf_name[idx + 1]) \|\|
	223	(!ascii_check(udf_name[idx + 1]))))
	224	idx++;
	225	}
	226
	227	/* Record position of extension, if one is found. */
	228	if ((current == PERIOD) && ((udf_len - idx - 1) <= EXT_SIZE)) {
	229	if (udf_len == idx + 1) {
	230	/* A trailing period is NOT an extension. */
	231	has_ext = false;
	232	} else {
	233	has_ext = true;
	234	ext_idx = idx;
	235	new_ext_idx = new_idx;
	236	}
	237	}
	238
	239	if (new_idx < MAXLEN)
	240	new_name[new_idx++] = current;
	241	else
	242	needs_crc = true;
	243	}
	244
	245	if (needs_crc) {
	246	uint16_t ext[EXT_SIZE];
	247	size_t local_ext_idx = 0;
	248
	249	if (has_ext) {
	250	size_t max_filename_len;
	251
	252	/* Translate extension, and store it in ext. */
	253	for (size_t idx = 0; (idx < EXT_SIZE) &&
	254	(ext_idx + idx + 1 < udf_len); idx++) {
	255	uint16_t current = udf_name[ext_idx + idx + 1];
	256
	257	if ((!legal_check(current)) \|\| (!ascii_check(current))) {
	258	needs_crc = true;
	259
	260	/*
	261	* Replace Illegal and non-displayable
	262	* chars with underscore.
	263	*/
	264	current = ILLEGAL_CHAR_MARK;
	265
	266	/*
	267	* Skip any other illegal or
	268	* non-displayable characters.
	269	*/
	270	while ((idx + 1 < EXT_SIZE) &&
	271	((!legal_check(udf_name[ext_idx + idx + 2])) \|\|
	272	(!ascii_check(udf_name[ext_idx + idx + 2]))))
	273	idx++;
	274	}
	275
	276	ext[local_ext_idx++] = current;
	277	}
	278
	279	/*
	280	* Truncate filename to leave room for extension and
	281	* CRC.
	282	*/
	283	max_filename_len = ((MAXLEN - 5) - local_ext_idx - 1);
	284	if (new_idx > max_filename_len)
	285	new_idx = max_filename_len;
	286	else
	287	new_idx = new_ext_idx;
	288	} else if (new_idx > MAXLEN - 5) {
	289	/* If no extension, make sure to leave room for CRC. */
	290	new_idx = MAXLEN - 5;
	291	}
	292
	293	/* Add mark for CRC. */
	294	new_name[new_idx++] = CRC_MARK;
	295
	296	/* Calculate CRC from original filename. */
	297	uint16_t value_crc = udf_unicode_cksum(udf_name, udf_len);
	298
	299	/* Convert 16-bits of CRC to hex characters. */
	300	const char hex_char[] = "0123456789ABCDEF";
	301
	302	new_name[new_idx++] = hex_char[(value_crc & 0xf000) >> 12];
	303	new_name[new_idx++] = hex_char[(value_crc & 0x0f00) >> 8];
	304	new_name[new_idx++] = hex_char[(value_crc & 0x00f0) >> 4];
	305	new_name[new_idx++] = hex_char[(value_crc & 0x000f)];
	306
	307	/* Place a translated extension at end, if found. */
	308	if (has_ext) {
	309	new_name[new_idx++] = PERIOD;
	310
	311	for (size_t idx = 0; idx < local_ext_idx; idx++)
	312	new_name[new_idx++] = ext[idx];
	313	}
	314	}
	315
	316	return new_idx;
	317	}
	318
	319	/** Decode from dchar to utf8
	320	*
	321	* @param result Returned value - utf8 string
	322	* @param result_len Length of output string
	323	* @param id Input string
	324	* @param len Length of input string
	325	* @param chsp Decode method
	326	*
	327	*/
	328	void udf_to_unix_name(char result, size_t result_len, char id, size_t len,
	329	udf_charspec_t *chsp)
	330	{
	331	const char *osta_id = "OSTA Compressed Unicode";
	332	size_t ucode_chars, nice_uchars;
	333
	334	uint16_t raw_name = malloc(MAX_BUF sizeof(uint16_t));
	335	uint16_t unix_name = malloc(MAX_BUF sizeof(uint16_t));
	336
	337	// FIXME: Check for malloc returning NULL
	338
	339	bool is_osta_typ0 = (chsp->type == 0) &&
	340	(str_cmp((char *) chsp->info, osta_id) == 0);
	341
	342	if (is_osta_typ0) {
	343	*raw_name = 0;
	344	*unix_name = 0;
	345
	346	ucode_chars =
	347	udf_uncompress_unicode(len, (uint8_t *) id, raw_name, MAX_BUF);
	348	ucode_chars = min(ucode_chars, utf16_length(raw_name));
	349	nice_uchars =
	350	udf_translate_name(unix_name, raw_name, ucode_chars);
	351
	352	/* Output UTF-8 */
	353	unix_name[nice_uchars] = 0;
	354	utf16_to_str(result, result_len, unix_name);
	355	} else {
	356	/* Assume 8 bit char length byte Latin-1 */
	357	str_ncpy(result, result_len, (char *) (id + 1),
	358	str_size((char *) (id + 1)));
	359	}
	360
	361	free(raw_name);
	362	free(unix_name);
	363	}
	364
	365	/**
	366	* @}
	367	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: