source: mainline/uspace/lib/block/libblock.c@662da50

Last change on this file since 662da50 was 79ae36dd, checked in by Martin Decky <martin@…>, 15 years ago

new async framework with integrated exchange tracking

  • strict isolation between low-level IPC and the high-level async framework
    • each IPC connection is represented by an async_sess_t structure
    • each IPC exchange is represented by an async_exch_t structure
    • exchange management is either based on atomic messages (EXCHANGE_ATOMIC), locking (EXCHANGE_SERIALIZE) or connection cloning (EXCHANGE_CLONE)
  • async_obsolete: temporary compatibility layer to keep old async clients working (several pieces of code are currently broken, but only non-essential functionality)
  • IPC_M_PHONE_HANGUP is now method no. 0 (for elegant boolean evaluation)
  • IPC_M_DEBUG_ALL has been renamed to IPC_M_DEBUG
  • IPC_M_PING has been removed (VFS protocol now has VFS_IN_PING)
  • console routines in libc have been rewritten for better abstraction
  • additional use of libc-private header files (the FILE structure is opaque to the client)
  • various cstyle changes (typos, indentation, missing externs in header files, improved comments, etc.)
Property mode set to 100644
File size: 24.3 KB
[fc840d9]1/*
[ed903174]2 * Copyright (c) 2008 Jakub Jermar
3 * Copyright (c) 2008 Martin Decky
[e272949]4 * Copyright (c) 2011 Martin Sucha
[fc840d9]5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * - Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * - The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
[97c9da8]31/** @addtogroup libblock
[fc840d9]32 * @{
[97c9da8]33 */
[fc840d9]34/**
35 * @file
 36 * @brief Client library for accessing block devices.
37 */
38
[97c9da8]39#include "libblock.h"
[fc840d9]40#include "../../srv/vfs/vfs.h"
[7858bc5f]41#include <ipc/devmap.h>
[c5747fe]42#include <ipc/bd.h>
[7858bc5f]43#include <ipc/services.h>
[fc840d9]44#include <errno.h>
[7858bc5f]45#include <sys/mman.h>
[fc840d9]46#include <async.h>
47#include <as.h>
48#include <assert.h>
[1e4cada]49#include <fibril_synch.h>
[d9c8c81]50#include <adt/list.h>
51#include <adt/hash_table.h>
[1ee00b7]52#include <macros.h>
[d00ae4c]53#include <mem.h>
[c7bbf029]54#include <malloc.h>
55#include <stdio.h>
[16fc3c9]56#include <sys/typefmt.h>
57#include <stacktrace.h>
[fc840d9]58
[916bf1a]59/** Lock protecting the device connection list */
[4e1b57d]60static FIBRIL_MUTEX_INITIALIZE(dcl_lock);
[916bf1a]61/** Device connection list head. */
62static LIST_INITIALIZE(dcl_head);
63
[79ae36dd]64#define CACHE_BUCKETS_LOG2 10
65#define CACHE_BUCKETS (1 << CACHE_BUCKETS_LOG2)
[f1ba5d6]66
67typedef struct {
[4e1b57d]68 fibril_mutex_t lock;
[79ae36dd]69 size_t lblock_size; /**< Logical block size. */
70 unsigned blocks_cluster; /**< Physical blocks per block_t */
71 unsigned block_count; /**< Total number of blocks. */
72 unsigned blocks_cached; /**< Number of cached blocks. */
[f1ba5d6]73 hash_table_t block_hash;
74 link_t free_head;
[1fbe064b]75 enum cache_mode mode;
[f1ba5d6]76} cache_t;
77
[916bf1a]78typedef struct {
79 link_t link;
[991f645]80 devmap_handle_t devmap_handle;
[79ae36dd]81 async_sess_t *sess;
[a830611]82 fibril_mutex_t comm_area_lock;
83 void *comm_area;
84 size_t comm_size;
[916bf1a]85 void *bb_buf;
[ed903174]86 aoff64_t bb_addr;
[79ae36dd]87 size_t pblock_size; /**< Physical block size. */
[f1ba5d6]88 cache_t *cache;
[916bf1a]89} devcon_t;
90
[79ae36dd]91static int read_blocks(devcon_t *, aoff64_t, size_t);
92static int write_blocks(devcon_t *, aoff64_t, size_t);
93static int get_block_size(async_sess_t *, size_t *);
94static int get_num_blocks(async_sess_t *, aoff64_t *);
95static aoff64_t ba_ltop(devcon_t *, aoff64_t);
[1fbe064b]96
[991f645]97static devcon_t *devcon_search(devmap_handle_t devmap_handle)
[916bf1a]98{
99 link_t *cur;
[79ae36dd]100
[4e1b57d]101 fibril_mutex_lock(&dcl_lock);
[79ae36dd]102
[916bf1a]103 for (cur = dcl_head.next; cur != &dcl_head; cur = cur->next) {
104 devcon_t *devcon = list_get_instance(cur, devcon_t, link);
[991f645]105 if (devcon->devmap_handle == devmap_handle) {
[4e1b57d]106 fibril_mutex_unlock(&dcl_lock);
[916bf1a]107 return devcon;
108 }
109 }
[79ae36dd]110
[4e1b57d]111 fibril_mutex_unlock(&dcl_lock);
[916bf1a]112 return NULL;
113}
114
[79ae36dd]115static int devcon_add(devmap_handle_t devmap_handle, async_sess_t *sess,
116 size_t bsize, void *comm_area, size_t comm_size)
[916bf1a]117{
118 link_t *cur;
119 devcon_t *devcon;
[79ae36dd]120
[a830611]121 if (comm_size < bsize)
[1ee00b7]122 return EINVAL;
[79ae36dd]123
[916bf1a]124 devcon = malloc(sizeof(devcon_t));
125 if (!devcon)
126 return ENOMEM;
127
128 link_initialize(&devcon->link);
[991f645]129 devcon->devmap_handle = devmap_handle;
[79ae36dd]130 devcon->sess = sess;
[a830611]131 fibril_mutex_initialize(&devcon->comm_area_lock);
132 devcon->comm_area = comm_area;
133 devcon->comm_size = comm_size;
[6284978]134 devcon->bb_buf = NULL;
[1ee00b7]135 devcon->bb_addr = 0;
136 devcon->pblock_size = bsize;
[f1ba5d6]137 devcon->cache = NULL;
[79ae36dd]138
[4e1b57d]139 fibril_mutex_lock(&dcl_lock);
[916bf1a]140 for (cur = dcl_head.next; cur != &dcl_head; cur = cur->next) {
141 devcon_t *d = list_get_instance(cur, devcon_t, link);
[991f645]142 if (d->devmap_handle == devmap_handle) {
[4e1b57d]143 fibril_mutex_unlock(&dcl_lock);
[916bf1a]144 free(devcon);
145 return EEXIST;
146 }
147 }
148 list_append(&devcon->link, &dcl_head);
[4e1b57d]149 fibril_mutex_unlock(&dcl_lock);
[916bf1a]150 return EOK;
151}
152
153static void devcon_remove(devcon_t *devcon)
154{
[4e1b57d]155 fibril_mutex_lock(&dcl_lock);
[916bf1a]156 list_remove(&devcon->link);
[4e1b57d]157 fibril_mutex_unlock(&dcl_lock);
[916bf1a]158}
[7858bc5f]159
[79ae36dd]160int block_init(exch_mgmt_t mgmt, devmap_handle_t devmap_handle,
161 size_t comm_size)
[7858bc5f]162{
[79ae36dd]163 void *comm_area = mmap(NULL, comm_size, PROTO_READ | PROTO_WRITE,
[7858bc5f]164 MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
[79ae36dd]165 if (!comm_area)
[7858bc5f]166 return ENOMEM;
[79ae36dd]167
168 async_sess_t *sess = devmap_device_connect(mgmt, devmap_handle,
169 IPC_FLAG_BLOCKING);
170 if (!sess) {
[a830611]171 munmap(comm_area, comm_size);
[79ae36dd]172 return ENOENT;
[7858bc5f]173 }
[79ae36dd]174
175 async_exch_t *exch = async_exchange_begin(sess);
176 int rc = async_share_out_start(exch, comm_area,
[7858bc5f]177 AS_AREA_READ | AS_AREA_WRITE);
[79ae36dd]178 async_exchange_end(exch);
179
[7858bc5f]180 if (rc != EOK) {
[79ae36dd]181 munmap(comm_area, comm_size);
182 async_hangup(sess);
[7858bc5f]183 return rc;
184 }
[79ae36dd]185
186 size_t bsize;
187 rc = get_block_size(sess, &bsize);
188
189 if (rc != EOK) {
[a830611]190 munmap(comm_area, comm_size);
[79ae36dd]191 async_hangup(sess);
[1ee00b7]192 return rc;
193 }
[916bf1a]194
[79ae36dd]195 rc = devcon_add(devmap_handle, sess, bsize, comm_area, comm_size);
[916bf1a]196 if (rc != EOK) {
[a830611]197 munmap(comm_area, comm_size);
[79ae36dd]198 async_hangup(sess);
[916bf1a]199 return rc;
200 }
[79ae36dd]201
[7858bc5f]202 return EOK;
203}
204
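/*
 * Illustrative sketch (not part of the original file): how a client might
 * open a block device and initialize this library. The device name, the
 * 4 KiB communication area size and the use of devmap_device_get_handle()
 * to resolve the handle are assumptions for the example.
 */
static int example_block_open(const char *dev_name, devmap_handle_t *handle)
{
	int rc = devmap_device_get_handle(dev_name, handle, 0);
	if (rc != EOK)
		return rc;

	/* The communication area must hold at least one physical block. */
	return block_init(EXCHANGE_SERIALIZE, *handle, 4096);
}
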
[991f645]205void block_fini(devmap_handle_t devmap_handle)
[7858bc5f]206{
[991f645]207 devcon_t *devcon = devcon_search(devmap_handle);
[916bf1a]208 assert(devcon);
209
[64bc4b6]210 if (devcon->cache)
[991f645]211 (void) block_cache_fini(devmap_handle);
[79ae36dd]212
[916bf1a]213 devcon_remove(devcon);
[79ae36dd]214
[6284978]215 if (devcon->bb_buf)
216 free(devcon->bb_buf);
[79ae36dd]217
[a830611]218 munmap(devcon->comm_area, devcon->comm_size);
[79ae36dd]219 async_hangup(devcon->sess);
220
221 free(devcon);
[7858bc5f]222}
223
[991f645]224int block_bb_read(devmap_handle_t devmap_handle, aoff64_t ba)
[6284978]225{
226 void *bb_buf;
[0c243b4]227 int rc;
[6284978]228
[991f645]229 devcon_t *devcon = devcon_search(devmap_handle);
[6284978]230 if (!devcon)
231 return ENOENT;
232 if (devcon->bb_buf)
233 return EEXIST;
[1ee00b7]234 bb_buf = malloc(devcon->pblock_size);
[6284978]235 if (!bb_buf)
236 return ENOMEM;
[1ee00b7]237
[a830611]238 fibril_mutex_lock(&devcon->comm_area_lock);
[1ee00b7]239 rc = read_blocks(devcon, 0, 1);
[0c243b4]240 if (rc != EOK) {
[a830611]241 fibril_mutex_unlock(&devcon->comm_area_lock);
[6284978]242 free(bb_buf);
[0c243b4]243 return rc;
[6284978]244 }
[a830611]245 memcpy(bb_buf, devcon->comm_area, devcon->pblock_size);
246 fibril_mutex_unlock(&devcon->comm_area_lock);
[6408be3]247
[6284978]248 devcon->bb_buf = bb_buf;
[1ee00b7]249 devcon->bb_addr = ba;
[6284978]250
251 return EOK;
252}
253
[991f645]254void *block_bb_get(devmap_handle_t devmap_handle)
[7858bc5f]255{
[991f645]256 devcon_t *devcon = devcon_search(devmap_handle);
[916bf1a]257 assert(devcon);
258 return devcon->bb_buf;
[7858bc5f]259}
260
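/*
 * Illustrative sketch: buffering the boot block (block address 0 here,
 * chosen only for the example) and retrieving the buffered copy later.
 * Interpreting the buffer contents is up to the file system driver.
 */
static void *example_load_boot_block(devmap_handle_t handle)
{
	if (block_bb_read(handle, 0) != EOK)
		return NULL;

	return block_bb_get(handle);
}
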
[f1ba5d6]261static hash_index_t cache_hash(unsigned long *key)
262{
263 return *key & (CACHE_BUCKETS - 1);
264}
265
266static int cache_compare(unsigned long *key, hash_count_t keys, link_t *item)
267{
268 block_t *b = hash_table_get_instance(item, block_t, hash_link);
[a6ba0c9]269 return b->lba == *key;
[f1ba5d6]270}
271
272static void cache_remove_callback(link_t *item)
273{
274}
275
276static hash_table_operations_t cache_ops = {
277 .hash = cache_hash,
278 .compare = cache_compare,
279 .remove_callback = cache_remove_callback
280};
281
[991f645]282int block_cache_init(devmap_handle_t devmap_handle, size_t size, unsigned blocks,
[1fbe064b]283 enum cache_mode mode)
[f1ba5d6]284{
[991f645]285 devcon_t *devcon = devcon_search(devmap_handle);
[f1ba5d6]286 cache_t *cache;
287 if (!devcon)
288 return ENOENT;
289 if (devcon->cache)
290 return EEXIST;
291 cache = malloc(sizeof(cache_t));
292 if (!cache)
293 return ENOMEM;
294
[4e1b57d]295 fibril_mutex_initialize(&cache->lock);
[f1ba5d6]296 list_initialize(&cache->free_head);
[1ee00b7]297 cache->lblock_size = size;
[f1ba5d6]298 cache->block_count = blocks;
[d68e4d5]299 cache->blocks_cached = 0;
[1fbe064b]300 cache->mode = mode;
[f1ba5d6]301
[f092718]302 /* Allow 1:1 or small-to-large block size translation */
[37cf3792]303 if (cache->lblock_size % devcon->pblock_size != 0) {
304 free(cache);
[f092718]305 return ENOTSUP;
[37cf3792]306 }
[f092718]307
308 cache->blocks_cluster = cache->lblock_size / devcon->pblock_size;
[1ee00b7]309
[f1ba5d6]310 if (!hash_table_create(&cache->block_hash, CACHE_BUCKETS, 1,
311 &cache_ops)) {
312 free(cache);
313 return ENOMEM;
314 }
315
316 devcon->cache = cache;
317 return EOK;
318}
319
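/*
 * Illustrative sketch: layering a write-back cache of 4 KiB logical
 * blocks over an initialized device. The logical block size and the
 * cache mode are assumptions for the example; note that the logical
 * size must be a multiple of the device's physical block size.
 */
static int example_cache_enable(devmap_handle_t handle, unsigned block_count)
{
	return block_cache_init(handle, 4096, block_count, CACHE_MODE_WB);
}
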
[991f645]320int block_cache_fini(devmap_handle_t devmap_handle)
[64bc4b6]321{
[991f645]322 devcon_t *devcon = devcon_search(devmap_handle);
[64bc4b6]323 cache_t *cache;
324 int rc;
325
326 if (!devcon)
327 return ENOENT;
328 if (!devcon->cache)
329 return EOK;
330 cache = devcon->cache;
331
332 /*
333 * We are expecting to find all blocks for this device handle on the
334 * free list, i.e. the block reference count should be zero. Do not
335 * bother with the cache and block locks because we are single-threaded.
336 */
337 while (!list_empty(&cache->free_head)) {
338 block_t *b = list_get_instance(cache->free_head.next,
339 block_t, free_link);
340
341 list_remove(&b->free_link);
342 if (b->dirty) {
343 memcpy(devcon->comm_area, b->data, b->size);
[f092718]344 rc = write_blocks(devcon, b->pba, cache->blocks_cluster);
[64bc4b6]345 if (rc != EOK)
346 return rc;
347 }
348
[a6ba0c9]349 unsigned long key = b->lba;
[64bc4b6]350 hash_table_remove(&cache->block_hash, &key, 1);
351
352 free(b->data);
353 free(b);
354 }
355
356 hash_table_destroy(&cache->block_hash);
357 devcon->cache = NULL;
358 free(cache);
359
360 return EOK;
361}
362
[d68e4d5]363#define CACHE_LO_WATERMARK 10
364#define CACHE_HI_WATERMARK 20
[e1c88d5]365static bool cache_can_grow(cache_t *cache)
[fc840d9]366{
[d68e4d5]367 if (cache->blocks_cached < CACHE_LO_WATERMARK)
368 return true;
369 if (!list_empty(&cache->free_head))
370 return false;
[e1c88d5]371 return true;
372}
373
374static void block_initialize(block_t *b)
375{
[4e1b57d]376 fibril_mutex_initialize(&b->lock);
[e1c88d5]377 b->refcnt = 1;
378 b->dirty = false;
[cd688d9]379 b->toxic = false;
[4e1b57d]380 fibril_rwlock_initialize(&b->contents_lock);
[e1c88d5]381 link_initialize(&b->free_link);
382 link_initialize(&b->hash_link);
383}
384
385/** Instantiate a block in memory and get a reference to it.
386 *
[c91f2d1b]387 * @param block Pointer to where the function will store the
388 * block pointer on success.
[991f645]389 * @param devmap_handle Device handle of the block device.
[a6ba0c9]390 * @param ba Block address (logical).
[1d8cdb1]391 * @param flags If BLOCK_FLAGS_NOREAD is specified, block_get()
392 * will not read the contents of the block from the
393 * device.
[e1c88d5]394 *
[c91f2d1b]395 * @return EOK on success or a negative error code.
[e1c88d5]396 */
[a6ba0c9]397int block_get(block_t **block, devmap_handle_t devmap_handle, aoff64_t ba, int flags)
[e1c88d5]398{
399 devcon_t *devcon;
400 cache_t *cache;
[fc840d9]401 block_t *b;
[e1c88d5]402 link_t *l;
[a6ba0c9]403 unsigned long key = ba;
[b7b3fda]404 int rc;
[e1c88d5]405
[991f645]406 devcon = devcon_search(devmap_handle);
[fc840d9]407
[e1c88d5]408 assert(devcon);
409 assert(devcon->cache);
[fc840d9]410
[e1c88d5]411 cache = devcon->cache;
[02ee6bf5]412
413retry:
[b7b3fda]414 rc = EOK;
[4f690cd]415 b = NULL;
[b7b3fda]416
[4e1b57d]417 fibril_mutex_lock(&cache->lock);
[e1c88d5]418 l = hash_table_find(&cache->block_hash, &key);
419 if (l) {
[5716e9a]420found:
[e1c88d5]421 /*
422 * We found the block in the cache.
423 */
424 b = hash_table_get_instance(l, block_t, hash_link);
[4e1b57d]425 fibril_mutex_lock(&b->lock);
[e1c88d5]426 if (b->refcnt++ == 0)
427 list_remove(&b->free_link);
[402a18f]428 if (b->toxic)
429 rc = EIO;
[4e1b57d]430 fibril_mutex_unlock(&b->lock);
431 fibril_mutex_unlock(&cache->lock);
[e1c88d5]432 } else {
433 /*
434 * The block was not found in the cache.
435 */
436 if (cache_can_grow(cache)) {
437 /*
438 * We can grow the cache by allocating new blocks.
439 * Should the allocation fail, we fail over and try to
440 * recycle a block from the cache.
441 */
442 b = malloc(sizeof(block_t));
443 if (!b)
444 goto recycle;
[1ee00b7]445 b->data = malloc(cache->lblock_size);
[e1c88d5]446 if (!b->data) {
447 free(b);
[0dfaa099]448 b = NULL;
[e1c88d5]449 goto recycle;
450 }
[d68e4d5]451 cache->blocks_cached++;
[e1c88d5]452 } else {
453 /*
454 * Try to recycle a block from the free list.
455 */
456 unsigned long temp_key;
457recycle:
[7a56b1ed]458 if (list_empty(&cache->free_head)) {
459 fibril_mutex_unlock(&cache->lock);
460 rc = ENOMEM;
461 goto out;
462 }
[e1c88d5]463 l = cache->free_head.next;
[d68e4d5]464 b = list_get_instance(l, block_t, free_link);
[02ee6bf5]465
466 fibril_mutex_lock(&b->lock);
467 if (b->dirty) {
468 /*
469 * The block needs to be written back to the
470 * device before it changes identity. Do this
471 * while not holding the cache lock so that
472 * concurrency is not impeded. Also move the
473 * block to the end of the free list so that we
474 * do not slow down other instances of
475 * block_get() draining the free list.
476 */
477 list_remove(&b->free_link);
478 list_append(&b->free_link, &cache->free_head);
479 fibril_mutex_unlock(&cache->lock);
[a830611]480 fibril_mutex_lock(&devcon->comm_area_lock);
481 memcpy(devcon->comm_area, b->data, b->size);
[f092718]482 rc = write_blocks(devcon, b->pba,
483 cache->blocks_cluster);
[a830611]484 fibril_mutex_unlock(&devcon->comm_area_lock);
[402a18f]485 if (rc != EOK) {
486 /*
487 * We did not manage to write the block
488 * to the device. Keep it around for
489 * another try. Hopefully, we will grab
490 * another block next time.
491 */
492 fibril_mutex_unlock(&b->lock);
493 goto retry;
494 }
[02ee6bf5]495 b->dirty = false;
496 if (!fibril_mutex_trylock(&cache->lock)) {
497 /*
498 * Somebody is probably racing with us.
499 * Unlock the block and retry.
500 */
501 fibril_mutex_unlock(&b->lock);
502 goto retry;
503 }
[5716e9a]504 l = hash_table_find(&cache->block_hash, &key);
505 if (l) {
506 /*
507 * Someone else must have already
508 * instantiated the block while we were
509 * not holding the cache lock.
510 * Leave the recycled block on the
511 * freelist and continue as if we
512 * found the block of interest during
513 * the first try.
514 */
515 fibril_mutex_unlock(&b->lock);
516 goto found;
517 }
[02ee6bf5]518
519 }
520 fibril_mutex_unlock(&b->lock);
521
522 /*
523 * Unlink the block from the free list and the hash
524 * table.
525 */
526 list_remove(&b->free_link);
[a6ba0c9]527 temp_key = b->lba;
[e1c88d5]528 hash_table_remove(&cache->block_hash, &temp_key, 1);
529 }
[fc840d9]530
[e1c88d5]531 block_initialize(b);
[991f645]532 b->devmap_handle = devmap_handle;
[1ee00b7]533 b->size = cache->lblock_size;
[a6ba0c9]534 b->lba = ba;
535 b->pba = ba_ltop(devcon, b->lba);
[a6d97fb9]536 hash_table_insert(&cache->block_hash, &key, &b->hash_link);
537
538 /*
539 * Lock the block before releasing the cache lock. Thus we don't
[5ac8918]540 * kill concurrent operations on the cache while doing I/O on
541 * the block.
[a6d97fb9]542 */
[4e1b57d]543 fibril_mutex_lock(&b->lock);
544 fibril_mutex_unlock(&cache->lock);
[a6d97fb9]545
[1d8cdb1]546 if (!(flags & BLOCK_FLAGS_NOREAD)) {
547 /*
548 * The block contains old or no data. We need to read
549 * the new contents from the device.
550 */
[a830611]551 fibril_mutex_lock(&devcon->comm_area_lock);
[f092718]552 rc = read_blocks(devcon, b->pba, cache->blocks_cluster);
[a830611]553 memcpy(b->data, devcon->comm_area, cache->lblock_size);
554 fibril_mutex_unlock(&devcon->comm_area_lock);
[402a18f]555 if (rc != EOK)
556 b->toxic = true;
557 } else
558 rc = EOK;
[fc840d9]559
[4e1b57d]560 fibril_mutex_unlock(&b->lock);
[a6d97fb9]561 }
[7a56b1ed]562out:
[4f690cd]563 if ((rc != EOK) && b) {
564 assert(b->toxic);
565 (void) block_put(b);
566 b = NULL;
567 }
[c91f2d1b]568 *block = b;
[402a18f]569 return rc;
[fc840d9]570}
571
[d5a720cf]572/** Release a reference to a block.
573 *
[a6d97fb9]574 * If the last reference is dropped, the block is put on the free list.
[d5a720cf]575 *
576 * @param block Block of which a reference is to be released.
[c91f2d1b]577 *
578 * @return EOK on success or a negative error code.
[d5a720cf]579 */
[c91f2d1b]580int block_put(block_t *block)
[fc840d9]581{
[991f645]582 devcon_t *devcon = devcon_search(block->devmap_handle);
[d5a720cf]583 cache_t *cache;
[ddfc39a3]584 unsigned blocks_cached;
585 enum cache_mode mode;
[402a18f]586 int rc = EOK;
[d5a720cf]587
588 assert(devcon);
589 assert(devcon->cache);
[0f1cf7a]590 assert(block->refcnt >= 1);
[d5a720cf]591
592 cache = devcon->cache;
[ddfc39a3]593
594retry:
595 fibril_mutex_lock(&cache->lock);
596 blocks_cached = cache->blocks_cached;
597 mode = cache->mode;
598 fibril_mutex_unlock(&cache->lock);
599
600 /*
601 * Determine whether to sync the block. Syncing the block is best done
602 * when not holding the cache lock as it does not impede concurrency.
603 * Since the situation may have changed when we unlocked the cache, the
604 * blocks_cached and mode variables are mere hints. We will recheck the
605 * conditions later when the cache lock is held again.
606 */
607 fibril_mutex_lock(&block->lock);
[402a18f]608 if (block->toxic)
609 block->dirty = false; /* will not write back toxic block */
[ddfc39a3]610 if (block->dirty && (block->refcnt == 1) &&
611 (blocks_cached > CACHE_HI_WATERMARK || mode != CACHE_MODE_WB)) {
[a830611]612 fibril_mutex_lock(&devcon->comm_area_lock);
613 memcpy(devcon->comm_area, block->data, block->size);
[f092718]614 rc = write_blocks(devcon, block->pba, cache->blocks_cluster);
[a830611]615 fibril_mutex_unlock(&devcon->comm_area_lock);
[ddfc39a3]616 block->dirty = false;
617 }
618 fibril_mutex_unlock(&block->lock);
619
[4e1b57d]620 fibril_mutex_lock(&cache->lock);
621 fibril_mutex_lock(&block->lock);
[d5a720cf]622 if (!--block->refcnt) {
623 /*
[d68e4d5]624 * Last reference to the block was dropped. Either free the
[402a18f]625 * block or put it on the free list. In case of an I/O error,
626 * free the block.
[d68e4d5]627 */
[402a18f]628 if ((cache->blocks_cached > CACHE_HI_WATERMARK) ||
629 (rc != EOK)) {
[d68e4d5]630 /*
[402a18f]631 * Currently there are too many cached blocks or there
632 * was an I/O error when writing the block back to the
633 * device.
[d68e4d5]634 */
635 if (block->dirty) {
[ddfc39a3]636 /*
637 * We cannot sync the block while holding the
638 * cache lock. Release everything and retry.
639 */
640 block->refcnt++;
641 fibril_mutex_unlock(&block->lock);
642 fibril_mutex_unlock(&cache->lock);
643 goto retry;
[d68e4d5]644 }
645 /*
646 * Take the block out of the cache and free it.
647 */
[a6ba0c9]648 unsigned long key = block->lba;
[d68e4d5]649 hash_table_remove(&cache->block_hash, &key, 1);
[956d4df8]650 fibril_mutex_unlock(&block->lock);
[d68e4d5]651 free(block->data);
[b9e6205]652 free(block);
[d68e4d5]653 cache->blocks_cached--;
654 fibril_mutex_unlock(&cache->lock);
[402a18f]655 return rc;
[d68e4d5]656 }
657 /*
658 * Put the block on the free list.
[d5a720cf]659 */
[1fbe064b]660 if (cache->mode != CACHE_MODE_WB && block->dirty) {
[ddfc39a3]661 /*
662 * We cannot sync the block while holding the cache
663 * lock. Release everything and retry.
664 */
665 block->refcnt++;
666 fibril_mutex_unlock(&block->lock);
667 fibril_mutex_unlock(&cache->lock);
668 goto retry;
[1fbe064b]669 }
[ddfc39a3]670 list_append(&block->free_link, &cache->free_head);
[d5a720cf]671 }
[4e1b57d]672 fibril_mutex_unlock(&block->lock);
673 fibril_mutex_unlock(&cache->lock);
[c91f2d1b]674
[402a18f]675 return rc;
[d5a720cf]676}
677
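/*
 * Illustrative sketch of the block_get()/block_put() reference pattern:
 * take a reference to a cached block, modify its contents under the
 * contents lock, mark it dirty and drop the reference so that write-back
 * can proceed according to the cache mode. The zero-fill is an arbitrary
 * modification and BLOCK_FLAGS_NONE is assumed to be defined in
 * libblock.h alongside BLOCK_FLAGS_NOREAD.
 */
static int example_zero_block(devmap_handle_t handle, aoff64_t lba)
{
	block_t *b;
	int rc = block_get(&b, handle, lba, BLOCK_FLAGS_NONE);
	if (rc != EOK)
		return rc;

	fibril_rwlock_write_lock(&b->contents_lock);
	memset(b->data, 0, b->size);
	b->dirty = true;
	fibril_rwlock_write_unlock(&b->contents_lock);

	return block_put(b);
}
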
[6408be3]678/** Read sequential data from a block device.
[d5a720cf]679 *
[991f645]680 * @param devmap_handle Device handle of the block device.
[d5a720cf]681 * @param bufpos Pointer to the first unread valid offset within the
682 * communication buffer.
683 * @param buflen Pointer to the number of unread bytes that are ready in
684 * the communication buffer.
685 * @param pos Device position to be read.
686 * @param dst Destination buffer.
687 * @param size Size of the destination buffer.
 688 * The device's physical block size is used for the transfer.
689 *
690 * @return EOK on success or a negative return code on failure.
691 */
[991f645]692int block_seqread(devmap_handle_t devmap_handle, size_t *bufpos, size_t *buflen,
[ed903174]693 aoff64_t *pos, void *dst, size_t size)
[d5a720cf]694{
[ed903174]695 size_t offset = 0;
[d5a720cf]696 size_t left = size;
[1ee00b7]697 size_t block_size;
698 devcon_t *devcon;
699
[991f645]700 devcon = devcon_search(devmap_handle);
[d5a720cf]701 assert(devcon);
[1ee00b7]702 block_size = devcon->pblock_size;
[e1c88d5]703
[a830611]704 fibril_mutex_lock(&devcon->comm_area_lock);
[d5a720cf]705 while (left > 0) {
706 size_t rd;
707
708 if (*bufpos + left < *buflen)
709 rd = left;
710 else
711 rd = *buflen - *bufpos;
712
713 if (rd > 0) {
714 /*
715 * Copy the contents of the communication buffer to the
716 * destination buffer.
717 */
[a830611]718 memcpy(dst + offset, devcon->comm_area + *bufpos, rd);
[d5a720cf]719 offset += rd;
720 *bufpos += rd;
721 *pos += rd;
722 left -= rd;
723 }
724
[ed903174]725 if (*bufpos == *buflen) {
[d5a720cf]726 /* Refill the communication buffer with a new block. */
[6408be3]727 int rc;
728
[1ee00b7]729 rc = read_blocks(devcon, *pos / block_size, 1);
[d68e4d5]730 if (rc != EOK) {
[a830611]731 fibril_mutex_unlock(&devcon->comm_area_lock);
[6408be3]732 return rc;
[d68e4d5]733 }
[d5a720cf]734
735 *bufpos = 0;
736 *buflen = block_size;
737 }
738 }
[a830611]739 fibril_mutex_unlock(&devcon->comm_area_lock);
[d5a720cf]740
741 return EOK;
[fc840d9]742}
743
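/*
 * Illustrative sketch: block_seqread() keeps its position state in
 * caller-provided variables, so consecutive calls continue where the
 * previous one stopped. Starting with bufpos == buflen == 0 forces the
 * first call to refill the communication buffer.
 */
static int example_seqread(devmap_handle_t handle, void *dst, size_t len)
{
	size_t bufpos = 0;
	size_t buflen = 0;
	aoff64_t pos = 0;

	return block_seqread(handle, &bufpos, &buflen, &pos, dst, len);
}
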
[00b1d20e]744/** Read blocks directly from device (bypass cache).
745 *
[991f645]746 * @param devmap_handle Device handle of the block device.
[a6ba0c9]747 * @param ba Address of first block (physical).
[00b1d20e]748 * @param cnt Number of blocks.
 750 * @param buf Buffer for storing the data.
750 *
751 * @return EOK on success or negative error code on failure.
752 */
[991f645]753int block_read_direct(devmap_handle_t devmap_handle, aoff64_t ba, size_t cnt, void *buf)
[00b1d20e]754{
755 devcon_t *devcon;
756 int rc;
757
[991f645]758 devcon = devcon_search(devmap_handle);
[00b1d20e]759 assert(devcon);
760
761 fibril_mutex_lock(&devcon->comm_area_lock);
762
763 rc = read_blocks(devcon, ba, cnt);
764 if (rc == EOK)
765 memcpy(buf, devcon->comm_area, devcon->pblock_size * cnt);
766
767 fibril_mutex_unlock(&devcon->comm_area_lock);
768
769 return rc;
770}
771
772/** Write blocks directly to device (bypass cache).
773 *
[991f645]774 * @param devmap_handle Device handle of the block device.
[a6ba0c9]775 * @param ba Address of first block (physical).
[00b1d20e]776 * @param cnt Number of blocks.
 777 * @param data The data to be written.
778 *
779 * @return EOK on success or negative error code on failure.
780 */
[991f645]781int block_write_direct(devmap_handle_t devmap_handle, aoff64_t ba, size_t cnt,
[00b1d20e]782 const void *data)
783{
784 devcon_t *devcon;
785 int rc;
786
[991f645]787 devcon = devcon_search(devmap_handle);
[00b1d20e]788 assert(devcon);
789
790 fibril_mutex_lock(&devcon->comm_area_lock);
791
792 memcpy(devcon->comm_area, data, devcon->pblock_size * cnt);
[dccf721]793 rc = write_blocks(devcon, ba, cnt);
[00b1d20e]794
795 fibril_mutex_unlock(&devcon->comm_area_lock);
796
797 return rc;
798}
799
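/*
 * Illustrative sketch: a cache-bypassing read-modify-write of the first
 * physical block. The 512-byte block size is an assumption made for the
 * example; a real caller would query it with block_get_bsize() first and
 * size the buffer accordingly.
 */
static int example_rewrite_first_block(devmap_handle_t handle)
{
	char blk[512];
	int rc = block_read_direct(handle, 0, 1, blk);
	if (rc != EOK)
		return rc;

	blk[0] ^= 0xff;	/* arbitrary in-place modification */
	return block_write_direct(handle, 0, 1, blk);
}
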
800/** Get device block size.
801 *
[991f645]802 * @param devmap_handle Device handle of the block device.
[00b1d20e]803 * @param bsize Output block size.
804 *
805 * @return EOK on success or negative error code on failure.
806 */
[991f645]807int block_get_bsize(devmap_handle_t devmap_handle, size_t *bsize)
[00b1d20e]808{
809 devcon_t *devcon;
810
[991f645]811 devcon = devcon_search(devmap_handle);
[00b1d20e]812 assert(devcon);
813
[79ae36dd]814 return get_block_size(devcon->sess, bsize);
[00b1d20e]815}
816
[08232ee]817/** Get number of blocks on device.
818 *
[991f645]819 * @param devmap_handle Device handle of the block device.
[08232ee]820 * @param nblocks Output number of blocks.
821 *
822 * @return EOK on success or negative error code on failure.
823 */
[991f645]824int block_get_nblocks(devmap_handle_t devmap_handle, aoff64_t *nblocks)
[08232ee]825{
[79ae36dd]826 devcon_t *devcon = devcon_search(devmap_handle);
[08232ee]827 assert(devcon);
828
[79ae36dd]829 return get_num_blocks(devcon->sess, nblocks);
[08232ee]830}
831
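/*
 * Illustrative sketch: combining block_get_bsize() and block_get_nblocks()
 * to compute the raw capacity of a device in bytes.
 */
static int example_capacity(devmap_handle_t handle, aoff64_t *bytes)
{
	size_t bsize;
	aoff64_t nblocks;

	int rc = block_get_bsize(handle, &bsize);
	if (rc != EOK)
		return rc;

	rc = block_get_nblocks(handle, &nblocks);
	if (rc != EOK)
		return rc;

	*bytes = nblocks * bsize;
	return EOK;
}
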
[e272949]832/** Read bytes directly from the device (bypass cache)
833 *
834 * @param devmap_handle Device handle of the block device.
835 * @param abs_offset Absolute offset in bytes where to start reading
836 * @param bytes Number of bytes to read
837 * @param data Buffer that receives the data
838 *
839 * @return EOK on success or negative error code on failure.
840 */
841int block_read_bytes_direct(devmap_handle_t devmap_handle, aoff64_t abs_offset,
842 size_t bytes, void *data)
843{
844 int rc;
845 size_t phys_block_size;
846 size_t buf_size;
847 void *buffer;
848 aoff64_t first_block;
849 aoff64_t last_block;
850 size_t blocks;
851 size_t offset;
852
853 rc = block_get_bsize(devmap_handle, &phys_block_size);
854 if (rc != EOK) {
855 return rc;
856 }
857
[c4aa9cf]858 /* calculate data position and required space */
[e272949]859 first_block = abs_offset / phys_block_size;
860 offset = abs_offset % phys_block_size;
861 last_block = (abs_offset + bytes - 1) / phys_block_size;
862 blocks = last_block - first_block + 1;
863 buf_size = blocks * phys_block_size;
864
[c4aa9cf]865 /* read the data into memory */
[e272949]866 buffer = malloc(buf_size);
867 if (buffer == NULL) {
868 return ENOMEM;
869 }
870
871 rc = block_read_direct(devmap_handle, first_block, blocks, buffer);
872 if (rc != EOK) {
873 free(buffer);
874 return rc;
875 }
876
[c4aa9cf]877 /* copy the data from the buffer */
[e272949]878 memcpy(data, buffer + offset, bytes);
879 free(buffer);
880
881 return EOK;
882}
883
[1ee00b7]884/** Read blocks from block device.
[6408be3]885 *
886 * @param devcon Device connection.
[1ee00b7]887 * @param ba Address of first block.
888 * @param cnt Number of blocks.
[6408be3]889 * The data is read into the connection's communication area.
890 *
891 * @return EOK on success or negative error code on failure.
892 */
[ed903174]893static int read_blocks(devcon_t *devcon, aoff64_t ba, size_t cnt)
[6408be3]894{
895 assert(devcon);
[79ae36dd]896
897 async_exch_t *exch = async_exchange_begin(devcon->sess);
898 int rc = async_req_3_0(exch, BD_READ_BLOCKS, LOWER32(ba),
[1ee00b7]899 UPPER32(ba), cnt);
[79ae36dd]900 async_exchange_end(exch);
901
[16fc3c9]902 if (rc != EOK) {
[7e752b2]903 printf("Error %d reading %zu blocks starting at block %" PRIuOFF64
904 " from device handle %" PRIun "\n", rc, cnt, ba,
[991f645]905 devcon->devmap_handle);
[16fc3c9]906#ifndef NDEBUG
907 stacktrace_print();
908#endif
909 }
[79ae36dd]910
[1ee00b7]911 return rc;
[6408be3]912}
913
[1fbe064b]914/** Write blocks to block device.
915 *
916 * @param devcon Device connection.
[1ee00b7]917 * @param ba Address of first block.
918 * @param cnt Number of blocks.
[1fbe064b]919 * The data is taken from the connection's communication area.
920 *
921 * @return EOK on success or negative error code on failure.
922 */
[ed903174]923static int write_blocks(devcon_t *devcon, aoff64_t ba, size_t cnt)
[1fbe064b]924{
925 assert(devcon);
[79ae36dd]926
927 async_exch_t *exch = async_exchange_begin(devcon->sess);
928 int rc = async_req_3_0(exch, BD_WRITE_BLOCKS, LOWER32(ba),
[1ee00b7]929 UPPER32(ba), cnt);
[79ae36dd]930 async_exchange_end(exch);
931
[16fc3c9]932 if (rc != EOK) {
[7e752b2]933 printf("Error %d writing %zu blocks starting at block %" PRIuOFF64
934 " to device handle %" PRIun "\n", rc, cnt, ba, devcon->devmap_handle);
[16fc3c9]935#ifndef NDEBUG
936 stacktrace_print();
937#endif
938 }
[79ae36dd]939
[1ee00b7]940 return rc;
941}
[1fbe064b]942
[1ee00b7]943/** Get block size used by the device. */
[79ae36dd]944static int get_block_size(async_sess_t *sess, size_t *bsize)
[1ee00b7]945{
[96b02eb9]946 sysarg_t bs;
[79ae36dd]947
948 async_exch_t *exch = async_exchange_begin(sess);
949 int rc = async_req_0_1(exch, BD_GET_BLOCK_SIZE, &bs);
950 async_exchange_end(exch);
951
[1ee00b7]952 if (rc == EOK)
953 *bsize = (size_t) bs;
[79ae36dd]954
[1ee00b7]955 return rc;
[1fbe064b]956}
957
[08232ee]958/** Get total number of blocks on block device. */
[79ae36dd]959static int get_num_blocks(async_sess_t *sess, aoff64_t *nblocks)
[08232ee]960{
[79ae36dd]961 sysarg_t nb_l;
962 sysarg_t nb_h;
963
964 async_exch_t *exch = async_exchange_begin(sess);
965 int rc = async_req_0_2(exch, BD_GET_NUM_BLOCKS, &nb_l, &nb_h);
966 async_exchange_end(exch);
967
968 if (rc == EOK)
[ed903174]969 *nblocks = (aoff64_t) MERGE_LOUP32(nb_l, nb_h);
[79ae36dd]970
[08232ee]971 return rc;
972}
973
[f092718]974/** Convert logical block address to physical block address. */
975static aoff64_t ba_ltop(devcon_t *devcon, aoff64_t lba)
976{
977 assert(devcon->cache != NULL);
978 return lba * devcon->cache->blocks_cluster;
979}
980
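/*
 * Example: with 4096-byte logical blocks on a device with 512-byte
 * physical blocks, blocks_cluster == 8, so logical block address 5
 * maps to physical block address 40.
 */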
[fc840d9]981/** @}
982 */