558 lines
14 KiB
C
558 lines
14 KiB
C
|
/* xdelta3 - delta compression tools and library
|
||
|
Copyright 2016 Joshua MacDonald
|
||
|
|
||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
you may not use this file except in compliance with the License.
|
||
|
You may obtain a copy of the License at
|
||
|
|
||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||
|
|
||
|
Unless required by applicable law or agreed to in writing, software
|
||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
See the License for the specific language governing permissions and
|
||
|
limitations under the License.
|
||
|
*/
|
||
|
|
||
|
#include "xdelta3-internal.h"
|
||
|
|
||
|
typedef struct _main_blklru main_blklru;
|
||
|
typedef struct _main_blklru_list main_blklru_list;
|
||
|
|
||
|
|
||
|
#define XD3_INVALID_OFFSET XOFF_T_MAX
|
||
|
|
||
|
struct _main_blklru_list
|
||
|
{
|
||
|
main_blklru_list *next;
|
||
|
main_blklru_list *prev;
|
||
|
};
|
||
|
|
||
|
struct _main_blklru
|
||
|
{
|
||
|
uint8_t *blk;
|
||
|
xoff_t blkno;
|
||
|
usize_t size;
|
||
|
main_blklru_list link;
|
||
|
};
|
||
|
|
||
|
XD3_MAKELIST(main_blklru_list,main_blklru,link);
|
||
|
|
||
|
static usize_t lru_size = 0;
|
||
|
static main_blklru *lru = NULL; /* array of lru_size elts */
|
||
|
static main_blklru_list lru_list;
|
||
|
static int do_src_fifo = 0; /* set to avoid lru */
|
||
|
|
||
|
static int lru_hits = 0;
|
||
|
static int lru_misses = 0;
|
||
|
static int lru_filled = 0;
|
||
|
|
||
|
static void main_lru_reset (void)
|
||
|
{
|
||
|
lru_size = 0;
|
||
|
lru = NULL;
|
||
|
do_src_fifo = 0;
|
||
|
lru_hits = 0;
|
||
|
lru_misses = 0;
|
||
|
lru_filled = 0;
|
||
|
}
|
||
|
|
||
|
static void main_lru_cleanup (void)
|
||
|
{
|
||
|
if (lru != NULL)
|
||
|
{
|
||
|
main_buffree (lru[0].blk);
|
||
|
}
|
||
|
|
||
|
main_free (lru);
|
||
|
lru = NULL;
|
||
|
|
||
|
lru_hits = 0;
|
||
|
lru_misses = 0;
|
||
|
lru_filled = 0;
|
||
|
}
|
||
|
|
||
|
/* This is called at different times for encoding and decoding. The
|
||
|
* encoder calls it immediately, the decoder delays until the
|
||
|
* application header is received. */
|
||
|
static int
|
||
|
main_set_source (xd3_stream *stream, xd3_cmd cmd,
|
||
|
main_file *sfile, xd3_source *source)
|
||
|
{
|
||
|
int ret = 0;
|
||
|
usize_t i;
|
||
|
xoff_t source_size = 0;
|
||
|
usize_t blksize;
|
||
|
|
||
|
XD3_ASSERT (lru == NULL);
|
||
|
XD3_ASSERT (stream->src == NULL);
|
||
|
XD3_ASSERT (option_srcwinsz >= XD3_MINSRCWINSZ);
|
||
|
|
||
|
/* TODO: this code needs refactoring into FIFO, LRU, FAKE. Yuck!
|
||
|
* This is simplified from 3.0z which had issues with sizing the
|
||
|
* source buffer memory allocation and the source blocksize. */
|
||
|
|
||
|
/* LRU-specific */
|
||
|
main_blklru_list_init (& lru_list);
|
||
|
|
||
|
if (allow_fake_source)
|
||
|
{
|
||
|
/* TODO: refactor
|
||
|
* TOOLS/recode-specific: Check "allow_fake_source" mode looks
|
||
|
* broken now. */
|
||
|
sfile->mode = XO_READ;
|
||
|
sfile->realname = sfile->filename;
|
||
|
sfile->nread = 0;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/* Either a regular file (possibly compressed) or a FIFO
|
||
|
* (possibly compressed). */
|
||
|
if ((ret = main_file_open (sfile, sfile->filename, XO_READ)))
|
||
|
{
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
/* If the file is regular we know it's size. If the file turns
|
||
|
* out to be externally compressed, size_known may change. */
|
||
|
sfile->size_known = (main_file_stat (sfile, &source_size) == 0);
|
||
|
}
|
||
|
|
||
|
/* Note: The API requires a power-of-two blocksize and srcwinsz
|
||
|
* (-B). The logic here will use a single block if the entire file
|
||
|
* is known to fit into srcwinsz. */
|
||
|
option_srcwinsz = xd3_xoff_roundup (option_srcwinsz);
|
||
|
|
||
|
/* Though called "lru", it is not LRU-specific. We always allocate
|
||
|
* a maximum number of source block buffers. If the entire file
|
||
|
* fits into srcwinsz, this buffer will stay as the only
|
||
|
* (lru_size==1) source block. Otherwise, we know that at least
|
||
|
* option_srcwinsz bytes are available. Split the source window
|
||
|
* into buffers. */
|
||
|
if ((lru = (main_blklru*) main_malloc (MAX_LRU_SIZE *
|
||
|
sizeof (main_blklru))) == NULL)
|
||
|
{
|
||
|
ret = ENOMEM;
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
memset (lru, 0, sizeof(lru[0]) * MAX_LRU_SIZE);
|
||
|
|
||
|
/* Allocate the entire buffer. */
|
||
|
if ((lru[0].blk = (uint8_t*) main_bufalloc (option_srcwinsz)) == NULL)
|
||
|
{
|
||
|
ret = ENOMEM;
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
/* Main calls main_getblk_func() once before xd3_set_source(). This
|
||
|
* is the point at which external decompression may begin. Set the
|
||
|
* system for a single block. */
|
||
|
lru_size = 1;
|
||
|
lru[0].blkno = XD3_INVALID_OFFSET;
|
||
|
blksize = option_srcwinsz;
|
||
|
main_blklru_list_push_back (& lru_list, & lru[0]);
|
||
|
XD3_ASSERT (blksize != 0);
|
||
|
|
||
|
/* Initialize xd3_source. */
|
||
|
source->blksize = blksize;
|
||
|
source->name = sfile->filename;
|
||
|
source->ioh = sfile;
|
||
|
source->curblkno = XD3_INVALID_OFFSET;
|
||
|
source->curblk = NULL;
|
||
|
source->max_winsize = option_srcwinsz;
|
||
|
|
||
|
if ((ret = main_getblk_func (stream, source, 0)) != 0)
|
||
|
{
|
||
|
XPR(NT "error reading source: %s: %s\n",
|
||
|
sfile->filename,
|
||
|
xd3_mainerror (ret));
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
source->onblk = lru[0].size; /* xd3 sets onblk */
|
||
|
|
||
|
/* If the file is smaller than a block, size is known. */
|
||
|
if (!sfile->size_known && source->onblk < blksize)
|
||
|
{
|
||
|
source_size = source->onblk;
|
||
|
source->onlastblk = source_size;
|
||
|
sfile->size_known = 1;
|
||
|
}
|
||
|
|
||
|
/* If the size is not known or is greater than the buffer size, we
|
||
|
* split the buffer across MAX_LRU_SIZE blocks (already allocated in
|
||
|
* "lru"). */
|
||
|
if (!sfile->size_known || source_size > option_srcwinsz)
|
||
|
{
|
||
|
/* Modify block 0, change blocksize. */
|
||
|
blksize = option_srcwinsz / MAX_LRU_SIZE;
|
||
|
source->blksize = blksize;
|
||
|
source->onblk = blksize;
|
||
|
source->onlastblk = blksize;
|
||
|
source->max_blkno = MAX_LRU_SIZE - 1;
|
||
|
|
||
|
lru[0].size = blksize;
|
||
|
lru_size = MAX_LRU_SIZE;
|
||
|
|
||
|
/* Setup rest of blocks. */
|
||
|
for (i = 1; i < lru_size; i += 1)
|
||
|
{
|
||
|
lru[i].blk = lru[0].blk + (blksize * i);
|
||
|
lru[i].blkno = i;
|
||
|
lru[i].size = blksize;
|
||
|
main_blklru_list_push_back (& lru_list, & lru[i]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (! sfile->size_known)
|
||
|
{
|
||
|
/* If the size is not know, we must use FIFO discipline. */
|
||
|
do_src_fifo = 1;
|
||
|
}
|
||
|
|
||
|
/* Call the appropriate set_source method, handle errors, print
|
||
|
* verbose message, etc. */
|
||
|
if (sfile->size_known)
|
||
|
{
|
||
|
ret = xd3_set_source_and_size (stream, source, source_size);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
ret = xd3_set_source (stream, source);
|
||
|
}
|
||
|
|
||
|
if (ret)
|
||
|
{
|
||
|
XPR(NT XD3_LIB_ERRMSG (stream, ret));
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
XD3_ASSERT (stream->src == source);
|
||
|
XD3_ASSERT (source->blksize == blksize);
|
||
|
|
||
|
if (option_verbose)
|
||
|
{
|
||
|
static shortbuf srcszbuf;
|
||
|
static shortbuf srccntbuf;
|
||
|
static shortbuf winszbuf;
|
||
|
static shortbuf blkszbuf;
|
||
|
static shortbuf nbufs;
|
||
|
|
||
|
if (sfile->size_known)
|
||
|
{
|
||
|
short_sprintf (srcszbuf, "source size %s [%"Q"u]",
|
||
|
main_format_bcnt (source_size, &srccntbuf),
|
||
|
source_size);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
short_sprintf (srcszbuf, "%s", "source size unknown");
|
||
|
}
|
||
|
|
||
|
nbufs.buf[0] = 0;
|
||
|
|
||
|
if (option_verbose > 1)
|
||
|
{
|
||
|
short_sprintf (nbufs, " #bufs %"W"u", lru_size);
|
||
|
}
|
||
|
|
||
|
XPR(NT "source %s %s blksize %s window %s%s%s\n",
|
||
|
sfile->filename,
|
||
|
srcszbuf.buf,
|
||
|
main_format_bcnt (blksize, &blkszbuf),
|
||
|
main_format_bcnt (option_srcwinsz, &winszbuf),
|
||
|
nbufs.buf,
|
||
|
do_src_fifo ? " (FIFO)" : "");
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
main_getblk_lru (xd3_source *source, xoff_t blkno,
|
||
|
main_blklru** blrup, int *is_new)
|
||
|
{
|
||
|
main_blklru *blru = NULL;
|
||
|
usize_t i;
|
||
|
|
||
|
(*is_new) = 0;
|
||
|
|
||
|
if (do_src_fifo)
|
||
|
{
|
||
|
/* Direct lookup assumes sequential scan w/o skipping blocks. */
|
||
|
int idx = blkno % lru_size;
|
||
|
blru = & lru[idx];
|
||
|
if (blru->blkno == blkno)
|
||
|
{
|
||
|
(*blrup) = blru;
|
||
|
return 0;
|
||
|
}
|
||
|
/* No going backwards in a sequential scan. */
|
||
|
if (blru->blkno != XD3_INVALID_OFFSET && blru->blkno > blkno)
|
||
|
{
|
||
|
return XD3_TOOFARBACK;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/* Sequential search through LRU. */
|
||
|
for (i = 0; i < lru_size; i += 1)
|
||
|
{
|
||
|
blru = & lru[i];
|
||
|
if (blru->blkno == blkno)
|
||
|
{
|
||
|
main_blklru_list_remove (blru);
|
||
|
main_blklru_list_push_back (& lru_list, blru);
|
||
|
(*blrup) = blru;
|
||
|
IF_DEBUG1 (DP(RINT "[getblk_lru] HIT blkno = %"Q"u lru_size=%"W"u\n",
|
||
|
blkno, lru_size));
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
IF_DEBUG1 (DP(RINT "[getblk_lru] MISS blkno = %"Q"u lru_size=%"W"u\n",
|
||
|
blkno, lru_size));
|
||
|
}
|
||
|
|
||
|
if (do_src_fifo)
|
||
|
{
|
||
|
int idx = blkno % lru_size;
|
||
|
blru = & lru[idx];
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
XD3_ASSERT (! main_blklru_list_empty (& lru_list));
|
||
|
blru = main_blklru_list_pop_front (& lru_list);
|
||
|
main_blklru_list_push_back (& lru_list, blru);
|
||
|
}
|
||
|
|
||
|
lru_filled += 1;
|
||
|
(*is_new) = 1;
|
||
|
(*blrup) = blru;
|
||
|
blru->blkno = XD3_INVALID_OFFSET;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
main_read_seek_source (xd3_stream *stream,
|
||
|
xd3_source *source,
|
||
|
xoff_t blkno) {
|
||
|
xoff_t pos = blkno * source->blksize;
|
||
|
main_file *sfile = (main_file*) source->ioh;
|
||
|
main_blklru *blru;
|
||
|
int is_new;
|
||
|
size_t nread = 0;
|
||
|
int ret = 0;
|
||
|
|
||
|
if (!sfile->seek_failed)
|
||
|
{
|
||
|
ret = main_file_seek (sfile, pos);
|
||
|
|
||
|
if (ret == 0)
|
||
|
{
|
||
|
sfile->source_position = pos;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (sfile->seek_failed || ret != 0)
|
||
|
{
|
||
|
/* For an unseekable file (or other seek error, does it
|
||
|
* matter?) */
|
||
|
if (sfile->source_position > pos)
|
||
|
{
|
||
|
/* Could assert !IS_ENCODE(), this shouldn't happen
|
||
|
* because of do_src_fifo during encode. */
|
||
|
if (!option_quiet)
|
||
|
{
|
||
|
XPR(NT "source can't seek backwards; requested block offset "
|
||
|
"%"Q"u source position is %"Q"u\n",
|
||
|
pos, sfile->source_position);
|
||
|
}
|
||
|
|
||
|
sfile->seek_failed = 1;
|
||
|
stream->msg = "non-seekable source: "
|
||
|
"copy is too far back (try raising -B)";
|
||
|
return XD3_TOOFARBACK;
|
||
|
}
|
||
|
|
||
|
/* There's a chance here, that an genuine lseek error will cause
|
||
|
* xdelta3 to shift into non-seekable mode, entering a degraded
|
||
|
* condition. */
|
||
|
if (!sfile->seek_failed && option_verbose)
|
||
|
{
|
||
|
XPR(NT "source can't seek, will use FIFO for %s\n",
|
||
|
sfile->filename);
|
||
|
|
||
|
if (option_verbose > 1)
|
||
|
{
|
||
|
XPR(NT "seek error at offset %"Q"u: %s\n",
|
||
|
pos, xd3_mainerror (ret));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
sfile->seek_failed = 1;
|
||
|
|
||
|
if (option_verbose > 1 && pos != sfile->source_position)
|
||
|
{
|
||
|
XPR(NT "non-seekable source skipping %"Q"u bytes @ %"Q"u\n",
|
||
|
pos - sfile->source_position,
|
||
|
sfile->source_position);
|
||
|
}
|
||
|
|
||
|
while (sfile->source_position < pos)
|
||
|
{
|
||
|
xoff_t skip_blkno;
|
||
|
usize_t skip_offset;
|
||
|
|
||
|
xd3_blksize_div (sfile->source_position, source,
|
||
|
&skip_blkno, &skip_offset);
|
||
|
|
||
|
/* Read past unused data */
|
||
|
XD3_ASSERT (pos - sfile->source_position >= source->blksize);
|
||
|
XD3_ASSERT (skip_offset == 0);
|
||
|
|
||
|
if ((ret = main_getblk_lru (source, skip_blkno,
|
||
|
& blru, & is_new)))
|
||
|
{
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
XD3_ASSERT (is_new);
|
||
|
blru->blkno = skip_blkno;
|
||
|
|
||
|
if ((ret = main_read_primary_input (sfile,
|
||
|
(uint8_t*) blru->blk,
|
||
|
source->blksize,
|
||
|
& nread)))
|
||
|
{
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
if (nread != source->blksize)
|
||
|
{
|
||
|
IF_DEBUG1 (DP(RINT "[getblk] short skip block nread = %"Z"u\n",
|
||
|
nread));
|
||
|
stream->msg = "non-seekable input is short";
|
||
|
return XD3_INVALID_INPUT;
|
||
|
}
|
||
|
|
||
|
sfile->source_position += nread;
|
||
|
blru->size = nread;
|
||
|
|
||
|
IF_DEBUG1 (DP(RINT "[getblk] skip blkno %"Q"u size %"W"u\n",
|
||
|
skip_blkno, blru->size));
|
||
|
|
||
|
XD3_ASSERT (sfile->source_position <= pos);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/* This is the callback for reading a block of source. This function
|
||
|
* is blocking and it implements a small LRU.
|
||
|
*
|
||
|
* Note that it is possible for main_input() to handle getblk requests
|
||
|
* in a non-blocking manner. If the callback is NULL then the caller
|
||
|
* of xd3_*_input() must handle the XD3_GETSRCBLK return value and
|
||
|
* fill the source in the same way. See xd3_getblk for details. To
|
||
|
* see an example of non-blocking getblk, see xdelta-test.h. */
|
||
|
static int
|
||
|
main_getblk_func (xd3_stream *stream,
|
||
|
xd3_source *source,
|
||
|
xoff_t blkno)
|
||
|
{
|
||
|
int ret = 0;
|
||
|
xoff_t pos = blkno * source->blksize;
|
||
|
main_file *sfile = (main_file*) source->ioh;
|
||
|
main_blklru *blru;
|
||
|
int is_new;
|
||
|
size_t nread = 0;
|
||
|
|
||
|
if (allow_fake_source)
|
||
|
{
|
||
|
source->curblkno = blkno;
|
||
|
source->onblk = 0;
|
||
|
source->curblk = lru[0].blk;
|
||
|
lru[0].size = 0;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
if ((ret = main_getblk_lru (source, blkno, & blru, & is_new)))
|
||
|
{
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
if (!is_new)
|
||
|
{
|
||
|
source->curblkno = blkno;
|
||
|
source->onblk = blru->size;
|
||
|
source->curblk = blru->blk;
|
||
|
lru_hits++;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
lru_misses += 1;
|
||
|
|
||
|
if (pos != sfile->source_position)
|
||
|
{
|
||
|
/* Only try to seek when the position is wrong. This means the
|
||
|
* decoder will fail when the source buffer is too small, but
|
||
|
* only when the input is non-seekable. */
|
||
|
if ((ret = main_read_seek_source (stream, source, blkno)))
|
||
|
{
|
||
|
return ret;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
XD3_ASSERT (sfile->source_position == pos);
|
||
|
|
||
|
if ((ret = main_read_primary_input (sfile,
|
||
|
(uint8_t*) blru->blk,
|
||
|
source->blksize,
|
||
|
& nread)))
|
||
|
{
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
/* Save the last block read, used to handle non-seekable files. */
|
||
|
sfile->source_position = pos + nread;
|
||
|
|
||
|
if (option_verbose > 3)
|
||
|
{
|
||
|
if (blru->blkno != XD3_INVALID_OFFSET)
|
||
|
{
|
||
|
if (blru->blkno != blkno)
|
||
|
{
|
||
|
XPR(NT "source block %"Q"u read %"Z"u ejects %"Q"u (lru_hits=%u, "
|
||
|
"lru_misses=%u, lru_filled=%u)\n",
|
||
|
blkno, nread, blru->blkno, lru_hits, lru_misses, lru_filled);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
XPR(NT "source block %"Q"u read %"Z"u (lru_hits=%u, "
|
||
|
"lru_misses=%u, lru_filled=%u)\n",
|
||
|
blkno, nread, lru_hits, lru_misses, lru_filled);
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
XPR(NT "source block %"Q"u read %"Z"u (lru_hits=%u, lru_misses=%u, "
|
||
|
"lru_filled=%u)\n", blkno, nread,
|
||
|
lru_hits, lru_misses, lru_filled);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
source->curblk = blru->blk;
|
||
|
source->curblkno = blkno;
|
||
|
source->onblk = nread;
|
||
|
blru->size = nread;
|
||
|
blru->blkno = blkno;
|
||
|
|
||
|
IF_DEBUG1 (DP(RINT "[main_getblk] blkno %"Q"u onblk %"Z"u pos %"Q"u "
|
||
|
"srcpos %"Q"u\n",
|
||
|
blkno, nread, pos, sfile->source_position));
|
||
|
|
||
|
return 0;
|
||
|
}
|