From ae59ce225649ba069811973d962e072c7ad8cdd6 Mon Sep 17 00:00:00 2001 From: C-3PO Date: Fri, 14 Sep 2018 03:16:31 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=8F=97=20Add=20xdelta3=20dependency?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 14 +- lib/xdelta3/LICENSE | 176 + lib/xdelta3/Makefile.am | 192 + lib/xdelta3/README.md | 37 + lib/xdelta3/badcopy.c | 158 + lib/xdelta3/configure.ac | 51 + lib/xdelta3/cpp-btree/CMakeLists.txt | 40 + lib/xdelta3/cpp-btree/COPYING | 202 + lib/xdelta3/cpp-btree/README | 31 + lib/xdelta3/cpp-btree/btree.h | 2394 ++++++++ lib/xdelta3/cpp-btree/btree_bench.cc | 593 ++ lib/xdelta3/cpp-btree/btree_container.h | 349 ++ lib/xdelta3/cpp-btree/btree_map.h | 130 + lib/xdelta3/cpp-btree/btree_set.h | 121 + lib/xdelta3/cpp-btree/btree_test.cc | 270 + lib/xdelta3/cpp-btree/btree_test.h | 940 +++ lib/xdelta3/cpp-btree/btree_test_flags.cc | 20 + lib/xdelta3/cpp-btree/safe_btree.h | 395 ++ lib/xdelta3/cpp-btree/safe_btree_map.h | 89 + lib/xdelta3/cpp-btree/safe_btree_set.h | 88 + lib/xdelta3/cpp-btree/safe_btree_test.cc | 116 + lib/xdelta3/draft-korn-vcdiff.txt | 1322 ++++ lib/xdelta3/examples/Makefile | 32 + lib/xdelta3/examples/README.md | 8 + lib/xdelta3/examples/compare_test.c | 138 + lib/xdelta3/examples/encode_decode_test.c | 203 + .../project.pbxproj | 389 ++ .../xdelta3-ios-test/Xd3iOSAppDelegate.h | 23 + .../xdelta3-ios-test/Xd3iOSAppDelegate.m | 68 + .../xdelta3-ios-test/Xd3iOSViewController.h | 28 + .../xdelta3-ios-test/Xd3iOSViewController.m | 177 + .../en.lproj/InfoPlist.strings | 2 + .../en.lproj/MainStoryboard_iPad.storyboard | 77 + .../en.lproj/MainStoryboard_iPhone.storyboard | 27 + .../xdelta3-ios-test/file_v1.bin | 1378 +++++ .../xdelta3-ios-test/file_v1_to_v2.bin | Bin 0 -> 52723 bytes .../xdelta3-ios-test/file_v2.bin | 5419 +++++++++++++++++ .../xdelta3-ios-test/xdelta3-ios-test/main.m | 25 + .../xdelta3-ios-test-Info.plist | 52 + .../xdelta3-ios-test-Prefix.pch | 14 + 
lib/xdelta3/examples/small_page_test.c | 215 + lib/xdelta3/examples/speed_test.c | 87 + lib/xdelta3/examples/test.h | 56 + lib/xdelta3/generate_build_files.sh | 8 + lib/xdelta3/go/src/regtest.go | 274 + lib/xdelta3/go/src/xdelta/rstream.go | 71 + lib/xdelta3/go/src/xdelta/run.go | 71 + lib/xdelta3/go/src/xdelta/test.go | 164 + lib/xdelta3/go/src/xdelta/tgroup.go | 97 + lib/xdelta3/linkxd3lib.c | 42 + .../m4/ax_check_aligned_access_required.m4 | 84 + lib/xdelta3/m4/ax_pkg_swig.m4 | 135 + lib/xdelta3/m4/ax_python_devel.m4 | 325 + lib/xdelta3/m4/ax_swig_python.m4 | 64 + lib/xdelta3/plot.sh | 25 + lib/xdelta3/rcs_junk.cc | 1861 ++++++ lib/xdelta3/run_release.sh | 288 + lib/xdelta3/testing/Makefile | 8 + lib/xdelta3/testing/checksum_test.cc | 770 +++ lib/xdelta3/testing/checksum_test_c.c | 189 + lib/xdelta3/testing/cmp.h | 67 + lib/xdelta3/testing/delta.h | 87 + lib/xdelta3/testing/file.h | 399 ++ lib/xdelta3/testing/modify.h | 400 ++ lib/xdelta3/testing/random.h | 157 + lib/xdelta3/testing/regtest.cc | 1321 ++++ lib/xdelta3/testing/regtest_c.c | 17 + lib/xdelta3/testing/run_release.sh | 2 + lib/xdelta3/testing/segment.h | 112 + lib/xdelta3/testing/sizes.h | 126 + lib/xdelta3/testing/test.h | 84 + lib/xdelta3/testing/xdelta3-regtest.py | 1264 ++++ lib/xdelta3/testing/xdelta3-test.py | 153 + lib/xdelta3/xdelta3-blkcache.h | 557 ++ lib/xdelta3/xdelta3-cfgs.h | 171 + lib/xdelta3/xdelta3-decode.h | 1219 ++++ lib/xdelta3/xdelta3-djw.h | 1835 ++++++ lib/xdelta3/xdelta3-fgk.h | 857 +++ lib/xdelta3/xdelta3-hash.h | 159 + lib/xdelta3/xdelta3-internal.h | 385 ++ lib/xdelta3/xdelta3-list.h | 127 + lib/xdelta3/xdelta3-lzma.h | 195 + lib/xdelta3/xdelta3-main.h | 4062 ++++++++++++ lib/xdelta3/xdelta3-merge.h | 583 ++ lib/xdelta3/xdelta3-second.h | 321 + lib/xdelta3/xdelta3-test.h | 3022 +++++++++ lib/xdelta3/xdelta3.1 | 153 + lib/xdelta3/xdelta3.c | 4819 +++++++++++++++ lib/xdelta3/xdelta3.h | 1476 +++++ lib/xdelta3/xdelta3.i | 85 + lib/xdelta3/xdelta3.vcxproj | 344 ++ 
lib/xdelta3/xdelta3.wxi | 7 + lib/xdelta3/xdelta3.wxs | 131 + src/errorAndExit.h | 2 + src/parseArguments.h | 2 + src/utils/min.h | 2 + src/xdelta3.c | 8 + 97 files changed, 45332 insertions(+), 1 deletion(-) create mode 100644 lib/xdelta3/LICENSE create mode 100644 lib/xdelta3/Makefile.am create mode 100644 lib/xdelta3/README.md create mode 100644 lib/xdelta3/badcopy.c create mode 100644 lib/xdelta3/configure.ac create mode 100644 lib/xdelta3/cpp-btree/CMakeLists.txt create mode 100644 lib/xdelta3/cpp-btree/COPYING create mode 100644 lib/xdelta3/cpp-btree/README create mode 100644 lib/xdelta3/cpp-btree/btree.h create mode 100644 lib/xdelta3/cpp-btree/btree_bench.cc create mode 100644 lib/xdelta3/cpp-btree/btree_container.h create mode 100644 lib/xdelta3/cpp-btree/btree_map.h create mode 100644 lib/xdelta3/cpp-btree/btree_set.h create mode 100644 lib/xdelta3/cpp-btree/btree_test.cc create mode 100644 lib/xdelta3/cpp-btree/btree_test.h create mode 100644 lib/xdelta3/cpp-btree/btree_test_flags.cc create mode 100644 lib/xdelta3/cpp-btree/safe_btree.h create mode 100644 lib/xdelta3/cpp-btree/safe_btree_map.h create mode 100644 lib/xdelta3/cpp-btree/safe_btree_set.h create mode 100644 lib/xdelta3/cpp-btree/safe_btree_test.cc create mode 100644 lib/xdelta3/draft-korn-vcdiff.txt create mode 100644 lib/xdelta3/examples/Makefile create mode 100644 lib/xdelta3/examples/README.md create mode 100644 lib/xdelta3/examples/compare_test.c create mode 100644 lib/xdelta3/examples/encode_decode_test.c create mode 100644 lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test.xcodeproj/project.pbxproj create mode 100644 lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.h create mode 100644 lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.m create mode 100644 lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.h create mode 100644 
lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.m create mode 100644 lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/InfoPlist.strings create mode 100644 lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPad.storyboard create mode 100644 lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPhone.storyboard create mode 100644 lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/file_v1.bin create mode 100644 lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/file_v1_to_v2.bin create mode 100644 lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/file_v2.bin create mode 100644 lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/main.m create mode 100644 lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Info.plist create mode 100644 lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Prefix.pch create mode 100644 lib/xdelta3/examples/small_page_test.c create mode 100644 lib/xdelta3/examples/speed_test.c create mode 100644 lib/xdelta3/examples/test.h create mode 100644 lib/xdelta3/generate_build_files.sh create mode 100644 lib/xdelta3/go/src/regtest.go create mode 100644 lib/xdelta3/go/src/xdelta/rstream.go create mode 100644 lib/xdelta3/go/src/xdelta/run.go create mode 100644 lib/xdelta3/go/src/xdelta/test.go create mode 100644 lib/xdelta3/go/src/xdelta/tgroup.go create mode 100644 lib/xdelta3/linkxd3lib.c create mode 100644 lib/xdelta3/m4/ax_check_aligned_access_required.m4 create mode 100644 lib/xdelta3/m4/ax_pkg_swig.m4 create mode 100644 lib/xdelta3/m4/ax_python_devel.m4 create mode 100644 lib/xdelta3/m4/ax_swig_python.m4 create mode 100644 lib/xdelta3/plot.sh create mode 100644 lib/xdelta3/rcs_junk.cc create mode 100644 lib/xdelta3/run_release.sh create mode 100644 lib/xdelta3/testing/Makefile create mode 100644 lib/xdelta3/testing/checksum_test.cc create mode 100644 
lib/xdelta3/testing/checksum_test_c.c create mode 100644 lib/xdelta3/testing/cmp.h create mode 100644 lib/xdelta3/testing/delta.h create mode 100644 lib/xdelta3/testing/file.h create mode 100644 lib/xdelta3/testing/modify.h create mode 100644 lib/xdelta3/testing/random.h create mode 100644 lib/xdelta3/testing/regtest.cc create mode 100644 lib/xdelta3/testing/regtest_c.c create mode 100644 lib/xdelta3/testing/run_release.sh create mode 100644 lib/xdelta3/testing/segment.h create mode 100644 lib/xdelta3/testing/sizes.h create mode 100644 lib/xdelta3/testing/test.h create mode 100644 lib/xdelta3/testing/xdelta3-regtest.py create mode 100644 lib/xdelta3/testing/xdelta3-test.py create mode 100644 lib/xdelta3/xdelta3-blkcache.h create mode 100644 lib/xdelta3/xdelta3-cfgs.h create mode 100644 lib/xdelta3/xdelta3-decode.h create mode 100644 lib/xdelta3/xdelta3-djw.h create mode 100644 lib/xdelta3/xdelta3-fgk.h create mode 100644 lib/xdelta3/xdelta3-hash.h create mode 100644 lib/xdelta3/xdelta3-internal.h create mode 100644 lib/xdelta3/xdelta3-list.h create mode 100644 lib/xdelta3/xdelta3-lzma.h create mode 100644 lib/xdelta3/xdelta3-main.h create mode 100644 lib/xdelta3/xdelta3-merge.h create mode 100644 lib/xdelta3/xdelta3-second.h create mode 100644 lib/xdelta3/xdelta3-test.h create mode 100644 lib/xdelta3/xdelta3.1 create mode 100644 lib/xdelta3/xdelta3.c create mode 100644 lib/xdelta3/xdelta3.h create mode 100644 lib/xdelta3/xdelta3.i create mode 100644 lib/xdelta3/xdelta3.vcxproj create mode 100644 lib/xdelta3/xdelta3.wxi create mode 100644 lib/xdelta3/xdelta3.wxs create mode 100644 src/xdelta3.c diff --git a/README.md b/README.md index f713628..a9a890c 100644 --- a/README.md +++ b/README.md @@ -49,4 +49,16 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-## Xdelta3 +## Xdelta3 <[https://github.com/jmacd/xdelta](https://github.com/jmacd/xdelta)> + +Copyright 2016 Joshua MacDonald + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/lib/xdelta3/LICENSE b/lib/xdelta3/LICENSE new file mode 100644 index 0000000..7a77415 --- /dev/null +++ b/lib/xdelta3/LICENSE @@ -0,0 +1,176 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, +and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by +the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all +other entities that control, are controlled by, or are under common +control with that entity. For the purposes of this definition, +"control" means (i) the power, direct or indirect, to cause the +direction or management of such entity, whether by contract or +otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity +exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, +including but not limited to software source code, documentation +source, and configuration files. 
+ +"Object" form shall mean any form resulting from mechanical +transformation or translation of a Source form, including but +not limited to compiled object code, generated documentation, +and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or +Object form, made available under the License, as indicated by a +copyright notice that is included in or attached to the work +(an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object +form, that is based on (or derived from) the Work and for which the +editorial revisions, annotations, elaborations, or other modifications +represent, as a whole, an original work of authorship. For the purposes +of this License, Derivative Works shall not include works that remain +separable from, or merely link (or bind by name) to the interfaces of, +the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including +the original version of the Work and any modifications or additions +to that Work or Derivative Works thereof, that is intentionally +submitted to Licensor for inclusion in the Work by the copyright owner +or by an individual or Legal Entity authorized to submit on behalf of +the copyright owner. For the purposes of this definition, "submitted" +means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, +and issue tracking systems that are managed by, or on behalf of, the +Licensor for the purpose of discussing and improving the Work, but +excluding communication that is conspicuously marked or otherwise +designated in writing by the copyright owner as "Not a Contribution." 
+ +"Contributor" shall mean Licensor and any individual or Legal Entity +on behalf of whom a Contribution has been received by Licensor and +subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of +this License, each Contributor hereby grants to You a perpetual, +worldwide, non-exclusive, no-charge, royalty-free, irrevocable +copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the +Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of +this License, each Contributor hereby grants to You a perpetual, +worldwide, non-exclusive, no-charge, royalty-free, irrevocable +(except as stated in this section) patent license to make, have made, +use, offer to sell, sell, import, and otherwise transfer the Work, +where such license applies only to those patent claims licensable +by such Contributor that are necessarily infringed by their +Contribution(s) alone or by combination of their Contribution(s) +with the Work to which such Contribution(s) was submitted. If You +institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work +or a Contribution incorporated within the Work constitutes direct +or contributory patent infringement, then any patent licenses +granted to You under this License for that Work shall terminate +as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the +Work or Derivative Works thereof in any medium, with or without +modifications, and in Source or Object form, provided that You +meet the following conditions: + +(a) You must give any other recipients of the Work or +Derivative Works a copy of this License; and + +(b) You must cause any modified files to carry prominent notices +stating that You changed the files; and + +(c) You must retain, in the Source form of any Derivative Works +that You distribute, all copyright, patent, trademark, and +attribution notices from the Source form of the Work, +excluding those notices that do not pertain to any part of +the Derivative Works; and + +(d) If the Work includes a "NOTICE" text file as part of its +distribution, then any Derivative Works that You distribute must +include a readable copy of the attribution notices contained +within such NOTICE file, excluding those notices that do not +pertain to any part of the Derivative Works, in at least one +of the following places: within a NOTICE text file distributed +as part of the Derivative Works; within the Source form or +documentation, if provided along with the Derivative Works; or, +within a display generated by the Derivative Works, if and +wherever such third-party notices normally appear. The contents +of the NOTICE file are for informational purposes only and +do not modify the License. You may add Your own attribution +notices within Derivative Works that You distribute, alongside +or as an addendum to the NOTICE text from the Work, provided +that such additional attribution notices cannot be construed +as modifying the License. 
+ +You may add Your own copyright statement to Your modifications and +may provide additional or different license terms and conditions +for use, reproduction, or distribution of Your modifications, or +for any such Derivative Works as a whole, provided Your use, +reproduction, and distribution of the Work otherwise complies with +the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, +any Contribution intentionally submitted for inclusion in the Work +by You to the Licensor shall be under the terms and conditions of +this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify +the terms of any separate license agreement you may have executed +with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade +names, trademarks, service marks, or product names of the Licensor, +except as required for reasonable and customary use in describing the +origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or +agreed to in writing, Licensor provides the Work (and each +Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +implied, including, without limitation, any warranties or conditions +of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +PARTICULAR PURPOSE. You are solely responsible for determining the +appropriateness of using or redistributing the Work and assume any +risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, +whether in tort (including negligence), contract, or otherwise, +unless required by applicable law (such as deliberate and grossly +negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, +incidental, or consequential damages of any character arising as a +result of this License or out of the use or inability to use the +Work (including but not limited to damages for loss of goodwill, +work stoppage, computer failure or malfunction, or any and all +other commercial damages or losses), even if such Contributor +has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing +the Work or Derivative Works thereof, You may choose to offer, +and charge a fee for, acceptance of support, warranty, indemnity, +or other liability obligations and/or rights consistent with this +License. However, in accepting such obligations, You may act only +on Your own behalf and on Your sole responsibility, not on behalf +of any other Contributor, and only if You agree to indemnify, +defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason +of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS diff --git a/lib/xdelta3/Makefile.am b/lib/xdelta3/Makefile.am new file mode 100644 index 0000000..2d9f6db --- /dev/null +++ b/lib/xdelta3/Makefile.am @@ -0,0 +1,192 @@ +ACLOCAL_AMFLAGS = -I m4 +AUTOMAKE_OPTIONS = subdir-objects + +bin_PROGRAMS = xdelta3 +noinst_PROGRAMS = xdelta3regtest xdelta3decode xdelta3checksum + +export AFL_HARDEN + +common_SOURCES = \ + xdelta3-blkcache.h \ + xdelta3-decode.h \ + xdelta3-djw.h \ + xdelta3-fgk.h \ + xdelta3-hash.h \ + xdelta3-internal.h \ + xdelta3-list.h \ + xdelta3-lzma.h \ + xdelta3-main.h \ + xdelta3-merge.h \ + xdelta3-second.h \ + xdelta3-test.h \ + xdelta3-cfgs.h \ + xdelta3.h + +xdelta3_SOURCES = $(common_SOURCES) xdelta3.c + +xdelta3decode_SOURCES = $(common_SOURCES) xdelta3.c + +xdelta3regtest_SOURCES = $(common_SOURCES) \ + testing/cmp.h \ + testing/delta.h \ + testing/file.h \ + testing/modify.h \ + testing/random.h \ + testing/regtest.cc \ + testing/regtest_c.c \ + testing/segment.h \ + testing/sizes.h \ + testing/test.h + +xdelta3checksum_SOURCES = $(common_SOURCES) \ + testing/checksum_test.cc \ + testing/checksum_test_c.c + +# These sources constitute a regression test written in Go, that is +# not automatically built or run. Install Go-1.5.x or later, add +# `pwd`/go in $GOPATH, and (cd go/src && go run regtest.go). +# TODO(jmacd): replace hard-coded path names in regtest.go w/ flags. +GOLANG_SRCS = \ + go/src/xdelta/test.go \ + go/src/xdelta/rstream.go \ + go/src/xdelta/tgroup.go \ + go/src/xdelta/run.go \ + go/src/regtest.go + +# Note: for extra sanity checks, enable -Wconversion. Note there +# are a lot of false positives. 
+WFLAGS = -Wall -Wshadow -fno-builtin -Wextra -Wsign-compare \ + -Wformat=2 -Wno-format-nonliteral \ + -Wno-unused-parameter -Wno-unused-function + + # -Weverything \ + # -Wc++11-compat-reserved-user-defined-literal \ + # -Wno-padded \ + # -Wno-format-nonliteral \ + # -Wno-cast-align \ + # -Wno-unused-parameter \ + # -Wno-sign-conversion \ + # -Wno-conversion \ + # -Wno-switch-enum \ + # -Wno-covered-switch-default \ + # -Wno-disabled-macro-expansion \ + # -Wno-variadic-macros \ + # -Wno-c++98-compat-pedantic + +C_WFLAGS = $(WFLAGS) -std=c99 +CXX_WFLAGS = $(WFLAGS) -std=c++11 + +common_CFLAGS = \ + -DREGRESSION_TEST=1 \ + -DSECONDARY_DJW=1 \ + -DSECONDARY_FGK=1 \ + -DXD3_MAIN=1 + +if DEBUG_SYMBOLS + common_CFLAGS += -g +endif + +#common_CFLAGS += -fsanitize=address -fno-omit-frame-pointer +#common_CFLAGS += -O2 + +# For additional debugging, add -DXD3_DEBUG=1, 2, 3, ... +xdelta3_CFLAGS = $(C_WFLAGS) $(common_CFLAGS) -DXD3_DEBUG=0 +xdelta3_LDADD = -lm + +xdelta3decode_CFLAGS = \ + $(C_WFLAGS) \ + -DREGRESSION_TEST=0 \ + -DSECONDARY_DJW=0 \ + -DSECONDARY_FGK=0 \ + -DSECONDARY_LZMA=0 \ + -DXD3_MAIN=1 \ + -DXD3_ENCODER=0 \ + -DXD3_STDIO=1 \ + -DEXTERNAL_COMPRESSION=0 \ + -DVCDIFF_TOOLS=0 + +xdelta3regtest_CXXFLAGS = \ + $(CXX_WFLAGS) $(common_CFLAGS) -DNOT_MAIN=1 -DXD3_DEBUG=1 +xdelta3regtest_CFLAGS = \ + $(C_WFLAGS) $(common_CFLAGS) -DNOT_MAIN=1 -DXD3_DEBUG=1 +xdelta3regtest_LDADD = -lm + +xdelta3checksum_CXXFLAGS = \ + $(CXX_WFLAGS) $(common_CFLAGS) -DNOT_MAIN=1 -DXD3_MAIN=1 -std=c++11 +xdelta3checksum_CFLAGS = \ + $(C_WFLAGS) $(common_CFLAGS) -DNOT_MAIN=1 -DXD3_MAIN=1 + + +man1_MANS = xdelta3.1 + +EXTRA_DIST = \ + README.md \ + run_release.sh \ + draft-korn-vcdiff.txt \ + $(GOLANG_SRCS) \ + examples/Makefile \ + examples/README.md \ + examples/compare_test.c \ + examples/encode_decode_test.c \ + examples/small_page_test.c \ + examples/speed_test.c \ + examples/test.h \ + examples/iOS/xdelta3-ios-test/xdelta3-ios-test.xcodeproj/project.pbxproj \ + 
examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.h \ + examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.m \ + examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.h \ + examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.m \ + examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/InfoPlist.strings \ + examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPad.storyboard \ + examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPhone.storyboard \ + examples/iOS/xdelta3-ios-test/xdelta3-ios-test/file_v1.bin \ + examples/iOS/xdelta3-ios-test/xdelta3-ios-test/file_v1_to_v2.bin \ + examples/iOS/xdelta3-ios-test/xdelta3-ios-test/file_v2.bin \ + examples/iOS/xdelta3-ios-test/xdelta3-ios-test/main.m \ + examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Info.plist \ + examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Prefix.pch \ + cpp-btree/CMakeLists.txt \ + cpp-btree/COPYING \ + cpp-btree/README \ + cpp-btree/btree.h \ + cpp-btree/btree_bench.cc \ + cpp-btree/btree_container.h \ + cpp-btree/btree_map.h \ + cpp-btree/btree_set.h \ + cpp-btree/btree_test.cc \ + cpp-btree/btree_test.h \ + cpp-btree/btree_test_flags.cc \ + cpp-btree/safe_btree.h \ + cpp-btree/safe_btree_map.h \ + cpp-btree/safe_btree_set.h \ + cpp-btree/safe_btree_test.cc \ + testing/xdelta3-regtest.py \ + testing/xdelta3-test.py \ + xdelta3.1 \ + xdelta3.i \ + xdelta3.vcxproj \ + xdelta3.wxi \ + xdelta3.wxs + +# Broken, removed from distribution: +# xdelta3_pywrap.c +# xdelta3.py + +#PYFILES = xdelta3_pywrap.c xdelta3.py +#XDELTA3PY = xdelta3.py +#XDELTA3PYLIB = xdelta3.la + +#BUILT_SOURCES = $(PYFILES) + +#xdelta3_pywrap.c xdelta3.py : xdelta3.i +# $(SWIG) -python -o xdelta3_pywrap.c xdelta3.i + +# OS X for some reason requires: +# pythondir = $(PYTHON_SITE_PKG) +# pyexecdir = $(PYTHON_SITE_PKG) + +#python_PYTHON = $(XDELTA3PY) +#pyexec_LTLIBRARIES = $(XDELTA3PYLIB) 
+#_xdelta3_la_SOURCES = $(srcdir)/xdelta3_pywrap.c $(xdelta3_SOURCES) +#_xdelta3_la_CFLAGS = $(common_CFLAGS) -DNOT_MAIN=1 $(PYTHON_CPPFLAGS) +#_xdelta3_la_LDFLAGS = -module diff --git a/lib/xdelta3/README.md b/lib/xdelta3/README.md new file mode 100644 index 0000000..ba6f030 --- /dev/null +++ b/lib/xdelta3/README.md @@ -0,0 +1,37 @@ +Xdelta 3.x readme.txt +Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, +2009, 2010, 2011, 2012, 2013, 2014, 2015 +<josh.macdonald@gmail.com> + + +Thanks for downloading Xdelta! + +This directory contains the Xdelta3 command-line interface (CLI) and source +distribution for VCDIFF differential compression, a.k.a. delta +compression. The latest information and downloads are available here: + + http://xdelta.org/ + http://github.com/jmacd/xdelta/ + +Xdelta can be configured to use XZ Utils for secondary compression: + + http://tukaani.org/xz/ + +The command-line syntax is detailed here: + + https://github.com/jmacd/xdelta/blob/wiki/CommandLineSyntax.md + +Run 'xdelta3 -h' for brief help. Run 'xdelta3 test' for built-in tests. + +Sample commands (like gzip, -e means encode, -d means decode) + + xdelta3 -9 -S lzma -e -f -s OLD_FILE NEW_FILE DELTA_FILE + xdelta3 -d -s OLD_FILE DELTA_FILE DECODED_FILE + +File bug reports and browse open support issues here: + + https://github.com/jmacd/xdelta/issues + +The source distribution contains the C/C++/Python APIs, Unix, Microsoft VC++ +and Cygwin builds. Xdelta3 is covered under the terms of the Apache License, +Version 2.0; see LICENSE. 
diff --git a/lib/xdelta3/badcopy.c b/lib/xdelta3/badcopy.c
new file mode 100644
index 0000000..03abc63
--- /dev/null
+++ b/lib/xdelta3/badcopy.c
@@ -0,0 +1,242 @@
+#include <errno.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define BUFSZ (1 << 22)
+
+/* Largest change size or gap edist() may return.  Written as an unsigned
+ * constant because 1 << 31 overflows a 32-bit signed int. */
+#define MAX_EDIST (1U << 31)
+
+#if defined(WIN32) || defined(_WIN32)
+/* No drand48/lrand48 on this platform; approximate them with rand(). */
+static double
+drand48 (void)
+{
+  return rand () / (double) RAND_MAX;
+}
+
+static long
+lrand48 (void)
+{
+  /* Mix in unsigned arithmetic: left-shifting a signed long into its sign
+   * bit is undefined behavior. */
+  unsigned long l = 0;
+  int i;
+  for (i = 0; i < 32; i++) {
+    l = l ^ (l << 2) ^ (l << 1) ^ (unsigned long) rand ();
+  }
+  /* POSIX lrand48 returns a non-negative 31-bit value; do the same. */
+  return (long) (l & 0x7fffffffUL);
+}
+#endif
+
+#ifdef _WIN32
+#define XD3_WIN32 1
+#else
+#define XD3_POSIX 1
+#endif
+#define XD3_MAIN 1
+#define main notmain
+#define EXTERNAL_COMPRESSION 0
+#define XD3_USE_LARGEFILE64 1
+#include "xdelta3.c"
+#undef main
+
+
+double error_prob   = 0.0001;
+usize_t mean_change  = 100;
+xoff_t total_change = 0;
+xoff_t total_size   = 0;
+usize_t max_change   = 0;
+usize_t num_change   = 0;
+
+
+/* Draws from an exponential distribution with the given mean, rounded to
+ * the nearest integer and clamped to the range [1, max]. */
+static usize_t
+edist (usize_t mean, usize_t max)
+{
+  double mean_d = mean;
+  double erand  = log (1.0 / drand48 ());
+  double x_d    = mean_d * erand + 0.5;
+
+  /* Clamp while still in floating point: drand48 may return 0, which makes
+   * erand infinite, and converting an out-of-range double to an integer is
+   * undefined.  The negated comparison also sends NaN to max. */
+  if (!(x_d < (double) max)) { return max; }
+
+  return (x_d >= 1.0) ? (usize_t) x_d : 1;
+}
+
+/* Overwrites randomly placed runs of buf[0..size) with random bytes and
+ * logs every copied and added range to stderr.  Run lengths and the gaps
+ * between them come from edist(); the totals are accumulated in the
+ * file-level counters above. */
+void modify (char *buf, usize_t size)
+{
+  usize_t bufpos = 0, j;
+  usize_t last_end = 0;
+
+  for (;; /* bufpos and j are incremented in the inner loop */)
+    {
+      /* The size of the next modification. */
+      usize_t next_size = edist (mean_change, MAX_EDIST);
+      /* The expected interval of such a change. */
+      double expect_interval = ((double) next_size * (1.0 - error_prob)) / error_prob;
+      /* The number of bytes until the next modification. */
+      usize_t next_mod;
+      /* Bytes left in the buffer; bufpos never exceeds size. */
+      usize_t remaining = size - bufpos;
+
+      /* error_prob == 0 makes the interval infinite; clamp before the
+       * conversion to an integer, which would otherwise be undefined. */
+      if (!(expect_interval < (double) MAX_EDIST)) { expect_interval = (double) MAX_EDIST; }
+      next_mod = edist ((usize_t) expect_interval, MAX_EDIST);
+
+      /* Compare against the remaining space instead of summing the three
+       * values, which can wrap around when usize_t is 32 bits wide. */
+      if (next_mod > remaining || next_size > remaining - next_mod) { break; }
+
+      if (max_change < next_size) { max_change = next_size; }
+
+      bufpos += next_mod;
+
+      fprintf (stderr, "COPY: %llu-%llu (%llu)\n",
+               (unsigned long long) (total_size + (xoff_t) last_end),
+               (unsigned long long) (total_size + (xoff_t) bufpos),
+               (unsigned long long) (bufpos - last_end));
+      fprintf (stderr, "ADD: %llu-%llu (%llu) is change %llu\n",
+               (unsigned long long) (total_size + (xoff_t) bufpos),
+               (unsigned long long) (total_size + (xoff_t) (bufpos + next_size)),
+               (unsigned long long) next_size,
+               (unsigned long long) num_change);
+
+      total_change += next_size;
+      num_change   += 1;
+
+      for (j = 0; j < next_size; j += 1, bufpos += 1)
+        {
+          buf[bufpos] = (char)(lrand48 () >> 3);
+        }
+
+      last_end = bufpos;
+    }
+
+  fprintf (stderr, "COPY: %llu-%llu (%llu)\n",
+           (unsigned long long) (total_size + (xoff_t) last_end),
+           (unsigned long long) (total_size + (xoff_t) size),
+           (unsigned long long) (size - last_end));
+
+  total_size += size;
+}
+
+/* Copies the input file to the output file, corrupting it with modify().
+ * Usage: badcopy input output [byte_error_prob [mean_error_size]] */
+int main(int argc, char **argv)
+{
+  main_file inp, out;
+  char *buf;
+  char *end;
+  int c, ret;
+  int status = 1;
+
+  main_file_init(&inp);
+  main_file_init(&out);
+  option_force = 1;
+
+  /* argv[1] and argv[2] are dereferenced below, so both must be present. */
+  if (argc < 3 || argc > 5)
+    {
+      fprintf (stderr, "usage: badcopy input output [byte_error_prob [mean_error_size]]\n");
+      return 1;
+    }
+
+  if (argc > 4)
+    {
+      unsigned long mean;
+
+      errno = 0;
+      mean = strtoul (argv[4], &end, 10);
+      if (end == argv[4] || *end != '\0' || errno == ERANGE || mean > MAX_EDIST)
+        {
+          fprintf (stderr, "error: invalid mean error size: %s\n", argv[4]);
+          return 1;
+        }
+      mean_change = (usize_t) mean;
+    }
+  if (argc > 3)
+    {
+      error_prob = strtod (argv[3], &end);
+      if (end == argv[3] || *end != '\0')
+        {
+          fprintf (stderr, "error: invalid error probability: %s\n", argv[3]);
+          return 1;
+        }
+    }
+  fprintf (stderr, "mean change = %llu; error_prob = %0.10f\n",
+           (unsigned long long) mean_change, error_prob);
+
+  /* Validate before opening anything so that a bad argument leaves the
+   * output file untouched.  The negated form also rejects NaN. */
+  if (!(error_prob >= 0.0 && error_prob <= 1.0))
+    {
+      fprintf (stderr, "warning: error probability out of range\n");
+      return 1;
+    }
+
+  buf = malloc (BUFSZ);
+  if (buf == NULL)
+    {
+      fprintf (stderr, "error: out of memory\n");
+      return 1;
+    }
+
+  if ((ret = main_file_open (&inp, argv[1], XO_READ)) != 0) {
+    goto done;
+  }
+  if ((ret = main_file_open (&out, argv[2], XO_WRITE)) != 0) {
+    goto close_inp;
+  }
+
+  do
+    {
+      if ((ret = main_file_read (&inp, buf, BUFSZ, &c, "read failed")) != 0) {
+        goto close_out;
+      }
+
+      if (c == 0) { break; }
+
+      modify (buf, c);
+
+      if ((ret = main_file_write (&out, buf, c, "write failed")) != 0) {
+        goto close_out;
+      }
+    }
+  while (c == BUFSZ);
+
+  if ((ret = main_file_close (&out)))
+    {
+      goto close_inp;
+    }
+
+  fprintf (stderr, "add_prob %f; %llu adds; total_change %llu of %llu bytes; add percentage %f; max add size %llu\n",
+           error_prob,
+           (unsigned long long) num_change,
+           (unsigned long long) total_change,
+           (unsigned long long) total_size,
+           total_size != 0 ? (double) total_change / (double) total_size : 0.0,
+           (unsigned long long) max_change);
+
+  status = 0;
+  goto close_inp;
+
+ close_out:
+  main_file_close (&out);
+ close_inp:
+  main_file_close (&inp);
+ done:
+  free (buf);
+  return status;
+}
diff --git a/lib/xdelta3/configure.ac b/lib/xdelta3/configure.ac
new file mode 100644
index 0000000..5d81f38
--- /dev/null
+++ b/lib/xdelta3/configure.ac
@@ -0,0 +1,51 @@
+AC_INIT([Xdelta3], [3.1.1], [josh.macdonald@gmail.com],
+        [xdelta3], [http://xdelta.org/])
+AC_PREREQ([2.68])
+AC_CONFIG_MACRO_DIR([m4])
+LT_INIT
+AM_INIT_AUTOMAKE([1.15 no-define foreign tar-ustar subdir-objects])
+AC_CONFIG_MACRO_DIRS([m4])
+
+AX_CHECK_ALIGNED_ACCESS_REQUIRED
+AC_PROG_CC
+AC_PROG_CXX
+
+AC_CHECK_SIZEOF(size_t)
+AC_CHECK_SIZEOF(unsigned int)
+AC_CHECK_SIZEOF(unsigned long)
+AC_CHECK_SIZEOF(unsigned long long)
+
+AC_ARG_WITH(
+    [liblzma],
+    [AC_HELP_STRING(
+        [--with-liblzma],
+        [build with liblzma support @<:@default=autodetect@:>@])],
+    [USE_LIBLZMA=$withval],
+    [USE_LIBLZMA=auto])
+
+if test "x$USE_LIBLZMA" != xno ; then
+    AC_CHECK_HEADERS([lzma.h],,[
+        if test "x$with_liblzma" = xyes ; then
+            AC_MSG_FAILURE([liblzma includes were not found])
+        fi])
+    AC_CHECK_LIB([lzma], [lzma_easy_buffer_encode],,[
+        if test "x$with_liblzma" = xyes ; then
+            AC_MSG_FAILURE([liblzma library were not found])
+        fi])
+fi
+
+#AM_PATH_PYTHON(,, [:])
+#AM_CONDITIONAL([HAVE_PYTHON], [test "$PYTHON" != :])
+#AX_PYTHON_DEVEL()
+#AX_PKG_SWIG(2.0.0,,)
+#AX_SWIG_PYTHON
+
+dnl --enable-debug-symbols : build with debug symbols?
+AC_ARG_ENABLE(debug-symbols,
+   AS_HELP_STRING(--enable-debug-symbols,[Build with debug symbols (default is NO)]),,enableval=no)
+AM_CONDITIONAL([DEBUG_SYMBOLS], [test ${enableval} = "yes"])
+
+
+AC_CONFIG_HEADERS([config.h])
+AC_CONFIG_FILES([Makefile])
+AC_OUTPUT
diff --git a/lib/xdelta3/cpp-btree/CMakeLists.txt b/lib/xdelta3/cpp-btree/CMakeLists.txt
new file mode 100644
index 0000000..d005e15
--- /dev/null
+++ b/lib/xdelta3/cpp-btree/CMakeLists.txt
@@ -0,0 +1,40 @@
+# Copyright 2013 Google Inc. All Rights Reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cmake_minimum_required(VERSION 2.6) + +project(cppbtree CXX) + +option(build_tests "Build B-tree tests" OFF) +add_definitions(-std=c++11) +set(CMAKE_CXX_FLAGS "-g -O2") + +# CMake doesn't have a way to pure template library, +# add_library(cppbtree btree.h btree_map.h btree_set.h +# safe_btree.h safe_btree_map.h safe_btree_set.h) +# set_target_properties(cppbtree PROPERTIES LINKER_LANGUAGE CXX) + +if(build_tests) + enable_testing() + include_directories($ENV{GTEST_ROOT}/include) + link_directories($ENV{GTEST_ROOT}) + include_directories($ENV{GFLAGS_ROOT}/include) + link_directories($ENV{GFLAGS_ROOT}/lib) + add_executable(btree_test btree_test.cc btree_test_flags.cc) + add_executable(safe_btree_test safe_btree_test.cc btree_test_flags.cc) + add_executable(btree_bench btree_bench.cc btree_test_flags.cc) + target_link_libraries(btree_test gtest_main gtest gflags) + target_link_libraries(safe_btree_test gtest_main gtest gflags) + target_link_libraries(btree_bench gflags gtest) +endif() diff --git a/lib/xdelta3/cpp-btree/COPYING b/lib/xdelta3/cpp-btree/COPYING new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/lib/xdelta3/cpp-btree/COPYING @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/lib/xdelta3/cpp-btree/README b/lib/xdelta3/cpp-btree/README new file mode 100644 index 0000000..319fe9b --- /dev/null +++ b/lib/xdelta3/cpp-btree/README @@ -0,0 +1,31 @@ +This library is a C++ template library and, as such, there is no +library to build and install. Copy the .h files and use them! + +See http://code.google.com/p/cpp-btree/wiki/UsageInstructions for +details. + +---- + +To build and run the provided tests, however, you will need to install +CMake, the Google C++ Test framework, and the Google flags package. + +Download and install CMake from http://www.cmake.org + +Download and build the GoogleTest framework from +http://code.google.com/p/googletest + +Download and install gflags from https://code.google.com/p/gflags + +Set GTEST_ROOT to the directory where GTEST was built. 
+Set GFLAGS_ROOT to the directory prefix where GFLAGS is installed. + +export GTEST_ROOT=/path/for/gtest-x.y +export GFLAGS_ROOT=/opt + +cmake . -Dbuild_tests=ON + +For example, to build on a Unix system with the clang++ compiler, + +export GTEST_ROOT=$HOME/src/googletest +export GFLAGS_ROOT=/opt +cmake . -G "Unix Makefiles" -Dbuild_tests=ON -DCMAKE_CXX_COMPILER=clang++ diff --git a/lib/xdelta3/cpp-btree/btree.h b/lib/xdelta3/cpp-btree/btree.h new file mode 100644 index 0000000..cdd2b52 --- /dev/null +++ b/lib/xdelta3/cpp-btree/btree.h @@ -0,0 +1,2394 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// A btree implementation of the STL set and map interfaces. A btree is both +// smaller and faster than STL set/map. The red-black tree implementation of +// STL set/map has an overhead of 3 pointers (left, right and parent) plus the +// node color information for each stored value. So a set<int32> consumes 20 +// bytes for each value stored. This btree implementation stores multiple +// values on fixed size nodes (usually 256 bytes) and doesn't store child +// pointers for leaf nodes. The result is that a btree_set<int32> may use much +// less memory per stored value. For the random insertion benchmark in +// btree_test.cc, a btree_set<int32> with node-size of 256 uses 4.9 bytes per +// stored value. 
+// +// The packing of multiple values on to each node of a btree has another effect +// besides better space utilization: better cache locality due to fewer cache +// lines being accessed. Better cache locality translates into faster +// operations. +// +// CAVEATS +// +// Insertions and deletions on a btree can cause splitting, merging or +// rebalancing of btree nodes. And even without these operations, insertions +// and deletions on a btree will move values around within a node. In both +// cases, the result is that insertions and deletions can invalidate iterators +// pointing to values other than the one being inserted/deleted. This is +// notably different from STL set/map which takes care to not invalidate +// iterators on insert/erase except, of course, for iterators pointing to the +// value being erased. A partial workaround when erasing is available: +// erase() returns an iterator pointing to the item just after the one that was +// erased (or end() if none exists). See also safe_btree. + +// PERFORMANCE +// +// btree_bench --benchmarks=. 
2>&1 | ./benchmarks.awk +// +// Run on pmattis-warp.nyc (4 X 2200 MHz CPUs); 2010/03/04-15:23:06 +// Benchmark STL(ns) B-Tree(ns) @ +// -------------------------------------------------------- +// BM_set_int32_insert 1516 608 +59.89% <256> [40.0, 5.2] +// BM_set_int32_lookup 1160 414 +64.31% <256> [40.0, 5.2] +// BM_set_int32_fulllookup 960 410 +57.29% <256> [40.0, 4.4] +// BM_set_int32_delete 1741 528 +69.67% <256> [40.0, 5.2] +// BM_set_int32_queueaddrem 3078 1046 +66.02% <256> [40.0, 5.5] +// BM_set_int32_mixedaddrem 3600 1384 +61.56% <256> [40.0, 5.3] +// BM_set_int32_fifo 227 113 +50.22% <256> [40.0, 4.4] +// BM_set_int32_fwditer 158 26 +83.54% <256> [40.0, 5.2] +// BM_map_int32_insert 1551 636 +58.99% <256> [48.0, 10.5] +// BM_map_int32_lookup 1200 508 +57.67% <256> [48.0, 10.5] +// BM_map_int32_fulllookup 989 487 +50.76% <256> [48.0, 8.8] +// BM_map_int32_delete 1794 628 +64.99% <256> [48.0, 10.5] +// BM_map_int32_queueaddrem 3189 1266 +60.30% <256> [48.0, 11.6] +// BM_map_int32_mixedaddrem 3822 1623 +57.54% <256> [48.0, 10.9] +// BM_map_int32_fifo 151 134 +11.26% <256> [48.0, 8.8] +// BM_map_int32_fwditer 161 32 +80.12% <256> [48.0, 10.5] +// BM_set_int64_insert 1546 636 +58.86% <256> [40.0, 10.5] +// BM_set_int64_lookup 1200 512 +57.33% <256> [40.0, 10.5] +// BM_set_int64_fulllookup 971 487 +49.85% <256> [40.0, 8.8] +// BM_set_int64_delete 1745 616 +64.70% <256> [40.0, 10.5] +// BM_set_int64_queueaddrem 3163 1195 +62.22% <256> [40.0, 11.6] +// BM_set_int64_mixedaddrem 3760 1564 +58.40% <256> [40.0, 10.9] +// BM_set_int64_fifo 146 103 +29.45% <256> [40.0, 8.8] +// BM_set_int64_fwditer 162 31 +80.86% <256> [40.0, 10.5] +// BM_map_int64_insert 1551 720 +53.58% <256> [48.0, 20.7] +// BM_map_int64_lookup 1214 612 +49.59% <256> [48.0, 20.7] +// BM_map_int64_fulllookup 994 592 +40.44% <256> [48.0, 17.2] +// BM_map_int64_delete 1778 764 +57.03% <256> [48.0, 20.7] +// BM_map_int64_queueaddrem 3189 1547 +51.49% <256> [48.0, 20.9] +// BM_map_int64_mixedaddrem 3779 
1887 +50.07% <256> [48.0, 21.6] +// BM_map_int64_fifo 147 145 +1.36% <256> [48.0, 17.2] +// BM_map_int64_fwditer 162 41 +74.69% <256> [48.0, 20.7] +// BM_set_string_insert 1989 1966 +1.16% <256> [64.0, 44.5] +// BM_set_string_lookup 1709 1600 +6.38% <256> [64.0, 44.5] +// BM_set_string_fulllookup 1573 1529 +2.80% <256> [64.0, 35.4] +// BM_set_string_delete 2520 1920 +23.81% <256> [64.0, 44.5] +// BM_set_string_queueaddrem 4706 4309 +8.44% <256> [64.0, 48.3] +// BM_set_string_mixedaddrem 5080 4654 +8.39% <256> [64.0, 46.7] +// BM_set_string_fifo 318 512 -61.01% <256> [64.0, 35.4] +// BM_set_string_fwditer 182 93 +48.90% <256> [64.0, 44.5] +// BM_map_string_insert 2600 2227 +14.35% <256> [72.0, 55.8] +// BM_map_string_lookup 2068 1730 +16.34% <256> [72.0, 55.8] +// BM_map_string_fulllookup 1859 1618 +12.96% <256> [72.0, 44.0] +// BM_map_string_delete 3168 2080 +34.34% <256> [72.0, 55.8] +// BM_map_string_queueaddrem 5840 4701 +19.50% <256> [72.0, 59.4] +// BM_map_string_mixedaddrem 6400 5200 +18.75% <256> [72.0, 57.8] +// BM_map_string_fifo 398 596 -49.75% <256> [72.0, 44.0] +// BM_map_string_fwditer 243 113 +53.50% <256> [72.0, 55.8] + +#ifndef UTIL_BTREE_BTREE_H__ +#define UTIL_BTREE_BTREE_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef NDEBUG +#define NDEBUG 1 +#endif + +namespace btree { + +// Inside a btree method, if we just call swap(), it will choose the +// btree::swap method, which we don't want. And we can't say ::swap +// because then MSVC won't pickup any std::swap() implementations. We +// can't just use std::swap() directly because then we don't get the +// specialization for types outside the std namespace. So the solution +// is to have a special swap helper function whose name doesn't +// collide with other swap functions defined by the btree classes. 
+template +inline void btree_swap_helper(T &a, T &b) { + using std::swap; + swap(a, b); +} + +// A template helper used to select A or B based on a condition. +template +struct if_{ + typedef A type; +}; + +template +struct if_ { + typedef B type; +}; + +// Types small_ and big_ are promise that sizeof(small_) < sizeof(big_) +typedef char small_; + +struct big_ { + char dummy[2]; +}; + +// A compile-time assertion. +template +struct CompileAssert { +}; + +#define COMPILE_ASSERT(expr, msg) \ + typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1] + +// A helper type used to indicate that a key-compare-to functor has been +// provided. A user can specify a key-compare-to functor by doing: +// +// struct MyStringComparer +// : public util::btree::btree_key_compare_to_tag { +// int operator()(const string &a, const string &b) const { +// return a.compare(b); +// } +// }; +// +// Note that the return type is an int and not a bool. There is a +// COMPILE_ASSERT which enforces this return type. +struct btree_key_compare_to_tag { +}; + +// A helper class that indicates if the Compare parameter is derived from +// btree_key_compare_to_tag. +template +struct btree_is_key_compare_to + : public std::is_convertible { +}; + +// A helper class to convert a boolean comparison into a three-way +// "compare-to" comparison that returns a negative value to indicate +// less-than, zero to indicate equality and a positive value to +// indicate greater-than. This helper class is specialized for +// less and greater. The btree_key_compare_to_adapter +// class is provided so that btree users automatically get the more +// efficient compare-to code when using common google string types +// with common comparison functors. 
+template +struct btree_key_compare_to_adapter : Compare { + btree_key_compare_to_adapter() { } + btree_key_compare_to_adapter(const Compare &c) : Compare(c) { } + btree_key_compare_to_adapter(const btree_key_compare_to_adapter &c) + : Compare(c) { + } +}; + +template <> +struct btree_key_compare_to_adapter > + : public btree_key_compare_to_tag { + btree_key_compare_to_adapter() {} + btree_key_compare_to_adapter(const std::less&) {} + btree_key_compare_to_adapter( + const btree_key_compare_to_adapter >&) {} + int operator()(const std::string &a, const std::string &b) const { + return a.compare(b); + } +}; + +template <> +struct btree_key_compare_to_adapter > + : public btree_key_compare_to_tag { + btree_key_compare_to_adapter() {} + btree_key_compare_to_adapter(const std::greater&) {} + btree_key_compare_to_adapter( + const btree_key_compare_to_adapter >&) {} + int operator()(const std::string &a, const std::string &b) const { + return b.compare(a); + } +}; + +// A helper class that allows a compare-to functor to behave like a plain +// compare functor. This specialization is used when we do not have a +// compare-to functor. +template +struct btree_key_comparer { + btree_key_comparer() {} + btree_key_comparer(Compare c) : comp(c) {} + static bool bool_compare(const Compare &comp, const Key &x, const Key &y) { + return comp(x, y); + } + bool operator()(const Key &x, const Key &y) const { + return bool_compare(comp, x, y); + } + Compare comp; +}; + +// A specialization of btree_key_comparer when a compare-to functor is +// present. We need a plain (boolean) comparison in some parts of the btree +// code, such as insert-with-hint. 
+template +struct btree_key_comparer { + btree_key_comparer() {} + btree_key_comparer(Compare c) : comp(c) {} + static bool bool_compare(const Compare &comp, const Key &x, const Key &y) { + return comp(x, y) < 0; + } + bool operator()(const Key &x, const Key &y) const { + return bool_compare(comp, x, y); + } + Compare comp; +}; + +// A helper function to compare to keys using the specified compare +// functor. This dispatches to the appropriate btree_key_comparer comparison, +// depending on whether we have a compare-to functor or not (which depends on +// whether Compare is derived from btree_key_compare_to_tag). +template +static bool btree_compare_keys( + const Compare &comp, const Key &x, const Key &y) { + typedef btree_key_comparer::value> key_comparer; + return key_comparer::bool_compare(comp, x, y); +} + +template +struct btree_common_params { + // If Compare is derived from btree_key_compare_to_tag then use it as the + // key_compare type. Otherwise, use btree_key_compare_to_adapter<> which will + // fall-back to Compare if we don't have an appropriate specialization. + typedef typename if_< + btree_is_key_compare_to::value, + Compare, btree_key_compare_to_adapter >::type key_compare; + // A type which indicates if we have a key-compare-to functor or a plain old + // key-compare functor. + typedef btree_is_key_compare_to is_key_compare_to; + + typedef Alloc allocator_type; + typedef Key key_type; + typedef ssize_t size_type; + typedef ptrdiff_t difference_type; + + enum { + kTargetNodeSize = TargetNodeSize, + + // Available space for values. This is largest for leaf nodes, + // which has overhead no fewer than two pointers. + kNodeValueSpace = TargetNodeSize - 2 * sizeof(void*), + }; + + // This is an integral type large enough to hold as many + // ValueSize-values as will fit a node of TargetNodeSize bytes. 
+ typedef typename if_< + (kNodeValueSpace / ValueSize) >= 256, + uint16_t, + uint8_t>::type node_count_type; +}; + +// A parameters structure for holding the type parameters for a btree_map. +template +struct btree_map_params + : public btree_common_params { + typedef Data data_type; + typedef Data mapped_type; + typedef std::pair value_type; + typedef std::pair mutable_value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + typedef value_type& reference; + typedef const value_type& const_reference; + + enum { + kValueSize = sizeof(Key) + sizeof(data_type), + }; + + static const Key& key(const value_type &x) { return x.first; } + static const Key& key(const mutable_value_type &x) { return x.first; } + static void swap(mutable_value_type *a, mutable_value_type *b) { + btree_swap_helper(a->first, b->first); + btree_swap_helper(a->second, b->second); + } +}; + +// A parameters structure for holding the type parameters for a btree_set. +template +struct btree_set_params + : public btree_common_params { + typedef std::false_type data_type; + typedef std::false_type mapped_type; + typedef Key value_type; + typedef value_type mutable_value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + typedef value_type& reference; + typedef const value_type& const_reference; + + enum { + kValueSize = sizeof(Key), + }; + + static const Key& key(const value_type &x) { return x; } + static void swap(mutable_value_type *a, mutable_value_type *b) { + btree_swap_helper(*a, *b); + } +}; + +// An adapter class that converts a lower-bound compare into an upper-bound +// compare. 
+template +struct btree_upper_bound_adapter : public Compare { + btree_upper_bound_adapter(Compare c) : Compare(c) {} + bool operator()(const Key &a, const Key &b) const { + return !static_cast(*this)(b, a); + } +}; + +template +struct btree_upper_bound_compare_to_adapter : public CompareTo { + btree_upper_bound_compare_to_adapter(CompareTo c) : CompareTo(c) {} + int operator()(const Key &a, const Key &b) const { + return static_cast(*this)(b, a); + } +}; + +// Dispatch helper class for using linear search with plain compare. +template +struct btree_linear_search_plain_compare { + static int lower_bound(const K &k, const N &n, Compare comp) { + return n.linear_search_plain_compare(k, 0, n.count(), comp); + } + static int upper_bound(const K &k, const N &n, Compare comp) { + typedef btree_upper_bound_adapter upper_compare; + return n.linear_search_plain_compare(k, 0, n.count(), upper_compare(comp)); + } +}; + +// Dispatch helper class for using linear search with compare-to +template +struct btree_linear_search_compare_to { + static int lower_bound(const K &k, const N &n, CompareTo comp) { + return n.linear_search_compare_to(k, 0, n.count(), comp); + } + static int upper_bound(const K &k, const N &n, CompareTo comp) { + typedef btree_upper_bound_adapter > upper_compare; + return n.linear_search_plain_compare(k, 0, n.count(), upper_compare(comp)); + } +}; + +// Dispatch helper class for using binary search with plain compare. +template +struct btree_binary_search_plain_compare { + static int lower_bound(const K &k, const N &n, Compare comp) { + return n.binary_search_plain_compare(k, 0, n.count(), comp); + } + static int upper_bound(const K &k, const N &n, Compare comp) { + typedef btree_upper_bound_adapter upper_compare; + return n.binary_search_plain_compare(k, 0, n.count(), upper_compare(comp)); + } +}; + +// Dispatch helper class for using binary search with compare-to. 
+template <typename K, typename N, typename CompareTo> +struct btree_binary_search_compare_to { + // Forwards to n.binary_search_compare_to over positions [0, n.count()). + // The caller's comparator instance is passed through so that any state it + // carries is honored and CompareTo need not be default-constructible. + static int lower_bound(const K &k, const N &n, CompareTo comp) { + return n.binary_search_compare_to(k, 0, n.count(), comp); + } + // Wraps comp in a plain (boolean) upper-bound adapter and searches positions + // [0, n.count()) with binary search, matching the plain-compare dispatcher. + static int upper_bound(const K &k, const N &n, CompareTo comp) { + typedef btree_upper_bound_adapter<K, + btree_key_comparer<K, CompareTo, true> > upper_compare; + return n.binary_search_plain_compare(k, 0, n.count(), upper_compare(comp)); + } +}; + +// A node in the btree holding. The same node type is used for both internal +// and leaf nodes in the btree, though the nodes are allocated in such a way +// that the children array is only valid in internal nodes. +template +class btree_node { + public: + typedef Params params_type; + typedef btree_node self_type; + typedef typename Params::key_type key_type; + typedef typename Params::data_type data_type; + typedef typename Params::value_type value_type; + typedef typename Params::mutable_value_type mutable_value_type; + typedef typename Params::pointer pointer; + typedef typename Params::const_pointer const_pointer; + typedef typename Params::reference reference; + typedef typename Params::const_reference const_reference; + typedef typename Params::key_compare key_compare; + typedef typename Params::size_type size_type; + typedef typename Params::difference_type difference_type; + // Typedefs for the various types of node searches. + typedef btree_linear_search_plain_compare< + key_type, self_type, key_compare> linear_search_plain_compare_type; + typedef btree_linear_search_compare_to< + key_type, self_type, key_compare> linear_search_compare_to_type; + typedef btree_binary_search_plain_compare< + key_type, self_type, key_compare> binary_search_plain_compare_type; + typedef btree_binary_search_compare_to< + key_type, self_type, key_compare> binary_search_compare_to_type; + // If we have a valid key-compare-to type, use linear_search_compare_to, + // otherwise use linear_search_plain_compare. 
+ typedef typename if_< + Params::is_key_compare_to::value, + linear_search_compare_to_type, + linear_search_plain_compare_type>::type linear_search_type; + // If we have a valid key-compare-to type, use binary_search_compare_to, + // otherwise use binary_search_plain_compare. + typedef typename if_< + Params::is_key_compare_to::value, + binary_search_compare_to_type, + binary_search_plain_compare_type>::type binary_search_type; + // If the key is an integral or floating point type, use linear search which + // is faster than binary search for such types. Might be wise to also + // configure linear search based on node-size. + typedef typename if_< + std::is_integral::value || + std::is_floating_point::value, + linear_search_type, binary_search_type>::type search_type; + + struct base_fields { + typedef typename Params::node_count_type field_type; + + // A boolean indicating whether the node is a leaf or not. + bool leaf; + // The position of the node in the node's parent. + field_type position; + // The maximum number of values the node can hold. + field_type max_count; + // The count of the number of values in the node. + field_type count; + // A pointer to the node's parent. + btree_node *parent; + }; + + enum { + kValueSize = params_type::kValueSize, + kTargetNodeSize = params_type::kTargetNodeSize, + + // Compute how many values we can fit onto a leaf node. + kNodeTargetValues = (kTargetNodeSize - sizeof(base_fields)) / kValueSize, + // We need a minimum of 3 values per internal node in order to perform + // splitting (1 value for the two nodes involved in the split and 1 value + // propagated to the parent as the delimiter for the split). + kNodeValues = kNodeTargetValues >= 3 ? kNodeTargetValues : 3, + + kExactMatch = 1 << 30, + kMatchMask = kExactMatch - 1, + }; + + struct leaf_fields : public base_fields { + // The array of values. Only the first count of these values have been + // constructed and are valid. 
+ mutable_value_type values[kNodeValues]; + }; + + struct internal_fields : public leaf_fields { + // The array of child pointers. The keys in children[i] are all less than + // key(i). The keys in children[i + 1] are all greater than key(i). There + // are always count + 1 children. + btree_node *children[kNodeValues + 1]; + }; + + // Extra fields that are only valid on the root node (see rightmost() and + // size() below). + struct root_fields : public internal_fields { + btree_node *rightmost; + size_type size; + }; + + public: + // Getter for whether this is a leaf node or not. This value doesn't + // change after the node is created. + bool leaf() const { return fields_.leaf; } + + // Getter/setter for the position of this node in its parent. + int position() const { return fields_.position; } + void set_position(int v) { fields_.position = v; } + + // Getter/setter for the number of values stored in this node, and getter for + // the maximum number of values the node can hold. + int count() const { return fields_.count; } + void set_count(int v) { fields_.count = v; } + int max_count() const { return fields_.max_count; } + + // Getter for the parent of this node. + btree_node* parent() const { return fields_.parent; } + // Getter for whether the node is the root of the tree. The parent of the + // root of the tree is the leftmost node in the tree which is guaranteed to + // be a leaf. + bool is_root() const { return parent()->leaf(); } + // Re-parents this node onto its current parent's parent. The current parent + // must be the root. + void make_root() { + assert(parent()->is_root()); + fields_.parent = fields_.parent->parent(); + } + + // Getters for the rightmost root node field. Only valid on the root node. + btree_node* rightmost() const { return fields_.rightmost; } + btree_node** mutable_rightmost() { return &fields_.rightmost; } + + // Getters for the size root node field. Only valid on the root node. 
+ size_type size() const { return fields_.size; } + size_type* mutable_size() { return &fields_.size; } + + // Getters for the key/value at position i in the node. 
+ const key_type& key(int i) const { + return params_type::key(fields_.values[i]); + } + reference value(int i) { + return reinterpret_cast(fields_.values[i]); + } + const_reference value(int i) const { + return reinterpret_cast(fields_.values[i]); + } + mutable_value_type* mutable_value(int i) { + return &fields_.values[i]; + } + + // Swap value i in this node with value j in node x. + void value_swap(int i, btree_node *x, int j) { + params_type::swap(mutable_value(i), x->mutable_value(j)); + } + + // Getters/setter for the child at position i in the node. + btree_node* child(int i) const { return fields_.children[i]; } + btree_node** mutable_child(int i) { return &fields_.children[i]; } + void set_child(int i, btree_node *c) { + *mutable_child(i) = c; + c->fields_.parent = this; + c->fields_.position = i; + } + + // Returns the position of the first value whose key is not less than k. + template + int lower_bound(const key_type &k, const Compare &comp) const { + return search_type::lower_bound(k, *this, comp); + } + // Returns the position of the first value whose key is greater than k. + template + int upper_bound(const key_type &k, const Compare &comp) const { + return search_type::upper_bound(k, *this, comp); + } + + // Returns the position of the first value whose key is not less than k using + // linear search performed using plain compare. + template + int linear_search_plain_compare( + const key_type &k, int s, int e, const Compare &comp) const { + while (s < e) { + if (!btree_compare_keys(comp, key(s), k)) { + break; + } + ++s; + } + return s; + } + + // Returns the position of the first value whose key is not less than k using + // linear search performed using compare-to. 
+ template + int linear_search_compare_to( + const key_type &k, int s, int e, const Compare &comp) const { + while (s < e) { + int c = comp(key(s), k); + if (c == 0) { + return s | kExactMatch; + } else if (c > 0) { + break; + } + ++s; + } + return s; + } + + // Returns the position of the first value whose key is not less than k using + // binary search performed using plain compare. + template + int binary_search_plain_compare( + const key_type &k, int s, int e, const Compare &comp) const { + while (s != e) { + int mid = (s + e) / 2; + if (btree_compare_keys(comp, key(mid), k)) { + s = mid + 1; + } else { + e = mid; + } + } + return s; + } + + // Returns the position of the first value whose key is not less than k using + // binary search performed using compare-to. + template + int binary_search_compare_to( + const key_type &k, int s, int e, const CompareTo &comp) const { + while (s != e) { + int mid = (s + e) / 2; + int c = comp(key(mid), k); + if (c < 0) { + s = mid + 1; + } else if (c > 0) { + e = mid; + } else { + // Need to return the first value whose key is not less than k, which + // requires continuing the binary search. Note that we are guaranteed + // that the result is an exact match because if "key(mid-1) < k" the + // call to binary_search_compare_to() will return "mid". + s = binary_search_compare_to(k, s, mid, comp); + return s | kExactMatch; + } + } + return s; + } + + // Inserts the value x at position i, shifting all existing values and + // children at positions >= i to the right by 1. + void insert_value(int i, const value_type &x); + + // Removes the value at position i, shifting all existing values and children + // at positions > i to the left by 1. + void remove_value(int i); + + // Rebalances a node with its right sibling. + void rebalance_right_to_left(btree_node *sibling, int to_move); + void rebalance_left_to_right(btree_node *sibling, int to_move); + + // Splits a node, moving a portion of the node's values to its right sibling. 
+ void split(btree_node *sibling, int insert_position); + + // Merges a node with its right sibling, moving all of the values and the + // delimiting key in the parent node onto itself. + void merge(btree_node *sibling); + + // Swap the contents of "this" and "src". + void swap(btree_node *src); + + // Node allocation/deletion routines. + static btree_node* init_leaf( + leaf_fields *f, btree_node *parent, int max_count) { + btree_node *n = reinterpret_cast(f); + f->leaf = 1; + f->position = 0; + f->max_count = max_count; + f->count = 0; + f->parent = parent; + if (!NDEBUG) { + memset(&f->values, 0, max_count * sizeof(value_type)); + } + return n; + } + static btree_node* init_internal(internal_fields *f, btree_node *parent) { + btree_node *n = init_leaf(f, parent, kNodeValues); + f->leaf = 0; + if (!NDEBUG) { + memset(f->children, 0, sizeof(f->children)); + } + return n; + } + static btree_node* init_root(root_fields *f, btree_node *parent) { + btree_node *n = init_internal(f, parent); + f->rightmost = parent; + f->size = parent->count(); + return n; + } + void destroy() { + for (int i = 0; i < count(); ++i) { + value_destroy(i); + } + } + + private: + void value_init(int i) { + new (&fields_.values[i]) mutable_value_type; + } + void value_init(int i, const value_type &x) { + new (&fields_.values[i]) mutable_value_type(x); + } + void value_destroy(int i) { + fields_.values[i].~mutable_value_type(); + } + + private: + root_fields fields_; + + private: + btree_node(const btree_node&); + void operator=(const btree_node&); +}; + +template +struct btree_iterator { + typedef typename Node::key_type key_type; + typedef typename Node::size_type size_type; + typedef typename Node::difference_type difference_type; + typedef typename Node::params_type params_type; + + typedef Node node_type; + typedef typename std::remove_const::type normal_node; + typedef const Node const_node; + typedef typename params_type::value_type value_type; + typedef typename params_type::pointer 
normal_pointer; + typedef typename params_type::reference normal_reference; + typedef typename params_type::const_pointer const_pointer; + typedef typename params_type::const_reference const_reference; + + typedef Pointer pointer; + typedef Reference reference; + typedef std::bidirectional_iterator_tag iterator_category; + + typedef btree_iterator< + normal_node, normal_reference, normal_pointer> iterator; + typedef btree_iterator< + const_node, const_reference, const_pointer> const_iterator; + typedef btree_iterator self_type; + + btree_iterator() + : node(NULL), + position(-1) { + } + btree_iterator(Node *n, int p) + : node(n), + position(p) { + } + btree_iterator(const iterator &x) + : node(x.node), + position(x.position) { + } + + // Increment/decrement the iterator. + void increment() { + if (node->leaf() && ++position < node->count()) { + return; + } + increment_slow(); + } + void increment_by(int count); + void increment_slow(); + + void decrement() { + if (node->leaf() && --position >= 0) { + return; + } + decrement_slow(); + } + void decrement_slow(); + + bool operator==(const const_iterator &x) const { + return node == x.node && position == x.position; + } + bool operator!=(const const_iterator &x) const { + return node != x.node || position != x.position; + } + + // Accessors for the key/value the iterator is pointing at. + const key_type& key() const { + return node->key(position); + } + reference operator*() const { + return node->value(position); + } + pointer operator->() const { + return &node->value(position); + } + + self_type& operator++() { + increment(); + return *this; + } + self_type& operator--() { + decrement(); + return *this; + } + self_type operator++(int) { + self_type tmp = *this; + ++*this; + return tmp; + } + self_type operator--(int) { + self_type tmp = *this; + --*this; + return tmp; + } + + // The node in the tree the iterator is pointing at. + Node *node; + // The position within the node of the tree the iterator is pointing at. 
+ int position; +}; + +// Dispatch helper class for using btree::internal_locate with plain compare. +struct btree_internal_locate_plain_compare { + template + static std::pair dispatch(const K &k, const T &t, Iter iter) { + return t.internal_locate_plain_compare(k, iter); + } +}; + +// Dispatch helper class for using btree::internal_locate with compare-to. +struct btree_internal_locate_compare_to { + template + static std::pair dispatch(const K &k, const T &t, Iter iter) { + return t.internal_locate_compare_to(k, iter); + } +}; + +// The btree itself. It derives from Params::key_compare, so the tree object +// also serves as its own key comparator (see key_comp()). +template +class btree : public Params::key_compare { + typedef btree self_type; + typedef btree_node node_type; + typedef typename node_type::base_fields base_fields; + typedef typename node_type::leaf_fields leaf_fields; + typedef typename node_type::internal_fields internal_fields; + typedef typename node_type::root_fields root_fields; + typedef typename Params::is_key_compare_to is_key_compare_to; + + friend struct btree_internal_locate_plain_compare; + friend struct btree_internal_locate_compare_to; + // Pick the internal_locate dispatch helper that matches the comparator kind. + typedef typename if_< + is_key_compare_to::value, + btree_internal_locate_compare_to, + btree_internal_locate_plain_compare>::type internal_locate_type; + + enum { + kNodeValues = node_type::kNodeValues, + kMinNodeValues = kNodeValues / 2, + kValueSize = node_type::kValueSize, + kExactMatch = node_type::kExactMatch, + kMatchMask = node_type::kMatchMask, + }; + + // A helper class to get the empty base class optimization for 0-size + // allocators. Base is internal_allocator_type + // (e.g. empty_base_handle<internal_allocator_type, node_type*>). If Base is + // 0-size, the compiler doesn't have to reserve any space for it and + // sizeof(empty_base_handle) will simply be sizeof(Data). 
Google [empty base + // class optimization] for more details. 
+ template + struct empty_base_handle : public Base { + empty_base_handle(const Base &b, const Data &d) + : Base(b), + data(d) { + } + Data data; + }; + + struct node_stats { + node_stats(ssize_t l, ssize_t i) + : leaf_nodes(l), + internal_nodes(i) { + } + + node_stats& operator+=(const node_stats &x) { + leaf_nodes += x.leaf_nodes; + internal_nodes += x.internal_nodes; + return *this; + } + + ssize_t leaf_nodes; + ssize_t internal_nodes; + }; + + public: + typedef Params params_type; + typedef typename Params::key_type key_type; + typedef typename Params::data_type data_type; + typedef typename Params::mapped_type mapped_type; + typedef typename Params::value_type value_type; + typedef typename Params::key_compare key_compare; + typedef typename Params::pointer pointer; + typedef typename Params::const_pointer const_pointer; + typedef typename Params::reference reference; + typedef typename Params::const_reference const_reference; + typedef typename Params::size_type size_type; + typedef typename Params::difference_type difference_type; + typedef btree_iterator iterator; + typedef typename iterator::const_iterator const_iterator; + typedef std::reverse_iterator const_reverse_iterator; + typedef std::reverse_iterator reverse_iterator; + + typedef typename Params::allocator_type allocator_type; + typedef typename allocator_type::template rebind::other + internal_allocator_type; + + public: + // Default constructor. + btree(const key_compare &comp, const allocator_type &alloc); + + // Copy constructor. + btree(const self_type &x); + + // Destructor. + ~btree() { + clear(); + } + + // Iterator routines. + iterator begin() { + return iterator(leftmost(), 0); + } + const_iterator begin() const { + return const_iterator(leftmost(), 0); + } + iterator end() { + return iterator(rightmost(), rightmost() ? rightmost()->count() : 0); + } + const_iterator end() const { + return const_iterator(rightmost(), rightmost() ? 
rightmost()->count() : 0); + } + reverse_iterator rbegin() { + return reverse_iterator(end()); + } + const_reverse_iterator rbegin() const { + return const_reverse_iterator(end()); + } + reverse_iterator rend() { + return reverse_iterator(begin()); + } + const_reverse_iterator rend() const { + return const_reverse_iterator(begin()); + } + + // Finds the first element whose key is not less than key. A result with a + // NULL node from the internal search is converted to end() by internal_end(). + iterator lower_bound(const key_type &key) { + return internal_end( + internal_lower_bound(key, iterator(root(), 0))); + } + const_iterator lower_bound(const key_type &key) const { + return internal_end( + internal_lower_bound(key, const_iterator(root(), 0))); + } + + // Finds the first element whose key is greater than key. A result with a + // NULL node from the internal search is converted to end() by internal_end(). + iterator upper_bound(const key_type &key) { + return internal_end( + internal_upper_bound(key, iterator(root(), 0))); + } + const_iterator upper_bound(const key_type &key) const { + return internal_end( + internal_upper_bound(key, const_iterator(root(), 0))); + } + + // Finds the range of values which compare equal to key. The first member of + // the returned pair is equal to lower_bound(key). The second member of + // the pair is equal to upper_bound(key). + std::pair equal_range(const key_type &key) { + return std::make_pair(lower_bound(key), upper_bound(key)); + } + std::pair equal_range(const key_type &key) const { + return std::make_pair(lower_bound(key), upper_bound(key)); + } + + // Inserts a value into the btree only if it does not already exist. The + // boolean return value indicates whether insertion succeeded or failed. The + // ValuePointer type is used to avoid instantiating the value unless the key + // is being inserted. Value is not dereferenced if the key already exists in + // the btree. See btree_map::operator[]. 
+ template + std::pair insert_unique(const key_type &key, ValuePointer value); + + // Inserts a value into the btree only if it does not already exist. 
The + // boolean return value indicates whether insertion succeeded or failed. + std::pair insert_unique(const value_type &v) { + return insert_unique(params_type::key(v), &v); + } + + // Insert with hint. Check to see if the value should be placed immediately + // before position in the tree. If so, then the insertion will take + // amortized constant time. If not, the insertion will take amortized + // logarithmic time as if a call to insert_unique(v) were made. + iterator insert_unique(iterator position, const value_type &v); + + // Insert a range of values into the btree. + template + void insert_unique(InputIterator b, InputIterator e); + + // Inserts a value into the btree. The ValuePointer type is used to avoid + // instantiating the value unless the key is being inserted. Value is not + // dereferenced if the key already exists in the btree. See + // btree_map::operator[]. + template + iterator insert_multi(const key_type &key, ValuePointer value); + + // Inserts a value into the btree. + iterator insert_multi(const value_type &v) { + return insert_multi(params_type::key(v), &v); + } + + // Insert with hint. Check to see if the value should be placed immediately + // before position in the tree. If so, then the insertion will take + // amortized constant time. If not, the insertion will take amortized + // logarithmic time as if a call to insert_multi(v) were made. + iterator insert_multi(iterator position, const value_type &v); + + // Insert a range of values into the btree. + template + void insert_multi(InputIterator b, InputIterator e); + + // Assigns the contents of x to *this. Called by operator=. + void assign(const self_type &x); + + // Erase the specified iterator from the btree. The iterator must be valid + // (i.e. not equal to end()). Return an iterator pointing to the node after + // the one that was erased (or end() if none exists). + iterator erase(iterator iter); + + // Erases range. Returns the number of keys erased. 
+ int erase(iterator begin, iterator end); + + // Erases the specified key from the btree. Returns 1 if an element was + // erased and 0 otherwise. + int erase_unique(const key_type &key); + + // Erases all of the entries matching the specified key from the + // btree. Returns the number of elements erased. + int erase_multi(const key_type &key); + + // Finds the iterator corresponding to a key or returns end() if the key is + // not present. + iterator find_unique(const key_type &key) { + return internal_end( + internal_find_unique(key, iterator(root(), 0))); + } + const_iterator find_unique(const key_type &key) const { + return internal_end( + internal_find_unique(key, const_iterator(root(), 0))); + } + iterator find_multi(const key_type &key) { + return internal_end( + internal_find_multi(key, iterator(root(), 0))); + } + const_iterator find_multi(const key_type &key) const { + return internal_end( + internal_find_multi(key, const_iterator(root(), 0))); + } + + // Returns a count of the number of times the key appears in the btree. + size_type count_unique(const key_type &key) const { + const_iterator b = internal_find_unique( + key, const_iterator(root(), 0)); + if (!b.node) { + // The key doesn't exist in the tree. + return 0; + } + return 1; + } + // Returns a count of the number of times the key appears in the btree. + size_type count_multi(const key_type &key) const { + return distance(lower_bound(key), upper_bound(key)); + } + + // Clear the btree, deleting all of the values it contains. + void clear(); + + // Swap the contents of *this and x. + void swap(self_type &x); + + // Assign the contents of x to *this. + self_type& operator=(const self_type &x) { + if (&x == this) { + // Don't copy onto ourselves. 
+ return *this; + } + assign(x); + return *this; + } + + key_compare* mutable_key_comp() { + return this; + } + const key_compare& key_comp() const { + return *this; + } + bool compare_keys(const key_type &x, const key_type &y) const { + return btree_compare_keys(key_comp(), x, y); + } + + // Dump the btree to the specified ostream. Requires that operator<< is + // defined for Key and Value. + void dump(std::ostream &os) const { + if (root() != NULL) { + internal_dump(os, root(), 0); + } + } + + // Verifies the structure of the btree. + void verify() const; + + // Size routines. Note that empty() is slightly faster than doing size()==0. + size_type size() const { + if (empty()) return 0; + if (root()->leaf()) return root()->count(); + return root()->size(); + } + size_type max_size() const { return std::numeric_limits::max(); } + bool empty() const { return root() == NULL; } + + // The height of the btree. An empty tree will have height 0. + size_type height() const { + size_type h = 0; + if (root()) { + // Count the length of the chain from the leftmost node up to the + // root. We actually count from the root back around to the level below + // the root, but the calculation is the same because of the circularity + // of that traversal. + const node_type *n = root(); + do { + ++h; + n = n->parent(); + } while (n != root()); + } + return h; + } + + // The number of internal, leaf and total nodes used by the btree. + size_type leaf_nodes() const { + return internal_stats(root()).leaf_nodes; + } + size_type internal_nodes() const { + return internal_stats(root()).internal_nodes; + } + size_type nodes() const { + node_stats stats = internal_stats(root()); + return stats.leaf_nodes + stats.internal_nodes; + } + + // The total number of bytes used by the btree. 
+ size_type bytes_used() const { + node_stats stats = internal_stats(root()); + if (stats.leaf_nodes == 1 && stats.internal_nodes == 0) { + return sizeof(*this) + + sizeof(base_fields) + root()->max_count() * sizeof(value_type); + } else { + return sizeof(*this) + + sizeof(root_fields) - sizeof(internal_fields) + + stats.leaf_nodes * sizeof(leaf_fields) + + stats.internal_nodes * sizeof(internal_fields); + } + } + + // The average number of bytes used per value stored in the btree. + static double average_bytes_per_value() { + // Returns the number of bytes per value on a leaf node that is 75% + // full. Experimentally, this matches up nicely with the computed number of + // bytes per value in trees that had their values inserted in random order. + return sizeof(leaf_fields) / (kNodeValues * 0.75); + } + + // The fullness of the btree. Computed as the number of elements in the btree + // divided by the maximum number of elements a tree with the current number + // of nodes could hold. A value of 1 indicates perfect space + // utilization. Smaller values indicate space wastage. Returns 0 for an + // empty btree. + double fullness() const { + if (empty()) { + // No nodes exist, so the ratio below would be 0 / 0. + return 0.0; + } + return double(size()) / (nodes() * kNodeValues); + } + // The overhead of the btree structure in bytes per value. Computed as the + // total number of bytes used by the btree minus the number of bytes used for + // storing elements divided by the number of elements. Returns 0 for an + // empty btree. + double overhead() const { + if (empty()) { + return 0.0; + } + return (bytes_used() - size() * kValueSize) / double(size()); + } + + private: + // Internal accessor routines. + node_type* root() { return root_.data; } + const node_type* root() const { return root_.data; } + node_type** mutable_root() { return &root_.data; } + + // The rightmost node is stored in the root node. 
+ node_type* rightmost() { + return (!root() || root()->leaf()) ? root() : root()->rightmost(); + } + const node_type* rightmost() const { + return (!root() || root()->leaf()) ? 
root() : root()->rightmost(); + } + node_type** mutable_rightmost() { return root()->mutable_rightmost(); } + + // The leftmost node is stored as the parent of the root node. + node_type* leftmost() { return root() ? root()->parent() : NULL; } + const node_type* leftmost() const { return root() ? root()->parent() : NULL; } + + // The size of the tree is stored in the root node. + size_type* mutable_size() { return root()->mutable_size(); } + + // Allocator routines. + internal_allocator_type* mutable_internal_allocator() { + return static_cast(&root_); + } + const internal_allocator_type& internal_allocator() const { + return *static_cast(&root_); + } + + // Node creation/deletion routines. + node_type* new_internal_node(node_type *parent) { + internal_fields *p = reinterpret_cast( + mutable_internal_allocator()->allocate(sizeof(internal_fields))); + return node_type::init_internal(p, parent); + } + node_type* new_internal_root_node() { + root_fields *p = reinterpret_cast( + mutable_internal_allocator()->allocate(sizeof(root_fields))); + return node_type::init_root(p, root()->parent()); + } + node_type* new_leaf_node(node_type *parent) { + leaf_fields *p = reinterpret_cast( + mutable_internal_allocator()->allocate(sizeof(leaf_fields))); + return node_type::init_leaf(p, parent, kNodeValues); + } + node_type* new_leaf_root_node(int max_count) { + leaf_fields *p = reinterpret_cast( + mutable_internal_allocator()->allocate( + sizeof(base_fields) + max_count * sizeof(value_type))); + return node_type::init_leaf(p, reinterpret_cast(p), max_count); + } + void delete_internal_node(node_type *node) { + node->destroy(); + assert(node != root()); + mutable_internal_allocator()->deallocate( + reinterpret_cast(node), sizeof(internal_fields)); + } + void delete_internal_root_node() { + root()->destroy(); + mutable_internal_allocator()->deallocate( + reinterpret_cast(root()), sizeof(root_fields)); + } + void delete_leaf_node(node_type *node) { + node->destroy(); + 
mutable_internal_allocator()->deallocate( + reinterpret_cast(node), + sizeof(base_fields) + node->max_count() * sizeof(value_type)); + } + + // Rebalances or splits the node iter points to. + void rebalance_or_split(iterator *iter); + + // Merges the values of left, right and the delimiting key on their parent + // onto left, removing the delimiting key and deleting right. + void merge_nodes(node_type *left, node_type *right); + + // Tries to merge node with its left or right sibling, and failing that, + // rebalance with its left or right sibling. Returns true if a merge + // occurred, at which point it is no longer valid to access node. Returns + // false if no merging took place. + bool try_merge_or_rebalance(iterator *iter); + + // Tries to shrink the height of the tree by 1. + void try_shrink(); + + iterator internal_end(iterator iter) { + return iter.node ? iter : end(); + } + const_iterator internal_end(const_iterator iter) const { + return iter.node ? iter : end(); + } + + // Inserts a value into the btree immediately before iter. Requires that + // key(v) <= iter.key() and (--iter).key() <= key(v). + iterator internal_insert(iterator iter, const value_type &v); + + // Returns an iterator pointing to the first value >= the value "iter" is + // pointing at. Note that "iter" might be pointing to an invalid location as + // iter.position == iter.node->count(). This routine simply moves iter up in + // the tree to a valid location. + template + static IterType internal_last(IterType iter); + + // Returns an iterator pointing to the leaf position at which key would + // reside in the tree. We provide 2 versions of internal_locate. The first + // version (internal_locate_plain_compare) always returns 0 for the second + // field of the pair. 
The second version (internal_locate_compare_to) is for + // the key-compare-to specialization and returns either kExactMatch (if the + // key was found in the tree) or -kExactMatch (if it wasn't) in the second + // field of the pair. The compare_to specialization allows the caller to + // avoid a subsequent comparison to determine if an exact match was made, + // speeding up string keys. + template + std::pair internal_locate( + const key_type &key, IterType iter) const; + template + std::pair internal_locate_plain_compare( + const key_type &key, IterType iter) const; + template + std::pair internal_locate_compare_to( + const key_type &key, IterType iter) const; + + // Internal routine which implements lower_bound(). + template + IterType internal_lower_bound( + const key_type &key, IterType iter) const; + + // Internal routine which implements upper_bound(). + template + IterType internal_upper_bound( + const key_type &key, IterType iter) const; + + // Internal routine which implements find_unique(). + template + IterType internal_find_unique( + const key_type &key, IterType iter) const; + + // Internal routine which implements find_multi(). + template + IterType internal_find_multi( + const key_type &key, IterType iter) const; + + // Deletes a node and all of its children. + void internal_clear(node_type *node); + + // Dumps a node and all of its children to the specified ostream. + void internal_dump(std::ostream &os, const node_type *node, int level) const; + + // Verifies the tree structure of node. 
+ int internal_verify(const node_type *node, + const key_type *lo, const key_type *hi) const; + + node_stats internal_stats(const node_type *node) const { + if (!node) { + return node_stats(0, 0); + } + if (node->leaf()) { + return node_stats(1, 0); + } + node_stats res(0, 1); + for (int i = 0; i <= node->count(); ++i) { + res += internal_stats(node->child(i)); + } + return res; + } + + private: + empty_base_handle root_; + + private: + // A never instantiated helper function that returns big_ if we have a + // key-compare-to functor or if R is bool and small_ otherwise. + template + static typename if_< + if_, + std::is_same >::type::value, + big_, small_>::type key_compare_checker(R); + + // A never instantiated helper function that returns the key comparison + // functor. + static key_compare key_compare_helper(); + + // Verify that key_compare returns a bool. This is similar to the way + // is_convertible in base/type_traits.h works. Note that key_compare_checker + // is never actually invoked. The compiler will select which + // key_compare_checker() to instantiate and then figure out the size of the + // return type of key_compare_checker() at compile time which we then check + // against the size of big_. + COMPILE_ASSERT( + sizeof(key_compare_checker(key_compare_helper()(key_type(), key_type()))) == + sizeof(big_), + key_comparison_function_must_return_bool); + + // Note: We insist that kNodeValues, which is computed from + // Params::kTargetNodeSize, fits in base_fields::field_type. + // NOTE(review): the int shift below is only well-defined while field_type is + // narrower than int; confirm against Params::node_count_type. + COMPILE_ASSERT(kNodeValues < + (1 << (8 * sizeof(typename base_fields::field_type))), + target_node_size_too_large); + + // Test the assumption made in setting kNodeValueSpace. 
+ // NOTE(review): kNodeValueSpace is not defined in the visible part of this + // file; presumably it lives in the params class -- confirm. + COMPILE_ASSERT(sizeof(base_fields) >= 2 * sizeof(void*), + node_space_assumption_incorrect); +}; + +//// +// btree_node methods +template +inline void btree_node

::insert_value(int i, const value_type &x) { + assert(i <= count()); + value_init(count(), x); + for (int j = count(); j > i; --j) { + value_swap(j, this, j - 1); + } + set_count(count() + 1); + + if (!leaf()) { + ++i; + for (int j = count(); j > i; --j) { + *mutable_child(j) = child(j - 1); + child(j)->set_position(j); + } + *mutable_child(i) = NULL; + } +} + +template +inline void btree_node

::remove_value(int i) { + if (!leaf()) { + assert(child(i + 1)->count() == 0); + for (int j = i + 1; j < count(); ++j) { + *mutable_child(j) = child(j + 1); + child(j)->set_position(j); + } + *mutable_child(count()) = NULL; + } + + set_count(count() - 1); + for (; i < count(); ++i) { + value_swap(i, this, i + 1); + } + value_destroy(i); +} + +template +void btree_node

::rebalance_right_to_left(btree_node *src, int to_move) { + assert(parent() == src->parent()); + assert(position() + 1 == src->position()); + assert(src->count() >= count()); + assert(to_move >= 1); + assert(to_move <= src->count()); + + // Make room in the left node for the new values. + for (int i = 0; i < to_move; ++i) { + value_init(i + count()); + } + + // Move the delimiting value to the left node and the new delimiting value + // from the right node. + value_swap(count(), parent(), position()); + parent()->value_swap(position(), src, to_move - 1); + + // Move the values from the right to the left node. + for (int i = 1; i < to_move; ++i) { + value_swap(count() + i, src, i - 1); + } + // Shift the values in the right node to their correct position. + for (int i = to_move; i < src->count(); ++i) { + src->value_swap(i - to_move, src, i); + } + for (int i = 1; i <= to_move; ++i) { + src->value_destroy(src->count() - i); + } + + if (!leaf()) { + // Move the child pointers from the right to the left node. + for (int i = 0; i < to_move; ++i) { + set_child(1 + count() + i, src->child(i)); + } + for (int i = 0; i <= src->count() - to_move; ++i) { + assert(i + to_move <= src->max_count()); + src->set_child(i, src->child(i + to_move)); + *src->mutable_child(i + to_move) = NULL; + } + } + + // Fixup the counts on the src and dest nodes. + set_count(count() + to_move); + src->set_count(src->count() - to_move); +} + +template +void btree_node

::rebalance_left_to_right(btree_node *dest, int to_move) {
+  // Moves to_move values from this node into dest. The asserts require that
+  // both nodes share a parent, that dest sits immediately to the right of
+  // this node, and that 1 <= to_move <= count(). The values travel through
+  // the parent's delimiting value at position().
+  assert(parent() == dest->parent());
+  assert(position() + 1 == dest->position());
+  assert(count() >= dest->count());
+  assert(to_move >= 1);
+  assert(to_move <= count());
+
+  // Make room in the right node for the new values.
+  for (int i = 0; i < to_move; ++i) {
+    dest->value_init(i + dest->count());
+  }
+  // Walk backwards so each existing value is moved before its slot is reused.
+  for (int i = dest->count() - 1; i >= 0; --i) {
+    dest->value_swap(i, dest, i + to_move);
+  }
+
+  // Move the delimiting value to the right node and the new delimiting value
+  // from the left node.
+  dest->value_swap(to_move - 1, parent(), position());
+  parent()->value_swap(position(), this, count() - to_move);
+  value_destroy(count() - to_move);
+
+  // Move the values from the left to the right node.
+  for (int i = 1; i < to_move; ++i) {
+    value_swap(count() - to_move + i, dest, i - 1);
+    value_destroy(count() - to_move + i);
+  }
+
+  if (!leaf()) {
+    // Move the child pointers from the left to the right node.
+    for (int i = dest->count(); i >= 0; --i) {
+      dest->set_child(i + to_move, dest->child(i));
+      *dest->mutable_child(i) = NULL;
+    }
+    for (int i = 1; i <= to_move; ++i) {
+      dest->set_child(i - 1, child(count() - to_move + i));
+      *mutable_child(count() - to_move + i) = NULL;
+    }
+  }
+
+  // Fixup the counts on the src and dest nodes.
+  set_count(count() - to_move);
+  dest->set_count(dest->count() + to_move);
+}
+
+template
+void btree_node

::split(btree_node *dest, int insert_position) {
+  // Splits this node: the upper values move into dest (which must be empty),
+  // one value is pushed up into the parent as the new delimiter, and dest is
+  // installed as the parent's child at position() + 1.
+  assert(dest->count() == 0);
+
+  // We bias the split based on the position being inserted. If we're
+  // inserting at the beginning of the left node then bias the split to put
+  // more values on the right node. If we're inserting at the end of the
+  // right node then bias the split to put more values on the left node.
+  if (insert_position == 0) {
+    dest->set_count(count() - 1);
+  } else if (insert_position == max_count()) {
+    dest->set_count(0);
+  } else {
+    dest->set_count(count() / 2);
+  }
+  set_count(count() - dest->count());
+  assert(count() >= 1);
+
+  // Move values from the left sibling to the right sibling.
+  for (int i = 0; i < dest->count(); ++i) {
+    dest->value_init(i);
+    value_swap(count() + i, dest, i);
+    value_destroy(count() + i);
+  }
+
+  // The split key is the largest value in the left sibling.
+  set_count(count() - 1);
+  // Insert a default-constructed placeholder in the parent, then swap the
+  // split key into it and destroy the slot the key came from.
+  parent()->insert_value(position(), value_type());
+  value_swap(count(), parent(), position());
+  value_destroy(count());
+  parent()->set_child(position() + 1, dest);
+
+  if (!leaf()) {
+    // Hand the children that follow the split key over to dest.
+    for (int i = 0; i <= dest->count(); ++i) {
+      assert(child(count() + i + 1) != NULL);
+      dest->set_child(i, child(count() + i + 1));
+      *mutable_child(count() + i + 1) = NULL;
+    }
+  }
+}
+
+template
+void btree_node

::merge(btree_node *src) {
+  // Absorbs src into this node. The asserts require that both nodes share a
+  // parent and that src sits immediately to the right of this node. The
+  // parent's delimiting value is pulled down between the two value ranges and
+  // then removed from the parent. src is left with a count of zero; this
+  // function does not deallocate it.
+  assert(parent() == src->parent());
+  assert(position() + 1 == src->position());
+
+  // Move the delimiting value to the left node.
+  value_init(count());
+  value_swap(count(), parent(), position());
+
+  // Move the values from the right to the left node.
+  for (int i = 0; i < src->count(); ++i) {
+    value_init(1 + count() + i);
+    value_swap(1 + count() + i, src, i);
+    src->value_destroy(i);
+  }
+
+  if (!leaf()) {
+    // Move the child pointers from the right to the left node.
+    for (int i = 0; i <= src->count(); ++i) {
+      set_child(1 + count() + i, src->child(i));
+      *src->mutable_child(i) = NULL;
+    }
+  }
+
+  // Fixup the counts on the src and dest nodes.
+  set_count(1 + count() + src->count());
+  src->set_count(0);
+
+  // Remove the value on the parent node.
+  parent()->remove_value(position());
+}
+
+template
+void btree_node

::swap(btree_node *x) {
+  // Exchanges the values, child pointers and counts of this node and x. Both
+  // nodes must be of the same kind (both leaf or both internal).
+  assert(leaf() == x->leaf());
+
+  // Swap the values.
+  // First initialize slots in the smaller node so every index below n exists
+  // on both sides; at most one of the two loops below runs.
+  for (int i = count(); i < x->count(); ++i) {
+    value_init(i);
+  }
+  for (int i = x->count(); i < count(); ++i) {
+    x->value_init(i);
+  }
+  int n = std::max(count(), x->count());
+  for (int i = 0; i < n; ++i) {
+    value_swap(i, x, i);
+  }
+  // Destroy the surplus slots on whichever node gave up its extra values.
+  for (int i = count(); i < x->count(); ++i) {
+    x->value_destroy(i);
+  }
+  for (int i = x->count(); i < count(); ++i) {
+    value_destroy(i);
+  }
+
+  if (!leaf()) {
+    // Swap the child pointers.
+    for (int i = 0; i <= n; ++i) {
+      btree_swap_helper(*mutable_child(i), *x->mutable_child(i));
+    }
+    // The counts are not swapped yet, so count() still describes the children
+    // that now live in x, and x->count() those that now live in this node.
+    for (int i = 0; i <= count(); ++i) {
+      x->child(i)->fields_.parent = x;
+    }
+    for (int i = 0; i <= x->count(); ++i) {
+      child(i)->fields_.parent = this;
+    }
+  }
+
+  // Swap the counts.
+  btree_swap_helper(fields_.count, x->fields_.count);
+}
+
+////
+// btree_iterator methods
+template
+void btree_iterator::increment_slow() {
+  // Advances the iterator when the step cannot stay inside the current node.
+  if (node->leaf()) {
+    assert(position >= node->count());
+    // Climb until we reach a node in which the position refers to a value.
+    // If we climb to the root and still have none, restore the saved state.
+    self_type save(*this);
+    while (position == node->count() && !node->is_root()) {
+      assert(node->parent()->child(node->position()) == node);
+      position = node->position();
+      node = node->parent();
+    }
+    if (position == node->count()) {
+      *this = save;
+    }
+  } else {
+    assert(position < node->count());
+    // Step into the child to the right of the current value, then follow the
+    // first child of each node down to a leaf.
+    node = node->child(position + 1);
+    while (!node->leaf()) {
+      node = node->child(0);
+    }
+    position = 0;
+  }
+}
+
+template
+void btree_iterator::increment_by(int count) {
+  // Advances the iterator by count values. Inside a leaf the position is
+  // bumped directly; otherwise increment_slow() moves to another node.
+  while (count > 0) {
+    if (node->leaf()) {
+      int rest = node->count() - position;
+      position += std::min(rest, count);
+      count = count - rest;
+      if (position < node->count()) {
+        return;
+      }
+    } else {
+      --count;
+    }
+    increment_slow();
+  }
+}
+
+template
+void btree_iterator::decrement_slow() {
+  // Moves the iterator back when the step cannot stay inside the current node.
+  if (node->leaf()) {
+    assert(position <= -1);
+    // Climb until the position is non-negative. If we climb to the root and
+    // it is still negative, restore the saved state.
+    self_type save(*this);
+    while (position < 0 && !node->is_root()) {
+      assert(node->parent()->child(node->position()) == node);
+      position = node->position() - 1;
+      node = node->parent();
+    }
+    if (position < 0) {
+      *this = save;
+    }
+  } else {
+    assert(position >= 0);
+    // Step into the child to the left of the current value, then follow the
+    // last child of each node down to a leaf.
+    node = node->child(position);
+    while (!node->leaf()) {
+      node = node->child(node->count());
+    }
+    position = node->count() - 1;
+  }
+}
+
+////
+// btree methods
+template
+btree

::btree(const key_compare &comp, const allocator_type &alloc)
+    : key_compare(comp),
+      root_(alloc, NULL) {
+  // Builds an empty tree: the comparator base is copied from comp and root_
+  // is initialized with the allocator and a NULL root pointer.
+}
+
+template
+btree

::btree(const self_type &x)
+    : key_compare(x.key_comp()),
+      root_(x.internal_allocator(), NULL) {
+  // Copy constructor: start with x's comparator and allocator and a NULL
+  // root, then let assign() copy the contents of x.
+  assign(x);
+}
+
+template template
+std::pair::iterator, bool>
+btree

::insert_unique(const key_type &key, ValuePointer value) {
+  // Inserts *value under key unless an equal key is already present. Returns
+  // the iterator paired with true when a value was inserted, or the iterator
+  // to the existing entry paired with false.
+  if (empty()) {
+    *mutable_root() = new_leaf_root_node(1);
+  }
+
+  std::pair res = internal_locate(key, iterator(root(), 0));
+  iterator &iter = res.first;
+  if (res.second == kExactMatch) {
+    // The key already exists in the tree, do nothing.
+    return std::make_pair(internal_last(iter), false);
+  } else if (!res.second) {
+    // A zero result means the locate step did not report whether the key
+    // matched, so check the located entry with an explicit comparison.
+    iterator last = internal_last(iter);
+    if (last.node && !compare_keys(key, last.key())) {
+      // The key already exists in the tree, do nothing.
+      return std::make_pair(last, false);
+    }
+  }
+
+  return std::make_pair(internal_insert(iter, *value), true);
+}
+
+template
+inline typename btree

::iterator +btree

::insert_unique(iterator position, const value_type &v) {
+  // Hinted unique insert. If v's key belongs immediately before position or
+  // immediately after it, insert there directly; if position already holds
+  // an equal key, return position unchanged. In every other case (including
+  // an empty tree) fall back to the unhinted insert_unique(v).
+  if (!empty()) {
+    const key_type &key = params_type::key(v);
+    if (position == end() || compare_keys(key, position.key())) {
+      iterator prev = position;
+      if (position == begin() || compare_keys((--prev).key(), key)) {
+        // prev.key() < key < position.key()
+        return internal_insert(position, v);
+      }
+    } else if (compare_keys(position.key(), key)) {
+      iterator next = position;
+      ++next;
+      if (next == end() || compare_keys(key, next.key())) {
+        // position.key() < key < next.key()
+        return internal_insert(next, v);
+      }
+    } else {
+      // position.key() == key
+      return position;
+    }
+  }
+  return insert_unique(v).first;
+}
+
+template template
+void btree

::insert_unique(InputIterator b, InputIterator e) {
+  // Range insert: feed every element of [b, e) to the hinted overload, always
+  // using end() as the hint.
+  while (b != e) {
+    insert_unique(end(), *b);
+    ++b;
+  }
+}
+
+template template
+typename btree

::iterator +btree

::insert_multi(const key_type &key, ValuePointer value) {
+  // Inserts *value at the upper bound of key, so duplicates are permitted. An
+  // empty tree first gets a single-slot leaf root.
+  if (empty()) {
+    *mutable_root() = new_leaf_root_node(1);
+  }
+
+  iterator where = internal_upper_bound(key, iterator(root(), 0));
+  // A NULL node means no upper bound was found; insert at end() instead.
+  return internal_insert(where.node ? where : end(), *value);
+}
+
+template
+typename btree

::iterator +btree

::insert_multi(iterator position, const value_type &v) {
+  // Hinted multi insert. If v's key fits immediately before position or
+  // immediately after it (equal keys allowed), insert there directly.
+  // Otherwise, or when the tree is empty, fall back to insert_multi(v).
+  if (!empty()) {
+    const key_type &key = params_type::key(v);
+    if (position == end() || !compare_keys(position.key(), key)) {
+      iterator prev = position;
+      if (position == begin() || !compare_keys(key, (--prev).key())) {
+        // prev.key() <= key <= position.key()
+        return internal_insert(position, v);
+      }
+    } else {
+      iterator next = position;
+      ++next;
+      if (next == end() || !compare_keys(next.key(), key)) {
+        // position.key() < key <= next.key()
+        return internal_insert(next, v);
+      }
+    }
+  }
+  return insert_multi(v);
+}
+
+template template
+void btree

::insert_multi(InputIterator b, InputIterator e) {
+  // Range insert: feed every element of [b, e) to the hinted overload, always
+  // using end() as the hint.
+  while (b != e) {
+    insert_multi(end(), *b);
+    ++b;
+  }
+}
+
+template
+void btree

::assign(const self_type &x) { + clear(); + + *mutable_key_comp() = x.key_comp(); + *mutable_internal_allocator() = x.internal_allocator(); + + // Assignment can avoid key comparisons because we know the order of the + // values is the same order we'll store them in. + for (const_iterator iter = x.begin(); iter != x.end(); ++iter) { + if (empty()) { + insert_multi(*iter); + } else { + // If the btree is not empty, we can just insert the new value at the end + // of the tree! + internal_insert(end(), *iter); + } + } +} + +template +typename btree

::iterator btree

::erase(iterator iter) {
+  // Erases the value iter refers to and returns an iterator to the value
+  // that followed it, or end() if the tree became empty.
+  bool internal_delete = false;
+  if (!iter.node->leaf()) {
+    // Deletion of a value on an internal node. Swap the key with the largest
+    // value of our left child. This is easy, we just decrement iter.
+    iterator tmp_iter(iter--);
+    assert(iter.node->leaf());
+    assert(!compare_keys(tmp_iter.key(), iter.key()));
+    iter.node->value_swap(iter.position, tmp_iter.node, tmp_iter.position);
+    internal_delete = true;
+    --*mutable_size();
+  } else if (!root()->leaf()) {
+    // The size is only decremented here when the root is an internal node.
+    // NOTE(review): presumably a leaf root derives size() from its own
+    // count -- confirm against size().
+    --*mutable_size();
+  }
+
+  // Delete the key from the leaf.
+  iter.node->remove_value(iter.position);
+
+  // We want to return the next value after the one we just erased. If we
+  // erased from an internal node (internal_delete == true), then the next
+  // value is ++(++iter). If we erased from a leaf node (internal_delete ==
+  // false) then the next value is ++iter. Note that ++iter may point to an
+  // internal node and the value in the internal node may move to a leaf node
+  // (iter.node) when rebalancing is performed at the leaf level.
+
+  // Merge/rebalance as we walk back up the tree.
+  iterator res(iter);
+  for (;;) {
+    if (iter.node == root()) {
+      try_shrink();
+      if (empty()) {
+        return end();
+      }
+      break;
+    }
+    if (iter.node->count() >= kMinNodeValues) {
+      break;
+    }
+    bool merged = try_merge_or_rebalance(&iter);
+    // Only track the result while iter is still at the leaf level.
+    if (iter.node->leaf()) {
+      res = iter;
+    }
+    if (!merged) {
+      break;
+    }
+    iter.node = iter.node->parent();
+  }
+
+  // Adjust our return value. If we're pointing at the end of a node, advance
+  // the iterator.
+  if (res.position == res.node->count()) {
+    res.position = res.node->count() - 1;
+    ++res;
+  }
+  // If we erased from an internal node, advance the iterator.
+  if (internal_delete) {
+    ++res;
+  }
+  return res;
+}
+
+template
+int btree

::erase(iterator b, iterator e) {
+  // Erases every value in [b, e) and returns how many were removed. The
+  // distance is measured once up front; each single erase hands back the
+  // iterator to continue from.
+  const int count = distance(b, e);
+  int remaining = count;
+  while (remaining > 0) {
+    b = erase(b);
+    --remaining;
+  }
+  return count;
+}
+
+template
+int btree

::erase_unique(const key_type &key) { + iterator iter = internal_find_unique(key, iterator(root(), 0)); + if (!iter.node) { + // The key doesn't exist in the tree, return nothing done. + return 0; + } + erase(iter); + return 1; +} + +template +int btree

::erase_multi(const key_type &key) { + iterator b = internal_lower_bound(key, iterator(root(), 0)); + if (!b.node) { + // The key doesn't exist in the tree, return nothing done. + return 0; + } + // Delete all of the keys between begin and upper_bound(key). + iterator e = internal_end( + internal_upper_bound(key, iterator(root(), 0))); + return erase(b, e); +} + +template +void btree

::clear() { + if (root() != NULL) { + internal_clear(root()); + } + *mutable_root() = NULL; +} + +template +void btree

::swap(self_type &x) {
+  // Exchanges the state of this tree with x in two steps: first the base
+  // sub-objects selected by the static_casts, then the root_ members.
+  std::swap(static_cast(*this), static_cast(x));
+  std::swap(root_, x.root_);
+}
+
+template
+void btree

::verify() const { + if (root() != NULL) { + assert(size() == internal_verify(root(), NULL, NULL)); + assert(leftmost() == (++const_iterator(root(), -1)).node); + assert(rightmost() == (--const_iterator(root(), root()->count())).node); + assert(leftmost()->leaf()); + assert(rightmost()->leaf()); + } else { + assert(size() == 0); + assert(leftmost() == NULL); + assert(rightmost() == NULL); + } +} + +template +void btree

::rebalance_or_split(iterator *iter) {
+  // Makes room in the full node iter refers to, either by shifting values
+  // into a sibling that has space or by splitting the node. iter->node and
+  // iter->position are bound by reference below, so they are updated in
+  // place to the node and index where the pending insertion should happen.
+  node_type *&node = iter->node;
+  int &insert_position = iter->position;
+  assert(node->count() == node->max_count());
+
+  // First try to make room on the node by rebalancing.
+  node_type *parent = node->parent();
+  if (node != root()) {
+    if (node->position() > 0) {
+      // Try rebalancing with our left sibling.
+      node_type *left = parent->child(node->position() - 1);
+      if (left->count() < left->max_count()) {
+        // We bias rebalancing based on the position being inserted. If we're
+        // inserting at the end of the right node then we bias rebalancing to
+        // fill up the left node.
+        int to_move = (left->max_count() - left->count()) /
+            (1 + (insert_position < left->max_count()));
+        to_move = std::max(1, to_move);
+
+        if (((insert_position - to_move) >= 0) ||
+            ((left->count() + to_move) < left->max_count())) {
+          left->rebalance_right_to_left(node, to_move);
+
+          assert(node->max_count() - node->count() == to_move);
+          insert_position = insert_position - to_move;
+          // A negative position means the insertion point moved into the
+          // left sibling along with the shifted values.
+          if (insert_position < 0) {
+            insert_position = insert_position + left->count() + 1;
+            node = left;
+          }
+
+          assert(node->count() < node->max_count());
+          return;
+        }
+      }
+    }
+
+    if (node->position() < parent->count()) {
+      // Try rebalancing with our right sibling.
+      node_type *right = parent->child(node->position() + 1);
+      if (right->count() < right->max_count()) {
+        // We bias rebalancing based on the position being inserted. If we're
+        // inserting at the beginning of the left node then we bias rebalancing
+        // to fill up the right node.
+        int to_move = (right->max_count() - right->count()) /
+            (1 + (insert_position > 0));
+        to_move = std::max(1, to_move);
+
+        if ((insert_position <= (node->count() - to_move)) ||
+            ((right->count() + to_move) < right->max_count())) {
+          node->rebalance_left_to_right(right, to_move);
+
+          // A position past the shrunken node means the insertion point
+          // moved into the right sibling.
+          if (insert_position > node->count()) {
+            insert_position = insert_position - node->count() - 1;
+            node = right;
+          }
+
+          assert(node->count() < node->max_count());
+          return;
+        }
+      }
+    }
+
+    // Rebalancing failed, make sure there is room on the parent node for a new
+    // value.
+    if (parent->count() == parent->max_count()) {
+      iterator parent_iter(node->parent(), node->position());
+      rebalance_or_split(&parent_iter);
+    }
+  } else {
+    // Rebalancing not possible because this is the root node.
+    if (root()->leaf()) {
+      // The root node is currently a leaf node: create a new root node and set
+      // the current root node as the child of the new root.
+      parent = new_internal_root_node();
+      parent->set_child(0, root());
+      *mutable_root() = parent;
+      assert(*mutable_rightmost() == parent->child(0));
+    } else {
+      // The root node is an internal node. We do not want to create a new root
+      // node because the root node is special and holds the size of the tree
+      // and a pointer to the rightmost node. So we create a new internal node
+      // and move all of the items on the current root into the new node.
+      parent = new_internal_node(parent);
+      parent->set_child(0, parent);
+      parent->swap(root());
+      node = parent;
+    }
+  }
+
+  // Split the node.
+  node_type *split_node;
+  if (node->leaf()) {
+    split_node = new_leaf_node(parent);
+    node->split(split_node, insert_position);
+    // Keep the cached rightmost leaf pointing at the new right-hand half.
+    if (rightmost() == node) {
+      *mutable_rightmost() = split_node;
+    }
+  } else {
+    split_node = new_internal_node(parent);
+    node->split(split_node, insert_position);
+  }
+
+  // If the insertion point lies beyond the left half, retarget it at the new
+  // right-hand node.
+  if (insert_position > node->count()) {
+    insert_position = insert_position - node->count() - 1;
+    node = split_node;
+  }
+}
+
+template
+void btree

::merge_nodes(node_type *left, node_type *right) {
+  // Folds right into left and then releases right with the deleter that
+  // matches its kind.
+  left->merge(right);
+  if (!right->leaf()) {
+    delete_internal_node(right);
+    return;
+  }
+  // If right was the cached rightmost leaf, left takes over that role.
+  if (rightmost() == right) {
+    *mutable_rightmost() = left;
+  }
+  delete_leaf_node(right);
+}
+
+template
+bool btree

::try_merge_or_rebalance(iterator *iter) {
+  // Tries to repair the node iter refers to, first by merging it with a
+  // sibling and otherwise by borrowing values from one. Returns true only
+  // when a merge happened; iter is adjusted to keep referring to the same
+  // logical position.
+  node_type *parent = iter->node->parent();
+  if (iter->node->position() > 0) {
+    // Try merging with our left sibling.
+    node_type *left = parent->child(iter->node->position() - 1);
+    if ((1 + left->count() + iter->node->count()) <= left->max_count()) {
+      // The merged node holds left's values and the delimiter before ours.
+      iter->position += 1 + left->count();
+      merge_nodes(left, iter->node);
+      iter->node = left;
+      return true;
+    }
+  }
+  if (iter->node->position() < parent->count()) {
+    // Try merging with our right sibling.
+    node_type *right = parent->child(iter->node->position() + 1);
+    if ((1 + iter->node->count() + right->count()) <= right->max_count()) {
+      merge_nodes(iter->node, right);
+      return true;
+    }
+    // Try rebalancing with our right sibling. We don't perform rebalancing if
+    // we deleted the first element from iter->node and the node is not
+    // empty. This is a small optimization for the common pattern of deleting
+    // from the front of the tree.
+    if ((right->count() > kMinNodeValues) &&
+        ((iter->node->count() == 0) ||
+         (iter->position > 0))) {
+      int to_move = (right->count() - iter->node->count()) / 2;
+      to_move = std::min(to_move, right->count() - 1);
+      iter->node->rebalance_right_to_left(right, to_move);
+      return false;
+    }
+  }
+  if (iter->node->position() > 0) {
+    // Try rebalancing with our left sibling. We don't perform rebalancing if
+    // we deleted the last element from iter->node and the node is not
+    // empty. This is a small optimization for the common pattern of deleting
+    // from the back of the tree.
+    node_type *left = parent->child(iter->node->position() - 1);
+    if ((left->count() > kMinNodeValues) &&
+        ((iter->node->count() == 0) ||
+         (iter->position < iter->node->count()))) {
+      int to_move = (left->count() - iter->node->count()) / 2;
+      to_move = std::min(to_move, left->count() - 1);
+      left->rebalance_left_to_right(iter->node, to_move);
+      // Values were added in front of ours, so our index shifts right.
+      iter->position += to_move;
+      return false;
+    }
+  }
+  return false;
+}
+
+template
+void btree

::try_shrink() { + if (root()->count() > 0) { + return; + } + // Deleted the last item on the root node, shrink the height of the tree. + if (root()->leaf()) { + assert(size() == 0); + delete_leaf_node(root()); + *mutable_root() = NULL; + } else { + node_type *child = root()->child(0); + if (child->leaf()) { + // The child is a leaf node so simply make it the root node in the tree. + child->make_root(); + delete_internal_root_node(); + *mutable_root() = child; + } else { + // The child is an internal node. We want to keep the existing root node + // so we move all of the values from the child node into the existing + // (empty) root node. + child->swap(root()); + delete_internal_node(child); + } + } +} + +template template +inline IterType btree

::internal_last(IterType iter) {
+  // While iter sits one past the last value of its node, climb to the parent.
+  // The result either refers to a value or has a NULL node.
+  for (;;) {
+    if (!iter.node || iter.position != iter.node->count()) {
+      return iter;
+    }
+    iter.position = iter.node->position();
+    iter.node = iter.node->parent();
+    // Reaching a leaf by following a parent pointer ends the walk with a NULL
+    // node. NOTE(review): presumably the root's parent pointer refers to a
+    // leaf, so this detects walking off the top -- confirm in btree_node.
+    if (iter.node->leaf()) {
+      iter.node = NULL;
+    }
+  }
+}
+
+template
+inline typename btree

::iterator +btree

::internal_insert(iterator iter, const value_type &v) {
+  // Inserts v at the position iter refers to, making room first if the target
+  // leaf is full, and returns an iterator to the inserted value.
+  if (!iter.node->leaf()) {
+    // We can't insert on an internal node. Instead, we'll insert after the
+    // previous value which is guaranteed to be on a leaf node.
+    --iter;
+    ++iter.position;
+  }
+  if (iter.node->count() == iter.node->max_count()) {
+    // Make room in the leaf for the new item.
+    if (iter.node->max_count() < kNodeValues) {
+      // Insertion into the root where the root is smaller that the full node
+      // size. Simply grow the size of the root node.
+      assert(iter.node == root());
+      iter.node = new_leaf_root_node(
+          std::min(kNodeValues, 2 * iter.node->max_count()));
+      // Move the contents into the larger node, release the old root and
+      // install the new one.
+      iter.node->swap(root());
+      delete_leaf_node(root());
+      *mutable_root() = iter.node;
+    } else {
+      rebalance_or_split(&iter);
+      ++*mutable_size();
+    }
+  } else if (!root()->leaf()) {
+    // The size is only incremented here when the root is an internal node.
+    // NOTE(review): presumably a leaf root derives size() from its own
+    // count -- confirm against size().
+    ++*mutable_size();
+  }
+  iter.node->insert_value(iter.position, v);
+  return iter;
+}
+
+template template
+inline std::pair btree

::internal_locate(
+    const key_type &key, IterType iter) const {
+  // Forwards to internal_locate_type::dispatch, passing the key, this tree
+  // and the starting iterator, and returns its result unchanged.
+  return internal_locate_type::dispatch(key, *this, iter);
+}
+
+template template
+inline std::pair btree

::internal_locate_plain_compare(
+    const key_type &key, IterType iter) const {
+  // Descends from iter.node to a leaf, taking the lower_bound position of key
+  // in each node on the way. The second member of the result is always 0.
+  iter.position = iter.node->lower_bound(key, key_comp());
+  while (!iter.node->leaf()) {
+    iter.node = iter.node->child(iter.position);
+    iter.position = iter.node->lower_bound(key, key_comp());
+  }
+  return std::make_pair(iter, 0);
+}
+
+template template
+inline std::pair btree

::internal_locate_compare_to(
+    const key_type &key, IterType iter) const {
+  // Descends from iter.node towards a leaf. The value returned by the node's
+  // lower_bound() carries the position in the kMatchMask bits and an
+  // exact-match flag in the kExactMatch bit. The search stops at the first
+  // exact match and reports kExactMatch; if a leaf is reached without one,
+  // it reports -kExactMatch.
+  for (;;) {
+    int res = iter.node->lower_bound(key, key_comp());
+    iter.position = res & kMatchMask;
+    if (res & kExactMatch) {
+      return std::make_pair(iter, static_cast(kExactMatch));
+    }
+    if (iter.node->leaf()) {
+      break;
+    }
+    iter.node = iter.node->child(iter.position);
+  }
+  return std::make_pair(iter, -kExactMatch);
+}
+
+template template
+IterType btree

::internal_lower_bound(
+    const key_type &key, IterType iter) const {
+  // An iterator with a NULL node is handed back untouched.
+  if (!iter.node) {
+    return iter;
+  }
+  // Descend to a leaf along the lower_bound position of key in each node,
+  // masking the result with kMatchMask.
+  iter.position = iter.node->lower_bound(key, key_comp()) & kMatchMask;
+  while (!iter.node->leaf()) {
+    iter.node = iter.node->child(iter.position);
+    iter.position = iter.node->lower_bound(key, key_comp()) & kMatchMask;
+  }
+  // Normalize a one-past-the-end leaf position via internal_last().
+  return internal_last(iter);
+}
+
+template template
+IterType btree

::internal_upper_bound(
+    const key_type &key, IterType iter) const {
+  // An iterator with a NULL node is handed back untouched.
+  if (!iter.node) {
+    return iter;
+  }
+  // Descend to a leaf along the upper_bound position of key in each node.
+  iter.position = iter.node->upper_bound(key, key_comp());
+  while (!iter.node->leaf()) {
+    iter.node = iter.node->child(iter.position);
+    iter.position = iter.node->upper_bound(key, key_comp());
+  }
+  // Normalize a one-past-the-end leaf position via internal_last().
+  return internal_last(iter);
+}
+
+template template
+IterType btree

::internal_find_unique(
+    const key_type &key, IterType iter) const {
+  // Returns an iterator to the value whose key equals key, or IterType(NULL,
+  // 0) when there is none or when iter.node is NULL.
+  if (iter.node) {
+    std::pair res = internal_locate(key, iter);
+    if (res.second == kExactMatch) {
+      return res.first;
+    }
+    // A zero result means the locate step did not report whether the key
+    // matched, so check the located entry with an explicit comparison.
+    if (!res.second) {
+      iter = internal_last(res.first);
+      if (iter.node && !compare_keys(key, iter.key())) {
+        return iter;
+      }
+    }
+  }
+  return IterType(NULL, 0);
+}
+
+template template
+IterType btree

::internal_find_multi(
+    const key_type &key, IterType iter) const {
+  // Returns the lower-bound entry for key when key does not compare less than
+  // that entry's key; every other outcome yields IterType(NULL, 0).
+  if (!iter.node) {
+    return IterType(NULL, 0);
+  }
+  iter = internal_lower_bound(key, iter);
+  if (!iter.node) {
+    return IterType(NULL, 0);
+  }
+  iter = internal_last(iter);
+  if (!iter.node || compare_keys(key, iter.key())) {
+    return IterType(NULL, 0);
+  }
+  return iter;
+}
+
+template
+void btree

::internal_clear(node_type *node) {
+  // Recursively releases node and everything below it.
+  if (node->leaf()) {
+    delete_leaf_node(node);
+    return;
+  }
+
+  // Internal node: release all count() + 1 children first, then the node
+  // itself. The root has its own deleter.
+  for (int c = 0; c <= node->count(); ++c) {
+    internal_clear(node->child(c));
+  }
+  if (node != root()) {
+    delete_internal_node(node);
+  } else {
+    delete_internal_root_node();
+  }
+}
+
+template
+void btree

::internal_dump(
+    std::ostream &os, const node_type *node, int level) const {
+  // Writes the keys of the subtree under node to os in order, one key per
+  // line. Each line is prefixed with `level` copies of the indent string and
+  // ends with the level in square brackets.
+  for (int i = 0; i < node->count(); ++i) {
+    // On an internal node, child i is dumped before key i.
+    if (!node->leaf()) {
+      internal_dump(os, node->child(i), level + 1);
+    }
+    for (int j = 0; j < level; ++j) {
+      os << " ";
+    }
+    os << node->key(i) << " [" << level << "]\n";
+  }
+  // The last child follows the last key.
+  if (!node->leaf()) {
+    internal_dump(os, node->child(node->count()), level + 1);
+  }
+}
+
+template
+int btree

::internal_verify(
+    const node_type *node, const key_type *lo, const key_type *hi) const {
+  // Recursively checks the subtree under node with asserts and returns the
+  // number of values it contains. lo and hi, when non-NULL, are the keys that
+  // bound this subtree from below and above.
+  assert(node->count() > 0);
+  assert(node->count() <= node->max_count());
+  if (lo) {
+    assert(!compare_keys(node->key(0), *lo));
+  }
+  if (hi) {
+    assert(!compare_keys(*hi, node->key(node->count() - 1)));
+  }
+  // Keys inside the node must be in non-decreasing order.
+  for (int i = 1; i < node->count(); ++i) {
+    assert(!compare_keys(node->key(i), node->key(i - 1)));
+  }
+  int count = node->count();
+  if (!node->leaf()) {
+    for (int i = 0; i <= node->count(); ++i) {
+      assert(node->child(i) != NULL);
+      assert(node->child(i)->parent() == node);
+      assert(node->child(i)->position() == i);
+      // Child i is bounded by the keys on either side of it; the outermost
+      // children inherit this node's own bounds.
+      count += internal_verify(
+          node->child(i),
+          (i == 0) ? lo : &node->key(i - 1),
+          (i == node->count()) ? hi : &node->key(i));
+    }
+  }
+  return count;
+}
+
+} // namespace btree
+
+#endif // UTIL_BTREE_BTREE_H__
diff --git a/lib/xdelta3/cpp-btree/btree_bench.cc b/lib/xdelta3/cpp-btree/btree_bench.cc
new file mode 100644
index 0000000..6eaed99
--- /dev/null
+++ b/lib/xdelta3/cpp-btree/btree_bench.cc
@@ -0,0 +1,593 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gflags/gflags.h" +#include "btree_map.h" +#include "btree_set.h" +#include "btree_test.h" + +DEFINE_int32(test_random_seed, 123456789, "Seed for srand()"); +DEFINE_int32(benchmark_max_iters, 10000000, "Maximum test iterations"); +DEFINE_int32(benchmark_min_iters, 100, "Minimum test iterations"); +DEFINE_int32(benchmark_target_seconds, 1, + "Attempt to benchmark for this many seconds"); + +using std::allocator; +using std::less; +using std::map; +using std::max; +using std::min; +using std::multimap; +using std::multiset; +using std::set; +using std::string; +using std::vector; + +namespace btree { +namespace { + +struct RandGen { + typedef ptrdiff_t result_type; + RandGen(result_type seed) { + srand(seed); + } + result_type operator()(result_type l) { + return rand() % l; + } +}; + +struct BenchmarkRun { + BenchmarkRun(const char *name, void (*func)(int)); + void Run(); + void Stop(); + void Start(); + void Reset(); + + BenchmarkRun *next_benchmark; + const char *benchmark_name; + void (*benchmark_func)(int); + int64_t accum_micros; + int64_t last_started; +}; + +BenchmarkRun *first_benchmark; +BenchmarkRun *current_benchmark; + +int64_t get_micros () { + timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec * 1000000 + tv.tv_usec; +} + +BenchmarkRun::BenchmarkRun(const char *name, void (*func)(int)) + : next_benchmark(first_benchmark), + benchmark_name(name), + benchmark_func(func), + accum_micros(0), + last_started(0) { + first_benchmark = this; +} + +#define BTREE_BENCHMARK(name) \ + BTREE_BENCHMARK2(#name, name, __COUNTER__) +#define BTREE_BENCHMARK2(name, func, counter) \ + BTREE_BENCHMARK3(name, func, counter) +#define BTREE_BENCHMARK3(name, func, counter) \ + BenchmarkRun bench ## counter (name, func) + +void StopBenchmarkTiming() { + current_benchmark->Stop(); +} + +void StartBenchmarkTiming() { + current_benchmark->Start(); +} + +void 
RunBenchmarks() { + for (BenchmarkRun *bench = first_benchmark; bench; + bench = bench->next_benchmark) { + bench->Run(); + } +} + +void BenchmarkRun::Start() { + assert(!last_started); + last_started = get_micros(); +} + +void BenchmarkRun::Stop() { + if (last_started == 0) { + return; + } + accum_micros += get_micros() - last_started; + last_started = 0; +} + +void BenchmarkRun::Reset() { + last_started = 0; + accum_micros = 0; +} + +void BenchmarkRun::Run() { + assert(current_benchmark == NULL); + current_benchmark = this; + int iters = FLAGS_benchmark_min_iters; + for (;;) { + Reset(); + Start(); + benchmark_func(iters); + Stop(); + if (accum_micros > FLAGS_benchmark_target_seconds * 1000000 || + iters >= FLAGS_benchmark_max_iters) { + break; + } else if (accum_micros == 0) { + iters *= 100; + } else { + int64_t target_micros = FLAGS_benchmark_target_seconds * 1000000; + iters = target_micros * iters / accum_micros; + } + iters = min(iters, FLAGS_benchmark_max_iters); + } + std::cout << benchmark_name << "\t" + << accum_micros * 1000 / iters << "\t" + << iters; + current_benchmark = NULL; +} + +// Used to avoid compiler optimizations for these benchmarks. +template +void sink(const T& t0) { + volatile T t = t0; +} + +// Benchmark insertion of values into a container. +template +void BM_Insert(int n) { + typedef typename std::remove_const::type V; + typename KeyOfValue::type key_of_value; + + // Disable timing while we perform some initialization. 
+ StopBenchmarkTiming(); + + T container; + vector values = GenerateValues(FLAGS_benchmark_values); + for (int i = 0; i < values.size(); i++) { + container.insert(values[i]); + } + + for (int i = 0; i < n; ) { + // Remove and re-insert 10% of the keys + int m = min(n - i, FLAGS_benchmark_values / 10); + + for (int j = i; j < i + m; j++) { + int x = j % FLAGS_benchmark_values; + container.erase(key_of_value(values[x])); + } + + StartBenchmarkTiming(); + + for (int j = i; j < i + m; j++) { + int x = j % FLAGS_benchmark_values; + container.insert(values[x]); + } + + StopBenchmarkTiming(); + + i += m; + } +} + +// Benchmark lookup of values in a container. +template +void BM_Lookup(int n) { + typedef typename std::remove_const::type V; + typename KeyOfValue::type key_of_value; + + // Disable timing while we perform some initialization. + StopBenchmarkTiming(); + + T container; + vector values = GenerateValues(FLAGS_benchmark_values); + + for (int i = 0; i < values.size(); i++) { + container.insert(values[i]); + } + + V r = V(); + + StartBenchmarkTiming(); + + for (int i = 0; i < n; i++) { + int m = i % values.size(); + r = *container.find(key_of_value(values[m])); + } + + StopBenchmarkTiming(); + + sink(r); // Keep compiler from optimizing away r. +} + +// Benchmark lookup of values in a full container, meaning that values +// are inserted in-order to take advantage of biased insertion, which +// yields a full tree. +template +void BM_FullLookup(int n) { + typedef typename std::remove_const::type V; + typename KeyOfValue::type key_of_value; + + // Disable timing while we perform some initialization. 
+ StopBenchmarkTiming(); + + T container; + vector values = GenerateValues(FLAGS_benchmark_values); + vector sorted(values); + sort(sorted.begin(), sorted.end()); + + for (int i = 0; i < sorted.size(); i++) { + container.insert(sorted[i]); + } + + V r = V(); + + StartBenchmarkTiming(); + + for (int i = 0; i < n; i++) { + int m = i % values.size(); + r = *container.find(key_of_value(values[m])); + } + + StopBenchmarkTiming(); + + sink(r); // Keep compiler from optimizing away r. +} + +// Benchmark deletion of values from a container. +template +void BM_Delete(int n) { + typedef typename std::remove_const::type V; + typename KeyOfValue::type key_of_value; + + // Disable timing while we perform some initialization. + StopBenchmarkTiming(); + + T container; + vector values = GenerateValues(FLAGS_benchmark_values); + for (int i = 0; i < values.size(); i++) { + container.insert(values[i]); + } + + for (int i = 0; i < n; ) { + // Remove and re-insert 10% of the keys + int m = min(n - i, FLAGS_benchmark_values / 10); + + StartBenchmarkTiming(); + + for (int j = i; j < i + m; j++) { + int x = j % FLAGS_benchmark_values; + container.erase(key_of_value(values[x])); + } + + StopBenchmarkTiming(); + + for (int j = i; j < i + m; j++) { + int x = j % FLAGS_benchmark_values; + container.insert(values[x]); + } + + i += m; + } +} + +// Benchmark steady-state insert (into first half of range) and remove +// (from second second half of range), treating the container +// approximately like a queue with log-time access for all elements. +// This benchmark does not test the case where insertion and removal +// happen in the same region of the tree. This benchmark counts two +// value constructors. +template +void BM_QueueAddRem(int n) { + typedef typename std::remove_const::type V; + typename KeyOfValue::type key_of_value; + + // Disable timing while we perform some initialization. 
+ StopBenchmarkTiming(); + assert(FLAGS_benchmark_values % 2 == 0); + + T container; + + const int half = FLAGS_benchmark_values / 2; + vector remove_keys(half); + vector add_keys(half); + + for (int i = 0; i < half; i++) { + remove_keys[i] = i; + add_keys[i] = i; + } + + RandGen rand(FLAGS_test_random_seed); + + random_shuffle(remove_keys.begin(), remove_keys.end(), rand); + random_shuffle(add_keys.begin(), add_keys.end(), rand); + + Generator g(FLAGS_benchmark_values + FLAGS_benchmark_max_iters); + + for (int i = 0; i < half; i++) { + container.insert(g(add_keys[i])); + container.insert(g(half + remove_keys[i])); + } + + // There are three parts each of size "half": + // 1 is being deleted from [offset - half, offset) + // 2 is standing [offset, offset + half) + // 3 is being inserted into [offset + half, offset + 2 * half) + int offset = 0; + + StartBenchmarkTiming(); + + for (int i = 0; i < n; i++) { + int idx = i % half; + + if (idx == 0) { + StopBenchmarkTiming(); + random_shuffle(remove_keys.begin(), remove_keys.end(), rand); + random_shuffle(add_keys.begin(), add_keys.end(), rand); + offset += half; + StartBenchmarkTiming(); + } + + int e = container.erase(key_of_value(g(offset - half + remove_keys[idx]))); + assert(e == 1); + container.insert(g(offset + half + add_keys[idx])); + } + + StopBenchmarkTiming(); +} + +// Mixed insertion and deletion in the same range using pre-constructed values. +template +void BM_MixedAddRem(int n) { + typedef typename std::remove_const::type V; + typename KeyOfValue::type key_of_value; + + // Disable timing while we perform some initialization. 
+ StopBenchmarkTiming(); + assert(FLAGS_benchmark_values % 2 == 0); + + T container; + RandGen rand(FLAGS_test_random_seed); + + vector values = GenerateValues(FLAGS_benchmark_values * 2); + + // Create two random shuffles + vector remove_keys(FLAGS_benchmark_values); + vector add_keys(FLAGS_benchmark_values); + + // Insert the first half of the values (already in random order) + for (int i = 0; i < FLAGS_benchmark_values; i++) { + container.insert(values[i]); + + // remove_keys and add_keys will be swapped before each round, + // therefore fill add_keys here w/ the keys being inserted, so + // they'll be the first to be removed. + remove_keys[i] = i + FLAGS_benchmark_values; + add_keys[i] = i; + } + + StartBenchmarkTiming(); + + for (int i = 0; i < n; i++) { + int idx = i % FLAGS_benchmark_values; + + if (idx == 0) { + StopBenchmarkTiming(); + remove_keys.swap(add_keys); + random_shuffle(remove_keys.begin(), remove_keys.end(), rand); + random_shuffle(add_keys.begin(), add_keys.end(), rand); + StartBenchmarkTiming(); + } + + int e = container.erase(key_of_value(values[remove_keys[idx]])); + assert(e == 1); + container.insert(values[add_keys[idx]]); + } + + StopBenchmarkTiming(); +} + +// Insertion at end, removal from the beginning. This benchmark +// counts two value constructors. +template +void BM_Fifo(int n) { + typedef typename std::remove_const::type V; + + // Disable timing while we perform some initialization. 
+ StopBenchmarkTiming(); + + T container; + Generator g(FLAGS_benchmark_values + FLAGS_benchmark_max_iters); + + for (int i = 0; i < FLAGS_benchmark_values; i++) { + container.insert(g(i)); + } + + StartBenchmarkTiming(); + + for (int i = 0; i < n; i++) { + container.erase(container.begin()); + container.insert(container.end(), g(i + FLAGS_benchmark_values)); + } + + StopBenchmarkTiming(); +} + +// Iteration (forward) through the tree +template +void BM_FwdIter(int n) { + typedef typename std::remove_const::type V; + + // Disable timing while we perform some initialization. + StopBenchmarkTiming(); + + T container; + vector values = GenerateValues(FLAGS_benchmark_values); + + for (int i = 0; i < FLAGS_benchmark_values; i++) { + container.insert(values[i]); + } + + typename T::iterator iter; + + V r = V(); + + StartBenchmarkTiming(); + + for (int i = 0; i < n; i++) { + int idx = i % FLAGS_benchmark_values; + + if (idx == 0) { + iter = container.begin(); + } + r = *iter; + ++iter; + } + + StopBenchmarkTiming(); + + sink(r); // Keep compiler from optimizing away r. 
+} + +typedef set stl_set_int32; +typedef set stl_set_int64; +typedef set stl_set_string; + +typedef map stl_map_int32; +typedef map stl_map_int64; +typedef map stl_map_string; + +typedef multiset stl_multiset_int32; +typedef multiset stl_multiset_int64; +typedef multiset stl_multiset_string; + +typedef multimap stl_multimap_int32; +typedef multimap stl_multimap_int64; +typedef multimap stl_multimap_string; + +#define MY_BENCHMARK_TYPES2(value, name, size) \ + typedef btree ## _set, allocator, size> \ + btree ## _ ## size ## _set_ ## name; \ + typedef btree ## _map, allocator, size> \ + btree ## _ ## size ## _map_ ## name; \ + typedef btree ## _multiset, allocator, size> \ + btree ## _ ## size ## _multiset_ ## name; \ + typedef btree ## _multimap, allocator, size> \ + btree ## _ ## size ## _multimap_ ## name + +#define MY_BENCHMARK_TYPES(value, name) \ + MY_BENCHMARK_TYPES2(value, name, 128); \ + MY_BENCHMARK_TYPES2(value, name, 160); \ + MY_BENCHMARK_TYPES2(value, name, 192); \ + MY_BENCHMARK_TYPES2(value, name, 224); \ + MY_BENCHMARK_TYPES2(value, name, 256); \ + MY_BENCHMARK_TYPES2(value, name, 288); \ + MY_BENCHMARK_TYPES2(value, name, 320); \ + MY_BENCHMARK_TYPES2(value, name, 352); \ + MY_BENCHMARK_TYPES2(value, name, 384); \ + MY_BENCHMARK_TYPES2(value, name, 416); \ + MY_BENCHMARK_TYPES2(value, name, 448); \ + MY_BENCHMARK_TYPES2(value, name, 480); \ + MY_BENCHMARK_TYPES2(value, name, 512); \ + MY_BENCHMARK_TYPES2(value, name, 1024); \ + MY_BENCHMARK_TYPES2(value, name, 1536); \ + MY_BENCHMARK_TYPES2(value, name, 2048) + +MY_BENCHMARK_TYPES(int32_t, int32); +MY_BENCHMARK_TYPES(int64_t, int64); +MY_BENCHMARK_TYPES(string, string); + +#define MY_BENCHMARK4(type, name, func) \ + void BM_ ## type ## _ ## name(int n) { BM_ ## func (n); } \ + BTREE_BENCHMARK(BM_ ## type ## _ ## name) + +// Define NODESIZE_TESTING when running btree_perf.py. 
+ +#ifdef NODESIZE_TESTING +#define MY_BENCHMARK3(tree, type, name, func) \ + MY_BENCHMARK4(tree ## _128_ ## type, name, func); \ + MY_BENCHMARK4(tree ## _160_ ## type, name, func); \ + MY_BENCHMARK4(tree ## _192_ ## type, name, func); \ + MY_BENCHMARK4(tree ## _224_ ## type, name, func); \ + MY_BENCHMARK4(tree ## _256_ ## type, name, func); \ + MY_BENCHMARK4(tree ## _288_ ## type, name, func); \ + MY_BENCHMARK4(tree ## _320_ ## type, name, func); \ + MY_BENCHMARK4(tree ## _352_ ## type, name, func); \ + MY_BENCHMARK4(tree ## _384_ ## type, name, func); \ + MY_BENCHMARK4(tree ## _416_ ## type, name, func); \ + MY_BENCHMARK4(tree ## _448_ ## type, name, func); \ + MY_BENCHMARK4(tree ## _480_ ## type, name, func); \ + MY_BENCHMARK4(tree ## _512_ ## type, name, func); \ + MY_BENCHMARK4(tree ## _1024_ ## type, name, func); \ + MY_BENCHMARK4(tree ## _1536_ ## type, name, func); \ + MY_BENCHMARK4(tree ## _2048_ ## type, name, func) +#else +#define MY_BENCHMARK3(tree, type, name, func) \ + MY_BENCHMARK4(tree ## _256_ ## type, name, func); \ + MY_BENCHMARK4(tree ## _2048_ ## type, name, func) +#endif + +#define MY_BENCHMARK2(type, name, func) \ + MY_BENCHMARK4(stl_ ## type, name, func); \ + MY_BENCHMARK3(btree, type, name, func) + +#define MY_BENCHMARK(type) \ + MY_BENCHMARK2(type, insert, Insert); \ + MY_BENCHMARK2(type, lookup, Lookup); \ + MY_BENCHMARK2(type, fulllookup, FullLookup); \ + MY_BENCHMARK2(type, delete, Delete); \ + MY_BENCHMARK2(type, queueaddrem, QueueAddRem); \ + MY_BENCHMARK2(type, mixedaddrem, MixedAddRem); \ + MY_BENCHMARK2(type, fifo, Fifo); \ + MY_BENCHMARK2(type, fwditer, FwdIter) + +MY_BENCHMARK(set_int32); +MY_BENCHMARK(map_int32); +MY_BENCHMARK(set_int64); +MY_BENCHMARK(map_int64); +MY_BENCHMARK(set_string); +MY_BENCHMARK(map_string); + +MY_BENCHMARK(multiset_int32); +MY_BENCHMARK(multimap_int32); +MY_BENCHMARK(multiset_int64); +MY_BENCHMARK(multimap_int64); +MY_BENCHMARK(multiset_string); +MY_BENCHMARK(multimap_string); + +} // namespace +} // 
namespace btree + +int main(int argc, char **argv) { + btree::RunBenchmarks(); + return 0; +} diff --git a/lib/xdelta3/cpp-btree/btree_container.h b/lib/xdelta3/cpp-btree/btree_container.h new file mode 100644 index 0000000..fb617ab --- /dev/null +++ b/lib/xdelta3/cpp-btree/btree_container.h @@ -0,0 +1,349 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef UTIL_BTREE_BTREE_CONTAINER_H__ +#define UTIL_BTREE_BTREE_CONTAINER_H__ + +#include +#include + +#include "btree.h" + +namespace btree { + +// A common base class for btree_set, btree_map, btree_multiset and +// btree_multimap. 
+template +class btree_container { + typedef btree_container self_type; + + public: + typedef typename Tree::params_type params_type; + typedef typename Tree::key_type key_type; + typedef typename Tree::value_type value_type; + typedef typename Tree::key_compare key_compare; + typedef typename Tree::allocator_type allocator_type; + typedef typename Tree::pointer pointer; + typedef typename Tree::const_pointer const_pointer; + typedef typename Tree::reference reference; + typedef typename Tree::const_reference const_reference; + typedef typename Tree::size_type size_type; + typedef typename Tree::difference_type difference_type; + typedef typename Tree::iterator iterator; + typedef typename Tree::const_iterator const_iterator; + typedef typename Tree::reverse_iterator reverse_iterator; + typedef typename Tree::const_reverse_iterator const_reverse_iterator; + + public: + // Default constructor. + btree_container(const key_compare &comp, const allocator_type &alloc) + : tree_(comp, alloc) { + } + + // Copy constructor. + btree_container(const self_type &x) + : tree_(x.tree_) { + } + + // Iterator routines. + iterator begin() { return tree_.begin(); } + const_iterator begin() const { return tree_.begin(); } + iterator end() { return tree_.end(); } + const_iterator end() const { return tree_.end(); } + reverse_iterator rbegin() { return tree_.rbegin(); } + const_reverse_iterator rbegin() const { return tree_.rbegin(); } + reverse_iterator rend() { return tree_.rend(); } + const_reverse_iterator rend() const { return tree_.rend(); } + + // Lookup routines. 
+ iterator lower_bound(const key_type &key) { + return tree_.lower_bound(key); + } + const_iterator lower_bound(const key_type &key) const { + return tree_.lower_bound(key); + } + iterator upper_bound(const key_type &key) { + return tree_.upper_bound(key); + } + const_iterator upper_bound(const key_type &key) const { + return tree_.upper_bound(key); + } + std::pair equal_range(const key_type &key) { + return tree_.equal_range(key); + } + std::pair equal_range(const key_type &key) const { + return tree_.equal_range(key); + } + + // Utility routines. + void clear() { + tree_.clear(); + } + void swap(self_type &x) { + tree_.swap(x.tree_); + } + void dump(std::ostream &os) const { + tree_.dump(os); + } + void verify() const { + tree_.verify(); + } + + // Size routines. + size_type size() const { return tree_.size(); } + size_type max_size() const { return tree_.max_size(); } + bool empty() const { return tree_.empty(); } + size_type height() const { return tree_.height(); } + size_type internal_nodes() const { return tree_.internal_nodes(); } + size_type leaf_nodes() const { return tree_.leaf_nodes(); } + size_type nodes() const { return tree_.nodes(); } + size_type bytes_used() const { return tree_.bytes_used(); } + static double average_bytes_per_value() { + return Tree::average_bytes_per_value(); + } + double fullness() const { return tree_.fullness(); } + double overhead() const { return tree_.overhead(); } + + bool operator==(const self_type& x) const { + if (size() != x.size()) { + return false; + } + for (const_iterator i = begin(), xi = x.begin(); i != end(); ++i, ++xi) { + if (*i != *xi) { + return false; + } + } + return true; + } + + bool operator!=(const self_type& other) const { + return !operator==(other); + } + + + protected: + Tree tree_; +}; + +template +inline std::ostream& operator<<(std::ostream &os, const btree_container &b) { + b.dump(os); + return os; +} + +// A common base class for btree_set and safe_btree_set. 
+template +class btree_unique_container : public btree_container { + typedef btree_unique_container self_type; + typedef btree_container super_type; + + public: + typedef typename Tree::key_type key_type; + typedef typename Tree::value_type value_type; + typedef typename Tree::size_type size_type; + typedef typename Tree::key_compare key_compare; + typedef typename Tree::allocator_type allocator_type; + typedef typename Tree::iterator iterator; + typedef typename Tree::const_iterator const_iterator; + + public: + // Default constructor. + btree_unique_container(const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + } + + // Copy constructor. + btree_unique_container(const self_type &x) + : super_type(x) { + } + + // Range constructor. + template + btree_unique_container(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + insert(b, e); + } + + // Lookup routines. + iterator find(const key_type &key) { + return this->tree_.find_unique(key); + } + const_iterator find(const key_type &key) const { + return this->tree_.find_unique(key); + } + size_type count(const key_type &key) const { + return this->tree_.count_unique(key); + } + + // Insertion routines. + std::pair insert(const value_type &x) { + return this->tree_.insert_unique(x); + } + iterator insert(iterator position, const value_type &x) { + return this->tree_.insert_unique(position, x); + } + template + void insert(InputIterator b, InputIterator e) { + this->tree_.insert_unique(b, e); + } + + // Deletion routines. + int erase(const key_type &key) { + return this->tree_.erase_unique(key); + } + // Erase the specified iterator from the btree. The iterator must be valid + // (i.e. not equal to end()). Return an iterator pointing to the node after + // the one that was erased (or end() if none exists). 
+ iterator erase(const iterator &iter) { + return this->tree_.erase(iter); + } + void erase(const iterator &first, const iterator &last) { + this->tree_.erase(first, last); + } +}; + +// A common base class for btree_map and safe_btree_map. +template +class btree_map_container : public btree_unique_container { + typedef btree_map_container self_type; + typedef btree_unique_container super_type; + + public: + typedef typename Tree::key_type key_type; + typedef typename Tree::data_type data_type; + typedef typename Tree::value_type value_type; + typedef typename Tree::mapped_type mapped_type; + typedef typename Tree::key_compare key_compare; + typedef typename Tree::allocator_type allocator_type; + + private: + // A pointer-like object which only generates its value when + // dereferenced. Used by operator[] to avoid constructing an empty data_type + // if the key already exists in the map. + struct generate_value { + generate_value(const key_type &k) + : key(k) { + } + value_type operator*() const { + return std::make_pair(key, data_type()); + } + const key_type &key; + }; + + public: + // Default constructor. + btree_map_container(const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + } + + // Copy constructor. + btree_map_container(const self_type &x) + : super_type(x) { + } + + // Range constructor. + template + btree_map_container(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(b, e, comp, alloc) { + } + + // Insertion routines. + data_type& operator[](const key_type &key) { + return this->tree_.insert_unique(key, generate_value(key)).first->second; + } +}; + +// A common base class for btree_multiset and btree_multimap. 
+template +class btree_multi_container : public btree_container { + typedef btree_multi_container self_type; + typedef btree_container super_type; + + public: + typedef typename Tree::key_type key_type; + typedef typename Tree::value_type value_type; + typedef typename Tree::size_type size_type; + typedef typename Tree::key_compare key_compare; + typedef typename Tree::allocator_type allocator_type; + typedef typename Tree::iterator iterator; + typedef typename Tree::const_iterator const_iterator; + + public: + // Default constructor. + btree_multi_container(const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + } + + // Copy constructor. + btree_multi_container(const self_type &x) + : super_type(x) { + } + + // Range constructor. + template + btree_multi_container(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + insert(b, e); + } + + // Lookup routines. + iterator find(const key_type &key) { + return this->tree_.find_multi(key); + } + const_iterator find(const key_type &key) const { + return this->tree_.find_multi(key); + } + size_type count(const key_type &key) const { + return this->tree_.count_multi(key); + } + + // Insertion routines. + iterator insert(const value_type &x) { + return this->tree_.insert_multi(x); + } + iterator insert(iterator position, const value_type &x) { + return this->tree_.insert_multi(position, x); + } + template + void insert(InputIterator b, InputIterator e) { + this->tree_.insert_multi(b, e); + } + + // Deletion routines. + int erase(const key_type &key) { + return this->tree_.erase_multi(key); + } + // Erase the specified iterator from the btree. The iterator must be valid + // (i.e. not equal to end()). Return an iterator pointing to the node after + // the one that was erased (or end() if none exists). 
+ iterator erase(const iterator &iter) { + return this->tree_.erase(iter); + } + void erase(const iterator &first, const iterator &last) { + this->tree_.erase(first, last); + } +}; + +} // namespace btree + +#endif // UTIL_BTREE_BTREE_CONTAINER_H__ diff --git a/lib/xdelta3/cpp-btree/btree_map.h b/lib/xdelta3/cpp-btree/btree_map.h new file mode 100644 index 0000000..b83489f --- /dev/null +++ b/lib/xdelta3/cpp-btree/btree_map.h @@ -0,0 +1,130 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// A btree_map<> implements the STL unique sorted associative container +// interface and the pair associative container interface (a.k.a map<>) using a +// btree. A btree_multimap<> implements the STL multiple sorted associative +// container interface and the pair associative container interface (a.k.a +// multimap<>) using a btree. See btree.h for details of the btree +// implementation and caveats. + +#ifndef UTIL_BTREE_BTREE_MAP_H__ +#define UTIL_BTREE_BTREE_MAP_H__ + +#include +#include +#include +#include +#include + +#include "btree.h" +#include "btree_container.h" + +namespace btree { + +// The btree_map class is needed mainly for its constructors. 
+template , + typename Alloc = std::allocator >, + int TargetNodeSize = 256> +class btree_map : public btree_map_container< + btree > > { + + typedef btree_map self_type; + typedef btree_map_params< + Key, Value, Compare, Alloc, TargetNodeSize> params_type; + typedef btree btree_type; + typedef btree_map_container super_type; + + public: + typedef typename btree_type::key_compare key_compare; + typedef typename btree_type::allocator_type allocator_type; + + public: + // Default constructor. + btree_map(const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + } + + // Copy constructor. + btree_map(const self_type &x) + : super_type(x) { + } + + // Range constructor. + template + btree_map(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(b, e, comp, alloc) { + } +}; + +template +inline void swap(btree_map &x, + btree_map &y) { + x.swap(y); +} + +// The btree_multimap class is needed mainly for its constructors. +template , + typename Alloc = std::allocator >, + int TargetNodeSize = 256> +class btree_multimap : public btree_multi_container< + btree > > { + + typedef btree_multimap self_type; + typedef btree_map_params< + Key, Value, Compare, Alloc, TargetNodeSize> params_type; + typedef btree btree_type; + typedef btree_multi_container super_type; + + public: + typedef typename btree_type::key_compare key_compare; + typedef typename btree_type::allocator_type allocator_type; + typedef typename btree_type::data_type data_type; + typedef typename btree_type::mapped_type mapped_type; + + public: + // Default constructor. + btree_multimap(const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + } + + // Copy constructor. + btree_multimap(const self_type &x) + : super_type(x) { + } + + // Range constructor. 
+ template + btree_multimap(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(b, e, comp, alloc) { + } +}; + +template +inline void swap(btree_multimap &x, + btree_multimap &y) { + x.swap(y); +} + +} // namespace btree + +#endif // UTIL_BTREE_BTREE_MAP_H__ diff --git a/lib/xdelta3/cpp-btree/btree_set.h b/lib/xdelta3/cpp-btree/btree_set.h new file mode 100644 index 0000000..f9b2e75 --- /dev/null +++ b/lib/xdelta3/cpp-btree/btree_set.h @@ -0,0 +1,121 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// A btree_set<> implements the STL unique sorted associative container +// interface (a.k.a set<>) using a btree. A btree_multiset<> implements the STL +// multiple sorted associative container interface (a.k.a multiset<>) using a +// btree. See btree.h for details of the btree implementation and caveats. + +#ifndef UTIL_BTREE_BTREE_SET_H__ +#define UTIL_BTREE_BTREE_SET_H__ + +#include +#include +#include + +#include "btree.h" +#include "btree_container.h" + +namespace btree { + +// The btree_set class is needed mainly for its constructors. 
+template , + typename Alloc = std::allocator, + int TargetNodeSize = 256> +class btree_set : public btree_unique_container< + btree > > { + + typedef btree_set self_type; + typedef btree_set_params params_type; + typedef btree btree_type; + typedef btree_unique_container super_type; + + public: + typedef typename btree_type::key_compare key_compare; + typedef typename btree_type::allocator_type allocator_type; + + public: + // Default constructor. + btree_set(const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + } + + // Copy constructor. + btree_set(const self_type &x) + : super_type(x) { + } + + // Range constructor. + template + btree_set(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(b, e, comp, alloc) { + } +}; + +template +inline void swap(btree_set &x, btree_set &y) { + x.swap(y); +} + +// The btree_multiset class is needed mainly for its constructors. +template , + typename Alloc = std::allocator, + int TargetNodeSize = 256> +class btree_multiset : public btree_multi_container< + btree > > { + + typedef btree_multiset self_type; + typedef btree_set_params params_type; + typedef btree btree_type; + typedef btree_multi_container super_type; + + public: + typedef typename btree_type::key_compare key_compare; + typedef typename btree_type::allocator_type allocator_type; + + public: + // Default constructor. + btree_multiset(const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + } + + // Copy constructor. + btree_multiset(const self_type &x) + : super_type(x) { + } + + // Range constructor. 
+ template + btree_multiset(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(b, e, comp, alloc) { + } +}; + +template +inline void swap(btree_multiset &x, + btree_multiset &y) { + x.swap(y); +} + +} // namespace btree + +#endif // UTIL_BTREE_BTREE_SET_H__ diff --git a/lib/xdelta3/cpp-btree/btree_test.cc b/lib/xdelta3/cpp-btree/btree_test.cc new file mode 100644 index 0000000..6b1837d --- /dev/null +++ b/lib/xdelta3/cpp-btree/btree_test.cc @@ -0,0 +1,270 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "gtest/gtest.h" +#include "btree_map.h" +#include "btree_set.h" +#include "btree_test.h" + +namespace btree { +namespace { + +template +void SetTest() { + typedef TestAllocator TestAlloc; + ASSERT_EQ(sizeof(btree_set), sizeof(void*)); + BtreeTest, std::allocator, N>, std::set >(); + BtreeAllocatorTest, TestAlloc, N> >(); +} + +template +void MapTest() { + typedef TestAllocator TestAlloc; + ASSERT_EQ(sizeof(btree_map), sizeof(void*)); + BtreeTest, std::allocator, N>, std::map >(); + BtreeAllocatorTest, TestAlloc, N> >(); + BtreeMapTest, std::allocator, N> >(); +} + +TEST(Btree, set_int32_32) { SetTest(); } +TEST(Btree, set_int32_64) { SetTest(); } +TEST(Btree, set_int32_128) { SetTest(); } +TEST(Btree, set_int32_256) { SetTest(); } +TEST(Btree, set_int64_256) { SetTest(); } +TEST(Btree, set_string_256) { SetTest(); } +TEST(Btree, set_pair_256) { SetTest, 256>(); } +TEST(Btree, map_int32_256) { MapTest(); } +TEST(Btree, map_int64_256) { MapTest(); } +TEST(Btree, map_string_256) { MapTest(); } +TEST(Btree, map_pair_256) { MapTest, 256>(); } + +// Large-node tests +TEST(Btree, map_int32_1024) { MapTest(); } +TEST(Btree, map_int32_1032) { MapTest(); } +TEST(Btree, map_int32_1040) { MapTest(); } +TEST(Btree, map_int32_1048) { MapTest(); } +TEST(Btree, map_int32_1056) { MapTest(); } + +TEST(Btree, map_int32_2048) { MapTest(); } +TEST(Btree, map_int32_4096) { MapTest(); } +TEST(Btree, set_int32_1024) { SetTest(); } +TEST(Btree, set_int32_2048) { SetTest(); } +TEST(Btree, set_int32_4096) { SetTest(); } +TEST(Btree, map_string_1024) { MapTest(); } +TEST(Btree, map_string_2048) { MapTest(); } +TEST(Btree, map_string_4096) { MapTest(); } +TEST(Btree, set_string_1024) { SetTest(); } +TEST(Btree, set_string_2048) { SetTest(); } +TEST(Btree, set_string_4096) { SetTest(); } + +template +void MultiSetTest() { + typedef TestAllocator TestAlloc; + ASSERT_EQ(sizeof(btree_multiset), sizeof(void*)); + BtreeMultiTest, std::allocator, N>, + std::multiset >(); + 
BtreeAllocatorTest, TestAlloc, N> >(); +} + +template +void MultiMapTest() { + typedef TestAllocator TestAlloc; + ASSERT_EQ(sizeof(btree_multimap), sizeof(void*)); + BtreeMultiTest, std::allocator, N>, + std::multimap >(); + BtreeMultiMapTest, std::allocator, N> >(); + BtreeAllocatorTest, TestAlloc, N> >(); +} + +TEST(Btree, multiset_int32_256) { MultiSetTest(); } +TEST(Btree, multiset_int64_256) { MultiSetTest(); } +TEST(Btree, multiset_string_256) { MultiSetTest(); } +TEST(Btree, multiset_pair_256) { MultiSetTest, 256>(); } +TEST(Btree, multimap_int32_256) { MultiMapTest(); } +TEST(Btree, multimap_int64_256) { MultiMapTest(); } +TEST(Btree, multimap_string_256) { MultiMapTest(); } +TEST(Btree, multimap_pair_256) { MultiMapTest, 256>(); } + +// Large-node tests +TEST(Btree, multimap_int32_1024) { MultiMapTest(); } +TEST(Btree, multimap_int32_2048) { MultiMapTest(); } +TEST(Btree, multimap_int32_4096) { MultiMapTest(); } +TEST(Btree, multiset_int32_1024) { MultiSetTest(); } +TEST(Btree, multiset_int32_2048) { MultiSetTest(); } +TEST(Btree, multiset_int32_4096) { MultiSetTest(); } +TEST(Btree, multimap_string_1024) { MultiMapTest(); } +TEST(Btree, multimap_string_2048) { MultiMapTest(); } +TEST(Btree, multimap_string_4096) { MultiMapTest(); } +TEST(Btree, multiset_string_1024) { MultiSetTest(); } +TEST(Btree, multiset_string_2048) { MultiSetTest(); } +TEST(Btree, multiset_string_4096) { MultiSetTest(); } + +// Verify that swapping btrees swaps the key comparison functors. 
+struct SubstringLess { + SubstringLess() : n(2) {} + SubstringLess(size_t length) + : n(length) { + } + bool operator()(const std::string &a, const std::string &b) const { + std::string as(a.data(), std::min(n, a.size())); + std::string bs(b.data(), std::min(n, b.size())); + return as < bs; + } + size_t n; +}; + +TEST(Btree, SwapKeyCompare) { + typedef btree_set SubstringSet; + SubstringSet s1(SubstringLess(1), SubstringSet::allocator_type()); + SubstringSet s2(SubstringLess(2), SubstringSet::allocator_type()); + + ASSERT_TRUE(s1.insert("a").second); + ASSERT_FALSE(s1.insert("aa").second); + + ASSERT_TRUE(s2.insert("a").second); + ASSERT_TRUE(s2.insert("aa").second); + ASSERT_FALSE(s2.insert("aaa").second); + + swap(s1, s2); + + ASSERT_TRUE(s1.insert("b").second); + ASSERT_TRUE(s1.insert("bb").second); + ASSERT_FALSE(s1.insert("bbb").second); + + ASSERT_TRUE(s2.insert("b").second); + ASSERT_FALSE(s2.insert("bb").second); +} + +TEST(Btree, UpperBoundRegression) { + // Regress a bug where upper_bound would default-construct a new key_compare + // instead of copying the existing one. + typedef btree_set SubstringSet; + SubstringSet my_set(SubstringLess(3)); + my_set.insert("aab"); + my_set.insert("abb"); + // We call upper_bound("aaa"). If this correctly uses the length 3 + // comparator, aaa < aab < abb, so we should get aab as the result. + // If it instead uses the default-constructed length 2 comparator, + // aa == aa < ab, so we'll get abb as our result. + SubstringSet::iterator it = my_set.upper_bound("aaa"); + ASSERT_TRUE(it != my_set.end()); + EXPECT_EQ("aab", *it); +} + + +TEST(Btree, IteratorIncrementBy) { + // Test that increment_by returns the same position as increment. + const int kSetSize = 2341; + btree_set my_set; + for (int i = 0; i < kSetSize; ++i) { + my_set.insert(i); + } + + { + // Simple increment vs. increment by. 
+ btree_set::iterator a = my_set.begin(); + btree_set::iterator b = my_set.begin(); + a.increment(); + b.increment_by(1); + EXPECT_EQ(*a, *b); + } + + btree_set::iterator a = my_set.begin(); + for (int i = 1; i < kSetSize; ++i) { + ++a; + // increment_by + btree_set::iterator b = my_set.begin(); + b.increment_by(i); + EXPECT_EQ(*a, *b) << ": i=" << i; + } +} + +TEST(Btree, Comparison) { + const int kSetSize = 1201; + btree_set my_set; + for (int i = 0; i < kSetSize; ++i) { + my_set.insert(i); + } + btree_set my_set_copy(my_set); + EXPECT_TRUE(my_set_copy == my_set); + EXPECT_TRUE(my_set == my_set_copy); + EXPECT_FALSE(my_set_copy != my_set); + EXPECT_FALSE(my_set != my_set_copy); + + my_set.insert(kSetSize); + EXPECT_FALSE(my_set_copy == my_set); + EXPECT_FALSE(my_set == my_set_copy); + EXPECT_TRUE(my_set_copy != my_set); + EXPECT_TRUE(my_set != my_set_copy); + + my_set.erase(kSetSize - 1); + EXPECT_FALSE(my_set_copy == my_set); + EXPECT_FALSE(my_set == my_set_copy); + EXPECT_TRUE(my_set_copy != my_set); + EXPECT_TRUE(my_set != my_set_copy); + + btree_map my_map; + for (int i = 0; i < kSetSize; ++i) { + my_map[std::string(i, 'a')] = i; + } + btree_map my_map_copy(my_map); + EXPECT_TRUE(my_map_copy == my_map); + EXPECT_TRUE(my_map == my_map_copy); + EXPECT_FALSE(my_map_copy != my_map); + EXPECT_FALSE(my_map != my_map_copy); + + ++my_map_copy[std::string(7, 'a')]; + EXPECT_FALSE(my_map_copy == my_map); + EXPECT_FALSE(my_map == my_map_copy); + EXPECT_TRUE(my_map_copy != my_map); + EXPECT_TRUE(my_map != my_map_copy); + + my_map_copy = my_map; + my_map["hello"] = kSetSize; + EXPECT_FALSE(my_map_copy == my_map); + EXPECT_FALSE(my_map == my_map_copy); + EXPECT_TRUE(my_map_copy != my_map); + EXPECT_TRUE(my_map != my_map_copy); + + my_map.erase(std::string(kSetSize - 1, 'a')); + EXPECT_FALSE(my_map_copy == my_map); + EXPECT_FALSE(my_map == my_map_copy); + EXPECT_TRUE(my_map_copy != my_map); + EXPECT_TRUE(my_map != my_map_copy); +} + +TEST(Btree, RangeCtorSanity) { + typedef 
btree_set, std::allocator, 256> test_set; + typedef btree_map, std::allocator, 256> + test_map; + typedef btree_multiset, std::allocator, 256> + test_mset; + typedef btree_multimap, std::allocator, 256> + test_mmap; + std::vector ivec; + ivec.push_back(1); + std::map imap; + imap.insert(std::make_pair(1, 2)); + test_mset tmset(ivec.begin(), ivec.end()); + test_mmap tmmap(imap.begin(), imap.end()); + test_set tset(ivec.begin(), ivec.end()); + test_map tmap(imap.begin(), imap.end()); + EXPECT_EQ(1, tmset.size()); + EXPECT_EQ(1, tmmap.size()); + EXPECT_EQ(1, tset.size()); + EXPECT_EQ(1, tmap.size()); +} + +} // namespace +} // namespace btree diff --git a/lib/xdelta3/cpp-btree/btree_test.h b/lib/xdelta3/cpp-btree/btree_test.h new file mode 100644 index 0000000..413dc3c --- /dev/null +++ b/lib/xdelta3/cpp-btree/btree_test.h @@ -0,0 +1,940 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef UTIL_BTREE_BTREE_TEST_H__ +#define UTIL_BTREE_BTREE_TEST_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "gflags/gflags.h" +#include "btree_container.h" + +DECLARE_int32(test_values); +DECLARE_int32(benchmark_values); + +namespace std { + +// Provide operator<< support for std::pair. 
+template +ostream& operator<<(ostream &os, const std::pair &p) { + os << "(" << p.first << "," << p.second << ")"; + return os; +} + +// Provide pair equality testing that works as long as x.first is comparable to +// y.first and x.second is comparable to y.second. Needed in the test for +// comparing std::pair to std::pair. +template +bool operator==(const std::pair &x, const std::pair &y) { + return x.first == y.first && x.second == y.second; +} + +// Partial specialization of remove_const that propagates the removal through +// std::pair. +template +struct remove_const > { + typedef pair::type, + typename remove_const::type> type; +}; + +} // namespace std + +namespace btree { + +// Select the first member of a pair. +template +struct select1st : public std::unary_function<_Pair, typename _Pair::first_type> { + const typename _Pair::first_type& operator()(const _Pair& __x) const { + return __x.first; + } +}; + +// Utility class to provide an accessor for a key given a value. The default +// behavior is to treat the value as a pair and return the first element. +template +struct KeyOfValue { + typedef select1st type; +}; + +template +struct identity { + inline const T& operator()(const T& t) const { return t; } +}; + +// Partial specialization of KeyOfValue class for when the key and value are +// the same type such as in set<> and btree_set<>. +template +struct KeyOfValue { + typedef identity type; +}; + +// Counts the number of occurrences of "c" in the buffer [buf_begin, buf_end); returns 0 if buf_begin is NULL or buf_end <= buf_begin. +inline ptrdiff_t strcount(const char* buf_begin, const char* buf_end, char c) { + if (buf_begin == NULL) + return 0; + if (buf_end <= buf_begin) + return 0; + ptrdiff_t num = 0; + for (const char* bp = buf_begin; bp != buf_end; bp++) { + if (*bp == c) + num++; + } + return num; +} + +// for when the string is not null-terminated. 
+inline ptrdiff_t strcount(const char* buf, size_t len, char c) { + return strcount(buf, buf + len, c); +} + +inline ptrdiff_t strcount(const std::string& buf, char c) { + return strcount(buf.c_str(), buf.size(), c); +} + +// The base class for a sorted associative container checker. TreeType is the +// container type to check and CheckerType is the container type to check +// against. TreeType is expected to be btree_{set,map,multiset,multimap} and +// CheckerType is expected to be {set,map,multiset,multimap}. +template +class base_checker { + typedef base_checker self_type; + + public: + typedef typename TreeType::key_type key_type; + typedef typename TreeType::value_type value_type; + typedef typename TreeType::key_compare key_compare; + typedef typename TreeType::pointer pointer; + typedef typename TreeType::const_pointer const_pointer; + typedef typename TreeType::reference reference; + typedef typename TreeType::const_reference const_reference; + typedef typename TreeType::size_type size_type; + typedef typename TreeType::difference_type difference_type; + typedef typename TreeType::iterator iterator; + typedef typename TreeType::const_iterator const_iterator; + typedef typename TreeType::reverse_iterator reverse_iterator; + typedef typename TreeType::const_reverse_iterator const_reverse_iterator; + + public: + // Default constructor. + base_checker() + : const_tree_(tree_) { + } + // Copy constructor. + base_checker(const self_type &x) + : tree_(x.tree_), + const_tree_(tree_), + checker_(x.checker_) { + } + // Range constructor. + template + base_checker(InputIterator b, InputIterator e) + : tree_(b, e), + const_tree_(tree_), + checker_(b, e) { + } + + // Iterator routines. 
+ iterator begin() { return tree_.begin(); } + const_iterator begin() const { return tree_.begin(); } + iterator end() { return tree_.end(); } + const_iterator end() const { return tree_.end(); } + reverse_iterator rbegin() { return tree_.rbegin(); } + const_reverse_iterator rbegin() const { return tree_.rbegin(); } + reverse_iterator rend() { return tree_.rend(); } + const_reverse_iterator rend() const { return tree_.rend(); } + + // Helper routines. + template + IterType iter_check( + IterType tree_iter, CheckerIterType checker_iter) const { + if (tree_iter == tree_.end()) { + EXPECT_EQ(checker_iter, checker_.end()); + } else { + EXPECT_EQ(*tree_iter, *checker_iter); + } + return tree_iter; + } + template + IterType riter_check( + IterType tree_iter, CheckerIterType checker_iter) const { + if (tree_iter == tree_.rend()) { + EXPECT_EQ(checker_iter, checker_.rend()); + } else { + EXPECT_EQ(*tree_iter, *checker_iter); + } + return tree_iter; + } + void value_check(const value_type &x) { + typename KeyOfValue::type key_of_value; + const key_type &key = key_of_value(x); + EXPECT_EQ(*find(key), x); + lower_bound(key); + upper_bound(key); + equal_range(key); + count(key); + } + void erase_check(const key_type &key) { + EXPECT_TRUE(tree_.find(key) == const_tree_.end()); + EXPECT_TRUE(const_tree_.find(key) == tree_.end()); + EXPECT_TRUE(tree_.equal_range(key).first == + const_tree_.equal_range(key).second); + } + + // Lookup routines. 
+ iterator lower_bound(const key_type &key) { + return iter_check(tree_.lower_bound(key), checker_.lower_bound(key)); + } + const_iterator lower_bound(const key_type &key) const { + return iter_check(tree_.lower_bound(key), checker_.lower_bound(key)); + } + iterator upper_bound(const key_type &key) { + return iter_check(tree_.upper_bound(key), checker_.upper_bound(key)); + } + const_iterator upper_bound(const key_type &key) const { + return iter_check(tree_.upper_bound(key), checker_.upper_bound(key)); + } + std::pair equal_range(const key_type &key) { + std::pair checker_res = + checker_.equal_range(key); + std::pair tree_res = tree_.equal_range(key); + iter_check(tree_res.first, checker_res.first); + iter_check(tree_res.second, checker_res.second); + return tree_res; + } + std::pair equal_range(const key_type &key) const { + std::pair checker_res = + checker_.equal_range(key); + std::pair tree_res = tree_.equal_range(key); + iter_check(tree_res.first, checker_res.first); + iter_check(tree_res.second, checker_res.second); + return tree_res; + } + iterator find(const key_type &key) { + return iter_check(tree_.find(key), checker_.find(key)); + } + const_iterator find(const key_type &key) const { + return iter_check(tree_.find(key), checker_.find(key)); + } + size_type count(const key_type &key) const { + size_type res = checker_.count(key); + EXPECT_EQ(res, tree_.count(key)); + return res; + } + + // Assignment operator. + self_type& operator=(const self_type &x) { + tree_ = x.tree_; + checker_ = x.checker_; + return *this; + } + + // Deletion routines. 
+ int erase(const key_type &key) { + int size = tree_.size(); + int res = checker_.erase(key); + EXPECT_EQ(res, tree_.count(key)); + EXPECT_EQ(res, tree_.erase(key)); + EXPECT_EQ(tree_.count(key), 0); + EXPECT_EQ(tree_.size(), size - res); + erase_check(key); + return res; + } + iterator erase(iterator iter) { + key_type key = iter.key(); + int size = tree_.size(); + int count = tree_.count(key); + typename CheckerType::iterator checker_iter = checker_.find(key); + for (iterator tmp(tree_.find(key)); tmp != iter; ++tmp) { + ++checker_iter; + } + typename CheckerType::iterator checker_next = checker_iter; + ++checker_next; + checker_.erase(checker_iter); + iter = tree_.erase(iter); + EXPECT_EQ(tree_.size(), checker_.size()); + EXPECT_EQ(tree_.size(), size - 1); + EXPECT_EQ(tree_.count(key), count - 1); + if (count == 1) { + erase_check(key); + } + return iter_check(iter, checker_next); + } + + void erase(iterator begin, iterator end) { + int size = tree_.size(); + int count = distance(begin, end); + typename CheckerType::iterator checker_begin = checker_.find(begin.key()); + for (iterator tmp(tree_.find(begin.key())); tmp != begin; ++tmp) { + ++checker_begin; + } + typename CheckerType::iterator checker_end = + end == tree_.end() ? checker_.end() : checker_.find(end.key()); + if (end != tree_.end()) { + for (iterator tmp(tree_.find(end.key())); tmp != end; ++tmp) { + ++checker_end; + } + } + checker_.erase(checker_begin, checker_end); + tree_.erase(begin, end); + EXPECT_EQ(tree_.size(), checker_.size()); + EXPECT_EQ(tree_.size(), size - count); + } + + // Utility routines. + void clear() { + tree_.clear(); + checker_.clear(); + } + void swap(self_type &x) { + tree_.swap(x.tree_); + checker_.swap(x.checker_); + } + + void verify() const { + tree_.verify(); + EXPECT_EQ(tree_.size(), checker_.size()); + + // Move through the forward iterators using increment. 
+ typename CheckerType::const_iterator + checker_iter(checker_.begin()); + const_iterator tree_iter(tree_.begin()); + for (; tree_iter != tree_.end(); + ++tree_iter, ++checker_iter) { + EXPECT_EQ(*tree_iter, *checker_iter); + } + + // Move through the forward iterators using decrement. + for (int n = tree_.size() - 1; n >= 0; --n) { + iter_check(tree_iter, checker_iter); + --tree_iter; + --checker_iter; + } + EXPECT_TRUE(tree_iter == tree_.begin()); + EXPECT_TRUE(checker_iter == checker_.begin()); + + // Move through the reverse iterators using increment. + typename CheckerType::const_reverse_iterator + checker_riter(checker_.rbegin()); + const_reverse_iterator tree_riter(tree_.rbegin()); + for (; tree_riter != tree_.rend(); + ++tree_riter, ++checker_riter) { + EXPECT_EQ(*tree_riter, *checker_riter); + } + + // Move through the reverse iterators using decrement. + for (int n = tree_.size() - 1; n >= 0; --n) { + riter_check(tree_riter, checker_riter); + --tree_riter; + --checker_riter; + } + EXPECT_EQ(tree_riter, tree_.rbegin()); + EXPECT_EQ(checker_riter, checker_.rbegin()); + } + + // Access to the underlying btree. + const TreeType& tree() const { return tree_; } + + // Size routines. 
+ size_type size() const { + EXPECT_EQ(tree_.size(), checker_.size()); + return tree_.size(); + } + size_type max_size() const { return tree_.max_size(); } + bool empty() const { + EXPECT_EQ(tree_.empty(), checker_.empty()); + return tree_.empty(); + } + size_type height() const { return tree_.height(); } + size_type internal_nodes() const { return tree_.internal_nodes(); } + size_type leaf_nodes() const { return tree_.leaf_nodes(); } + size_type nodes() const { return tree_.nodes(); } + size_type bytes_used() const { return tree_.bytes_used(); } + double fullness() const { return tree_.fullness(); } + double overhead() const { return tree_.overhead(); } + + protected: + TreeType tree_; + const TreeType &const_tree_; + CheckerType checker_; +}; + +// A checker for unique sorted associative containers. TreeType is expected to +// be btree_{set,map} and CheckerType is expected to be {set,map}. +template +class unique_checker : public base_checker { + typedef base_checker super_type; + typedef unique_checker self_type; + + public: + typedef typename super_type::iterator iterator; + typedef typename super_type::value_type value_type; + + public: + // Default constructor. + unique_checker() + : super_type() { + } + // Copy constructor. + unique_checker(const self_type &x) + : super_type(x) { + } + // Range constructor. + template + unique_checker(InputIterator b, InputIterator e) + : super_type(b, e) { + } + + // Insertion routines. 
+ std::pair insert(const value_type &x) { + int size = this->tree_.size(); + std::pair checker_res = + this->checker_.insert(x); + std::pair tree_res = this->tree_.insert(x); + EXPECT_EQ(*tree_res.first, *checker_res.first); + EXPECT_EQ(tree_res.second, checker_res.second); + EXPECT_EQ(this->tree_.size(), this->checker_.size()); + EXPECT_EQ(this->tree_.size(), size + tree_res.second); + return tree_res; + } + iterator insert(iterator position, const value_type &x) { + int size = this->tree_.size(); + std::pair checker_res = + this->checker_.insert(x); + iterator tree_res = this->tree_.insert(position, x); + EXPECT_EQ(*tree_res, *checker_res.first); + EXPECT_EQ(this->tree_.size(), this->checker_.size()); + EXPECT_EQ(this->tree_.size(), size + checker_res.second); + return tree_res; + } + template + void insert(InputIterator b, InputIterator e) { + for (; b != e; ++b) { + insert(*b); + } + } +}; + +// A checker for multiple sorted associative containers. TreeType is expected +// to be btree_{multiset,multimap} and CheckerType is expected to be +// {multiset,multimap}. +template +class multi_checker : public base_checker { + typedef base_checker super_type; + typedef multi_checker self_type; + + public: + typedef typename super_type::iterator iterator; + typedef typename super_type::value_type value_type; + + public: + // Default constructor. + multi_checker() + : super_type() { + } + // Copy constructor. + multi_checker(const self_type &x) + : super_type(x) { + } + // Range constructor. + template + multi_checker(InputIterator b, InputIterator e) + : super_type(b, e) { + } + + // Insertion routines. 
+ iterator insert(const value_type &x) { + int size = this->tree_.size(); + typename CheckerType::iterator checker_res = this->checker_.insert(x); + iterator tree_res = this->tree_.insert(x); + EXPECT_EQ(*tree_res, *checker_res); + EXPECT_EQ(this->tree_.size(), this->checker_.size()); + EXPECT_EQ(this->tree_.size(), size + 1); + return tree_res; + } + iterator insert(iterator position, const value_type &x) { + int size = this->tree_.size(); + typename CheckerType::iterator checker_res = this->checker_.insert(x); + iterator tree_res = this->tree_.insert(position, x); + EXPECT_EQ(*tree_res, *checker_res); + EXPECT_EQ(this->tree_.size(), this->checker_.size()); + EXPECT_EQ(this->tree_.size(), size + 1); + return tree_res; + } + template + void insert(InputIterator b, InputIterator e) { + for (; b != e; ++b) { + insert(*b); + } + } +}; + +char* GenerateDigits(char buf[16], int val, int maxval) { + EXPECT_LE(val, maxval); + int p = 15; + buf[p--] = 0; + while (maxval > 0) { + buf[p--] = '0' + (val % 10); + val /= 10; + maxval /= 10; + } + return buf + p + 1; +} + +template +struct Generator { + int maxval; + Generator(int m) + : maxval(m) { + } + K operator()(int i) const { + EXPECT_LE(i, maxval); + return i; + } +}; + +template <> +struct Generator { + int maxval; + Generator(int m) + : maxval(m) { + } + std::string operator()(int i) const { + char buf[16]; + return GenerateDigits(buf, i, maxval); + } +}; + +template +struct Generator > { + Generator::type> tgen; + Generator::type> ugen; + + Generator(int m) + : tgen(m), + ugen(m) { + } + std::pair operator()(int i) const { + return std::make_pair(tgen(i), ugen(i)); + } +}; + +// Generate values for our tests and benchmarks. Value range is [0, maxval]. 
+const std::vector& GenerateNumbers(int n, int maxval) { + static std::vector values; + static std::set unique_values; + + if (values.size() < n) { + + for (int i = values.size(); i < n; i++) { + int value; + do { + value = rand() % (maxval + 1); + } while (unique_values.find(value) != unique_values.end()); + + values.push_back(value); + unique_values.insert(value); + } + } + + return values; +} + +// Generates values in the range +// [0, 4 * max(FLAGS_benchmark_values, FLAGS_test_values)] +template +std::vector GenerateValues(int n) { + int two_times_max = 2 * std::max(FLAGS_benchmark_values, FLAGS_test_values); + int four_times_max = 2 * two_times_max; + EXPECT_LE(n, two_times_max); + const std::vector &nums = GenerateNumbers(n, four_times_max); + Generator gen(four_times_max); + std::vector vec; + + for (int i = 0; i < n; i++) { + vec.push_back(gen(nums[i])); + } + + return vec; +} + +template +void DoTest(const char *name, T *b, const std::vector &values) { + typename KeyOfValue::type key_of_value; + + T &mutable_b = *b; + const T &const_b = *b; + + // Test insert. + for (int i = 0; i < values.size(); ++i) { + mutable_b.insert(values[i]); + mutable_b.value_check(values[i]); + } + assert(mutable_b.size() == values.size()); + + const_b.verify(); + printf(" %s fullness=%0.2f overhead=%0.2f bytes-per-value=%0.2f\n", + name, const_b.fullness(), const_b.overhead(), + double(const_b.bytes_used()) / const_b.size()); + + // Test copy constructor. + T b_copy(const_b); + EXPECT_EQ(b_copy.size(), const_b.size()); + EXPECT_LE(b_copy.height(), const_b.height()); + EXPECT_LE(b_copy.internal_nodes(), const_b.internal_nodes()); + EXPECT_LE(b_copy.leaf_nodes(), const_b.leaf_nodes()); + for (int i = 0; i < values.size(); ++i) { + EXPECT_EQ(*b_copy.find(key_of_value(values[i])), values[i]); + } + + // Test range constructor. 
+ T b_range(const_b.begin(), const_b.end()); + EXPECT_EQ(b_range.size(), const_b.size()); + EXPECT_LE(b_range.height(), const_b.height()); + EXPECT_LE(b_range.internal_nodes(), const_b.internal_nodes()); + EXPECT_LE(b_range.leaf_nodes(), const_b.leaf_nodes()); + for (int i = 0; i < values.size(); ++i) { + EXPECT_EQ(*b_range.find(key_of_value(values[i])), values[i]); + } + + // Test range insertion for values that already exist. + b_range.insert(b_copy.begin(), b_copy.end()); + b_range.verify(); + + // Test range insertion for new values. + b_range.clear(); + b_range.insert(b_copy.begin(), b_copy.end()); + EXPECT_EQ(b_range.size(), b_copy.size()); + EXPECT_EQ(b_range.height(), b_copy.height()); + EXPECT_EQ(b_range.internal_nodes(), b_copy.internal_nodes()); + EXPECT_EQ(b_range.leaf_nodes(), b_copy.leaf_nodes()); + for (int i = 0; i < values.size(); ++i) { + EXPECT_EQ(*b_range.find(key_of_value(values[i])), values[i]); + } + + // Test assignment to self. Nothing should change. + b_range.operator=(b_range); + EXPECT_EQ(b_range.size(), b_copy.size()); + EXPECT_EQ(b_range.height(), b_copy.height()); + EXPECT_EQ(b_range.internal_nodes(), b_copy.internal_nodes()); + EXPECT_EQ(b_range.leaf_nodes(), b_copy.leaf_nodes()); + + // Test assignment of new values. + b_range.clear(); + b_range = b_copy; + EXPECT_EQ(b_range.size(), b_copy.size()); + EXPECT_EQ(b_range.height(), b_copy.height()); + EXPECT_EQ(b_range.internal_nodes(), b_copy.internal_nodes()); + EXPECT_EQ(b_range.leaf_nodes(), b_copy.leaf_nodes()); + + // Test swap. + b_range.clear(); + b_range.swap(b_copy); + EXPECT_EQ(b_copy.size(), 0); + EXPECT_EQ(b_range.size(), const_b.size()); + for (int i = 0; i < values.size(); ++i) { + EXPECT_EQ(*b_range.find(key_of_value(values[i])), values[i]); + } + b_range.swap(b_copy); + + // Test erase via values. + for (int i = 0; i < values.size(); ++i) { + mutable_b.erase(key_of_value(values[i])); + // Erasing a non-existent key should have no effect. 
+ EXPECT_EQ(mutable_b.erase(key_of_value(values[i])), 0); + } + + const_b.verify(); + EXPECT_EQ(const_b.internal_nodes(), 0); + EXPECT_EQ(const_b.leaf_nodes(), 0); + EXPECT_EQ(const_b.size(), 0); + + // Test erase via iterators. + mutable_b = b_copy; + for (int i = 0; i < values.size(); ++i) { + mutable_b.erase(mutable_b.find(key_of_value(values[i]))); + } + + const_b.verify(); + EXPECT_EQ(const_b.internal_nodes(), 0); + EXPECT_EQ(const_b.leaf_nodes(), 0); + EXPECT_EQ(const_b.size(), 0); + + // Test insert with hint. + for (int i = 0; i < values.size(); i++) { + mutable_b.insert(mutable_b.upper_bound(key_of_value(values[i])), values[i]); + } + + const_b.verify(); + + // Test dumping of the btree to an ostream. There should be 1 line for each + // value. + std::stringstream strm; + strm << mutable_b.tree(); + EXPECT_EQ(mutable_b.size(), strcount(strm.str(), '\n')); + + // Test range erase. + mutable_b.erase(mutable_b.begin(), mutable_b.end()); + EXPECT_EQ(mutable_b.size(), 0); + const_b.verify(); + + // First half. + mutable_b = b_copy; + typename T::iterator mutable_iter_end = mutable_b.begin(); + for (int i = 0; i < values.size() / 2; ++i) ++mutable_iter_end; + mutable_b.erase(mutable_b.begin(), mutable_iter_end); + EXPECT_EQ(mutable_b.size(), values.size() - values.size() / 2); + const_b.verify(); + + // Second half. + mutable_b = b_copy; + typename T::iterator mutable_iter_begin = mutable_b.begin(); + for (int i = 0; i < values.size() / 2; ++i) ++mutable_iter_begin; + mutable_b.erase(mutable_iter_begin, mutable_b.end()); + EXPECT_EQ(mutable_b.size(), values.size() / 2); + const_b.verify(); + + // Second quarter. 
+ mutable_b = b_copy; + mutable_iter_begin = mutable_b.begin(); + for (int i = 0; i < values.size() / 4; ++i) ++mutable_iter_begin; + mutable_iter_end = mutable_iter_begin; + for (int i = 0; i < values.size() / 4; ++i) ++mutable_iter_end; + mutable_b.erase(mutable_iter_begin, mutable_iter_end); + EXPECT_EQ(mutable_b.size(), values.size() - values.size() / 4); + const_b.verify(); + + mutable_b.clear(); +} + +template +void ConstTest() { + typedef typename T::value_type value_type; + typename KeyOfValue::type key_of_value; + + T mutable_b; + const T &const_b = mutable_b; + + // Insert a single value into the container and test looking it up. + value_type value = Generator(2)(2); + mutable_b.insert(value); + EXPECT_TRUE(mutable_b.find(key_of_value(value)) != const_b.end()); + EXPECT_TRUE(const_b.find(key_of_value(value)) != mutable_b.end()); + EXPECT_EQ(*const_b.lower_bound(key_of_value(value)), value); + EXPECT_TRUE(const_b.upper_bound(key_of_value(value)) == const_b.end()); + EXPECT_EQ(*const_b.equal_range(key_of_value(value)).first, value); + + // We can only create a non-const iterator from a non-const container. + typename T::iterator mutable_iter(mutable_b.begin()); + EXPECT_TRUE(mutable_iter == const_b.begin()); + EXPECT_TRUE(mutable_iter != const_b.end()); + EXPECT_TRUE(const_b.begin() == mutable_iter); + EXPECT_TRUE(const_b.end() != mutable_iter); + typename T::reverse_iterator mutable_riter(mutable_b.rbegin()); + EXPECT_TRUE(mutable_riter == const_b.rbegin()); + EXPECT_TRUE(mutable_riter != const_b.rend()); + EXPECT_TRUE(const_b.rbegin() == mutable_riter); + EXPECT_TRUE(const_b.rend() != mutable_riter); + + // We can create a const iterator from a non-const iterator. 
+ typename T::const_iterator const_iter(mutable_iter); + EXPECT_TRUE(const_iter == mutable_b.begin()); + EXPECT_TRUE(const_iter != mutable_b.end()); + EXPECT_TRUE(mutable_b.begin() == const_iter); + EXPECT_TRUE(mutable_b.end() != const_iter); + typename T::const_reverse_iterator const_riter(mutable_riter); + EXPECT_EQ(const_riter, mutable_b.rbegin()); + EXPECT_TRUE(const_riter != mutable_b.rend()); + EXPECT_EQ(mutable_b.rbegin(), const_riter); + EXPECT_TRUE(mutable_b.rend() != const_riter); + + // Make sure various methods can be invoked on a const container. + const_b.verify(); + EXPECT_FALSE(const_b.empty()); + EXPECT_EQ(const_b.size(), 1); + EXPECT_GT(const_b.max_size(), 0); + EXPECT_EQ(const_b.height(), 1); + EXPECT_EQ(const_b.count(key_of_value(value)), 1); + EXPECT_EQ(const_b.internal_nodes(), 0); + EXPECT_EQ(const_b.leaf_nodes(), 1); + EXPECT_EQ(const_b.nodes(), 1); + EXPECT_GT(const_b.bytes_used(), 0); + EXPECT_GT(const_b.fullness(), 0); + EXPECT_GT(const_b.overhead(), 0); +} + +template +void BtreeTest() { + ConstTest(); + + typedef typename std::remove_const::type V; + std::vector random_values = GenerateValues(FLAGS_test_values); + + unique_checker container; + + // Test key insertion/deletion in sorted order. + std::vector sorted_values(random_values); + sort(sorted_values.begin(), sorted_values.end()); + DoTest("sorted: ", &container, sorted_values); + + // Test key insertion/deletion in reverse sorted order. + reverse(sorted_values.begin(), sorted_values.end()); + DoTest("rsorted: ", &container, sorted_values); + + // Test key insertion/deletion in random order. + DoTest("random: ", &container, random_values); +} + +template +void BtreeMultiTest() { + ConstTest(); + + typedef typename std::remove_const::type V; + const std::vector& random_values = GenerateValues(FLAGS_test_values); + + multi_checker container; + + // Test keys in sorted order. 
+ std::vector sorted_values(random_values); + sort(sorted_values.begin(), sorted_values.end()); + DoTest("sorted: ", &container, sorted_values); + + // Test keys in reverse sorted order. + reverse(sorted_values.begin(), sorted_values.end()); + DoTest("rsorted: ", &container, sorted_values); + + // Test keys in random order. + DoTest("random: ", &container, random_values); + + // Test keys in random order w/ duplicates. + std::vector duplicate_values(random_values); + duplicate_values.insert( + duplicate_values.end(), random_values.begin(), random_values.end()); + DoTest("duplicates:", &container, duplicate_values); + + // Test all identical keys. + std::vector identical_values(100); + fill(identical_values.begin(), identical_values.end(), Generator(2)(2)); + DoTest("identical: ", &container, identical_values); +} + +template > +class TestAllocator : public Alloc { + public: + typedef typename Alloc::pointer pointer; + typedef typename Alloc::size_type size_type; + + TestAllocator() : bytes_used_(NULL) { } + TestAllocator(int64_t *bytes_used) : bytes_used_(bytes_used) { } + + // Constructor used for rebinding + template + TestAllocator(const TestAllocator& x) + : Alloc(x), + bytes_used_(x.bytes_used()) { + } + + pointer allocate(size_type n, std::allocator::const_pointer hint = 0) { + EXPECT_TRUE(bytes_used_ != NULL); + *bytes_used_ += n * sizeof(T); + return Alloc::allocate(n, hint); + } + + void deallocate(pointer p, size_type n) { + Alloc::deallocate(p, n); + EXPECT_TRUE(bytes_used_ != NULL); + *bytes_used_ -= n * sizeof(T); + } + + // Rebind allows an allocator to be used for a different type + template struct rebind { + typedef TestAllocator::other> other; + }; + + int64_t* bytes_used() const { return bytes_used_; } + + private: + int64_t *bytes_used_; +}; + +template +void BtreeAllocatorTest() { + typedef typename T::value_type value_type; + + int64_t alloc1 = 0; + int64_t alloc2 = 0; + T b1(typename T::key_compare(), &alloc1); + T b2(typename 
T::key_compare(), &alloc2); + + // This should swap the allocators! + swap(b1, b2); + + for (int i = 0; i < 1000; i++) { + b1.insert(Generator(1000)(i)); + } + + // We should have allocated out of alloc2! + EXPECT_LE(b1.bytes_used(), alloc2 + sizeof(b1)); + EXPECT_GT(alloc2, alloc1); +} + +template +void BtreeMapTest() { + typedef typename T::value_type value_type; + typedef typename T::mapped_type mapped_type; + + mapped_type m = Generator(0)(0); + (void) m; + + T b; + + // Verify we can insert using operator[]. + for (int i = 0; i < 1000; i++) { + value_type v = Generator(1000)(i); + b[v.first] = v.second; + } + EXPECT_EQ(b.size(), 1000); + + // Test whether we can use the "->" operator on iterators and + // reverse_iterators. This stresses the btree_map_params::pair_pointer + // mechanism. + EXPECT_EQ(b.begin()->first, Generator(1000)(0).first); + EXPECT_EQ(b.begin()->second, Generator(1000)(0).second); + EXPECT_EQ(b.rbegin()->first, Generator(1000)(999).first); + EXPECT_EQ(b.rbegin()->second, Generator(1000)(999).second); +} + +template +void BtreeMultiMapTest() { + typedef typename T::mapped_type mapped_type; + mapped_type m = Generator(0)(0); + (void) m; +} + +} // namespace btree + +#endif // UTIL_BTREE_BTREE_TEST_H__ diff --git a/lib/xdelta3/cpp-btree/btree_test_flags.cc b/lib/xdelta3/cpp-btree/btree_test_flags.cc new file mode 100644 index 0000000..bf608a9 --- /dev/null +++ b/lib/xdelta3/cpp-btree/btree_test_flags.cc @@ -0,0 +1,20 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gflags/gflags.h" + +DEFINE_int32(test_values, 10000, + "The number of values to use for tests."); +DEFINE_int32(benchmark_values, 1000000, + "The number of values to use for benchmarks."); diff --git a/lib/xdelta3/cpp-btree/safe_btree.h b/lib/xdelta3/cpp-btree/safe_btree.h new file mode 100644 index 0000000..2d85c70 --- /dev/null +++ b/lib/xdelta3/cpp-btree/safe_btree.h @@ -0,0 +1,395 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// A safe_btree<> wraps around a btree<> and removes the caveat that insertion +// and deletion invalidate iterators. A safe_btree<> maintains a generation +// number that is incremented on every mutation. A safe_btree<>::iterator keeps +// a pointer to the safe_btree<> it came from, the generation of the tree when +// it was last validated and the key the underlying btree<>::iterator points +// to. If an iterator is accessed and its generation differs from the tree +// generation it is revalidated. +// +// References and pointers returned by safe_btree iterators are not safe. +// +// See the incorrect usage examples mentioned in safe_btree_set.h and +// safe_btree_map.h. 
+ +#ifndef UTIL_BTREE_SAFE_BTREE_H__ +#define UTIL_BTREE_SAFE_BTREE_H__ + +#include +#include +#include + +#include "btree.h" + +namespace btree { + +template +class safe_btree_iterator { + public: + typedef typename Iterator::key_type key_type; + typedef typename Iterator::value_type value_type; + typedef typename Iterator::size_type size_type; + typedef typename Iterator::difference_type difference_type; + typedef typename Iterator::pointer pointer; + typedef typename Iterator::reference reference; + typedef typename Iterator::const_pointer const_pointer; + typedef typename Iterator::const_reference const_reference; + typedef typename Iterator::iterator_category iterator_category; + typedef typename Tree::iterator iterator; + typedef typename Tree::const_iterator const_iterator; + typedef safe_btree_iterator self_type; + + void update() const { + if (iter_ != tree_->internal_btree()->end()) { + // A positive generation indicates a valid key. + generation_ = tree_->generation(); + key_ = iter_.key(); + } else { + // Use a negative generation to indicate iter_ points to end(). + generation_ = -tree_->generation(); + } + } + + public: + safe_btree_iterator() + : generation_(0), + key_(), + iter_(), + tree_(NULL) { + } + safe_btree_iterator(const iterator &x) + : generation_(x.generation()), + key_(x.key()), + iter_(x.iter()), + tree_(x.tree()) { + } + safe_btree_iterator(Tree *tree, const Iterator &iter) + : generation_(), + key_(), + iter_(iter), + tree_(tree) { + update(); + } + + Tree* tree() const { return tree_; } + int64_t generation() const { return generation_; } + + Iterator* mutable_iter() const { + if (generation_ != tree_->generation()) { + if (generation_ > 0) { + // This does the wrong thing for a multi{set,map}. If my iter was + // pointing to the 2nd of 2 values with the same key, then this will + // reset it to point to the first. This is why we don't provide a + // safe_btree_multi{set,map}. 
+ iter_ = tree_->internal_btree()->lower_bound(key_); + update(); + } else if (-generation_ != tree_->generation()) { + iter_ = tree_->internal_btree()->end(); + generation_ = -tree_->generation(); + } + } + return &iter_; + } + const Iterator& iter() const { + return *mutable_iter(); + } + + // Equality/inequality operators. + bool operator==(const const_iterator &x) const { + return iter() == x.iter(); + } + bool operator!=(const const_iterator &x) const { + return iter() != x.iter(); + } + + // Accessors for the key/value the iterator is pointing at. + const key_type& key() const { + return key_; + } + // This reference value is potentially invalidated by any non-const + // method on the tree; it is NOT safe. + reference operator*() const { + assert(generation_ > 0); + return iter().operator*(); + } + // This pointer value is potentially invalidated by any non-const + // method on the tree; it is NOT safe. + pointer operator->() const { + assert(generation_ > 0); + return iter().operator->(); + } + + // Increment/decrement operators. + self_type& operator++() { + ++(*mutable_iter()); + update(); + return *this; + } + self_type& operator--() { + --(*mutable_iter()); + update(); + return *this; + } + self_type operator++(int) { + self_type tmp = *this; + ++*this; + return tmp; + } + self_type operator--(int) { + self_type tmp = *this; + --*this; + return tmp; + } + + private: + // The generation of the tree when "iter" was updated. + mutable int64_t generation_; + // The key the iterator points to. + mutable key_type key_; + // The underlying iterator. + mutable Iterator iter_; + // The tree the iterator is associated with. 
+ Tree *tree_; +}; + +template +class safe_btree { + typedef safe_btree self_type; + + typedef btree btree_type; + typedef typename btree_type::iterator tree_iterator; + typedef typename btree_type::const_iterator tree_const_iterator; + + public: + typedef typename btree_type::params_type params_type; + typedef typename btree_type::key_type key_type; + typedef typename btree_type::data_type data_type; + typedef typename btree_type::mapped_type mapped_type; + typedef typename btree_type::value_type value_type; + typedef typename btree_type::key_compare key_compare; + typedef typename btree_type::allocator_type allocator_type; + typedef typename btree_type::pointer pointer; + typedef typename btree_type::const_pointer const_pointer; + typedef typename btree_type::reference reference; + typedef typename btree_type::const_reference const_reference; + typedef typename btree_type::size_type size_type; + typedef typename btree_type::difference_type difference_type; + typedef safe_btree_iterator iterator; + typedef safe_btree_iterator< + const self_type, tree_const_iterator> const_iterator; + typedef std::reverse_iterator const_reverse_iterator; + typedef std::reverse_iterator reverse_iterator; + + public: + // Default constructor. + safe_btree(const key_compare &comp, const allocator_type &alloc) + : tree_(comp, alloc), + generation_(1) { + } + + // Copy constructor. 
+ safe_btree(const self_type &x) + : tree_(x.tree_), + generation_(1) { + } + + iterator begin() { + return iterator(this, tree_.begin()); + } + const_iterator begin() const { + return const_iterator(this, tree_.begin()); + } + iterator end() { + return iterator(this, tree_.end()); + } + const_iterator end() const { + return const_iterator(this, tree_.end()); + } + reverse_iterator rbegin() { + return reverse_iterator(end()); + } + const_reverse_iterator rbegin() const { + return const_reverse_iterator(end()); + } + reverse_iterator rend() { + return reverse_iterator(begin()); + } + const_reverse_iterator rend() const { + return const_reverse_iterator(begin()); + } + + // Lookup routines. + iterator lower_bound(const key_type &key) { + return iterator(this, tree_.lower_bound(key)); + } + const_iterator lower_bound(const key_type &key) const { + return const_iterator(this, tree_.lower_bound(key)); + } + iterator upper_bound(const key_type &key) { + return iterator(this, tree_.upper_bound(key)); + } + const_iterator upper_bound(const key_type &key) const { + return const_iterator(this, tree_.upper_bound(key)); + } + std::pair equal_range(const key_type &key) { + std::pair p = tree_.equal_range(key); + return std::make_pair(iterator(this, p.first), + iterator(this, p.second)); + } + std::pair equal_range(const key_type &key) const { + std::pair p = tree_.equal_range(key); + return std::make_pair(const_iterator(this, p.first), + const_iterator(this, p.second)); + } + iterator find_unique(const key_type &key) { + return iterator(this, tree_.find_unique(key)); + } + const_iterator find_unique(const key_type &key) const { + return const_iterator(this, tree_.find_unique(key)); + } + iterator find_multi(const key_type &key) { + return iterator(this, tree_.find_multi(key)); + } + const_iterator find_multi(const key_type &key) const { + return const_iterator(this, tree_.find_multi(key)); + } + size_type count_unique(const key_type &key) const { + return 
tree_.count_unique(key); + } + size_type count_multi(const key_type &key) const { + return tree_.count_multi(key); + } + + // Insertion routines. + template + std::pair insert_unique(const key_type &key, ValuePointer value) { + std::pair p = tree_.insert_unique(key, value); + generation_ += p.second; + return std::make_pair(iterator(this, p.first), p.second); + } + std::pair insert_unique(const value_type &v) { + std::pair p = tree_.insert_unique(v); + generation_ += p.second; + return std::make_pair(iterator(this, p.first), p.second); + } + iterator insert_unique(iterator position, const value_type &v) { + tree_iterator tree_pos = position.iter(); + ++generation_; + return iterator(this, tree_.insert_unique(tree_pos, v)); + } + template + void insert_unique(InputIterator b, InputIterator e) { + for (; b != e; ++b) { + insert_unique(*b); + } + } + iterator insert_multi(const value_type &v) { + ++generation_; + return iterator(this, tree_.insert_multi(v)); + } + iterator insert_multi(iterator position, const value_type &v) { + tree_iterator tree_pos = position.iter(); + ++generation_; + return iterator(this, tree_.insert_multi(tree_pos, v)); + } + template + void insert_multi(InputIterator b, InputIterator e) { + for (; b != e; ++b) { + insert_multi(*b); + } + } + self_type& operator=(const self_type &x) { + if (&x == this) { + // Don't copy onto ourselves. + return *this; + } + ++generation_; + tree_ = x.tree_; + return *this; + } + + // Deletion routines. + void erase(const iterator &begin, const iterator &end) { + tree_.erase(begin.iter(), end.iter()); + ++generation_; + } + // Erase the specified iterator from the btree. The iterator must be valid + // (i.e. not equal to end()). Return an iterator pointing to the node after + // the one that was erased (or end() if none exists). 
+ iterator erase(iterator iter) { + tree_iterator res = tree_.erase(iter.iter()); + ++generation_; + return iterator(this, res); + } + int erase_unique(const key_type &key) { + int res = tree_.erase_unique(key); + generation_ += res; + return res; + } + int erase_multi(const key_type &key) { + int res = tree_.erase_multi(key); + generation_ += res; + return res; + } + + // Access to the underlying btree. + btree_type* internal_btree() { return &tree_; } + const btree_type* internal_btree() const { return &tree_; } + + // Utility routines. + void clear() { + ++generation_; + tree_.clear(); + } + void swap(self_type &x) { + ++generation_; + ++x.generation_; + tree_.swap(x.tree_); + } + void dump(std::ostream &os) const { + tree_.dump(os); + } + void verify() const { + tree_.verify(); + } + int64_t generation() const { + return generation_; + } + key_compare key_comp() const { return tree_.key_comp(); } + + // Size routines. + size_type size() const { return tree_.size(); } + size_type max_size() const { return tree_.max_size(); } + bool empty() const { return tree_.empty(); } + size_type height() const { return tree_.height(); } + size_type internal_nodes() const { return tree_.internal_nodes(); } + size_type leaf_nodes() const { return tree_.leaf_nodes(); } + size_type nodes() const { return tree_.nodes(); } + size_type bytes_used() const { return tree_.bytes_used(); } + static double average_bytes_per_value() { + return btree_type::average_bytes_per_value(); + } + double fullness() const { return tree_.fullness(); } + double overhead() const { return tree_.overhead(); } + + private: + btree_type tree_; + int64_t generation_; +}; + +} // namespace btree + +#endif // UTIL_BTREE_SAFE_BTREE_H__ diff --git a/lib/xdelta3/cpp-btree/safe_btree_map.h b/lib/xdelta3/cpp-btree/safe_btree_map.h new file mode 100644 index 0000000..a0668f1 --- /dev/null +++ b/lib/xdelta3/cpp-btree/safe_btree_map.h @@ -0,0 +1,89 @@ +// Copyright 2013 Google Inc. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// The safe_btree_map<> is like btree_map<> except that it removes the caveat +// about insertion and deletion invalidating existing iterators at a small cost +// in making iterators larger and slower. +// +// Revalidation occurs whenever an iterator is accessed. References +// and pointers returned by safe_btree_map<> iterators are not stable, +// they are potentially invalidated by any non-const method on the map. +// +// BEGIN INCORRECT EXAMPLE +// for (auto i = safe_map->begin(); i != safe_map->end(); ++i) { +// const T *value = &i->second; // DO NOT DO THIS +// [code that modifies safe_map and uses value]; +// } +// END INCORRECT EXAMPLE +#ifndef UTIL_BTREE_SAFE_BTREE_MAP_H__ +#define UTIL_BTREE_SAFE_BTREE_MAP_H__ + +#include +#include +#include + +#include "btree_container.h" +#include "btree_map.h" +#include "safe_btree.h" + +namespace btree { + +// The safe_btree_map class is needed mainly for its constructors. 
+template , + typename Alloc = std::allocator >, + int TargetNodeSize = 256> +class safe_btree_map : public btree_map_container< + safe_btree > > { + + typedef safe_btree_map self_type; + typedef btree_map_params< + Key, Value, Compare, Alloc, TargetNodeSize> params_type; + typedef safe_btree btree_type; + typedef btree_map_container super_type; + + public: + typedef typename btree_type::key_compare key_compare; + typedef typename btree_type::allocator_type allocator_type; + + public: + // Default constructor. + safe_btree_map(const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + } + + // Copy constructor. + safe_btree_map(const self_type &x) + : super_type(x) { + } + + // Range constructor. + template + safe_btree_map(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(b, e, comp, alloc) { + } +}; + +template +inline void swap(safe_btree_map &x, + safe_btree_map &y) { + x.swap(y); +} + +} // namespace btree + +#endif // UTIL_BTREE_SAFE_BTREE_MAP_H__ diff --git a/lib/xdelta3/cpp-btree/safe_btree_set.h b/lib/xdelta3/cpp-btree/safe_btree_set.h new file mode 100644 index 0000000..a6cd541 --- /dev/null +++ b/lib/xdelta3/cpp-btree/safe_btree_set.h @@ -0,0 +1,88 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// The safe_btree_set<> is like btree_set<> except that it removes the caveat +// about insertion and deletion invalidating existing iterators at a small cost +// in making iterators larger and slower. +// +// Revalidation occurs whenever an iterator is accessed. References +// and pointers returned by safe_btree_map<> iterators are not stable, +// they are potentially invalidated by any non-const method on the set. +// +// BEGIN INCORRECT EXAMPLE +// for (auto i = safe_set->begin(); i != safe_set->end(); ++i) { +// const T &value = *i; // DO NOT DO THIS +// [code that modifies safe_set and uses value]; +// } +// END INCORRECT EXAMPLE + +#ifndef UTIL_BTREE_SAFE_BTREE_SET_H__ +#define UTIL_BTREE_SAFE_BTREE_SET_H__ + +#include +#include + +#include "btree_container.h" +#include "btree_set.h" +#include "safe_btree.h" + +namespace btree { + +// The safe_btree_set class is needed mainly for its constructors. +template , + typename Alloc = std::allocator, + int TargetNodeSize = 256> +class safe_btree_set : public btree_unique_container< + safe_btree > > { + + typedef safe_btree_set self_type; + typedef btree_set_params params_type; + typedef safe_btree btree_type; + typedef btree_unique_container super_type; + + public: + typedef typename btree_type::key_compare key_compare; + typedef typename btree_type::allocator_type allocator_type; + + public: + // Default constructor. + safe_btree_set(const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + } + + // Copy constructor. + safe_btree_set(const self_type &x) + : super_type(x) { + } + + // Range constructor. 
+ template + safe_btree_set(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(b, e, comp, alloc) { + } +}; + +template +inline void swap(safe_btree_set &x, + safe_btree_set &y) { + x.swap(y); +} + +} // namespace btree + +#endif // UTIL_BTREE_SAFE_BTREE_SET_H__ diff --git a/lib/xdelta3/cpp-btree/safe_btree_test.cc b/lib/xdelta3/cpp-btree/safe_btree_test.cc new file mode 100644 index 0000000..0d77ae0 --- /dev/null +++ b/lib/xdelta3/cpp-btree/safe_btree_test.cc @@ -0,0 +1,116 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// TODO(pmattis): Add some tests that iterators are not invalidated by +// insertion and deletion. 
+ +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "btree_test.h" +#include "safe_btree_map.h" +#include "safe_btree_set.h" + +class UnsafeArena; + +namespace btree { +namespace { + +template +void SetTest() { + typedef TestAllocator TestAlloc; + BtreeTest, std::allocator, N>, std::set >(); + BtreeAllocatorTest, TestAlloc, N> >(); +} + +template +void MapTest() { + typedef TestAllocator TestAlloc; + BtreeTest, std::allocator, N>, std::map >(); + BtreeAllocatorTest, TestAlloc, N> >(); + BtreeMapTest, std::allocator, N> >(); +} + +TEST(SafeBtree, set_int32_32) { SetTest(); } +TEST(SafeBtree, set_int32_64) { SetTest(); } +TEST(SafeBtree, set_int32_128) { SetTest(); } +TEST(SafeBtree, set_int32_256) { SetTest(); } +TEST(SafeBtree, set_int64_256) { SetTest(); } +TEST(SafeBtree, set_string_256) { SetTest(); } +TEST(SafeBtree, set_pair_256) { SetTest, 256>(); } +TEST(SafeBtree, map_int32_256) { MapTest(); } +TEST(SafeBtree, map_int64_256) { MapTest(); } +TEST(SafeBtree, map_string_256) { MapTest(); } +TEST(SafeBtree, map_pair_256) { MapTest, 256>(); } + +TEST(SafeBtree, Comparison) { + const int kSetSize = 1201; + safe_btree_set my_set; + for (int i = 0; i < kSetSize; ++i) { + my_set.insert(i); + } + safe_btree_set my_set_copy(my_set); + EXPECT_TRUE(my_set_copy == my_set); + EXPECT_TRUE(my_set == my_set_copy); + EXPECT_FALSE(my_set_copy != my_set); + EXPECT_FALSE(my_set != my_set_copy); + + my_set.insert(kSetSize); + EXPECT_FALSE(my_set_copy == my_set); + EXPECT_FALSE(my_set == my_set_copy); + EXPECT_TRUE(my_set_copy != my_set); + EXPECT_TRUE(my_set != my_set_copy); + + my_set.erase(kSetSize - 1); + EXPECT_FALSE(my_set_copy == my_set); + EXPECT_FALSE(my_set == my_set_copy); + EXPECT_TRUE(my_set_copy != my_set); + EXPECT_TRUE(my_set != my_set_copy); + + safe_btree_map my_map; + for (int i = 0; i < kSetSize; ++i) { + my_map[std::string(i, 'a')] = i; + } + safe_btree_map my_map_copy(my_map); + EXPECT_TRUE(my_map_copy == my_map); + 
EXPECT_TRUE(my_map == my_map_copy); + EXPECT_FALSE(my_map_copy != my_map); + EXPECT_FALSE(my_map != my_map_copy); + + ++my_map_copy[std::string(7, 'a')]; + EXPECT_FALSE(my_map_copy == my_map); + EXPECT_FALSE(my_map == my_map_copy); + EXPECT_TRUE(my_map_copy != my_map); + EXPECT_TRUE(my_map != my_map_copy); + + my_map_copy = my_map; + my_map["hello"] = kSetSize; + EXPECT_FALSE(my_map_copy == my_map); + EXPECT_FALSE(my_map == my_map_copy); + EXPECT_TRUE(my_map_copy != my_map); + EXPECT_TRUE(my_map != my_map_copy); + + my_map.erase(std::string(kSetSize - 1, 'a')); + EXPECT_FALSE(my_map_copy == my_map); + EXPECT_FALSE(my_map == my_map_copy); + EXPECT_TRUE(my_map_copy != my_map); + EXPECT_TRUE(my_map != my_map_copy); +} + +} // namespace +} // namespace btree diff --git a/lib/xdelta3/draft-korn-vcdiff.txt b/lib/xdelta3/draft-korn-vcdiff.txt new file mode 100644 index 0000000..1487deb --- /dev/null +++ b/lib/xdelta3/draft-korn-vcdiff.txt @@ -0,0 +1,1322 @@ + David G. Korn, AT&T Labs + Joshua P. MacDonald, UC Berkeley + Jeffrey C. Mogul, Compaq WRL +Internet-Draft Kiem-Phong Vo, AT&T Labs +Expires: 09 November 2002 09 November 2001 + + + The VCDIFF Generic Differencing and Compression Data Format + + draft-korn-vcdiff-06.txt + + + +Status of this Memo + + This document is an Internet-Draft and is in full conformance + with all provisions of Section 10 of RFC2026. + + Internet-Drafts are working documents of the Internet Engineering + Task Force (IETF), its areas, and its working groups. Note that + other groups may also distribute working documents as + Internet-Drafts. + + Internet-Drafts are draft documents valid for a maximum of six + months and may be updated, replaced, or obsoleted by other + documents at any time. It is inappropriate to use Internet- + Drafts as reference material or to cite them other than as + "work in progress." 
+ + The list of current Internet-Drafts can be accessed at + http://www.ietf.org/ietf/1id-abstracts.txt + + The list of Internet-Draft Shadow Directories can be accessed at + http://www.ietf.org/shadow.html. + + +Abstract + + This memo describes a general, efficient and portable data format + suitable for encoding compressed and/or differencing data so that + they can be easily transported among computers. + + +Table of Contents: + + 1. EXECUTIVE SUMMARY ............................................ 2 + 2. CONVENTIONS .................................................. 3 + 3. DELTA INSTRUCTIONS ........................................... 4 + 4. DELTA FILE ORGANIZATION ...................................... 5 + 5. DELTA INSTRUCTION ENCODING ................................... 9 + 6. DECODING A TARGET WINDOW ..................................... 14 + 7. APPLICATION-DEFINED CODE TABLES .............................. 16 + 8. PERFORMANCE .................................................. 16 + 9. FURTHER ISSUES ............................................... 17 + 10. SUMMARY ...................................................... 18 + 11. ACKNOWLEDGEMENTS ............................................. 18 + 12. SECURITY CONSIDERATIONS ...................................... 18 + 13. SOURCE CODE AVAILABILITY ..................................... 18 + 14. INTELLECTUAL PROPERTY RIGHTS ................................. 18 + 15. IANA CONSIDERATIONS .......................................... 19 + 16. REFERENCES ................................................... 19 + 17. AUTHOR'S ADDRESS ............................................. 20 + + +1. EXECUTIVE SUMMARY + + Compression and differencing techniques can greatly improve storage + and transmission of files and file versions. 
Since files are often + transported across machines with distinct architectures and performance + characteristics, such data should be encoded in a form that is portable + and can be decoded with little or no knowledge of the encoders. + This document describes Vcdiff, a compact portable encoding format + designed for these purposes. + + Data differencing is the process of computing a compact and invertible + encoding of a "target file" given a "source file". Data compression + is similar but without the use of source data. The UNIX utilities diff, + compress, and gzip are well-known examples of data differencing and + compression tools. For data differencing, the computed encoding is + called a "delta file", and, for data compression, it is called + a "compressed file". Delta and compressed files are good for storage + and transmission as they are often smaller than the originals. + + Data differencing and data compression are traditionally treated + as distinct types of data processing. However, as shown in the Vdelta + technique by Korn and Vo [1], compression can be thought of as a special + case of differencing in which the source data is empty. The basic idea + is to unify the string parsing scheme used in the Lempel-Ziv'77 style + compressors [2], and the block-move technique of Tichy [3]. Loosely + speaking, this works as follows: + + a. Concatenate source and target data. + b. Parse the data from left to right as in LZ'77 but + make sure that a parsed segment starts the target data. + c. Start to output when reaching target data. + + Parsing is based on string matching algorithms such as suffix trees [4] + or hashing with different time and space performance characteristics. + Vdelta uses a fast string matching algorithm that requires less memory + than other techniques [5,6]. However, even with this algorithm, the + memory requirement can still be prohibitive for large files. 
A common + way to deal with memory limitation is to partition an input file into + chunks called "windows" and process them separately. Here, except for + unpublished work by Vo, little has been done on designing effective + windowing schemes. Current techniques, including Vdelta, simply use + source and target windows with corresponding addresses across source + and target files. + + String matching and windowing algorithms have large influence on the + compression rate of delta and compressed files. However, it is desirable + to have a portable encoding format that is independent of such algorithms. + This enables construction of client-server applications in which a server + may serve clients with unknown computing characteristics. Unfortunately, + all current differencing and compressing tools, including Vdelta, fall + short in this respect. Their storage formats are closely intertwined + with the implemented string matching and/or windowing algorithms. + + The encoding format Vcdiff proposed here addresses the above issues. + Vcdiff achieves the below characteristics: + + Output compactness: + The basic encoding format compactly represents compressed or delta + files. Applications can further extend the basic encoding format + with "secondary encoders" to achieve more compression. + + Data portability: + The basic encoding format is free from machine byte order and + word size issues. This allows data to be encoded on one machine + and decoded on a different machine with different architecture. + + Algorithm genericity: + The decoding algorithm is independent from string matching and + windowing algorithms. This allows competition among implementations + of the encoder while keeping the same decoder. + + Decoding efficiency: + Except for secondary encoder issues, the decoding algorithm runs + in time proportional to the size of the target file and uses space + proportional to the maximal window size. 
Vcdiff differs from more + conventional compressors in that it uses only byte-aligned + data, thus avoiding bit-level operations, which improves + decoding speed at the slight cost of compression efficiency. + + The Vcdiff data format and the algorithms for decoding data shall be + described next. Since Vcdiff treats compression as a special case of + differencing, we shall use the term "delta file" to indicate the + compressed output for both cases. + + +2. CONVENTIONS + + The basic data unit is a byte. For portability, Vcdiff shall limit + a byte to its lower eight bits even on machines with larger bytes. + The bits in a byte are ordered from right to left so that the least + significant bit (LSB) has value 1, and the most significant bit (MSB), + has value 128. + + For purposes of exposition in this document, we adopt the convention + that the LSB is numbered 0, and the MSB is numbered 7. Bit numbers + never appear in the encoded format itself. + + Vcdiff encodes unsigned integer values using a portable variable-sized + format (originally introduced in the Sfio library [7]). This encoding + treats an integer as a number in base 128. Then, each digit in this + representation is encoded in the lower seven bits of a byte. Except for + the least significant byte, other bytes have their most significant bit + turned on to indicate that there are still more digits in the encoding. + The two key properties of this integer encoding that are beneficial + to a data compression format are: + + a. The encoding is portable among systems using 8-bit bytes, and + b. Small values are encoded compactly. + + For example, consider the value 123456789 which can be represented with + four 7-bit digits whose values are 58, 111, 26, 21 in order from most + to least significant. Below is the 8-bit byte encoding of these digits. + Note that the MSBs of 58, 111 and 26 are on. 
+ + +-------------------------------------------+ + | 10111010 | 11101111 | 10011010 | 00010101 | + +-------------------------------------------+ + MSB+58 MSB+111 MSB+26 0+21 + + + Henceforth, the terms "byte" and "integer" will refer to a byte and an + unsigned integer as described. + + + From time to time, algorithms are exhibited to clarify the descriptions + of parts of the Vcdiff format. On such occasions, the C language will be + used to make precise the algorithms. The C code shown in this + document is meant for clarification only, and is not part of the + actual specification of the Vcdiff format. + + In this document, the key words "MUST", "MUST NOT", + "SHOULD", "SHOULD NOT", and "MAY" are to be interpreted as + described in RFC2119 [12]. + + +3. DELTA INSTRUCTIONS + + A large target file is partitioned into non-overlapping sections + called "target windows". These target windows are processed separately + and sequentially based on their order in the target file. + + A target window T of length t may be compared against some source data + segment S of length s. By construction, this source data segment S + comes either from the source file, if one is used, or from a part of + the target file earlier than T. In this way, during decoding, S is + completely known when T is being decoded. + + The choices of T, t, S and s are made by some window selection algorithm + which can greatly affect the size of the encoding. However, as seen later, + these choices are encoded so that no knowledge of the window selection + algorithm is needed during decoding. + + Assume that S[j] represents the jth byte in S, and T[k] represents + the kth byte in T. Then, for the delta instructions, we treat the data + windows S and T as substrings of a superstring U formed by concatenating + them like this: + + S[0]S[1]...S[s-1]T[0]T[1]...T[t-1] + + The "address" of a byte in S or T is referred to by its location in U. + For example, the address of T[k] is s+k.
+ + The instructions to encode and direct the reconstruction of a target + window are called delta instructions. There are three types: + + ADD: This instruction has two arguments, a size x and a sequence of + x bytes to be copied. + COPY: This instruction has two arguments, a size x and an address p + in the string U. The arguments specify the substring of U that + must be copied. We shall assert that such a substring must be + entirely contained in either S or T. + RUN: This instruction has two arguments, a size x and a byte b that + will be repeated x times. + + Below are example source and target windows and the delta instructions + that encode the target window in terms of the source window. + + a b c d e f g h i j k l m n o p + a b c d w x y z e f g h e f g h e f g h e f g h z z z z + + COPY 4, 0 + ADD 4, w x y z + COPY 4, 4 + COPY 12, 24 + RUN 4, z + + + Thus, the first letter 'a' in the target window is at location 16 + in the superstring. Note that the fourth instruction, "COPY 12, 24", + copies data from T itself since address 24 is position 8 in T. + This instruction also shows that it is fine to overlap the data to be + copied with the data being copied from as long as the latter starts + earlier. This enables efficient encoding of periodic sequences, + i.e., sequences with regularly repeated subsequences. The RUN instruction + is a compact way to encode a sequence repeating the same byte even though + such a sequence can be thought of as a periodic sequence with period 1. + + To reconstruct the target window, one simply processes one delta + instruction at a time and copies the data either from the source window + or from the target window being reconstructed, based on the type of the + instruction and the associated address, if any. + + +4. DELTA FILE ORGANIZATION + + A Vcdiff delta file starts with a Header section followed by a sequence + of Window sections.
The Header section includes magic bytes to identify + the file type, and information concerning data processing beyond the + basic encoding format. The Window sections encode the target windows. + + Below is the overall organization of a delta file. The indented items + refine the ones immediately above them. An item in square brackets may + or may not be present in the file depending on the information encoded + in the Indicator byte above it. + + Header + Header1 - byte + Header2 - byte + Header3 - byte + Header4 - byte + Hdr_Indicator - byte + [Secondary compressor ID] - byte + +[@@@ Why is compressor ID not an integer? ] +[@@@ If we aren't defining any secondary compressors yet, then it seems +that defining the [Secondary compressor ID] and the corresponding +VCD_DECOMPRESS Hdr_Indicator bit in this draft has no real value. An +implementation of this specification won't be able to decode a VCDIFF +encoded with this option if it doesn't know about any secondary +compressors. It seems that you should specify the bits related to +secondary compressors once you have defined the first a secondary +compressor. I can imagine a secondary-compressor might want to supply +extra information, such as a dictionary of some kind, in which case +this speculative treatment wouldn't go far enough.] 
+ + [Length of code table data] - integer + [Code table data] + Size of near cache - byte + Size of same cache - byte + Compressed code table data + Window1 + Win_Indicator - byte + [Source segment size] - integer + [Source segment position] - integer + The delta encoding of the target window + Length of the delta encoding - integer + The delta encoding + Size of the target window - integer + Delta_Indicator - byte + Length of data for ADDs and RUNs - integer + Length of instructions and sizes - integer + Length of addresses for COPYs - integer + Data section for ADDs and RUNs - array of bytes + Instructions and sizes section - array of bytes + Addresses section for COPYs - array of bytes + Window2 + ... + + + +4.1 The Header Section + + Each delta file starts with a header section organized as below. + Note the convention that square-brackets enclose optional items. + + Header1 - byte = 0xD6 + Header2 - byte = 0xC3 + Header3 - byte = 0xC4 + +HMMM + +0xD6 +0xC3 +0xC4 + + Header4 - byte + Hdr_Indicator - byte + [Secondary compressor ID] - byte + [Length of code table data] - integer + [Code table data] + + The first three Header bytes are the ASCII characters 'V', 'C' and 'D' + with their most significant bits turned on (in hexadecimal, the values + are 0xD6, 0xC3, and 0xC4). The fourth Header byte is currently set to + zero. In the future, it might be used to indicate the version of Vcdiff. + + The Hdr_Indicator byte shows if there are any initialization data + required to aid in the reconstruction of data in the Window sections.
+ This byte MAY have non-zero values for either, both, or neither of + the two bits VCD_DECOMPRESS and VCD_CODETABLE below: + + 7 6 5 4 3 2 1 0 + +-+-+-+-+-+-+-+-+ + | | | | | | | | | + +-+-+-+-+-+-+-+-+ + ^ ^ + | | + | +-- VCD_DECOMPRESS + +---- VCD_CODETABLE + + If bit 0 (VCD_DECOMPRESS) is non-zero, this indicates that a secondary + compressor may have been used to further compress certain parts of the + delta encoding data as described in Sections 4.3 and 6. In that case, + the ID of the secondary compressor is given next. If this bit is zero, + the compressor ID byte is not included. + +[@@@ If we aren't defining any secondary compressors yet, then it seems +this bit has no real value yet..] + + If bit 1 (VCD_CODETABLE) is non-zero, this indicates that an + application-defined code table is to be used for decoding the delta + instructions. This table itself is compressed. The length of the data + comprising this compressed code table and the data follow next. Section 7 + discusses application-defined code tables. If this bit is zero, the code + table data length and the code table data are not included. + + If both bits are set, then the compressor ID byte is included + before the code table data length and the code table data. + + +4.2 The Format of a Window Section + + Each Window section is organized as follows: + + Win_Indicator - byte + [Source segment length] - integer + [Source segment position] - integer + The delta encoding of the target window + + + Below are the details of the various items: + +[@@@ Here, I want to replace the Win_Indicator with a source-count, +followed by source-count length/position pairs?]
+ + Win_Indicator: + This byte is a set of bits, as shown: + + 7 6 5 4 3 2 1 0 + +-+-+-+-+-+-+-+-+ + | | | | | | | | | + +-+-+-+-+-+-+-+-+ + ^ ^ + | | + | +-- VCD_SOURCE + +---- VCD_TARGET + + + If bit 0 (VCD_SOURCE) is non-zero, this indicates that a segment + of data from the "source" file was used as the corresponding + source window of data to encode the target window. The decoder + will use this same source data segment to decode the target window. + + If bit 1 (VCD_TARGET) is non-zero, this indicates that a segment + of data from the "target" file was used as the corresponding + source window of data to encode the target window. As above, this + same source data segment is used to decode the target window. + + The Win_Indicator byte MUST NOT have more than one of the bits + set (non-zero). It MAY have none of these bits set. + + If one of these bits is set, the byte is followed by two + integers to indicate respectively the length and position of + the source data segment in the relevant file. If the + indicator byte is zero, the target window was compressed + by itself without comparing against another data segment, + and these two integers are not included. + + The delta encoding of the target window: + This contains the delta encoding of the target window either + in terms of the source data segment (i.e., VCD_SOURCE + or VCD_TARGET was set) or by itself if no source window + is specified. This data format is discussed next. 
+ + +4.3 The Delta Encoding of a Target Window + + The delta encoding of a target window is organized as follows: + + Length of the delta encoding - integer + The delta encoding + Length of the target window - integer + Delta_Indicator - byte + Length of data for ADDs and RUNs - integer + Length of instructions section - integer + Length of addresses for COPYs - integer + Data section for ADDs and RUNs - array of bytes + Instructions and sizes section - array of bytes + Addresses section for COPYs - array of bytes + + + Length of the delta encoding: + This integer gives the total number of remaining bytes that + comprise data of the delta encoding for this target window. + + The delta encoding: + This contains the data representing the delta encoding which + is described next. + + Length of the target window: + This integer indicates the actual size of the target window + after decompression. A decoder can use this value to allocate + memory to store the uncompressed data. + + Delta_Indicator: + This byte is a set of bits, as shown: + + 7 6 5 4 3 2 1 0 + +-+-+-+-+-+-+-+-+ + | | | | | | | | | + +-+-+-+-+-+-+-+-+ + ^ ^ ^ + | | | + | | +-- VCD_DATACOMP + | +---- VCD_INSTCOMP + +------ VCD_ADDRCOMP + + VCD_DATACOMP: bit value 1. + VCD_INSTCOMP: bit value 2. + VCD_ADDRCOMP: bit value 4. + + As discussed, the delta encoding consists of COPY, ADD and RUN + instructions. The ADD and RUN instructions have accompanying + unmatched data (that is, data that does not specifically match + any data in the source window or in some earlier part of the + target window) and the COPY instructions have addresses of where + the matches occur. OPTIONALLY, these types of data MAY be further + compressed using a secondary compressor. Thus, Vcdiff separates + the encoding of the delta instructions into three parts: + + a. The unmatched data in the ADD and RUN instructions, + b. The delta instructions and accompanying sizes, and + c. The addresses of the COPY instructions. 
+ + If the bit VCD_DECOMPRESS (Section 4.1) was on, each of these + sections may have been compressed using the specified secondary + compressor. The bit positions 0 (VCD_DATACOMP), 1 (VCD_INSTCOMP), + and 2 (VCD_ADDRCOMP) respectively indicate, if non-zero, that + the corresponding parts are compressed. Then, these parts MUST + be decompressed before decoding the delta instructions. + + Length of data for ADDs and RUNs: + This is the length (in bytes) of the section of data storing + the unmatched data accompanying the ADD and RUN instructions. + + Length of instructions section: + This is the length (in bytes) of the delta instructions and + accompanying sizes. + + Length of addresses for COPYs: + This is the length (in bytes) of the section storing + the addresses of the COPY instructions. + + Data section for ADDs and RUNs: + This sequence of bytes encodes the unmatched data for the ADD + and RUN instructions. + + Instructions and sizes section: + This sequence of bytes encodes the instructions and their sizes. + + Addresses section for COPYs: + This sequence of bytes encodes the addresses of the COPY + instructions. + + +5. DELTA INSTRUCTION ENCODING + + The delta instructions described in Section 3 represent the results of + string matching. For many data differencing applications in which the + changes between source and target data are small, any straightforward + representation of these instructions would be adequate. However, for + applications including data compression, it is important to encode + these instructions well to achieve good compression rates. From our + experience, the following observations can be made: + + a. The addresses in COPY instructions are locations of matches and + often occur close by or even exactly equal to one another. This is + because data in local regions are often replicated with minor changes. + In turn, this means that coding a newly matched address against some + set of recently matched addresses can be beneficial. 
+ + b. The matches are often short in length and separated by small amounts + of unmatched data. That is, the lengths of COPY and ADD instructions + are often small. This is particularly true of binary data such as + executable files or structured data such as HTML or XML. In such cases, + compression can be improved by combining the encoding of the sizes + and the instruction types as well as combining the encoding of adjacent + delta instructions with sufficiently small data sizes. + + The below subsections discuss how the Vcdiff data format provides + mechanisms enabling encoders to use the above observations to improve + compression rates. + + +5.1 Address Encoding Modes of COPY Instructions + + As mentioned earlier, addresses of COPY instructions often occur close + to one another or are exactly equal. To take advantage of this phenomenon + and encode addresses of COPY instructions more efficiently, the Vcdiff + data format supports the use of two different types of address caches. + Both the encoder and decoder maintain these caches, so that decoder's + caches remain synchronized with the encoder's caches. + + a. A "near" cache is an array with "s_near" slots, each containing an + address used for encoding addresses nearby to previously encoded + addresses (in the positive direction only). The near cache also + maintains a "next_slot" index to the near cache. New entries to the + near cache are always inserted in the next_slot index, which maintains + a circular buffer of the s_near most recent addresses. + + b. A "same" cache is an array with "s_same" multiple of 256 slots, each + containing an address. The same cache maintains a hash table of recent + addresses used for repeated encoding of the exact same address. + + + By default, the parameters s_near and s_same are respectively set to 4 + and 3. 
An encoder MAY modify these values, but then it MUST encode the + new values in the encoding itself, as discussed in Section 7, so that + the decoder can properly set up its own caches. + + At the start of processing a target window, an implementation + (encoder or decoder) initializes all of the slots in both caches + to zero. The next_slot pointer of the near cache is set + to point to slot zero. + + Each time a COPY instruction is processed by the encoder or + decoder, the implementation's caches are updated as follows, where + "addr" is the address in the COPY instruction. + + a. The slot in the near cache referenced by the next_slot + index is set to addr. The next_slot index is then incremented + modulo s_near. + + b. The slot in the same cache whose index is addr%(s_same*256) + is set to addr. [We use the C notations of % for modulo and + * for multiplication.] + + +5.2 Example code for maintaining caches + + To make clear the above description, below are example cache data + structures and algorithms to initialize and update them: + + typedef struct _cache_s + { + int* near; /* array of size s_near */ + int s_near; + int next_slot; /* the circular index for near */ + int* same; /* array of size s_same*256 */ + int s_same; + } Cache_t; + + cache_init(Cache_t* ka) + { + int i; + + ka->next_slot = 0; + for(i = 0; i < ka->s_near; ++i) + ka->near[i] = 0; + + for(i = 0; i < ka->s_same*256; ++i) + ka->same[i] = 0; + } + + cache_update(Cache_t* ka, int addr) + { + if(ka->s_near > 0) + { ka->near[ka->next_slot] = addr; + ka->next_slot = (ka->next_slot + 1) % ka->s_near; + } + + if(ka->s_same > 0) + ka->same[addr % (ka->s_same*256)] = addr; + } + + +5.3 Encoding of COPY instruction addresses + + The address of a COPY instruction is encoded using different modes + depending on the type of cached address used, if any. 
+ + Let "addr" be the address of a COPY instruction to be decoded and "here" + be the current location in the target data (i.e., the start of the data + about to be encoded or decoded). Let near[j] be the jth element in + the near cache, and same[k] be the kth element in the same cache. + Below are the possible address modes: + + VCD_SELF: This mode has value 0. The address was encoded by itself + as an integer. + + VCD_HERE: This mode has value 1. The address was encoded as + the integer value "here - addr". + + Near modes: The "near modes" are in the range [2,s_near+1]. Let m + be the mode of the address encoding. The address was encoded + as the integer value "addr - near[m-2]". + + Same modes: The "same modes" are in the range + [s_near+2,s_near+s_same+1]. Let m be the mode of the encoding. + The address was encoded as a single byte b such that + "addr == same[(m - (s_near+2))*256 + b]". + + +5.3 Example code for encoding and decoding of COPY instruction addresses + + We show example algorithms below to demonstrate use of address modes more + clearly. The encoder has freedom to choose address modes, the sample + addr_encode() algorithm merely shows one way of picking the address + mode. The decoding algorithm addr_decode() will uniquely decode addresses + regardless of the encoder's algorithm choice. + + Note that the address caches are updated immediately after an address is + encoded or decoded. In this way, the decoder is always synchronized with + the encoder. + + int addr_encode(Cache_t* ka, int addr, int here, int* mode) + { + int i, d, bestd, bestm; + + /* Attempt to find the address mode that yields the + * smallest integer value for "d", the encoded address + * value, thereby minimizing the encoded size of the + * address. 
*/ + + bestd = addr; bestm = VCD_SELF; /* VCD_SELF == 0 */ + + if((d = here-addr) < bestd) + { bestd = d; bestm = VCD_HERE; } /* VCD_HERE == 1 */ + + for(i = 0; i < ka->s_near; ++i) + if((d = addr - ka->near[i]) >= 0 && d < bestd) + { bestd = d; bestm = i+2; } + + if(ka->s_same > 0 && ka->same[d = addr%(ka->s_same*256)] == addr) + { bestd = d%256; bestm = ka->s_near + 2 + d/256; } + + cache_update(ka,addr); + + *mode = bestm; /* this returns the address encoding mode */ + return bestd; /* this returns the encoded address */ + } + + Note that the addr_encode() algorithm chooses the best address mode using a + local optimization, but that may not lead to the best encoding efficiency + because different modes lead to different instruction encodings, as described below. + + The functions addrint() and addrbyte() used in addr_decode() obtain from + the "Addresses section for COPYs" (Section 4.3) an integer or a byte, + respectively. These utilities will not be described here. We simply + recall that an integer is represented as a compact variable-sized string + of bytes as described in Section 2 (i.e., base 128). + + int addr_decode(Cache_t* ka, int here, int mode) + { int addr, m; + + if(mode == VCD_SELF) + addr = addrint(); + else if(mode == VCD_HERE) + addr = here - addrint(); + else if((m = mode - 2) >= 0 && m < ka->s_near) /* near cache */ + addr = ka->near[m] + addrint(); + else /* same cache */ + { m = mode - (2 + ka->s_near); + addr = ka->same[m*256 + addrbyte()]; + } + + cache_update(ka, addr); + + return addr; + } + + +5.4 Instruction Codes + + As noted, the data sizes associated with delta instructions are often + small. Thus, compression efficiency can be improved by combining the sizes + and instruction types in a single encoding, as well by combining certain + pairs of adjacent delta instructions. Effective choices of when to perform + such combinations depend on many factors including the data being processed + and the string matching algorithm in use. 
For example, if many COPY + instructions have the same data sizes, it may be worthwhile to encode these + instructions more compactly than others. + + The Vcdiff data format is designed so that a decoder does not need to be + aware of the choices made in encoding algorithms. This is achieved with the + notion of an "instruction code table" containing 256 entries. Each entry + defines either a single delta instruction or a pair of instructions that + have been combined. Note that the code table itself only exists in main + memory, not in the delta file (unless using an application-defined code + table, described in Section 7). The encoded data simply includes the index + of each instruction and, since there are only 256 indices, each index + can be represented as a single byte. + + Each instruction code entry contains six fields, each of which + is a single byte with unsigned value: + + +-----------------------------------------------+ + | inst1 | size1 | mode1 | inst2 | size2 | mode2 | + +-----------------------------------------------+ + +@@@ could be more compact + + Each triple (inst,size,mode) defines a delta instruction. The meanings + of these fields are as follows: + + inst: An "inst" field can have one of the four values: NOOP (0), ADD (1), + RUN (2) or COPY (3) to indicate the instruction types. NOOP means + that no instruction is specified. In this case, both the corresponding + size and mode fields will be zero. + + size: A "size" field is zero or positive. A value zero means that the + size associated with the instruction is encoded separately as + an integer in the "Instructions and sizes section" (Section 6). + A positive value for "size" defines the actual data size. + Note that since the size is restricted to a byte, the maximum + value for any instruction with size implicitly defined in the code + table is 255. + + mode: A "mode" field is significant only when the associated delta + instruction is a COPY.
It defines the mode used to encode the + associated addresses. For other instructions, this is always zero. + + +5.5 The Code Table + + Following the discussions on address modes and instruction code tables, + we define a "Code Table" to have the data below: + + s_near: the size of the near cache, + s_same: the size of the same cache, + i_code: the 256-entry instruction code table. + + Vcdiff itself defines a "default code table" in which s_near is 4 + and s_same is 3. Thus, there are 9 address modes for a COPY instruction. + The first two are VCD_SELF (0) and VCD_HERE (1). Modes 2, 3, 4 and 5 + are for addresses coded against the near cache. And, modes 6, 7 and 8 + are for addresses coded against the same cache. + + The default instruction code table is depicted below, in a compact + representation that we use only for descriptive purposes. See section 7 + for the specification of how an instruction code table is represented + in the Vcdiff encoding format. In the depiction, a zero value for + size indicates that the size is separately coded. The mode of non-COPY + instructions is represented as 0 even though they are not used. + + + TYPE SIZE MODE TYPE SIZE MODE INDEX + --------------------------------------------------------------- + 1. RUN 0 0 NOOP 0 0 0 + 2. ADD 0, [1,17] 0 NOOP 0 0 [1,18] + 3. COPY 0, [4,18] 0 NOOP 0 0 [19,34] + 4. COPY 0, [4,18] 1 NOOP 0 0 [35,50] + 5. COPY 0, [4,18] 2 NOOP 0 0 [51,66] + 6. COPY 0, [4,18] 3 NOOP 0 0 [67,82] + 7. COPY 0, [4,18] 4 NOOP 0 0 [83,98] + 8. COPY 0, [4,18] 5 NOOP 0 0 [99,114] + 9. COPY 0, [4,18] 6 NOOP 0 0 [115,130] + 10. COPY 0, [4,18] 7 NOOP 0 0 [131,146] + 11. COPY 0, [4,18] 8 NOOP 0 0 [147,162] + 12. ADD [1,4] 0 COPY [4,6] 0 [163,174] + 13. ADD [1,4] 0 COPY [4,6] 1 [175,186] + 14. ADD [1,4] 0 COPY [4,6] 2 [187,198] + 15. ADD [1,4] 0 COPY [4,6] 3 [199,210] + 16. ADD [1,4] 0 COPY [4,6] 4 [211,222] + 17. ADD [1,4] 0 COPY [4,6] 5 [223,234] + 18. ADD [1,4] 0 COPY 4 6 [235,238] + 19. 
ADD [1,4] 0 COPY 4 7 [239,242] + 20. ADD [1,4] 0 COPY 4 8 [243,246] + 21. COPY 4 [0,8] ADD 1 0 [247,255] + --------------------------------------------------------------- + + In the above depiction, each numbered line represents one or more + entries in the actual instruction code table (recall that an entry in + the instruction code table may represent up to two combined delta + instructions.) The last column ("INDEX") shows which index value or + range of index values of the entries covered by that line. The notation + [i,j] means values from i through j, inclusive. The first 6 columns of + a line in the depiction describe the pairs of instructions used for + the corresponding index value(s). + + If a line in the depiction includes a column entry using the [i,j] + notation, this means that the line is instantiated for each value + in the range from i to j, inclusive. The notation "0, [i,j]" means + that the line is instantiated for the value 0 and for each value + in the range from i to j, inclusive. + + If a line in the depiction includes more than one entry using the [i,j] + notation, implying a "nested loop" to convert the line to a range of + table entries, the first such [i,j] range specifies the outer loop, + and the second specifies the inner loop. + + The below examples should make clear the above description: + + Line 1 shows the single RUN instruction with index 0. As the size field + is 0, this RUN instruction always has its actual size encoded separately. + + Line 2 shows the 18 single ADD instructions. The ADD instruction with + size field 0 (i.e., the actual size is coded separately) has index 1. + ADD instructions with sizes from 1 to 17 use code indices 2 to 18 and + their sizes are as given (so they will not be separately encoded.) + + Following the single ADD instructions are the single COPY instructions + ordered by their address encoding modes. For example, line 11 shows the + COPY instructions with mode 8, i.e., the last of the same cache. 
+ In this case, the COPY instruction with size field 0 has index 147. + Again, the actual size of this instruction will be coded separately. + + Lines 12 to 21 show the pairs of instructions that are combined together. + For example, line 12 depicts the 12 entries in which an ADD instruction + is combined with an immediately following COPY instruction. The entries + with indices 163, 164, 165 represent the pairs in which the ADD + instructions all have size 1 while the COPY instructions have mode + 0 (VCD_SELF) and sizes 4, 5 and 6 respectively. + + The last line, line 21, shows the nine instruction pairs where the first + instruction is a COPY and the second is an ADD. In this case, all COPY + instructions have size 4 with mode ranging from 0 to 8 and all the ADD + instructions have size 1. Thus, the entry with largest index 255 + combines a COPY instruction of size 4 and mode 8 with an ADD instruction + of size 1. + + The choice of the minimum size 4 for COPY instructions in the default code + table was made from experiments that showed that excluding small matches + (less than 4 bytes long) improved the compression rates. + + +6. DECODING A TARGET WINDOW + + Section 4.3 discusses that the delta instructions and associated data + are encoded in three arrays of bytes: + + Data section for ADDs and RUNs, + Instructions and sizes section, and + Addresses section for COPYs. + + + Further, these data sections may have been further compressed by some + secondary compressor. Assuming that any such compressed data has been + decompressed so that we now have three arrays: + + inst: bytes coding the instructions and sizes. + data: unmatched data associated with ADDs and RUNs. + addr: bytes coding the addresses of COPYs. + + These arrays are organized as follows: + + inst: + a sequence of (index, [size1], [size2]) tuples, where "index" + is an index into the instruction code table, and size1 and size2 + are integers that MAY or MAY NOT be included in the tuple as + follows.
The entry with the given "index" in the instruction + code table potentially defines two delta instructions. If the + first delta instruction is not a VCD_NOOP and its size is zero, + then size1 MUST be present. Otherwise, size1 MUST be omitted and + the size of the instruction (if it is not VCD_NOOP) is as defined + in the table. The presence or absence of size2 is defined + similarly with respect to the second delta instruction. + + data: + a sequence of data values, encoded as bytes. + + addr: + a sequence of address values. Addresses are normally encoded as + integers as described in Section 2 (i.e., base 128). + Since the same cache emits addresses in the range [0,255], + however, same cache addresses are always encoded as a + single byte. + + To summarize, each tuple in the "inst" array includes an index to some + entry in the instruction code table that determines: + + a. Whether one or two instructions were encoded and their types. + + b. If the instructions have their sizes encoded separately, these + sizes will follow, in order, in the tuple. + + c. If the instructions have accompanying data, i.e., ADDs or RUNs, + their data will be in the array "data". + + d. Similarly, if the instructions are COPYs, the coded addresses are + found in the array "addr". + + The decoding procedure simply processes the arrays by reading one code + index at a time, looking up the corresponding instruction code entry, + then consuming the respective sizes, data and addresses following the + directions in this entry. In other words, the decoder maintains an implicit + next-element pointer for each array; "consuming" an instruction tuple, + data, or address value implies incrementing the associated pointer. + + For example, if during the processing of the target window, the next + unconsumed tuple in the inst array has index value 19, then the first + instruction is a COPY, whose size is found as the immediately following + integer in the inst array. 
Since the mode of this COPY instruction is + VCD_SELF, the corresponding address is found by consuming the next + integer in the addr array. The data array is left intact. As the second + instruction for code index 19 is a NOOP, this tuple is finished. + + +7. APPLICATION-DEFINED CODE TABLES + + Although the default code table used in Vcdiff is good for general + purpose encoders, there are times when other code tables may perform + better. For example, to code a file with many identical segments of data, + it may be advantageous to have a COPY instruction with the specific size + of these data segments so that the instruction can be encoded in a single + byte. Such a special code table MUST then be encoded in the delta file + so that the decoder can reconstruct it before decoding the data. + + Vcdiff allows an application-defined code table to be specified + in a delta file with the following data: + + Size of near cache - byte + Size of same cache - byte + Compressed code table data + + The "compressed code table data" encodes the delta between the default + code table (source) and the new code table (target) in the same manner as + described in Section 4.3 for encoding a target window in terms of a + source window. This delta is computed using the following steps: + + a. Convert the new instruction code table into a string, "code", of + 1536 bytes using the below steps in order: + + i. Add in order the 256 bytes representing the types of the first + instructions in the instruction pairs. + ii. Add in order the 256 bytes representing the types of the second + instructions in the instruction pairs. + iii. Add in order the 256 bytes representing the sizes of the first + instructions in the instruction pairs. + iv. Add in order the 256 bytes representing the sizes of the second + instructions in the instruction pairs. + v. Add in order the 256 bytes representing the modes of the first + instructions in the instruction pairs. + vi. 
Add in order the 256 bytes representing the modes of the second + instructions in the instruction pairs. + + b. Similarly, convert the default instruction code table into + a string "dflt". + + c. Treat the string "code" as a target window and "dflt" as the + corresponding source data and apply an encoding algorithm to + compute the delta encoding of "code" in terms of "dflt". + This computation MUST use the default code table for encoding + the delta instructions. + + The decoder can then reverse the above steps to decode the compressed + table data using the method of Section 6, employing the default code + table, to generate the new code table. Note that the decoder does not + need to know anything about the details of the encoding algorithm used + in step (c). The decoder is still able to decode the new code table + because the Vcdiff format is independent from the choice of encoding + algorithm, and because the encoder in step (c) uses the known, default + code table. + + +8. PERFORMANCE + + The encoding format is compact. For compression only, using the LZ-77 + string parsing strategy and without any secondary compressors, the typical + compression rate is better than Unix compress and close to gzip. For + differencing, the data format is better than all known methods in + terms of its stated goal, which is primarily decoding speed and + encoding efficiency. + + We compare the performance of compress, gzip and Vcdiff using the + archives of three versions of the Gnu C compiler, gcc-2.95.1.tar, + gcc-2.95.2.tar and gcc-2.95.3.tar. The experiments were done on an + SGI-MIPS3, 400MHZ. Gzip was used at its default compression level. + Vcdiff timings were done using the Vcodex/Vcdiff software (Section 13). + As string and window matching typically dominates the computation during + compression, the Vcdiff compression times were directly due to the + algorithms used in the Vcodex/Vcdiff software. 
However, the decompression + times should be generic and representative of any good implementation + of the Vcdiff data format. Timing was done by running each program + three times and taking the average of the total cpu+system times. + + Below are the different Vcdiff runs: + + Vcdiff: vcdiff is used as compressor only. + + Vcdiff-d: vcdiff is used as a differencer only. That is, it only + compares target data against source data. Since the files + involved are large, they are broken into windows. In this + case, each target window starting at some file offset in + the target file is compared against a source window with + the same file offset (in the source file). The source + window is also slightly larger than the target window + to increase matching opportunities. The -d option also gives + a hint to the string matching algorithm of Vcdiff that + the two files are very similar with long stretches of matches. + The algorithm takes advantage of this to minimize its + processing of source data and save time. + + Vcdiff-dc: This is similar to Vcdiff-d but vcdiff can also compare + target data against target data as applicable. Thus, vcdiff + both computes differences and compresses data. The windowing + algorithm is the same as above. However, the above hint is + rescinded in this case. + + Vcdiff-dcs: This is similar to Vcdiff-dc but the windowing algorithm + uses a content-based heuristic to select source data segments + that are more likely to match with a given target window. + Thus, the source data segment selected for a target window + often will not be aligned with the file offsets of this + target window. + + + gcc-2.95.1 gcc-2.95.2 compression decompression + raw size 55746560 55797760 + compress - 19939390 13.85s 7.09s + gzip - 12973443 42.99s 5.35s + Vcdiff - 15358786 20.04s 4.65s + Vcdiff-d - 100971 10.93s 1.92s + Vcdiff-dc - 97246 20.03s 1.84s + Vcdiff-dcs - 256445 44.81s 1.84s + + TABLE 1.
Compressing gcc-2.95.2.tar given gcc-2.95.1 + + + TABLE 1 shows the raw sizes of gcc-2.95.1.tar and gcc-2.95.2.tar and the + sizes of the compressed results. As a pure compressor, the compression + rate for Vcdiff is worse than gzip and better than compress. The last + three rows show that when two file versions are very similar, differencing + can have dramatically good compression rates. Vcdiff-d and Vcdiff-dc use + the same simple window selection method but Vcdiff-dc also does compression + so its output is slightly smaller. Vcdiff-dcs uses a heuristic based on + data content to search for source data that likely will match a given target + window. Although it does a good job, the heuristic did not always find the + best matches which are given by the simple algorithm of Vcdiff-d. As a + result, the output size is slightly larger. Note also that there is a large + cost in computing matching windows this way. Finally, the compression time + of Vcdiff-d is nearly half of that of Vcdiff-dc. It is tempting to conclude + that the compression feature causes the additional time in Vcdiff-dc + relative to Vcdiff-d. However, this is not the case. The hint given to + the Vcdiff string matching algorithm that the two files are likely to + have very long stretches of matches helps the algorithm to minimize + processing of the "source data", thus saving half the time. However, as we + shall see below when this hint is wrong, the result is an even longer time. + + + gcc-2.95.2 gcc-2.95.3 compression decompression + raw size 55797760 55787520 + compress - 19939453 13.54s 7.00s + gzip - 12998097 42.63s 5.62s + Vcdiff - 15371737 20.09s 4.74s + Vcdiff-d - 26383849 71.41s 6.41s + Vcdiff-dc - 14461203 42.48s 4.82s + Vcdiff-dcs - 1248543 61.18s 1.99s + + TABLE 2. Compressing gcc-2.95.3.tar given gcc-2.95.2 + + + TABLE 2 shows the raw sizes of gcc-2.95.2.tar and gcc-2.95.3.tar and + the sizes of the compressed results.
In this case, the tar file of + gcc-2.95.3 is rearranged in a way that makes the straightforward method + of matching file offsets for source and target windows fail. As a + result, Vcdiff-d performs rather dismally both in time and output size. + The large time for Vcdiff-d is directly due to the fact that the string + matching algorithm has to work much harder to find matches when the hint + that two files have long matching stretches fails to hold. On the other + hand, Vcdiff-dc does both differencing and compression resulting in good + output size. Finally, the window searching heuristic used in Vcdiff-dcs is + effective in finding the right matching source windows for target windows + resulting in a small output size. This shows why the data format needs to + have a way to specify matching windows to gain performance. Finally, + we note that the decoding times are always good regardless of how + the string matching or window searching algorithms perform. + + +9. FURTHER ISSUES + + This document does not address a few issues: + + Secondary compressors: + As discussed in Section 4.3, certain sections in the delta encoding + of a window may be further compressed by a secondary compressor. + In our experience, the basic Vcdiff format is adequate for most + purposes so that secondary compressors are seldom needed. In + particular, for normal use of data differencing where the files to + be compared have long stretches of matches, much of the gain in + compression rate is already achieved by normal string matching. + Thus, the use of secondary compressors is seldom needed in this case. + However, for applications beyond differencing of such nearly identical + files, secondary compressors may be needed to achieve maximal + compressed results. + + Therefore, we recommend leaving the Vcdiff data format defined + as in this document so that the use of secondary compressors + can be implemented when they become needed in the future.
+ The formats of the compressed data via such compressors or any + compressors that may be defined in the future are left open to + their implementations. These could include Huffman encoding, + arithmetic encoding, and splay tree encoding [8,9]. + + Large file system vs. small file system: + As discussed in Section 4, a target window in a large file may be + compared against some source window in another file or in the same + file (from some earlier part). In that case, the file offset of the + source window is specified as a variable-sized integer in the delta + encoding. There is a possibility that the encoding was computed on + a system supporting much larger files than in a system where + the data may be decoded (e.g., 64-bit file systems vs. 32-bit file + systems). In that case, some target data may not be recoverable. + This problem could afflict any compression format, and ought + to be resolved with a generic negotiation mechanism in the + appropriate protocol(s). + + +10. SUMMARY + + We have described Vcdiff, a general and portable encoding format for + compression and differencing. The format is good in that it allows + implementing a decoder without knowledge of the encoders. Further, + ignoring the use of secondary compressors not defined within the format, + the decoding algorithm runs in linear time and requires working space + proportional to window sizes. + + + +11. ACKNOWLEDGEMENTS + + Thanks are due to Balachander Krishnamurthy, Jeff Mogul and Arthur Van Hoff + who provided much encouragement to publicize Vcdiff. In particular, Jeff + helped clarify the description of the data format presented here. + + + +12. SECURITY CONSIDERATIONS + + Vcdiff only provides a format to encode compressed and differenced data. + It does not address any issues concerning how such data are, in fact, + stored in a given file system or the run-time memory of a computer system. + Therefore, we do not anticipate any security issues with respect to Vcdiff. + + + +13.
SOURCE CODE AVAILABILITY + + Vcdiff is implemented as a data transforming method in Phong Vo's + Vcodex library. AT&T Corp. has made the source code for Vcodex available + for anyone to use to transmit data via HTTP/1.1 Delta Encoding [10,11]. + The source code and accompanying license are accessible at the URL below: + + http://www.research.att.com/sw/tools + + +14. INTELLECTUAL PROPERTY RIGHTS + + The IETF has been notified of intellectual property rights claimed in + regard to some or all of the specification contained in this + document. For more information consult the online list of claimed + rights, at <http://www.ietf.org/ipr.html>. + + The IETF takes no position regarding the validity or scope of any + intellectual property or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; neither does it represent that it + has made any effort to identify any such rights. Information on the + IETF's procedures with respect to rights in standards-track and + standards-related documentation can be found in BCP-11. Copies of + claims of rights made available for publication and any assurances of + licenses to be made available, or the result of an attempt made to + obtain a general license or permission for the use of such + proprietary rights by implementors or users of this specification can + be obtained from the IETF Secretariat. + + + +15. IANA CONSIDERATIONS + + The Internet Assigned Numbers Authority (IANA) administers the number + space for Secondary Compressor ID values. Values and their meaning + must be documented in an RFC or other peer-reviewed, permanent, and + readily available reference, in sufficient detail so that + interoperability between independent implementations is possible. + Subject to these constraints, name assignments are First Come, First + Served - see RFC2434 [13]. Legal ID values are in the range 1..255.
+ + This document does not define any values in this number space. + + +16. REFERENCES + + [1] D.G. Korn and K.P. Vo, Vdelta: Differencing and Compression, + Practical Reusable Unix Software, Editor B. Krishnamurthy, + John Wiley & Sons, Inc., 1995. + + [2] J. Ziv and A. Lempel, A Universal Algorithm for Sequential Data + Compression, IEEE Trans. on Information Theory, 23(3):337-343, 1977. + + [3] W. Tichy, The String-to-String Correction Problem with Block Moves, + ACM Transactions on Computer Systems, 2(4):309-321, November 1984. + + [4] E.M. McCreight, A Space-Economical Suffix Tree Construction + Algorithm, Journal of the ACM, 23:262-272, 1976. + + [5] J.J. Hunt, K.P. Vo, W. Tichy, An Empirical Study of Delta Algorithms, + IEEE Software Configuration and Maintenance Workshop, 1996. + + [6] J.J. Hunt, K.P. Vo, W. Tichy, Delta Algorithms: An Empirical Analysis, + ACM Trans. on Software Engineering and Methodology, 7:192-214, 1998. + + [7] D.G. Korn, K.P. Vo, Sfio: A buffered I/O Library, + Proc. of the Summer '91 Usenix Conference, 1991. + + [8] D. W. Jones, Application of Splay Trees to Data Compression, + CACM, 31(8):996-1007. + + [9] M. Nelson, J. Gailly, The Data Compression Book, ISBN 1-55851-434-1, + M&T Books, New York, NY, 1995. + + [10] J.C. Mogul, F. Douglis, A. Feldmann, and B. Krishnamurthy, + Potential benefits of delta encoding and data compression for HTTP, + SIGCOMM '97, Cannes, France, 1997. + + [11] J.C. Mogul, B. Krishnamurthy, F. Douglis, A. Feldmann, + Y. Goland, and A. Van Hoff, Delta Encoding in HTTP, + IETF, draft-mogul-http-delta-10, 2001. + + [12] S. Bradner, Key words for use in RFCs to Indicate Requirement Levels, + RFC 2119, March 1997. + + [13] T. Narten, H. Alvestrand, Guidelines for Writing an IANA + Considerations Section in RFCs, RFC2434, October 1998. + + + +17.
AUTHOR'S ADDRESS + + Kiem-Phong Vo (main contact) + AT&T Labs, Room D223 + 180 Park Avenue + Florham Park, NJ 07932 + Email: kpv@research.att.com + Phone: 1 973 360 8630 + + David G. Korn + AT&T Labs, Room D237 + 180 Park Avenue + Florham Park, NJ 07932 + Email: dgk@research.att.com + Phone: 1 973 360 8602 + + Jeffrey C. Mogul + Western Research Laboratory + Compaq Computer Corporation + 250 University Avenue + Palo Alto, California, 94305, U.S.A. + Email: JeffMogul@acm.org + Phone: 1 650 617 3304 (email preferred) + + Joshua P. MacDonald + Computer Science Division + University of California, Berkeley + 345 Soda Hall + Berkeley, CA 94720 + Email: jmacd@cs.berkeley.edu diff --git a/lib/xdelta3/examples/Makefile b/lib/xdelta3/examples/Makefile new file mode 100644 index 0000000..2d3563f --- /dev/null +++ b/lib/xdelta3/examples/Makefile @@ -0,0 +1,32 @@ +CFLAGS = -g -Wall -I.. -DXD3_DEBUG=1 -DNDEBUG=0 -DSIZEOF_SIZE_T=8 -DSIZEOF_UNSIGNED_LONG_LONG=8 +#CFLAGS = -O3 -Wall -I.. -DXD3_DEBUG=0 -fno-builtin -DNDEBUG=1 +# -pg + +SOURCES = small_page_test.c encode_decode_test.c speed_test.c + +DEPS = ../*.h ../*.c *.h + +TARGETS = small_page_test encode_decode_test speed_test32 speed_test64 compare_test checksum_test + +all: $(TARGETS) + +small_page_test: small_page_test.c $(DEPS) + $(CC) $(CFLAGS) small_page_test.c -o small_page_test -DXD3_USE_LARGEFILE64=0 -DSECONDARY_DJW=1 + +encode_decode_test: encode_decode_test.c $(DEPS) + $(CC) $(CFLAGS) encode_decode_test.c -o encode_decode_test + +speed_test32: speed_test.c $(DEPS) + $(CC) $(CFLAGS) -DXD3_USE_LARGEFILE64=0 speed_test.c -o speed_test32 + +speed_test64: speed_test.c $(DEPS) + $(CC) $(CFLAGS) -DXD3_USE_LARGEFILE64=1 speed_test.c -o speed_test64 + +compare_test: compare_test.c + $(CC) $(CFLAGS) compare_test.c -o compare_test + +checksum_test: checksum_test.cc + $(CXX) $(CFLAGS) checksum_test.cc -o checksum_test + +clean: + rm -r -f *.exe *.stackdump $(TARGETS) *.dSYM *~ diff --git a/lib/xdelta3/examples/README.md 
b/lib/xdelta3/examples/README.md new file mode 100644 index 0000000..ebaf522 --- /dev/null +++ b/lib/xdelta3/examples/README.md @@ -0,0 +1,8 @@ +Files in this directory demonstrate how to use the Xdelta3 API. Copyrights +are held by the respective authors. + +small_page_test.c -- how to use xdelta3 in an environment such as the kernel +for small pages with little memory + +encode_decode_test.c -- how to use xdelta3 to process (encode/decode) data in +multiple windows with the non-blocking API diff --git a/lib/xdelta3/examples/compare_test.c b/lib/xdelta3/examples/compare_test.c new file mode 100644 index 0000000..24f1cb8 --- /dev/null +++ b/lib/xdelta3/examples/compare_test.c @@ -0,0 +1,138 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ +#include +#include +#include +#include +#include + +#include "xdelta3.h" + +#define NUM (1<<20) +#define ITERS 100 + +/* From wikipedia on RDTSC */ +inline uint64_t rdtsc() { + uint32_t lo, hi; + asm volatile ("rdtsc" : "=a" (lo), "=d" (hi)); + return (uint64_t)hi << 32 | lo; +} + +typedef int (*test_func)(const char *s1, const char *s2, int n); + +void run_test(const char *buf1, const char *buf2, + const char *name, test_func func) { + uint64_t start, end; + uint64_t accum = 0; + int i, x; + + for (i = 0; i < ITERS; i++) { + start = rdtsc(); + x = func(buf1, buf2, NUM); + end = rdtsc(); + accum += end - start; + assert(x == NUM - 1); + } + + accum /= ITERS; + + printf("%s : %qu cycles\n", name, accum); +} + +/* Build w/ -fno-builtin for this to be fast, this assumes that there + * is a difference at s1[n-1] */ +int memcmp_fake(const char *s1, const char *s2, int n) { + int x = memcmp(s1, s2, n); + return x < 0 ? n - 1 : n + 1; +} + +#define UNALIGNED_OK 1 +static inline int +test2(const char *s1c, const char *s2c, int n) +{ + int i = 0; +#if UNALIGNED_OK + int nint = n / sizeof(int); + + if (nint >> 3) + { + int j = 0; + const int *s1 = (const int*)s1c; + const int *s2 = (const int*)s2c; + int nint_8 = nint - 8; + + while (i <= nint_8 && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++]) { } + + i = (i - 1) * sizeof(int); + } +#endif + + while (i < n && s1c[i] == s2c[i]) + { + i++; + } + return i; +} + +static inline int +test1(const char *s1c, const char *s2c, int n) { + int i = 0; + while (i < n && s1c[i] == s2c[i]) + { + i++; + } + return i; +} + +int main(/*int argc, char **argv*/) { + char *buf1 = malloc(NUM+1); + char *buf2 = malloc(NUM+1); + int i; + + for (i = 0; i < NUM; i++) { + buf1[i] = buf2[i] = rand(); + } + + buf2[NUM-1]++; + + printf ("ALIGNED\n"); + + run_test(buf1, buf2, "memcmp", &memcmp_fake); + 
run_test(buf1, buf2, "test1", &test1); + run_test(buf1, buf2, "test2", &test2); + + for (i = 0; i < NUM; i++) { + buf1[i] = buf2[i+1] = rand(); + } + + buf2[NUM]++; + + printf ("UNALIGNED\n"); + + run_test(buf1, buf2+1, "memcmp", &memcmp_fake); + run_test(buf1, buf2+1, "test1", &test1); + run_test(buf1, buf2+1, "test2", &test2); + + return 0; +} diff --git a/lib/xdelta3/examples/encode_decode_test.c b/lib/xdelta3/examples/encode_decode_test.c new file mode 100644 index 0000000..d516783 --- /dev/null +++ b/lib/xdelta3/examples/encode_decode_test.c @@ -0,0 +1,203 @@ +// Permission to distribute this example by +// Copyright (C) 2007 Ralf Junker +// Ralf Junker +// http://www.yunqa.de/delphi/ + +//--------------------------------------------------------------------------- + +#include +#include +#include "xdelta3.h" +#include "xdelta3.c" + +//--------------------------------------------------------------------------- + +int code ( + int encode, + FILE* InFile, + FILE* SrcFile , + FILE* OutFile, + int BufSize ) +{ + int r, ret; + struct stat statbuf; + xd3_stream stream; + xd3_config config; + xd3_source source; + void* Input_Buf; + int Input_Buf_Read; + + if (BufSize < XD3_ALLOCSIZE) + BufSize = XD3_ALLOCSIZE; + + memset (&stream, 0, sizeof (stream)); + memset (&source, 0, sizeof (source)); + + xd3_init_config(&config, XD3_ADLER32); + config.winsize = BufSize; + xd3_config_stream(&stream, &config); + + if (SrcFile) + { + r = fstat(fileno(SrcFile), &statbuf); + if (r) + return r; + + source.blksize = BufSize; + source.curblk = malloc(source.blksize); + + /* Load 1st block of stream. */ + r = fseek(SrcFile, 0, SEEK_SET); + if (r) + return r; + source.onblk = fread((void*)source.curblk, 1, source.blksize, SrcFile); + source.curblkno = 0; + /* Set the stream. 
*/ + xd3_set_source(&stream, &source); + } + + Input_Buf = malloc(BufSize); + + fseek(InFile, 0, SEEK_SET); + do + { + Input_Buf_Read = fread(Input_Buf, 1, BufSize, InFile); + if (Input_Buf_Read < BufSize) + { + xd3_set_flags(&stream, XD3_FLUSH | stream.flags); + } + xd3_avail_input(&stream, Input_Buf, Input_Buf_Read); + +process: + if (encode) + ret = xd3_encode_input(&stream); + else + ret = xd3_decode_input(&stream); + + switch (ret) + { + case XD3_INPUT: + { + fprintf (stderr,"XD3_INPUT\n"); + continue; + } + + case XD3_OUTPUT: + { + fprintf (stderr,"XD3_OUTPUT\n"); + r = fwrite(stream.next_out, 1, stream.avail_out, OutFile); + if (r != (int)stream.avail_out) + return r; + xd3_consume_output(&stream); + goto process; + } + + case XD3_GETSRCBLK: + { + fprintf (stderr,"XD3_GETSRCBLK %qd\n", source.getblkno); + if (SrcFile) + { + r = fseek(SrcFile, source.blksize * source.getblkno, SEEK_SET); + if (r) + return r; + source.onblk = fread((void*)source.curblk, 1, + source.blksize, SrcFile); + source.curblkno = source.getblkno; + } + goto process; + } + + case XD3_GOTHEADER: + { + fprintf (stderr,"XD3_GOTHEADER\n"); + goto process; + } + + case XD3_WINSTART: + { + fprintf (stderr,"XD3_WINSTART\n"); + goto process; + } + + case XD3_WINFINISH: + { + fprintf (stderr,"XD3_WINFINISH\n"); + goto process; + } + + default: + { + fprintf (stderr,"!!! 
INVALID %s %d !!!\n", + stream.msg, ret); + return ret; + } + + } + + } + while (Input_Buf_Read == BufSize); + + free(Input_Buf); + + free((void*)source.curblk); + xd3_close_stream(&stream); + xd3_free_stream(&stream); + + return 0; + +}; + + +int main(int argc, char* argv[]) +{ + FILE* InFile; + FILE* SrcFile; + FILE* OutFile; + int r; + + if (argc != 3) { + fprintf (stderr, "usage: %s source input\n", argv[0]); + return 1; + } + + char *input = argv[2]; + char *source = argv[1]; + const char *output = "encoded.testdata"; + const char *decoded = "decoded.testdata"; + + /* Encode */ + + InFile = fopen(input, "rb"); + SrcFile = fopen(source, "rb"); + OutFile = fopen(output, "wb"); + + r = code (1, InFile, SrcFile, OutFile, 0x1000); + + fclose(OutFile); + fclose(SrcFile); + fclose(InFile); + + if (r) { + fprintf (stderr, "Encode error: %d\n", r); + return r; + } + + /* Decode */ + + InFile = fopen(output, "rb"); + SrcFile = fopen(source, "rb"); + OutFile = fopen(decoded, "wb"); + + r = code (0, InFile, SrcFile, OutFile, 0x1000); + + fclose(OutFile); + fclose(SrcFile); + fclose(InFile); + + if (r) { + fprintf (stderr, "Decode error: %d\n", r); + return r; + } + + return 0; +} diff --git a/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test.xcodeproj/project.pbxproj b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test.xcodeproj/project.pbxproj new file mode 100644 index 0000000..d50d9f7 --- /dev/null +++ b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test.xcodeproj/project.pbxproj @@ -0,0 +1,389 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 46; + objects = { + +/* Begin PBXBuildFile section */ + B9001B65158D008900B9E855 /* xdelta3.c in Sources */ = {isa = PBXBuildFile; fileRef = B9001B63158D008900B9E855 /* xdelta3.c */; }; + B9313C3C158D11BA001C1F28 /* file_v1_to_v2.bin in Resources */ = {isa = PBXBuildFile; fileRef = B9313C39158D11BA001C1F28 /* file_v1_to_v2.bin */; }; + B9313C3D158D11BA001C1F28 /* file_v1.bin in Resources */ = {isa = PBXBuildFile; fileRef = B9313C3A158D11BA001C1F28 /* file_v1.bin */; }; + B9313C3E158D11BA001C1F28 /* file_v2.bin in Resources */ = {isa = PBXBuildFile; fileRef = B9313C3B158D11BA001C1F28 /* file_v2.bin */; }; + B9ADC6BF158CFD36007EF999 /* UIKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = B9ADC6BE158CFD36007EF999 /* UIKit.framework */; }; + B9ADC6C1158CFD36007EF999 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = B9ADC6C0158CFD36007EF999 /* Foundation.framework */; }; + B9ADC6C3158CFD36007EF999 /* CoreGraphics.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = B9ADC6C2158CFD36007EF999 /* CoreGraphics.framework */; }; + B9ADC6C9158CFD36007EF999 /* InfoPlist.strings in Resources */ = {isa = PBXBuildFile; fileRef = B9ADC6C7158CFD36007EF999 /* InfoPlist.strings */; }; + B9ADC6CB158CFD36007EF999 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = B9ADC6CA158CFD36007EF999 /* main.m */; }; + B9ADC6CF158CFD36007EF999 /* Xd3iOSAppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = B9ADC6CE158CFD36007EF999 /* Xd3iOSAppDelegate.m */; }; + B9ADC6D2158CFD36007EF999 /* MainStoryboard_iPhone.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = B9ADC6D0158CFD36007EF999 /* MainStoryboard_iPhone.storyboard */; }; + B9ADC6D5158CFD36007EF999 /* MainStoryboard_iPad.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = B9ADC6D3158CFD36007EF999 /* MainStoryboard_iPad.storyboard */; }; + B9ADC6D8158CFD36007EF999 /* Xd3iOSViewController.m in Sources */ = {isa = 
PBXBuildFile; fileRef = B9ADC6D7158CFD36007EF999 /* Xd3iOSViewController.m */; }; +/* End PBXBuildFile section */ + +/* Begin PBXFileReference section */ + B9001B56158D008900B9E855 /* xdelta3-blkcache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-blkcache.h"; path = "../../../../xdelta3-blkcache.h"; sourceTree = ""; }; + B9001B57158D008900B9E855 /* xdelta3-cfgs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-cfgs.h"; path = "../../../../xdelta3-cfgs.h"; sourceTree = ""; }; + B9001B58158D008900B9E855 /* xdelta3-decode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-decode.h"; path = "../../../../xdelta3-decode.h"; sourceTree = ""; }; + B9001B59158D008900B9E855 /* xdelta3-djw.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-djw.h"; path = "../../../../xdelta3-djw.h"; sourceTree = ""; }; + B9001B5A158D008900B9E855 /* xdelta3-fgk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-fgk.h"; path = "../../../../xdelta3-fgk.h"; sourceTree = ""; }; + B9001B5B158D008900B9E855 /* xdelta3-hash.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-hash.h"; path = "../../../../xdelta3-hash.h"; sourceTree = ""; }; + B9001B5C158D008900B9E855 /* xdelta3-internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-internal.h"; path = "../../../../xdelta3-internal.h"; sourceTree = ""; }; + B9001B5D158D008900B9E855 /* xdelta3-list.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-list.h"; path = "../../../../xdelta3-list.h"; sourceTree = ""; }; + B9001B5E158D008900B9E855 /* xdelta3-main.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name 
= "xdelta3-main.h"; path = "../../../../xdelta3-main.h"; sourceTree = ""; }; + B9001B5F158D008900B9E855 /* xdelta3-merge.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-merge.h"; path = "../../../../xdelta3-merge.h"; sourceTree = ""; }; + B9001B60158D008900B9E855 /* xdelta3-python.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-python.h"; path = "../../../../xdelta3-python.h"; sourceTree = ""; }; + B9001B61158D008900B9E855 /* xdelta3-second.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-second.h"; path = "../../../../xdelta3-second.h"; sourceTree = ""; }; + B9001B62158D008900B9E855 /* xdelta3-test.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-test.h"; path = "../../../../xdelta3-test.h"; sourceTree = ""; }; + B9001B63158D008900B9E855 /* xdelta3.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = xdelta3.c; path = ../../../../xdelta3.c; sourceTree = ""; }; + B9001B64158D008900B9E855 /* xdelta3.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = xdelta3.h; path = ../../../../xdelta3.h; sourceTree = ""; }; + B9313C39158D11BA001C1F28 /* file_v1_to_v2.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; path = file_v1_to_v2.bin; sourceTree = ""; }; + B9313C3A158D11BA001C1F28 /* file_v1.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; path = file_v1.bin; sourceTree = ""; }; + B9313C3B158D11BA001C1F28 /* file_v2.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; path = file_v2.bin; sourceTree = ""; }; + B9ADC6BA158CFD36007EF999 /* xdelta3-ios-test.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "xdelta3-ios-test.app"; sourceTree = BUILT_PRODUCTS_DIR; }; + 
B9ADC6BE158CFD36007EF999 /* UIKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = UIKit.framework; path = System/Library/Frameworks/UIKit.framework; sourceTree = SDKROOT; }; + B9ADC6C0158CFD36007EF999 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; }; + B9ADC6C2158CFD36007EF999 /* CoreGraphics.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreGraphics.framework; path = System/Library/Frameworks/CoreGraphics.framework; sourceTree = SDKROOT; }; + B9ADC6C6158CFD36007EF999 /* xdelta3-ios-test-Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = "xdelta3-ios-test-Info.plist"; sourceTree = ""; }; + B9ADC6C8158CFD36007EF999 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/InfoPlist.strings; sourceTree = ""; }; + B9ADC6CA158CFD36007EF999 /* main.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = ""; }; + B9ADC6CC158CFD36007EF999 /* xdelta3-ios-test-Prefix.pch */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "xdelta3-ios-test-Prefix.pch"; sourceTree = ""; }; + B9ADC6CD158CFD36007EF999 /* Xd3iOSAppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = Xd3iOSAppDelegate.h; sourceTree = ""; }; + B9ADC6CE158CFD36007EF999 /* Xd3iOSAppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = Xd3iOSAppDelegate.m; sourceTree = ""; }; + B9ADC6D1158CFD36007EF999 /* en */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = en; path = en.lproj/MainStoryboard_iPhone.storyboard; sourceTree = ""; }; + B9ADC6D4158CFD36007EF999 /* en */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = en; path = 
en.lproj/MainStoryboard_iPad.storyboard; sourceTree = ""; }; + B9ADC6D6158CFD36007EF999 /* Xd3iOSViewController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = Xd3iOSViewController.h; sourceTree = ""; }; + B9ADC6D7158CFD36007EF999 /* Xd3iOSViewController.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = Xd3iOSViewController.m; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + B9ADC6B7158CFD36007EF999 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + B9ADC6BF158CFD36007EF999 /* UIKit.framework in Frameworks */, + B9ADC6C1158CFD36007EF999 /* Foundation.framework in Frameworks */, + B9ADC6C3158CFD36007EF999 /* CoreGraphics.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + B9ADC6AF158CFD36007EF999 = { + isa = PBXGroup; + children = ( + B9ADC6C4158CFD36007EF999 /* xdelta3-ios-test */, + B9ADC6BD158CFD36007EF999 /* Frameworks */, + B9ADC6BB158CFD36007EF999 /* Products */, + ); + sourceTree = ""; + }; + B9ADC6BB158CFD36007EF999 /* Products */ = { + isa = PBXGroup; + children = ( + B9ADC6BA158CFD36007EF999 /* xdelta3-ios-test.app */, + ); + name = Products; + sourceTree = ""; + }; + B9ADC6BD158CFD36007EF999 /* Frameworks */ = { + isa = PBXGroup; + children = ( + B9ADC6BE158CFD36007EF999 /* UIKit.framework */, + B9ADC6C0158CFD36007EF999 /* Foundation.framework */, + B9ADC6C2158CFD36007EF999 /* CoreGraphics.framework */, + ); + name = Frameworks; + sourceTree = ""; + }; + B9ADC6C4158CFD36007EF999 /* xdelta3-ios-test */ = { + isa = PBXGroup; + children = ( + B9001B56158D008900B9E855 /* xdelta3-blkcache.h */, + B9001B57158D008900B9E855 /* xdelta3-cfgs.h */, + B9001B58158D008900B9E855 /* xdelta3-decode.h */, + B9001B59158D008900B9E855 /* xdelta3-djw.h */, + B9001B5A158D008900B9E855 /* xdelta3-fgk.h */, + 
B9001B5B158D008900B9E855 /* xdelta3-hash.h */, + B9001B5C158D008900B9E855 /* xdelta3-internal.h */, + B9001B5D158D008900B9E855 /* xdelta3-list.h */, + B9001B5E158D008900B9E855 /* xdelta3-main.h */, + B9001B5F158D008900B9E855 /* xdelta3-merge.h */, + B9001B60158D008900B9E855 /* xdelta3-python.h */, + B9001B61158D008900B9E855 /* xdelta3-second.h */, + B9001B62158D008900B9E855 /* xdelta3-test.h */, + B9001B63158D008900B9E855 /* xdelta3.c */, + B9001B64158D008900B9E855 /* xdelta3.h */, + B9ADC6CD158CFD36007EF999 /* Xd3iOSAppDelegate.h */, + B9ADC6CE158CFD36007EF999 /* Xd3iOSAppDelegate.m */, + B9ADC6D0158CFD36007EF999 /* MainStoryboard_iPhone.storyboard */, + B9ADC6D3158CFD36007EF999 /* MainStoryboard_iPad.storyboard */, + B9ADC6D6158CFD36007EF999 /* Xd3iOSViewController.h */, + B9ADC6D7158CFD36007EF999 /* Xd3iOSViewController.m */, + B9ADC6C5158CFD36007EF999 /* Supporting Files */, + ); + path = "xdelta3-ios-test"; + sourceTree = ""; + }; + B9ADC6C5158CFD36007EF999 /* Supporting Files */ = { + isa = PBXGroup; + children = ( + B9313C39158D11BA001C1F28 /* file_v1_to_v2.bin */, + B9313C3A158D11BA001C1F28 /* file_v1.bin */, + B9313C3B158D11BA001C1F28 /* file_v2.bin */, + B9ADC6C6158CFD36007EF999 /* xdelta3-ios-test-Info.plist */, + B9ADC6C7158CFD36007EF999 /* InfoPlist.strings */, + B9ADC6CA158CFD36007EF999 /* main.m */, + B9ADC6CC158CFD36007EF999 /* xdelta3-ios-test-Prefix.pch */, + ); + name = "Supporting Files"; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + B9ADC6B9158CFD36007EF999 /* xdelta3-ios-test */ = { + isa = PBXNativeTarget; + buildConfigurationList = B9ADC6DB158CFD36007EF999 /* Build configuration list for PBXNativeTarget "xdelta3-ios-test" */; + buildPhases = ( + B9ADC6B6158CFD36007EF999 /* Sources */, + B9ADC6B7158CFD36007EF999 /* Frameworks */, + B9ADC6B8158CFD36007EF999 /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "xdelta3-ios-test"; + productName = "xdelta3-ios-test"; + 
productReference = B9ADC6BA158CFD36007EF999 /* xdelta3-ios-test.app */; + productType = "com.apple.product-type.application"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + B9ADC6B1158CFD36007EF999 /* Project object */ = { + isa = PBXProject; + attributes = { + LastUpgradeCheck = 0420; + }; + buildConfigurationList = B9ADC6B4158CFD36007EF999 /* Build configuration list for PBXProject "xdelta3-ios-test" */; + compatibilityVersion = "Xcode 3.2"; + developmentRegion = English; + hasScannedForEncodings = 0; + knownRegions = ( + en, + ); + mainGroup = B9ADC6AF158CFD36007EF999; + productRefGroup = B9ADC6BB158CFD36007EF999 /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + B9ADC6B9158CFD36007EF999 /* xdelta3-ios-test */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + B9ADC6B8158CFD36007EF999 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + B9ADC6C9158CFD36007EF999 /* InfoPlist.strings in Resources */, + B9ADC6D2158CFD36007EF999 /* MainStoryboard_iPhone.storyboard in Resources */, + B9ADC6D5158CFD36007EF999 /* MainStoryboard_iPad.storyboard in Resources */, + B9313C3C158D11BA001C1F28 /* file_v1_to_v2.bin in Resources */, + B9313C3D158D11BA001C1F28 /* file_v1.bin in Resources */, + B9313C3E158D11BA001C1F28 /* file_v2.bin in Resources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + B9ADC6B6158CFD36007EF999 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + B9ADC6CB158CFD36007EF999 /* main.m in Sources */, + B9ADC6CF158CFD36007EF999 /* Xd3iOSAppDelegate.m in Sources */, + B9ADC6D8158CFD36007EF999 /* Xd3iOSViewController.m in Sources */, + B9001B65158D008900B9E855 /* xdelta3.c in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin 
PBXVariantGroup section */ + B9ADC6C7158CFD36007EF999 /* InfoPlist.strings */ = { + isa = PBXVariantGroup; + children = ( + B9ADC6C8158CFD36007EF999 /* en */, + ); + name = InfoPlist.strings; + sourceTree = ""; + }; + B9ADC6D0158CFD36007EF999 /* MainStoryboard_iPhone.storyboard */ = { + isa = PBXVariantGroup; + children = ( + B9ADC6D1158CFD36007EF999 /* en */, + ); + name = MainStoryboard_iPhone.storyboard; + sourceTree = ""; + }; + B9ADC6D3158CFD36007EF999 /* MainStoryboard_iPad.storyboard */ = { + isa = PBXVariantGroup; + children = ( + B9ADC6D4158CFD36007EF999 /* en */, + ); + name = MainStoryboard_iPad.storyboard; + sourceTree = ""; + }; +/* End PBXVariantGroup section */ + +/* Begin XCBuildConfiguration section */ + B9ADC6D9158CFD36007EF999 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD_32_BIT)"; + CLANG_ENABLE_OBJC_ARC = YES; + "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; + COPY_PHASE_STRIP = NO; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_DYNAMIC_NO_PIC = NO; + GCC_INPUT_FILETYPE = sourcecode.c.objc; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "XD3_USE_LARGEFILE64=0", + "XD3_POSIX=1", + "EXTERNAL_COMPRESSION=0", + "NOT_MAIN=1", + "XD3_MAIN=1", + "SECONDARY_DJW=1", + "XD3_DEBUG=1", + "REGRESSION_TEST=1", + "SHELL_TESTS=0", + "SECONDARY_FGK=1", + "DEBUG=1", + "$(inherited)", + ); + GCC_SYMBOLS_PRIVATE_EXTERN = NO; + GCC_VERSION = com.apple.compilers.llvm.clang.1_0; + GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 5.0; + OTHER_CFLAGS = ( + "-DXD3_USE_LARGEFILE64=0", + "-DXD3_POSIX=1", + "-DEXTERNAL_COMPRESSION=0", + "-DNOT_MAIN=1", + "-DXD3_MAIN=1", + "-DSECONDARY_DJW=1", + "-DXD3_DEBUG=1", + "-DREGRESSION_TEST=1", + "-DSHELL_TESTS=0", + "-DSECONDARY_FGK=1", + ); + SDKROOT = iphoneos; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + 
B9ADC6DA158CFD36007EF999 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD_32_BIT)"; + CLANG_ENABLE_OBJC_ARC = YES; + "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; + COPY_PHASE_STRIP = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_INPUT_FILETYPE = sourcecode.c.objc; + GCC_PREPROCESSOR_DEFINITIONS = ( + "XD3_USE_LARGEFILE64=0", + "XD3_POSIX=1", + "EXTERNAL_COMPRESSION=0", + "NOT_MAIN=1", + "XD3_MAIN=1", + "SECONDARY_DJW=1", + "XD3_DEBUG=1", + "REGRESSION_TEST=1", + "SHELL_TESTS=0", + "SECONDARY_FGK=1", + ); + GCC_VERSION = com.apple.compilers.llvm.clang.1_0; + GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 5.0; + OTHER_CFLAGS = ( + "-DXD3_USE_LARGEFILE64=0", + "-DXD3_POSIX=1", + "-DEXTERNAL_COMPRESSION=0", + "-DNOT_MAIN=1", + "-DXD3_MAIN=1", + "-DSECONDARY_DJW=1", + "-DXD3_DEBUG=1", + "-DREGRESSION_TEST=1", + "-DSHELL_TESTS=0", + "-DSECONDARY_FGK=1", + ); + SDKROOT = iphoneos; + TARGETED_DEVICE_FAMILY = "1,2"; + VALIDATE_PRODUCT = YES; + }; + name = Release; + }; + B9ADC6DC158CFD36007EF999 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = "xdelta3-ios-test/xdelta3-ios-test-Prefix.pch"; + INFOPLIST_FILE = "xdelta3-ios-test/xdelta3-ios-test-Info.plist"; + OTHER_CFLAGS = ""; + PRODUCT_NAME = "$(TARGET_NAME)"; + WRAPPER_EXTENSION = app; + }; + name = Debug; + }; + B9ADC6DD158CFD36007EF999 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = "xdelta3-ios-test/xdelta3-ios-test-Prefix.pch"; + INFOPLIST_FILE = "xdelta3-ios-test/xdelta3-ios-test-Info.plist"; + OTHER_CFLAGS = ""; + PRODUCT_NAME = "$(TARGET_NAME)"; + WRAPPER_EXTENSION = app; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList 
section */ + B9ADC6B4158CFD36007EF999 /* Build configuration list for PBXProject "xdelta3-ios-test" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + B9ADC6D9158CFD36007EF999 /* Debug */, + B9ADC6DA158CFD36007EF999 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + B9ADC6DB158CFD36007EF999 /* Build configuration list for PBXNativeTarget "xdelta3-ios-test" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + B9ADC6DC158CFD36007EF999 /* Debug */, + B9ADC6DD158CFD36007EF999 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = B9ADC6B1158CFD36007EF999 /* Project object */; +} diff --git a/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.h b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.h new file mode 100644 index 0000000..b421071 --- /dev/null +++ b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.h @@ -0,0 +1,23 @@ +/* xdelta3 - delta compression tools and library -*- Mode: objc *-* + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#import + +@interface Xd3iOSAppDelegate : UIResponder + +@property (strong, nonatomic) UIWindow *window; + +@end diff --git a/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.m b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.m new file mode 100644 index 0000000..629faea --- /dev/null +++ b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.m @@ -0,0 +1,68 @@ +/* xdelta3 - delta compression tools and library -*- Mode: objc *-* + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#import "Xd3iOSAppDelegate.h" + +@implementation Xd3iOSAppDelegate + +@synthesize window = _window; + +- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions +{ + // Override point for customization after application launch. + return YES; +} + +- (void)applicationWillResignActive:(UIApplication *)application +{ + /* + Sent when the application is about to move from active to inactive state. This can occur for certain types of temporary interruptions (such as an incoming phone call or SMS message) or when the user quits the application and it begins the transition to the background state. + Use this method to pause ongoing tasks, disable timers, and throttle down OpenGL ES frame rates. Games should use this method to pause the game. 
+ */ +} + +- (void)applicationDidEnterBackground:(UIApplication *)application +{ + /* + Use this method to release shared resources, save user data, invalidate timers, and store enough application state information to restore your application to its current state in case it is terminated later. + If your application supports background execution, this method is called instead of applicationWillTerminate: when the user quits. + */ +} + +- (void)applicationWillEnterForeground:(UIApplication *)application +{ + /* + Called as part of the transition from the background to the inactive state; here you can undo many of the changes made on entering the background. + */ +} + +- (void)applicationDidBecomeActive:(UIApplication *)application +{ + /* + Restart any tasks that were paused (or not yet started) while the application was inactive. If the application was previously in the background, optionally refresh the user interface. + */ +} + +- (void)applicationWillTerminate:(UIApplication *)application +{ + /* + Called when the application is about to terminate. + Save data if appropriate. + See also applicationDidEnterBackground:. + */ +} + +@end diff --git a/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.h b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.h new file mode 100644 index 0000000..287a4be --- /dev/null +++ b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.h @@ -0,0 +1,28 @@ +/* xdelta3 - delta compression tools and library -*- Mode: objc *-* + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#import + +@interface Xd3iOSViewController : UIViewController { + NSString *inputSeed; +} +- (IBAction)startTest:(id)sender; +@property (weak, nonatomic) IBOutlet UITextField *theSeed; +@property (weak, nonatomic) IBOutlet UITextView *theView; +@property (atomic, retain) NSMutableString *theOutput; +@property (nonatomic) BOOL inTest; + +@end diff --git a/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.m b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.m new file mode 100644 index 0000000..0db7e39 --- /dev/null +++ b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.m @@ -0,0 +1,177 @@ +/* xdelta3 - delta compression tools and library -*- Mode: objc *-* + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#import "Xd3iOSViewController.h" +#include "xdelta3.h" +#include "dispatch/queue.h" +#include "Foundation/NSBundle.h" + +extern void (*xprintf_message_func)(const char* msg); +void print_to_view(const char* buf); +int xd3_main_cmdline(int argc, char **argv); +void do_localfile_test(void); +int compare_files(const char* file1, const char* file2); +Xd3iOSViewController *static_ptr; + +@implementation Xd3iOSViewController +@synthesize theSeed = _theSeed; +@synthesize theView = _theView; +@synthesize theOutput = _theOutput; +@synthesize inTest = _inTest; + +- (void)didReceiveMemoryWarning +{ + [super didReceiveMemoryWarning]; +} + +#pragma mark - View lifecycle + +- (void)viewDidLoad +{ + [super viewDidLoad]; +} + +- (void)viewDidUnload +{ + [self setTheSeed:nil]; + [self setTheView:nil]; + [self setTheView:nil]; + [super viewDidUnload]; +} + +- (void)viewWillAppear:(BOOL)animated +{ + [super viewWillAppear:animated]; +} + +- (void)viewDidAppear:(BOOL)animated +{ + [super viewDidAppear:animated]; +} + +- (void)viewWillDisappear:(BOOL)animated +{ + [super viewWillDisappear:animated]; +} + +- (void)viewDidDisappear:(BOOL)animated +{ + [super viewDidDisappear:animated]; +} + +- (BOOL)shouldAutorotateToInterfaceOrientation:(UIInterfaceOrientation)interfaceOrientation +{ + switch (interfaceOrientation) { + case UIInterfaceOrientationPortrait: + case UIInterfaceOrientationPortraitUpsideDown: + return YES; + default: + break; + } + return NO; +} +- (BOOL)textFieldShouldReturn:(UITextField*)theTextField { + if (theTextField == self.theSeed) { + [theTextField resignFirstResponder]; + } + return YES; +} +- (IBAction)startTest:(id)sender { + if (self.inTest) { + return; + } + self.inTest = YES; + NSString *seedString = self.theSeed.text; + if ([seedString length] == 0) { + seedString = @"RFC3284"; + } + static_ptr = self; + xprintf_message_func = &print_to_view; + self.theOutput = [[NSMutableString alloc] initWithFormat:@"Starting test (seed=%@)\n", seedString]; + 
self.theView.text = self.theOutput; + dispatch_queue_t mq = dispatch_get_main_queue(); + dispatch_queue_t dq = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0); + dispatch_async(dq, ^{ + do_localfile_test(); + char *argv[] = { "xdelta3", "test", NULL }; + int rc = xd3_main_cmdline(2, argv); /* keep the result; a nonzero return is treated as failure (exit-status convention) */ + print_to_view(rc == 0 ? "Finished unittest: success\n" : "Finished unittest: FAILED\n"); + dispatch_async(mq, ^{ + self.inTest = NO; + }); + }); +} +/* Append ns to the on-screen log on the main queue; the log restarts from ns once it reaches 25000 characters. A nil ns is ignored. */ +void printns_to_view(NSString* ns); +void printns_to_view(NSString* ns) { + if (ns == nil) { return; } /* appendString: and initWithString: raise NSInvalidArgumentException on nil */ + dispatch_async(dispatch_get_main_queue(), ^{ + if ([static_ptr.theOutput length] < 25000) { + [static_ptr.theOutput appendString:ns]; + } else { + static_ptr.theOutput = [[NSMutableString alloc] initWithString:ns]; + } + static_ptr.theView.text = static_ptr.theOutput; + CGSize size = static_ptr.theView.contentSize; + [static_ptr.theView scrollRectToVisible:CGRectMake(0, size.height - 1, 1, 1) animated:NO]; + }); +} +/* xprintf message hook: convert a C string to NSString and forward it to printns_to_view. A NULL buf is dropped. */ +void print_to_view(const char* buf) { + NSString *ns = buf ? [NSString stringWithUTF8String:buf] : nil; /* ASCII is a subset of UTF-8, so plain messages decode exactly as before */ + printns_to_view((ns || !buf) ? ns : [NSString stringWithCString:buf encoding:NSISOLatin1StringEncoding]); /* Latin-1 fallback: every byte value decodes, so odd bytes cannot drop the message */ +} + +void do_localfile_test(void) { + NSBundle *bundle; + bundle = [NSBundle mainBundle]; + NSString *localfile1 = [bundle pathForResource:@"file_v1" ofType:@"bin"]; + NSString *localfile2 = [bundle pathForResource:@"file_v2" ofType:@"bin"]; + NSString *localfiled = [bundle pathForResource:@"file_v1_to_v2" ofType:@"bin"]; + printns_to_view([localfile1 stringByAppendingString:@"\n"]); + printns_to_view([localfile2 stringByAppendingString:@"\n"]); + printns_to_view([localfiled stringByAppendingString:@"\n"]); + NSString *tmpdir = NSTemporaryDirectory(); + NSString *tmpfile = [tmpdir stringByAppendingPathComponent:@"delta.tmp"]; + printns_to_view([tmpfile stringByAppendingString:@"\n"]); + char *argv[] = { + "xdelta3", "-dfvv", "-s", + (char*)[localfile1 UTF8String], + (char*)[localfiled UTF8String], + (char*)[tmpfile UTF8String] }; + xd3_main_cmdline(6, argv); + + NSFileManager *filemgr; + + filemgr = 
[NSFileManager defaultManager]; + + if ([filemgr contentsEqualAtPath: localfile2 andPath: tmpfile] == YES) { + printns_to_view(@"File contents match\n"); + } else { + NSError *err1 = NULL; + NSDictionary *d1 = [filemgr attributesOfItemAtPath: tmpfile error: &err1]; + if (err1 != NULL) { + printns_to_view([@"File localfile2 could not stat %s\n" stringByAppendingString: tmpfile]); + } else { + printns_to_view([@"File contents do not match!!!! tmpfile size=" stringByAppendingString: + [[NSMutableString alloc] initWithFormat:@"%llu\n", [d1 fileSize]]]); + } + compare_files([localfile2 UTF8String], [tmpfile UTF8String]); + } + print_to_view("Finished localfile test.\n"); +} + +@end diff --git a/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/InfoPlist.strings b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/InfoPlist.strings new file mode 100644 index 0000000..477b28f --- /dev/null +++ b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/InfoPlist.strings @@ -0,0 +1,2 @@ +/* Localized versions of Info.plist keys */ + diff --git a/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPad.storyboard b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPad.storyboard new file mode 100644 index 0000000..7581bbe --- /dev/null +++ b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPad.storyboard @@ -0,0 +1,77 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPhone.storyboard b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPhone.storyboard new file mode 100644 index 0000000..08b2175 --- /dev/null +++ b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPhone.storyboard @@ -0,0 
+1,27 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/file_v1.bin b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/file_v1.bin new file mode 100644 index 0000000..39e0c17 --- /dev/null +++ b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/file_v1.bin @@ -0,0 +1,1378 @@ +/* xdelta 3 - delta compression tools and library + * Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007, + * 2008, 2009, 2010. Joshua P. MacDonald + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* To know more about Xdelta, start by reading xdelta3.c. If you are + * ready to use the API, continue reading here. There are two + * interfaces -- xd3_encode_input and xd3_decode_input -- plus a dozen + * or so related calls. This interface is styled after Zlib. */ + +#ifndef _XDELTA3_H_ +#define _XDELTA3_H_ + +#include +#include +#include +#include + +/****************************************************************/ + +/* Default configured value of stream->winsize. If the program + * supplies xd3_encode_input() with data smaller than winsize the + * stream will automatically buffer the input, otherwise the input + * buffer is used directly. 
+ */ +#ifndef XD3_DEFAULT_WINSIZE +#define XD3_DEFAULT_WINSIZE (1U << 23) +#endif + +/* Default total size of the source window used in xdelta3-main.h */ +#ifndef XD3_DEFAULT_SRCWINSZ +#define XD3_DEFAULT_SRCWINSZ (1U << 26) +#endif + +/* When Xdelta requests a memory allocation for certain buffers, it + * rounds up to units of at least this size. The code assumes (and + * asserts) that this is a power-of-two. */ +#ifndef XD3_ALLOCSIZE +#define XD3_ALLOCSIZE (1U<<14) +#endif + +/* The XD3_HARDMAXWINSIZE parameter is a safety mechanism to protect + * decoders against malicious files. The decoder will never decode a + * window larger than this. If the file specifies VCD_TARGET the + * decoder may require two buffers of this size. + * + * 8-16MB is reasonable, probably don't need to go larger. */ +#ifndef XD3_HARDMAXWINSIZE +#define XD3_HARDMAXWINSIZE (1U<<24) +#endif +/* The IOPT_SIZE value sets the size of a buffer used to batch + * overlapping copy instructions before they are optimized by picking + * the best non-overlapping ranges. The larger this buffer, the + * longer a forced xd3_srcwin_setup() decision is held off. Setting + * this value to 0 causes an unlimited buffer to be used. */ +#ifndef XD3_DEFAULT_IOPT_SIZE +#define XD3_DEFAULT_IOPT_SIZE (1U<<15) +#endif + +/* The maximum distance backward to search for small matches */ +#ifndef XD3_DEFAULT_SPREVSZ +#define XD3_DEFAULT_SPREVSZ (1U<<18) +#endif + +/* The default compression level + */ +#ifndef XD3_DEFAULT_LEVEL +#define XD3_DEFAULT_LEVEL 3 +#endif + +#ifndef XD3_DEFAULT_SECONDARY_LEVEL +#define XD3_DEFAULT_SECONDARY_LEVEL 6 +#endif + +#ifndef XD3_USE_LARGEFILE64 +#define XD3_USE_LARGEFILE64 1 +#endif + +/* Sizes and addresses within VCDIFF windows are represented as usize_t + * + * For source-file offsets and total file sizes, total input and + * output counts, the xoff_t type is used. 
The decoder and encoder + * generally check for overflow of the xoff_t size (this is tested at + * the 32bit boundary [xdelta3-test.h]). + */ +#ifndef _WIN32 +#include +typedef unsigned int usize_t; +#else +#define WIN32_LEAN_AND_MEAN +#if XD3_USE_LARGEFILE64 +/* 64 bit file offsets: uses GetFileSizeEx and SetFilePointerEx. + * requires Win2000 or newer version of WinNT */ +#define WINVER 0x0500 +#define _WIN32_WINNT 0x0500 +#else +/* 32 bit (DWORD) file offsets: uses GetFileSize and + * SetFilePointer. compatible with win9x-me and WinNT4 */ +#define WINVER 0x0400 +#define _WIN32_WINNT 0x0400 +#endif +#include +typedef unsigned int usize_t; +#ifdef _MSC_VER +#define inline +typedef signed int ssize_t; +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned long uint32_t; +typedef ULONGLONG uint64_t; +#else +/* mingw32, lcc and watcom provide a proper header */ +#include +#endif +#endif + +/* TODO: note that SIZEOF_USIZE_T is never set to 8, although it should be for + * a 64bit platform. OTOH, may be that using 32bits is appropriate even on a + * 64bit platform because we allocate large arrays of these values. */ +#if XD3_USE_LARGEFILE64 +#define __USE_FILE_OFFSET64 1 /* GLIBC: for 64bit fileops, ... ? */ +#ifndef _LARGEFILE_SOURCE +#define _LARGEFILE_SOURCE +#endif +#ifndef _FILE_OFFSET_BITS +#define _FILE_OFFSET_BITS 64 +#endif + +typedef uint64_t xoff_t; +#define SIZEOF_XOFF_T 8 +#define SIZEOF_USIZE_T 4 +#ifndef WIN32 +#define Q "ll" +#else +#define Q "I64" +#endif +#else +typedef uint32_t xoff_t; +#define SIZEOF_XOFF_T 4 +#define SIZEOF_USIZE_T 4 +#define Q +#endif + +#define USE_UINT32 (SIZEOF_USIZE_T == 4 || \ + SIZEOF_XOFF_T == 4 || REGRESSION_TEST) +#define USE_UINT64 (SIZEOF_USIZE_T == 8 || \ + SIZEOF_XOFF_T == 8 || REGRESSION_TEST) + +/* TODO: probably should do something better here. 
*/ +#ifndef UNALIGNED_OK +#if defined(__i386__) || defined(__i486__) || defined(__i586__) || \ + defined(__i686__) || defined(_X86_) || defined(__x86_64__) +#define UNALIGNED_OK 1 +#else +#define UNALIGNED_OK 0 +#endif +#endif + +/**********************************************************************/ + +/* Whether to build the encoder, otherwise only build the decoder. */ +#ifndef XD3_ENCODER +#define XD3_ENCODER 1 +#endif + +/* The code returned when main() fails, also defined in system + includes. */ +#ifndef EXIT_FAILURE +#define EXIT_FAILURE 1 +#endif + +/* REGRESSION TEST enables the "xdelta3 test" command, which runs a + series of self-tests. */ +#ifndef REGRESSION_TEST +#define REGRESSION_TEST 0 +#endif + +/* XD3_DEBUG=1 enables assertions and various statistics. Levels > 1 + * enable some additional output only useful during development and + * debugging. */ +#ifndef XD3_DEBUG +#define XD3_DEBUG 0 +#endif + +#ifndef PYTHON_MODULE +#define PYTHON_MODULE 0 +#endif + +#ifndef SWIG_MODULE +#define SWIG_MODULE 0 +#endif + +/* There are three string matching functions supplied: one fast, one + * slow (default), and one soft-configurable. To disable any of + * these, use the following definitions. */ +#ifndef XD3_BUILD_SLOW +#define XD3_BUILD_SLOW 1 +#endif +#ifndef XD3_BUILD_FAST +#define XD3_BUILD_FAST 1 +#endif +#ifndef XD3_BUILD_FASTER +#define XD3_BUILD_FASTER 1 +#endif +#ifndef XD3_BUILD_FASTEST +#define XD3_BUILD_FASTEST 1 +#endif +#ifndef XD3_BUILD_SOFT +#define XD3_BUILD_SOFT 1 +#endif +#ifndef XD3_BUILD_DEFAULT +#define XD3_BUILD_DEFAULT 1 +#endif + +#if XD3_DEBUG +#include +#endif + +/* XPRINT. Debug output and VCDIFF_TOOLS functions report to stderr. + * I have used an irregular style to abbreviate [fprintf(stderr, "] as + * [DP(RINT "]. 
*/ +#define DP fprintf +#define RINT stderr, + +typedef struct _xd3_stream xd3_stream; +typedef struct _xd3_source xd3_source; +typedef struct _xd3_hash_cfg xd3_hash_cfg; +typedef struct _xd3_smatcher xd3_smatcher; +typedef struct _xd3_rinst xd3_rinst; +typedef struct _xd3_dinst xd3_dinst; +typedef struct _xd3_hinst xd3_hinst; +typedef struct _xd3_winst xd3_winst; +typedef struct _xd3_rpage xd3_rpage; +typedef struct _xd3_addr_cache xd3_addr_cache; +typedef struct _xd3_output xd3_output; +typedef struct _xd3_desect xd3_desect; +typedef struct _xd3_iopt_buflist xd3_iopt_buflist; +typedef struct _xd3_rlist xd3_rlist; +typedef struct _xd3_sec_type xd3_sec_type; +typedef struct _xd3_sec_cfg xd3_sec_cfg; +typedef struct _xd3_sec_stream xd3_sec_stream; +typedef struct _xd3_config xd3_config; +typedef struct _xd3_code_table_desc xd3_code_table_desc; +typedef struct _xd3_code_table_sizes xd3_code_table_sizes; +typedef struct _xd3_slist xd3_slist; +typedef struct _xd3_whole_state xd3_whole_state; +typedef struct _xd3_wininfo xd3_wininfo; + +/* The stream configuration has three callbacks functions, all of + * which may be supplied with NULL values. If config->getblk is + * provided as NULL, the stream returns XD3_GETSRCBLK. */ + +typedef void* (xd3_alloc_func) (void *opaque, + usize_t items, + usize_t size); +typedef void (xd3_free_func) (void *opaque, + void *address); + +typedef int (xd3_getblk_func) (xd3_stream *stream, + xd3_source *source, + xoff_t blkno); + +/* These are internal functions to delay construction of encoding + * tables and support alternate code tables. See the comments & code + * enabled by GENERIC_ENCODE_TABLES. */ + +typedef const xd3_dinst* (xd3_code_table_func) (void); +typedef int (xd3_comp_table_func) (xd3_stream *stream, + const uint8_t **data, + usize_t *size); + + + +#if XD3_DEBUG +#define XD3_ASSERT(x) \ + do { if (! 
(x)) { DP(RINT "%s:%d: XD3 assertion failed: %s\n", __FILE__, __LINE__, #x); \ + abort (); } } while (0) +#else +#define XD3_ASSERT(x) (void)0 +#endif /* XD3_DEBUG */ + +#ifdef __GNUC__ +#ifndef max +#define max(x,y) ({ \ + const typeof(x) _x = (x); \ + const typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x > _y ? _x : _y; }) +#endif /* __GNUC__ */ + +#ifndef min +#define min(x,y) ({ \ + const typeof(x) _x = (x); \ + const typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x < _y ? _x : _y; }) +#endif +#else /* __GNUC__ */ +#ifndef max +#define max(x,y) ((x) < (y) ? (y) : (x)) +#endif +#ifndef min +#define min(x,y) ((x) < (y) ? (x) : (y)) +#endif +#endif /* __GNUC__ */ + +/**************************************************************** + PUBLIC ENUMS + ******************************************************************/ + +/* These are the five ordinary status codes returned by the + * xd3_encode_input() and xd3_decode_input() state machines. */ +typedef enum { + + /* An application must be prepared to handle these five return + * values from either xd3_encode_input or xd3_decode_input, except + * in the case of no-source compression, in which case XD3_GETSRCBLK + * is never returned. More detailed comments for these are given in + * xd3_encode_input and xd3_decode_input comments, below. */ + XD3_INPUT = -17703, /* need input */ + XD3_OUTPUT = -17704, /* have output */ + XD3_GETSRCBLK = -17705, /* need a block of source input (with no + * xd3_getblk function), a chance to do + * non-blocking read. */ + XD3_GOTHEADER = -17706, /* (decode-only) after the initial VCDIFF & + first window header */ + XD3_WINSTART = -17707, /* notification: returned before a window is + * processed, giving a chance to + * XD3_SKIP_WINDOW or not XD3_SKIP_EMIT that + * window. 
*/ + XD3_WINFINISH = -17708, /* notification: returned after + encode/decode & output for a window */ + XD3_TOOFARBACK = -17709, /* (encoder only) may be returned by + getblk() if the block is too old */ + XD3_INTERNAL = -17710, /* internal error */ + XD3_INVALID = -17711, /* invalid config */ + XD3_INVALID_INPUT = -17712, /* invalid input/decoder error */ + XD3_NOSECOND = -17713, /* when secondary compression finds no + improvement. */ + XD3_UNIMPLEMENTED = -17714, /* currently VCD_TARGET */ +} xd3_rvalues; + +/* special values in config->flags */ +typedef enum +{ + XD3_JUST_HDR = (1 << 1), /* used by VCDIFF tools, see + xdelta3-main.h. */ + XD3_SKIP_WINDOW = (1 << 2), /* used by VCDIFF tools, see + xdelta3-main.h. */ + XD3_SKIP_EMIT = (1 << 3), /* used by VCDIFF tools, see + xdelta3-main.h. */ + XD3_FLUSH = (1 << 4), /* flush the stream buffer to + prepare for + xd3_stream_close(). */ + + XD3_SEC_DJW = (1 << 5), /* use DJW static huffman */ + XD3_SEC_FGK = (1 << 6), /* use FGK adaptive huffman */ + XD3_SEC_TYPE = (XD3_SEC_DJW | XD3_SEC_FGK), + + XD3_SEC_NODATA = (1 << 7), /* disable secondary compression of + the data section. */ + XD3_SEC_NOINST = (1 << 8), /* disable secondary compression of + the inst section. */ + XD3_SEC_NOADDR = (1 << 9), /* disable secondary compression of + the addr section. */ + + XD3_SEC_NOALL = (XD3_SEC_NODATA | XD3_SEC_NOINST | XD3_SEC_NOADDR), + + XD3_ADLER32 = (1 << 10), /* enable checksum computation in + the encoder. */ + XD3_ADLER32_NOVER = (1 << 11), /* disable checksum verification in + the decoder. */ + + XD3_ALT_CODE_TABLE = (1 << 12), /* for testing th + e alternate code table encoding. */ + + XD3_NOCOMPRESS = (1 << 13), /* disable ordinary data + * compression feature, only search + * the source, not the target. */ + XD3_BEGREEDY = (1 << 14), /* disable the "1.5-pass + * algorithm", instead use greedy + * matching. Greedy is off by + * default. */ + XD3_ADLER32_RECODE = (1 << 15), /* used by "recode". 
*/ + + /* 4 bits to set the compression level the same as the command-line + * setting -1 through -9 (-0 corresponds to the XD3_NOCOMPRESS flag, + * and is independent of compression level). This is for + * convenience, especially with xd3_encode_memory(). */ + + XD3_COMPLEVEL_SHIFT = 20, /* 20 - 24 */ + XD3_COMPLEVEL_MASK = (0xF << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_1 = (1 << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_2 = (2 << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_3 = (3 << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_6 = (6 << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_9 = (9 << XD3_COMPLEVEL_SHIFT), + +} xd3_flags; + +/* The values of this enumeration are set in xd3_config using the + * smatch_cfg variable. It can be set to default, slow, fast, etc., + * and soft. */ +typedef enum +{ + XD3_SMATCH_DEFAULT = 0, /* Flags may contain XD3_COMPLEVEL bits, + else default. */ + XD3_SMATCH_SLOW = 1, + XD3_SMATCH_FAST = 2, + XD3_SMATCH_FASTER = 3, + XD3_SMATCH_FASTEST = 4, + XD3_SMATCH_SOFT = 5, +} xd3_smatch_cfg; + +/********************************************************************* + PRIVATE ENUMS +**********************************************************************/ + +/* stream->match_state is part of the xd3_encode_input state machine + * for source matching: + * + * 1. the XD3_GETSRCBLK block-read mechanism means reentrant matching + * 2. this state spans encoder windows: a match and end-of-window + * will continue in the next 3. the initial target byte and source + * byte are a presumed match, to avoid some computation in case the + * inputs are identical. + */ +typedef enum { + + MATCH_TARGET = 0, /* in this state, attempt to match the start of + * the target with the previously set source + * address (initially 0). */ + MATCH_BACKWARD = 1, /* currently expanding a match backward in the + source/target. */ + MATCH_FORWARD = 2, /* currently expanding a match forward in the + source/target. */ + MATCH_SEARCHING = 3, /* currently searching for a match. 
*/ + +} xd3_match_state; + +/* The xd3_encode_input state machine steps through these states in + * the following order. The matcher is reentrant and returns + * XD3_INPUT whenever it requires more data. After receiving + * XD3_INPUT, if the application reads EOF it should call + * xd3_stream_close(). + */ +typedef enum { + + ENC_INIT = 0, /* xd3_encode_input has never been called. */ + ENC_INPUT = 1, /* waiting for xd3_avail_input () to be called. */ + ENC_SEARCH = 2, /* currently searching for matches. */ + ENC_INSTR = 3, /* currently formatting output. */ + ENC_FLUSH = 4, /* currently emitting output. */ + ENC_POSTOUT = 5, /* after an output section. */ + ENC_POSTWIN = 6, /* after all output sections. */ + ENC_ABORTED = 7, /* abort. */ +} xd3_encode_state; + +/* The xd3_decode_input state machine steps through these states in + * the following order. The matcher is reentrant and returns + * XD3_INPUT whenever it requires more data. After receiving + * XD3_INPUT, if the application reads EOF it should call + * xd3_stream_close(). 
+ * + * 0-8: the VCDIFF header + * 9-18: the VCDIFF window header + * 19-21: the three primary sections: data, inst, addr + * 22: producing output: returns XD3_OUTPUT, possibly XD3_GETSRCBLK, + * 23: return XD3_WINFINISH, set state=9 to decode more input + */ +typedef enum { + + DEC_VCHEAD = 0, /* VCDIFF header */ + DEC_HDRIND = 1, /* header indicator */ + + DEC_SECONDID = 2, /* secondary compressor ID */ + + DEC_TABLEN = 3, /* code table length */ + DEC_NEAR = 4, /* code table near */ + DEC_SAME = 5, /* code table same */ + DEC_TABDAT = 6, /* code table data */ + + DEC_APPLEN = 7, /* application data length */ + DEC_APPDAT = 8, /* application data */ + + DEC_WININD = 9, /* window indicator */ + + DEC_CPYLEN = 10, /* copy window length */ + DEC_CPYOFF = 11, /* copy window offset */ + + DEC_ENCLEN = 12, /* length of delta encoding */ + DEC_TGTLEN = 13, /* length of target window */ + DEC_DELIND = 14, /* delta indicator */ + + DEC_DATALEN = 15, /* length of ADD+RUN data */ + DEC_INSTLEN = 16, /* length of instruction data */ + DEC_ADDRLEN = 17, /* length of address data */ + + DEC_CKSUM = 18, /* window checksum */ + + DEC_DATA = 19, /* data section */ + DEC_INST = 20, /* instruction section */ + DEC_ADDR = 21, /* address section */ + + DEC_EMIT = 22, /* producing data */ + + DEC_FINISH = 23, /* window finished */ + + DEC_ABORTED = 24, /* xd3_abort_stream */ +} xd3_decode_state; + +/************************************************************ + internal types + ************************************************************/ + +/* instruction lists used in the IOPT buffer */ +struct _xd3_rlist +{ + xd3_rlist *next; + xd3_rlist *prev; +}; + +/* the raw encoding of an instruction used in the IOPT buffer */ +struct _xd3_rinst +{ + uint8_t type; + uint8_t xtra; + uint8_t code1; + uint8_t code2; + usize_t pos; + usize_t size; + xoff_t addr; + xd3_rlist link; +}; + +/* the code-table form of a single- or double-instruction */ +struct _xd3_dinst +{ + uint8_t type1; + uint8_t 
size1; + uint8_t type2; + uint8_t size2; +}; + +/* the decoded form of a single (half) instruction. */ +struct _xd3_hinst +{ + uint8_t type; + uint32_t size; /* TODO: why decode breaks if this is usize_t? */ + uint32_t addr; /* TODO: why decode breaks if this is usize_t? */ +}; + +/* the form of a whole-file instruction */ +struct _xd3_winst +{ + uint8_t type; /* RUN, ADD, COPY */ + uint8_t mode; /* 0, VCD_SOURCE, VCD_TARGET */ + usize_t size; + xoff_t addr; + xoff_t position; /* absolute position of this inst */ +}; + +/* used by the encoder to buffer output in sections. list of blocks. */ +struct _xd3_output +{ + uint8_t *base; + usize_t next; + usize_t avail; + xd3_output *next_page; +}; + +/* used by the decoder to buffer input in sections. */ +struct _xd3_desect +{ + const uint8_t *buf; + const uint8_t *buf_max; + uint32_t size; /* TODO: why decode breaks if this is usize_t? */ + usize_t pos; + + /* used in xdelta3-decode.h */ + uint8_t *copied1; + usize_t alloc1; + + /* used in xdelta3-second.h */ + uint8_t *copied2; + usize_t alloc2; +}; + +/* the VCDIFF address cache, see the RFC */ +struct _xd3_addr_cache +{ + usize_t s_near; + usize_t s_same; + usize_t next_slot; /* the circular index for near */ + usize_t *near_array; /* array of size s_near */ + usize_t *same_array; /* array of size s_same*256 */ +}; + +/* the IOPT buffer list is just a list of buffers, which may be allocated + * during encode when using an unlimited buffer. */ +struct _xd3_iopt_buflist +{ + xd3_rinst *buffer; + xd3_iopt_buflist *next; +}; + +/* This is the record of a pre-compiled configuration, a subset of + xd3_config. */ +struct _xd3_smatcher +{ + const char *name; + int (*string_match) (xd3_stream *stream); + usize_t large_look; + usize_t large_step; + usize_t small_look; + usize_t small_chain; + usize_t small_lchain; + usize_t max_lazy; + usize_t long_enough; +}; + +/* hash table size & power-of-two hash function. 
*/ +struct _xd3_hash_cfg +{ + usize_t size; + usize_t shift; + usize_t mask; +}; + +/* the sprev list */ +struct _xd3_slist +{ + usize_t last_pos; +}; + +/* window info (for whole state) */ +struct _xd3_wininfo { + xoff_t offset; + usize_t length; + uint32_t adler32; +}; + +/* whole state for, e.g., merge */ +struct _xd3_whole_state { + usize_t addslen; + uint8_t *adds; + usize_t adds_alloc; + + usize_t instlen; + xd3_winst *inst; + usize_t inst_alloc; + + usize_t wininfolen; + xd3_wininfo *wininfo; + usize_t wininfo_alloc; + + xoff_t length; +}; + +/******************************************************************** + public types + *******************************************************************/ + +/* Settings for the secondary compressor. */ +struct _xd3_sec_cfg +{ + int data_type; /* Which section. (set automatically) */ + usize_t ngroups; /* Number of DJW Huffman groups. */ + usize_t sector_size; /* Sector size. */ + int inefficient; /* If true, ignore efficiency check [avoid XD3_NOSECOND]. */ +}; + +/* This is the user-visible stream configuration. */ +struct _xd3_config +{ + usize_t winsize; /* The encoder window size. */ + usize_t sprevsz; /* How far back small string + matching goes */ + usize_t iopt_size; /* entries in the + instruction-optimizing + buffer */ + usize_t srcwin_maxsz; /* srcwin_size grows by a factor + of 2 when no matches are + found. encoder will not seek + back further than this. */ + + xd3_getblk_func *getblk; /* The three callbacks. */ + xd3_alloc_func *alloc; + xd3_free_func *freef; + void *opaque; /* Not used. */ + int flags; /* stream->flags are initialized + * from xd3_config & never + * modified by the library. Use + * xd3_set_flags to modify flags + * settings mid-stream. 
*/ + + xd3_sec_cfg sec_data; /* Secondary compressor config: data */ + xd3_sec_cfg sec_inst; /* Secondary compressor config: inst */ + xd3_sec_cfg sec_addr; /* Secondary compressor config: addr */ + + xd3_smatch_cfg smatch_cfg; /* See enum: use fields below for + soft config */ + xd3_smatcher smatcher_soft; +}; + +/* The primary source file object. You create one of these objects and + * initialize the first four fields. This library maintains the next + * 5 fields. The configured getblk implementation is responsible for + * setting the final 3 fields when called (and/or when XD3_GETSRCBLK + * is returned). + */ +struct _xd3_source +{ + /* you set */ + usize_t blksize; /* block size */ + const char *name; /* its name, for debug/print + purposes */ + void *ioh; /* opaque handle */ + + /* getblk sets */ + xoff_t curblkno; /* current block number: client + sets after getblk request */ + usize_t onblk; /* number of bytes on current + block: client sets, must be >= 0 + and <= blksize */ + const uint8_t *curblk; /* current block array: client + sets after getblk request */ + + /* xd3 sets */ + usize_t srclen; /* length of this source window */ + xoff_t srcbase; /* offset of this source window + in the source itself */ + int shiftby; /* for power-of-two blocksizes */ + int maskby; /* for power-of-two blocksizes */ + xoff_t cpyoff_blocks; /* offset of dec_cpyoff in blocks */ + usize_t cpyoff_blkoff; /* offset of copy window in + blocks, remainder */ + xoff_t getblkno; /* request block number: xd3 sets + current getblk request */ + + /* See xd3_getblk() */ + xoff_t max_blkno; /* Maximum block, if eof is known, + * otherwise, equals frontier_blkno + * (initially 0). */ + xoff_t frontier_blkno; /* If eof is unknown, the next + * source position to be read. + * Otherwise, equal to + * max_blkno. */ + usize_t onlastblk; /* Number of bytes on max_blkno */ + int eof_known; /* Set to true when the first + * partial block is read. 
*/ +}; + +/* The primary xd3_stream object, used for encoding and decoding. You + * may access only two fields: avail_out, next_out. Use the methods + * above to operate on xd3_stream. */ +struct _xd3_stream +{ + /* input state */ + const uint8_t *next_in; /* next input byte */ + usize_t avail_in; /* number of bytes available at + next_in */ + xoff_t total_in; /* how many bytes in */ + + /* output state */ + uint8_t *next_out; /* next output byte */ + usize_t avail_out; /* number of bytes available at + next_out */ + usize_t space_out; /* total out space */ + xoff_t current_window; /* number of windows encoded/decoded */ + xoff_t total_out; /* how many bytes out */ + + /* to indicate an error, xd3 sets */ + const char *msg; /* last error message, NULL if + no error */ + + /* source configuration */ + xd3_source *src; /* source array */ + + /* encoder memory configuration */ + usize_t winsize; /* suggested window size */ + usize_t sprevsz; /* small string, previous window + size (power of 2) */ + usize_t sprevmask; /* small string, previous window + size mask */ + usize_t iopt_size; + usize_t iopt_unlimited; + usize_t srcwin_maxsz; + + /* general configuration */ + xd3_getblk_func *getblk; /* set nxtblk, nxtblkno to scanblkno */ + xd3_alloc_func *alloc; /* malloc function */ + xd3_free_func *free; /* free function */ + void* opaque; /* private data object passed to + alloc, free, and getblk */ + int flags; /* various options */ + + /* secondary compressor configuration */ + xd3_sec_cfg sec_data; /* Secondary compressor config: data */ + xd3_sec_cfg sec_inst; /* Secondary compressor config: inst */ + xd3_sec_cfg sec_addr; /* Secondary compressor config: addr */ + + xd3_smatcher smatcher; + + usize_t *large_table; /* table of large checksums */ + xd3_hash_cfg large_hash; /* large hash config */ + + usize_t *small_table; /* table of small checksums */ + xd3_slist *small_prev; /* table of previous offsets, + circular linked list */ + int small_reset; /* true if small 
table should + be reset */ + + xd3_hash_cfg small_hash; /* small hash config */ + xd3_addr_cache acache; /* the vcdiff address cache */ + xd3_encode_state enc_state; /* state of the encoder */ + + usize_t taroff; /* base offset of the target input */ + usize_t input_position; /* current input position */ + usize_t min_match; /* current minimum match + length, avoids redundant + matches */ + usize_t unencoded_offset; /* current input, first + * unencoded offset. this value + * is <= the first instruction's + * position in the iopt buffer, + * if there is at least one + * match in the buffer. */ + + // SRCWIN + // these variables plus srcwin_maxsz above (set by config) + int srcwin_decided; /* boolean: true if srclen and + srcbase have been + decided. */ + int srcwin_decided_early; /* boolean: true if srclen + and srcbase were + decided early. */ + xoff_t srcwin_cksum_pos; /* Source checksum position */ + + // MATCH + xd3_match_state match_state; /* encoder match state */ + xoff_t match_srcpos; /* current match source + position relative to + srcbase */ + xoff_t match_last_srcpos; /* previously attempted + * srcpos, to avoid loops. 
*/ + xoff_t match_minaddr; /* smallest matching address to + * set window params (reset each + * window xd3_encode_reset) */ + xoff_t match_maxaddr; /* largest matching address to + * set window params (reset each + * window xd3_encode_reset) */ + usize_t match_back; /* match extends back so far */ + usize_t match_maxback; /* match extends back maximum */ + usize_t match_fwd; /* match extends forward so far */ + usize_t match_maxfwd; /* match extends forward maximum */ + + xoff_t maxsrcaddr; /* address of the last source + match (across windows) */ + + uint8_t *buf_in; /* for saving buffered input */ + usize_t buf_avail; /* amount of saved input */ + const uint8_t *buf_leftover; /* leftover content of next_in + (i.e., user's buffer) */ + usize_t buf_leftavail; /* amount of leftover content */ + + xd3_output *enc_current; /* current output buffer */ + xd3_output *enc_free; /* free output buffers */ + xd3_output *enc_heads[4]; /* array of encoded outputs: + head of chain */ + xd3_output *enc_tails[4]; /* array of encoded outputs: + tail of chain */ + uint32_t recode_adler32; /* set the adler32 checksum + * during "recode". */ + + xd3_rlist iopt_used; /* instruction optimizing buffer */ + xd3_rlist iopt_free; + xd3_rinst *iout; /* next single instruction */ + xd3_iopt_buflist *iopt_alloc; + + const uint8_t *enc_appheader; /* application header to encode */ + usize_t enc_appheadsz; /* application header size */ + + /* decoder stuff */ + xd3_decode_state dec_state; /* current DEC_XXX value */ + usize_t dec_hdr_ind; /* VCDIFF header indicator */ + usize_t dec_win_ind; /* VCDIFF window indicator */ + usize_t dec_del_ind; /* VCDIFF delta indicator */ + + uint8_t dec_magic[4]; /* First four bytes */ + usize_t dec_magicbytes; /* Magic position. */ + + usize_t dec_secondid; /* Optional secondary compressor ID. */ + + /* TODO: why decode breaks if this is usize_t? */ + uint32_t dec_codetblsz; /* Optional code table: length. 
*/ + uint8_t *dec_codetbl; /* Optional code table: storage. */ + usize_t dec_codetblbytes; /* Optional code table: position. */ + + /* TODO: why decode breaks if this is usize_t? */ + uint32_t dec_appheadsz; /* Optional application header: + size. */ + uint8_t *dec_appheader; /* Optional application header: + storage */ + usize_t dec_appheadbytes; /* Optional application header: + position. */ + + usize_t dec_cksumbytes; /* Optional checksum: position. */ + uint8_t dec_cksum[4]; /* Optional checksum: storage. */ + uint32_t dec_adler32; /* Optional checksum: value. */ + + /* TODO: why decode breaks if this is usize_t? */ + uint32_t dec_cpylen; /* length of copy window + (VCD_SOURCE or VCD_TARGET) */ + xoff_t dec_cpyoff; /* offset of copy window + (VCD_SOURCE or VCD_TARGET) */ + /* TODO: why decode breaks if this is usize_t? */ + uint32_t dec_enclen; /* length of delta encoding */ + /* TODO: why decode breaks if this is usize_t? */ + uint32_t dec_tgtlen; /* length of target window */ + +#if USE_UINT64 + uint64_t dec_64part; /* part of a decoded uint64_t */ +#endif +#if USE_UINT32 + uint32_t dec_32part; /* part of a decoded uint32_t */ +#endif + + xoff_t dec_winstart; /* offset of the start of + current target window */ + xoff_t dec_window_count; /* == current_window + 1 in + DEC_FINISH */ + usize_t dec_winbytes; /* bytes of the three sections + so far consumed */ + usize_t dec_hdrsize; /* VCDIFF + app header size */ + + const uint8_t *dec_tgtaddrbase; /* Base of decoded target + addresses (addr >= + dec_cpylen). */ + const uint8_t *dec_cpyaddrbase; /* Base of decoded copy + addresses (addr < + dec_cpylen). 
*/ + + usize_t dec_position; /* current decoder position + counting the cpylen + offset */ + usize_t dec_maxpos; /* maximum decoder position + counting the cpylen + offset */ + xd3_hinst dec_current1; /* current instruction */ + xd3_hinst dec_current2; /* current instruction */ + + uint8_t *dec_buffer; /* Decode buffer */ + uint8_t *dec_lastwin; /* In case of VCD_TARGET, the + last target window. */ + usize_t dec_lastlen; /* length of the last target + window */ + xoff_t dec_laststart; /* offset of the start of last + target window */ + usize_t dec_lastspace; /* allocated space of last + target window, for reuse */ + + xd3_desect inst_sect; /* staging area for decoding + window sections */ + xd3_desect addr_sect; + xd3_desect data_sect; + + xd3_code_table_func *code_table_func; + xd3_comp_table_func *comp_table_func; + const xd3_dinst *code_table; + const xd3_code_table_desc *code_table_desc; + xd3_dinst *code_table_alloc; + + /* secondary compression */ + const xd3_sec_type *sec_type; + xd3_sec_stream *sec_stream_d; + xd3_sec_stream *sec_stream_i; + xd3_sec_stream *sec_stream_a; + + /* state for reconstructing whole files (e.g., for merge), this only + * supports loading USIZE_T_MAX instructions, adds, etc. */ + xd3_whole_state whole_target; + + /* statistics */ + xoff_t n_scpy; + xoff_t n_tcpy; + xoff_t n_add; + xoff_t n_run; + + xoff_t l_scpy; + xoff_t l_tcpy; + xoff_t l_add; + xoff_t l_run; + + usize_t i_slots_used; + +#if XD3_DEBUG + usize_t large_ckcnt; + + /* memory usage */ + usize_t alloc_cnt; + usize_t free_cnt; +#endif +}; + +/************************************************************************** + PUBLIC FUNCTIONS + **************************************************************************/ + +/* This function configures an xd3_stream using the provided in-memory + * input buffer, source buffer, output buffer, and flags. The output + * array must be large enough or else ENOSPC will be returned. This + * is the simplest in-memory encoding interface. 
*/ +int xd3_encode_memory (const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output_buffer, + usize_t *output_size, + usize_t avail_output, + int flags); + +/* The reverse of xd3_encode_memory. */ +int xd3_decode_memory (const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output_buf, + usize_t *output_size, + usize_t avail_output, + int flags); + +/* This function encodes an in-memory input using a pre-configured + * xd3_stream. This allows the caller to set a variety of options + * which are not available in the xd3_encode/decode_memory() + * functions. + * + * The output array must be large enough to hold the output or else + * ENOSPC is returned. The source (if any) should be set using + * xd3_set_source_and_size() with a single-block xd3_source. This + * calls the underlying non-blocking interfaces, + * xd3_encode/decode_input(), handling the necessary input/output + * states. This method may be considered a reference for any + * application using xd3_encode_input() directly. + * + * xd3_stream stream; + * xd3_config config; + * xd3_source src; + * + * memset (& src, 0, sizeof (src)); + * memset (& stream, 0, sizeof (stream)); + * memset (& config, 0, sizeof (config)); + * + * if (source != NULL) + * { + * src.size = source_size; + * src.blksize = source_size; + * src.curblkno = 0; + * src.onblk = source_size; + * src.curblk = source; + * xd3_set_source(&stream, &src); + * } + * + * config.flags = flags; + * config.srcwin_maxsz = source_size; + * config.winsize = input_size; + * + * ... set smatcher, appheader, encoding-table, compression-level, etc. + * + * xd3_config_stream(&stream, &config); + * xd3_encode_stream(&stream, ...); + * xd3_free_stream(&stream); + */ +int xd3_encode_stream (xd3_stream *stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t avail_output); + +/* The reverse of xd3_encode_stream. 
*/ +int xd3_decode_stream (xd3_stream *stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t avail_size); + +/* This is the non-blocking interface. + * + * Handling input and output states is the same for encoding or + * decoding using the xd3_avail_input() and xd3_consume_output() + * routines, inlined below. + * + * Return values: + * + * XD3_INPUT: the process requires more input: call + * xd3_avail_input() then repeat + * + * XD3_OUTPUT: the process has more output: read stream->next_out, + * stream->avail_out, then call xd3_consume_output(), + * then repeat + * + * XD3_GOTHEADER: (decoder-only) notification returned following the + * VCDIFF header and first window header. the decoder + * may use the header to configure itself. + * + * XD3_WINSTART: a general notification returned once for each + * window except the 0-th window, which is implied by + * XD3_GOTHEADER. It is recommended to use a + * switch-stmt such as: + * + * ... + * again: + * switch ((ret = xd3_decode_input (stream))) { + * case XD3_GOTHEADER: { + * assert(stream->current_window == 0); + * stuff; + * } + * // fallthrough + * case XD3_WINSTART: { + * something(stream->current_window); + * goto again; + * } + * ... + * + * XD3_WINFINISH: a general notification, following the complete + * input & output of a window. at this point, + * stream->total_in and stream->total_out are consistent + * for either encoding or decoding. + * + * XD3_GETSRCBLK: If the xd3_getblk() callback is NULL, this value + * is returned to initiate a non-blocking source read. + */ +int xd3_decode_input (xd3_stream *stream); +int xd3_encode_input (xd3_stream *stream); + +/* The xd3_config structure is used to initialize a stream - all data + * is copied into stream so config may be a temporary variable. See + * the [documentation] or comments on the xd3_config structure. 
*/ +int xd3_config_stream (xd3_stream *stream, + xd3_config *config); + +/* Since Xdelta3 doesn't open any files, xd3_close_stream is just an + * error check that the stream is in a proper state to be closed: this + * means the encoder is flushed and the decoder is at a window + * boundary. The application is responsible for freeing any of the + * resources it supplied. */ +int xd3_close_stream (xd3_stream *stream); + +/* This arranges for closing the stream to succeed. Does not free the + * stream.*/ +void xd3_abort_stream (xd3_stream *stream); + +/* xd3_free_stream frees all memory allocated for the stream. The + * application is responsible for freeing any of the resources it + * supplied. */ +void xd3_free_stream (xd3_stream *stream); + +/* This function informs the encoder or decoder that source matching + * (i.e., delta-compression) is possible. For encoding, this should + * be called before the first xd3_encode_input. A NULL source is + * ignored. For decoding, this should be called before the first + * window is decoded, but the appheader may be read first + * (XD3_GOTHEADER). After decoding the header, call xd3_set_source() + * if you have a source file. Note: if (stream->dec_win_ind & VCD_SOURCE) + * is true, it means the first window expects there to be a source file. + */ +int xd3_set_source (xd3_stream *stream, + xd3_source *source); + +/* If the source size is known, call this instead of xd3_set_source() + * to avoid having stream->getblk called (and/or to avoid XD3_GETSRCBLK). + * + * Follow these steps: + xd3_source source; + memset(&source, 0, sizeof(source)); + source.blksize = size; + source.onblk = size; + source.curblk = buf; + source.curblkno = 0; + int ret = xd3_set_source_and_size(&stream, &source, size); + ... + */ +int xd3_set_source_and_size (xd3_stream *stream, + xd3_source *source, + xoff_t source_size); + +/* This should be called before the first call to xd3_encode_input() + * to include application-specific data in the VCDIFF header. 
*/ +void xd3_set_appheader (xd3_stream *stream, + const uint8_t *data, + usize_t size); + +/* xd3_get_appheader may be called in the decoder after XD3_GOTHEADER. + * For convenience, the decoder always adds a single byte padding to + * the end of the application header, which is set to zero in case the + * application header is a string. */ +int xd3_get_appheader (xd3_stream *stream, + uint8_t **data, + usize_t *size); + +/* To generate a VCDIFF encoded delta with xd3_encode_init() from + * another format, use: + * + * xd3_encode_init_partial() -- initialize encoder state (w/o hash tables) + * xd3_init_cache() -- reset VCDIFF address cache + * xd3_found_match() -- to report a copy instruction + * + * set stream->enc_state to ENC_INSTR and call xd3_encode_input as usual. + */ +int xd3_encode_init_partial (xd3_stream *stream); +void xd3_init_cache (xd3_addr_cache* acache); +int xd3_found_match (xd3_stream *stream, + usize_t pos, usize_t size, + xoff_t addr, int is_source); + +/* Gives an error string for xdelta3-specific errors, returns NULL for + system errors */ +const char* xd3_strerror (int ret); + +/* For convenience, zero & initialize the xd3_config structure with + specified flags. */ +static inline +void xd3_init_config (xd3_config *config, + int flags) +{ + memset (config, 0, sizeof (*config)); + config->flags = flags; +} + +/* This supplies some input to the stream. + * + * For encoding, if the input is larger than the configured window + * size (xd3_config.winsize), the entire input will be consumed and + * encoded anyway. If you wish to strictly limit the window size, + * limit the buffer passed to xd3_avail_input to the window size. + * + * For encoding, if the input is smaller than the configured window + * size (xd3_config.winsize), the library will create a window-sized + * buffer and accumulate input until a full-sized window can be + * encoded. XD3_INPUT will be returned. 
The input must remain valid + * until the next time xd3_encode_input() returns XD3_INPUT. + * + * For decoding, the input will be consumed entirely before XD3_INPUT + * is returned again. + */ +static inline +void xd3_avail_input (xd3_stream *stream, + const uint8_t *idata, + usize_t isize) +{ + /* Even if isize is zero, the code expects a non-NULL idata. Why? + * It uses this value to determine whether xd3_avail_input has ever + * been called. If xd3_encode_input is called before + * xd3_avail_input it will return XD3_INPUT right away without + * allocating a stream->winsize buffer. This is to avoid an + * unwanted allocation. */ + XD3_ASSERT (idata != NULL || isize == 0); + + stream->next_in = idata; + stream->avail_in = isize; +} + +/* This acknowledges receipt of output data, must be called after any + * XD3_OUTPUT return. */ +static inline +void xd3_consume_output (xd3_stream *stream) +{ + stream->avail_out = 0; +} + +/* These are set for each XD3_WINFINISH return. */ +static inline +int xd3_encoder_used_source (xd3_stream *stream) { + return stream->src != NULL && stream->src->srclen > 0; +} +static inline +xoff_t xd3_encoder_srcbase (xd3_stream *stream) { + return stream->src->srcbase; +} +static inline +usize_t xd3_encoder_srclen (xd3_stream *stream) { + return stream->src->srclen; +} + +/* Checks for legal flag changes. */ +static inline +void xd3_set_flags (xd3_stream *stream, int flags) +{ + /* The bitwise difference should contain only XD3_FLUSH or + XD3_SKIP_WINDOW */ + XD3_ASSERT(((flags ^ stream->flags) & ~(XD3_FLUSH | XD3_SKIP_WINDOW)) == 0); + stream->flags = flags; +} + +/* Gives some extra information about the latest library error, if any + * is known. */ +static inline +const char* xd3_errstring (xd3_stream *stream) +{ + return stream->msg ? stream->msg : ""; +} + + +/* 64-bit divisions are expensive, which is why we require a + * power-of-two source->blksize. 
To relax this restriction is + * relatively easy, see the history for xd3_blksize_div(). */ +static inline +void xd3_blksize_div (const xoff_t offset, + const xd3_source *source, + xoff_t *blkno, + usize_t *blkoff) { + *blkno = (xoff_t) (offset >> source->shiftby); + *blkoff = (usize_t) (offset & source->maskby); + XD3_ASSERT (*blkoff < source->blksize); +} + +static inline +void xd3_blksize_add (xoff_t *blkno, + usize_t *blkoff, + const xd3_source *source, + const usize_t add) +{ + usize_t blkdiff; + + /* Does not check for overflow, checked in xdelta3-decode.h. */ + *blkoff += add; + blkdiff = *blkoff >> source->shiftby; + + if (blkdiff) + { + *blkno += blkdiff; + *blkoff &= source->maskby; + } + + XD3_ASSERT (*blkoff < source->blksize); +} + +#define XD3_NOOP 0U +#define XD3_ADD 1U +#define XD3_RUN 2U +#define XD3_CPY 3U /* XD3_CPY rtypes are represented as (XD3_CPY + + * copy-mode value) */ + +#if XD3_DEBUG +#define IF_DEBUG(x) x +#else +#define IF_DEBUG(x) +#endif +#if XD3_DEBUG > 1 +#define IF_DEBUG1(x) x +#else +#define IF_DEBUG1(x) +#endif +#if XD3_DEBUG > 2 +#define IF_DEBUG2(x) x +#else +#define IF_DEBUG2(x) +#endif + +#define SIZEOF_ARRAY(x) (sizeof(x) / sizeof(x[0])) + +#endif /* _XDELTA3_H_ */ diff --git a/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/file_v1_to_v2.bin b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/file_v1_to_v2.bin new file mode 100644 index 0000000000000000000000000000000000000000..8621f0e154d1246abcf791fc7a38d004b216e90f GIT binary patch literal 52723 zcma&OX>=p$c`nxT6$lcTk!%ax051KJwQtxLk|5Z_2EnD9+YkhoX7_NwLIJ2I2tX*H zNp=r)uOeH!9Oq25j7D~Ha&pi8ksly?nV!{B;%kq_mYrN{MwadPj%3N!&F}mOwcL|D z82jYpXUJh;t*WoS?|q;5d7ig=e)K2*;fljsv?}KQy|Dklss;l8@*?!{i?09b|J=Ru zx!0Dz^!@u+{@nN3Fa7pszyGO~*Z;Qcrj+V#_fEmAH1jm#-YlE6RLfT^n>K6opw=wY zCf<_lX1-d`^OaiFlneQ0UdKzCx9g_Sv}vt~@7k?;z1C>bJgqlsrAGef2z%z6^tjeI zv}wgUH0iC1bs&|S&H7XzaD061FWtB5{#v6HxQ`F~<>paE_S39s(vA6aBs>wFqMHk5 
z)ofVCoK-BE4YO*b=Wud5UuzuYoBlj)Rjs0B7L3{vuGqAt@xr)V&KJyP9urZZ2d7O_ zH)?oIk1brvQNDUgi)OyrYM8d)U9@SwO0AG*ri|=^Q}r#)|$nJ`Du(vZ`pSK$izbE$NAH6EUuf?X5$nS zZPyG8(Yj~uVOSe?v{_oVnt@;-w9(8TR7{N?w3_)!(`;1nEgKhldSsiGd!|)2Dy;%7 zo0yb_c@KYPp>{NRq|qp?n)!xCBiep8vpi3w{DEDow3_l_W+OwThFQ^deYR4nH868W z_N;ATN1Uc=lxsz-aur813n47n=ElIxXk1B>9F>J}{e@5iB((Ib$Kz?Iwm) z(k(mer%S6CcrLxUw6;ny0huyZz--w~TCdr*bx^6*o7RzaKW~?i2j*$56YsQiiZfof z&CV5?n3~h^YOSh|Z{o=2k$sRi4v+JVg1%igs~=C%F)pfUR!%i)Q?rrZK-T0d^m?mN z#k7_%;%dHfdfzM{Tyc>F>9%1tL;jdv$BJ(nO~myPj!*L%uAa`V<*qMh+}1(9pkr~( zvQ^S>;F`Obw;Grm3*p_w__jNtK3>V+KOL8<$U_VBE#vC*MfXxwL&94KgS>rgX|zzN zwGQrCSWU|?%9vrZTFQEu&U#QoEiv%p)w`e!YgZm!f{Uy|g;Fwq2~WY^&AK6(ww2JjSP{M2$ zOv6GXO#x@h2v^LPQM68qht=9K7VE&Yn}JFUVU|g+tpwVJqqyq&j?EZsKB$Xi-(9RL^0bk2XnZ&r@2!U6Cre4|yd zfFfBlZ`zvCIIa8X*x1Fh}DK2 zDBL}c1P&TzA>w~vXm$I{y*8iAW;SrKt6Q1XrOawNW7~j`TB*8#)4CB!1q`WS)f!a7 zpINbiag~Zuw#<92YDKP_&9a5>RP~VmgdXQ@jap5FP4Q5wTXj>Csex=T4o~audo>_M z0U3Qbe_CtNMxGv6%~~g4EW4ml^TaeH*#Pk2;FSW_33<0SU#m57S>}{wAiXL`jHO!B z-N-i^`6}XoE*0tg(sIVXaEo@TOudYGwGk^-ZHh`ESqf`Zir`qS3UK&Os#Y}Sx$_mEbImZ`ESRZdZ55dVvtbUSmDKQxin#bVW;KdoAbmO~q7kx)R2rfe5# z_!OXb*s9YbYsEaO+sKu2sZmR>ucJyKBrB#4kSUtS6!BIt>t?m!a&0z>#*`Zsw_M5B zsO z#I#s27N#^7(&F)Wgwl9In+PYO_-aH$%u_l!sf9w5aeO%xifN%pD43u$g5yO)p$ST( z2`v;4MQMC56p!E}p~zk+f$vS=G!ytN6vOEzwQwlD7Y>EAa5%9S4oC1B`%;`f9GOf| zd>#!4_rg(p5T<(=a2Ur8$6|Nfbi>R~(KTf3Jf1pEOIfU;IGfH|~^vrXE}AFW`4{B{|vM9Uul zm2_Z)(Xb9`n+X4Atya;DJW6s2>qJ=;V}L0j&7}MLJ9C-k&DqF)dVha?Hk;kw*Y=lI zmzP#E`*eRZv$DQCyP4T9*7A0%Si~~fQ|=CP0vfNcWtVm|D&SNHtrD$O8?%)PZQMOB zmJV%vTD_P@W|t^l zELkPfeK(KOS8L<`^0?wg1)tqqO3yDX)R;G|HFPEXUITx)F3qCka^eBq_lt;M+w=`+ETM2?1L=aw@Yk=F3~z%}{s?Grd1|V|#yTE=5DyDqWggoz;z{IeTp1D%eTD{{HIP{POHV zmIk%-T5ff7e|{tLLbhVjklmDVUabf)-_L4Pp}-Jjzj<0WlkUdN%<@upQ^KO4N*u~7 zORM|Y*)33&EDe{zSM&FlR@ZZ(ptUl)u#`r^1WyX_kbT+=8s=Ladvf=N-w`WyS%vmsZmo zfvP)i8s@=f{=V60*?04X`@lRnJvM_hnoC7#D729S>;cz{`G5`DUMu+Bp+I~|4b7>c zS&eSwR*{J*H?8VA3P{w(lI(BJE-g#844lGNp9$Uxg(G)1*EVODr4yM-VHFShOx|zr 
zcc`fBc3Z~M{B`_yJ-eAn*Q$048M0a{&Sj*kOm8Ei3N`uwPt>ZMq__1Hy{(>s?n%L1 zU0==YfMl&ohAgF0`-V&(R6nE6)S?_ZkVR{}w35Lxxz@?(f@X6BtVEh5>G778IAQ>{CfGHM3L!n&VKmrquV#dAqC_MOURm98gt3zE4i~ z&6A38La7pPD}h2t3Eit19kEkX>dgi?&WWbn%U9gCBqwu-=SX-T!M3)&zcQP>iM219 z!FV*7TV7t7-J#L&WOOo~2v5dx20A9cYj$9Uo85FRde`u`uHvLg z_nB$z7i24sb#Onh3P}Zoj@I*^Zc)onu!NEVOhchO#nPcl8=9N$0|CwIS@QbQ=6+^r z6Key=4bii?OzS@b3Y`P_?Xtq+fYs!goQc#qFRoJ(H z1(-UZS9&&^(PvUr087aqCXehA7IC}ADVd8h5iTzkwMacVn&H z55}{aTUKQGo}%E1jKe9i`%K4fn&vJ8M0Y@#>Zi>zSjUnb*lYl)loI3)XtWJ=1N8~E zOr@n_N3d0`f**udxvrx^H;zsJ3E0f>!6_s}1tN&%8x2TPXoPYN@v<+;z%ILk^1%8Q z$gtHs&DWq>m4czH)M|NC>&OkOcmyJ2fwzDUXl^TzUR+ztg0?CPo7b1Aq}3syG#Tk; zH(Lj%pd(dDr4n|sz(a8Wh3?1?jQZ&W@Z638MFayr<9aB%cjs&h zH6|5GL^XVj7>r{tEo4NE2lPQ4B{!l)&a^~qA~89Mx*Cq6kfQ2NqJoCb=viVS7DUoU zV^t}p0sklsM>-@R9KF-2voS5&skQN7JftOX2yHSxnZTZi&_w(WRrYbPU?La`YWO|V z9z^Yf7HA!^NK={z6&VN8QZ!6eyMPtOJ5oVa0SO_RLMkVrKc(qZM>^osV3JO!*LSJv zdf;}s4B4HUrs14SuhA(x?{>SwsQ8_~EU4@zl?n$Hj4ys?FL=j4b|Pr9Kj{Yd2bUEG!P;+|FJTMWFK`IC`PEQqA z-qfd4NPc~Wx~xV4oUz^r8WfSO%d(twok2_}00X;TsaIO2VOc|V6 zF*MM_;|}d@7#7yQDS~)ZB_m(Q=^nUMwNudMTq(fWnfuJLQ94426phY+*HBg8nVY7e zu$***SfQj%%;+={1;qjl0|!%;NGK{Nv3xF`-X5t}Heoe|9k3*k^ibH{?giOB;) zGeiIS1Ih(nsJXG|6IwiS5Q`PVF&sWRp-l#(v1m9HcE=DX?qE2qouNF)G$qyKWY9EQ z`*)Bu2gi9xNbqQ7-1b{l&E0C!aL`{UR+@Wv3|T{=#ZuTd6lnp4s9J0U@8BqcEZ4xS zfJtxy%K54_w=QXP{C1UZC2v>9_xRQwnz~(mAXyfj4xaGA@QF5V-LWW-qlYHP<>c9z z5(vd2aT@bSY2u`#rKf4!4o<1+o^@@CqL73TWT8mZ2*!+XMYMH0U+JjB(^RrkX%q>w zFK1TQRx&GerdA)PG^MKH@YGmpb!}}u5~iu>OzOy}sbI@t3X<4t`A=Lf@VFpwdravFp_Hn*GQK$6~IIJQmb!vxY5dTUIdiOY7sg-KQP_>4~Bo!3o#>upH65PkGC)Np_ zp_Y}i_75tD_Wk?!sX!(DM7suS?LJj0!jn!>`@Y*Uk?67DEDF(eATEu^V&Ty8vK@Xg zu-d6XF}$vVJXfWv8j=H&9+F>-g!K^Ifsk~7(6N9A50KKuJ?l;q%MnVRg_CMDsfH); z5Q@umLQZyA5vEb-R5<)3rI{Iuk0AdsOnQhcp8Yt9>EU509;fOqZ@7hA5w+ zvyvTtc?0uenkf zKmNG0E6S%{E9!Zm9^xcP>zS3M&D(ln(ob)Pf|%3Ww=s1c+rfrPuC8RS+y2|SE`jbP z=?w5Fk!u~*ibXnkhYHvqL&9u_f>d8xn~#J+ucoP#K()ZLWIVdO+&U=$2O?qkAx#Y` zo4OxRDIRslqn+QD)>IU{j7Oyo=EoBglOeYvgT2udP!Fq3Uw&Di0S!c%PgY?&0+ie> 
zts(D82rNJwpLFk0UIFl}t#3}1%%)~FfzV1`lM7nGI?|GvtaKvK&tOh+55OavwOJTv zNA;q0YMGS+C?0%OjO^glx^L41Oo|&BDCwXyg(RM%WFD7~p$y=m!cmn9v;eW4JVJ_9 zn>-(mshF_38W7H=G)bXq3eqL%wl#1%JS*#JDtsqDo^+=a)KmEmILs{An&JlOwlIz( zsubN1l)OH(G%r=Ep)dk#99LAoL-`i%&^W$cYvoVeu6lF&&WV&v!EVTxpe9nbe3C>E zrKlUuO|yAaPr92&Nw<4SPFAUMkfL&@sIwMUoytiNO{3}W*xMkGi1u*Uw(`g28a#D> zzEFT;R<)|d+RmKJB3&@;`%QQt&HbZh2S&!}7)s$;Iyki-M9~gM0JpJNJf28QOiWJF zna;A%ETs^7NyX{}=+YFtDSB!)jaCL2#=FcFUpkwXX<;{ICvm}cNJ%f)Is&q21d0gudJeq=mm9N4_cloO> zs3x3vUIxcSs2pdDdAng1B_Ra_pdn1KE4I0Q4<6|p%o(?< zc@4gLzUtTLX>%W!XYO0YeFQ$dn9SPz?6ez*i~-IC+>p_kszD2KYhNk?KU)>5s@63~ zPz5Vx$yL~UKpL1oEf|~!c{y3^*p3AYAmKl5zw*iKgfHFnZylcPNG|t>^)HGXbHd=5Agz@nTIs_fQZB(cD!X2tt zo2r_J%t*Q)*cRk(E7aLkA0W;UyR-pLDjPd<5v$g+s1-VcH;DI*ll%;@%1-GPHQZJc zE=z|c;}dJa%){5+hXrncm)oaRn00w~L2iV=Hya_d+6W(&^7RwcfJ3V;x|>x|35-#V zHo_9VlcJ4~TByNBg`*h8vZTVB`NCb8p);y1HzIz&Rg8oi2aPcNHna`$)kb6*dM&JPIDQ6?%!HQ4fKEuk?0I|TkBm?5Y>Echa^Slq>CO)=3B z0vA=nYprUi)PmJqMbSCVhr!vZMjl0>Y97zb$c=n}YI+uJB;X@##n22b1ZQ-~J#Q6t z%|*|aigmc8aME}n1g!Hq8V{DL&ID-Kt)0I7GJSC79&?vWML~U+Wz4Or&caZH6QsE- z=1HeYpe2JoR){t&&D}V>o>|z)Waf6I#`Ktq9T3&iv>s@JgBMJz>2@{X1nPPd4HlSS z8DsB`AJd7<0^0GA6?_XxRcJPYvenV?3c}{(7&{u(@SQ2UfonySBdaOwBWzQ38-}S} zE;MLr4AzBw&nTpy-?FQ2 zc3;Y^!{E?>9+0*IWm$S#5GXhv3OAKHy(y`1ZElJNt(Q^y&SYxI4N5EKoZBw6iY@cR z4~UmgeWI9=!d~=FvctYY%aDQd@Ib*JfkV(d@$f`Mn4iy~PleQnz^xrn0af(QwbcA_ zF1y%aC*!ARzkxNO2Qv<*4+EX1R73Rw;vID8C``cV>lJXllzwK7!5*N6CMGY5wUTPc zVmD?FwM?KL)hH)n+NW2{0%pE!+X24})@Z>OuxpK?T|0oG&@su2$AJd?oadi^K4a2U z5!hKgpjihzwq+)H>(d%n!fy~m! 
zUcf1}tJhCq4I|c()X=n8HtBeZn)zA-EsP4xr3$?Fyw>bYk&s14F}*l-2LVyA8yf*M zSurqxW8O~#fHM(U3=12ANK2)P$6&dXo|$DAvZkd=sQ-3VLEpo#rqUQ{0azrOGQ?AM zY~x1j2&9%C2b#u;*(lLw^$6~12of5-)C1%)#8Zkcrt|h}0X(;Mh&iQZt!jj(K|yKK zy<{~3PtCDZ!2mZxv)BSEYa4mUhBRDzNx_ukXa6W)uW1rQ{L3nsXfkOPA5f@by|XBV zS`}k2RXgYwgs0%@gj12KGuidDnwpZXL1s+R6AMJikI^8-a75&0fJVYv{zMAHMsr)5 zKsC)Z)j%0)m}hQ?B@N{&sGYgN-fN+r(seu7u|y+bnZA5x z+6Qjz??)UQ6yRk9lqUEyd_><(?#G;3G*gG^kMld>^3M z1Co41kI~DA176dp+mg_0P&RDL1sdRX2=SG#>VaBozt-6Auxqn$oHz0XV0s~s7AWkT z7KDjnHUq$SzhS^OTCG*DuaAw@@1W0yh=dxG5AsWB9Gl3d<&E6Y{L=gy%nhlL42_RZ z>oX1W?)Z4Y!ipDD3&^iy_fcMH2AVa*AMM;XQ0#OyWcV(y?DEJSdVMA(gOBOx*dL?E z(rlIklr|L_)b(vVcLz{!5Ti03@1Pr<+n_TCs%1leA~8coxQWhW^)5gTkz?a^*xH}4 zCZH5UdscYwE(}F#0Riz-4HXl2_>qJTHbGDbH$4=e5(O%>{myiu?b6iw8{W*!(s-I#i~1eRU9M-Ms$ zL&Ty1^#jM~h9MYJdDoplsi{%fkj8Y?+S38sQ|^>X^HarROQj8}<*ob2nsNgEFm{Zt zxYaBvR;vcCc2Yb>E6kn7%%K-e>9L;2%?owtnp&qX?sf&IO|zk&Ok34D$lA2T1OE5! zoT#Arsnqycu~{zXw=7Uf*m-92cn|*P{+>S5G>@AqXbgJ#MQ8-mNAXP1;sU36# zJ^aZA(zUdNGvh`JL}Q1KHq8Beif?K2z!WsW(W*jMe>zzOi4JEVpmCt2ZqG=_K}3Gj zC{yd@muC=rh;&`|*IihZ8Mp`s`3A&AA>BMhUlR9$G|Pvu2x;U5qO6nG-w{}|dkUy;JD0Gi#$d22>>yYyLFv71LJYE9FW4XA~shYm{FI4#=p z45~*c5D8D=)|O5?M$yz1G8rldH%An5y6$oTX*!2GbC2(V?1C2={P^aBGc>5z=1<)4 z&{g0XLLYV`CJb#sD33DkKym9hYZVmZpwiOH_wpJj>R!h|G!F3$HubosHSNXPF(hP& zs%{EDNI8-q7&W;nM>@5(x-`EV4znlwFtel`ExR_qnVw$=0i2f6O1P=fWq9=K8tk_8 zBKkP+3zlbZ?c(jV)dkoexrN2bNwQKwtfNn2!Oc^vwWMUp6%5c_opK-3sZA_MAy4Jv zq&CsCQqW=mq%%xrK#?~D+)Fc^g=#@E9Z}(Ab7OZovx?hv8ZA6_GO=L!$1~&Mmf4W- zj@BVT5{kzRo}^?PDZDtlie}Ie?u==o{{UVDSi4Q7B*-^aac@TruAL^Oj_rK?xPxrC zD+krAg>IB)9aU+m0aK-piFdo#kjhqZMnc!uO8f89luIwVX5fdq;n^KkY!KhGrF`S0 zu#!JLpztTboz__^IO$34@hKhwuO6Y%wLZtimE6TGnLmgBBSA6JweDx*Jq=2KUn z=@)OGm!BEj^m5MJGrj!lZ#ewVs&7dAO=s+(-hGGb=3SXLxGs@7i$Yx_H;6@X23`Z^XM6eqO*x#V-oo#9@socnlJ)XA0kx##$9)3u#!xBoKsUCt@El*9+`-7efCH@WT{(SI=V zfpDLD)+94KIdkM~Q4r1l*735F&vC!2{xi2uwD4O3sbL{LrL-N#eLT-O^^0kTl4(60 zn))p97Z{Ph`*Xqn}>$pkKzdUh}|4@oE!?TLDNWfw2xr2d>^nMhpMr-ix4qL6vlF~on= 
zBXx;?Aw1I;|K=^9!f(DIJ}XP@`0r=_K@_@L&R0AS|CZyv#a(jlTIYrw zAOFN9JSQ+3_oE+Ov@`s9!I|jR+K>OH;E;bSy9>)d$#6fHNK)dt%{Mr% zhH1ZW{9&dMb@C+1pFbl<7UXmCjm+HdU~ToKpGUi+UFrv%V^m<|x$eXZE^fn_a|#Z` z+qPVqCxhZQar3_qWgWOJ)HBIWpD>s$QM=Hs5y z+i|{o^Jj0#i-mb%@NxSx_ptR#(>eF1!ifK?NEUUGQQVTzE5%-4=$3?{V^Tc-goX@= zo3V`S6qt#GL`3rK3%;koeUkh8IX54B?0n7lsDz38jzi|0e0Ryo&CT&xtG_w_VEp_) zFyd2%=jR;5C39#&aA-GsHYFbs9qb@8-F-R#9N+pszWd~@p1B@*k#GNr`0GSw$f1kM z1Vb37-yxXYm6C~rHSe?(-?v3(?_w@a9B}g!8Lq|sMj8^+akyua z>3o0GY5nWtE|K_O!-9I7XgxB+hGRXq2)5mK@!ag0!t-3uU2HSm3d4%byZg-#cHuYNQ}44P5k<1_n{Qsm+?gLar1|;5%R(_4`%f|F+&`I| z$+?92;y;n{sw#cZ%@vr0+P)OGW9FX&Z2r#4J(nP2A_hEoXPuQzGGJq4EcNu%o z~llhH{SbW!F9Qi^>L5*_svay538) zvG7ywPx(wd!_93nsyN}Wy%%q9es@a}-hc0dtHwZ*_3k;dg9;KPoxX4(^CELR8ST3A zsmG3q39pn-d6(svl*7A3`F8vD%_l_jlPf)gVu0KX58cv8;C=ohwvStzNRVsumnIYT z^o#TVk>mIu-B;QEb5rsTnw+X+xU$&RN7%kL;!W})R#`!Kaaffi1Bz#Ta`tISSakEb zaVE$)r3>#%fu|A)&|o(TN~D+V6Uf@kNYyK0)kL?FNvSv8HqX4+K(LU`1v`*jy`$I;nxH~5C;UUj0auwUA*%~`!C-1q6L;< z$0Wh4X{uA<7)dyE2^{zJHYaoqDaE^dkN%v#AaReFk|UM-$8+VOw|i9Y=6{{wIqqlw zSvZ#lE(F2PyZEPVA__T-Rnd7eK~zS3lNm`~3KF;QZo9~PWvBSohr$@q$i{ZN{avBm z9?cOSCu*DC!Pnm>3C`gq4_;$A_4CzFP z5piB{J>VX`-Lv)V19@5CgdYy8=jB&dnS|)~zpjw0?ceqEe(4#{6r-3@kSK9>@Z$|r z7WjQG;*o-RcJKvuaK$NnlJ5x=o=E59YTCo>u>(X48XiW?;tA{1w1}7Z*&*a3%aQ{c z;MftdadlN>N0edrF(Z|Ceaa)=ape=PE29kSA=A!C?_*$Nq?fS0?N05u6uk1Yu}kiu zlW)Jt3ocPqy_ekf76-7kJcAD3D+9NtjRhtwBECIdVbuAmr=JlK>*4WLB)v?0I~lug zkyrO_7~D~WI{8t3L?qHO|H_a&?{|t*Hw1Dbo%&6I9TP?rMIPjs zo3|z;|0>&S4+-yd+kF>3tWV_GA$F+9dKJ~jA=G+VUnh-klS49S@~r1>q_-{~o(DgS z>%vewJutB?@QmDMLXR3unB$${(>C#*6V~%@kqwi)^6gzd=kXl#!kqv+AUewvz}1IH*|qoXO!wU!0ybxTk^Xdlq`%^o z-gsmB^^8+vWGQ$^c4bjuS%>1-D-S#?GwPl!`&ZN(eqFt*uP8TIPq#Ltk1T8JF0^)? 
zK^`&I&u9A|`=!9Be_3)!J)SgwSNf-^AtuCOFkx)EJiScKG1IkF76B1g=enftf7hq{ z;&L>#{t4_oato2leH{?x@m$Wa-pJ@SlOFVkF1pm0exLP#k_2!7c6og}8GQaK|7Avb z<2?2p>tO<}F~_#dgjQZB0jEIZ`Ac;(qg$qujZxw(jU8zhSD z^Ak@W$7ljR$))?aFz?8YcBOuq?z_Cj_(d}GI?sO;fXju_{Y=J}1M1C@g%xnEB8#>=Zs~|4Lk1;k;V}K|d6q@bOHGxo!w=uM=rO$${jnc0 z6`^Oyf!$In(Xa0)7v_aFd@~;V0r`p$e-5!)B;K~DI=&?kEc$$WiRbwbKHz@V2DOzg zO@YtcPD*4-<2i70KHV=S*a2o?;Um#0O6ANG)#s2e#9<ibHB&ON#avE&j>kmOp~P~zq)-XD!X2U3&&p;)WT6SzkkUfvj$+LAiQhPruaZ%9TLfSlkl21=S>B03a`Q&bA tI!Qd@i~Pmc zRmO)~zFr6Xc)EtzUNGYeBI$U=_Ul36BmNVpkoXlH(nSvdxQ7$Lo^ecBX0Z9!UlGUu zm8VaC^jD-@jwF-mYaElF6<%Nrp-1{O!wTJUUR+2IIpvU)9dpXFQlzsLw^3!bg^(1v zx~=5JVtS-YCWdtVQWOnNl6ECFvb`flA`6|({EpOlR{D5DKIdhUpjkr9%U)wxeu<>} znRM^F&tJ@v`;lca^}HCF#6_e9vY4K73F+az8%t#J)^0-7RNf>*f)L%^`-&yzoIqRO zCX;mTmLENu5=|%1Ey@H?)5G^{{&N@G|N13g1c#AD{`JogKQBvz3j*ilnHyp_?QtH9 z%gW>p`DV7aE3bIH`FWey#CbolxPxe$8zI71p)MJVteeIkusNc zM*2V-;&?KIz-9X%^8$a7Au`_%3q0pj@8vj-6aGx-HZEAePzP{Pwc;CIExFi)70 zC<qNFv5_zvBP!{dt5ouay4j;#KZXbNJkUHFJ+Y7ly&~ z1Q^wq*Do>)LP>m`U#29$jyzowgyMp5k>MQ@@sWu)a?ehZk`#JP=r$7qvXU$gw+ol^ za_Nruk)%RnqU50OQ&2e6#2a-O+AM1=IcI2o)k@+`OQv ziG-LC9ZI%;=u25&M;GuRW5gR-6QnF#^b=4bM!Kz(6JKfPE?AGIkZ_5}*5s;FC$cKB zv&>E@err+essAH6T<`if?JWN^y+mp+#z`4=!6h$5JWjEF8EWr4Bt-14@btd@gjrr?`vuckaC6=AZ-eI%osk~4+YUuV{pa~M-fHu0MmoRz zmmkid0o{oIoKZyK{=czV9$2E-EK~l7DFH~N_xUzYB*2L%6x)o9`UCR^HCj4H+CVOI zBy@hs|L{d3i7Z99ArSN}Mw^hfi7qTCsPH7)M~;MZ2!KmLB&;{dDDQt%#8)M8i(i67 zlzn86?C!E-{MEouV^2p!`GTyFVV=u;biVx!qybNKMgB!@aPg8W@PlLrm^r1eo>4|5 zx1J9Ti0a*2D!*l#?106|2>}-178w}m5{ask@3dKZ7!U;HnTEpqWm4hftOtoPFsZU* zUnS}YJ0L)2Fp@+h>119M-hOjPmJsLLC)^~n@F9{0oPCrvf5W9mUVd~_9AZ7E;#Riz zuXuJ0Vqb(%5w-`$neDreg+vaH37?1xj5ot`IUI6vac-V}C{4;c690BgaCq7N#ce@+ z`-xK-N{?||Hy4ros*fFWvmuW8ydy1g6XhE&V^Cuf#cohR1pV~eHzaRllI>UG5Ybm( zZ4)i+Q=TVjANN*#Xb5p7-jL($rX2Z6z~g_-P1>p zyo|I*WJj1OW9*}Z#TM+;ciGvF0E+3!yeoU}EckvPwTblS z{6x(uj*hIHl>DMVG%wLKOXUWyG9u%>5WuW)t1{6zUbtm-4`s_pFJ?j+6n)mii?1j$ z^JN85DMvi)XFowADwFm;j({?=8kdb_B3PNI!?3d&iTEBzG-4>9P7jOO0IM<^B#qI2 
z7I85PM*MR|?weOcM@T;pR$%9}rqIgu0XCl|sNDmpuIA&Ed_j1O5H zi!&ham|CYK-gLhBo8fL4E(wtlOGpys^~*~s48!5S1J-DFclgXhW{S*!$~!0CeYfL1 z@oy-Ma=FaU!EkJiwq+5?`x@UPKJ2kq&f${?+|P6G<%(UNIcev&FE2T6WG@r?*N2A~8>cyxVVufCUhy!y%K`{7|18`4YoXhp zkwoX>)RPN`_yip3+-!^QF-wvZN2yTGF9iNvd(2<_1FBV(7nAB4T<8DOFq3q zc3*vEZYhAYXQ!%PjS07im{hi=RQc&C^mRfNIM13Z_-!`LhXbLVg<>o-&iDrd$<+&>%L@%{FP@j?=n}S%j{%2!(J28H(Bw@=;j}gA;y21mL7BLJkQEk-06k29*AVtm;Nm3Jv)&$%GrSlSY^-Lu|NBY;o(8N-!C2Y3(eG2bn9MB)+eAU5SyqG2V`Td@@1E zBwPbO`3q+d8R?z-bM8ORU0xZJxb8?oVAmd}BgR@Z`j^AGPhaWfHl4q`lK6Np6baS; zF5k_o7d`xc;oB}wxC}EPh8?DUA~B&a3A}o&0d_)g8nWY%6Ot6^9qrlqGM^q`*hwa6 z^fCtu>-_^&lGyOzMEGZ;f9PQQ#a-6R7#cIh%(4KGVc%T>N3DhmgyOR(O=f)~>V_zu zM|_;&Ee!^Fa(bP4z6_=nKRa)F2sBWS*T?g5VVV zrQp}emmRXaE{F#r*=8mk5O!RIK8S>Q-iY|^K`}n*VJ(6A%1y6KET><{SVWY5_(Wtv zJmPC@D(z#G({s-#8D>~{gS^0T*IqyAYy_efQ z#MN+=ofEaii`d`%f>)HALy=)?>V~7Cv4Zc*qPcJkP=#Gr1yMWVFE3viU=+uKDdi+_ zo>w|<9z+;p`=SL#oeBy3ju^P;i#a0DQAkmFP0M`r6r7J63J1xpD!cTg4e2f>x~`s! z3M@^a$%Meb!V!IexCo;#D2SfEs}i7Nk{Xp-)t({~p2)nK5C+GFFWSEDBYEmx4!t(CA9G#c_mRP_MX^mvUPaE~e=@;~ zWjGc@zo9rFRXLd7OkDu8Md<$j+KBQM*B+4^o*Ry|C%quO2;cJsSys3%iWcSMGy(mR z7_b$zGsr^h&$iz>VLge9^~VYz#M8UnW)%73XWyx48ro%WggUI#r}}+f$KJ#(HS1B7 z(0$~uDBh5W3OAn#e#aq7g2OL+Wm%J6^#;hEC5WdBO2@`hy@7V%ob@1=#P1MMUk-Ab zAc^NFad?;;RY+N6rZp90szmfbi7AsCP83CvjE1BihGlDnjL47@o#a#k=(E75f|t5P zw(nYllC;En98x(Tzoz^>>unoRK{MDcpK#l{>gY&UI4)tK1h&36%5rzdm~7uoqc<=l z%l4ouzc?s)(4Z{-WC0GfE`dWlB??Hg?vr;T9xmHADNUvOpu~3m9?cE5w>wYtp`NAX zDOrs3emST35?n9Po3u8I8SmpLyp`hr;g z>eSA>MEqjN!CviRuVzSC%wByGxq6%&id^MMgk}3!7B0}J>|K%~@Hu+tJh}g`OxgLly{u!FDkO>j>B2j{sOvpN(KCUkaiI4SUP7AC$VN(_6IOKcU-If+b^2Ik3U zPo%@W+e0t$LtP@I$ntm4Z^*98?272l&WPn9!^3??F5e!G@p}`J7~A~!o`LuznQo)~ z`hP(vtoYwz!qM1{CTcnn`@hJJOBHdDsGylqrlc#2cEu>)FW>>39@xV zL577!udRX;EQCiU2bXJq#7&aDLx{FrkZsK~nC}03&TWp9Vci(~4EzxyhKU}QhE%qX zQ3}H6Mmu;u%mFUtB`tp5pSlu6=VMWw8rV@6rHyz8_2$S{*B%N`FJxSIh8$!0P)7E(~!IKp|;~iQ2 zW*n$XzSj1Ft?o$DAYm~(s`FB5zaGQ11-xX+k?z0EyhM&%aixd5g&xXXK9u{&$@bk{ z5J{rQ>uit{`bQaWQFz|+l3hN=f8iZRi5V%r8=uc?wdKM74(TTQ 
zqgw(ak()zzu89dq%^~%}_TCMi_fkgOm6el=$Ip68C23Gz;!+zB7ia_c{IbFi@%$8l z{}%Q-B^Z>G${ySISSJI;FYdyUS_J1}+A~4PjAF$rTlW}&C?Nzzw zrO5M+$Oa&EBoX^k{Q|5n#CmfVWH|JaL`s5#?cEjPLR@pO0|B`Lr&wAHs|wpcq_e%3 z>{U^HPg0Z@0&=API*H5^-?j6wAP#O}%^W*7VzC0}^Sys5`wfU~c5sKk$)$!jZv}UM z$?3cxLD$SV{BOT=Ayr@Lz590BL&}j}AKArlxMAS`cQa}rCw$N8_d8@y-!%LUUS8Kx zx3Ys6jz8O*g<(Sm{wRZ7M-#Sh=bVWQ|2`WSOQY$0?M3$HRhD~gVkfvQ{CGHL#uh!j z{_Uddo#ZlcUa=gdZqD?Qn`HY!oyx+A);z@NM*$$yd8{CM06H9A7!+wBcd#2NSrjs4qIlg7^||1}os$VMQRBBe3NJB^z8Ac_i{(fIOc2 zvB=Lc@@w;t1=9F;qBHZ8O-_Z9Et4@O@TgOSS-I2GROEqv2#AIAM=pN*u>-XZ$f$ph z(b16mh*AF?UzpGE!U$PVWbP*UB9TA*2KlBCMF$B%H~0nIwe$tzUAPdPLZ2Y~r|w1f zUh#i=n&AsxB8!S|huP-1uJNvt>>U($h57!AGVy&tTJPjQC4EYvODp|GkPZRR>i?+Z z;EW4%6NW_Ys&RE;K|6vU4B!2qKV?I39?w& zA`|E4PX>g+`C;LlFZ4pcz9gBM-+i2r0_QN}h*#Zu`0h96#ZsiVt5lhQ(XbN?vIB@> zVd79Bi=}sX{?Pjq8C2(`aC&TUash6{ZW+ZNi^ECmO{1@-3*ya}LkAbcxBWkpjX#nC z@-(y47ZlX+{oASKfKEL9yXk@N@?FEpNs}x>Mb9}VknSRniBI=BA|7x@+%iNjZO`;A z?uNQYuER4UsL_sUdGV)P3*L%yAtd0^mUq0{;x6x95QMFryX+{;%(U;(2s@Mowgs}j zc~U+G8Hs77ctgF0?{YC^Lp$~y(7`jiqm7?Jf4m#gV*>#ib?t6Tm-+%9XM5H-)xvq1*?Fe$ACz# z!P==5C0(e>O>azBhcO+f0R3|5OZ0Zf&&5(Q_ZpHtd5(cspi5TWOy@=Yx6 zcPYcgm?}!QnIv_XG(s+j6`K+&R<(E~R>V)C`PW97?|zV?RH!A1 z?dp)k+}7>lIH93QRS~OWbJ(kDHLVV|X=73q8e6gey?eSP`9(pOsu{aPU9?iJ88k}S1iQPbiIE;d}2v?LZy=T$AG0v4MIFdB)>)|_ujFw@A!l%1VI!_>51f52e4N;gS0 ziZH}wd#CkiJaHDGF%9XEEE*R;HS&#cs)wzVTun@Ug7_%)B1&B%RcCRmDM}=J5(Cid z*s?@fI|A8c8Ktc=B1Yv#xCIK9s;eu};}Px0#`@&ssh(IV7T9t|(1cQUFl|sTYf)nY zfQ?4^ku%011+#966*no{L&_p@wMg>iPc$$(O;ey0peeTqwWv&a6sZI8A*c*xur$fh~elw}m#9-X9?x2xq= zhTemo9=^`QOSqxbDvpVcMdMIK5B1kfVoF zt1#8Ug;B-}lvcn_+k!QpBWIQ1Ui&}dB|46cOC8)4MtkMr_7|(OIdwtRN`~pc9au`S z*6)~v%!X;IimErLL&JqlQK9LKt<>N>#_;$E(pDr=hlSc`DjA955twOt6K&0-6nb*l zSTH%zJ{1%5SS_|;=1)a6MKV++qYX9UkcV1YGbPaLb7g|ah3qmzS2P>L(TXBG%1k^N zR-#J^N1c0Rt~g#0GlfPevN+5|V#o!p619n8cQF=RhWBv`(@kP+Y=p4Z+!U-3a2?i$ z5QC~of}uTK3$5=!@T6IT`5qerREbc!OG1TNk{DaXc`K`JOAIr3p=vLVXJ!(ZP$Yd( z=+o7!rkA4$Hj~uCP@w{Y9=(TF9DQLK|B8>r%Q$T;^#X&TvD1JaINu_4%%Ie{vzF0qoEYTb`lNJ{4-Tg3Pj 
z%Y@3gLWAD40_${~T9slLLct9=sg_f;;MT_c#q?8|8JLBML4zCZprXP=C4HV2EPc=- zs7jzJr>5^cQDQN&bH>676^tOIBTXG#8Y;9LnWnlD7_PlG5mzrFAH0tmwFLQMSuoxq zP4o6(jK{GGsuI%xG?zbI$mNJ{j$51w(2ZiRJ>PdRFzJiTx(`?bR5fSwJ0bc zJIs@#N1zjS={Z_S>s8dArmJ+gWaP<6E75jhXkCad zoQg`e$ zi0o~X;kVABe(ps|r}CXSJedOb`Yr-_##8qx`i z^(aJk7Gl0)L77C)A;#gViN#WfxqU4wXp*Ys8siGXDg&j+$1Dpgs7<+-C{MzS2343M zm+PWqG_4gO24${DqpL~bxc)}uaaLAL$&o%pYb*OWc1loP(OOSyO!|%{nIIA4L#m6&n_*ofa$=R4O$28C0^BN}Cac z&Ctmq?acJq2O1_GwGxg=NTO;I*-cYS3-%jMHltSG7vfEHF_$V(aj#9%q26^FgEj`0 znsuwg@j&FPyeh@0N9Wieh@iql3aBD&Vxmy3-t?qBV4f7^7O_EEpkvT3Gt#Y7|587{N!RqRJwu+@-nFct#|O73b)wSZ%D9ZPm4L>@LcrQ0N3^ zin~aQBpheT)d&`~DJh*uBrs{CeXu)X=U{|UENcp@rA%SrPhM;#LNAJ-nj*@=$`F^b zlqpu!MW-1{(#&J}8G$MuPnkN5sv8RWYW8s@r7ckhn}iAh+sX;B!7}<-E;+I+T&-wQ zhhqjy9ej(%r%P^<=mjO(szyg{q?r zX-q5*L#QS3c=t5AG==2^(p4--43%Nf;x}tp0k(OWQd_0iprz8)!RuLlu|90kli;=P z_HLG*W{VWJDsX#IiZYWxAv7#R+f>NPgnHb$Q}?vACG1K$XO^~7^S{QX>a`>=wIdqQ zS~21iWg%CALfF)mgeUo8qmpH2no%=&kSZ*!`alrM8XK9A=yRW*LcKegaL0As3kXK&70n#>5O8iji4Lnn=|$HYTCEU}lP=WKw;CbCsY|pwgiL zD%YyG)JoQ7VZt=|NuASTEL;KQ10^1cm8UlW&33$hf4Yqb@LY^7F7 zPn(cE9f{_U3nq1-x>ay|H|9Z6ZPBEmsyy}Javpu|s_OnUKBWQ+2&clZWnp4PB$`sF z>CraSC2cnGMM@iXj;cascs&};$|yl}Bge)fl=oF=8T|ncgPaolAEY$G7dkKGF5aEI zHs;DHij^f6Ot(^s>6zs+-979%7}%Bwq=^HSSW$1;HmT^Af(km}>erR=GO3okfg%(Y zmU!!h>l%!rp^d0ZMDd2LQ>7Q{&{n?aYeIT3HHFu*D$&ar?#*7MUp!6WaqKp*A!a>Q zgjh^1)3wu2szr#&$mvEHJvFK`BIU2rz{wSSU1YGW2&uv$)yB=PA8V7G7hSR%$CWoD#|U zlsEQw&Js$csUNRd;^F+YQyfHvwBN*$*b zcd=0$^_aV&1;a7P9a^FVJcWtsv-3QOtR=0B~D?qrQA0j|0scOQ9Tjh+HeQfF}Q*ZNPE4J zFGW2G0$n>!aa9*JjOK#of*Nu!O+BUsI2qD;&Z|vh7NE4JQrOr|#a)_dmr|^PPN_lG z%vB2IwMwQfF$u{mt`4gdDH9W3G}WvTTBo?vavl9;eg$hml|h3kUyJyy^OUv))rAqZ zQdo$=YXe#imDE_S66Y71*-G5M?P5w{WUqu+X}OwtoQ568r$xYGrhsBm1(s{Hi26)I zJ!L^p534iHqOe@hsSQ(IMLswsPQX~)m=BVxYCf1+ps%6n2VRyGit}H=RIv}R0fyFg>-xO|jKFg!XuX3XT;- zJ*b>Mr@?|pReq5u2-9k-EbUY%zM2+`vpJ0JfLi<;-VsWV`o+|F+Mr731Sb4h3tB^A zN(JJzHtqv%dYGH3pW$XMgdqVy<2Ds{%U(?}f{>69Oq}D9n|s$pP$Qh7Nv@VQMNoxo zfwowShp9%`tW^4*DVL^Wld$b~tr3?Y$ZWM~Rm)mz)3g!MCN_7? 
zV5YvT{7XSf#SK;h5LOl1HoZ5hr<9Fqoqc*T&9J^o5M^aEzk)&)WJGN z&3u8DYc!{pX=!;2tuw^G#Gt|G)67uS(+GQ`G2+GIURXa4bDB~`5em~@sEAMF`zf7VhvRq}V=9LxqY}>uyEQh0 zO0Ait8U!dS;qvlX%w}HDzeR=EmZJAzKvT(l>k`dwuAin+H$d`wuLTXMT6sTXVcNNA z&ZyFA+YM9?{gy<`w8^<7h880YW^4j2s@C~(t3Vq%B{WLkHP`^rQ$_`dAPhyz2%V9Q7aB&AWF-jbwnxNriIHitP)d)ZTG4K z$y5g>HMI?iZ9?t4j08u|_H^maGkB>ILiFtj>5?Wu=N%O&Egp!nzjk5 z^tuyR11n|iEVm(b3NF*U zgj|ZM%9T*LXeDW=+z3Ieh81Gk8Kou`k&z%Bm3Tx8Y6aesvWT#%1t@o*6RxY|q|>;X zS=)Y=s;!AyEE30*8SALZSP4$GM8ryktVj~t5TbgywzD`D5sgY=QBiHv7Dh4?jq6>g zGFh;>F_qZ!t;j7#N{t3PB1xqrF(6QPHiH zsLL8MMPc$qf@-W2v2;X=JRF zMQvC17*~QpYzU!}nP^H|-s~;I^j1Za5(9v;Evuhc+8eRC$&mmfduXL`CPkCb>YlE{ zsIZ}SmDae)B#7CF<8JspCZwloc~T`SV{j4=!vuJPapZ?pp2u}E#mT!;Xj*~AwSWqx z*jp)@$1Ne#^bY!+C#zb5)!IpAT72S+5(LJEmHV)xaoY3pDp4>vEmI&+G8){qLisetaTp!Fhi1+Sv~)bW2|9VH zT2`~Cm7zOXhHgiQ(?MhXXDVixHz++u(v_N_)&@*-;gAWZ^_3OG3!<9Ua%~|B!ZXvt zM2sJaadK{rl+&+3We@|ZI8lqelafE#qp3A0Afv#otua4o--$}yb{UFiS5fZ5T+_P( zGuKAHQOgQzWEh~YO3*7dFBfn#bOENPkxOtmZo*+V(+Ne2mdA+hP+Ua`LW!5F_C?J* zU0MWfcvJ8O>*}<$AQiVGYSZ{l7@1OwDr7<~$&yk;B?}EzQ=AeJiCoL|(5c7((Z*3X zV}hk>qY0-0IBne;@wutYsaWL^wM-K$MJ`Ag)soF+h_BSahtzsDHbh&eryp}N)_`O< zT#ciK*g&Ncn{K=OyLQ!*wpA!&mC0Jl^gf2%q`j0%B@pGlX{DR;Fx^zD$rU{nEy_U` zrNCmHu23XXNd~l=o?KImS%`6Tz(|nnAw3&2)2dNLMR9Cu zVk?s`>_jK8(8^h8Dm^9#<$;zu%_psr>2{79Y-c-Et!)^%L;|NXCgM|6PibnW^EadC zm6$|TKWR{G!Pn`Ci_hb%YBOTo5*h1-dYJAq$DLqL%wTt=646e?-JzjZ>j$Dx0>SV% z&WbrHt@xZkBtJh*kP7G#kJB+6D@jsI6dY~}wg$_yF|`(7NjS4Wpx-j1uf$=E7?kQY zTziy;jiqfk0d14+YDEV}k2&gSyqgK7A%Zj`i_Vw666u^gp{TkwUu>i*LvXp8N>xY% zXN)Yn7e#NjEIt?Wu?pP8xa}DeLknXJ(U@VU6lm|`4Qo;}t*osm&Gs~n;<}j04Q92N z!dPicY{Pn4P^B~(O^8t_R*uWS%m}#?UnbC37=cMgTd`1iDc%fAL|ePTD2xp*wj~zb z#!c>(8;oH(G|NL8@t$IOSHhX`m|D^CN2x%bP$puVlyqzulcI~_>V>*`6BUMOo=#n+ z21^O&jbRk_Zv1qIjCm3v9GHquvW}MN8t)6MwXv0jkTC$+3Nx7t3 zn$Y~TE*Wny&M#>7S{vrw%R-}LbrERswVIepy!iUqP*v&{jmIXK>aUw2QYvEExUCwYlWMsoLE3WkI zOR#_!DXr&CITJD`WviMG1%tDO^6gozFdNb)LSaAzBBJ#t9E^nR_ZzL=z^6gs_5{~5 zt1EGQjY~ol+j6x07Uz7Dt~n8 
z_uyv}?=d-afv;onj-{k2bx6LXz#+>TtQU~EYhhNQUD4u*-WE7_djV9qy>9OmkVTF- zqK5^BS;aEckF0`a~iU8+o*h3 z1IS_BQuoRzz7v|B9Sr&(py>Yx0()ZINoPBw|qg}e+0`eUtj>_R)k`Inq zqOd~c!%^uzJPdyIL)8?5Bj+Z#UzLUbFk=CJ-^|pB`vZO7Kh??4C!)&xI7vU&9=84N*ylI4{!0c&mE8C z{l_D`H`hVYiB%)Y{y8H(aCUykN4vxAX8T&Z(iONK#xf?059NIq0)g(z>+=t~G(=Yj zq1zhMj~hv{lT$$>1VOcf0V^cK+4kXRvV6gv**Wba9e-T{wj*uMsKHg9fIuZoTWPz@2zUIha67>^V-vT=UlQXdE77Ka3*;D zp6Zm3_t_Uf)V6j~^IW`p{RB@smFs({*=RZ!3h3EyrQN(>lVvs?eW&tcICft`V~ ztwS1@_KTIg=!oid+fc>cI5KCr_blU3K-ab?upX-0lr!4m^gpH?nd4qRxPEXQ$rzHJ z6H8rPG5-nthIN@v2JDGCa*hE~Zr2t-XBrWHO(R*TCq!!ERTkqa8jPAallJkJO*n z|Fp#wG%5>*)bW;S+O+}R@9w?e^{V&IqR{|n{vp59R=(C1;n+yxA>w>JR5)0B`VxoE z9sov&gR`Y0&F;Cd?8SollkKhz&O~?eXMsC|F0bW{jzp-2Y1ZFgseq-{U2%SA{XG>y z`De3c!%mDOs63jFK;9;6-L7Z(1%KaO(GIrC>_S`|c7SPnsjR*1tl5x(KS zT^kC21z90}sSrPO=p8k)AbW_eT3{Gc7J8mu{&oAdqN#N|8pc(FtpgX$_S8{-w-GMK zjCk_xFxoevlJhaHYJTnpveqSZX16`>138d>Gtaf*sBB;S&d7PI zUa&juSdz;ZmIb@giTp5qxQX{o9_qam%}WN}iP<%qm+e|IF4@uOtTjGwK9|bRdcAvV zTaWDsi(5EwFz(JQe!|oN-zn|k`qsD1hn5|)9bUb+Y?HmyzU=l& zQsy3$x!1TC^UdL~6N*#Gb9goE6qKtXF@&?oO^yv!`gi4%yRf=yK*lYu7^2%df`>o%`TUA{h3hk zzn_;oyyk<7UXp6DuMO+n+z<~+M^ta-@LJe(*1zd!{I&py9}XO;es-N*PulYD3|2$* z^Y-v@!v~aj)b?(VBab9q(CwW4H_7m(=M48}+3TvBr#HdccNQKio6y4T&f(yZ{3|^? 
z1No#rc%+0k$4;2}2qJlH&5^ZlR)PQFh`kZ+TY;N|%jc0zQ-P0GExO1YA z)RBmf+lQ0gbHrM-%!!ssKhx>2|Lp@v_Km3sc3eaFgfNStH#y!k6sob*4MeCxxGQ6_2IuHlETulwhiYugzN28+adW|5;pc6;^bl^7+XuF9 zGR^rUeaAdVd@W|_C?y2?x!IH5j(`s#uB?nt*%ke#kbRQv@h{onybr|R3(h}Kbx?Fr z3qLNNnh$|*ZGw1as<)!n;PLFrU%Es=p{EzZwuBECI_D7CL+fOB`q01^t=VA7&K}zG z*dyz}+^j6%>$06*Gt0HG<;(n#OY2}JZ~4!3cJ`-ib~bGJ>i~Qouqi2YOtsbz9R2b; zP0l5%G4KxydoMLC1FSy>yx+qw7mxUf!MWgO)E`zH>f2fdgSKzMXDun{jgVknd{O>t z@i((aswSfN#{5qcKr7;7K1v!1=SwFSH0|?)y*Xca!ZYh$Tlrdd(|%L+SUi^a{ayEc z{x)GCqKdgy5f|JTGNJKDea+%owS2%<(+C z2&{3-UrYaM-5IZcNV4}?2Yqi2JYUZ@Em*U_)p}hIUj#Hkj3YX5XLk6m)$X+rL^O}X zo3|XueMUR7d9aBeP(4!w8a^-rj+dP+-6@;cz$fxS>)w(Ue`*7l1zZI%Z(^g&i!A?@ z-IjWwX1MCt%)J9J0GC((-sM^yOuXC+_m!_bX?#=nD9~ALFUf-SSs_1rTSG$JE%3KG zJNsYJvG8XC#=0E?bKj=G@K+1GWPq8G5(odk^@i zYg-;1Z}=fUmaLxc|Dg2)pWEum+>;LKWQgQbGtNTo}>S5@s3VMEL_cX zQ2)(83uBn&Tlg_ zTJ2(cAUIM*2MkW?^uUi^#}Y7@Ji#YFJBO$XQ2F@2m z$ZIs?t^f0R)1l;tOP;MC_L>y#4ut!X^O?^ig*>BT4$gJPyssUJ`5!TSW`iDXq*Y3b(Rqwe^tGfy{>E| zJQsTALjSn?&q7{ zTk6`p23#ITyx>my{`LIwU30%IgfBy!YIqQOxScpee9&rGYOjR-X?HqHpwV8Qwo1$p zM>p}iHf`uy1v8C0wm`32VrP*W>PVSm8=a7Q*>9yPv?3tZ}CBubd z932=UN#x?Wx0FS>kFLo z1e-t399p}_48ecccrwl(EZ>gh?`!dV9)*fsy_MsAqTLgQ`_~B4az09GSXOOHFc|pmQ^ljD!KU-(7(;!}sZx+7 zB}cV@_bHKG=GyY?Mra$J?d+TPY&Tg^>gkLto?C=Bfe`uL+Pr^fi#V8;YJ+t{#m4c3 z8bW%a9Ofv_=kMy^IUu|Bt00|9XnA8?+J!Qj+!75yeD$$&$TDf z!71=v6#O;*+YR3|+2dw~xx{mjV$S{30x_trw!lZ>_GK??J;3Y%Q<4*b!sA6Ji;lGL ztL#4Sr2gzd#@1YEN%&97>;rWNYGIM0cU$82CFGRN zz4)2dD3VEkT2qj)eRY!lToMN@ODyxq)26WZBpbdr39k=a6Yukc<^NpPH#B=+{r`m6 zgTO;Q&d%+Jc0;E&Awj=vV13^|RC|=uVIJ{;|Cr@2{>K*gmMQ$ceQ60hN;|stbpIMX zWF7GU|G^FLy8(_Q+)#}~%nTgNf>XY5+i%Aj-V@sSjORTc*1^PDM+O}0+BY9wS5200 zlN7+&w)dGUIdHD;qlWXF4`ttMK#k#VzOVJdoyrkzGXG-jMUuw1l0%(8ug<)=E)TA^ z%-U55@7l%|-(7~(YgdNLAbT}sYxI~8Y-k{nKd9lIpjRqSTW-!hnm5`AMw|KRQcdi) z^WcIxKO3UqGvDEAGRE90hqDV#biYvj7t5#${?Y)mRd$bW8lH5zfj8-3_@tQk{7?>G z2RUUA>*kz|-m_ur;s$upbZ8d*A)S}jkf{W_&)srxW~9KRKa9)-|p z4_dZUya%to);unUQdk5pu=hGfJzWdQNoHVA@lOUfbKF9jA>g4x3jzhXc~T&R!@g$JdaPBJG61@nhPB_&DY9qR> 
zn?stjnjz1(5`WL1zkiIs|M4zZG zprf7J62LFtQVVnbmmgkPKr*dxW0_TMHRC7DR#ZExKuDIujmAQGJL!ljbj)faol8OG zYC-kkMx$L*XqSMn5dR5tiVOemVI?aH3yX@LeDHM=v~*2d;6j|H^vMiHAI8Xp?evf_f_zX{uoPxh+UvucOU9SMJqIzF2Hb^d7rtnb=CVTceQUzRm8*ZLBRYwXjcD`3dMgb zd=G3~rf7t*2KbWN0@Wm9ZqSM)`u_N1=>Z;OISEjO_J^j!-~EDKJ{qHi_(z68X?5Wl z9|$;990`GtJKNq4CdHBIzhi);>081ZS3jnu-rJCW+8w(;KG?Kin-EMqkkpc$e2XkO=kGCu8 zhXp|+mSyBzv|}keTmKnM2b`}P?Z2B0SF;ZONWGB_ zhnCpuLFzlY#no3mvn30jnZ3P$IG4Ayu7`yaMmS#%e{bNm_71Y$8)Bzhmy$?+F*#nL|D zXi*zmMOyZzlj7hVA>^tIo(q^zJC+!)#@-AeIq&V6zdZ}KZN5b(#f_R_fb7;>f0XsN z4M%i+ytZIFW(G`}&D*VSdOV7-+lXOk)*gd^9~Q&x!IXWb%Qbw^?6)HLh8&3f$sXqlLfH4sn%qMg z5jiArt%n;yANWt?7r4#fi6!je!l6Xceb_u=QQJeEJ~6JU5i40eidWt{6vTEvCEpE( z*rRJh`MmpN{ob(GY*%NI>LPidBW0DW48qsKMk{%v9jKv|T)bbV?@y9ri^=4=`9}oz zlAPVBGX61q|5Ddn7)h{@Xl$`UW1>6T?mekVb6U2v>^@?Q@pR5pBql0OrKfMF!OPi` zdORBX$EAF}#k6cpEGM)*Id$PF63hGZri~5WO)<~pd8)F8jif4$#0G3vx-GNepC074 zPm-NIX>u>&%8DTy>Vs?I--&lDErjflGhLa}l5L&7j#VSx{}@XDwuzVjZRH;%;EFp@ zJ-HY@iF7v&<{h0)3?U$?{*@|+_-Mr_9ctOxOJ1*Y6+T{fKYrhm{Usy5q-Hwe znD-a$4%4w}_{H{B+leYTVYs&H{3b82;9!5F7d~dyrU%BV!Pg5^JzicdUOg?qF8aSm zKYqEz%i{mV+pnj2S-f6{;Fa~k{;6Vqkv1$Hnm8%%k?0>714Xqa(z^&!L;bb2d<%rk z3WI8UzQ5Mb8MqYU5)%W#1C9orZn)5X#($8yv4Hnboo-aRxsm$RqbsH~cHK;RsB`u< z*`2j;t7~ZEgA{T|cXKYempzC?+Z8Ysc4Q@)y|;(FT6@9zRqEc@5zmLUKd13UpAKGR`gdYjW_v+Q@Y^*%i3m zpVVLWa+Hh}z8*VkPcb~d2%hOW;_X-i#-r=Wsj@J>gdCN@c+~BIv2||dNHUobZ`F?1 z9a(eKLc_lrV3gWS65-?2`4j8S|5-?ymb!pPG`W{Cve;4d!=jnF(Ys`4at|yZXZsvgKm9%R(!hBMYQUYtb@~y% zqvejx!~<%HOEzTychF?n&l~OYUPybvL_WwOgFXLlNxc#_RtZn7A)kl-$1)u52s)Wc zwk<1qSef@p@5esmslGE(vO~G2^I|fDmt3M;vv?VKb=_pbFS@OkcSH-%W^BXax`zC9 zg)`ng`23LtC)?lA~Ajp7SGrQRuObg3T;9AKcK1T@%Z@&uxC~G^L$jiXgKp;TT5$$q z;lnxC`rcl0sNY}jkS%%gaIBAuYfns?(nW)mZECt(I2c7Phw-cTMZC3ze42dUwA=Q5 z-f;Skh2R!EXBdm!>PhZIePwm)4z-a#ExuuUkNuGkaCyx}VDYewoD2AD@!mRDxHIC= z7IMK%#&TRrgu7s`3Ez_iFD4wWNaft2BOBkW(m)h>RL{pi*ci?Gf#{9$M_b^y<$lBd zHFIVxhu~!CYyQ21N%nyI>4}gE0miqwHV&8b!472%B&I-RBBVh0pwgc=l4g?e$s#EP zG^`|+s&!vik_)C$>u5b0?RY+IA_G3{*x5ap{m=Nj$-jAyZ#pr{QUAwgm%InT-b&|2 
z!&n+zP(NNxVxEz@gHQUeCm!T!CT_qT;!&_CMg@QCuty&2wcCKRoHdC%M#}lh2{}j} zHp1&6@LSqM$ku`39p!%k8RQLac-bT%@0UJEg!lFEYUeS}07vfAbFVM)o43U8=Q)tq z??~9OsecX}68F8*@P0L!*h0P^*wdeM!S=OeC~GhXG;pl#*jmN81Q=x;!O=T&jAFe_ zb|`2p!}m2m_K<327bHCOWrIWOu)vnnt6rK5mJd~4nppT{fv!Dk^Xe7#Yk2>VS$kI$ zhixSf!iJRma&6t2U}u?LTtY@?UHw5pBWqvInceb-_3q*`c|_|KVebGosD|wE*}Z7GvB6yf zX=DlM$+wpbtu6%tz7+V~&f(}qzNcq9LI&3b2o5iA0^x8H9Llms&4Vmi;?^W`EP?c| zbq5?-WZMa4|5!C@g#VeRKk4vBj-w-dh2V^QY*PR}o4xp4_ZQ>EiqSdX#YdlBbT`Ka zmoxa>Nfo?rB_P(3@*Ul`+7rlwj;ppu1G}1zFNB%pYF@RGJShD&6P}OTY35T76r#5} zXog7v3)fdqp1dS~A;rhu`|8rgB)f!s9|33UUA3?Gzz;0EYJ9i^EcYvaI78>dQyczM z{zBx0*Ph}XrJ#TozTPgpFndt0)Eiz|uswg}ryWlz%tx{q@?d!ZO7_|I((hJJDQeQ} z4Jr_jFn2rO`~{CHnlS)QG+dtL^1hID#OL<~pX7X>81K8UITN~Ews|C&*yfTVhyY)| zqtq<%Yvr%@li@X*rQ17prH0r8qal6kzs%t5my@9a__HtyzS%e*4hZ-t z7*xBvB?j;^eXj_IFo*I&lw^AnwDOyM$YNP^@JX4&<{F3_+89e*W`3?d84fYQjoeUq z^nsNQ>V~P|oAilQ`!!^b`9K{>8S&cHlg;~)y$bS|61!w;C~@}|+WQy}#{j>=OS3hQ zywXn&o19Ah{a#*CQFcJ??I^)*6SSi3X^BJ4*G_~HCGG$pNcPCd&nJOUwtq4B9y%i) z)q7#);04*VpB?EL*{zuD_tg%1^%4gMLF0?Cz_h}V|%GTc@tE-Qb~7!-RE>SO!t!? 
z@mB3$Jkmq{7z&@S+g`kF6O0TT9rmynznpNomuO(3?7wC3iGlpK_O-~-Si+1i3~fKP zZY*+4<|-pAQlPeQQ1#-HUdd~m(oR=D0)w9UW+W&f- zu1qFhm_}FKZd(_Slam9#4O|Q)hjPeaDu$dDO_xAXXi!kl^<|`bxD?6Qx$@8#BPNo` zYa;kl?x&B+FX_oCW?%8b{d(R5Oai|C#V$uGxu}980WdHwgVrN0|AF8u8~82o<6t^z6ghXNym)almVBD$ivb%1IVuo z`O+7T=f3PWob?&|S;6NNoHia6?y4n!_IWmeJgVe=h{fW5e4gn_EnKREIls1$GJCY- zcp~|6HhB=L<>Oxq1J8wWah)M))nFkA=LM4=>*Za8+7B(fhLlTCeLmaznd&Paaye!^ zDdg}{vJ{d04Kv)#Lk^|9Rh`I;4d>pk8?&`7VR}sR!;3NGpmK6M7?)}5 zVz9?3W-HEa{&>;6ufr1IEm8E1LC>LX%j@Zm91?df9{AcPu{Q>6SQu`)&&O=77M*Pw_DT59BZG1)a zXW8V3j@`Zpz@H1>BC@CM$T$=yS_EHu!JHcluj5(Adpxtn-t-(XkPpJ{w~cX!HL-2c;NUWH$por>VZ$r;>+FXAxv?CM>W&tZn_|aWmfYRacSE$# z2AL$v9&4PeBHwxI_?(8W{8TVIoR{p8aNPh&g9#2R`J3>1TC(tqT>Q-)QqUWZXceD@ zgDhZSuq%!{@_-$p=xeI47rCV_<*5SlKMLZRvdH{-r|g@^!*Se*75pD~oiack?23m^ zjTxESq9eb_BX1W;ww61h!H#frJXvzWC;8h{cM$RYwgtp$d+$CaS*QA{1EimQyJp2n zA-~z4wk=cE2IB8ZX;Azg56)S9$%~cbkzx{#%{k=X;A`*0QgAFwASVwKhD*p-LU^#+ z?ht&H{Vnwp|B_AfeFrcpe2=&uk-Q1g)T8a>pLkV8KlVE4rLH_UxQfiWteP{VB;rK% z))KM-wB$nlpJ?zSrxu*^y}aJO0ZQQP+HXV4m~-v7Dpj$8bHvV2k_s#Ny$#Hm0PANM zIM%^rH+Gqa>G0}iUC7=PXFD8da)Sy@>w5&!&V-YW{8TOYM_ zKgEpdouWkYp`09Dd}BWBQ}SI?D;)F5NAhpW1mwQ=x7ja+*$rM#r(IZjF$#Y33LIpw zW&|Hpk#jNd8^^)B^SL^X-{-@*4Nr$oQcfXce!QxvD1bPVaxmHa|9kL_3C!t*{}vxD znBo4j;`R6`8C+U8U8lwb*Lwl}R}HW&O7dREt|nFUIX^z~YTiFK{lRZ++2N9l2KkWB z-ln0b!89-h1&!p9yOER8r9nYEds^W}?qaDsS2kOLIg^eojLr08X!S zwU7DCFeA8e)vOOG{puJ!tI%jpVBa4S47S@EP_ z($0{V7hZ$=KAFMXcozqEyt$V*ayLx!5&JDUc5j{4l0H?olYYcX%pUF(NaQfK_UiTXzaGx6j`_|DaEbHN)@C>t#% zUxdO`5eek$;4>wEO|jhy&j)=TinPHQvTX&aPI=b)Qe6XloK4=20_~3FFKvRM*=5*- zsLz%nH~C%$_e^FpQJb%-pXH0;8V9AWl(@GR!=5PXYw7TOc_KVobxlKd52RiWcFrSv zYZ7=L@&_NkhcbwM*m^BRxNwY1BBM+ae8YVO{p57OLHVNrd%?|h}i!2zjG=b*~r`RM;ROW<}iRC)1lHxtG#ab{u6kDSOm5%U4RhJR6c zzP?As+Z91D2TmGcBHGP1^A|GhR6{N9^-P)k`*qBrqQ41)8xxn**N(T^%lZ2`(yh@@ zUY%A1*F=Q6n3dUT;H@=6n2XmrKH z&FWi8f$;JG87V&ytGJl8!%P&;Z1Q#0e?{|GiMo(<$z+2mQu1OjyigS)hx2phkXIXc z(F+UVTJ`&lPe)%bBL4{aqW#kK z*O!b(iiSJX)Gi76q2Z)jo-$nITz9grHG+m0m%xhw|A2E@Z>JoM>e|GAlWA{$x)wq$ 
z@TzQR_1$*A1y3&^5eDgh67>85!P||z_S0hWDA3Wq2;zgH$q(`R7v+3-Xn(M);kG=4 zJlfm`S(`4XzN;dgenm{Mg5R6uEKu{a{mEbFLt*&aWTxg)hQ_7-Q-2lD{eKmmbzGF$ z7sf%lySqcWJC%@bX+a5*md-urzVFPyFm#IwqF7*ecXzI}wrl)cU0ro`UH#qt%-__^ zd(S=RdA`q~0DGf>a*e)6bi`GT=d}lTu`)K0hr`Woo94o)tlk>p#X94=f;3(@uF)oW z&sC?{)5QM+MFa z9C-a^K{P>^WN1(M+;=ljXDh)F&o<%OMH9wryl}X%H*#zKOw$%mw1^;5MkEiPCbBhP z`9cvi^*ZL`xp^45J78rKJPzJC@m3*jaTEo zi!wHFY*F4oM)EhU@`}EgNO&RlQf$x;R#|Qd#cxWRX*1^cPjp}0^r9CcSADjMZhD_h zlwiL*T*xDI*1BVO-glon(~9dywdbnJwRAJcL~7Noz)?ek+( z89f}#w}3zs--NOH@IePmaKIZL>vpB>aWzYG zo^vBC1vY_R9flpVJ3TsGoR`UKVxl*^(Z8<}^}6(}xW`x?d0+}`N_=1Bi1#5iJmuSF z4ptp1)A?k*EBU3A>q=X;KZD?E!!rrED1ToISIT|J>k=A=jbX4hmUP& ztgwTht?<4I_KTcIz9Kp^8*7mXJLOV}JnI>s6R|}4!{PyPe55@nh9?ZJWhFn3?46HC z%8s^@$MQqSn)Js*XWcbd*we)5z{H|GVMDV>Zd>TlQhXZPFsM)_5G5ZqzzXg)z-D{c z(+rPgK?6n&`^9i`g)&|W7_26}RXtWo6XKU+-l!8+v|q>@kEy`zI$u0cFky)n?OJfR zh*)lvz!NHec+t{16aeYBJJg~IW^>Gf24pC(py)Lzn4Fb!hjaZ|qOs4YNPAoNO3s>8OKB3wR(dh7U~1EG+$Q z0h5%@x9?OW<8vYG6p23GvZfj@RROaITlR|dE9G_vbDH^87BWl`(+E=A)P=x9sXbhS z6D^Co9l1{M%Fv`U1+V)l(N;#qT6Qw>A{{jC-YMdR3cM#yQee3;!|~Juy>JRO54T#=326}z&Q>~#Agy)M><_>L99H)YFlVbp1#s|i+g*b2_D{Xw_tDnI-xLY-@H+HUxwj!bLeUGN%A`s1P;Eq>eYh zy#$QeXEfo*xU`1^u}-ARVnT>fIUar6&~J?s+L8ELo(o^J)6OcK4NDT$CagW#)+BPs zo2z#+&GuajZm-FWeP>gTzErsK3xJvCyJ)U3qM-}uKVw>NK zrN7loC-)0+mO@7u`Cra+!{@d5ht>}XxT1jPO4d287u~pkmV`E~z0$N*M5`y^c&6MG zW5!j?HYk%5+W5TY^lTU{YS$Poxhku;%Gy(D3JzN-@q?9=r)uWXhwWM&`fsVmReAk& zJuP2~A!4oOt5)0;9Fok)U(IA;6VuT4E?Ms1*&aCKQzPw$XIchHg` z1I2$-T*!gj;yn?JD5&(xER-WZsSmuWbiZ60nV0%es)Ss0A*EcY|~ZKO5E>_Qny@bqQAV!Qj|J2zjBV0iT?p!}yIXUk_t(c>%33d#V_v0WsNq z#zK(~_2+R=zEeT6Wc{oQ&-7Jn{0 z8ICh1*OmTfO$Np3px#8Q)E0ZPSp9C`ZRVSf*uTKn#s_>?x%E^NvV`Y?Ta6O1e#v4rXaGZy z?Z`7?;|n>?g@uYlgR3VEG!3MOgQk!PoAU9OM5;uVnQ@(k7NTi&Z;(5tfEJ#%#^_Fc z`&|wDlF=e=R)5CU7_id+$4QYb#2v}_u?lK1Y7%4_9W7mmV7iPocZh59YE6oSL2`Tf zpcDP9iQ=ee*014xA!OAe;22Vg^Jo;wPX$WfwXDd8WkQs=HszLLeJuvT(NMV3vL-kf&bs13 z&mK$iLZ4E47H7oppg&H?`oReessz3_gAL(BPMk~%{iNrPFY3s)Csq=<2W*`}!ibL1 
zENca>BDdys<$y7i52v+z;iTDht3x`>nEB+A(WK@+p(+x_jo^ylxzt^vFcHlu@AHB) z{!@l-+^nP3?ipBHw^9*`=w7qS$B8RqR#8e^j79?6e{y0#7txx&+5{)*3!F+cxL-a+#O5zNIFb2lQv*KJ!s`~? zNLx!?;-fSj2Y;v`9ltGM<>CpC&7!afuX@A7=5B-4p0De%1Ou?waBb!b=!#h;(O#j2 zqI=31dkF6~(1nCD_kA9x(up=~iQQfbJk@noNRumE^VjmYKZ;{EYa{|orlr~}X` zsU4Yv;ds2TQRHTRBbc|VQwg7OST6+Opm{y^wLVC$s9*w^p%UcP(pFCUw7I|@kJDe& zcdHLjOs2#5Nlkn5J{!y@osO_C`LH2esbXW_c;d|fxZ~0nZblMFdnQWW%8Zw(2~_$$l{*aZ%wiHxGXSnmT5GA?6r#s2-1|Tz#Z01T*V`=+I5U$#V&g_%PaWr1fzElN13@TZXgY zLO#8H-&Cy&5E|0c_-Z7i!m(8Cm}5y~ReQ8?JN>uTy9>c$#2rVgMAAZOb4!DlNaVf?f5|G8AD1trqo(UJcQWiftxP`8Uz;^y z);o7VB0fHzd|8sfsD!PQ+*(d~OLENt9;z`3Z9#p_G}ZXjPYpSFcS7lfMFynFw!{fd zwiT6UlxvO@l4m7{D2b@nSKVXL9s!^02**6wR)55ECYDp&x2UGRmYhyE;Z%qYi?x-J zH&sjEW#AHYB4N}{lY>qf){ifyyR=^l$`jy!#WXJ{n|&eWVXGlqy-en_V8JPr7x~D44Q1755E2Gs6)A`hf$<-TqNW{6Uer^nGu^}a5oYPHEIiJ zFAjx{WX*o>@w5)1ZsFB|zePC{CV{$=x+mv;*N@<3!K?B?{r84jH6a<#WgLuUc7!E$ zmD5Erl+Y$$DtA~-ItYRVa)lg;ofgd5SXgDeuRbb#Kpr9GYQpiDdm;E|%-Qq{*1NRH zNa%(5P9=o0y(KdRM%b&14--kJ>P}UZ`QQkr>bRPMmRRVIC7(@GVY?D<-W~}Rkip;G zX2Xk0S{_%-eUX|%;jP_nk+5$uud=dDstCWRGdlCgX8Q$EuHO_)z1g8D2dXs#;)d6PYdkB0;AccqQI3;#;C|H&$4VxCz$nQDd zuXfv7$h~TuiGJCzQtqn}{9$!={_>@GUdZBUV+5>7V;(E&9o55$d9g6r!5oI~v z=(R(WnJ~lS_L6YddA&WfNyB?DSgyXqwqK6WkwF;iF3k;o|LDMs=bqq> zx&N!5R%)-slkrTkF`d`vR9p5Kf3}}8SqhJGjbXd( z<1FZG~E5pe^a>Z#+S^HdKFs%5WFL70}Et_nRrJv9- zacsSp4C_Lu5rS8fKj3!zy`IP^Ia|?DL}Z3a$qRetipOX+T-M<^9ZwLRfDJQryO6{(@NMan`#Uh-IBZ$c_cDfx@Ra82mmP z&d0-h+jX*dw*@*EpQ^&G`S7ITYv|vZ)Plci0bSy?UD92P@Vv0c)meD2V1V+;-!kd3 zhVlbJ2ZY(iEji?UGe3j;;s_Tj`W7y)+?;w)>sSy8^XY&SN)xj-HjuCW4CPW4Ceqr2 zppY4+Zp$Jt+fqr{_iz_ZiU&MXJi4@V0VlI9+W;SxZOLQ{+U&GOKF8lW#oU+Kb=5=fr*MX?Rpa9jue%BG*hl}fl#{o`%@l*?fo_}HTJu9a23TR~n_ zp0FdoN3Z!*Qn;)&?O;4a+L;n@&MZ@{zudKnr)EbvBh%^4k=>^af*f7kSHG^hw-)Y% zan1V{u8BM8Pi)C*YedKKx``!ZKmqtZF7UPKWQawf0p9{YWHI|SV1o$!rUbWBuB1Ev zEWI8K>4vN~bftDG{lHm_oD&n)mJd@JTN;^ZJJ?Z4E?dIdrLeUXu9wZIe$Rt7rFf;t z|BBvfT5Lmz;_7N%aetEUe&g@*Lkr+o^+`vV%xU-RiGfDgroN8E@3QI_KeWvdS6O$S 
ztpu5?J!0S^q3Q^G=2E5jh!?ywhy5w=Tpu2`faCgHrba>RdF{HIV9Enk{b*vIg!S9e z0&!3Cp#6~V75a6b=8_LWduj&@MpNNU1q^r{mp`T93&&iE(_l;R`keV(&nrf+6PODj zM0MPq++L6dW;Z>)_!o7@F%LBUk!OwR(Yj+v=h01zaawJ;?sVzWFAK1YRbdXrON_Z~ z4S-kki+0SNnz!GyJuj4_y37(D8sHm=Iyg}X<8!;`B0@^6iB}leZ6+t19n>-)ao|q z%Hb|1i-_-Kh4RS|xY&s2UH0bwlh>0Buj)BE1JSrpjXL5RW#HGGmr+B?ti+@o%+#sy za!C36ZVK>;^+v%zzGPQ0o)^C@#C64G^s>xBkHYSFcg_M>UjvkGS|5OslPYX+TiAi* zf^?!!`S3N7_;7I;Y*40>A7!}>M$P_-b{}_y=dSwR9nMt7D=CNuGsY4g$6xiPiv*w; zm8V+@Fisyn$J|c`cNe0u-VhonjS_PuMhiE}%*xMl}@ z-Ojvj?NFFegsw&Xvma)Cjk%+t0IGLAh47CCXwzNkj_)W)6x%(k-?gi7C_tcrol>{C zrVo5K1%A!#OK|u%VC`Hq0~t>CPWhJ2 z!-;wzVGQ$dq8caC)b04C#Wu%Vm^lxt!BUy|rG5)f6UP(br50XNYIE$65Col)!Zli; z-;<0l4Ry@W1Bd70FHIeD%WqVK^IAeO-^;n-#n!1vhTed;Q-?*@2H^fBIJIDl`RPXD zl!B8MF1X6r*FVR!3RPAY>ENMO>g^9Im>&>Z(_sM6s?{EzvKC_xsZiy}<($O|7kII{?;n?GH1Ly|8xXy|85lai8C zk*scGdk|xW6YlyrEMYweme^z_2U@X}FyPu&i+@+FmE0CKrhPTy&y>|p!-1%WSB+>| z`DH+74&nF$8`9gXway;n@L{bsE}u2zi`^FIOn{e2eyI`&-zVTbP$ ze?@x-d~)wew8Fi@B#mc0QcHRR*<`y_Zp=1SW}DC1#UEkk_?a~FdBUrJ2b?n=@X3(|yJ$t0hiM=|ubnWWvK($eCH3Yvh# zr)K7cH=r?WSz3+HXu$kwG!-`P`%aU5cqA!}+hT?*aov(b1p+hZF~);7*yFIGQ1-RX7BJGYl zU97X&3myiM8POMs=+Pri%OT0-86QYElFV`!d~?~EIZ%X?nOzBONfbk8?Qy0SPUH=j z;~N9qQU#MHxG@*EE8r%NzwCC)P@jneud4rKOL%(UUHZ&+nQfJyFi+zIHGhr)YGY4X z?4k^*&gCY!V~<~Rcxtcmdh@t6#gP2808h?2CD9>%(=d>_On9b_OiO|KF;N&a4}@Q& zakD=;Wd(HUz_|H>W9Y$xgjAQYwmh8&^ ztrBCi3Uy>d6LvYV8OYd)G0%%-dXm5bnz&`C zY-t0V<_XGRLX0)pGWSpmsemVexYv%%*u&0x>WVU!_vz%UM+REvz;g+KC{+6}<0>zV zFv``2oQU$K&t4J0EL9lOKA28!nQ~MZS3H${EAUe??MvUaA|=zbiP`?Dj|&CaKg&t< zMipSBnO`GtOq_X>I5Vv_po5uIsj+36W-XH?GZUP;^mFdjbS8&)v|RW6yYzHnGo< zx^Sowp3A}CRc~8<dJ(nlWHn}Ac! 
zG^P64>wq%$%i(xk=qkUd#bi{GVD_M!OJ6g`hV(n(*A&^7qf+V6XQa=nz($LITF7B>nDXL_kd>N4 zo|8?f52?tvIvAfO;V0!aZXG7Db|Lx{Z4w)3ke}3pyH4=d3Y>$_E0_~KM!N~N+pD)E zQLVLt+$-beIrYfQI6shUlCz;`{y8xtc&K5J_ zWu4RV_}rU%-em?n%{NZ$6kP!k{mCBJw!B%;7Z%Fwv+Pm&Q4DvS5q!yrc;fc@Q0#EW zU9cZ|JDGpo+ThMXC@g8M&^~q z{hL(;d&JE2@v7U1C0iM|mhnimQbHW<^G?}mji zT`wqNcEr7FZQN$1N9dZOokBP9c6{;)$>AKvZw21=Z`b^k>P5fV+l<}zSmm3_WU6zXL4JE~(*cBUtSLSn;`VwDV@E))wpFEVuDq(d_Gc9v7rnJ-J`B81G zT-{)`8aEA9OLYbfDN0dvPziHc_b zd3l@7X@m3FThAQYMR+dE8KH?B8C`)Qz0W<^jjEKccrNZf*F>*@ zQrCF?Rd~>z^&g-yGW&n&WFmSkYg7Ur*FMU__9EPyJ(GA=>`f`$$mHgaCg0J=o$)y6 zvNBV8KZ_4t)x;z+tF@owmbGd2GZbTJvp?F7jBVxfEvPmtKg34RXc94&={VdC78m zPhKh}&t}gA<6CpK2{oWsBTTs*mnp)|v}h&--c?gT%pZv2v_B)ot?00THR?#=?g!QU zgu$iwDF=3C{Hjk*8!Z=xljZQq8@s%k+iX<3)PHh3@t=hUQdZ2}tFF#WE~S(B#+(s@ z>fT`dH)VGmbXRQEWCVd^TP66x<2pQD`V(W1di>dP(+(dvu8{WM?hkS^Mr>3pM8ma| z?`=X{I-~n_t5S+zepn2)INs767jvyxW_QbxE3+z<}0~7?z!`1=^e;Ck^F2k)Cl8V zqbc+=80OGHb;z~G^f+uU*~xC1XM+1Z+U=zKqD`3*=}gNBXJ!^_ zuGp4m{Ln!s*{PId&91hgR(4!Cbb8ZE&Fr)e{n2}6I}GTiMfJ>%8pFNL_n%eJL*YF$oQTTHf$qsBU<|R{4 zmpl}?;IK;kTGYKlH^L9Op|Nc7{-n)?T~SSNpOTXq!s?yL_{Kc~elw38@f453moiMR zA!tQuWAl@AOVSrG8A5|#>r9TQi@`<3zEJWuX{86e62fB!U30)4^~kyg$n%1=`E1p& zzcom0EnzcLubLJzU19yyY+S6e#ttsH?RVl=vswe%%K{$JnM+rT(f{#VEe?54`W>p_ zOT(>vJmdso`uJd8Hr?Zp-KRZPejvC$y{gT$0-j}7dsObQB^}}TIG?OC>Z7p)5{m+< z{Y%70<5Uz^8%B{1it*U7I+i%UO~{?GQgx>uR0?QhLA%@FlHED@!?6N1y36|J4T|G+ zq2n2_J_tr;qmv!5%BJ2i@M4LvrYUZYgD!9Uvy!dHh91=!Yg}Fg*TuN?xQOTum*Ar% z$L2p2L7V10kNWka7UKGvNL|H_P|fXd!PHTYio zAfTBYpaG>Xm=QjuoDKg59rid|i|Z8OFZw9t5f{7?&x^(zGO%8z^@j6jN}()sd_v>W zIe4+LZ`PkWOwjH`+!4sD)!VDoZ^d3|X0N!vUb?}8N}_Pg{eb>J4v4bT`Rtl3xE{UV zpY&yXk7J_%BKp**sUj$6uBCocTCRpx%$ZOgmDA~#mzS0>5|sme5zh_rr9I(>;B|3g z09#Xp+7pFr8Gpuw)`Ddc%atgA2aF=sl^)ps6L~k3XbCw1>&FbrQhYd=A3qt?)#R^E zdKNRsMcMp;dSX<1C>6WhsZo2Uk_o;uSxB}-T-G`k&NpW!ydE)y7Hvrq%tY7|eI#<^ zt{a%JLO7W|=&{<5Oqrc=Wj^^mjaow$RPlXMxF!5|U)pt^l|h=!1ZBPh6Xe~YWQb~v zk80^=Q*t^K&oBL4wyWf{5{L^V6_|hOJ9FWgiX0pFP>^`dD1+7G%%$Os6nsR}9>(}c 
z2d^u48y7M)uvSRe?xd3CU>H-4vD{fnu%u77-Os3fF|)~#9&i|UpKOBlBA*gA6~YQ1 zThcn9ZKUeL__r5Fk5um#p^IJBxsLF^biV4-0CItfl~Lqe+8Q^Bu~61Of?N!P_4Vs5 z$Q`#XFSu?CgPt&Kf_pOtRfi&i)nlq+&)H~_HhHYfe`p0_seDG*uwDyQBB+GExD#Gvh;oVA>$9O(8Mc=5X)IhtCBRz@La)p|~u7JT!x&hGdzQ z#dzjf@keEU1;FPZ!p}Pm2Sqxofu@xn6pt*}mPigQ*`BdgEQ?!6tYAj~ympP;u9SB} zJ*j0xnl%Dee8X(60(2;_GVOU3b)kuM>~a0>^D~&P>C6v8IeM`C9pXoY@2haqm2r#M>vU!X zUGRI+cBphLm*F~ab&crCmCBA-3WM%W!~LzRe8c@C6)2J3Vee|Z!H}7pi*FZnHsV_| z7&0bnm4%>333ktd)Ag`TlmtU(H2jtFUpAI*TRoh|(m00?v(l!Tk?`CpHlJB-NgGE6 z7uR5+)xLCt!A(!nBME0T+EYlUCY(;Xmjq}FtbLcRCEluIIw-d>tHT((tlHD#RcI8xYQ**6y^y>Yb*ZgxCuwAs=GM?~66mH!GC)SXs2L@uPl z{e>K`fz+Gs3OO#D2`fu5kWAN{E%k#9^G-AQ|ETrkW2eJN4A*csWubkmpfl0auXH?*g148Wbfy}pVSMLx?KH9?sv@>>UCWh$tyFG@luZlMy6)1FDHv%`fQJ8KF(p3+7`VnI~Z}fbd?>=yQe8~MW9w4 zD{)uKGRL#VFq#Q6N#u+Wa%Z(WFxuM_Q9+)0Cj5tCFjo$Sy_gI6gj=xF5Ptk8Trkgb zRup~K7DxxewUP$R9zkJmvU0IH|M~yFii4{DP{hit@N;RU5EyM{*uV| z;=BF1(LM6Gv-Oq+`5qzJGpm82H?FAc1B2)x=cjouoM0CXeEJQvunJT0A4ERut+n7U z2Ib#~FC3E7VY(ZznkP0e9}d(|kvWj*6hf8LwHKoJ5=5*ThC;=Kq_HofPkAgN!GKZY zL>msyn?s&@UJ#LzWV$0@h0Fx*>N&cws^OF) zek%;6;@Jc=?yc*U{wsG`a3~Hbu9E~2V<}z}9CCp}k(WG|w^EC1P6!-##CHFga`HqE zp6UcLZtJa&Hj?AT9~xn~n$a?o1NP)l;E)J>l*PhDct&Y3mZLS9Mi0BKAiOh{^<>5y znIQJQF-LKoB~(W|%LIL@404h=^|T!I3$3oOA_7%%Qw?^_cQ*W>5{n0I*|$Fbsfy;% zRnGH2zO+rhriS?tNM{^e`L}JhQ^0M5XF;jZeVr@0#jP+DGthG39$OdEoq5^2x5J2?i6!%(<47lTe}d^2fNp9ddR>pffwZP+lM zorgX{1wS6RP0BMza_)=w%U837*Ylg(hNg<(&odSzshuK7V zzdf8$J1qwJgJ~Qi9LX;FoYNh!QI0sA=!>{l#yqa#RWXjM+{mst0Huuf7%KPfbAt)B zD=k20Oyb%+eRK!B;jTD5pgQdIxU@Tg{a(y3Ax4L!X^=*=>S+{DTuujnH1XoK z^CH?Ee+VK<$y<@WMB@6yi|ICq@3)yh7jk{TnXZeGB(rh4WHns_uk<{XdPJi}g2as# zpn8l7>gA-RFK$R`uhF(}PWYNuvb~Hn!I#F4SS}~T2Cc9yi%|KJ$h7GS=6C7^AFhKp zfiQnip1i1l9gbl9SOmIK@DJsY_{Q|u39o?2Y=Kubc0h6H|=;!zdQ<$rC7JktD|X;(=k120%dJRiC1R@ zKaGE32c>#tV6;w{9=Y547}Xmc04wv(HNtw~RSOp^FYVT{W@A8%Ssf>4;iI}DYB#-j z;FngIPJm51d`);BgHg+61k&sX-FfQ7_lo95;*HRkMW4v?##`#K@K%f6r$ME;dq(U% z4{8d9}=Gg z^z}e?vkO`1uj0ytch*It4pT>FPtb2|_vC0(e%u)a*t@VVm~HH> 
zX+RHN$%xEFaoTadEhbOn0S$%=CKcK(nY7y~ZH?5WUuMH_=A5@1cY+NiA%{X49lSc9 zJaQ!iWfU?d-&2<7z`q9kmN|p2e`G0|!H(H09mpm6r^aqqB^}MsAuO_6;h+&1wC4?$ zuJzMDKaahY(q+q4B;ReCi6@HTu)^A?)?}y7$h~sZ6fYHH&#H}>rlUPgXWW&bV9Ok! dOZk+I`rOt#ziGw<+@G;F9Y03F^JoD!{U83+@J#>! literal 0 HcmV?d00001 diff --git a/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/file_v2.bin b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/file_v2.bin new file mode 100644 index 0000000..0242c57 --- /dev/null +++ b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/file_v2.bin @@ -0,0 +1,5419 @@ +/* xdelta 3 - delta compression tools and library + * Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007, + * 2008, 2009, 2010. Joshua P. MacDonald + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + ------------------------------------------------------------------- + + Xdelta 3 + + The goal of this library is to to implement both the (stand-alone) + data-compression and delta-compression aspects of VCDIFF encoding, and + to support a programming interface that works like Zlib + (http://www.gzip.org/zlib.html). See RFC3284: The VCDIFF Generic + Differencing and Compression Data Format. + + VCDIFF is a unified encoding that combines data-compression and + delta-encoding ("differencing"). 
+ + VCDIFF has a detailed byte-code instruction set with many features. + The instruction format supports an immediate size operand for small + COPYs and ADDs (e.g., under 18 bytes). There are also instruction + "modes", which are used to compress COPY addresses by using two + address caches. An instruction mode refers to slots in the NEAR + and SAME caches for recent addresses. NEAR remembers the + previous 4 (by default) COPY addresses, and SAME catches + frequent re-uses of the same address using a 3-way (by default) + 256-entry associative cache of [ADDR mod 256], the encoded byte. + A hit in the NEAR/SAME cache requires 0/1 ADDR bytes. + + VCDIFF has a default instruction table, but an alternate + instruction tables may themselves be be delta-compressed and + included in the encoding header. This allows even more freedom. + There are 9 instruction modes in the default code table, 4 near, 3 + same, VCD_SELF (absolute encoding) and VCD_HERE (relative to the + current position). + + ---------------------------------------------------------------------- + + Algorithms + + Aside from the details of encoding and decoding, there are a bunch + of algorithms needed. + + 1. STRING-MATCH. A two-level fingerprinting approach is used. A + single loop computes the two checksums -- small and large -- at + successive offsets in the TARGET file. The large checksum is more + accurate and is used to discover SOURCE matches, which are + potentially very long. The small checksum is used to discover + copies within the TARGET. Small matching, which is more expensive, + usually dominates the large STRING-MATCH costs in this code - the + more exhaustive the search, the better the results. Either of the + two string-matching mechanisms may be disabled. + + 2. INSTRUCTION SELECTION. The IOPT buffer here represents a queue + used to store overlapping copy instructions. There are two possible + optimizations that go beyond a greedy search. 
Both of these fall + into the category of "non-greedy matching" optimizations. + + The first optimization stems from backward SOURCE-COPY matching. + When a new SOURCE-COPY instruction covers a previous instruction in + the target completely, it is erased from the queue. Randal Burns + originally analyzed these algorithms and did a lot of related work + (\cite the 1.5-pass algorithm). + + The second optimization comes by the encoding of common very-small + COPY and ADD instructions, for which there are special DOUBLE-code + instructions, which code two instructions in a single byte. + + The cost of bad instruction-selection overhead is relatively high + for data-compression, relative to delta-compression, so this second + optimization is fairly important. With "lazy" matching (the name + used in Zlib for a similar optimization), the string-match + algorithm searches after a match for potential overlapping copy + instructions. In Xdelta and by default, VCDIFF, the minimum match + size is 4 bytes, whereas Zlib searches with a 3-byte minimum. This + feature, combined with double instructions, provides a nice + challenge. Search in this file for "black magic", a heuristic. + + 3. STREAM ALIGNMENT. Stream alignment is needed to compress large + inputs in constant space. See xd3_srcwin_move_point(). + + 4. WINDOW SELECTION. When the IOPT buffer flushes, in the first call + to xd3_iopt_finish_encoding containing any kind of copy instruction, + the parameters of the source window must be decided: the offset into + the source and the length of the window. Since the IOPT buffer is + finite, the program may be forced to fix these values before knowing + the best offset/length. + + 5. SECONDARY COMPRESSION. VCDIFF supports a secondary encoding to + be applied to the individual sections of the data format, which are + ADDRess, INSTruction, and DATA. Several secondary compressor + variations are implemented here, although none is standardized yet. 
+ + One is an adaptive huffman algorithm -- the FGK algorithm (Faller, + Gallager, and Knuth, 1985). This compressor is extremely slow. + + The other is a simple static Huffman routine, which is the base + case of a semi-adaptive scheme published by D.J. Wheeler and first + widely used in bzip2 (by Julian Seward). This is a very + interesting algorithm, originally published in nearly cryptic form + by D.J. Wheeler. !!!NOTE!!! Because these are not standardized, + secondary compression remains off by default. + ftp://ftp.cl.cam.ac.uk/users/djw3/bred3.{c,ps} + -------------------------------------------------------------------- + + Other Features + + 1. USER CONVENIENCE + + For user convenience, it is essential to recognize Gzip-compressed + files and automatically Gzip-decompress them prior to + delta-compression (or else no delta-compression will be achieved + unless the user manually decompresses the inputs). The compressed + represention competes with Xdelta, and this must be hidden from the + command-line user interface. The Xdelta-1.x encoding was simple, not + compressed itself, so Xdelta-1.x uses Zlib internally to compress the + representation. + + This implementation supports external compression, which implements + the necessary fork() and pipe() mechanics. There is a tricky step + involved to support automatic detection of a compressed input in a + non-seekable input. First you read a bit of the input to detect + magic headers. When a compressed format is recognized, exec() the + external compression program and create a second child process to + copy the original input stream. [Footnote: There is a difficulty + related to using Gzip externally. It is not possible to decompress + and recompress a Gzip file transparently. If FILE.GZ had a + cryptographic signature, then, after: (1) Gzip-decompression, (2) + Xdelta-encoding, (3) Gzip-compression the signature could be + broken. 
The only way to solve this problem is to guess at Gzip's + compression level or control it by other means. I recommend that + specific implementations of any compression scheme store + information needed to exactly re-compress the input, that way + external compression is transparent - however, this won't happen + here until it has stabilized.] + + 2. APPLICATION-HEADER + + This feature was introduced in RFC3284. It allows any application + to include a header within the VCDIFF file format. This allows + general inter-application data exchange with support for + application-specific extensions to communicate metadata. + + 3. VCDIFF CHECKSUM + + An optional checksum value is included with each window, which can + be used to validate the final result. This verifies the correct source + file was used for decompression as well as the obvious advantage: + checking the implementation (and underlying) correctness. + + 4. LIGHT WEIGHT + + The code makes efforts to avoid copying data more than necessary. + The code delays many initialization tasks until the first use, it + optimizes for identical (perfectly matching) inputs. It does not + compute any checksums until the first lookup misses. Memory usage + is reduced. String-matching is templatized (by slightly gross use + of CPP) to hard-code alternative compile-time defaults. The code + has few outside dependencies. + ---------------------------------------------------------------------- + + The default rfc3284 instruction table: + (see RFC for the explanation) + + TYPE SIZE MODE TYPE SIZE MODE INDEX + -------------------------------------------------------------------- + 1. Run 0 0 Noop 0 0 0 + 2. Add 0, [1,17] 0 Noop 0 0 [1,18] + 3. Copy 0, [4,18] 0 Noop 0 0 [19,34] + 4. Copy 0, [4,18] 1 Noop 0 0 [35,50] + 5. Copy 0, [4,18] 2 Noop 0 0 [51,66] + 6. Copy 0, [4,18] 3 Noop 0 0 [67,82] + 7. Copy 0, [4,18] 4 Noop 0 0 [83,98] + 8. Copy 0, [4,18] 5 Noop 0 0 [99,114] + 9. Copy 0, [4,18] 6 Noop 0 0 [115,130] + 10. 
Copy 0, [4,18] 7 Noop 0 0 [131,146] + 11. Copy 0, [4,18] 8 Noop 0 0 [147,162] + 12. Add [1,4] 0 Copy [4,6] 0 [163,174] + 13. Add [1,4] 0 Copy [4,6] 1 [175,186] + 14. Add [1,4] 0 Copy [4,6] 2 [187,198] + 15. Add [1,4] 0 Copy [4,6] 3 [199,210] + 16. Add [1,4] 0 Copy [4,6] 4 [211,222] + 17. Add [1,4] 0 Copy [4,6] 5 [223,234] + 18. Add [1,4] 0 Copy 4 6 [235,238] + 19. Add [1,4] 0 Copy 4 7 [239,242] + 20. Add [1,4] 0 Copy 4 8 [243,246] + 21. Copy 4 [0,8] Add 1 0 [247,255] + -------------------------------------------------------------------- + + Reading the source: Overview + + This file includes itself in several passes to macro-expand certain + sections with variable forms. Just read ahead, there's only a + little confusion. I know this sounds ugly, but hard-coding some of + the string-matching parameters results in a 10-15% increase in + string-match performance. The only time this hurts is when you have + unbalanced #if/endifs. + + A single compilation unit tames the Makefile. In short, this is to + allow the above-described hack without an explodingMakefile. The + single compilation unit includes the core library features, + configurable string-match templates, optional main() command-line + tool, misc optional features, and a regression test. Features are + controled with CPP #defines, see Makefile.am. + + The initial __XDELTA3_C_HEADER_PASS__ starts first, the _INLINE_ and + _TEMPLATE_ sections follow. Easy stuff first, hard stuff last. + + Optional features include: + + xdelta3-main.h The command-line interface, external compression + support, POSIX-specific, info & VCDIFF-debug tools. + xdelta3-second.h The common secondary compression routines. + xdelta3-decoder.h All decoding routines. + xdelta3-djw.h The semi-adaptive huffman secondary encoder. + xdelta3-fgk.h The adaptive huffman secondary encoder. + xdelta3-test.h The unit test covers major algorithms, + encoding and decoding. There are single-bit + error decoding tests. 
There are 32/64-bit file size + boundary tests. There are command-line tests. + There are compression tests. There are external + compression tests. There are string-matching tests. + There should be more tests... + + Additional headers include: + + xdelta3.h The public header file. + xdelta3-cfgs.h The default settings for default, built-in + encoders. These are hard-coded at + compile-time. There is also a single + soft-coded string matcher for experimenting + with arbitrary values. + xdelta3-list.h A cyclic list template + + Misc little debug utilities: + + badcopy.c Randomly modifies an input file based on two + parameters: (1) the probability that a byte in + the file is replaced with a pseudo-random value, + and (2) the mean change size. Changes are + generated using an expoential distribution + which approximates the expected error_prob + distribution. + -------------------------------------------------------------------- + + This file itself is unusually large. I hope to defend this layout + with lots of comments. Everything in this file is related to + encoding and decoding. I like it all together - the template stuff + is just a hack. */ + +#ifndef __XDELTA3_C_HEADER_PASS__ +#define __XDELTA3_C_HEADER_PASS__ + +#include +#include + +#include "xdelta3.h" + +/*********************************************************************** + STATIC CONFIGURATION + ***********************************************************************/ + +#ifndef XD3_MAIN /* the main application */ +#define XD3_MAIN 0 +#endif + +#ifndef VCDIFF_TOOLS +#define VCDIFF_TOOLS XD3_MAIN +#endif + +#ifndef SECONDARY_FGK /* one from the algorithm preservation department: */ +#define SECONDARY_FGK 0 /* adaptive Huffman routines */ +#endif + +#ifndef SECONDARY_DJW /* semi-adaptive/static Huffman for the eventual */ +#define SECONDARY_DJW 0 /* standardization, off by default until such time. 
*/ +#endif + +#ifndef GENERIC_ENCODE_TABLES /* These three are the RFC-spec'd app-specific */ +#define GENERIC_ENCODE_TABLES 0 /* code features. This is tested but not recommended */ +#endif /* unless there's a real application. */ +#ifndef GENERIC_ENCODE_TABLES_COMPUTE +#define GENERIC_ENCODE_TABLES_COMPUTE 0 +#endif +#ifndef GENERIC_ENCODE_TABLES_COMPUTE_PRINT +#define GENERIC_ENCODE_TABLES_COMPUTE_PRINT 0 +#endif + +#if XD3_ENCODER +#define IF_ENCODER(x) x +#else +#define IF_ENCODER(x) +#endif + +/***********************************************************************/ + + /* header indicator bits */ +#define VCD_SECONDARY (1U << 0) /* uses secondary compressor */ +#define VCD_CODETABLE (1U << 1) /* supplies code table data */ +#define VCD_APPHEADER (1U << 2) /* supplies application data */ +#define VCD_INVHDR (~0x7U) + + /* window indicator bits */ +#define VCD_SOURCE (1U << 0) /* copy window in source file */ +#define VCD_TARGET (1U << 1) /* copy window in target file */ +#define VCD_ADLER32 (1U << 2) /* has adler32 checksum */ +#define VCD_INVWIN (~0x7U) + +#define VCD_SRCORTGT (VCD_SOURCE | VCD_TARGET) + + /* delta indicator bits */ +#define VCD_DATACOMP (1U << 0) +#define VCD_INSTCOMP (1U << 1) +#define VCD_ADDRCOMP (1U << 2) +#define VCD_INVDEL (~0x7U) + +typedef enum { + VCD_DJW_ID = 1, + VCD_FGK_ID = 16, /* Note: these are not standard IANA-allocated IDs! */ +} xd3_secondary_ids; + +typedef enum { + SEC_NOFLAGS = 0, + + /* Note: SEC_COUNT_FREQS Not implemented (to eliminate 1st Huffman pass) */ + SEC_COUNT_FREQS = (1 << 0), +} xd3_secondary_flags; + +typedef enum { + DATA_SECTION, /* These indicate which section to the secondary + * compressor. */ + INST_SECTION, /* The header section is not compressed, therefore not + * listed here. 
*/ + ADDR_SECTION, +} xd3_section_type; + +typedef unsigned int xd3_rtype; + +/***********************************************************************/ + +#include "xdelta3-list.h" + +XD3_MAKELIST(xd3_rlist, xd3_rinst, link); + +/***********************************************************************/ + +#define SECONDARY_MIN_SAVINGS 2 /* Secondary compression has to save + at least this many bytes. */ +#define SECONDARY_MIN_INPUT 10 /* Secondary compression needs at + least this many bytes. */ + +#define VCDIFF_MAGIC1 0xd6 /* 1st file byte */ +#define VCDIFF_MAGIC2 0xc3 /* 2nd file byte */ +#define VCDIFF_MAGIC3 0xc4 /* 3rd file byte */ +#define VCDIFF_VERSION 0x00 /* 4th file byte */ + +#define VCD_SELF 0 /* 1st address mode */ +#define VCD_HERE 1 /* 2nd address mode */ + +#define CODE_TABLE_STRING_SIZE (6 * 256) /* Should fit a code table string. */ +#define CODE_TABLE_VCDIFF_SIZE (6 * 256) /* Should fit a compressed code + * table string */ + +#define SECONDARY_ANY (SECONDARY_DJW || SECONDARY_FGK) + +#define ALPHABET_SIZE 256 /* Used in test code--size of the secondary + * compressor alphabet. */ + +#define HASH_PERMUTE 1 /* The input is permuted by random nums */ +#define ADLER_LARGE_CKSUM 1 /* Adler checksum vs. RK checksum */ + +#define HASH_CKOFFSET 1U /* Table entries distinguish "no-entry" from + * offset 0 using this offset. */ + +#define MIN_SMALL_LOOK 2U /* Match-optimization stuff. */ +#define MIN_LARGE_LOOK 2U +#define MIN_MATCH_OFFSET 1U +#define MAX_MATCH_SPLIT 18U /* VCDIFF code table: 18 is the default limit + * for direct-coded ADD sizes */ + +#define LEAST_MATCH_INCR 0 /* The least number of bytes an overlapping + * match must beat the preceding match by. This + * is a bias for the lazy match optimization. A + * non-zero value means that an adjacent match + * has to be better by more than the step + * between them. 0. 
*/ + +#define MIN_MATCH 4U /* VCDIFF code table: MIN_MATCH=4 */ +#define MIN_ADD 1U /* 1 */ +#define MIN_RUN 8U /* The shortest run, if it is shorter than this + * an immediate add/copy will be just as good. + * ADD1/COPY6 = 1I+1D+1A bytes, RUN18 = + * 1I+1D+1A. */ + +#define MAX_MODES 9 /* Maximum number of nodes used for + * compression--does not limit decompression. */ + +#define ENC_SECTS 4 /* Number of separate output sections. */ + +#define HDR_TAIL(s) ((s)->enc_tails[0]) +#define DATA_TAIL(s) ((s)->enc_tails[1]) +#define INST_TAIL(s) ((s)->enc_tails[2]) +#define ADDR_TAIL(s) ((s)->enc_tails[3]) + +#define HDR_HEAD(s) ((s)->enc_heads[0]) +#define DATA_HEAD(s) ((s)->enc_heads[1]) +#define INST_HEAD(s) ((s)->enc_heads[2]) +#define ADDR_HEAD(s) ((s)->enc_heads[3]) + +#define TOTAL_MODES(x) (2+(x)->acache.s_same+(x)->acache.s_near) + +/* Template instances. */ +#if XD3_BUILD_SLOW +#define IF_BUILD_SLOW(x) x +#else +#define IF_BUILD_SLOW(x) +#endif +#if XD3_BUILD_FAST +#define IF_BUILD_FAST(x) x +#else +#define IF_BUILD_FAST(x) +#endif +#if XD3_BUILD_FASTER +#define IF_BUILD_FASTER(x) x +#else +#define IF_BUILD_FASTER(x) +#endif +#if XD3_BUILD_FASTEST +#define IF_BUILD_FASTEST(x) x +#else +#define IF_BUILD_FASTEST(x) +#endif +#if XD3_BUILD_SOFT +#define IF_BUILD_SOFT(x) x +#else +#define IF_BUILD_SOFT(x) +#endif +#if XD3_BUILD_DEFAULT +#define IF_BUILD_DEFAULT(x) x +#else +#define IF_BUILD_DEFAULT(x) +#endif + +/* Consume N bytes of input, only used by the decoder. */ +#define DECODE_INPUT(n) \ + do { \ + stream->total_in += (xoff_t) (n); \ + stream->avail_in -= (n); \ + stream->next_in += (n); \ + } while (0) + +/* Update the run-length state */ +#define NEXTRUN(c) do { if ((c) == run_c) { run_l += 1; } \ + else { run_c = (c); run_l = 1; } } while (0) + +/* This CPP-conditional stuff can be cleaned up... 
*/ +#if REGRESSION_TEST +#define IF_REGRESSION(x) x +#else +#define IF_REGRESSION(x) +#endif + +/***********************************************************************/ + +#if XD3_ENCODER +static void* xd3_alloc0 (xd3_stream *stream, + usize_t elts, + usize_t size); + + +static xd3_output* xd3_alloc_output (xd3_stream *stream, + xd3_output *old_output); + +static int xd3_alloc_iopt (xd3_stream *stream, usize_t elts); + +static void xd3_free_output (xd3_stream *stream, + xd3_output *output); + +static int xd3_emit_byte (xd3_stream *stream, + xd3_output **outputp, + uint8_t code); + +static int xd3_emit_bytes (xd3_stream *stream, + xd3_output **outputp, + const uint8_t *base, + usize_t size); + +static int xd3_emit_double (xd3_stream *stream, xd3_rinst *first, + xd3_rinst *second, usize_t code); +static int xd3_emit_single (xd3_stream *stream, xd3_rinst *single, + usize_t code); + +static usize_t xd3_sizeof_output (xd3_output *output); +static void xd3_encode_reset (xd3_stream *stream); + +static int xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos); +static int xd3_source_extend_match (xd3_stream *stream); +static int xd3_srcwin_setup (xd3_stream *stream); +static usize_t xd3_iopt_last_matched (xd3_stream *stream); +static int xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, + uint32_t num); + +static usize_t xd3_smatch (xd3_stream *stream, + usize_t base, + usize_t scksum, + usize_t *match_offset); +static int xd3_string_match_init (xd3_stream *stream); +static uint32_t xd3_scksum (uint32_t *state, const uint8_t *seg, + const usize_t ln); +static usize_t xd3_comprun (const uint8_t *seg, usize_t slook, uint8_t *run_cp); +static int xd3_srcwin_move_point (xd3_stream *stream, + usize_t *next_move_point); + +static int xd3_emit_run (xd3_stream *stream, usize_t pos, + usize_t size, uint8_t *run_c); +static usize_t xd3_checksum_hash (const xd3_hash_cfg *cfg, + const usize_t cksum); +static xoff_t xd3_source_cksum_offset(xd3_stream *stream, usize_t 
low); +static void xd3_scksum_insert (xd3_stream *stream, + usize_t inx, + usize_t scksum, + usize_t pos); + + +#if XD3_DEBUG +static void xd3_verify_run_state (xd3_stream *stream, + const uint8_t *inp, + usize_t x_run_l, + uint8_t *x_run_c); +static void xd3_verify_large_state (xd3_stream *stream, + const uint8_t *inp, + uint32_t x_cksum); +static void xd3_verify_small_state (xd3_stream *stream, + const uint8_t *inp, + uint32_t x_cksum); + +#endif /* XD3_DEBUG */ +#endif /* XD3_ENCODER */ + +static int xd3_decode_allocate (xd3_stream *stream, usize_t size, + uint8_t **copied1, usize_t *alloc1); + +static void xd3_compute_code_table_string (const xd3_dinst *code_table, + uint8_t *str); +static void* xd3_alloc (xd3_stream *stream, usize_t elts, usize_t size); +static void xd3_free (xd3_stream *stream, void *ptr); + +static int xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp, + const uint8_t *max, uint32_t *valp); + +#if REGRESSION_TEST +static int xd3_selftest (void); +#endif + +/***********************************************************************/ + +#define UINT32_OFLOW_MASK 0xfe000000U +#define UINT64_OFLOW_MASK 0xfe00000000000000ULL + +#ifndef UINT32_MAX +#define UINT32_MAX 4294967295U +#endif + +#ifndef UINT64_MAX +#define UINT64_MAX 18446744073709551615ULL +#endif + +#if SIZEOF_USIZE_T == 4 +#define USIZE_T_MAX UINT32_MAX +#define xd3_decode_size xd3_decode_uint32_t +#define xd3_emit_size xd3_emit_uint32_t +#define xd3_sizeof_size xd3_sizeof_uint32_t +#define xd3_read_size xd3_read_uint32_t +#elif SIZEOF_USIZE_T == 8 +#define USIZE_T_MAX UINT64_MAX +#define xd3_decode_size xd3_decode_uint64_t +#define xd3_emit_size xd3_emit_uint64_t +#define xd3_sizeof_size xd3_sizeof_uint64_t +#define xd3_read_size xd3_read_uint64_t +#endif + +#if SIZEOF_XOFF_T == 4 +#define XOFF_T_MAX UINT32_MAX +#define xd3_decode_offset xd3_decode_uint32_t +#define xd3_emit_offset xd3_emit_uint32_t +#elif SIZEOF_XOFF_T == 8 +#define XOFF_T_MAX UINT64_MAX +#define 
xd3_decode_offset xd3_decode_uint64_t +#define xd3_emit_offset xd3_emit_uint64_t +#endif + +#define USIZE_T_OVERFLOW(a,b) ((USIZE_T_MAX - (usize_t) (a)) < (usize_t) (b)) +#define XOFF_T_OVERFLOW(a,b) ((XOFF_T_MAX - (xoff_t) (a)) < (xoff_t) (b)) + +const char* xd3_strerror (int ret) +{ + switch (ret) + { + case XD3_INPUT: return "XD3_INPUT"; + case XD3_OUTPUT: return "XD3_OUTPUT"; + case XD3_GETSRCBLK: return "XD3_GETSRCBLK"; + case XD3_GOTHEADER: return "XD3_GOTHEADER"; + case XD3_WINSTART: return "XD3_WINSTART"; + case XD3_WINFINISH: return "XD3_WINFINISH"; + case XD3_TOOFARBACK: return "XD3_TOOFARBACK"; + case XD3_INTERNAL: return "XD3_INTERNAL"; + case XD3_INVALID: return "XD3_INVALID"; + case XD3_INVALID_INPUT: return "XD3_INVALID_INPUT"; + case XD3_NOSECOND: return "XD3_NOSECOND"; + case XD3_UNIMPLEMENTED: return "XD3_UNIMPLEMENTED"; + } + return NULL; +} + +/***********************************************************************/ + +#define xd3_sec_data(s) ((s)->sec_stream_d) +#define xd3_sec_inst(s) ((s)->sec_stream_i) +#define xd3_sec_addr(s) ((s)->sec_stream_a) + +struct _xd3_sec_type +{ + int id; + const char *name; + xd3_secondary_flags flags; + + /* xd3_sec_stream is opaque to the generic code */ + xd3_sec_stream* (*alloc) (xd3_stream *stream); + void (*destroy) (xd3_stream *stream, + xd3_sec_stream *sec); + void (*init) (xd3_sec_stream *sec); + int (*decode) (xd3_stream *stream, + xd3_sec_stream *sec_stream, + const uint8_t **input, + const uint8_t *input_end, + uint8_t **output, + const uint8_t *output_end); +#if XD3_ENCODER + int (*encode) (xd3_stream *stream, + xd3_sec_stream *sec_stream, + xd3_output *input, + xd3_output *output, + xd3_sec_cfg *cfg); +#endif +}; + +#define BIT_STATE_ENCODE_INIT { 0, 1 } +#define BIT_STATE_DECODE_INIT { 0, 0x100 } + +typedef struct _bit_state bit_state; +struct _bit_state +{ + usize_t cur_byte; + usize_t cur_mask; +}; + +#if SECONDARY_ANY == 0 +#define IF_SEC(x) +#define IF_NSEC(x) x +#else /* yuck */ +#define 
IF_SEC(x) x +#define IF_NSEC(x) +static int +xd3_decode_secondary (xd3_stream *stream, + xd3_desect *sect, + xd3_sec_stream **sec_streamp); +#if XD3_ENCODER +static int +xd3_encode_secondary (xd3_stream *stream, + xd3_output **head, + xd3_output **tail, + xd3_sec_stream **sec_streamp, + xd3_sec_cfg *cfg, + int *did_it); +#endif +#endif /* SECONDARY_ANY */ + +#if SECONDARY_FGK +extern const xd3_sec_type fgk_sec_type; +#define IF_FGK(x) x +#define FGK_CASE(s) \ + s->sec_type = & fgk_sec_type; \ + break; +#else +#define IF_FGK(x) +#define FGK_CASE(s) \ + s->msg = "unavailable secondary compressor: FGK Adaptive Huffman"; \ + return XD3_INTERNAL; +#endif + +#if SECONDARY_DJW +extern const xd3_sec_type djw_sec_type; +#define IF_DJW(x) x +#define DJW_CASE(s) \ + s->sec_type = & djw_sec_type; \ + break; +#else +#define IF_DJW(x) +#define DJW_CASE(s) \ + s->msg = "unavailable secondary compressor: DJW Static Huffman"; \ + return XD3_INTERNAL; +#endif + +/***********************************************************************/ + +#include "xdelta3-hash.h" + +/* Process template passes - this includes xdelta3.c several times. */ +#define __XDELTA3_C_TEMPLATE_PASS__ +#include "xdelta3-cfgs.h" +#undef __XDELTA3_C_TEMPLATE_PASS__ + +/* Process the inline pass. 
*/ +#define __XDELTA3_C_INLINE_PASS__ +#include "xdelta3.c" +#undef __XDELTA3_C_INLINE_PASS__ + +/* Secondary compression */ +#if SECONDARY_ANY +#include "xdelta3-second.h" +#endif + +#if SECONDARY_FGK +#include "xdelta3-fgk.h" +const xd3_sec_type fgk_sec_type = +{ + VCD_FGK_ID, + "FGK Adaptive Huffman", + SEC_NOFLAGS, + (xd3_sec_stream* (*)(xd3_stream*)) fgk_alloc, + (void (*)(xd3_stream*, xd3_sec_stream*)) fgk_destroy, + (void (*)(xd3_sec_stream*)) fgk_init, + (int (*)(xd3_stream*, xd3_sec_stream*, const uint8_t**, const uint8_t*, + uint8_t**, const uint8_t*)) xd3_decode_fgk, + IF_ENCODER((int (*)(xd3_stream*, xd3_sec_stream*, xd3_output*, + xd3_output*, xd3_sec_cfg*)) xd3_encode_fgk) +}; +#endif + +#if SECONDARY_DJW +#include "xdelta3-djw.h" +const xd3_sec_type djw_sec_type = +{ + VCD_DJW_ID, + "Static Huffman", + SEC_COUNT_FREQS, + (xd3_sec_stream* (*)(xd3_stream*)) djw_alloc, + (void (*)(xd3_stream*, xd3_sec_stream*)) djw_destroy, + (void (*)(xd3_sec_stream*)) djw_init, + (int (*)(xd3_stream*, xd3_sec_stream*, const uint8_t**, const uint8_t*, + uint8_t**, const uint8_t*)) xd3_decode_huff, + IF_ENCODER((int (*)(xd3_stream*, xd3_sec_stream*, xd3_output*, + xd3_output*, xd3_sec_cfg*)) xd3_encode_huff) +}; +#endif + +#if XD3_MAIN || PYTHON_MODULE || SWIG_MODULE || NOT_MAIN +#include "xdelta3-main.h" +#endif + +#if REGRESSION_TEST +#include "xdelta3-test.h" +#endif + +#if PYTHON_MODULE +#include "xdelta3-python.h" +#endif + +#endif /* __XDELTA3_C_HEADER_PASS__ */ +#ifdef __XDELTA3_C_INLINE_PASS__ + +/**************************************************************** + Instruction tables + *****************************************************************/ + +/* The following code implements a parametrized description of the + * code table given above for a few reasons. It is not necessary for + * implementing the standard, to support compression with variable + * tables, so an implementation is only required to know the default + * code table to begin decompression. 
(If the encoder uses an + * alternate table, the table is included in compressed form inside + * the VCDIFF file.) + * + * Before adding variable-table support there were two functions which + * were hard-coded to the default table above. + * xd3_compute_default_table() would create the default table by + * filling a 256-elt array of xd3_dinst values. The corresponding + * function, xd3_choose_instruction(), would choose an instruction + * based on the hard-coded parameters of the default code table. + * + * Notes: The parametrized code table description here only generates + * tables of a certain regularity similar to the default table by + * allowing to vary the distribution of single- and + * double-instructions and change the number of near and same copy + * modes. More exotic tables are only possible by extending this + * code. + * + * For performance reasons, both the parametrized and non-parametrized + * versions of xd3_choose_instruction remain. The parametrized + * version is only needed for testing multi-table decoding support. + * If ever multi-table encoding is required, this can be optimized by + * compiling static functions for each table. + */ + +/* The XD3_CHOOSE_INSTRUCTION calls xd3_choose_instruction with the + * table description when GENERIC_ENCODE_TABLES are in use. The + * IF_GENCODETBL macro enables generic-code-table specific code. 
*/ +#if GENERIC_ENCODE_TABLES +#define XD3_CHOOSE_INSTRUCTION(stream,prev,inst) xd3_choose_instruction (stream->code_table_desc, prev, inst) +#define IF_GENCODETBL(x) x +#else +#define XD3_CHOOSE_INSTRUCTION(stream,prev,inst) xd3_choose_instruction (prev, inst) +#define IF_GENCODETBL(x) +#endif + +/* This structure maintains information needed by + * xd3_choose_instruction to compute the code for a double instruction + * by first indexing an array of code_table_sizes by copy mode, then + * using (offset + (muliplier * X)) */ +struct _xd3_code_table_sizes { + uint8_t cpy_max; + uint8_t offset; + uint8_t mult; +}; + +/* This contains a complete description of a code table. */ +struct _xd3_code_table_desc +{ + /* Assumes a single RUN instruction */ + /* Assumes that MIN_MATCH is 4 */ + + uint8_t add_sizes; /* Number of immediate-size single adds (default 17) */ + uint8_t near_modes; /* Number of near copy modes (default 4) */ + uint8_t same_modes; /* Number of same copy modes (default 3) */ + uint8_t cpy_sizes; /* Number of immediate-size single copies (default 15) */ + + uint8_t addcopy_add_max; /* Maximum add size for an add-copy double instruction, + all modes (default 4) */ + uint8_t addcopy_near_cpy_max; /* Maximum cpy size for an add-copy double instruction, + up through VCD_NEAR modes (default 6) */ + uint8_t addcopy_same_cpy_max; /* Maximum cpy size for an add-copy double instruction, + VCD_SAME modes (default 4) */ + + uint8_t copyadd_add_max; /* Maximum add size for a copy-add double instruction, + all modes (default 1) */ + uint8_t copyadd_near_cpy_max; /* Maximum cpy size for a copy-add double instruction, + up through VCD_NEAR modes (default 4) */ + uint8_t copyadd_same_cpy_max; /* Maximum cpy size for a copy-add double instruction, + VCD_SAME modes (default 4) */ + + xd3_code_table_sizes addcopy_max_sizes[MAX_MODES]; + xd3_code_table_sizes copyadd_max_sizes[MAX_MODES]; +}; + +/* The rfc3284 code table is represented: */ +static const xd3_code_table_desc 
__rfc3284_code_table_desc = { + 17, /* add sizes */ + 4, /* near modes */ + 3, /* same modes */ + 15, /* copy sizes */ + + 4, /* add-copy max add */ + 6, /* add-copy max cpy, near */ + 4, /* add-copy max cpy, same */ + + 1, /* copy-add max add */ + 4, /* copy-add max cpy, near */ + 4, /* copy-add max cpy, same */ + + /* addcopy */ + { {6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3},{4,235,1},{4,239,1},{4,243,1} }, + /* copyadd */ + { {4,247,1},{4,248,1},{4,249,1},{4,250,1},{4,251,1},{4,252,1},{4,253,1},{4,254,1},{4,255,1} }, +}; + +#if GENERIC_ENCODE_TABLES +/* An alternate code table for testing (5 near, 0 same): + * + * TYPE SIZE MODE TYPE SIZE MODE INDEX + * --------------------------------------------------------------- + * 1. Run 0 0 Noop 0 0 0 + * 2. Add 0, [1,23] 0 Noop 0 0 [1,24] + * 3. Copy 0, [4,20] 0 Noop 0 0 [25,42] + * 4. Copy 0, [4,20] 1 Noop 0 0 [43,60] + * 5. Copy 0, [4,20] 2 Noop 0 0 [61,78] + * 6. Copy 0, [4,20] 3 Noop 0 0 [79,96] + * 7. Copy 0, [4,20] 4 Noop 0 0 [97,114] + * 8. Copy 0, [4,20] 5 Noop 0 0 [115,132] + * 9. Copy 0, [4,20] 6 Noop 0 0 [133,150] + * 10. Add [1,4] 0 Copy [4,6] 0 [151,162] + * 11. Add [1,4] 0 Copy [4,6] 1 [163,174] + * 12. Add [1,4] 0 Copy [4,6] 2 [175,186] + * 13. Add [1,4] 0 Copy [4,6] 3 [187,198] + * 14. Add [1,4] 0 Copy [4,6] 4 [199,210] + * 15. Add [1,4] 0 Copy [4,6] 5 [211,222] + * 16. Add [1,4] 0 Copy [4,6] 6 [223,234] + * 17. 
Copy 4 [0,6] Add [1,3] 0 [235,255] + * --------------------------------------------------------------- */ +static const xd3_code_table_desc __alternate_code_table_desc = { + 23, /* add sizes */ + 5, /* near modes */ + 0, /* same modes */ + 17, /* copy sizes */ + + 4, /* add-copy max add */ + 6, /* add-copy max cpy, near */ + 0, /* add-copy max cpy, same */ + + 3, /* copy-add max add */ + 4, /* copy-add max cpy, near */ + 0, /* copy-add max cpy, same */ + + /* addcopy */ + { {6,151,3},{6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3},{0,0,0},{0,0,0} }, + /* copyadd */ + { {4,235,1},{4,238,1},{4,241,1},{4,244,1},{4,247,1},{4,250,1},{4,253,1},{0,0,0},{0,0,0} }, +}; +#endif + +/* Computes code table entries of TBL using the specified description. */ +static void +xd3_build_code_table (const xd3_code_table_desc *desc, xd3_dinst *tbl) +{ + usize_t size1, size2, mode; + usize_t cpy_modes = 2 + desc->near_modes + desc->same_modes; + xd3_dinst *d = tbl; + + (d++)->type1 = XD3_RUN; + (d++)->type1 = XD3_ADD; + + for (size1 = 1; size1 <= desc->add_sizes; size1 += 1, d += 1) + { + d->type1 = XD3_ADD; + d->size1 = size1; + } + + for (mode = 0; mode < cpy_modes; mode += 1) + { + (d++)->type1 = XD3_CPY + mode; + + for (size1 = MIN_MATCH; size1 < MIN_MATCH + desc->cpy_sizes; size1 += 1, d += 1) + { + d->type1 = XD3_CPY + mode; + d->size1 = size1; + } + } + + for (mode = 0; mode < cpy_modes; mode += 1) + { + for (size1 = 1; size1 <= desc->addcopy_add_max; size1 += 1) + { + usize_t max = (mode < 2U + desc->near_modes) ? + desc->addcopy_near_cpy_max : + desc->addcopy_same_cpy_max; + + for (size2 = MIN_MATCH; size2 <= max; size2 += 1, d += 1) + { + d->type1 = XD3_ADD; + d->size1 = size1; + d->type2 = XD3_CPY + mode; + d->size2 = size2; + } + } + } + + for (mode = 0; mode < cpy_modes; mode += 1) + { + usize_t max = (mode < 2U + desc->near_modes) ? 
+ desc->copyadd_near_cpy_max : + desc->copyadd_same_cpy_max; + + for (size1 = MIN_MATCH; size1 <= max; size1 += 1) + { + for (size2 = 1; size2 <= desc->copyadd_add_max; size2 += 1, d += 1) + { + d->type1 = XD3_CPY + mode; + d->size1 = size1; + d->type2 = XD3_ADD; + d->size2 = size2; + } + } + } + + XD3_ASSERT (d - tbl == 256); +} + +/* This function generates the static default code table. */ +static const xd3_dinst* +xd3_rfc3284_code_table (void) +{ + static xd3_dinst __rfc3284_code_table[256]; + + if (__rfc3284_code_table[0].type1 != XD3_RUN) + { + xd3_build_code_table (& __rfc3284_code_table_desc, __rfc3284_code_table); + } + + return __rfc3284_code_table; +} + +#if XD3_ENCODER +#if GENERIC_ENCODE_TABLES +/* This function generates the alternate code table. */ +static const xd3_dinst* +xd3_alternate_code_table (void) +{ + static xd3_dinst __alternate_code_table[256]; + + if (__alternate_code_table[0].type1 != XD3_RUN) + { + xd3_build_code_table (& __alternate_code_table_desc, __alternate_code_table); + } + + return __alternate_code_table; +} + +/* This function computes the ideal second instruction INST based on + * preceding instruction PREV. If it is possible to issue a double + * instruction based on this pair it sets PREV->code2, otherwise it + * sets INST->code1. */ +static void +xd3_choose_instruction (const xd3_code_table_desc *desc, xd3_rinst *prev, xd3_rinst *inst) +{ + switch (inst->type) + { + case XD3_RUN: + /* The 0th instruction is RUN */ + inst->code1 = 0; + break; + + case XD3_ADD: + + if (inst->size > desc->add_sizes) + { + /* The first instruction is non-immediate ADD */ + inst->code1 = 1; + } + else + { + /* The following ADD_SIZES instructions are immediate ADDs */ + inst->code1 = 1 + inst->size; + + /* Now check for a possible COPY-ADD double instruction */ + if (prev != NULL) + { + int prev_mode = prev->type - XD3_CPY; + + /* If previous is a copy. 
Note: as long as the previous + * is not a RUN instruction, it should be a copy because + * it cannot be an add. This check is more clear. */ + if (prev_mode >= 0 && inst->size <= desc->copyadd_add_max) + { + const xd3_code_table_sizes *sizes = & desc->copyadd_max_sizes[prev_mode]; + + /* This check and the inst->size-<= above are == in + the default table. */ + if (prev->size <= sizes->cpy_max) + { + /* The second and third exprs are 0 in the + default table. */ + prev->code2 = sizes->offset + + (sizes->mult * (prev->size - MIN_MATCH)) + + (inst->size - MIN_ADD); + } + } + } + } + break; + + default: + { + int mode = inst->type - XD3_CPY; + + /* The large copy instruction is offset by the run, large add, + * and immediate adds, then multipled by the number of + * immediate copies plus one (the large copy) (i.e., if there + * are 15 immediate copy instructions then there are 16 copy + * instructions per mode). */ + inst->code1 = 2 + desc->add_sizes + (1 + desc->cpy_sizes) * mode; + + /* Now if the copy is short enough for an immediate instruction. */ + if (inst->size < MIN_MATCH + desc->cpy_sizes && + /* TODO: there needs to be a more comprehensive test for this + * boundary condition, merge is now exercising code in which + * size < MIN_MATCH is possible and it's unclear if the above + * size < (MIN_MATCH + cpy_sizes) should be a <= from inspection + * of the default table version below. */ + inst->size >= MIN_MATCH) + { + inst->code1 += inst->size + 1 - MIN_MATCH; + + /* Now check for a possible ADD-COPY double instruction. 
*/ + if ( (prev != NULL) && + (prev->type == XD3_ADD) && + (prev->size <= desc->addcopy_add_max) ) + { + const xd3_code_table_sizes *sizes = & desc->addcopy_max_sizes[mode]; + + if (inst->size <= sizes->cpy_max) + { + prev->code2 = sizes->offset + + (sizes->mult * (prev->size - MIN_ADD)) + + (inst->size - MIN_MATCH); + } + } + } + } + } +} +#else /* GENERIC_ENCODE_TABLES */ + +/* This version of xd3_choose_instruction is hard-coded for the default + table. */ +static void +xd3_choose_instruction (xd3_rinst *prev, xd3_rinst *inst) +{ + switch (inst->type) + { + case XD3_RUN: + inst->code1 = 0; + break; + + case XD3_ADD: + inst->code1 = 1; + + if (inst->size <= 17) + { + inst->code1 += inst->size; + + if ( (inst->size == 1) && + (prev != NULL) && + (prev->size == 4) && + (prev->type >= XD3_CPY) ) + { + prev->code2 = 247 + (prev->type - XD3_CPY); + } + } + + break; + + default: + { + int mode = inst->type - XD3_CPY; + + XD3_ASSERT (inst->type >= XD3_CPY && inst->type < 12); + + inst->code1 = 19 + 16 * mode; + + if (inst->size <= 18 && inst->size >= 4) + { + inst->code1 += inst->size - 3; + + if ( (prev != NULL) && + (prev->type == XD3_ADD) && + (prev->size <= 4) ) + { + if ( (inst->size <= 6) && + (mode <= 5) ) + { + prev->code2 = 163 + (mode * 12) + (3 * (prev->size - 1)) + (inst->size - 4); + + XD3_ASSERT (prev->code2 <= 234); + } + else if ( (inst->size == 4) && + (mode >= 6) ) + { + prev->code2 = 235 + ((mode - 6) * 4) + (prev->size - 1); + + XD3_ASSERT (prev->code2 <= 246); + } + } + } + + XD3_ASSERT (inst->code1 <= 162); + } + break; + } +} +#endif /* GENERIC_ENCODE_TABLES */ + +/*********************************************************************** + Instruction table encoder/decoder + ***********************************************************************/ + +#if GENERIC_ENCODE_TABLES +#if GENERIC_ENCODE_TABLES_COMPUTE == 0 + +/* In this case, we hard-code the result of + * compute_code_table_encoding for each alternate code table, + * presuming that saves 
time/space. This has been 131 bytes, but + * secondary compression was turned off. */ +static const uint8_t __alternate_code_table_compressed[178] = +{0xd6,0xc3,0xc4,0x00,0x00,0x01,0x8a,0x6f,0x40,0x81,0x27,0x8c,0x00,0x00,0x4a,0x4a,0x0d,0x02,0x01,0x03, +0x01,0x03,0x00,0x01,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e, +0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x00,0x01,0x01,0x01,0x02,0x02,0x02,0x03,0x03,0x03,0x04, +0x04,0x04,0x04,0x00,0x04,0x05,0x06,0x01,0x02,0x03,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x05,0x05,0x05, +0x06,0x06,0x06,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x00,0x02,0x00,0x18,0x13,0x63,0x00,0x1b,0x00,0x54, +0x00,0x15,0x23,0x6f,0x00,0x28,0x13,0x54,0x00,0x15,0x01,0x1a,0x31,0x23,0x6c,0x0d,0x23,0x48,0x00,0x15, +0x93,0x6f,0x00,0x28,0x04,0x23,0x51,0x04,0x32,0x00,0x2b,0x00,0x12,0x00,0x12,0x00,0x12,0x00,0x12,0x00, +0x12,0x00,0x12,0x53,0x57,0x9c,0x07,0x43,0x6f,0x00,0x34,0x00,0x0c,0x00,0x0c,0x00,0x0c,0x00,0x0c,0x00, +0x0c,0x00,0x0c,0x00,0x15,0x00,0x82,0x6f,0x00,0x15,0x12,0x0c,0x00,0x03,0x03,0x00,0x06,0x00,}; + +static int +xd3_compute_alternate_table_encoding (xd3_stream *stream, const uint8_t **data, usize_t *size) +{ + (*data) = __alternate_code_table_compressed; + (*size) = sizeof (__alternate_code_table_compressed); + return 0; +} + +#else + +/* The alternate code table will be computed and stored here. */ +static uint8_t __alternate_code_table_compressed[CODE_TABLE_VCDIFF_SIZE]; +static usize_t __alternate_code_table_compressed_size; + +/* This function generates a delta describing the code table for + * encoding within a VCDIFF file. This function is NOT thread safe + * because it is only intended that this function is used to generate + * statically-compiled strings. "comp_string" must be sized + * CODE_TABLE_VCDIFF_SIZE. 
*/ +int xd3_compute_code_table_encoding (xd3_stream *in_stream, + const xd3_dinst *code_table, + uint8_t *comp_string, + usize_t *comp_string_size) +{ + /* Use DJW secondary compression if it is on by default. This saves + * about 20 bytes. */ + uint8_t dflt_string[CODE_TABLE_STRING_SIZE]; + uint8_t code_string[CODE_TABLE_STRING_SIZE]; + + xd3_compute_code_table_string (xd3_rfc3284_code_table (), dflt_string); + xd3_compute_code_table_string (code_table, code_string); + + return xd3_encode_memory (code_string, CODE_TABLE_STRING_SIZE, + dflt_string, CODE_TABLE_STRING_SIZE, + comp_string, comp_string_size, + CODE_TABLE_VCDIFF_SIZE, + /* flags */ 0); +} + +/* Compute a delta between alternate and rfc3284 tables. As soon as + * another alternate table is added, this code should become generic. + * For now there is only one alternate table for testing. */ +static int +xd3_compute_alternate_table_encoding (xd3_stream *stream, const uint8_t **data, usize_t *size) +{ + int ret; + + if (__alternate_code_table_compressed[0] == 0) + { + if ((ret = xd3_compute_code_table_encoding (stream, xd3_alternate_code_table (), + __alternate_code_table_compressed, + & __alternate_code_table_compressed_size))) + { + return ret; + } + + /* During development of a new code table, enable this variable to print + * the new static contents and determine its size. At run time the + * table will be filled in appropriately, but at least it should have + * the proper size beforehand. 
*/ +#if GENERIC_ENCODE_TABLES_COMPUTE_PRINT + { + int i; + + DP(RINT, "\nstatic const usize_t __alternate_code_table_compressed_size = %u;\n", + __alternate_code_table_compressed_size); + + DP(RINT, "static const uint8_t __alternate_code_table_compressed[%u] =\n{", + __alternate_code_table_compressed_size); + + for (i = 0; i < __alternate_code_table_compressed_size; i += 1) + { + DP(RINT, "0x%02x,", __alternate_code_table_compressed[i]); + if ((i % 20) == 19) { DP(RINT, "\n"); } + } + + DP(RINT, "};\n"); + } +#endif + } + + (*data) = __alternate_code_table_compressed; + (*size) = __alternate_code_table_compressed_size; + + return 0; +} +#endif /* GENERIC_ENCODE_TABLES_COMPUTE != 0 */ +#endif /* GENERIC_ENCODE_TABLES */ + +#endif /* XD3_ENCODER */ + +/* This function generates the 1536-byte string specified in sections 5.4 and + * 7 of rfc3284, which is used to represent a code table within a VCDIFF + * file. */ +void xd3_compute_code_table_string (const xd3_dinst *code_table, uint8_t *str) +{ + int i, s; + + XD3_ASSERT (CODE_TABLE_STRING_SIZE == 6 * 256); + + for (s = 0; s < 6; s += 1) + { + for (i = 0; i < 256; i += 1) + { + switch (s) + { + case 0: *str++ = (code_table[i].type1 >= XD3_CPY ? XD3_CPY : code_table[i].type1); break; + case 1: *str++ = (code_table[i].type2 >= XD3_CPY ? XD3_CPY : code_table[i].type2); break; + case 2: *str++ = (code_table[i].size1); break; + case 3: *str++ = (code_table[i].size2); break; + case 4: *str++ = (code_table[i].type1 >= XD3_CPY ? code_table[i].type1 - XD3_CPY : 0); break; + case 5: *str++ = (code_table[i].type2 >= XD3_CPY ? code_table[i].type2 - XD3_CPY : 0); break; + } + } + } +} + +/* This function translates the code table string into the internal representation. The + * stream's near and same-modes should already be set. 
*/
+static int
+xd3_apply_table_string (xd3_stream *stream, const uint8_t *code_string)
+{
+  /* CODE_STRING is read as six consecutive 256-byte planes (type1,
+   * type2, size1, size2, mode1, mode2).  Each plane is validated and
+   * stored in turn, so the first offending byte in plane order decides
+   * which message is reported.  On success the new table is installed
+   * as stream->code_table; it is always recorded in
+   * stream->code_table_alloc, even when validation fails. */
+  const int mode_limit = TOTAL_MODES (stream);
+  const uint8_t *cursor = code_string;
+  xd3_dinst *tbl;
+  xd3_dinst *tbl_end;
+  xd3_dinst *entry;
+
+  tbl = (xd3_dinst*) xd3_alloc (stream,
+				(usize_t) sizeof (xd3_dinst),
+				256);
+  stream->code_table_alloc = tbl;
+
+  if (tbl == NULL)
+    {
+      return ENOMEM;
+    }
+
+  tbl_end = tbl + 256;
+
+  /* Plane 0: first opcode; nothing above XD3_CPY is accepted. */
+  for (entry = tbl; entry != tbl_end; entry += 1)
+    {
+      if (*cursor > XD3_CPY)
+	{
+	  goto bad_opcode;
+	}
+      entry->type1 = *cursor++;
+    }
+
+  /* Plane 1: second opcode, same rule. */
+  for (entry = tbl; entry != tbl_end; entry += 1)
+    {
+      if (*cursor > XD3_CPY)
+	{
+	  goto bad_opcode;
+	}
+      entry->type2 = *cursor++;
+    }
+
+  /* Plane 2: first size; a NOOP must not carry a size. */
+  for (entry = tbl; entry != tbl_end; entry += 1)
+    {
+      if (*cursor != 0 && entry->type1 == XD3_NOOP)
+	{
+	  goto bad_size;
+	}
+      entry->size1 = *cursor++;
+    }
+
+  /* Plane 3: second size, same rule. */
+  for (entry = tbl; entry != tbl_end; entry += 1)
+    {
+      if (*cursor != 0 && entry->type2 == XD3_NOOP)
+	{
+	  goto bad_size;
+	}
+      entry->size2 = *cursor++;
+    }
+
+  /* Plane 4: first mode; must be below the stream's mode count and may
+   * be non-zero only for a copy.  The mode is folded into the type. */
+  for (entry = tbl; entry != tbl_end; entry += 1)
+    {
+      if (*cursor >= mode_limit)
+	{
+	  goto bad_mode;
+	}
+      if (*cursor != 0 && entry->type1 != XD3_CPY)
+	{
+	  goto bad_mode;
+	}
+      entry->type1 += *cursor++;
+    }
+
+  /* Plane 5: second mode, same rule. */
+  for (entry = tbl; entry != tbl_end; entry += 1)
+    {
+      if (*cursor >= mode_limit)
+	{
+	  goto bad_mode;
+	}
+      if (*cursor != 0 && entry->type2 != XD3_CPY)
+	{
+	  goto bad_mode;
+	}
+      entry->type2 += *cursor++;
+    }
+
+  stream->code_table = tbl;
+  return 0;
+
+ bad_opcode:
+  stream->msg = "invalid code-table opcode";
+  return XD3_INTERNAL;
+
+ bad_size:
+  stream->msg = "invalid code-table size";
+  return XD3_INTERNAL;
+
+ bad_mode:
+  stream->msg = "invalid code-table mode";
+  return XD3_INTERNAL;
+}
+
+/* This function applies a code table delta and returns an actual code table.
*/ +static int +xd3_apply_table_encoding (xd3_stream *in_stream, const uint8_t *data, usize_t size) +{ + uint8_t dflt_string[CODE_TABLE_STRING_SIZE]; + uint8_t code_string[CODE_TABLE_STRING_SIZE]; + usize_t code_size; + int ret; + + xd3_compute_code_table_string (xd3_rfc3284_code_table (), dflt_string); + + if ((ret = xd3_decode_memory (data, size, + dflt_string, CODE_TABLE_STRING_SIZE, + code_string, &code_size, + CODE_TABLE_STRING_SIZE, + 0))) { return ret; } + + if (code_size != sizeof (code_string)) + { + in_stream->msg = "corrupt code-table encoding"; + return XD3_INTERNAL; + } + + return xd3_apply_table_string (in_stream, code_string); +} + +/***********************************************************************/ + +static inline void +xd3_swap_uint8p (uint8_t** p1, uint8_t** p2) +{ + uint8_t *t = (*p1); + (*p1) = (*p2); + (*p2) = t; +} + +static inline void +xd3_swap_usize_t (usize_t* p1, usize_t* p2) +{ + usize_t t = (*p1); + (*p1) = (*p2); + (*p2) = t; +} + +/* It's not constant time, but it computes the log. */ +static int +xd3_check_pow2 (usize_t value, usize_t *logof) +{ + usize_t x = 1; + usize_t nolog; + if (logof == NULL) { + logof = &nolog; + } + + *logof = 0; + + for (; x != 0; x <<= 1, *logof += 1) + { + if (x == value) + { + return 0; + } + } + + return XD3_INTERNAL; +} + +static usize_t +xd3_pow2_roundup (usize_t x) +{ + usize_t i = 1; + while (x > i) { + i <<= 1U; + } + return i; +} + +static usize_t +xd3_round_blksize (usize_t sz, usize_t blksz) +{ + usize_t mod = sz & (blksz-1); + + XD3_ASSERT (xd3_check_pow2 (blksz, NULL) == 0); + + return mod ? 
(sz + (blksz - mod)) : sz; +} + +/*********************************************************************** + Adler32 stream function: code copied from Zlib, defined in RFC1950 + ***********************************************************************/ + +#define A32_BASE 65521L /* Largest prime smaller than 2^16 */ +#define A32_NMAX 5552 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define A32_DO1(buf,i) {s1 += buf[i]; s2 += s1;} +#define A32_DO2(buf,i) A32_DO1(buf,i); A32_DO1(buf,i+1); +#define A32_DO4(buf,i) A32_DO2(buf,i); A32_DO2(buf,i+2); +#define A32_DO8(buf,i) A32_DO4(buf,i); A32_DO4(buf,i+4); +#define A32_DO16(buf) A32_DO8(buf,0); A32_DO8(buf,8); + +static unsigned long adler32 (unsigned long adler, const uint8_t *buf, + usize_t len) +{ + unsigned long s1 = adler & 0xffff; + unsigned long s2 = (adler >> 16) & 0xffff; + int k; + + while (len > 0) + { + k = (len < A32_NMAX) ? len : A32_NMAX; + len -= k; + + while (k >= 16) + { + A32_DO16(buf); + buf += 16; + k -= 16; + } + + if (k != 0) + { + do + { + s1 += *buf++; + s2 += s1; + } + while (--k); + } + + s1 %= A32_BASE; + s2 %= A32_BASE; + } + + return (s2 << 16) | s1; +} + +/*********************************************************************** + Run-length function + ***********************************************************************/ + +#if XD3_ENCODER +static usize_t +xd3_comprun (const uint8_t *seg, usize_t slook, uint8_t *run_cp) +{ + usize_t i; + usize_t run_l = 0; + uint8_t run_c = 0; + + for (i = 0; i < slook; i += 1) + { + NEXTRUN(seg[i]); + } + + (*run_cp) = run_c; + + return run_l; +} +#endif + +/*********************************************************************** + Basic encoder/decoder functions + ***********************************************************************/ + +static inline int +xd3_decode_byte (xd3_stream *stream, usize_t *val) +{ + if (stream->avail_in == 0) + { + stream->msg = "further input required"; + return XD3_INPUT; + } + + (*val) = 
stream->next_in[0]; + + DECODE_INPUT (1); + return 0; +} + +static inline int +xd3_decode_bytes (xd3_stream *stream, uint8_t *buf, usize_t *pos, usize_t size) +{ + usize_t want; + usize_t take; + + /* Note: The case where (*pos == size) happens when a zero-length + * appheader or code table is transmitted, but there is nothing in + * the standard against that. */ + while (*pos < size) + { + if (stream->avail_in == 0) + { + stream->msg = "further input required"; + return XD3_INPUT; + } + + want = size - *pos; + take = min (want, stream->avail_in); + + memcpy (buf + *pos, stream->next_in, (size_t) take); + + DECODE_INPUT (take); + (*pos) += take; + } + + return 0; +} + +#if XD3_ENCODER +static inline int +xd3_emit_byte (xd3_stream *stream, + xd3_output **outputp, + uint8_t code) +{ + xd3_output *output = (*outputp); + + if (output->next == output->avail) + { + xd3_output *aoutput; + + if ((aoutput = xd3_alloc_output (stream, output)) == NULL) + { + return ENOMEM; + } + + output = (*outputp) = aoutput; + } + + output->base[output->next++] = code; + + return 0; +} + +static inline int +xd3_emit_bytes (xd3_stream *stream, + xd3_output **outputp, + const uint8_t *base, + usize_t size) +{ + xd3_output *output = (*outputp); + + do + { + usize_t take; + + if (output->next == output->avail) + { + xd3_output *aoutput; + + if ((aoutput = xd3_alloc_output (stream, output)) == NULL) + { + return ENOMEM; + } + + output = (*outputp) = aoutput; + } + + take = min (output->avail - output->next, size); + + memcpy (output->base + output->next, base, (size_t) take); + + output->next += take; + size -= take; + base += take; + } + while (size > 0); + + return 0; +} +#endif /* XD3_ENCODER */ + +/********************************************************************* + Integer encoder/decoder functions + **********************************************************************/ + +#define DECODE_INTEGER_TYPE(PART,OFLOW) \ + while (stream->avail_in != 0) \ + { \ + usize_t next = 
stream->next_in[0]; \ + \ + DECODE_INPUT(1); \ + \ + if (PART & OFLOW) \ + { \ + stream->msg = "overflow in decode_integer"; \ + return XD3_INVALID_INPUT; \ + } \ + \ + PART = (PART << 7) | (next & 127); \ + \ + if ((next & 128) == 0) \ + { \ + (*val) = PART; \ + PART = 0; \ + return 0; \ + } \ + } \ + \ + stream->msg = "further input required"; \ + return XD3_INPUT + +#define READ_INTEGER_TYPE(TYPE, OFLOW) \ + TYPE val = 0; \ + const uint8_t *inp = (*inpp); \ + usize_t next; \ + \ + do \ + { \ + if (inp == max) \ + { \ + stream->msg = "end-of-input in read_integer"; \ + return XD3_INVALID_INPUT; \ + } \ + \ + if (val & OFLOW) \ + { \ + stream->msg = "overflow in read_intger"; \ + return XD3_INVALID_INPUT; \ + } \ + \ + next = (*inp++); \ + val = (val << 7) | (next & 127); \ + } \ + while (next & 128); \ + \ + (*valp) = val; \ + (*inpp) = inp; \ + \ + return 0 + +#define EMIT_INTEGER_TYPE() \ + /* max 64-bit value in base-7 encoding is 9.1 bytes */ \ + uint8_t buf[10]; \ + usize_t bufi = 10; \ + \ + /* This loop performs division and turns on all MSBs. */ \ + do \ + { \ + buf[--bufi] = (num & 127) | 128; \ + num >>= 7U; \ + } \ + while (num != 0); \ + \ + /* Turn off MSB of the last byte. 
*/ \ + buf[9] &= 127; \ + \ + return xd3_emit_bytes (stream, output, buf + bufi, 10 - bufi) + +#define IF_SIZEOF32(x) if (num < (1U << (7 * (x)))) return (x); +#define IF_SIZEOF64(x) if (num < (1ULL << (7 * (x)))) return (x); + +#if USE_UINT32 +static inline uint32_t +xd3_sizeof_uint32_t (uint32_t num) +{ + IF_SIZEOF32(1); + IF_SIZEOF32(2); + IF_SIZEOF32(3); + IF_SIZEOF32(4); + return 5; +} + +static inline int +xd3_decode_uint32_t (xd3_stream *stream, uint32_t *val) +{ DECODE_INTEGER_TYPE (stream->dec_32part, UINT32_OFLOW_MASK); } + +static inline int +xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp, + const uint8_t *max, uint32_t *valp) +{ READ_INTEGER_TYPE (uint32_t, UINT32_OFLOW_MASK); } + +#if XD3_ENCODER +static inline int +xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, uint32_t num) +{ EMIT_INTEGER_TYPE (); } +#endif +#endif + +#if USE_UINT64 +static inline int +xd3_decode_uint64_t (xd3_stream *stream, uint64_t *val) +{ DECODE_INTEGER_TYPE (stream->dec_64part, UINT64_OFLOW_MASK); } + +#if XD3_ENCODER +static inline int +xd3_emit_uint64_t (xd3_stream *stream, xd3_output **output, uint64_t num) +{ EMIT_INTEGER_TYPE (); } +#endif + +/* These are tested but not used */ +#if REGRESSION_TEST +static int +xd3_read_uint64_t (xd3_stream *stream, const uint8_t **inpp, + const uint8_t *max, uint64_t *valp) +{ READ_INTEGER_TYPE (uint64_t, UINT64_OFLOW_MASK); } + +static uint32_t +xd3_sizeof_uint64_t (uint64_t num) +{ + IF_SIZEOF64(1); + IF_SIZEOF64(2); + IF_SIZEOF64(3); + IF_SIZEOF64(4); + IF_SIZEOF64(5); + IF_SIZEOF64(6); + IF_SIZEOF64(7); + IF_SIZEOF64(8); + IF_SIZEOF64(9); + + return 10; +} +#endif + +#endif + +/*********************************************************************** + Address cache stuff + ***********************************************************************/ + +static int +xd3_alloc_cache (xd3_stream *stream) +{ + if (stream->acache.near_array != NULL) + { + xd3_free (stream, stream->acache.near_array); + } + + if 
(stream->acache.same_array != NULL) + { + xd3_free (stream, stream->acache.same_array); + } + + if (((stream->acache.s_near > 0) && + (stream->acache.near_array = (usize_t*) + xd3_alloc (stream, stream->acache.s_near, + (usize_t) sizeof (usize_t))) + == NULL) || + ((stream->acache.s_same > 0) && + (stream->acache.same_array = (usize_t*) + xd3_alloc (stream, stream->acache.s_same * 256, + (usize_t) sizeof (usize_t))) + == NULL)) + { + return ENOMEM; + } + + return 0; +} + +void +xd3_init_cache (xd3_addr_cache* acache) +{ + if (acache->s_near > 0) + { + memset (acache->near_array, 0, acache->s_near * sizeof (usize_t)); + acache->next_slot = 0; + } + + if (acache->s_same > 0) + { + memset (acache->same_array, 0, acache->s_same * 256 * sizeof (usize_t)); + } +} + +static void +xd3_update_cache (xd3_addr_cache* acache, usize_t addr) +{ + if (acache->s_near > 0) + { + acache->near_array[acache->next_slot] = addr; + acache->next_slot = (acache->next_slot + 1) % acache->s_near; + } + + if (acache->s_same > 0) + { + acache->same_array[addr % (acache->s_same*256)] = addr; + } +} + +#if XD3_ENCODER +/* OPT: this gets called a lot, can it be optimized? */ +static int +xd3_encode_address (xd3_stream *stream, + usize_t addr, + usize_t here, + uint8_t* mode) +{ + usize_t d, bestd; + usize_t i, bestm, ret; + xd3_addr_cache* acache = & stream->acache; + +#define SMALLEST_INT(x) do { if (((x) & ~127U) == 0) { goto good; } } while (0) + + /* Attempt to find the address mode that yields the smallest integer value + * for "d", the encoded address value, thereby minimizing the encoded size + * of the address. */ + bestd = addr; + bestm = VCD_SELF; + + XD3_ASSERT (addr < here); + + SMALLEST_INT (bestd); + + if ((d = here-addr) < bestd) + { + bestd = d; + bestm = VCD_HERE; + + SMALLEST_INT (bestd); + } + + for (i = 0; i < acache->s_near; i += 1) + { + /* Note: If we used signed computation here, we'd could compte d + * and then check (d >= 0 && d < bestd). 
*/ + if (addr >= acache->near_array[i]) + { + d = addr - acache->near_array[i]; + + if (d < bestd) + { + bestd = d; + bestm = i+2; /* 2 counts the VCD_SELF, VCD_HERE modes */ + + SMALLEST_INT (bestd); + } + } + } + + if (acache->s_same > 0 && + acache->same_array[d = addr%(acache->s_same*256)] == addr) + { + bestd = d%256; + /* 2 + s_near offsets past the VCD_NEAR modes */ + bestm = acache->s_near + 2 + d/256; + + if ((ret = xd3_emit_byte (stream, & ADDR_TAIL (stream), bestd))) + { + return ret; + } + } + else + { + good: + + if ((ret = xd3_emit_size (stream, & ADDR_TAIL (stream), bestd))) + { + return ret; + } + } + + xd3_update_cache (acache, addr); + + (*mode) += bestm; + + return 0; +} +#endif + +static int +xd3_decode_address (xd3_stream *stream, usize_t here, + usize_t mode, const uint8_t **inpp, + const uint8_t *max, uint32_t *valp) +{ + int ret; + usize_t same_start = 2 + stream->acache.s_near; + + if (mode < same_start) + { + if ((ret = xd3_read_size (stream, inpp, max, valp))) { return ret; } + + switch (mode) + { + case VCD_SELF: + break; + case VCD_HERE: + (*valp) = here - (*valp); + break; + default: + (*valp) += stream->acache.near_array[mode - 2]; + break; + } + } + else + { + if (*inpp == max) + { + stream->msg = "address underflow"; + return XD3_INVALID_INPUT; + } + + mode -= same_start; + + (*valp) = stream->acache.same_array[mode*256 + (**inpp)]; + + (*inpp) += 1; + } + + xd3_update_cache (& stream->acache, *valp); + + return 0; +} + +/*********************************************************************** + Alloc/free +***********************************************************************/ + +static void* +__xd3_alloc_func (void* opaque, usize_t items, usize_t size) +{ + return malloc ((size_t) items * (size_t) size); +} + +static void +__xd3_free_func (void* opaque, void* address) +{ + free (address); +} + +static void* +xd3_alloc (xd3_stream *stream, + usize_t elts, + usize_t size) +{ + void *a = stream->alloc (stream->opaque, elts, size); + 
+ if (a != NULL) + { + IF_DEBUG (stream->alloc_cnt += 1); + IF_DEBUG2 (DP(RINT "[stream %p malloc] size %u ptr %p\n", + stream, elts * size, a)); + } + else + { + stream->msg = "out of memory"; + } + + return a; +} + +static void +xd3_free (xd3_stream *stream, + void *ptr) +{ + if (ptr != NULL) + { + IF_DEBUG (stream->free_cnt += 1); + XD3_ASSERT (stream->free_cnt <= stream->alloc_cnt); + IF_DEBUG2 (DP(RINT "[stream %p free] %p\n", + stream, ptr)); + stream->free (stream->opaque, ptr); + } +} + +#if XD3_ENCODER +static void* +xd3_alloc0 (xd3_stream *stream, + usize_t elts, + usize_t size) +{ + void *a = xd3_alloc (stream, elts, size); + + if (a != NULL) + { + memset (a, 0, (size_t) (elts * size)); + } + + return a; +} + +static xd3_output* +xd3_alloc_output (xd3_stream *stream, + xd3_output *old_output) +{ + xd3_output *output; + uint8_t *base; + + if (stream->enc_free != NULL) + { + output = stream->enc_free; + stream->enc_free = output->next_page; + } + else + { + if ((output = (xd3_output*) xd3_alloc (stream, 1, + (usize_t) sizeof (xd3_output))) + == NULL) + { + return NULL; + } + + if ((base = (uint8_t*) xd3_alloc (stream, XD3_ALLOCSIZE, + sizeof (uint8_t))) == NULL) + { + xd3_free (stream, output); + return NULL; + } + + output->base = base; + output->avail = XD3_ALLOCSIZE; + } + + output->next = 0; + + if (old_output) + { + old_output->next_page = output; + } + + output->next_page = NULL; + + return output; +} + +static usize_t +xd3_sizeof_output (xd3_output *output) +{ + usize_t s = 0; + + for (; output; output = output->next_page) + { + s += output->next; + } + + return s; +} + +static void +xd3_freelist_output (xd3_stream *stream, + xd3_output *output) +{ + xd3_output *tmp; + + while (output) + { + tmp = output; + output = output->next_page; + + tmp->next = 0; + tmp->next_page = stream->enc_free; + stream->enc_free = tmp; + } +} + +static void +xd3_free_output (xd3_stream *stream, + xd3_output *output) +{ + xd3_output *next; + + again: + if (output == 
NULL) + { + return; + } + + next = output->next_page; + + xd3_free (stream, output->base); + xd3_free (stream, output); + + output = next; + goto again; +} +#endif /* XD3_ENCODER */ + +void +xd3_free_stream (xd3_stream *stream) +{ + xd3_iopt_buflist *blist = stream->iopt_alloc; + + while (blist != NULL) + { + xd3_iopt_buflist *tmp = blist; + blist = blist->next; + xd3_free (stream, tmp->buffer); + xd3_free (stream, tmp); + } + + xd3_free (stream, stream->large_table); + xd3_free (stream, stream->small_table); + xd3_free (stream, stream->small_prev); + +#if XD3_ENCODER + { + int i; + for (i = 0; i < ENC_SECTS; i += 1) + { + xd3_free_output (stream, stream->enc_heads[i]); + } + xd3_free_output (stream, stream->enc_free); + } +#endif + + xd3_free (stream, stream->acache.near_array); + xd3_free (stream, stream->acache.same_array); + + xd3_free (stream, stream->inst_sect.copied1); + xd3_free (stream, stream->addr_sect.copied1); + xd3_free (stream, stream->data_sect.copied1); + + xd3_free (stream, stream->dec_buffer); + xd3_free (stream, (uint8_t*) stream->dec_lastwin); + + xd3_free (stream, stream->buf_in); + xd3_free (stream, stream->dec_appheader); + xd3_free (stream, stream->dec_codetbl); + xd3_free (stream, stream->code_table_alloc); + +#if SECONDARY_ANY + xd3_free (stream, stream->inst_sect.copied2); + xd3_free (stream, stream->addr_sect.copied2); + xd3_free (stream, stream->data_sect.copied2); + + if (stream->sec_type != NULL) + { + stream->sec_type->destroy (stream, stream->sec_stream_d); + stream->sec_type->destroy (stream, stream->sec_stream_i); + stream->sec_type->destroy (stream, stream->sec_stream_a); + } +#endif + + xd3_free (stream, stream->whole_target.adds); + xd3_free (stream, stream->whole_target.inst); + xd3_free (stream, stream->whole_target.wininfo); + + XD3_ASSERT (stream->alloc_cnt == stream->free_cnt); + + memset (stream, 0, sizeof (xd3_stream)); +} + +#if (XD3_DEBUG > 1 || VCDIFF_TOOLS) +static const char* +xd3_rtype_to_string (xd3_rtype type, 
int print_mode) +{ + switch (type) + { + case XD3_NOOP: + return "NOOP "; + case XD3_RUN: + return "RUN "; + case XD3_ADD: + return "ADD "; + default: break; + } + if (! print_mode) + { + return "CPY "; + } + switch (type) + { + case XD3_CPY + 0: return "CPY_0"; + case XD3_CPY + 1: return "CPY_1"; + case XD3_CPY + 2: return "CPY_2"; + case XD3_CPY + 3: return "CPY_3"; + case XD3_CPY + 4: return "CPY_4"; + case XD3_CPY + 5: return "CPY_5"; + case XD3_CPY + 6: return "CPY_6"; + case XD3_CPY + 7: return "CPY_7"; + case XD3_CPY + 8: return "CPY_8"; + case XD3_CPY + 9: return "CPY_9"; + default: return "CPY>9"; + } +} +#endif + +/**************************************************************** + Stream configuration + ******************************************************************/ + +int +xd3_config_stream(xd3_stream *stream, + xd3_config *config) +{ + int ret; + xd3_config defcfg; + xd3_smatcher *smatcher = &stream->smatcher; + + if (config == NULL) + { + config = & defcfg; + memset (config, 0, sizeof (*config)); + } + + /* Initial setup: no error checks yet */ + memset (stream, 0, sizeof (*stream)); + + stream->winsize = config->winsize ? config->winsize : XD3_DEFAULT_WINSIZE; + stream->sprevsz = config->sprevsz ? config->sprevsz : XD3_DEFAULT_SPREVSZ; + stream->srcwin_maxsz = config->srcwin_maxsz ? + config->srcwin_maxsz : XD3_DEFAULT_SRCWINSZ; + + if (config->iopt_size == 0) + { + stream->iopt_size = XD3_ALLOCSIZE / sizeof(xd3_rinst); + stream->iopt_unlimited = 1; + } + else + { + stream->iopt_size = config->iopt_size; + } + + stream->getblk = config->getblk; + stream->alloc = config->alloc ? config->alloc : __xd3_alloc_func; + stream->free = config->freef ? config->freef : __xd3_free_func; + stream->opaque = config->opaque; + stream->flags = config->flags; + + /* Secondary setup. 
*/ + stream->sec_data = config->sec_data; + stream->sec_inst = config->sec_inst; + stream->sec_addr = config->sec_addr; + + stream->sec_data.data_type = DATA_SECTION; + stream->sec_inst.data_type = INST_SECTION; + stream->sec_addr.data_type = ADDR_SECTION; + + /* Check static sizes. */ + if (sizeof (usize_t) != SIZEOF_USIZE_T || + sizeof (xoff_t) != SIZEOF_XOFF_T || + (ret = xd3_check_pow2(XD3_ALLOCSIZE, NULL))) + { + stream->msg = "incorrect compilation: wrong integer sizes"; + return XD3_INTERNAL; + } + + /* Check/set secondary compressor. */ + switch (stream->flags & XD3_SEC_TYPE) + { + case 0: + if (stream->flags & XD3_SEC_NOALL) + { + stream->msg = "XD3_SEC flags require a secondary compressor type"; + return XD3_INTERNAL; + } + break; + case XD3_SEC_FGK: + FGK_CASE (stream); + case XD3_SEC_DJW: + DJW_CASE (stream); + default: + stream->msg = "too many secondary compressor types set"; + return XD3_INTERNAL; + } + + /* Check/set encoder code table. */ + switch (stream->flags & XD3_ALT_CODE_TABLE) { + case 0: + stream->code_table_desc = & __rfc3284_code_table_desc; + stream->code_table_func = xd3_rfc3284_code_table; + break; +#if GENERIC_ENCODE_TABLES + case XD3_ALT_CODE_TABLE: + stream->code_table_desc = & __alternate_code_table_desc; + stream->code_table_func = xd3_alternate_code_table; + stream->comp_table_func = xd3_compute_alternate_table_encoding; + break; +#endif + default: + stream->msg = "alternate code table support was not compiled"; + return XD3_INTERNAL; + } + + /* Check sprevsz */ + if (smatcher->small_chain == 1 && + smatcher->small_lchain == 1) + { + stream->sprevsz = 0; + } + else + { + if ((ret = xd3_check_pow2 (stream->sprevsz, NULL))) + { + stream->msg = "sprevsz is required to be a power of two"; + return XD3_INTERNAL; + } + + stream->sprevmask = stream->sprevsz - 1; + } + + /* Default scanner settings. 
*/ +#if XD3_ENCODER + switch (config->smatch_cfg) + { + IF_BUILD_SOFT(case XD3_SMATCH_SOFT: + { + *smatcher = config->smatcher_soft; + smatcher->string_match = __smatcher_soft.string_match; + smatcher->name = __smatcher_soft.name; + if (smatcher->large_look < MIN_MATCH || + smatcher->large_step < 1 || + smatcher->small_look < MIN_MATCH) + { + stream->msg = "invalid soft string-match config"; + return XD3_INVALID; + } + break; + }) + + IF_BUILD_DEFAULT(case XD3_SMATCH_DEFAULT: + *smatcher = __smatcher_default; + break;) + IF_BUILD_SLOW(case XD3_SMATCH_SLOW: + *smatcher = __smatcher_slow; + break;) + IF_BUILD_FASTEST(case XD3_SMATCH_FASTEST: + *smatcher = __smatcher_fastest; + break;) + IF_BUILD_FASTER(case XD3_SMATCH_FASTER: + *smatcher = __smatcher_faster; + break;) + IF_BUILD_FAST(case XD3_SMATCH_FAST: + *smatcher = __smatcher_fast; + break;) + default: + stream->msg = "invalid string match config type"; + return XD3_INTERNAL; + } + + if (config->smatch_cfg == XD3_SMATCH_DEFAULT && + (stream->flags & XD3_COMPLEVEL_MASK) != 0) + { + int level = (stream->flags & XD3_COMPLEVEL_MASK) >> XD3_COMPLEVEL_SHIFT; + + switch (level) + { + case 1: + IF_BUILD_FASTEST(*smatcher = __smatcher_fastest; + break;) + case 2: + IF_BUILD_FASTER(*smatcher = __smatcher_faster; + break;) + case 3: case 4: case 5: + IF_BUILD_FAST(*smatcher = __smatcher_fast; + break;) + case 6: + IF_BUILD_DEFAULT(*smatcher = __smatcher_default; + break;) + default: + IF_BUILD_SLOW(*smatcher = __smatcher_slow; + break;) + IF_BUILD_DEFAULT(*smatcher = __smatcher_default; + break;) + IF_BUILD_FAST(*smatcher = __smatcher_fast; + break;) + IF_BUILD_FASTER(*smatcher = __smatcher_faster; + break;) + IF_BUILD_FASTEST(*smatcher = __smatcher_fastest; + break;) + } + } +#endif + + return 0; +} + +/*********************************************************** + Getblk interface + ***********************************************************/ + +inline +xoff_t xd3_source_eof(const xd3_source *src) +{ + xoff_t r = 
(src->blksize * src->max_blkno) + (xoff_t)src->onlastblk; + return r; +} + +inline +usize_t xd3_bytes_on_srcblk (xd3_source *src, xoff_t blkno) +{ + usize_t r = (blkno == src->max_blkno ? + src->onlastblk : + src->blksize); + return r; +} + +/* This function interfaces with the client getblk function, checks + * its results, updates frontier_blkno, max_blkno, onlastblk, eof_known. */ +static int +xd3_getblk (xd3_stream *stream, xoff_t blkno) +{ + int ret; + xd3_source *source = stream->src; + + if (source->curblk == NULL || blkno != source->curblkno) + { + source->getblkno = blkno; + + if (stream->getblk == NULL) + { + stream->msg = "getblk source input"; + return XD3_GETSRCBLK; + } + + ret = stream->getblk (stream, source, blkno); + if (ret != 0) + { + IF_DEBUG1 (DP(RINT "[getblk] app error blkno %"Q"u: %s\n", + blkno, xd3_strerror (ret))); + return ret; + } + } + + if (blkno >= source->frontier_blkno) + { + if (blkno > source->max_blkno) + { + source->max_blkno = blkno; + source->onlastblk = source->onblk; + } + + if (source->onblk == source->blksize) + { + source->frontier_blkno = blkno + 1; + + IF_DEBUG2 (DP(RINT "[getblk] full source blkno %"Q"u: " + "source length unknown %"Q"u\n", + blkno, + xd3_source_eof (source))); + } + else + { + if (!source->eof_known) + { + IF_DEBUG2 (DP(RINT "[getblk] eof block has %d bytes; " + "source length known %"Q"u\n", + xd3_bytes_on_srcblk (source, blkno), + xd3_source_eof (source))); + source->eof_known = 1; + } + + source->frontier_blkno = blkno; + } + } + + XD3_ASSERT (source->curblk != NULL); + IF_DEBUG2 (DP(RINT "[getblk] read source block %"Q"u onblk %u blksize %u\n", + blkno, source->onblk, source->blksize)); + + if (blkno == source->max_blkno) + { + /* In case the application sets the source as 1 block w/ a + preset buffer. 
*/ + source->onlastblk = source->onblk; + + if (source->onblk == source->blksize) + { + source->frontier_blkno = blkno + 1; + } + } + return 0; +} + +/*********************************************************** + Stream open/close + ***************************************************************/ + +int +xd3_set_source (xd3_stream *stream, + xd3_source *src) +{ + usize_t shiftby; + + stream->src = src; + src->srclen = 0; + src->srcbase = 0; + + /* Enforce power-of-two blocksize so that source-block number + * calculations are cheap. */ + if (!xd3_check_pow2 (src->blksize, &shiftby) == 0) + { + int check; + src->blksize = xd3_pow2_roundup(src->blksize); + check = xd3_check_pow2 (src->blksize, &shiftby); + XD3_ASSERT (check == 0); + IF_DEBUG1 (DP(RINT "raising srcblksz to %u\n", src->blksize)); + } + + src->shiftby = shiftby; + src->maskby = (1 << shiftby) - 1; + return 0; +} + +int +xd3_set_source_and_size (xd3_stream *stream, + xd3_source *user_source, + xoff_t source_size) { + int ret = xd3_set_source (stream, user_source); + if (ret == 0) + { + stream->src->eof_known = 1; + IF_DEBUG2 (DP(RINT "[set source] size known %"Q"u\n", + source_size)); + + xd3_blksize_div(source_size, + stream->src, + &stream->src->max_blkno, + &stream->src->onlastblk); + } + return ret; +} + +void +xd3_abort_stream (xd3_stream *stream) +{ + stream->dec_state = DEC_ABORTED; + stream->enc_state = ENC_ABORTED; +} + +int +xd3_close_stream (xd3_stream *stream) +{ + if (stream->enc_state != 0 && stream->enc_state != ENC_ABORTED) + { + if (stream->buf_leftover != NULL) + { + stream->msg = "encoding is incomplete"; + return XD3_INTERNAL; + } + + if (stream->enc_state == ENC_POSTWIN) + { +#if XD3_ENCODER + xd3_encode_reset (stream); +#endif + stream->current_window += 1; + stream->enc_state = ENC_INPUT; + } + + /* If encoding, should be ready for more input but not actually + have any. 
*/ + if (stream->enc_state != ENC_INPUT || stream->avail_in != 0) + { + stream->msg = "encoding is incomplete"; + return XD3_INTERNAL; + } + } + else + { + switch (stream->dec_state) + { + case DEC_VCHEAD: + case DEC_WININD: + /* TODO: Address the zero-byte ambiguity. Does the encoder + * emit a window or not? If so, then catch an error here. + * If not, need another routine to say + * decode_at_least_one_if_empty. */ + case DEC_ABORTED: + break; + default: + /* If decoding, should be ready for the next window. */ + stream->msg = "EOF in decode"; + return XD3_INTERNAL; + } + } + + return 0; +} + +/************************************************************** + Application header + ****************************************************************/ + +int +xd3_get_appheader (xd3_stream *stream, + uint8_t **data, + usize_t *size) +{ + if (stream->dec_state < DEC_WININD) + { + stream->msg = "application header not available"; + return XD3_INTERNAL; + } + + (*data) = stream->dec_appheader; + (*size) = stream->dec_appheadsz; + return 0; +} + +/********************************************************** + Decoder stuff + *************************************************/ + +#include "xdelta3-decode.h" + +/**************************************************************** + Encoder stuff + *****************************************************************/ + +#if XD3_ENCODER +void +xd3_set_appheader (xd3_stream *stream, + const uint8_t *data, + usize_t size) +{ + stream->enc_appheader = data; + stream->enc_appheadsz = size; +} + +#if XD3_DEBUG +static int +xd3_iopt_check (xd3_stream *stream) +{ + usize_t ul = xd3_rlist_length (& stream->iopt_used); + usize_t fl = xd3_rlist_length (& stream->iopt_free); + + return (ul + fl + (stream->iout ? 
1 : 0)) == stream->iopt_size; +} +#endif + +static xd3_rinst* +xd3_iopt_free (xd3_stream *stream, xd3_rinst *i) +{ + xd3_rinst *n = xd3_rlist_remove (i); + xd3_rlist_push_back (& stream->iopt_free, i); + return n; +} + +static void +xd3_iopt_free_nonadd (xd3_stream *stream, xd3_rinst *i) +{ + if (i->type != XD3_ADD) + { + xd3_rlist_push_back (& stream->iopt_free, i); + } +} + +/* When an instruction is ready to flush from the iopt buffer, this + * function is called to produce an encoding. It writes the + * instruction plus size, address, and data to the various encoding + * sections. */ +static int +xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst) +{ + int ret; + + /* Check for input overflow. */ + XD3_ASSERT (inst->pos + inst->size <= stream->avail_in); + + switch (inst->type) + { + case XD3_CPY: + { + /* the address may have an offset if there is a source window. */ + usize_t addr; + xd3_source *src = stream->src; + + if (src != NULL) + { + /* If there is a source copy, the source must have its + * source window decided before we can encode. This can + * be bad -- we have to make this decision even if no + * source matches have been found. */ + if (stream->srcwin_decided == 0) + { + if ((ret = xd3_srcwin_setup (stream))) { return ret; } + } + else + { + stream->srcwin_decided_early = (!stream->src->eof_known || + (stream->srcwin_cksum_pos < + xd3_source_eof (stream->src))); + } + + /* xtra field indicates the copy is from the source */ + if (inst->xtra) + { + XD3_ASSERT (inst->addr >= src->srcbase); + XD3_ASSERT (inst->addr + inst->size <= + src->srcbase + src->srclen); + addr = (usize_t)(inst->addr - src->srcbase); + stream->n_scpy += 1; + stream->l_scpy += (xoff_t) inst->size; + } + else + { + /* with source window: target copy address is offset + * by taroff. 
*/ + addr = stream->taroff + (usize_t) inst->addr; + stream->n_tcpy += 1; + stream->l_tcpy += (xoff_t) inst->size; + } + } + else + { + addr = (usize_t) inst->addr; + stream->n_tcpy += 1; + stream->l_tcpy += inst->size; + } + + /* Note: used to assert inst->size >= MIN_MATCH, but not true + * for merge operations & identical match heuristics. */ + /* the "here" position is always offset by taroff */ + if ((ret = xd3_encode_address (stream, addr, inst->pos + stream->taroff, + & inst->type))) + { + return ret; + } + + IF_DEBUG2 ({ + static int cnt; + DP(RINT "[iopt copy:%d] pos %"Q"u-%"Q"u addr %"Q"u-%"Q"u size %u\n", + cnt++, + stream->total_in + inst->pos, + stream->total_in + inst->pos + inst->size, + inst->addr, inst->addr + inst->size, inst->size); + }); + break; + } + case XD3_RUN: + { + XD3_ASSERT (inst->size >= MIN_MATCH); + + if ((ret = xd3_emit_byte (stream, & DATA_TAIL (stream), inst->xtra))) { return ret; } + + stream->n_run += 1; + stream->l_run += inst->size; + + IF_DEBUG2 ({ + static int cnt; + DP(RINT "[iopt run:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size); + }); + break; + } + case XD3_ADD: + { + if ((ret = xd3_emit_bytes (stream, & DATA_TAIL (stream), + stream->next_in + inst->pos, inst->size))) { return ret; } + + stream->n_add += 1; + stream->l_add += inst->size; + + IF_DEBUG2 ({ + static int cnt; + DP(RINT "[iopt add:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size); + }); + + break; + } + } + + /* This is the only place stream->unencoded_offset is incremented. 
*/ + XD3_ASSERT (stream->unencoded_offset == inst->pos); + stream->unencoded_offset += inst->size; + + inst->code2 = 0; + + XD3_CHOOSE_INSTRUCTION (stream, stream->iout, inst); + + if (stream->iout != NULL) + { + if (stream->iout->code2 != 0) + { + if ((ret = xd3_emit_double (stream, stream->iout, inst, stream->iout->code2))) { return ret; } + + xd3_iopt_free_nonadd (stream, stream->iout); + xd3_iopt_free_nonadd (stream, inst); + stream->iout = NULL; + return 0; + } + else + { + if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; } + + xd3_iopt_free_nonadd (stream, stream->iout); + } + } + + stream->iout = inst; + + return 0; +} + +/* This possibly encodes an add instruction, iadd, which must remain + * on the stack until the following call to + * xd3_iopt_finish_encoding. */ +static int +xd3_iopt_add (xd3_stream *stream, usize_t pos, xd3_rinst *iadd) +{ + int ret; + usize_t off = stream->unencoded_offset; + + if (pos > off) + { + iadd->type = XD3_ADD; + iadd->pos = off; + iadd->size = pos - off; + + if ((ret = xd3_iopt_finish_encoding (stream, iadd))) { return ret; } + } + + return 0; +} + +/* This function calls xd3_iopt_finish_encoding to finish encoding an + * instruction, and it may also produce an add instruction for an + * unmatched region. */ +static int +xd3_iopt_add_encoding (xd3_stream *stream, xd3_rinst *inst) +{ + int ret; + xd3_rinst iadd; + + if ((ret = xd3_iopt_add (stream, inst->pos, & iadd))) { return ret; } + + if ((ret = xd3_iopt_finish_encoding (stream, inst))) { return ret; } + + return 0; +} + +/* Generates a final add instruction to encode the remaining input. 
*/ +static int +xd3_iopt_add_finalize (xd3_stream *stream) +{ + int ret; + xd3_rinst iadd; + + if ((ret = xd3_iopt_add (stream, stream->avail_in, & iadd))) { return ret; } + + if (stream->iout) + { + if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; } + + xd3_iopt_free_nonadd (stream, stream->iout); + stream->iout = NULL; + } + + return 0; +} + +/* Compact the instruction buffer by choosing the best non-overlapping + * instructions when lazy string-matching. There are no ADDs in the + * iopt buffer because those are synthesized in xd3_iopt_add_encoding + * and during xd3_iopt_add_finalize. */ +static int +xd3_iopt_flush_instructions (xd3_stream *stream, int force) +{ + xd3_rinst *r1 = xd3_rlist_front (& stream->iopt_used); + xd3_rinst *r2; + xd3_rinst *r3; + usize_t r1end; + usize_t r2end; + usize_t r2off; + usize_t r2moff; + usize_t gap; + usize_t flushed; + int ret; + + XD3_ASSERT (xd3_iopt_check (stream)); + + /* Note: once tried to skip this step if it's possible to assert + * there are no overlapping instructions. Doesn't work because + * xd3_opt_erase leaves overlapping instructions. */ + while (! xd3_rlist_end (& stream->iopt_used, r1) && + ! xd3_rlist_end (& stream->iopt_used, r2 = xd3_rlist_next (r1))) + { + r1end = r1->pos + r1->size; + + /* If the instructions do not overlap, continue. */ + if (r1end <= r2->pos) + { + r1 = r2; + continue; + } + + r2end = r2->pos + r2->size; + + /* The min_match adjustments prevent this. */ + XD3_ASSERT (r2end > (r1end + LEAST_MATCH_INCR)); + + /* If r3 is available... */ + if (! xd3_rlist_end (& stream->iopt_used, r3 = xd3_rlist_next (r2))) + { + /* If r3 starts before r1 finishes or just about, r2 is irrelevant */ + if (r3->pos <= r1end + 1) + { + xd3_iopt_free (stream, r2); + continue; + } + } + else if (! force) + { + /* Unless force, end the loop when r3 is not available. 
*/ + break; + } + + r2off = r2->pos - r1->pos; + r2moff = r2end - r1end; + gap = r2end - r1->pos; + + /* If the two matches overlap almost entirely, choose the better match + * and discard the other. The else branch can still create inefficient + * copies, e.g., a 4-byte copy that takes 4 bytes to encode, which + * xd3_smatch() wouldn't allow by its crude efficiency check. However, + * in this case there are adjacent copies which mean the add would cost + * one extra byte. Allow the inefficiency here. */ + if (gap < 2*MIN_MATCH || r2moff <= 2 || r2off <= 2) + { + /* Only one match should be used, choose the longer one. */ + if (r1->size < r2->size) + { + xd3_iopt_free (stream, r1); + r1 = r2; + } + else + { + /* We are guaranteed that r1 does not overlap now, so advance past r2 */ + r1 = xd3_iopt_free (stream, r2); + } + continue; + } + else + { + /* Shorten one of the instructions -- could be optimized + * based on the address cache. */ + usize_t average; + usize_t newsize; + usize_t adjust1; + + XD3_ASSERT (r1end > r2->pos && r2end > r1->pos); + + /* Try to balance the length of both instructions, but avoid + * making both longer than MAX_MATCH_SPLIT . */ + average = gap / 2; + newsize = min (MAX_MATCH_SPLIT, gap - average); + + /* Should be possible to simplify this code. 
*/ + if (newsize > r1->size) + { + /* shorten r2 */ + adjust1 = r1end - r2->pos; + } + else if (newsize > r2->size) + { + /* shorten r1 */ + adjust1 = r1end - r2->pos; + + XD3_ASSERT (r1->size > adjust1); + + r1->size -= adjust1; + + /* don't shorten r2 */ + adjust1 = 0; + } + else + { + /* shorten r1 */ + adjust1 = r1->size - newsize; + + if (r2->pos > r1end - adjust1) + { + adjust1 -= r2->pos - (r1end - adjust1); + } + + XD3_ASSERT (r1->size > adjust1); + + r1->size -= adjust1; + + /* shorten r2 */ + XD3_ASSERT (r1->pos + r1->size >= r2->pos); + + adjust1 = r1->pos + r1->size - r2->pos; + } + + /* Fallthrough above if-else, shorten r2 */ + XD3_ASSERT (r2->size > adjust1); + + r2->size -= adjust1; + r2->pos += adjust1; + r2->addr += adjust1; + + XD3_ASSERT (r1->size >= MIN_MATCH); + XD3_ASSERT (r2->size >= MIN_MATCH); + + r1 = r2; + } + } + + XD3_ASSERT (xd3_iopt_check (stream)); + + /* If forcing, pick instructions until the list is empty, otherwise + * this empties 50% of the queue. */ + for (flushed = 0; ! xd3_rlist_empty (& stream->iopt_used); ) + { + xd3_rinst *renc = xd3_rlist_pop_front (& stream->iopt_used); + if ((ret = xd3_iopt_add_encoding (stream, renc))) + { + return ret; + } + + if (! force) + { + if (++flushed > stream->iopt_size / 2) + { + break; + } + + /* If there are only two instructions remaining, break, + * because they were not optimized. This means there were + * more than 50% eliminated by the loop above. 
*/ + r1 = xd3_rlist_front (& stream->iopt_used); + if (xd3_rlist_end(& stream->iopt_used, r1) || + xd3_rlist_end(& stream->iopt_used, r2 = xd3_rlist_next (r1)) || + xd3_rlist_end(& stream->iopt_used, r3 = xd3_rlist_next (r2))) + { + break; + } + } + } + + XD3_ASSERT (xd3_iopt_check (stream)); + + XD3_ASSERT (!force || xd3_rlist_length (& stream->iopt_used) == 0); + + return 0; +} + +static int +xd3_iopt_get_slot (xd3_stream *stream, xd3_rinst** iptr) +{ + xd3_rinst *i; + int ret; + + if (xd3_rlist_empty (& stream->iopt_free)) + { + if (stream->iopt_unlimited) + { + usize_t elts = XD3_ALLOCSIZE / sizeof(xd3_rinst); + + if ((ret = xd3_alloc_iopt (stream, elts))) + { + return ret; + } + + stream->iopt_size += elts; + } + else + { + if ((ret = xd3_iopt_flush_instructions (stream, 0))) { return ret; } + + XD3_ASSERT (! xd3_rlist_empty (& stream->iopt_free)); + } + } + + i = xd3_rlist_pop_back (& stream->iopt_free); + + xd3_rlist_push_back (& stream->iopt_used, i); + + (*iptr) = i; + + ++stream->i_slots_used; + + return 0; +} + +/* A copy is about to be emitted that extends backwards to POS, + * therefore it may completely cover some existing instructions in the + * buffer. If an instruction is completely covered by this new match, + * erase it. This function returns void: it does not tell the caller + * whether the new instruction is covered by the previous one. */ +static void +xd3_iopt_erase (xd3_stream *stream, usize_t pos, usize_t size) +{ + while (! xd3_rlist_empty (& stream->iopt_used)) + { + xd3_rinst *r = xd3_rlist_back (& stream->iopt_used); + + /* Verify that greedy is working. The previous instruction + * should end before the new one begins. */ + XD3_ASSERT ((stream->flags & XD3_BEGREEDY) == 0 || (r->pos + r->size <= pos)); + /* Verify that min_match is working. The previous instruction + * should end before the new one ends. */ + XD3_ASSERT ((stream->flags & XD3_BEGREEDY) != 0 || (r->pos + r->size < pos + size)); + + /* See if the last instruction starts before the new + * instruction. 
If so, there is nothing to erase. */ + if (r->pos < pos) + { + return; + } + + /* Otherwise, the new instruction covers the old one, delete it + and repeat. */ + xd3_rlist_remove (r); + xd3_rlist_push_back (& stream->iopt_free, r); + --stream->i_slots_used; + } +} + +/* This function tells the last matched input position. */ +static usize_t +xd3_iopt_last_matched (xd3_stream *stream) +{ + xd3_rinst *r; + + if (xd3_rlist_empty (& stream->iopt_used)) + { + return 0; + } + + r = xd3_rlist_back (& stream->iopt_used); + + return r->pos + r->size; +} + +/********************************************************* + Emit routines + ***********************************************************/ + +static int +xd3_emit_single (xd3_stream *stream, xd3_rinst *single, usize_t code) +{ + int has_size = stream->code_table[code].size1 == 0; + int ret; + + IF_DEBUG2 (DP(RINT "[emit1] %u %s (%u) code %u\n", + single->pos, + xd3_rtype_to_string ((xd3_rtype) single->type, 0), + single->size, + code)); + + if ((ret = xd3_emit_byte (stream, & INST_TAIL (stream), code))) + { + return ret; + } + + if (has_size) + { + if ((ret = xd3_emit_size (stream, & INST_TAIL (stream), single->size))) + { + return ret; + } + } + + return 0; +} + +static int +xd3_emit_double (xd3_stream *stream, xd3_rinst *first, + xd3_rinst *second, usize_t code) +{ + int ret; + + /* All double instructions use fixed sizes, so all we need to do is + * output the instruction code, no sizes. */ + XD3_ASSERT (stream->code_table[code].size1 != 0 && + stream->code_table[code].size2 != 0); + + if ((ret = xd3_emit_byte (stream, & INST_TAIL (stream), code))) + { + return ret; + } + + IF_DEBUG2 (DP(RINT "[emit2]: %u %s (%u) %s (%u) code %u\n", + first->pos, + xd3_rtype_to_string ((xd3_rtype) first->type, 0), + first->size, + xd3_rtype_to_string ((xd3_rtype) second->type, 0), + second->size, + code)); + + return 0; +} + +/* This enters a potential run instruction into the iopt buffer. 
The + * position argument is relative to the target window. */ +static int +xd3_emit_run (xd3_stream *stream, usize_t pos, usize_t size, uint8_t *run_c) +{ + xd3_rinst* ri; + int ret; + + if ((ret = xd3_iopt_get_slot (stream, & ri))) { return ret; } + + ri->type = XD3_RUN; + ri->xtra = *run_c; + ri->pos = pos; + ri->size = size; + + return 0; +} + +/* This enters a potential copy instruction into the iopt buffer. The + * position argument is relative to the target window. */ +int +xd3_found_match (xd3_stream *stream, usize_t pos, + usize_t size, xoff_t addr, int is_source) +{ + xd3_rinst* ri; + int ret; + + if ((ret = xd3_iopt_get_slot (stream, & ri))) { return ret; } + + ri->type = XD3_CPY; + ri->xtra = is_source; + ri->pos = pos; + ri->size = size; + ri->addr = addr; + + return 0; +} + +static int +xd3_emit_hdr (xd3_stream *stream) +{ + int ret; + int use_secondary = stream->sec_type != NULL; + int use_adler32 = stream->flags & (XD3_ADLER32 | XD3_ADLER32_RECODE); + int vcd_source = xd3_encoder_used_source (stream); + usize_t win_ind = 0; + usize_t del_ind = 0; + usize_t enc_len; + usize_t tgt_len; + usize_t data_len; + usize_t inst_len; + usize_t addr_len; + + if (stream->current_window == 0) + { + usize_t hdr_ind = 0; + int use_appheader = stream->enc_appheader != NULL; + int use_gencodetbl = GENERIC_ENCODE_TABLES && + (stream->code_table_desc != & __rfc3284_code_table_desc); + + if (use_secondary) { hdr_ind |= VCD_SECONDARY; } + if (use_gencodetbl) { hdr_ind |= VCD_CODETABLE; } + if (use_appheader) { hdr_ind |= VCD_APPHEADER; } + + if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + VCDIFF_MAGIC1)) != 0 || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + VCDIFF_MAGIC2)) != 0 || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + VCDIFF_MAGIC3)) != 0 || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + VCDIFF_VERSION)) != 0 || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), hdr_ind)) != 0) + { + return ret; + } + + /* Secondary 
compressor ID */ +#if SECONDARY_ANY + if (use_secondary && + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + stream->sec_type->id))) + { + return ret; + } +#endif + + /* Compressed code table */ + if (use_gencodetbl) + { + usize_t code_table_size; + const uint8_t *code_table_data; + + if ((ret = stream->comp_table_func (stream, & code_table_data, + & code_table_size))) + { + return ret; + } + + if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), + code_table_size + 2)) || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + stream->code_table_desc->near_modes)) || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + stream->code_table_desc->same_modes)) || + (ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), + code_table_data, code_table_size))) + { + return ret; + } + } + + /* Application header */ + if (use_appheader) + { + if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), + stream->enc_appheadsz)) || + (ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), + stream->enc_appheader, + stream->enc_appheadsz))) + { + return ret; + } + } + } + + /* try to compress this window */ +#if SECONDARY_ANY + if (use_secondary) + { + int data_sec = 0; + int inst_sec = 0; + int addr_sec = 0; + +# define ENCODE_SECONDARY_SECTION(UPPER,LOWER) \ + ((stream->flags & XD3_SEC_NO ## UPPER) == 0 && \ + (ret = xd3_encode_secondary (stream, \ + & UPPER ## _HEAD (stream), \ + & UPPER ## _TAIL (stream), \ + & xd3_sec_ ## LOWER (stream), \ + & stream->sec_ ## LOWER, \ + & LOWER ## _sec))) + + if (ENCODE_SECONDARY_SECTION (DATA, data) || + ENCODE_SECONDARY_SECTION (INST, inst) || + ENCODE_SECONDARY_SECTION (ADDR, addr)) + { + return ret; + } + + del_ind |= (data_sec ? VCD_DATACOMP : 0); + del_ind |= (inst_sec ? VCD_INSTCOMP : 0); + del_ind |= (addr_sec ? 
VCD_ADDRCOMP : 0); + } +#endif + + /* if (vcd_target) { win_ind |= VCD_TARGET; } */ + if (vcd_source) { win_ind |= VCD_SOURCE; } + if (use_adler32) { win_ind |= VCD_ADLER32; } + + /* window indicator */ + if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), win_ind))) + { + return ret; + } + + /* source window */ + if (vcd_source) + { + /* or (vcd_target) { ... } */ + if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), + stream->src->srclen)) || + (ret = xd3_emit_offset (stream, & HDR_TAIL (stream), + stream->src->srcbase))) { return ret; } + } + + tgt_len = stream->avail_in; + data_len = xd3_sizeof_output (DATA_HEAD (stream)); + inst_len = xd3_sizeof_output (INST_HEAD (stream)); + addr_len = xd3_sizeof_output (ADDR_HEAD (stream)); + + /* The enc_len field is a redundency for future extensions.*/ + enc_len = (1 + (xd3_sizeof_size (tgt_len) + + xd3_sizeof_size (data_len) + + xd3_sizeof_size (inst_len) + + xd3_sizeof_size (addr_len)) + + data_len + + inst_len + + addr_len + + (use_adler32 ? 4 : 0)); + + if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), enc_len)) || + (ret = xd3_emit_size (stream, & HDR_TAIL (stream), tgt_len)) || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), del_ind)) || + (ret = xd3_emit_size (stream, & HDR_TAIL (stream), data_len)) || + (ret = xd3_emit_size (stream, & HDR_TAIL (stream), inst_len)) || + (ret = xd3_emit_size (stream, & HDR_TAIL (stream), addr_len))) + { + return ret; + } + + if (use_adler32) + { + uint8_t send[4]; + uint32_t a32; + + if (stream->flags & XD3_ADLER32) + { + a32 = adler32 (1L, stream->next_in, stream->avail_in); + } + else + { + a32 = stream->recode_adler32; + } + + /* Four bytes. 
*/ + send[0] = (uint8_t) (a32 >> 24); + send[1] = (uint8_t) (a32 >> 16); + send[2] = (uint8_t) (a32 >> 8); + send[3] = (uint8_t) (a32 & 0x000000FFU); + + if ((ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), send, 4))) + { + return ret; + } + } + + return 0; +} + +/**************************************************************** + Encode routines + ****************************************************************/ +/* Stages caller input in stream->buf_in until a full window is available or XD3_FLUSH is set. Returns ENOMEM if buf_in cannot be allocated, XD3_INPUT when more input is wanted, and 0 once next_in/avail_in have been pointed at the staged bytes. */ +static int +xd3_encode_buffer_leftover (xd3_stream *stream) +{ + usize_t take; + usize_t room; + + /* Allocate the buffer on first use, sized from stream->winsize. */ + if (stream->buf_in == NULL && + (stream->buf_in = (uint8_t*) xd3_alloc (stream, stream->winsize, 1)) == NULL) + { + return ENOMEM; + } + + IF_DEBUG2 (DP(RINT "[leftover] flush?=%s\n", (stream->flags & XD3_FLUSH) ? "yes" : "no")); + + /* Take leftover input first: the tail remembered by an earlier call is copied to the front of buf_in. */ + if (stream->buf_leftover != NULL) + { + XD3_ASSERT (stream->buf_avail == 0); + XD3_ASSERT (stream->buf_leftavail < stream->winsize); + + IF_DEBUG2 (DP(RINT "[leftover] previous %u avail %u\n", stream->buf_leftavail, stream->avail_in)); + + memcpy (stream->buf_in, stream->buf_leftover, stream->buf_leftavail); + + stream->buf_leftover = NULL; + stream->buf_avail = stream->buf_leftavail; + } + + /* Copy into the buffer.
*/ + room = stream->winsize - stream->buf_avail; + take = min (room, stream->avail_in); + + memcpy (stream->buf_in + stream->buf_avail, stream->next_in, take); + + stream->buf_avail += take; + + if (take < stream->avail_in) + { + /* Buffer is full: remember where the unconsumed part of the caller's input starts and how long it is. */ + stream->buf_leftover = stream->next_in + take; + stream->buf_leftavail = stream->avail_in - take; + } + else if ((stream->buf_avail < stream->winsize) && !(stream->flags & XD3_FLUSH)) + { + /* Buffer has space and no flush was requested: ask the caller for more input. */ + IF_DEBUG2 (DP(RINT "[leftover] emptied %u\n", take)); + return XD3_INPUT; + } + + /* Use the buffer: the staged bytes become the window input and the fill count restarts at zero. */ + IF_DEBUG2 (DP(RINT "[leftover] take %u remaining %u\n", take, stream->buf_leftavail)); + stream->next_in = stream->buf_in; + stream->avail_in = stream->buf_avail; + stream->buf_avail = 0; + + return 0; +} + +/* Allocates one xd3_iopt_buflist holding elts xd3_rinst elements, links it at the head of stream->iopt_alloc and pushes every element onto stream->iopt_free. Returns 0 on success or ENOMEM. NOTE(review): when the second xd3_alloc fails, last has not yet been linked into iopt_alloc and is not released here -- confirm whether it leaks. */ +static int +xd3_alloc_iopt (xd3_stream *stream, usize_t elts) +{ + usize_t i; + xd3_iopt_buflist* last = + (xd3_iopt_buflist*) xd3_alloc (stream, sizeof (xd3_iopt_buflist), 1); + + if (last == NULL || + (last->buffer = (xd3_rinst*) xd3_alloc (stream, sizeof (xd3_rinst), elts)) == NULL) + { + return ENOMEM; + } + + last->next = stream->iopt_alloc; + stream->iopt_alloc = last; + + for (i = 0; i < elts; i += 1) + { + xd3_rlist_push_back (& stream->iopt_free, & last->buffer[i]); + } + + return 0; +} + +/* This function allocates all memory initially used by the encoder. A non-zero full_init additionally sizes the large and small hash tables. Returns ENOMEM when an output page or the iopt block cannot be allocated, otherwise the result of xd3_alloc_cache. */ +static int +xd3_encode_init (xd3_stream *stream, int full_init) +{ + int i; + + if (full_init) + { + int large_comp = (stream->src != NULL); + int small_comp = ! (stream->flags & XD3_NOCOMPRESS); + + /* Memory allocations for checksum tables are delayed until + * xd3_string_match_init in the first call to string_match--that way + * identical or short inputs require no table allocation.
*/ + if (large_comp) + { + usize_t hash_values = (stream->srcwin_maxsz / + stream->smatcher.large_step); + + xd3_size_hashtable (stream, + hash_values, + & stream->large_hash); + } + + if (small_comp) + { + /* TODO: This is under devel: used to have min(sprevsz) here, which sort + * of makes sense, but observed fast performance w/ larger tables, which + * also sort of makes sense. @@@ */ + usize_t hash_values = stream->winsize; + + xd3_size_hashtable (stream, + hash_values, + & stream->small_hash); + } + } + + /* data buffers: each encoder section starts with one output page that is both its head and its tail */ + for (i = 0; i < ENC_SECTS; i += 1) + { + if ((stream->enc_heads[i] = + stream->enc_tails[i] = + xd3_alloc_output (stream, NULL)) == NULL) + { + return ENOMEM; + } + } + + /* iopt buffer: both lists start empty, then one block of iopt_size elements is placed on the free list */ + xd3_rlist_init (& stream->iopt_used); + xd3_rlist_init (& stream->iopt_free); + + if (xd3_alloc_iopt (stream, stream->iopt_size) != 0) { goto fail; } + + XD3_ASSERT (xd3_rlist_length (& stream->iopt_free) == stream->iopt_size); + XD3_ASSERT (xd3_rlist_length (& stream->iopt_used) == 0); + + /* address cache, code table */ + stream->acache.s_near = stream->code_table_desc->near_modes; + stream->acache.s_same = stream->code_table_desc->same_modes; + stream->code_table = stream->code_table_func (); + + return xd3_alloc_cache (stream); + + fail: + + return ENOMEM; +} + +int +xd3_encode_init_full (xd3_stream *stream) +{ /* Encoder setup including hash-table sizing (full_init = 1). */ + return xd3_encode_init (stream, 1); +} + +int +xd3_encode_init_partial (xd3_stream *stream) +{ /* Encoder setup without hash-table sizing (full_init = 0). */ + return xd3_encode_init (stream, 0); +} + +/* Called after the ENC_POSTOUT state, this puts the output buffers + * back into separate lists and re-initializes some variables. (The + * output lists were spliced together during the ENC_FLUSH state.)
*/ +static void +xd3_encode_reset (xd3_stream *stream) +{ + int i; + xd3_output *olist; + + stream->avail_in = 0; + stream->small_reset = 1; + stream->i_slots_used = 0; + + if (stream->src != NULL) + { + stream->src->srcbase = 0; + stream->src->srclen = 0; + stream->srcwin_decided = 0; + stream->srcwin_decided_early = 0; + stream->match_minaddr = 0; + stream->match_maxaddr = 0; + stream->taroff = 0; + } + + /* Reset output chains: walk the page list starting at enc_heads[0], hand one emptied page back to each section, and pass whatever remains to xd3_freelist_output. */ + olist = stream->enc_heads[0]; + + for (i = 0; i < ENC_SECTS; i += 1) + { + XD3_ASSERT (olist != NULL); + + stream->enc_heads[i] = olist; + stream->enc_tails[i] = olist; + olist = olist->next_page; + + stream->enc_heads[i]->next = 0; + stream->enc_heads[i]->next_page = NULL; + + stream->enc_tails[i]->next_page = NULL; + stream->enc_tails[i] = stream->enc_heads[i]; + } + + xd3_freelist_output (stream, olist); +} + +/* The main encoding routine. It is a state machine keyed on stream->enc_state: each call resumes at the saved state and returns XD3_INPUT, XD3_WINSTART, XD3_OUTPUT, XD3_WINFINISH, XD3_INTERNAL (with stream->msg set), or a non-zero code propagated from a helper. Case labels without a preceding return or break fall through to the next state on purpose. */ +int +xd3_encode_input (xd3_stream *stream) +{ + int ret, i; + + if (stream->dec_state != 0) + { + stream->msg = "encoder/decoder transition"; + return XD3_INTERNAL; + } + + switch (stream->enc_state) + { + case ENC_INIT: + /* Only reached on first time through: memory setup. */ + if ((ret = xd3_encode_init_full (stream))) { return ret; } + + stream->enc_state = ENC_INPUT; + /* fallthrough */ + case ENC_INPUT: + + /* If there is no input yet, just return. This checks for + * next_in == NULL, not avail_in == 0 since zero bytes is a + * valid input. There is an assertion in xd3_avail_input() that + * next_in != NULL for this reason. By returning right away we + * avoid creating an input buffer before the caller has supplied + * its first data. It is possible for xd3_avail_input to be + * called both before and after the first call to + * xd3_encode_input(). */ + if (stream->next_in == NULL) + { + return XD3_INPUT; + } + + enc_flush: + /* See if we should buffer the input: either if there is already + * a leftover buffer, or if the input is short of winsize + * without flush.
The label at this point is reached by a goto + * below, when there is leftover input after postout. */ + if ((stream->buf_leftover != NULL) || + (stream->buf_avail != 0) || + (stream->avail_in < stream->winsize && ! (stream->flags & XD3_FLUSH))) + { + if ((ret = xd3_encode_buffer_leftover (stream))) { return ret; } + } + + /* Initialize the address cache before each window. */ + xd3_init_cache (& stream->acache); + + stream->input_position = 0; + stream->min_match = MIN_MATCH; + stream->unencoded_offset = 0; + + stream->enc_state = ENC_SEARCH; + + IF_DEBUG2 (DP(RINT "[WINSTART:%"Q"u] input bytes %u offset %"Q"u\n", + stream->current_window, stream->avail_in, + stream->total_in)); + return XD3_WINSTART; + + case ENC_SEARCH: + IF_DEBUG2 (DP(RINT "[SEARCH] match_state %d avail_in %u %s\n", + stream->match_state, stream->avail_in, + stream->src ? "source" : "no source")); + + /* Reentrant matching. */ + if (stream->src != NULL) + { + switch (stream->match_state) + { + case MATCH_TARGET: + /* Try matching forward at the start of the target. + * This is entered the first time through, to check for + * a perfect match, and whenever there is a source match + * that extends to the end of the previous window. The + * match_srcpos field is initially zero and later set + * during xd3_source_extend_match. */ + + if (stream->avail_in > 0) + { + /* This call can't fail because the source window is + * unrestricted.
*/ + ret = xd3_source_match_setup (stream, stream->match_srcpos); + XD3_ASSERT (ret == 0); + stream->match_state = MATCH_FORWARD; + } + else + { + stream->match_state = MATCH_SEARCHING; + stream->match_fwd = 0; + } + XD3_ASSERT (stream->match_fwd == 0); + /* fallthrough */ + case MATCH_FORWARD: + case MATCH_BACKWARD: + if (stream->avail_in != 0) + { + if ((ret = xd3_source_extend_match (stream)) != 0) + { + return ret; + } + + /* The search has to make forward progress here + * or else it can get stuck in a match-backward + * (getsrcblk) then match-forward (getsrcblk), + * find insufficient match length, then repeat + * exactly the same search. + */ + stream->input_position += stream->match_fwd; + } + /* fallthrough */ + case MATCH_SEARCHING: + /* Continue string matching. (It's possible that the + * initial match continued through the entire input, in + * which case we're still in MATCH_FORWARD and should + * remain so for the next input window.) */ + break; + } + } + + /* String matching... */ + if (stream->avail_in != 0 && + (ret = stream->smatcher.string_match (stream))) + { + return ret; + } + + stream->enc_state = ENC_INSTR; + /* fallthrough */ + case ENC_INSTR: + /* Note: Jump here to encode VCDIFF deltas w/o using this + * string-matching code. Merging code enters here. */ + + /* Flush the instruction buffer, then possibly add one more + * instruction, then emit the header. */ + if ((ret = xd3_iopt_flush_instructions (stream, 1)) || + (ret = xd3_iopt_add_finalize (stream))) + { + return ret; + } + + stream->enc_state = ENC_FLUSH; + /* fallthrough */ + case ENC_FLUSH: + /* Note: main_recode_func() bypasses string-matching by setting + * ENC_FLUSH. */ + if ((ret = xd3_emit_hdr (stream))) + { + return ret; + } + + /* Begin output. */ + stream->enc_current = HDR_HEAD (stream); + + /* Chain all the outputs together. After doing this, it looks + * as if there is only one section. The other enc_heads are set + * to NULL to avoid freeing them more than once.
*/ + for (i = 1; i < ENC_SECTS; i += 1) + { + stream->enc_tails[i-1]->next_page = stream->enc_heads[i]; + stream->enc_heads[i] = NULL; + } + + enc_output: + + stream->enc_state = ENC_POSTOUT; + stream->next_out = stream->enc_current->base; + stream->avail_out = stream->enc_current->next; + stream->total_out += (xoff_t) stream->avail_out; + + /* If there is any output in this buffer, return it, otherwise + * fall through to handle the next buffer or finish the window + * after all buffers have been output. */ + if (stream->avail_out > 0) + { + /* This is the only place xd3_encode returns XD3_OUTPUT */ + return XD3_OUTPUT; + } + /* fallthrough */ + case ENC_POSTOUT: + + if (stream->avail_out != 0) + { + stream->msg = "missed call to consume output"; + return XD3_INTERNAL; + } + + /* Continue outputting one buffer at a time, until the next is NULL. */ + if ((stream->enc_current = stream->enc_current->next_page) != NULL) + { + goto enc_output; + } + + stream->total_in += (xoff_t) stream->avail_in; + stream->enc_state = ENC_POSTWIN; + + IF_DEBUG2 (DP(RINT "[WINFINISH:%"Q"u] in=%"Q"u\n", + stream->current_window, + stream->total_in)); + return XD3_WINFINISH; + + case ENC_POSTWIN: + + xd3_encode_reset (stream); + + stream->current_window += 1; + stream->enc_state = ENC_INPUT; + + /* If there is leftover input to flush, repeat. */ + if (stream->buf_leftover != NULL) + { + goto enc_flush; + } + + /* Ready for more input.
*/ + return XD3_INPUT; + + default: + stream->msg = "invalid state"; + return XD3_INTERNAL; + } +} +#endif /* XD3_ENCODER */ + +/***************************************************************** + Client convenience functions + ******************************************************************/ +/* Drives func over an in-memory input. Sets XD3_FLUSH, supplies the input in pieces of at most stream->winsize bytes and appends every XD3_OUTPUT buffer to output, updating *output_size. Returns ENOSPC when output_size_max would be exceeded, XD3_INTERNAL when func asks for a source block or returns 0, and any other unrecognized code unchanged. Once the input is used up it returns 0, or the result of xd3_close_stream when close_stream is non-zero. is_encode is not read in this function. */ +static int +xd3_process_stream (int is_encode, + xd3_stream *stream, + int (*func) (xd3_stream *), + int close_stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max) +{ + usize_t ipos = 0; + usize_t n = min(stream->winsize, input_size); + + (*output_size) = 0; + + stream->flags |= XD3_FLUSH; + + xd3_avail_input (stream, input + ipos, n); + ipos += n; + + for (;;) + { + int ret; + switch((ret = func (stream))) + { + case XD3_OUTPUT: { /* memcpy below */ break; } + case XD3_INPUT: { + n = min(stream->winsize, input_size - ipos); + if (n == 0) { + goto done; + } + xd3_avail_input (stream, input + ipos, n); + ipos += n; + continue; + } + case XD3_GOTHEADER: { /* ignore */ continue; } + case XD3_WINSTART: { /* ignore */ continue; } + case XD3_WINFINISH: { /* ignore */ continue; } + case XD3_GETSRCBLK: + { + stream->msg = "stream requires source input"; + return XD3_INTERNAL; + } + case 0: + { + /* xd3_encode_input/xd3_decode_input never return 0 */ + stream->msg = "invalid return: 0"; + return XD3_INTERNAL; + } + default: + return ret; + } + + /* Only XD3_OUTPUT reaches this point: refuse to write past output_size_max. */ + if (*output_size + stream->avail_out > output_size_max) + { + stream->msg = "insufficient output space"; + return ENOSPC; + } + + memcpy (output + *output_size, stream->next_out, stream->avail_out); + + *output_size += stream->avail_out; + + xd3_consume_output (stream); + } + done: + return (close_stream == 0) ?
0 : xd3_close_stream (stream); +} +/* One-shot helper: builds a zeroed local xd3_stream and xd3_config, applies flags (plus window, iopt and sprevsz sizing derived from input_size when is_encode is set), registers source as a single block of source_size bytes when it is non-NULL, runs xd3_process_stream and always calls xd3_free_stream before returning. Returns XD3_INTERNAL when input or output is NULL. NOTE(review): the close_stream argument is not consulted; xd3_process_stream is always invoked with 1. */ +static int +xd3_process_memory (int is_encode, + int (*func) (xd3_stream *), + int close_stream, + const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max, + int flags) { + xd3_stream stream; + xd3_config config; + xd3_source src; + int ret; + + memset (& stream, 0, sizeof (stream)); + memset (& config, 0, sizeof (config)); + + if (input == NULL || output == NULL) { + stream.msg = "invalid input/output buffer"; + ret = XD3_INTERNAL; + goto exit; + } + + config.flags = flags; + + if (is_encode) + { + config.srcwin_maxsz = source_size; + config.winsize = min(input_size, (usize_t) XD3_DEFAULT_WINSIZE); + config.iopt_size = min(input_size / 32, XD3_DEFAULT_IOPT_SIZE); + config.iopt_size = max(config.iopt_size, 128U); + config.sprevsz = xd3_pow2_roundup (config.winsize); + } + + if ((ret = xd3_config_stream (&stream, &config)) != 0) + { + goto exit; + } + + if (source != NULL) + { + memset (& src, 0, sizeof (src)); + + /* The whole source is presented as block number 0. */ + src.blksize = source_size; + src.onblk = source_size; + src.curblk = source; + src.curblkno = 0; + + if ((ret = xd3_set_source_and_size (&stream, &src, source_size)) != 0) + { + goto exit; + } + } + + if ((ret = xd3_process_stream (is_encode, + & stream, + func, 1, + input, input_size, + output, + output_size, + output_size_max)) != 0) + { + goto exit; + } + + exit: + if (ret != 0) + { + IF_DEBUG2 (DP(RINT "process_memory: %d: %s\n", ret, stream.msg)); + } + xd3_free_stream(&stream); + return ret; +} +/* Decodes input through the caller-supplied stream into output by way of xd3_process_stream, with close_stream = 1. */ +int +xd3_decode_stream (xd3_stream *stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max) +{ + return xd3_process_stream (0, stream, & xd3_decode_input, 1, + input, input_size, + output, output_size, output_size_max); +} +/* Decodes input against an optional in-memory source using a temporary stream set up by xd3_process_memory. */ +int +xd3_decode_memory (const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t
*output, + usize_t *output_size, + usize_t output_size_max, + int flags) { + return xd3_process_memory (0, & xd3_decode_input, 1, + input, input_size, + source, source_size, + output, output_size, output_size_max, + flags); +} + +/* Encoder counterparts of xd3_decode_stream and xd3_decode_memory; they pass is_encode = 1 and xd3_encode_input. */ +#if XD3_ENCODER +int +xd3_encode_stream (xd3_stream *stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max) +{ + return xd3_process_stream (1, stream, & xd3_encode_input, 1, + input, input_size, + output, output_size, output_size_max); +} + +int +xd3_encode_memory (const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max, + int flags) { + return xd3_process_memory (1, & xd3_encode_input, 1, + input, input_size, + source, source_size, + output, output_size, output_size_max, + flags); +} +#endif + + +/************************************************************* + String matching helpers + *************************************************************/ + +#if XD3_ENCODER +/* Do the initial xd3_string_match() checksum table setup. + * Allocations are delayed until first use to avoid allocation + * sometimes (e.g., perfect matches, zero-length inputs). + * Returns 0 on success or ENOMEM when a table cannot be allocated. */ +static int +xd3_string_match_init (xd3_stream *stream) +{ + const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS); + const int DO_LARGE = (stream->src != NULL); + + if (DO_LARGE && stream->large_table == NULL) + { + if ((stream->large_table = + (usize_t*) xd3_alloc0 (stream, stream->large_hash.size, sizeof (usize_t))) == NULL) + { + return ENOMEM; + } + } + + if (DO_SMALL) + { + /* Subsequent calls can return immediately after checking reset. */ + if (stream->small_table != NULL) + { + /* The target hash table is reinitialized once per window. */ + /* TODO: This would not have to be reinitialized if absolute + * offsets were being stored.
*/ + if (stream->small_reset) + { + stream->small_reset = 0; + memset (stream->small_table, 0, + sizeof (usize_t) * stream->small_hash.size); + } + + return 0; + } + + if ((stream->small_table = + (usize_t*) xd3_alloc0 (stream, + stream->small_hash.size, + sizeof (usize_t))) == NULL) + { + return ENOMEM; + } + + /* The previous-position table is only needed when either chain length exceeds 1. */ + if (stream->smatcher.small_lchain > 1 || + stream->smatcher.small_chain > 1) + { + if ((stream->small_prev = + (xd3_slist*) xd3_alloc (stream, + stream->sprevsz, + sizeof (xd3_slist))) == NULL) + { + return ENOMEM; + } + } + } + + return 0; +} + +#if XD3_USE_LARGEFILE64 +/* This function handles the 32/64bit ambiguity -- file positions are 64bit + * but the hash table for source-offsets is 32bit. The upper 32 bits are + * taken from srcwin_cksum_pos, minus one when low is greater than the low + * 32 bits of srcwin_cksum_pos. */ +static xoff_t +xd3_source_cksum_offset(xd3_stream *stream, usize_t low) +{ + xoff_t scp = stream->srcwin_cksum_pos; + xoff_t s0 = scp >> 32; + + usize_t sr = (usize_t) scp; + + if (s0 == 0) { + return low; + } + + /* This should not be >= because srcwin_cksum_pos is the next + * position to index. */ + if (low > sr) { + return (--s0 << 32) | low; + } + + return (s0 << 32) | low; +} +#else +static xoff_t +xd3_source_cksum_offset(xd3_stream *stream, usize_t low) +{ /* Without XD3_USE_LARGEFILE64 the stored offset is returned as is. */ + return (xoff_t) low; +} +#endif + +/* This function sets up the stream->src fields srcbase, srclen. The + * call is delayed until these values are needed to encode a copy + * address. At this point the decision has to be made. Returns 0, or + * XD3_INTERNAL when the window length does not fit in usize_t. */ +static int +xd3_srcwin_setup (xd3_stream *stream) +{ + xd3_source *src = stream->src; + xoff_t length, x; + + /* Check the undecided state. */ + XD3_ASSERT (src->srclen == 0 && src->srcbase == 0); + + /* Avoid repeating this call. */ + stream->srcwin_decided = 1; + + /* If the stream is flushing, then the iopt buffer was able to + * contain the complete encoding. If no copies were issued no + * source window is actually needed. This prevents the VCDIFF + * header from including source base/len.
xd3_emit_hdr checks for + * srclen == 0. */ + if (stream->enc_state == ENC_INSTR && stream->match_maxaddr == 0) + { + goto done; + } + + /* Check for overflow, srclen is usize_t - this can't happen unless + * XD3_DEFAULT_SRCBACK and related parameters are extreme - should + * use smaller windows. */ + length = stream->match_maxaddr - stream->match_minaddr; + + x = (xoff_t) USIZE_T_MAX; + if (length > x) + { + stream->msg = "source window length overflow (not 64bit)"; + return XD3_INTERNAL; + } + + /* If ENC_INSTR, then we know the exact source window to use because + * no more copies can be issued. */ + if (stream->enc_state == ENC_INSTR) + { + src->srcbase = stream->match_minaddr; + src->srclen = (usize_t) length; + XD3_ASSERT (src->srclen); + goto done; + } + + /* Otherwise, we have to make a guess. More copies may still be + * issued, but we have to decide the source window base and length + * now. The guess is the larger of the span matched so far and + * avail_in plus a quarter of avail_in. */ + src->srcbase = stream->match_minaddr; + src->srclen = max ((usize_t) length, + stream->avail_in + (stream->avail_in >> 2)); + + /* OPT: If we know the source size, it might be possible to reduce + * srclen. */ + XD3_ASSERT (src->srclen); + done: + /* Set the taroff. This convenience variable is used even when + stream->src == NULL. */ + stream->taroff = src->srclen; + return 0; +} + +/* Sets the bounding region for a newly discovered source match, prior + * to calling xd3_source_extend_match(). This sets the match_maxfwd, + * match_maxback variables. Note: srcpos is an absolute position + * (xoff_t) but the match_maxfwd, match_maxback variables are usize_t. + * Returns 0 if the setup succeeds, or 1 if the source position lies + * outside an already-decided srcbase/srclen window.
*/ +static int +xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos) +{ + xd3_source *src = stream->src; + usize_t greedy_or_not; + xoff_t frontier_pos; + + stream->match_maxback = 0; + stream->match_maxfwd = 0; + stream->match_back = 0; + stream->match_fwd = 0; + + /* This avoids a non-blocking endless loop caused by scanning + * backwards across a block boundary, only to find not enough + * matching bytes to beat the current min_match due to a better lazy + * target match: the re-entry to xd3_string_match() repeats the same + * long match because the input position hasn't changed. TODO: if + * ever duplicates are added to the source hash table, this logic + * won't suffice to avoid loops. See testing/regtest.cc's + * TestNonBlockingProgress test! */ + if (srcpos != 0 && srcpos == stream->match_last_srcpos) + { + IF_DEBUG2(DP(RINT "[match_setup] looping failure\n")); + goto bad; + } + + /* Implement srcwin_maxsz, which prevents the encoder from seeking + * back further than the LRU cache maintaining FIFO discipline, (to + * avoid seeking). A position more than srcwin_maxsz bytes behind + * frontier_blkno * blksize is rejected. */ + frontier_pos = + stream->src->frontier_blkno * stream->src->blksize; + IF_DEBUG1(DP(RINT "[match_setup] frontier_pos %"Q"u, srcpos %"Q"u, " + "srcwin_maxsz %u\n", + frontier_pos, srcpos, stream->srcwin_maxsz)); + if (srcpos < frontier_pos && + frontier_pos - srcpos > stream->srcwin_maxsz) { + IF_DEBUG1(DP(RINT "[match_setup] rejected due to srcwin_maxsz " + "distance eof=%"Q"u srcpos=%"Q"u maxsz=%u\n", + xd3_source_eof (stream->src), + srcpos, stream->srcwin_maxsz)); + goto bad; + } + + /* Going backwards, the 1.5-pass algorithm allows some + * already-matched input to be covered by a longer source match. + * The greedy algorithm does not allow this. */ + if (stream->flags & XD3_BEGREEDY) + { + /* The greedy algorithm allows backward matching to the last + matched position.
*/ + greedy_or_not = xd3_iopt_last_matched (stream); + } + else + { + /* The 1.5-pass algorithm allows backward matching to go back as + * far as the unencoded offset, which is updated as instructions + * pass out of the iopt buffer. If this (default) is chosen, it + * means xd3_iopt_erase may be called to eliminate instructions + * when a covering source match is found. */ + greedy_or_not = stream->unencoded_offset; + } + + /* Backward target match limit. */ + XD3_ASSERT (stream->input_position >= greedy_or_not); + stream->match_maxback = stream->input_position - greedy_or_not; + + /* Forward target match limit. */ + XD3_ASSERT (stream->avail_in > stream->input_position); + stream->match_maxfwd = stream->avail_in - stream->input_position; + + /* Now we take the source position into account. It depends whether + * the srclen/srcbase have been decided yet. */ + if (stream->srcwin_decided == 0) + { + /* Unrestricted case: the match can cover the entire source, + * 0--src->size. We compare the usize_t + * match_maxfwd/match_maxback against the xoff_t + * src->size/srcpos values and take the min. The forward limit + * is only clamped when the source end is already known. */ + if (srcpos < (xoff_t) stream->match_maxback) + { + stream->match_maxback = (usize_t) srcpos; + } + + if (stream->src->eof_known) + { + xoff_t srcavail = xd3_source_eof (stream->src) - srcpos; + + if (srcavail < (xoff_t) stream->match_maxfwd) + { + stream->match_maxfwd = (usize_t) srcavail; + } + } + + IF_DEBUG1(DP(RINT + "[match_setup] srcpos %"Q"u (tgtpos %"Q"u) " + "unrestricted maxback %u maxfwd %u\n", + srcpos, + stream->total_in + stream->input_position, + stream->match_maxback, + stream->match_maxfwd)); + goto good; + } + + /* Decided some source window.
*/ + XD3_ASSERT (src->srclen > 0); + + /* Restricted case: fail if the srcpos lies outside the source window */ + if ((srcpos < src->srcbase) || + (srcpos > (src->srcbase + (xoff_t) src->srclen))) + { + IF_DEBUG1(DP(RINT "[match_setup] restricted source window failure\n")); + goto bad; + } + else + { + usize_t srcavail; + + /* Clamp the backward limit to the distance from srcbase. */ + srcavail = (usize_t) (srcpos - src->srcbase); + if (srcavail < stream->match_maxback) + { + stream->match_maxback = srcavail; + } + + /* Clamp the forward limit to the distance to srcbase + srclen. */ + srcavail = (usize_t) (src->srcbase + (xoff_t) src->srclen - srcpos); + if (srcavail < stream->match_maxfwd) + { + stream->match_maxfwd = srcavail; + } + + IF_DEBUG1(DP(RINT + "[match_setup] srcpos %"Q"u (tgtpos %"Q"u) " + "restricted maxback %u maxfwd %u\n", + srcpos, + stream->total_in + stream->input_position, + stream->match_maxback, + stream->match_maxfwd)); + goto good; + } + + good: /* Accepted: record srcpos, including in match_last_srcpos for the repeat check at the top of this function. */ + stream->match_state = MATCH_BACKWARD; + stream->match_srcpos = srcpos; + stream->match_last_srcpos = srcpos; + return 0; + + bad: /* Rejected: the caller sees 1 and the state returns to MATCH_SEARCHING. */ + stream->match_state = MATCH_SEARCHING; + return 1; +} +/* Returns the number of leading bytes, at most n, on which s1c and s2c agree. When UNALIGNED_OK is set and n covers at least eight ints, the buffers are first compared through int pointers in unrolled groups of eight; i is then backed up by one int and converted to a byte index, so the byte loop re-examines the last int the word loop read and finds the exact stopping offset. */ +static inline int +xd3_forward_match(const uint8_t *s1c, const uint8_t *s2c, int n) +{ + int i = 0; +#if UNALIGNED_OK + int nint = n / sizeof(int); + + if (nint >> 3) + { + int j = 0; + const int *s1 = (const int*)s1c; + const int *s2 = (const int*)s2c; + int nint_8 = nint - 8; + + while (i <= nint_8 && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++]) { } + + i = (i - 1) * sizeof(int); + } +#endif + + while (i < n && s1c[i] == s2c[i]) + { + i++; + } + return i; +} + +/* This function expands the source match backward and forward. It is + * reentrant, since xd3_getblk may return XD3_GETSRCBLK, so most + * variables are kept in xd3_stream.
There are two callers of this + * function, the string_matching routine when a checksum match is + * discovered, and xd3_encode_input whenever a continuing (or initial) + * match is suspected. The two callers do different things with the + * input_position, thus this function leaves that variable untouched. + * If a match is taken the resulting stream->match_fwd is left + * non-zero. */ +static int +xd3_source_extend_match (xd3_stream *stream) +{ + int ret; + xd3_source *src = stream->src; + xoff_t matchoff; /* matchoff is the current right/left-boundary of + the source match being tested. */ + usize_t streamoff; /* streamoff is the current right/left-boundary + of the input match being tested. */ + xoff_t tryblk; /* tryblk, tryoff are the block, offset position + of matchoff */ + usize_t tryoff; + usize_t tryrem; /* tryrem is the number of matchable bytes */ + usize_t matched; + + IF_DEBUG2(DP(RINT "[extend match] srcpos %"Q"u\n", + stream->match_srcpos)); + + XD3_ASSERT (src != NULL); + + /* Does it make sense to compute backward match AFTER forward match? */ + if (stream->match_state == MATCH_BACKWARD) + { + /* Note: this code is practically duplicated below, substituting + * match_fwd/match_back and direction. TODO: Consolidate? */ + matchoff = stream->match_srcpos - stream->match_back; + streamoff = stream->input_position - stream->match_back; + xd3_blksize_div (matchoff, src, &tryblk, &tryoff); + + /* this loops backward over source blocks */ + while (stream->match_back < stream->match_maxback) + { + /* see if we're backing across a source block boundary */ + if (tryoff == 0) + { + tryoff = src->blksize; + tryblk -= 1; + } + + if ((ret = xd3_getblk (stream, tryblk))) + { + /* if search went too far back, continue forward. */ + if (ret == XD3_TOOFARBACK) + { + break; + } + + /* could be a XD3_GETSRCBLK failure.
*/ + return ret; + } + + tryrem = min (tryoff, stream->match_maxback - stream->match_back); + + IF_DEBUG2(DP(RINT "[maxback] maxback %u trysrc %"Q"u/%u tgt %u tryrem %u\n", + stream->match_maxback, tryblk, tryoff, streamoff, tryrem)); + + /* TODO: This code can be optimized similar to xd3_forward_match() */ + for (; tryrem != 0; tryrem -= 1, stream->match_back += 1) + { + if (src->curblk[tryoff-1] != stream->next_in[streamoff-1]) + { + goto doneback; + } + + tryoff -= 1; + streamoff -= 1; + } + } + + doneback: + stream->match_state = MATCH_FORWARD; + } + + XD3_ASSERT (stream->match_state == MATCH_FORWARD); + + matchoff = stream->match_srcpos + stream->match_fwd; + streamoff = stream->input_position + stream->match_fwd; + xd3_blksize_div (matchoff, src, & tryblk, & tryoff); + + /* Note: practically the same code as backwards case above: same comments */ + while (stream->match_fwd < stream->match_maxfwd) + { + if (tryoff == src->blksize) + { + tryoff = 0; + tryblk += 1; + } + + if ((ret = xd3_getblk (stream, tryblk))) + { + /* if search went too far back, continue forward. */ + if (ret == XD3_TOOFARBACK) + { + break; + } + + /* could be a XD3_GETSRCBLK failure. */ + return ret; + } + + tryrem = min(stream->match_maxfwd - stream->match_fwd, + src->onblk - tryoff); + + if (tryrem == 0) + { + /* Generally, this means we have a power-of-two size source + * and we just found the end-of-file, in this case it's an + * empty block. */ + XD3_ASSERT (src->onblk < src->blksize); + break; + } + + matched = xd3_forward_match(src->curblk + tryoff, + stream->next_in + streamoff, + tryrem); + tryoff += matched; + streamoff += matched; + stream->match_fwd += matched; + + if (tryrem != matched) + { + break; + } + } + + stream->match_state = MATCH_SEARCHING; + + /* If the match ends short of the last instruction end, we probably + * don't want it. There is the possibility that a copy ends short + * of the last copy but also goes further back, in which case we + * might want it.
This code does not implement such: if so we would + * need more complicated xd3_iopt_erase logic. */ + if (stream->match_fwd < stream->min_match) + { + stream->match_fwd = 0; + } + else + { + usize_t total = stream->match_fwd + stream->match_back; + + /* Correct the variables to remove match_back from the equation. */ + usize_t target_position = stream->input_position - stream->match_back; + usize_t match_length = stream->match_back + stream->match_fwd; + xoff_t match_position = stream->match_srcpos - stream->match_back; + xoff_t match_end = stream->match_srcpos + stream->match_fwd; + + /* At this point we may have to erase any iopt-buffer + * instructions that are fully covered by a backward-extending + * copy. */ + if (stream->match_back > 0) + { + xd3_iopt_erase (stream, target_position, total); + } + + stream->match_back = 0; + + /* Update ranges. The first source match occurs with both + values set to 0. */ + if (stream->match_maxaddr == 0 || + match_position < stream->match_minaddr) + { + stream->match_minaddr = match_position; + } + + if (match_end > stream->match_maxaddr) + { + /* Note: per-window */ + stream->match_maxaddr = match_end; + } + + if (match_end > stream->maxsrcaddr) + { + /* Note: across windows */ + stream->maxsrcaddr = match_end; + } + + IF_DEBUG1 ({ + static int x = 0; /* One conversion per argument: match index, input start and end, source start and end, same/diff, length. */ + DP(RINT "[source match:%d] inp %"Q"u %"Q"u src %"Q"u %"Q"u (%s) [ %u bytes ]\n", + x++, + stream->total_in + target_position, + stream->total_in + target_position + match_length, + match_position, + match_position + match_length, + (stream->total_in + target_position == match_position) ? "same" : "diff", + match_length); + }); + + if ((ret = xd3_found_match (stream, + /* decoder position */ target_position, + /* length */ match_length, + /* address */ match_position, + /* is_source */ 1))) + { + return ret; + } + + /* If the match ends with the available input: */ + if (target_position + match_length == stream->avail_in) + { + /* Setup continuing match for the next window.
*/ + stream->match_state = MATCH_TARGET; + stream->match_srcpos = match_end; + } + } + + return 0; +} + +/* Update the small hash. Values in the small_table are offset by + * HASH_CKOFFSET (1) to distinguish empty buckets from real offsets. + * The scksum argument is not read in this function. */ +static void +xd3_scksum_insert (xd3_stream *stream, + usize_t inx, + usize_t scksum, + usize_t pos) +{ + /* If we are maintaining previous duplicates. */ + if (stream->small_prev) + { + usize_t last_pos = stream->small_table[inx]; + xd3_slist *pos_list = & stream->small_prev[pos & stream->sprevmask]; + + /* Note last_pos is offset by HASH_CKOFFSET. */ + pos_list->last_pos = last_pos; + } + + /* Enter the new position into the hash bucket. */ + stream->small_table[inx] = pos + HASH_CKOFFSET; +} + +#if XD3_DEBUG +/* Debug check for xd3_smatch: asserts that the first cmp_len bytes of ref0 and inp0 are equal and, when inp0 + cmp_len is still below inp_max, that the next byte differs. Always returns 1 so the call can sit inside XD3_ASSERT. */ +static int +xd3_check_smatch (const uint8_t *ref0, const uint8_t *inp0, + const uint8_t *inp_max, usize_t cmp_len) +{ + usize_t i; + + for (i = 0; i < cmp_len; i += 1) + { + XD3_ASSERT (ref0[i] == inp0[i]); + } + + if (inp0 + cmp_len < inp_max) + { + XD3_ASSERT (inp0[i] != ref0[i]); + } + + return 1; +} +#endif /* XD3_DEBUG */ + +/* When the hash table indicates a possible small string match, it + * calls this routine to find the best match. The first matching + * position is taken from the small_table, HASH_CKOFFSET is subtracted + * to get the actual position. After checking that match, if previous + * linked lists are in use (because stream->smatcher.small_chain > 1), + * previous matches are tested searching for the longest match. If + * (stream->min_match > MIN_MATCH) then a lazy match is in effect. + * Returns the length of the longest match found, with its position + * stored in *match_offset, or 0 when a 4- or 5-byte match lies too + * far back to be worth encoding. + */ +static usize_t +xd3_smatch (xd3_stream *stream, + usize_t base, + usize_t scksum, + usize_t *match_offset) +{ + usize_t cmp_len; + usize_t match_length = 0; + usize_t chain = (stream->min_match == MIN_MATCH ?
+ stream->smatcher.small_chain : + stream->smatcher.small_lchain); + const uint8_t *inp_max = stream->next_in + stream->avail_in; + const uint8_t *inp; + const uint8_t *ref; + + SMALL_HASH_DEBUG1 (stream, stream->next_in + stream->input_position); + + XD3_ASSERT (stream->min_match + stream->input_position <= stream->avail_in); + + base -= HASH_CKOFFSET; + + again: + + IF_DEBUG2 (DP(RINT "smatch at base=%u inp=%u cksum=%u\n", base, + stream->input_position, scksum)); + + /* For small matches, we can always go to the end-of-input because + * the matching position must be less than the input position. */ + XD3_ASSERT (base < stream->input_position); + + ref = stream->next_in + base; + inp = stream->next_in + stream->input_position; + + SMALL_HASH_DEBUG2 (stream, ref); + + /* Expand potential match forward. */ + while (inp < inp_max && *inp == *ref) + { + ++inp; + ++ref; + } + + cmp_len = (usize_t)(inp - (stream->next_in + stream->input_position)); + + /* Verify correctness */ + XD3_ASSERT (xd3_check_smatch (stream->next_in + base, + stream->next_in + stream->input_position, + inp_max, cmp_len)); + + /* Update longest match */ + if (cmp_len > match_length) + { + ( match_length) = cmp_len; + (*match_offset) = base; + + /* Stop if we match the entire input or have a long_enough match. */ + if (inp == inp_max || cmp_len >= stream->smatcher.long_enough) + { + goto done; + } + } + + /* If we have not reached the chain limit, see if there is another + previous position. */ + while (--chain != 0) + { + /* Calculate the previous offset. */ + usize_t prev_pos = stream->small_prev[base & stream->sprevmask].last_pos; + usize_t diff_pos; + + if (prev_pos == 0) + { + break; + } + + prev_pos -= HASH_CKOFFSET; + + if (prev_pos > base) + { + break; + } + + base = prev_pos; + + XD3_ASSERT (stream->input_position > base); + diff_pos = stream->input_position - base; + + /* Stop searching if we go beyond sprevsz, since those entries + * are for unrelated checksum entries.
*/ + if (diff_pos & ~stream->sprevmask) + { + break; + } + + goto again; + } + + done: + /* Crude efficiency test: if the match is very short and very far back, it's + * unlikely to help, but the exact calculation requires knowing the state of + * the address cache and adjacent instructions, which we can't do here. + * Rather than encode a probably inefficient copy here and check it later + * (which complicates the code a lot), do this: + */ + if (match_length == 4 && stream->input_position - (*match_offset) >= 1<<14) + { + /* It probably takes >2 bytes to encode an address >= 2^14 from here */ + return 0; + } + if (match_length == 5 && stream->input_position - (*match_offset) >= 1<<21) + { + /* It probably takes >3 bytes to encode an address >= 2^21 from here */ + return 0; + } + + /* It's unlikely that a window is large enough for the (match_length == 6 && + * address >= 2^28) check */ + return match_length; +} + +#if XD3_DEBUG +/* Debug helpers: each recomputes a value from inp and asserts that it agrees with the value passed in by the caller. */ +static void +xd3_verify_small_state (xd3_stream *stream, + const uint8_t *inp, + uint32_t x_cksum) +{ + uint32_t state; + uint32_t cksum = xd3_scksum (&state, inp, stream->smatcher.small_look); + + XD3_ASSERT (cksum == x_cksum); +} + +static void +xd3_verify_large_state (xd3_stream *stream, + const uint8_t *inp, + uint32_t x_cksum) +{ + uint32_t cksum = xd3_lcksum (inp, stream->smatcher.large_look); + XD3_ASSERT (cksum == x_cksum); +} +static void +xd3_verify_run_state (xd3_stream *stream, + const uint8_t *inp, + usize_t x_run_l, + uint8_t *x_run_c) +{ + usize_t slook = stream->smatcher.small_look; + uint8_t run_c; + usize_t run_l = xd3_comprun (inp, slook, &run_c); + + XD3_ASSERT (run_l == 0 || run_c == *x_run_c); + XD3_ASSERT (x_run_l > slook || run_l == x_run_l); +} +#endif /* XD3_DEBUG */ + +/* This function computes more source checksums to advance the window. + * Called at every entrance to the string-match loop and each time + * stream->input_position reaches the value returned as + * *next_move_point.
NB: this is one of the most expensive functions + * in this code and also the most critical for good compression. + * TODO: optimize the inner loop + */ +static int +xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point) +{ + xoff_t logical_input_cksum_pos; + xoff_t source_size; + + if (stream->src->eof_known) + { + source_size = xd3_source_eof (stream->src); + XD3_ASSERT(stream->srcwin_cksum_pos <= source_size); + + if (stream->srcwin_cksum_pos == source_size) + { + *next_move_point = USIZE_T_MAX; + return 0; + } + } + + /* Begin by advancing at twice the input rate, up to half the + * maximum window size. */ + logical_input_cksum_pos = min((stream->total_in + stream->input_position) * 2, + (stream->total_in + stream->input_position) + + (stream->srcwin_maxsz / 2)); + + /* If srcwin_cksum_pos is already greater, wait until the difference + * is met. */ + if (stream->srcwin_cksum_pos > logical_input_cksum_pos) + { + *next_move_point = stream->input_position + + (usize_t)(stream->srcwin_cksum_pos - logical_input_cksum_pos); + return 0; + } + + /* A long match may have extended past srcwin_cksum_pos. Don't + * start checksumming already-matched source data. */ + if (stream->maxsrcaddr > stream->srcwin_cksum_pos) + { + stream->srcwin_cksum_pos = stream->maxsrcaddr; + } + + if (logical_input_cksum_pos < stream->srcwin_cksum_pos) + { + logical_input_cksum_pos = stream->srcwin_cksum_pos; + } + + /* Advance at least one source block. With the command-line + * defaults this means: + * + * if (src->size <= srcwin_maxsz), index the entire source at once + * using the position of the first non-match. This is good for + * small inputs, especially when the content may have moved anywhere + * in the file (e.g., tar files). + * + * if (src->size > srcwin_maxsz), index at least one block (which + * the command-line sets to 1/32 of srcwin_maxsz) ahead of the + * logical position. 
This is good for different reasons: when a + * long match spanning several source blocks is encountered, this + * avoids computing checksums for those blocks. If the data can + * move anywhere, this is bad. + */ + logical_input_cksum_pos += stream->src->blksize; + + while (stream->srcwin_cksum_pos < logical_input_cksum_pos && + (!stream->src->eof_known || + stream->srcwin_cksum_pos < xd3_source_eof (stream->src))) + { + xoff_t blkno; + xoff_t blkbaseoffset; + usize_t blkrem; + ssize_t oldpos; /* Using ssize_t because of a */ + ssize_t blkpos; /* do { blkpos-- } + while (blkpos >= oldpos); */ + int ret; + xd3_blksize_div (stream->srcwin_cksum_pos, + stream->src, &blkno, &blkrem); + oldpos = blkrem; + + if ((ret = xd3_getblk (stream, blkno))) + { + /* TOOFARBACK should never occur here, since we read forward. */ + if (ret == XD3_TOOFARBACK) + { + ret = XD3_INTERNAL; + } + IF_DEBUG1 (DP(RINT + "[srcwin_move_point] async getblk return for %"Q"u\n", + blkno)); + return ret; + } + + IF_DEBUG1 (DP(RINT + "[srcwin_move_point] T=%"Q"u{%"Q"u} S=%"Q"u EOF=%"Q"u %s\n", + stream->total_in + stream->input_position, + logical_input_cksum_pos, + stream->srcwin_cksum_pos, + xd3_source_eof (stream->src), + stream->src->eof_known ? "known" : "unknown")); + + blkpos = xd3_bytes_on_srcblk (stream->src, blkno); + + if (blkpos < (ssize_t) stream->smatcher.large_look) + { + stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize; + IF_DEBUG1 (DP(RINT "[srcwin_move_point] continue (end-of-block)\n")); + continue; + } + + /* This inserts checksums for the entire block, in reverse, + * starting from the end of the block. This logic does not test + * stream->srcwin_cksum_pos because it always advances it to the + * start of the next block. + * + * oldpos is the srcwin_cksum_pos within this block. blkpos is + * the number of bytes available. Each iteration inspects + * large_look bytes then steps back large_step bytes. The + * if-stmt above ensures at least one large_look of data. 
*/ + blkpos -= stream->smatcher.large_look; + blkbaseoffset = stream->src->blksize * blkno; + + do + { + uint32_t cksum = xd3_lcksum (stream->src->curblk + blkpos, + stream->smatcher.large_look); + usize_t hval = xd3_checksum_hash (& stream->large_hash, cksum); + + stream->large_table[hval] = + (usize_t) (blkbaseoffset + + (xoff_t)(blkpos + HASH_CKOFFSET)); + + IF_DEBUG (stream->large_ckcnt += 1); + + blkpos -= stream->smatcher.large_step; + } + while (blkpos >= oldpos); + + stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize; + } + + IF_DEBUG1 (DP(RINT + "[srcwin_move_point] exited loop T=%"Q"u{%"Q"u} " + "S=%"Q"u EOF=%"Q"u %s\n", + stream->total_in + stream->input_position, + logical_input_cksum_pos, + stream->srcwin_cksum_pos, + xd3_source_eof (stream->src), + stream->src->eof_known ? "known" : "unknown")); + + if (stream->src->eof_known) + { + source_size = xd3_source_eof (stream->src); + + if (stream->srcwin_cksum_pos >= source_size) + { + /* This invariant is needed for xd3_source_cksum_offset() */ + stream->srcwin_cksum_pos = source_size; + *next_move_point = USIZE_T_MAX; + IF_DEBUG1 (DP(RINT + "[srcwin_move_point] finished with source input\n")); + return 0; + } + } + + /* How long until this function should be called again. 
*/ + XD3_ASSERT(stream->srcwin_cksum_pos >= logical_input_cksum_pos); + *next_move_point = stream->input_position + 1 + + (usize_t)(stream->srcwin_cksum_pos - logical_input_cksum_pos); + return 0; +} + +#endif /* XD3_ENCODER */ + +/******************************************************************** + TEMPLATE pass + *********************************************************************/ + +#endif /* __XDELTA3_C_INLINE_PASS__ */ +#ifdef __XDELTA3_C_TEMPLATE_PASS__ + +#if XD3_ENCODER + +/******************************************************************** + Templates + *******************************************************************/ + +/* Template macros */ +#define XD3_TEMPLATE(x) XD3_TEMPLATE2(x,TEMPLATE) +#define XD3_TEMPLATE2(x,n) XD3_TEMPLATE3(x,n) +#define XD3_TEMPLATE3(x,n) x ## n +#define XD3_STRINGIFY(x) XD3_STRINGIFY2(x) +#define XD3_STRINGIFY2(x) #x + +static int XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream); + +static const xd3_smatcher XD3_TEMPLATE(__smatcher_) = +{ + XD3_STRINGIFY(TEMPLATE), + XD3_TEMPLATE(xd3_string_match_), +#if SOFTCFG == 1 + 0, 0, 0, 0, 0, 0, 0 +#else + LLOOK, LSTEP, SLOOK, SCHAIN, SLCHAIN, MAXLAZY, LONGENOUGH +#endif +}; + +static int +XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream) +{ + const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS); + const int DO_LARGE = (stream->src != NULL); + const int DO_RUN = (1); + + const uint8_t *inp; + uint32_t scksum = 0; + uint32_t scksum_state = 0; + uint32_t lcksum = 0; + usize_t sinx; + usize_t linx; + uint8_t run_c; + usize_t run_l; + int ret; + usize_t match_length; + usize_t match_offset = 0; + usize_t next_move_point; + + /* If there will be no compression due to settings or short input, + * skip it entirely. */ + if (! 
(DO_SMALL || DO_LARGE || DO_RUN) || + stream->input_position + SLOOK > stream->avail_in) { goto loopnomore; } + + if ((ret = xd3_string_match_init (stream))) { return ret; } + + /* The restartloop label is reached when the incremental loop state + * needs to be reset. */ + restartloop: + + /* If there is not enough input remaining for any kind of match, + skip it. */ + if (stream->input_position + SLOOK > stream->avail_in) { goto loopnomore; } + + /* Now reset the incremental loop state: */ + + /* The min_match variable is updated to avoid matching the same lazy + * match over and over again. For example, if you find a (small) + * match of length 9 at one position, you will likely find a match + * of length 8 at the next position. */ + if (xd3_iopt_last_matched (stream) > stream->input_position) + { + stream->min_match = max(MIN_MATCH, + 1 + xd3_iopt_last_matched(stream) - + stream->input_position); + } + else + { + stream->min_match = MIN_MATCH; + } + + /* The current input byte. */ + inp = stream->next_in + stream->input_position; + + /* Small match state. */ + if (DO_SMALL) + { + scksum = xd3_scksum (&scksum_state, inp, SLOOK); + } + + /* Run state. */ + if (DO_RUN) + { + run_l = xd3_comprun (inp, SLOOK, & run_c); + } + + /* Large match state. We continue the loop even after not enough + * bytes for LLOOK remain, so always check stream->input_position in + * DO_LARGE code. */ + if (DO_LARGE && (stream->input_position + LLOOK <= stream->avail_in)) + { + /* Source window: next_move_point is the point that + * stream->input_position must reach before computing more + * source checksum. */ + if ((ret = xd3_srcwin_move_point (stream, & next_move_point))) + { + return ret; + } + + lcksum = xd3_lcksum (inp, LLOOK); + } + + /* TRYLAZYLEN: True if a certain length match should be followed by + * lazy search. This checks that LEN is shorter than MAXLAZY and + * that there is enough leftover data to consider lazy matching. 
+ * "Enough" is set to 2 since the next match will start at the next + * offset, it must match two extra characters. */ +#define TRYLAZYLEN(LEN,POS,MAX) ((MAXLAZY) > 0 && (LEN) < (MAXLAZY) \ + && (POS) + (LEN) <= (MAX) - 2) + + /* HANDLELAZY: This statement is called each time an instruction is + * emitted (three cases). If the instruction is large enough, the + * loop is restarted, otherwise lazy matching may ensue. */ +#define HANDLELAZY(mlen) \ + if (TRYLAZYLEN ((mlen), (stream->input_position), (stream->avail_in))) \ + { stream->min_match = (mlen) + LEAST_MATCH_INCR; goto updateone; } \ + else \ + { stream->input_position += (mlen); goto restartloop; } + + /* Now loop over one input byte at a time until a match is found... */ + for (;; inp += 1, stream->input_position += 1) + { + /* Now we try three kinds of string match in order of expense: + * run, large match, small match. */ + + /* Expand the start of a RUN. The test for (run_l == SLOOK) + * avoids repeating this check when we pass through a run area + * performing lazy matching. The run is only expanded once when + * the min_match is first reached. If lazy matching is + * performed, the run_l variable will remain inconsistent until + * the first non-running input character is reached, at which + * time the run_l may then again grow to SLOOK. */ + if (DO_RUN && run_l == SLOOK) + { + usize_t max_len = stream->avail_in - stream->input_position; + + IF_DEBUG (xd3_verify_run_state (stream, inp, run_l, &run_c)); + + while (run_l < max_len && inp[run_l] == run_c) { run_l += 1; } + + /* Output a RUN instruction. */ + if (run_l >= stream->min_match && run_l >= MIN_RUN) + { + if ((ret = xd3_emit_run (stream, stream->input_position, + run_l, &run_c))) { return ret; } + + HANDLELAZY (run_l); + } + } + + /* If there is enough input remaining. 
*/ + if (DO_LARGE && (stream->input_position + LLOOK <= stream->avail_in)) + { + if ((stream->input_position >= next_move_point) && + (ret = xd3_srcwin_move_point (stream, & next_move_point))) + { + return ret; + } + + linx = xd3_checksum_hash (& stream->large_hash, lcksum); + + IF_DEBUG (xd3_verify_large_state (stream, inp, lcksum)); + + if (stream->large_table[linx] != 0) + { + /* the match_setup will fail if the source window has + * been decided and the match lies outside it. + * OPT: Consider forcing a window at this point to + * permit a new source window. */ + xoff_t adj_offset = + xd3_source_cksum_offset(stream, + stream->large_table[linx] - + HASH_CKOFFSET); + if (xd3_source_match_setup (stream, adj_offset) == 0) + { + if ((ret = xd3_source_extend_match (stream))) + { + return ret; + } + + /* Update stream position. match_fwd is zero if no + * match. */ + if (stream->match_fwd > 0) + { + HANDLELAZY (stream->match_fwd); + } + } + } + } + + /* Small matches. */ + if (DO_SMALL) + { + sinx = xd3_checksum_hash (& stream->small_hash, scksum); + + /* Verify incremental state in debugging mode. */ + IF_DEBUG (xd3_verify_small_state (stream, inp, scksum)); + + /* Search for the longest match */ + if (stream->small_table[sinx] != 0) + { + match_length = xd3_smatch (stream, + stream->small_table[sinx], + scksum, + & match_offset); + } + else + { + match_length = 0; + } + + /* Insert a hash for this string. 
*/ + xd3_scksum_insert (stream, sinx, scksum, stream->input_position); + + /* Maybe output a COPY instruction */ + if (match_length >= stream->min_match) + { + IF_DEBUG2 ({ + static int x = 0; + DP(RINT "[target match:%d] " + "(-%d) [ %u bytes ]\n", + x++, + stream->input_position, + stream->input_position + match_length, + match_offset, + match_offset + match_length, + stream->input_position - match_offset, + match_length); + }); + + if ((ret = xd3_found_match (stream, + /* decoder position */ + stream->input_position, + /* length */ match_length, + /* address */ (xoff_t) match_offset, + /* is_source */ 0))) + { + return ret; + } + + /* Copy instruction. */ + HANDLELAZY (match_length); + } + } + + /* The logic above prevents excess work during lazy matching by + * increasing min_match to avoid smaller matches. Each time we + * advance stream->input_position by one, the minimum match + * shortens as well. */ + if (stream->min_match > MIN_MATCH) + { + stream->min_match -= 1; + } + + updateone: + + /* See if there are no more incremental cksums to compute. 
*/ + if (stream->input_position + SLOOK == stream->avail_in) + { + goto loopnomore; + } + + /* Compute next RUN, CKSUM */ + if (DO_RUN) + { + NEXTRUN (inp[SLOOK]); + } + + if (DO_SMALL) + { + scksum = xd3_small_cksum_update (&scksum_state, inp, SLOOK); + } + + if (DO_LARGE && (stream->input_position + LLOOK < stream->avail_in)) + { + lcksum = xd3_large_cksum_update (lcksum, inp, LLOOK); + } + } + + loopnomore: + return 0; +} + +#endif /* XD3_ENCODER */ +#endif /* __XDELTA3_C_TEMPLATE_PASS__ */ diff --git a/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/main.m b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/main.m new file mode 100644 index 0000000..67f1e3d --- /dev/null +++ b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/main.m @@ -0,0 +1,25 @@ +/* xdelta3 - delta compression tools and library -*- Mode: objc *-* + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ +#import + +#import "Xd3iOSAppDelegate.h" + +int main(int argc, char *argv[]) +{ + @autoreleasepool { + return UIApplicationMain(argc, argv, nil, NSStringFromClass([Xd3iOSAppDelegate class])); + } +} diff --git a/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Info.plist b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Info.plist new file mode 100644 index 0000000..d0e8a58 --- /dev/null +++ b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Info.plist @@ -0,0 +1,52 @@ + + + + + CFBundleDevelopmentRegion + en + CFBundleDisplayName + ${PRODUCT_NAME} + CFBundleExecutable + ${EXECUTABLE_NAME} + CFBundleIconFiles + + CFBundleIdentifier + Joshua-MacDonald.${PRODUCT_NAME:rfc1034identifier} + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + ${PRODUCT_NAME} + CFBundlePackageType + APPL + CFBundleShortVersionString + 1.0 + CFBundleSignature + ???? + CFBundleVersion + 1.0 + LSRequiresIPhoneOS + + UIMainStoryboardFile + MainStoryboard_iPhone + UIMainStoryboardFile~ipad + MainStoryboard_iPad + UIRequiredDeviceCapabilities + + armv7 + + UISupportedInterfaceOrientations + + UIInterfaceOrientationPortrait + UIInterfaceOrientationLandscapeLeft + UIInterfaceOrientationLandscapeRight + UIInterfaceOrientationPortraitUpsideDown + + UISupportedInterfaceOrientations~ipad + + UIInterfaceOrientationPortrait + UIInterfaceOrientationPortraitUpsideDown + UIInterfaceOrientationLandscapeLeft + UIInterfaceOrientationLandscapeRight + + + diff --git a/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Prefix.pch b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Prefix.pch new file mode 100644 index 0000000..69f0135 --- /dev/null +++ b/lib/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Prefix.pch @@ -0,0 +1,14 @@ +// +// Prefix header for all source files of the 'xdelta3-ios-test' target in the 'xdelta3-ios-test' project +// + 
+#import + +#ifndef __IPHONE_5_0 +#warning "This project uses features only available in iOS SDK 5.0 and later." +#endif + +#ifdef __OBJC__ + #import + #import +#endif diff --git a/lib/xdelta3/examples/small_page_test.c b/lib/xdelta3/examples/small_page_test.c new file mode 100644 index 0000000..0e33547 --- /dev/null +++ b/lib/xdelta3/examples/small_page_test.c @@ -0,0 +1,215 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include + +#define PAGE_SIZE 4096 + +#define SPACE_MAX 131072 // how much memory per process +#define OUTPUT_MAX 1024 // max size for output +#define XD3_ALLOCSIZE 256 // internal size for various buffers +#define IOPT_SIZE 128 // instruction buffer + +// SPACE_MAX of 32K is sufficient for most inputs with XD3_COMPLEVEL_1 +// XD3_COMPLEVEL_9 requires about 4x more space than XD3_COMPLEVEL_1 + +#include "xdelta3.h" +#include "xdelta3.c" + +typedef struct _context { + uint8_t *buffer; + int allocated; +} context_t; + +static int max_allocated = 0; + +void* +process_alloc (void* opaque, usize_t items, usize_t size) +{ + context_t *ctx = (context_t*) opaque; + usize_t t = items * size; + void *ret; + + if (ctx->allocated + t > SPACE_MAX) + { + return NULL; + } + + ret = ctx->buffer + ctx->allocated; + ctx->allocated += t; + return ret; +} + +void +process_free (void* opaque, void *ptr) +{ +} + +int +process_page (int is_encode, + int (*func) (xd3_stream *), + const uint8_t *input, + usize_t input_size, + const uint8_t *source, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max, + int flags) { + + /* On my x86 this is 1072 of objects on the stack */ + xd3_stream stream; + xd3_config config; + xd3_source src; + context_t *ctx = calloc(SPACE_MAX, 1); + int ret; + + memset (&config, 0, sizeof(config)); + + if (ctx == NULL) + { + printf("calloc failed\n"); + return -1; + } + + ctx->buffer = (uint8_t*)ctx; + ctx->allocated = sizeof(*ctx); + + config.flags = flags; + config.winsize = PAGE_SIZE; + config.sprevsz = PAGE_SIZE; + config.srcwin_maxsz = PAGE_SIZE; + config.iopt_size = IOPT_SIZE; + config.alloc = &process_alloc; + config.freef = &process_free; + config.opaque = (void*) ctx; + + src.blksize = PAGE_SIZE; + src.onblk = PAGE_SIZE; + src.curblk = source; + src.curblkno = 0; + + if ((ret = xd3_config_stream (&stream, &config)) != 0 || + (ret = xd3_set_source_and_size (&stream, &src, PAGE_SIZE)) != 0 || + (ret = xd3_process_stream (is_encode, + 
&stream, + func, 1, + input, input_size, + output, output_size, + output_size_max)) != 0) + { + if (stream.msg != NULL) + { + fprintf(stderr, "stream message: %s\n", stream.msg); + } + } + + xd3_free_stream (&stream); + if (max_allocated < ctx->allocated) + { + max_allocated = ctx->allocated; + fprintf(stderr, "max allocated %d\n", max_allocated); + } + + free(ctx); + return ret; +} + +int test(int stride, int encode_flags) +{ + uint8_t frompg[PAGE_SIZE]; + uint8_t topg[PAGE_SIZE]; + uint8_t output[OUTPUT_MAX]; + uint8_t reout[PAGE_SIZE]; + usize_t output_size; + usize_t re_size; + int i, j, ret; + + for (i = 0; i < PAGE_SIZE; i++) + { + topg[i] = frompg[i] = (rand() >> 3 ^ rand() >> 6 ^ rand() >> 9); + } + + // change 1 byte every stride + if (stride > 0) + { + for (j = stride; j <= PAGE_SIZE; j += stride) + { + topg[j - 1] ^= 0xff; + } + } + + if ((ret = process_page (1, xd3_encode_input, + topg, PAGE_SIZE, + frompg, output, + &output_size, OUTPUT_MAX, + encode_flags)) != 0) + { + fprintf (stderr, "encode failed: stride %u flags 0x%x\n", + stride, encode_flags); + return ret; + } + + if ((ret = process_page (0, xd3_decode_input, + output, output_size, + frompg, reout, + &re_size, PAGE_SIZE, + 0)) != 0) + { + fprintf (stderr, "decode failed: stride %u output_size %u flags 0x%x\n", + stride, output_size, encode_flags); + return ret; + } + + if (output_size > OUTPUT_MAX || re_size != PAGE_SIZE) + { + fprintf (stderr, "internal error: %u != %u\n", output_size, re_size); + return -1; + } + + for (i = 0; i < PAGE_SIZE; i++) + { + if (reout[i] != topg[i]) + { + fprintf (stderr, "encode-decode error: position %d\n", i); + return -1; + } + } + + fprintf(stderr, "stride %d flags 0x%x size %u ", + stride, encode_flags, output_size); + fprintf(stderr, "%s\n", (ret == 0) ? "OK" : "FAIL"); + + return 0; +} + +int main() +{ + int stride; + int level; + + for (level = 1; level < 10; level = (level == 1 ? 
3 : level + 3)) + { + int lflag = level << XD3_COMPLEVEL_SHIFT; + + for (stride = 2; stride <= PAGE_SIZE; stride += 2) + { + test(stride, lflag); + test(stride, lflag | XD3_SEC_DJW); + } + } + + return 0; +} diff --git a/lib/xdelta3/examples/speed_test.c b/lib/xdelta3/examples/speed_test.c new file mode 100644 index 0000000..f8fbbb7 --- /dev/null +++ b/lib/xdelta3/examples/speed_test.c @@ -0,0 +1,87 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "test.h" + +usize_t bench_speed(const uint8_t *from_buf, const size_t from_len, + const uint8_t *to_buf, const size_t to_len, + uint8_t *delta_buf, const size_t delta_alloc, + int flags) { + usize_t delta_size; + int ret = xd3_encode_memory(to_buf, to_len, from_buf, from_len, + delta_buf, &delta_size, delta_alloc, flags); + if (ret != 0) { + fprintf(stderr, "encode failure: %d: %s\n", ret, xd3_strerror(ret)); + abort(); + } + return delta_size; +} + +int main(int argc, char **argv) { + int repeat, level; + char *from, *to; + uint8_t *from_buf = NULL, *to_buf = NULL, *delta_buf = NULL; + size_t from_len = 0, to_len, delta_alloc, delta_size = 0; + long start, finish; + int i, ret; + int flags; + + if (argc != 5) { + fprintf(stderr, "usage: speed_test LEVEL COUNT FROM TO\n"); + return 1; + } + + level = atoi(argv[1]); + repeat = atoi(argv[2]); + from = argv[3]; + to = argv[4]; + flags = (level << XD3_COMPLEVEL_SHIFT) & XD3_COMPLEVEL_MASK; + + if ((strcmp(from, "null") != 0 && + (ret = read_whole_file(from, &from_buf, &from_len))) || + (ret = read_whole_file(to, &to_buf, &to_len))) { + fprintf(stderr, "read_whole_file error\n"); + goto exit; + } + + delta_alloc = to_len * 11 / 10; + delta_buf = main_malloc(delta_alloc); + + start = get_millisecs_now(); + + for (i = 0; i < repeat; ++i) { + delta_size = bench_speed(from_buf, from_len, + to_buf, to_len, delta_buf, delta_alloc, flags); + } + + finish = get_millisecs_now(); + + fprintf(stderr, + "STAT: encode %3.2f ms from %s to %s repeat %d %zdbit delta %zd\n", + (double)(finish - start) / repeat, from, to, repeat, sizeof (xoff_t) * 8, delta_size); + + ret = 0; + + if (0) { + exit: + ret = 1; + } + + main_free(to_buf); + main_free(from_buf); + main_free(delta_buf); + return ret; +} diff --git a/lib/xdelta3/examples/test.h b/lib/xdelta3/examples/test.h new file mode 100644 index 0000000..f7082f2 --- /dev/null +++ b/lib/xdelta3/examples/test.h @@ -0,0 +1,56 @@ +/* xdelta3 - delta compression tools and library 
+ Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#define NOT_MAIN 1 + +#include "xdelta3.h" +#include "xdelta3.c" + +static int read_whole_file(const char *name, + uint8_t **buf_ptr, + size_t *buf_len) { + main_file file; + int ret; + xoff_t len; + usize_t nread; + main_file_init(&file); + file.filename = name; + ret = main_file_open(&file, name, XO_READ); + if (ret != 0) { + fprintf(stderr, "open failed\n"); + goto exit; + } + ret = main_file_stat(&file, &len); + if (ret != 0) { + fprintf(stderr, "stat failed\n"); + goto exit; + } + + (*buf_len) = (size_t)len; + (*buf_ptr) = (uint8_t*) main_malloc(*buf_len); + ret = main_file_read(&file, *buf_ptr, *buf_len, &nread, + "read failed"); + if (ret == 0 && *buf_len == nread) { + ret = 0; + } else { + fprintf(stderr, "invalid read\n"); + ret = XD3_INTERNAL; + } + exit: + main_file_cleanup(&file); + return ret; +} + diff --git a/lib/xdelta3/generate_build_files.sh b/lib/xdelta3/generate_build_files.sh new file mode 100644 index 0000000..a01cb1e --- /dev/null +++ b/lib/xdelta3/generate_build_files.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +aclocal && + autoreconf --install && + libtoolize && + autoconf && + automake --add-missing && + automake diff --git a/lib/xdelta3/go/src/regtest.go b/lib/xdelta3/go/src/regtest.go new file mode 100644 index 0000000..9d91f69 --- /dev/null +++ b/lib/xdelta3/go/src/regtest.go @@ -0,0 +1,274 @@ +package main + +import ( + "fmt" + "io" + "path" + "os" + "sort" + "time" + + 
"xdelta" +) + +const ( + xdataset = "/volume/home/jmacd/src/testdata" + xcompare = "/volume/home/jmacd/src/xdelta-devel/xdelta3/build/x86_64-pc-linux-gnu-m64/xoff64/xdelta3" + xdelta3 = "/volume/home/jmacd/src/xdelta-64bithash/xdelta3/build/x86_64-pc-linux-gnu-m64/usize64/xoff64/xdelta3" + seed = 1422253499919909358 +) + +type Config struct { + srcbuf_size int64 + window_size int64 + blocksize int +} + +func NewC() Config { + // TODO make these (and above) flags + return Config{1<<26, 1<<22, 1<<16} +} + +func (c Config) smokeTest(t *xdelta.TestGroup, p xdelta.Program) { + target := "Hello world!" + source := "Hello world, nice to meet you!" + + enc, err := t.Exec("encode", p, true, []string{"-e"}) + if err != nil { + t.Panic(err) + } + dec, err := t.Exec("decode", p, true, []string{"-d"}) + if err != nil { + t.Panic(err) + } + + encodeout := t.Drain(enc.Stdout, "encode.stdout") + decodeout := t.Drain(dec.Stdout, "decode.stdout") + + t.Empty(enc.Stderr, "encode") + t.Empty(dec.Stderr, "decode") + + t.TestWrite("encode.stdin", enc.Stdin, []byte(target)) + t.TestWrite("encode.srcin", enc.Srcin, []byte(source)) + + t.TestWrite("decode.stdin", dec.Stdin, <-encodeout) + t.TestWrite("decode.srcin", dec.Srcin, []byte(source)) + + if do := string(<-decodeout); do != target { + t.Panic(fmt.Errorf("It's not working! 
%s\n!=\n%s\n", do, target)) + } + t.Wait(enc, dec) +} + +type PairTest struct { + // Input + Config + program xdelta.Program + source, target string + + // Output + TestOutput +} + +type TestOutput struct { + encoded int64 + encDuration time.Duration + decDuration time.Duration + encSysDuration time.Duration + decSysDuration time.Duration +} + +func (to *TestOutput) Add(a TestOutput) { + to.encoded += a.encoded + to.encDuration += a.encDuration + to.decDuration += a.decDuration + to.encSysDuration += a.encSysDuration + to.decSysDuration += a.decSysDuration +} + +func (to *TestOutput) String() string { + return fmt.Sprintf("SIZE: %v\tT: %v\tTSYS: %v\tDT: %v\tDTSYS: %v", + to.encoded, to.encDuration, to.encSysDuration, to.decDuration, to.encSysDuration) +} + +// P is the test program, Q is the reference version. +func (cfg Config) datasetTest(t *xdelta.TestGroup, p, q xdelta.Program) { + dir, err := os.Open(xdataset) + if err != nil { + t.Panic(err) + } + dents, err := dir.Readdir(-1) + if err != nil { + t.Panic(err) + } + paths := make([]string, len(dents)) + var total int64 + for i, d := range dents { + if !d.Mode().IsRegular() { + continue + } + paths[i] = fmt.Sprint(xdataset, "/", d.Name()) + total += d.Size() + } + meansize := total / int64(len(dents)) + largest := uint(20) + for ; largest <= 31 && 1< (1.05 * float64(expect)) { + t.Fail("encoded size should be ~=", expect, ", actual ", encoded_size) + } +} + +func main() { + r, err := xdelta.NewRunner() + if err != nil { + panic(err) + } + defer r.Cleanup() + + cfg := NewC() + + prog := xdelta.Program{xdelta3} + + r.RunTest("smoketest", func(t *xdelta.TestGroup) { cfg.smokeTest(t, prog) }) + + for i := uint(29); i <= 33; i += 1 { + // The arguments to offsetTest are offset, source + // window size, and file size. The source window size + // is (2 << i) and (in the 3.0x release branch) is + // limited to 2^31, so the greatest value of i is + // 30. 
+ cfg.srcbuf_size = 2 << i + r.RunTest(fmt.Sprint("offset", i), func(t *xdelta.TestGroup) { + cfg.offsetTest(t, prog, 1 << i, 3 << i) }) + } + + comp := xdelta.Program{xcompare} + + r.RunTest("dataset", func(t *xdelta.TestGroup) { cfg.datasetTest(t, prog, comp) }) +} diff --git a/lib/xdelta3/go/src/xdelta/rstream.go b/lib/xdelta3/go/src/xdelta/rstream.go new file mode 100644 index 0000000..99c3d17 --- /dev/null +++ b/lib/xdelta3/go/src/xdelta/rstream.go @@ -0,0 +1,71 @@ +package xdelta + + +import ( + "io" + "math/rand" +) + +const ( + blocksize = 1<<17 +) + +func (t *TestGroup) WriteRstreams(desc string, seed, offset, len int64, + src, tgt io.WriteCloser) { + t.Go("src-write:"+desc, func (g *Goroutine) { + writeOne(g, seed, 0, len, tgt, false) + }) + t.Go("tgt-write:"+desc, func (g *Goroutine) { + writeOne(g, seed, offset, len, src, true) + }) +} + +func writeOne(g *Goroutine, seed, offset, len int64, stream io.WriteCloser, readall bool) { + if !readall { + // Allow the source-read to fail or block until the process terminates. + // This behavior is reserved for the decoder, which is not required to + // read the entire source. 
+ g.OK() + } + if offset != 0 { + // Fill with other random data until the offset + if err := writeRand(g, rand.New(rand.NewSource(^seed)), offset, stream); err != nil { + g.Panic(err) + } + } + if err := writeRand(g, rand.New(rand.NewSource(seed)), + len - offset, stream); err != nil { + g.Panic(err) + } + if err := stream.Close(); err != nil { + g.Panic(err) + } + g.OK() +} + +func writeRand(g *Goroutine, r *rand.Rand, len int64, s io.Writer) error { + blk := make([]byte, blocksize) + for len > 0 { + fillRand(r, blk) + c := blocksize + if len < blocksize { + c = int(len) + } + if _, err := s.Write(blk[0:c]); err != nil { + return err + } + len -= int64(c) + } + return nil +} + +func fillRand(r *rand.Rand, blk []byte) { + for p := 0; p < len(blk); { + v := r.Int63() + for i := 7; i != 0 && p < len(blk); i-- { + blk[p] = byte(v) + p++ + v >>= 8 + } + } +} diff --git a/lib/xdelta3/go/src/xdelta/run.go b/lib/xdelta3/go/src/xdelta/run.go new file mode 100644 index 0000000..448fabe --- /dev/null +++ b/lib/xdelta3/go/src/xdelta/run.go @@ -0,0 +1,71 @@ +package xdelta + +import ( + "fmt" + "io" + "io/ioutil" + "os" + "os/exec" +) + +type Program struct { + Path string +} + +type Run struct { + Cmd exec.Cmd + Srcfile string + Stdin io.WriteCloser + Srcin io.WriteCloser + Stdout io.ReadCloser + Stderr io.ReadCloser +} + +type Runner struct { + Testdir string +} + +func (r *Run) Wait() error { + return r.Cmd.Wait() +} + +func NewRunner() (*Runner, error) { + if dir, err := ioutil.TempDir(tmpDir, "xrt"); err != nil { + return nil, err + } else { + return &Runner{dir}, nil + } +} + +func (r *Runner) newTestGroup(name string) (*TestGroup) { + tg := &TestGroup{Runner: r} + tg.WaitGroup.Add(1) + g0 := &Goroutine{tg, name, false} + tg.running = append(tg.running, g0) + tg.main = g0 + return tg +} + +func (r *Runner) Cleanup() { + os.RemoveAll(r.Testdir) +} + +func (r *Runner) RunTest(name string, f func (t *TestGroup)) { + t := r.newTestGroup(name) + c := make(chan interface{}) + 
go func() { + defer func() { + rec := recover() + c <- rec + }() + fmt.Println("Testing", name, "...") + f(t) + c <- nil + }() + rec := <- c + if t.errors == nil && rec == nil { + fmt.Println("Success:", name) + } else { + fmt.Println("FAILED:", name, t.errors, rec) + } +} diff --git a/lib/xdelta3/go/src/xdelta/test.go b/lib/xdelta3/go/src/xdelta/test.go new file mode 100644 index 0000000..7210698 --- /dev/null +++ b/lib/xdelta3/go/src/xdelta/test.go @@ -0,0 +1,164 @@ +package xdelta + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" + "io/ioutil" + "os" + "path" + "sync/atomic" + + "golang.org/x/sys/unix" +) + +var ( + tmpDir = "/tmp" + srcSeq int64 +) + +func (t *TestGroup) Drain(f io.ReadCloser, desc string) <-chan []byte { + c := make(chan []byte) + t.Go(desc, func(g *Goroutine) { + if b, err := ioutil.ReadAll(f); err != nil { + g.Panic(err) + } else { + c <- b + } + g.OK() + }) + return c +} + +func (t *TestGroup) Empty(f io.ReadCloser, desc string) *Goroutine { + return t.Go("empty:"+desc, func (g *Goroutine) { + s := bufio.NewScanner(f) + for s.Scan() { + os.Stderr.Write([]byte(fmt.Sprint(desc, ": ", s.Text(), "\n"))) + } + err := s.Err() + f.Close() + if err != nil { + g.Panic(err) + } + g.OK() + }) +} + +func (t *TestGroup) TestWrite(what string, f io.WriteCloser, b []byte) *Goroutine { + return t.Go("write", func(g *Goroutine) { + if _, err := f.Write(b); err != nil { + g.Panic(err) + } + if err := f.Close(); err != nil { + g.Panic(err) + } + g.OK() + }) +} + +func (t *TestGroup) CopyStreams(r io.ReadCloser, w io.WriteCloser, written *int64) *Goroutine { + return t.Go("copy", func(g *Goroutine) { + nwrite, err := io.Copy(w, r) + if err != nil { + g.Panic(err) + } + err = r.Close() + if err != nil { + g.Panic(err) + } + err = w.Close() + if err != nil { + g.Panic(err) + } + g.OK() + *written = nwrite + }) +} + +func (t *TestGroup) CompareStreams(r1 io.ReadCloser, r2 io.ReadCloser, length int64) *Goroutine { + return t.Go("compare", func(g 
*Goroutine) { + b1 := make([]byte, blocksize) + b2 := make([]byte, blocksize) + var idx int64 + for length > 0 { + c := blocksize + if length < blocksize { + c = int(length) + } + if _, err := io.ReadFull(r1, b1[0:c]); err != nil { + g.Panic(err) + } + if _, err := io.ReadFull(r2, b2[0:c]); err != nil { + g.Panic(err) + } + if bytes.Compare(b1[0:c], b2[0:c]) != 0 { + fmt.Println("B1 is", string(b1[0:c])) + fmt.Println("B2 is", string(b2[0:c])) + g.Panic(errors.New(fmt.Sprint("Bytes do not compare at ", idx))) + } + length -= int64(c) + idx += int64(c) + } + g.OK() + }) +} + +func (t *TestGroup) Exec(desc string, p Program, srcfifo bool, flags []string) (*Run, error) { + var err error + run := &Run{} + args := []string{p.Path} + if srcfifo { + num := atomic.AddInt64(&srcSeq, 1) + run.Srcfile = path.Join(t.Runner.Testdir, fmt.Sprint("source", num)) + if err = unix.Mkfifo(run.Srcfile, 0600); err != nil { + return nil, err + } + read, write := io.Pipe() + t.writeFifo(run.Srcfile, read) + run.Srcin = write + args = append(args, "-s") + args = append(args, run.Srcfile) + } + if run.Stdin, err = run.Cmd.StdinPipe(); err != nil { + return nil, err + } + if run.Stdout, err = run.Cmd.StdoutPipe(); err != nil { + return nil, err + } + if run.Stderr, err = run.Cmd.StderrPipe(); err != nil { + return nil, err + } + + run.Cmd.Path = p.Path + run.Cmd.Args = append(args, flags...) 
+ run.Cmd.Dir = t.Runner.Testdir + if serr := run.Cmd.Start(); serr != nil { + return nil, serr + } + return run, nil +} + +func (t *TestGroup) Fail(v ...interface{}) { + panic(fmt.Sprintln(v...)) +} + +func (t *TestGroup) writeFifo(srcfile string, read io.Reader) *Goroutine { + return t.Go("compare", func(g *Goroutine) { + fifo, err := os.OpenFile(srcfile, os.O_WRONLY, 0600) + if err != nil { + fifo.Close() + g.Panic(err) + } + if _, err := io.Copy(fifo, read); err != nil { + fifo.Close() + g.Panic(err) + } + if err := fifo.Close(); err != nil { + g.Panic(err) + } + g.OK() + }) +} diff --git a/lib/xdelta3/go/src/xdelta/tgroup.go b/lib/xdelta3/go/src/xdelta/tgroup.go new file mode 100644 index 0000000..602b1e1 --- /dev/null +++ b/lib/xdelta3/go/src/xdelta/tgroup.go @@ -0,0 +1,97 @@ +package xdelta + +import ( + "fmt" + "runtime" + "sync" +) + +type TestGroup struct { + *Runner + main *Goroutine + sync.Mutex + sync.WaitGroup + running []*Goroutine + errors []error + nonerrors []error // For tolerated / expected conditions +} + +type Goroutine struct { + *TestGroup + name string + done bool +} + +func (g *Goroutine) String() string { + return fmt.Sprint("[", g.name, "]") +} + +func (g *Goroutine) finish(err error) { + wait := false + tg := g.TestGroup + sbuf := make([]byte, 4096) + sbuf = sbuf[0:runtime.Stack(sbuf, false)] + if err != nil { + err = fmt.Errorf("%v:%v:%v", g.name, err, string(sbuf)) + } + tg.Lock() + if g.done { + if err != nil { + tg.nonerrors = append(tg.nonerrors, err) + } + } else { + wait = true + g.done = true + if err != nil { + tg.errors = append(tg.errors, err) + } + } + tg.Unlock() + if wait { + tg.WaitGroup.Done() + } +} + +func (g *Goroutine) OK() { + g.finish(nil) +} + +func (g *Goroutine) Panic(err error) { + g.finish(err) + if g != g.TestGroup.main { + runtime.Goexit() + } +} + +func (t *TestGroup) Main() *Goroutine { return t.main } + +func (t *TestGroup) Panic(err error) { t.Main().Panic(err) } + +func (t *TestGroup) Go(name string, f 
func(*Goroutine)) *Goroutine { + g := &Goroutine{t, name, false} + t.Lock() + t.WaitGroup.Add(1) + t.running = append(t.running, g) + t.Unlock() + go f(g) + return g +} + +func (t *TestGroup) Wait(procs... *Run) { + t.Main().OK() + t.WaitGroup.Wait() + for _, p := range procs { + if err := p.Wait(); err != nil { + t.errors = append(t.errors, err) + } + } + for _, err := range t.errors { + fmt.Println(":ERROR:", err) + } + for _, err := range t.nonerrors { + fmt.Println("(ERROR)", err) + } + if len(t.errors) != 0 { + t.Fail("Test failed with", len(t.errors), "errors") + } +} diff --git a/lib/xdelta3/linkxd3lib.c b/lib/xdelta3/linkxd3lib.c new file mode 100644 index 0000000..0f7f739 --- /dev/null +++ b/lib/xdelta3/linkxd3lib.c @@ -0,0 +1,42 @@ +#include "xdelta3.h" + +extern int VVV; + +int VVV; + +void use(int r) +{ + VVV = r; +} + +int main() { + xd3_config config; + xd3_stream stream; + xd3_source source; + + xd3_init_config (& config, 0); + use (xd3_config_stream (&stream, &config)); + use (xd3_close_stream (&stream)); + xd3_abort_stream (&stream); + xd3_free_stream (&stream); + + xd3_avail_input (& stream, NULL, 0); + xd3_consume_output (& stream); + + use (xd3_set_source (& stream, & source)); + xd3_set_flags (& stream, 0); + + use (xd3_decode_stream (& stream, NULL, 0, NULL, NULL, 0)); + use (xd3_decode_input (&stream)); + use (xd3_get_appheader (& stream, NULL, NULL)); + +#if XD3_ENCODER + use (xd3_encode_input (&stream)); + use (xd3_encode_stream (& stream, NULL, 0, NULL, NULL, 0)); + use (xd3_set_appheader (& stream)); + use (xd3_encoder_used_source (& stream)); + use (xd3_encoder_srcbase (& stream)); + use (xd3_encoder_srclen (& stream)); +#endif + return 0; +} diff --git a/lib/xdelta3/m4/ax_check_aligned_access_required.m4 b/lib/xdelta3/m4/ax_check_aligned_access_required.m4 new file mode 100644 index 0000000..b078275 --- /dev/null +++ b/lib/xdelta3/m4/ax_check_aligned_access_required.m4 @@ -0,0 +1,84 @@ +# 
==================================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_check_aligned_access_required.html +# ==================================================================================== +# +# SYNOPSIS +# +# AX_CHECK_ALIGNED_ACCESS_REQUIRED +# +# DESCRIPTION +# +# While the x86 CPUs allow access to memory objects to be unaligned it +# happens that most of the modern designs require objects to be aligned - +# or they will fail with a bus error. That mode is quite known by +# big-endian machines (sparc, etc) however the alpha cpu is little- +# endian. +# +# The following function will test for aligned access to be required and +# set a config.h define HAVE_ALIGNED_ACCESS_REQUIRED (name derived by +# standard usage). Structures loaded from a file (or mmapped to memory) +# should be accessed per-byte in that case to avoid segfault type errors. +# +# LICENSE +# +# Copyright (c) 2008 Guido U. Draheim +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them.
# AX_CHECK_ALIGNED_ACCESS_REQUIRED
# --------------------------------
# Runs a probe that reads two ints through deliberately misaligned,
# overlapping pointers. The probe exits 0 only when both reads complete
# and yield different values, i.e. when unaligned access works. A
# non-zero exit or a crash (e.g. a bus error) means aligned access is
# required. The result is cached in ax_cv_have_aligned_access_required
# (yes/no); HAVE_ALIGNED_ACCESS_REQUIRED is defined when it is "yes".
# When cross-compiling the probe cannot run and the result is "no".
#
# The previous revision mapped a successful probe to "yes", which
# reported the requirement exactly on the machines that do not have it.
AC_DEFUN([AX_CHECK_ALIGNED_ACCESS_REQUIRED],
[AC_CACHE_CHECK([if pointers to integers require aligned access],
  [ax_cv_have_aligned_access_required],
  [AC_TRY_RUN([
#include <stdio.h>
#include <stdlib.h>

int main()
{
  char* string = malloc(40);
  int i;
  if (string == 0) { return 1; }
  for (i=0; i < 40; i++) string[[i]] = i;
  {
    int* p = (int*) (string+1);
    int* q = (int*) (string+2);

    if (*p == *q) { return 1; }
  }
  return 0;
}
              ],
     [ax_cv_have_aligned_access_required=no],
     [ax_cv_have_aligned_access_required=yes],
     [ax_cv_have_aligned_access_required=no])
  ])
if test "$ax_cv_have_aligned_access_required" = yes ; then
  AC_DEFINE([HAVE_ALIGNED_ACCESS_REQUIRED], [1],
    [Define if pointers to integers require aligned access])
fi
])
+# +# You can use the optional first argument to check if the version of the +# available SWIG is greater than or equal to the value of the argument. It +# should have the format: N[.N[.N]] (N is a number between 0 and 999. Only +# the first N is mandatory.) If the version argument is given (e.g. +# 1.3.17), AX_PKG_SWIG checks that the swig package is this version number +# or higher. +# +# As usual, action-if-found is executed if SWIG is found, otherwise +# action-if-not-found is executed. +# +# In configure.in, use as: +# +# AX_PKG_SWIG(1.3.17, [], [ AC_MSG_ERROR([SWIG is required to build..]) ]) +# AX_SWIG_ENABLE_CXX +# AX_SWIG_MULTI_MODULE_SUPPORT +# AX_SWIG_PYTHON +# +# LICENSE +# +# Copyright (c) 2008 Sebastian Huber +# Copyright (c) 2008 Alan W. Irwin +# Copyright (c) 2008 Rafael Laboissiere +# Copyright (c) 2008 Andrew Collier +# Copyright (c) 2011 Murray Cumming +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them.
The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 8 + +AC_DEFUN([AX_PKG_SWIG],[ + # Ubuntu has swig 2.0 as /usr/bin/swig2.0 + AC_PATH_PROGS([SWIG],[swig swig2.0]) + if test -z "$SWIG" ; then + m4_ifval([$3],[$3],[:]) + elif test -n "$1" ; then + AC_MSG_CHECKING([SWIG version]) + [swig_version=`$SWIG -version 2>&1 | grep 'SWIG Version' | sed 's/.*\([0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\).*/\1/g'`] + AC_MSG_RESULT([$swig_version]) + if test -n "$swig_version" ; then + # Calculate the required version number components + [required=$1] + [required_major=`echo $required | sed 's/[^0-9].*//'`] + if test -z "$required_major" ; then + [required_major=0] + fi + [required=`echo $required | sed 's/[0-9]*[^0-9]//'`] + [required_minor=`echo $required | sed 's/[^0-9].*//'`] + if test -z "$required_minor" ; then + [required_minor=0] + fi + [required=`echo $required | sed 's/[0-9]*[^0-9]//'`] + [required_patch=`echo $required | sed 's/[^0-9].*//'`] + if test -z "$required_patch" ; then + [required_patch=0] + fi + # Calculate the available version number components + [available=$swig_version] + [available_major=`echo $available | sed 's/[^0-9].*//'`] + if test -z "$available_major" ; then + [available_major=0] + fi + [available=`echo $available | sed 's/[0-9]*[^0-9]//'`] + [available_minor=`echo $available | sed 's/[^0-9].*//'`] + if test -z "$available_minor" ; then + [available_minor=0] + fi + [available=`echo $available | sed 's/[0-9]*[^0-9]//'`] + [available_patch=`echo $available | sed 's/[^0-9].*//'`] + if test -z "$available_patch" ; then + [available_patch=0] + fi + # Convert the version tuple into a single 
number for easier comparison. + # Using base 100 should be safe since SWIG internally uses BCD values + # to encode its version number. + required_swig_vernum=`expr $required_major \* 10000 \ + \+ $required_minor \* 100 \+ $required_patch` + available_swig_vernum=`expr $available_major \* 10000 \ + \+ $available_minor \* 100 \+ $available_patch` + + if test $available_swig_vernum -lt $required_swig_vernum; then + AC_MSG_WARN([SWIG version >= $1 is required. You have $swig_version.]) + SWIG='' + m4_ifval([$3],[$3],[]) + else + AC_MSG_CHECKING([for SWIG library]) + SWIG_LIB=`$SWIG -swiglib` + AC_MSG_RESULT([$SWIG_LIB]) + m4_ifval([$2],[$2],[]) + fi + else + AC_MSG_WARN([cannot determine SWIG version]) + SWIG='' + m4_ifval([$3],[$3],[]) + fi + fi + AC_SUBST([SWIG_LIB]) +]) diff --git a/lib/xdelta3/m4/ax_python_devel.m4 b/lib/xdelta3/m4/ax_python_devel.m4 new file mode 100644 index 0000000..a62b860 --- /dev/null +++ b/lib/xdelta3/m4/ax_python_devel.m4 @@ -0,0 +1,325 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_python_devel.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_PYTHON_DEVEL([version]) +# +# DESCRIPTION +# +# Note: Defines as a precious variable "PYTHON_VERSION". Don't override it +# in your configure.ac. +# +# This macro checks for Python and tries to get the include path to +# 'Python.h'. It provides the $(PYTHON_CPPFLAGS) and $(PYTHON_LDFLAGS) +# output variables. It also exports $(PYTHON_EXTRA_LIBS) and +# $(PYTHON_EXTRA_LDFLAGS) for embedding Python in your code. +# +# You can search for some particular version of Python by passing a +# parameter to this macro, for example ">= '2.3.1'", or "== '2.4'". Please +# note that you *have* to pass also an operator along with the version to +# match, and pay special attention to the single quotes surrounding the +# version number. 
Don't use "PYTHON_VERSION" for this: that environment +# variable is declared as precious and thus reserved for the end-user. +# +# This macro should work for all versions of Python >= 2.1.0. As an end +# user, you can disable the check for the python version by setting the +# PYTHON_NOVERSIONCHECK environment variable to something else than the +# empty string. +# +# If you need to use this macro for an older Python version, please +# contact the authors. We're always open for feedback. +# +# LICENSE +# +# Copyright (c) 2009 Sebastian Huber +# Copyright (c) 2009 Alan W. Irwin +# Copyright (c) 2009 Rafael Laboissiere +# Copyright (c) 2009 Andrew Collier +# Copyright (c) 2009 Matteo Settenvini +# Copyright (c) 2009 Horst Knorr +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive.
When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 8 + +AU_ALIAS([AC_PYTHON_DEVEL], [AX_PYTHON_DEVEL]) +AC_DEFUN([AX_PYTHON_DEVEL],[ + # + # Allow the use of a (user set) custom python version + # + AC_ARG_VAR([PYTHON_VERSION],[The installed Python + version to use, for example '2.3'. This string + will be appended to the Python interpreter + canonical name.]) + + AC_PATH_PROG([PYTHON],[python[$PYTHON_VERSION]]) + if test -z "$PYTHON"; then + AC_MSG_ERROR([Cannot find python$PYTHON_VERSION in your system path]) + PYTHON_VERSION="" + fi + + # + # Check for a version of Python >= 2.1.0 + # + AC_MSG_CHECKING([for a version of Python >= '2.1.0']) + ac_supports_python_ver=`$PYTHON -c "import sys; \ + ver = sys.version.split ()[[0]]; \ + print (ver >= '2.1.0')"` + if test "$ac_supports_python_ver" != "True"; then + if test -z "$PYTHON_NOVERSIONCHECK"; then + AC_MSG_RESULT([no]) + AC_MSG_FAILURE([ +This version of the AC@&t@_PYTHON_DEVEL macro +doesn't work properly with versions of Python before +2.1.0. You may need to re-run configure, setting the +variables PYTHON_CPPFLAGS, PYTHON_LDFLAGS, PYTHON_SITE_PKG, +PYTHON_EXTRA_LIBS and PYTHON_EXTRA_LDFLAGS by hand. +Moreover, to disable this check, set PYTHON_NOVERSIONCHECK +to something else than an empty string. +]) + else + AC_MSG_RESULT([skip at user request]) + fi + else + AC_MSG_RESULT([yes]) + fi + + # + # if the macro parameter ``version'' is set, honour it + # + if test -n "$1"; then + AC_MSG_CHECKING([for a version of Python $1]) + ac_supports_python_ver=`$PYTHON -c "import sys; \ + ver = sys.version.split ()[[0]]; \ + print (ver $1)"` + if test "$ac_supports_python_ver" = "True"; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + AC_MSG_ERROR([this package requires Python $1. 
+If you have it installed, but it isn't the default Python +interpreter in your system path, please pass the PYTHON_VERSION +variable to configure. See ``configure --help'' for reference. +]) + PYTHON_VERSION="" + fi + fi + + # + # Check if you have distutils, else fail + # + AC_MSG_CHECKING([for the distutils Python package]) + ac_distutils_result=`$PYTHON -c "import distutils" 2>&1` + if test -z "$ac_distutils_result"; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + AC_MSG_ERROR([cannot import Python module "distutils". +Please check your Python installation. The error was: +$ac_distutils_result]) + PYTHON_VERSION="" + fi + + # + # Check for Python include path + # + AC_MSG_CHECKING([for Python include path]) + if test -z "$PYTHON_CPPFLAGS"; then + python_path=`$PYTHON -c "import distutils.sysconfig; \ + print (distutils.sysconfig.get_python_inc ());"` + if test -n "${python_path}"; then + python_path="-I$python_path" + fi + PYTHON_CPPFLAGS=$python_path + fi + AC_MSG_RESULT([$PYTHON_CPPFLAGS]) + AC_SUBST([PYTHON_CPPFLAGS]) + + # + # Check for Python library path + # + AC_MSG_CHECKING([for Python library path]) + if test -z "$PYTHON_LDFLAGS"; then + # (makes two attempts to ensure we've got a version number + # from the interpreter) + ac_python_version=`cat<]], + [[Py_Initialize();]]) + ],[pythonexists=yes],[pythonexists=no]) + AC_LANG_POP([C]) + # turn back to default flags + CPPFLAGS="$ac_save_CPPFLAGS" + LIBS="$ac_save_LIBS" + + AC_MSG_RESULT([$pythonexists]) + + if test ! "x$pythonexists" = "xyes"; then + AC_MSG_FAILURE([ + Could not link test program to Python. Maybe the main Python library has been + installed in some non-standard library path. If so, pass it to configure, + via the LDFLAGS environment variable. + Example: ./configure LDFLAGS="-L/usr/non-standard-path/python/lib" + ============================================================================ + ERROR! 
+ You probably have to install the development version of the Python package + for your distribution. The exact name of this package varies among them. + ============================================================================ + ]) + PYTHON_VERSION="" + fi + + # + # all done! + # +]) diff --git a/lib/xdelta3/m4/ax_swig_python.m4 b/lib/xdelta3/m4/ax_swig_python.m4 new file mode 100644 index 0000000..8fd3df5 --- /dev/null +++ b/lib/xdelta3/m4/ax_swig_python.m4 @@ -0,0 +1,64 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_swig_python.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_SWIG_PYTHON([use-shadow-classes = {no, yes}]) +# +# DESCRIPTION +# +# Checks for Python and provides the $(AX_SWIG_PYTHON_CPPFLAGS), and +# $(AX_SWIG_PYTHON_OPT) output variables. +# +# $(AX_SWIG_PYTHON_OPT) contains all necessary SWIG options to generate +# code for Python. Shadow classes are enabled unless the value of the +# optional first argument is exactly 'no'. If you need multi module +# support (provided by the AX_SWIG_MULTI_MODULE_SUPPORT macro) use +# $(AX_SWIG_PYTHON_LIBS) to link against the appropriate library. It +# contains the SWIG Python runtime library that is needed by the type +# check system for example. +# +# LICENSE +# +# Copyright (c) 2008 Sebastian Huber +# Copyright (c) 2008 Alan W. Irwin +# Copyright (c) 2008 Rafael Laboissiere +# Copyright (c) 2008 Andrew Collier +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 7 + +AU_ALIAS([SWIG_PYTHON], [AX_SWIG_PYTHON]) +AC_DEFUN([AX_SWIG_PYTHON],[ + AC_REQUIRE([AX_PKG_SWIG]) + AC_REQUIRE([AX_PYTHON_DEVEL]) + test "x$1" != "xno" || swig_shadow=" -noproxy" + AC_SUBST([AX_SWIG_PYTHON_OPT],[-python$swig_shadow]) + AC_SUBST([AX_SWIG_PYTHON_CPPFLAGS],[$PYTHON_CPPFLAGS]) +]) diff --git a/lib/xdelta3/plot.sh b/lib/xdelta3/plot.sh new file mode 100644 index 0000000..8370ae7 --- /dev/null +++ b/lib/xdelta3/plot.sh @@ -0,0 +1,25 @@ +#!/bin/sh + +G=/usr/bin/gnuplot + +D=./output_dir + +I=$1 +O=$D/$2 + +$G > $O < +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef BUFSIZE +#define BUFSIZE (1<<14) + +char *tmp_file_1; +gboolean tmp_file_1_free = TRUE; +char *tmp_file_2; +gboolean tmp_file_2_free = TRUE; + +int skip_count; +int small_count; +int large_count; +int process_count; + +extern time_t str2time (char const *, time_t, long); + +static guint8 readbuf[BUFSIZE]; + +static const char* rcswalk_input_dir = NULL; +static 
const char* config_output_base = NULL; +static const char* config_output_dir = NULL; +static const char* rcswalk_experiment = NULL; + +static ConfigOption rcswalk_options[] = { + { "rcswalk_experiment", "ex", CS_Use, CO_Required, CD_String, & rcswalk_experiment }, + { "rcs_input_dir", "id", CS_UseAsFile, CO_Required, CD_String, & rcswalk_input_dir } +}; + +static ConfigOption config_options[] = { + { "config_output_base", "ob", CS_Ignore, CO_Required, CD_String, & config_output_base } +}; + + +void +rcswalk_free_segment (RcsVersion *v) +{ + if (v->segment) + g_free (v->segment); + + if (v->filename == tmp_file_1) + tmp_file_1_free = TRUE; + else if (v->filename == tmp_file_2) + tmp_file_2_free = TRUE; + else if (v->filename) + g_free (v->filename); + + v->segment = NULL; + v->filename = NULL; +} + +int +rcswalk_checkout (RcsFile* rcs, RcsWalker* walker, RcsVersion *v) +{ + FILE* out; + char cmdbuf[1024]; + int nread; + int alloc = BUFSIZE; + int pos = 0; + + sprintf (cmdbuf, "co -ko -p%s %s 2>/dev/null\n", v->vname, rcs->filename); + + g_assert (! v->segment); + + v->segment = g_malloc (alloc); + + if (! (out = popen (cmdbuf, "r"))) + { + g_warning ("popen failed: %s: %s", cmdbuf, g_strerror (errno)); + return errno; + } + + for (;;) + { + nread = fread (readbuf, 1, BUFSIZE, out); + + if (nread == 0) + break; + + if (nread < 0) + { + g_warning ("fread failed: %s", g_strerror (errno)); + return errno; + } + + if (pos + nread > alloc) + { + alloc *= 2; + v->segment = g_realloc (v->segment, alloc); + } + + memcpy (v->segment + pos, readbuf, nread); + + pos += nread; + } + + if (pclose (out) < 0) + { + g_warning ("pclose failed"); + return errno; + } + + v->size = pos; + + if (walker->write_files) + { + char* file = NULL; + + if (! file && tmp_file_1_free) + { + file = tmp_file_1; + tmp_file_1_free = FALSE; + } + + if (! file && tmp_file_2_free) + { + file = tmp_file_2; + tmp_file_2_free = FALSE; + } + + g_assert (file); + + v->filename = file; + + if (! 
(out = fopen (file, "w"))) + { + g_warning ("fopen failed: %s\n", file); + return errno; + } + + if (fwrite (v->segment, v->size, 1, out) != 1) + { + g_warning ("fwrite failed: %s\n", file); + return errno; + } + + if (fclose (out) < 0) + { + g_warning ("fclose failed: %s\n", file); + return errno; + } + } + + return 0; +} + +int +rcswalk_delta_date (RcsFile* rcs, RcsWalker* walker, void* data) +{ + int i; + int ret; + RcsVersion *vf = NULL; + RcsVersion *vt = NULL; + + for (i = 0; i < (rcs->version_count-1); i += 1) + { + vf = rcs->versions_date[i+1]; + vt = rcs->versions_date[i]; + + if (! vt->segment && (ret = rcswalk_checkout (rcs, walker, vt))) { + return ret; + } + + if ((ret = rcswalk_checkout (rcs, walker, vf))) { + return ret; + } + + if ((ret = walker->delta_date (rcs, vf, vt, data))) { + return ret; + } + + rcswalk_free_segment (vt); + } + + if (vf) rcswalk_free_segment (vf); + if (vt) rcswalk_free_segment (vt); + + return 0; +} + +int +rcswalk_delta_orig (RcsFile* rcs, RcsWalker* walker, RcsVersion* version, int *count, void* data) +{ + int ret; + GSList *c; + RcsVersion *child; + + for (c = version->children; c; c = c->next) + { + gboolean reverse; + + child = c->data; + + if (! version->segment) + { + if ((ret = rcswalk_checkout (rcs, walker, version))) { + return ret; + } + } + + if ((ret = rcswalk_checkout (rcs, walker, child))) { + return ret; + } + + reverse = version->on_trunk && child->on_trunk; + + (* count) += 1; + + if ((ret = walker->delta_orig (rcs, reverse ? child : version, reverse ? 
version : child, data))) { + return ret; + } + + rcswalk_free_segment (version); + + if ((ret = rcswalk_delta_orig (rcs, walker, child, count, data))) { + return ret; + } + } + + rcswalk_free_segment (version); + return 0; +} + +int +rcswalk_dateorder (RcsFile* rcs, RcsWalker *walker, RcsStats *stats, void* data) +{ + int i, ret; + + for (i = 0; i < rcs->version_count; i += 1) + { + RcsVersion *v = rcs->versions_date[i]; + + if ((ret = rcswalk_checkout (rcs, walker, v))) { + return ret; + } + + stat_bincount_add_item (stats->avg_version_size, i, v->size); + + if ((ret = walker->dateorder (rcs, v, data))) { + return ret; + } + + rcswalk_free_segment (v); + } + + return 0; +} + +gboolean +rcswalk_match (char** line_p, char* str) +{ + int len = strlen (str); + + if (strncmp (*line_p, str, len) == 0) + { + (*line_p) += len; + return TRUE; + } + + return FALSE; +} + +void +rcswalk_find_parent (RcsFile *rcs, GHashTable* hash, RcsVersion *v) +{ + char *lastdot; + char mbuf[1024]; + int lastn; + RcsVersion *p; + + strcpy (mbuf, v->vname); + + if (! (lastdot = strchr (mbuf, '.'))) + abort (); + + if (! (lastdot = strchr (lastdot+1, '.'))) + v->on_trunk = TRUE; + + lastdot = strrchr (mbuf, '.'); + lastn = atoi (lastdot + 1); + + do + { + if (lastn == 1) + { + (*lastdot) = 0; + + if (strcmp (mbuf, "1") == 0) + { + /* Assuming the first version is always "1.1". + */ + rcs->root_version = v; + return; + } + else if (! (lastdot = strrchr (mbuf, '.'))) + { + int i = 1; + int br = atoi (mbuf) - 1; + RcsVersion *p2 = NULL; + + /* Now we have something like "2.1" and need to + * search for the highest "1.x" version. 
+ */ + + do + { + sprintf (mbuf, "%d.%d", br, i++); + p = p2; + } + while ((p2 = g_hash_table_lookup (hash, mbuf))); + + if (p == NULL) + { + rcs->root_version = v; + return; + } + + break; + } + else + { + /* 1.2.3.1 => 1.2 */ + (*lastdot) = 0; + lastdot = strrchr (mbuf, '.'); + lastn = atoi (lastdot + 1); + } + } + else + { + lastn -= 1; + sprintf (lastdot, ".%d", lastn); + } + } + while (! (p = g_hash_table_lookup (hash, mbuf))); + + g_assert (p); + + v->parent = p; + + p->children = g_slist_prepend (p->children, v); +} + +int +rcswalk_traverse_graph (RcsFile* rcs, RcsVersion* version, RcsVersion *parent) +{ + GSList *c; + int distance = -1; + + version->cc = g_slist_length (version->children); + + if (version->cc > 1) + rcs->branch_count += (version->cc - 1); + + if (parent) + { + /* Insure that there is proper date ordering. */ + if (version->date <= parent->date) + version->date = parent->date + 1; + + if (parent->on_trunk && version->on_trunk) + rcs->reverse_count += 1; + else + rcs->forward_count += 1; + } + + for (c = version->children; c; c = c->next) + { + int c_dist = rcswalk_traverse_graph (rcs, c->data, version); + + distance = MAX (distance, c_dist); + } + + if (version == rcs->head_version) + distance = 0; + + if (distance >= 0) + { + version->chain_length = distance; + + return distance + 1; + } + + return -1; +} + +void +rcswalk_compute_chain_length (RcsFile* rcs, RcsVersion* version, RcsVersion *parent) +{ + GSList *c; + + if (! 
parent) + { + g_assert (version->chain_length >= 0); + } + else if (version->chain_length < 0) + { + version->chain_length = parent->chain_length + 1; + } + + for (c = version->children; c; c = c->next) + { + rcswalk_compute_chain_length (rcs, c->data, version); + } +} + +int +rcswalk_date_compare (const void* a, const void* b) +{ + RcsVersion **ra = (void*) a; + RcsVersion **rb = (void*) b; + + return (*ra)->date - (*rb)->date; +} + +int +rcswalk_build_graph (RcsFile* rcs) +{ + GHashTable* hash = g_hash_table_new (g_str_hash, g_str_equal); + int i; + + for (i = 0; i < rcs->version_count; i += 1) + g_hash_table_insert (hash, rcs->versions[i].vname, rcs->versions + i); + + for (i = 0; i < rcs->version_count; i += 1) + { + RcsVersion *v = rcs->versions + i; + + v->chain_length = -1; + v->rcs = rcs; + + rcswalk_find_parent (rcs, hash, v); + } + + rcs->head_version = g_hash_table_lookup (hash, rcs->headname); + + rcswalk_traverse_graph (rcs, rcs->root_version, NULL); + + rcswalk_compute_chain_length (rcs, rcs->root_version, NULL); + + for (i = 0; i < rcs->version_count; i += 1) + rcs->versions_date[i] = rcs->versions + i; + + qsort (rcs->versions_date, rcs->version_count, sizeof (RcsVersion*), & rcswalk_date_compare); + + for (i = 0; i < rcs->version_count; i += 1) + { + RcsVersion *v = rcs->versions_date[i]; + + v->dateseq = i; + } + + g_hash_table_destroy (hash); + + return 0; +} + +#define HEAD_STATE 0 +#define BAR_STATE 1 +#define REV_STATE 2 +#define DATE_STATE 3 + +int +rcswalk_load (RcsFile *rcs, gboolean *skip) +{ + FILE* rlog; + char cmdbuf[1024]; + char oneline[1024], *oneline_p; + char rbuf[1024]; + int version_i = 0, ret; + int read_state = HEAD_STATE; + + sprintf (cmdbuf, "rlog %s", rcs->filename); + + if (! 
(rlog = popen (cmdbuf, "r"))) + { + g_warning ("popen failed: %s", cmdbuf); + return errno; + } + + rcs->headname = NULL; + + while (fgets (oneline, 1024, rlog)) + { + oneline_p = oneline; + + if (read_state == HEAD_STATE && rcswalk_match (& oneline_p, "total revisions: ")) + { + if (sscanf (oneline_p, "%d", & rcs->version_count) != 1) + goto badscan; + + rcs->versions = g_new0 (RcsVersion, rcs->version_count); + rcs->versions_date = g_new (RcsVersion*, rcs->version_count); + read_state = BAR_STATE; + } + else if (read_state == HEAD_STATE && rcswalk_match (& oneline_p, "head: ")) + { + if (sscanf (oneline_p, "%s", rbuf) != 1) + goto badscan; + + rcs->headname = g_strdup (rbuf); + read_state = HEAD_STATE; /* no change */ + } + else if (read_state == BAR_STATE && rcswalk_match (& oneline_p, "----------------------------")) + { + read_state = REV_STATE; + } + else if (read_state == REV_STATE && rcswalk_match (& oneline_p, "revision ")) + { + if (version_i >= rcs->version_count) + { + /* jkh likes to insert the rlog of one RCS file into the log + * message of another, and this can confuse things. Why, oh why, + * doesn't rlog have an option to not print the log? + */ + fprintf (stderr, "rcswalk: too many versions: skipping file %s\n", rcs->filename); + *skip = TRUE; + skip_count += 1; + pclose (rlog); + return 0; + } + + if (sscanf (oneline_p, "%s", rbuf) != 1) + goto badscan; + + rcs->versions[version_i].vname = g_strdup (rbuf); + read_state = DATE_STATE; + + g_assert (rcs->versions[version_i].vname); + } + else if (read_state == DATE_STATE && rcswalk_match (& oneline_p, "date: ")) + { + char* semi = strchr (oneline_p, ';'); + + if (! semi) + goto badscan; + + strncpy (rbuf, oneline_p, semi - oneline_p); + + rbuf[semi - oneline_p] = 0; + + rcs->versions[version_i].date = str2time (rbuf, 0, 0); + + version_i += 1; + read_state = BAR_STATE; + } + } + + if (! 
rcs->headname) + { + fprintf (stderr, "rcswalk: no head version: skipping file %s\n", rcs->filename); + *skip = TRUE; + skip_count += 1; + pclose (rlog); + return 0; + } + + if (pclose (rlog) < 0) + { + g_warning ("pclose failed: %s", cmdbuf); + return errno; + } + + if ((ret = rcswalk_build_graph (rcs))) { + return ret; + } + + return 0; + + badscan: + + pclose (rlog); + + g_warning ("rlog syntax error"); + return -1; +} + +void +rcswalk_free (RcsFile* rcs) +{ + int i; + + for (i = 0; i < rcs->version_count; i += 1) + { + g_free (rcs->versions[i].vname); + g_slist_free (rcs->versions[i].children); + } + + g_free (rcs->filename); + g_free (rcs->headname); + g_free (rcs->versions); + g_free (rcs->versions_date); + g_free (rcs); +} + +int +rcswalk_one (char* rcsfile, char* copyfile, RcsWalker* walker, RcsStats* stats, void* data) +{ + RcsFile* rcs; + int i, ret; + long long maxsize = 0; + gboolean skip = FALSE; + + rcs = g_new0 (RcsFile, 1); + + rcs->filename = g_strdup (rcsfile); + rcs->copyname = copyfile; + + if ((ret = rcswalk_load (rcs, & skip))) { + return ret; + } + + if (walker->min_versions > rcs->version_count) + { + small_count += 1; + skip = TRUE; + } + + if (walker->max_versions < rcs->version_count) + { + large_count += 1; + skip = TRUE; + } + + if (! 
skip) + { + process_count += 1; + + if (walker->dateorder && (ret = rcswalk_dateorder (rcs, walker, stats, data))) { + return ret; + } + + if (walker->delta_orig) + { + int count = 0; + + if ((ret = rcswalk_delta_orig (rcs, walker, rcs->root_version, & count, data))) { + return ret; + } + + g_assert (count == (rcs->version_count - 1)); + } + + if (walker->delta_date && (ret = rcswalk_delta_date (rcs, walker, data))) { + return ret; + } + + for (i = 0; i < rcs->version_count; i += 1) + { + rcs->total_size += rcs->versions[i].size; + maxsize = MAX (rcs->versions[i].size, maxsize); + } + + stat_int_add_item (stats->version_stat, rcs->version_count); + stat_int_add_item (stats->forward_stat, rcs->forward_count); + stat_int_add_item (stats->reverse_stat, rcs->reverse_count); + stat_int_add_item (stats->branch_stat, rcs->branch_count); + stat_int_add_item (stats->unencoded_stat, rcs->total_size); + stat_int_add_item (stats->literal_stat, maxsize); + + if (walker->onefile && (ret = walker->onefile (rcs, stats, data))) { + return ret; + } + } + + rcswalk_free (rcs); + + return 0; +} + +int +rcswalk_dir (const char* dir, RcsWalker* walker, RcsStats* stats, void* data, const char* copy_dir) +{ + int ret; + DIR* thisdir; + struct dirent* ent; + + if (copy_dir && (ret = config_create_dir (copy_dir))) { + return ret; + } + + if (! 
(thisdir = opendir (dir))) + { + g_warning ("opendir failed: %s", dir); + return errno; + } + + while ((ent = readdir (thisdir))) + { + char* name = ent->d_name; + int len; + struct stat buf; + char* fullname; + char* copyname = NULL; + + if (strcmp (name, ".") == 0) + continue; + + if (strcmp (name, "..") == 0) + continue; + + len = strlen (name); + + fullname = g_strdup_printf ("%s/%s", dir, name); + + if (copy_dir) + copyname = g_strdup_printf ("%s/%s", copy_dir, name); + + if (len > 2 && strcmp (name + len - 2, ",v") == 0) + { + if ((ret = rcswalk_one (fullname, copyname, walker, stats, data))) { + goto abort; + } + } + else + { + if (stat (fullname, & buf) < 0) + { + g_warning ("stat failed: %s\n", fullname); + goto abort; + } + + if (S_ISDIR (buf.st_mode)) + { + if ((ret = rcswalk_dir (fullname, walker, stats, data, copyname))) { + goto abort; + } + } + } + + g_free (fullname); + + if (copyname) + g_free (copyname); + } + + if (closedir (thisdir) < 0) + { + g_warning ("closedir failed: %s", dir); + return errno; + } + + return 0; + + abort: + + if (thisdir) + closedir (thisdir); + + return -1; +} + +void +rcswalk_init (void) +{ + config_register (rcswalk_options, ARRAY_SIZE (rcswalk_options)); +} + +int +rcswalk (RcsWalker *walker, const char* copy_base) +{ + void* data = NULL; + RcsStats stats; + int ret; + + skip_count = 0; + small_count = 0; + process_count = 0; + large_count = 0; + + memset (& stats, 0, sizeof (stats)); + + stats.avg_version_size = stat_bincount_new ("AvgVersionSize"); /* @@@ leak */ + stats.version_stat = stat_int_new ("Version"); /* @@@ leak */ + stats.forward_stat = stat_int_new ("Forward"); /* @@@ leak */ + stats.reverse_stat = stat_int_new ("Reverse"); /* @@@ leak */ + stats.branch_stat = stat_int_new ("Branch"); /* @@@ leak */ + stats.unencoded_stat = stat_int_new ("Unencoded"); /* @@@ leak */ + stats.literal_stat = stat_int_new ("Literal"); /* @@@ leak */ + + tmp_file_1 = g_strdup_printf ("%s/rcs1.%d", g_get_tmp_dir (), (int) 
getpid ()); + tmp_file_2 = g_strdup_printf ("%s/rcs2.%d", g_get_tmp_dir (), (int) getpid ()); + + if (walker->initialize) + data = walker->initialize (); + + if ((ret = rcswalk_dir (rcswalk_input_dir, walker, & stats, data, copy_base))) { + return ret; + } + + if (walker->finalize) + { + if ((ret = walker->finalize (& stats, data))) { + return ret; + } + } + + unlink (tmp_file_1); + unlink (tmp_file_2); + + fprintf (stderr, "rcswalk: processed %d files: too small %d; too large: %d; damaged: %d\n", process_count, small_count, large_count, skip_count); + + return 0; +} + +/* Statistics + */ + +void +rcswalk_report (RcsStats* set) +{ + stat_bincount_report (set->avg_version_size); + stat_int_report (set->version_stat); + stat_int_report (set->forward_stat); + stat_int_report (set->reverse_stat); + stat_int_report (set->branch_stat); + stat_int_report (set->unencoded_stat); + stat_int_report (set->literal_stat); +} + +/* Int stat + */ +IntStat* +stat_int_new (const char* name) +{ + IntStat* s = g_new0 (IntStat, 1); + + s->name = name; + s->values = g_array_new (FALSE, FALSE, sizeof (long long)); + + return s; +} + +void +stat_int_add_item (IntStat* stat, long long v) +{ + if (! stat->count) + stat->min = v; + stat->count += 1; + stat->min = MIN (v, stat->min); + stat->max = MAX (v, stat->max); + stat->sum += v; + + g_array_append_val (stat->values, v); +} + +double +stat_int_stddev (IntStat *stat) +{ + double f = 0; + double m = (double) stat->sum / (double) stat->count; + double v; + int i; + + for (i = 0; i < stat->count; i += 1) + { + long long x = g_array_index (stat->values, long long, i); + + f += (m - (double) x) * (m - (double) x); + } + + v = f / (double) stat->count; + + return sqrt (v); +} + +int +ll_comp (const void* a, const void* b) +{ + const long long* lla = a; + const long long* llb = b; + return (*lla) - (*llb); +} + +void +stat_int_histogram (IntStat *stat) +{ + int i, consec; + long long cum = 0; + + FILE* p_out; + FILE* s_out; + + if (! 
(p_out = config_output ("%s.pop.hist", stat->name))) + abort (); + + if (! (s_out = config_output ("%s.sum.hist", stat->name))) + abort (); + + qsort (stat->values->data, stat->count, sizeof (long long), ll_comp); + + for (i = 0; i < stat->count; i += consec) + { + long long ix = g_array_index (stat->values, long long, i); + + for (consec = 1; (i+consec) < stat->count; consec += 1) + { + long long jx = g_array_index (stat->values, long long, i+consec); + + if (ix != jx) + break; + } + + cum += consec * g_array_index (stat->values, long long, i); + + fprintf (p_out, "%qd, %0.3f\n", g_array_index (stat->values, long long, i), (double) (i+consec) / (double) stat->count); + fprintf (s_out, "%qd, %0.3f\n", g_array_index (stat->values, long long, i), (double) cum / (double) stat->sum); + } + + if (fclose (p_out) < 0 || fclose (s_out) < 0) + { + g_error ("fclose failed\n"); + } +} + +void +stat_int_report (IntStat* stat) +{ + FILE* out; + + if (! (out = config_output ("%s.stat", stat->name))) + abort (); + + fprintf (out, "Name: %s\n", stat->name); + fprintf (out, "Count: %d\n", stat->count); + fprintf (out, "Min: %qd\n", stat->min); + fprintf (out, "Max: %qd\n", stat->max); + fprintf (out, "Sum: %qd\n", stat->sum); + fprintf (out, "Mean: %0.2f\n", (double) stat->sum / (double) stat->count); + fprintf (out, "Stddev: %0.2f\n", stat_int_stddev (stat)); + + if (fclose (out) < 0) + g_error ("fclose failed"); + + stat_int_histogram (stat); +} + +/* Dbl stat + */ + +DblStat* +stat_dbl_new (const char* name) +{ + DblStat* s = g_new0 (DblStat, 1); + + s->name = name; + s->values = g_array_new (FALSE, FALSE, sizeof (double)); + + return s; +} + +void +stat_dbl_add_item (DblStat* stat, double v) +{ + if (! 
stat->count) + stat->min = v; + stat->count += 1; + stat->min = MIN (v, stat->min); + stat->max = MAX (v, stat->max); + stat->sum += v; + + g_array_append_val (stat->values, v); +} + +double +stat_dbl_stddev (DblStat *stat) +{ + double f = 0; + double m = stat->sum / stat->count; + double v; + int i; + + for (i = 0; i < stat->count; i += 1) + { + double x = g_array_index (stat->values, double, i); + + f += (m - x) * (m - x); + } + + v = f / stat->count; + + return sqrt (v); +} + +int +dbl_comp (const void* a, const void* b) +{ + const double* da = a; + const double* db = b; + double diff = (*da) - (*db); + + if (diff > 0.0) + return 1; + else if (diff < 0.0) + return -1; + else + return 0; +} + +void +stat_dbl_histogram (DblStat *stat) +{ + int i, consec; + double cum = 0.0; + + FILE* p_out; + FILE* s_out; + + if (! (p_out = config_output ("%s.pop.hist", stat->name))) + abort (); + + if (! (s_out = config_output ("%s.sum.hist", stat->name))) + abort (); + + qsort (stat->values->data, stat->count, sizeof (double), dbl_comp); + + for (i = 0; i < stat->count; i += consec) + { + double ix = g_array_index (stat->values, double, i); + + for (consec = 1; (i+consec) < stat->count; consec += 1) + { + double jx = g_array_index (stat->values, double, i+consec); + + if (ix != jx) + break; + } + + cum += ((double) consec) * g_array_index (stat->values, double, i); + + fprintf (p_out, "%0.6f, %0.3f\n", g_array_index (stat->values, double, i), (double) (i+consec) / (double) stat->count); + fprintf (s_out, "%0.6f, %0.3f\n", g_array_index (stat->values, double, i), cum / stat->sum); + } + + if (fclose (p_out) < 0 || fclose (s_out) < 0) + { + g_error ("fclose failed\n"); + } +} + +void +stat_dbl_report (DblStat* stat) +{ + FILE* out; + + if (! 
(out = config_output ("%s.stat", stat->name))) + abort (); + + fprintf (out, "Name: %s\n", stat->name); + fprintf (out, "Count: %d\n", stat->count); + fprintf (out, "Min: %0.6f\n", stat->min); + fprintf (out, "Max: %0.6f\n", stat->max); + fprintf (out, "Sum: %0.6f\n", stat->sum); + fprintf (out, "Mean: %0.6f\n", stat->sum / stat->count); + fprintf (out, "Stddev: %0.6f\n", stat_dbl_stddev (stat)); + + if (fclose (out) < 0) + g_error ("fclose failed"); + + stat_dbl_histogram (stat); +} + +/* Bincount + */ +BinCounter* +stat_bincount_new (const char* name) +{ + BinCounter* bc = g_new0 (BinCounter, 1); + + bc->name = name; + bc->bins = g_ptr_array_new (); + + return bc; +} + +void +stat_bincount_add_item (BinCounter* bc, int bin, double val) +{ + GArray* one; + int last; + + if (bin >= bc->bins->len) + { + g_ptr_array_set_size (bc->bins, bin+1); + } + + if (! (one = bc->bins->pdata[bin])) + { + one = bc->bins->pdata[bin] = g_array_new (FALSE, TRUE, sizeof (double)); + } + + g_assert (one); + + last = one->len; + + g_array_set_size (one, last + 1); + + g_array_index (one, double, last) = val; +} + +void +stat_bincount_report (BinCounter* bc) +{ + FILE *avg_out; + FILE *raw_out; + int i; + + if (! (avg_out = config_output ("%s.avg", bc->name))) + abort (); + + if (! 
(raw_out = config_output ("%s.raw", bc->name))) + abort (); + + for (i = 0; i < bc->bins->len; i += 1) + { + GArray* one = bc->bins->pdata[i]; + + double sum = 0.0; + int j; + + for (j = 0; j < one->len; j += 1) + { + double d = g_array_index (one, double, j); + + sum += d; + + fprintf (raw_out, "%e ", d); + } + + fprintf (raw_out, "\n"); + fprintf (avg_out, "%e %d\n", sum / one->len, one->len); + } + + if (fclose (avg_out) < 0) + g_error ("fclose failed"); + + if (fclose (raw_out) < 0) + g_error ("fclose failed"); +} + +/* Config stuff + */ + +int +config_create_dir (const char* dirname) +{ + struct stat buf; + + if (stat (dirname, & buf) < 0) + { + if (mkdir (dirname, 0777) < 0) + { + fprintf (stderr, "mkdir failed: %s\n", dirname); + return errno; + } + } + else + { + if (! S_ISDIR (buf.st_mode)) + { + fprintf (stderr, "not a directory: %s\n", dirname); + return errno; + } + } + + return 0; +} + +int +config_clear_dir (const char* dir) +{ + char buf[1024]; + + if (dir) + { + sprintf (buf, "rm -rf %s", dir); + + system (buf); + } + + return 0; +} + +static ConfigOption all_options[64]; +static int option_count; + +void +config_init () +{ + static gboolean once = FALSE; + if (! once) + { + once = TRUE; + config_register (config_options, ARRAY_SIZE (config_options)); + } +} + +void +config_register (ConfigOption *opts, int nopts) +{ + int i; + + config_init (); + + for (i = 0; i < nopts; i += 1) + { + all_options[option_count++] = opts[i]; + } +} + +void +config_set_string (const char* var, const char* val) +{ + int i; + + for (i = 0; i < option_count; i += 1) + { + ConfigOption *opt = all_options + i; + + if (strcmp (opt->name, var) == 0) + { + (* (const char**) opt->value) = val; + opt->found = TRUE; + return; + } + } +} + +int +config_parse (const char* config_file) +{ + FILE *in; + char oname[1024], value[1024]; + int i; + + if (! 
(in = fopen (config_file, "r"))) + { + fprintf (stderr, "fopen failed: %s\n", config_file); + return errno; + } + + for (;;) + { + ConfigOption *opt = NULL; + + if (fscanf (in, "%s", oname) != 1) + break; + + for (i = 0; i < option_count; i += 1) + { + if (strcmp (oname, all_options[i].name) == 0) + { + opt = all_options + i; + break; + } + } + + if (opt && opt->arg == CO_None) + { + (* (gboolean*) opt->value) = TRUE; + opt->found = TRUE; + continue; + } + + if (fscanf (in, "%s", value) != 1) + { + fprintf (stderr, "no value for option: %s; file: %s\n", oname, config_file); + goto abort; + } + + if (! opt) + { + /*fprintf (stderr, "unrecognized option: %s\n", oname);*/ + continue; + } + + switch (opt->type) + { + case CD_Bool: + + if (strcasecmp (value, "yes") == 0 || + strcasecmp (value, "true") == 0 || + strcmp (value, "1") == 0 || + strcasecmp (value, "on") == 0) + { + ((gboolean*) opt->value) = TRUE; + } + else + { + ((gboolean*) opt->value) = FALSE; + } + + break; + case CD_Int32: + + if (sscanf (value, "%d", (gint32*) opt->value) != 1) + { + fprintf (stderr, "parse error for option: %s; file: %s\n", oname, config_file); + goto abort; + } + + break; + case CD_Double: + + if (sscanf (value, "%lf", (double*) opt->value) != 1) + { + fprintf (stderr, "parse error for option: %s; file: %s\n", oname, config_file); + goto abort; + } + + break; + case CD_String: + + (* (const char**) opt->value) = g_strdup (value); + + break; + } + + opt->found = TRUE; + } + + fclose (in); + + return 0; + + abort: + + fclose (in); + + return -1; +} + +int +config_compute_output_dir () +{ + char tmp[1024]; + char buf[1024]; + int i; + gboolean last = FALSE; + + buf[0] = 0; + + for (i = 0; i < option_count; i += 1) + { + ConfigOption *opt = all_options + i; + + if (opt->style == CS_Ignore) + continue; + + if (! 
opt->found) + continue; + + if (last) + strcat (buf, ","); + + last = TRUE; + + strcat (buf, opt->abbrev); + strcat (buf, "="); + + switch (opt->type) + { + case CD_Bool: + + if (* (gboolean*) opt->value) + strcat (buf, "true"); + else + strcat (buf, "false"); + + break; + case CD_Int32: + + sprintf (tmp, "%d", (* (gint32*) opt->value)); + strcat (buf, tmp); + + break; + case CD_Double: + + sprintf (tmp, "%0.2f", (* (double*) opt->value)); + strcat (buf, tmp); + + break; + case CD_String: + + if (opt->style == CS_UseAsFile) + { + const char* str = (* (const char**) opt->value); + const char* ls = strrchr (str, '/'); + + strcat (buf, ls ? (ls + 1) : str); + } + else + { + strcat (buf, (* (const char**) opt->value)); + } + + break; + } + } + + config_output_dir = g_strdup_printf ("%s/%s", config_output_base, buf); + + return 0; +} + +int +config_done (void) +{ + int i, ret; + FILE *out; + + for (i = 0; i < option_count; i += 1) + { + ConfigOption *opt = all_options + i; + + if (! opt->found && opt->arg == CO_Required) + { + fprintf (stderr, "required option not found: %s\n", all_options[i].name); + return -1; + } + } + + if ((ret = config_compute_output_dir ())) { + return ret; + } + + if ((ret = config_clear_dir (config_output_dir))) { + return ret; + } + + if ((ret = config_create_dir (config_output_dir))) { + return ret; + } + + if (! (out = config_output ("Options"))) + abort (); + + for (i = 0; i < option_count; i += 1) + { + ConfigOption *opt = all_options + i; + + fprintf (out, "option: %s; value: ", all_options[i].name); + + switch (opt->type) + { + case CD_Bool: + + fprintf (out, "%s", (* (gboolean*) opt->value) ? 
"TRUE" : "FALSE"); + + break; + case CD_Int32: + + fprintf (out, "%d", (* (gint32*) opt->value)); + + break; + case CD_Double: + + fprintf (out, "%0.2f", (* (double*) opt->value)); + + break; + case CD_String: + + fprintf (out, "%s", (* (const char**) opt->value)); + + break; + } + + fprintf (out, "\n"); + } + + if (fclose (out)) + { + fprintf (stderr, "fclose failed\n"); + return errno; + } + + return 0; +} + +const char* +config_help_arg (ConfigOption *opt) +{ + switch (opt->arg) + { + case CO_Required: + return "required"; + case CO_Optional: + return "optional"; + case CO_None: + return "no value"; + } + + return "unknown"; +} + +const char* +config_help_type (ConfigOption *opt) +{ + switch (opt->arg) + { + case CO_None: + return "boolean"; + default: + break; + } + + switch (opt->type) + { + case CD_Bool: + return "boolean"; + case CD_Int32: + return "int"; + case CD_Double: + return "double"; + case CD_String: + return "string"; + } + + return "unknown"; +} + +void +config_help (void) +{ + int i; + + fprintf (stderr, "Expecting the following options in one or more config files on the command line:\n"); + + for (i = 0; i < option_count; i += 1) + { + ConfigOption *opt = all_options + i; + + fprintf (stderr, "%s: %s %s\n", + opt->name, + config_help_arg (opt), + config_help_type (opt)); + } +} + +FILE* +config_output (const char* format, ...) +{ + gchar *buffer; + gchar *file; + va_list args; + FILE *f; + + va_start (args, format); + buffer = g_strdup_vprintf (format, args); + va_end (args); + + file = g_strdup_printf ("%s/%s", config_output_dir, buffer); + + if (! (f = fopen (file, "w"))) + g_error ("fopen failed: %s\n", buffer); + + g_free (file); + + g_free (buffer); + + return f; +} + + +#include +#include +#include +#include "xdfs.h" + +/* Warning: very cheesy! + */ + +#ifdef DEBUG_EXTRACT + FileHandle *fh2 = handle_read_file (filename); + + guint8* debug_buf = g_malloc (buflen); + + if (! 
handle_read (fh2, debug_buf, buflen)) + g_error ("read failed"); +#endif + +gboolean +rcs_count (const char* filename, guint *encoded_size) +{ + char *readbuf0, *readbuf; + gboolean in_string = FALSE; + gboolean in_text = FALSE; + guint string_start = 0; + guint string_end = 0; + guint current_pos = 0; + /*char *current_delta = NULL;*/ + FileHandle *fh = handle_read_file (filename); + guint buflen = handle_length (fh); + + (* encoded_size) = 0; + + readbuf0 = g_new (guint8, buflen); + + for (;;) + { + int c = handle_gets (fh, readbuf0, buflen); + + readbuf = readbuf0; + + if (c < 0) + break; + + if (strncmp (readbuf, "text", 4) == 0) + in_text = TRUE; + + if (! in_string && readbuf[0] == '@') + { + string_start = current_pos + 1; + in_string = TRUE; + readbuf += 1; + } + + current_pos += c; + + if (in_string) + { + while ((readbuf = strchr (readbuf, '@'))) + { + if (readbuf[1] == '@') + { + string_start += 1; /* @@@ bogus, just counting. */ + readbuf += 2; + continue; + } + + in_string = FALSE; + break; + } + + string_end = current_pos - 2; + + if (in_text && ! in_string) + { + in_text = FALSE; + + /*g_free (current_delta); + current_delta = NULL;*/ + + (* encoded_size) += (string_end - string_start); + } + + continue; + } + + if (isdigit (readbuf[0])) + { +#if 0 + (* strchr (readbuf, '\n')) = 0; + if (current_delta) + g_free (current_delta); + current_delta = g_strdup (readbuf); +#endif + } + } + + handle_close (fh); + + g_free (readbuf0); + +#if 0 + if (current_delta) + g_free (current_delta); +#endif + + return TRUE; +} + +#if 0 +int +main (int argc, char** argv) +{ + guint size; + + if (argc != 2) + g_error ("usage: %s RCS_file\n", argv[0]); + + if (! rcs_count (argv[1], &size)) + g_error ("rcs_parse failed"); + + return 0; +} +#endif diff --git a/lib/xdelta3/run_release.sh b/lib/xdelta3/run_release.sh new file mode 100644 index 0000000..4f76d09 --- /dev/null +++ b/lib/xdelta3/run_release.sh @@ -0,0 +1,288 @@ +#!/bin/bash + +# Run from the source dir. 
+SRCDIR=${PWD} + +# TODO replace w/ wget +LZMA="xz-5.2.1" +LZMA_FILE="${SRCDIR}/../${LZMA}.tar.gz" + +MAKEFLAGS="-j 10" + +BUILDDIR=${SRCDIR}/build +LZMASRC=${BUILDDIR}/${LZMA} + +NONWIN_CFLAGS="" +MINGW_CFLAGS="-DEXTERNAL_COMPRESSION=0 -DXD3_WIN32=1 -DSHELL_TESTS=0" + +MYOS=`uname` +DATE=`date` + +CLEAN="" + +LINUXTGTS="" +LINUXTEST1="" +LINUXTEST2="" + +WINTGTS="" +WINTEST1="" +WINTEST2="" + +OSXTGTS="" +OSXTEST1="" +OSXTEST2="" + +XTMP="/tmp" +if [ "${TMP}" != "" ]; then + XTMP="${TMP}" +fi +if [ "${TMPDIR}" != "" ]; then + XTMP="${TMPDIR}" +fi + +BUILDFILES=`ls -A ${BUILDDIR} 2> /dev/null` +if [ -d "${BUILDDIR}" ]; then + if [ -n "${BUILDFILES}" ]; then + echo "Directory ${BUILDDIR} should be empty" + exit 1 + fi +else + mkdir "${BUILDDIR}" +fi + +function setup { + libtoolize || glibtoolize + automake --add-missing + aclocal -I m4 + autoheader + automake + autoconf +} + +function try { + local w=$1 + shift + local dir=$1 + shift + echo -n " ${w} ... " + (cd "${dir}" && "$@" >${w}.stdout 2>${w}.stderr) + local s=$? + if [ ${s} -eq 0 ]; then + echo " success" + else + echo " failed!" + echo "Error $1 in ${dir}" >&2 + fi + return ${s} +} + +function buildlzma { + host=$1 + march=$2 + local target="${BUILDDIR}/lib-${host}${march}" + + echo " ... liblzma" + + mkdir -p ${target} + + try configure-lzma ${target} ${LZMASRC}/configure \ + --host=${host} \ + --prefix=${target} \ + --disable-shared \ + "CC=${CC}" \ + "CXX=${CXX}" \ + "CFLAGS=${march}" \ + "CXXFLAGS=${march}" \ + "LDFLAGS=${march}" + if [ $? -ne 0 ]; then + return + fi + + try build-lzma ${target} make ${MAKEFLAGS} + if [ $? -ne 0 ]; then + return + fi + try install-lzma ${target} make install + if [ $? 
-ne 0 ]; then + return + fi +} + +function buildit { + local host=$1 + local march=$2 + local usizebits=$3 + local offsetbits=$4 + local cargs=$5 + local afl=$6 + local BM="${host}${march}" + local USECC="${CC}" + local USECXX="${CXX}" + local LIBBM="${BM}" + + if [ "${afl}" = "1" ]; then + USECC="afl-gcc" + USECXX="afl-g++" + BM="${BM}-afl" + fi + + local D="build/${BM}/usize${usizebits}/xoff${offsetbits}" + local BMD="${BM}-${usizebits}-${offsetbits}" + + local FULLD="${SRCDIR}/${D}" + local CFLAGS="${march} ${cargs} -I${SRCDIR}/build/lib-${LIBBM}/include" + local CXXFLAGS="${march} ${cargs} -I${SRCDIR}/build/lib-${LIBBM}/include" + local CPPFLAGS="-I${SRCDIR}/build/lib-${LIBBM}/include" + local LDFLAGS="${march} -L${SRCDIR}/build/lib-${LIBBM}/lib" + + local EXEC_PREAMBLE="" + local EXEC_SUFFIX="" + + case ${host} in + *mingw*) + EXEC_PREAMBLE="wine" + EXEC_SUFFIX=".exe" + ;; + esac + + mkdir -p ${D} + + echo " ... ${BMD}" + + cat >> Makefile.test < \${TMP}/regtest.${BMD}.stdout 2> \${TMP}/regtest.${BMD}.stderr) + +.PHONY: selftest-${BMD} +selftest-${BMD}: + (cd ${D} && ${EXEC_PREAMBLE} ./bin/xdelta3${EXEC_SUFFIX} test 1> \${TMP}/selftest.${BMD}.stdout 2> \${TMP}/selftest.${BMD}.stderr) + + +EOF + + case ${host} in + *linux*) + LINUXTGTS="${LINUXTGTS} build-${BMD}" + LINUXTEST1="${LINUXTEST1} selftest-${BMD}" + LINUXTEST2="${LINUXTEST2} regtest-${BMD}" + ;; + *mingw*) + WINTGTS="${WINTGTS} build-${BMD}" + WINTEST1="${WINTEST1} selftest-${BMD}" + WINTEST2="${WINTEST2} regtest-${BMD}" + ;; + *apple*) + OSXTGTS="${OSXTGTS} build-${BMD}" + OSXTEST1="${OSXTEST1} selftest-${BMD}" + OSXTEST2="${OSXTEST2} regtest-${BMD}" + ;; + esac + CLEAN="${CLEAN} clean-${BMD}" + + try configure-xdelta ${FULLD} ${SRCDIR}/configure \ + --host=${host} \ + --prefix=${FULLD} \ + --enable-static \ + --disable-shared \ + --enable-debug-symbols \ + "CFLAGS=${CFLAGS}" \ + "CXXFLAGS=${CXXFLAGS}" \ + "CPPFLAGS=${CPPFLAGS}" \ + "LDFLAGS=${LDFLAGS}" \ + "CC=${USECC}" \ + "CXX=${USECXX}" + if [ $? 
-ne 0 ]; then + return + fi + + # try build-xdelta ${FULLD} make ${MAKEFLAGS} all + # if [ $? -ne 0 ]; then + # return + # fi + + # try install-xdelta ${FULLD} make install +} + +function buildall { + echo "" + echo "Host $1$2 afl=$4" + echo "" + + buildlzma "$1" "$2" + buildit "$1" "$2" 32 32 "-DXD3_USE_LARGESIZET=0 -DXD3_USE_LARGEFILE64=0 $3" "$4" + buildit "$1" "$2" 32 64 "-DXD3_USE_LARGESIZET=0 -DXD3_USE_LARGEFILE64=1 $3" "$4" + buildit "$1" "$2" 64 64 "-DXD3_USE_LARGESIZET=1 -DXD3_USE_LARGEFILE64=1 $3" "$4" +} + +setup + +try untar-lzma ${BUILDDIR} tar -xvf "${LZMA_FILE}" +if [ $? -ne 0 ]; then + exit $? +fi + +cat > Makefile.test <> Makefile.test < +#include +#include +#include + +#include "../cpp-btree/btree_map.h" + +extern "C" { +uint32_t xd3_large32_cksum_old (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look); +uint32_t xd3_large32_cksum_update_old (xd3_hash_cfg *cfg, uint32_t cksum, + const uint8_t *base, const usize_t look); + +uint64_t xd3_large64_cksum_old (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look); +uint64_t xd3_large64_cksum_update_old (xd3_hash_cfg *cfg, uint64_t cksum, + const uint8_t *base, const usize_t look); +} + +using btree::btree_map; +using std::list; +using std::vector; + +// MLCG parameters +// a, a* +uint32_t good_32bit_values[] = { + 1597334677U, // ... + 741103597U, 887987685U, +}; + +// a, a* +uint64_t good_64bit_values[] = { + 1181783497276652981ULL, 4292484099903637661ULL, + 7664345821815920749ULL, // ... 
+}; + +void print_header() { + static int hdr_cnt = 0; + if (hdr_cnt++ % 20 == 0) { + printf("%-32sConf\t\tCount\tUniq\tFull\tCover\tColls" + "\tMB/s\tIters\t#Colls\n", "Name"); + } +} + +struct true_type { }; +struct false_type { }; + +template +usize_t bitsof(); + +template<> +usize_t bitsof() { + return sizeof(unsigned int) * 8; +} + +template<> +usize_t bitsof() { + return sizeof(unsigned long) * 8; +} + +template<> +usize_t bitsof() { + return sizeof(unsigned long long) * 8; +} + +template +struct hhash { // shift "s" bits leaving the high bits as a hash value for + // this checksum, which are the most "distant" in terms of the + // spectral test for the rabin_karp MLCG. For short windows, + // the high bits aren't enough, XOR "mask" worth of these in. + Word operator()(const Word t, const Word s, const Word mask) { + return (t >> s) ^ (t & mask); + } +}; + +template +Word good_word(); + +template<> +uint32_t good_word() { + return good_32bit_values[0]; +} + +template<> +uint64_t good_word() { + return good_64bit_values[0]; +} + +// CLASSES + +#define SELF Word, CksumSize, CksumSkip, Hash, Compaction +#define MEMBER template + +MEMBER +struct cksum_params { + typedef Word word_type; + typedef Hash hash_type; + + static const int cksum_size = CksumSize; + static const int cksum_skip = CksumSkip; + static const int compaction = Compaction; +}; + +MEMBER +struct rabin_karp : public cksum_params { + // (a^cksum_size-1 c_0) + (a^cksum_size-2 c_1) ... 
+ rabin_karp() + : powers(make_powers()), + product(powers[0] * good_word()), + incr_state(0) { } + + static Word* make_powers() { + Word *p = new Word[CksumSize]; + p[CksumSize - 1] = 1; + for (int i = CksumSize - 2; i >= 0; i--) { + p[i] = p[i + 1] * good_word(); + } + return p; + } + + ~rabin_karp() { + delete [] powers; + } + + Word step(const uint8_t *ptr) { + Word h = 0; + for (int i = 0; i < CksumSize; i++) { + h += (ptr[i]) * powers[i]; + } + return h; + } + + Word state0(const uint8_t *ptr) { + incr_state = step(ptr); + return incr_state; + } + + Word incr(const uint8_t *ptr) { + incr_state = good_word() * incr_state - + product * (ptr[-1]) + (ptr[CksumSize - 1]); + return incr_state; + } + + const Word *const powers; + const Word product; + Word incr_state; +}; + +MEMBER +struct with_stream : public cksum_params { + xd3_stream stream; + + with_stream() + { + xd3_config cfg; + memset (&stream, 0, sizeof (stream)); + xd3_init_config (&cfg, 0); + cfg.smatch_cfg = XD3_SMATCH_SOFT; + cfg.smatcher_soft.large_look = CksumSize; + cfg.smatcher_soft.large_step = CksumSkip; + cfg.smatcher_soft.small_look = 4; + cfg.smatcher_soft.small_chain = 4; + cfg.smatcher_soft.small_lchain = 4; + cfg.smatcher_soft.max_lazy = 4; + cfg.smatcher_soft.long_enough = 4; + CHECK_EQ(0, xd3_config_stream (&stream, &cfg)); + + CHECK_EQ(0, xd3_size_hashtable (&stream, + 1<<10 /* ignored */, + stream.smatcher.large_look, + & stream.large_hash)); + } + ~with_stream() + { + xd3_free_stream (&stream); + } +}; + +MEMBER +struct large_cksum : public with_stream { + Word step(const uint8_t *ptr) { + return xd3_large_cksum (&this->stream.large_hash, ptr, CksumSize); + } + + Word state0(const uint8_t *ptr) { + incr_state = step(ptr); + return incr_state; + } + + Word incr(const uint8_t *ptr) { + incr_state = xd3_large_cksum_update (&this->stream.large_hash, + incr_state, ptr - 1, CksumSize); + return incr_state; + } + + Word incr_state; +}; + +#if SIZEOF_USIZE_T == 4 +#define xd3_large_cksum_old 
xd3_large32_cksum_old +#define xd3_large_cksum_update_old xd3_large32_cksum_update_old +#elif SIZEOF_USIZE_T == 8 +#define xd3_large_cksum_old xd3_large64_cksum_old +#define xd3_large_cksum_update_old xd3_large64_cksum_update_old +#endif + +MEMBER +struct large_cksum_old : public with_stream { + Word step(const uint8_t *ptr) { + return xd3_large_cksum_old (&this->stream.large_hash, ptr, CksumSize); + } + + Word state0(const uint8_t *ptr) { + incr_state = step(ptr); + return incr_state; + } + + Word incr(const uint8_t *ptr) { + incr_state = xd3_large_cksum_update_old (&this->stream.large_hash, + incr_state, ptr - 1, CksumSize); + return incr_state; + } + + Word incr_state; +}; + +// TESTS + +template +struct file_stats { + typedef const uint8_t* ptr_type; + typedef Word word_type; + typedef btree::btree_multimap table_type; + typedef typename table_type::iterator table_iterator; + + usize_t cksum_size; + usize_t cksum_skip; + usize_t unique; + usize_t unique_values; + usize_t count; + table_type table; + + file_stats(usize_t size, usize_t skip) + : cksum_size(size), + cksum_skip(skip), + unique(0), + unique_values(0), + count(0) { + } + + void reset() { + unique = 0; + unique_values = 0; + count = 0; + table.clear(); + } + + void update(word_type word, ptr_type ptr) { + table_iterator t_i = table.find(word); + + count++; + if (t_i != table.end()) { + int collisions = 0; + for (table_iterator p_i = t_i; + p_i != table.end() && p_i->first == word; + ++p_i) { + if (memcmp(p_i->second, ptr, cksum_size) == 0) { + return; + } + collisions++; + } + if (collisions >= 1000) { + fprintf(stderr, "Something is not right, lots of collisions=%d\n", + collisions); + abort(); + } + } else { + unique_values++; + } + unique++; + table.insert(std::make_pair(word, ptr)); + return; + } + + void freeze() { + table.clear(); + } +}; + +struct test_result_base; + +static vector all_tests; + +struct test_result_base { + virtual ~test_result_base() { + } + virtual void reset() = 0; + virtual 
void print() = 0; + virtual void get(const uint8_t* buf, const size_t buf_size, + usize_t iters) = 0; + virtual void stat() = 0; + virtual usize_t count() = 0; + virtual usize_t dups() = 0; + virtual double uniqueness() = 0; + virtual double fullness() = 0; + virtual double collisions() = 0; + virtual double coverage() = 0; + virtual double compression() = 0; + virtual double time() = 0; + virtual double total_time() = 0; + virtual usize_t total_count() = 0; + virtual usize_t total_dups() = 0; +}; + +template +struct test_result : public test_result_base { + Checksum cksum; + const char *test_name; + file_stats fstats; + usize_t test_size; + usize_t n_steps; + usize_t n_incrs; + typename Checksum::word_type s_bits; + typename Checksum::word_type s_mask; + usize_t t_entries; + usize_t h_bits; + usize_t h_buckets_full; + char *hash_table; + long accum_millis; + usize_t accum_iters; + + // These are not reset + double accum_time; + usize_t accum_count; + usize_t accum_dups; + usize_t accum_colls; + size_t accum_size; + + test_result(const char *name) + : test_name(name), + fstats(Checksum::cksum_size, Checksum::cksum_skip), + hash_table(NULL), + accum_millis(0), + accum_iters(0), + accum_time(0.0), + accum_count(0), + accum_dups(0), + accum_colls(0), + accum_size(0) { + all_tests.push_back(this); + } + + ~test_result() { + reset(); + } + + void reset() { + // size of file + test_size = 0; + + // count + n_steps = 0; + n_incrs = 0; + + // four values used by new_table()/summarize_table() + s_bits = 0; + s_mask = 0; + t_entries = 0; + h_bits = 0; + h_buckets_full = 0; + + accum_millis = 0; + accum_iters = 0; + + fstats.reset(); + + // temporary + if (hash_table) { + delete(hash_table); + hash_table = NULL; + } + } + + usize_t count() { + if (Checksum::cksum_skip == 1) { + return n_incrs; + } else { + return n_steps; + } + } + + usize_t dups() { + return fstats.count - fstats.unique; + } + + /* Fraction of distinct strings of length cksum_size which are not + * 
represented in the hash table. */ + double collisions() { + return (fstats.unique - fstats.unique_values) / (double) fstats.unique; + } + usize_t colls() { + return (fstats.unique - fstats.unique_values); + } + + double uniqueness() { + return 1.0 - (double) dups() / count(); + } + + double fullness() { + return (double) h_buckets_full / (1 << h_bits); + } + + double coverage() { + return (double) h_buckets_full / uniqueness() / count(); + } + + double compression() { + return 1.0 - coverage(); + } + + double time() { + return (double) accum_millis / accum_iters; + } + + double total_time() { + return accum_time; + } + + usize_t total_count() { + return accum_count; + } + + usize_t total_dups() { + return accum_dups; + } + + usize_t total_colls() { + return accum_dups; + } + + void stat() { + accum_time += time(); + accum_count += count(); + accum_dups += dups(); + accum_colls += colls(); + accum_size += test_size; + } + + void print() { + if (fstats.count != count()) { + fprintf(stderr, "internal error: %" W "d != %" W "d\n", fstats.count, count()); + abort(); + } + print_header(); + printf("%-32s%d/%d 2^%" W "u\t%" W "u\t%0.4f\t%.4f\t%.4f\t%.1e\t%.2f\t" + "%" W "u\t%" W "u\n", + test_name, + Checksum::cksum_size, + Checksum::cksum_skip, + h_bits, + count(), + uniqueness(), + fullness(), + coverage(), + collisions(), + 0.001 * accum_iters * test_size / accum_millis, + accum_iters, + colls()); + } + + usize_t size_log2 (usize_t slots) { + usize_t bits = bitsof() - 1; + usize_t i; + + for (i = 3; i <= bits; i += 1) { + if (slots <= (1U << i)) { + return i - Checksum::compaction; + } + } + + return bits; + } + + void new_table(usize_t entries) { + t_entries = entries; + h_bits = size_log2(entries); + + usize_t n = 1 << h_bits; + + s_bits = bitsof() - h_bits; + s_mask = n - 1U; + + hash_table = new char[n / 8]; + memset(hash_table, 0, n / 8); + } + + int get_table_bit(usize_t i) { + return hash_table[i/8] & (1 << i%8); + } + + int set_table_bit(usize_t i) { + return 
hash_table[i/8] |= (1 << i%8); + } + + void summarize_table() { + usize_t n = 1 << h_bits; + usize_t f = 0; + for (usize_t i = 0; i < n; i++) { + if (get_table_bit(i)) { + f++; + } + } + h_buckets_full = f; + } + + void get(const uint8_t* buf, const size_t buf_size, usize_t test_iters) { + typename Checksum::hash_type hash; + const uint8_t *ptr; + const uint8_t *end; + usize_t periods; + int64_t last_offset; + int64_t stop; + + test_size = buf_size; + last_offset = buf_size - Checksum::cksum_size; + + if (last_offset < 0) { + periods = 0; + n_steps = 0; + n_incrs = 0; + stop = -Checksum::cksum_size; + } else { + periods = last_offset / Checksum::cksum_skip; + n_steps = periods + 1; + n_incrs = last_offset + 1; + stop = last_offset - (periods + 1) * Checksum::cksum_skip; + } + + // Compute file stats once. + if (fstats.unique_values == 0) { + if (Checksum::cksum_skip == 1) { + for (size_t i = 0; i <= buf_size - Checksum::cksum_size; i++) { + fstats.update(hash(cksum.step(buf + i), s_bits, s_mask), buf + i); + } + } else { + ptr = buf + last_offset; + end = buf + stop; + + for (; ptr != end; ptr -= Checksum::cksum_skip) { + fstats.update(hash(cksum.step(ptr), s_bits, s_mask), ptr); + } + } + fstats.freeze(); + } + + long start_test = get_millisecs_now(); + + if (Checksum::cksum_skip != 1) { + new_table(n_steps); + + for (usize_t i = 0; i < test_iters; i++) { + ptr = buf + last_offset; + end = buf + stop; + + for (; ptr != end; ptr -= Checksum::cksum_skip) { + set_table_bit(hash(cksum.step(ptr), s_bits, s_mask)); + } + } + + summarize_table(); + } + + stop = buf_size - Checksum::cksum_size + 1; + if (stop < 0) { + stop = 0; + } + + if (Checksum::cksum_skip == 1) { + new_table(n_incrs); + + for (usize_t i = 0; i < test_iters; i++) { + ptr = buf; + end = buf + stop; + + if (ptr != end) { + set_table_bit(hash(cksum.state0(ptr++), s_bits, s_mask)); + } + + for (; ptr != end; ptr++) { + typename Checksum::word_type w = cksum.incr(ptr); + CHECK_EQ(w, cksum.step(ptr)); + 
set_table_bit(hash(w, s_bits, s_mask)); + } + } + + summarize_table(); + } + + accum_iters += test_iters; + accum_millis += get_millisecs_now() - start_test; + } +}; + +static int read_whole_file(const char *name, + uint8_t **buf_ptr, + size_t *buf_len) { + main_file file; + int ret; + xoff_t len; + size_t nread; + main_file_init(&file); + file.filename = name; + ret = main_file_open(&file, name, XO_READ); + if (ret != 0) { + fprintf(stderr, "open failed\n"); + goto exit; + } + ret = main_file_stat(&file, &len); + if (ret != 0) { + fprintf(stderr, "stat failed\n"); + goto exit; + } + + (*buf_len) = (size_t)len; + (*buf_ptr) = (uint8_t*) main_malloc(*buf_len); + ret = main_file_read(&file, *buf_ptr, *buf_len, &nread, + "read failed"); + if (ret == 0 && *buf_len == nread) { + ret = 0; + } else { + fprintf(stderr, "invalid read\n"); + ret = XD3_INTERNAL; + } + exit: + main_file_cleanup(&file); + return ret; +} + +int main(int argc, char** argv) { + int i; + uint8_t *buf = NULL; + size_t buf_len = 0; + int ret; + + if (argc <= 1) { + fprintf(stderr, "usage: %s file ...\n", argv[0]); + return 1; + } + +// TODO: The xdelta3-hash.h code is identical now; add sameness test. +// using rabin_karp<> template. 
+#define TEST(T,Z,S,C) \ + test_result,C>> \ + _xck_ ## T ## _ ## Z ## _ ## S ## _ ## C \ + ("xck_" #T "_" #Z "_" #S "_" #C); \ + test_result,C>> \ + _old_ ## T ## _ ## Z ## _ ## S ## _ ## C \ + ("old_" #T "_" #Z "_" #S "_" #C) + +#define TESTS(SIZE, SKIP) \ + TEST(usize_t, SIZE, SKIP, 1); \ + TEST(usize_t, SIZE, SKIP, 2) + + TESTS(5, 1); + TESTS(6, 1); + TESTS(7, 1); + TESTS(8, 1); + TESTS(9, 1); + TESTS(10, 1); + TESTS(11, 1); + TESTS(12, 1); + TESTS(13, 1); + TESTS(14, 1); + TESTS(15, 1); + TESTS(16, 1); + TESTS(17, 1); + TESTS(18, 1); + TESTS(19, 1); + TESTS(20, 1); + TESTS(21, 1); + TESTS(22, 1); + TESTS(23, 1); + TESTS(24, 1); + TESTS(25, 1); + TESTS(26, 1); + TESTS(27, 1); + TESTS(28, 1); + TESTS(29, 1); + TESTS(30, 1); + TESTS(31, 1); + TESTS(32, 1); + TESTS(33, 1); + TESTS(34, 1); + TESTS(35, 1); + TESTS(36, 1); + TESTS(37, 1); + TESTS(38, 1); + TESTS(39, 1); + + + for (i = 1; i < argc; i++) { + if ((ret = read_whole_file(argv[i], + & buf, + & buf_len))) { + return 1; + } + + fprintf(stderr, "file %s is %zu bytes\n", + argv[i], buf_len); + + double min_time = -1.0; + double min_compression = 0.0; + + for (vector::iterator iter = all_tests.begin(); + iter != all_tests.end(); ++iter) { + test_result_base *test = *iter; + test->reset(); + + usize_t iters = 1; + long start_test = get_millisecs_now(); + + do { + test->get(buf, buf_len, iters); + iters *= 3; + iters /= 2; + } while (get_millisecs_now() - start_test < 2000); + + test->stat(); + + if (min_time < 0.0) { + min_compression = test->compression(); + min_time = test->time(); + } + + if (min_time > test->time()) { + min_time = test->time(); + } + + if (min_compression > test->compression()) { + min_compression = test->compression(); + } + + test->print(); + } + + main_free(buf); + buf = NULL; + } + + return 0; +} diff --git a/lib/xdelta3/testing/checksum_test_c.c b/lib/xdelta3/testing/checksum_test_c.c new file mode 100644 index 0000000..7b2ab44 --- /dev/null +++ b/lib/xdelta3/testing/checksum_test_c.c 
@@ -0,0 +1,189 @@ +/* xdelta3 - delta compression tools and library -*- Mode: C++ -*- + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +#include "../xdelta3.c" + +// OLD CHECKSUM CODE + +#define PERMUTE32(x) (__single_hash32[x]) +#define PERMUTE64(x) (__single_hash64[x]) + +const uint16_t __single_hash32[256] = +{ + /* This hashes the input alphabet (Scheme SLIB pseudo-random). */ + 0xbcd1, 0xbb65, 0x42c2, 0xdffe, 0x9666, 0x431b, 0x8504, 0xeb46, + 0x6379, 0xd460, 0xcf14, 0x53cf, 0xdb51, 0xdb08, 0x12c8, 0xf602, + 0xe766, 0x2394, 0x250d, 0xdcbb, 0xa678, 0x02af, 0xa5c6, 0x7ea6, + 0xb645, 0xcb4d, 0xc44b, 0xe5dc, 0x9fe6, 0x5b5c, 0x35f5, 0x701a, + 0x220f, 0x6c38, 0x1a56, 0x4ca3, 0xffc6, 0xb152, 0x8d61, 0x7a58, + 0x9025, 0x8b3d, 0xbf0f, 0x95a3, 0xe5f4, 0xc127, 0x3bed, 0x320b, + 0xb7f3, 0x6054, 0x333c, 0xd383, 0x8154, 0x5242, 0x4e0d, 0x0a94, + 0x7028, 0x8689, 0x3a22, 0x0980, 0x1847, 0xb0f1, 0x9b5c, 0x4176, + 0xb858, 0xd542, 0x1f6c, 0x2497, 0x6a5a, 0x9fa9, 0x8c5a, 0x7743, + 0xa8a9, 0x9a02, 0x4918, 0x438c, 0xc388, 0x9e2b, 0x4cad, 0x01b6, + 0xab19, 0xf777, 0x365f, 0x1eb2, 0x091e, 0x7bf8, 0x7a8e, 0x5227, + 0xeab1, 0x2074, 0x4523, 0xe781, 0x01a3, 0x163d, 0x3b2e, 0x287d, + 0x5e7f, 0xa063, 0xb134, 0x8fae, 0x5e8e, 0xb7b7, 0x4548, 0x1f5a, + 0xfa56, 0x7a24, 0x900f, 0x42dc, 0xcc69, 0x02a0, 0x0b22, 0xdb31, + 0x71fe, 0x0c7d, 0x1732, 0x1159, 0xcb09, 0xe1d2, 0x1351, 0x52e9, + 0xf536, 0x5a4f, 0xc316, 0x6bf9, 0x8994, 0xb774, 0x5f3e, 0xf6d6, + 0x3a61, 0xf82c, 0xcc22, 
0x9d06, 0x299c, 0x09e5, 0x1eec, 0x514f, + 0x8d53, 0xa650, 0x5c6e, 0xc577, 0x7958, 0x71ac, 0x8916, 0x9b4f, + 0x2c09, 0x5211, 0xf6d8, 0xcaaa, 0xf7ef, 0x287f, 0x7a94, 0xab49, + 0xfa2c, 0x7222, 0xe457, 0xd71a, 0x00c3, 0x1a76, 0xe98c, 0xc037, + 0x8208, 0x5c2d, 0xdfda, 0xe5f5, 0x0b45, 0x15ce, 0x8a7e, 0xfcad, + 0xaa2d, 0x4b5c, 0xd42e, 0xb251, 0x907e, 0x9a47, 0xc9a6, 0xd93f, + 0x085e, 0x35ce, 0xa153, 0x7e7b, 0x9f0b, 0x25aa, 0x5d9f, 0xc04d, + 0x8a0e, 0x2875, 0x4a1c, 0x295f, 0x1393, 0xf760, 0x9178, 0x0f5b, + 0xfa7d, 0x83b4, 0x2082, 0x721d, 0x6462, 0x0368, 0x67e2, 0x8624, + 0x194d, 0x22f6, 0x78fb, 0x6791, 0xb238, 0xb332, 0x7276, 0xf272, + 0x47ec, 0x4504, 0xa961, 0x9fc8, 0x3fdc, 0xb413, 0x007a, 0x0806, + 0x7458, 0x95c6, 0xccaa, 0x18d6, 0xe2ae, 0x1b06, 0xf3f6, 0x5050, + 0xc8e8, 0xf4ac, 0xc04c, 0xf41c, 0x992f, 0xae44, 0x5f1b, 0x1113, + 0x1738, 0xd9a8, 0x19ea, 0x2d33, 0x9698, 0x2fe9, 0x323f, 0xcde2, + 0x6d71, 0xe37d, 0xb697, 0x2c4f, 0x4373, 0x9102, 0x075d, 0x8e25, + 0x1672, 0xec28, 0x6acb, 0x86cc, 0x186e, 0x9414, 0xd674, 0xd1a5 +}; + +const uint32_t __single_hash64[256] = +{ + /* http://random.org 2014.10.24 */ + 0xd25e9f0a, 0xb1af9d5e, 0xb753dfa2, 0x157050f7, /* 0 */ + 0xc84b072c, 0xdd14fe7c, 0xf92208c3, 0xdf08a0c0, + 0x63a5c118, 0x76f5d90f, 0xa2f8b93e, 0xb6c12d22, + 0xaf074957, 0x966fb7d9, 0x62f7b785, 0xb40e8a09, + 0x0a811d5d, 0x323a6daa, 0xb62f7c5b, 0xfdcb9a53, + 0xf25a9067, 0x4506bc7a, 0xff58a74b, 0x5ae62817, + 0x74097675, 0x722c0fd9, 0x116a2a66, 0x65f76728, + 0x72c79651, 0xe043cf9d, 0x64b867c7, 0x6604834f, + 0xcdca58a6, 0x0f164e2d, 0x24515f05, 0x632cdbf8, + 0x18091d4a, 0x3eff4128, 0x673d1c33, 0xd8e10c71, + 0x1a3edf11, 0xba52892f, 0xa56949e0, 0xf3e1dd77, /* 10 */ + 0x86fcbe3e, 0x138d66d0, 0x4fc98359, 0xc22e5dd6, + 0xc59f2267, 0x6c6dd739, 0xe03da190, 0x07e8469c, + 0xadcfb02c, 0x00d3b0d9, 0xa1f44918, 0x8bd84d87, + 0x08ec9ec1, 0xbbcd156f, 0xb57718e3, 0x3177e752, + 0xf52a4d70, 0xde7aaad9, 0x075f1da0, 0x21ba00c6, + 0xb9469a5c, 0xcf08d5ba, 0x91ac9edc, 0xc6167b63, + 0xc1974919, 
0xc8c8d195, 0x4b1996dd, 0xeff8991c, + 0xf7f66c6b, 0x25b012e2, 0x59d12a98, 0xea40d3cc, + 0x41f9970b, 0xec48101a, 0xa3bdcf90, 0x99f16905, + 0x27af6c97, 0xc849af37, 0x49cad89b, 0xf48c2278, /* 20 */ + 0x5529c3d8, 0x9e7d6dce, 0x16feb52d, 0xf1b0aca1, + 0xaf28fccb, 0x48e4ce3c, 0xc4436617, 0x64524e3e, + 0x61806681, 0x6384f2d7, 0x1172880f, 0x34a5ef5f, + 0xcc8cc0a8, 0x66e8f100, 0x2866085f, 0xba9b1b2d, + 0x51285949, 0x2be4b574, 0x889b1ef5, 0x3dbe920d, + 0x9277a62f, 0x0584a9f6, 0x085d8fc4, 0x4b5d403d, + 0x4e46ca78, 0x3294c2f9, 0x29313e70, 0xe4f09b24, + 0xe73b331c, 0x072f5552, 0x2e390b78, 0xea0021ca, + 0xd8f40320, 0xed0e16fd, 0x7de9cf7a, 0xf17e3d6c, + 0x8df1bd85, 0x052cae67, 0x3486e512, 0x3a1c09b8, /* 30 */ + 0x6c2a7b4e, 0x83455753, 0xbc0353ac, 0x0ffe20b6, + 0x5fdcef85, 0x010f506c, 0x595ce972, 0xe28680d0, + 0xa7e216b2, 0xa392ee0f, 0x25b73faa, 0x2b1f4983, + 0xeeaefe98, 0x1d3d9cbc, 0x6aebe97b, 0x8b7b3584, + 0x9e6a9a07, 0xd37f1e99, 0x4ac2a441, 0x8ae9a213, + 0x7d0e27d7, 0x5de54b9a, 0x8621de1f, 0xf0f2f866, + 0xcb08d275, 0x49c3f87e, 0xd5ee68c1, 0x9802fc77, + 0x68be6c5e, 0x65aa8c27, 0xf423d5f7, 0x10ec5502, + 0x9909bce1, 0x509cdf1b, 0x338fea72, 0x2733e9bf, + 0xf92f4fd7, 0x87738ea2, 0x931a8bbc, 0x0a5c9155, /* 40 */ + 0xbe5edd9b, 0xadbf5838, 0x0338f8d2, 0x290da210, + 0x390c37d8, 0xe7cffae8, 0x20617ebe, 0x464322dd, + 0x7b3c4e78, 0xac142dcb, 0x2d5cef76, 0xd8fe49fc, + 0x60f4e9a9, 0x7473816f, 0x0dc35f39, 0x5eed80c1, + 0x0cb55ab6, 0x1d3ac541, 0x13c7f529, 0x7bffdf4a, + 0xe334785b, 0x85263ec1, 0xd132ae56, 0x7c868b9e, + 0x47f60638, 0x1012b979, 0x81c31dd3, 0x1af868c8, + 0x0c5d0742, 0xd1b3e1a2, 0x5873200a, 0xf848465c, + 0x0fc4d596, 0x609c18af, 0xc9f5a480, 0xd1a94a84, + 0xa1431a3f, 0x7de8bb1a, 0x25f1256b, 0x1dcc732c, /* 50 */ + 0x6aa1549a, 0xa2367281, 0x32f2a77e, 0x82e62a0f, + 0x045cbb56, 0x74b2027c, 0xd71a32d9, 0x022e7cb5, + 0xe99be177, 0x60222fdf, 0xd69681ca, 0x9008ee2c, + 0x32923db4, 0xcf82bf97, 0x38960a5b, 0xb3503d5b, + 0x9bd4c7f2, 0x33c029c8, 0x1ef504a3, 0xdb249d3b, + 0x91e89676, 0x4ca43b36, 
0x9191433c, 0x465d5dc4, + 0xf4dcb118, 0x9d11dd00, 0xb592f058, 0xdbe5ce30, + 0x74790d92, 0x779850a8, 0x7180d25b, 0xfa951d99, + 0x5990935a, 0x921cb022, 0x3b7c39bc, 0x6a38a7c7, + 0xdc22703b, 0x142bab3b, 0x4e3d9479, 0x44bb8482, /* 60 */ + 0x8043abce, 0xfebe832a, 0x8e6a2f98, 0x4d43c4fe, + 0xd192a70a, 0x802f3c3a, 0x5d11bbab, 0x2665d241, + 0xb3f3a680, 0x3a8d223f, 0xcf82cdb4, 0x4ed28743, +}; + +uint64_t +xd3_large64_cksum_old (xd3_hash_cfg *ignore, const uint8_t *base, const usize_t look) +{ + static const uint64_t kBits = 32; + static const uint64_t kMask = 0xffffffff; + usize_t i = 0; + uint64_t low = 0; + uint64_t high = 0; + + for (; i < look; i += 1) + { + low += PERMUTE64(*base++); + high += low; + } + + return ((high & kMask) << kBits) | (low & kMask); +} + +uint64_t +xd3_large64_cksum_update_old (xd3_hash_cfg *ignore, const uint64_t cksum, + const uint8_t *base, const usize_t look) +{ + static const uint64_t kBits = 32; + static const uint64_t kMask = 0xffffffff; + uint64_t old_c = PERMUTE64(base[0]); + uint64_t new_c = PERMUTE64(base[look]); + uint64_t low = ((cksum & kMask) - old_c + new_c) & kMask; + uint64_t high = ((cksum >> kBits) - (old_c * look) + low) & kMask; + return (high << kBits) | low; +} + +uint32_t +xd3_large32_cksum_old (xd3_hash_cfg *ignore, const uint8_t *base, const usize_t look) +{ + static const uint32_t kBits = 16; + static const uint32_t kMask = 0xffff; + usize_t i = 0; + uint32_t low = 0; + uint32_t high = 0; + + for (; i < look; i += 1) + { + low += PERMUTE32(*base++); + high += low; + } + + return ((high & kMask) << kBits) | (low & kMask); +} + +uint32_t +xd3_large32_cksum_update_old (xd3_hash_cfg *ignore, const uint32_t cksum, + const uint8_t *base, const usize_t look) +{ + static const uint32_t kBits = 16; + static const uint32_t kMask = 0xffff; + uint32_t old_c = PERMUTE32(base[0]); + uint32_t new_c = PERMUTE32(base[look]); + uint32_t low = ((cksum & kMask) - old_c + new_c) & kMask; + uint32_t high = ((cksum >> kBits) - (old_c * look) 
+ low) & kMask; + return (high << kBits) | low; +} diff --git a/lib/xdelta3/testing/cmp.h b/lib/xdelta3/testing/cmp.h new file mode 100644 index 0000000..60748cb --- /dev/null +++ b/lib/xdelta3/testing/cmp.h @@ -0,0 +1,67 @@ +/* xdelta3 - delta compression tools and library -*- Mode: C++ -*- + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +static size_t CmpDifferentBlockBytes(const Block &a, const Block &b) { + size_t total = 0; + size_t i = 0; + size_t m = min(a.Size(), b.Size()); + + for (; i < m; i++) { + if (a[i] != b[i]) { + total++; + } + } + + total += a.Size() - i; + total += b.Size() - i; + + return total; +} + +static xoff_t CmpDifferentBytes(const FileSpec &a, const FileSpec &b) { + Block block_a, block_b; + xoff_t total = 0; + typename FileSpec::iterator a_i(a), b_i(b); + + for (; !a_i.Done() && !b_i.Done(); a_i.Next(), b_i.Next()) { + + a_i.Get(&block_a); + b_i.Get(&block_b); + + total += CmpDifferentBlockBytes(block_a, block_b); + } + + for (; !a_i.Done(); a_i.Next()) { + total += a_i.BytesOnBlock(); + } + for (; !b_i.Done(); b_i.Next()) { + total += b_i.BytesOnBlock(); + } + + return total; +} + +static size_t CmpDifferentBlockBytesAtOffset(const Block &a, + const FileSpec &b_spec, + xoff_t offset) { + Block b; + size_t size = a.Size(); + CHECK_LE(offset, b_spec.Size()); + if (b_spec.Size() < offset + size) { + size = b_spec.Size() - offset; + } + b_spec.Get(&b, offset, size); + return CmpDifferentBlockBytes(a, b); +} diff --git 
a/lib/xdelta3/testing/delta.h b/lib/xdelta3/testing/delta.h new file mode 100644 index 0000000..bd38c6c --- /dev/null +++ b/lib/xdelta3/testing/delta.h @@ -0,0 +1,87 @@ +/* xdelta3 - delta compression tools and library -*- Mode: C++ -*- + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +class Delta { +public: + Delta(const Block &block) { + int ret; + xd3_config config; + memset(&stream_, 0, sizeof (stream_)); + memset(&config, 0, sizeof (config)); + + xd3_init_config(&config, XD3_SKIP_EMIT | XD3_ADLER32_NOVER); + + CHECK_EQ(0, xd3_config_stream (&stream_, &config)); + + xd3_avail_input (&stream_, block.Data(), block.Size()); + + bool done = false; + while (!done) { + ret = xd3_decode_input(&stream_); + + switch (ret) { + case XD3_INPUT: + done = true; + break; + case XD3_OUTPUT: + CHECK_EQ(0, xd3_whole_append_window (&stream_)); + break; + case XD3_GOTHEADER: + case XD3_WINSTART: + case XD3_WINFINISH: + break; + default: + cerr << "decode: " << done; + abort(); + } + } + } + + ~Delta() { + xd3_free_stream(&stream_); + } + + xoff_t AddedBytes() const { + return stream_.whole_target.addslen; + } + + xoff_t Windows() const { + return stream_.whole_target.wininfolen; + } + +// Note: This does not benefit from -Wformat= checking, due to the +// enclosing template. Further, it was not used. 
+// void Print() const { +// for (size_t i = 0; i < stream_.whole_target.instlen; i++) { +// xd3_winst &winst = stream_.whole_target.inst[i]; +// switch (winst.type) { +// case XD3_RUN: +// DP(RINT, "%" Q "u run %" W "u\n", winst.position, winst.size); +// break; +// case XD3_ADD: +// DP(RINT "%" Q "u add %" W "u\n", winst.position, winst.size); +// break; +// default: +// DP(RINT "%" Q "u copy %" W "u @ %" Q "u (mode %u)\n", +// winst.position, winst.size, winst.addr, winst.mode); +// break; +// } +// } +// } + +private: + xd3_stream stream_; +}; diff --git a/lib/xdelta3/testing/file.h b/lib/xdelta3/testing/file.h new file mode 100644 index 0000000..d1828cf --- /dev/null +++ b/lib/xdelta3/testing/file.h @@ -0,0 +1,399 @@ +/* xdelta3 - delta compression tools and library -*- Mode: C++ -*- + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ +class Block; +class BlockIterator; +class TmpFile; + +class Block { +public: + Block() + : data_(NULL), + data_size_(0), + size_(0) { } + + ~Block() { + if (data_) { + delete [] data_; + } + } + + size_t Size() const { + return size_; + } + + uint8_t operator[](size_t i) const { + CHECK_LT(i, size_); + return data_[i]; + } + + uint8_t* Data() const { + if (data_ == NULL) { + CHECK_EQ(0, size_); + data_size_ = 1; + data_ = new uint8_t[1]; + } + return data_; + } + + // For writing to blocks + void Append(const uint8_t *data, size_t size) { + if (data_ == NULL) { + CHECK_EQ(0, size_); + CHECK_EQ(0, data_size_); + data_ = new uint8_t[Constants::BLOCK_SIZE]; + data_size_ = Constants::BLOCK_SIZE; + } + + if (size_ + size > data_size_) { + uint8_t *tmp = data_; + while (size_ + size > data_size_) { + data_size_ *= 2; + } + data_ = new uint8_t[data_size_]; + memcpy(data_, tmp, size_); + delete [] tmp; + } + + memcpy(data_ + size_, data, size); + size_ += size; + } + + // For cleaing a block + void Reset() { + size_ = 0; + } + + // Note: This does not benefit from -Wformat= checking, due to the + // enclosing template. Further, it was not used. 
+ // void Print() const { + // xoff_t pos = 0; + // for (size_t i = 0; i < Size(); i++) { + // if (pos % 16 == 0) { + // DP(RINT "%5" Q "x: ", pos); + // } + // DP(RINT "%02x ", (*this)[i]); + // if (pos % 16 == 15) { + // DP(RINT "\n"); + // } + // pos++; + // } + // DP(RINT "\n"); + // } + + void WriteTmpFile(TmpFile *f) const { + f->Append(this); + } + + void SetSize(size_t size) { + uint8_t *t = NULL; + if (data_size_ < size) { + if (data_) { + t = data_; + } + data_ = new uint8_t[size]; + data_size_ = size; + } + if (t && size < size_) { + memcpy(data_, t, size); + } + delete [] t; + size_ = size; + } + +private: + friend class BlockIterator; + + mutable uint8_t *data_; + mutable size_t data_size_; + size_t size_; +}; + +class FileSpec { + public: + FileSpec(MTRandom *rand) + : rand_(rand) { + } + + // Generates a file with a known size + void GenerateFixedSize(xoff_t size) { + Reset(); + + for (xoff_t p = 0; p < size; ) { + xoff_t t = min(Constants::BLOCK_SIZE, size - p); + table_.insert(make_pair(p, Segment(t, rand_))); + p += t; + } + } + + // Generates a file with exponential-random distributed size + void GenerateRandomSize(xoff_t mean) { + GenerateFixedSize(rand_->ExpRand(mean)); + } + + // Returns the size of the file + xoff_t Size() const { + if (table_.empty()) { + return 0; + } + ConstSegmentMapIterator i = --table_.end(); + return i->first + i->second.Size(); + } + + // Returns the number of blocks + xoff_t Blocks(size_t blksize = Constants::BLOCK_SIZE) const { + if (table_.empty()) { + return 0; + } + return ((Size() - 1) / blksize) + 1; + } + + // Returns the number of segments + xoff_t Segments() const { + return table_.size(); + } + + // Create a mutation according to "what". 
+ void ModifyTo(const Mutator &mutator, + FileSpec *modify) const { + modify->Reset(); + mutator.Mutate(&modify->table_, &table_, rand_); + modify->CheckSegments(); + } + + void CheckSegments() const { + for (ConstSegmentMapIterator iter(table_.begin()); + iter != table_.end(); ) { + ConstSegmentMapIterator iter0(iter++); + if (iter == table_.end()) { + break; + } + CHECK_EQ(iter0->first + iter0->second.Size(), iter->first); + } + } + + void Reset() { + table_.clear(); + } + + void Print() const { + for (ConstSegmentMapIterator iter(table_.begin()); + iter != table_.end(); + ++iter) { + const Segment &seg = iter->second; + cerr << "Segment at " << iter->first + << " (" << seg.ToString() << ")" << endl; + } + } + + void PrintData() const { + Block block; + for (BlockIterator iter(*this); !iter.Done(); iter.Next()) { + iter.Get(&block); + block.Print(); + } + } + + void WriteTmpFile(TmpFile *f) const { + Block block; + for (BlockIterator iter(*this); !iter.Done(); iter.Next()) { + iter.Get(&block); + f->Append(&block); + } + } + + void Get(Block *block, xoff_t offset, size_t size) const { + size_t got = 0; + block->SetSize(size); + + ConstSegmentMapIterator pos = table_.upper_bound(offset); + if (pos == table_.begin()) { + CHECK_EQ(0, Size()); + return; + } + --pos; + + while (got < size) { + CHECK(pos != table_.end()); + CHECK_GE(offset, pos->first); + + const Segment &seg = pos->second; + + // The position of this segment may start before this block starts, + // and then the position of the data may be offset from the seeding + // position. 
+ size_t seg_offset = offset - pos->first; + size_t advance = min(seg.Size() - seg_offset, + size - got); + + seg.Fill(seg_offset, advance, block->Data() + got); + + got += advance; + offset += advance; + ++pos; + } + } + + typedef BlockIterator iterator; + + private: + friend class BlockIterator; + + MTRandom *rand_; + SegmentMap table_; +}; + +class BlockIterator { +public: + explicit BlockIterator(const FileSpec& spec) + : spec_(spec), + blkno_(0), + blksize_(Constants::BLOCK_SIZE) { } + + BlockIterator(const FileSpec& spec, + size_t blksize) + : spec_(spec), + blkno_(0), + blksize_(blksize) { } + + bool Done() const { + return blkno_ >= spec_.Blocks(blksize_); + } + + void Next() { + blkno_++; + } + + xoff_t Blkno() const { + return blkno_; + } + + xoff_t Blocks() const { + return spec_.Blocks(blksize_); + } + + xoff_t Offset() const { + return blkno_ * blksize_; + } + + void SetBlock(xoff_t blkno) { + CHECK_LE(blkno, Blocks()); + blkno_ = blkno; + } + + void Get(Block *block) const { + spec_.Get(block, blkno_ * blksize_, BytesOnBlock()); + } + + size_t BytesOnBlock() const { + xoff_t blocks = spec_.Blocks(blksize_); + xoff_t size = spec_.Size(); + + DCHECK((blkno_ < blocks) || + (blkno_ == blocks && size % blksize_ == 0)); + + if (blkno_ == blocks) { + return 0; + } + if (blkno_ + 1 == blocks) { + return ((size - 1) % blksize_) + 1; + } + return blksize_; + } + + size_t BlockSize() const { + return blksize_; + } + +private: + const FileSpec& spec_; + xoff_t blkno_; + size_t blksize_; +}; + +class ExtFile { +public: + ExtFile() { + static int static_counter = 0; + pid_t pid = getpid(); + char buf[64]; + xoff_t xpid = pid; + snprintf(buf, 64, "/tmp/regtest.%" Q "u.%d", xpid, static_counter++); + filename_.append(buf); + unlink(filename_.c_str()); + } + + ~ExtFile() { + unlink(filename_.c_str()); + } + + const char* Name() const { + return filename_.c_str(); + } + + // Check whether a real file matches a file spec. 
+ bool EqualsSpec(const FileSpec &spec) const { + main_file t; + main_file_init(&t); + CHECK_EQ(0, main_file_open(&t, Name(), XO_READ)); + + Block tblock; + Block sblock; + for (BlockIterator iter(spec); !iter.Done(); iter.Next()) { + iter.Get(&sblock); + tblock.SetSize(sblock.Size()); + size_t tread; + CHECK_EQ(0, main_file_read(&t, + tblock.Data(), + tblock.Size(), &tread, "read failed")); + CHECK_EQ(0, CmpDifferentBlockBytes(tblock, sblock)); + } + + CHECK_EQ(0, main_file_close(&t)); + main_file_cleanup(&t); + return true; + } + +protected: + string filename_; +}; + +class TmpFile : public ExtFile { +public: + TmpFile() { + main_file_init(&file_); + CHECK_EQ(0, main_file_open(&file_, Name(), XO_WRITE)); + } + + ~TmpFile() { + main_file_cleanup(&file_); + } + + void Append(const Block *block) { + CHECK_EQ(0, main_file_write(&file_, + block->Data(), block->Size(), + "tmpfile write failed")); + } + + const char* Name() const { + if (main_file_isopen(&file_)) { + CHECK_EQ(0, main_file_close(&file_)); + } + return ExtFile::Name(); + } + +private: + mutable main_file file_; +}; diff --git a/lib/xdelta3/testing/modify.h b/lib/xdelta3/testing/modify.h new file mode 100644 index 0000000..6590ccd --- /dev/null +++ b/lib/xdelta3/testing/modify.h @@ -0,0 +1,400 @@ +/* xdelta3 - delta compression tools and library -*- Mode: C++ -*- + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +class Mutator { +public: + virtual ~Mutator() { } + virtual void Mutate(SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) const = 0; +}; + +class Change { +public: + enum Kind { + MODIFY = 1, // Mutate a certain range w/ random or supplied data + ADD = 2, // Insert random or supplied data + DELRANGE = 3, // Delete a specified range of data + COPY = 4, // Copy from one region, inserting elsewhere + MOVE = 5, // Copy then delete copied-from range + COPYOVER = 6 // Copy then delete copied-to range + + // ADD, DELRANGE, and COPY change the file size + // MODIFY, MOVE, COPYOVER preserve the file size + }; + + // Constructor for modify, add, delete. + Change(Kind kind0, xoff_t size0, xoff_t addr1_0) + : kind(kind0), + size(size0), + addr1(addr1_0), + addr2(0), + insert(NULL) { + CHECK(kind != MOVE && kind != COPY && kind != COPYOVER); + } + + // Constructor for modify, add w/ provided data. + Change(Kind kind0, xoff_t size0, xoff_t addr1_0, Segment *insert0) + : kind(kind0), + size(size0), + addr1(addr1_0), + addr2(0), + insert(insert0) { + CHECK(kind != MOVE && kind != COPY && kind != COPYOVER); + } + + // Constructor for move, copy, overwrite + Change(Kind kind0, xoff_t size0, xoff_t addr1_0, xoff_t addr2_0) + : kind(kind0), + size(size0), + addr1(addr1_0), + addr2(addr2_0), + insert(NULL) { + CHECK(kind == MOVE || kind == COPY || kind == COPYOVER); + } + + Kind kind; + xoff_t size; + xoff_t addr1; + xoff_t addr2; + Segment *insert; // For modify and/or add +}; + +typedef list ChangeList; +typedef typename ChangeList::const_iterator ConstChangeListIterator; +typedef typename ChangeList::iterator ChangeListIterator; + +class ChangeListMutator : public Mutator { +public: + ChangeListMutator(const ChangeList &cl) + : cl_(cl) { } + + ChangeListMutator() { } + + void Mutate(SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) const { + // The speed of processing gigabytes of data is so slow compared with + // these table-copy 
operations, no attempt to make this fast. + SegmentMap tmp; + + for (ConstChangeListIterator iter(cl_.begin()); + iter != cl_.end(); ++iter) { + const Change &ch = *iter; + tmp.clear(); + Mutate(ch, &tmp, source_table, rand); + tmp.swap(*table); + source_table = table; + } + } + + static void Mutate(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) { + switch (ch.kind) { + case Change::ADD: + AddChange(ch, table, source_table, rand); + break; + case Change::MODIFY: + ModifyChange(ch, table, source_table, rand); + break; + case Change::DELRANGE: + DeleteChange(ch, table, source_table, rand); + break; + case Change::COPY: + CopyChange(ch, table, source_table, rand); + break; + case Change::MOVE: + MoveChange(ch, table, source_table, rand); + break; + case Change::COPYOVER: + OverwriteChange(ch, table, source_table, rand); + break; + } + } + + static void ModifyChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) { + xoff_t m_start = ch.addr1; + xoff_t m_end = m_start + ch.size; + xoff_t i_start = 0; + xoff_t i_end = 0; + + for (ConstSegmentMapIterator iter(source_table->begin()); + iter != source_table->end(); + ++iter) { + const Segment &seg = iter->second; + i_start = iter->first; + i_end = i_start + seg.Size(); + + if (i_end <= m_start || i_start >= m_end) { + table->insert(table->end(), make_pair(i_start, seg)); + continue; + } + + if (i_start < m_start) { + table->insert(table->end(), + make_pair(i_start, + seg.Subseg(0, m_start - i_start))); + } + + // Insert the entire segment, even though it may extend into later + // segments. This condition avoids inserting it during later + // segments. 
+ if (m_start >= i_start) { + if (ch.insert != NULL) { + table->insert(table->end(), make_pair(m_start, *ch.insert)); + } else { + Segment part(m_end - m_start, rand); + table->insert(table->end(), make_pair(m_start, part)); + } + } + + if (i_end > m_end) { + table->insert(table->end(), + make_pair(m_end, + seg.Subseg(m_end - i_start, i_end - m_end))); + } + } + + // This check verifies that the modify does not extend past the + // source_table EOF. + CHECK_LE(m_end, i_end); + } + + static void AddChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) { + xoff_t m_start = ch.addr1; + xoff_t i_start = 0; + xoff_t i_end = 0; + + for (ConstSegmentMapIterator iter(source_table->begin()); + iter != source_table->end(); + ++iter) { + const Segment &seg = iter->second; + i_start = iter->first; + i_end = i_start + seg.Size(); + + if (i_end <= m_start) { + table->insert(table->end(), make_pair(i_start, seg)); + continue; + } + + if (i_start > m_start) { + table->insert(table->end(), make_pair(i_start + ch.size, seg)); + continue; + } + + if (i_start < m_start) { + table->insert(table->end(), + make_pair(i_start, + seg.Subseg(0, m_start - i_start))); + } + + if (ch.insert != NULL) { + table->insert(table->end(), make_pair(m_start, *ch.insert)); + } else { + Segment addseg(ch.size, rand); + table->insert(table->end(), make_pair(m_start, addseg)); + } + + if (m_start < i_end) { + table->insert(table->end(), + make_pair(m_start + ch.size, + seg.Subseg(m_start - i_start, + i_end - m_start))); + } + } + + CHECK_LE(m_start, i_end); + + // Special case for add at end-of-input. 
+ if (m_start == i_end) { + Segment addseg(ch.size, rand); + table->insert(table->end(), make_pair(m_start, addseg)); + } + } + + static void DeleteChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) { + xoff_t m_start = ch.addr1; + xoff_t m_end = m_start + ch.size; + xoff_t i_start = 0; + xoff_t i_end = 0; + + for (ConstSegmentMapIterator iter(source_table->begin()); + iter != source_table->end(); + ++iter) { + const Segment &seg = iter->second; + i_start = iter->first; + i_end = i_start + seg.Size(); + + if (i_end <= m_start) { + table->insert(table->end(), make_pair(i_start, seg)); + continue; + } + + if (i_start >= m_end) { + table->insert(table->end(), make_pair(i_start - ch.size, seg)); + continue; + } + + if (i_start < m_start) { + table->insert(table->end(), + make_pair(i_start, + seg.Subseg(0, m_start - i_start))); + } + + if (i_end > m_end) { + table->insert(table->end(), + make_pair(m_end - ch.size, + seg.Subseg(m_end - i_start, i_end - m_end))); + } + } + + CHECK_LT(m_start, i_end); + CHECK_LE(m_end, i_end); + } + + // A move is a copy followed by delete of the copied-from range. + static void MoveChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) { + SegmentMap tmp; + CHECK_NE(ch.addr1, ch.addr2); + CopyChange(ch, &tmp, source_table, rand); + Change d(Change::DELRANGE, ch.size, + ch.addr1 < ch.addr2 ? ch.addr1 : ch.addr1 + ch.size); + DeleteChange(d, table, &tmp, rand); + } + + // An overwrite is a copy followed by a delete of the copied-to range. 
+ static void OverwriteChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) { + SegmentMap tmp; + CHECK_NE(ch.addr1, ch.addr2); + CopyChange(ch, &tmp, source_table, rand); + Change d(Change::DELRANGE, ch.size, ch.addr2 + ch.size); + DeleteChange(d, table, &tmp, rand); + } + + static void CopyChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *ignore) { + xoff_t m_start = ch.addr2; + xoff_t c_start = ch.addr1; + xoff_t i_start = 0; + xoff_t i_end = 0; + + // Like AddChange() with AppendCopy instead of a random segment. + for (ConstSegmentMapIterator iter(source_table->begin()); + iter != source_table->end(); + ++iter) { + const Segment &seg = iter->second; + i_start = iter->first; + i_end = i_start + seg.Size(); + + if (i_end <= m_start) { + table->insert(table->end(), make_pair(i_start, seg)); + continue; + } + + if (i_start > m_start) { + table->insert(table->end(), make_pair(i_start + ch.size, seg)); + continue; + } + + if (i_start < m_start) { + table->insert(table->end(), + make_pair(i_start, + seg.Subseg(0, m_start - i_start))); + } + + AppendCopy(table, source_table, c_start, m_start, ch.size); + + if (m_start < i_end) { + table->insert(table->end(), + make_pair(m_start + ch.size, + seg.Subseg(m_start - i_start, i_end - m_start))); + } + } + + CHECK_LE(m_start, i_end); + + // Special case for copy to end-of-input. 
+ if (m_start == i_end) { + AppendCopy(table, source_table, c_start, m_start, ch.size); + } + } + + static void AppendCopy(SegmentMap *table, + const SegmentMap *source_table, + xoff_t copy_offset, + xoff_t append_offset, + xoff_t length) { + ConstSegmentMapIterator pos(source_table->upper_bound(copy_offset)); + --pos; + xoff_t got = 0; + + while (got < length) { + size_t seg_offset = copy_offset - pos->first; + size_t advance = min(pos->second.Size() - seg_offset, + (size_t)(length - got)); + + table->insert(table->end(), + make_pair(append_offset, + pos->second.Subseg(seg_offset, + advance))); + + got += advance; + copy_offset += advance; + append_offset += advance; + ++pos; + } + } + + ChangeList* Changes() { + return &cl_; + } + + const ChangeList* Changes() const { + return &cl_; + } + +private: + ChangeList cl_; +}; + +class Modify1stByte : public Mutator { +public: + void Mutate(SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) const { + ChangeListMutator::Mutate(Change(Change::MODIFY, 1, 0), + table, source_table, rand); + } +}; diff --git a/lib/xdelta3/testing/random.h b/lib/xdelta3/testing/random.h new file mode 100644 index 0000000..6087f16 --- /dev/null +++ b/lib/xdelta3/testing/random.h @@ -0,0 +1,157 @@ +/* xdelta3 - delta compression tools and library -*- Mode: C++ -*- + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#undef MT_LEN +#undef MT_IA +class MTRandom { + public: + enum Constants { + MT_LEN = 624, + MT_IA = 397 + }; + + static const uint32_t TEST_SEED1; + static const uint32_t UPPER_MASK; + static const uint32_t LOWER_MASK; + static const uint32_t MATRIX_A; + + MTRandom() { + Init(TEST_SEED1); + } + + explicit MTRandom(uint32_t seed) { + Init(seed); + } + + /* This Mersenne Twister code is attributed to Michael Brundage. Thanks! + * http://www.qbrundage.com/michaelb/pubs/essays/random_number_generation.html + */ + uint32_t Rand32 () { + uint32_t y; + static unsigned long mag01[2] = { + 0 , MATRIX_A + }; + + if (mt_index_ >= MT_LEN) { + int kk; + + for (kk = 0; kk < MT_LEN - MT_IA; kk++) { + y = (mt_buffer_[kk] & UPPER_MASK) | (mt_buffer_[kk + 1] & LOWER_MASK); + mt_buffer_[kk] = mt_buffer_[kk + MT_IA] ^ (y >> 1) ^ mag01[y & 0x1UL]; + } + for (;kk < MT_LEN - 1; kk++) { + y = (mt_buffer_[kk] & UPPER_MASK) | (mt_buffer_[kk + 1] & LOWER_MASK); + mt_buffer_[kk] = mt_buffer_[kk + (MT_IA - MT_LEN)] ^ (y >> 1) ^ mag01[y & 0x1UL]; + } + y = (mt_buffer_[MT_LEN - 1] & UPPER_MASK) | (mt_buffer_[0] & LOWER_MASK); + mt_buffer_[MT_LEN - 1] = mt_buffer_[MT_IA - 1] ^ (y >> 1) ^ mag01[y & 0x1UL]; + + mt_index_ = 0; + } + + y = mt_buffer_[mt_index_++]; + + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680UL; + y ^= (y << 15) & 0xefc60000UL; + y ^= (y >> 18); + + return y; + } + + uint32_t ExpRand32(uint32_t mean) { + double mean_d = mean; + double erand = log (1.0 / (Rand32() / (double)UINT32_MAX)); + uint32_t x = (uint32_t) (mean_d * erand + 0.5); + return x; + } + + uint64_t Rand64() { + return ((uint64_t)Rand32() << 32) | Rand32(); + } + + uint64_t ExpRand64(uint64_t mean) { + double mean_d = mean; + double erand = log (1.0 / (Rand64() / (double)UINT64_MAX)); /* Rand64() is a 64-bit draw: scale by the 64-bit maximum so the ratio stays in [0, 1] and the log is never negative, as in ExpRand32 */ + uint64_t x = (uint64_t) (mean_d * erand + 0.5); + return x; + } + + template + T Rand() { + switch (sizeof(T)) { + case sizeof(uint32_t): + return Rand32(); + case sizeof(uint64_t): + return Rand64(); + default: + cerr << 
"Invalid sizeof T" << endl; + abort(); + } + } + + template + T ExpRand(T mean) { + switch (sizeof(T)) { + case sizeof(uint32_t): + return ExpRand32(mean); + case sizeof(uint64_t): + return ExpRand64(mean); + default: + cerr << "Invalid sizeof T" << endl; + abort(); + } + } + + private: + void Init(uint32_t seed) { + mt_buffer_[0] = seed; + mt_index_ = MT_LEN; + for (int i = 1; i < MT_LEN; i++) { + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + mt_buffer_[i] = + (1812433253UL * (mt_buffer_[i-1] ^ (mt_buffer_[i-1] >> 30)) + i); + } + } + + int mt_index_; + uint32_t mt_buffer_[MT_LEN]; +}; + +const uint32_t MTRandom::TEST_SEED1 = 5489UL; +const uint32_t MTRandom::UPPER_MASK = 0x80000000; +const uint32_t MTRandom::LOWER_MASK = 0x7FFFFFFF; +const uint32_t MTRandom::MATRIX_A = 0x9908B0DF; + +class MTRandom8 { +public: + MTRandom8(MTRandom *rand) + : rand_(rand) { + } + + uint8_t Rand8() { + uint32_t r = rand_->Rand32(); + + // TODO: make this use a single byte at a time? + return (r & 0xff) ^ (r >> 7) ^ (r >> 15) ^ (r >> 21); + } + +private: + MTRandom *rand_; +}; diff --git a/lib/xdelta3/testing/regtest.cc b/lib/xdelta3/testing/regtest.cc new file mode 100644 index 0000000..daddc0d --- /dev/null +++ b/lib/xdelta3/testing/regtest.cc @@ -0,0 +1,1321 @@ +/* xdelta3 - delta compression tools and library -*- Mode: C++ -*- + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "test.h" +#include "random.h" +#include "sizes.h" + +template +class Regtest { +public: + typedef typename Constants::Sizes Sizes; + + struct Options { + Options() + : encode_srcwin_maxsz(1<<20), + block_size(Constants::BLOCK_SIZE), + window_size(Constants::WINDOW_SIZE), + size_known(false), + iopt_size(XD3_DEFAULT_IOPT_SIZE), + smatch_cfg(XD3_SMATCH_DEFAULT) { } + + xoff_t encode_srcwin_maxsz; + size_t block_size; + xoff_t window_size; + bool size_known; + usize_t iopt_size; + xd3_smatch_cfg smatch_cfg; + }; + +#include "segment.h" +#include "modify.h" +#include "file.h" +#include "cmp.h" +#include "delta.h" + + void InMemoryEncodeDecode(const FileSpec &source_file, + const FileSpec &target_file, + Block *coded_data, + const Options &options) { + xd3_stream encode_stream; + xd3_config encode_config; + xd3_source encode_source; + + xd3_stream decode_stream; + xd3_config decode_config; + xd3_source decode_source; + xoff_t verified_bytes = 0; + xoff_t encoded_bytes = 0; + + if (coded_data) { + coded_data->Reset(); + } + + memset(&encode_stream, 0, sizeof (encode_stream)); + memset(&encode_source, 0, sizeof (encode_source)); + + memset(&decode_stream, 0, sizeof (decode_stream)); + memset(&decode_source, 0, sizeof (decode_source)); + + xd3_init_config(&encode_config, XD3_ADLER32); + xd3_init_config(&decode_config, XD3_ADLER32); + + encode_config.winsize = options.window_size; + encode_config.iopt_size = options.iopt_size; + encode_config.smatch_cfg = options.smatch_cfg; + + CHECK_EQ(0, xd3_config_stream (&encode_stream, &encode_config)); + CHECK_EQ(0, xd3_config_stream (&decode_stream, &decode_config)); + + encode_source.blksize = options.block_size; + decode_source.blksize = options.block_size; + + encode_source.max_winsize = options.encode_srcwin_maxsz; + decode_source.max_winsize = options.encode_srcwin_maxsz; + + if (!options.size_known) + { + 
xd3_set_source (&encode_stream, &encode_source); + xd3_set_source (&decode_stream, &decode_source); + } + else + { + xd3_set_source_and_size (&encode_stream, &encode_source, + source_file.Size()); + xd3_set_source_and_size (&decode_stream, &decode_source, + source_file.Size()); + } + + BlockIterator source_iterator(source_file, options.block_size); + BlockIterator target_iterator(target_file, Constants::WINDOW_SIZE); + Block encode_source_block, decode_source_block; + Block decoded_block, target_block; + bool encoding = true; + bool done = false; + bool done_after_input = false; + + IF_DEBUG1 (XPR(NTR "source %" Q "u[%" Z "u] target %" Q "u winsize %" Z "u\n", + source_file.Size(), options.block_size, + target_file.Size(), + Constants::WINDOW_SIZE)); + + while (!done) { + target_iterator.Get(&target_block); + + xoff_t blks = target_iterator.Blocks(); + + IF_DEBUG2(XPR(NTR "target in %s: %" Q "u[%" Z "u] %" Q "u(%" Q "u) " + "verified %" Q "u\n", + encoding ? "encoding" : "decoding", + target_iterator.Offset(), + target_block.Size(), + target_iterator.Blkno(), + blks, + verified_bytes)); + + if (blks == 0 || target_iterator.Blkno() == (blks - 1)) { + xd3_set_flags(&encode_stream, XD3_FLUSH | encode_stream.flags); + } + + xd3_avail_input(&encode_stream, target_block.Data(), target_block.Size()); + encoded_bytes += target_block.Size(); + + process: + int ret; + const char *msg; + if (encoding) { + ret = xd3_encode_input(&encode_stream); + msg = encode_stream.msg; + } else { + ret = xd3_decode_input(&decode_stream); + msg = decode_stream.msg; + } + (void) msg; + + switch (ret) { + case XD3_OUTPUT: + if (encoding) { + if (coded_data != NULL) { + // Optional encoded-output to the caller + coded_data->Append(encode_stream.next_out, + encode_stream.avail_out); + } + // Feed this data to the decoder. 
+ xd3_avail_input(&decode_stream, + encode_stream.next_out, + encode_stream.avail_out); + xd3_consume_output(&encode_stream); + encoding = false; + } else { + decoded_block.Append(decode_stream.next_out, + decode_stream.avail_out); + xd3_consume_output(&decode_stream); + } + goto process; + + case XD3_GETSRCBLK: { + xd3_source *src = (encoding ? &encode_source : &decode_source); + Block *block = (encoding ? &encode_source_block : &decode_source_block); + if (encoding) { + IF_DEBUG2(XPR(NTR "[srcblock] %" Q "u last srcpos %" Q "u " + "encodepos %" Q "u\n", + encode_source.getblkno, + encode_stream.match_last_srcpos, + encode_stream.input_position + encode_stream.total_in)); + } + + source_iterator.SetBlock(src->getblkno); + source_iterator.Get(block); + src->curblkno = src->getblkno; + src->onblk = block->Size(); + src->curblk = block->Data(); + + goto process; + } + + case XD3_INPUT: + if (!encoding) { + encoding = true; + goto process; + } else { + if (done_after_input) { + done = true; + continue; + } + + if (target_block.Size() < target_iterator.BlockSize()) { + encoding = false; + } else { + target_iterator.Next(); + } + continue; + } + + case XD3_WINFINISH: + if (encoding) { + if (encode_stream.flags & XD3_FLUSH) { + done_after_input = true; + } + encoding = false; + } else { + CHECK_EQ(0, CmpDifferentBlockBytesAtOffset(decoded_block, + target_file, + verified_bytes)); + verified_bytes += decoded_block.Size(); + decoded_block.Reset(); + encoding = true; + } + goto process; + + case XD3_WINSTART: + case XD3_GOTHEADER: + goto process; + + default: + XPR(NTR "%s = %s %s\n", encoding ? "E " : " D", + xd3_strerror(ret), + msg == NULL ? 
"" : msg); + + CHECK_EQ(0, ret); + CHECK_EQ(-1, ret); + } + } + + CHECK_EQ(target_file.Size(), encoded_bytes); + CHECK_EQ(target_file.Size(), verified_bytes); + CHECK_EQ(0, xd3_close_stream(&decode_stream)); + CHECK_EQ(0, xd3_close_stream(&encode_stream)); + xd3_free_stream(&encode_stream); + xd3_free_stream(&decode_stream); + } + + void MainEncodeDecode(const TmpFile &source_file, + const TmpFile &target_file, + ExtFile *coded_data, + const Options &options) { + vector ecmd; + char bbuf[16]; + snprintf(bbuf, sizeof(bbuf), "-B%" Q "u", options.encode_srcwin_maxsz); + ecmd.push_back("xdelta3"); + ecmd.push_back(bbuf); + ecmd.push_back("-s"); + ecmd.push_back(source_file.Name()); + ecmd.push_back(target_file.Name()); + ecmd.push_back(coded_data->Name()); + ecmd.push_back(NULL); + + CHECK_EQ(0, xd3_main_cmdline(ecmd.size() - 1, + const_cast(&ecmd[0]))); + + vector dcmd; + ExtFile recon_file; + dcmd.push_back("xdelta3"); + dcmd.push_back(bbuf); /* give the decoder the same -B source window as the encoder; this used to be appended to the already-run ecmd and had no effect */ + dcmd.push_back("-d"); + dcmd.push_back("-s"); + dcmd.push_back(source_file.Name()); + dcmd.push_back(coded_data->Name()); + dcmd.push_back(recon_file.Name()); + dcmd.push_back(NULL); + + CHECK_EQ(0, xd3_main_cmdline(dcmd.size() - 1, + const_cast(&dcmd[0]))); + + CHECK_EQ(0, test_compare_files(recon_file.Name(), + target_file.Name())); + } + + // Similar to xd3_process_memory, with support for test Options. + // Exercises xd3_process_stream. 
+ int TestProcessMemory (int is_encode, + int (*func) (xd3_stream *), + const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max, + const Options &options) { + xd3_stream stream; + xd3_config config; + xd3_source src; + int ret; + + memset (& stream, 0, sizeof (stream)); + memset (& config, 0, sizeof (config)); + + if (is_encode) + { + config.winsize = input_size; + config.iopt_size = options.iopt_size; + config.sprevsz = xd3_pow2_roundup (config.winsize); + } + + if ((ret = xd3_config_stream (&stream, &config)) != 0) + { + goto exit; + } + + if (source != NULL) + { + memset (& src, 0, sizeof (src)); + + src.blksize = source_size; + src.onblk = source_size; + src.curblk = source; + src.curblkno = 0; + src.max_winsize = source_size; + + if ((ret = xd3_set_source_and_size (&stream, &src, source_size)) != 0) + { + goto exit; + } + } + + if ((ret = xd3_process_stream (is_encode, + & stream, + func, 1, + input, input_size, + output, + output_size, + output_size_max)) != 0) + { + goto exit; + } + + exit: + if (ret != 0) + { + IF_DEBUG2 (DP(RINT "test_process_memory: %d: %s\n", ret, stream.msg)); + } + xd3_free_stream(&stream); + return ret; + } + + void EncodeDecodeAPI(const FileSpec &spec0, const FileSpec &spec1, + Block *delta, const Options &options) { + Block from; + Block to; + spec0.Get(&from, 0, spec0.Size()); + spec1.Get(&to, 0, spec1.Size()); + + delta->SetSize(to.Size() * 1.5); + usize_t out_size; + int enc_ret = TestProcessMemory(true, + &xd3_encode_input, + to.Data(), + to.Size(), + from.Data(), + from.Size(), + delta->Data(), + &out_size, + delta->Size(), + options); + CHECK_EQ(0, enc_ret); + delta->SetSize(out_size); + + Block recon; + recon.SetSize(to.Size()); + usize_t recon_size; + int dec_ret = xd3_decode_memory(delta->Data(), + delta->Size(), + from.Data(), + from.Size(), + recon.Data(), + &recon_size, + recon.Size(), + 0); + CHECK_EQ(0, dec_ret); + 
CHECK_EQ(0, CmpDifferentBlockBytes(to, recon)); + } + +////////////////////////////////////////////////////////////////////// + +void TestPrintf() { + char buf[64]; + xoff_t x = XOFF_T_MAX; + snprintf_func (buf, sizeof(buf), "%" Q "u", x); + const char *expect = XD3_USE_LARGEFILE64 ? + "18446744073709551615" : "4294967295"; + XD3_ASSERT(strcmp (buf, expect) == 0); +} + +void TestRandomNumbers() { + MTRandom rand; + int rounds = 1<<20; + uint64_t usum = 0; + uint64_t esum = 0; + + for (int i = 0; i < rounds; i++) { + usum += rand.Rand32(); + esum += rand.ExpRand32(1024); + } + + double allowed_error = 0.01; + + uint32_t umean = usum / rounds; + uint32_t emean = esum / rounds; + + uint32_t uexpect = UINT32_MAX / 2; + uint32_t eexpect = 1024; + + if (umean < uexpect * (1.0 - allowed_error) || + umean > uexpect * (1.0 + allowed_error)) { + XPR(NT "uniform mean error: %u != %u\n", umean, uexpect); + abort(); + } + + if (emean < eexpect * (1.0 - allowed_error) || + emean > eexpect * (1.0 + allowed_error)) { + XPR(NT "exponential mean error: %u != %u\n", emean, eexpect); + abort(); + } +} + +void TestRandomFile() { + MTRandom rand1; + FileSpec spec1(&rand1); + BlockIterator bi(spec1); + + spec1.GenerateFixedSize(0); + CHECK_EQ(0, spec1.Size()); + CHECK_EQ(0, spec1.Segments()); + CHECK_EQ(0, spec1.Blocks()); + bi.SetBlock(0); + CHECK_EQ(0, bi.BytesOnBlock()); + + spec1.GenerateFixedSize(1); + CHECK_EQ(1, spec1.Size()); + CHECK_EQ(1, spec1.Segments()); + CHECK_EQ(1, spec1.Blocks()); + bi.SetBlock(0); + CHECK_EQ(1, bi.BytesOnBlock()); + + spec1.GenerateFixedSize(Constants::BLOCK_SIZE); + CHECK_EQ(Constants::BLOCK_SIZE, spec1.Size()); + CHECK_EQ(1, spec1.Segments()); + CHECK_EQ(1, spec1.Blocks()); + bi.SetBlock(0); + CHECK_EQ(Constants::BLOCK_SIZE, bi.BytesOnBlock()); + bi.SetBlock(1); + CHECK_EQ(0, bi.BytesOnBlock()); + + spec1.GenerateFixedSize(Constants::BLOCK_SIZE + 1); + CHECK_EQ(Constants::BLOCK_SIZE + 1, spec1.Size()); + CHECK_EQ(2, spec1.Segments()); + CHECK_EQ(2, 
spec1.Blocks()); + bi.SetBlock(0); + CHECK_EQ(Constants::BLOCK_SIZE, bi.BytesOnBlock()); + bi.SetBlock(1); + CHECK_EQ(1, bi.BytesOnBlock()); + + spec1.GenerateFixedSize(Constants::BLOCK_SIZE * 2); + CHECK_EQ(Constants::BLOCK_SIZE * 2, spec1.Size()); + CHECK_EQ(2, spec1.Segments()); + CHECK_EQ(2, spec1.Blocks()); + bi.SetBlock(0); + CHECK_EQ(Constants::BLOCK_SIZE, bi.BytesOnBlock()); + bi.SetBlock(1); + CHECK_EQ(Constants::BLOCK_SIZE, bi.BytesOnBlock()); +} + +void TestFirstByte() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + + spec0.GenerateFixedSize(0); + spec1.GenerateFixedSize(1); + CHECK_EQ(0, CmpDifferentBytes(spec0, spec0)); + CHECK_EQ(0, CmpDifferentBytes(spec1, spec1)); + CHECK_EQ(1, CmpDifferentBytes(spec0, spec1)); + CHECK_EQ(1, CmpDifferentBytes(spec1, spec0)); + + spec0.GenerateFixedSize(1); + spec0.ModifyTo(Modify1stByte(), &spec1); + CHECK_EQ(1, CmpDifferentBytes(spec0, spec1)); + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE + 1); + spec0.ModifyTo(Modify1stByte(), &spec1); + CHECK_EQ(1, CmpDifferentBytes(spec0, spec1)); + + SizeIterator si(&rand, Constants::TEST_ROUNDS); + + for (; !si.Done(); si.Next()) { + size_t size = si.Get(); + if (size == 0) { + continue; + } + spec0.GenerateFixedSize(size); + spec0.ModifyTo(Modify1stByte(), &spec1); + InMemoryEncodeDecode(spec0, spec1, NULL, Options()); + } +} + +void TestModifyMutator() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 3); + + struct { + size_t size; + size_t addr; + } test_cases[] = { + { Constants::BLOCK_SIZE, 0 }, + { Constants::BLOCK_SIZE / 2, 1 }, + { Constants::BLOCK_SIZE, 1 }, + { Constants::BLOCK_SIZE * 2, 1 }, + }; + + for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) { + ChangeList cl1; + cl1.push_back(Change(Change::MODIFY, test_cases[i].size, + test_cases[i].addr)); + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + CHECK_EQ(spec0.Size(), spec1.Size()); + + size_t diff = 
CmpDifferentBytes(spec0, spec1); + CHECK_LE(diff, test_cases[i].size); + + // There is a 1/256 probability of the changed byte matching the + // original value. The following allows double the probability to + // pass. + CHECK_GE(diff, test_cases[i].size - (2 * test_cases[i].size / 256)); + + InMemoryEncodeDecode(spec0, spec1, NULL, Options()); + } +} + +void TestAddMutator() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 2); + // TODO: fix this test (for all block sizes)! it's broken because + // the same byte could be added? + + struct { + size_t size; + size_t addr; + size_t expected_adds; + } test_cases[] = { + { 1, 0, 2 /* 1st byte, last byte (short block) */ }, + { 1, 1, 3 /* 1st 2 bytes, last byte */ }, + { 1, Constants::BLOCK_SIZE - 1, 2 /* changed, last */ }, + { 1, Constants::BLOCK_SIZE, 2 /* changed, last */ }, + { 1, Constants::BLOCK_SIZE + 1, 3 /* changed + 1st of 2nd block, last */ }, + { 1, 2 * Constants::BLOCK_SIZE, 1 /* last byte */ }, + }; + + for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) { + ChangeList cl1; + cl1.push_back(Change(Change::ADD, test_cases[i].size, test_cases[i].addr)); + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + CHECK_EQ(spec0.Size() + test_cases[i].size, spec1.Size()); + + Block coded; + InMemoryEncodeDecode(spec0, spec1, &coded, Options()); + + Delta delta(coded); + CHECK_EQ(test_cases[i].expected_adds, + delta.AddedBytes()); + } +} + +void TestDeleteMutator() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 4); + + struct { + size_t size; + size_t addr; + } test_cases[] = { + // Note: an entry { Constants::BLOCK_SIZE, 0 }, + // does not work because the xd3_srcwin_move_point logic won't + // find a copy if it occurs >= double its size into the file. 
+ { Constants::BLOCK_SIZE / 2, 0 }, + { Constants::BLOCK_SIZE / 2, Constants::BLOCK_SIZE / 2 }, + { Constants::BLOCK_SIZE, Constants::BLOCK_SIZE / 2 }, + { Constants::BLOCK_SIZE * 2, Constants::BLOCK_SIZE * 3 / 2 }, + { Constants::BLOCK_SIZE, Constants::BLOCK_SIZE * 2 }, + }; + + for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) { + ChangeList cl1; + cl1.push_back(Change(Change::DELRANGE, test_cases[i].size, + test_cases[i].addr)); + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + CHECK_EQ(spec0.Size() - test_cases[i].size, spec1.Size()); + + Block coded; + InMemoryEncodeDecode(spec0, spec1, &coded, Options()); + + Delta delta(coded); + CHECK_EQ(0, delta.AddedBytes()); + } +} + +void TestCopyMutator() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 3); + + struct { + size_t size; + size_t from; + size_t to; + } test_cases[] = { + // Copy is difficult to write tests for because where Xdelta finds + // copies, it does not enter checksums. So these tests copy data from + // later to earlier so that checksumming will start. + { Constants::BLOCK_SIZE / 2, Constants::BLOCK_SIZE / 2, 0 }, + { Constants::BLOCK_SIZE, 2 * Constants::BLOCK_SIZE, + Constants::BLOCK_SIZE, }, + }; + + for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) { + ChangeList cl1; + cl1.push_back(Change(Change::COPY, test_cases[i].size, + test_cases[i].from, test_cases[i].to)); + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + CHECK_EQ(spec0.Size() + test_cases[i].size, spec1.Size()); + + Block coded; + InMemoryEncodeDecode(spec0, spec1, &coded, Options()); + + Delta delta(coded); + CHECK_EQ(0, delta.AddedBytes()); + } +} + +void TestMoveMutator() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 3); + + struct { + size_t size; + size_t from; + size_t to; + } test_cases[] = { + // This is easier to test than Copy but has the same trouble as Delete. 
+ { Constants::BLOCK_SIZE / 2, Constants::BLOCK_SIZE / 2, 0 }, + { Constants::BLOCK_SIZE / 2, 0, Constants::BLOCK_SIZE / 2 }, + { Constants::BLOCK_SIZE, Constants::BLOCK_SIZE, 2 * + Constants::BLOCK_SIZE }, + { Constants::BLOCK_SIZE, 2 * Constants::BLOCK_SIZE, + Constants::BLOCK_SIZE }, + { Constants::BLOCK_SIZE * 3 / 2, Constants::BLOCK_SIZE, + Constants::BLOCK_SIZE * 3 / 2 }, + + // This is a no-op + { Constants::BLOCK_SIZE, Constants::BLOCK_SIZE * 2, + 3 * Constants::BLOCK_SIZE }, + }; + + for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) { + ChangeList cl1; + cl1.push_back(Change(Change::MOVE, test_cases[i].size, + test_cases[i].from, test_cases[i].to)); + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + CHECK_EQ(spec0.Size(), spec1.Size()); + + Block coded; + InMemoryEncodeDecode(spec0, spec1, &coded, Options()); + + Delta delta(coded); + CHECK_EQ(0, delta.AddedBytes()); + } +} + +void TestOverwriteMutator() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE); + + ChangeList cl1; + cl1.push_back(Change(Change::COPYOVER, 10, 0, 20)); + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + CHECK_EQ(spec0.Size(), spec1.Size()); + + Block b0, b1; + BlockIterator(spec0).Get(&b0); + BlockIterator(spec1).Get(&b1); + + CHECK(memcmp(b0.Data(), b1.Data() + 20, 10) == 0); + CHECK(memcmp(b0.Data(), b1.Data(), 20) == 0); + CHECK(memcmp(b0.Data() + 30, b1.Data() + 30, + Constants::BLOCK_SIZE - 30) == 0); + + xoff_t zero = 0; + cl1.clear(); + cl1.push_back(Change(Change::COPYOVER, 10, 20, zero)); + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + CHECK_EQ(spec0.Size(), spec1.Size()); + + BlockIterator(spec0).Get(&b0); + BlockIterator(spec1).Get(&b1); + + CHECK(memcmp(b0.Data() + 20, b1.Data(), 10) == 0); + CHECK(memcmp(b0.Data() + 10, b1.Data() + 10, + Constants::BLOCK_SIZE - 10) == 0); +} + +// Note: this test is written to expose a problem, but the problem was +// only exposed with BLOCK_SIZE = 128. 
+void TestNonBlocking() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + FileSpec spec2(&rand); + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 3); + + // This is a lazy target match + Change ct(Change::COPYOVER, 22, + Constants::BLOCK_SIZE + 50, + Constants::BLOCK_SIZE + 20); + + // This is a source match just after the block boundary, shorter + // than the lazy target match. + Change cs1(Change::COPYOVER, 16, + Constants::BLOCK_SIZE + 51, + Constants::BLOCK_SIZE - 1); + + // This overwrites the original source bytes. + Change cs2(Change::MODIFY, 108, + Constants::BLOCK_SIZE + 20); + + // This changes the first blocks + Change c1st(Change::MODIFY, Constants::BLOCK_SIZE - 2, 0); + + ChangeList csl; + csl.push_back(cs1); + csl.push_back(cs2); + csl.push_back(c1st); + + spec0.ModifyTo(ChangeListMutator(csl), &spec1); + + ChangeList ctl; + ctl.push_back(ct); + ctl.push_back(c1st); + + spec0.ModifyTo(ChangeListMutator(ctl), &spec2); + + InMemoryEncodeDecode(spec1, spec2, NULL, Options()); +} + +void TestEmptyInMemory() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + Block block; + + spec0.GenerateFixedSize(0); + spec1.GenerateFixedSize(0); + + InMemoryEncodeDecode(spec0, spec1, &block, Options()); + + Delta delta(block); + CHECK_LT(0, block.Size()); + CHECK_EQ(1, delta.Windows()); +} + +void TestBlockInMemory() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + Block block; + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE); + spec1.GenerateFixedSize(Constants::BLOCK_SIZE); + + InMemoryEncodeDecode(spec0, spec1, &block, Options()); + + Delta delta(block); + CHECK_EQ(spec1.Blocks(Constants::WINDOW_SIZE), delta.Windows()); +} + +void TestSmallStride() { + MTRandom rand; + FileSpec spec0(&rand); + usize_t size = Constants::BLOCK_SIZE * 4; + spec0.GenerateFixedSize(size); + + // Note: Not very good performance due to hash collisions, note 3x + // multiplier below. 
+ for (int s = 15; s < 101; s++) { + usize_t changes = 0; + ChangeList cl; + for (usize_t j = s; j < size; j += s, ++changes) + { + cl.push_back(Change(Change::MODIFY, 1, j)); + } + + FileSpec spec1(&rand); + spec0.ModifyTo(ChangeListMutator(cl), &spec1); + + Options options; + options.encode_srcwin_maxsz = size; + options.iopt_size = 128; + options.smatch_cfg = XD3_SMATCH_SLOW; + options.size_known = false; + + Block block; + InMemoryEncodeDecode(spec0, spec1, &block, options); + Delta delta(block); + + IF_DEBUG1(DP(RINT "[stride=%d] changes=%" W "u adds=%" Q "u\n", + s, changes, delta.AddedBytes())); + double allowance = Constants::BLOCK_SIZE < 8192 || s < 30 ? 3.0 : 1.1; + CHECK_GE(allowance * changes, (double)delta.AddedBytes()); + } +} + +void TestCopyWindow() { + // Construct an input that has many copies, to fill the IOPT buffer + // and force a source window decision. "srclen" may be set to a + // value that goes beyond the end-of-source. + const int clen = 16; + const int size = 4096; + const int nmov = size / clen; + const int iters = 16; + uint32_t added_01 = 0; + uint32_t added_10 = 0; + for (int i = 1; i <= iters; i++) { + MTRandom rand(MTRandom::TEST_SEED1 * i); + FileSpec spec0(&rand); + ChangeList cl; + + spec0.GenerateFixedSize(size); + + for (int j = 0; j < nmov; j += 2) + { + cl.push_back(Change(Change::MOVE, + clen, (j + 1) * clen, j * clen)); + } + + FileSpec spec1(&rand); + spec0.ModifyTo(ChangeListMutator(cl), &spec1); + + Options options; + options.encode_srcwin_maxsz = size; + options.iopt_size = 128; + options.smatch_cfg = XD3_SMATCH_SLOW; + + Block block1; + InMemoryEncodeDecode(spec0, spec1, &block1, options); + Delta delta1(block1); + // Allow one missed window (e.g., hash collisions) + added_01 += delta1.AddedBytes(); + + Block block2; + InMemoryEncodeDecode(spec1, spec0, &block2, options); + Delta delta2(block2); + // Allow one missed window (e.g., hash collisions) + added_10 += delta2.AddedBytes(); + + Block block3; + Block block4; + 
EncodeDecodeAPI(spec0, spec1, &block3, options); + EncodeDecodeAPI(spec1, spec0, &block4, options); + } + // Average less than 0.5 misses (of length clen) per iteration. + CHECK_GE(clen * iters / 2, added_01); + CHECK_GE(clen * iters / 2, added_10); +} + +void TestCopyFromEnd() { + // Copies from the end of the source buffer, which reach a block + // boundary end-of-file. + const int size = 4096; + const int clen = 16; + const int nmov = (size / 2) / clen; + const int iters = 16; + uint32_t added_01 = 0; + uint32_t added_10 = 0; + for (int i = 1; i <= iters; i++) { + MTRandom rand(MTRandom::TEST_SEED1 * i); + FileSpec spec0(&rand); + ChangeList cl; + + spec0.GenerateFixedSize(size); + + cl.push_back(Change(Change::MODIFY, 2012, 2048)); + + for (int j = 0; j < nmov; j += 2) + { + cl.push_back(Change(Change::MOVE, + clen, (j + 1) * clen, j * clen)); + } + + cl.push_back(Change(Change::COPYOVER, 28, 4068, 3000)); + cl.push_back(Change(Change::COPYOVER, 30, 4066, 3100)); + cl.push_back(Change(Change::COPYOVER, 32, 4064, 3200)); + + FileSpec spec1(&rand); + spec0.ModifyTo(ChangeListMutator(cl), &spec1); + + Options options; + options.encode_srcwin_maxsz = size; + options.iopt_size = 128; + options.smatch_cfg = XD3_SMATCH_SLOW; + + Block block1; + InMemoryEncodeDecode(spec0, spec1, &block1, options); + Delta delta1(block1); + added_01 += delta1.AddedBytes(); + + Block block2; + InMemoryEncodeDecode(spec1, spec0, &block2, options); + Delta delta2(block2); + added_10 += delta2.AddedBytes(); + + Block block3; + Block block4; + EncodeDecodeAPI(spec0, spec1, &block3, options); + EncodeDecodeAPI(spec1, spec0, &block4, options); + } + CHECK_GE(2000 * iters, added_01); + CHECK_LE(2000 * iters, added_10); +} + +void TestHalfBlockCopy() { + // Create a half-block copy, 7.5 blocks apart, in a pair of files: + // 0 1 ... 
6 7 + // spec0 [bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb][ccccc][bbbb_] + // spec1 [aaaaa][ccccc][aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_] + // where stage= + // 0: the final block is full + // a. (source)spec1->(target)spec0 copies block C: reads 8 source + // blocks during target block 0. + // b. (source)spec0->(target)spec1 does not copy block C b/c attempt + // to read past EOF empties block 0 from (virtual) block cache + // 1: the final block is less than full. + // a. (same) copies block C + // b. (same) copies block C, unlike 0a, no attempt to read past EOF + // + // "virtual" above refers to XD3_TOOFARBACK, since there is no caching + // in the API, there is simply a promise not to request blocks that are + // beyond source->max_winsize from the last known source file position. + for (int stage = 0; stage < 2; stage++) + { + IF_DEBUG1 (DP(RINT "half_block_copy stage %d\n", stage)); + + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 8 - stage); + + ChangeList cl1; + cl1.push_back(Change(Change::MODIFY, + Constants::BLOCK_SIZE / 2, // size + 0)); + cl1.push_back(Change(Change::COPYOVER, + Constants::BLOCK_SIZE / 2, // size + Constants::BLOCK_SIZE * 7, // offset + Constants::BLOCK_SIZE / 2)); + cl1.push_back(Change(Change::MODIFY, + Constants::BLOCK_SIZE * 7, + Constants::BLOCK_SIZE - stage)); + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + + Options options; + options.encode_srcwin_maxsz = Constants::BLOCK_SIZE * 8; + + Block block0; + Block block1; + InMemoryEncodeDecode(spec0, spec1, &block0, options); + InMemoryEncodeDecode(spec1, spec0, &block1, options); + + Delta delta0(block0); + Delta delta1(block1); + + const int yes = + Constants::BLOCK_SIZE * 8 - Constants::BLOCK_SIZE / 2; + const int no = + Constants::BLOCK_SIZE * 8 - Constants::BLOCK_SIZE / 2; + + if (stage == 0) + { + CHECK_EQ(yes, delta0.AddedBytes()); + CHECK_EQ(no, delta1.AddedBytes()); + } + else + { + CHECK_EQ(yes, 
delta0.AddedBytes()); + CHECK_EQ(yes, delta1.AddedBytes()); + } + } +} + +void FourWayMergeTest(const FileSpec &spec0, + const FileSpec &spec1, + const FileSpec &spec2, + const FileSpec &spec3) { + TmpFile f0, f1, f2, f3; + ExtFile d01, d12, d23; + Options options; + options.encode_srcwin_maxsz = + std::max(spec0.Size(), options.encode_srcwin_maxsz); + + spec0.WriteTmpFile(&f0); + spec1.WriteTmpFile(&f1); + spec2.WriteTmpFile(&f2); + spec3.WriteTmpFile(&f3); + + MainEncodeDecode(f0, f1, &d01, options); + MainEncodeDecode(f1, f2, &d12, options); + MainEncodeDecode(f2, f3, &d23, options); + + // Merge 2 + ExtFile out; + vector mcmd; + mcmd.push_back("xdelta3"); + mcmd.push_back("merge"); + mcmd.push_back("-m"); + mcmd.push_back(d01.Name()); + mcmd.push_back(d12.Name()); + mcmd.push_back(out.Name()); + mcmd.push_back(NULL); + + // XPR(NTR "Running one merge: %s\n", CommandToString(mcmd).c_str()); + CHECK_EQ(0, xd3_main_cmdline(mcmd.size() - 1, + const_cast(&mcmd[0]))); + + ExtFile recon; + vector tcmd; + tcmd.push_back("xdelta3"); + tcmd.push_back("-d"); + tcmd.push_back("-s"); + tcmd.push_back(f0.Name()); + tcmd.push_back(out.Name()); + tcmd.push_back(recon.Name()); + tcmd.push_back(NULL); + + // XPR(NTR "Running one recon! %s\n", CommandToString(tcmd).c_str()); + CHECK_EQ(0, xd3_main_cmdline(tcmd.size() - 1, + const_cast(&tcmd[0]))); + // XPR(NTR "Should equal! 
%s\n", f2.Name()); + + CHECK(recon.EqualsSpec(spec2)); + + // Merge 3 + ExtFile out3; + vector mcmd3; + mcmd3.push_back("xdelta3"); + mcmd3.push_back("merge"); + mcmd3.push_back("-m"); + mcmd3.push_back(d01.Name()); + mcmd3.push_back("-m"); + mcmd3.push_back(d12.Name()); + mcmd3.push_back(d23.Name()); + mcmd3.push_back(out3.Name()); + mcmd3.push_back(NULL); + + // XPR(NTR "Running one 3-merge: %s\n", CommandToString(mcmd3).c_str()); + CHECK_EQ(0, xd3_main_cmdline(mcmd3.size() - 1, + const_cast(&mcmd3[0]))); + + ExtFile recon3; + vector tcmd3; + tcmd3.push_back("xdelta3"); + tcmd3.push_back("-d"); + tcmd3.push_back("-s"); + tcmd3.push_back(f0.Name()); + tcmd3.push_back(out3.Name()); + tcmd3.push_back(recon3.Name()); + tcmd3.push_back(NULL); + + // XPR(NTR "Running one 3-recon %s\n", CommandToString(tcmd3).c_str()); + CHECK_EQ(0, xd3_main_cmdline(tcmd3.size() - 1, + const_cast(&tcmd3[0]))); + // XPR(NTR "Should equal %s\n", f3.Name()); + + CHECK(recon3.EqualsSpec(spec3)); +} + +void TestMergeCommand1() { + /* Repeat random-input testing for a number of iterations. + * Test 2, 3, and 4-file scenarios (i.e., 1, 2, and 3-delta merges). */ + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + FileSpec spec2(&rand); + FileSpec spec3(&rand); + + SizeIterator si0(&rand, 10); + + for (; !si0.Done(); si0.Next()) { + size_t size0 = si0.Get(); + + SizeIterator si1(&rand, 10); + for (; !si1.Done(); si1.Next()) { + size_t change1 = si1.Get(); + + if (change1 == 0) { + continue; + } + + // XPR(NTR "S0 = %lu\n", size0); + // XPR(NTR "C1 = %lu\n", change1); + // XPR(NTR "."); + + size_t add1_pos = size0 ? rand.Rand32() % size0 : 0; + size_t del2_pos = size0 ? 
rand.Rand32() % size0 : 0; + + spec0.GenerateFixedSize(size0); + + ChangeList cl1, cl2, cl3; + + size_t change3 = change1; + size_t change3_pos; + + if (change3 >= size0) { + change3 = size0; + change3_pos = 0; + } else { + change3_pos = rand.Rand32() % (size0 - change3); + } + + cl1.push_back(Change(Change::ADD, change1, add1_pos)); + cl2.push_back(Change(Change::DELRANGE, change1, del2_pos)); + cl3.push_back(Change(Change::MODIFY, change3, change3_pos)); + + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + spec1.ModifyTo(ChangeListMutator(cl2), &spec2); + spec2.ModifyTo(ChangeListMutator(cl3), &spec3); + + FourWayMergeTest(spec0, spec1, spec2, spec3); + FourWayMergeTest(spec3, spec2, spec1, spec0); + } + } +} + +void TestMergeCommand2() { + /* Same as above, different mutation pattern. */ + /* TODO: run this with large sizes too */ + /* TODO: run this with small sizes too */ + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + FileSpec spec2(&rand); + FileSpec spec3(&rand); + + SizeIterator si0(&rand, 10); + for (; !si0.Done(); si0.Next()) { + size_t size0 = si0.Get(); + + SizeIterator si1(&rand, 10); + for (; !si1.Done(); si1.Next()) { + size_t size1 = si1.Get(); + + SizeIterator si2(&rand, 10); + for (; !si2.Done(); si2.Next()) { + size_t size2 = si2.Get(); + + SizeIterator si3(&rand, 10); + for (; !si3.Done(); si3.Next()) { + size_t size3 = si3.Get(); + + // We're only interested in three sizes, strictly decreasing. 
*/ + if (size3 >= size2 || size2 >= size1 || size1 >= size0) { + continue; + } + + // XPR(NTR "S0 = %lu\n", size0); + // XPR(NTR "S1 = %lu\n", size1); + // XPR(NTR "S2 = %lu\n", size2); + // XPR(NTR "S3 = %lu\n", size3); + // XPR(NTR "."); + + spec0.GenerateFixedSize(size0); + + ChangeList cl1, cl2, cl3; + + cl1.push_back(Change(Change::DELRANGE, size0 - size1, 0)); + cl2.push_back(Change(Change::DELRANGE, size0 - size2, 0)); + cl3.push_back(Change(Change::DELRANGE, size0 - size3, 0)); + + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + spec0.ModifyTo(ChangeListMutator(cl2), &spec2); + spec0.ModifyTo(ChangeListMutator(cl3), &spec3); + + FourWayMergeTest(spec0, spec1, spec2, spec3); + FourWayMergeTest(spec3, spec2, spec1, spec0); + } + } + } + } +} + +void TestLastFrontierBlock() { + // This test constructs an input that can expose + // https://github.com/jmacd/xdelta/issues/188 + // when run through the command-line with source via a FIFO. + // That is not tested here, but the test stays. + if (Constants::WINDOW_SIZE < XD3_ALLOCSIZE) + { + return; + } + + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + const xoff_t size = XD3_ALLOCSIZE * 64; // == XD3_MINSRCWINSZ * 2 + const xoff_t edit = XD3_ALLOCSIZE; + + Options options; + options.encode_srcwin_maxsz = XD3_MINSRCWINSZ; + options.block_size = XD3_ALLOCSIZE; + options.window_size = XD3_MINSRCWINSZ; + options.size_known = false; + + spec0.GenerateFixedSize(size); + + ChangeList cl; + + // Modify the 0th byte in order to induce indexing of subsequent + // bytes, but allow copying most of the file to keep the test fast. 
+ cl.push_back(Change(Change::MODIFY, 1, edit * 31)); + cl.push_back(Change(Change::COPYOVER, edit, edit * 31, edit * 63)); + + spec0.ModifyTo(ChangeListMutator(cl), &spec1); + + Block noblock; + InMemoryEncodeDecode(spec0, spec1, &noblock, options); + InMemoryEncodeDecode(spec1, spec0, &noblock, options); +} + +}; // class Regtest + +#define TEST(x) XPR(NTR #x "...\n"); regtest.x() + +// These tests are primarily tests of the testing framework itself. +template +void UnitTest() { + Regtest regtest; + TEST(TestPrintf); + TEST(TestRandomNumbers); + TEST(TestRandomFile); + TEST(TestFirstByte); + TEST(TestModifyMutator); + TEST(TestAddMutator); + TEST(TestDeleteMutator); + TEST(TestCopyMutator); + TEST(TestMoveMutator); + TEST(TestOverwriteMutator); +} + +// These are Xdelta tests. +template +void MainTest() { + XPR(NT "Blocksize %" Q "u windowsize %" Z "u\n", + T::BLOCK_SIZE, T::WINDOW_SIZE); + Regtest regtest; + TEST(TestEmptyInMemory); + TEST(TestBlockInMemory); + TEST(TestSmallStride); + TEST(TestCopyWindow); + TEST(TestCopyFromEnd); + TEST(TestNonBlocking); + TEST(TestHalfBlockCopy); + TEST(TestLastFrontierBlock); + TEST(TestMergeCommand1); + TEST(TestMergeCommand2); +} + +#undef TEST + +int main(int argc, char **argv) +{ + vector mcmd; + string pn; + const char *sp = strrchr(argv[0], '/'); + if (sp != NULL) { + pn.append(argv[0], sp - argv[0] + 1); + } + pn.append("xdelta3"); + mcmd.push_back(pn.c_str()); + mcmd.push_back("test"); + mcmd.push_back(NULL); + + UnitTest(); + MainTest(); + MainTest(); + MainTest(); + MainTest(); + + CHECK_EQ(0, xd3_main_cmdline(mcmd.size() - 1, + const_cast(&mcmd[0]))); + + return 0; +} + diff --git a/lib/xdelta3/testing/regtest_c.c b/lib/xdelta3/testing/regtest_c.c new file mode 100644 index 0000000..42e32ce --- /dev/null +++ b/lib/xdelta3/testing/regtest_c.c @@ -0,0 +1,17 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you 
may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../xdelta3.c" diff --git a/lib/xdelta3/testing/run_release.sh b/lib/xdelta3/testing/run_release.sh new file mode 100644 index 0000000..85ed1f7 --- /dev/null +++ b/lib/xdelta3/testing/run_release.sh @@ -0,0 +1,2 @@ +#!/bin/sh +(cd .. && ./run_release.sh) diff --git a/lib/xdelta3/testing/segment.h b/lib/xdelta3/testing/segment.h new file mode 100644 index 0000000..a242ad8 --- /dev/null +++ b/lib/xdelta3/testing/segment.h @@ -0,0 +1,112 @@ +/* xdelta3 - delta compression tools and library -*- Mode: C++ -*- + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +class Segment { + public: + Segment(size_t size, MTRandom *rand) + : size_(size), + seed_(rand->Rand32()), + seed_offset_(0), + data_(NULL) { + CHECK_GT(size_, 0); + } + + Segment(size_t size, uint32_t seed) + : size_(size), + seed_(seed), + seed_offset_(0), + data_(NULL) { + CHECK_GT(size_, 0); + } + + Segment(size_t size, uint8_t *data) + : size_(size), + seed_(0), + seed_offset_(0), + data_(data) { + CHECK_GT(size_, 0); + } + + size_t Size() const { + return size_; + } + + Segment Subseg(size_t start, size_t size) const { + CHECK_LE(start + size, size_); + if (data_) { + return Segment(size, data_ + start); + } else { + return Segment(size, seed_, seed_offset_ + start); + } + } + + void Fill(size_t seg_offset, size_t size, uint8_t *data) const { + CHECK_LE(seg_offset + size, size_); + if (data_) { + memcpy(data, data_ + seg_offset, size); + } else { + size_t skip = seg_offset + seed_offset_; + MTRandom gen(seed_); + MTRandom8 gen8(&gen); + while (skip--) { + gen8.Rand8(); + } + for (size_t i = 0; i < size; i++) { + data[i] = gen8.Rand8(); + } + } + } + + string ToString() const { + string r; + if (data_) { + for (size_t i = 0; i < size_; i++) { + char buf[10]; + sprintf(buf, "%02x ", data_[i]); + r.append(buf); + } + } else { + char buf[256]; + sprintf(buf, "size=%ld,seed=%ud,skip=%ld", size_, seed_, seed_offset_); + r.append(buf); + } + return r; + } + +private: + // Used by Subseg() + Segment(size_t size, uint32_t seed, size_t seed_offset) + : size_(size), + seed_(seed), + seed_offset_(seed_offset), + data_(NULL) { + CHECK_GT(size_, 0); + } + + size_t size_; // Size of this segment + + // For random segments + uint32_t seed_; // Seed used for generating byte sequence + size_t seed_offset_; // Seed positions the sequence this many bytes + // before its beginning. 
+ + // For literal segments (data is not owned) + uint8_t *data_; +}; + +typedef map SegmentMap; +typedef typename SegmentMap::const_iterator ConstSegmentMapIterator; +typedef typename SegmentMap::iterator SegmentMapIterator; diff --git a/lib/xdelta3/testing/sizes.h b/lib/xdelta3/testing/sizes.h new file mode 100644 index 0000000..637208b --- /dev/null +++ b/lib/xdelta3/testing/sizes.h @@ -0,0 +1,126 @@ +/* xdelta3 - delta compression tools and library -*- Mode: C++ -*- + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +template +class SizeIterator { + public: + SizeIterator(MTRandom *rand, size_t howmany) + : rand_(rand), + count_(0), + fixed_(U::sizes), + fixed_size_(SIZEOF_ARRAY(U::sizes)), + howmany_(howmany) { } + + T Get() { + if (count_ < fixed_size_) { + return fixed_[count_]; + } + return rand_->Rand() % U::max_value; + } + + bool Done() { + return count_ >= fixed_size_ && count_ >= howmany_; + } + + void Next() { + count_++; + } + + private: + MTRandom *rand_; + size_t count_; + T* fixed_; + size_t fixed_size_; + size_t howmany_; +}; + +// Small sizes +class SmallSizes { +public: + static size_t sizes[]; + static size_t max_value; +}; + +size_t SmallSizes::sizes[] = { + 0, 1, 128 / 4, 3333, + 128 - (128 / 3), + 128, + 128 + (128 / 3), + 2 * 128 - (128 / 3), + 2 * 128, + 2 * 128 + (128 / 3), +}; + +size_t SmallSizes::max_value = 128 * 3; + +// Large sizes +class LargeSizes { +public: + static size_t sizes[]; + static size_t max_value; +}; + +size_t LargeSizes::sizes[] = { + 1 << 20, + 1 << 18, + 1 << 16, +}; + +size_t LargeSizes::max_value = 1<<20; + +// Base constants +struct BaseConstants { + static const size_t TEST_ROUNDS; +}; + +const size_t BaseConstants::TEST_ROUNDS = 10; + +// Regtest<> arguments +struct SmallBlock : public BaseConstants { + static const xoff_t BLOCK_SIZE; + static const size_t WINDOW_SIZE; + typedef SmallSizes Sizes; +}; + +const xoff_t SmallBlock::BLOCK_SIZE = 1<<7; +const size_t SmallBlock::WINDOW_SIZE = 1<<7; + +struct LargeBlock : public BaseConstants { + static const xoff_t BLOCK_SIZE; + static const size_t WINDOW_SIZE; + typedef LargeSizes Sizes; +}; + +const xoff_t LargeBlock::BLOCK_SIZE = (1 << 13); +const size_t LargeBlock::WINDOW_SIZE = (1 << 13); + +struct MixedBlock : public BaseConstants { + static const xoff_t BLOCK_SIZE; + static const size_t WINDOW_SIZE; + typedef SmallSizes Sizes; +}; + +const xoff_t MixedBlock::BLOCK_SIZE = 1<<7; +const size_t MixedBlock::WINDOW_SIZE = 1<<8; + +struct OversizeBlock : public BaseConstants 
{ + static const xoff_t BLOCK_SIZE; + static const size_t WINDOW_SIZE; + typedef SmallSizes Sizes; +}; + +const xoff_t OversizeBlock::BLOCK_SIZE = 1<<8; +const size_t OversizeBlock::WINDOW_SIZE = 1<<7; diff --git a/lib/xdelta3/testing/test.h b/lib/xdelta3/testing/test.h new file mode 100644 index 0000000..628fb75 --- /dev/null +++ b/lib/xdelta3/testing/test.h @@ -0,0 +1,84 @@ +/* xdelta3 - delta compression tools and library -*- Mode: C++ -*- + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +extern "C" { +#include "../xdelta3.h" +#include "../xdelta3-internal.h" +} + +#include +#include +#include + +#define CHECK_EQ(x,y) CHECK_OP(x,y,==) +#define CHECK_NE(x,y) CHECK_OP(x,y,!=) +#define CHECK_LT(x,y) CHECK_OP(x,y,<) +#define CHECK_GT(x,y) CHECK_OP(x,y,>) +#define CHECK_LE(x,y) CHECK_OP(x,y,<=) +#define CHECK_GE(x,y) CHECK_OP(x,y,>=) + +#define CHECK_OP(x,y,OP) \ + do { \ + __typeof__(x) _x(x); \ + __typeof__(x) _y(y); \ + if (!(_x OP _y)) { \ + cerr << __FILE__ << ":" << __LINE__ << " Check failed: " << #x " " #OP " " #y << endl; \ + cerr << __FILE__ << ":" << __LINE__ << " {0} " << _x << endl; \ + cerr << __FILE__ << ":" << __LINE__ << " {1} " << _y << endl; \ + abort(); \ + } } while (false) +#undef CHECK +#define CHECK(x) \ + do {if (!(x)) { \ + cerr << __FILE__ << ":" << __LINE__ << " Check failed: " << #x << endl; \ + abort(); \ + } } while (false) + +#define DCHECK(x) + +using std::string; + +#include +using std::vector; + +inline string CommandToString(const vector &v) { + string s(v[0]); + for (size_t i = 1; i < v.size() && v[i] != NULL; i++) { + s.append(" "); + s.append(v[i]); + } + return s; +} + +#include +using std::cerr; +using std::endl; +using std::ostream; + +#include +using std::map; +using std::pair; + +#include +using std::list; + +template +pair make_pair(const T& t, const U& u) { + return pair(t, u); +} + +using std::min; +using std::max; diff --git a/lib/xdelta3/testing/xdelta3-regtest.py b/lib/xdelta3/testing/xdelta3-regtest.py new file mode 100644 index 0000000..aa54c46 --- /dev/null +++ b/lib/xdelta3/testing/xdelta3-regtest.py @@ -0,0 +1,1264 @@ +#!/usr/bin/python2.7 +# xdelta3 - delta compression tools and library -*- Mode: C++ -*- +# Copyright 2016 Joshua MacDonald +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# TODO This code is no longer maintained :( + +import os, sys, math, re, time, types, array, random +import xdelta3 + +RCSDIR = '/tmp/rcs' +SAMPLEDIR = "/tmp/diff" + +# +MIN_SIZE = 0 + +TIME_TOO_SHORT = 0.050 + +SKIP_TRIALS = 2 +MIN_TRIALS = 3 +MAX_TRIALS = 15 + +# 10 = fast 1.5 = slow +MIN_STDDEV_PCT = 1.5 + +# How many results per round +MAX_RESULTS = 500 +TEST_ROUNDS = 10 +KEEP_P = (0.5) + +# For RCS testing, what percent to select +FILE_P = (0.50) + +# For run-speed tests +MIN_RUN = 1000 * 1000 * 1 +MAX_RUN = 1000 * 1000 * 10 + +# Testwide defaults +ALL_ARGS = [ + '-q' # '-vv' + ] + +# The first 7 args go to -C +SOFT_CONFIG_CNT = 7 + +CONFIG_ORDER = [ 'large_look', + 'large_step', + 'small_look', + 'small_chain', + 'small_lchain', + 'max_lazy', + 'long_enough', + + # > SOFT_CONFIG_CNT + 'nocompress', + 'winsize', + 'srcwinsize', + 'sprevsz', + 'iopt', + 'djw', + 'altcode', + ] + +CONFIG_ARGMAP = { + 'winsize' : '-W', + 'srcwinsize' : '-B', + 'sprevsz' : '-P', + 'iopt' : '-I', + 'nocompress' : '-N', + 'djw' : '-Sdjw', + 'altcode' : '-T', + } + +def INPUT_SPEC(rand): + return { + + # Time/space costs: + + # -C 1,2,3,4,5,6,7 + 'large_look' : lambda d: rand.choice([9, 10, 11, 12]), + 'large_step' : lambda d: rand.choice([25, 26, 27, 28, 29, 30]), + 'small_look' : lambda d: rand.choice([4]), + 'small_chain' : lambda d: rand.choice([1]), + 'small_lchain' : lambda d: rand.choice([1]), + 'max_lazy' : lambda d: rand.choice([4, 5, 6, 7, 8, 9, 10 ]), + + # Note: long_enough only refers to small matching and has no effect if + # small_chain == 1. 
+ 'long_enough' : lambda d: rand.choice([4]), + + # -N + 'nocompress' : lambda d: rand.choice(['false']), + + # -T + 'altcode' : lambda d: rand.choice(['false']), + + # -S djw + 'djw' : lambda d: rand.choice(['false']), + + # Memory costs: + + # -W + 'winsize' : lambda d: 8 * (1<<20), + + # -B + 'srcwinsize' : lambda d: 64 * (1<<20), + + # -I 0 is unlimited + 'iopt' : lambda d: 0, + + # -P only powers of two + 'sprevsz' : lambda d: rand.choice([x * (1<<16) for x in [4]]), + } +#end + +# +TMPDIR = '/tmp/xd3regtest.%d' % os.getpid() + +RUNFILE = os.path.join(TMPDIR, 'run') +DFILE = os.path.join(TMPDIR, 'output') +RFILE = os.path.join(TMPDIR, 'recon') +CMPTMP1 = os.path.join(TMPDIR, 'cmptmp1') +CMPTMP2 = os.path.join(TMPDIR, 'cmptmp2') + +HEAD_STATE = 0 +BAR_STATE = 1 +REV_STATE = 2 +DATE_STATE = 3 + +# +IGNORE_FILENAME = re.compile('.*\\.(gif|jpg).*') + +# rcs output +RE_TOTREV = re.compile('total revisions: (\\d+)') +RE_BAR = re.compile('----------------------------') +RE_REV = re.compile('revision (.+)') +RE_DATE = re.compile('date: ([^;]+);.*') +# xdelta output +RE_HDRSZ = re.compile('VCDIFF header size: +(\\d+)') +RE_EXTCOMP = re.compile('XDELTA ext comp.*') + +def c2str(c): + return ' '.join(['%s' % x for x in c]) +#end + +def SumList(l): + return reduce(lambda x,y: x+y, l) +#end + +# returns (total, mean, stddev, q2 (median), +# (q3-q1)/2 ("semi-interquartile range"), max-min (spread)) +class StatList: + def __init__(self,l,desc): + cnt = len(l) + assert(cnt > 1) + l.sort() + self.cnt = cnt + self.l = l + self.total = SumList(l) + self.mean = self.total / float(self.cnt) + self.s = math.sqrt(SumList([(x-self.mean) * + (x - self.mean) for x in l]) / + float(self.cnt-1)) + self.q0 = l[0] + self.q1 = l[int(self.cnt/4.0+0.5)] + self.q2 = l[int(self.cnt/2.0+0.5)] + self.q3 = l[min(self.cnt-1,int((3.0*self.cnt)/4.0+0.5))] + self.q4 = l[self.cnt-1] + self.siqr = (self.q3-self.q1)/2.0; + self.spread = (self.q4-self.q0) + if len(l) == 1: + self.str = '%s %s' % (desc, 
l[0]) + else: + self.str = '%s mean %.1f: 25%-ile %d %d %d %d %d' % \ + (desc, self.mean, self.q0, self.q1, self.q2, self.q3, self.q4) + #end +#end + +def RunCommand(args, ok = [0]): + #print 'run command %s' % (' '.join(args)) + p = os.spawnvp(os.P_WAIT, args[0], args) + if p not in ok: + raise CommandError(args, 'exited %d' % p) + #end +#end + +def RunCommandIO(args,infn,outfn): + p = os.fork() + if p == 0: + os.dup2(os.open(infn,os.O_RDONLY),0) + os.dup2(os.open(outfn,os.O_CREAT|os.O_TRUNC|os.O_WRONLY),1) + os.execvp(args[0], args) + else: + s = os.waitpid(p,0) + o = os.WEXITSTATUS(s[1]) + if not os.WIFEXITED(s[1]) or o != 0: + raise CommandError(args, 'exited %d' % o) + #end + #end +#end + +class TimedTest: + def __init__(self, target, source, runnable, + skip_trials = SKIP_TRIALS, + min_trials = MIN_TRIALS, + max_trials = MAX_TRIALS, + min_stddev_pct = MIN_STDDEV_PCT): + self.target = target + self.source = source + self.runnable = runnable + + self.skip_trials = skip_trials + self.min_trials = min(min_trials, max_trials) + self.max_trials = max_trials + self.min_stddev_pct = min_stddev_pct + + self.encode_time = self.DoTest(DFILE, + lambda x: x.Encode(self.target, + self.source, DFILE)) + self.encode_size = runnable.EncodeSize(DFILE) + + self.decode_time = self.DoTest(RFILE, + lambda x: x.Decode(DFILE, + self.source, RFILE), + ) + runnable.Verify(self.target, RFILE) + #end + + def DoTest(self, fname, func): + trials = 0 + measured = [] + + while 1: + try: + os.remove(fname) + except OSError: + pass + + start_time = time.time() + start_clock = time.clock() + + func(self.runnable) + + total_clock = (time.clock() - start_clock) + total_time = (time.time() - start_time) + + elap_time = max(total_time, 0.0000001) + elap_clock = max(total_clock, 0.0000001) + + trials = trials + 1 + + # skip some of the first trials + if trials > self.skip_trials: + measured.append((elap_clock, elap_time)) + #print 'measurement total: %.1f ms' % (total_time * 1000.0) + + # at least 
so many + if trials < (self.skip_trials + self.min_trials): + #print 'continue: need more trials: %d' % trials + continue + + # compute %variance + done = 0 + if self.skip_trials + self.min_trials <= 2: + measured = measured + measured; + done = 1 + #end + + time_stat = StatList([x[1] for x in measured], 'elap time') + sp = float(time_stat.s) / float(time_stat.mean) + + # what if MAX_TRIALS is exceeded? + too_many = (trials - self.skip_trials) >= self.max_trials + good = (100.0 * sp) < self.min_stddev_pct + if done or too_many or good: + trials = trials - self.skip_trials + if not done and not good: + #print 'too many trials: %d' % trials + pass + #clock = StatList([x[0] for x in measured], 'elap clock') + return time_stat + #end + #end + #end +#end + +def Decimals(start, end): + l = [] + step = start + while 1: + r = range(step, step * 10, step) + l = l + r + if step * 10 >= end: + l.append(step * 10) + break + step = step * 10 + return l +#end + +# This tests the raw speed of 0-byte inputs +def RunSpeedTest(): + for L in Decimals(MIN_RUN, MAX_RUN): + SetFileSize(RUNFILE, L) + + trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<20)])) + ReportSpeed(L, trx, '1MB ') + + trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<19)])) + ReportSpeed(L, trx, '512k') + + trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<18)])) + ReportSpeed(L, trx, '256k') + + trm = TimedTest(RUNFILE, None, Xdelta3Mod1(RUNFILE)) + ReportSpeed(L, trm, 'swig') + + trg = TimedTest(RUNFILE, None, GzipRun1()) + ReportSpeed(L,trg,'gzip') + #end +#end + +def SetFileSize(F,L): + fd = os.open(F, os.O_CREAT | os.O_WRONLY) + os.ftruncate(fd,L) + assert os.fstat(fd).st_size == L + os.close(fd) +#end + +def ReportSpeed(L,tr,desc): + print '%s run length %u: size %u: time %.3f ms: decode %.3f ms' % \ + (desc, L, + tr.encode_size, + tr.encode_time.mean * 1000.0, + tr.decode_time.mean * 1000.0) +#end + +class Xdelta3RunClass: + def __init__(self, extra): + self.extra = extra + #end + 
+ def __str__(self): + return ' '.join(self.extra) + #end + + def New(self): + return Xdelta3Runner(self.extra) + #end +#end + +class Xdelta3Runner: + # Use "forkexec" to get special command-line only features like + # external compression support. + def __init__(self, extra, forkexec=False): + self.forkexec = forkexec + self.extra = extra + #end + + def Encode(self, target, source, output): + args = (ALL_ARGS + + self.extra + + ['-e']) + if source: + args.append('-s') + args.append(source) + #end + args = args + [target, output] + self.Main(args) + #end + + def Decode(self, input, source, output): + args = (ALL_ARGS + + ['-d']) + if source: + args.append('-s') + args.append(source) + #end + args = args + [input, output] + self.Main(args) + #end + + def Verify(self, target, recon): + if target[-3:] == ".gz": + RunCommandIO(('gzip', '-dc'), target, CMPTMP1) + RunCommandIO(('gzip', '-dc'), recon, CMPTMP2) + RunCommand(('cmp', CMPTMP1, CMPTMP2)) + else: + RunCommand(('cmp', target, recon)) + #end + + def EncodeSize(self, output): + return os.stat(output).st_size + #end + + def Main(self, args): + try: + if self.forkexec: + RunCommand(['../xdelta3'] + args) + else: + xdelta3.xd3_main_cmdline(args) + except Exception, e: + raise CommandError(args, "xdelta3.main exception: %s" % e) + #end + #end +#end + +class Xdelta3Mod1: + def __init__(self, file): + self.target_data = open(file, 'r').read() + #end + + def Encode(self, ignore1, ignore2, ignore3): + r1, encoded = xdelta3.xd3_encode_memory(self.target_data, None, 1000000, 1<<10) + if r1 != 0: + raise CommandError('memory', 'encode failed: %s' % r1) + #end + self.encoded = encoded + #end + + def Decode(self, ignore1, ignore2, ignore3): + r2, data1 = xdelta3.xd3_decode_memory(self.encoded, None, len(self.target_data)) + if r2 != 0: + raise CommandError('memory', 'decode failed: %s' % r1) + #end + self.decoded = data1 + #end + + def Verify(self, ignore1, ignore2): + if self.target_data != self.decoded: + raise 
CommandError('memory', 'bad decode') + #end + #end + + def EncodeSize(self, ignore1): + return len(self.encoded) + #end +#end + +class GzipRun1: + def Encode(self, target, source, output): + assert source == None + RunCommandIO(['gzip', '-cf'], target, output) + #end + + def Decode(self, input, source, output): + assert source == None + RunCommandIO(['gzip', '-dcf'], input, output) + #end + + def Verify(self, target, recon): + RunCommand(('cmp', target, recon)) + #end + + def EncodeSize(self, output): + return os.stat(output).st_size + #end +#end + +class Xdelta1RunClass: + def __str__(self): + return 'xdelta1' + #end + + def New(self): + return Xdelta1Runner() + #end +#end + +class Xdelta1Runner: + def Encode(self, target, source, output): + assert source != None + args = ['xdelta1', 'delta', '-q', source, target, output] + RunCommand(args, [0, 1]) + #end + + def Decode(self, input, source, output): + assert source != None + args = ['xdelta1', 'patch', '-q', input, source, output] + # Note: for dumb historical reasons, xdelta1 returns 1 or 0 + RunCommand(args) + #end + + def Verify(self, target, recon): + RunCommand(('cmp', target, recon)) + #end + + def EncodeSize(self, output): + return os.stat(output).st_size + #end +#end + +# exceptions +class SkipRcsException: + def __init__(self,reason): + self.reason = reason + #end +#end + +class NotEnoughVersions: + def __init__(self): + pass + #end +#end + +class CommandError: + def __init__(self,cmd,str): + if type(cmd) is types.TupleType or \ + type(cmd) is types.ListType: + cmd = reduce(lambda x,y: '%s %s' % (x,y),cmd) + #end + print 'command was: ',cmd + print 'command failed: ',str + print 'have fun debugging' + #end +#end + +class RcsVersion: + def __init__(self,vstr): + self.vstr = vstr + #end + def __cmp__(self,other): + return cmp(self.date, other.date) + #end + def __str__(self): + return str(self.vstr) + #end +#end + +class RcsFile: + + def __init__(self, fname): + self.fname = fname + self.versions = [] + 
self.state = HEAD_STATE + #end + + def SetTotRev(self,s): + self.totrev = int(s) + #end + + def Rev(self,s): + self.rev = RcsVersion(s) + if len(self.versions) >= self.totrev: + raise SkipRcsException('too many versions (in log messages)') + #end + self.versions.append(self.rev) + #end + + def Date(self,s): + self.rev.date = s + #end + + def Match(self, line, state, rx, gp, newstate, f): + if state == self.state: + m = rx.match(line) + if m: + if f: + f(m.group(gp)) + #end + self.state = newstate + return 1 + #end + #end + return None + #end + + def Sum1Rlog(self): + f = os.popen('rlog '+self.fname, "r") + l = f.readline() + while l: + if self.Match(l, HEAD_STATE, RE_TOTREV, 1, BAR_STATE, self.SetTotRev): + pass + elif self.Match(l, BAR_STATE, RE_BAR, 1, REV_STATE, None): + pass + elif self.Match(l, REV_STATE, RE_REV, 1, DATE_STATE, self.Rev): + pass + elif self.Match(l, DATE_STATE, RE_DATE, 1, BAR_STATE, self.Date): + pass + #end + l = f.readline() + #end + c = f.close() + if c != None: + raise c + #end + #end + + def Sum1(self): + st = os.stat(self.fname) + self.rcssize = st.st_size + self.Sum1Rlog() + if self.totrev != len(self.versions): + raise SkipRcsException('wrong version count') + #end + self.versions.sort() + #end + + def Checkout(self,n): + v = self.versions[n] + out = open(self.Verf(n), "w") + cmd = 'co -ko -p%s %s' % (v.vstr, self.fname) + total = 0 + (inf, + stream, + err) = os.popen3(cmd, "r") + inf.close() + buf = stream.read() + while buf: + total = total + len(buf) + out.write(buf) + buf = stream.read() + #end + v.vsize = total + estr = '' + buf = err.read() + while buf: + estr = estr + buf + buf = err.read() + #end + if stream.close(): + raise CommandError(cmd, 'checkout failed: %s\n%s\n%s' % (v.vstr, self.fname, estr)) + #end + out.close() + err.close() + #end + + def Vdate(self,n): + return self.versions[n].date + #end + + def Vstr(self,n): + return self.versions[n].vstr + #end + + def Verf(self,n): + return os.path.join(TMPDIR, 'input.%d' % 
n) + #end + + def FilePairsByDate(self, runclass): + if self.totrev < 2: + raise NotEnoughVersions() + #end + self.Checkout(0) + ntrials = [] + if self.totrev < 2: + return vtrials + #end + for v in range(0,self.totrev-1): + if v > 1: + os.remove(self.Verf(v-1)) + #end + self.Checkout(v+1) + if os.stat(self.Verf(v)).st_size < MIN_SIZE or \ + os.stat(self.Verf(v+1)).st_size < MIN_SIZE: + continue + #end + + result = TimedTest(self.Verf(v+1), + self.Verf(v), + runclass.New()) + + target_size = os.stat(self.Verf(v+1)).st_size + + ntrials.append(result) + #end + + os.remove(self.Verf(self.totrev-1)) + os.remove(self.Verf(self.totrev-2)) + return ntrials + #end + + def AppendVersion(self, f, n): + self.Checkout(n) + rf = open(self.Verf(n), "r") + data = rf.read() + f.write(data) + rf.close() + return len(data) + #end + +class RcsFinder: + def __init__(self): + self.subdirs = [] + self.rcsfiles = [] + self.others = [] + self.skipped = [] + self.biground = 0 + #end + + def Scan1(self,dir): + dents = os.listdir(dir) + subdirs = [] + rcsfiles = [] + others = [] + for dent in dents: + full = os.path.join(dir, dent) + if os.path.isdir(full): + subdirs.append(full) + elif dent[len(dent)-2:] == ",v": + rcsfiles.append(RcsFile(full)) + else: + others.append(full) + #end + #end + self.subdirs = self.subdirs + subdirs + self.rcsfiles = self.rcsfiles + rcsfiles + self.others = self.others + others + return subdirs + #end + + def Crawl(self, dir): + subdirs = [dir] + while subdirs: + s1 = self.Scan1(subdirs[0]) + subdirs = subdirs[1:] + s1 + #end + #end + + def Summarize(self): + good = [] + for rf in self.rcsfiles: + try: + rf.Sum1() + if rf.totrev < 2: + raise SkipRcsException('too few versions (< 2)') + #end + except SkipRcsException, e: + #print 'skipping file %s: %s' % (rf.fname, e.reason) + self.skipped.append(rf) + else: + good.append(rf) + #end + self.rcsfiles = good + #end + + def AllPairsByDate(self, runclass): + results = [] + good = [] + for rf in self.rcsfiles: + try: + 
results = results + rf.FilePairsByDate(runclass) + except SkipRcsException: + print 'file %s has compressed versions: skipping' % (rf.fname) + except NotEnoughVersions: + print 'testing %s on %s: not enough versions' % (runclass, rf.fname) + else: + good.append(rf) + #end + self.rcsfiles = good + self.ReportPairs(runclass, results) + return results + #end + + def ReportPairs(self, name, results): + encode_time = 0 + decode_time = 0 + encode_size = 0 + for r in results: + encode_time += r.encode_time.mean + decode_time += r.decode_time.mean + encode_size += r.encode_size + #end + print '%s rcs: encode %.2f s: decode %.2f s: size %d' % \ + (name, encode_time, decode_time, encode_size) + #end + + def MakeBigFiles(self, rand): + f1 = open(TMPDIR + "/big.1", "w") + f2 = open(TMPDIR + "/big.2", "w") + population = [] + for file in self.rcsfiles: + if len(file.versions) < 2: + continue + population.append(file) + #end + f1sz = 0 + f2sz = 0 + fcount = int(len(population) * FILE_P) + assert fcount > 0 + for file in rand.sample(population, fcount): + m = IGNORE_FILENAME.match(file.fname) + if m != None: + continue + #end + r1, r2 = rand.sample(xrange(0, len(file.versions)), 2) + f1sz += file.AppendVersion(f1, r1) + f2sz += file.AppendVersion(f2, r2) + #m.update('%s,%s,%s ' % (file.fname[len(RCSDIR):], + #file.Vstr(r1), file.Vstr(r2))) + #end + testkey = 'rcs%d' % self.biground + self.biground = self.biground + 1 + + print '%s; source %u bytes; target %u bytes' % (testkey, f1sz, f2sz) + f1.close() + f2.close() + return (TMPDIR + "/big.1", + TMPDIR + "/big.2", + testkey) + #end + + def Generator(self): + return lambda rand: self.MakeBigFiles(rand) + #end +#end + +# find a set of RCS files for testing +def GetTestRcsFiles(): + rcsf = RcsFinder() + rcsf.Crawl(RCSDIR) + if len(rcsf.rcsfiles) == 0: + raise CommandError('', 'no RCS files') + #end + rcsf.Summarize() + print "rcsfiles: rcsfiles %d; subdirs %d; others %d; skipped %d" % ( + len(rcsf.rcsfiles), + len(rcsf.subdirs), + 
len(rcsf.others), + len(rcsf.skipped)) + print StatList([x.rcssize for x in rcsf.rcsfiles], "rcssize").str + print StatList([x.totrev for x in rcsf.rcsfiles], "totrev").str + return rcsf +#end + +class SampleDataTest: + def __init__(self, dirs): + dirs_in = dirs + self.pairs = [] + while dirs: + d = dirs[0] + dirs = dirs[1:] + l = os.listdir(d) + files = [] + for e in l: + p = os.path.join(d, e) + if os.path.isdir(p): + dirs.append(p) + else: + files.append(p) + #end + #end + if len(files) > 1: + files.sort() + for x in xrange(len(files)): + for y in xrange(len(files)): + self.pairs.append((files[x], files[y], + '%s-%s' % (files[x], files[y]))) + #end + #end + #end + #end + print "Sample data test using %d file pairs in %s" % ( + len(self.pairs), dirs_in) + #end + + def Generator(self): + return lambda rand: rand.choice(self.pairs) + #end +#end + +# configs are represented as a list of values, +# program takes a list of strings: +def ConfigToArgs(config): + args = [ '-C', + ','.join([str(x) for x in config[0:SOFT_CONFIG_CNT]])] + for i in range(SOFT_CONFIG_CNT, len(CONFIG_ORDER)): + key = CONFIG_ARGMAP[CONFIG_ORDER[i]] + val = config[i] + if val == 'true' or val == 'false': + if val == 'true': + args.append('%s' % key) + #end + else: + args.append('%s=%s' % (key, val)) + #end + #end + return args +#end + +# +class RandomTest: + def __init__(self, tnum, tinput, config, syntuple = None): + self.mytinput = tinput[2] + self.myconfig = config + self.tnum = tnum + + if syntuple != None: + self.runtime = syntuple[0] + self.compsize = syntuple[1] + self.decodetime = None + else: + args = ConfigToArgs(config) + result = TimedTest(tinput[1], tinput[0], Xdelta3Runner(args)) + + self.runtime = result.encode_time.mean + self.compsize = result.encode_size + self.decodetime = result.decode_time.mean + #end + + self.score = None + self.time_pos = None + self.size_pos = None + self.score_pos = None + #end + + def __str__(self): + decodestr = ' %s' % self.decodetime + return 'time 
%.6f%s size %d%s << %s >>%s' % ( + self.time(), ((self.time_pos != None) and + (" (%s)" % self.time_pos) or ""), + self.size(), ((self.size_pos != None) and + (" (%s)" % self.size_pos) or ""), + c2str(self.config()), + decodestr) + #end + + def time(self): + return self.runtime + #end + + def size(self): + return self.compsize + #end + + def config(self): + return self.myconfig + #end + + def score(self): + return self.score + #end + + def tinput(self): + return self.mytinput + #end +#end + +def PosInAlist(l, e): + for i in range(0, len(l)): + if l[i][1] == e: + return i; + #end + #end + return -1 +#end + +# Generates a set of num_results test configurations, given the list of +# retest-configs. +def RandomTestConfigs(rand, input_configs, num_results): + + outputs = input_configs[:] + have_set = dict([(c,c) for c in input_configs]) + + # Compute a random configuration + def RandomConfig(): + config = [] + cmap = {} + for key in CONFIG_ORDER: + val = cmap[key] = (INPUT_SPEC(rand)[key])(cmap) + config.append(val) + #end + return tuple(config) + #end + + while len(outputs) < num_results: + newc = None + for i in xrange(100): + c = RandomConfig() + if have_set.has_key(c): + continue + #end + have_set[c] = c + newc = c + break + if newc is None: + print 'stopped looking for configs at %d' % len(outputs) + break + #end + outputs.append(c) + #end + outputs.sort() + return outputs +#end + +def RunOptimizationLoop(rand, generator, rounds): + configs = [] + for rnum in xrange(rounds): + configs = RandomTestConfigs(rand, configs, MAX_RESULTS) + tinput = generator(rand) + tests = [] + for x in xrange(len(configs)): + t = RandomTest(x, tinput, configs[x]) + print 'Round %d test %d: %s' % (rnum, x, t) + tests.append(t) + #end + results = ScoreTests(tests) + + for r in results: + c = r.config() + if not test_all_config_results.has_key(c): + test_all_config_results[c] = [r] + else: + test_all_config_results[c].append(r) + #end + #end + + #GraphResults('expt%d' % rnum, results) + 
#GraphSummary('sum%d' % rnum, results) + + # re-test some fraction + configs = [r.config() for r in results[0:int(MAX_RESULTS * KEEP_P)]] + #end +#end + +# TODO: cleanup +test_all_config_results = {} + +def ScoreTests(results): + scored = [] + timed = [] + sized = [] + + t_min = float(min([test.time() for test in results])) + #t_max = float(max([test.time() for test in results])) + s_min = float(min([test.size() for test in results])) + #s_max = float(max([test.size() for test in results])) + + for test in results: + + # Hyperbolic function. Smaller scores still better + red = 0.999 # minimum factors for each dimension are 1/1000 + test.score = ((test.size() - s_min * red) * + (test.time() - t_min * red)) + + scored.append((test.score, test)) + timed.append((test.time(), test)) + sized.append((test.size(), test)) + #end + + scored.sort() + timed.sort() + sized.sort() + + best_by_size = [] + best_by_time = [] + + pos = 0 + for (score, test) in scored: + pos += 1 + test.score_pos = pos + #end + + scored = [x[1] for x in scored] + + for test in scored: + test.size_pos = PosInAlist(sized, test) + test.time_pos = PosInAlist(timed, test) + #end + + for test in scored: + c = test.config() + s = 0.0 + print 'H-Score: %0.9f %s' % (test.score, test) + #end + + return scored +#end + +def GraphResults(desc, results): + f = open("data-%s.csv" % desc, "w") + for r in results: + f.write("%0.9f\t%d\t# %s\n" % (r.time(), r.size(), r)) + #end + f.close() + os.system("./plot.sh data-%s.csv plot-%s.jpg" % (desc, desc)) +#end + +def GraphSummary(desc, results_ignore): + test_population = 0 + config_ordered = [] + + # drops duplicate test/config pairs (TODO: don't retest them) + for config, cresults in test_all_config_results.items(): + input_config_map = {} + uniq = [] + for test in cresults: + assert test.config() == config + test_population += 1 + key = test.tinput() + if not input_config_map.has_key(key): + input_config_map[key] = {} + #end + if 
input_config_map[key].has_key(config): + print 'skipping repeat test %s vs. %s' % (input_config_map[key][config], test) + continue + #end + input_config_map[key][config] = test + uniq.append(test) + #end + config_ordered.append(uniq) + #end + + # sort configs descending by number of tests + config_ordered.sort(lambda x, y: len(y) - len(x)) + + print 'population %d: %d configs %d results' % \ + (test_population, + len(config_ordered), + len(config_ordered[0])) + + if config_ordered[0] == 1: + return + #end + + # a map from test-key to test-list w/ various configs + input_set = {} + osize = len(config_ordered) + + for i in xrange(len(config_ordered)): + config = config_ordered[i][0].config() + config_tests = config_ordered[i] + + #print '%s has %d tested inputs' % (config, len(config_tests)) + + if len(input_set) == 0: + input_set = dict([(t.tinput(), [t]) for t in config_tests]) + continue + #end + + # a map from test-key to test-list w/ various configs + update_set = {} + for r in config_tests: + t = r.tinput() + if input_set.has_key(t): + update_set[t] = input_set[t] + [r] + else: + #print 'config %s does not have test %s' % (config, t) + pass + #end + #end + + if len(update_set) <= 1: + break + #end + + input_set = update_set + + # continue if there are more w/ the same number of inputs + if i < (len(config_ordered) - 1) and \ + len(config_ordered[i + 1]) == len(config_tests): + continue + #end + + # synthesize results for multi-test inputs + config_num = None + + # map of config to sum(various test-keys) + smap = {} + for (key, tests) in input_set.items(): + if config_num == None: + # config_num should be the same in all elements + config_num = len(tests) + smap = dict([(r.config(), + (r.time(), + r.size())) + for r in tests]) + else: + # compuate the per-config sum of time/size + assert config_num == len(tests) + smap = dict([(r.config(), + (smap[r.config()][0] + r.time(), + smap[r.config()][1] + r.size())) + for r in tests]) + #end + #end + + if config_num == 
1: + continue + #end + + if len(input_set) == osize: + break + #end + + summary = '%s-%d' % (desc, len(input_set)) + osize = len(input_set) + + print 'generate %s w/ %d configs' % (summary, config_num) + syn = [RandomTest(0, (None, None, summary), config, + syntuple = (smap[config][0], smap[config][1])) + for config in smap.keys()] + syn = ScoreTests(syn) + #print 'smap is %s' % (smap,) + #print 'syn is %s' % (' and '.join([str(x) for x in syn])) + #GraphResults(summary, syn) + #end +#end + +def RunRegressionTest(pairs, rounds): + for args in [ + [], + ['-S=djw'], + ['-B=412907520'], + ['-B 412907520', ], + + ]: + print "Args %s" % (args) + for (file1, file2, testkey) in pairs: + ttest = TimedTest(file1, file2, Xdelta3Runner(args, forkexec=True), + skip_trials = 0, + min_trials = 1, + max_trials = 1) + print "Source %s\nTarget %s\nEncode %s\nDecode %s\nSize %s\n\n" % ( + file1, file2, + ttest.encode_time.str, + ttest.decode_time.str, + ttest.encode_size) + #end +#end + +if __name__ == "__main__": + try: + RunCommand(['rm', '-rf', TMPDIR]) + os.mkdir(TMPDIR) + + #rcsf = GetTestRcsFiles() + #generator = rcsf.Generator() + + sample = SampleDataTest([SAMPLEDIR]) + generator = sample.Generator() + + rand = random.Random(135135135135135) + + RunRegressionTest(sample.pairs, TEST_ROUNDS) + + #RunSpeedTest() + + # the idea below is to add the default configurations and + # xdelta1 to the optimization loop: + #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-1', '-3', '-6'])) + #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9'])) + #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-S', 'djw'])) + #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-1', '-S', 'djw'])) + #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-T'])) + #x1r = rcsf.AllPairsByDate(Xdelta1RunClass()) + + except CommandError: + pass + else: + RunCommand(['rm', '-rf', TMPDIR]) + pass + #end +#end diff --git a/lib/xdelta3/testing/xdelta3-test.py b/lib/xdelta3/testing/xdelta3-test.py new file mode 100644 index 
0000000..468db24 --- /dev/null +++ b/lib/xdelta3/testing/xdelta3-test.py @@ -0,0 +1,153 @@ +#!/usr/bin/python2.7 +# xdelta3 - delta compression tools and library -*- Mode: C++ -*- +# Copyright 2016 Joshua MacDonald +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import xdelta3 + +# the test data section is expected to be len('target') +source = 'source source input0 source source' +target = 'source source target source source' + +# +# + +print 'encode: basic ...' +result, patch = xdelta3.xd3_encode_memory(target, source, 50) + +assert result == 0 +assert len(patch) < len(source) + +print 'encode: adler32 ...' +result, patch_adler32 = xdelta3.xd3_encode_memory(target, source, 50, + xdelta3.XD3_ADLER32) + +assert result == 0 +assert len(patch_adler32) < len(source) +assert len(patch_adler32) > len(patch) + +print 'encode: secondary ...' +result, patch_djw = xdelta3.xd3_encode_memory(target, source, 50, + xdelta3.XD3_SEC_DJW) + +assert result == 0 +# secondary compression doesn't help +assert len(patch_djw) > len(patch) + +print 'encode: exact ...' +result, ignore = xdelta3.xd3_encode_memory(target, source, len(patch)) + +assert result == 0 +assert len(ignore) < len(source) + +print 'encode: out of space ...' +result, ignore = xdelta3.xd3_encode_memory(target, source, len(patch) - 1) + +assert result == 28 +assert ignore == None + +print 'encode: zero space ...' 
+result, ignore = xdelta3.xd3_encode_memory(target, source, 0) + +assert result == 28 +assert ignore == None + +print 'encode: no source ...' +result, zdata = xdelta3.xd3_encode_memory(target, None, 50) + +assert result == 0 +assert len(zdata) > len(patch) + +print 'encode: no input ...' +result, ignore = xdelta3.xd3_encode_memory(None, None, 50) + +assert result != 0 + +print 'decode: basic ...' +result, target1 = xdelta3.xd3_decode_memory(patch, source, len(target)) + +assert result == 0 +assert len(target1) == len(target) +assert target1 == target + +print 'decode: out of space ...' +result, ignore = xdelta3.xd3_decode_memory(patch, source, len(target) - 1) + +assert result == 28 +assert ignore == None + +print 'decode: zero space ...' +result, ignore = xdelta3.xd3_decode_memory(patch, source, 0) + +assert result == 28 +assert ignore == None + +print 'decode: single byte error ...' +# a few expected single-byte errors, e.g., unused address cache bits, see +# xdelta3-test.h's single-bit error tests +extra_count = 4 +noverify_count = 0 +for corrupt_pos in range(len(patch_adler32)): + input = ''.join([j == corrupt_pos and '\xff' or patch_adler32[j] + for j in range(len(patch_adler32))]) + + result, ignore = xdelta3.xd3_decode_memory(input, source, len(target), 0) + assert result == -17712 + assert ignore == None + + # without adler32 verification, the error may be in the data section which + # in this case is 6 bytes 'target' + result, corrupt = xdelta3.xd3_decode_memory(input, source, len(target), + xdelta3.XD3_ADLER32_NOVER) + if result == 0: + noverify_count = noverify_count + 1 + #print "got %s" % corrupt + #end +#end +assert noverify_count == len('target') + extra_count + +print 'decode: no source ...' +result, target2 = xdelta3.xd3_decode_memory(zdata, None, len(target)) + +assert result == 0 +assert target == target2 + +# Test compression level setting via flags. 
assumes a 9 byte checksum +# and that level 9 steps 2, level 1 steps 15: +# 01234567890123456789012345678901 +# level 1 only indexes 2 checksums "abcdefghi" and "ABCDEFGHI" +# outputs 43 vs. 23 bytes +print 'encode: compression level ...' + +source = '_la_la_abcdefghi_la_la_ABCDEFGHI' +target = 'la_la_ABCDEFGH__la_la_abcdefgh__' + +result1, level1 = xdelta3.xd3_encode_memory(target, source, 50, xdelta3.XD3_COMPLEVEL_1) +result9, level9 = xdelta3.xd3_encode_memory(target, source, 50, xdelta3.XD3_COMPLEVEL_9) + +assert result1 == 0 and result9 == 0 +assert len(level1) > len(level9) + +# +# Issue 65 +print 'encode: 65 ...' +source = 'Hello World' +target = 'Hello everyone' +result, patch = xdelta3.xd3_encode_memory(target, source, len(target)) +assert result != 0 + +result, patch = xdelta3.xd3_encode_memory(target, source, 2 * len(target)) +assert result == 0 + +print 'PASS' diff --git a/lib/xdelta3/xdelta3-blkcache.h b/lib/xdelta3/xdelta3-blkcache.h new file mode 100644 index 0000000..f7b1d59 --- /dev/null +++ b/lib/xdelta3/xdelta3-blkcache.h @@ -0,0 +1,557 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "xdelta3-internal.h" + +typedef struct _main_blklru main_blklru; +typedef struct _main_blklru_list main_blklru_list; + + +#define XD3_INVALID_OFFSET XOFF_T_MAX + +struct _main_blklru_list +{ + main_blklru_list *next; + main_blklru_list *prev; +}; + +struct _main_blklru +{ + uint8_t *blk; + xoff_t blkno; + usize_t size; + main_blklru_list link; +}; + +XD3_MAKELIST(main_blklru_list,main_blklru,link); + +static usize_t lru_size = 0; +static main_blklru *lru = NULL; /* array of lru_size elts */ +static main_blklru_list lru_list; +static int do_src_fifo = 0; /* set to avoid lru */ + +static int lru_hits = 0; +static int lru_misses = 0; +static int lru_filled = 0; + +static void main_lru_reset (void) +{ + lru_size = 0; + lru = NULL; + do_src_fifo = 0; + lru_hits = 0; + lru_misses = 0; + lru_filled = 0; +} + +static void main_lru_cleanup (void) +{ + if (lru != NULL) + { + main_buffree (lru[0].blk); + } + + main_free (lru); + lru = NULL; + + lru_hits = 0; + lru_misses = 0; + lru_filled = 0; +} + +/* This is called at different times for encoding and decoding. The + * encoder calls it immediately, the decoder delays until the + * application header is received. */ +static int +main_set_source (xd3_stream *stream, xd3_cmd cmd, + main_file *sfile, xd3_source *source) +{ + int ret = 0; + usize_t i; + xoff_t source_size = 0; + usize_t blksize; + + XD3_ASSERT (lru == NULL); + XD3_ASSERT (stream->src == NULL); + XD3_ASSERT (option_srcwinsz >= XD3_MINSRCWINSZ); + + /* TODO: this code needs refactoring into FIFO, LRU, FAKE. Yuck! + * This is simplified from 3.0z which had issues with sizing the + * source buffer memory allocation and the source blocksize. */ + + /* LRU-specific */ + main_blklru_list_init (& lru_list); + + if (allow_fake_source) + { + /* TODO: refactor + * TOOLS/recode-specific: Check "allow_fake_source" mode looks + * broken now. 
*/ + sfile->mode = XO_READ; + sfile->realname = sfile->filename; + sfile->nread = 0; + } + else + { + /* Either a regular file (possibly compressed) or a FIFO + * (possibly compressed). */ + if ((ret = main_file_open (sfile, sfile->filename, XO_READ))) + { + return ret; + } + + /* If the file is regular we know it's size. If the file turns + * out to be externally compressed, size_known may change. */ + sfile->size_known = (main_file_stat (sfile, &source_size) == 0); + } + + /* Note: The API requires a power-of-two blocksize and srcwinsz + * (-B). The logic here will use a single block if the entire file + * is known to fit into srcwinsz. */ + option_srcwinsz = xd3_xoff_roundup (option_srcwinsz); + + /* Though called "lru", it is not LRU-specific. We always allocate + * a maximum number of source block buffers. If the entire file + * fits into srcwinsz, this buffer will stay as the only + * (lru_size==1) source block. Otherwise, we know that at least + * option_srcwinsz bytes are available. Split the source window + * into buffers. */ + if ((lru = (main_blklru*) main_malloc (MAX_LRU_SIZE * + sizeof (main_blklru))) == NULL) + { + ret = ENOMEM; + return ret; + } + + memset (lru, 0, sizeof(lru[0]) * MAX_LRU_SIZE); + + /* Allocate the entire buffer. */ + if ((lru[0].blk = (uint8_t*) main_bufalloc (option_srcwinsz)) == NULL) + { + ret = ENOMEM; + return ret; + } + + /* Main calls main_getblk_func() once before xd3_set_source(). This + * is the point at which external decompression may begin. Set the + * system for a single block. */ + lru_size = 1; + lru[0].blkno = XD3_INVALID_OFFSET; + blksize = option_srcwinsz; + main_blklru_list_push_back (& lru_list, & lru[0]); + XD3_ASSERT (blksize != 0); + + /* Initialize xd3_source. 
*/ + source->blksize = blksize; + source->name = sfile->filename; + source->ioh = sfile; + source->curblkno = XD3_INVALID_OFFSET; + source->curblk = NULL; + source->max_winsize = option_srcwinsz; + + if ((ret = main_getblk_func (stream, source, 0)) != 0) + { + XPR(NT "error reading source: %s: %s\n", + sfile->filename, + xd3_mainerror (ret)); + return ret; + } + + source->onblk = lru[0].size; /* xd3 sets onblk */ + + /* If the file is smaller than a block, size is known. */ + if (!sfile->size_known && source->onblk < blksize) + { + source_size = source->onblk; + source->onlastblk = source_size; + sfile->size_known = 1; + } + + /* If the size is not known or is greater than the buffer size, we + * split the buffer across MAX_LRU_SIZE blocks (already allocated in + * "lru"). */ + if (!sfile->size_known || source_size > option_srcwinsz) + { + /* Modify block 0, change blocksize. */ + blksize = option_srcwinsz / MAX_LRU_SIZE; + source->blksize = blksize; + source->onblk = blksize; + source->onlastblk = blksize; + source->max_blkno = MAX_LRU_SIZE - 1; + + lru[0].size = blksize; + lru_size = MAX_LRU_SIZE; + + /* Setup rest of blocks. */ + for (i = 1; i < lru_size; i += 1) + { + lru[i].blk = lru[0].blk + (blksize * i); + lru[i].blkno = i; + lru[i].size = blksize; + main_blklru_list_push_back (& lru_list, & lru[i]); + } + } + + if (! sfile->size_known) + { + /* If the size is not know, we must use FIFO discipline. */ + do_src_fifo = 1; + } + + /* Call the appropriate set_source method, handle errors, print + * verbose message, etc. 
*/ + if (sfile->size_known) + { + ret = xd3_set_source_and_size (stream, source, source_size); + } + else + { + ret = xd3_set_source (stream, source); + } + + if (ret) + { + XPR(NT XD3_LIB_ERRMSG (stream, ret)); + return ret; + } + + XD3_ASSERT (stream->src == source); + XD3_ASSERT (source->blksize == blksize); + + if (option_verbose) + { + static shortbuf srcszbuf; + static shortbuf srccntbuf; + static shortbuf winszbuf; + static shortbuf blkszbuf; + static shortbuf nbufs; + + if (sfile->size_known) + { + short_sprintf (srcszbuf, "source size %s [%"Q"u]", + main_format_bcnt (source_size, &srccntbuf), + source_size); + } + else + { + short_sprintf (srcszbuf, "%s", "source size unknown"); + } + + nbufs.buf[0] = 0; + + if (option_verbose > 1) + { + short_sprintf (nbufs, " #bufs %"W"u", lru_size); + } + + XPR(NT "source %s %s blksize %s window %s%s%s\n", + sfile->filename, + srcszbuf.buf, + main_format_bcnt (blksize, &blkszbuf), + main_format_bcnt (option_srcwinsz, &winszbuf), + nbufs.buf, + do_src_fifo ? " (FIFO)" : ""); + } + + return 0; +} + +static int +main_getblk_lru (xd3_source *source, xoff_t blkno, + main_blklru** blrup, int *is_new) +{ + main_blklru *blru = NULL; + usize_t i; + + (*is_new) = 0; + + if (do_src_fifo) + { + /* Direct lookup assumes sequential scan w/o skipping blocks. */ + int idx = blkno % lru_size; + blru = & lru[idx]; + if (blru->blkno == blkno) + { + (*blrup) = blru; + return 0; + } + /* No going backwards in a sequential scan. */ + if (blru->blkno != XD3_INVALID_OFFSET && blru->blkno > blkno) + { + return XD3_TOOFARBACK; + } + } + else + { + /* Sequential search through LRU. 
*/ + for (i = 0; i < lru_size; i += 1) + { + blru = & lru[i]; + if (blru->blkno == blkno) + { + main_blklru_list_remove (blru); + main_blklru_list_push_back (& lru_list, blru); + (*blrup) = blru; + IF_DEBUG1 (DP(RINT "[getblk_lru] HIT blkno = %"Q"u lru_size=%"W"u\n", + blkno, lru_size)); + return 0; + } + } + IF_DEBUG1 (DP(RINT "[getblk_lru] MISS blkno = %"Q"u lru_size=%"W"u\n", + blkno, lru_size)); + } + + if (do_src_fifo) + { + int idx = blkno % lru_size; + blru = & lru[idx]; + } + else + { + XD3_ASSERT (! main_blklru_list_empty (& lru_list)); + blru = main_blklru_list_pop_front (& lru_list); + main_blklru_list_push_back (& lru_list, blru); + } + + lru_filled += 1; + (*is_new) = 1; + (*blrup) = blru; + blru->blkno = XD3_INVALID_OFFSET; + return 0; +} + +static int +main_read_seek_source (xd3_stream *stream, + xd3_source *source, + xoff_t blkno) { + xoff_t pos = blkno * source->blksize; + main_file *sfile = (main_file*) source->ioh; + main_blklru *blru; + int is_new; + size_t nread = 0; + int ret = 0; + + if (!sfile->seek_failed) + { + ret = main_file_seek (sfile, pos); + + if (ret == 0) + { + sfile->source_position = pos; + } + } + + if (sfile->seek_failed || ret != 0) + { + /* For an unseekable file (or other seek error, does it + * matter?) */ + if (sfile->source_position > pos) + { + /* Could assert !IS_ENCODE(), this shouldn't happen + * because of do_src_fifo during encode. */ + if (!option_quiet) + { + XPR(NT "source can't seek backwards; requested block offset " + "%"Q"u source position is %"Q"u\n", + pos, sfile->source_position); + } + + sfile->seek_failed = 1; + stream->msg = "non-seekable source: " + "copy is too far back (try raising -B)"; + return XD3_TOOFARBACK; + } + + /* There's a chance here, that an genuine lseek error will cause + * xdelta3 to shift into non-seekable mode, entering a degraded + * condition. 
*/ + if (!sfile->seek_failed && option_verbose) + { + XPR(NT "source can't seek, will use FIFO for %s\n", + sfile->filename); + + if (option_verbose > 1) + { + XPR(NT "seek error at offset %"Q"u: %s\n", + pos, xd3_mainerror (ret)); + } + } + + sfile->seek_failed = 1; + + if (option_verbose > 1 && pos != sfile->source_position) + { + XPR(NT "non-seekable source skipping %"Q"u bytes @ %"Q"u\n", + pos - sfile->source_position, + sfile->source_position); + } + + while (sfile->source_position < pos) + { + xoff_t skip_blkno; + usize_t skip_offset; + + xd3_blksize_div (sfile->source_position, source, + &skip_blkno, &skip_offset); + + /* Read past unused data */ + XD3_ASSERT (pos - sfile->source_position >= source->blksize); + XD3_ASSERT (skip_offset == 0); + + if ((ret = main_getblk_lru (source, skip_blkno, + & blru, & is_new))) + { + return ret; + } + + XD3_ASSERT (is_new); + blru->blkno = skip_blkno; + + if ((ret = main_read_primary_input (sfile, + (uint8_t*) blru->blk, + source->blksize, + & nread))) + { + return ret; + } + + if (nread != source->blksize) + { + IF_DEBUG1 (DP(RINT "[getblk] short skip block nread = %"Z"u\n", + nread)); + stream->msg = "non-seekable input is short"; + return XD3_INVALID_INPUT; + } + + sfile->source_position += nread; + blru->size = nread; + + IF_DEBUG1 (DP(RINT "[getblk] skip blkno %"Q"u size %"W"u\n", + skip_blkno, blru->size)); + + XD3_ASSERT (sfile->source_position <= pos); + } + } + + return 0; +} + +/* This is the callback for reading a block of source. This function + * is blocking and it implements a small LRU. + * + * Note that it is possible for main_input() to handle getblk requests + * in a non-blocking manner. If the callback is NULL then the caller + * of xd3_*_input() must handle the XD3_GETSRCBLK return value and + * fill the source in the same way. See xd3_getblk for details. To + * see an example of non-blocking getblk, see xdelta-test.h. 
*/ +static int +main_getblk_func (xd3_stream *stream, + xd3_source *source, + xoff_t blkno) +{ + int ret = 0; + xoff_t pos = blkno * source->blksize; + main_file *sfile = (main_file*) source->ioh; + main_blklru *blru; + int is_new; + size_t nread = 0; + + if (allow_fake_source) + { + source->curblkno = blkno; + source->onblk = 0; + source->curblk = lru[0].blk; + lru[0].size = 0; + return 0; + } + + if ((ret = main_getblk_lru (source, blkno, & blru, & is_new))) + { + return ret; + } + + if (!is_new) + { + source->curblkno = blkno; + source->onblk = blru->size; + source->curblk = blru->blk; + lru_hits++; + return 0; + } + + lru_misses += 1; + + if (pos != sfile->source_position) + { + /* Only try to seek when the position is wrong. This means the + * decoder will fail when the source buffer is too small, but + * only when the input is non-seekable. */ + if ((ret = main_read_seek_source (stream, source, blkno))) + { + return ret; + } + } + + XD3_ASSERT (sfile->source_position == pos); + + if ((ret = main_read_primary_input (sfile, + (uint8_t*) blru->blk, + source->blksize, + & nread))) + { + return ret; + } + + /* Save the last block read, used to handle non-seekable files. 
*/ + sfile->source_position = pos + nread; + + if (option_verbose > 3) + { + if (blru->blkno != XD3_INVALID_OFFSET) + { + if (blru->blkno != blkno) + { + XPR(NT "source block %"Q"u read %"Z"u ejects %"Q"u (lru_hits=%u, " + "lru_misses=%u, lru_filled=%u)\n", + blkno, nread, blru->blkno, lru_hits, lru_misses, lru_filled); + } + else + { + XPR(NT "source block %"Q"u read %"Z"u (lru_hits=%u, " + "lru_misses=%u, lru_filled=%u)\n", + blkno, nread, lru_hits, lru_misses, lru_filled); + } + } + else + { + XPR(NT "source block %"Q"u read %"Z"u (lru_hits=%u, lru_misses=%u, " + "lru_filled=%u)\n", blkno, nread, + lru_hits, lru_misses, lru_filled); + } + } + + source->curblk = blru->blk; + source->curblkno = blkno; + source->onblk = nread; + blru->size = nread; + blru->blkno = blkno; + + IF_DEBUG1 (DP(RINT "[main_getblk] blkno %"Q"u onblk %"Z"u pos %"Q"u " + "srcpos %"Q"u\n", + blkno, nread, pos, sfile->source_position)); + + return 0; +} diff --git a/lib/xdelta3/xdelta3-cfgs.h b/lib/xdelta3/xdelta3-cfgs.h new file mode 100644 index 0000000..84a2221 --- /dev/null +++ b/lib/xdelta3/xdelta3-cfgs.h @@ -0,0 +1,171 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +/****************************************************************** + SOFT string matcher + ******************************************************************/ + +#if XD3_BUILD_SOFT + +#define TEMPLATE soft +#define LLOOK stream->smatcher.large_look +#define LSTEP stream->smatcher.large_step +#define SLOOK stream->smatcher.small_look +#define SCHAIN stream->smatcher.small_chain +#define SLCHAIN stream->smatcher.small_lchain +#define MAXLAZY stream->smatcher.max_lazy +#define LONGENOUGH stream->smatcher.long_enough + +#define SOFTCFG 1 +#include "xdelta3.c" +#undef SOFTCFG + +#undef TEMPLATE +#undef LLOOK +#undef SLOOK +#undef LSTEP +#undef SCHAIN +#undef SLCHAIN +#undef MAXLAZY +#undef LONGENOUGH +#endif + +#define SOFTCFG 0 + +/************************************************************ + FASTEST string matcher + **********************************************************/ +#if XD3_BUILD_FASTEST +#define TEMPLATE fastest +#define LLOOK 9 +#define LSTEP 26 +#define SLOOK 4U +#define SCHAIN 1 +#define SLCHAIN 1 +#define MAXLAZY 6 +#define LONGENOUGH 6 + +#include "xdelta3.c" + +#undef TEMPLATE +#undef LLOOK +#undef SLOOK +#undef LSTEP +#undef SCHAIN +#undef SLCHAIN +#undef MAXLAZY +#undef LONGENOUGH +#endif + +/************************************************************ + FASTER string matcher + **********************************************************/ +#if XD3_BUILD_FASTER +#define TEMPLATE faster +#define LLOOK 9 +#define LSTEP 15 +#define SLOOK 4U +#define SCHAIN 1 +#define SLCHAIN 1 +#define MAXLAZY 18 +#define LONGENOUGH 18 + +#include "xdelta3.c" + +#undef TEMPLATE +#undef LLOOK +#undef SLOOK +#undef LSTEP +#undef SCHAIN +#undef SLCHAIN +#undef MAXLAZY +#undef LONGENOUGH +#endif + +/****************************************************** + FAST string matcher + ********************************************************/ +#if XD3_BUILD_FAST +#define TEMPLATE fast +#define LLOOK 9 +#define LSTEP 8 +#define SLOOK 4U +#define SCHAIN 4 +#define SLCHAIN 1 
+#define MAXLAZY 18 +#define LONGENOUGH 35 + +#include "xdelta3.c" + +#undef TEMPLATE +#undef LLOOK +#undef SLOOK +#undef LSTEP +#undef SCHAIN +#undef SLCHAIN +#undef MAXLAZY +#undef LONGENOUGH +#endif + +/************************************************** + SLOW string matcher + **************************************************************/ +#if XD3_BUILD_SLOW +#define TEMPLATE slow +#define LLOOK 9 +#define LSTEP 2 +#define SLOOK 4U +#define SCHAIN 44 +#define SLCHAIN 13 +#define MAXLAZY 90 +#define LONGENOUGH 70 + +#include "xdelta3.c" + +#undef TEMPLATE +#undef LLOOK +#undef SLOOK +#undef LSTEP +#undef SCHAIN +#undef SLCHAIN +#undef MAXLAZY +#undef LONGENOUGH +#endif + +/******************************************************** + DEFAULT string matcher + ************************************************************/ +#if XD3_BUILD_DEFAULT +#define TEMPLATE default +#define LLOOK 9 +#define LSTEP 3 +#define SLOOK 4U +#define SCHAIN 8 +#define SLCHAIN 2 +#define MAXLAZY 36 +#define LONGENOUGH 70 + +#include "xdelta3.c" + +#undef TEMPLATE +#undef LLOOK +#undef SLOOK +#undef LSTEP +#undef SCHAIN +#undef SLCHAIN +#undef MAXLAZY +#undef LONGENOUGH +#endif diff --git a/lib/xdelta3/xdelta3-decode.h b/lib/xdelta3/xdelta3-decode.h new file mode 100644 index 0000000..a329591 --- /dev/null +++ b/lib/xdelta3/xdelta3-decode.h @@ -0,0 +1,1219 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef _XDELTA3_DECODE_H_ +#define _XDELTA3_DECODE_H_ + +#include "xdelta3-internal.h" + +#define SRCORTGT(x) ((((x) & VCD_SRCORTGT) == VCD_SOURCE) ? \ + VCD_SOURCE : ((((x) & VCD_SRCORTGT) == \ + VCD_TARGET) ? VCD_TARGET : 0)) + +static inline int +xd3_decode_byte (xd3_stream *stream, usize_t *val) +{ + if (stream->avail_in == 0) + { + stream->msg = "further input required"; + return XD3_INPUT; + } + + (*val) = stream->next_in[0]; + + DECODE_INPUT (1); + return 0; +} + +static inline int +xd3_decode_bytes (xd3_stream *stream, uint8_t *buf, usize_t *pos, usize_t size) +{ + usize_t want; + usize_t take; + + /* Note: The case where (*pos == size) happens when a zero-length + * appheader or code table is transmitted, but there is nothing in + * the standard against that. */ + while (*pos < size) + { + if (stream->avail_in == 0) + { + stream->msg = "further input required"; + return XD3_INPUT; + } + + want = size - *pos; + take = xd3_min (want, stream->avail_in); + + memcpy (buf + *pos, stream->next_in, (size_t) take); + + DECODE_INPUT (take); + (*pos) += take; + } + + return 0; +} + +/* Initialize the decoder for a new window. The dec_tgtlen value is + * preserved across successive window decodings, and the update to + * dec_winstart is delayed until a new window actually starts. This + * is to avoid throwing an error due to overflow until the last + * possible moment. This makes it possible to encode exactly 4GB + * through a 32-bit encoder. */ +static int +xd3_decode_init_window (xd3_stream *stream) +{ + stream->dec_cpylen = 0; + stream->dec_cpyoff = 0; + stream->dec_cksumbytes = 0; + + xd3_init_cache (& stream->acache); + + return 0; +} + +/* Allocates buffer space for the target window and possibly the + * VCD_TARGET copy-window. Also sets the base of the two copy + * segments. */ +static int +xd3_decode_setup_buffers (xd3_stream *stream) +{ + /* If VCD_TARGET is set then the previous buffer may be reused. 
*/ + if (stream->dec_win_ind & VCD_TARGET) + { + /* Note: this implementation is untested, since Xdelta3 itself + * does not implement an encoder for VCD_TARGET mode. Thus, mark + * unimplemented until needed. */ + if (1) + { + stream->msg = "VCD_TARGET not implemented"; + return XD3_UNIMPLEMENTED; + } + + /* But this implementation only supports copying from the last + * target window. If the offset is outside that range, it can't + * be done. */ + if (stream->dec_cpyoff < stream->dec_laststart) + { + stream->msg = "unsupported VCD_TARGET offset"; + return XD3_INVALID_INPUT; + } + + /* See if the two windows are the same. This indicates the + * first time VCD_TARGET is used. This causes a second buffer + * to be allocated, after that the two are swapped in the + * DEC_FINISH case. */ + if (stream->dec_lastwin == stream->next_out) + { + stream->next_out = NULL; + stream->space_out = 0; + } + + /* TODO: (See note above, this looks incorrect) */ + stream->dec_cpyaddrbase = stream->dec_lastwin + + (usize_t) (stream->dec_cpyoff - stream->dec_laststart); + } + + /* See if the current output window is large enough. */ + if (stream->space_out < stream->dec_tgtlen) + { + xd3_free (stream, stream->dec_buffer); + + stream->space_out = + xd3_round_blksize (stream->dec_tgtlen, XD3_ALLOCSIZE); + + if ((stream->dec_buffer = + (uint8_t*) xd3_alloc (stream, stream->space_out, 1)) == NULL) + { + return ENOMEM; + } + + stream->next_out = stream->dec_buffer; + } + + /* dec_tgtaddrbase refers to an invalid base address, but it is + * always used with a sufficiently large instruction offset (i.e., + * beyond the copy window). This condition is enforced by + * xd3_decode_output_halfinst. 
*/ + stream->dec_tgtaddrbase = stream->next_out - stream->dec_cpylen; + + return 0; +} + +static int +xd3_decode_allocate (xd3_stream *stream, + usize_t size, + uint8_t **buf_ptr, + usize_t *buf_alloc) +{ + IF_DEBUG2 (DP(RINT "[xd3_decode_allocate] size %"W"u alloc %"W"u\n", + size, *buf_alloc)); + + if (*buf_ptr != NULL && *buf_alloc < size) + { + xd3_free (stream, *buf_ptr); + *buf_ptr = NULL; + } + + if (*buf_ptr == NULL) + { + *buf_alloc = xd3_round_blksize (size, XD3_ALLOCSIZE); + + if ((*buf_ptr = (uint8_t*) xd3_alloc (stream, *buf_alloc, 1)) == NULL) + { + return ENOMEM; + } + } + + return 0; +} + +static int +xd3_decode_section (xd3_stream *stream, + xd3_desect *section, + xd3_decode_state nstate, + int copy) +{ + XD3_ASSERT (section->pos <= section->size); + XD3_ASSERT (stream->dec_state != nstate); + + if (section->pos < section->size) + { + usize_t sect_take; + + if (stream->avail_in == 0) + { + return XD3_INPUT; + } + + if ((copy == 0) && (section->pos == 0)) + { + /* No allocation/copy needed */ + section->buf = stream->next_in; + sect_take = section->size; + IF_DEBUG1 (DP(RINT "[xd3_decode_section] zerocopy %"W"u @ %"W"u avail %"W"u\n", + sect_take, section->pos, stream->avail_in)); + } + else + { + usize_t sect_need = section->size - section->pos; + + /* Allocate and copy */ + sect_take = xd3_min (sect_need, stream->avail_in); + + if (section->pos == 0) + { + int ret; + + if ((ret = xd3_decode_allocate (stream, + section->size, + & section->copied1, + & section->alloc1))) + { + return ret; + } + + section->buf = section->copied1; + } + + IF_DEBUG2 (DP(RINT "[xd3_decode_section] take %"W"u @ %"W"u [need %"W"u] avail %"W"u\n", + sect_take, section->pos, sect_need, stream->avail_in)); + XD3_ASSERT (section->pos + sect_take <= section->alloc1); + + memcpy (section->copied1 + section->pos, + stream->next_in, + sect_take); + } + + section->pos += sect_take; + + stream->dec_winbytes += sect_take; + + DECODE_INPUT (sect_take); + } + + if (section->pos < 
section->size) + { + IF_DEBUG1 (DP(RINT "[xd3_decode_section] further input required %"W"u\n", + section->size - section->pos)); + stream->msg = "further input required"; + return XD3_INPUT; + } + + XD3_ASSERT (section->pos == section->size); + + stream->dec_state = nstate; + section->buf_max = section->buf + section->size; + section->pos = 0; + return 0; +} + +/* Decode the size and address for half of an instruction (i.e., a + * single opcode). This updates the stream->dec_position, which are + * bytes already output prior to processing this instruction. Perform + * bounds checking for sizes and copy addresses, which uses the + * dec_position (which is why these checks are done here). */ +static int +xd3_decode_parse_halfinst (xd3_stream *stream, xd3_hinst *inst) +{ + int ret; + + /* If the size from the instruction table is zero then read a size value. */ + if ((inst->size == 0) && + (ret = xd3_read_size (stream, + & stream->inst_sect.buf, + stream->inst_sect.buf_max, + & inst->size))) + { + return XD3_INVALID_INPUT; + } + + /* For copy instructions, read address. */ + if (inst->type >= XD3_CPY) + { + IF_DEBUG2 ({ + static int cnt = 0; + XPR(NT "DECODE:%u: COPY at %"Q"u (winoffset %"W"u) " + "size %"W"u winaddr %"W"u\n", + cnt++, + stream->total_out + (stream->dec_position - + stream->dec_cpylen), + (stream->dec_position - stream->dec_cpylen), + inst->size, + inst->addr); + }); + + if ((ret = xd3_decode_address (stream, + stream->dec_position, + inst->type - XD3_CPY, + & stream->addr_sect.buf, + stream->addr_sect.buf_max, + & inst->addr))) + { + return ret; + } + + /* Cannot copy an address before it is filled-in. */ + if (inst->addr >= stream->dec_position) + { + stream->msg = "address too large"; + return XD3_INVALID_INPUT; + } + + /* Check: a VCD_TARGET or VCD_SOURCE copy cannot exceed the remaining + * buffer space in its own segment. 
*/ + if (inst->addr < stream->dec_cpylen && + inst->addr + inst->size > stream->dec_cpylen) + { + stream->msg = "size too large"; + return XD3_INVALID_INPUT; + } + } + else + { + IF_DEBUG2 ({ + if (inst->type == XD3_ADD) + { + static int cnt; + XPR(NT "DECODE:%d: ADD at %"Q"u (winoffset %"W"u) size %"W"u\n", + cnt++, + (stream->total_out + stream->dec_position - stream->dec_cpylen), + stream->dec_position - stream->dec_cpylen, + inst->size); + } + else + { + static int cnt; + XD3_ASSERT (inst->type == XD3_RUN); + XPR(NT "DECODE:%d: RUN at %"Q"u (winoffset %"W"u) size %"W"u\n", + cnt++, + stream->total_out + stream->dec_position - stream->dec_cpylen, + stream->dec_position - stream->dec_cpylen, + inst->size); + } + }); + } + + /* Check: The instruction will not overflow the output buffer. */ + if (stream->dec_position + inst->size > stream->dec_maxpos) + { + stream->msg = "size too large"; + return XD3_INVALID_INPUT; + } + + stream->dec_position += inst->size; + return 0; +} + +/* Decode a single opcode and then decode the two half-instructions. */ +static int +xd3_decode_instruction (xd3_stream *stream) +{ + int ret; + const xd3_dinst *inst; + + if (stream->inst_sect.buf == stream->inst_sect.buf_max) + { + stream->msg = "instruction underflow"; + return XD3_INVALID_INPUT; + } + + inst = &stream->code_table[*stream->inst_sect.buf++]; + + stream->dec_current1.type = inst->type1; + stream->dec_current2.type = inst->type2; + stream->dec_current1.size = inst->size1; + stream->dec_current2.size = inst->size2; + + /* For each instruction with a real operation, decode the + * corresponding size and addresses if necessary. Assume a + * code-table may have NOOP in either position, although this is + * unlikely. 
*/ + if (inst->type1 != XD3_NOOP && + (ret = xd3_decode_parse_halfinst (stream, & stream->dec_current1))) + { + return ret; + } + if (inst->type2 != XD3_NOOP && + (ret = xd3_decode_parse_halfinst (stream, & stream->dec_current2))) + { + return ret; + } + return 0; +} + +/* Output the result of a single half-instruction. OPT: This the + decoder hotspot. Modifies "hinst", see below. */ +static int +xd3_decode_output_halfinst (xd3_stream *stream, xd3_hinst *inst) +{ + /* This method is reentrant for copy instructions which may return + * XD3_GETSRCBLK to the caller. Each time through a copy takes the + * minimum of inst->size and the available space on whichever block + * supplies the data */ + usize_t take = inst->size; + + if (USIZE_T_OVERFLOW (stream->avail_out, take) || + stream->avail_out + take > stream->space_out) + { + stream->msg = "overflow while decoding"; + return XD3_INVALID_INPUT; + } + + XD3_ASSERT (inst->type != XD3_NOOP); + + switch (inst->type) + { + case XD3_RUN: + { + /* Only require a single data byte. */ + if (stream->data_sect.buf == stream->data_sect.buf_max) + { + stream->msg = "data underflow"; + return XD3_INVALID_INPUT; + } + + memset (stream->next_out + stream->avail_out, + stream->data_sect.buf[0], + take); + + stream->data_sect.buf += 1; + stream->avail_out += take; + inst->type = XD3_NOOP; + break; + } + case XD3_ADD: + { + /* Require at least TAKE data bytes. */ + if (stream->data_sect.buf + take > stream->data_sect.buf_max) + { + stream->msg = "data underflow"; + return XD3_INVALID_INPUT; + } + + memcpy (stream->next_out + stream->avail_out, + stream->data_sect.buf, + take); + + stream->data_sect.buf += take; + stream->avail_out += take; + inst->type = XD3_NOOP; + break; + } + default: + { + usize_t i; + const uint8_t *src; + uint8_t *dst; + int overlap; + + /* See if it copies from the VCD_TARGET/VCD_SOURCE window or + * the target window. 
Out-of-bounds checks for the addresses + * and sizes are performed in xd3_decode_parse_halfinst. This + * if/else must set "overlap", "src", and "dst". */ + if (inst->addr < stream->dec_cpylen) + { + /* In both branches we are copying from outside the + * current decoder window, the first (VCD_TARGET) is + * unimplemented. */ + overlap = 0; + + /* This branch sets "src". As a side-effect, we modify + * "inst" so that if we reenter this method after a + * XD3_GETSRCBLK response the state is correct. So if the + * instruction can be fulfilled by a contiguous block of + * memory then we will set: + * + * inst->type = XD3_NOOP; + * inst->size = 0; + */ + if (stream->dec_win_ind & VCD_TARGET) + { + /* TODO: Users have requested long-distance copies of + * similar material within a target (e.g., for dup + * supression in backups). This code path is probably + * dead due to XD3_UNIMPLEMENTED in xd3_decode_setup_buffers */ + inst->size = 0; + inst->type = XD3_NOOP; + stream->msg = "VCD_TARGET not implemented"; + return XD3_UNIMPLEMENTED; + } + else + { + /* In this case we have to read a source block, which + * could return control to the caller. We need to + * know the first block number needed for this + * copy. */ + xd3_source *source = stream->src; + xoff_t block = source->cpyoff_blocks; + usize_t blkoff = source->cpyoff_blkoff; + const usize_t blksize = source->blksize; + int ret; + + xd3_blksize_add (&block, &blkoff, source, inst->addr); + XD3_ASSERT (blkoff < blksize); + + if ((ret = xd3_getblk (stream, block))) + { + /* could be a XD3_GETSRCBLK failure. */ + if (ret == XD3_TOOFARBACK) + { + stream->msg = "non-seekable source in decode"; + ret = XD3_INTERNAL; + } + return ret; + } + + src = source->curblk + blkoff; + + /* This block is either full, or a partial block that + * must contain enough bytes. 
*/ + if ((source->onblk != blksize) && + (blkoff + take > source->onblk)) + { + IF_DEBUG1 (XPR(NT "[srcfile] short at blkno %"Q"u onblk " + "%"W"u blksize %"W"u blkoff %"W"u take %"W"u\n", + block, + source->onblk, + blksize, + blkoff, + take)); + stream->msg = "source file too short"; + return XD3_INVALID_INPUT; + } + + XD3_ASSERT (blkoff != blksize); + + /* Check if we have enough data on this block to + * finish the instruction. */ + if (blkoff + take <= blksize) + { + inst->type = XD3_NOOP; + inst->size = 0; + } + else + { + take = blksize - blkoff; + inst->size -= take; + inst->addr += take; + + /* because (blkoff + take > blksize), above */ + XD3_ASSERT (inst->size != 0); + } + } + } + else + { + /* TODO: the memcpy/overlap optimization, etc. Overlap + * here could be more specific, it's whether (inst->addr - + * srclen) + inst->size > input_pos ? And is the system + * memcpy really any good? */ + overlap = 1; + + /* For a target-window copy, we know the entire range is + * in-memory. The dec_tgtaddrbase is negatively offset by + * dec_cpylen because the addresses start beyond that + * point. */ + src = stream->dec_tgtaddrbase + inst->addr; + inst->type = XD3_NOOP; + inst->size = 0; + } + + dst = stream->next_out + stream->avail_out; + + stream->avail_out += take; + + if (overlap) + { + /* Can't just memcpy here due to possible overlap. 
*/ + for (i = take; i != 0; i -= 1) + { + *dst++ = *src++; + } + } + else + { + memcpy (dst, src, take); + } + } + } + + return 0; +} + +static int +xd3_decode_finish_window (xd3_stream *stream) +{ + stream->dec_winbytes = 0; + stream->dec_state = DEC_FINISH; + + stream->data_sect.pos = 0; + stream->inst_sect.pos = 0; + stream->addr_sect.pos = 0; + + return XD3_OUTPUT; +} + +static int +xd3_decode_secondary_sections (xd3_stream *secondary_stream) +{ +#if SECONDARY_ANY + int ret; +#define DECODE_SECONDARY_SECTION(UPPER,LOWER) \ + ((secondary_stream->dec_del_ind & VCD_ ## UPPER ## COMP) && \ + (ret = xd3_decode_secondary (secondary_stream, \ + & secondary_stream-> LOWER ## _sect, \ + & xd3_sec_ ## LOWER (secondary_stream)))) + + if (DECODE_SECONDARY_SECTION (DATA, data) || + DECODE_SECONDARY_SECTION (INST, inst) || + DECODE_SECONDARY_SECTION (ADDR, addr)) + { + return ret; + } +#undef DECODE_SECONDARY_SECTION +#endif + return 0; +} + +static int +xd3_decode_sections (xd3_stream *stream) +{ + usize_t need, more, take; + int copy, ret; + + if ((stream->flags & XD3_JUST_HDR) != 0) + { + /* Nothing left to do. */ + return xd3_decode_finish_window (stream); + } + + /* To avoid extra copying, allocate three sections at once (but + * check for overflow). */ + need = stream->inst_sect.size; + + if (USIZE_T_OVERFLOW (need, stream->addr_sect.size)) + { + stream->msg = "decoder section size overflow"; + return XD3_INTERNAL; + } + need += stream->addr_sect.size; + + if (USIZE_T_OVERFLOW (need, stream->data_sect.size)) + { + stream->msg = "decoder section size overflow"; + return XD3_INTERNAL; + } + need += stream->data_sect.size; + + /* The window may be entirely processed. */ + XD3_ASSERT (stream->dec_winbytes <= need); + + /* Compute how much more input is needed. */ + more = (need - stream->dec_winbytes); + + /* How much to consume. */ + take = xd3_min (more, stream->avail_in); + + /* See if the input is completely available, to avoid copy. 
*/ + copy = (take != more); + + /* If the window is skipped... */ + if ((stream->flags & XD3_SKIP_WINDOW) != 0) + { + /* Skip the available input. */ + DECODE_INPUT (take); + + stream->dec_winbytes += take; + + if (copy) + { + stream->msg = "further input required"; + return XD3_INPUT; + } + + return xd3_decode_finish_window (stream); + } + + /* Process all but the DATA section. */ + switch (stream->dec_state) + { + default: + stream->msg = "internal error"; + return XD3_INVALID_INPUT; + + case DEC_DATA: + if ((ret = xd3_decode_section (stream, & stream->data_sect, + DEC_INST, copy))) { return ret; } + case DEC_INST: + if ((ret = xd3_decode_section (stream, & stream->inst_sect, + DEC_ADDR, copy))) { return ret; } + case DEC_ADDR: + if ((ret = xd3_decode_section (stream, & stream->addr_sect, + DEC_EMIT, copy))) { return ret; } + } + + XD3_ASSERT (stream->dec_winbytes == need); + + if ((ret = xd3_decode_secondary_sections (stream))) { return ret; } + + if (stream->flags & XD3_SKIP_EMIT) + { + return xd3_decode_finish_window (stream); + } + + /* OPT: A possible optimization is to avoid allocating memory in + * decode_setup_buffers and to avoid a large memcpy when the window + * consists of a single VCD_SOURCE copy instruction. */ + if ((ret = xd3_decode_setup_buffers (stream))) { return ret; } + + return 0; +} + +static int +xd3_decode_emit (xd3_stream *stream) +{ + int ret; + + /* Produce output: originally structured to allow reentrant code + * that fills as much of the output buffer as possible, but VCDIFF + * semantics allows to copy from anywhere from the target window, so + * instead allocate a sufficiently sized buffer after the target + * window length is decoded. + * + * This code still needs to be reentrant to allow XD3_GETSRCBLK to + * return control. This is handled by setting the + * stream->dec_currentN instruction types to XD3_NOOP after they + * have been processed. */ + XD3_ASSERT (! 
(stream->flags & XD3_SKIP_EMIT)); + XD3_ASSERT (stream->dec_tgtlen <= stream->space_out); + + while (stream->inst_sect.buf != stream->inst_sect.buf_max || + stream->dec_current1.type != XD3_NOOP || + stream->dec_current2.type != XD3_NOOP) + { + /* Decode next instruction pair. */ + if ((stream->dec_current1.type == XD3_NOOP) && + (stream->dec_current2.type == XD3_NOOP) && + (ret = xd3_decode_instruction (stream))) { return ret; } + + /* Output dec_current1 */ + while ((stream->dec_current1.type != XD3_NOOP)) + { + if ((ret = xd3_decode_output_halfinst (stream, & stream->dec_current1))) + { + return ret; + } + } + /* Output dec_current2 */ + while (stream->dec_current2.type != XD3_NOOP) + { + if ((ret = xd3_decode_output_halfinst (stream, & stream->dec_current2))) + { + return ret; + } + } + } + + if (stream->avail_out != stream->dec_tgtlen) + { + IF_DEBUG2 (DP(RINT "AVAIL_OUT(%"W"u) != DEC_TGTLEN(%"W"u)\n", + stream->avail_out, stream->dec_tgtlen)); + stream->msg = "wrong window length"; + return XD3_INVALID_INPUT; + } + + if (stream->data_sect.buf != stream->data_sect.buf_max) + { + stream->msg = "extra data section"; + return XD3_INVALID_INPUT; + } + + if (stream->addr_sect.buf != stream->addr_sect.buf_max) + { + stream->msg = "extra address section"; + return XD3_INVALID_INPUT; + } + + /* OPT: Should cksum computation be combined with the above loop? */ + if ((stream->dec_win_ind & VCD_ADLER32) != 0 && + (stream->flags & XD3_ADLER32_NOVER) == 0) + { + uint32_t a32 = adler32 (1L, stream->next_out, stream->avail_out); + + if (a32 != stream->dec_adler32) + { + stream->msg = "target window checksum mismatch"; + return XD3_INVALID_INPUT; + } + } + + /* Finished with a window. 
*/ + return xd3_decode_finish_window (stream); +} + +int +xd3_decode_input (xd3_stream *stream) +{ + int ret; + + if (stream->enc_state != 0) + { + stream->msg = "encoder/decoder transition"; + return XD3_INVALID_INPUT; + } + +#define BYTE_CASE(expr,x,nstate) \ + do { \ + if ( (expr) && \ + ((ret = xd3_decode_byte (stream, & (x))) != 0) ) { return ret; } \ + stream->dec_state = (nstate); \ + } while (0) + +#define OFFSET_CASE(expr,x,nstate) \ + do { \ + if ( (expr) && \ + ((ret = xd3_decode_offset (stream, & (x))) != 0) ) { return ret; } \ + stream->dec_state = (nstate); \ + } while (0) + +#define SIZE_CASE(expr,x,nstate) \ + do { \ + if ( (expr) && \ + ((ret = xd3_decode_size (stream, & (x))) != 0) ) { return ret; } \ + stream->dec_state = (nstate); \ + } while (0) + + switch (stream->dec_state) + { + case DEC_VCHEAD: + { + if ((ret = xd3_decode_bytes (stream, stream->dec_magic, + & stream->dec_magicbytes, 4))) + { + return ret; + } + + if (stream->dec_magic[0] != VCDIFF_MAGIC1 || + stream->dec_magic[1] != VCDIFF_MAGIC2 || + stream->dec_magic[2] != VCDIFF_MAGIC3) + { + stream->msg = "not a VCDIFF input"; + return XD3_INVALID_INPUT; + } + + if (stream->dec_magic[3] != 0) + { + stream->msg = "VCDIFF input version > 0 is not supported"; + return XD3_INVALID_INPUT; + } + + stream->dec_state = DEC_HDRIND; + } + case DEC_HDRIND: + { + if ((ret = xd3_decode_byte (stream, & stream->dec_hdr_ind))) + { + return ret; + } + + if ((stream->dec_hdr_ind & VCD_INVHDR) != 0) + { + stream->msg = "unrecognized header indicator bits set"; + return XD3_INVALID_INPUT; + } + + stream->dec_state = DEC_SECONDID; + } + + case DEC_SECONDID: + /* Secondary compressor ID: only if VCD_SECONDARY is set */ + if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0) + { + BYTE_CASE (1, stream->dec_secondid, DEC_TABLEN); + + switch (stream->dec_secondid) + { + case VCD_FGK_ID: + FGK_CASE (stream); + case VCD_DJW_ID: + DJW_CASE (stream); + case VCD_LZMA_ID: + LZMA_CASE (stream); + default: + stream->msg = 
"unknown secondary compressor ID"; + return XD3_INVALID_INPUT; + } + } + + case DEC_TABLEN: + /* Length of code table data: only if VCD_CODETABLE is set */ + SIZE_CASE ((stream->dec_hdr_ind & VCD_CODETABLE) != 0, + stream->dec_codetblsz, DEC_NEAR); + + /* The codetblsz counts the two NEAR/SAME bytes */ + if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) { + if (stream->dec_codetblsz <= 2) { + stream->msg = "invalid code table size"; + return ENOMEM; + } + stream->dec_codetblsz -= 2; + } + case DEC_NEAR: + /* Near modes: only if VCD_CODETABLE is set */ + BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0, + stream->acache.s_near, DEC_SAME); + case DEC_SAME: + /* Same modes: only if VCD_CODETABLE is set */ + BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0, + stream->acache.s_same, DEC_TABDAT); + case DEC_TABDAT: + /* Compressed code table data */ + + if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) + { + stream->msg = "VCD_CODETABLE support was removed"; + return XD3_UNIMPLEMENTED; + } + else + { + /* Use the default table. */ + stream->acache.s_near = __rfc3284_code_table_desc.near_modes; + stream->acache.s_same = __rfc3284_code_table_desc.same_modes; + stream->code_table = xd3_rfc3284_code_table (); + } + + if ((ret = xd3_alloc_cache (stream))) { return ret; } + + stream->dec_state = DEC_APPLEN; + + case DEC_APPLEN: + /* Length of application data */ + SIZE_CASE((stream->dec_hdr_ind & VCD_APPHEADER) != 0, + stream->dec_appheadsz, DEC_APPDAT); + + case DEC_APPDAT: + /* Application data */ + if (stream->dec_hdr_ind & VCD_APPHEADER) + { + /* Note: we add an additional byte for padding, to allow + 0-termination. 
Check for overflow: */ + if (USIZE_T_OVERFLOW(stream->dec_appheadsz, 1)) + { + stream->msg = "exceptional appheader size"; + return XD3_INVALID_INPUT; + } + + if ((stream->dec_appheader == NULL) && + (stream->dec_appheader = + (uint8_t*) xd3_alloc (stream, + stream->dec_appheadsz+1, 1)) == NULL) + { + return ENOMEM; + } + + stream->dec_appheader[stream->dec_appheadsz] = 0; + + if ((ret = xd3_decode_bytes (stream, stream->dec_appheader, + & stream->dec_appheadbytes, + stream->dec_appheadsz))) + { + return ret; + } + } + + /* xoff_t -> usize_t is safe because this is the first block. */ + stream->dec_hdrsize = (usize_t) stream->total_in; + stream->dec_state = DEC_WININD; + + case DEC_WININD: + { + /* Start of a window: the window indicator */ + if ((ret = xd3_decode_byte (stream, & stream->dec_win_ind))) + { + return ret; + } + + stream->current_window = stream->dec_window_count; + + if (XOFF_T_OVERFLOW (stream->dec_winstart, stream->dec_tgtlen)) + { + stream->msg = "decoder file offset overflow"; + return XD3_INVALID_INPUT; + } + + stream->dec_winstart += stream->dec_tgtlen; + + if ((stream->dec_win_ind & VCD_INVWIN) != 0) + { + stream->msg = "unrecognized window indicator bits set"; + return XD3_INVALID_INPUT; + } + + if ((ret = xd3_decode_init_window (stream))) { return ret; } + + stream->dec_state = DEC_CPYLEN; + + IF_DEBUG2 (DP(RINT "--------- TARGET WINDOW %"Q"u -----------\n", + stream->current_window)); + } + + case DEC_CPYLEN: + /* Copy window length: only if VCD_SOURCE or VCD_TARGET is set */ + SIZE_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpylen, + DEC_CPYOFF); + + /* Set the initial, logical decoder position (HERE address) in + * dec_position. This is set to just after the source/copy + * window, as we are just about to output the first byte of + * target window. 
*/ + stream->dec_position = stream->dec_cpylen; + + case DEC_CPYOFF: + /* Copy window offset: only if VCD_SOURCE or VCD_TARGET is set */ + OFFSET_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpyoff, + DEC_ENCLEN); + + /* Copy offset and copy length may not overflow. */ + if (XOFF_T_OVERFLOW (stream->dec_cpyoff, stream->dec_cpylen)) + { + stream->msg = "decoder copy window overflows a file offset"; + return XD3_INVALID_INPUT; + } + + /* Check copy window bounds: VCD_TARGET window may not exceed + current position. */ + if ((stream->dec_win_ind & VCD_TARGET) && + (stream->dec_cpyoff + stream->dec_cpylen > + stream->dec_winstart)) + { + stream->msg = "VCD_TARGET window out of bounds"; + return XD3_INVALID_INPUT; + } + + case DEC_ENCLEN: + /* Length of the delta encoding */ + SIZE_CASE(1, stream->dec_enclen, DEC_TGTLEN); + case DEC_TGTLEN: + /* Length of target window */ + SIZE_CASE(1, stream->dec_tgtlen, DEC_DELIND); + + /* Set the maximum decoder position, beyond which we should not + * decode any data. This is the maximum value for dec_position. + * This may not exceed the size of a usize_t. */ + if (USIZE_T_OVERFLOW (stream->dec_cpylen, stream->dec_tgtlen)) + { + stream->msg = "decoder target window overflows a usize_t"; + return XD3_INVALID_INPUT; + } + + /* Check for malicious files. */ + if (stream->dec_tgtlen > XD3_HARDMAXWINSIZE) + { + stream->msg = "hard window size exceeded"; + return XD3_INVALID_INPUT; + } + + stream->dec_maxpos = stream->dec_cpylen + stream->dec_tgtlen; + + case DEC_DELIND: + /* Delta indicator */ + BYTE_CASE(1, stream->dec_del_ind, DEC_DATALEN); + + if ((stream->dec_del_ind & VCD_INVDEL) != 0) + { + stream->msg = "unrecognized delta indicator bits set"; + return XD3_INVALID_INPUT; + } + + /* Delta indicator is only used with secondary compression. 
*/ + if ((stream->dec_del_ind != 0) && (stream->sec_type == NULL)) + { + stream->msg = "invalid delta indicator bits set"; + return XD3_INVALID_INPUT; + } + + /* Section lengths */ + case DEC_DATALEN: + SIZE_CASE(1, stream->data_sect.size, DEC_INSTLEN); + case DEC_INSTLEN: + SIZE_CASE(1, stream->inst_sect.size, DEC_ADDRLEN); + case DEC_ADDRLEN: + SIZE_CASE(1, stream->addr_sect.size, DEC_CKSUM); + + case DEC_CKSUM: + /* Window checksum. */ + if ((stream->dec_win_ind & VCD_ADLER32) != 0) + { + int i; + + if ((ret = xd3_decode_bytes (stream, stream->dec_cksum, + & stream->dec_cksumbytes, 4))) + { + return ret; + } + + for (i = 0; i < 4; i += 1) + { + stream->dec_adler32 = + (stream->dec_adler32 << 8) | stream->dec_cksum[i]; + } + } + + stream->dec_state = DEC_DATA; + + /* Check dec_enclen for redundency, otherwise it is not really used. */ + { + usize_t enclen_check = + (1 + (xd3_sizeof_size (stream->dec_tgtlen) + + xd3_sizeof_size (stream->data_sect.size) + + xd3_sizeof_size (stream->inst_sect.size) + + xd3_sizeof_size (stream->addr_sect.size)) + + stream->data_sect.size + + stream->inst_sect.size + + stream->addr_sect.size + + ((stream->dec_win_ind & VCD_ADLER32) ? 4 : 0)); + + if (stream->dec_enclen != enclen_check) + { + stream->msg = "incorrect encoding length (redundent)"; + return XD3_INVALID_INPUT; + } + } + + /* Returning here gives the application a chance to inspect the + * header, skip the window, etc. */ + if (stream->current_window == 0) { return XD3_GOTHEADER; } + else { return XD3_WINSTART; } + + case DEC_DATA: + case DEC_INST: + case DEC_ADDR: + /* Next read the three sections. */ + if ((ret = xd3_decode_sections (stream))) { return ret; } + + case DEC_EMIT: + + /* To speed VCD_SOURCE block-address calculations, the source + * cpyoff_blocks and cpyoff_blkoff are pre-computed. 
*/ + if (stream->dec_win_ind & VCD_SOURCE) + { + xd3_source *src = stream->src; + + if (src == NULL) + { + stream->msg = "source input required"; + return XD3_INVALID_INPUT; + } + + xd3_blksize_div(stream->dec_cpyoff, src, + &src->cpyoff_blocks, + &src->cpyoff_blkoff); + + IF_DEBUG2(DP(RINT + "[decode_cpyoff] %"Q"u " + "cpyblkno %"Q"u " + "cpyblkoff %"W"u " + "blksize %"W"u\n", + stream->dec_cpyoff, + src->cpyoff_blocks, + src->cpyoff_blkoff, + src->blksize)); + } + + /* xd3_decode_emit returns XD3_OUTPUT on every success. */ + if ((ret = xd3_decode_emit (stream)) == XD3_OUTPUT) + { + stream->total_out += stream->avail_out; + } + + return ret; + + case DEC_FINISH: + { + if (stream->dec_win_ind & VCD_TARGET) + { + if (stream->dec_lastwin == NULL) + { + stream->dec_lastwin = stream->next_out; + stream->dec_lastspace = stream->space_out; + } + else + { + xd3_swap_uint8p (& stream->dec_lastwin, + & stream->next_out); + xd3_swap_usize_t (& stream->dec_lastspace, + & stream->space_out); + } + } + + stream->dec_lastlen = stream->dec_tgtlen; + stream->dec_laststart = stream->dec_winstart; + stream->dec_window_count += 1; + + /* Note: the updates to dec_winstart & current_window are + * deferred until after the next DEC_WININD byte is read. */ + stream->dec_state = DEC_WININD; + return XD3_WINFINISH; + } + + default: + stream->msg = "invalid state"; + return XD3_INVALID_INPUT; + } +} + +#endif // _XDELTA3_DECODE_H_ diff --git a/lib/xdelta3/xdelta3-djw.h b/lib/xdelta3/xdelta3-djw.h new file mode 100644 index 0000000..e4a5d1f --- /dev/null +++ b/lib/xdelta3/xdelta3-djw.h @@ -0,0 +1,1835 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _XDELTA3_DJW_H_ +#define _XDELTA3_DJW_H_ + +/* The following people deserve much credit for the algorithms and + * techniques contained in this file: + + Julian Seward + Bzip2 sources, implementation of the multi-table Huffman technique. + + Jean-loup Gailly and Mark Adler and L. Peter Deutsch + Zlib source code, RFC 1951 + + Daniel S. Hirschberg and Debra A. LeLewer + "Efficient Decoding of Prefix Codes" + Communications of the ACM, April 1990 33(4). + + David J. Wheeler + Program bred3.c, bexp3 and accompanying documents bred3.ps, huff.ps. + This contains the idea behind the multi-table Huffman and 1-2 coding + techniques. + ftp://ftp.cl.cam.ac.uk/users/djw3/ + +*/ + +/* OPT: during the multi-table iteration, pick the worst-overall + * performing table and replace it with exactly the frequencies of the + * worst-overall performing sector or N-worst performing sectors. */ + +/* REF: See xdfs-0.222 and xdfs-0.226 for some old experiments with + * the Bzip prefix coding strategy. xdfs-0.256 contains the last of + * the other-format tests, including RFC1950 and the RFC1950+MTF + * tests. */ + +#define DJW_MAX_CODELEN 20U /* Maximum length of an alphabet code. */ + +/* Code lengths are themselves code-length encoded, so the total number of + * codes is: [RUN_0, RUN_1, 1-DJW_MAX_CODELEN] */ +#define DJW_TOTAL_CODES (DJW_MAX_CODELEN+2) + +#define RUN_0 0U /* Symbols used in MTF+1/2 coding. 
*/ +#define RUN_1 1U + +/* Number of code lengths always encoded (djw_encode_basic array) */ +#define DJW_BASIC_CODES 5U +#define DJW_RUN_CODES 2U /* Number of run codes */ + +/* Offset of extra codes */ +#define DJW_EXTRA_12OFFSET (DJW_BASIC_CODES + DJW_RUN_CODES) + +/* Number of optionally encoded code lengths (djw_encode_extra array) */ +#define DJW_EXTRA_CODES 15U + +/* Number of bits to code [0-DJW_EXTRA_CODES] */ +#define DJW_EXTRA_CODE_BITS 4U + +#define DJW_MAX_GROUPS 8U /* Max number of group coding tables */ +#define DJW_GROUP_BITS 3U /* Number of bits to code [1-DJW_MAX_GROUPS] */ + +#define DJW_SECTORSZ_MULT 5U /* Multiplier for encoded sectorsz */ +#define DJW_SECTORSZ_BITS 5U /* Number of bits to code group size */ +#define DJW_SECTORSZ_MAX ((1U << DJW_SECTORSZ_BITS) * DJW_SECTORSZ_MULT) + +/* Maximum number of iterations to find group tables. */ +#define DJW_MAX_ITER 6U +/* Minimum number of bits an iteration must reduce coding by. */ +#define DJW_MIN_IMPROVEMENT 20U + +/* Maximum code length of a prefix code length */ +#define DJW_MAX_CLCLEN 15U + +/* Number of bits to code [0-DJW_MAX_CLCLEN] */ +#define DJW_CLCLEN_BITS 4U + +#define DJW_MAX_GBCLEN 7U /* Maximum code length of a group selector */ + +/* Number of bits to code [0-DJW_MAX_GBCLEN] + * TODO: Actually, should never have zero code lengths here, or else a group + * went unused. Write a test for this: if a group goes unused, eliminate + * it? */ +#define DJW_GBCLEN_BITS 3U + +/* It has to save at least this many bits... 
*/ +#define EFFICIENCY_BITS 16U + +typedef struct _djw_stream djw_stream; +typedef struct _djw_heapen djw_heapen; +typedef struct _djw_prefix djw_prefix; +typedef uint32_t djw_weight; + +struct _djw_heapen +{ + uint32_t depth; + uint32_t freq; + uint32_t parent; +}; + +struct _djw_prefix +{ + usize_t scount; + uint8_t *symbol; + usize_t mcount; + uint8_t *mtfsym; + uint8_t *repcnt; +}; + +struct _djw_stream +{ + int unused; +}; + +/* Each Huffman table consists of 256 "code length" (CLEN) codes, + * which are themselves Huffman coded after eliminating repeats and + * move-to-front coding. The prefix consists of all the CLEN codes in + * djw_encode_basic plus a 4-bit value stating how many of the + * djw_encode_extra codes are actually coded (the rest are presumed + * zero, or unused CLEN codes). + * + * These values of these two arrays were arrived at by studying the + * distribution of min and max clen over a collection of DATA, INST, + * and ADDR inputs. The goal is to specify the order of + * djw_extra_codes that is most likely to minimize the number of extra + * codes that must be encoded. + * + * Results: 158896 sections were counted by compressing files (window + * size 512K) listed with: `find / -type f ( -user jmacd -o -perm +444 + * )` + * + * The distribution of CLEN codes for each efficient invocation of the + * secondary compressor (taking the best number of groups/sector size) + * was recorded. Then we look at the distribution of min and max clen + * values, counting the number of times the value C_low is less than + * the min and C_high is greater than the max. Values >= C_high and + * <= C_low will not have their lengths coded. The results are sorted + * and the least likely 15 are placed into the djw_encode_extra[] + * array in order. These values are used as the initial MTF ordering. 
+ + clow[1] = 155119 + clow[2] = 140325 + clow[3] = 84072 + --- + clow[4] = 7225 + clow[5] = 1093 + clow[6] = 215 + --- + chigh[4] = 1 + chigh[5] = 30 + chigh[6] = 218 + chigh[7] = 2060 + chigh[8] = 13271 + --- + chigh[9] = 39463 + chigh[10] = 77360 + chigh[11] = 118298 + chigh[12] = 141360 + chigh[13] = 154086 + chigh[14] = 157967 + chigh[15] = 158603 + chigh[16] = 158864 + chigh[17] = 158893 + chigh[18] = 158895 + chigh[19] = 158896 + chigh[20] = 158896 + +*/ + +static const uint8_t djw_encode_12extra[DJW_EXTRA_CODES] = + { + 9, 10, 3, 11, 2, 12, 13, 1, 14, 15, 16, 17, 18, 19, 20, + }; + +static const uint8_t djw_encode_12basic[DJW_BASIC_CODES] = + { + 4, 5, 6, 7, 8, + }; + +/*********************************************************************/ +/* DECLS */ +/*********************************************************************/ + +static djw_stream* djw_alloc (xd3_stream *stream); +static int djw_init (xd3_stream *stream, + djw_stream *h, + int is_encode); +static void djw_destroy (xd3_stream *stream, + djw_stream *h); + +#if XD3_ENCODER +static int xd3_encode_huff (xd3_stream *stream, + djw_stream *sec_stream, + xd3_output *input, + xd3_output *output, + xd3_sec_cfg *cfg); +#endif + +static int xd3_decode_huff (xd3_stream *stream, + djw_stream *sec_stream, + const uint8_t **input, + const uint8_t *const input_end, + uint8_t **output, + const uint8_t *const output_end); + +/*********************************************************************/ +/* HUFFMAN */ +/*********************************************************************/ + +static djw_stream* +djw_alloc (xd3_stream *stream) +{ + return (djw_stream*) xd3_alloc (stream, sizeof (djw_stream), 1); +} + +static int +djw_init (xd3_stream *stream, djw_stream *h, int is_encode) +{ + /* Fields are initialized prior to use. 
*/ + return 0; +} + +static void +djw_destroy (xd3_stream *stream, + djw_stream *h) +{ + xd3_free (stream, h); +} + + +/*********************************************************************/ +/* HEAP */ +/*********************************************************************/ + +static inline int +heap_less (const djw_heapen *a, const djw_heapen *b) +{ + return a->freq < b->freq || + (a->freq == b->freq && + a->depth < b->depth); +} + +static inline void +heap_insert (usize_t *heap, const djw_heapen *ents, usize_t p, const usize_t e) +{ + /* Insert ents[e] into next slot heap[p] */ + usize_t pp = p/2; /* P's parent */ + + while (heap_less (& ents[e], & ents[heap[pp]])) + { + heap[p] = heap[pp]; + p = pp; + pp = p/2; + } + + heap[p] = e; +} + +static inline djw_heapen* +heap_extract (usize_t *heap, const djw_heapen *ents, usize_t heap_last) +{ + usize_t smallest = heap[1]; + usize_t p, pc, t; + + /* Caller decrements heap_last, so heap_last+1 is the replacement elt. */ + heap[1] = heap[heap_last+1]; + + /* Re-heapify */ + for (p = 1; ; p = pc) + { + pc = p*2; + + /* Reached bottom of heap */ + if (pc > heap_last) { break; } + + /* See if second child is smaller. */ + if (pc < heap_last && heap_less (& ents[heap[pc+1]], & ents[heap[pc]])) + { + pc += 1; + } + + /* If pc is not smaller than p, heap property re-established. */ + if (! heap_less (& ents[heap[pc]], & ents[heap[p]])) { break; } + + t = heap[pc]; + heap[pc] = heap[p]; + heap[p] = t; + } + + return (djw_heapen*) & ents[smallest]; +} + +#if XD3_DEBUG +static void +heap_check (usize_t *heap, djw_heapen *ents, usize_t heap_last) +{ + usize_t i; + for (i = 1; i <= heap_last; i += 1) + { + /* Heap property: child not less than parent */ + XD3_ASSERT (! 
heap_less (& ents[heap[i]], & ents[heap[i/2]])); + + IF_DEBUG2 (DP(RINT "heap[%"W"u] = %u\n", i, ents[heap[i]].freq)); + } +} +#endif + +/*********************************************************************/ +/* MTF, 1/2 */ +/*********************************************************************/ + +static inline usize_t +djw_update_mtf (uint8_t *mtf, usize_t mtf_i) +{ + int k; + usize_t sym = mtf[mtf_i]; + + for (k = mtf_i; k != 0; k -= 1) { mtf[k] = mtf[k-1]; } + + mtf[0] = sym; + return sym; +} + +static inline void +djw_update_1_2 (int *mtf_run, usize_t *mtf_i, + uint8_t *mtfsym, djw_weight *freq) +{ + uint8_t code; + + do + { + /* Offset by 1, since any number of RUN_ symbols implies run>0... */ + *mtf_run -= 1; + + code = (*mtf_run & 1) ? RUN_1 : RUN_0; + + mtfsym[(*mtf_i)++] = code; + freq[code] += 1; + *mtf_run >>= 1; + } + while (*mtf_run >= 1); + + *mtf_run = 0; +} + +static void +djw_init_clen_mtf_1_2 (uint8_t *clmtf) +{ + usize_t i, cl_i = 0; + + clmtf[cl_i++] = 0; + for (i = 0; i < DJW_BASIC_CODES; i += 1) + { + clmtf[cl_i++] = djw_encode_12basic[i]; + } + for (i = 0; i < DJW_EXTRA_CODES; i += 1) + { + clmtf[cl_i++] = djw_encode_12extra[i]; + } +} + +/*********************************************************************/ +/* PREFIX CODES */ +/*********************************************************************/ +#if XD3_ENCODER +static usize_t +djw_build_prefix (const djw_weight *freq, uint8_t *clen, usize_t asize, usize_t maxlen) +{ + /* Heap with 0th entry unused, prefix tree with up to ALPHABET_SIZE-1 + * internal nodes, never more than ALPHABET_SIZE entries actually in the + * heap (minimum weight subtrees during prefix construction). First + * ALPHABET_SIZE entries are the actual symbols, next ALPHABET_SIZE-1 are + * internal nodes. */ + djw_heapen ents[ALPHABET_SIZE * 2]; + usize_t heap[ALPHABET_SIZE + 1]; + + usize_t heap_last; /* Index of the last _valid_ heap entry. 
*/ + usize_t ents_size; /* Number of entries, including 0th fake entry */ + usize_t overflow; /* Number of code lengths that overflow */ + usize_t total_bits; + usize_t i; + + IF_DEBUG (usize_t first_bits = 0); + + /* Insert real symbol frequences. */ + for (i = 0; i < asize; i += 1) + { + ents[i+1].freq = freq[i]; + IF_DEBUG2 (DP(RINT "ents[%"W"i] = freq[%"W"u] = %d\n", + i+1, i, freq[i])); + } + + again: + + /* The loop is re-entered each time an overflow occurs. Re-initialize... */ + heap_last = 0; + ents_size = 1; + overflow = 0; + total_bits = 0; + + /* 0th entry terminates the while loop in heap_insert (it's the parent of + * the smallest element, always less-than) */ + heap[0] = 0; + ents[0].depth = 0; + ents[0].freq = 0; + + /* Initial heap. */ + for (i = 0; i < asize; i += 1, ents_size += 1) + { + ents[ents_size].depth = 0; + ents[ents_size].parent = 0; + + if (ents[ents_size].freq != 0) + { + heap_insert (heap, ents, ++heap_last, ents_size); + } + } + + IF_DEBUG (heap_check (heap, ents, heap_last)); + + /* Must be at least one symbol, or else we can't get here. */ + XD3_ASSERT (heap_last != 0); + + /* If there is only one symbol, fake a second to prevent zero-length + * codes. */ + if (heap_last == 1) + { + /* Pick either the first or last symbol. */ + usize_t s = freq[0] ? asize-1 : 0; + ents[s+1].freq = 1; + goto again; + } + + /* Build prefix tree. */ + while (heap_last > 1) + { + djw_heapen *h1 = heap_extract (heap, ents, --heap_last); + djw_heapen *h2 = heap_extract (heap, ents, --heap_last); + + ents[ents_size].freq = h1->freq + h2->freq; + ents[ents_size].depth = 1 + xd3_max (h1->depth, h2->depth); + ents[ents_size].parent = 0; + + h1->parent = h2->parent = ents_size; + + heap_insert (heap, ents, ++heap_last, ents_size++); + } + + IF_DEBUG (heap_check (heap, ents, heap_last)); + + /* Now compute prefix code lengths, counting parents. 
*/ + for (i = 1; i < asize+1; i += 1) + { + usize_t b = 0; + + if (ents[i].freq != 0) + { + usize_t p = i; + + while ((p = ents[p].parent) != 0) { b += 1; } + + if (b > maxlen) { overflow = 1; } + + total_bits += b * freq[i-1]; + } + + /* clen is 0-origin, unlike ents. */ + IF_DEBUG2 (DP(RINT "clen[%"W"u] = %"W"u\n", i-1, b)); + clen[i-1] = b; + } + + IF_DEBUG (if (first_bits == 0) first_bits = total_bits); + + if (! overflow) + { + IF_DEBUG2 (if (first_bits != total_bits) + { + DP(RINT "code length overflow changed %"W"u bits\n", + total_bits - first_bits); + }); + return total_bits; + } + + /* OPT: There is a non-looping way to fix overflow shown in zlib, but this + * is easier (for now), as done in bzip2. */ + for (i = 1; i < asize+1; i += 1) + { + ents[i].freq = ents[i].freq / 2 + 1; + } + + goto again; +} + +static void +djw_build_codes (usize_t *codes, const uint8_t *clen, usize_t asize, usize_t abs_max) +{ + usize_t i, l; + usize_t min_clen = DJW_MAX_CODELEN; + usize_t max_clen = 0; + usize_t code = 0; + + /* Find the min and max code length */ + for (i = 0; i < asize; i += 1) + { + if (clen[i] > 0 && clen[i] < min_clen) + { + min_clen = clen[i]; + } + + max_clen = xd3_max (max_clen, (usize_t) clen[i]); + } + + XD3_ASSERT (max_clen <= abs_max); + + /* Generate a code for each symbol with the appropriate length. 
*/ + for (l = min_clen; l <= max_clen; l += 1) + { + for (i = 0; i < asize; i += 1) + { + if (clen[i] == l) + { + codes[i] = code++; + } + } + + code <<= 1; + } + + IF_DEBUG2 ({ + for (i = 0; i < asize; i += 1) + { + DP(RINT "code[%"W"u] = %"W"u\n", i, codes[i]); + } + }); +} + +/*********************************************************************/ +/* MOVE-TO-FRONT */ +/*********************************************************************/ +static void +djw_compute_mtf_1_2 (djw_prefix *prefix, + uint8_t *mtf, + djw_weight *freq_out, + usize_t nsym) +{ + size_t i, j, k; + usize_t sym; + usize_t size = prefix->scount; + usize_t mtf_i = 0; + int mtf_run = 0; + + /* This +2 is for the RUN_0, RUN_1 codes */ + memset (freq_out, 0, sizeof (freq_out[0]) * (nsym+2)); + + for (i = 0; i < size; ) + { + /* OPT: Bzip optimizes this algorithm a little by effectively checking + * j==0 before the MTF update. */ + sym = prefix->symbol[i++]; + + for (j = 0; mtf[j] != sym; j += 1) { } + + XD3_ASSERT (j <= nsym); + + for (k = j; k >= 1; k -= 1) { mtf[k] = mtf[k-1]; } + + mtf[0] = sym; + + if (j == 0) + { + mtf_run += 1; + continue; + } + + if (mtf_run > 0) + { + djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out); + } + + /* Non-zero symbols are offset by RUN_1 */ + prefix->mtfsym[mtf_i++] = (uint8_t)(j+RUN_1); + freq_out[j+RUN_1] += 1; + } + + if (mtf_run > 0) + { + djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out); + } + + prefix->mcount = mtf_i; +} + +/* Counts character frequencies of the input buffer, returns the size. 
*/ +static usize_t +djw_count_freqs (djw_weight *freq, xd3_output *input) +{ + xd3_output *in; + usize_t size = 0; + + memset (freq, 0, sizeof (freq[0]) * ALPHABET_SIZE); + + for (in = input; in; in = in->next_page) + { + const uint8_t *p = in->base; + const uint8_t *p_max = p + in->next; + + size += in->next; + + do + { + ++freq[*p]; + } + while (++p < p_max); + } + + IF_DEBUG2 ({int i; + DP(RINT "freqs: "); + for (i = 0; i < ALPHABET_SIZE; i += 1) + { + DP(RINT "%u ", freq[i]); + } + DP(RINT "\n");}); + + return size; +} + +static void +djw_compute_multi_prefix (usize_t groups, + uint8_t clen[DJW_MAX_GROUPS][ALPHABET_SIZE], + djw_prefix *prefix) +{ + usize_t gp, i; + + prefix->scount = ALPHABET_SIZE; + memcpy (prefix->symbol, clen[0], ALPHABET_SIZE); + + for (gp = 1; gp < groups; gp += 1) + { + for (i = 0; i < ALPHABET_SIZE; i += 1) + { + if (clen[gp][i] == 0) + { + continue; + } + + prefix->symbol[prefix->scount++] = clen[gp][i]; + } + } +} + +static void +djw_compute_prefix_1_2 (djw_prefix *prefix, djw_weight *freq) +{ + /* This +1 is for the 0 code-length. */ + uint8_t clmtf[DJW_MAX_CODELEN+1]; + + djw_init_clen_mtf_1_2 (clmtf); + + djw_compute_mtf_1_2 (prefix, clmtf, freq, DJW_MAX_CODELEN); +} + +static int +djw_encode_prefix (xd3_stream *stream, + xd3_output **output, + bit_state *bstate, + djw_prefix *prefix) +{ + int ret; + size_t i; + usize_t num_to_encode; + djw_weight clfreq[DJW_TOTAL_CODES]; + uint8_t clclen[DJW_TOTAL_CODES]; + usize_t clcode[DJW_TOTAL_CODES]; + + /* Move-to-front encode prefix symbols, count frequencies */ + djw_compute_prefix_1_2 (prefix, clfreq); + + /* Compute codes */ + djw_build_prefix (clfreq, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN); + djw_build_codes (clcode, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN); + + /* Compute number of extra codes beyond basic ones for this template. 
*/ + num_to_encode = DJW_TOTAL_CODES; + while (num_to_encode > DJW_EXTRA_12OFFSET && clclen[num_to_encode-1] == 0) + { + num_to_encode -= 1; + } + XD3_ASSERT (num_to_encode - DJW_EXTRA_12OFFSET < (1 << DJW_EXTRA_CODE_BITS)); + + /* Encode: # of extra codes */ + if ((ret = xd3_encode_bits (stream, output, bstate, DJW_EXTRA_CODE_BITS, + num_to_encode - DJW_EXTRA_12OFFSET))) + { + return ret; + } + + /* Encode: MTF code lengths */ + for (i = 0; i < num_to_encode; i += 1) + { + if ((ret = xd3_encode_bits (stream, output, bstate, + DJW_CLCLEN_BITS, clclen[i]))) + { + return ret; + } + } + + /* Encode: CLEN code lengths */ + for (i = 0; i < prefix->mcount; i += 1) + { + usize_t mtf_sym = prefix->mtfsym[i]; + usize_t bits = clclen[mtf_sym]; + usize_t code = clcode[mtf_sym]; + + if ((ret = xd3_encode_bits (stream, output, bstate, bits, code))) + { + return ret; + } + } + + return 0; +} + +static void +djw_compute_selector_1_2 (djw_prefix *prefix, + usize_t groups, + djw_weight *gbest_freq) +{ + uint8_t grmtf[DJW_MAX_GROUPS]; + usize_t i; + + for (i = 0; i < groups; i += 1) { grmtf[i] = i; } + + djw_compute_mtf_1_2 (prefix, grmtf, gbest_freq, groups); +} + +static int +xd3_encode_howmany_groups (xd3_stream *stream, + xd3_sec_cfg *cfg, + usize_t input_size, + usize_t *ret_groups, + usize_t *ret_sector_size) +{ + usize_t cfg_groups = 0; + usize_t cfg_sector_size = 0; + usize_t sugg_groups = 0; + usize_t sugg_sector_size = 0; + + if (cfg->ngroups != 0) + { + if (cfg->ngroups > DJW_MAX_GROUPS) + { + stream->msg = "invalid secondary encoder group number"; + return XD3_INTERNAL; + } + + cfg_groups = cfg->ngroups; + } + + if (cfg->sector_size != 0) + { + if (cfg->sector_size < DJW_SECTORSZ_MULT || + cfg->sector_size > DJW_SECTORSZ_MAX || + (cfg->sector_size % DJW_SECTORSZ_MULT) != 0) + { + stream->msg = "invalid secondary encoder sector size"; + return XD3_INTERNAL; + } + + cfg_sector_size = cfg->sector_size; + } + + if (cfg_groups == 0 || cfg_sector_size == 0) + { + /* These 
values were found empirically using xdelta3-tune around version + * xdfs-0.256. */ + switch (cfg->data_type) + { + case DATA_SECTION: + if (input_size < 1000) { sugg_groups = 1; sugg_sector_size = 0; } + else if (input_size < 4000) { sugg_groups = 2; sugg_sector_size = 10; } + else if (input_size < 7000) { sugg_groups = 3; sugg_sector_size = 10; } + else if (input_size < 10000) { sugg_groups = 4; sugg_sector_size = 10; } + else if (input_size < 25000) { sugg_groups = 5; sugg_sector_size = 10; } + else if (input_size < 50000) { sugg_groups = 7; sugg_sector_size = 20; } + else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 30; } + else { sugg_groups = 8; sugg_sector_size = 70; } + break; + case INST_SECTION: + if (input_size < 7000) { sugg_groups = 1; sugg_sector_size = 0; } + else if (input_size < 10000) { sugg_groups = 2; sugg_sector_size = 50; } + else if (input_size < 25000) { sugg_groups = 3; sugg_sector_size = 50; } + else if (input_size < 50000) { sugg_groups = 6; sugg_sector_size = 40; } + else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 40; } + else { sugg_groups = 8; sugg_sector_size = 40; } + break; + case ADDR_SECTION: + if (input_size < 9000) { sugg_groups = 1; sugg_sector_size = 0; } + else if (input_size < 25000) { sugg_groups = 2; sugg_sector_size = 130; } + else if (input_size < 50000) { sugg_groups = 3; sugg_sector_size = 130; } + else if (input_size < 100000) { sugg_groups = 5; sugg_sector_size = 130; } + else { sugg_groups = 7; sugg_sector_size = 130; } + break; + } + + if (cfg_groups == 0) + { + cfg_groups = sugg_groups; + } + + if (cfg_sector_size == 0) + { + cfg_sector_size = sugg_sector_size; + } + } + + if (cfg_groups != 1 && cfg_sector_size == 0) + { + switch (cfg->data_type) + { + case DATA_SECTION: + cfg_sector_size = 20; + break; + case INST_SECTION: + cfg_sector_size = 50; + break; + case ADDR_SECTION: + cfg_sector_size = 130; + break; + } + } + + (*ret_groups) = cfg_groups; + (*ret_sector_size) = 
cfg_sector_size; + + XD3_ASSERT (cfg_groups > 0 && cfg_groups <= DJW_MAX_GROUPS); + XD3_ASSERT (cfg_groups == 1 || + (cfg_sector_size >= DJW_SECTORSZ_MULT && + cfg_sector_size <= DJW_SECTORSZ_MAX)); + + return 0; +} + +static int +xd3_encode_huff (xd3_stream *stream, + djw_stream *h, + xd3_output *input, + xd3_output *output, + xd3_sec_cfg *cfg) +{ + int ret; + usize_t groups, sector_size; + bit_state bstate = BIT_STATE_ENCODE_INIT; + xd3_output *in; + usize_t output_bits; + usize_t input_bits; + usize_t input_bytes; + usize_t initial_offset = output->next; + djw_weight real_freq[ALPHABET_SIZE]; + uint8_t *gbest = NULL; + uint8_t *gbest_mtf = NULL; + + input_bytes = djw_count_freqs (real_freq, input); + input_bits = input_bytes * 8; + + XD3_ASSERT (input_bytes > 0); + + if ((ret = xd3_encode_howmany_groups (stream, cfg, input_bytes, + & groups, & sector_size))) + { + return ret; + } + + if (0) + { + regroup: + /* Sometimes we dynamically decide there are too many groups. Arrive + * here. */ + output->next = initial_offset; + xd3_bit_state_encode_init (& bstate); + } + + /* Encode: # of groups (3 bits) */ + if ((ret = xd3_encode_bits (stream, & output, & bstate, + DJW_GROUP_BITS, groups-1))) { goto failure; } + + if (groups == 1) + { + /* Single Huffman group. */ + usize_t code[ALPHABET_SIZE]; /* Codes */ + uint8_t clen[ALPHABET_SIZE]; + uint8_t prefix_mtfsym[ALPHABET_SIZE]; + djw_prefix prefix; + + output_bits = + djw_build_prefix (real_freq, clen, ALPHABET_SIZE, DJW_MAX_CODELEN); + djw_build_codes (code, clen, ALPHABET_SIZE, DJW_MAX_CODELEN); + + if (output_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) + { + goto nosecond; + } + + /* Encode: prefix */ + prefix.mtfsym = prefix_mtfsym; + prefix.symbol = clen; + prefix.scount = ALPHABET_SIZE; + + if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) + { + goto failure; + } + + if (output_bits + (8 * output->next) + EFFICIENCY_BITS >= + input_bits && ! 
cfg->inefficient) + { + goto nosecond; + } + + /* Encode: data */ + for (in = input; in; in = in->next_page) + { + const uint8_t *p = in->base; + const uint8_t *p_max = p + in->next; + + do + { + usize_t sym = *p++; + usize_t bits = clen[sym]; + + IF_DEBUG (output_bits -= bits); + + if ((ret = xd3_encode_bits (stream, & output, + & bstate, bits, code[sym]))) + { + goto failure; + } + } + while (p < p_max); + } + + XD3_ASSERT (output_bits == 0); + } + else + { + /* DJW Huffman */ + djw_weight evolve_freq[DJW_MAX_GROUPS][ALPHABET_SIZE]; + uint8_t evolve_clen[DJW_MAX_GROUPS][ALPHABET_SIZE]; + djw_weight left = input_bytes; + usize_t gp; + usize_t niter = 0; + usize_t select_bits; + usize_t sym1 = 0, sym2 = 0, s; + usize_t gcost[DJW_MAX_GROUPS]; + usize_t gbest_code[DJW_MAX_GROUPS+2]; + uint8_t gbest_clen[DJW_MAX_GROUPS+2]; + usize_t gbest_max = 1 + (input_bytes - 1) / sector_size; + usize_t best_bits = 0; + usize_t gbest_no; + usize_t gpcnt; + const uint8_t *p; + IF_DEBUG2 (usize_t gcount[DJW_MAX_GROUPS]); + + /* Encode: sector size (5 bits) */ + if ((ret = xd3_encode_bits (stream, & output, & bstate, + DJW_SECTORSZ_BITS, + (sector_size/DJW_SECTORSZ_MULT)-1))) + { + goto failure; + } + + /* Dynamic allocation. */ + if (gbest == NULL) + { + if ((gbest = (uint8_t*) xd3_alloc (stream, gbest_max, 1)) == NULL) + { + ret = ENOMEM; + goto failure; + } + } + + if (gbest_mtf == NULL) + { + if ((gbest_mtf = (uint8_t*) xd3_alloc (stream, gbest_max, 1)) == NULL) + { + ret = ENOMEM; + goto failure; + } + } + + /* OPT: Some of the inner loops can be optimized, as shown in bzip2 */ + + /* Generate initial code length tables. */ + for (gp = 0; gp < groups; gp += 1) + { + djw_weight sum = 0; + djw_weight goal = left / (groups - gp); + + IF_DEBUG2 (usize_t nz = 0); + + /* Due to the single-code granularity of this distribution, it may + * be that we can't generate a distribution for each group. In that + * case subtract one group and try again. 
If (inefficient), we're + * testing group behavior, so don't mess things up. */ + if (goal == 0 && !cfg->inefficient) + { + IF_DEBUG2 (DP(RINT "too many groups (%"W"u), dropping one\n", + groups)); + groups -= 1; + goto regroup; + } + + /* Sum == goal is possible when (cfg->inefficient)... */ + while (sum < goal) + { + XD3_ASSERT (sym2 < ALPHABET_SIZE); + IF_DEBUG2 (nz += real_freq[sym2] != 0); + sum += real_freq[sym2++]; + } + + IF_DEBUG2(DP(RINT "group %"W"u has symbols %"W"u..%"W"u (%"W"u non-zero) " + "(%u/%"W"u = %.3f)\n", + gp, sym1, sym2, nz, sum, + input_bytes, sum / (double)input_bytes);); + + for (s = 0; s < ALPHABET_SIZE; s += 1) + { + evolve_clen[gp][s] = (s >= sym1 && s <= sym2) ? 1 : 16; + } + + left -= sum; + sym1 = sym2+1; + } + + repeat: + + niter += 1; + gbest_no = 0; + memset (evolve_freq, 0, sizeof (evolve_freq[0]) * groups); + IF_DEBUG2 (memset (gcount, 0, sizeof (gcount[0]) * groups)); + + /* For each input page (loop is irregular to allow non-pow2-size group + * size. */ + in = input; + p = in->base; + + /* For each group-size sector. */ + do + { + const uint8_t *p0 = p; + xd3_output *in0 = in; + usize_t best = 0; + usize_t winner = 0; + + /* Select best group for each sector, update evolve_freq. */ + memset (gcost, 0, sizeof (gcost[0]) * groups); + + /* For each byte in sector. */ + for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1) + { + /* For each group. */ + for (gp = 0; gp < groups; gp += 1) + { + gcost[gp] += evolve_clen[gp][*p]; + } + + /* Check end-of-input-page. 
*/ +# define GP_PAGE() \ + if ((usize_t)(++p - in->base) == in->next) \ + { \ + in = in->next_page; \ + if (in == NULL) { break; } \ + p = in->base; \ + } + + GP_PAGE (); + } + + /* Find min cost group for this sector */ + best = USIZE_T_MAX; + for (gp = 0; gp < groups; gp += 1) + { + if (gcost[gp] < best) + { + best = gcost[gp]; + winner = gp; + } + } + + XD3_ASSERT(gbest_no < gbest_max); + gbest[gbest_no++] = winner; + IF_DEBUG2 (gcount[winner] += 1); + + p = p0; + in = in0; + + /* Update group frequencies. */ + for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1) + { + evolve_freq[winner][*p] += 1; + + GP_PAGE (); + } + } + while (in != NULL); + + XD3_ASSERT (gbest_no == gbest_max); + + /* Recompute code lengths. */ + output_bits = 0; + for (gp = 0; gp < groups; gp += 1) + { + int i; + uint8_t evolve_zero[ALPHABET_SIZE]; + int any_zeros = 0; + + memset (evolve_zero, 0, sizeof (evolve_zero)); + + /* Cannot allow a zero clen when the real frequency is non-zero. + * Note: this means we are going to encode a fairly long code for + * these unused entries. An improvement would be to implement a + * NOTUSED code for when these are actually zero, but this requires + * another data structure (evolve_zero) since we don't know when + * evolve_freq[i] == 0... Briefly tested, looked worse. */ + for (i = 0; i < ALPHABET_SIZE; i += 1) + { + if (evolve_freq[gp][i] == 0 && real_freq[i] != 0) + { + evolve_freq[gp][i] = 1; + evolve_zero[i] = 1; + any_zeros = 1; + } + } + + output_bits += djw_build_prefix (evolve_freq[gp], evolve_clen[gp], + ALPHABET_SIZE, DJW_MAX_CODELEN); + + /* The above faking of frequencies does not matter for the last + * iteration, but we don't know when that is yet. However, it also + * breaks the output_bits computation. Necessary for accuracy, and + * for the (output_bits==0) assert after all bits are output. 
*/ + if (any_zeros) + { + IF_DEBUG2 (usize_t save_total = output_bits); + + for (i = 0; i < ALPHABET_SIZE; i += 1) + { + if (evolve_zero[i]) { output_bits -= evolve_clen[gp][i]; } + } + + IF_DEBUG2 (DP(RINT "evolve_zero reduced %"W"u bits in group %"W"u\n", + save_total - output_bits, gp)); + } + } + + IF_DEBUG2( + DP(RINT "pass %"W"u total bits: %"W"u group uses: ", niter, output_bits); + for (gp = 0; gp < groups; gp += 1) { DP(RINT "%"W"u ", gcount[gp]); } + DP(RINT "\n"); + ); + + /* End iteration. */ + + IF_DEBUG2 (if (niter > 1 && best_bits < output_bits) { + DP(RINT "iteration lost %"W"u bits\n", output_bits - best_bits); }); + + if (niter == 1 || (niter < DJW_MAX_ITER && + (best_bits - output_bits) >= DJW_MIN_IMPROVEMENT)) + { + best_bits = output_bits; + goto repeat; + } + + /* Efficiency check. */ + if (output_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) + { + goto nosecond; + } + + IF_DEBUG2 (DP(RINT "djw compression: %"W"u -> %0.3f\n", + input_bytes, output_bits / 8.0)); + + /* Encode: prefix */ + { + uint8_t prefix_symbol[DJW_MAX_GROUPS * ALPHABET_SIZE]; + uint8_t prefix_mtfsym[DJW_MAX_GROUPS * ALPHABET_SIZE]; + uint8_t prefix_repcnt[DJW_MAX_GROUPS * ALPHABET_SIZE]; + djw_prefix prefix; + + prefix.symbol = prefix_symbol; + prefix.mtfsym = prefix_mtfsym; + prefix.repcnt = prefix_repcnt; + + djw_compute_multi_prefix (groups, evolve_clen, & prefix); + if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) + { + goto failure; + } + } + + /* Encode: selector frequencies */ + { + /* DJW_MAX_GROUPS +2 is for RUN_0, RUN_1 symbols. 
*/ + djw_weight gbest_freq[DJW_MAX_GROUPS+2]; + djw_prefix gbest_prefix; + usize_t i; + + gbest_prefix.scount = gbest_no; + gbest_prefix.symbol = gbest; + gbest_prefix.mtfsym = gbest_mtf; + + djw_compute_selector_1_2 (& gbest_prefix, groups, gbest_freq); + + select_bits = + djw_build_prefix (gbest_freq, gbest_clen, groups+1, DJW_MAX_GBCLEN); + djw_build_codes (gbest_code, gbest_clen, groups+1, DJW_MAX_GBCLEN); + + for (i = 0; i < groups+1; i += 1) + { + if ((ret = xd3_encode_bits (stream, & output, & bstate, + DJW_GBCLEN_BITS, gbest_clen[i]))) + { + goto failure; + } + } + + for (i = 0; i < gbest_prefix.mcount; i += 1) + { + usize_t gp_mtf = gbest_mtf[i]; + usize_t gp_sel_bits = gbest_clen[gp_mtf]; + usize_t gp_sel_code = gbest_code[gp_mtf]; + + XD3_ASSERT (gp_mtf < groups+1); + + if ((ret = xd3_encode_bits (stream, & output, & bstate, + gp_sel_bits, gp_sel_code))) + { + goto failure; + } + + IF_DEBUG (select_bits -= gp_sel_bits); + } + + XD3_ASSERT (select_bits == 0); + } + + /* Efficiency check. */ + if (output_bits + select_bits + (8 * output->next) + + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) + { + goto nosecond; + } + + /* Encode: data */ + { + usize_t evolve_code[DJW_MAX_GROUPS][ALPHABET_SIZE]; + usize_t sector = 0; + + /* Build code tables for each group. */ + for (gp = 0; gp < groups; gp += 1) + { + djw_build_codes (evolve_code[gp], evolve_clen[gp], + ALPHABET_SIZE, DJW_MAX_CODELEN); + } + + /* Now loop over the input. */ + in = input; + p = in->base; + + do + { + /* For each sector. */ + usize_t gp_best = gbest[sector]; + usize_t *gp_codes = evolve_code[gp_best]; + uint8_t *gp_clens = evolve_clen[gp_best]; + + XD3_ASSERT (sector < gbest_no); + + sector += 1; + + /* Encode the sector data. 
*/ + for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1) + { + usize_t sym = *p; + usize_t bits = gp_clens[sym]; + usize_t code = gp_codes[sym]; + + IF_DEBUG (output_bits -= bits); + + if ((ret = xd3_encode_bits (stream, & output, & bstate, + bits, code))) + { + goto failure; + } + + GP_PAGE (); + } + } + while (in != NULL); + + XD3_ASSERT (select_bits == 0); + XD3_ASSERT (output_bits == 0); + } + } + + ret = xd3_flush_bits (stream, & output, & bstate); + + if (0) + { + nosecond: + stream->msg = "secondary compression was inefficient"; + ret = XD3_NOSECOND; + } + + failure: + + xd3_free (stream, gbest); + xd3_free (stream, gbest_mtf); + return ret; +} +#endif /* XD3_ENCODER */ + +/*********************************************************************/ +/* DECODE */ +/*********************************************************************/ + +static void +djw_build_decoder (xd3_stream *stream, + usize_t asize, + usize_t abs_max, + const uint8_t *clen, + uint8_t *inorder, + usize_t *base, + usize_t *limit, + usize_t *min_clenp, + usize_t *max_clenp) +{ + usize_t i, l; + const uint8_t *ci; + usize_t nr_clen [DJW_TOTAL_CODES]; + usize_t tmp_base[DJW_TOTAL_CODES]; + usize_t min_clen; + usize_t max_clen; + + /* Assumption: the two temporary arrays are large enough to hold abs_max. */ + XD3_ASSERT (abs_max <= DJW_MAX_CODELEN); + + /* This looks something like the start of zlib's inftrees.c */ + memset (nr_clen, 0, sizeof (nr_clen[0]) * (abs_max+1)); + + /* Count number of each code length */ + i = asize; + ci = clen; + do + { + /* Caller _must_ check that values are in-range. Most of the time the + * caller decodes a specific number of bits, which imply the max value, + * and the other time the caller decodes a huffman value, which must be + * in-range. Therefore, its an assertion and this function cannot + * otherwise fail. */ + XD3_ASSERT (*ci <= abs_max); + + nr_clen[*ci++]++; + } + while (--i != 0); + + /* Compute min, max. 
*/ + for (i = 1; i <= abs_max; i += 1) { if (nr_clen[i]) { break; } } + min_clen = i; + for (i = abs_max; i != 0; i -= 1) { if (nr_clen[i]) { break; } } + max_clen = i; + + /* Fill the BASE, LIMIT table. */ + tmp_base[min_clen] = 0; + base[min_clen] = 0; + limit[min_clen] = nr_clen[min_clen] - 1; + for (i = min_clen + 1; i <= max_clen; i += 1) + { + usize_t last_limit = ((limit[i-1] + 1) << 1); + tmp_base[i] = tmp_base[i-1] + nr_clen[i-1]; + limit[i] = last_limit + nr_clen[i] - 1; + base[i] = last_limit - tmp_base[i]; + } + + /* Fill the inorder array, canonically ordered codes. */ + ci = clen; + for (i = 0; i < asize; i += 1) + { + if ((l = *ci++) != 0) + { + inorder[tmp_base[l]++] = i; + } + } + + *min_clenp = min_clen; + *max_clenp = max_clen; +} + +static inline int +djw_decode_symbol (xd3_stream *stream, + bit_state *bstate, + const uint8_t **input, + const uint8_t *input_end, + const uint8_t *inorder, + const usize_t *base, + const usize_t *limit, + usize_t min_clen, + usize_t max_clen, + usize_t *sym, + usize_t max_sym) +{ + usize_t code = 0; + usize_t bits = 0; + + /* OPT: Supposedly a small lookup table improves speed here... */ + + /* Code outline is similar to xd3_decode_bits... 
*/ + if (bstate->cur_mask == 0x100) { goto next_byte; } + + for (;;) + { + do + { + if (bits == max_clen) { goto corrupt; } + + bits += 1; + code = (code << 1); + + if (bstate->cur_byte & bstate->cur_mask) { code |= 1; } + + bstate->cur_mask <<= 1; + + if (bits >= min_clen && code <= limit[bits]) { goto done; } + } + while (bstate->cur_mask != 0x100); + + next_byte: + + if (*input == input_end) + { + stream->msg = "secondary decoder end of input"; + return XD3_INVALID_INPUT; + } + + bstate->cur_byte = *(*input)++; + bstate->cur_mask = 1; + } + + done: + + if (base[bits] <= code) + { + usize_t offset = code - base[bits]; + + if (offset <= max_sym) + { + IF_DEBUG2 (DP(RINT "(j) %"W"u ", code)); + *sym = inorder[offset]; + return 0; + } + } + + corrupt: + stream->msg = "secondary decoder invalid code"; + return XD3_INVALID_INPUT; +} + +static int +djw_decode_clclen (xd3_stream *stream, + bit_state *bstate, + const uint8_t **input, + const uint8_t *input_end, + uint8_t *cl_inorder, + usize_t *cl_base, + usize_t *cl_limit, + usize_t *cl_minlen, + usize_t *cl_maxlen, + uint8_t *cl_mtf) +{ + int ret; + uint8_t cl_clen[DJW_TOTAL_CODES]; + usize_t num_codes, value; + usize_t i; + + /* How many extra code lengths to encode. */ + if ((ret = xd3_decode_bits (stream, bstate, input, + input_end, DJW_EXTRA_CODE_BITS, & num_codes))) + { + return ret; + } + + num_codes += DJW_EXTRA_12OFFSET; + + /* Read num_codes. */ + for (i = 0; i < num_codes; i += 1) + { + if ((ret = xd3_decode_bits (stream, bstate, input, + input_end, DJW_CLCLEN_BITS, & value))) + { + return ret; + } + + cl_clen[i] = value; + } + + /* Set the rest to zero. */ + for (; i < DJW_TOTAL_CODES; i += 1) { cl_clen[i] = 0; } + + /* No need to check for in-range clen values, because: */ + XD3_ASSERT (1 << DJW_CLCLEN_BITS == DJW_MAX_CLCLEN + 1); + + /* Build the code-length decoder. 
*/ + djw_build_decoder (stream, DJW_TOTAL_CODES, DJW_MAX_CLCLEN, + cl_clen, cl_inorder, cl_base, + cl_limit, cl_minlen, cl_maxlen); + + /* Initialize the MTF state. */ + djw_init_clen_mtf_1_2 (cl_mtf); + + return 0; +} + +static inline int +djw_decode_1_2 (xd3_stream *stream, + bit_state *bstate, + const uint8_t **input, + const uint8_t *input_end, + const uint8_t *inorder, + const usize_t *base, + const usize_t *limit, + const usize_t *minlen, + const usize_t *maxlen, + uint8_t *mtfvals, + usize_t elts, + usize_t skip_offset, + uint8_t *values) +{ + usize_t n = 0, rep = 0, mtf = 0, s = 0; + int ret; + + while (n < elts) + { + /* Special case inside generic code: CLEN only: If not the first group, + * we already know the zero frequencies. */ + if (skip_offset != 0 && n >= skip_offset && values[n-skip_offset] == 0) + { + values[n++] = 0; + continue; + } + + /* Repeat last symbol. */ + if (rep != 0) + { + values[n++] = mtfvals[0]; + rep -= 1; + continue; + } + + /* Symbol following last repeat code. */ + if (mtf != 0) + { + usize_t sym = djw_update_mtf (mtfvals, mtf); + values[n++] = sym; + mtf = 0; + continue; + } + + /* Decode next symbol/repeat code. */ + if ((ret = djw_decode_symbol (stream, bstate, input, input_end, + inorder, base, limit, *minlen, *maxlen, + & mtf, DJW_TOTAL_CODES))) { return ret; } + + if (mtf <= RUN_1) + { + /* Repetition. */ + rep = ((mtf + 1) << s); + mtf = 0; + s += 1; + } + else + { + /* Remove the RUN_1 MTF offset. */ + mtf -= 1; + s = 0; + } + } + + /* If (rep != 0) there were too many codes received. 
*/ + if (rep != 0) + { + stream->msg = "secondary decoder invalid repeat code"; + return XD3_INVALID_INPUT; + } + + return 0; +} + /* Decodes the code lengths of all groups into clen (ALPHABET_SIZE * groups entries) by calling djw_decode_1_2 with skip_offset = ALPHABET_SIZE. */ +static inline int +djw_decode_prefix (xd3_stream *stream, + bit_state *bstate, + const uint8_t **input, + const uint8_t *input_end, + const uint8_t *cl_inorder, + const usize_t *cl_base, + const usize_t *cl_limit, + const usize_t *cl_minlen, + const usize_t *cl_maxlen, + uint8_t *cl_mtf, + usize_t groups, + uint8_t *clen) +{ + return djw_decode_1_2 (stream, bstate, input, input_end, + cl_inorder, cl_base, cl_limit, + cl_minlen, cl_maxlen, cl_mtf, + ALPHABET_SIZE * groups, ALPHABET_SIZE, clen); +} + /* Decodes one section from *input_pos into the range [*output_pos, output_end). The stream carries the group count, then (for more than one group) the sector size, then the code-length tables, the per-sector group selectors and finally the symbol data. Returns 0 or an error code. */ +static int +xd3_decode_huff (xd3_stream *stream, + djw_stream *h, + const uint8_t **input_pos, + const uint8_t *const input_end, + uint8_t **output_pos, + const uint8_t *const output_end) +{ + const uint8_t *input = *input_pos; + uint8_t *output = *output_pos; + bit_state bstate = BIT_STATE_DECODE_INIT; + uint8_t *sel_group = NULL; + usize_t groups, gp; + usize_t output_bytes = (usize_t)(output_end - output); + usize_t sector_size; + usize_t sectors; + int ret; + + /* Invalid input: the output range is empty. */ + if (output_bytes == 0) + { + stream->msg = "secondary decoder invalid input"; + return XD3_INVALID_INPUT; + } + + /* Decode: number of groups */ + if ((ret = xd3_decode_bits (stream, & bstate, & input, + input_end, DJW_GROUP_BITS, & groups))) + { + goto fail; + } + + groups += 1; + + if (groups > 1) + { + /* Decode: group size */ + if ((ret = xd3_decode_bits (stream, & bstate, & input, + input_end, DJW_SECTORSZ_BITS, + & sector_size))) { goto fail; } + + sector_size = (sector_size + 1) * DJW_SECTORSZ_MULT; + } + else + { + /* Default for groups == 1 */ + sector_size = output_bytes; + } + + sectors = 1 + (output_bytes - 1) / sector_size; + + /* TODO: In the case of groups==1, lots of extra stack space gets used here. + * Could dynamically allocate this memory, which would help with excess + * parameter passing, too. 
Passing too many parameters in this file, + * simplify it! */ + + /* Outer scope: per-group symbol decoder tables. */ + { + uint8_t inorder[DJW_MAX_GROUPS][ALPHABET_SIZE]; + usize_t base [DJW_MAX_GROUPS][DJW_TOTAL_CODES]; + usize_t limit [DJW_MAX_GROUPS][DJW_TOTAL_CODES]; + usize_t minlen [DJW_MAX_GROUPS]; + usize_t maxlen [DJW_MAX_GROUPS]; + + /* Nested scope: code length decoder tables. */ + { + uint8_t clen [DJW_MAX_GROUPS][ALPHABET_SIZE]; + uint8_t cl_inorder[DJW_TOTAL_CODES]; + usize_t cl_base [DJW_MAX_CLCLEN+2]; + usize_t cl_limit [DJW_MAX_CLCLEN+2]; + uint8_t cl_mtf [DJW_TOTAL_CODES]; + usize_t cl_minlen; + usize_t cl_maxlen; + + /* Compute the code length decoder. */ + if ((ret = djw_decode_clclen (stream, & bstate, & input, input_end, + cl_inorder, cl_base, cl_limit, & cl_minlen, + & cl_maxlen, cl_mtf))) { goto fail; } + + /* Now decode each group decoder. */ + if ((ret = djw_decode_prefix (stream, & bstate, & input, input_end, + cl_inorder, cl_base, cl_limit, + & cl_minlen, & cl_maxlen, cl_mtf, + groups, clen[0]))) { goto fail; } + + /* Prepare the actual decoding tables. */ + for (gp = 0; gp < groups; gp += 1) + { + djw_build_decoder (stream, ALPHABET_SIZE, DJW_MAX_CODELEN, + clen[gp], inorder[gp], base[gp], limit[gp], + & minlen[gp], & maxlen[gp]); + } + } + + /* Decode: selector clens. */ + { + uint8_t sel_inorder[DJW_MAX_GROUPS+2]; + usize_t sel_base [DJW_MAX_GBCLEN+2]; + usize_t sel_limit [DJW_MAX_GBCLEN+2]; + uint8_t sel_mtf [DJW_MAX_GROUPS+2]; + usize_t sel_minlen; + usize_t sel_maxlen; + + /* Setup group selection. 
*/ + if (groups > 1) + { + uint8_t sel_clen[DJW_MAX_GROUPS+1]; + + for (gp = 0; gp < groups+1; gp += 1) + { + usize_t value; + + if ((ret = xd3_decode_bits (stream, & bstate, & input, + input_end, DJW_GBCLEN_BITS, + & value))) { goto fail; } + + sel_clen[gp] = value; + sel_mtf[gp] = gp; + } + + /* One selector byte per sector; released at the fail label. */ + if ((sel_group = (uint8_t*) xd3_alloc (stream, sectors, 1)) == NULL) + { + ret = ENOMEM; + goto fail; + } + + djw_build_decoder (stream, groups+1, DJW_MAX_GBCLEN, sel_clen, + sel_inorder, sel_base, sel_limit, + & sel_minlen, & sel_maxlen); + + if ((ret = djw_decode_1_2 (stream, & bstate, & input, input_end, + sel_inorder, sel_base, + sel_limit, & sel_minlen, + & sel_maxlen, sel_mtf, + sectors, 0, sel_group))) { goto fail; } + } + + /* Now decode each sector. */ + { + /* Initialize for (groups==1) case. */ + uint8_t *gp_inorder = inorder[0]; + usize_t *gp_base = base[0]; + usize_t *gp_limit = limit[0]; + usize_t gp_minlen = minlen[0]; + usize_t gp_maxlen = maxlen[0]; + usize_t c; + + for (c = 0; c < sectors; c += 1) + { + usize_t n; + + if (groups >= 2) + { + gp = sel_group[c]; + + /* gp comes from the encoded stream: reject it at run time instead of only asserting. */ if (gp >= groups) { stream->msg = "secondary decoder invalid input"; ret = XD3_INVALID_INPUT; goto fail; } + + gp_inorder = inorder[gp]; + gp_base = base[gp]; + gp_limit = limit[gp]; + gp_minlen = minlen[gp]; + gp_maxlen = maxlen[gp]; + } + + if (output_end < output) + { + stream->msg = "secondary decoder invalid input"; + /* Leave through fail so sel_group is freed and the positions are stored. */ ret = XD3_INVALID_INPUT; goto fail; + } + + /* Decode next sector. 
*/ + n = xd3_min (sector_size, (usize_t) (output_end - output)); + + do + { + usize_t sym; + + if ((ret = djw_decode_symbol (stream, & bstate, + & input, input_end, + gp_inorder, gp_base, + gp_limit, gp_minlen, gp_maxlen, + & sym, ALPHABET_SIZE))) + { + goto fail; + } + + *output++ = sym; + } + while (--n); + } + } + } + } + + IF_REGRESSION (if ((ret = xd3_test_clean_bits (stream, & bstate))) + { goto fail; }); + XD3_ASSERT (ret == 0); + + fail: + xd3_free (stream, sel_group); + + (*input_pos) = input; + (*output_pos) = output; + return ret; +} + +#endif diff --git a/lib/xdelta3/xdelta3-fgk.h b/lib/xdelta3/xdelta3-fgk.h new file mode 100644 index 0000000..f880ad9 --- /dev/null +++ b/lib/xdelta3/xdelta3-fgk.h @@ -0,0 +1,857 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + For demonstration purposes only. + */ + +#ifndef _XDELTA3_FGK_h_ +#define _XDELTA3_FGK_h_ + +/* An implementation of the FGK algorithm described by D.E. Knuth in + * "Dynamic Huffman Coding" in Journal of Algorithms 6. */ + +/* A 32bit counter (fgk_weight) is used as the frequency counter for + * nodes in the huffman tree. TODO: Need oto test for overflow and/or + * reset stats. 
*/ + +typedef struct _fgk_stream fgk_stream; +typedef struct _fgk_node fgk_node; +typedef struct _fgk_block fgk_block; +typedef unsigned int fgk_bit; +typedef uint32_t fgk_weight; + +struct _fgk_block { + union { + fgk_node *un_leader; + fgk_block *un_freeptr; + } un; +}; + +#define block_leader un.un_leader +#define block_freeptr un.un_freeptr + +/* The code can also support fixed huffman encoding/decoding. */ +#define IS_ADAPTIVE 1 + +/* weight is a count of the number of times this element has been seen + * in the current encoding/decoding. parent, right_child, and + * left_child are pointers defining the tree structure. right and + * left point to neighbors in an ordered sequence of weights. The + * left child of a node is always guaranteed to have weight not + * greater than its sibling. fgk_blockLeader points to the element + * with the same weight as itself which is closest to the next + * increasing weight block. */ +struct _fgk_node +{ + fgk_weight weight; + fgk_node *parent; + fgk_node *left_child; + fgk_node *right_child; + fgk_node *left; + fgk_node *right; + fgk_block *my_block; +}; + +/* alphabet_size is the a count of the number of possible leaves in + * the huffman tree. The number of total nodes counting internal + * nodes is ((2 * alphabet_size) - 1). zero_freq_count is the number + * of elements remaining which have zero frequency. zero_freq_exp and + * zero_freq_rem satisfy the equation zero_freq_count = + * 2^zero_freq_exp + zero_freq_rem. root_node is the root of the + * tree, which is initialized to a node with zero frequency and + * contains the 0th such element. free_node contains a pointer to the + * next available fgk_node space. alphabet contains all the elements + * and is indexed by N. remaining_zeros points to the head of the + * list of zeros. 
*/ +struct _fgk_stream +{ + usize_t alphabet_size; + usize_t zero_freq_count; + usize_t zero_freq_exp; + usize_t zero_freq_rem; + usize_t coded_depth; + + usize_t total_nodes; + usize_t total_blocks; + + fgk_bit *coded_bits; + + fgk_block *block_array; + fgk_block *free_block; + + fgk_node *decode_ptr; + fgk_node *remaining_zeros; + fgk_node *alphabet; + fgk_node *root_node; + fgk_node *free_node; +}; + +/*********************************************************************/ +/* Encoder */ +/*********************************************************************/ + +static fgk_stream* fgk_alloc (xd3_stream *stream /*, usize_t alphabet_size */); +static int fgk_init (xd3_stream *stream, + fgk_stream *h, + int is_encode); +static usize_t fgk_encode_data (fgk_stream *h, + usize_t n); +static inline fgk_bit fgk_get_encoded_bit (fgk_stream *h); + +static int xd3_encode_fgk (xd3_stream *stream, + fgk_stream *sec_stream, + xd3_output *input, + xd3_output *output, + xd3_sec_cfg *cfg); + +/*********************************************************************/ +/* Decoder */ +/*********************************************************************/ + +static inline int fgk_decode_bit (fgk_stream *h, + fgk_bit b); +static usize_t fgk_decode_data (fgk_stream *h); +static void fgk_destroy (xd3_stream *stream, + fgk_stream *h); + +static int xd3_decode_fgk (xd3_stream *stream, + fgk_stream *sec_stream, + const uint8_t **input, + const uint8_t *const input_end, + uint8_t **output, + const uint8_t *const output_end); + +/*********************************************************************/ +/* Private */ +/*********************************************************************/ + +static unsigned int fgk_find_nth_zero (fgk_stream *h, usize_t n); +static usize_t fgk_nth_zero (fgk_stream *h, usize_t n); +static void fgk_update_tree (fgk_stream *h, usize_t n); +static fgk_node* fgk_increase_zero_weight (fgk_stream *h, usize_t n); +static void fgk_eliminate_zero (fgk_stream* h, fgk_node 
*node); +static void fgk_move_right (fgk_stream *h, fgk_node *node); +static void fgk_promote (fgk_stream *h, fgk_node *node); +static void fgk_init_node (fgk_node *node, usize_t i, usize_t size); +static fgk_block* fgk_make_block (fgk_stream *h, fgk_node *l); +static void fgk_free_block (fgk_stream *h, fgk_block *b); +static void fgk_factor_remaining (fgk_stream *h); +static inline void fgk_swap_ptrs (fgk_node **one, fgk_node **two); + +/*********************************************************************/ +/* Basic Routines */ +/*********************************************************************/ + +/* Allocates the huffman coder state for an alphabet of ALPHABET_SIZE + * symbols; the tree itself is set up by fgk_init. Returns NULL if enough memory cannot be allocated. */ +static fgk_stream* fgk_alloc (xd3_stream *stream /*, int alphabet_size0 */) +{ + usize_t alphabet_size0 = ALPHABET_SIZE; + fgk_stream *h; + + if ((h = (fgk_stream*) xd3_alloc (stream, 1, sizeof (fgk_stream))) == NULL) + { + return NULL; + } + + h->total_nodes = (2 * alphabet_size0) - 1; + h->total_blocks = (2 * h->total_nodes); + h->alphabet = (fgk_node*) xd3_alloc (stream, h->total_nodes, sizeof (fgk_node)); + h->block_array = (fgk_block*) xd3_alloc (stream, h->total_blocks, sizeof (fgk_block)); + h->coded_bits = (fgk_bit*) xd3_alloc (stream, alphabet_size0, sizeof (fgk_bit)); + + if (h->coded_bits == NULL || + h->alphabet == NULL || + h->block_array == NULL) + { + fgk_destroy (stream, h); + return NULL; + } + + h->alphabet_size = alphabet_size0; + + return h; +} + /* Puts h into its starting state: root and decode pointers at alphabet[0], the block free list rebuilt, and the first alphabet_size nodes set up through fgk_init_node. Always returns 0; stream and is_encode are not used. */ +static int fgk_init (xd3_stream *stream, fgk_stream *h, int is_encode) +{ + usize_t ui; + ssize_t si; + + h->root_node = h->alphabet; + h->decode_ptr = h->root_node; + h->free_node = h->alphabet + h->alphabet_size; + h->remaining_zeros = h->alphabet; + h->coded_depth = 0; + h->zero_freq_count = h->alphabet_size + 2; + + /* after two calls to factor_remaining, zero_freq_count == alphabet_size */ + fgk_factor_remaining(h); /* set ZFE and ZFR */ + 
fgk_factor_remaining(h); /* set ZFDB according to prev state */ + + IF_DEBUG (memset (h->alphabet, 0, sizeof (h->alphabet[0]) * h->total_nodes)); + + /* Chain every block into the free list, in array order. */ + for (ui = 0; ui < h->total_blocks-1; ui += 1) + { + h->block_array[ui].block_freeptr = &h->block_array[ui + 1]; + } + + h->block_array[h->total_blocks - 1].block_freeptr = NULL; + h->free_block = h->block_array; + + /* Zero frequency nodes are inserted in the first alphabet_size + * positions, with Value, weight, and a pointer to the next zero + * frequency node. */ + for (si = h->alphabet_size - 1; si >= 0; si -= 1) + { + fgk_init_node (h->alphabet + si, (usize_t) si, h->alphabet_size); + } + + return 0; +} + /* Exchanges the node pointers stored at *one and *two. */ +static void fgk_swap_ptrs(fgk_node **one, fgk_node **two) +{ + fgk_node *tmp = *one; + *one = *two; + *two = tmp; +} + +/* Takes huffman transmitter h and n, the nth elt in the alphabet, and + * returns the number of bits required to encode n; the bits are left in h->coded_bits. */ +static usize_t fgk_encode_data (fgk_stream* h, usize_t n) +{ + fgk_node *target_ptr = h->alphabet + n; + + XD3_ASSERT (n < h->alphabet_size); + + h->coded_depth = 0; + + /* First encode the binary representation of the nth remaining + * zero frequency element in reverse such that bit, which will be + * encoded from h->coded_depth down to 0 will arrive in increasing + * order following the tree path. If there is only one left, it + * is not neccesary to encode these bits. 
*/ + if (IS_ADAPTIVE && target_ptr->weight == 0) + { + usize_t where, shift; + usize_t bits; + + where = fgk_find_nth_zero(h, n); + shift = 1; + + if (h->zero_freq_rem == 0) + { + bits = h->zero_freq_exp; + } + else + { + bits = h->zero_freq_exp + 1; + } + + while (bits > 0) + { + h->coded_bits[h->coded_depth++] = (shift & where) && 1; + + bits -= 1; + shift <<= 1; + }; + + target_ptr = h->remaining_zeros; + } + + /* The path from root to node is filled into coded_bits in reverse so + * that it is encoded in the right order (a 1 bit marks a right child) */ + while (target_ptr != h->root_node) + { + h->coded_bits[h->coded_depth++] = (target_ptr->parent->right_child == target_ptr); + + target_ptr = target_ptr->parent; + } + + if (IS_ADAPTIVE) + { + fgk_update_tree(h, n); + } + + return h->coded_depth; +} + +/* Pops the next code bit from coded_bits. Should be called as many times + * as the bit count fgk_encode_data returned. */ +static inline fgk_bit fgk_get_encoded_bit (fgk_stream *h) +{ + XD3_ASSERT (h->coded_depth > 0); + + return h->coded_bits[--h->coded_depth]; +} + +/* This procedure updates the tree after alphabet[n] has been encoded + * or decoded. 
+ */ +static void fgk_update_tree (fgk_stream *h, usize_t n) +{ + fgk_node *incr_node; + + if (h->alphabet[n].weight == 0) + { + incr_node = fgk_increase_zero_weight (h, n); + } + else + { + incr_node = h->alphabet + n; + } + + while (incr_node != h->root_node) + { + fgk_move_right (h, incr_node); + fgk_promote (h, incr_node); + incr_node->weight += 1; /* incr this node's weight */ + incr_node = incr_node->parent; /* repeat for the parent */ + } + + h->root_node->weight += 1; +} +/* Swaps move_fwd with the leader of its block (list links and tree parents), unless it is the leader, the leader's child, or has zero weight. */ +static void fgk_move_right (fgk_stream *h, fgk_node *move_fwd) +{ + fgk_node **fwd_par_ptr, **back_par_ptr; + fgk_node *move_back, *tmp; + + move_back = move_fwd->my_block->block_leader; + + if (move_fwd == move_back || + move_fwd->parent == move_back || + move_fwd->weight == 0) + { + return; + } + + move_back->right->left = move_fwd; + + if (move_fwd->left) + { + move_fwd->left->right = move_back; + } + + tmp = move_fwd->right; + move_fwd->right = move_back->right; + + if (tmp == move_back) + { + move_back->right = move_fwd; + } + else + { + tmp->left = move_back; + move_back->right = tmp; + } + + tmp = move_back->left; + move_back->left = move_fwd->left; + + if (tmp == move_fwd) + { + move_fwd->left = move_back; + } + else + { + tmp->right = move_fwd; + move_fwd->left = tmp; + } + + if (move_fwd->parent->right_child == move_fwd) + { + fwd_par_ptr = &move_fwd->parent->right_child; + } + else + { + fwd_par_ptr = &move_fwd->parent->left_child; + } + + if (move_back->parent->right_child == move_back) + { + back_par_ptr = &move_back->parent->right_child; + } + else + { + back_par_ptr = &move_back->parent->left_child; + } + + fgk_swap_ptrs (&move_fwd->parent, &move_back->parent); + fgk_swap_ptrs (fwd_par_ptr, back_par_ptr); + + move_fwd->my_block->block_leader = move_fwd; +} + +/* Shifts node, the leader of its block, into the next block. 
*/ +static void fgk_promote (fgk_stream *h, fgk_node *node) +{ + fgk_node *my_left, *my_right; + fgk_block *cur_block; + + my_right = node->right; + my_left = node->left; + cur_block = node->my_block; + + if (node->weight == 0) + { + return; + } + + /* if left is right child, parent of remaining zeros case (?), means parent + * has same weight as right child. */ + if (my_left == node->right_child && + node->left_child && + node->left_child->weight == 0) + { + XD3_ASSERT (node->left_child == h->remaining_zeros); + XD3_ASSERT (node->right_child->weight == (node->weight+1)); /* child weight was already incremented */ + + if (node->weight == (my_right->weight - 1) && my_right != h->root_node) + { + fgk_free_block (h, cur_block); + node->my_block = my_right->my_block; + my_left->my_block = my_right->my_block; + } + + return; + } + + if (my_left == h->remaining_zeros) + { + return; + } + + /* true if the left neighbour shares node's block; it becomes the leader */ + if (my_left->my_block == cur_block) + { + my_left->my_block->block_leader = my_left; + } + else + { + fgk_free_block (h, cur_block); + } + + /* node->parent != my_right */ + if ((node->weight == (my_right->weight - 1)) && (my_right != h->root_node)) + { + node->my_block = my_right->my_block; + } + else + { + node->my_block = fgk_make_block (h, node); + } +} + +/* When an element is seen the first time this is called to remove it from the list of + * zero weight elements and introduce a new internal node to the tree. 
*/ +static fgk_node* fgk_increase_zero_weight (fgk_stream *h, usize_t n) +{ + fgk_node *this_zero, *new_internal, *zero_ptr; + + this_zero = h->alphabet + n; + + if (h->zero_freq_count == 1) + { + /* this is the last zero weight element: no new internal node */ + this_zero->right_child = NULL; + + if (this_zero->right->weight == 1) + { + this_zero->my_block = this_zero->right->my_block; + } + else + { + this_zero->my_block = fgk_make_block (h, this_zero); + } + + h->remaining_zeros = NULL; + + return this_zero; + } + + zero_ptr = h->remaining_zeros; + + new_internal = h->free_node++; + + new_internal->parent = zero_ptr->parent; + new_internal->right = zero_ptr->right; + new_internal->weight = 0; + new_internal->right_child = this_zero; + new_internal->left = this_zero; + + if (h->remaining_zeros == h->root_node) + { + /* This is the first element to be coded */ + h->root_node = new_internal; + this_zero->my_block = fgk_make_block (h, this_zero); + new_internal->my_block = fgk_make_block (h, new_internal); + } + else + { + new_internal->right->left = new_internal; + + if (zero_ptr->parent->right_child == zero_ptr) + { + zero_ptr->parent->right_child = new_internal; + } + else + { + zero_ptr->parent->left_child = new_internal; + } + + if (new_internal->right->weight == 1) + { + new_internal->my_block = new_internal->right->my_block; + } + else + { + new_internal->my_block = fgk_make_block (h, new_internal); + } + + this_zero->my_block = new_internal->my_block; + } + + fgk_eliminate_zero (h, this_zero); + + new_internal->left_child = h->remaining_zeros; + + this_zero->right = new_internal; + this_zero->left = h->remaining_zeros; + this_zero->parent = new_internal; + this_zero->left_child = NULL; + this_zero->right_child = NULL; + + h->remaining_zeros->parent = new_internal; + h->remaining_zeros->right = this_zero; + + return this_zero; +} + +/* When a zero frequency element is encoded, it is followed by the + * binary representation of the index into the remaining elements. 
+ * Returns that index for alphabet[n]: the number of right_child links + * followed from h->remaining_zeros to reach it. */ +static unsigned int fgk_find_nth_zero (fgk_stream* h, usize_t n) +{ + fgk_node *target_ptr = h->alphabet + n; + fgk_node *head_ptr = h->remaining_zeros; + unsigned int idx = 0; + + while (target_ptr != head_ptr) + { + head_ptr = head_ptr->right_child; + idx += 1; + } + + return idx; +} + +/* Splices node out of the list of zeros. */ +static void fgk_eliminate_zero (fgk_stream* h, fgk_node *node) +{ + if (h->zero_freq_count == 1) + { + return; + } + + fgk_factor_remaining(h); + + if (node->left_child == NULL) + { + h->remaining_zeros = h->remaining_zeros->right_child; + h->remaining_zeros->left_child = NULL; + } + else if (node->right_child == NULL) + { + node->left_child->right_child = NULL; + } + else + { + node->right_child->left_child = node->left_child; + node->left_child->right_child = node->right_child; + } +} +/* Clears node (index i of size) and links it to its neighbours in the initial zero list. */ +static void fgk_init_node (fgk_node *node, usize_t i, usize_t size) +{ + if (i < size - 1) + { + node->right_child = node + 1; + } + else + { + node->right_child = NULL; + } + + if (i >= 1) + { + node->left_child = node - 1; + } + else + { + node->left_child = NULL; + } + + node->weight = 0; + node->parent = NULL; + node->right = NULL; + node->left = NULL; + node->my_block = NULL; +} + +/* The data structure used is an array of blocks, which are unions of + * free pointers and huffnode pointers. free blocks are a linked list + * of free blocks, the front of which is h->free_block. The used + * blocks are pointers to the head of each block. */ +static fgk_block* fgk_make_block (fgk_stream *h, fgk_node* lead) +{ + fgk_block *ret = h->free_block; + + XD3_ASSERT (h->free_block != NULL); + + h->free_block = h->free_block->block_freeptr; + + ret->block_leader = lead; + + return ret; +} + +/* Restores the block to the front of the free list. 
*/ +static void fgk_free_block (fgk_stream *h, fgk_block *b) +{ + b->block_freeptr = h->free_block; + h->free_block = b; +} + +/* decrements zero_freq_count, then sets zero_freq_exp and zero_freq_rem so + * that zero_freq_count == 2^zero_freq_exp + zero_freq_rem. */ +static void fgk_factor_remaining (fgk_stream *h) +{ + unsigned int i; + + i = (--h->zero_freq_count); + h->zero_freq_exp = 0; + + while (i > 1) + { + h->zero_freq_exp += 1; + i >>= 1; + } + + i = 1 << h->zero_freq_exp; + + h->zero_freq_rem = h->zero_freq_count - i; +} + +/* receives a bit at a time and returns true when a complete code has + * been received. + */ +static inline int fgk_decode_bit (fgk_stream* h, fgk_bit b) +{ + XD3_ASSERT (b == 1 || b == 0); + + if (IS_ADAPTIVE && h->decode_ptr->weight == 0) + { + usize_t bitsreq; + + if (h->zero_freq_rem == 0) + { + bitsreq = h->zero_freq_exp; + } + else + { + bitsreq = h->zero_freq_exp + 1; + } + + h->coded_bits[h->coded_depth] = b; + h->coded_depth += 1; + + return h->coded_depth >= bitsreq; + } + else + { + if (b) + { + h->decode_ptr = h->decode_ptr->right_child; + } + else + { + h->decode_ptr = h->decode_ptr->left_child; + } + + if (h->decode_ptr->left_child == NULL) + { + /* If the weight is non-zero, finished. */ + if (h->decode_ptr->weight != 0) + { + return 1; + } + + /* zero_freq_count is dropping to 0, finished. */ + return h->zero_freq_count == 1; + } + else + { + return 0; + } + } +} + +static usize_t fgk_nth_zero (fgk_stream* h, usize_t n) +{ + fgk_node *ret = h->remaining_zeros; + + /* ERROR: if during this loop (ret->right_child == NULL) then the + * encoder's zero count is too high. Could return an error code + * now, but is probably unnecessary overhead, since the caller + * should check integrity anyway. 
*/ + for (; n != 0 && ret->right_child != NULL; n -= 1) + { + ret = ret->right_child; + } + + return (usize_t)(ret - h->alphabet); +} + +/* call once fgk_decode_bit has returned 1: this retrieves the decoded + * index into the alphabet, updates the tree when adaptive, and + * resets the decoder to the root. + */ +static usize_t fgk_decode_data (fgk_stream* h) +{ + usize_t elt = (usize_t)(h->decode_ptr - h->alphabet); + + if (IS_ADAPTIVE && h->decode_ptr->weight == 0) { + usize_t i = 0; + usize_t n = 0; + + if (h->coded_depth > 0) + { + for (; i < h->coded_depth - 1; i += 1) + { + n |= h->coded_bits[i]; + n <<= 1; + } + } + + n |= h->coded_bits[i]; + elt = fgk_nth_zero(h, n); + } + + h->coded_depth = 0; + + if (IS_ADAPTIVE) + { + fgk_update_tree(h, elt); + } + + h->decode_ptr = h->root_node; + + return elt; +} +/* Releases h and the three arrays it owns; a NULL h is ignored. */ +static void fgk_destroy (xd3_stream *stream, + fgk_stream *h) +{ + if (h != NULL) + { + xd3_free (stream, h->alphabet); + xd3_free (stream, h->coded_bits); + xd3_free (stream, h->block_array); + xd3_free (stream, h); + } +} + +/*********************************************************************/ +/* Xdelta */ +/*********************************************************************/ +/* Codes each byte of the input pages with fgk_encode_data and emits the bits to output. */ +static int +xd3_encode_fgk (xd3_stream *stream, fgk_stream *sec_stream, xd3_output *input, xd3_output *output, xd3_sec_cfg *cfg) +{ + bit_state bstate = BIT_STATE_ENCODE_INIT; + xd3_output *cur_page; + int ret; + + /* OPT: quit compression early if it looks bad */ + for (cur_page = input; cur_page; cur_page = cur_page->next_page) + { + const uint8_t *inp = cur_page->base; + const uint8_t *inp_max = inp + cur_page->next; + + while (inp < inp_max) + { + usize_t bits = fgk_encode_data (sec_stream, *inp++); + + while (bits--) + { + if ((ret = xd3_encode_bit (stream, & output, & bstate, fgk_get_encoded_bit (sec_stream)))) { return ret; } + } + } + } + + return xd3_flush_bits (stream, & output, & bstate); +} + +static int +xd3_decode_fgk (xd3_stream *stream, + fgk_stream *sec_stream, + const uint8_t 
**input_pos, + const uint8_t *const input_max, + uint8_t **output_pos, + const uint8_t *const output_max) +{ + bit_state bstate; + uint8_t *output = *output_pos; + const uint8_t *input = *input_pos; +/* Feed input bits, lowest mask first, until output reaches output_max. */ + for (;;) + { + if (input == input_max) + { + stream->msg = "secondary decoder end of input"; + return XD3_INTERNAL; + } + + bstate.cur_byte = *input++; + + for (bstate.cur_mask = 1; bstate.cur_mask != 0x100; bstate.cur_mask <<= 1) + { + int done = fgk_decode_bit (sec_stream, (bstate.cur_byte & bstate.cur_mask) ? 1U : 0U); + + if (! done) { continue; } + + *output++ = fgk_decode_data (sec_stream); + + if (output == output_max) + { + /* During regression testing: */ + IF_REGRESSION ({ + int ret; + bstate.cur_mask <<= 1; + if ((ret = xd3_test_clean_bits (stream, & bstate))) { return ret; } + }); + + (*output_pos) = output; + (*input_pos) = input; + return 0; + } + } + } +} + +#endif /* _XDELTA3_FGK_ */ diff --git a/lib/xdelta3/xdelta3-hash.h b/lib/xdelta3/xdelta3-hash.h new file mode 100644 index 0000000..9238ecd --- /dev/null +++ b/lib/xdelta3/xdelta3-hash.h @@ -0,0 +1,159 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ +#ifndef _XDELTA3_HASH_H_ +#define _XDELTA3_HASH_H_ + +#include "xdelta3-internal.h" + +#if XD3_DEBUG +#define SMALL_HASH_DEBUG1(s,inp) \ + uint32_t debug_state; \ + uint32_t debug_hval = xd3_checksum_hash (& (s)->small_hash, \ + xd3_scksum (&debug_state, (inp), (s)->smatcher.small_look)) +#define SMALL_HASH_DEBUG2(s,inp) \ + XD3_ASSERT (debug_hval == xd3_checksum_hash (& (s)->small_hash, \ + xd3_scksum (&debug_state, (inp), (s)->smatcher.small_look))) +#else +#define SMALL_HASH_DEBUG1(s,inp) +#define SMALL_HASH_DEBUG2(s,inp) +#endif /* XD3_DEBUG */ + +#if UNALIGNED_OK +#define UNALIGNED_READ32(dest,src) (*(dest)) = (*(uint32_t*)(src)) +#else +#define UNALIGNED_READ32(dest,src) memcpy((dest), (src), 4) +#endif + +/* These are good hash multipliers for 32-bit and 64-bit LCGs: see + * "linear congruential generators of different sizes and good lattice + * structure" */ +#define xd3_hash_multiplier32 1597334677U +#define xd3_hash_multiplier64 1181783497276652981ULL + +/* TODO: small cksum is hard-coded for 4 bytes (i.e., "look" is unused) */ +static inline uint32_t +xd3_scksum (uint32_t *state, + const uint8_t *base, + const usize_t look) +{ + UNALIGNED_READ32(state, base); + return (*state) * xd3_hash_multiplier32; +} +static inline uint32_t +xd3_small_cksum_update (uint32_t *state, + const uint8_t *base, + usize_t look) +{ + UNALIGNED_READ32(state, base+1); + return (*state) * xd3_hash_multiplier32; +} + +#if XD3_ENCODER +inline usize_t +xd3_checksum_hash (const xd3_hash_cfg *cfg, const usize_t cksum) +{ + return (cksum >> cfg->shift) ^ (cksum & cfg->mask); +} + +#if SIZEOF_USIZE_T == 4 +inline uint32_t +xd3_large32_cksum (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look) +{ + uint32_t h = 0; + for (usize_t i = 0; i < look; i++) { + h += base[i] * cfg->powers[i]; + } + return h; +} + +inline uint32_t +xd3_large32_cksum_update (xd3_hash_cfg *cfg, const uint32_t cksum, + const uint8_t *base, const usize_t look) +{ + return xd3_hash_multiplier32 * cksum 
- cfg->multiplier * base[0] + base[look]; +} +#endif + +#if SIZEOF_USIZE_T == 8 +inline uint64_t +xd3_large64_cksum (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look) +{ + uint64_t h = 0; + for (usize_t i = 0; i < look; i++) { + h += base[i] * cfg->powers[i]; + } + return h; +} + +inline uint64_t +xd3_large64_cksum_update (xd3_hash_cfg *cfg, const uint64_t cksum, + const uint8_t *base, const usize_t look) +{ + return xd3_hash_multiplier64 * cksum - cfg->multiplier * base[0] + base[look]; +} +#endif +/* Returns i - 1 for the smallest i >= 3 with slots < 2^i (the table's bit width). */ +static usize_t +xd3_size_hashtable_bits (usize_t slots) +{ + usize_t bits = (SIZEOF_USIZE_T * 8) - 1; + usize_t i; + + for (i = 3; i <= bits; i += 1) + { + if (slots < ((usize_t) 1 << i)) /* shift in usize_t: i can exceed 31 */ + { + /* Note: this is the compaction=1 setting measured in + * checksum_test */ + bits = i - 1; + break; + } + } + + return bits; +} +/* Fills cfg (size, mask, shift, look, powers, multiplier) for slots; returns ENOMEM if powers cannot be allocated. */ +int +xd3_size_hashtable (xd3_stream *stream, + usize_t slots, + usize_t look, + xd3_hash_cfg *cfg) +{ + usize_t bits = xd3_size_hashtable_bits (slots); + + cfg->size = ((usize_t) 1 << bits); /* bits can exceed 31 with 64-bit usize_t */ + cfg->mask = (cfg->size - 1); + cfg->shift = (SIZEOF_USIZE_T * 8) - bits; + cfg->look = look; + + if ((cfg->powers = + (usize_t*) xd3_alloc0 (stream, look, sizeof (usize_t))) == NULL) + { + return ENOMEM; + } + + cfg->powers[look-1] = 1; + for (int i = look-2; i >= 0; i--) + { + cfg->powers[i] = cfg->powers[i+1] * xd3_hash_multiplier; + } + cfg->multiplier = cfg->powers[0] * xd3_hash_multiplier; + + return 0; +} + +#endif /* XD3_ENCODER */ +#endif /* _XDELTA3_HASH_H_ */ diff --git a/lib/xdelta3/xdelta3-internal.h b/lib/xdelta3/xdelta3-internal.h new file mode 100644 index 0000000..0c6a1bb --- /dev/null +++ b/lib/xdelta3/xdelta3-internal.h @@ -0,0 +1,385 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +#ifndef XDELTA3_INTERNAL_H__ +#define XDELTA3_INTERNAL_H__ + +#include "xdelta3.h" + +typedef struct _main_file main_file; +typedef struct _main_extcomp main_extcomp; + +void main_buffree (void *ptr); +void* main_bufalloc (size_t size); +void main_file_init (main_file *xfile); +int main_file_close (main_file *xfile); +void main_file_cleanup (main_file *xfile); +int main_file_isopen (main_file *xfile); +int main_file_open (main_file *xfile, const char* name, int mode); +int main_file_exists (main_file *xfile); +int main_file_stat (main_file *xfile, xoff_t *size); +int xd3_whole_append_window (xd3_stream *stream); +int xd3_main_cmdline (int argc, char **argv); +int main_file_read (main_file *ifile, + uint8_t *buf, + size_t size, + size_t *nread, + const char *msg); +int main_file_write (main_file *ofile, uint8_t *buf, + usize_t size, const char *msg); +void* main_malloc (size_t size); +void main_free (void *ptr); + +int test_compare_files (const char* f0, const char* f1); +usize_t xd3_bytes_on_srcblk (xd3_source *src, xoff_t blkno); +xoff_t xd3_source_eof(const xd3_source *src); + +uint32_t xd3_large_cksum_update (uint32_t cksum, + const uint8_t *base, + usize_t look); +int xd3_emit_byte (xd3_stream *stream, + xd3_output **outputp, + uint8_t code); + +int xd3_emit_bytes (xd3_stream *stream, + xd3_output **outputp, + const uint8_t *base, + usize_t size); +xd3_output* xd3_alloc_output (xd3_stream *stream, + xd3_output *old_output); + +int xd3_encode_init_full (xd3_stream *stream); +usize_t xd3_pow2_roundup (usize_t x); +long get_millisecs_now (void); +int 
xd3_process_stream (int is_encode, + xd3_stream *stream, + int (*func) (xd3_stream *), + int close_stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max); + +#if PYTHON_MODULE || SWIG_MODULE || NOT_MAIN +int xd3_main_cmdline (int argc, char **argv); +#endif + +#if REGRESSION_TEST +int xd3_selftest (void); +#endif + +/* main_file->mode values */ +typedef enum +{ + XO_READ = 0, + XO_WRITE = 1 +} main_file_modes; + +#ifndef XD3_POSIX +#define XD3_POSIX 0 +#endif +#ifndef XD3_STDIO +#define XD3_STDIO 0 +#endif +#ifndef XD3_WIN32 +#define XD3_WIN32 0 +#endif +#ifndef NOT_MAIN +#define NOT_MAIN 0 +#endif + +/* If none are set, default to posix. */ +#if (XD3_POSIX + XD3_STDIO + XD3_WIN32) == 0 +#undef XD3_POSIX +#define XD3_POSIX 1 +#endif +/* Per-file state; the handle type follows the selected I/O backend. */ +struct _main_file +{ +#if XD3_WIN32 + HANDLE file; +#elif XD3_STDIO + FILE *file; +#elif XD3_POSIX + int file; +#endif + + int mode; /* XO_READ or XO_WRITE (main_file_modes) */ + const char *filename; /* File name or /dev/stdin, + * /dev/stdout, /dev/stderr. */ + char *filename_copy; /* File name or /dev/stdin, + * /dev/stdout, /dev/stderr. */ + const char *realname; /* File name or /dev/stdin, + * /dev/stdout, /dev/stderr. */ + const main_extcomp *compressor; /* External compression struct. */ + int flags; /* RD_FIRST, RD_NONEXTERNAL, ... 
*/ + xoff_t nread; /* for input position */ + xoff_t nwrite; /* for output position */ + uint8_t *snprintf_buf; /* internal snprintf() use */ + int size_known; /* Set by main_set_source */ + xoff_t source_position; /* for avoiding seek in getblk_func */ + int seek_failed; /* after seek fails once, try FIFO */ +}; + +#ifndef UINT32_MAX +#define UINT32_MAX 4294967295U +#endif + +#ifndef UINT64_MAX +#define UINT64_MAX 18446744073709551615ULL +#endif + +#define UINT32_OFLOW_MASK 0xfe000000U +#define UINT64_OFLOW_MASK 0xfe00000000000000ULL + +/********************************************************************* + Integer encoder/decoder functions + **********************************************************************/ + +/* Consume N bytes of input, only used by the decoder. */ +#define DECODE_INPUT(n) \ + do { \ + stream->total_in += (xoff_t) (n); \ + stream->avail_in -= (n); \ + stream->next_in += (n); \ + } while (0) + +#define DECODE_INTEGER_TYPE(PART,OFLOW) \ + while (stream->avail_in != 0) \ + { \ + usize_t next = stream->next_in[0]; \ + \ + DECODE_INPUT(1); \ + \ + if (PART & OFLOW) \ + { \ + stream->msg = "overflow in decode_integer"; \ + return XD3_INVALID_INPUT; \ + } \ + \ + PART = (PART << 7) | (next & 127); \ + \ + if ((next & 128) == 0) \ + { \ + (*val) = PART; \ + PART = 0; \ + return 0; \ + } \ + } \ + \ + stream->msg = "further input required"; \ + return XD3_INPUT + +#define READ_INTEGER_TYPE(TYPE, OFLOW) \ + TYPE val = 0; \ + const uint8_t *inp = (*inpp); \ + usize_t next; \ + \ + do \ + { \ + if (inp == maxp) \ + { \ + stream->msg = "end-of-input in read_integer"; \ + return XD3_INVALID_INPUT; \ + } \ + \ + if (val & OFLOW) \ + { \ + stream->msg = "overflow in read_intger"; \ + return XD3_INVALID_INPUT; \ + } \ + \ + next = (*inp++); \ + val = (val << 7) | (next & 127); \ + } \ + while (next & 128); \ + \ + (*valp) = val; \ + (*inpp) = inp; \ + \ + return 0 + +#define EMIT_INTEGER_TYPE() \ + /* a 64-bit value at 7 bits per byte needs 9.1 (so 10) bytes */ \ + 
uint8_t buf[10]; \ + usize_t bufi = 10; \ + \ + /* This loop fills buf from the end, 7 bits at a time, with every MSB on. */ \ + do \ + { \ + buf[--bufi] = (num & 127) | 128; \ + num >>= 7U; \ + } \ + while (num != 0); \ + \ + /* Turn off MSB of the last byte. */ \ + buf[9] &= 127; \ + \ + return xd3_emit_bytes (stream, output, buf + bufi, 10 - bufi) + +#define IF_SIZEOF32(x) if (num < (1U << (7 * (x)))) return (x); +#define IF_SIZEOF64(x) if (num < (1ULL << (7 * (x)))) return (x); +/* xd3_sizeof_*: number of bytes num occupies at 7 bits per byte. */ +#if USE_UINT32 +static inline uint32_t +xd3_sizeof_uint32_t (uint32_t num) +{ + IF_SIZEOF32(1); + IF_SIZEOF32(2); + IF_SIZEOF32(3); + IF_SIZEOF32(4); + return 5; +} + +static inline int +xd3_decode_uint32_t (xd3_stream *stream, uint32_t *val) +{ DECODE_INTEGER_TYPE (stream->dec_32part, UINT32_OFLOW_MASK); } + +static inline int +xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp, + const uint8_t *maxp, uint32_t *valp) +{ READ_INTEGER_TYPE (uint32_t, UINT32_OFLOW_MASK); } + +#if XD3_ENCODER +static inline int +xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, uint32_t num) +{ EMIT_INTEGER_TYPE (); } +#endif /* XD3_ENCODER */ +#endif /* USE_UINT32 */ + +#if USE_UINT64 +static inline uint32_t +xd3_sizeof_uint64_t (uint64_t num) +{ + IF_SIZEOF64(1); + IF_SIZEOF64(2); + IF_SIZEOF64(3); + IF_SIZEOF64(4); + IF_SIZEOF64(5); + IF_SIZEOF64(6); + IF_SIZEOF64(7); + IF_SIZEOF64(8); + IF_SIZEOF64(9); + + return 10; +} + +static inline int +xd3_decode_uint64_t (xd3_stream *stream, uint64_t *val) +{ DECODE_INTEGER_TYPE (stream->dec_64part, UINT64_OFLOW_MASK); } + +static inline int +xd3_read_uint64_t (xd3_stream *stream, const uint8_t **inpp, + const uint8_t *maxp, uint64_t *valp) +{ READ_INTEGER_TYPE (uint64_t, UINT64_OFLOW_MASK); } + +#if XD3_ENCODER +static inline int +xd3_emit_uint64_t (xd3_stream *stream, xd3_output **output, uint64_t num) +{ EMIT_INTEGER_TYPE (); } +#endif /* XD3_ENCODER */ +#endif /* USE_UINT64 */ + +#if SIZEOF_USIZE_T == 4 +#define USIZE_T_MAX UINT32_MAX +#define 
USIZE_T_MAXBLKSZ 0x80000000U +#define XD3_MAXSRCWINSZ (1ULL << 31) +#define xd3_large_cksum xd3_large32_cksum +#define xd3_large_cksum_update xd3_large32_cksum_update +#define xd3_hash_multiplier xd3_hash_multiplier32 +/* usize_t is 4 bytes here: the size coders forward to the uint32_t ones. */ +static inline uint32_t xd3_sizeof_size (usize_t num) +{ return xd3_sizeof_uint32_t (num); } +static inline int xd3_decode_size (xd3_stream *stream, usize_t *valp) +{ return xd3_decode_uint32_t (stream, (uint32_t*) valp); } +static inline int xd3_read_size (xd3_stream *stream, const uint8_t **inpp, + const uint8_t *maxp, usize_t *valp) +{ return xd3_read_uint32_t (stream, inpp, maxp, (uint32_t*) valp); } +#if XD3_ENCODER +static inline int xd3_emit_size (xd3_stream *stream, xd3_output **output, usize_t num) +{ return xd3_emit_uint32_t (stream, output, num); } +#endif + +#elif SIZEOF_USIZE_T == 8 +#define USIZE_T_MAX UINT64_MAX +#define USIZE_T_MAXBLKSZ 0x8000000000000000ULL +#define XD3_MAXSRCWINSZ (1ULL << 61) +#define xd3_large_cksum xd3_large64_cksum +#define xd3_large_cksum_update xd3_large64_cksum_update +#define xd3_hash_multiplier xd3_hash_multiplier64 +/* usize_t is 8 bytes here: the size coders forward to the uint64_t ones. */ +static inline uint32_t xd3_sizeof_size (usize_t num) +{ return xd3_sizeof_uint64_t (num); } +static inline int xd3_decode_size (xd3_stream *stream, usize_t *valp) +{ return xd3_decode_uint64_t (stream, (uint64_t*) valp); } +static inline int xd3_read_size (xd3_stream *stream, const uint8_t **inpp, + const uint8_t *maxp, usize_t *valp) +{ return xd3_read_uint64_t (stream, inpp, maxp, (uint64_t*) valp); } +#if XD3_ENCODER +static inline int xd3_emit_size (xd3_stream *stream, xd3_output **output, usize_t num) +{ return xd3_emit_uint64_t (stream, output, num); } +#endif + +#endif /* SIZEOF_USIZE_T */ + +#if SIZEOF_XOFF_T == 4 +#define XOFF_T_MAX UINT32_MAX + +static inline int xd3_decode_offset (xd3_stream *stream, xoff_t *valp) +{ return xd3_decode_uint32_t (stream, (uint32_t*) valp); } +#if XD3_ENCODER +static inline int xd3_emit_offset (xd3_stream *stream, xd3_output **output, xoff_t num) +{ 
return xd3_emit_uint32_t (stream, output, num); } +#endif + +#elif SIZEOF_XOFF_T == 8 +#define XOFF_T_MAX UINT64_MAX + +static inline int xd3_decode_offset (xd3_stream *stream, xoff_t *valp) +{ return xd3_decode_uint64_t (stream, (uint64_t*) valp); } +#if XD3_ENCODER +static inline int xd3_emit_offset (xd3_stream *stream, xd3_output **output, xoff_t num) +{ return xd3_emit_uint64_t (stream, output, num); } +#endif + +#endif +/* True when a + b would exceed the maximum value of the type. */ +#define USIZE_T_OVERFLOW(a,b) ((USIZE_T_MAX - (usize_t) (a)) < (usize_t) (b)) +#define XOFF_T_OVERFLOW(a,b) ((XOFF_T_MAX - (xoff_t) (a)) < (xoff_t) (b)) + +int xd3_size_hashtable (xd3_stream *stream, + usize_t slots, + usize_t look, + xd3_hash_cfg *cfg); + +usize_t xd3_checksum_hash (const xd3_hash_cfg *cfg, const usize_t cksum); + +#if USE_UINT32 +uint32_t xd3_large32_cksum (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look); +uint32_t xd3_large32_cksum_update (xd3_hash_cfg *cfg, const uint32_t cksum, + const uint8_t *base, const usize_t look); +#endif /* USE_UINT32 */ + +#if USE_UINT64 +uint64_t xd3_large64_cksum (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look); +uint64_t xd3_large64_cksum_update (xd3_hash_cfg *cfg, const uint64_t cksum, + const uint8_t *base, const usize_t look); +#endif /* USE_UINT64 */ + +#define MAX_LRU_SIZE 32U +#define XD3_MINSRCWINSZ (XD3_ALLOCSIZE * MAX_LRU_SIZE) + +#endif // XDELTA3_INTERNAL_H__ diff --git a/lib/xdelta3/xdelta3-list.h b/lib/xdelta3/xdelta3-list.h new file mode 100644 index 0000000..b6616fe --- /dev/null +++ b/lib/xdelta3/xdelta3-list.h @@ -0,0 +1,127 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +#ifndef __XDELTA3_LIST__ +#define __XDELTA3_LIST__ +/* Generates inline helpers for a circular doubly-linked list of LTYPE links embedded in ETYPE as member LNAME. */ +#define XD3_MAKELIST(LTYPE,ETYPE,LNAME) \ + \ +static inline ETYPE* \ +LTYPE ## _entry (LTYPE* l) \ +{ \ + return (ETYPE*) ((char*) l - (ptrdiff_t) &((ETYPE*) 0)->LNAME); \ +} \ + \ +static inline void \ +LTYPE ## _init (LTYPE *l) \ +{ \ + l->next = l; \ + l->prev = l; \ +} \ + \ +static inline void \ +LTYPE ## _add (LTYPE *prev, LTYPE *next, LTYPE *ins) \ +{ \ + next->prev = ins; \ + prev->next = ins; \ + ins->next = next; \ + ins->prev = prev; \ +} \ + \ +static inline void \ +LTYPE ## _push_back (LTYPE *l, ETYPE *i) \ +{ \ + LTYPE ## _add (l->prev, l, & i->LNAME); \ +} \ + \ +static inline void \ +LTYPE ## _del (LTYPE *next, \ + LTYPE *prev) \ +{ \ + next->prev = prev; \ + prev->next = next; \ +} \ + \ +static inline ETYPE* \ +LTYPE ## _remove (ETYPE *f) \ +{ \ + LTYPE *i = f->LNAME.next; \ + LTYPE ## _del (f->LNAME.next, f->LNAME.prev); \ + return LTYPE ## _entry (i); \ +} \ + \ +static inline ETYPE* \ +LTYPE ## _pop_back (LTYPE *l) \ +{ \ + LTYPE *i = l->prev; \ + LTYPE ## _del (i->next, i->prev); \ + return LTYPE ## _entry (i); \ +} \ + \ +static inline ETYPE* \ +LTYPE ## _pop_front (LTYPE *l) \ +{ \ + LTYPE *i = l->next; \ + LTYPE ## _del (i->next, i->prev); \ + return LTYPE ## _entry (i); \ +} \ + \ +static inline int \ +LTYPE ## _empty (LTYPE *l) \ +{ \ + return l == l->next; \ +} \ + \ +static inline ETYPE* \ +LTYPE ## _front (LTYPE *f) \ +{ \ + return LTYPE ## _entry (f->next); \ +} \ + \ +static inline ETYPE* \ +LTYPE ## _back (LTYPE *f) \ +{ \ + return LTYPE ## _entry (f->prev); \ +} \ + \ 
+static inline int \ +LTYPE ## _end (LTYPE *f, ETYPE *i) \ +{ \ + return f == & i->LNAME; \ +} \ + \ +static inline ETYPE* \ +LTYPE ## _next (ETYPE *f) \ +{ \ + return LTYPE ## _entry (f->LNAME.next); \ +} \ + \ +static inline usize_t \ +LTYPE ## _length (LTYPE *l) \ +{ \ + LTYPE *p; \ + usize_t c = 0; \ + \ + for (p = l->next; p != l; p = p->next) \ + { \ + c += 1; \ + } \ + \ + return c; \ +} \ + \ +typedef int unused_ ## LTYPE + +#endif diff --git a/lib/xdelta3/xdelta3-lzma.h b/lib/xdelta3/xdelta3-lzma.h new file mode 100644 index 0000000..a707da8 --- /dev/null +++ b/lib/xdelta3/xdelta3-lzma.h @@ -0,0 +1,195 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* Note: The use of the _easy_ decoder means we're not calling the + * xd3_stream malloc hooks. TODO(jmacd) Fix if anyone cares. 
*/
+
+#ifndef _XDELTA3_LZMA_H_
+#define _XDELTA3_LZMA_H_
+
+#include <lzma.h>
+
+typedef struct _xd3_lzma_stream xd3_lzma_stream;
+/* Secondary-compressor state: the liblzma coder plus the LZMA2 options and filter chain that xd3_lzma_init() fills in for encoding. */
+struct _xd3_lzma_stream {
+  lzma_stream lzma;
+  lzma_options_lzma options;
+  lzma_filter filters[2];
+};
+/* Allocate one xd3_lzma_stream through xd3_alloc(); returned as the generic xd3_sec_stream pointer. */
+static xd3_sec_stream*
+xd3_lzma_alloc (xd3_stream *stream)
+{
+  return (xd3_sec_stream*) xd3_alloc (stream, sizeof (xd3_lzma_stream), 1);
+}
+/* Release the liblzma coder with lzma_end() and free the state through xd3_free(). */
+static void
+xd3_lzma_destroy (xd3_stream *stream, xd3_sec_stream *sec_stream)
+{
+  xd3_lzma_stream *ls = (xd3_lzma_stream*) sec_stream;
+  lzma_end (&ls->lzma);
+  xd3_free (stream, ls);
+}
+/* Zero the coder and set it up as an LZMA2 stream encoder (preset from the XD3_COMPLEVEL bits of stream->flags, no integrity check) or as a stream decoder.  Returns 0, XD3_INVALID for a bad preset, XD3_INTERNAL if liblzma refuses. */
+static int
+xd3_lzma_init (xd3_stream *stream, xd3_lzma_stream *sec, int is_encode)
+{
+  int ret;
+
+  memset (&sec->lzma, 0, sizeof(sec->lzma));
+
+  if (is_encode)
+    {
+      uint32_t preset =
+	(stream->flags & XD3_COMPLEVEL_MASK) >> XD3_COMPLEVEL_SHIFT;
+
+      if (lzma_lzma_preset(&sec->options, preset))
+	{
+	  stream->msg = "invalid lzma preset";
+	  return XD3_INVALID;
+	}
+
+      sec->filters[0].id = LZMA_FILTER_LZMA2;
+      sec->filters[0].options = &sec->options;
+      sec->filters[1].id = LZMA_VLI_UNKNOWN;
+
+      ret = lzma_stream_encoder (&sec->lzma, &sec->filters[0], LZMA_CHECK_NONE);
+    }
+  else
+    {
+      ret = lzma_stream_decoder (&sec->lzma, UINT64_MAX, LZMA_TELL_NO_CHECK);
+    }
+
+  if (ret != LZMA_OK)
+    {
+      stream->msg = "lzma stream init failed";
+      return XD3_INTERNAL;
+    }
+
+  return 0;
+}
+/* Decode [*input_pos, input_end) into [*output_pos, output_end).  Returns 0 only once the output buffer is completely filled, then advances both cursors; any status other than LZMA_OK / LZMA_NO_CHECK yields XD3_INTERNAL. */
+static int xd3_decode_lzma (xd3_stream *stream, xd3_lzma_stream *sec,
+			    const uint8_t **input_pos,
+			    const uint8_t  *const input_end,
+			    uint8_t       **output_pos,
+			    const uint8_t  *const output_end)
+{
+  uint8_t *output = *output_pos;
+  const uint8_t *input = *input_pos;
+  size_t avail_in = input_end - input;
+  size_t avail_out = output_end - output;
+
+  sec->lzma.avail_in = avail_in;
+  sec->lzma.next_in = input;
+  sec->lzma.avail_out = avail_out;
+  sec->lzma.next_out = output;
+
+  while (1)
+    {
+      int lret = lzma_code (&sec->lzma, LZMA_RUN);
+
+      switch (lret)
+	{
+	case LZMA_NO_CHECK:
+	case LZMA_OK:
+	  if (sec->lzma.avail_out == 0)
+	    {
+	      (*output_pos) = sec->lzma.next_out;
+	      (*input_pos) = sec->lzma.next_in;
+	      return 0;
+	    }
+	  break;
+
+	default:
+	  stream->msg = "lzma decoding error";
+	  return XD3_INTERNAL;
+	}
+    }
+}
+
+#if XD3_ENCODER
+/* Compress the page chain `input` into `output`, extending the output chain with xd3_alloc_output().  The last input page is submitted with LZMA_SYNC_FLUSH; returns 0 when liblzma reports LZMA_STREAM_END, ENOMEM or XD3_INTERNAL on failure. */
+static int xd3_encode_lzma (xd3_stream *stream,
+			    xd3_lzma_stream *sec,
+			    xd3_output   *input,
+			    xd3_output   *output,
+			    xd3_sec_cfg  *cfg)
+
+{
+  lzma_action action = LZMA_RUN;
+
+  cfg->inefficient = 1;  /* Can't skip windows */
+  sec->lzma.next_in = NULL;
+  sec->lzma.avail_in = 0;
+  sec->lzma.next_out = (output->base + output->next);
+  sec->lzma.avail_out = (output->avail - output->next);
+
+  while (1)
+    {
+      int lret;
+      size_t nwrite;
+      if (sec->lzma.avail_in == 0 && input != NULL)
+	{
+	  sec->lzma.avail_in = input->next;
+	  sec->lzma.next_in = input->base;
+
+	  if ((input = input->next_page) == NULL)
+	    {
+	      action = LZMA_SYNC_FLUSH;
+	    }
+	}
+
+      lret = lzma_code (&sec->lzma, action);
+      /* Bytes produced by this call = free space before minus free space after. */
+      nwrite = (output->avail - output->next) - sec->lzma.avail_out;
+
+      if (nwrite != 0)
+	{
+	  output->next += nwrite;
+
+	  if (output->next == output->avail)
+	    {
+	      if ((output = xd3_alloc_output (stream, output)) == NULL)
+		{
+		  return ENOMEM;
+		}
+
+	      sec->lzma.next_out = output->base;
+	      sec->lzma.avail_out = output->avail;
+	    }
+	}
+
+      switch (lret)
+	{
+	case LZMA_OK:
+	  break;
+
+	case LZMA_STREAM_END:
+	  return 0;
+
+	default:
+	  stream->msg = "lzma encoding error";
+	  return XD3_INTERNAL;
+	}
+    }
+
+  return 0;
+}
+
+#endif /* XD3_ENCODER */
+
+#endif /* _XDELTA3_LZMA_H_ */
diff --git a/lib/xdelta3/xdelta3-main.h b/lib/xdelta3/xdelta3-main.h
new file mode 100644
index 0000000..7f1e589
--- /dev/null
+++ b/lib/xdelta3/xdelta3-main.h
@@ -0,0 +1,4062 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+/* This is all the extra stuff you need for convenience to users in a
+ * command line application.  It contains these major components:
+ *
+ * 1. VCDIFF tools 2. external compression support (this is
+ * POSIX-specific).  3. a general read/write loop that handles all of
+ * the Xdelta decode/encode/VCDIFF-print functions 4. command-line
+ * interpreter 5. an Xdelta application header which stores default
+ * filename, external compression settings 6. output/error printing
+ * 7. basic file support and OS interface
+ */
+
+/* TODO list: 1. do exact gzip-like filename, stdout handling.  make a
+ * .vcdiff extension, refuse to encode to stdout without -cf, etc.
+ * 2. Allow the user to add a comment string to the app header without
+ * disturbing the default behavior.
+ */
+
+/* On error handling and printing:
+ *
+ * The xdelta library sets stream->msg to indicate what condition
+ * caused an internal failure, but many failures originate here and
+ * are printed here.  The return convention is 0 for success, as
+ * throughout Xdelta code, but special attention is required here for
+ * the operating system calls with different error handling.  See the
+ * main_file_* routines.  All errors in this file have a message
+ * printed at the time of occurrence.  Since some of these calls occur
+ * within calls to the library, the error may end up being printed
+ * again with a more general error message.
+ */
+
+/*********************************************************************/
+
+#include <limits.h>
+
+#ifndef XD3_POSIX
+#define XD3_POSIX 0
+#endif
+#ifndef XD3_STDIO
+#define XD3_STDIO 0
+#endif
+#ifndef XD3_WIN32
+#define XD3_WIN32 0
+#endif
+#ifndef NOT_MAIN
+#define NOT_MAIN 0
+#endif
+
+/* Combines xd3_strerror() and strerror() */
+const char* xd3_mainerror(int err_num);
+
+#include "xdelta3-internal.h"
+/* snprintf() wrapper: a negative result from vsnprintf_func() is reported as n, i.e. as if the output had filled the whole buffer. */
+int
+xsnprintf_func (char *str, size_t n, const char *fmt, ...)
+{
+  va_list a;
+  int ret;
+  va_start (a, fmt);
+  ret = vsnprintf_func (str, n, fmt, a);
+  va_end (a);
+  if (ret < 0)
+    {
+      ret = n;
+    }
+  return ret;
+}
+
+/* Handle externally-compressed inputs. */
+#ifndef EXTERNAL_COMPRESSION
+#define EXTERNAL_COMPRESSION 1
+#endif
+
+#define PRINTHDR_SPECIAL -4378291
+
+/* The number of soft-config variables.  */
+#define XD3_SOFTCFG_VARCNT 7
+
+/* this is used as in XPR(NT XD3_LIB_ERRMSG (stream, ret)) to print an
+ * error message from the library. */
+#define XD3_LIB_ERRMSG(stream, ret) "%s: %s\n", \
+    xd3_errstring (stream), xd3_mainerror (ret)
+
+#if XD3_POSIX
+#include <unistd.h> /* close, read, write...
*/
+#include <sys/types.h>
+#include <fcntl.h>
+#endif
+
+#ifndef _WIN32
+#include <unistd.h> /* lots */
+#include <sys/time.h> /* gettimeofday() */
+#include <sys/stat.h> /* stat() and fstat() */
+#else
+#if defined(_MSC_VER)
+#define strtoll _strtoi64
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifndef WIFEXITED
+#   define WIFEXITED(stat)  (((*((int *) &(stat))) & 0xff) == 0)
+#endif
+#ifndef WEXITSTATUS
+#   define WEXITSTATUS(stat) (((*((int *) &(stat))) >> 8) & 0xff)
+#endif
+#ifndef S_ISREG
+//#   ifdef S_IFREG
+//#       define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+//#   else
+#       define S_ISREG(m) 1
+//#   endif
+#endif /* !S_ISREG */
+
+// For standard input/output handles
+static STARTUPINFO winStartupInfo;
+#endif
+
+/**********************************************************************
+ ENUMS and TYPES
+ *********************************************************************/
+
+/* These flags (mainly pertaining to main_read() operations) are set
+ * in the main_file->flags variable.  All are related to external
+ * decompression support.
+ *
+ * RD_FIRST causes the external decompression check when the input is
+ * first read.
+ *
+ * RD_NONEXTERNAL disables external decompression for reading a
+ * compressed input, in the case of Xdelta inputs.  Note: Xdelta is
+ * supported as an external compression type, which is the
+ * reason for this flag.  An example to justify this is: to create a
+ * delta between two files that are VCDIFF-compressed.  Two external
+ * Xdelta decoders are run to supply decompressed source and target
+ * inputs to the Xdelta encoder. */
+typedef enum
+{
+  RD_FIRST       = (1 << 0),
+  RD_NONEXTERNAL = (1 << 1),
+  RD_DECOMPSET   = (1 << 2),
+  RD_MAININPUT   = (1 << 3),
+} xd3_read_flags;
+
+/* Main commands.  For example, CMD_PRINTHDR is the "xdelta printhdr"
+ * command.
*/
+typedef enum
+{
+  CMD_NONE = 0,
+  CMD_PRINTHDR,
+  CMD_PRINTHDRS,
+  CMD_PRINTDELTA,
+  CMD_RECODE,
+  CMD_MERGE_ARG,
+  CMD_MERGE,
+#if XD3_ENCODER
+  CMD_ENCODE,
+#endif
+  CMD_DECODE,
+  CMD_TEST,
+  CMD_CONFIG,
+} xd3_cmd;
+/* Default command and encode test: without XD3_ENCODER there is no CMD_ENCODE, the default is CMD_DECODE and IS_ENCODE() is constant 0. */
+#if XD3_ENCODER
+#define CMD_DEFAULT CMD_ENCODE
+#define IS_ENCODE(cmd) (cmd == CMD_ENCODE)
+#else
+#define CMD_DEFAULT CMD_DECODE
+#define IS_ENCODE(cmd) (0)
+#endif
+
+typedef struct _main_merge main_merge;
+typedef struct _main_merge_list main_merge_list;
+
+/* Various strings and magic values used to detect and call external
+ * compression.  See below for examples. */
+struct _main_extcomp
+{
+  const char    *recomp_cmdname;  /* e.g. "gzip" */
+  const char    *recomp_options;  /* e.g. "-c" */
+
+  const char    *decomp_cmdname;  /* e.g. "gzip" */
+  const char    *decomp_options;  /* e.g. "-dc" */
+
+  const char    *ident;           /* short identifier, e.g. "G" */
+  const char    *magic;           /* leading bytes of a compressed file, e.g. "\037\213" */
+  usize_t        magic_size;      /* number of bytes of magic to compare */
+  int            flags;
+};
+
+/* Merge state: the link node and the element type of the list generated by XD3_MAKELIST below. */
+
+struct _main_merge_list
+{
+  main_merge_list  *next;
+  main_merge_list  *prev;
+};
+
+struct _main_merge
+{
+  const char *filename;
+
+  main_merge_list  link;
+};
+/* Generates the main_merge_list_* helper functions for main_merge elements linked through their `link` member. */
+XD3_MAKELIST(main_merge_list,main_merge,link);
+
+/* TODO: really need to put options in a struct so that internal
+ * callers can easily reset state. */
+
+#define DEFAULT_VERBOSE 0
+
+/* Program options: various command line flags and options.
*/
+static int         option_stdout             = 0;
+static int         option_force              = 0;   /* Win32 open: overwrite (CREATE_ALWAYS) instead of CREATE_NEW */
+static int         option_verbose            = DEFAULT_VERBOSE; /* >4 logs each read, >5 each write */
+static int         option_quiet              = 0;   /* suppresses XF_ERROR file-failure messages */
+static int         option_use_appheader      = 1;
+static uint8_t*    option_appheader          = NULL;
+static int         option_use_secondary      = 1;   /* 0: main_set_secondary_flags() sets nothing */
+static const char* option_secondary          = NULL; /* "lzma", "fgk", "djw" + optional level digit, "none"; NULL picks the default */
+static int         option_use_checksum       = 1;
+static const char* option_smatch_config      = NULL;
+static int         option_no_compress        = 0;
+static int         option_no_output          = 0; /* do not write output */
+static const char *option_source_filename    = NULL;
+
+static int         option_level              = XD3_DEFAULT_LEVEL;
+static usize_t     option_iopt_size          = XD3_DEFAULT_IOPT_SIZE;
+static usize_t     option_winsize            = XD3_DEFAULT_WINSIZE;
+
+/* option_srcwinsz is restricted to [16kB, 2GB] when usize_t is 32 bits. */
+static xoff_t      option_srcwinsz           = XD3_DEFAULT_SRCWINSZ;
+static usize_t     option_sprevsz            = XD3_DEFAULT_SPREVSZ;
+
+/* These variables are suppressed to avoid their use w/o support.  main() warns
+ * appropriately when external compression is not enabled. */
+#if EXTERNAL_COMPRESSION
+static int         num_subprocs              = 0;
+static int         option_force2             = 0;
+static int         option_decompress_inputs  = 1;
+static int         option_recompress_outputs = 1;
+#endif
+
+/* This is for comparing "printdelta" output without attention to
+ * copy-instruction modes. */
+#if VCDIFF_TOOLS
+static int         option_print_cpymode = 1; /* Note: see reset_defaults(). */
+#endif
+
+/* Static variables */
+IF_DEBUG(static int main_mallocs = 0;)  /* outstanding main_malloc() blocks, debug builds only */
+
+static char*           program_name = NULL;
+static uint8_t*        appheader_used = NULL;
+static uint8_t*        main_bdata = NULL;
+static usize_t         main_bsize = 0;
+
+/* Hacks for VCDIFF tools, recode command.
*/
+static int allow_fake_source = 0;
+
+/* recode_stream is used by both recode/merge for reading vcdiff inputs */
+static xd3_stream *recode_stream = NULL;
+
+/* merge_stream is used by merge commands for storing the source encoding */
+static xd3_stream *merge_stream = NULL;
+
+/* This array of compressor types is compiled even if EXTERNAL_COMPRESSION is
+ * false just so the program knows the mapping of IDENT->NAME. */
+static main_extcomp extcomp_types[] =
+{
+  { "bzip2",    "-c",   "bzip2",      "-dc",   "B", "BZh",          3, 0 },
+  { "gzip",     "-c",   "gzip",       "-dc",   "G", "\037\213",     2, 0 },
+  { "compress", "-c",   "uncompress", "-c",    "Z", "\037\235",     2, 0 },
+
+  /* Xz is lzma with a magic number http://tukaani.org/xz/format.html */
+  { "xz", "-c", "xz", "-dc", "Y", "\xfd\x37\x7a\x58\x5a\x00", 2, 0 },
+};
+
+static int main_input (xd3_cmd cmd, main_file *ifile,
+                       main_file *ofile, main_file *sfile);
+static void main_get_appheader (xd3_stream *stream, main_file *ifile,
+				main_file *output, main_file *sfile);
+
+static int main_getblk_func (xd3_stream *stream,
+			     xd3_source *source,
+			     xoff_t      blkno);
+static int main_file_seek (main_file *xfile, xoff_t pos);
+static int main_read_primary_input (main_file   *file,
+				    uint8_t     *buf,
+				    size_t       size,
+				    size_t      *nread);
+
+static const char* main_format_bcnt (xoff_t r, shortbuf *buf);
+static int main_help (void);
+
+#if XD3_ENCODER
+static int xd3_merge_input_output (xd3_stream *stream,
+				   xd3_whole_state *source);
+#endif
+
+/* The code in xdelta3-blkcache.h is essentially part of this unit, see
+ * comments there. */
+#include "xdelta3-blkcache.h"
+
+static void (*xprintf_message_func)(const char*msg) = NULL;
+/* printf-style diagnostics: format into a fixed stack buffer, then pass the text to xprintf_message_func if one is installed, otherwise write it to stderr.  Over-long messages are truncated to the buffer. */
+void
+xprintf (const char *fmt, ...)
+{
+  char buf[1000];
+  va_list a;
+  int size;
+  va_start (a, fmt);
+  size = vsnprintf_func (buf, sizeof(buf), fmt, a);
+  va_end (a);
+  if (size < 0 || size >= (int) sizeof(buf))  /* error, or truncated: C99 returns the untruncated length */
+    {
+      size = sizeof(buf) - 1;
+      buf[size] = 0;
+    }
+  if (xprintf_message_func != NULL) {
+    xprintf_message_func(buf);
+  } else {
+    size_t ignore = fwrite(buf, 1, size, stderr);
+    (void) ignore;
+  }
+}
+/* Print the version and license banner; returns EXIT_SUCCESS. */
+static int
+main_version (void)
+{
+  /* $Format: "  XPR(NTR \"Xdelta version $Xdelta3Version$, Copyright (C) Joshua MacDonald\\n\");" $ */
+  XPR(NTR "Xdelta version 3.1.1, Copyright (C) Joshua MacDonald\n");
+  XPR(NTR "Xdelta comes with ABSOLUTELY NO WARRANTY.\n");
+  XPR(NTR "Licensed under the Apache License, Version 2.0\n");
+  XPR(NTR "See \"LICENSE\" for details.\n");
+  return EXIT_SUCCESS;
+}
+/* Print the banner followed by the compile-time configuration macros and basic type sizes; returns EXIT_SUCCESS. */
+static int
+main_config (void)
+{
+  main_version ();
+
+  XPR(NTR "EXTERNAL_COMPRESSION=%d\n", EXTERNAL_COMPRESSION);
+  XPR(NTR "REGRESSION_TEST=%d\n", REGRESSION_TEST);
+  XPR(NTR "SECONDARY_DJW=%d\n", SECONDARY_DJW);
+  XPR(NTR "SECONDARY_FGK=%d\n", SECONDARY_FGK);
+  XPR(NTR "SECONDARY_LZMA=%d\n", SECONDARY_LZMA);
+  XPR(NTR "UNALIGNED_OK=%d\n", UNALIGNED_OK);
+  XPR(NTR "VCDIFF_TOOLS=%d\n", VCDIFF_TOOLS);
+  XPR(NTR "XD3_ALLOCSIZE=%d\n", XD3_ALLOCSIZE);
+  XPR(NTR "XD3_DEBUG=%d\n", XD3_DEBUG);
+  XPR(NTR "XD3_ENCODER=%d\n", XD3_ENCODER);
+  XPR(NTR "XD3_POSIX=%d\n", XD3_POSIX);
+  XPR(NTR "XD3_STDIO=%d\n", XD3_STDIO);
+  XPR(NTR "XD3_WIN32=%d\n", XD3_WIN32);
+  XPR(NTR "XD3_USE_LARGEFILE64=%d\n", XD3_USE_LARGEFILE64);
+  XPR(NTR "XD3_USE_LARGESIZET=%d\n", XD3_USE_LARGESIZET);
+  XPR(NTR "XD3_DEFAULT_LEVEL=%d\n", XD3_DEFAULT_LEVEL);
+  XPR(NTR "XD3_DEFAULT_IOPT_SIZE=%d\n", XD3_DEFAULT_IOPT_SIZE);
+  XPR(NTR "XD3_DEFAULT_SPREVSZ=%d\n", XD3_DEFAULT_SPREVSZ);
+  XPR(NTR "XD3_DEFAULT_SRCWINSZ=%d\n", XD3_DEFAULT_SRCWINSZ);
+  XPR(NTR "XD3_DEFAULT_WINSIZE=%d\n", XD3_DEFAULT_WINSIZE);
+  XPR(NTR "XD3_HARDMAXWINSIZE=%d\n", XD3_HARDMAXWINSIZE);
+  XPR(NTR "sizeof(void*)=%d\n", (int)sizeof(void*));
+  XPR(NTR "sizeof(int)=%d\n", (int)sizeof(int));
+  XPR(NTR "sizeof(long)=%d\n", (int)sizeof(long));
+  XPR(NTR "sizeof(long long)=%d\n", (int)sizeof(long long));
+  XPR(NTR "sizeof(unsigned long long)=%d\n", (int)sizeof(unsigned long long));
+  XPR(NTR "sizeof(size_t)=%d\n", (int)sizeof(size_t));
+  XPR(NTR "sizeof(uint32_t)=%d\n", (int)sizeof(uint32_t));
+  XPR(NTR "sizeof(uint64_t)=%d\n", (int)sizeof(uint64_t));
+  XPR(NTR "sizeof(usize_t)=%d\n", (int)sizeof(usize_t));
+  XPR(NTR "sizeof(xoff_t)=%d\n", (int)sizeof(xoff_t));
+
+  return EXIT_SUCCESS;
+}
+/* Restore the option variables and file-scope state to their startup values and call main_lru_reset(). */
+static void
+reset_defaults(void)
+{
+  option_stdout = 0;
+  option_force = 0;
+  option_verbose = DEFAULT_VERBOSE;
+  option_quiet = 0;
+  option_appheader = NULL;
+  option_use_secondary = 1;
+  option_secondary = NULL;
+  option_smatch_config = NULL;
+  option_no_compress = 0;
+  option_no_output = 0;
+  option_source_filename = NULL;
+  program_name = NULL;
+  appheader_used = NULL;
+  main_bdata = NULL;
+  main_bsize = 0;
+  allow_fake_source = 0;
+  option_smatch_config = NULL;
+
+  main_lru_reset();
+
+  option_use_appheader = 1;
+  option_use_checksum = 1;
+#if EXTERNAL_COMPRESSION
+  option_force2 = 0;
+  option_decompress_inputs  = 1;
+  option_recompress_outputs = 1;
+  num_subprocs = 0;
+#endif
+#if VCDIFF_TOOLS
+  option_print_cpymode = 1;
+#endif
+  option_level = XD3_DEFAULT_LEVEL;
+  option_iopt_size = XD3_DEFAULT_IOPT_SIZE;
+  option_winsize = XD3_DEFAULT_WINSIZE;
+  option_srcwinsz = XD3_DEFAULT_SRCWINSZ;
+  option_sprevsz = XD3_DEFAULT_SPREVSZ;
+}
+/* malloc() that reports failure through XPR; returns NULL on failure. */
+static void*
+main_malloc1 (size_t size)
+{
+  void* r = malloc (size);
+  if (r == NULL) { XPR(NT "malloc: %s\n", xd3_mainerror (ENOMEM)); }
+  return r;
+}
+/* Buffer allocation: VirtualAlloc on Win32, main_malloc1 elsewhere.  Release with main_buffree(). */
+void* main_bufalloc (size_t size) {
+#if XD3_WIN32
+  return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
+#else
+  return main_malloc1(size);
+#endif
+}
+/* Counted allocation: debug builds track outstanding blocks in main_mallocs.  Release with main_free(). */
+void*
+main_malloc (size_t size)
+{
+  void *r = main_malloc1 (size);
+   if (r) { IF_DEBUG (main_mallocs += 1); }
+  return r;
+}
+/* Allocate items * size bytes with main_malloc1(); opaque is ignored. */
+static void*
+main_alloc (void   *opaque,
+	    size_t  items,
+	    usize_t  size)
+{
+  return main_malloc1 (items * size);
+}
+/* free() with an ignored opaque argument; counterpart of main_alloc(). */
+static void
+main_free1 (void *opaque, void *ptr)
+{
+  free (ptr);
+}
+/* Counterpart of main_malloc(): NULL is ignored; debug builds decrement and sanity-check main_mallocs. */
+void
+main_free (void *ptr)
+{
+  if (ptr)
+    {
+      IF_DEBUG (main_mallocs -= 1);
+      main_free1 (NULL, ptr);
+      IF_DEBUG (XD3_ASSERT(main_mallocs >= 0));
+    }
+}
+/* Counterpart of main_bufalloc(): VirtualFree on Win32, free elsewhere. */
+void main_buffree (void *ptr) {
+#if XD3_WIN32
+  VirtualFree(ptr, 0, MEM_RELEASE);
+#else
+  main_free1(NULL, ptr);
+#endif
+}
+
+/* This ensures that (ret = errno) always indicates failure, in case errno was
+ * accidentally not set.  If this prints there's a bug somewhere. */
+static int
+get_errno (void)
+{
+#ifndef _WIN32
+  if (errno == 0)
+    {
+      XPR(NT "you found a bug: expected errno != 0\n");
+      errno = XD3_INTERNAL;
+    }
+  return errno;
+#else
+  DWORD err_num = GetLastError();
+  if (err_num == NO_ERROR)
+    {
+      err_num = XD3_INTERNAL;
+    }
+  return err_num;
+#endif
+}
+/* Return the xd3_strerror() text for err_num when it has one, otherwise the OS message: strerror(), or FormatMessage() into a static buffer on Windows (trailing newline removed). */
+const char*
+xd3_mainerror(int err_num) {
+#ifndef _WIN32
+	const char* x = xd3_strerror (err_num);
+	if (x != NULL)
+	  {
+	    return x;
+	  }
+	return strerror(err_num);
+#else
+	static char err_buf[256];
+	const char* x = xd3_strerror (err_num);
+	if (x != NULL)
+	  {
+	    return x;
+	  }
+	memset (err_buf, 0, 256);
+	FormatMessage (FORMAT_MESSAGE_FROM_SYSTEM |
+		       FORMAT_MESSAGE_IGNORE_INSERTS,
+		       NULL, err_num,
+		       MAKELANGID (LANG_NEUTRAL, SUBLANG_DEFAULT),
+		       err_buf, 256, NULL);
+	if (err_buf[0] != 0 && err_buf[strlen(err_buf) - 1] == '\n')
+	  {
+	    err_buf[strlen(err_buf) - 1] = 0;
+	  }
+	return err_buf;
+#endif
+}
+/* Current time in milliseconds: gettimeofday(), or the local FILETIME divided by 10000 on Windows. */
+long
+get_millisecs_now (void)
+{
+#ifndef _WIN32
+  struct timeval tv;
+
+  gettimeofday (& tv, NULL);
+
+  return (tv.tv_sec) * 1000L + (tv.tv_usec) / 1000;
+#else
+  SYSTEMTIME st;
+  FILETIME ft;
+  __int64 *pi = (__int64*)&ft;
+  GetLocalTime(&st);
+  SystemTimeToFileTime(&st, &ft);
+  return (long)((*pi) / 10000);
+#endif
+}
+
+/* Always >= 1 millisec, right?
*/
+static long
+get_millisecs_since (void)
+{
+  static long last = 0;  /* time of the previous call; 0 before the first */
+  long now = get_millisecs_now();
+  long diff = now - last;
+  last = now;
+  return diff;
+}
+/* Format byte count r into buf with a binary unit (B, KiB, ... EiB), dividing by 1024 until the value is short enough; returns buf->buf. */
+static const char*
+main_format_bcnt (xoff_t r, shortbuf *buf)
+{
+  static const char* fmts[] = { "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB" };
+  usize_t i;
+
+  for (i = 0; i < SIZEOF_ARRAY(fmts) - 1; i += 1)
+    {
+      xoff_t new_r;
+
+      if (r == 0)
+	{
+	  short_sprintf (*buf, "0 %s", fmts[i]);
+	  return buf->buf;
+	}
+
+      if (r >= 1 && r < 10)
+	{
+	  short_sprintf (*buf, "%.2f %s", (double) r, fmts[i]);
+	  return buf->buf;
+	}
+
+      if (r >= 10 && r < 100)
+	{
+	  short_sprintf (*buf, "%.1f %s", (double) r, fmts[i]);
+	  return buf->buf;
+	}
+
+      if (r >= 100 && r < 1000)
+	{
+	  short_sprintf (*buf, "%"Q"u %s", r, fmts[i]);
+	  return buf->buf;
+	}
+
+      new_r = r / 1024;
+
+      if (new_r < 10)
+	{
+	  short_sprintf (*buf, "%.2f %s", (double) r / 1024.0, fmts[i + 1]);
+	  return buf->buf;
+	}
+
+      if (new_r < 100)
+	{
+	  short_sprintf (*buf, "%.1f %s", (double) r / 1024.0, fmts[i + 1]);
+	  return buf->buf;
+	}
+
+      r = new_r;
+    }
+  XD3_ASSERT (0);
+  return "";
+}
+/* Format the rate for `bytes` moved in `millis` ms as "<count>/s".  A non-positive interval is treated as 1 ms so the division stays finite and non-negative.  Uses a static scratch buffer. */
+static char*
+main_format_rate (xoff_t bytes, long millis, shortbuf *buf)
+{
+  xoff_t r = (xoff_t)(1.0 * bytes / (1.0 * (millis > 0 ? millis : 1) / 1000.0));
+  static shortbuf lbuf;
+
+  main_format_bcnt (r, &lbuf);
+  short_sprintf (*buf, "%s/s", lbuf.buf);
+  return buf->buf;
+}
+/* Format a duration: whole ms below one second, tenths of a second below ten seconds, whole seconds above; returns buf->buf. */
+static char*
+main_format_millis (long millis, shortbuf *buf)
+{
+  if (millis < 1000)
+    {
+      short_sprintf (*buf, "%ld ms", millis);
+    }
+  else if (millis < 10000)
+    {
+      short_sprintf (*buf, "%.1f sec", millis / 1000.0);
+    }
+  else
+    {
+      short_sprintf (*buf, "%ld sec", millis / 1000L);
+    }
+  return buf->buf;
+}
+
+/* A safe version of strtol for xoff_t.
*/
+static int
+main_strtoxoff (const char* s, xoff_t *xo, char which)
+{
+  char *e;
+  xoff_t x;
+
+  XD3_ASSERT(s && *s != 0);
+
+  {
+#if SIZEOF_XOFF_T == SIZEOF_UNSIGNED_LONG_LONG
+    unsigned long long xx = strtoull (s, &e, 0);
+    unsigned long long bad = ULLONG_MAX;
+#elif SIZEOF_XOFF_T <= SIZEOF_UNSIGNED_LONG
+    unsigned long xx = strtoul (s, &e, 0);
+    unsigned long long bad = ULONG_MAX;
+#else
+    /* Something wrong with SIZEOF_XOFF_T, SIZEOF_UNSIGNED_LONG, etc. */
+    #error Bad configure script
+#endif
+    /* The all-ones value is what strtoul/strtoull return on overflow and what "-1" wraps to. */
+    if (xx == bad)
+      {
+	XPR(NT "-%c: negative integer: %s\n", which, s);
+	return EXIT_FAILURE;
+      }
+
+    x = xx;
+  }
+
+  if (*e != 0)
+    {
+      XPR(NT "-%c: invalid integer: %s\n", which, s);
+      return EXIT_FAILURE;
+    }
+
+  (*xo) = x;
+  return 0;
+}
+/* Parse arg for option `which` and range-check it: the value must be >= low and, when high is non-zero, <= high.  Returns 0 and stores *xo, or EXIT_FAILURE after printing why. */
+static int
+main_atoux (const char* arg, xoff_t *xo, xoff_t low,
+	    xoff_t high, char which)
+{
+  xoff_t x;
+  int ret;
+
+  if ((ret = main_strtoxoff (arg, & x, which))) { return ret; }
+
+  if (x < low)
+    {
+      XPR(NT "-%c: minimum value: %"Q"u\n", which, low);
+      return EXIT_FAILURE;
+    }
+  if (high != 0 && x > high)
+    {
+      XPR(NT "-%c: maximum value: %"Q"u\n", which, high);
+      return EXIT_FAILURE;
+    }
+  (*xo) = x;
+  return 0;
+}
+/* usize_t variant of main_atoux(): the range-checked value is narrowed to usize_t; *uo is left untouched on failure. */
+static int
+main_atou (const char* arg, usize_t *uo, usize_t low,
+	   usize_t high, char which)
+{
+  int ret;
+  xoff_t xo;
+  if ((ret = main_atoux (arg, &xo, low, high, which)))
+    {
+      return ret;
+    }
+  *uo = (usize_t)xo;
+  return 0;
+}
+
+/******************************************************************
+ FILE BASICS
+ ******************************************************************/
+
+/* With all the variation in file system-call semantics, arguments,
+ * return values and error-handling for the POSIX and STDIO file APIs,
+ * the insides of these functions make me sick, which is why these
+ * wrappers exist. */
+
+#define XOPEN_OPNAME (xfile->mode == XO_READ ? "read" : "write")
+#define XOPEN_STDIO  (xfile->mode == XO_READ ? "rb" : "wb")
+#define XOPEN_POSIX  (xfile->mode == XO_READ ? \
+		      O_RDONLY : O_WRONLY | O_CREAT | O_TRUNC)
+#define XOPEN_MODE   (xfile->mode == XO_READ ? 0 : 0666)
+/* Report a failed file operation unless option_quiet is set; XOPEN_OPNAME needs a main_file *xfile in scope at the use site. */
+#define XF_ERROR(op, name, ret) \
+  do { if (!option_quiet) { XPR(NT "file %s failed: %s: %s: %s\n", (op), \
+       XOPEN_OPNAME, (name), xd3_mainerror (ret)); } } while (0)
+
+#if XD3_STDIO
+#define XFNO(f) fileno(f->file)
+#define XSTDOUT_XF(f) { (f)->file = stdout; (f)->filename = "/dev/stdout"; }
+#define XSTDIN_XF(f)  { (f)->file = stdin;  (f)->filename = "/dev/stdin"; }
+
+#elif XD3_POSIX
+#define XFNO(f) f->file
+#define XSTDOUT_XF(f) \
+  { (f)->file = STDOUT_FILENO; (f)->filename = "/dev/stdout"; }
+#define XSTDIN_XF(f) \
+  { (f)->file = STDIN_FILENO;  (f)->filename = "/dev/stdin"; }
+
+#elif XD3_WIN32
+#define XFNO(f) -1
+#define XSTDOUT_XF(f) { \
+  (f)->file = GetStdHandle(STD_OUTPUT_HANDLE); \
+  (f)->filename = "(stdout)"; \
+  }
+#define XSTDIN_XF(f) { \
+  (f)->file = GetStdHandle(STD_INPUT_HANDLE); \
+  (f)->filename = "(stdin)"; \
+  }
+#endif
+/* Zero *xfile and mark it closed: NULL FILE* (STDIO), -1 (POSIX) or INVALID_HANDLE_VALUE (Win32). */
+void
+main_file_init (main_file *xfile)
+{
+  memset (xfile, 0, sizeof (*xfile));
+
+#if XD3_POSIX
+  xfile->file = -1;
+#endif
+#if XD3_WIN32
+  xfile->file = INVALID_HANDLE_VALUE;
+#endif
+}
+/* Non-zero when xfile holds an open handle for the configured I/O backend. */
+int
+main_file_isopen (main_file *xfile)
+{
+#if XD3_STDIO
+  return xfile->file != NULL;
+
+#elif XD3_POSIX
+  return xfile->file != -1;
+
+#elif XD3_WIN32
+  return xfile->file != INVALID_HANDLE_VALUE;
+#endif
+}
+/* Close xfile if it is open and mark it closed.  Returns 0, or the error code after reporting it through XF_ERROR. */
+int
+main_file_close (main_file *xfile)
+{
+  int ret = 0;
+
+  if (! main_file_isopen (xfile))
+    {
+      return 0;
+    }
+
+#if XD3_STDIO
+  ret = fclose (xfile->file);
+  xfile->file = NULL;
+
+#elif XD3_POSIX
+  ret = close (xfile->file);
+  xfile->file = -1;
+
+#elif XD3_WIN32
+  if (!CloseHandle(xfile->file)) {
+    ret = get_errno ();
+  }
+  xfile->file = INVALID_HANDLE_VALUE;
+#endif
+
+  if (ret != 0) { XF_ERROR ("close", xfile->filename, ret = get_errno ()); }
+  return ret;
+}
+/* Close xfile if needed and free its snprintf_buf and filename_copy buffers. */
+void
+main_file_cleanup (main_file *xfile)
+{
+  XD3_ASSERT (xfile != NULL);
+
+  if (main_file_isopen (xfile))
+    {
+      main_file_close (xfile);
+    }
+
+  if (xfile->snprintf_buf != NULL)
+    {
+      main_free(xfile->snprintf_buf);
+      xfile->snprintf_buf = NULL;
+    }
+
+  if (xfile->filename_copy != NULL)
+    {
+      main_free(xfile->filename_copy);
+      xfile->filename_copy = NULL;
+    }
+}
+/* Open `name` for mode (XO_READ, otherwise write/create/truncate) on a closed xfile.  On success records realname and zeroes nread; on failure reports through XF_ERROR and returns the error code.  An empty name yields XD3_INVALID. */
+int
+main_file_open (main_file *xfile, const char* name, int mode)
+{
+  int ret = 0;
+
+  xfile->mode = mode;
+
+  XD3_ASSERT (name != NULL);
+  XD3_ASSERT (! main_file_isopen (xfile));
+  if (name[0] == 0)
+    {
+      XPR(NT "invalid file name: empty string\n");
+      return XD3_INVALID;
+    }
+
+  IF_DEBUG1(DP(RINT "[main] open source %s\n", name));
+
+#if XD3_STDIO
+  xfile->file = fopen (name, XOPEN_STDIO);
+
+  ret = (xfile->file == NULL) ? get_errno () : 0;
+
+#elif XD3_POSIX
+  /* TODO: Should retry this call if interrupted, similar to read/write */
+  if ((ret = open (name, XOPEN_POSIX, XOPEN_MODE)) < 0)
+    {
+      ret = get_errno ();
+    }
+  else
+    {
+      xfile->file = ret;
+      ret = 0;
+    }
+
+#elif XD3_WIN32
+  xfile->file = CreateFile(name,
+			   (mode == XO_READ) ? GENERIC_READ : GENERIC_WRITE,
+			   FILE_SHARE_READ,
+			   NULL,
+			   (mode == XO_READ) ?
+			   OPEN_EXISTING :
+			   (option_force ? CREATE_ALWAYS : CREATE_NEW),
+			   FILE_ATTRIBUTE_NORMAL,
+			   NULL);
+  if (xfile->file == INVALID_HANDLE_VALUE)
+    {
+      ret = get_errno ();
+    }
+#endif
+  if (ret) { XF_ERROR ("open", name, ret); }
+  else     { xfile->realname = name; xfile->nread = 0; }
+  return ret;
+}
+/* Store the size of an open regular file in *size.  Returns 0 on success, the OS error code on failure, ESPIPE for a non-regular file (POSIX/STDIO) or -1 for a non-disk handle (Win32). */
+int
+main_file_stat (main_file *xfile, xoff_t *size)
+{
+  int ret = 0;
+#if XD3_WIN32
+  if (GetFileType(xfile->file) != FILE_TYPE_DISK)
+    {
+      return -1;
+    }
+# if (_WIN32_WINNT >= 0x0500)
+  {
+    LARGE_INTEGER li;
+    if (GetFileSizeEx(xfile->file, &li) == 0)
+      {
+	return get_errno ();
+      }
+    *size = li.QuadPart;
+  }
+# else
+  {
+    DWORD filesize = GetFileSize(xfile->file, NULL);
+    if (filesize == INVALID_FILE_SIZE)
+      {
+	return get_errno ();
+      }
+    *size = filesize;
+  }
+# endif
+#else
+  struct stat sbuf;
+  if (fstat (XFNO (xfile), & sbuf) < 0)
+    {
+      ret = get_errno ();
+      return ret;
+    }
+
+  if (! S_ISREG (sbuf.st_mode))
+    {
+      return ESPIPE;
+    }
+  (*size) = sbuf.st_size;
+#endif
+  return ret;
+}
+/* Non-zero when xfile->filename names an existing regular file, checked with stat(). */
+int
+main_file_exists (main_file *xfile)
+{
+  struct stat sbuf;
+  return stat (xfile->filename, & sbuf) == 0 && S_ISREG (sbuf.st_mode);
+}
+
+#if (XD3_POSIX || EXTERNAL_COMPRESSION)
+/* POSIX-generic code takes a function pointer to read() or write().
+ * This calls the function repeatedly until the buffer is full or EOF.
+ * The NREAD parameter is not set for write, NULL is passed.  Return
+ * is signed, < 0 indicate errors, otherwise byte count.
*/
+typedef int (xd3_posix_func) (int fd, uint8_t *buf, usize_t size);
+/* Transfer up to size bytes through func in chunks of at most 1 GiB.  Reads (nread != NULL) stop early when func returns 0 and report the total in *nread; writes (nread == NULL) loop until all bytes are done.  Returns 0 or the errno of a failed call. */
+static int
+xd3_posix_io (int fd, uint8_t *buf, size_t size,
+	      xd3_posix_func *func, size_t *nread)
+{
+  int ret;
+  size_t nproc = 0;
+
+  while (nproc < size)
+    {
+      size_t tryread = xd3_min(size - nproc, 1U << 30);
+      ssize_t result = (*func) (fd, buf + nproc, tryread);
+      /* EAGAIN and EINTR retry the same chunk; any other error aborts. */
+      if (result < 0)
+	{
+	  ret = get_errno ();
+	  if (ret != EAGAIN && ret != EINTR)
+	    {
+	      return ret;
+	    }
+	  continue;
+	}
+      /* NOTE(review): a zero result only ends the loop for reads; for writes it is retried -- confirm intended. */
+      if (nread != NULL && result == 0) { break; }
+
+      nproc += result;
+    }
+  if (nread != NULL) { (*nread) = nproc; }
+  return 0;
+}
+#endif
+
+#if XD3_WIN32
+static int
+xd3_win32_io (HANDLE file, uint8_t *buf, size_t size,
+	      int is_read, size_t *nread)
+{
+  int ret = 0;
+  size_t nproc = 0;
+
+  while (nproc < size)
+    {
+      DWORD nproc2 = 0;  /* hmm */
+      DWORD nremain = size - nproc;  /* NOTE(review): narrows size_t to DWORD -- confirm requests stay below 4 GiB */
+      if ((is_read ?
+	   ReadFile (file, buf + nproc, nremain, &nproc2, NULL) :
+	   WriteFile (file, buf + nproc, nremain, &nproc2, NULL)) == 0)
+	{
+	  ret = get_errno();
+	  if (ret != ERROR_HANDLE_EOF && ret != ERROR_BROKEN_PIPE)
+	    {
+	      return ret;
+	    }
+	  /* By falling through here, we'll break this loop in the
+	   * read case in case of eof or broken pipe. */
+	}
+
+      nproc += nproc2;
+
+      if (nread != NULL && nproc2 == 0) { break; }
+    }
+  if (nread != NULL) { (*nread) = nproc; }
+  return 0;
+}
+#endif
+
+/* POSIX is unbuffered, while STDIO is buffered.  main_file_read()
+ * should always be called on blocks.
*/
+int
+main_file_read (main_file  *ifile,
+		uint8_t    *buf,
+		size_t      size,
+		size_t     *nread,
+		const char *msg)
+{
+  int ret = 0;
+  IF_DEBUG1(DP(RINT "[main] read %s up to %"Z"u\n", ifile->filename, size));
+
+#if XD3_STDIO
+  size_t result;
+
+  result = fread (buf, 1, size, ifile->file);
+
+  if (result < size && ferror (ifile->file))
+    {
+      ret = get_errno ();
+    }
+  else
+    {
+      *nread = result;
+    }
+
+#elif XD3_POSIX
+  ret = xd3_posix_io (ifile->file, buf, size, (xd3_posix_func*) &read, nread);
+#elif XD3_WIN32
+  ret = xd3_win32_io (ifile->file, buf, size, 1 /* is_read */, nread);
+#endif
+
+  if (ret)
+    {
+      XPR(NT "%s: %s: %s\n", msg, ifile->filename, xd3_mainerror (ret));
+    }
+  else
+    {
+      if (option_verbose > 4) { XPR(NT "read %s: %"Z"u bytes\n",
+				    ifile->filename, (*nread)); }
+      ifile->nread += (*nread);
+    }
+
+  return ret;
+}
+/* Write size bytes from buf to ofile and add them to ofile->nwrite; on failure print msg with the error text and return the error code. */
+int
+main_file_write (main_file *ofile, uint8_t *buf, usize_t size, const char *msg)
+{
+  int ret = 0;
+
+  IF_DEBUG1(DP(RINT "[main] write %"W"u bytes\n", size));
+
+#if XD3_STDIO
+  usize_t result;
+
+  result = fwrite (buf, 1, size, ofile->file);
+
+  if (result != size) { ret = get_errno (); }
+
+#elif XD3_POSIX
+  ret = xd3_posix_io (ofile->file, buf, size, (xd3_posix_func*) &write, NULL);
+
+#elif XD3_WIN32
+  ret = xd3_win32_io (ofile->file, buf, size, 0, NULL);
+
+#endif
+
+  if (ret)
+    {
+      XPR(NT "%s: %s: %s\n", msg, ofile->filename, xd3_mainerror (ret));
+    }
+  else
+    {
+      if (option_verbose > 5) { XPR(NT "write %s: %"W"u bytes\n",
+				    ofile->filename, size); }
+      ofile->nwrite += size;
+    }
+
+  return ret;
+}
+/* Seek xfile to absolute offset pos; returns 0 or the OS error code.  NOTE(review): the STDIO path passes pos to fseek(), whose offset is a long -- confirm large offsets are not needed there. */
+static int
+main_file_seek (main_file *xfile, xoff_t pos)
+{
+  int ret = 0;
+
+#if XD3_STDIO
+  if (fseek (xfile->file, pos, SEEK_SET) != 0) { ret = get_errno (); }
+
+#elif XD3_POSIX
+  if ((xoff_t) lseek (xfile->file, pos, SEEK_SET) != pos)
+    { ret = get_errno (); }
+
+#elif XD3_WIN32
+# if (_WIN32_WINNT >= 0x0500)
+  LARGE_INTEGER move, out;
+  move.QuadPart = pos;
+  if (SetFilePointerEx(xfile->file, move, &out, FILE_BEGIN) == 0)
+    {
+      ret = get_errno ();
+    }
+# else
+  if (SetFilePointer(xfile->file, (LONG)pos, NULL, FILE_BEGIN) ==
+      INVALID_SET_FILE_POINTER)
+    {
+      ret = get_errno ();
+    }
+# endif
+#endif
+
+  return ret;
+}
+
+/* This function simply writes the stream output buffer, if there is
+ * any, for encode, decode and recode commands.  (The VCDIFF tools use
+ * main_print_func()). */
+static int
+main_write_output (xd3_stream* stream, main_file *ofile)
+{
+  int ret;
+
+  IF_DEBUG1(DP(RINT "[main] write(%s) %"W"u bytes\n", ofile->filename, stream->avail_out));
+
+  if (option_no_output)
+    {
+      return 0;
+    }
+
+  if (stream->avail_out > 0 &&
+      (ret = main_file_write (ofile, stream->next_out,
+			      stream->avail_out, "write failed")))
+    {
+      return ret;
+    }
+
+  return 0;
+}
+/* Translate option_secondary ("lzma", "fgk", "djw" plus optional level 0-9, "none" or empty; NULL selects lzma when compiled in) into XD3_SEC_* flags and djw group settings in *config.  Returns 0, or XD3_INVALID for an unknown or uncompiled name. */
+static int
+main_set_secondary_flags (xd3_config *config)
+{
+  int ret;
+  if (!option_use_secondary)
+    {
+      return 0;
+    }
+  if (option_secondary == NULL)
+    {
+      /* Set a default secondary compressor if LZMA is built in, otherwise
+       * default to no secondary compressor. */
+      if (SECONDARY_LZMA)
+	{
+	  config->flags |= XD3_SEC_LZMA;
+	}
+    }
+  else
+    {
+      if (strcmp (option_secondary, "lzma") == 0 && SECONDARY_LZMA)
+	{
+	  config->flags |= XD3_SEC_LZMA;
+	}
+      else if (strcmp (option_secondary, "fgk") == 0 && SECONDARY_FGK)
+	{
+	  config->flags |= XD3_SEC_FGK;
+	}
+      else if (strncmp (option_secondary, "djw", 3) == 0 && SECONDARY_DJW)
+	{
+	  usize_t level = XD3_DEFAULT_SECONDARY_LEVEL;
+
+	  config->flags |= XD3_SEC_DJW;
+
+	  if (strlen (option_secondary) > 3 &&
+	      (ret = main_atou (option_secondary + 3,
+				&level,
+				0, 9, 'S')) != 0 &&
+	      !option_quiet)  /* NOTE(review): with -q a bad level is ignored and the default kept -- confirm intended */
+	    {
+	      return XD3_INVALID;
+	    }
+
+	  /* XD3_SEC_NOXXXX flags disable secondary compression on
+	   * a per-section basis.  For djw, ngroups=1 indicates
+	   * minimum work, ngroups=0 uses default settings, which
+	   * is > 1 groups by default. */
+	  if (level < 1) { config->flags |= XD3_SEC_NODATA; }
+	  if (level < 7) { config->sec_data.ngroups = 1; }
+	  else { config->sec_data.ngroups = 0; }
+
+	  if (level < 3) { config->flags |= XD3_SEC_NOINST; }
+	  if (level < 8) { config->sec_inst.ngroups = 1; }
+	  else { config->sec_inst.ngroups = 0; }
+
+	  if (level < 5) { config->flags |= XD3_SEC_NOADDR; }
+	  if (level < 9) { config->sec_addr.ngroups = 1; }
+	  else { config->sec_addr.ngroups = 0; }
+	}
+      else if (*option_secondary == 0 ||
+	       strcmp (option_secondary, "none") == 0)
+	{
+	}
+      else
+	{
+	  if (!option_quiet)
+	    {
+	      XPR(NT "unrecognized or not compiled secondary compressor: %s\n",
+		  option_secondary);
+	    }
+	  return XD3_INVALID;
+	}
+    }
+
+  if (option_verbose)
+    {
+      XPR(NT "secondary compression: %s\n",
+	  (config->flags & XD3_SEC_LZMA) ? "lzma" :
+	  ((config->flags & XD3_SEC_FGK) ? "fgk" :
+	   ((config->flags & XD3_SEC_DJW) ? "djw" :
+	    "none")));
+    }
+
+  return 0;
+}
+
+/******************************************************************
+ VCDIFF TOOLS
+ *****************************************************************/
+
+#include "xdelta3-merge.h"
+
+#if VCDIFF_TOOLS
+
+/* The following macros let VCDIFF print using main_file_write(),
+ * for example:
+ *
+ *   VC(UT "trying to be portable: %d\n", x)VE;
+ */
+#define SNPRINTF_BUFSIZE 1024
+#define VC do { if (((ret = xsnprintf_func
+#define UT (char*)xfile->snprintf_buf, SNPRINTF_BUFSIZE,
+#define VE ) >= SNPRINTF_BUFSIZE			       \
+  && (ret = main_print_overflow(ret)) != 0)		       \
+  || (ret = main_file_write(xfile, xfile->snprintf_buf,        \
+			    (usize_t)ret, "print")) != 0)      \
+  { return ret; } } while (0)
+/* Report that a VC/VE print needed x bytes, more than SNPRINTF_BUFSIZE; always returns XD3_INTERNAL. */
+static int
+main_print_overflow (int x)
+{
+  XPR(NT "internal print buffer overflow: %d bytes\n", x);
+  return XD3_INTERNAL;
+}
+
+/* This function prints a single VCDIFF window.
*/ +static int +main_print_window (xd3_stream* stream, main_file *xfile) +{ + int ret; + usize_t size = 0; + + VC(UT " Offset Code Type1 Size1 @Addr1 + Type2 Size2 @Addr2\n")VE; + + while (stream->inst_sect.buf < stream->inst_sect.buf_max) + { + usize_t code = stream->inst_sect.buf[0]; + const uint8_t *addr_before = stream->addr_sect.buf; + const uint8_t *inst_before = stream->inst_sect.buf; + usize_t addr_bytes; + usize_t inst_bytes; + usize_t size_before = size; + + if ((ret = xd3_decode_instruction (stream))) + { + XPR(NT "instruction decode error at %"Q"u: %s\n", + stream->dec_winstart + size, stream->msg); + return ret; + } + + addr_bytes = (usize_t)(stream->addr_sect.buf - addr_before); + inst_bytes = (usize_t)(stream->inst_sect.buf - inst_before); + + VC(UT " %06"Q"u %03"W"u %s %6"W"u", + stream->dec_winstart + size, + option_print_cpymode ? code : 0, + xd3_rtype_to_string ((xd3_rtype) stream->dec_current1.type, + option_print_cpymode), + stream->dec_current1.size)VE; + + if (stream->dec_current1.type != XD3_NOOP) + { + if (stream->dec_current1.type >= XD3_CPY) + { + if (stream->dec_current1.addr >= stream->dec_cpylen) + { + VC(UT " T@%-6"W"u", + stream->dec_current1.addr - stream->dec_cpylen)VE; + } + else + { + VC(UT " S@%-6"Q"u", + stream->dec_cpyoff + stream->dec_current1.addr)VE; + } + } + else + { + VC(UT " ")VE; + } + + size += stream->dec_current1.size; + } + + if (stream->dec_current2.type != XD3_NOOP) + { + VC(UT " %s %6"W"u", + xd3_rtype_to_string ((xd3_rtype) stream->dec_current2.type, + option_print_cpymode), + stream->dec_current2.size)VE; + + if (stream->dec_current2.type >= XD3_CPY) + { + if (stream->dec_current2.addr >= stream->dec_cpylen) + { + VC(UT " T@%-6"W"u", + stream->dec_current2.addr - stream->dec_cpylen)VE; + } + else + { + VC(UT " S@%-6"Q"u", + stream->dec_cpyoff + stream->dec_current2.addr)VE; + } + } + + size += stream->dec_current2.size; + } + + VC(UT "\n")VE; + + if (option_verbose && + addr_bytes + inst_bytes >= (size - 
size_before) && + (stream->dec_current1.type >= XD3_CPY || + stream->dec_current2.type >= XD3_CPY)) + { + VC(UT " %06"Q"u (inefficiency) %"W"u encoded as %"W"u bytes\n", + stream->dec_winstart + size_before, + size - size_before, + addr_bytes + inst_bytes)VE; + } + } + + if (stream->dec_tgtlen != size && (stream->flags & XD3_SKIP_WINDOW) == 0) + { + XPR(NT "target window size inconsistency"); + return XD3_INTERNAL; + } + + if (stream->dec_position != stream->dec_maxpos) + { + XPR(NT "target window position inconsistency"); + return XD3_INTERNAL; + } + + if (stream->addr_sect.buf != stream->addr_sect.buf_max) + { + XPR(NT "address section inconsistency"); + return XD3_INTERNAL; + } + + return 0; +} + +static int +main_print_vcdiff_file (main_file *xfile, main_file *file, const char *type) +{ + int ret; /* Used by above macros */ + if (file->filename) + { + VC(UT "XDELTA filename (%s): %s\n", type, + file->filename)VE; + } + if (file->compressor) + { + VC(UT "XDELTA ext comp (%s): %s\n", type, + file->compressor->recomp_cmdname)VE; + } + return 0; +} + +/* This function prints a VCDIFF input, mainly for debugging purposes. */ +static int +main_print_func (xd3_stream* stream, main_file *xfile) +{ + int ret; + + if (option_no_output) + { + return 0; + } + + if (xfile->snprintf_buf == NULL) + { + if ((xfile->snprintf_buf = + (uint8_t*)main_malloc(SNPRINTF_BUFSIZE)) == NULL) + { + return ENOMEM; + } + } + + if (stream->dec_winstart == 0) + { + VC(UT "VCDIFF version: 0\n")VE; + VC(UT "VCDIFF header size: %"W"u\n", + stream->dec_hdrsize)VE; + VC(UT "VCDIFF header indicator: ")VE; + if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0) + VC(UT "VCD_SECONDARY ")VE; + if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) + VC(UT "VCD_CODETABLE ")VE; + if ((stream->dec_hdr_ind & VCD_APPHEADER) != 0) + VC(UT "VCD_APPHEADER ")VE; + if (stream->dec_hdr_ind == 0) + VC(UT "none")VE; + VC(UT "\n")VE; + + IF_SEC(VC(UT "VCDIFF secondary compressor: %s\n", + stream->sec_type ? 
stream->sec_type->name : "none")VE); + IF_NSEC(VC(UT "VCDIFF secondary compressor: unsupported\n")VE); + + if (stream->dec_hdr_ind & VCD_APPHEADER) + { + uint8_t *apphead; + usize_t appheadsz; + ret = xd3_get_appheader (stream, & apphead, & appheadsz); + + if (ret == 0 && appheadsz > 0) + { + int sq = option_quiet; + main_file i, o, s; + XD3_ASSERT (apphead != NULL); + VC(UT "VCDIFF application header: ")VE; + if ((ret = main_file_write (xfile, apphead, + appheadsz, "print")) != 0) + { return ret; } + VC(UT "\n")VE; + + main_file_init (& i); + main_file_init (& o); + main_file_init (& s); + option_quiet = 1; + main_get_appheader (stream, &i, & o, & s); + option_quiet = sq; + if ((ret = main_print_vcdiff_file (xfile, & o, "output"))) + { return ret; } + if ((ret = main_print_vcdiff_file (xfile, & s, "source"))) + { return ret; } + main_file_cleanup (& i); + main_file_cleanup (& o); + main_file_cleanup (& s); + } + } + } + else + { + VC(UT "\n")VE; + } + + VC(UT "VCDIFF window number: %"Q"u\n", stream->current_window)VE; + VC(UT "VCDIFF window indicator: ")VE; + if ((stream->dec_win_ind & VCD_SOURCE) != 0) VC(UT "VCD_SOURCE ")VE; + if ((stream->dec_win_ind & VCD_TARGET) != 0) VC(UT "VCD_TARGET ")VE; + if ((stream->dec_win_ind & VCD_ADLER32) != 0) VC(UT "VCD_ADLER32 ")VE; + if (stream->dec_win_ind == 0) VC(UT "none")VE; + VC(UT "\n")VE; + + if ((stream->dec_win_ind & VCD_ADLER32) != 0) + { + VC(UT "VCDIFF adler32 checksum: %08X\n", + stream->dec_adler32)VE; + } + + if (stream->dec_del_ind != 0) + { + VC(UT "VCDIFF delta indicator: ")VE; + if ((stream->dec_del_ind & VCD_DATACOMP) != 0) VC(UT "VCD_DATACOMP ")VE; + if ((stream->dec_del_ind & VCD_INSTCOMP) != 0) VC(UT "VCD_INSTCOMP ")VE; + if ((stream->dec_del_ind & VCD_ADDRCOMP) != 0) VC(UT "VCD_ADDRCOMP ")VE; + if (stream->dec_del_ind == 0) VC(UT "none")VE; + VC(UT "\n")VE; + } + + if (stream->dec_winstart != 0) + { + VC(UT "VCDIFF window at offset: %"Q"u\n", stream->dec_winstart)VE; + } + + if (SRCORTGT 
(stream->dec_win_ind)) + { + VC(UT "VCDIFF copy window length: %"W"u\n", + stream->dec_cpylen)VE; + VC(UT "VCDIFF copy window offset: %"Q"u\n", + stream->dec_cpyoff)VE; + } + + VC(UT "VCDIFF delta encoding length: %"W"u\n", + (usize_t)stream->dec_enclen)VE; + VC(UT "VCDIFF target window length: %"W"u\n", + (usize_t)stream->dec_tgtlen)VE; + + VC(UT "VCDIFF data section length: %"W"u\n", + (usize_t)stream->data_sect.size)VE; + VC(UT "VCDIFF inst section length: %"W"u\n", + (usize_t)stream->inst_sect.size)VE; + VC(UT "VCDIFF addr section length: %"W"u\n", + (usize_t)stream->addr_sect.size)VE; + + ret = 0; + if ((stream->flags & XD3_JUST_HDR) != 0) + { + /* Print a header -- finished! */ + ret = PRINTHDR_SPECIAL; + } + else if ((stream->flags & XD3_SKIP_WINDOW) == 0) + { + ret = main_print_window (stream, xfile); + } + + return ret; +} + +static int +main_recode_copy (xd3_stream* stream, + xd3_output* output, + xd3_desect* input) +{ + int ret; + + XD3_ASSERT(output != NULL); + XD3_ASSERT(output->next_page == NULL); + + if ((ret = xd3_decode_allocate (recode_stream, + input->size, + &output->base, + &output->avail))) + { + XPR(NT XD3_LIB_ERRMSG (stream, ret)); + return ret; + } + + memcpy (output->base, + /* Note: decoder advances buf, so get base of buffer with + * buf_max - size */ + input->buf_max - input->size, + input->size); + output->next = input->size; + return 0; +} + +// Re-encode one window +static int +main_recode_func (xd3_stream* stream, main_file *ofile) +{ + int ret; + xd3_source decode_source; + + XD3_ASSERT(stream->dec_state == DEC_FINISH); + XD3_ASSERT(recode_stream->enc_state == ENC_INIT || + recode_stream->enc_state == ENC_INPUT); + + // Copy partial decoder output to partial encoder inputs + if ((ret = main_recode_copy (recode_stream, + DATA_HEAD(recode_stream), + &stream->data_sect)) || + (ret = main_recode_copy (recode_stream, + INST_HEAD(recode_stream), + &stream->inst_sect)) || + (ret = main_recode_copy (recode_stream, + 
ADDR_HEAD(recode_stream), + &stream->addr_sect))) + { + return ret; + } + + // This jumps to xd3_emit_hdr() + recode_stream->enc_state = ENC_FLUSH; + recode_stream->avail_in = stream->dec_tgtlen; + + if (SRCORTGT (stream->dec_win_ind)) + { + recode_stream->src = & decode_source; + decode_source.srclen = stream->dec_cpylen; + decode_source.srcbase = stream->dec_cpyoff; + } + + if (option_use_checksum && + (stream->dec_win_ind & VCD_ADLER32) != 0) + { + recode_stream->flags |= XD3_ADLER32_RECODE; + recode_stream->recode_adler32 = stream->dec_adler32; + } + + if (option_use_appheader != 0 && + option_appheader != NULL) + { + xd3_set_appheader (recode_stream, option_appheader, + (usize_t) strlen ((char*) option_appheader)); + } + else if (option_use_appheader != 0 && + option_appheader == NULL) + { + if (stream->dec_appheader != NULL) + { + xd3_set_appheader (recode_stream, + stream->dec_appheader, stream->dec_appheadsz); + } + } + + // Output loop + for (;;) + { + switch((ret = xd3_encode_input (recode_stream))) + { + case XD3_INPUT: { + /* finished recoding one window */ + stream->total_out = recode_stream->total_out; + return 0; + } + case XD3_OUTPUT: { + /* main_file_write below */ + break; + } + case XD3_GOTHEADER: + case XD3_WINSTART: + case XD3_WINFINISH: { + /* ignore */ + continue; + } + case XD3_GETSRCBLK: + case 0: { + return XD3_INTERNAL; + } + default: + return ret; + } + + if ((ret = main_write_output (recode_stream, ofile))) + { + return ret; + } + + xd3_consume_output (recode_stream); + } +} +#endif /* VCDIFF_TOOLS */ + +/******************************************************************* + VCDIFF merging + ******************************************************************/ + +#if VCDIFF_TOOLS +/* Modifies static state. 
*/ +static int +main_init_recode_stream (void) +{ + int ret; + int stream_flags = XD3_ADLER32_NOVER | XD3_SKIP_EMIT; + int recode_flags; + xd3_config recode_config; + + XD3_ASSERT (recode_stream == NULL); + + if ((recode_stream = (xd3_stream*) main_malloc(sizeof(xd3_stream))) == NULL) + { + return ENOMEM; + } + + recode_flags = (stream_flags & XD3_SEC_TYPE); + + recode_config.alloc = main_alloc; + recode_config.freef = main_free1; + + xd3_init_config(&recode_config, recode_flags); + + if ((ret = main_set_secondary_flags (&recode_config)) || + (ret = xd3_config_stream (recode_stream, &recode_config)) || + (ret = xd3_encode_init_partial (recode_stream)) || + (ret = xd3_whole_state_init (recode_stream))) + { + XPR(NT XD3_LIB_ERRMSG (recode_stream, ret)); + xd3_free_stream (recode_stream); + recode_stream = NULL; + return ret; + } + + return 0; +} + +/* This processes the sequence of -m arguments. The final input + * is processed as part of the ordinary main_input() loop. */ +static int +main_merge_arguments (main_merge_list* merges) +{ + int ret = 0; + int count = 0; + main_merge *merge = NULL; + xd3_stream merge_input; + + if (main_merge_list_empty (merges)) + { + return 0; + } + + if ((ret = xd3_config_stream (& merge_input, NULL)) || + (ret = xd3_whole_state_init (& merge_input))) + { + XPR(NT XD3_LIB_ERRMSG (& merge_input, ret)); + return ret; + } + + merge = main_merge_list_front (merges); + while (!main_merge_list_end (merges, merge)) + { + main_file mfile; + main_file_init (& mfile); + mfile.filename = merge->filename; + mfile.flags = RD_NONEXTERNAL; + + if ((ret = main_file_open (& mfile, merge->filename, XO_READ))) + { + goto error; + } + + ret = main_input (CMD_MERGE_ARG, & mfile, NULL, NULL); + + if (ret == 0) + { + if (count++ == 0) + { + /* The first merge source is the next merge input. */ + xd3_swap_whole_state (& recode_stream->whole_target, + & merge_input.whole_target); + } + else + { + /* Merge the recode_stream with merge_input. 
*/ + ret = xd3_merge_input_output (recode_stream, + & merge_input.whole_target); + + /* Save the next merge source in merge_input. */ + xd3_swap_whole_state (& recode_stream->whole_target, + & merge_input.whole_target); + } + } + + main_file_cleanup (& mfile); + + if (recode_stream != NULL) + { + xd3_free_stream (recode_stream); + main_free (recode_stream); + recode_stream = NULL; + } + + if (main_bdata != NULL) + { + main_buffree (main_bdata); + main_bdata = NULL; + main_bsize = 0; + } + + if (ret != 0) + { + goto error; + } + + merge = main_merge_list_next (merge); + } + + XD3_ASSERT (merge_stream == NULL); + + if ((merge_stream = (xd3_stream*) main_malloc (sizeof(xd3_stream))) == NULL) + { + ret = ENOMEM; + goto error; + } + + if ((ret = xd3_config_stream (merge_stream, NULL)) || + (ret = xd3_whole_state_init (merge_stream))) + { + XPR(NT XD3_LIB_ERRMSG (& merge_input, ret)); + goto error; + } + + xd3_swap_whole_state (& merge_stream->whole_target, + & merge_input.whole_target); + ret = 0; + error: + xd3_free_stream (& merge_input); + return ret; +} + +/* This processes each window of the final merge input. This routine + * does not output, it buffers the entire delta into memory. */ +static int +main_merge_func (xd3_stream* stream, main_file *no_write) +{ + int ret; + + if ((ret = xd3_whole_append_window (stream))) + { + return ret; + } + + return 0; +} + + +/* This is called after all windows have been read, as a final step in + * main_input(). This is only called for the final merge step. */ +static int +main_merge_output (xd3_stream *stream, main_file *ofile) +{ + int ret; + usize_t inst_pos = 0; + xoff_t output_pos = 0; + xd3_source recode_source; + usize_t window_num = 0; + int at_least_once = 0; + + /* merge_stream is set if there were arguments. this stream's input + * needs to be applied to the merge_stream source. 
*/ + if ((merge_stream != NULL) && + (ret = xd3_merge_input_output (stream, + & merge_stream->whole_target))) + { + XPR(NT XD3_LIB_ERRMSG (stream, ret)); + return ret; + } + + if (option_use_appheader != 0 && + option_appheader != NULL) + { + xd3_set_appheader (recode_stream, option_appheader, + (usize_t) strlen ((char*) option_appheader)); + } + + /* Enter the ENC_INPUT state and bypass the next_in == NULL test + * and (leftover) input buffering logic. */ + XD3_ASSERT(recode_stream->enc_state == ENC_INIT); + recode_stream->enc_state = ENC_INPUT; + recode_stream->next_in = main_bdata; + recode_stream->flags |= XD3_FLUSH; + + /* This encodes the entire target. */ + while (inst_pos < stream->whole_target.instlen || !at_least_once) + { + xoff_t window_start = output_pos; + int window_srcset = 0; + xoff_t window_srcmin = 0; + xoff_t window_srcmax = 0; + usize_t window_pos = 0; + usize_t window_size; + + /* at_least_once ensures that we encode at least one window, + * which handles the 0-byte case. */ + at_least_once = 1; + + XD3_ASSERT (recode_stream->enc_state == ENC_INPUT); + + if ((ret = xd3_encode_input (recode_stream)) != XD3_WINSTART) + { + XPR(NT "invalid merge state: %s\n", xd3_mainerror (ret)); + return XD3_INVALID; + } + + /* Window sizes must match from the input to the output, so that + * target copies are in-range (and so that checksums carry + * over). */ + XD3_ASSERT (window_num < stream->whole_target.wininfolen); + window_size = stream->whole_target.wininfo[window_num].length; + + /* Output position should also match. 
*/ + if (output_pos != stream->whole_target.wininfo[window_num].offset) + { + XPR(NT "internal merge error: offset mismatch\n"); + return XD3_INVALID; + } + + if (option_use_checksum && + (stream->dec_win_ind & VCD_ADLER32) != 0) + { + recode_stream->flags |= XD3_ADLER32_RECODE; + recode_stream->recode_adler32 = + stream->whole_target.wininfo[window_num].adler32; + } + + window_num++; + + if (main_bsize < window_size) + { + main_buffree (main_bdata); + main_bdata = NULL; + main_bsize = 0; + if ((main_bdata = (uint8_t*) + main_bufalloc (window_size)) == NULL) + { + return ENOMEM; + } + main_bsize = window_size; + } + + /* This encodes a single target window. */ + while (window_pos < window_size && + inst_pos < stream->whole_target.instlen) + { + xd3_winst *inst = &stream->whole_target.inst[inst_pos]; + usize_t take = xd3_min(inst->size, window_size - window_pos); + xoff_t addr; + + switch (inst->type) + { + case XD3_RUN: + if ((ret = xd3_emit_run (recode_stream, window_pos, take, + &stream->whole_target.adds[inst->addr]))) + { + return ret; + } + break; + + case XD3_ADD: + /* Adds are implicit, put them into the input buffer. 
*/ + memcpy (main_bdata + window_pos, + stream->whole_target.adds + inst->addr, take); + break; + + default: /* XD3_COPY + copy mode */ + if (inst->mode != 0) + { + if (window_srcset) { + window_srcmin = xd3_min (window_srcmin, inst->addr); + window_srcmax = xd3_max (window_srcmax, inst->addr + take); + } else { + window_srcset = 1; + window_srcmin = inst->addr; + window_srcmax = inst->addr + take; + } + addr = inst->addr; + } + else + { + XD3_ASSERT (inst->addr >= window_start); + addr = inst->addr - window_start; + } + IF_DEBUG2 ({ + XPR(NTR "[merge copy] winpos %"W"u take %"W"u " + "addr %"Q"u mode %u\n", + window_pos, take, addr, inst->mode); + }); + if ((ret = xd3_found_match (recode_stream, window_pos, take, + addr, inst->mode != 0))) + { + return ret; + } + break; + } + + window_pos += take; + output_pos += take; + + if (take == inst->size) + { + inst_pos += 1; + } + else + { + /* Modify the instruction for the next pass. */ + if (inst->type != XD3_RUN) + { + inst->addr += take; + } + inst->size -= take; + } + } + + xd3_avail_input (recode_stream, main_bdata, window_pos); + + recode_stream->enc_state = ENC_INSTR; + + if (window_srcset) { + recode_stream->srcwin_decided = 1; + recode_stream->src = &recode_source; + recode_source.srclen = (usize_t)(window_srcmax - window_srcmin); + recode_source.srcbase = window_srcmin; + recode_stream->taroff = recode_source.srclen; + + XD3_ASSERT (recode_source.srclen != 0); + } else { + recode_stream->srcwin_decided = 0; + recode_stream->src = NULL; + recode_stream->taroff = 0; + } + + for (;;) + { + switch ((ret = xd3_encode_input (recode_stream))) + { + case XD3_INPUT: { + goto done_window; + } + case XD3_OUTPUT: { + /* main_file_write below */ + break; + } + case XD3_GOTHEADER: + case XD3_WINSTART: + case XD3_WINFINISH: { + /* ignore */ + continue; + } + case XD3_GETSRCBLK: + case 0: { + return XD3_INTERNAL; + } + default: + return ret; + } + + if ((ret = main_write_output(recode_stream, ofile))) + { + return ret; + } + 
+ xd3_consume_output (recode_stream); + } + done_window: + (void) 0; + } + + return 0; +} +#endif + +/******************************************************************* + Input decompression, output recompression + ******************************************************************/ + +#if EXTERNAL_COMPRESSION +/* This is tricky POSIX-specific code with lots of fork(), pipe(), + * dup(), waitpid(), and exec() business. Most of this code + * originated in PRCS1, which did automatic package-file + * decompression. It works with both XD3_POSIX and XD3_STDIO file + * disciplines. + * + * To automatically detect compressed inputs requires a child process + * to reconstruct the input stream, which was advanced in order to + * detect compression, because it may not be seekable. In other + * words, the main program reads part of the input stream, and if it + * detects a compressed input it then forks a pipe copier process, + * which copies the first-read block out of the main-program's memory, + * then streams the remaining compressed input into the + * input-decompression pipe. + */ + +#include +#include +#include +#include + +/* Remember which pipe FD is which. */ +#define PIPE_READ_FD 0 +#define PIPE_WRITE_FD 1 +#define MAX_SUBPROCS 4 /* max(source + copier + output, + source + copier + input + copier). */ +static pid_t ext_subprocs[MAX_SUBPROCS]; + +/* Like write(), applies to a fd instead of a main_file, for the pipe + * copier subprocess. Does not print an error, to facilitate ignoring + * trailing garbage, see main_pipe_copier(). */ +static int +main_pipe_write (int outfd, uint8_t *exist_buf, usize_t remain) +{ + int ret; + + if ((ret = xd3_posix_io (outfd, exist_buf, remain, + (xd3_posix_func*) &write, NULL))) + { + return ret; + } + + return 0; +} + +/* A simple error-reporting waitpid interface. 
*/ +static int +main_waitpid_check(pid_t pid) +{ + int status; + int ret = 0; + + if (waitpid (pid, & status, 0) < 0) + { + ret = get_errno (); + XPR(NT "external compression [pid %d] wait: %s\n", + pid, xd3_mainerror (ret)); + } + else if (! WIFEXITED (status)) + { + // SIGPIPE will be delivered to the child process whenever it + // writes data after this process closes the pipe, + // happens if xdelta does not require access to the entire + // source file. Considered normal. + if (! WIFSIGNALED (status) || WTERMSIG (status) != SIGPIPE) + { + ret = ECHILD; + XPR(NT "external compression [pid %d] signal %d\n", pid, + WIFSIGNALED (status) ? WTERMSIG (status) : WSTOPSIG (status)); + } + else if (option_verbose) + { + XPR(NT "external compression sigpipe\n"); + } + } + else if (WEXITSTATUS (status) != 0) + { + ret = ECHILD; + if (option_verbose > 1) + { + /* Presumably, the error was printed by the subprocess. */ + XPR(NT "external compression [pid %d] exit %d\n", + pid, WEXITSTATUS (status)); + } + } + + return ret; +} + +/* Wait for any existing child processes to check for abnormal exit. */ +static int +main_external_compression_finish (void) +{ + int i; + int ret; + + for (i = 0; i < num_subprocs; i += 1) + { + if (! ext_subprocs[i]) { continue; } + + if ((ret = main_waitpid_check (ext_subprocs[i]))) + { + return ret; + } + + ext_subprocs[i] = 0; + } + + return 0; +} + +/* Kills any outstanding compression process. */ +static void +main_external_compression_cleanup (void) +{ + int i; + + for (i = 0; i < num_subprocs; i += 1) + { + if (! ext_subprocs[i]) { continue; } + + kill (ext_subprocs[i], SIGTERM); + + ext_subprocs[i] = 0; + } +} + +/* This runs as a forked process of main_input_decompress_setup() to + * copy input to the decompression process. First, the available + * input is copied out of the existing buffer, then the buffer is + * reused to continue reading from the compressed input file. 
*/ +static int +main_pipe_copier (uint8_t *pipe_buf, + usize_t pipe_bufsize, + size_t nread, + main_file *ifile, + int outfd) +{ + int ret; + xoff_t skipped = 0; + + /* Prevent SIGPIPE signals, allow EPIPE return values instead. This + * is safe to comment-out, except that the -F flag will not work + * properly (the parent would need to treat WTERMSIG(status) == + * SIGPIPE). */ + struct sigaction sa; + sa.sa_handler = SIG_IGN; + sigaction (SIGPIPE, &sa, NULL); + + for (;;) + { + /* force_drain will be set when option_force and EPIPE cause us + * to skip data. This is reset each time through the loop, so + * the break condition below works. */ + int force_drain = 0; + if (nread > 0 && (ret = main_pipe_write (outfd, pipe_buf, nread))) + { + if (ret == EPIPE) + { + /* This causes the loop to continue reading until nread + * == 0. */ + skipped += nread; + force_drain = 1; + } + else + { + XPR(NT "pipe write failed: %s\n", xd3_mainerror (ret)); + return ret; + } + } + + if (nread < pipe_bufsize && !force_drain) + { + break; + } + + if ((ret = main_file_read (ifile, pipe_buf, pipe_bufsize, + & nread, "pipe read failed")) < 0) + { + return ret; + } + } + + if (option_verbose && skipped != 0) + { + XPR(NT "skipping %"Q"u bytes in %s\n", + skipped, ifile->filename); + } + return 0; +} + +/* This function is called after we have read some amount of data from + * the input file and detected a compressed input. Here we start a + * decompression subprocess by forking twice. The first process runs + * the decompression command, the second process copies data to the + * input of the first. */ +static int +main_input_decompress_setup (const main_extcomp *decomp, + main_file *ifile, + uint8_t *input_buf, + usize_t input_bufsize, + uint8_t *pipe_buf, + usize_t pipe_bufsize, + usize_t pipe_avail, + size_t *nread) +{ + /* The two pipes: input and output file descriptors. */ + int outpipefd[2], inpipefd[2]; + int input_fd = -1; /* The resulting input_fd (output of decompression). 
*/ + pid_t decomp_id, copier_id; /* The two subprocs. */ + int ret; + + outpipefd[0] = outpipefd[1] = -1; + inpipefd[0] = inpipefd[1] = -1; + + if (pipe (outpipefd) || pipe (inpipefd)) + { + XPR(NT "pipe failed: %s\n", xd3_mainerror (ret = get_errno ())); + goto pipe_cleanup; + } + + if ((decomp_id = fork ()) < 0) + { + XPR(NT "fork failed: %s\n", xd3_mainerror (ret = get_errno ())); + goto pipe_cleanup; + } + + /* The first child runs the decompression process: */ + if (decomp_id == 0) + { + if (option_verbose > 2) + { + XPR(NT "external decompression pid %d\n", getpid ()); + } + + /* Setup pipes: write to the outpipe, read from the inpipe. */ + if (dup2 (outpipefd[PIPE_WRITE_FD], STDOUT_FILENO) < 0 || + dup2 (inpipefd[PIPE_READ_FD], STDIN_FILENO) < 0 || + close (outpipefd[PIPE_READ_FD]) || + close (outpipefd[PIPE_WRITE_FD]) || + close (inpipefd[PIPE_READ_FD]) || + close (inpipefd[PIPE_WRITE_FD]) || + execlp (decomp->decomp_cmdname, decomp->decomp_cmdname, + decomp->decomp_options, + option_force2 ? 
"-f" : NULL, + NULL)) + { + XPR(NT "child process %s failed to execute: %s\n", + decomp->decomp_cmdname, xd3_mainerror (get_errno ())); + } + + _exit (127); + } + + XD3_ASSERT(num_subprocs < MAX_SUBPROCS); + ext_subprocs[num_subprocs++] = decomp_id; + + if ((copier_id = fork ()) < 0) + { + XPR(NT "fork failed: %s\n", xd3_mainerror (ret = get_errno ())); + goto pipe_cleanup; + } + + /* The second child runs the copier process: */ + if (copier_id == 0) + { + int exitval = 0; + + if (option_verbose > 2) + { + XPR(NT "child pipe-copier pid %d\n", getpid ()); + } + + if (close (inpipefd[PIPE_READ_FD]) || + close (outpipefd[PIPE_READ_FD]) || + close (outpipefd[PIPE_WRITE_FD]) || + main_pipe_copier (pipe_buf, pipe_bufsize, pipe_avail, + ifile, inpipefd[PIPE_WRITE_FD]) || + close (inpipefd[PIPE_WRITE_FD])) + { + XPR(NT "child copier process failed: %s\n", + xd3_mainerror (get_errno ())); + exitval = 1; + } + + _exit (exitval); + } + + XD3_ASSERT(num_subprocs < MAX_SUBPROCS); + ext_subprocs[num_subprocs++] = copier_id; + + /* The parent closes both pipes after duplicating the output of + * compression. */ + input_fd = dup (outpipefd[PIPE_READ_FD]); + + if (input_fd < 0 || + main_file_close (ifile) || + close (outpipefd[PIPE_READ_FD]) || + close (outpipefd[PIPE_WRITE_FD]) || + close (inpipefd[PIPE_READ_FD]) || + close (inpipefd[PIPE_WRITE_FD])) + { + XPR(NT "dup/close failed: %s\n", xd3_mainerror (ret = get_errno ())); + goto pipe_cleanup; + } + +#if XD3_STDIO + /* Note: fdopen() acquires the fd, closes it when finished. */ + if ((ifile->file = fdopen (input_fd, "r")) == NULL) + { + XPR(NT "fdopen failed: %s\n", xd3_mainerror (ret = get_errno ())); + goto pipe_cleanup; + } + +#elif XD3_POSIX + ifile->file = input_fd; +#endif + + ifile->compressor = decomp; + + /* Now the input file is decompressed. 
*/ + return main_file_read (ifile, input_buf, input_bufsize, + nread, "input decompression failed"); + + pipe_cleanup: + close (input_fd); + close (outpipefd[PIPE_READ_FD]); + close (outpipefd[PIPE_WRITE_FD]); + close (inpipefd[PIPE_READ_FD]); + close (inpipefd[PIPE_WRITE_FD]); + return ret; +} + + +/* This routine is called when the first buffer of input data is read + * by the main program (unless input decompression is disabled by + * command-line option). If it recognizes the magic number of a known + * input type it invokes decompression. + * + * Skips decompression if the decompression type or the file type is + * RD_NONEXTERNAL. + * + * Behaves exactly like main_file_read, otherwise. + * + * This function uses a separate buffer to read the first small block + * of input. If a compressed input is detected, the separate buffer + * is passed to the pipe copier. This avoids using the same size + * buffer in both cases. */ +static int +main_secondary_decompress_check (main_file *file, + uint8_t *input_buf, + size_t input_size, + size_t *nread) +{ + int ret; + usize_t i; + usize_t try_read = xd3_min (input_size, XD3_ALLOCSIZE); + size_t check_nread = 0; + uint8_t check_buf[XD3_ALLOCSIZE]; /* TODO: heap allocate */ + const main_extcomp *decompressor = NULL; + + if ((ret = main_file_read (file, check_buf, + try_read, + & check_nread, "input read failed"))) + { + return ret; + } + + if (file->flags & RD_DECOMPSET) + { + /* This allows the application header to override the magic + * number, for whatever reason. */ + decompressor = file->compressor; + } + else + { + for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1) + { + const main_extcomp *decomp = & extcomp_types[i]; + + if (check_nread > decomp->magic_size) + { + /* The following expr checks if we are trying to read a + * VCDIFF input, in which case do not treat it as + * "secondary" decompression. 
*/ + int skip_this_type = (decomp->flags & RD_NONEXTERNAL) && + (file->flags & RD_NONEXTERNAL); + + if (skip_this_type) + { + continue; + } + + if (memcmp (check_buf, decomp->magic, decomp->magic_size) == 0) + { + decompressor = decomp; + break; + } + } + } + } + + if (decompressor != NULL) + { + if (! option_quiet) + { + XPR(NT "externally compressed input: %s %s%s < %s\n", + decompressor->decomp_cmdname, + decompressor->decomp_options, + (option_force2 ? " -f" : ""), + file->filename); + if (file->flags & RD_MAININPUT) + { + XPR(NT + "WARNING: the encoder is automatically decompressing the input file;\n"); + XPR(NT + "WARNING: the decoder will automatically recompress the output file;\n"); + XPR(NT + "WARNING: this may result in different compressed data and checksums\n"); + XPR(NT + "WARNING: despite being identical data; if this is an issue, use -D\n"); + XPR(NT + "WARNING: to avoid decompression and/or use -R to avoid recompression\n"); + XPR(NT + "WARNING: and/or manually decompress the input file; if you know the\n"); + XPR(NT + "WARNING: compression settings that will produce identical output\n"); + XPR(NT + "WARNING: you may set those flags using the environment (e.g., GZIP=-9)\n"); + } + } + + file->size_known = 0; + return main_input_decompress_setup (decompressor, file, + input_buf, input_size, + check_buf, XD3_ALLOCSIZE, + check_nread, nread); + } + + /* Now read the rest of the input block. */ + (*nread) = 0; + + if (check_nread == try_read) + { + ret = main_file_read (file, + input_buf + try_read, + input_size - try_read, + nread, + "input read failed"); + } + + memcpy (input_buf, check_buf, check_nread); + + (*nread) += check_nread; + + return 0; +} + +/* Initiate re-compression of the output stream. This is easier than + * input decompression because we know beforehand that the stream will + * be compressed, whereas the input has already been read when we + * decide it should be decompressed. Thus, it only requires one + * subprocess and one pipe. 
*/ +static int +main_recompress_output (main_file *ofile) +{ + pid_t recomp_id; /* One subproc. */ + int pipefd[2]; /* One pipe. */ + int output_fd = -1; + int ret; + const main_extcomp *recomp = ofile->compressor; + + pipefd[0] = pipefd[1] = -1; + + if (pipe (pipefd)) + { + XPR(NT "pipe failed: %s\n", xd3_mainerror (ret = get_errno ())); + goto pipe_cleanup; + } + + if ((recomp_id = fork ()) < 0) + { + XPR(NT "fork failed: %s\n", xd3_mainerror (ret = get_errno ())); + goto pipe_cleanup; + } + + /* The child runs the recompression process: */ + if (recomp_id == 0) + { + if (option_verbose > 2) + { + XPR(NT "external recompression pid %d\n", getpid ()); + } + + /* Setup pipes: write to the output file, read from the pipe. */ + if (dup2 (XFNO (ofile), STDOUT_FILENO) < 0 || + dup2 (pipefd[PIPE_READ_FD], STDIN_FILENO) < 0 || + close (pipefd[PIPE_READ_FD]) || + close (pipefd[PIPE_WRITE_FD]) || + execlp (recomp->recomp_cmdname, recomp->recomp_cmdname, + recomp->recomp_options, + option_force2 ? "-f" : NULL, + NULL)) + { + XPR(NT "child process %s failed to execute: %s\n", + recomp->recomp_cmdname, xd3_mainerror (get_errno ())); + } + + _exit (127); + } + + XD3_ASSERT(num_subprocs < MAX_SUBPROCS); + ext_subprocs[num_subprocs++] = recomp_id; + + /* The parent closes both pipes after duplicating the output-fd for + * writing to the compression pipe. */ + output_fd = dup (pipefd[PIPE_WRITE_FD]); + + if (output_fd < 0 || + main_file_close (ofile) || + close (pipefd[PIPE_READ_FD]) || + close (pipefd[PIPE_WRITE_FD])) + { + XPR(NT "close failed: %s\n", xd3_mainerror (ret = get_errno ())); + goto pipe_cleanup; + } + +#if XD3_STDIO + /* Note: fdopen() acquires the fd, closes it when finished. */ + if ((ofile->file = fdopen (output_fd, "w")) == NULL) + { + XPR(NT "fdopen failed: %s\n", xd3_mainerror (ret = get_errno ())); + goto pipe_cleanup; + } + +#elif XD3_POSIX + ofile->file = output_fd; +#endif + + /* Now the output file will be compressed. 
*/ + return 0; + + pipe_cleanup: + close (output_fd); + close (pipefd[PIPE_READ_FD]); + close (pipefd[PIPE_WRITE_FD]); + return ret; +} +#endif /* EXTERNAL_COMPRESSION */ + +/* Identify the compressor that was used based on its ident string, + * which is passed in the application header. */ +static const main_extcomp* +main_ident_compressor (const char *ident) +{ + usize_t i; + + for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1) + { + if (strcmp (extcomp_types[i].ident, ident) == 0) + { + return & extcomp_types[i]; + } + } + + return NULL; +} + +/* Return the main_extcomp record to use for this identifier, if possible. */ +static const main_extcomp* +main_get_compressor (const char *ident) +{ + const main_extcomp *ext = main_ident_compressor (ident); + + if (ext == NULL) + { + if (! option_quiet) + { + XPR(NT "warning: cannot recompress output: " + "unrecognized external compression ID: %s\n", ident); + } + return NULL; + } + else if (! EXTERNAL_COMPRESSION) + { + if (! option_quiet) + { + XPR(NT "warning: external support not compiled: " + "original input was compressed: %s\n", ext->recomp_cmdname); + } + return NULL; + } + else + { + return ext; + } +} + +/********************************************************************* + APPLICATION HEADER + *******************************************************************/ + +#if XD3_ENCODER +static const char* +main_apphead_string (const char* x) +{ + const char *y; + + if (x == NULL) { return ""; } + + if (strcmp (x, "/dev/stdin") == 0 || + strcmp (x, "/dev/stdout") == 0 || + strcmp (x, "/dev/stderr") == 0) { return "-"; } + + // TODO: this is not portable + return (y = strrchr (x, '/')) == NULL ? x : y + 1; +} + +static int +main_set_appheader (xd3_stream *stream, main_file *input, main_file *sfile) +{ + /* The user may disable the application header. Once the appheader + * is set, this disables setting it again. */ + if (appheader_used || ! 
option_use_appheader) { return 0; } + + /* The user may specify the application header, otherwise format the + default header. */ + if (option_appheader) + { + appheader_used = option_appheader; + } + else + { + const char *iname; + const char *icomp; + const char *sname; + const char *scomp; + usize_t len; + + iname = main_apphead_string (input->filename); + icomp = (input->compressor == NULL) ? "" : input->compressor->ident; + len = (usize_t) strlen (iname) + (usize_t) strlen (icomp) + 2; + + if (sfile->filename != NULL) + { + sname = main_apphead_string (sfile->filename); + scomp = (sfile->compressor == NULL) ? "" : sfile->compressor->ident; + len += (usize_t) strlen (sname) + (usize_t) strlen (scomp) + 2; + } + else + { + sname = scomp = ""; + } + + if ((appheader_used = (uint8_t*) main_malloc (len)) == NULL) + { + return ENOMEM; + } + + if (sfile->filename == NULL) + { + snprintf_func ((char*)appheader_used, len, "%s/%s", iname, icomp); + } + else + { + snprintf_func ((char*)appheader_used, len, "%s/%s/%s/%s", + iname, icomp, sname, scomp); + } + } + + xd3_set_appheader (stream, appheader_used, + (usize_t) strlen ((char*)appheader_used)); + + return 0; +} +#endif + +static void +main_get_appheader_params (main_file *file, char **parsed, + int output, const char *type, + main_file *other) +{ + /* Set the filename if it was not specified. If output, option_stdout (-c) + * overrides. */ + if (file->filename == NULL && + ! (output && option_stdout) && + strcmp (parsed[0], "-") != 0) + { + file->filename = parsed[0]; + + if (other->filename != NULL) { + /* Take directory from the other file, if it has one. */ + /* TODO: This results in nonsense names like /dev/foo.tar.gz + * and probably the filename-default logic interferes with + * multi-file operation and the standard file extension? + * Possibly the name header is bad, should be off by default. + * Possibly we just want to remember external/compression + * settings. 
*/ + const char *last_slash = strrchr(other->filename, '/'); + + if (last_slash != NULL) { + usize_t dlen = (usize_t) (last_slash - other->filename); + + XD3_ASSERT(file->filename_copy == NULL); + file->filename_copy = + (char*) main_malloc(dlen + 2 + (usize_t) strlen(file->filename)); + + strncpy(file->filename_copy, other->filename, dlen); + file->filename_copy[dlen] = '/'; + strcpy(file->filename_copy + dlen + 1, parsed[0]); + + file->filename = file->filename_copy; + } + } + + if (! option_quiet) + { + XPR(NT "using default %s filename: %s\n", type, file->filename); + } + } + + /* Set the compressor, initiate de/recompression later. */ + if (file->compressor == NULL && *parsed[1] != 0) + { + file->flags |= RD_DECOMPSET; + file->compressor = main_get_compressor (parsed[1]); + } +} + +static void +main_get_appheader (xd3_stream *stream, main_file *ifile, + main_file *output, main_file *sfile) +{ + uint8_t *apphead; + usize_t appheadsz; + int ret; + + /* The user may disable the application header. Once the appheader + * is set, this disables setting it again. */ + if (! option_use_appheader) { return; } + + ret = xd3_get_appheader (stream, & apphead, & appheadsz); + + /* Ignore failure, it only means we haven't received a header yet. */ + if (ret != 0) { return; } + + if (appheadsz > 0) + { + char *start = (char*)apphead; + char *slash; + int place = 0; + const int kMaxArgs = 4; + char *parsed[4]; + + memset (parsed, 0, sizeof (parsed)); + + while ((slash = strchr (start, '/')) != NULL && place < (kMaxArgs-1)) + { + *slash = 0; + parsed[place++] = start; + start = slash + 1; + } + + parsed[place++] = start; + + /* First take the output parameters. */ + if (place == 2 || place == 4) + { + main_get_appheader_params (output, parsed, 1, "output", ifile); + } + + /* Then take the source parameters. 
*/ + if (place == 4) + { + main_get_appheader_params (sfile, parsed+2, 0, "source", ifile); + } + } + + option_use_appheader = 0; + return; +} + +/********************************************************************* + Main I/O routines + **********************************************************************/ + +/* This function acts like the above except it may also try to + * recognize a compressed input (source or target) when the first + * buffer of data is read. The EXTERNAL_COMPRESSION code is called to + * search for magic numbers. */ +static int +main_read_primary_input (main_file *file, + uint8_t *buf, + size_t size, + size_t *nread) +{ +#if EXTERNAL_COMPRESSION + if (option_decompress_inputs && file->flags & RD_FIRST) + { + file->flags &= ~RD_FIRST; + return main_secondary_decompress_check (file, buf, size, nread); + } +#endif + + return main_file_read (file, buf, size, nread, "input read failed"); +} + +/* Open the main output file, sets a default file name, initiate + * recompression. This function is expected to fprint any error + * messages. */ +static int +main_open_output (xd3_stream *stream, main_file *ofile) +{ + int ret; + + if (option_no_output) + { + return 0; + } + + if (ofile->filename == NULL) + { + XSTDOUT_XF (ofile); + + if (option_verbose > 1) + { + XPR(NT "using standard output: %s\n", ofile->filename); + } + } + else + { + /* Stat the file to check for overwrite. */ + if (option_force == 0 && main_file_exists (ofile)) + { + if (!option_quiet) + { + XPR(NT "to overwrite output file specify -f: %s\n", + ofile->filename); + } + return EEXIST; + } + + if ((ret = main_file_open (ofile, ofile->filename, XO_WRITE))) + { + return ret; + } + + if (option_verbose > 1) { XPR(NT "output %s\n", ofile->filename); } + } + +#if EXTERNAL_COMPRESSION + /* Do output recompression. */ + if (ofile->compressor != NULL && option_recompress_outputs == 1) + { + if (! 
option_quiet) + { + XPR(NT "externally compressed output: %s %s%s > %s\n", + ofile->compressor->recomp_cmdname, + ofile->compressor->recomp_options, + (option_force2 ? " -f" : ""), + ofile->filename); + } + + if ((ret = main_recompress_output (ofile))) + { + return ret; + } + } +#endif + + return 0; +} + +static usize_t +main_get_winsize (main_file *ifile) { + xoff_t file_size = 0; + usize_t size = option_winsize; + static shortbuf iszbuf; + + if (main_file_stat (ifile, &file_size) == 0) + { + size = (usize_t) xd3_min (file_size, (xoff_t) size); + } + + size = xd3_max (size, XD3_ALLOCSIZE); + + if (option_verbose > 1) + { + XPR(NT "input %s window size %s\n", + ifile->filename, + main_format_bcnt (size, &iszbuf)); + } + + return size; +} + +/********************************************************************* + Main routines + ********************************************************************/ + +/* This is a generic input function. It calls the xd3_encode_input or + * xd3_decode_input functions and makes calls to the various input + * handling routines above, which coordinate external decompression. + */ +static int +main_input (xd3_cmd cmd, + main_file *ifile, + main_file *ofile, + main_file *sfile) +{ + int ret; + xd3_stream stream; + size_t nread = 0; + usize_t winsize; + int stream_flags = 0; + xd3_config config; + xd3_source source; + xoff_t last_total_in = 0; + xoff_t last_total_out = 0; + long start_time; + int stdout_only = 0; + int (*input_func) (xd3_stream*); + int (*output_func) (xd3_stream*, main_file *); + + memset (& stream, 0, sizeof (stream)); + memset (& source, 0, sizeof (source)); + memset (& config, 0, sizeof (config)); + + config.alloc = main_alloc; + config.freef = main_free1; + + config.iopt_size = option_iopt_size; + config.sprevsz = option_sprevsz; + + do_src_fifo = 0; + + start_time = get_millisecs_now (); + + if (option_use_checksum) { stream_flags |= XD3_ADLER32; } + + /* main_input setup. 
*/ + switch ((int) cmd) + { +#if VCDIFF_TOOLS + if (1) { case CMD_PRINTHDR: stream_flags |= XD3_JUST_HDR; } + else if (1) { case CMD_PRINTHDRS: stream_flags |= XD3_SKIP_WINDOW; } + else { case CMD_PRINTDELTA: stream_flags |= XD3_SKIP_EMIT; } + ifile->flags |= RD_NONEXTERNAL; + input_func = xd3_decode_input; + output_func = main_print_func; + stream_flags |= XD3_ADLER32_NOVER; + stdout_only = 1; + break; + + case CMD_RECODE: + case CMD_MERGE: + case CMD_MERGE_ARG: + /* No source will be read */ + stream_flags |= XD3_ADLER32_NOVER | XD3_SKIP_EMIT; + ifile->flags |= RD_NONEXTERNAL; + input_func = xd3_decode_input; + + if ((ret = main_init_recode_stream ())) + { + return EXIT_FAILURE; + } + + if (cmd == CMD_RECODE) { output_func = main_recode_func; } + else { output_func = main_merge_func; } + break; +#endif /* VCDIFF_TOOLS */ + +#if XD3_ENCODER + case CMD_ENCODE: + do_src_fifo = 1; + input_func = xd3_encode_input; + output_func = main_write_output; + + if (option_no_compress) { stream_flags |= XD3_NOCOMPRESS; } + if (option_smatch_config) + { + const char *s = option_smatch_config; + char *e; + long values[XD3_SOFTCFG_VARCNT]; + int got; + + config.smatch_cfg = XD3_SMATCH_SOFT; + + for (got = 0; got < XD3_SOFTCFG_VARCNT; got += 1, s = e + 1) + { + values[got] = strtol (s, &e, 10); + + if ((values[got] < 0) || + (e == s) || + (got < XD3_SOFTCFG_VARCNT-1 && *e == 0) || + (got == XD3_SOFTCFG_VARCNT-1 && *e != 0)) + { + XPR(NT "invalid string match specifier (-C) %d: %s\n", + got, s); + return EXIT_FAILURE; + } + } + + config.smatcher_soft.large_look = values[0]; + config.smatcher_soft.large_step = values[1]; + config.smatcher_soft.small_look = values[2]; + config.smatcher_soft.small_chain = values[3]; + config.smatcher_soft.small_lchain = values[4]; + config.smatcher_soft.max_lazy = values[5]; + config.smatcher_soft.long_enough = values[6]; + } + else + { + if (option_verbose > 2) + { + XPR(NT "compression level: %d\n", option_level); + } + if (option_level == 0) + { + 
stream_flags |= XD3_NOCOMPRESS; + config.smatch_cfg = XD3_SMATCH_FASTEST; + } + else if (option_level == 1) + { config.smatch_cfg = XD3_SMATCH_FASTEST; } + else if (option_level == 2) + { config.smatch_cfg = XD3_SMATCH_FASTER; } + else if (option_level <= 5) + { config.smatch_cfg = XD3_SMATCH_FAST; } + else if (option_level == 6) + { config.smatch_cfg = XD3_SMATCH_DEFAULT; } + else + { config.smatch_cfg = XD3_SMATCH_SLOW; } + } + break; +#endif + case CMD_DECODE: + if (option_use_checksum == 0) { stream_flags |= XD3_ADLER32_NOVER; } + ifile->flags |= RD_NONEXTERNAL; + input_func = xd3_decode_input; + output_func = main_write_output; + break; + default: + XPR(NT "internal error\n"); + return EXIT_FAILURE; + } + + main_bsize = winsize = main_get_winsize (ifile); + + if ((main_bdata = (uint8_t*) main_bufalloc (winsize)) == NULL) + { + return EXIT_FAILURE; + } + + config.winsize = winsize; + config.getblk = main_getblk_func; + config.flags = stream_flags; + + if ((ret = main_set_secondary_flags (&config)) || + (ret = xd3_config_stream (& stream, & config))) + { + XPR(NT XD3_LIB_ERRMSG (& stream, ret)); + return EXIT_FAILURE; + } + +#if VCDIFF_TOOLS + if ((cmd == CMD_MERGE || cmd == CMD_MERGE_ARG) && + (ret = xd3_whole_state_init (& stream))) + { + XPR(NT XD3_LIB_ERRMSG (& stream, ret)); + return EXIT_FAILURE; + } +#endif + + if (cmd != CMD_DECODE) + { + /* When not decoding, set source now. The decoder delays this + * step until XD3_GOTHEADER. */ + if (sfile && sfile->filename != NULL) + { + if ((ret = main_set_source (& stream, cmd, sfile, & source))) + { + return EXIT_FAILURE; + } + + XD3_ASSERT(stream.src != NULL); + } + } + + if (cmd == CMD_PRINTHDR || + cmd == CMD_PRINTHDRS || + cmd == CMD_PRINTDELTA || + cmd == CMD_RECODE) + { + if (sfile->filename == NULL) + { + allow_fake_source = 1; + sfile->filename = ""; + main_set_source (& stream, cmd, sfile, & source); + } + } + + /* This times each window. */ + get_millisecs_since (); + + /* Main input loop. 
*/ + do + { + xoff_t input_offset; + xoff_t input_remain; + usize_t try_read; + + input_offset = ifile->nread; + + input_remain = XOFF_T_MAX - input_offset; + + try_read = (usize_t) xd3_min ((xoff_t) config.winsize, input_remain); + + if ((ret = main_read_primary_input (ifile, main_bdata, + try_read, & nread))) + { + return EXIT_FAILURE; + } + + /* If we've reached EOF tell the stream to flush. */ + if (nread < try_read) + { + stream.flags |= XD3_FLUSH; + } + +#if XD3_ENCODER + /* After the first main_read_primary_input completes, we know + * all the information needed to encode the application + * header. */ + if (cmd == CMD_ENCODE && + (ret = main_set_appheader (& stream, ifile, sfile))) + { + return EXIT_FAILURE; + } +#endif + xd3_avail_input (& stream, main_bdata, nread); + + /* If we read zero bytes after encoding at least one window... */ + if (nread == 0 && stream.current_window > 0) { + break; + } + + again: + ret = input_func (& stream); + + switch (ret) + { + case XD3_INPUT: + continue; + + case XD3_GOTHEADER: + { + XD3_ASSERT (stream.current_window == 0); + + /* Need to process the appheader as soon as possible. It may + * contain a suggested default filename/decompression routine for + * the ofile, and it may contain default/decompression routine for + * the sources. */ + if (cmd == CMD_DECODE) + { + /* May need to set the sfile->filename if none was given. */ + main_get_appheader (& stream, ifile, ofile, sfile); + + /* Now open the source file. */ + if ((sfile->filename != NULL) && + (ret = main_set_source (& stream, cmd, sfile, & source))) + { + return EXIT_FAILURE; + } + } + } + /* FALLTHROUGH */ + case XD3_WINSTART: + { + /* e.g., set or unset XD3_SKIP_WINDOW. */ + goto again; + } + + case XD3_OUTPUT: + { + /* Defer opening the output file until the stream produces its + * first output for both encoder and decoder, this way we + * delay long enough for the decoder to receive the + * application header. 
(Or longer if there are skipped + * windows, but I can't think of any reason not to delay + * open.) */ + if (ofile != NULL && + ! main_file_isopen (ofile) && + (ret = main_open_output (& stream, ofile)) != 0) + { + return EXIT_FAILURE; + } + + if ((ret = output_func (& stream, ofile)) && + (ret != PRINTHDR_SPECIAL)) + { + return EXIT_FAILURE; + } + + if (ret == PRINTHDR_SPECIAL) + { + xd3_abort_stream (& stream); + ret = EXIT_SUCCESS; + goto done; + } + + ret = 0; + + xd3_consume_output (& stream); + goto again; + } + + case XD3_WINFINISH: + { + if (IS_ENCODE (cmd) || cmd == CMD_DECODE || cmd == CMD_RECODE) + { + if (! option_quiet && IS_ENCODE (cmd) && + main_file_isopen (sfile)) + { + /* Warn when no source copies are found */ + if (option_verbose && ! xd3_encoder_used_source (& stream)) + { + XPR(NT "warning: input window %"Q"u..%"Q"u has " + "no source copies\n", + stream.current_window * winsize, + (stream.current_window+1) * winsize); + XD3_ASSERT (stream.src != NULL); + } + + /* Limited i-buffer size affects source copies + * when the sourcewin is decided early. */ + if (option_verbose > 1 && + stream.srcwin_decided_early && + stream.i_slots_used > stream.iopt_size) + { + XPR(NT "warning: input position %"Q"u overflowed " + "instruction buffer, needed %"W"u (vs. 
%"W"u), " + "consider changing -I\n", + stream.current_window * winsize, + stream.i_slots_used, stream.iopt_size); + } + } + + if (option_verbose) + { + shortbuf rrateavg, wrateavg, tm; + shortbuf rdb, wdb; + shortbuf trdb, twdb; + shortbuf srcpos; + long millis = get_millisecs_since (); + usize_t this_read = (usize_t)(stream.total_in - + last_total_in); + usize_t this_write = (usize_t)(stream.total_out - + last_total_out); + last_total_in = stream.total_in; + last_total_out = stream.total_out; + + if (option_verbose > 1) + { + XPR(NT "%"Q"u: in %s (%s): out %s (%s): " + "total in %s: out %s: %s: srcpos %s\n", + stream.current_window, + main_format_bcnt (this_read, &rdb), + main_format_rate (this_read, millis, &rrateavg), + main_format_bcnt (this_write, &wdb), + main_format_rate (this_write, millis, &wrateavg), + main_format_bcnt (stream.total_in, &trdb), + main_format_bcnt (stream.total_out, &twdb), + main_format_millis (millis, &tm), + main_format_bcnt (stream.srcwin_cksum_pos, &srcpos)); + } + else + { + XPR(NT "%"Q"u: in %s: out %s: total in %s: " + "out %s: %s\n", + stream.current_window, + main_format_bcnt (this_read, &rdb), + main_format_bcnt (this_write, &wdb), + main_format_bcnt (stream.total_in, &trdb), + main_format_bcnt (stream.total_out, &twdb), + main_format_millis (millis, &tm)); + } + } + } + goto again; + } + + default: + /* input_func() error */ + XPR(NT XD3_LIB_ERRMSG (& stream, ret)); + if (! option_quiet && ret == XD3_INVALID_INPUT && + sfile != NULL && sfile->filename != NULL) + { + XPR(NT "normally this indicates that the source file is incorrect\n"); + XPR(NT "please verify the source file with sha1sum or equivalent\n"); + } + return EXIT_FAILURE; + } + } + while (nread == config.winsize); +done: + /* Close the inputs. 
(ifile must be open, sfile may be open) */ + main_file_close (ifile); + if (sfile != NULL) + { + main_file_close (sfile); + } + +#if VCDIFF_TOOLS + if (cmd == CMD_MERGE && + (ret = main_merge_output (& stream, ofile))) + { + return EXIT_FAILURE; + } + + if (cmd == CMD_MERGE_ARG) + { + xd3_swap_whole_state (& stream.whole_target, + & recode_stream->whole_target); + } +#endif /* VCDIFF_TOOLS */ + + /* If output file is not open yet because of delayed-open, it means + * we never encountered a window in the delta, but it could have had + * a VCDIFF header? TODO: solve this elsewhere. For now, it prints + * "nothing to output" below, but the check doesn't happen in case + * of option_no_output. */ + if (! option_no_output && ofile != NULL) + { + if (!stdout_only && ! main_file_isopen (ofile)) + { + XPR(NT "nothing to output: %s\n", ifile->filename); + return EXIT_FAILURE; + } + + /* Have to close the output before calling + * main_external_compression_finish, or else it hangs. */ + if (main_file_close (ofile) != 0) + { + return EXIT_FAILURE; + } + } + +#if EXTERNAL_COMPRESSION + if ((ret = main_external_compression_finish ())) + { + XPR(NT "external compression commands failed\n"); + return EXIT_FAILURE; + } +#endif + + if ((ret = xd3_close_stream (& stream))) + { + XPR(NT XD3_LIB_ERRMSG (& stream, ret)); + return EXIT_FAILURE; + } + +#if XD3_ENCODER + if (option_verbose > 1 && cmd == CMD_ENCODE) + { + XPR(NT "scanner configuration: %s\n", stream.smatcher.name); + XPR(NT "target hash table size: %"W"u\n", stream.small_hash.size); + if (sfile != NULL && sfile->filename != NULL) + { + XPR(NT "source hash table size: %"W"u\n", stream.large_hash.size); + } + } + + if (option_verbose > 2 && cmd == CMD_ENCODE) + { + XPR(NT "source copies: %"Q"u (%"Q"u bytes)\n", + stream.n_scpy, stream.l_scpy); + XPR(NT "target copies: %"Q"u (%"Q"u bytes)\n", + stream.n_tcpy, stream.l_tcpy); + XPR(NT "adds: %"Q"u (%"Q"u bytes)\n", stream.n_add, stream.l_add); + XPR(NT "runs: %"Q"u (%"Q"u 
bytes)\n", stream.n_run, stream.l_run); + } +#endif + + xd3_free_stream (& stream); + + if (option_verbose) + { + shortbuf tm; + long end_time = get_millisecs_now (); + xoff_t nwrite = ofile != NULL ? ofile->nwrite : 0; + + XPR(NT "finished in %s; input %"Q"u output %"Q"u bytes (%0.2f%%)\n", + main_format_millis (end_time - start_time, &tm), + ifile->nread, nwrite, 100.0 * nwrite / ifile->nread); + } + + return EXIT_SUCCESS; +} + +/* free memory before exit, reset single-use variables. */ +static void +main_cleanup (void) +{ + if (appheader_used != NULL && + appheader_used != option_appheader) + { + main_free (appheader_used); + appheader_used = NULL; + } + + main_buffree (main_bdata); + main_bdata = NULL; + main_bsize = 0; + + main_lru_cleanup(); + + if (recode_stream != NULL) + { + xd3_free_stream (recode_stream); + main_free (recode_stream); + recode_stream = NULL; + } + + if (merge_stream != NULL) + { + xd3_free_stream (merge_stream); + main_free (merge_stream); + merge_stream = NULL; + } + + XD3_ASSERT (main_mallocs == 0); +} + +static void +setup_environment (int argc, + char **argv, + int *argc_out, + char ***argv_out, + char ***argv_free, + char **env_free) +{ + int n, i, i0; + char *p, *v = getenv("XDELTA"); + if (v == NULL) { + (*argc_out) = argc; + (*argv_out) = argv; + (*argv_free) = NULL; + (*env_free) = NULL; + return; + } + + (*env_free) = (char*) main_malloc((usize_t) strlen(v) + 1); + strcpy(*env_free, v); + + /* Space needed for extra args, at least # of spaces */ + n = argc + 1; + for (p = *env_free; *p != 0; ) { + if (*p++ == ' ') { + n++; + } + } + + (*argv_free) = (char**) main_malloc(sizeof(char*) * (n + 1)); + (*argv_out) = (*argv_free); + (*argv_out)[0] = argv[0]; + (*argv_out)[n] = NULL; + + i = 1; + for (p = *env_free; *p != 0; ) { + (*argv_out)[i++] = p; + while (*p != ' ' && *p != 0) { + p++; + } + while (*p == ' ') { + *p++ = 0; + } + } + + for (i0 = 1; i0 < argc; i0++) { + (*argv_out)[i++] = argv[i0]; + } + + /* Counting spaces is an 
upper bound, argv stays NULL terminated. */ + (*argc_out) = i; + while (i <= n) { + (*argv_out)[i++] = NULL; + } +} + +#if PYTHON_MODULE || SWIG_MODULE || NOT_MAIN +int xd3_main_cmdline (int argc, char **argv) +#else +int main (int argc, char **argv) +#endif +{ + static const char *flags = + "0123456789cdefhnqvDFJNORVs:m:B:C:E:I:L:O:M:P:W:A::S::"; + xd3_cmd cmd; + main_file ifile; + main_file ofile; + main_file sfile; + main_merge_list merge_order; + main_merge *merge; + int my_optind; + const char *my_optarg; + const char *my_optstr; + const char *sfilename; + int env_argc; + char **env_argv; + char **free_argv; /* malloc() in setup_environment() */ + char *free_value; /* malloc() in setup_environment() */ + int ret; + +#ifdef _WIN32 + GetStartupInfo(&winStartupInfo); + setvbuf(stderr, NULL, _IONBF, 0); /* Do not buffer stderr */ +#endif + + main_file_init (& ifile); + main_file_init (& ofile); + main_file_init (& sfile); + main_merge_list_init (& merge_order); + + reset_defaults(); + + free_argv = NULL; + free_value = NULL; + setup_environment(argc, argv, &env_argc, &env_argv, + &free_argv, &free_value); + cmd = CMD_NONE; + sfilename = NULL; + my_optind = 1; + argv = env_argv; + argc = env_argc; + program_name = env_argv[0]; + + takearg: + my_optarg = NULL; + my_optstr = argv[my_optind]; + + /* This doesn't use getopt() because it makes trouble for -P & python which + * reenter main() and thus care about freeing all memory. I never had much + * trust for getopt anyway, it's too opaque. This implements a fairly + * standard non-long-option getopt with support for named operations (e.g., + * "xdelta3 [encode|decode|printhdr...] < in > out"). 
*/ + if (my_optstr) + { + if (*my_optstr == '-') { my_optstr += 1; } + else if (cmd == CMD_NONE) { goto nonflag; } + else { my_optstr = NULL; } + } + while (my_optstr) + { + const char *s; + my_optarg = NULL; + if ((ret = *my_optstr++) == 0) { my_optind += 1; goto takearg; } + + /* Option handling: first check for one ':' following the option in + * flags, then check for two. The syntax allows: + * + * 1. -Afoo defines optarg="foo" + * 2. -A foo defines optarg="foo" + * 3. -A "" defines optarg="" (allows empty-string) + * 4. -A [EOA or -moreargs] error (mandatory case) + * 5. -A [EOA -moreargs] defines optarg=NULL (optional case) + * 6. -A=foo defines optarg="foo" + * 7. -A= defines optarg="" (mandatory case) + * 8. -A= defines optarg=NULL (optional case) + * + * See tests in test_command_line_arguments(). + */ + s = strchr (flags, ret); + if (s && s[1] && s[1] == ':') + { + int option = s[2] && s[2] == ':'; + + /* Case 1, set optarg to the remaining characters. */ + my_optarg = my_optstr; + my_optstr = ""; + + /* Case 2-5 */ + if (*my_optarg == 0) + { + /* Condition 4-5 */ + int have_arg = (my_optind < (argc - 1) && + *argv[my_optind+1] != '-'); + + if (! have_arg) + { + if (! option) + { + /* Case 4 */ + XPR(NT "-%c: requires an argument\n", ret); + ret = EXIT_FAILURE; + goto cleanup; + } + /* Case 5. */ + my_optarg = NULL; + } + else + { + /* Case 2-3. */ + my_optarg = argv[++my_optind]; + } + } + /* Case 6-8. */ + else if (*my_optarg == '=') + { + /* Remove the = in all cases. */ + my_optarg += 1; + + if (option && *my_optarg == 0) + { + /* Case 8. */ + my_optarg = NULL; + } + } + } + + switch (ret) + { + /* case: if no '-' was found, maybe check for a command name. 
*/ + nonflag: + if (strcmp (my_optstr, "decode") == 0) { cmd = CMD_DECODE; } + else if (strcmp (my_optstr, "encode") == 0) + { +#if XD3_ENCODER + cmd = CMD_ENCODE; +#else + XPR(NT "encoder support not compiled\n"); + return EXIT_FAILURE; +#endif + } + else if (strcmp (my_optstr, "config") == 0) { cmd = CMD_CONFIG; } +#if REGRESSION_TEST + else if (strcmp (my_optstr, "test") == 0) { cmd = CMD_TEST; } +#endif +#if VCDIFF_TOOLS + else if (strcmp (my_optstr, "printhdr") == 0) { cmd = CMD_PRINTHDR; } + else if (strcmp (my_optstr, "printhdrs") == 0) + { cmd = CMD_PRINTHDRS; } + else if (strcmp (my_optstr, "printdelta") == 0) + { cmd = CMD_PRINTDELTA; } + else if (strcmp (my_optstr, "recode") == 0) { cmd = CMD_RECODE; } + else if (strcmp (my_optstr, "merge") == 0) { cmd = CMD_MERGE; } +#endif + + /* If no option was found and still no command, let the default + * command be encode. The remaining args are treated as + * filenames. */ + if (cmd == CMD_NONE) + { + cmd = CMD_DEFAULT; + my_optstr = NULL; + break; + } + else + { + /* But if we find a command name, continue the getopt loop. 
*/ + my_optind += 1; + goto takearg; + } + + /* gzip-like options */ + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + option_level = ret - '0'; + break; + case 'f': option_force = 1; break; + case 'F': +#if EXTERNAL_COMPRESSION + option_force2 = 1; +#else + XPR(NT "warning: -F option ignored, " + "external compression support was not compiled\n"); + break; +#endif + case 'v': option_verbose += 1; option_quiet = 0; break; + case 'q': option_quiet = 1; option_verbose = 0; break; + case 'c': option_stdout = 1; break; + case 'd': + if (cmd == CMD_NONE) { cmd = CMD_DECODE; } + else { ret = main_help (); goto exit; } + break; + case 'e': +#if XD3_ENCODER + if (cmd == CMD_NONE) { cmd = CMD_ENCODE; } + else { ret = main_help (); goto exit; } + break; +#else + XPR(NT "encoder support not compiled\n"); + return EXIT_FAILURE; +#endif + + case 'n': option_use_checksum = 0; break; + case 'N': option_no_compress = 1; break; + case 'C': option_smatch_config = my_optarg; break; + case 'J': option_no_output = 1; break; + case 'S': if (my_optarg == NULL) + { + option_use_secondary = 0; + option_secondary = NULL; + } + else + { + option_use_secondary = 1; + option_secondary = my_optarg; + } + break; + case 'A': if (my_optarg == NULL) { option_use_appheader = 0; } + else { option_appheader = (uint8_t*) my_optarg; } break; + case 'B': { + xoff_t bsize; + if ((ret = main_atoux (my_optarg, & bsize, + XD3_MINSRCWINSZ, XD3_MAXSRCWINSZ, 'B'))) + { + goto exit; + } + option_srcwinsz = bsize; + break; + } + case 'I': + if ((ret = main_atou (my_optarg, & option_iopt_size, 0, + 0, 'I'))) + { + goto exit; + } + break; + case 'P': + if ((ret = main_atou (my_optarg, & option_sprevsz, 0, + 0, 'P'))) + { + goto exit; + } + break; + case 'W': + if ((ret = main_atou (my_optarg, & option_winsize, XD3_ALLOCSIZE, + XD3_HARDMAXWINSIZE, 'W'))) + { + goto exit; + } + break; + case 'D': +#if EXTERNAL_COMPRESSION == 0 + if (option_verbose > 0) + { + 
XPR(NT "warning: -D option ignored, " + "external compression support was not compiled\n"); + } +#else + option_decompress_inputs = 0; +#endif + break; + case 'R': +#if EXTERNAL_COMPRESSION == 0 + if (option_verbose > 0) + { + XPR(NT "warning: -R option ignored, " + "external compression support was not compiled\n"); + } +#else + option_recompress_outputs = 0; +#endif + break; + case 's': + if (sfilename != NULL) + { + XPR(NT "specify only one source file\n"); + goto cleanup; + } + + sfilename = my_optarg; + break; + case 'm': + if ((merge = (main_merge*) + main_malloc (sizeof (main_merge))) == NULL) + { + goto cleanup; + } + main_merge_list_push_back (& merge_order, merge); + merge->filename = my_optarg; + break; + case 'V': + ret = main_version (); goto exit; + default: + ret = main_help (); goto exit; + } + } + + option_source_filename = sfilename; + + /* In case there were no arguments, set the default command. */ + if (cmd == CMD_NONE) { cmd = CMD_DEFAULT; } + + argc -= my_optind; + argv += my_optind; + + /* There may be up to two more arguments. */ + if (argc > 2) + { + XPR(NT "too many filenames: %s ...\n", argv[2]); + goto cleanup; + } + + ifile.flags = RD_FIRST | RD_MAININPUT; + sfile.flags = RD_FIRST; + sfile.filename = option_source_filename; + + /* The infile takes the next argument, if there is one. But if not, infile + * is set to stdin. */ + if (argc > 0) + { + ifile.filename = argv[0]; + + if ((ret = main_file_open (& ifile, ifile.filename, XO_READ))) + { + goto cleanup; + } + } + else + { + XSTDIN_XF (& ifile); + } + + /* The ofile takes the following argument, if there is one. But if not, it + * is left NULL until the application header is processed. It will be set + * in main_open_output. */ + if (argc > 1) + { + /* Check for conflicting arguments. */ + if (option_stdout && ! option_quiet) + { + XPR(NT "warning: -c option overrides output filename: %s\n", + argv[1]); + } + + if (! 
option_stdout) { ofile.filename = argv[1]; } + } + +#if VCDIFF_TOOLS + if (cmd == CMD_MERGE && + (ret = main_merge_arguments (&merge_order))) + { + goto cleanup; + } +#endif /* VCDIFF_TOOLS */ + + switch (cmd) + { + case CMD_PRINTHDR: + case CMD_PRINTHDRS: + case CMD_PRINTDELTA: +#if XD3_ENCODER + case CMD_ENCODE: + case CMD_RECODE: + case CMD_MERGE: +#endif + case CMD_DECODE: + ret = main_input (cmd, & ifile, & ofile, & sfile); + break; + +#if REGRESSION_TEST + case CMD_TEST: + main_config (); + ret = xd3_selftest (); + break; +#endif + + case CMD_CONFIG: + ret = main_config (); + break; + + default: + ret = main_help (); + break; + } + + if (0) + { + cleanup: + ret = EXIT_FAILURE; + exit: + (void)0; + } + +#if EXTERNAL_COMPRESSION + main_external_compression_cleanup (); +#endif + + main_file_cleanup (& ifile); + main_file_cleanup (& ofile); + main_file_cleanup (& sfile); + + while (! main_merge_list_empty (& merge_order)) + { + merge = main_merge_list_pop_front (& merge_order); + main_free (merge); + } + + main_free (free_argv); + main_free (free_value); + + main_cleanup (); + + fflush (stdout); + fflush (stderr); + return ret; +} + +static int +main_help (void) +{ + main_version(); + + /* Note: update wiki when command-line features change */ + XPR(NTR "usage: xdelta3 [command/options] [input [output]]\n"); + XPR(NTR "make patch:\n"); + XPR(NTR "\n"); + XPR(NTR " xdelta3.exe -e -s old_file new_file delta_file\n"); + XPR(NTR "\n"); + XPR(NTR "apply patch:\n"); + XPR(NTR "\n"); + XPR(NTR " xdelta3.exe -d -s old_file delta_file decoded_new_file\n"); + XPR(NTR "\n"); + XPR(NTR "special command names:\n"); + XPR(NTR " config prints xdelta3 configuration\n"); + XPR(NTR " decode decompress the input\n"); + XPR(NTR " encode compress the input%s\n", + XD3_ENCODER ? 
"" : " [Not compiled]"); +#if REGRESSION_TEST + XPR(NTR " test run the builtin tests\n"); +#endif +#if VCDIFF_TOOLS + XPR(NTR "special commands for VCDIFF inputs:\n"); + XPR(NTR " printdelta print information about the entire delta\n"); + XPR(NTR " printhdr print information about the first window\n"); + XPR(NTR " printhdrs print information about all windows\n"); + XPR(NTR " recode encode with new application/secondary settings\n"); + XPR(NTR " merge merge VCDIFF inputs (see below)\n"); +#endif + XPR(NTR "merge patches:\n"); + XPR(NTR "\n"); + XPR(NTR " xdelta3 merge -m 1.vcdiff -m 2.vcdiff 3.vcdiff merged.vcdiff\n"); + XPR(NTR "\n"); + XPR(NTR "standard options:\n"); + XPR(NTR " -0 .. -9 compression level\n"); + XPR(NTR " -c use stdout\n"); + XPR(NTR " -d decompress\n"); + XPR(NTR " -e compress%s\n", + XD3_ENCODER ? "" : " [Not compiled]"); + XPR(NTR " -f force (overwrite, ignore trailing garbage)\n"); +#if EXTERNAL_COMPRESSION + XPR(NTR " -F force the external-compression subprocess\n"); +#endif + XPR(NTR " -h show help\n"); + XPR(NTR " -q be quiet\n"); + XPR(NTR " -v be verbose (max 2)\n"); + XPR(NTR " -V show version\n"); + + XPR(NTR "memory options:\n"); + XPR(NTR " -B bytes source window size\n"); + XPR(NTR " -W bytes input window size\n"); + XPR(NTR " -P size compression duplicates window\n"); + XPR(NTR " -I size instruction buffer size (0 = unlimited)\n"); + + XPR(NTR "compression options:\n"); + XPR(NTR " -s source source file to copy from (if any)\n"); + XPR(NTR " -S [lzma|djw|fgk] enable/disable secondary compression\n"); + XPR(NTR " -N disable small string-matching compression\n"); + XPR(NTR " -D disable external decompression (encode/decode)\n"); + XPR(NTR " -R disable external recompression (decode)\n"); + XPR(NTR " -n disable checksum (encode/decode)\n"); + XPR(NTR " -C soft config (encode, undocumented)\n"); + XPR(NTR " -A [apphead] disable/provide application header (encode)\n"); + XPR(NTR " -J disable output (check/compute only)\n"); + XPR(NTR " 
-m arguments for \"merge\"\n"); + + XPR(NTR "the XDELTA environment variable may contain extra args:\n"); + XPR(NTR " XDELTA=\"-s source-x.y.tar.gz\" \\\n"); + XPR(NTR " tar --use-compress-program=xdelta3 \\\n"); + XPR(NTR " -cf target-x.z.tar.gz.vcdiff target-x.y\n"); + return EXIT_FAILURE; +} diff --git a/lib/xdelta3/xdelta3-merge.h b/lib/xdelta3/xdelta3-merge.h new file mode 100644 index 0000000..a093843 --- /dev/null +++ b/lib/xdelta3/xdelta3-merge.h @@ -0,0 +1,583 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ +#ifndef _XDELTA3_MERGE_H_ +#define _XDELTA3_MERGE_H_ + +int xd3_merge_inputs (xd3_stream *stream, + xd3_whole_state *source, + xd3_whole_state *input); + +static int +xd3_whole_state_init (xd3_stream *stream) +{ + XD3_ASSERT (stream->whole_target.adds == NULL); + XD3_ASSERT (stream->whole_target.inst == NULL); + XD3_ASSERT (stream->whole_target.wininfo == NULL); + XD3_ASSERT (stream->whole_target.length == 0); + + stream->whole_target.adds_alloc = XD3_ALLOCSIZE; + stream->whole_target.inst_alloc = XD3_ALLOCSIZE; + stream->whole_target.wininfo_alloc = XD3_ALLOCSIZE; + + if ((stream->whole_target.adds = (uint8_t*) + xd3_alloc (stream, stream->whole_target.adds_alloc, 1)) == NULL || + (stream->whole_target.inst = (xd3_winst*) + xd3_alloc (stream, stream->whole_target.inst_alloc, 1)) == NULL || + (stream->whole_target.wininfo = (xd3_wininfo*) + xd3_alloc (stream, stream->whole_target.wininfo_alloc, 1)) == NULL) + { + return ENOMEM; + } + return 0; +} + +static void +xd3_swap_whole_state (xd3_whole_state *a, + xd3_whole_state *b) +{ + xd3_whole_state tmp; + XD3_ASSERT (a->inst != NULL && a->adds != NULL); + XD3_ASSERT (b->inst != NULL && b->adds != NULL); + XD3_ASSERT (b->wininfo != NULL && b->wininfo != NULL); + memcpy (&tmp, a, sizeof (xd3_whole_state)); + memcpy (a, b, sizeof (xd3_whole_state)); + memcpy (b, &tmp, sizeof (xd3_whole_state)); +} + +static int +xd3_realloc_buffer (xd3_stream *stream, + usize_t current_units, + usize_t unit_size, + usize_t new_units, + usize_t *alloc_size, + void **alloc_ptr) +{ + usize_t needed; + usize_t new_alloc; + usize_t cur_size; + uint8_t *new_buf; + + needed = (current_units + new_units) * unit_size; + + if (needed <= *alloc_size) + { + return 0; + } + + cur_size = current_units * unit_size; + new_alloc = xd3_round_blksize (needed * 2, XD3_ALLOCSIZE); + + if ((new_buf = (uint8_t*) xd3_alloc (stream, new_alloc, 1)) == NULL) + { + return ENOMEM; + } + + if (cur_size != 0) + { + memcpy (new_buf, *alloc_ptr, cur_size); + } + + 
if (*alloc_ptr != NULL) + { + xd3_free (stream, *alloc_ptr); + } + + *alloc_size = new_alloc; + *alloc_ptr = new_buf; + + return 0; +} + +/* allocate one new output instruction */ +static int +xd3_whole_alloc_winst (xd3_stream *stream, + xd3_winst **winstp) +{ + int ret; + + if ((ret = xd3_realloc_buffer (stream, + stream->whole_target.instlen, + sizeof (xd3_winst), + 1, + & stream->whole_target.inst_alloc, + (void**) & stream->whole_target.inst))) + { + return ret; + } + + *winstp = &stream->whole_target.inst[stream->whole_target.instlen++]; + + return 0; +} + +static int +xd3_whole_alloc_adds (xd3_stream *stream, + usize_t count) +{ + return xd3_realloc_buffer (stream, + stream->whole_target.addslen, + 1, + count, + & stream->whole_target.adds_alloc, + (void**) & stream->whole_target.adds); +} + +static int +xd3_whole_alloc_wininfo (xd3_stream *stream, + xd3_wininfo **wininfop) +{ + int ret; + + if ((ret = xd3_realloc_buffer (stream, + stream->whole_target.wininfolen, + sizeof (xd3_wininfo), + 1, + & stream->whole_target.wininfo_alloc, + (void**) & stream->whole_target.wininfo))) + { + return ret; + } + + *wininfop = &stream->whole_target.wininfo[stream->whole_target.wininfolen++]; + + return 0; +} + +static int +xd3_whole_append_inst (xd3_stream *stream, + xd3_hinst *inst) +{ + int ret; + xd3_winst *winst; + + if ((ret = xd3_whole_alloc_winst (stream, &winst))) + { + return ret; + } + + winst->type = inst->type; + winst->mode = 0; + winst->size = inst->size; + winst->position = stream->whole_target.length; + stream->whole_target.length += inst->size; + + if (((inst->type == XD3_ADD) || (inst->type == XD3_RUN)) && + (ret = xd3_whole_alloc_adds (stream, + (inst->type == XD3_RUN ? 
1 : inst->size)))) + { + return ret; + } + + switch (inst->type) + { + case XD3_RUN: + winst->addr = stream->whole_target.addslen; + stream->whole_target.adds[stream->whole_target.addslen++] = + *stream->data_sect.buf++; + break; + + case XD3_ADD: + winst->addr = stream->whole_target.addslen; + memcpy (stream->whole_target.adds + stream->whole_target.addslen, + stream->data_sect.buf, + inst->size); + stream->data_sect.buf += inst->size; + stream->whole_target.addslen += inst->size; + break; + + default: + if (inst->addr < stream->dec_cpylen) + { + winst->mode = SRCORTGT (stream->dec_win_ind); + winst->addr = stream->dec_cpyoff + inst->addr; + } + else + { + winst->addr = (stream->dec_winstart + + inst->addr - + stream->dec_cpylen); + } + break; + } + + return 0; +} + +int +xd3_whole_append_window (xd3_stream *stream) +{ + int ret; + xd3_wininfo *wininfo; + + if ((ret = xd3_whole_alloc_wininfo (stream, &wininfo))) { return ret; } + + wininfo->length = stream->dec_tgtlen; + wininfo->offset = stream->dec_winstart; + wininfo->adler32 = stream->dec_adler32; + + while (stream->inst_sect.buf < stream->inst_sect.buf_max) + { + if ((ret = xd3_decode_instruction (stream))) + { + return ret; + } + + if ((stream->dec_current1.type != XD3_NOOP) && + (ret = xd3_whole_append_inst (stream, + & stream->dec_current1))) + { + return ret; + } + + if ((stream->dec_current2.type != XD3_NOOP) && + (ret = xd3_whole_append_inst (stream, + & stream->dec_current2))) + { + return ret; + } + } + + return 0; +} + +/* xd3_merge_input_output applies *source to *stream, returns the + * result in stream. 
*/ +static int xd3_merge_input_output (xd3_stream *stream, + xd3_whole_state *source) +{ + int ret; + xd3_stream tmp_stream; + memset (& tmp_stream, 0, sizeof (tmp_stream)); + if ((ret = xd3_config_stream (& tmp_stream, NULL)) || + (ret = xd3_whole_state_init (& tmp_stream)) || + (ret = xd3_merge_inputs (& tmp_stream, + source, + & stream->whole_target))) + { + XPR(NT XD3_LIB_ERRMSG (&tmp_stream, ret)); + return ret; + } + + /* the output is in tmp_stream.whole_state, swap into input */ + xd3_swap_whole_state (& stream->whole_target, + & tmp_stream.whole_target); + /* total allocation counts are preserved */ + xd3_free_stream (& tmp_stream); + return 0; +} + +static int +xd3_merge_run (xd3_stream *stream, + xd3_whole_state *target, + xd3_winst *iinst) +{ + int ret; + xd3_winst *oinst; + + if ((ret = xd3_whole_alloc_winst (stream, &oinst)) || + (ret = xd3_whole_alloc_adds (stream, 1))) + { + return ret; + } + + oinst->type = iinst->type; + oinst->mode = iinst->mode; + oinst->size = iinst->size; + oinst->addr = stream->whole_target.addslen; + + XD3_ASSERT (stream->whole_target.length == iinst->position); + oinst->position = stream->whole_target.length; + stream->whole_target.length += iinst->size; + + stream->whole_target.adds[stream->whole_target.addslen++] = + target->adds[iinst->addr]; + + return 0; +} + +static int +xd3_merge_add (xd3_stream *stream, + xd3_whole_state *target, + xd3_winst *iinst) +{ + int ret; + xd3_winst *oinst; + + if ((ret = xd3_whole_alloc_winst (stream, &oinst)) || + (ret = xd3_whole_alloc_adds (stream, iinst->size))) + { + return ret; + } + + oinst->type = iinst->type; + oinst->mode = iinst->mode; + oinst->size = iinst->size; + oinst->addr = stream->whole_target.addslen; + + XD3_ASSERT (stream->whole_target.length == iinst->position); + oinst->position = stream->whole_target.length; + stream->whole_target.length += iinst->size; + + memcpy(stream->whole_target.adds + stream->whole_target.addslen, + target->adds + iinst->addr, + iinst->size); 
+ + stream->whole_target.addslen += iinst->size; + + return 0; +} + +static int +xd3_merge_target_copy (xd3_stream *stream, + xd3_winst *iinst) +{ + int ret; + xd3_winst *oinst; + + if ((ret = xd3_whole_alloc_winst (stream, &oinst))) + { + return ret; + } + + XD3_ASSERT (stream->whole_target.length == iinst->position); + + memcpy (oinst, iinst, sizeof (*oinst)); + return 0; +} + +static int +xd3_merge_find_position (xd3_stream *stream, + xd3_whole_state *source, + xoff_t address, + usize_t *inst_num) +{ + usize_t low; + usize_t high; + + if (address >= source->length) + { + stream->msg = "Invalid copy offset in merge"; + return XD3_INVALID_INPUT; + } + + low = 0; + high = source->instlen; + + while (low != high) + { + xoff_t mid_lpos; + xoff_t mid_hpos; + usize_t mid = low + (high - low) / 2; + mid_lpos = source->inst[mid].position; + + if (address < mid_lpos) + { + high = mid; + continue; + } + + mid_hpos = mid_lpos + source->inst[mid].size; + + if (address >= mid_hpos) + { + low = mid + 1; + continue; + } + + *inst_num = mid; + return 0; + } + + stream->msg = "Internal error in merge"; + return XD3_INTERNAL; +} + +static int +xd3_merge_source_copy (xd3_stream *stream, + xd3_whole_state *source, + const xd3_winst *iinst_orig) +{ + int ret; + xd3_winst iinst; + usize_t sinst_num; + + memcpy (& iinst, iinst_orig, sizeof (iinst)); + + XD3_ASSERT (iinst.mode == VCD_SOURCE); + + if ((ret = xd3_merge_find_position (stream, source, + iinst.addr, &sinst_num))) + { + return ret; + } + + while (iinst.size > 0) + { + xd3_winst *sinst; + xd3_winst *minst; + usize_t sinst_offset; + usize_t sinst_left; + usize_t this_take; + + XD3_ASSERT (sinst_num < source->instlen); + + sinst = &source->inst[sinst_num]; + + XD3_ASSERT (iinst.addr >= sinst->position); + + sinst_offset = (usize_t)(iinst.addr - sinst->position); + + XD3_ASSERT (sinst->size > sinst_offset); + + sinst_left = sinst->size - sinst_offset; + this_take = xd3_min (iinst.size, sinst_left); + + XD3_ASSERT (this_take > 0); 
+ + if ((ret = xd3_whole_alloc_winst (stream, &minst))) + { + return ret; + } + + minst->size = this_take; + minst->type = sinst->type; + minst->position = iinst.position; + minst->mode = 0; + + switch (sinst->type) + { + case XD3_RUN: + if ((ret = xd3_whole_alloc_adds (stream, 1))) + { + return ret; + } + + minst->addr = stream->whole_target.addslen; + stream->whole_target.adds[stream->whole_target.addslen++] = + source->adds[sinst->addr]; + break; + case XD3_ADD: + if ((ret = xd3_whole_alloc_adds (stream, this_take))) + { + return ret; + } + + minst->addr = stream->whole_target.addslen; + memcpy(stream->whole_target.adds + stream->whole_target.addslen, + source->adds + sinst->addr + sinst_offset, + this_take); + stream->whole_target.addslen += this_take; + break; + default: + if (sinst->mode != 0) + { + minst->mode = sinst->mode; + minst->addr = sinst->addr + sinst_offset; + } + else + { + // Note: A better implementation will construct the + // mapping of output ranges, starting from the input + // range, applying deltas in forward order, using an + // interval tree. This code uses recursion to construct + // each copied range, recursively (using binary search + // in xd3_merge_find_position). + // + // TODO: This code can cause stack overflow. Fix as + // described above. + xd3_winst tinst; + tinst.type = XD3_CPY; + tinst.mode = iinst.mode; + tinst.addr = sinst->addr + sinst_offset; + tinst.size = this_take; + tinst.position = iinst.position; + + // The instruction allocated in this frame will not be used. + stream->whole_target.instlen -= 1; + + if ((ret = xd3_merge_source_copy (stream, source, &tinst))) + { + return ret; + } + } + break; + } + + iinst.position += this_take; + iinst.addr += this_take; + iinst.size -= this_take; + sinst_num += 1; + } + + return 0; +} + +/* xd3_merge_inputs() applies *input to *source, returns its result in + * stream. 
*/ +int xd3_merge_inputs (xd3_stream *stream, + xd3_whole_state *source, + xd3_whole_state *input) +{ + int ret = 0; + usize_t i; + size_t input_i; + + for (i = 0; i < input->wininfolen; ++i) { + xd3_wininfo *copyinfo; + + if ((ret = xd3_whole_alloc_wininfo (stream, ©info))) { return ret; } + + *copyinfo = input->wininfo[i]; + } + + /* iterate over each instruction. */ + for (input_i = 0; ret == 0 && input_i < input->instlen; ++input_i) + { + xd3_winst *iinst = &input->inst[input_i]; + + switch (iinst->type) + { + case XD3_RUN: + ret = xd3_merge_run (stream, input, iinst); + break; + case XD3_ADD: + ret = xd3_merge_add (stream, input, iinst); + break; + default: + if (iinst->mode == 0) + { + ret = xd3_merge_target_copy (stream, iinst); + } + else if (iinst->mode == VCD_TARGET) + { + ret = XD3_INVALID_INPUT; + } + else + { + ret = xd3_merge_source_copy (stream, source, iinst); + } + + /* The whole_target.length is not updated in the xd3_merge*copy + * routine because of recursion in xd3_merge_source_copy. */ + stream->whole_target.length += iinst->size; + break; + } + } + + return ret; +} + +#endif diff --git a/lib/xdelta3/xdelta3-second.h b/lib/xdelta3/xdelta3-second.h new file mode 100644 index 0000000..8dc5b47 --- /dev/null +++ b/lib/xdelta3/xdelta3-second.h @@ -0,0 +1,321 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ +#ifndef _XDELTA3_SECOND_H_ +#define _XDELTA3_SECOND_H_ + +static inline void xd3_bit_state_encode_init (bit_state *bits) +{ + bits->cur_byte = 0; + bits->cur_mask = 1; +} + +static inline int xd3_decode_bits (xd3_stream *stream, + bit_state *bits, + const uint8_t **input, + const uint8_t *input_max, + usize_t nbits, + usize_t *valuep) +{ + usize_t value = 0; + usize_t vmask = 1 << nbits; + + if (bits->cur_mask == 0x100) { goto next_byte; } + + for (;;) + { + do + { + vmask >>= 1; + + if (bits->cur_byte & bits->cur_mask) + { + value |= vmask; + } + + bits->cur_mask <<= 1; + + if (vmask == 1) { goto done; } + } + while (bits->cur_mask != 0x100); + + next_byte: + + if (*input == input_max) + { + stream->msg = "secondary decoder end of input"; + return XD3_INTERNAL; + } + + bits->cur_byte = *(*input)++; + bits->cur_mask = 1; + } + + done: + + IF_DEBUG2 (DP(RINT "(d) %"W"u ", value)); + + (*valuep) = value; + return 0; +} + +#if REGRESSION_TEST +/* There may be extra bits at the end of secondary decompression, this macro + * checks for non-zero bits. This is overly strict, but helps pass the + * single-bit-error regression test. 
*/ +static int +xd3_test_clean_bits (xd3_stream *stream, bit_state *bits) +{ + for (; bits->cur_mask != 0x100; bits->cur_mask <<= 1) + { + if (bits->cur_byte & bits->cur_mask) + { + stream->msg = "secondary decoder garbage"; + return XD3_INTERNAL; + } + } + + return 0; +} +#endif + +static int +xd3_get_secondary (xd3_stream *stream, xd3_sec_stream **sec_streamp, + int is_encode) +{ + if (*sec_streamp == NULL) + { + int ret; + + if ((*sec_streamp = stream->sec_type->alloc (stream)) == NULL) + { + stream->msg = "error initializing secondary stream"; + return XD3_INVALID; + } + + if ((ret = stream->sec_type->init (stream, *sec_streamp, is_encode)) != 0) + { + return ret; + } + } + + return 0; +} + +static int +xd3_decode_secondary (xd3_stream *stream, + xd3_desect *sect, + xd3_sec_stream **sec_streamp) +{ + usize_t dec_size; + uint8_t *out_used; + int ret; + + if ((ret = xd3_get_secondary (stream, sec_streamp, 0)) != 0) + { + return ret; + } + + /* Decode the size, allocate the buffer. */ + if ((ret = xd3_read_size (stream, & sect->buf, + sect->buf_max, & dec_size)) || + (ret = xd3_decode_allocate (stream, dec_size, + & sect->copied2, & sect->alloc2))) + { + return ret; + } + + if (dec_size == 0) + { + stream->msg = "secondary decoder invalid output size"; + return XD3_INVALID_INPUT; + } + + out_used = sect->copied2; + + if ((ret = stream->sec_type->decode (stream, *sec_streamp, + & sect->buf, sect->buf_max, + & out_used, out_used + dec_size))) + { + return ret; + } + + if (sect->buf != sect->buf_max) + { + stream->msg = "secondary decoder finished with unused input"; + return XD3_INTERNAL; + } + + if (out_used != sect->copied2 + dec_size) + { + stream->msg = "secondary decoder short output"; + return XD3_INTERNAL; + } + + sect->buf = sect->copied2; + sect->buf_max = sect->copied2 + dec_size; + sect->size = dec_size; + + return 0; +} + +#if XD3_ENCODER +static inline int xd3_encode_bit (xd3_stream *stream, + xd3_output **output, + bit_state *bits, + usize_t bit) +{ + 
int ret; + + if (bit) + { + bits->cur_byte |= bits->cur_mask; + } + + /* OPT: Might help to buffer more than 8 bits at once. */ + if (bits->cur_mask == 0x80) + { + if ((ret = xd3_emit_byte (stream, output, bits->cur_byte)) != 0) + { + return ret; + } + + bits->cur_mask = 1; + bits->cur_byte = 0; + } + else + { + bits->cur_mask <<= 1; + } + + return 0; +} + +static inline int xd3_flush_bits (xd3_stream *stream, + xd3_output **output, + bit_state *bits) +{ + return (bits->cur_mask == 1) ? 0 : + xd3_emit_byte (stream, output, bits->cur_byte); +} + +static inline int xd3_encode_bits (xd3_stream *stream, + xd3_output **output, + bit_state *bits, + usize_t nbits, + usize_t value) +{ + int ret; + usize_t mask = 1 << nbits; + + XD3_ASSERT (nbits > 0); + XD3_ASSERT (nbits < sizeof (usize_t) * 8); + XD3_ASSERT (value < mask); + + do + { + mask >>= 1; + + if ((ret = xd3_encode_bit (stream, output, bits, value & mask))) + { + return ret; + } + } + while (mask != 1); + + IF_DEBUG2 (DP(RINT "(e) %"W"u ", value)); + + return 0; +} + +static int +xd3_encode_secondary (xd3_stream *stream, + xd3_output **head, + xd3_output **tail, + xd3_sec_stream **sec_streamp, + xd3_sec_cfg *cfg, + int *did_it) +{ + xd3_output *tmp_head; + xd3_output *tmp_tail; + + usize_t comp_size; + usize_t orig_size; + + int ret; + + orig_size = xd3_sizeof_output (*head); + + if (orig_size < SECONDARY_MIN_INPUT) { return 0; } + + if ((ret = xd3_get_secondary (stream, sec_streamp, 1)) != 0) + { + return ret; + } + + tmp_head = xd3_alloc_output (stream, NULL); + + /* Encode the size, encode the data. Encoding the size makes it + * simpler, but is a little gross. Should not need the entire + * section in contiguous memory, but it is much easier this way. */ + if ((ret = xd3_emit_size (stream, & tmp_head, orig_size)) || + (ret = stream->sec_type->encode (stream, *sec_streamp, *head, + tmp_head, cfg))) + { + goto getout; + } + + /* If the secondary compressor determines it's no good, it returns + * XD3_NOSECOND. 
*/ + + /* Setup tmp_tail, comp_size */ + tmp_tail = tmp_head; + comp_size = tmp_head->next; + + while (tmp_tail->next_page != NULL) + { + tmp_tail = tmp_tail->next_page; + comp_size += tmp_tail->next; + } + + XD3_ASSERT (comp_size == xd3_sizeof_output (tmp_head)); + XD3_ASSERT (tmp_tail != NULL); + + if (comp_size < (orig_size - SECONDARY_MIN_SAVINGS) || cfg->inefficient) + { + if (comp_size < orig_size) + { + IF_DEBUG1(DP(RINT "[encode_secondary] saved %"W"u bytes: %"W"u -> %"W"u (%0.2f%%)\n", + orig_size - comp_size, orig_size, comp_size, + 100.0 * (double) comp_size / (double) orig_size)); + } + + xd3_free_output (stream, *head); + + *head = tmp_head; + *tail = tmp_tail; + *did_it = 1; + } + else + { + getout: + if (ret == XD3_NOSECOND) { ret = 0; } + xd3_free_output (stream, tmp_head); + } + + return ret; +} +#endif /* XD3_ENCODER */ +#endif /* _XDELTA3_SECOND_H_ */ diff --git a/lib/xdelta3/xdelta3-test.h b/lib/xdelta3/xdelta3-test.h new file mode 100644 index 0000000..5d6cf45 --- /dev/null +++ b/lib/xdelta3/xdelta3-test.h @@ -0,0 +1,3022 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +static const uint32_t TEST_SEED1 = 5489UL; +#define MT_LEN 624 +#define MT_IA 397 +static const uint32_t UPPER_MASK = 0x80000000; +static const uint32_t LOWER_MASK = 0x7FFFFFFF; +static const uint32_t MATRIX_A = 0x9908B0DF; + +#ifndef SHELL_TESTS +#define SHELL_TESTS 1 +#endif + +typedef struct mtrand mtrand; + +struct mtrand { + int mt_index_; + uint32_t mt_buffer_[MT_LEN]; +}; + +int test_compare_files (const char* tgt, const char *rec); +void mt_init(mtrand *mt, uint32_t seed); +uint32_t mt_random (mtrand *mt); +int test_setup (void); + +/* The Mersenne Twister code used herein is code to Michael Brundage. Thanks! + * http://www.qbrundage.com/michaelb/pubs/essays/random_number_generation.html + */ +void mt_init(mtrand *mt, uint32_t seed) { + int i; + mt->mt_buffer_[0] = seed; + mt->mt_index_ = MT_LEN; + for (i = 1; i < MT_LEN; i++) { + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. 
*/ + /* 2002/01/09 modified by Makoto Matsumoto */ + mt->mt_buffer_[i] = + (1812433253UL * (mt->mt_buffer_[i-1] ^ + (mt->mt_buffer_[i-1] >> 30)) + i); + } +} + +uint32_t mt_random (mtrand *mt) { + uint32_t y; + unsigned long mag01[2]; + mag01[0] = 0; + mag01[1] = MATRIX_A; + + if (mt->mt_index_ >= MT_LEN) { + int kk; + + for (kk = 0; kk < MT_LEN - MT_IA; kk++) { + y = (mt->mt_buffer_[kk] & UPPER_MASK) | + (mt->mt_buffer_[kk + 1] & LOWER_MASK); + mt->mt_buffer_[kk] = mt->mt_buffer_[kk + MT_IA] ^ + (y >> 1) ^ mag01[y & 0x1UL]; + } + for (;kk < MT_LEN - 1; kk++) { + y = (mt->mt_buffer_[kk] & UPPER_MASK) | + (mt->mt_buffer_[kk + 1] & LOWER_MASK); + mt->mt_buffer_[kk] = mt->mt_buffer_[kk + (MT_IA - MT_LEN)] ^ + (y >> 1) ^ mag01[y & 0x1UL]; + } + y = (mt->mt_buffer_[MT_LEN - 1] & UPPER_MASK) | + (mt->mt_buffer_[0] & LOWER_MASK); + mt->mt_buffer_[MT_LEN - 1] = mt->mt_buffer_[MT_IA - 1] ^ + (y >> 1) ^ mag01[y & 0x1UL]; + mt->mt_index_ = 0; + } + + y = mt->mt_buffer_[mt->mt_index_++]; + + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680UL; + y ^= (y << 15) & 0xefc60000UL; + y ^= (y >> 18); + + return y; +} + +static mtrand static_mtrand; + +#include + +static uint32_t +mt_exp_rand (uint32_t mean, uint32_t max_value) +{ + double mean_d = mean; + double erand = log (1.0 / (mt_random (&static_mtrand) / + (double)UINT32_MAX)); + uint32_t x = (uint32_t) (mean_d * erand + 0.5); + + return xd3_min (x, max_value); +} + +#if SHELL_TESTS +#include +#endif + +#define MSG_IS(x) (stream->msg != NULL && strcmp ((x), stream->msg) == 0) + +static const usize_t TWO_MEGS_AND_DELTA = (3 << 20); +static const usize_t ADDR_CACHE_ROUNDS = 10000; + +static const usize_t TEST_FILE_MEAN = 16384; +static const double TEST_ADD_MEAN = 128; +static const double TEST_ADD_MAX = 512; +static const double TEST_ADD_RATIO = 0.1; +static const double TEST_EPSILON = 0.25; + +#define TESTBUFSIZE (1024 * 16) + +#define TESTFILESIZE (1024) + +static char TEST_TARGET_FILE[TESTFILESIZE]; +static char 
TEST_SOURCE_FILE[TESTFILESIZE]; +static char TEST_DELTA_FILE[TESTFILESIZE]; +static char TEST_RECON_FILE[TESTFILESIZE]; +static char TEST_RECON2_FILE[TESTFILESIZE]; +static char TEST_COPY_FILE[TESTFILESIZE]; +static char TEST_NOPERM_FILE[TESTFILESIZE]; + +#define CHECK(cond) \ + if (!(cond)) { \ + XPR(NT __FILE__":%d: check failure: " #cond, __LINE__); \ + abort(); } + +#if SHELL_TESTS +/* Use a fixed soft config so that test values are fixed. See also + * test_compress_text(). */ +static const char* test_softcfg_str = "-C9,3,4,8,2,36,70"; +#endif + +/*********************************************************************** + TEST HELPERS + ***********************************************************************/ + +static void DOT (void) { XPR(NTR "."); } +static int do_cmd (xd3_stream *stream, const char *buf) +{ + int ret; + if ((ret = system (buf)) != 0) + { + if (WIFEXITED (ret)) + { + stream->msg = "command exited non-zero"; + IF_DEBUG1 (XPR(NT "command was: %s\n", buf)); + } + else + { + stream->msg = "abnormal command termination"; + } + return ret; + } + return 0; +} + +static int do_fail (xd3_stream *stream, const char *buf) +{ + int ret; + ret = system (buf); + if (! WIFEXITED (ret) || WEXITSTATUS (ret) != 1) + { + stream->msg = "command should have not succeeded"; + XPR(NT "command was %s\n", buf); + return XD3_INTERNAL; + } + return 0; +} + +/* Test that the exponential distribution actually produces its mean. 
*/ +static int +test_random_numbers (xd3_stream *stream, int ignore) +{ + usize_t i; + usize_t sum = 0; + usize_t mean = 50; + usize_t n_rounds = 1000000; + double average, error; + double allowed_error = 0.1; + + mt_init (& static_mtrand, 0x9f73f7fe); + + for (i = 0; i < n_rounds; i += 1) + { + sum += mt_exp_rand (mean, UINT32_MAX); + } + + average = (double) sum / (double) n_rounds; + error = average - (double) mean; + + if (error < allowed_error && error > -allowed_error) + { + return 0; + } + + /*XPR(NT "error is %f\n", error);*/ + stream->msg = "random distribution looks broken"; + return XD3_INTERNAL; +} + +static int +test_printf_xoff (xd3_stream *stream, int ignore) +{ + char buf[64]; + xoff_t x = XOFF_T_MAX; + snprintf_func (buf, sizeof(buf), "%"Q"u", x); + const char *expect = XD3_USE_LARGEFILE64 ? + "18446744073709551615" : "4294967295"; + if (strcmp (buf, expect) == 0) { + return 0; + } + return XD3_INTERNAL; +} + +static void +test_unlink (char* file) +{ + int ret; + if (file != NULL && *file != 0 && + (ret = unlink (file)) != 0 && errno != ENOENT) + { + XPR(NT "unlink %s failed: %s\n", file, strerror(ret)); + } +} + +static void +test_cleanup (void) +{ +#if 1 + test_unlink (TEST_TARGET_FILE); + test_unlink (TEST_SOURCE_FILE); + test_unlink (TEST_DELTA_FILE); + test_unlink (TEST_RECON_FILE); + test_unlink (TEST_RECON2_FILE); + test_unlink (TEST_COPY_FILE); + test_unlink (TEST_NOPERM_FILE); +#endif +} + +int test_setup (void) +{ + static int x = 0; + pid_t pid = getpid(); + x++; + + test_cleanup(); + + snprintf_func (TEST_TARGET_FILE, TESTFILESIZE, + "/tmp/xdtest.%d.target.%d", pid, x); + snprintf_func (TEST_SOURCE_FILE, TESTFILESIZE, + "/tmp/xdtest.%d.source.%d", pid, x); + snprintf_func (TEST_DELTA_FILE, TESTFILESIZE, + "/tmp/xdtest.%d.delta.%d", pid, x); + snprintf_func (TEST_RECON_FILE, TESTFILESIZE, + "/tmp/xdtest.%d.recon.%d", pid, x); + snprintf_func (TEST_RECON2_FILE, TESTFILESIZE, + "/tmp/xdtest.%d.recon2.%d", pid, x); + snprintf_func 
(TEST_COPY_FILE, TESTFILESIZE, + "/tmp/xdtest.%d.copy.%d", pid, x); + snprintf_func (TEST_NOPERM_FILE, TESTFILESIZE, + "/tmp/xdtest.%d.noperm.%d", pid, x); + + test_cleanup(); + return 0; +} + +static int +test_make_inputs (xd3_stream *stream, xoff_t *ss_out, xoff_t *ts_out) +{ + usize_t ts = (mt_random (&static_mtrand) % TEST_FILE_MEAN) + + TEST_FILE_MEAN / 2; + usize_t ss = (mt_random (&static_mtrand) % TEST_FILE_MEAN) + + TEST_FILE_MEAN / 2; + uint8_t *buf = (uint8_t*) malloc (ts + ss), *sbuf = buf, *tbuf = buf + ss; + usize_t sadd = 0, sadd_max = (usize_t)(ss * TEST_ADD_RATIO); + FILE *tf = NULL, *sf = NULL; + usize_t i, j; + int ret; + + if (buf == NULL) { return ENOMEM; } + + if ((tf = fopen (TEST_TARGET_FILE, "w")) == NULL || + (ss_out != NULL && (sf = fopen (TEST_SOURCE_FILE, "w")) == NULL)) + { + stream->msg = "write failed"; + ret = get_errno (); + goto failure; + } + + if (ss_out != NULL) + { + for (i = 0; i < ss; ) + { + sbuf[i++] = (uint8_t) mt_random (&static_mtrand); + } + } + + /* Then modify the data to produce copies, everything not copied is + * an add. The following logic produces the TEST_ADD_RATIO. The + * variable SADD contains the number of adds so far, which should + * not exceed SADD_MAX. */ + + /* XPR(NT "ss = %u ts = %u\n", ss, ts); */ + for (i = 0; i < ts; ) + { + usize_t left = ts - i; + usize_t next = mt_exp_rand ((uint32_t) TEST_ADD_MEAN, + (uint32_t) TEST_ADD_MAX); + usize_t add_left = sadd_max - sadd; + double add_prob = (left == 0) ? 0 : (add_left / (double) left); + int do_copy; + + next = xd3_min (left, next); + do_copy = (next > add_left || + (mt_random (&static_mtrand) / \ + (double)USIZE_T_MAX) >= add_prob); + + if (ss_out == NULL) + { + do_copy &= (i > 0); + } + else + { + do_copy &= (ss - next) > 0; + } + + if (do_copy) + { + /* Copy */ + size_t offset = mt_random (&static_mtrand) % ((ss_out == NULL) ? 
+ i : + (ss - next)); + /* XPR(NT "[%u] copy %u at %u ", i, next, offset); */ + + for (j = 0; j < next; j += 1) + { + char c = ((ss_out == NULL) ? tbuf : sbuf)[offset + j]; + /* XPR(NT "%x%x", (c >> 4) & 0xf, c & 0xf); */ + tbuf[i++] = c; + } + /* XPR(NT "\n"); */ + } + else + { + /* Add */ + /* XPR(NT "[%u] add %u ", i, next); */ + for (j = 0; j < next; j += 1) + { + char c = (char) mt_random (&static_mtrand); + /* XPR(NT "%x%x", (c >> 4) & 0xf, c & 0xf); */ + tbuf[i++] = c; + } + /* XPR(NT "\n"); */ + sadd += next; + } + } + + /* XPR(NT "sadd = %u max = %u\n", sadd, sadd_max); */ + + if ((fwrite (tbuf, 1, ts, tf) != ts) || + (ss_out != NULL && (fwrite (sbuf, 1, ss, sf) != ss))) + { + stream->msg = "write failed"; + ret = get_errno (); + goto failure; + } + + if ((ret = fclose (tf)) || (ss_out != NULL && (ret = fclose (sf)))) + { + stream->msg = "close failed"; + ret = get_errno (); + goto failure; + } + + if (ts_out) { (*ts_out) = ts; } + if (ss_out) { (*ss_out) = ss; } + + failure: + free (buf); + return ret; +} + +int +test_compare_files (const char* tgt, const char *rec) +{ + FILE *orig, *recons; + static uint8_t obuf[TESTBUFSIZE], rbuf[TESTBUFSIZE]; + xoff_t offset = 0; + size_t i; + size_t oc, rc; + xoff_t diffs = 0; + + if ((orig = fopen (tgt, "r")) == NULL) + { + XPR(NT "open %s failed\n", tgt); + return get_errno (); + } + + if ((recons = fopen (rec, "r")) == NULL) + { + XPR(NT "open %s failed\n", rec); + return get_errno (); + } + + for (;;) + { + oc = fread (obuf, 1, TESTBUFSIZE, orig); + rc = fread (rbuf, 1, TESTBUFSIZE, recons); + + if (oc != rc) + { + return XD3_INTERNAL; + } + + if (oc == 0) + { + break; + } + + for (i = 0; i < oc; i += 1) + { + if (obuf[i] != rbuf[i]) + { + XPR(NT "byte %u (read %u @ %"Q"u) %d != %d\n", + (int)i, (int)oc, offset, obuf[i], rbuf[i]); + diffs++; + return XD3_INTERNAL; + } + } + + offset += oc; + } + + fclose (orig); + fclose (recons); + if (diffs != 0) + { + return XD3_INTERNAL; + } + return 0; +} + +static int 
+test_copy_to (const char *from, const char *to) +{ + char buf[TESTBUFSIZE]; + int ret; + + snprintf_func (buf, TESTBUFSIZE, "cp -f %s %s", from, to); + + if ((ret = system (buf)) != 0) + { + return XD3_INTERNAL; + } + + return 0; +} + +static int +test_save_copy (const char *origname) +{ + return test_copy_to(origname, TEST_COPY_FILE); +} + +static int +test_file_size (const char* file, xoff_t *size) +{ + struct stat sbuf; + int ret; + (*size) = 0; + + if (stat (file, & sbuf) < 0) + { + ret = get_errno (); + XPR(NT "stat failed: %s: %s\n", file, strerror (ret)); + return ret; + } + + if (! S_ISREG (sbuf.st_mode)) + { + ret = XD3_INTERNAL; + XPR(NT "not a regular file: %s: %s\n", file, strerror (ret)); + return ret; + } + + (*size) = sbuf.st_size; + return 0; +} + +/*********************************************************************** + READ OFFSET + ***********************************************************************/ + +/* Common test for read_integer errors: encodes a 64-bit value and + * then attempts to read as a 32-bit value. If TRUNC is non-zero, + * attempts to get errors by shortening the input, otherwise it should + * overflow. Expects XD3_INTERNAL and MSG. */ +static int +test_read_integer_error (xd3_stream *stream, usize_t trunto, const char *msg) +{ + uint64_t eval = 1ULL << 34; + uint32_t rval; + xd3_output *buf = NULL; + const uint8_t *max; + const uint8_t *inp; + int ret; + + buf = xd3_alloc_output (stream, buf); + + if ((ret = xd3_emit_uint64_t (stream, & buf, eval))) + { + goto fail; + } + + again: + + inp = buf->base; + max = buf->base + buf->next - trunto; + + if ((ret = xd3_read_uint32_t (stream, & inp, max, & rval)) != + XD3_INVALID_INPUT || + !MSG_IS (msg)) + { + ret = XD3_INTERNAL; + } + else if (trunto && trunto < buf->next) + { + trunto += 1; + goto again; + } + else + { + ret = 0; + } + + fail: + xd3_free_output (stream, buf); + return ret; +} + +/* Test integer overflow using the above routine. 
*/ +static int +test_decode_integer_overflow (xd3_stream *stream, int unused) +{ + return test_read_integer_error (stream, 0, "overflow in read_intger"); +} + +/* Test integer EOI using the above routine. */ +static int +test_decode_integer_end_of_input (xd3_stream *stream, int unused) +{ + return test_read_integer_error (stream, 1, "end-of-input in read_integer"); +} + +/* Test that emit_integer/decode_integer/sizeof_integer/read_integer + * work on correct inputs. Tests powers of (2^7), plus or minus, up + * to the maximum value. */ +#define TEST_ENCODE_DECODE_INTEGER(TYPE,ONE,MAX) \ + xd3_output *rbuf = NULL; \ + xd3_output *dbuf = NULL; \ + TYPE values[64]; \ + usize_t nvalues = 0; \ + usize_t i; \ + int ret = 0; \ + \ + for (i = 0; i < (sizeof (TYPE) * 8); i += 7) \ + { \ + values[nvalues++] = (ONE << i) - ONE; \ + values[nvalues++] = (ONE << i); \ + values[nvalues++] = (ONE << i) + ONE; \ + } \ + \ + values[nvalues++] = MAX-ONE; \ + values[nvalues++] = MAX; \ + \ + rbuf = xd3_alloc_output (stream, rbuf); \ + dbuf = xd3_alloc_output (stream, dbuf); \ + \ + for (i = 0; i < nvalues; i += 1) \ + { \ + const uint8_t *max; \ + const uint8_t *inp; \ + TYPE val; \ + \ + DOT (); \ + rbuf->next = 0; \ + \ + if ((ret = xd3_emit_ ## TYPE (stream, & rbuf, values[i])) || \ + (ret = xd3_emit_ ## TYPE (stream, & dbuf, values[i]))) \ + { \ + goto fail; \ + } \ + \ + inp = rbuf->base; \ + max = rbuf->base + rbuf->next; \ + \ + if (rbuf->next != xd3_sizeof_ ## TYPE (values[i])) \ + { \ + ret = XD3_INTERNAL; \ + goto fail; \ + } \ + \ + if ((ret = xd3_read_ ## TYPE (stream, & inp, max, & val))) \ + { \ + goto fail; \ + } \ + \ + if (val != values[i]) \ + { \ + ret = XD3_INTERNAL; \ + goto fail; \ + } \ + \ + DOT (); \ + } \ + \ + stream->next_in = dbuf->base; \ + stream->avail_in = dbuf->next; \ + \ + for (i = 0; i < nvalues; i += 1) \ + { \ + TYPE val; \ + \ + if ((ret = xd3_decode_ ## TYPE (stream, & val))) \ + { \ + goto fail; \ + } \ + \ + if (val != values[i]) \ + { \ + ret 
= XD3_INTERNAL; \ + goto fail; \ + } \ + } \ + \ + if (stream->avail_in != 0) \ + { \ + ret = XD3_INTERNAL; \ + goto fail; \ + } \ + \ + fail: \ + xd3_free_output (stream, rbuf); \ + xd3_free_output (stream, dbuf); \ + \ + return ret + +static int +test_encode_decode_uint32_t (xd3_stream *stream, int unused) +{ + TEST_ENCODE_DECODE_INTEGER(uint32_t,1U,UINT32_MAX); +} + +static int +test_encode_decode_uint64_t (xd3_stream *stream, int unused) +{ + TEST_ENCODE_DECODE_INTEGER(uint64_t,1ULL,UINT64_MAX); +} + +static int +test_usize_t_overflow (xd3_stream *stream, int unused) +{ + if (USIZE_T_OVERFLOW (USIZE_T_MAX, 0)) { goto fail; } + if (USIZE_T_OVERFLOW (0, USIZE_T_MAX)) { goto fail; } + if (USIZE_T_OVERFLOW (USIZE_T_MAX / 2, USIZE_T_MAX / 2)) { goto fail; } + if (USIZE_T_OVERFLOW (USIZE_T_MAX / 2, USIZE_T_MAX / 2 + 1)) { goto fail; } + + if (! USIZE_T_OVERFLOW (USIZE_T_MAX, 1)) { goto fail; } + if (! USIZE_T_OVERFLOW (1, USIZE_T_MAX)) { goto fail; } + if (! USIZE_T_OVERFLOW (USIZE_T_MAX / 2 + 1, USIZE_T_MAX / 2 + 1)) { goto fail; } + + return 0; + + fail: + stream->msg = "incorrect overflow computation"; + return XD3_INTERNAL; +} + +static int +test_forward_match (xd3_stream *stream, int unused) +{ + usize_t i; + uint8_t buf1[256], buf2[256]; + + memset(buf1, 0, 256); + memset(buf2, 0, 256); + + for (i = 0; i < 256; i++) + { + CHECK(xd3_forward_match(buf1, buf2, i) == i); + } + + for (i = 0; i < 255; i++) + { + buf2[i] = 1; + CHECK(xd3_forward_match(buf1, buf2, 256) == i); + buf2[i] = 0; + } + + return 0; +} + +/*********************************************************************** + Address cache + ***********************************************************************/ + +static int +test_address_cache (xd3_stream *stream, int unused) +{ + int ret; + usize_t i; + usize_t offset; + usize_t *addrs; + uint8_t *big_buf, *buf_max; + const uint8_t *buf; + xd3_output *outp; + uint8_t *modes; + int mode_counts[16]; + + stream->acache.s_near = 
stream->code_table_desc->near_modes; + stream->acache.s_same = stream->code_table_desc->same_modes; + + if ((ret = xd3_encode_init_partial (stream))) { return ret; } + + addrs = (usize_t*) xd3_alloc (stream, sizeof (usize_t), ADDR_CACHE_ROUNDS); + modes = (uint8_t*) xd3_alloc (stream, sizeof (uint8_t), ADDR_CACHE_ROUNDS); + + memset (mode_counts, 0, sizeof (mode_counts)); + memset (modes, 0, ADDR_CACHE_ROUNDS); + + addrs[0] = 0; + + mt_init (& static_mtrand, 0x9f73f7fc); + + /* First pass: encode addresses */ + xd3_init_cache (& stream->acache); + + for (offset = 1; offset < ADDR_CACHE_ROUNDS; offset += 1) + { + double p; + usize_t addr; + usize_t prev_i; + usize_t nearby; + + p = (mt_random (&static_mtrand) / (double)UINT32_MAX); + prev_i = mt_random (&static_mtrand) % offset; + nearby = (mt_random (&static_mtrand) % 256) % offset; + nearby = xd3_max (1U, nearby); + + if (p < 0.1) { addr = addrs[offset-nearby]; } + else if (p < 0.4) { addr = xd3_min (addrs[prev_i] + nearby, offset-1); } + else { addr = prev_i; } + + if ((ret = xd3_encode_address (stream, addr, offset, & modes[offset]))) { return ret; } + + addrs[offset] = addr; + mode_counts[modes[offset]] += 1; + } + + /* Copy addresses into a contiguous buffer. */ + big_buf = (uint8_t*) xd3_alloc (stream, xd3_sizeof_output (ADDR_HEAD (stream)), 1); + + for (offset = 0, outp = ADDR_HEAD (stream); outp != NULL; offset += outp->next, outp = outp->next_page) + { + memcpy (big_buf + offset, outp->base, outp->next); + } + + buf_max = big_buf + offset; + buf = big_buf; + + /* Second pass: decode addresses */ + xd3_init_cache (& stream->acache); + + for (offset = 1; offset < ADDR_CACHE_ROUNDS; offset += 1) + { + usize_t addr; + + if ((ret = xd3_decode_address (stream, offset, modes[offset], + & buf, buf_max, & addr))) + { + return ret; + } + + if (addr != addrs[offset]) + { + stream->msg = "incorrect decoded address"; + return XD3_INTERNAL; + } + } + + /* Check that every byte, mode was used. 
*/ + if (buf != buf_max) + { + stream->msg = "address bytes not used"; + return XD3_INTERNAL; + } + + for (i = 0; i < (2 + stream->acache.s_same + stream->acache.s_near); i += 1) + { + if (mode_counts[i] == 0) + { + stream->msg = "address mode not used"; + return XD3_INTERNAL; + } + } + + xd3_free (stream, modes); + xd3_free (stream, addrs); + xd3_free (stream, big_buf); + + return 0; +} + +/*********************************************************************** + Encode and decode with single bit error + ***********************************************************************/ + +/* It compresses from 256 to around 185 bytes. + * Avoids matching addresses that are a single-bit difference. + * Avoids matching address 0. */ +static const uint8_t test_text[] = +"this is a story\n" +"abouttttttttttt\n" +"- his is a stor\n" +"- about nothing " +" all. boutique -" +"his story is a -" +"about " +"what happens all" +" the time what -" +"am I ttttttt the" +" person said, so" +" what, per son -" +" gory story is -" +" about nothing -" +"tttttt to test -" +"his sto nothing"; + +static const uint8_t test_apphead[] = "header test"; + +static int +test_compress_text (xd3_stream *stream, + uint8_t *encoded, + usize_t *encoded_size) +{ + int ret; + xd3_config cfg; + int oflags = stream->flags; + int flags = stream->flags | XD3_FLUSH; + + xd3_free_stream (stream); + xd3_init_config (& cfg, flags); + + /* This configuration is fixed so that the "expected non-error" the counts in + * decompress_single_bit_errors are too. See test_coftcfg_str. 
*/ + cfg.smatch_cfg = XD3_SMATCH_SOFT; + cfg.smatcher_soft.name = "test"; + cfg.smatcher_soft.large_look = 64; /* no source, not used */ + cfg.smatcher_soft.large_step = 64; /* no source, not used */ + cfg.smatcher_soft.small_look = 4; + cfg.smatcher_soft.small_chain = 128; + cfg.smatcher_soft.small_lchain = 16; + cfg.smatcher_soft.max_lazy = 8; + cfg.smatcher_soft.long_enough = 128; + + xd3_config_stream (stream, & cfg); + + (*encoded_size) = 0; + + xd3_set_appheader (stream, test_apphead, + (usize_t) strlen ((char*) test_apphead)); + + if ((ret = xd3_encode_stream (stream, test_text, sizeof (test_text), + encoded, encoded_size, 4*sizeof (test_text)))) { goto fail; } + + if ((ret = xd3_close_stream (stream))) { goto fail; } + + fail: + xd3_free_stream (stream); + xd3_init_config (& cfg, oflags); + xd3_config_stream (stream, & cfg); + return ret; +} + +static int +test_decompress_text (xd3_stream *stream, uint8_t *enc, usize_t enc_size, usize_t test_desize) +{ + xd3_config cfg; + char decoded[sizeof (test_text)]; + uint8_t *apphead; + usize_t apphead_size; + usize_t decoded_size; + const char *msg; + int ret; + usize_t pos = 0; + int flags = stream->flags; + usize_t take; + + input: + /* Test decoding test_desize input bytes at a time */ + take = xd3_min (enc_size - pos, test_desize); + CHECK(take > 0); + + xd3_avail_input (stream, enc + pos, take); + again: + ret = xd3_decode_input (stream); + + pos += take; + take = 0; + + switch (ret) + { + case XD3_OUTPUT: + break; + case XD3_WINSTART: + case XD3_GOTHEADER: + goto again; + case XD3_INPUT: + if (pos < enc_size) { goto input; } + /* else fallthrough */ + case XD3_WINFINISH: + default: + goto fail; + } + + CHECK(ret == XD3_OUTPUT); + CHECK(pos == enc_size); + + if (stream->avail_out != sizeof (test_text)) + { + stream->msg = "incorrect output size"; + ret = XD3_INTERNAL; + goto fail; + } + + decoded_size = stream->avail_out; + memcpy (decoded, stream->next_out, stream->avail_out); + + xd3_consume_output (stream); 
+ + if ((ret = xd3_get_appheader (stream, & apphead, & apphead_size))) { goto fail; } + + if (apphead_size != strlen ((char*) test_apphead) || + memcmp (apphead, test_apphead, strlen ((char*) test_apphead)) != 0) + { + stream->msg = "incorrect appheader"; + ret = XD3_INTERNAL; + goto fail; + } + + if ((ret = xd3_decode_input (stream)) != XD3_WINFINISH || + (ret = xd3_close_stream (stream)) != 0) + { + goto fail; + } + + if (decoded_size != sizeof (test_text) || + memcmp (decoded, test_text, sizeof (test_text)) != 0) + { + stream->msg = "incorrect output text"; + ret = EIO; + } + + fail: + msg = stream->msg; + xd3_free_stream (stream); + xd3_init_config (& cfg, flags); + xd3_config_stream (stream, & cfg); + stream->msg = msg; + + return ret; +} + +static int +test_decompress_single_bit_error (xd3_stream *stream, int expected_non_failures) +{ + int ret; + usize_t i; + uint8_t encoded[4*sizeof (test_text)]; /* make room for alt code table */ + usize_t encoded_size; + int non_failures = 0; + int cksum = (stream->flags & XD3_ADLER32) != 0; + +//#define DEBUG_TEST_FAILURES +#ifndef DEBUG_TEST_FAILURES +#define TEST_FAILURES() +#else + /* For checking non-failure cases by hand, enable this macro and run + * xdelta printdelta with print_cpymode disabled. Every non-failure + * should change a copy address mode, which doesn't cause a failure + * because the address cache starts out with all zeros. 
+ + ./xdelta3 test + for i in test_text.xz.*; do ./xdelta3 printdelta $i > $i.out; + diff $i.out test_text.xz.0.out; done + + */ + system ("rm -rf test_text.*"); + { + char buf[TESTBUFSIZE]; + FILE *f; + snprintf_func (buf, TESTBUFSIZE, "test_text"); + f = fopen (buf, "w"); + fwrite (test_text,1,sizeof (test_text),f); + fclose (f); + } +#define TEST_FAILURES() \ + do { \ + char buf[TESTBUFSIZE]; \ + FILE *f; \ + snprintf_func (buf, TESTBUFSIZE, "test_text.xz.%d", non_failures); \ + f = fopen (buf, "w"); \ + fwrite (encoded,1,encoded_size,f); \ + fclose (f); \ + } while (0) +#endif + + stream->sec_data.inefficient = 1; + stream->sec_inst.inefficient = 1; + stream->sec_addr.inefficient = 1; + + /* Encode text, test correct input */ + if ((ret = test_compress_text (stream, encoded, & encoded_size))) + { + /*stream->msg = "without error: encode failure";*/ + return ret; + } + + if ((ret = test_decompress_text (stream, encoded, encoded_size, + sizeof (test_text) / 4))) + { + /*stream->msg = "without error: decode failure";*/ + return ret; + } + + TEST_FAILURES(); + + for (i = 0; i < encoded_size*8; i += 1) + { + /* Single bit error. */ + encoded[i/8] ^= 1 << (i%8); + + if ((ret = test_decompress_text (stream, encoded, + encoded_size, sizeof (test_text))) == 0) + { + non_failures += 1; +#ifdef DEBUG_TEST_FAILURES + XPR(NT "%u[%u] non-failure %u\n", i/8, i%8, non_failures); +#endif + TEST_FAILURES(); + } + else + { + /*XPR(NT "%u[%u] failure: %s\n", i/8, i%8, stream->msg);*/ + } + + /* decompress_text returns EIO when the final memcmp() fails, but that + * should never happen with checksumming on. */ + if (cksum && ret == EIO) + { + /*XPR(NT "%u[%u] cksum mismatch\n", i/8, i%8);*/ + stream->msg = "checksum mismatch"; + return XD3_INTERNAL; + } + + /* Undo single bit error. 
*/ + encoded[i/8] ^= 1 << (i%8); + } + + /* Test correct input again */ + if ((ret = test_decompress_text (stream, encoded, encoded_size, 1))) + { + /*stream->msg = "without error: decode failure";*/ + return ret; + } + + /* Check expected non-failures */ + if (non_failures > expected_non_failures) + { + XPR(NT "non-failures %u > expected %u", + non_failures, expected_non_failures); + stream->msg = "incorrect"; + return XD3_INTERNAL; + } + + DOT (); + + return 0; +} + +/*********************************************************************** + Secondary compression tests + ***********************************************************************/ + +#if SECONDARY_ANY +typedef int (*sec_dist_func) (xd3_stream *stream, xd3_output *data); + +static int sec_dist_func1 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func2 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func3 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func4 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func5 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func6 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func7 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func8 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func9 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func10 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func11 (xd3_stream *stream, xd3_output *data); + +static sec_dist_func sec_dists[] = +{ + sec_dist_func1, + sec_dist_func2, + sec_dist_func3, + sec_dist_func4, + sec_dist_func5, + sec_dist_func6, + sec_dist_func7, + sec_dist_func8, + sec_dist_func9, + sec_dist_func10, + sec_dist_func11, +}; + +/* Test ditsribution: 100 bytes of the same character (13). 
*/ +static int +sec_dist_func1 (xd3_stream *stream, xd3_output *data) +{ + int i, ret; + for (i = 0; i < 100; i += 1) + { + if ((ret = xd3_emit_byte (stream, & data, 13))) { return ret; } + } + return 0; +} + +/* Test ditsribution: uniform covering half the alphabet. */ +static int +sec_dist_func2 (xd3_stream *stream, xd3_output *data) +{ + int i, ret; + for (i = 0; i < ALPHABET_SIZE; i += 1) + { + if ((ret = xd3_emit_byte (stream, & data, i%(ALPHABET_SIZE/2)))) { return ret; } + } + return 0; +} + +/* Test ditsribution: uniform covering the entire alphabet. */ +static int +sec_dist_func3 (xd3_stream *stream, xd3_output *data) +{ + int i, ret; + for (i = 0; i < ALPHABET_SIZE; i += 1) + { + if ((ret = xd3_emit_byte (stream, & data, i%ALPHABET_SIZE))) { return ret; } + } + return 0; +} + +/* Test distribution: An exponential distribution covering half the alphabet */ +static int +sec_dist_func4 (xd3_stream *stream, xd3_output *data) +{ + int i, ret, x; + for (i = 0; i < ALPHABET_SIZE*20; i += 1) + { + x = mt_exp_rand (10, ALPHABET_SIZE/2); + if ((ret = xd3_emit_byte (stream, & data, x))) { return ret; } + } + return 0; +} + +/* Test distribution: An exponential distribution covering the entire alphabet */ +static int +sec_dist_func5 (xd3_stream *stream, xd3_output *data) +{ + int i, ret, x; + for (i = 0; i < ALPHABET_SIZE*20; i += 1) + { + x = mt_exp_rand (10, ALPHABET_SIZE-1); + if ((ret = xd3_emit_byte (stream, & data, x))) { return ret; } + } + return 0; +} + +/* Test distribution: An uniform random distribution covering half the alphabet */ +static int +sec_dist_func6 (xd3_stream *stream, xd3_output *data) +{ + int i, ret, x; + for (i = 0; i < ALPHABET_SIZE*20; i += 1) + { + x = mt_random (&static_mtrand) % (ALPHABET_SIZE/2); + if ((ret = xd3_emit_byte (stream, & data, x))) { return ret; } + } + return 0; +} + +/* Test distribution: An uniform random distribution covering the entire alphabet */ +static int +sec_dist_func7 (xd3_stream *stream, xd3_output *data) +{ 
+ int i, ret, x; + for (i = 0; i < ALPHABET_SIZE*200; i += 1) + { + x = mt_random (&static_mtrand) % ALPHABET_SIZE; + if ((ret = xd3_emit_byte (stream, & data, x))) { return ret; } + } + return 0; +} + +/* Test distribution: A small number of frequent characters, difficult + * to divide into many groups */ +static int +sec_dist_func8 (xd3_stream *stream, xd3_output *data) +{ + int i, ret; + for (i = 0; i < ALPHABET_SIZE*5; i += 1) + { + if ((ret = xd3_emit_byte (stream, & data, 0))) { return ret; } + if ((ret = xd3_emit_byte (stream, & data, 64))) { return ret; } + if ((ret = xd3_emit_byte (stream, & data, 128))) { return ret; } + if ((ret = xd3_emit_byte (stream, & data, 255))) { return ret; } + } + return 0; +} + +/* Test distribution: One that causes many FGK block promotions (found a bug) */ +static int +sec_dist_func9 (xd3_stream *stream, xd3_output *data) +{ + int i, ret; + + int ramp = 0; + int rcount = 0; + int prom = 0; + int pcount = 0; + + /* 200 was long enough to trigger it--only when stricter checking + * that counted all blocks was turned on, but it seems I deleted + * this code. (missing fgk_free_block on line 398). */ + for (i = 0; i < ALPHABET_SIZE*200; i += 1) + { + repeat: + if (ramp < ALPHABET_SIZE) + { + /* Initially Nth symbol has (N+1) frequency */ + if (rcount <= ramp) + { + rcount += 1; + if ((ret = xd3_emit_byte (stream, & data, ramp))) { return ret; } + continue; + } + + ramp += 1; + rcount = 0; + goto repeat; + } + + /* Thereafter, promote least freq to max freq */ + if (pcount == ALPHABET_SIZE) + { + pcount = 0; + prom = (prom + 1) % ALPHABET_SIZE; + } + + pcount += 1; + if ((ret = xd3_emit_byte (stream, & data, prom))) { return ret; } + } + + return 0; +} + +/* Test distribution: freq[i] == i*i, creates a 21-bit code length, fixed in 3.0r. 
*/ +static int +sec_dist_func10 (xd3_stream *stream, xd3_output *data) +{ + int i, j, ret; + for (i = 0; i < ALPHABET_SIZE; i += 1) + { + for (j = 0; j <= (i*i); j += 1) + { + if ((ret = xd3_emit_byte (stream, & data, i))) { return ret; } + } + } + return 0; +} + +/* Test distribution: fibonacci */ +static int +sec_dist_func11 (xd3_stream *stream, xd3_output *data) +{ + int sum0 = 0; + int sum1 = 1; + int i, j, ret; + for (i = 0; i < 33; ++i) + { + for (j = 0; j < (sum0 + sum1); ++j) + { + if ((ret = xd3_emit_byte (stream, & data, i))) { return ret; } + } + sum0 = sum1; + sum1 = j; + } + return 0; +} + +static int +test_secondary_decode (xd3_stream *stream, + const xd3_sec_type *sec, + usize_t input_size, + usize_t compress_size, + const uint8_t *dec_input, + const uint8_t *dec_correct, + uint8_t *dec_output) +{ + int ret; + xd3_sec_stream *dec_stream; + const uint8_t *dec_input_used, *dec_input_end; + uint8_t *dec_output_used, *dec_output_end; + + if ((dec_stream = sec->alloc (stream)) == NULL) { return ENOMEM; } + + if ((ret = sec->init (stream, dec_stream, 0)) != 0) { goto fail; } + + dec_input_used = dec_input; + dec_input_end = dec_input + compress_size; + + dec_output_used = dec_output; + dec_output_end = dec_output + input_size; + + if ((ret = sec->decode (stream, dec_stream, + & dec_input_used, dec_input_end, + & dec_output_used, dec_output_end))) + { + goto fail; + } + + if (dec_input_used != dec_input_end) + { + stream->msg = "unused input"; + ret = XD3_INTERNAL; + goto fail; + } + + if (dec_output_used != dec_output_end) + { + stream->msg = "unfinished output"; + ret = XD3_INTERNAL; + goto fail; + } + + if (memcmp (dec_output, dec_correct, input_size) != 0) + { + stream->msg = "incorrect output"; + ret = XD3_INTERNAL; + goto fail; + } + + fail: + sec->destroy (stream, dec_stream); + return ret; +} + +static int +test_secondary (xd3_stream *stream, const xd3_sec_type *sec, usize_t groups) +{ + usize_t test_i; + int ret; + xd3_output *in_head, *out_head, 
*p; + usize_t p_off, input_size, compress_size; + uint8_t *dec_input = NULL, *dec_output = NULL, *dec_correct = NULL; + xd3_sec_stream *enc_stream; + xd3_sec_cfg cfg; + + memset (& cfg, 0, sizeof (cfg)); + + cfg.inefficient = 1; + + for (cfg.ngroups = 1; cfg.ngroups <= groups; cfg.ngroups += 1) + { + XPR(NTR "\n..."); + for (test_i = 0; test_i < SIZEOF_ARRAY (sec_dists); test_i += 1) + { + mt_init (& static_mtrand, 0x9f73f7fc); + + in_head = xd3_alloc_output (stream, NULL); + out_head = xd3_alloc_output (stream, NULL); + enc_stream = sec->alloc (stream); + dec_input = NULL; + dec_output = NULL; + dec_correct = NULL; + + if (in_head == NULL || out_head == NULL || enc_stream == NULL) + { + goto nomem; + } + + if ((ret = sec_dists[test_i] (stream, in_head))) { goto fail; } + + if ((ret = sec->init (stream, enc_stream, 1)) != 0) { goto fail; } + + /* Encode data */ + if ((ret = sec->encode (stream, enc_stream, + in_head, out_head, & cfg))) + { + XPR(NT "test %"W"u: encode: %s", test_i, stream->msg); + goto fail; + } + + /* Calculate sizes, allocate contiguous arrays for decoding */ + input_size = xd3_sizeof_output (in_head); + compress_size = xd3_sizeof_output (out_head); + + XPR(NTR "%.3f", 8.0 * (double) compress_size / (double) input_size); + + if ((dec_input = (uint8_t*) xd3_alloc (stream, compress_size, 1)) == NULL || + (dec_output = (uint8_t*) xd3_alloc (stream, input_size, 1)) == NULL || + (dec_correct = (uint8_t*) xd3_alloc (stream, input_size, 1)) == NULL) + { + goto nomem; + } + + /* Fill the compressed data array */ + for (p_off = 0, p = out_head; p != NULL; + p_off += p->next, p = p->next_page) + { + memcpy (dec_input + p_off, p->base, p->next); + } + + CHECK(p_off == compress_size); + + /* Fill the input data array */ + for (p_off = 0, p = in_head; p != NULL; + p_off += p->next, p = p->next_page) + { + memcpy (dec_correct + p_off, p->base, p->next); + } + + CHECK(p_off == input_size); + + if ((ret = test_secondary_decode (stream, sec, input_size, + 
compress_size, dec_input, + dec_correct, dec_output))) + { + XPR(NT "test %"W"u: decode: %s", test_i, stream->msg); + goto fail; + } + + /* Single-bit error test, only cover the first 10 bytes. + * Some non-failures are expected in the Huffman case: + * Changing the clclen array, for example, may not harm the + * decoding. Really looking for faults here. */ + { + int i; + int bytes = xd3_min (compress_size, 10U); + for (i = 0; i < bytes * 8; i += 1) + { + dec_input[i/8] ^= 1 << (i%8); + + if ((ret = test_secondary_decode (stream, sec, input_size, + compress_size, dec_input, + dec_correct, dec_output)) + == 0) + { + /*XPR(NT "test %u: decode single-bit [%u/%u] + error non-failure", test_i, i/8, i%8);*/ + } + + dec_input[i/8] ^= 1 << (i%8); + + if ((i % (2*bytes)) == (2*bytes)-1) + { + DOT (); + } + } + ret = 0; + } + + if (0) { nomem: ret = ENOMEM; } + + fail: + sec->destroy (stream, enc_stream); + xd3_free_output (stream, in_head); + xd3_free_output (stream, out_head); + xd3_free (stream, dec_input); + xd3_free (stream, dec_output); + xd3_free (stream, dec_correct); + + if (ret != 0) { return ret; } + } + } + + return 0; +} + +IF_FGK (static int test_secondary_fgk (xd3_stream *stream, usize_t gp) + { return test_secondary (stream, & fgk_sec_type, gp); }) +IF_DJW (static int test_secondary_huff (xd3_stream *stream, usize_t gp) + { return test_secondary (stream, & djw_sec_type, gp); }) +IF_LZMA (static int test_secondary_lzma (xd3_stream *stream, usize_t gp) + { return test_secondary (stream, & lzma_sec_type, gp); }) + +#endif /* SECONDARY_ANY */ + +/*********************************************************************** + TEST INSTRUCTION TABLE + ***********************************************************************/ + +/* Test that xd3_choose_instruction() does the right thing for its code + * table. 
*/ +static int +test_choose_instruction (xd3_stream *stream, int ignore) +{ + int i; + + stream->code_table = (*stream->code_table_func) (); + + for (i = 0; i < 256; i += 1) + { + const xd3_dinst *d = stream->code_table + i; + xd3_rinst prev, inst; + + CHECK(d->type1 > 0); + + memset (& prev, 0, sizeof (prev)); + memset (& inst, 0, sizeof (inst)); + + if (d->type2 == 0) + { + inst.type = d->type1; + + if ((inst.size = d->size1) == 0) + { + inst.size = TESTBUFSIZE; + } + + XD3_CHOOSE_INSTRUCTION (stream, NULL, & inst); + + if (inst.code2 != 0 || inst.code1 != i) + { + stream->msg = "wrong single instruction"; + return XD3_INTERNAL; + } + } + else + { + prev.type = d->type1; + prev.size = d->size1; + inst.type = d->type2; + inst.size = d->size2; + + XD3_CHOOSE_INSTRUCTION (stream, & prev, & inst); + + if (prev.code2 != i) + { + stream->msg = "wrong double instruction"; + return XD3_INTERNAL; + } + } + } + + return 0; +} + +static int +test_checksum_step (xd3_stream *stream, int ignore) +{ + const int bufsize = 128; + uint8_t buf[128]; + for (int i = 0; i < bufsize; i++) + { + buf[i] = mt_random (&static_mtrand) & 0xff; + } + + for (usize_t cksize = 4; cksize <= 32; cksize += 3) + { + xd3_hash_cfg h1; + usize_t x; + int ret; + + if ((ret = xd3_size_hashtable (stream, XD3_ALLOCSIZE, cksize, &h1)) != 0) + { + return ret; + } + + x = xd3_large_cksum (&h1, buf, cksize); + for (usize_t pos = 0; pos <= (bufsize - cksize); pos++) + { + usize_t y = xd3_large_cksum (&h1, buf + pos, cksize); + if (x != y) + { + stream->msg = "checksum != incremental checksum"; + return XD3_INTERNAL; + } + x = xd3_large_cksum_update (&h1, x, buf + pos, cksize); + } + + xd3_free (stream, h1.powers); + } + + return 0; +} + +/*********************************************************************** + 64BIT STREAMING + ***********************************************************************/ + +/* This test encodes and decodes a series of 1 megabyte windows, each + * containing a long run of zeros 
along with a single xoff_t size + * record to indicate the sequence. */ +static int +test_streaming (xd3_stream *in_stream, uint8_t *encbuf, uint8_t *decbuf, uint8_t *delbuf, usize_t megs) +{ + xd3_stream estream, dstream; + int ret; + usize_t i, delsize, decsize; + xd3_config cfg; + xd3_init_config (& cfg, in_stream->flags); + cfg.flags |= XD3_COMPLEVEL_6; + + if ((ret = xd3_config_stream (& estream, & cfg)) || + (ret = xd3_config_stream (& dstream, & cfg))) + { + goto fail; + } + + for (i = 0; i < megs; i += 1) + { + ((usize_t*) encbuf)[0] = i; + + if ((i % 200) == 199) { DOT (); } + + if ((ret = xd3_process_stream (1, & estream, xd3_encode_input, 0, + encbuf, 1 << 20, + delbuf, & delsize, 1 << 20))) + { + in_stream->msg = estream.msg; + goto fail; + } + + if ((ret = xd3_process_stream (0, & dstream, xd3_decode_input, 0, + delbuf, delsize, + decbuf, & decsize, 1 << 20))) + { + in_stream->msg = dstream.msg; + goto fail; + } + + if (decsize != 1 << 20 || + memcmp (encbuf, decbuf, 1 << 20) != 0) + { + in_stream->msg = "wrong result"; + ret = XD3_INTERNAL; + goto fail; + } + } + + if ((ret = xd3_close_stream (& estream)) || + (ret = xd3_close_stream (& dstream))) + { + goto fail; + } + + fail: + xd3_free_stream (& estream); + xd3_free_stream (& dstream); + return ret; +} + +/* Run tests of data streaming of over and around 4GB of data. */ +static int +test_compressed_stream_overflow (xd3_stream *stream, int ignore) +{ + int ret; + int i; + uint8_t *buf; + + if ((buf = (uint8_t*) malloc (TWO_MEGS_AND_DELTA)) == NULL) { return ENOMEM; } + + memset (buf, 0, TWO_MEGS_AND_DELTA); + for (i = 0; i < (2 << 20); i += 256) + { + int j; + int off = mt_random(& static_mtrand) % 10; + for (j = 0; j < 256; j++) + { + buf[i + j] = j + off; + } + } + + /* Test overflow of a 32-bit file offset. 
*/ + if (SIZEOF_XOFF_T == 4) + { + ret = test_streaming (stream, buf, buf + (1 << 20), buf + (2 << 20), (1 << 12) + 1); + + if (ret == XD3_INVALID_INPUT && MSG_IS ("decoder file offset overflow")) + { + ret = 0; + } + else + { + XPR(NT XD3_LIB_ERRMSG (stream, ret)); + stream->msg = "expected overflow condition"; + ret = XD3_INTERNAL; + goto fail; + } + } + + /* Test transfer of exactly 32bits worth of data. */ + if ((ret = test_streaming (stream, + buf, + buf + (1 << 20), + buf + (2 << 20), + 1 << 12))) + { + goto fail; + } + fail: + free (buf); + return ret; +} + +/*********************************************************************** + COMMAND LINE + ***********************************************************************/ + +#if SHELL_TESTS + +/* For each pair of command templates in the array below, test that + * encoding and decoding commands work. Also check for the expected + * size delta, which should be approximately TEST_ADD_RATIO times the + * file size created by test_make_inputs. Due to differences in the + * application header, it is suppressed (-A) so that all delta files + * are the same. 
*/ +static int +test_command_line_arguments (xd3_stream *stream, int ignore) +{ + int i, ret; + + static const char* cmdpairs[] = + { + /* standard input, output */ + "%s %s -A < %s > %s", "%s -d < %s > %s", + "%s %s -A -e < %s > %s", "%s -d < %s > %s", + "%s %s -A= encode < %s > %s", "%s decode < %s > %s", + "%s %s -A -q encode < %s > %s", "%s -qdq < %s > %s", + + /* file input, standard output */ + "%s %s -A= %s > %s", "%s -d %s > %s", + "%s %s -A -e %s > %s", "%s -d %s > %s", + "%s %s encode -A= %s > %s", "%s decode %s > %s", + + /* file input, output */ + "%s %s -A= %s %s", "%s -d %s %s", + "%s %s -A -e %s %s", "%s -d %s %s", + "%s %s -A= encode %s %s", "%s decode %s %s", + + /* option placement */ + "%s %s -A -f %s %s", "%s -f -d %s %s", + "%s %s -e -A= %s %s", "%s -d -f %s %s", + "%s %s -f encode -A= %s %s", "%s -f decode -f %s %s", + }; + + char ecmd[TESTBUFSIZE], dcmd[TESTBUFSIZE]; + int pairs = SIZEOF_ARRAY (cmdpairs) / 2; + xoff_t tsize; + xoff_t dsize; + double ratio; + + mt_init (& static_mtrand, 0x9f73f7fc); + + for (i = 0; i < pairs; i += 1) + { + test_setup (); + if ((ret = test_make_inputs (stream, NULL, & tsize))) { return ret; } + + snprintf_func (ecmd, TESTBUFSIZE, cmdpairs[2*i], program_name, + test_softcfg_str, TEST_TARGET_FILE, TEST_DELTA_FILE); + snprintf_func (dcmd, TESTBUFSIZE, cmdpairs[2*i+1], program_name, + TEST_DELTA_FILE, TEST_RECON_FILE); + + /* Encode and decode. */ + if ((ret = system (ecmd)) != 0) + { + XPR(NT "encode command: %s\n", ecmd); + stream->msg = "encode cmd failed"; + return XD3_INTERNAL; + } + + if ((ret = system (dcmd)) != 0) + { + XPR(NT "decode command: %s\n", dcmd); + stream->msg = "decode cmd failed"; + return XD3_INTERNAL; + } + + /* Compare the target file. 
*/ + if ((ret = test_compare_files (TEST_TARGET_FILE, TEST_RECON_FILE))) + { + return ret; + } + + if ((ret = test_file_size (TEST_DELTA_FILE, & dsize))) + { + return ret; + } + + ratio = (double) dsize / (double) tsize; + + /* Check that it is not too small, not too large. */ + if (ratio >= TEST_ADD_RATIO + TEST_EPSILON) + { + XPR(NT "test encode with size ratio %.4f, " + "expected < %.4f (%"Q"u, %"Q"u)\n", + ratio, TEST_ADD_RATIO + TEST_EPSILON, dsize, tsize); + stream->msg = "strange encoding"; + return XD3_INTERNAL; + } + + if (ratio <= TEST_ADD_RATIO * (1.0 - 2 * TEST_EPSILON)) + { + XPR(NT "test encode with size ratio %.4f, " + "expected > %.4f\n", + ratio, TEST_ADD_RATIO - TEST_EPSILON); + stream->msg = "strange encoding"; + return XD3_INTERNAL; + } + + /* Also check that test_compare_files works. The delta and original should + * not be identical. */ + if ((ret = test_compare_files (TEST_DELTA_FILE, + TEST_TARGET_FILE)) == 0) + { + stream->msg = "broken test_compare_files"; + return XD3_INTERNAL; + } + + test_cleanup (); + DOT (); + } + + return 0; +} + +static int +check_vcdiff_header (xd3_stream *stream, + const char *input, + const char *line_start, + const char *matches, + int yes_or_no) +{ + int ret; + char vcmd[TESTBUFSIZE], gcmd[TESTBUFSIZE]; + + snprintf_func (vcmd, TESTBUFSIZE, "%s printhdr -f %s %s", + program_name, input, TEST_RECON2_FILE); + + if ((ret = system (vcmd)) != 0) + { + XPR(NT "printhdr command: %s\n", vcmd); + stream->msg = "printhdr cmd failed"; + return XD3_INTERNAL; + } + + snprintf_func (gcmd, TESTBUFSIZE, "grep \"%s.*%s.*\" %s > /dev/null", + line_start, matches, TEST_RECON2_FILE); + + if (yes_or_no) + { + if ((ret = do_cmd (stream, gcmd))) + { + XPR(NT "%s\n", gcmd); + return ret; + } + } + else + { + if ((ret = do_fail (stream, gcmd))) + { + XPR(NT "%s\n", gcmd); + return ret; + } + } + + return 0; +} + +static int +test_recode_command2 (xd3_stream *stream, int has_source, + int variant, int change) +{ + int has_adler32 = 
(variant & 0x1) != 0; + int has_apphead = (variant & 0x2) != 0; + int has_secondary = (variant & 0x4) != 0; + + int change_adler32 = (change & 0x1) != 0; + int change_apphead = (change & 0x2) != 0; + int change_secondary = (change & 0x4) != 0; + + int recoded_adler32 = change_adler32 ? !has_adler32 : has_adler32; + int recoded_apphead = change_apphead ? !has_apphead : has_apphead; + int recoded_secondary = change_secondary ? !has_secondary : has_secondary; + + char ecmd[TESTBUFSIZE], recmd[TESTBUFSIZE], dcmd[TESTBUFSIZE]; + xoff_t tsize, ssize; + int ret; + + test_setup (); + + if ((ret = test_make_inputs (stream, has_source ? & ssize : NULL, & tsize))) + { + return ret; + } + + /* First encode */ + snprintf_func (ecmd, TESTBUFSIZE, "%s %s -f %s %s %s %s %s %s %s", + program_name, test_softcfg_str, + has_adler32 ? "" : "-n ", + has_apphead ? "-A=encode_apphead " : "-A= ", + has_secondary ? "-S djw " : "-S none ", + has_source ? "-s " : "", + has_source ? TEST_SOURCE_FILE : "", + TEST_TARGET_FILE, + TEST_DELTA_FILE); + + if ((ret = system (ecmd)) != 0) + { + XPR(NT "encode command: %s\n", ecmd); + stream->msg = "encode cmd failed"; + return XD3_INTERNAL; + } + + /* Now recode */ + snprintf_func (recmd, TESTBUFSIZE, + "%s recode %s -f %s %s %s %s %s", program_name, test_softcfg_str, + recoded_adler32 ? "" : "-n ", + !change_apphead ? "" : + (recoded_apphead ? "-A=recode_apphead " : "-A= "), + recoded_secondary ? "-S djw " : "-S= ", + TEST_DELTA_FILE, + TEST_COPY_FILE); + + if ((ret = system (recmd)) != 0) + { + XPR(NT "recode command: %s\n", recmd); + stream->msg = "recode cmd failed"; + return XD3_INTERNAL; + } + + /* Check recode changes. 
*/ + + if ((ret = check_vcdiff_header (stream, + TEST_COPY_FILE, + "VCDIFF window indicator", + "VCD_SOURCE", + has_source))) { return ret; } + + if ((ret = check_vcdiff_header (stream, + TEST_COPY_FILE, + "VCDIFF header indicator", + "VCD_SECONDARY", + recoded_secondary))) { return ret; } + + if ((ret = check_vcdiff_header (stream, + TEST_COPY_FILE, + "VCDIFF window indicator", + "VCD_ADLER32", + /* Recode can't generate an adler32 + * checksum, it can only preserve it or + * remove it. */ + has_adler32 && recoded_adler32))) + { + return ret; + } + + if (!change_apphead) + { + if ((ret = check_vcdiff_header (stream, + TEST_COPY_FILE, + "VCDIFF header indicator", + "VCD_APPHEADER", + has_apphead))) + { + return ret; + } + if ((ret = check_vcdiff_header (stream, + TEST_COPY_FILE, + "VCDIFF application header", + "encode_apphead", + has_apphead))) + { + return ret; + } + } + else + { + if ((ret = check_vcdiff_header (stream, + TEST_COPY_FILE, + "VCDIFF header indicator", + "VCD_APPHEADER", + recoded_apphead))) + { + return ret; + } + if (recoded_apphead && + (ret = check_vcdiff_header (stream, + TEST_COPY_FILE, + "VCDIFF application header", + "recode_apphead", + 1))) + { + return ret; + } + } + + /* Now decode */ + snprintf_func (dcmd, TESTBUFSIZE, "%s -fd %s %s %s %s ", program_name, + has_source ? "-s " : "", + has_source ? TEST_SOURCE_FILE : "", + TEST_COPY_FILE, + TEST_RECON_FILE); + + if ((ret = system (dcmd)) != 0) + { + XPR(NT "decode command: %s\n", dcmd); + stream->msg = "decode cmd failed"; + return XD3_INTERNAL; + } + + /* Now compare. 
*/ + if ((ret = test_compare_files (TEST_TARGET_FILE, TEST_RECON_FILE))) + { + return ret; + } + test_cleanup (); + + return 0; +} + +static int +test_recode_command (xd3_stream *stream, int ignore) +{ + /* Things to test: + * - with and without a source file (recode does not change) + * + * (recode may or may not change -- 8 variations) + * - with and without adler32 + * - with and without app header + * - with and without secondary + */ + int has_source; + int variant; + int change; + int ret; + + for (has_source = 0; has_source < 2; has_source++) + { + for (variant = 0; variant < 8; variant++) + { + for (change = 0; change < 8; change++) + { + if ((ret = test_recode_command2 (stream, has_source, + variant, change))) + { + return ret; + } + } + DOT (); + } + } + + return 0; +} + +#if SECONDARY_LZMA +static int test_secondary_lzma_default (xd3_stream *stream, int ignore) +{ + char ecmd[TESTBUFSIZE]; + int ret; + + test_setup (); + + if ((ret = test_make_inputs (stream, NULL, NULL))) + { + return ret; + } + + /* First encode */ + snprintf_func (ecmd, TESTBUFSIZE, "%s -e %s %s", + program_name, + TEST_TARGET_FILE, + TEST_DELTA_FILE); + + if ((ret = system (ecmd)) != 0) + { + return XD3_INTERNAL; + } + + if ((ret = check_vcdiff_header (stream, + TEST_DELTA_FILE, + "VCDIFF secondary compressor", + "lzma", + 1))) + { + return ret; + } + + test_cleanup (); + return 0; +} + +#endif /* SECONDARY_LZMA */ +#endif /* SHELL_TESTS */ + +/*********************************************************************** + EXTERNAL I/O DECOMPRESSION/RECOMPRESSION + ***********************************************************************/ + +#if EXTERNAL_COMPRESSION +/* This performs one step of the test_externally_compressed_io + * function described below. It builds a pipe containing both Xdelta + * and external compression/decompression that should not modify the + * data passing through. 
*/ +static int +test_compressed_pipe (xd3_stream *stream, main_extcomp *ext, char* buf, + const char* comp_options, const char* decomp_options, + int do_ext_recomp, const char* msg) +{ + int ret; + char decomp_buf[TESTBUFSIZE]; + + if (do_ext_recomp) + { + snprintf_func (decomp_buf, TESTBUFSIZE, + " | %s %s", ext->decomp_cmdname, ext->decomp_options); + } + else + { + decomp_buf[0] = 0; + } + + snprintf_func (buf, TESTBUFSIZE, "%s %s < %s | %s %s | %s %s%s > %s", + ext->recomp_cmdname, ext->recomp_options, + TEST_TARGET_FILE, + program_name, comp_options, + program_name, decomp_options, + decomp_buf, + TEST_RECON_FILE); + + if ((ret = system (buf)) != 0) + { + stream->msg = msg; + return XD3_INTERNAL; + } + + if ((ret = test_compare_files (TEST_TARGET_FILE, TEST_RECON_FILE))) + { + return XD3_INTERNAL; + } + + DOT (); + return 0; +} + +/* We want to test that a pipe such as: + * + * --> | gzip -cf | xdelta3 -cf | xdelta3 -dcf | gzip -dcf | --> + * + * is transparent, i.e., does not modify the stream of data. However, + * we also want to verify that at the center the data is properly + * compressed, i.e., that we do not just have a re-compressed gzip + * format, that we have an VCDIFF format. We do this in two steps. + * First test the above pipe, then test with suppressed output + * recompression (-D). The result should be the original input: + * + * --> | gzip -cf | xdelta3 -cf | xdelta3 -Ddcf | --> + * + * Finally we want to test that -D also disables input decompression: + * + * --> | gzip -cf | xdelta3 -Dcf | xdelta3 -Ddcf | gzip -dcf | --> + */ +static int +test_externally_compressed_io (xd3_stream *stream, int ignore) +{ + usize_t i; + int ret; + char buf[TESTBUFSIZE]; + + mt_init (& static_mtrand, 0x9f73f7fc); + + if ((ret = test_make_inputs (stream, NULL, NULL))) { return ret; } + + for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1) + { + main_extcomp *ext = & extcomp_types[i]; + + /* Test for the existence of the external command first, if not skip. 
*/ + snprintf_func (buf, TESTBUFSIZE, "%s %s < /dev/null > /dev/null", ext->recomp_cmdname, ext->recomp_options); + + if ((ret = system (buf)) != 0) + { + XPR(NT "%s=0", ext->recomp_cmdname); + continue; + } + + if ((ret = test_compressed_pipe (stream, ext, buf, "-cfq", "-dcfq", 1, + "compression failed: identity pipe")) || + (ret = test_compressed_pipe (stream, ext, buf, "-cfq", "-Rdcfq", 0, + "compression failed: without recompression")) || + (ret = test_compressed_pipe (stream, ext, buf, "-Dcfq", "-Rdcfq", 1, + "compression failed: without decompression"))) + { + return ret; + } + } + + return 0; +} + +/* This tests the proper functioning of external decompression for + * source files. The source and target files are identical and + * compressed by gzip. Decoding such a delta with recompression + * disbaled (-R) should produce the original, uncompressed + * source/target file. Then it checks with output recompression + * enabled--in this case the output should be a compressed copy of the + * original source/target file. Then it checks that encoding with + * decompression disabled works--the compressed files are identical + * and decoding them should always produce a compressed output, + * regardless of -R since the encoded delta file had decompression + * disabled.. + */ +static int +test_source_decompression (xd3_stream *stream, int ignore) +{ + int ret; + char buf[TESTBUFSIZE]; + const main_extcomp *ext; + xoff_t dsize; + + mt_init (& static_mtrand, 0x9f73f7fc); + + test_setup (); + if ((ret = test_make_inputs (stream, NULL, NULL))) { return ret; } + + /* Use gzip. */ + if ((ext = main_get_compressor ("G")) == NULL) + { + XPR(NT "skipped"); + return 0; + } + + /* Save an uncompressed copy. */ + if ((ret = test_save_copy (TEST_TARGET_FILE))) { return ret; } + + /* Compress the source. 
*/ + snprintf_func (buf, TESTBUFSIZE, "%s -1 %s < %s > %s", ext->recomp_cmdname, + ext->recomp_options, TEST_COPY_FILE, TEST_SOURCE_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + /* Compress the target. */ + snprintf_func (buf, TESTBUFSIZE, "%s -9 %s < %s > %s", ext->recomp_cmdname, + ext->recomp_options, TEST_COPY_FILE, TEST_TARGET_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + /* Now the two identical files are compressed. Delta-encode the target, + * with decompression. */ + snprintf_func (buf, TESTBUFSIZE, "%s -e -vfq -s%s %s %s", program_name, TEST_SOURCE_FILE, + TEST_TARGET_FILE, TEST_DELTA_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + /* Check that the compressed file is small (b/c inputs are + * identical). */ + if ((ret = test_file_size (TEST_DELTA_FILE, & dsize))) { return ret; } + /* Deltas for identical files should be very small. */ + if (dsize > 200) + { + XPR(NT "external compression did not happen\n"); + stream->msg = "external compression did not happen"; + return XD3_INTERNAL; + } + + /* Decode the delta file with recompression disabled, should get an + * uncompressed file out. */ + snprintf_func (buf, TESTBUFSIZE, "%s -v -dq -R -s%s %s %s", program_name, + TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + if ((ret = test_compare_files (TEST_COPY_FILE, + TEST_RECON_FILE))) { return ret; } + + /* Decode the delta file with recompression, should get a compressed file + * out. But we can't compare compressed files directly. 
*/ + snprintf_func (buf, TESTBUFSIZE, "%s -v -dqf -s%s %s %s", program_name, + TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + snprintf_func (buf, TESTBUFSIZE, "%s %s < %s > %s", ext->decomp_cmdname, ext->decomp_options, + TEST_RECON_FILE, TEST_RECON2_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + if ((ret = test_compare_files (TEST_COPY_FILE, + TEST_RECON2_FILE))) { return ret; } + + /* Encode with decompression disabled */ + snprintf_func (buf, TESTBUFSIZE, "%s -e -D -vfq -s%s %s %s", program_name, + TEST_SOURCE_FILE, TEST_TARGET_FILE, TEST_DELTA_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + /* Decode the delta file with decompression disabled, should get the + * identical compressed file out. */ + snprintf_func (buf, TESTBUFSIZE, "%s -d -D -vfq -s%s %s %s", program_name, + TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + if ((ret = test_compare_files (TEST_TARGET_FILE, + TEST_RECON_FILE))) { return ret; } + + test_cleanup(); + return 0; +} +#endif + +/*********************************************************************** + FORCE, STDOUT + ***********************************************************************/ + +/* This tests that output will not overwrite an existing file unless + * -f was specified. The test is for encoding (the same code handles + * it for decoding). */ +static int +test_force_behavior (xd3_stream *stream, int ignore) +{ + int ret; + char buf[TESTBUFSIZE]; + + /* Create empty target file */ + test_setup (); + snprintf_func (buf, TESTBUFSIZE, "cp /dev/null %s", TEST_TARGET_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + /* Encode to delta file */ + snprintf_func (buf, TESTBUFSIZE, "%s -e %s %s", program_name, + TEST_TARGET_FILE, TEST_DELTA_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + /* Encode again, should fail. 
*/ + snprintf_func (buf, TESTBUFSIZE, "%s -q -e %s %s ", program_name, + TEST_TARGET_FILE, TEST_DELTA_FILE); + if ((ret = do_fail (stream, buf))) { return ret; } + + /* Force it, should succeed. */ + snprintf_func (buf, TESTBUFSIZE, "%s -f -e %s %s", program_name, + TEST_TARGET_FILE, TEST_DELTA_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + test_cleanup(); + return 0; +} + +/* This checks the proper operation of the -c flag. When specified + * the default output becomes stdout, otherwise the input must be + * provided (encode) or it may be defaulted (decode w/ app header). */ +static int +test_stdout_behavior (xd3_stream *stream, int ignore) +{ + int ret; + char buf[TESTBUFSIZE]; + + test_setup(); + snprintf_func (buf, TESTBUFSIZE, "cp /dev/null %s", TEST_TARGET_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + /* Without -c, encode writes to delta file */ + snprintf_func (buf, TESTBUFSIZE, "%s -e %s %s", program_name, + TEST_TARGET_FILE, TEST_DELTA_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + /* With -c, encode writes to stdout */ + snprintf_func (buf, TESTBUFSIZE, "%s -e -c %s > %s", program_name, + TEST_TARGET_FILE, TEST_DELTA_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + /* Without -c, decode writes to target file name, but it fails because the + * file exists. */ + snprintf_func (buf, TESTBUFSIZE, "%s -q -d %s ", program_name, TEST_DELTA_FILE); + if ((ret = do_fail (stream, buf))) { return ret; } + + /* With -c, decode writes to stdout */ + snprintf_func (buf, TESTBUFSIZE, "%s -d -c %s > /dev/null", program_name, TEST_DELTA_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + test_cleanup(); + + return 0; +} + +/* This tests that the no-output flag (-J) works. 
*/ +static int +test_no_output (xd3_stream *stream, int ignore) +{ + int ret; + char buf[TESTBUFSIZE]; + + test_setup (); + + snprintf_func (buf, TESTBUFSIZE, "touch %s && chmod 0000 %s", + TEST_NOPERM_FILE, TEST_NOPERM_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + if ((ret = test_make_inputs (stream, NULL, NULL))) { return ret; } + + /* Try no_output encode w/out unwritable output file */ + snprintf_func (buf, TESTBUFSIZE, "%s -q -f -e %s %s", program_name, + TEST_TARGET_FILE, TEST_NOPERM_FILE); + if ((ret = do_fail (stream, buf))) { return ret; } + snprintf_func (buf, TESTBUFSIZE, "%s -J -e %s %s", program_name, + TEST_TARGET_FILE, TEST_NOPERM_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + /* Now really write the delta to test decode no-output */ + snprintf_func (buf, TESTBUFSIZE, "%s -e %s %s", program_name, + TEST_TARGET_FILE, TEST_DELTA_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + snprintf_func (buf, TESTBUFSIZE, "%s -q -f -d %s %s", program_name, + TEST_DELTA_FILE, TEST_NOPERM_FILE); + if ((ret = do_fail (stream, buf))) { return ret; } + snprintf_func (buf, TESTBUFSIZE, "%s -J -d %s %s", program_name, + TEST_DELTA_FILE, TEST_NOPERM_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + test_cleanup (); + return 0; +} + +/* This tests that the default appheader works */ +static int +test_appheader (xd3_stream *stream, int ignore) +{ + int i; + int ret; + char buf[TESTBUFSIZE]; + char bogus[TESTBUFSIZE]; + xoff_t ssize, tsize; + test_setup (); + + if ((ret = test_make_inputs (stream, &ssize, &tsize))) { return ret; } + + snprintf_func (buf, TESTBUFSIZE, "%s -q -f -e -s %s %s %s", program_name, + TEST_SOURCE_FILE, TEST_TARGET_FILE, TEST_DELTA_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + if ((ret = test_copy_to (program_name, TEST_RECON2_FILE))) { return ret; } + + snprintf_func (buf, TESTBUFSIZE, "chmod 0700 %s", TEST_RECON2_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + 
if ((ret = test_save_copy (TEST_TARGET_FILE))) { return ret; } + if ((ret = test_copy_to (TEST_SOURCE_FILE, TEST_TARGET_FILE))) { return ret; } + + if ((ret = test_compare_files (TEST_TARGET_FILE, TEST_COPY_FILE)) == 0) + { + return XD3_INVALID; // I.e., files are different! + } + + // Test that the target file is restored. + snprintf_func (buf, TESTBUFSIZE, "(cd /tmp && %s -q -f -d %s)", + TEST_RECON2_FILE, + TEST_DELTA_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + if ((ret = test_compare_files (TEST_TARGET_FILE, TEST_COPY_FILE)) != 0) + { + return ret; + } + + // Test a malicious string w/ entries > 4 in the appheader by having + // the encoder write it: + for (i = 0; i < TESTBUFSIZE / 4; ++i) + { + bogus[2*i] = 'G'; + bogus[2*i+1] = '/'; + } + bogus[TESTBUFSIZE/2-1] = 0; + + snprintf_func (buf, TESTBUFSIZE, + "%s -q -f -A=%s -e -s %s %s %s", program_name, bogus, + TEST_SOURCE_FILE, TEST_TARGET_FILE, TEST_DELTA_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + // Then read it: + snprintf_func (buf, TESTBUFSIZE, "(cd /tmp && %s -q -f -d %s)", + TEST_RECON2_FILE, + TEST_DELTA_FILE); + if ((ret = do_cmd (stream, buf)) == 0) + { + return XD3_INVALID; // Impossible + } + if (!WIFEXITED(ret)) + { + return XD3_INVALID; // Must have crashed! + } + + test_cleanup (); + return 0; +} + +/*********************************************************************** + Source identical optimization + ***********************************************************************/ + +/* Computing a delta should be fastest when the two inputs are + * identical, this checks it. The library is called to compute a + * delta between a 10000 byte file, 1000 byte winsize, 500 byte source + * blocksize. The same buffer is used for both source and target. */ +static int +test_identical_behavior (xd3_stream *stream, int ignore) +{ +#define IDB_TGTSZ 10000 /* Not a power of two b/c of hard-coded expectations below. 
*/ +#define IDB_BLKSZ 512 +#define IDB_WINSZ 1000 +#define IDB_DELSZ 1000 +#define IDB_WINCNT (IDB_TGTSZ / IDB_WINSZ) + + int ret, i; + uint8_t buf[IDB_TGTSZ]; + uint8_t del[IDB_DELSZ]; + uint8_t rec[IDB_TGTSZ]; + xd3_source source; + int nextencwin = 0; + int winstarts = 0, winfinishes = 0; + usize_t delpos = 0, recsize; + xd3_config config; + memset(&source, 0, sizeof(source)); + + for (i = 0; i < IDB_TGTSZ; i += 1) + { + buf[i] = (uint8_t) mt_random (&static_mtrand); + } + + stream->winsize = IDB_WINSZ; + + source.blksize = IDB_BLKSZ; + source.name = ""; + source.curblk = NULL; + source.curblkno = 0; + + if ((ret = xd3_set_source (stream, & source))) { goto fail; } + + /* Compute an delta between identical source and targets. */ + for (;;) + { + ret = xd3_encode_input (stream); + + if (ret == XD3_INPUT) + { + xd3_avail_input (stream, buf + (IDB_WINSZ * nextencwin), IDB_WINSZ); + nextencwin += 1; + continue; + } + + if (ret == XD3_GETSRCBLK) + { + source.curblkno = source.getblkno; + source.onblk = IDB_BLKSZ; + source.curblk = buf + source.getblkno * IDB_BLKSZ; + continue; + } + + if (ret == XD3_WINSTART) + { + winstarts++; + continue; + } + if (ret == XD3_WINFINISH) + { + winfinishes++; + if (winfinishes == IDB_WINCNT) + { + break; + } + continue; + } + + if (ret != XD3_OUTPUT) { goto fail; } + + CHECK(delpos + stream->avail_out <= IDB_DELSZ); + + memcpy (del + delpos, stream->next_out, stream->avail_out); + + delpos += stream->avail_out; + + xd3_consume_output (stream); + } + + CHECK(winfinishes == IDB_WINCNT); + CHECK(winstarts == IDB_WINCNT); + CHECK(nextencwin == IDB_WINCNT); + + /* Reset. 
*/ + memset(&source, 0, sizeof(source)); + source.blksize = IDB_TGTSZ; + source.onblk = IDB_TGTSZ; + source.curblk = buf; + source.curblkno = 0; + + if ((ret = xd3_close_stream (stream))) { goto fail; } + xd3_free_stream (stream); + xd3_init_config (& config, 0); + if ((ret = xd3_config_stream (stream, & config))) { goto fail; } + if ((ret = xd3_set_source_and_size (stream, & source, IDB_TGTSZ))) { goto fail; } + + /* Decode. */ + if ((ret = xd3_decode_stream (stream, del, delpos, rec, & recsize, IDB_TGTSZ))) { goto fail; } + + /* Check result size and data. */ + if (recsize != IDB_TGTSZ) { stream->msg = "wrong size reconstruction"; goto fail; } + if (memcmp (rec, buf, IDB_TGTSZ) != 0) { stream->msg = "wrong data reconstruction"; goto fail; } + + /* Check that there was one copy per window. */ + IF_DEBUG (if (stream->n_scpy != IDB_WINCNT || + stream->n_add != 0 || + stream->n_run != 0) { stream->msg = "wrong copy count"; goto fail; }); + + /* Check that no checksums were computed because the initial match + was presumed. */ + IF_DEBUG (if (stream->large_ckcnt != 0) { stream->msg = "wrong checksum behavior"; goto fail; }); + + ret = 0; + fail: + return ret; +} + +/*********************************************************************** + String matching test + ***********************************************************************/ + +/* Check particular matching behaviors by calling + * xd3_string_match_soft directly with specific arguments. 
*/ +typedef struct _string_match_test string_match_test; + +typedef enum +{ + SM_NONE = 0, + SM_LAZY = (1 << 1), +} string_match_flags; + +struct _string_match_test +{ + const char *input; + int flags; + const char *result; +}; + +static const string_match_test match_tests[] = +{ + /* nothing */ + { "1234567890", SM_NONE, "" }, + + /* basic run, copy */ + { "11111111112323232323", SM_NONE, "R0/10 C12/8@10" }, + + /* no run smaller than MIN_RUN=8 */ + { "1111111", SM_NONE, "C1/6@0" }, + { "11111111", SM_NONE, "R0/8" }, + + /* simple promotion: the third copy address depends on promotion */ + { "ABCDEF_ABCDEF^ABCDEF", SM_NONE, "C7/6@0 C14/6@7" }, + /* { "ABCDEF_ABCDEF^ABCDEF", SM_PROMOTE, "C7/6@0 C14/6@0" }, forgotten */ + + /* simple lazy: there is a better copy starting with "23 X" than "123 " */ + { "123 23 XYZ 123 XYZ", SM_NONE, "C11/4@0" }, + { "123 23 XYZ 123 XYZ", SM_LAZY, "C11/4@0 C12/6@4" }, + + /* trylazy: no lazy matches unless there are at least two characters beyond + * the first match */ + { "2123_121212", SM_LAZY, "C7/4@5" }, + { "2123_1212123", SM_LAZY, "C7/4@5" }, + { "2123_1212123_", SM_LAZY, "C7/4@5 C8/5@0" }, + + /* trylazy: no lazy matches if the copy is >= MAXLAZY=10 */ + { "2123_121212123_", SM_LAZY, "C7/6@5 C10/5@0" }, + { "2123_12121212123_", SM_LAZY, "C7/8@5 C12/5@0" }, + { "2123_1212121212123_", SM_LAZY, "C7/10@5" }, + + /* lazy run: check a run overlapped by a longer copy */ + { "11111112 111111112 1", SM_LAZY, "C1/6@0 R9/8 C10/10@0" }, + + /* lazy match: match_length,run_l >= min_match tests, shouldn't get any + * copies within the run, no run within the copy */ + { "^________^________ ", SM_LAZY, "R1/8 C9/9@0" }, + + /* chain depth: it only goes back 10. this checks that the 10th match hits + * and the 11th misses. 
*/ + { "1234 1234_1234-1234=1234+1234[1234]1234{1234}1234<1234 ", SM_NONE, + "C5/4@0 C10/4@5 C15/4@10 C20/4@15 C25/4@20 C30/4@25 C35/4@30 C40/4@35 C45/4@40 C50/5@0" }, + { "1234 1234_1234-1234=1234+1234[1234]1234{1234}1234<1234>1234 ", SM_NONE, + "C5/4@0 C10/4@5 C15/4@10 C20/4@15 C25/4@20 C30/4@25 C35/4@30 C40/4@35 C45/4@40 C50/4@45 C55/4@50" }, + + /* ssmatch test */ + { "ABCDE___ABCDE*** BCDE***", SM_NONE, "C8/5@0 C17/4@1" }, + /*{ "ABCDE___ABCDE*** BCDE***", SM_SSMATCH, "C8/5@0 C17/7@9" }, forgotten */ +}; + +static int +test_string_matching (xd3_stream *stream, int ignore) +{ + usize_t i; + int ret; + xd3_config config; + char rbuf[TESTBUFSIZE]; + + for (i = 0; i < SIZEOF_ARRAY (match_tests); i += 1) + { + const string_match_test *test = & match_tests[i]; + char *rptr = rbuf; + usize_t len = (usize_t) strlen (test->input); + + xd3_free_stream (stream); + xd3_init_config (& config, 0); + + config.smatch_cfg = XD3_SMATCH_SOFT; + config.smatcher_soft.large_look = 4; + config.smatcher_soft.large_step = 4; + config.smatcher_soft.small_look = 4; + config.smatcher_soft.small_chain = 10; + config.smatcher_soft.small_lchain = 10; + config.smatcher_soft.max_lazy = (test->flags & SM_LAZY) ? 10 : 0; + config.smatcher_soft.long_enough = 10; + + if ((ret = xd3_config_stream (stream, & config))) { return ret; } + if ((ret = xd3_encode_init_full (stream))) { return ret; } + + xd3_avail_input (stream, (uint8_t*)test->input, len); + + if ((ret = stream->smatcher.string_match (stream))) { return ret; } + + *rptr = 0; + while (! 
xd3_rlist_empty (& stream->iopt_used)) + { + xd3_rinst *inst = xd3_rlist_pop_front (& stream->iopt_used); + + switch (inst->type) + { + case XD3_RUN: *rptr++ = 'R'; break; + case XD3_CPY: *rptr++ = 'C'; break; + default: CHECK(0); + } + + snprintf_func (rptr, rbuf+TESTBUFSIZE-rptr, "%"W"u/%"W"u", + inst->pos, inst->size); + rptr += strlen (rptr); + + if (inst->type == XD3_CPY) + { + *rptr++ = '@'; + snprintf_func (rptr, rbuf+TESTBUFSIZE-rptr, "%"Q"u", inst->addr); + rptr += strlen (rptr); + } + + *rptr++ = ' '; + + xd3_rlist_push_back (& stream->iopt_free, inst); + } + + if (rptr != rbuf) + { + rptr -= 1; *rptr = 0; + } + + if (strcmp (rbuf, test->result) != 0) + { + XPR(NT "test %"W"u: expected %s: got %s", i, test->result, rbuf); + stream->msg = "wrong result"; + return XD3_INTERNAL; + } + } + + return 0; +} + +/* + * This is a test for many overlapping instructions. It must be a lazy + * matcher. + */ +static int +test_iopt_flush_instructions (xd3_stream *stream, int ignore) +{ + int ret, i; + usize_t tpos = 0; + usize_t delta_size, recon_size; + xd3_config config; + uint8_t target[TESTBUFSIZE]; + uint8_t delta[TESTBUFSIZE]; + uint8_t recon[TESTBUFSIZE]; + + xd3_free_stream (stream); + xd3_init_config (& config, 0); + + config.smatch_cfg = XD3_SMATCH_SOFT; + config.smatcher_soft.large_look = 16; + config.smatcher_soft.large_step = 16; + config.smatcher_soft.small_look = 4; + config.smatcher_soft.small_chain = 128; + config.smatcher_soft.small_lchain = 16; + config.smatcher_soft.max_lazy = 8; + config.smatcher_soft.long_enough = 128; + + if ((ret = xd3_config_stream (stream, & config))) { return ret; } + + for (i = 1; i < 250; i++) + { + target[tpos++] = i; + target[tpos++] = i+1; + target[tpos++] = i+2; + target[tpos++] = i+3; + target[tpos++] = 0; + } + for (i = 1; i < 253; i++) + { + target[tpos++] = i; + } + + if ((ret = xd3_encode_stream (stream, target, tpos, + delta, & delta_size, sizeof (delta)))) + { + return ret; + } + + xd3_free_stream(stream); + if 
((ret = xd3_config_stream (stream, & config))) { return ret; } + + if ((ret = xd3_decode_stream (stream, delta, delta_size, + recon, & recon_size, sizeof (recon)))) + { + return ret; + } + + CHECK(tpos == recon_size); + CHECK(memcmp(target, recon, recon_size) == 0); + + return 0; +} + +/* + * This tests the 32/64bit ambiguity for source-window matching. + */ +#if !XD3_USE_LARGESIZET +static int +test_source_cksum_offset (xd3_stream *stream, int ignore) + { + xd3_source source; + + // Inputs are: + struct { + xoff_t cpos; // stream->srcwin_cksum_pos; + xoff_t ipos; // stream->total_in; + xoff_t size; // stream->src->size; + + usize_t input; // input 32-bit offset + xoff_t output; // output 64-bit offset + + } cksum_test[] = { + // If cpos is <= 2^32 + { 1, 1, 1, 1, 1 }, + +#if XD3_USE_LARGEFILE64 +// cpos ipos size input output +// 0x____xxxxxULL, 0x____xxxxxULL, 0x____xxxxxULL, 0x___xxxxxUL, 0x____xxxxxULL + { 0x100100000ULL, 0x100000000ULL, 0x100200000ULL, 0x00000000UL, 0x100000000ULL }, + { 0x100100000ULL, 0x100000000ULL, 0x100200000ULL, 0xF0000000UL, 0x0F0000000ULL }, + + { 0x100200000ULL, 0x100100000ULL, 0x100200000ULL, 0x00300000UL, 0x000300000ULL }, + + { 25771983104ULL, 25770000000ULL, 26414808769ULL, 2139216707UL, 23614053187ULL }, + +#endif + + { 0, 0, 0, 0, 0 }, + }, *test_ptr; + + stream->src = &source; + + for (test_ptr = cksum_test; test_ptr->cpos; test_ptr++) { + xoff_t r; + stream->srcwin_cksum_pos = test_ptr->cpos; + stream->total_in = test_ptr->ipos; + + r = xd3_source_cksum_offset(stream, test_ptr->input); + CHECK(r == test_ptr->output); + } + return 0; +} +#endif /* !XD3_USE_LARGESIZET */ + +static int +test_in_memory (xd3_stream *stream, int ignore) +{ + // test_text is 256 bytes + uint8_t ibuf[sizeof(test_text)]; + uint8_t dbuf[sizeof(test_text)]; + uint8_t obuf[sizeof(test_text)]; + usize_t size = sizeof(test_text); + usize_t dsize, osize; + int r1, r2; + int eflags = SECONDARY_DJW ? 
XD3_SEC_DJW : 0; + + memcpy(ibuf, test_text, size); + memset(ibuf + 128, 0, 16); + + r1 = xd3_encode_memory(ibuf, size, + test_text, size, + dbuf, &dsize, size, eflags); + + r2 = xd3_decode_memory(dbuf, dsize, + test_text, size, + obuf, &osize, size, 0); + + if (r1 != 0 || r2 != 0 || dsize >= (size/2) || dsize < 1 || + osize != size) { + stream->msg = "encode/decode size error"; + return XD3_INTERNAL; + } + + if (memcmp(obuf, ibuf, size) != 0) { + stream->msg = "encode/decode data error"; + return XD3_INTERNAL; + } + + return 0; +} + +/*********************************************************************** + TEST MAIN + ***********************************************************************/ + +int xd3_selftest (void) +{ +#define DO_TEST(fn,flags,arg) \ + do { /* one freshly configured stream per test; ret keeps the error code */ \ + xd3_stream stream; \ + xd3_config config; \ + xd3_init_config (& config, flags); \ + XPR(NT "testing " #fn "%s...", \ + flags ? (" (" #flags ")") : ""); \ + if ((ret = xd3_config_stream (& stream, & config)) == 0 && \ + (ret = test_ ## fn (& stream, arg)) == 0) { \ + XPR(NTR " success\n"); \ + } else { \ + XPR(NTR " failed: %s: %s\n", xd3_errstring (& stream), \ + xd3_mainerror (ret)); } \ + xd3_free_stream (& stream); \ + if (ret != 0) { goto failure; } \ + } while (0) + + int ret; /* result of the most recent config/test call; non-zero aborts the run */ + DO_TEST (random_numbers, 0, 0); + DO_TEST (printf_xoff, 0, 0); + + DO_TEST (decode_integer_end_of_input, 0, 0); + DO_TEST (decode_integer_overflow, 0, 0); + DO_TEST (encode_decode_uint32_t, 0, 0); + DO_TEST (encode_decode_uint64_t, 0, 0); + DO_TEST (usize_t_overflow, 0, 0); + DO_TEST (checksum_step, 0, 0); + DO_TEST (forward_match, 0, 0); + DO_TEST (address_cache, 0, 0); + + DO_TEST (string_matching, 0, 0); + DO_TEST (choose_instruction, 0, 0); + DO_TEST (identical_behavior, 0, 0); + DO_TEST (in_memory, 0, 0); + + DO_TEST (iopt_flush_instructions, 0, 0); +#if !XD3_USE_LARGESIZET + DO_TEST (source_cksum_offset, 0, 0); +#endif + + DO_TEST (decompress_single_bit_error, 0, 3); + DO_TEST (decompress_single_bit_error, XD3_ADLER32,
3); + + IF_LZMA (DO_TEST (decompress_single_bit_error, XD3_SEC_LZMA, 54)); + IF_FGK (DO_TEST (decompress_single_bit_error, XD3_SEC_FGK, 3)); + IF_DJW (DO_TEST (decompress_single_bit_error, XD3_SEC_DJW, 8)); + +#if SHELL_TESTS + DO_TEST (force_behavior, 0, 0); + DO_TEST (stdout_behavior, 0, 0); + DO_TEST (no_output, 0, 0); + DO_TEST (appheader, 0, 0); + DO_TEST (command_line_arguments, 0, 0); + +#if EXTERNAL_COMPRESSION + DO_TEST (source_decompression, 0, 0); + DO_TEST (externally_compressed_io, 0, 0); +#endif + + DO_TEST (recode_command, 0, 0); + IF_LZMA (DO_TEST (secondary_lzma_default, 0, 0)); +#endif + + IF_LZMA (DO_TEST (secondary_lzma, 0, 1)); + IF_DJW (DO_TEST (secondary_huff, 0, DJW_MAX_GROUPS)); + IF_FGK (DO_TEST (secondary_fgk, 0, 1)); + + DO_TEST (compressed_stream_overflow, 0, 0); + IF_LZMA (DO_TEST (compressed_stream_overflow, XD3_SEC_LZMA, 0)); + +failure: + test_cleanup (); + return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE; +#undef DO_TEST +} diff --git a/lib/xdelta3/xdelta3.1 b/lib/xdelta3/xdelta3.1 new file mode 100644 index 0000000..693171e --- /dev/null +++ b/lib/xdelta3/xdelta3.1 @@ -0,0 +1,153 @@ +.TH XDELTA3 "1" "August 2009" "Xdelta3" +.SH NAME +xdelta3 \- VCDIFF (RFC 3284) binary diff tool +.SH SYNOPSIS +.B xdelta3 +.RI [ command ] +.RI [ options ] +.RI [ input +.RI [ output ]] +.SH DESCRIPTION +.B xdelta3 +is a binary diff tool that uses the VCDIFF (RFC 3284) format and compression. +.SH COMMANDS +.TP +.BI config +prints xdelta3 configuration +.TP +.BI decode +decompress the input, also set by -d +.TP +.BI encode +compress the input, also set by -e (default) +.TP +.BI test +run the builtin tests +.TP +.BI printdelta +print information about the entire delta +.TP +.BI printhdr +print information about the first window +.TP +.BI printhdrs +print information about all windows +.TP +.BI recode +encode with new application/secondary settings + +.SH OPTIONS +standard options: +.TP +.BI "\-0 .. 
\-9" +compression level +.TP +.BI "\-c" +use stdout +.TP +.BI "\-d" +decompress +.TP +.BI \-e +compress +.TP +.BI \-f +force overwrite +.TP +.BI \-h +show help +.TP +.BI \-q +be quiet +.TP +.BI \-v +be verbose (max 2) +.TP +.BI \-V +show version + +.TP +memory options: +.TP +.BI \-B +.RI bytes +source window size +.TP +.BI \-W +.RI bytes +input window size +.TP +.BI \-P +.RI size +compression duplicates window +.TP +.BI \-I +.RI size +instruction buffer size (0 = unlimited) + +.TP +compression options: +.TP +.BI \-s +.RI source +source file to copy from (if any) +.TP +.BI "\-S " [djw|fgk] +enable/disable secondary compression +.TP +.BI \-N +disable small string-matching compression +.TP +.BI \-D +disable external decompression (encode/decode) +.TP +.BI \-R +disable external recompression (decode) +.TP +.BI \-n +disable checksum (encode/decode) +.TP +.BI \-C +soft config (encode, undocumented) +.TP +.BI "\-A " [apphead] +disable/provide application header (encode) +.TP +.BI \-J +disable output (check/compute only) +.TP +.BI \-T +use alternate code table (test) + +.SH NOTES +The +.B XDELTA +environment variable may contain extra args: + +.RS +XDELTA="-s source-x.y.tar.gz" \\ +.br +tar --use-compress-program=xdelta3 -cf \\ +.br +target-x.z.tar.gz.vcdiff target-x.y/ + +.SH EXAMPLES + +Compress the differences between SOURCE and TARGET, yielding OUT, +using "djw" secondary compression: + +xdelta3 -S djw -s SOURCE TARGET OUT + +Do the same, using standard input and output: + +xdelta3 -S djw -s SOURCE < TARGET > OUT + +To decompress OUT, using SOURCE, yielding TARGET: + +xdelta3 -d -s SOURCE OUT TARGET + +.SH AUTHOR +xdelta3 was written by Josh MacDonald . +.PP +This manual page was written by Leo 'costela' Antunes +for the Debian project (but may be used by others). 
diff --git a/lib/xdelta3/xdelta3.c b/lib/xdelta3/xdelta3.c new file mode 100644 index 0000000..ef518cd --- /dev/null +++ b/lib/xdelta3/xdelta3.c @@ -0,0 +1,4819 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + ------------------------------------------------------------------- + + Xdelta 3 + + The goal of this library is to implement both the (stand-alone) + data-compression and delta-compression aspects of VCDIFF encoding, and + to support a programming interface that works like Zlib + (http://www.gzip.org/zlib.html). See RFC3284: The VCDIFF Generic + Differencing and Compression Data Format. + + VCDIFF is a unified encoding that combines data-compression and + delta-encoding ("differencing"). + + VCDIFF has a detailed byte-code instruction set with many features. + The instruction format supports an immediate size operand for small + COPYs and ADDs (e.g., under 18 bytes). There are also instruction + "modes", which are used to compress COPY addresses by using two + address caches. An instruction mode refers to slots in the NEAR + and SAME caches for recent addresses. NEAR remembers the + previous 4 (by default) COPY addresses, and SAME catches + frequent re-uses of the same address using a 3-way (by default) + 256-entry associative cache of [ADDR mod 256], the encoded byte. + A hit in the NEAR/SAME cache requires 0/1 ADDR bytes.
+ + VCDIFF has a default instruction table, but alternate + instruction tables may themselves be delta-compressed and + included in the encoding header. This allows even more freedom. + There are 9 instruction modes in the default code table, 4 near, 3 + same, VCD_SELF (absolute encoding) and VCD_HERE (relative to the + current position). + + ---------------------------------------------------------------------- + + Algorithms + + Aside from the details of encoding and decoding, there are a bunch + of algorithms needed. + + 1. STRING-MATCH. A two-level fingerprinting approach is used. A + single loop computes the two checksums -- small and large -- at + successive offsets in the TARGET file. The large checksum is more + accurate and is used to discover SOURCE matches, which are + potentially very long. The small checksum is used to discover + copies within the TARGET. Small matching, which is more expensive, + usually dominates the large STRING-MATCH costs in this code - the + more exhaustive the search, the better the results. Either of the + two string-matching mechanisms may be disabled. + + 2. INSTRUCTION SELECTION. The IOPT buffer here represents a queue + used to store overlapping copy instructions. There are two possible + optimizations that go beyond a greedy search. Both of these fall + into the category of "non-greedy matching" optimizations. + + The first optimization stems from backward SOURCE-COPY matching. + When a new SOURCE-COPY instruction covers a previous instruction in + the target completely, it is erased from the queue. Randal Burns + originally analyzed these algorithms and did a lot of related work + (\cite the 1.5-pass algorithm). + + The second optimization comes by the encoding of common very-small + COPY and ADD instructions, for which there are special DOUBLE-code + instructions, which code two instructions in a single byte.
+ + The cost of bad instruction-selection overhead is relatively high + for data-compression, relative to delta-compression, so this second + optimization is fairly important. With "lazy" matching (the name + used in Zlib for a similar optimization), the string-match + algorithm searches after a match for potential overlapping copy + instructions. In Xdelta and by default, VCDIFF, the minimum match + size is 4 bytes, whereas Zlib searches with a 3-byte minimum. This + feature, combined with double instructions, provides a nice + challenge. Search in this file for "black magic", a heuristic. + + 3. STREAM ALIGNMENT. Stream alignment is needed to compress large + inputs in constant space. See xd3_srcwin_move_point(). + + 4. WINDOW SELECTION. When the IOPT buffer flushes, in the first call + to xd3_iopt_finish_encoding containing any kind of copy instruction, + the parameters of the source window must be decided: the offset into + the source and the length of the window. Since the IOPT buffer is + finite, the program may be forced to fix these values before knowing + the best offset/length. + + 5. SECONDARY COMPRESSION. VCDIFF supports a secondary encoding to + be applied to the individual sections of the data format, which are + ADDRess, INSTruction, and DATA. Several secondary compressor + variations are implemented here, although none is standardized yet. + + One is an adaptive huffman algorithm -- the FGK algorithm (Faller, + Gallager, and Knuth, 1985). This compressor is extremely slow. + + The other is a simple static Huffman routine, which is the base + case of a semi-adaptive scheme published by D.J. Wheeler and first + widely used in bzip2 (by Julian Seward). This is a very + interesting algorithm, originally published in nearly cryptic form + by D.J. Wheeler. !!!NOTE!!! Because these are not standardized, + secondary compression remains off by default. 
+ ftp://ftp.cl.cam.ac.uk/users/djw3/bred3.{c,ps} + -------------------------------------------------------------------- + + Other Features + + 1. USER CONVENIENCE + + For user convenience, it is essential to recognize Gzip-compressed + files and automatically Gzip-decompress them prior to + delta-compression (or else no delta-compression will be achieved + unless the user manually decompresses the inputs). The compressed + representation competes with Xdelta, and this must be hidden from the + command-line user interface. The Xdelta-1.x encoding was simple, not + compressed itself, so Xdelta-1.x uses Zlib internally to compress the + representation. + + This implementation supports external compression, which implements + the necessary fork() and pipe() mechanics. There is a tricky step + involved to support automatic detection of a compressed input in a + non-seekable input. First you read a bit of the input to detect + magic headers. When a compressed format is recognized, exec() the + external compression program and create a second child process to + copy the original input stream. [Footnote: There is a difficulty + related to using Gzip externally. It is not possible to decompress + and recompress a Gzip file transparently. If FILE.GZ had a + cryptographic signature, then, after: (1) Gzip-decompression, (2) + Xdelta-encoding, (3) Gzip-compression the signature could be + broken. The only way to solve this problem is to guess at Gzip's + compression level or control it by other means. I recommend that + specific implementations of any compression scheme store + information needed to exactly re-compress the input, that way + external compression is transparent - however, this won't happen + here until it has stabilized.] + + 2. APPLICATION-HEADER + + This feature was introduced in RFC3284. It allows any application + to include a header within the VCDIFF file format.
This allows + general inter-application data exchange with support for + application-specific extensions to communicate metadata. + + 3. VCDIFF CHECKSUM + + An optional checksum value is included with each window, which can + be used to validate the final result. This verifies the correct source + file was used for decompression as well as the obvious advantage: + checking the implementation (and underlying) correctness. + + 4. LIGHT WEIGHT + + The code makes efforts to avoid copying data more than necessary. + The code delays many initialization tasks until the first use, it + optimizes for identical (perfectly matching) inputs. It does not + compute any checksums until the first lookup misses. Memory usage + is reduced. String-matching is templatized (by slightly gross use + of CPP) to hard-code alternative compile-time defaults. The code + has few outside dependencies. + ---------------------------------------------------------------------- + + The default rfc3284 instruction table: + (see RFC for the explanation) + + TYPE SIZE MODE TYPE SIZE MODE INDEX + -------------------------------------------------------------------- + 1. Run 0 0 Noop 0 0 0 + 2. Add 0, [1,17] 0 Noop 0 0 [1,18] + 3. Copy 0, [4,18] 0 Noop 0 0 [19,34] + 4. Copy 0, [4,18] 1 Noop 0 0 [35,50] + 5. Copy 0, [4,18] 2 Noop 0 0 [51,66] + 6. Copy 0, [4,18] 3 Noop 0 0 [67,82] + 7. Copy 0, [4,18] 4 Noop 0 0 [83,98] + 8. Copy 0, [4,18] 5 Noop 0 0 [99,114] + 9. Copy 0, [4,18] 6 Noop 0 0 [115,130] + 10. Copy 0, [4,18] 7 Noop 0 0 [131,146] + 11. Copy 0, [4,18] 8 Noop 0 0 [147,162] + 12. Add [1,4] 0 Copy [4,6] 0 [163,174] + 13. Add [1,4] 0 Copy [4,6] 1 [175,186] + 14. Add [1,4] 0 Copy [4,6] 2 [187,198] + 15. Add [1,4] 0 Copy [4,6] 3 [199,210] + 16. Add [1,4] 0 Copy [4,6] 4 [211,222] + 17. Add [1,4] 0 Copy [4,6] 5 [223,234] + 18. Add [1,4] 0 Copy 4 6 [235,238] + 19. Add [1,4] 0 Copy 4 7 [239,242] + 20. Add [1,4] 0 Copy 4 8 [243,246] + 21. 
Copy 4 [0,8] Add 1 0 [247,255] + -------------------------------------------------------------------- + + Reading the source: Overview + + This file includes itself in several passes to macro-expand certain + sections with variable forms. Just read ahead, there's only a + little confusion. I know this sounds ugly, but hard-coding some of + the string-matching parameters results in a 10-15% increase in + string-match performance. The only time this hurts is when you have + unbalanced #if/endifs. + + A single compilation unit tames the Makefile. In short, this is to + allow the above-described hack without an exploding Makefile. The + single compilation unit includes the core library features, + configurable string-match templates, optional main() command-line + tool, misc optional features, and a regression test. Features are + controlled with CPP #defines, see Makefile.am. + + The initial __XDELTA3_C_HEADER_PASS__ starts first, the _INLINE_ and + _TEMPLATE_ sections follow. Easy stuff first, hard stuff last. + + Optional features include: + + xdelta3-main.h The command-line interface, external compression + support, POSIX-specific, info & VCDIFF-debug tools. + xdelta3-second.h The common secondary compression routines. + xdelta3-decoder.h All decoding routines. + xdelta3-djw.h The semi-adaptive huffman secondary encoder. + xdelta3-fgk.h The adaptive huffman secondary encoder. + xdelta3-test.h The unit test covers major algorithms, + encoding and decoding. There are single-bit + error decoding tests. There are 32/64-bit file size + boundary tests. There are command-line tests. + There are compression tests. There are external + compression tests. There are string-matching tests. + There should be more tests... + + Additional headers include: + + xdelta3.h The public header file. + xdelta3-cfgs.h The default settings for default, built-in + encoders. These are hard-coded at + compile-time.
There is also a single + soft-coded string matcher for experimenting + with arbitrary values. + xdelta3-list.h A cyclic list template + + Misc little debug utilities: + + badcopy.c Randomly modifies an input file based on two + parameters: (1) the probability that a byte in + the file is replaced with a pseudo-random value, + and (2) the mean change size. Changes are + generated using an exponential distribution + which approximates the expected error_prob + distribution. + -------------------------------------------------------------------- + + This file itself is unusually large. I hope to defend this layout + with lots of comments. Everything in this file is related to + encoding and decoding. I like it all together - the template stuff + is just a hack. */ + +#ifndef __XDELTA3_C_HEADER_PASS__ +#define __XDELTA3_C_HEADER_PASS__ + +#include "xdelta3.h" +#include "xdelta3-internal.h" + +/*********************************************************************** + STATIC CONFIGURATION + ***********************************************************************/ + +#ifndef XD3_MAIN /* the main application */ +#define XD3_MAIN 0 +#endif + +#ifndef VCDIFF_TOOLS +#define VCDIFF_TOOLS XD3_MAIN +#endif + +#ifndef SECONDARY_FGK /* one from the algorithm preservation department: */ +#define SECONDARY_FGK 0 /* adaptive Huffman routines */ +#endif + +#ifndef SECONDARY_DJW /* semi-adaptive/static Huffman for the eventual */ +#define SECONDARY_DJW 0 /* standardization, off by default until such time.
*/ +#endif + +#ifndef SECONDARY_LZMA +#ifdef HAVE_LZMA_H +#define SECONDARY_LZMA 1 +#else +#define SECONDARY_LZMA 0 +#endif +#endif + +#if XD3_ENCODER +#define IF_ENCODER(x) x +#else +#define IF_ENCODER(x) +#endif + +/***********************************************************************/ + + /* header indicator bits */ +#define VCD_SECONDARY (1U << 0) /* uses secondary compressor */ +#define VCD_CODETABLE (1U << 1) /* supplies code table data */ +#define VCD_APPHEADER (1U << 2) /* supplies application data */ +#define VCD_INVHDR (~0x7U) + + /* window indicator bits */ +#define VCD_SOURCE (1U << 0) /* copy window in source file */ +#define VCD_TARGET (1U << 1) /* copy window in target file */ +#define VCD_ADLER32 (1U << 2) /* has adler32 checksum */ +#define VCD_INVWIN (~0x7U) + +#define VCD_SRCORTGT (VCD_SOURCE | VCD_TARGET) + + /* delta indicator bits */ +#define VCD_DATACOMP (1U << 0) +#define VCD_INSTCOMP (1U << 1) +#define VCD_ADDRCOMP (1U << 2) +#define VCD_INVDEL (~0x7U) + +typedef enum { + VCD_DJW_ID = 1, + VCD_LZMA_ID = 2, + VCD_FGK_ID = 16 /* Note: these are not standard IANA-allocated IDs! */ +} xd3_secondary_ids; + +typedef enum { + SEC_NOFLAGS = 0, + + /* Note: SEC_COUNT_FREQS Not implemented (to eliminate 1st Huffman pass) */ + SEC_COUNT_FREQS = (1 << 0) +} xd3_secondary_flags; + +typedef enum { + DATA_SECTION, /* These indicate which section to the secondary + * compressor. */ + INST_SECTION, /* The header section is not compressed, therefore not + * listed here. */ + ADDR_SECTION +} xd3_section_type; + +typedef unsigned int xd3_rtype; + +/***********************************************************************/ + +#include "xdelta3-list.h" + +#if XD3_ENCODER +XD3_MAKELIST(xd3_rlist, xd3_rinst, link); +#endif + +/***********************************************************************/ + +#define SECONDARY_MIN_SAVINGS 2 /* Secondary compression has to save + at least this many bytes. 
*/ +#define SECONDARY_MIN_INPUT 10 /* Secondary compression needs at + least this many bytes. */ + +#define VCDIFF_MAGIC1 0xd6 /* 1st file byte */ +#define VCDIFF_MAGIC2 0xc3 /* 2nd file byte */ +#define VCDIFF_MAGIC3 0xc4 /* 3rd file byte */ +#define VCDIFF_VERSION 0x00 /* 4th file byte */ + +#define VCD_SELF 0 /* 1st address mode */ +#define VCD_HERE 1 /* 2nd address mode */ + +#define SECONDARY_ANY (SECONDARY_DJW || SECONDARY_FGK || SECONDARY_LZMA) + +#define ALPHABET_SIZE 256 /* Used in test code--size of the secondary + * compressor alphabet. */ + +#define HASH_CKOFFSET 1U /* Table entries distinguish "no-entry" from + * offset 0 using this offset. */ + +#define MAX_MATCH_SPLIT 18U /* VCDIFF code table: 18 is the default limit + * for direct-coded ADD sizes */ + +#define LEAST_MATCH_INCR 0 /* The least number of bytes an overlapping + * match must beat the preceding match by. This + * is a bias for the lazy match optimization. A + * non-zero value means that an adjacent match + * has to be better by more than the step + * between them. 0. */ + +#define MIN_MATCH 4U /* VCDIFF code table: MIN_MATCH=4 */ +#define MIN_RUN 8U /* The shortest run, if it is shorter than this + * an immediate add/copy will be just as good. + * ADD1/COPY6 = 1I+1D+1A bytes, RUN18 = + * 1I+1D+1A. */ + +#define MAX_MODES 9 /* Maximum number of modes used for + * compression--does not limit decompression. */ + +#define ENC_SECTS 4 /* Number of separate output sections. */ + +#define HDR_TAIL(s) ((s)->enc_tails[0]) +#define DATA_TAIL(s) ((s)->enc_tails[1]) +#define INST_TAIL(s) ((s)->enc_tails[2]) +#define ADDR_TAIL(s) ((s)->enc_tails[3]) + +#define HDR_HEAD(s) ((s)->enc_heads[0]) +#define DATA_HEAD(s) ((s)->enc_heads[1]) +#define INST_HEAD(s) ((s)->enc_heads[2]) +#define ADDR_HEAD(s) ((s)->enc_heads[3]) + +/* Template instances.
*/ +#if XD3_BUILD_SLOW +#define IF_BUILD_SLOW(x) x +#else +#define IF_BUILD_SLOW(x) +#endif +#if XD3_BUILD_FAST +#define IF_BUILD_FAST(x) x +#else +#define IF_BUILD_FAST(x) +#endif +#if XD3_BUILD_FASTER +#define IF_BUILD_FASTER(x) x +#else +#define IF_BUILD_FASTER(x) +#endif +#if XD3_BUILD_FASTEST +#define IF_BUILD_FASTEST(x) x +#else +#define IF_BUILD_FASTEST(x) +#endif +#if XD3_BUILD_SOFT +#define IF_BUILD_SOFT(x) x +#else +#define IF_BUILD_SOFT(x) +#endif +#if XD3_BUILD_DEFAULT +#define IF_BUILD_DEFAULT(x) x +#else +#define IF_BUILD_DEFAULT(x) +#endif + +/* Update the run-length state */ +#define NEXTRUN(c) do { if ((c) == run_c) { run_l += 1; } \ + else { run_c = (c); run_l = 1; } } while (0) + +/* This CPP-conditional stuff can be cleaned up... */ +#if REGRESSION_TEST +#define IF_REGRESSION(x) x +#else +#define IF_REGRESSION(x) +#endif + +/***********************************************************************/ + +#if XD3_ENCODER +static void* xd3_alloc0 (xd3_stream *stream, + usize_t elts, + usize_t size); + + +static int xd3_alloc_iopt (xd3_stream *stream, usize_t elts); + +static void xd3_free_output (xd3_stream *stream, + xd3_output *output); + +static int xd3_emit_double (xd3_stream *stream, xd3_rinst *first, + xd3_rinst *second, uint8_t code); +static int xd3_emit_single (xd3_stream *stream, xd3_rinst *single, + uint8_t code); + +static usize_t xd3_sizeof_output (xd3_output *output); +static void xd3_encode_reset (xd3_stream *stream); + +static int xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos); +static int xd3_source_extend_match (xd3_stream *stream); +static int xd3_srcwin_setup (xd3_stream *stream); +static usize_t xd3_iopt_last_matched (xd3_stream *stream); +static int xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, + uint32_t num); + +static usize_t xd3_smatch (xd3_stream *stream, + usize_t base, + usize_t scksum, + usize_t *match_offset); +static int xd3_string_match_init (xd3_stream *stream); +static uint32_t xd3_scksum 
(uint32_t *state, const uint8_t *seg, + const usize_t ln); +static usize_t xd3_comprun (const uint8_t *seg, usize_t slook, uint8_t *run_cp); +static int xd3_srcwin_move_point (xd3_stream *stream, + usize_t *next_move_point); + +static int xd3_emit_run (xd3_stream *stream, usize_t pos, + usize_t size, uint8_t *run_c); +static xoff_t xd3_source_cksum_offset(xd3_stream *stream, usize_t low); +static void xd3_scksum_insert (xd3_stream *stream, + usize_t inx, + usize_t scksum, + usize_t pos); + + +#if XD3_DEBUG +static void xd3_verify_run_state (xd3_stream *stream, + const uint8_t *inp, + usize_t x_run_l, + uint8_t *x_run_c); +static void xd3_verify_large_state (xd3_stream *stream, + const uint8_t *inp, + usize_t x_cksum); +static void xd3_verify_small_state (xd3_stream *stream, + const uint8_t *inp, + uint32_t x_cksum); + +#endif /* XD3_DEBUG */ +#endif /* XD3_ENCODER */ + +static int xd3_decode_allocate (xd3_stream *stream, usize_t size, + uint8_t **copied1, usize_t *alloc1); + +static void* xd3_alloc (xd3_stream *stream, usize_t elts, usize_t size); +static void xd3_free (xd3_stream *stream, void *ptr); + +const char* xd3_strerror (int ret) +{ + switch (ret) + { + case XD3_INPUT: return "XD3_INPUT"; + case XD3_OUTPUT: return "XD3_OUTPUT"; + case XD3_GETSRCBLK: return "XD3_GETSRCBLK"; + case XD3_GOTHEADER: return "XD3_GOTHEADER"; + case XD3_WINSTART: return "XD3_WINSTART"; + case XD3_WINFINISH: return "XD3_WINFINISH"; + case XD3_TOOFARBACK: return "XD3_TOOFARBACK"; + case XD3_INTERNAL: return "XD3_INTERNAL"; + case XD3_INVALID: return "XD3_INVALID"; + case XD3_INVALID_INPUT: return "XD3_INVALID_INPUT"; + case XD3_NOSECOND: return "XD3_NOSECOND"; + case XD3_UNIMPLEMENTED: return "XD3_UNIMPLEMENTED"; + } + return NULL; +} + +/***********************************************************************/ + +#define xd3_sec_data(s) ((s)->sec_stream_d) +#define xd3_sec_inst(s) ((s)->sec_stream_i) +#define xd3_sec_addr(s) ((s)->sec_stream_a) + +struct _xd3_sec_type +{ + uint8_t 
id; + const char *name; + xd3_secondary_flags flags; + + /* xd3_sec_stream is opaque to the generic code */ + xd3_sec_stream* (*alloc) (xd3_stream *stream); + void (*destroy) (xd3_stream *stream, + xd3_sec_stream *sec); + int (*init) (xd3_stream *stream, + xd3_sec_stream *sec_stream, + int is_encode); + int (*decode) (xd3_stream *stream, + xd3_sec_stream *sec_stream, + const uint8_t **input, + const uint8_t *input_end, + uint8_t **output, + const uint8_t *output_end); +#if XD3_ENCODER + int (*encode) (xd3_stream *stream, + xd3_sec_stream *sec_stream, + xd3_output *input, + xd3_output *output, + xd3_sec_cfg *cfg); +#endif +}; + +#define BIT_STATE_ENCODE_INIT { 0, 1 } +#define BIT_STATE_DECODE_INIT { 0, 0x100 } + +typedef struct _bit_state bit_state; +struct _bit_state +{ + uint8_t cur_byte; + usize_t cur_mask; +}; + +#if SECONDARY_ANY == 0 +#define IF_SEC(x) +#define IF_NSEC(x) x +#else /* yuck */ +#define IF_SEC(x) x +#define IF_NSEC(x) +static int +xd3_decode_secondary (xd3_stream *stream, + xd3_desect *sect, + xd3_sec_stream **sec_streamp); +#if XD3_ENCODER +static int +xd3_encode_secondary (xd3_stream *stream, + xd3_output **head, + xd3_output **tail, + xd3_sec_stream **sec_streamp, + xd3_sec_cfg *cfg, + int *did_it); +#endif +#endif /* SECONDARY_ANY */ + +#if SECONDARY_FGK +extern const xd3_sec_type fgk_sec_type; +#define IF_FGK(x) x +#define FGK_CASE(s) \ + s->sec_type = & fgk_sec_type; \ + break; +#else +#define IF_FGK(x) +#define FGK_CASE(s) \ + s->msg = "unavailable secondary compressor: FGK Adaptive Huffman"; \ + return XD3_INTERNAL; +#endif + +#if SECONDARY_DJW +extern const xd3_sec_type djw_sec_type; +#define IF_DJW(x) x +#define DJW_CASE(s) \ + s->sec_type = & djw_sec_type; \ + break; +#else +#define IF_DJW(x) +#define DJW_CASE(s) \ + s->msg = "unavailable secondary compressor: DJW Static Huffman"; \ + return XD3_INTERNAL; +#endif + +#if SECONDARY_LZMA +extern const xd3_sec_type lzma_sec_type; +#define IF_LZMA(x) x +#define LZMA_CASE(s) \ + s->sec_type 
= & lzma_sec_type; \ + break; +#else +#define IF_LZMA(x) +#define LZMA_CASE(s) \ + s->msg = "unavailable secondary compressor: LZMA"; \ + return XD3_INTERNAL; +#endif + +/***********************************************************************/ + +#include "xdelta3-hash.h" + +/* Process template passes - this includes xdelta3.c several times. */ +#define __XDELTA3_C_TEMPLATE_PASS__ +#include "xdelta3-cfgs.h" +#undef __XDELTA3_C_TEMPLATE_PASS__ + +/* Process the inline pass. */ +#define __XDELTA3_C_INLINE_PASS__ +#include "xdelta3.c" +#undef __XDELTA3_C_INLINE_PASS__ + +/* Secondary compression */ +#if SECONDARY_ANY +#include "xdelta3-second.h" +#endif + +#if SECONDARY_FGK +#include "xdelta3-fgk.h" +const xd3_sec_type fgk_sec_type = +{ + VCD_FGK_ID, + "FGK Adaptive Huffman", + SEC_NOFLAGS, + (xd3_sec_stream* (*)(xd3_stream*)) fgk_alloc, + (void (*)(xd3_stream*, xd3_sec_stream*)) fgk_destroy, + (int (*)(xd3_stream*, xd3_sec_stream*, int)) fgk_init, + (int (*)(xd3_stream*, xd3_sec_stream*, const uint8_t**, const uint8_t*, + uint8_t**, const uint8_t*)) xd3_decode_fgk, + IF_ENCODER((int (*)(xd3_stream*, xd3_sec_stream*, xd3_output*, + xd3_output*, xd3_sec_cfg*)) xd3_encode_fgk) +}; +#endif + +#if SECONDARY_DJW +#include "xdelta3-djw.h" +const xd3_sec_type djw_sec_type = +{ + VCD_DJW_ID, + "Static Huffman", + SEC_COUNT_FREQS, + (xd3_sec_stream* (*)(xd3_stream*)) djw_alloc, + (void (*)(xd3_stream*, xd3_sec_stream*)) djw_destroy, + (int (*)(xd3_stream*, xd3_sec_stream*, int)) djw_init, + (int (*)(xd3_stream*, xd3_sec_stream*, const uint8_t**, const uint8_t*, + uint8_t**, const uint8_t*)) xd3_decode_huff, + IF_ENCODER((int (*)(xd3_stream*, xd3_sec_stream*, xd3_output*, + xd3_output*, xd3_sec_cfg*)) xd3_encode_huff) +}; +#endif + +#if SECONDARY_LZMA +#include "xdelta3-lzma.h" +const xd3_sec_type lzma_sec_type = +{ + VCD_LZMA_ID, + "lzma", + SEC_NOFLAGS, + (xd3_sec_stream* (*)(xd3_stream*)) xd3_lzma_alloc, + (void (*)(xd3_stream*, xd3_sec_stream*)) xd3_lzma_destroy, + (int 
(*)(xd3_stream*, xd3_sec_stream*, int)) xd3_lzma_init, + (int (*)(xd3_stream*, xd3_sec_stream*, const uint8_t**, const uint8_t*, + uint8_t**, const uint8_t*)) xd3_decode_lzma, + IF_ENCODER((int (*)(xd3_stream*, xd3_sec_stream*, xd3_output*, + xd3_output*, xd3_sec_cfg*)) xd3_encode_lzma) +}; +#endif + +#if XD3_MAIN || PYTHON_MODULE || SWIG_MODULE || NOT_MAIN +#include "xdelta3-main.h" +#endif + +#if REGRESSION_TEST +#include "xdelta3-test.h" +#endif + +#endif /* __XDELTA3_C_HEADER_PASS__ */ +#ifdef __XDELTA3_C_INLINE_PASS__ + +/**************************************************************** + Instruction tables + *****************************************************************/ + +/* The following code implements a parametrized description of the + * code table given above for a few reasons. It is not necessary for + * implementing the standard, to support compression with variable + * tables, so an implementation is only required to know the default + * code table to begin decompression. (If the encoder uses an + * alternate table, the table is included in compressed form inside + * the VCDIFF file.) + * + * Before adding variable-table support there were two functions which + * were hard-coded to the default table above. + * xd3_compute_default_table() would create the default table by + * filling a 256-elt array of xd3_dinst values. The corresponding + * function, xd3_choose_instruction(), would choose an instruction + * based on the hard-coded parameters of the default code table. + * + * Notes: The parametrized code table description here only generates + * tables of a certain regularity similar to the default table by + * allowing to vary the distribution of single- and + * double-instructions and change the number of near and same copy + * modes. More exotic tables are only possible by extending this + * code. + * + * For performance reasons, both the parametrized and non-parametrized + * versions of xd3_choose_instruction remain. 
The parametrized + * version is only needed for testing multi-table decoding support. + * If ever multi-table encoding is required, this can be optimized by + * compiling static functions for each table. + */ + +/* The XD3_CHOOSE_INSTRUCTION calls xd3_choose_instruction with the + * table description when GENERIC_ENCODE_TABLES are in use. The + * IF_GENCODETBL macro enables generic-code-table specific code + * (removed 10/2014). */ +#define XD3_CHOOSE_INSTRUCTION(stream,prev,inst) \ + xd3_choose_instruction (prev, inst) + +/* This structure maintains information needed by + * xd3_choose_instruction to compute the code for a double instruction + * by first indexing an array of code_table_sizes by copy mode, then + * using (offset + (multiplier * X)) */ +struct _xd3_code_table_sizes { + uint8_t cpy_max; + uint8_t offset; + uint8_t mult; +}; + +/* This contains a complete description of a code table. */ +struct _xd3_code_table_desc +{ + /* Assumes a single RUN instruction */ + /* Assumes that MIN_MATCH is 4 */ + + uint8_t add_sizes; /* Number of immediate-size single + adds (default 17) */ + uint8_t near_modes; /* Number of near copy modes (default 4) */ + uint8_t same_modes; /* Number of same copy modes (default 3) */ + uint8_t cpy_sizes; /* Number of immediate-size single + copies (default 15) */ + + uint8_t addcopy_add_max; /* Maximum add size for an add-copy + double instruction, all modes + (default 4) */ + uint8_t addcopy_near_cpy_max; /* Maximum cpy size for an add-copy + double instruction, up through + VCD_NEAR modes (default 6) */ + uint8_t addcopy_same_cpy_max; /* Maximum cpy size for an add-copy + double instruction, VCD_SAME modes + (default 4) */ + + uint8_t copyadd_add_max; /* Maximum add size for a copy-add + double instruction, all modes + (default 1) */ + uint8_t copyadd_near_cpy_max; /* Maximum cpy size for a copy-add + double instruction, up through + VCD_NEAR modes (default 4) */ + uint8_t copyadd_same_cpy_max; /* Maximum cpy size for a copy-add +
double instruction, VCD_SAME modes + (default 4) */ + + xd3_code_table_sizes addcopy_max_sizes[MAX_MODES]; + xd3_code_table_sizes copyadd_max_sizes[MAX_MODES]; +}; + +/* The rfc3284 code table is represented: */ +static const xd3_code_table_desc __rfc3284_code_table_desc = { + 17, /* add sizes */ + 4, /* near modes */ + 3, /* same modes */ + 15, /* copy sizes */ + + 4, /* add-copy max add */ + 6, /* add-copy max cpy, near */ + 4, /* add-copy max cpy, same */ + + 1, /* copy-add max add */ + 4, /* copy-add max cpy, near */ + 4, /* copy-add max cpy, same */ + + /* addcopy */ + { {6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3}, + {4,235,1},{4,239,1},{4,243,1} }, + /* copyadd */ + { {4,247,1},{4,248,1},{4,249,1},{4,250,1},{4,251,1},{4,252,1}, + {4,253,1},{4,254,1},{4,255,1} }, +}; + +/* Computes code table entries of TBL using the specified description. */ +static void +xd3_build_code_table (const xd3_code_table_desc *desc, xd3_dinst *tbl) +{ + uint8_t size1, size2; + uint8_t mode; + usize_t cpy_modes = 2U + desc->near_modes + desc->same_modes; + xd3_dinst *d = tbl; + + (d++)->type1 = XD3_RUN; + (d++)->type1 = XD3_ADD; + + for (size1 = 1; size1 <= desc->add_sizes; size1 += 1, d += 1) + { + d->type1 = XD3_ADD; + d->size1 = size1; + } + + for (mode = 0; mode < cpy_modes; mode += 1) + { + (d++)->type1 = XD3_CPY + mode; + + for (size1 = MIN_MATCH; size1 < MIN_MATCH + desc->cpy_sizes; + size1 += 1, d += 1) + { + d->type1 = XD3_CPY + mode; + d->size1 = size1; + } + } + + for (mode = 0; mode < cpy_modes; mode += 1) + { + for (size1 = 1; size1 <= desc->addcopy_add_max; size1 += 1) + { + usize_t max = (mode < 2U + desc->near_modes) ? + desc->addcopy_near_cpy_max : + desc->addcopy_same_cpy_max; + + for (size2 = MIN_MATCH; size2 <= max; size2 += 1, d += 1) + { + d->type1 = XD3_ADD; + d->size1 = size1; + d->type2 = XD3_CPY + mode; + d->size2 = size2; + } + } + } + + for (mode = 0; mode < cpy_modes; mode += 1) + { + usize_t max = (mode < 2U + desc->near_modes) ? 
+ desc->copyadd_near_cpy_max : + desc->copyadd_same_cpy_max; + + for (size1 = MIN_MATCH; size1 <= max; size1 += 1) + { + for (size2 = 1; size2 <= desc->copyadd_add_max; size2 += 1, d += 1) + { + d->type1 = XD3_CPY + mode; + d->size1 = size1; + d->type2 = XD3_ADD; + d->size2 = size2; + } + } + } + + XD3_ASSERT (d - tbl == 256); +} + +/* This function generates the static default code table. */ +static const xd3_dinst* +xd3_rfc3284_code_table (void) +{ + static xd3_dinst __rfc3284_code_table[256]; + + if (__rfc3284_code_table[0].type1 != XD3_RUN) + { + xd3_build_code_table (& __rfc3284_code_table_desc, __rfc3284_code_table); + } + + return __rfc3284_code_table; +} + +#if XD3_ENCODER +/* This version of xd3_choose_instruction is hard-coded for the default + table. */ +static void +xd3_choose_instruction (xd3_rinst *prev, xd3_rinst *inst) +{ + switch (inst->type) + { + case XD3_RUN: + inst->code1 = 0; + break; + + case XD3_ADD: + inst->code1 = 1; + + if (inst->size <= 17) + { + inst->code1 += inst->size; + + if ( (inst->size == 1) && + (prev != NULL) && + (prev->size == 4) && + (prev->type >= XD3_CPY) ) + { + prev->code2 = 247 + (prev->type - XD3_CPY); + } + } + + break; + + default: + { + uint8_t mode = inst->type - XD3_CPY; + + XD3_ASSERT (inst->type >= XD3_CPY && inst->type < 12); + + inst->code1 = 19 + 16 * mode; + + if (inst->size <= 18 && inst->size >= 4) + { + inst->code1 += inst->size - 3; + + if ( (prev != NULL) && + (prev->type == XD3_ADD) && + (prev->size <= 4) ) + { + if ( (inst->size <= 6) && + (mode <= 5) ) + { + prev->code2 = (uint8_t)(163 + (mode * 12) + + (3 * (prev->size - 1)) + + (inst->size - 4)); + XD3_ASSERT (prev->code2 <= 234); + } + else if ( (inst->size == 4) && + (mode >= 6) ) + { + prev->code2 = 235 + ((mode - 6) * 4) + (prev->size - 1); + + XD3_ASSERT (prev->code2 <= 246); + } + } + } + + XD3_ASSERT (inst->code1 <= 162); + } + break; + } +} +#endif /* XD3_ENCODER */ + 
+/***********************************************************************/
+/*
+ * Exchanges the two byte pointers stored at *p1 and *p2.
+ */
+static inline void
+xd3_swap_uint8p (uint8_t** p1, uint8_t** p2)
+{
+  uint8_t *t = (*p1);
+  (*p1) = (*p2);
+  (*p2) = t;
+}
+/*
+ * Exchanges the two usize_t values stored at *p1 and *p2.
+ */
+static inline void
+xd3_swap_usize_t (usize_t* p1, usize_t* p2)
+{
+  usize_t t = (*p1);
+  (*p1) = (*p2);
+  (*p2) = t;
+}
+
+/* It's not constant time, but it computes the log.
+ *
+ * Tests whether VALUE is an exact power of two by comparing it with
+ * a probe X that starts at 1 and is doubled until it shifts out to
+ * zero.
+ *
+ * Returns 0 when X == VALUE; *LOGOF then holds the number of
+ * doublings performed, i.e. the base-2 logarithm of VALUE.
+ * Returns XD3_INTERNAL when no probe matched (this includes VALUE
+ * == 0, because the loop never compares against a zero probe);
+ * *LOGOF is then left at the total number of doublings performed
+ * before X became zero.
+ *
+ * LOGOF may be NULL, in which case the count is written to a local
+ * scratch variable and discarded.
+ */
+static int
+xd3_check_pow2 (xoff_t value, usize_t *logof)
+{
+  xoff_t x = 1;
+  usize_t nolog; /* sink for the log when the caller passes NULL */
+  if (logof == NULL) {
+    logof = &nolog;
+  }
+
+  *logof = 0;
+
+  for (; x != 0; x <<= 1, *logof += 1)
+    {
+      if (x == value)
+        {
+          return 0;
+        }
+    }
+
+  return XD3_INTERNAL;
+}
+/*
+ * Returns the smallest power of two that is >= X, found by doubling
+ * I from 1.  X values of 0 and 1 both yield 1.
+ *
+ * NOTE(review): assuming usize_t is an unsigned type, an X larger
+ * than the largest power of two it can represent makes I shift out
+ * to 0, after which (x > i) stays true and the loop would not
+ * terminate -- confirm that callers bound X.
+ */
+usize_t
+xd3_pow2_roundup (usize_t x)
+{
+  usize_t i = 1;
+  while (x > i) {
+    i <<= 1U;
+  }
+  return i;
+}
+/*
+ * Same computation as xd3_pow2_roundup, carried out in xoff_t: the
+ * smallest power of two that is >= X (1 for X of 0 or 1).
+ *
+ * NOTE(review): the same non-termination concern applies when X
+ * exceeds the largest power of two representable in xoff_t
+ * (assuming xoff_t is unsigned) -- confirm that callers bound X.
+ */
+static xoff_t
+xd3_xoff_roundup (xoff_t x)
+{
+  xoff_t i = 1;
+  while (x > i) {
+    i <<= 1U;
+  }
+  return i;
+}
+/*
+ * Rounds SZ up to a multiple of BLKSZ.
+ *
+ * BLKSZ is asserted (XD3_ASSERT) to be a power of two; the remainder
+ * is taken with the mask (blksz-1), which relies on that.
+ *
+ * Returns SZ unchanged when it is already a multiple of BLKSZ,
+ * USIZE_T_MAXBLKSZ when SZ is not a multiple and exceeds
+ * USIZE_T_MAXBLKSZ, and SZ plus the distance to the next multiple
+ * otherwise.
+ */
+static usize_t
+xd3_round_blksize (usize_t sz, usize_t blksz)
+{
+  usize_t mod = sz & (blksz-1); /* sz modulo blksz, for power-of-two blksz */
+
+  XD3_ASSERT (xd3_check_pow2 (blksz, NULL) == 0);
+
+  if (mod == 0)
+    {
+      return sz;
+    }
+
+  if (sz > USIZE_T_MAXBLKSZ)
+    {
+      return USIZE_T_MAXBLKSZ;
+    }
+
+  return sz + (blksz - mod);
+}
+
+/***********************************************************************
+ Adler32 stream function: code copied from Zlib, defined in RFC1950
+ ***********************************************************************/
+
+#define A32_BASE 65521L /* Largest prime smaller than 2^16 */
+#define A32_NMAX 5552   /* NMAX is the largest n such that 255n(n+1)/2
+                           + (n+1)(BASE-1) <= 2^32-1 */
+/*
+ * A32_DO1 adds buf[i] to s1 and then adds s1 to s2; the wider
+ * macros chain it over 2, 4, 8 and 16 consecutive bytes.  They
+ * expand to bare statements that refer to s1 and s2 by name, so
+ * both must be in scope at the point of use.
+ */
+#define A32_DO1(buf,i)  {s1 += buf[i]; s2 += s1;}
+#define A32_DO2(buf,i)  A32_DO1(buf,i); A32_DO1(buf,i+1);
+#define A32_DO4(buf,i)  A32_DO2(buf,i); A32_DO2(buf,i+2);
+#define A32_DO8(buf,i)  A32_DO4(buf,i); A32_DO4(buf,i+4);
+#define A32_DO16(buf)   A32_DO8(buf,0); A32_DO8(buf,8);
+/*
+ * Folds LEN bytes starting at BUF into the running checksum ADLER
+ * and returns the updated checksum.
+ *
+ * The low 16 bits of ADLER seed s1 and the next 16 bits seed s2;
+ * the result is (s2 << 16) | s1.  The input is consumed in runs of
+ * at most A32_NMAX bytes, and both sums are reduced modulo A32_BASE
+ * after every run.  When LEN is 0 the outer loop does not execute
+ * and the two 16-bit halves of ADLER are recombined unchanged.
+ */
+static uint32_t adler32 (uint32_t adler, const uint8_t *buf, usize_t len)
+{
+  uint32_t s1 = adler & 0xffffU;
+  uint32_t s2 = (adler >> 16) & 0xffffU;
+  int k; /* bytes still to be summed in the current run */
+
+  while (len > 0)
+    {
+      k    = (len < A32_NMAX) ? len : A32_NMAX;
+      len -= k;
+
+      while (k >= 16) /* unrolled: 16 bytes per iteration */
+        {
+          A32_DO16(buf);
+          buf += 16;
+          k -= 16;
+        }
+
+      if (k != 0) /* remaining 1..15 bytes of the run, one at a time */
+        {
+          do
+            {
+              s1 += *buf++;
+              s2 += s1;
+            }
+          while (--k);
+        }
+
+      s1 %= A32_BASE;
+      s2 %= A32_BASE;
+    }
+
+  return (s2 << 16) | s1;
+}
+
+/***********************************************************************
+ Run-length function
+ ***********************************************************************/
+/*
+ * xd3_comprun passes each of the SLOOK bytes at SEG, in order, to
+ * the NEXTRUN macro, then stores run_c through RUN_CP and returns
+ * run_l.
+ *
+ * NOTE(review): NEXTRUN is defined outside this section.  This
+ * function only initialises run_l and run_c to 0, so NEXTRUN
+ * presumably updates both by name (run length and run byte) --
+ * confirm against the macro definition.
+ */
+#if XD3_ENCODER
+static usize_t
+xd3_comprun (const uint8_t *seg, usize_t slook, uint8_t *run_cp)
+{
+  usize_t i;
+  usize_t run_l = 0;
+  uint8_t run_c = 0;
+
+  for (i = 0; i < slook; i += 1)
+    {
+      NEXTRUN(seg[i]);
+    }
+
+  (*run_cp) = run_c;
+
+  return run_l;
+}
+#endif
+
+/***********************************************************************
+ Basic encoder/decoder functions
+ ***********************************************************************/
+/*
+ * xd3_emit_byte appends the single byte CODE to the output page
+ * *OUTPUTP.
+ *
+ * When the page is full (next == avail) a further page is requested
+ * with xd3_alloc_output (stream, output) and *OUTPUTP is updated to
+ * point at it before the byte is stored.
+ *
+ * Returns 0 on success, or ENOMEM when xd3_alloc_output returns
+ * NULL (nothing is written in that case).
+ */
+#if XD3_ENCODER
+inline int
+xd3_emit_byte (xd3_stream  *stream,
+               xd3_output **outputp,
+               uint8_t      code)
+{
+  xd3_output *output = (*outputp);
+
+  if (output->next == output->avail)
+    {
+      xd3_output *aoutput;
+
+      if ((aoutput = xd3_alloc_output (stream, output)) == NULL)
+        {
+          return ENOMEM;
+        }
+
+      output = (*outputp) = aoutput;
+    }
+
+  output->base[output->next++] = code;
+
+  return 0;
+}
+/*
+ * Appends SIZE bytes from BASE to the output page *OUTPUTP,
+ * continuing on further pages as each one fills up.
+ *
+ * Each pass requests a new page through xd3_alloc_output if the
+ * current one is full (updating *OUTPUTP), then copies
+ * xd3_min (free space on the page, bytes remaining) bytes.
+ *
+ * Returns 0 on success, or ENOMEM when xd3_alloc_output returns
+ * NULL; bytes copied by earlier passes stay in the output.
+ *
+ * The loop is do/while, so its body runs once even when SIZE is 0:
+ * a full page then still triggers a page request, after which zero
+ * bytes are copied.
+ */
+inline int
+xd3_emit_bytes (xd3_stream     *stream,
+                xd3_output    **outputp,
+                const uint8_t  *base,
+                usize_t         size)
+{
+  xd3_output *output = (*outputp);
+
+  do
+    {
+      usize_t take; /* bytes copied during this pass */
+
+      if (output->next == output->avail)
+        {
+          xd3_output *aoutput;
+
+          if ((aoutput = xd3_alloc_output (stream, output)) == NULL)
+            {
+              return ENOMEM;
+            }
+
+          output = (*outputp) = aoutput;
+        }
+
+      take = xd3_min (output->avail - output->next, size);
+
+      memcpy (output->base + output->next, base, (size_t) take);
+
+      output->next += take;
+      size -= take;
+      base += take;
+    }
+  while (size > 0);
+
+  return 0;
+}
+#endif /* XD3_ENCODER */
+
+/*********************************************************************** + Address cache stuff + ***********************************************************************/ + +static int +xd3_alloc_cache (xd3_stream *stream) +{ + if (stream->acache.near_array != NULL) + { + xd3_free (stream, stream->acache.near_array); + } + + if (stream->acache.same_array != NULL) + { + xd3_free (stream, stream->acache.same_array); + } + + if (((stream->acache.s_near > 0) && + (stream->acache.near_array = (usize_t*) + xd3_alloc (stream, stream->acache.s_near, + (usize_t) sizeof (usize_t))) + == NULL) || + ((stream->acache.s_same > 0) && + (stream->acache.same_array = (usize_t*) + xd3_alloc (stream, stream->acache.s_same * 256, + (usize_t) sizeof (usize_t))) + == NULL)) + { + return ENOMEM; + } + + return 0; +} + +void +xd3_init_cache (xd3_addr_cache* acache) +{ + if (acache->s_near > 0) + { + memset (acache->near_array, 0, acache->s_near * sizeof (usize_t)); + acache->next_slot = 0; + } + + if (acache->s_same > 0) + { + memset (acache->same_array, 0, acache->s_same * 256 * sizeof (usize_t)); + } +} + +static void +xd3_update_cache (xd3_addr_cache* acache, usize_t addr) +{ + if (acache->s_near > 0) + { + acache->near_array[acache->next_slot] = addr; + acache->next_slot = (acache->next_slot + 1) % acache->s_near; + } + + if (acache->s_same > 0) + { + acache->same_array[addr % (acache->s_same*256)] = addr; + } +} + +#if XD3_ENCODER +/* OPT: this gets called a lot, can it be optimized? */ +static int +xd3_encode_address (xd3_stream *stream, + usize_t addr, + usize_t here, + uint8_t* mode) +{ + usize_t d, bestd; + usize_t i, bestm; + int ret; + xd3_addr_cache* acache = & stream->acache; + +#define SMALLEST_INT(x) do { if (((x) & ~127U) == 0) { goto good; } } while (0) + + /* Attempt to find the address mode that yields the smallest integer value + * for "d", the encoded address value, thereby minimizing the encoded size + * of the address. 
*/ + bestd = addr; + bestm = VCD_SELF; + + XD3_ASSERT (addr < here); + + SMALLEST_INT (bestd); + + if ((d = here-addr) < bestd) + { + bestd = d; + bestm = VCD_HERE; + + SMALLEST_INT (bestd); + } + + for (i = 0; i < acache->s_near; i += 1) + { + /* Note: If we used signed computation here, we'd could compte d + * and then check (d >= 0 && d < bestd). */ + if (addr >= acache->near_array[i]) + { + d = addr - acache->near_array[i]; + + if (d < bestd) + { + bestd = d; + bestm = i+2; /* 2 counts the VCD_SELF, VCD_HERE modes */ + + SMALLEST_INT (bestd); + } + } + } + + if (acache->s_same > 0 && + acache->same_array[d = addr%(acache->s_same*256)] == addr) + { + bestd = d%256; + /* 2 + s_near offsets past the VCD_NEAR modes */ + bestm = acache->s_near + 2 + d/256; + + if ((ret = xd3_emit_byte (stream, & ADDR_TAIL (stream), bestd))) + { + return ret; + } + } + else + { + good: + + if ((ret = xd3_emit_size (stream, & ADDR_TAIL (stream), bestd))) + { + return ret; + } + } + + xd3_update_cache (acache, addr); + + (*mode) += bestm; + + return 0; +} +#endif + +static int +xd3_decode_address (xd3_stream *stream, usize_t here, + usize_t mode, const uint8_t **inpp, + const uint8_t *max, usize_t *valp) +{ + int ret; + usize_t same_start = 2 + stream->acache.s_near; + + if (mode < same_start) + { + if ((ret = xd3_read_size (stream, inpp, max, valp))) { return ret; } + + switch (mode) + { + case VCD_SELF: + break; + case VCD_HERE: + (*valp) = here - (*valp); + break; + default: + (*valp) += stream->acache.near_array[mode - 2]; + break; + } + } + else + { + if (*inpp == max) + { + stream->msg = "address underflow"; + return XD3_INVALID_INPUT; + } + + mode -= same_start; + + (*valp) = stream->acache.same_array[mode*256 + (**inpp)]; + + (*inpp) += 1; + } + + xd3_update_cache (& stream->acache, *valp); + + return 0; +} + +/*********************************************************************** + Alloc/free +***********************************************************************/ + +static 
void* +__xd3_alloc_func (void* opaque, size_t items, usize_t size) +{ + return malloc (items * (size_t) size); +} + +static void +__xd3_free_func (void* opaque, void* address) +{ + free (address); +} + +static void* +xd3_alloc (xd3_stream *stream, + usize_t elts, + usize_t size) +{ + void *a = stream->alloc (stream->opaque, elts, size); + + if (a != NULL) + { + IF_DEBUG (stream->alloc_cnt += 1); + IF_DEBUG2 (DP(RINT "[stream %p malloc] size %"W"u ptr %p\n", + (void*)stream, elts * size, a)); + } + else + { + stream->msg = "out of memory"; + } + + return a; +} + +static void +xd3_free (xd3_stream *stream, + void *ptr) +{ + if (ptr != NULL) + { + IF_DEBUG (stream->free_cnt += 1); + XD3_ASSERT (stream->free_cnt <= stream->alloc_cnt); + IF_DEBUG2 (DP(RINT "[stream %p free] %p\n", + (void*)stream, ptr)); + stream->free (stream->opaque, ptr); + } +} + +#if XD3_ENCODER +static void* +xd3_alloc0 (xd3_stream *stream, + usize_t elts, + usize_t size) +{ + void *a = xd3_alloc (stream, elts, size); + + if (a != NULL) + { + memset (a, 0, (size_t) (elts * size)); + } + + return a; +} + +xd3_output* +xd3_alloc_output (xd3_stream *stream, + xd3_output *old_output) +{ + xd3_output *output; + uint8_t *base; + + if (stream->enc_free != NULL) + { + output = stream->enc_free; + stream->enc_free = output->next_page; + } + else + { + if ((output = (xd3_output*) xd3_alloc (stream, 1, + (usize_t) sizeof (xd3_output))) + == NULL) + { + return NULL; + } + + if ((base = (uint8_t*) xd3_alloc (stream, XD3_ALLOCSIZE, + sizeof (uint8_t))) == NULL) + { + xd3_free (stream, output); + return NULL; + } + + output->base = base; + output->avail = XD3_ALLOCSIZE; + } + + output->next = 0; + + if (old_output) + { + old_output->next_page = output; + } + + output->next_page = NULL; + + return output; +} + +static usize_t +xd3_sizeof_output (xd3_output *output) +{ + usize_t s = 0; + + for (; output; output = output->next_page) + { + s += output->next; + } + + return s; +} + +static void +xd3_freelist_output 
(xd3_stream *stream, + xd3_output *output) +{ + xd3_output *tmp; + + while (output) + { + tmp = output; + output = output->next_page; + + tmp->next = 0; + tmp->next_page = stream->enc_free; + stream->enc_free = tmp; + } +} + +static void +xd3_free_output (xd3_stream *stream, + xd3_output *output) +{ + xd3_output *next; + + again: + if (output == NULL) + { + return; + } + + next = output->next_page; + + xd3_free (stream, output->base); + xd3_free (stream, output); + + output = next; + goto again; +} +#endif /* XD3_ENCODER */ + +void +xd3_free_stream (xd3_stream *stream) +{ + xd3_iopt_buflist *blist = stream->iopt_alloc; + + while (blist != NULL) + { + xd3_iopt_buflist *tmp = blist; + blist = blist->next; + xd3_free (stream, tmp->buffer); + xd3_free (stream, tmp); + } + +#if XD3_ENCODER + xd3_free (stream, stream->large_table); + xd3_free (stream, stream->small_table); + xd3_free (stream, stream->large_hash.powers); + xd3_free (stream, stream->small_hash.powers); + xd3_free (stream, stream->small_prev); + + { + int i; + for (i = 0; i < ENC_SECTS; i += 1) + { + xd3_free_output (stream, stream->enc_heads[i]); + } + xd3_free_output (stream, stream->enc_free); + } +#endif + + xd3_free (stream, stream->acache.near_array); + xd3_free (stream, stream->acache.same_array); + + xd3_free (stream, stream->inst_sect.copied1); + xd3_free (stream, stream->addr_sect.copied1); + xd3_free (stream, stream->data_sect.copied1); + + if (stream->dec_lastwin != stream->dec_buffer) + { + xd3_free (stream, (uint8_t*) stream->dec_lastwin); + } + xd3_free (stream, stream->dec_buffer); + + xd3_free (stream, stream->buf_in); + xd3_free (stream, stream->dec_appheader); + xd3_free (stream, stream->dec_codetbl); + xd3_free (stream, stream->code_table_alloc); + +#if SECONDARY_ANY + xd3_free (stream, stream->inst_sect.copied2); + xd3_free (stream, stream->addr_sect.copied2); + xd3_free (stream, stream->data_sect.copied2); + + if (stream->sec_type != NULL) + { + stream->sec_type->destroy (stream, 
stream->sec_stream_d); + stream->sec_type->destroy (stream, stream->sec_stream_i); + stream->sec_type->destroy (stream, stream->sec_stream_a); + } +#endif + + xd3_free (stream, stream->whole_target.adds); + xd3_free (stream, stream->whole_target.inst); + xd3_free (stream, stream->whole_target.wininfo); + + XD3_ASSERT (stream->alloc_cnt == stream->free_cnt); + + memset (stream, 0, sizeof (xd3_stream)); +} + +#if (XD3_DEBUG > 1 || VCDIFF_TOOLS) +static const char* +xd3_rtype_to_string (xd3_rtype type, int print_mode) +{ + switch (type) + { + case XD3_NOOP: + return "NOOP "; + case XD3_RUN: + return "RUN "; + case XD3_ADD: + return "ADD "; + default: break; + } + if (! print_mode) + { + return "CPY "; + } + switch (type) + { + case XD3_CPY + 0: return "CPY_0"; + case XD3_CPY + 1: return "CPY_1"; + case XD3_CPY + 2: return "CPY_2"; + case XD3_CPY + 3: return "CPY_3"; + case XD3_CPY + 4: return "CPY_4"; + case XD3_CPY + 5: return "CPY_5"; + case XD3_CPY + 6: return "CPY_6"; + case XD3_CPY + 7: return "CPY_7"; + case XD3_CPY + 8: return "CPY_8"; + case XD3_CPY + 9: return "CPY_9"; + default: return "CPY>9"; + } +} +#endif + +/**************************************************************** + Stream configuration + ******************************************************************/ + +int +xd3_config_stream(xd3_stream *stream, + xd3_config *config) +{ + int ret; + xd3_config defcfg; + xd3_smatcher *smatcher = &stream->smatcher; + + if (config == NULL) + { + config = & defcfg; + memset (config, 0, sizeof (*config)); + } + + /* Initial setup: no error checks yet */ + memset (stream, 0, sizeof (*stream)); + + stream->winsize = config->winsize ? config->winsize : XD3_DEFAULT_WINSIZE; + stream->sprevsz = config->sprevsz ? 
config->sprevsz : XD3_DEFAULT_SPREVSZ; + + if (config->iopt_size == 0) + { + stream->iopt_size = XD3_ALLOCSIZE / sizeof(xd3_rinst); + stream->iopt_unlimited = 1; + } + else + { + stream->iopt_size = config->iopt_size; + } + + stream->getblk = config->getblk; + stream->alloc = config->alloc ? config->alloc : __xd3_alloc_func; + stream->free = config->freef ? config->freef : __xd3_free_func; + stream->opaque = config->opaque; + stream->flags = config->flags; + + /* Secondary setup. */ + stream->sec_data = config->sec_data; + stream->sec_inst = config->sec_inst; + stream->sec_addr = config->sec_addr; + + stream->sec_data.data_type = DATA_SECTION; + stream->sec_inst.data_type = INST_SECTION; + stream->sec_addr.data_type = ADDR_SECTION; + + /* Check static sizes. */ + if (sizeof (usize_t) != SIZEOF_USIZE_T || + sizeof (xoff_t) != SIZEOF_XOFF_T || + (ret = xd3_check_pow2(XD3_ALLOCSIZE, NULL))) + { + stream->msg = "incorrect compilation: wrong integer sizes"; + return XD3_INTERNAL; + } + + /* Check/set secondary compressor. */ + switch (stream->flags & XD3_SEC_TYPE) + { + case 0: + if (stream->flags & XD3_SEC_NOALL) + { + stream->msg = "XD3_SEC flags require a secondary compressor type"; + return XD3_INTERNAL; + } + break; + case XD3_SEC_FGK: + FGK_CASE (stream); + case XD3_SEC_DJW: + DJW_CASE (stream); + case XD3_SEC_LZMA: + LZMA_CASE (stream); + default: + stream->msg = "too many secondary compressor types set"; + return XD3_INTERNAL; + } + + stream->code_table_desc = & __rfc3284_code_table_desc; + stream->code_table_func = xd3_rfc3284_code_table; + + /* Check sprevsz */ + if (smatcher->small_chain == 1 && + smatcher->small_lchain == 1) + { + stream->sprevsz = 0; + } + else + { + if ((ret = xd3_check_pow2 (stream->sprevsz, NULL))) + { + stream->msg = "sprevsz is required to be a power of two"; + return XD3_INTERNAL; + } + + stream->sprevmask = stream->sprevsz - 1; + } + + /* Default scanner settings. 
*/ +#if XD3_ENCODER + switch (config->smatch_cfg) + { + IF_BUILD_SOFT(case XD3_SMATCH_SOFT: + { + *smatcher = config->smatcher_soft; + smatcher->string_match = __smatcher_soft.string_match; + smatcher->name = __smatcher_soft.name; + if (smatcher->large_look < MIN_MATCH || + smatcher->large_step < 1 || + smatcher->small_look < MIN_MATCH) + { + stream->msg = "invalid soft string-match config"; + return XD3_INVALID; + } + break; + }) + + IF_BUILD_DEFAULT(case XD3_SMATCH_DEFAULT: + *smatcher = __smatcher_default; + break;) + IF_BUILD_SLOW(case XD3_SMATCH_SLOW: + *smatcher = __smatcher_slow; + break;) + IF_BUILD_FASTEST(case XD3_SMATCH_FASTEST: + *smatcher = __smatcher_fastest; + break;) + IF_BUILD_FASTER(case XD3_SMATCH_FASTER: + *smatcher = __smatcher_faster; + break;) + IF_BUILD_FAST(case XD3_SMATCH_FAST: + *smatcher = __smatcher_fast; + break;) + default: + stream->msg = "invalid string match config type"; + return XD3_INTERNAL; + } + + if (config->smatch_cfg == XD3_SMATCH_DEFAULT && + (stream->flags & XD3_COMPLEVEL_MASK) != 0) + { + int level = (stream->flags & XD3_COMPLEVEL_MASK) >> XD3_COMPLEVEL_SHIFT; + + switch (level) + { + case 1: + IF_BUILD_FASTEST(*smatcher = __smatcher_fastest; + break;) + case 2: + IF_BUILD_FASTER(*smatcher = __smatcher_faster; + break;) + case 3: case 4: case 5: + IF_BUILD_FAST(*smatcher = __smatcher_fast; + break;) + case 6: + IF_BUILD_DEFAULT(*smatcher = __smatcher_default; + break;) + default: + IF_BUILD_SLOW(*smatcher = __smatcher_slow; + break;) + IF_BUILD_DEFAULT(*smatcher = __smatcher_default; + break;) + IF_BUILD_FAST(*smatcher = __smatcher_fast; + break;) + IF_BUILD_FASTER(*smatcher = __smatcher_faster; + break;) + IF_BUILD_FASTEST(*smatcher = __smatcher_fastest; + break;) + } + } +#endif + + return 0; +} + +/*********************************************************** + Getblk interface + ***********************************************************/ + +inline +xoff_t xd3_source_eof(const xd3_source *src) +{ + xoff_t r = 
(src->max_blkno << src->shiftby) + (xoff_t)src->onlastblk; + return r; +} + +inline +usize_t xd3_bytes_on_srcblk (xd3_source *src, xoff_t blkno) +{ + usize_t r = (blkno == src->max_blkno ? + src->onlastblk : + src->blksize); + return r; +} + +/* This function interfaces with the client getblk function, checks + * its results, updates max_blkno, onlastblk, eof_known. */ +static int +xd3_getblk (xd3_stream *stream, xoff_t blkno) +{ + int ret; + xd3_source *source = stream->src; + + if (source->curblk == NULL || blkno != source->curblkno) + { + source->getblkno = blkno; + + if (stream->getblk == NULL) + { + IF_DEBUG2 (DP(RINT "[getblk] XD3_GETSRCBLK %"Q"u\n", blkno)); + stream->msg = "getblk source input"; + return XD3_GETSRCBLK; + } + + ret = stream->getblk (stream, source, blkno); + if (ret != 0) + { + IF_DEBUG2 (DP(RINT "[getblk] app error blkno %"Q"u: %s\n", + blkno, xd3_strerror (ret))); + return ret; + } + + IF_DEBUG2 (DP(RINT "[getblk] read source block %"Q"u onblk " + "%"W"u blksize %"W"u max_blkno %"Q"u\n", blkno, source->onblk, + source->blksize, source->max_blkno)); + } + + if (blkno > source->max_blkno) + { + source->max_blkno = blkno; + + if (source->onblk == source->blksize) + { + IF_DEBUG1 (DP(RINT "[getblk] full source blkno %"Q"u: " + "source length unknown %"Q"u\n", + blkno, + xd3_source_eof (source))); + } + else if (!source->eof_known) + { + IF_DEBUG1 (DP(RINT "[getblk] eof block has %"W"u bytes; " + "source length known %"Q"u\n", + xd3_bytes_on_srcblk (source, blkno), + xd3_source_eof (source))); + source->eof_known = 1; + } + } + + XD3_ASSERT (source->curblk != NULL); + + if (blkno == source->max_blkno) + { + /* In case the application sets the source as 1 block w/ a + * preset buffer. 
*/ + source->onlastblk = source->onblk; + } + return 0; +} + +/*********************************************************** + Stream open/close + ***************************************************************/ + +int +xd3_set_source (xd3_stream *stream, + xd3_source *src) +{ + usize_t shiftby; + + stream->src = src; + src->srclen = 0; + src->srcbase = 0; + + /* Enforce power-of-two blocksize so that source-block number + * calculations are cheap. */ + if (xd3_check_pow2 (src->blksize, &shiftby) != 0) + { + src->blksize = xd3_pow2_roundup(src->blksize); + xd3_check_pow2 (src->blksize, &shiftby); + IF_DEBUG1 (DP(RINT "raising src_blksz to %"W"u\n", src->blksize)); + } + + src->shiftby = shiftby; + src->maskby = (1ULL << shiftby) - 1ULL; + + if (xd3_check_pow2 (src->max_winsize, NULL) != 0) + { + src->max_winsize = xd3_xoff_roundup(src->max_winsize); + IF_DEBUG1 (DP(RINT "raising src_maxsize to %"W"u\n", src->blksize)); + } + src->max_winsize = xd3_max (src->max_winsize, XD3_ALLOCSIZE); + return 0; +} + +int +xd3_set_source_and_size (xd3_stream *stream, + xd3_source *user_source, + xoff_t source_size) { + int ret = xd3_set_source (stream, user_source); + if (ret == 0) + { + stream->src->eof_known = 1; + IF_DEBUG2 (DP(RINT "[set source] size known %"Q"u\n", + source_size)); + xd3_blksize_div(source_size, + stream->src, + &stream->src->max_blkno, + &stream->src->onlastblk); + + IF_DEBUG1 (DP(RINT "[set source] size known %"Q"u max_blkno %"Q"u\n", + source_size, stream->src->max_blkno)); + } + return ret; +} + +void +xd3_abort_stream (xd3_stream *stream) +{ + stream->dec_state = DEC_ABORTED; + stream->enc_state = ENC_ABORTED; +} + +int +xd3_close_stream (xd3_stream *stream) +{ + if (stream->enc_state != 0 && stream->enc_state != ENC_ABORTED) + { + if (stream->buf_leftover != NULL) + { + stream->msg = "encoding is incomplete"; + return XD3_INTERNAL; + } + + if (stream->enc_state == ENC_POSTWIN) + { +#if XD3_ENCODER + xd3_encode_reset (stream); +#endif + 
stream->current_window += 1; + stream->enc_state = ENC_INPUT; + } + + /* If encoding, should be ready for more input but not actually + have any. */ + if (stream->enc_state != ENC_INPUT || stream->avail_in != 0) + { + stream->msg = "encoding is incomplete"; + return XD3_INTERNAL; + } + } + else + { + switch (stream->dec_state) + { + case DEC_VCHEAD: + case DEC_WININD: + /* TODO: Address the zero-byte ambiguity. Does the encoder + * emit a window or not? If so, then catch an error here. + * If not, need another routine to say + * decode_at_least_one_if_empty. */ + case DEC_ABORTED: + break; + default: + /* If decoding, should be ready for the next window. */ + stream->msg = "eof in decode"; + return XD3_INVALID_INPUT; + } + } + + return 0; +} + +/************************************************************** + Application header + ****************************************************************/ + +int +xd3_get_appheader (xd3_stream *stream, + uint8_t **data, + usize_t *size) +{ + if (stream->dec_state < DEC_WININD) + { + stream->msg = "application header not available"; + return XD3_INTERNAL; + } + + (*data) = stream->dec_appheader; + (*size) = stream->dec_appheadsz; + return 0; +} + +/********************************************************** + Decoder stuff + *************************************************/ + +#include "xdelta3-decode.h" + +/**************************************************************** + Encoder stuff + *****************************************************************/ + +#if XD3_ENCODER +void +xd3_set_appheader (xd3_stream *stream, + const uint8_t *data, + usize_t size) +{ + stream->enc_appheader = data; + stream->enc_appheadsz = size; +} + +#if XD3_DEBUG +static int +xd3_iopt_check (xd3_stream *stream) +{ + usize_t ul = xd3_rlist_length (& stream->iopt_used); + usize_t fl = xd3_rlist_length (& stream->iopt_free); + + return (ul + fl + (stream->iout ? 
1 : 0)) == stream->iopt_size; +} +#endif + +static xd3_rinst* +xd3_iopt_free (xd3_stream *stream, xd3_rinst *i) +{ + xd3_rinst *n = xd3_rlist_remove (i); + xd3_rlist_push_back (& stream->iopt_free, i); + return n; +} + +static void +xd3_iopt_free_nonadd (xd3_stream *stream, xd3_rinst *i) +{ + if (i->type != XD3_ADD) + { + xd3_rlist_push_back (& stream->iopt_free, i); + } +} + +/* When an instruction is ready to flush from the iopt buffer, this + * function is called to produce an encoding. It writes the + * instruction plus size, address, and data to the various encoding + * sections. */ +static int +xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst) +{ + int ret; + + /* Check for input overflow. */ + XD3_ASSERT (inst->pos + inst->size <= stream->avail_in); + + switch (inst->type) + { + case XD3_CPY: + { + /* the address may have an offset if there is a source window. */ + usize_t addr; + xd3_source *src = stream->src; + + if (src != NULL) + { + /* If there is a source copy, the source must have its + * source window decided before we can encode. This can + * be bad -- we have to make this decision even if no + * source matches have been found. */ + if (stream->srcwin_decided == 0) + { + if ((ret = xd3_srcwin_setup (stream))) { return ret; } + } + else + { + stream->srcwin_decided_early = (!stream->src->eof_known || + (stream->srcwin_cksum_pos < + xd3_source_eof (stream->src))); + } + + /* xtra field indicates the copy is from the source */ + if (inst->xtra) + { + XD3_ASSERT (inst->addr >= src->srcbase); + XD3_ASSERT (inst->addr + inst->size <= + src->srcbase + src->srclen); + addr = inst->addr - src->srcbase; + stream->n_scpy += 1; + stream->l_scpy += inst->size; + } + else + { + /* with source window: target copy address is offset + * by taroff. 
*/ + addr = stream->taroff + inst->addr; + stream->n_tcpy += 1; + stream->l_tcpy += inst->size; + } + } + else + { + addr = inst->addr; + stream->n_tcpy += 1; + stream->l_tcpy += inst->size; + } + + /* Note: used to assert inst->size >= MIN_MATCH, but not true + * for merge operations & identical match heuristics. */ + /* the "here" position is always offset by taroff */ + if ((ret = xd3_encode_address (stream, addr, inst->pos + stream->taroff, + & inst->type))) + { + return ret; + } + + IF_DEBUG2 ({ + static int cnt; + DP(RINT "[iopt copy:%d] pos %"Q"u-%"Q"u addr %"Q"u-%"Q"u size %"W"u\n", + cnt++, + stream->total_in + inst->pos, + stream->total_in + inst->pos + inst->size, + inst->addr, inst->addr + inst->size, inst->size); + }); + break; + } + case XD3_RUN: + { + if ((ret = xd3_emit_byte (stream, & DATA_TAIL (stream), inst->xtra))) { return ret; } + + stream->n_run += 1; + stream->l_run += inst->size; + + IF_DEBUG2 ({ + static int cnt; + DP(RINT "[iopt run:%d] pos %"Q"u size %"W"u\n", cnt++, stream->total_in + inst->pos, inst->size); + }); + break; + } + case XD3_ADD: + { + if ((ret = xd3_emit_bytes (stream, & DATA_TAIL (stream), + stream->next_in + inst->pos, inst->size))) { return ret; } + + stream->n_add += 1; + stream->l_add += inst->size; + + IF_DEBUG2 ({ + static int cnt; + DP(RINT "[iopt add:%d] pos %"Q"u size %"W"u\n", cnt++, stream->total_in + inst->pos, inst->size); + }); + + break; + } + } + + /* This is the only place stream->unencoded_offset is incremented. 
*/ + XD3_ASSERT (stream->unencoded_offset == inst->pos); + stream->unencoded_offset += inst->size; + + inst->code2 = 0; + + XD3_CHOOSE_INSTRUCTION (stream, stream->iout, inst); + + if (stream->iout != NULL) + { + if (stream->iout->code2 != 0) + { + if ((ret = xd3_emit_double (stream, stream->iout, inst, + stream->iout->code2))) { return ret; } + + xd3_iopt_free_nonadd (stream, stream->iout); + xd3_iopt_free_nonadd (stream, inst); + stream->iout = NULL; + return 0; + } + else + { + if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; } + + xd3_iopt_free_nonadd (stream, stream->iout); + } + } + + stream->iout = inst; + + return 0; +} + +/* This possibly encodes an add instruction, iadd, which must remain + * on the stack until the following call to + * xd3_iopt_finish_encoding. */ +static int +xd3_iopt_add (xd3_stream *stream, usize_t pos, xd3_rinst *iadd) +{ + int ret; + usize_t off = stream->unencoded_offset; + + if (pos > off) + { + iadd->type = XD3_ADD; + iadd->pos = off; + iadd->size = pos - off; + + if ((ret = xd3_iopt_finish_encoding (stream, iadd))) { return ret; } + } + + return 0; +} + +/* This function calls xd3_iopt_finish_encoding to finish encoding an + * instruction, and it may also produce an add instruction for an + * unmatched region. */ +static int +xd3_iopt_add_encoding (xd3_stream *stream, xd3_rinst *inst) +{ + int ret; + xd3_rinst iadd; + + if ((ret = xd3_iopt_add (stream, inst->pos, & iadd))) { return ret; } + + if ((ret = xd3_iopt_finish_encoding (stream, inst))) { return ret; } + + return 0; +} + +/* Generates a final add instruction to encode the remaining input. 
*/ +static int +xd3_iopt_add_finalize (xd3_stream *stream) +{ + int ret; + xd3_rinst iadd; + + if ((ret = xd3_iopt_add (stream, stream->avail_in, & iadd))) { return ret; } + + if (stream->iout) + { + if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; } + + xd3_iopt_free_nonadd (stream, stream->iout); + stream->iout = NULL; + } + + return 0; +} + +/* Compact the instruction buffer by choosing the best non-overlapping + * instructions when lazy string-matching. There are no ADDs in the + * iopt buffer because those are synthesized in xd3_iopt_add_encoding + * and during xd3_iopt_add_finalize. */ +static int +xd3_iopt_flush_instructions (xd3_stream *stream, int force) +{ + xd3_rinst *r1 = xd3_rlist_front (& stream->iopt_used); + xd3_rinst *r2; + xd3_rinst *r3; + usize_t r1end; + usize_t r2end; + usize_t r2off; + usize_t r2moff; + usize_t gap; + usize_t flushed; + int ret; + + XD3_ASSERT (xd3_iopt_check (stream)); + + /* Note: once tried to skip this step if it's possible to assert + * there are no overlapping instructions. Doesn't work because + * xd3_opt_erase leaves overlapping instructions. */ + while (! xd3_rlist_end (& stream->iopt_used, r1) && + ! xd3_rlist_end (& stream->iopt_used, r2 = xd3_rlist_next (r1))) + { + r1end = r1->pos + r1->size; + + /* If the instructions do not overlap, continue. */ + if (r1end <= r2->pos) + { + r1 = r2; + continue; + } + + r2end = r2->pos + r2->size; + + /* The min_match adjustments prevent this. */ + XD3_ASSERT (r2end > (r1end + LEAST_MATCH_INCR)); + + /* If r3 is available... */ + if (! xd3_rlist_end (& stream->iopt_used, r3 = xd3_rlist_next (r2))) + { + /* If r3 starts before r1 finishes or just about, r2 is irrelevant */ + if (r3->pos <= r1end + 1) + { + xd3_iopt_free (stream, r2); + continue; + } + } + else if (! force) + { + /* Unless force, end the loop when r3 is not available. 
*/ + break; + } + + r2off = r2->pos - r1->pos; + r2moff = r2end - r1end; + gap = r2end - r1->pos; + + /* If the two matches overlap almost entirely, choose the better match + * and discard the other. The else branch can still create inefficient + * copies, e.g., a 4-byte copy that takes 4 bytes to encode, which + * xd3_smatch() wouldn't allow by its crude efficiency check. However, + * in this case there are adjacent copies which mean the add would cost + * one extra byte. Allow the inefficiency here. */ + if (gap < 2*MIN_MATCH || r2moff <= 2 || r2off <= 2) + { + /* Only one match should be used, choose the longer one. */ + if (r1->size < r2->size) + { + xd3_iopt_free (stream, r1); + r1 = r2; + } + else + { + /* We are guaranteed that r1 does not overlap now, so advance past r2 */ + r1 = xd3_iopt_free (stream, r2); + } + continue; + } + else + { + /* Shorten one of the instructions -- could be optimized + * based on the address cache. */ + usize_t average; + usize_t newsize; + usize_t adjust1; + + XD3_ASSERT (r1end > r2->pos && r2end > r1->pos); + + /* Try to balance the length of both instructions, but avoid + * making both longer than MAX_MATCH_SPLIT . */ + average = gap / 2; + newsize = xd3_min (MAX_MATCH_SPLIT, gap - average); + + /* Should be possible to simplify this code. 
*/ + if (newsize > r1->size) + { + /* shorten r2 */ + adjust1 = r1end - r2->pos; + } + else if (newsize > r2->size) + { + /* shorten r1 */ + adjust1 = r1end - r2->pos; + + XD3_ASSERT (r1->size > adjust1); + + r1->size -= adjust1; + + /* don't shorten r2 */ + adjust1 = 0; + } + else + { + /* shorten r1 */ + adjust1 = r1->size - newsize; + + if (r2->pos > r1end - adjust1) + { + adjust1 -= r2->pos - (r1end - adjust1); + } + + XD3_ASSERT (r1->size > adjust1); + + r1->size -= adjust1; + + /* shorten r2 */ + XD3_ASSERT (r1->pos + r1->size >= r2->pos); + + adjust1 = r1->pos + r1->size - r2->pos; + } + + /* Fallthrough above if-else, shorten r2 */ + XD3_ASSERT (r2->size > adjust1); + + r2->size -= adjust1; + r2->pos += adjust1; + r2->addr += adjust1; + + XD3_ASSERT (r1->size >= MIN_MATCH); + XD3_ASSERT (r2->size >= MIN_MATCH); + + r1 = r2; + } + } + + XD3_ASSERT (xd3_iopt_check (stream)); + + /* If forcing, pick instructions until the list is empty, otherwise + * this empties 50% of the queue. */ + for (flushed = 0; ! xd3_rlist_empty (& stream->iopt_used); ) + { + xd3_rinst *renc = xd3_rlist_pop_front (& stream->iopt_used); + if ((ret = xd3_iopt_add_encoding (stream, renc))) + { + return ret; + } + + if (! force) + { + if (++flushed > stream->iopt_size / 2) + { + break; + } + + /* If there are only two instructions remaining, break, + * because they were not optimized. This means there were + * more than 50% eliminated by the loop above. 
*/ + r1 = xd3_rlist_front (& stream->iopt_used); + if (xd3_rlist_end(& stream->iopt_used, r1) || + xd3_rlist_end(& stream->iopt_used, r2 = xd3_rlist_next (r1)) || + xd3_rlist_end(& stream->iopt_used, r3 = xd3_rlist_next (r2))) + { + break; + } + } + } + + XD3_ASSERT (xd3_iopt_check (stream)); + + XD3_ASSERT (!force || xd3_rlist_length (& stream->iopt_used) == 0); + + return 0; +} + +static int +xd3_iopt_get_slot (xd3_stream *stream, xd3_rinst** iptr) +{ + xd3_rinst *i; + int ret; + + if (xd3_rlist_empty (& stream->iopt_free)) + { + if (stream->iopt_unlimited) + { + usize_t elts = XD3_ALLOCSIZE / sizeof(xd3_rinst); + + if ((ret = xd3_alloc_iopt (stream, elts))) + { + return ret; + } + + stream->iopt_size += elts; + } + else + { + if ((ret = xd3_iopt_flush_instructions (stream, 0))) { return ret; } + + XD3_ASSERT (! xd3_rlist_empty (& stream->iopt_free)); + } + } + + i = xd3_rlist_pop_back (& stream->iopt_free); + + xd3_rlist_push_back (& stream->iopt_used, i); + + (*iptr) = i; + + ++stream->i_slots_used; + + return 0; +} + +/* A copy is about to be emitted that extends backwards to POS, + * therefore it may completely cover some existing instructions in the + * buffer. If an instruction is completely covered by this new match, + * erase it. If the new instruction is covered by the previous one, + * return 1 to skip it. */ +static void +xd3_iopt_erase (xd3_stream *stream, usize_t pos, usize_t size) +{ + while (! xd3_rlist_empty (& stream->iopt_used)) + { + xd3_rinst *r = xd3_rlist_back (& stream->iopt_used); + + /* Verify that greedy is working. The previous instruction + * should end before the new one begins. */ + XD3_ASSERT ((stream->flags & XD3_BEGREEDY) == 0 || (r->pos + r->size <= pos)); + /* Verify that min_match is working. The previous instruction + * should end before the new one ends. */ + XD3_ASSERT ((stream->flags & XD3_BEGREEDY) != 0 || (r->pos + r->size < pos + size)); + + /* See if the last instruction starts before the new + * instruction. 
If so, there is nothing to erase. */ + if (r->pos < pos) + { + return; + } + + /* Otherwise, the new instruction covers the old one, delete it + and repeat. */ + xd3_rlist_remove (r); + xd3_rlist_push_back (& stream->iopt_free, r); + --stream->i_slots_used; + } +} + +/* This function tells the last matched input position. */ +static usize_t +xd3_iopt_last_matched (xd3_stream *stream) +{ + xd3_rinst *r; + + if (xd3_rlist_empty (& stream->iopt_used)) + { + return 0; + } + + r = xd3_rlist_back (& stream->iopt_used); + + return r->pos + r->size; +} + +/********************************************************* + Emit routines + ***********************************************************/ + +static int +xd3_emit_single (xd3_stream *stream, xd3_rinst *single, uint8_t code) +{ + int has_size = stream->code_table[code].size1 == 0; + int ret; + + IF_DEBUG2 (DP(RINT "[emit1] %"W"u %s (%"W"u) code %u\n", + single->pos, + xd3_rtype_to_string ((xd3_rtype) single->type, 0), + single->size, + code)); + + if ((ret = xd3_emit_byte (stream, & INST_TAIL (stream), code))) + { + return ret; + } + + if (has_size) + { + if ((ret = xd3_emit_size (stream, & INST_TAIL (stream), single->size))) + { + return ret; + } + } + + return 0; +} + +static int +xd3_emit_double (xd3_stream *stream, xd3_rinst *first, + xd3_rinst *second, uint8_t code) +{ + int ret; + + /* All double instructions use fixed sizes, so all we need to do is + * output the instruction code, no sizes. */ + XD3_ASSERT (stream->code_table[code].size1 != 0 && + stream->code_table[code].size2 != 0); + + if ((ret = xd3_emit_byte (stream, & INST_TAIL (stream), code))) + { + return ret; + } + + IF_DEBUG2 (DP(RINT "[emit2]: %"W"u %s (%"W"u) %s (%"W"u) code %u\n", + first->pos, + xd3_rtype_to_string ((xd3_rtype) first->type, 0), + first->size, + xd3_rtype_to_string ((xd3_rtype) second->type, 0), + second->size, + code)); + + return 0; +} + +/* This enters a potential run instruction into the iopt buffer. 
The + * position argument is relative to the target window. */ +static int +xd3_emit_run (xd3_stream *stream, usize_t pos, usize_t size, uint8_t *run_c) +{ + xd3_rinst* ri; + int ret; + + if ((ret = xd3_iopt_get_slot (stream, & ri))) { return ret; } + + ri->type = XD3_RUN; + ri->xtra = *run_c; + ri->pos = pos; + ri->size = size; + + return 0; +} + +/* This enters a potential copy instruction into the iopt buffer. The + * position argument is relative to the target window.. */ +int +xd3_found_match (xd3_stream *stream, usize_t pos, + usize_t size, xoff_t addr, int is_source) +{ + xd3_rinst* ri; + int ret; + + if ((ret = xd3_iopt_get_slot (stream, & ri))) { return ret; } + + ri->type = XD3_CPY; + ri->xtra = is_source; + ri->pos = pos; + ri->size = size; + ri->addr = addr; + + return 0; +} + +static int +xd3_emit_hdr (xd3_stream *stream) +{ + int ret; + int use_secondary = stream->sec_type != NULL; + int use_adler32 = stream->flags & (XD3_ADLER32 | XD3_ADLER32_RECODE); + int vcd_source = xd3_encoder_used_source (stream); + uint8_t win_ind = 0; + uint8_t del_ind = 0; + usize_t enc_len; + usize_t tgt_len; + usize_t data_len; + usize_t inst_len; + usize_t addr_len; + + if (stream->current_window == 0) + { + uint8_t hdr_ind = 0; + int use_appheader = stream->enc_appheader != NULL; + + if (use_secondary) { hdr_ind |= VCD_SECONDARY; } + if (use_appheader) { hdr_ind |= VCD_APPHEADER; } + + if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + VCDIFF_MAGIC1)) != 0 || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + VCDIFF_MAGIC2)) != 0 || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + VCDIFF_MAGIC3)) != 0 || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + VCDIFF_VERSION)) != 0 || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), hdr_ind)) != 0) + { + return ret; + } + + /* Secondary compressor ID */ +#if SECONDARY_ANY + if (use_secondary && + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + stream->sec_type->id))) + { + return ret; + } +#endif + 
+ /* Application header */ + if (use_appheader) + { + if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), + stream->enc_appheadsz)) || + (ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), + stream->enc_appheader, + stream->enc_appheadsz))) + { + return ret; + } + } + } + + /* try to compress this window */ +#if SECONDARY_ANY + if (use_secondary) + { + int data_sec = 0; + int inst_sec = 0; + int addr_sec = 0; + +# define ENCODE_SECONDARY_SECTION(UPPER,LOWER) \ + ((stream->flags & XD3_SEC_NO ## UPPER) == 0 && \ + (ret = xd3_encode_secondary (stream, \ + & UPPER ## _HEAD (stream), \ + & UPPER ## _TAIL (stream), \ + & xd3_sec_ ## LOWER (stream), \ + & stream->sec_ ## LOWER, \ + & LOWER ## _sec))) + + if (ENCODE_SECONDARY_SECTION (DATA, data) || + ENCODE_SECONDARY_SECTION (INST, inst) || + ENCODE_SECONDARY_SECTION (ADDR, addr)) + { + return ret; + } + + del_ind |= (data_sec ? VCD_DATACOMP : 0); + del_ind |= (inst_sec ? VCD_INSTCOMP : 0); + del_ind |= (addr_sec ? VCD_ADDRCOMP : 0); + } +#endif + + /* if (vcd_target) { win_ind |= VCD_TARGET; } */ + if (vcd_source) { win_ind |= VCD_SOURCE; } + if (use_adler32) { win_ind |= VCD_ADLER32; } + + /* window indicator */ + if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), win_ind))) + { + return ret; + } + + /* source window */ + if (vcd_source) + { + /* or (vcd_target) { ... } */ + if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), + stream->src->srclen)) || + (ret = xd3_emit_offset (stream, & HDR_TAIL (stream), + stream->src->srcbase))) { return ret; } + } + + tgt_len = stream->avail_in; + data_len = xd3_sizeof_output (DATA_HEAD (stream)); + inst_len = xd3_sizeof_output (INST_HEAD (stream)); + addr_len = xd3_sizeof_output (ADDR_HEAD (stream)); + + /* The enc_len field is a redundency for future extensions. */ + enc_len = (1 + (xd3_sizeof_size (tgt_len) + + xd3_sizeof_size (data_len) + + xd3_sizeof_size (inst_len) + + xd3_sizeof_size (addr_len)) + + data_len + + inst_len + + addr_len + + (use_adler32 ? 
4 : 0)); + + if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), enc_len)) || + (ret = xd3_emit_size (stream, & HDR_TAIL (stream), tgt_len)) || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), del_ind)) || + (ret = xd3_emit_size (stream, & HDR_TAIL (stream), data_len)) || + (ret = xd3_emit_size (stream, & HDR_TAIL (stream), inst_len)) || + (ret = xd3_emit_size (stream, & HDR_TAIL (stream), addr_len))) + { + return ret; + } + + if (use_adler32) + { + uint8_t send[4]; + uint32_t a32; + + if (stream->flags & XD3_ADLER32) + { + a32 = adler32 (1L, stream->next_in, stream->avail_in); + } + else + { + a32 = stream->recode_adler32; + } + + /* Four bytes. */ + send[0] = (uint8_t) (a32 >> 24); + send[1] = (uint8_t) (a32 >> 16); + send[2] = (uint8_t) (a32 >> 8); + send[3] = (uint8_t) (a32 & 0x000000FFU); + + if ((ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), send, 4))) + { + return ret; + } + } + + return 0; +} + +/**************************************************************** + Encode routines + ****************************************************************/ + +static int +xd3_encode_buffer_leftover (xd3_stream *stream) +{ + usize_t take; + usize_t room; + + /* Allocate the buffer. */ + if (stream->buf_in == NULL && + (stream->buf_in = (uint8_t*) xd3_alloc (stream, stream->winsize, 1)) == NULL) + { + return ENOMEM; + } + + IF_DEBUG2 (DP(RINT "[leftover] flush?=%s\n", (stream->flags & XD3_FLUSH) ? "yes" : "no")); + + /* Take leftover input first. */ + if (stream->buf_leftover != NULL) + { + XD3_ASSERT (stream->buf_avail == 0); + XD3_ASSERT (stream->buf_leftavail < stream->winsize); + + IF_DEBUG2 (DP(RINT "[leftover] previous %"W"u avail %"W"u\n", + stream->buf_leftavail, stream->avail_in)); + + memcpy (stream->buf_in, stream->buf_leftover, stream->buf_leftavail); + + stream->buf_leftover = NULL; + stream->buf_avail = stream->buf_leftavail; + } + + /* Copy into the buffer. 
*/ + room = stream->winsize - stream->buf_avail; + take = xd3_min (room, stream->avail_in); + + memcpy (stream->buf_in + stream->buf_avail, stream->next_in, take); + + stream->buf_avail += take; + + if (take < stream->avail_in) + { + /* Buffer is full */ + stream->buf_leftover = stream->next_in + take; + stream->buf_leftavail = stream->avail_in - take; + } + else if ((stream->buf_avail < stream->winsize) && !(stream->flags & XD3_FLUSH)) + { + /* Buffer has space */ + IF_DEBUG2 (DP(RINT "[leftover] emptied %"W"u\n", take)); + return XD3_INPUT; + } + + /* Use the buffer: */ + IF_DEBUG2 (DP(RINT "[leftover] take %"W"u remaining %"W"u\n", take, stream->buf_leftavail)); + stream->next_in = stream->buf_in; + stream->avail_in = stream->buf_avail; + stream->buf_avail = 0; + + return 0; +} + +/* Allocates one block of xd3_rlist elements */ +static int +xd3_alloc_iopt (xd3_stream *stream, usize_t elts) +{ + usize_t i; + xd3_iopt_buflist* last = + (xd3_iopt_buflist*) xd3_alloc (stream, sizeof (xd3_iopt_buflist), 1); + + if (last == NULL || + (last->buffer = (xd3_rinst*) xd3_alloc (stream, sizeof (xd3_rinst), elts)) == NULL) + { + return ENOMEM; + } + + last->next = stream->iopt_alloc; + stream->iopt_alloc = last; + + for (i = 0; i < elts; i += 1) + { + xd3_rlist_push_back (& stream->iopt_free, & last->buffer[i]); + } + + return 0; +} + +/* This function allocates all memory initially used by the encoder. */ +static int +xd3_encode_init (xd3_stream *stream, int full_init) +{ + int ret; + int i; + + if (full_init) + { + int large_comp = (stream->src != NULL); + int small_comp = ! (stream->flags & XD3_NOCOMPRESS); + + /* Memory allocations for checksum tables are delayed until + * xd3_string_match_init in the first call to string_match--that way + * identical or short inputs require no table allocation. */ + if (large_comp) + { + /* TODO Need to check for overflow here. 
*/ + usize_t hash_values = stream->src->max_winsize / + stream->smatcher.large_step; + + if ((ret = xd3_size_hashtable (stream, + hash_values, + stream->smatcher.large_look, + & stream->large_hash))) + { + return ret; + } + } + + if (small_comp) + { + /* TODO: This is under devel: used to have min (sprevsz) here, which sort + * of makes sense, but observed fast performance w/ larger tables, which + * also sort of makes sense. @@@ */ + usize_t hash_values = stream->winsize; + + if ((ret = xd3_size_hashtable (stream, + hash_values, + stream->smatcher.small_look, + & stream->small_hash))) + { + return ret; + } + } + } + + /* data buffers */ + for (i = 0; i < ENC_SECTS; i += 1) + { + if ((stream->enc_heads[i] = + stream->enc_tails[i] = + xd3_alloc_output (stream, NULL)) == NULL) + { + return ENOMEM; + } + } + + /* iopt buffer */ + xd3_rlist_init (& stream->iopt_used); + xd3_rlist_init (& stream->iopt_free); + + if (xd3_alloc_iopt (stream, stream->iopt_size) != 0) { goto fail; } + + XD3_ASSERT (xd3_rlist_length (& stream->iopt_free) == stream->iopt_size); + XD3_ASSERT (xd3_rlist_length (& stream->iopt_used) == 0); + + /* address cache, code table */ + stream->acache.s_near = stream->code_table_desc->near_modes; + stream->acache.s_same = stream->code_table_desc->same_modes; + stream->code_table = stream->code_table_func (); + + return xd3_alloc_cache (stream); + + fail: + + return ENOMEM; +} + +int +xd3_encode_init_full (xd3_stream *stream) +{ + return xd3_encode_init (stream, 1); +} + +int +xd3_encode_init_partial (xd3_stream *stream) +{ + return xd3_encode_init (stream, 0); +} + +/* Called after the ENC_POSTOUT state, this puts the output buffers + * back into separate lists and re-initializes some variables. (The + * output lists were spliced together during the ENC_FLUSH state.) 
*/ +static void +xd3_encode_reset (xd3_stream *stream) +{ + int i; + xd3_output *olist; + + stream->avail_in = 0; + stream->small_reset = 1; + stream->i_slots_used = 0; + + if (stream->src != NULL) + { + stream->src->srcbase = 0; + stream->src->srclen = 0; + stream->srcwin_decided = 0; + stream->srcwin_decided_early = 0; + stream->match_minaddr = 0; + stream->match_maxaddr = 0; + stream->taroff = 0; + } + + /* Reset output chains. */ + olist = stream->enc_heads[0]; + + for (i = 0; i < ENC_SECTS; i += 1) + { + XD3_ASSERT (olist != NULL); + + stream->enc_heads[i] = olist; + stream->enc_tails[i] = olist; + olist = olist->next_page; + + stream->enc_heads[i]->next = 0; + stream->enc_heads[i]->next_page = NULL; + + stream->enc_tails[i]->next_page = NULL; + stream->enc_tails[i] = stream->enc_heads[i]; + } + + xd3_freelist_output (stream, olist); +} + +/* The main encoding routine. */ +int +xd3_encode_input (xd3_stream *stream) +{ + int ret, i; + + if (stream->dec_state != 0) + { + stream->msg = "encoder/decoder transition"; + return XD3_INTERNAL; + } + + switch (stream->enc_state) + { + case ENC_INIT: + /* Only reached on first time through: memory setup. */ + if ((ret = xd3_encode_init_full (stream))) { return ret; } + + stream->enc_state = ENC_INPUT; + + case ENC_INPUT: + + /* If there is no input yet, just return. This checks for + * next_in == NULL, not avail_in == 0 since zero bytes is a + * valid input. There is an assertion in xd3_avail_input() that + * next_in != NULL for this reason. By returning right away we + * avoid creating an input buffer before the caller has supplied + * its first data. It is possible for xd3_avail_input to be + * called both before and after the first call to + * xd3_encode_input(). */ + if (stream->next_in == NULL) + { + return XD3_INPUT; + } + + enc_flush: + /* See if we should buffer the input: either if there is already + * a leftover buffer, or if the input is short of winsize + * without flush. 
The label at this point is reached by a goto + * below, when there is leftover input after postout. */ + if ((stream->buf_leftover != NULL) || + (stream->buf_avail != 0) || + (stream->avail_in < stream->winsize && ! (stream->flags & XD3_FLUSH))) + { + if ((ret = xd3_encode_buffer_leftover (stream))) { return ret; } + } + + /* Initalize the address cache before each window. */ + xd3_init_cache (& stream->acache); + + stream->input_position = 0; + stream->min_match = MIN_MATCH; + stream->unencoded_offset = 0; + + stream->enc_state = ENC_SEARCH; + + IF_DEBUG2 (DP(RINT "[WINSTART:%"Q"u] input bytes %"W"u offset %"Q"u\n", + stream->current_window, stream->avail_in, + stream->total_in)); + return XD3_WINSTART; + + case ENC_SEARCH: + IF_DEBUG2 (DP(RINT "[SEARCH] match_state %d avail_in %"W"u %s\n", + stream->match_state, stream->avail_in, + stream->src ? "source" : "no source")); + + /* Reentrant matching. */ + if (stream->src != NULL) + { + switch (stream->match_state) + { + case MATCH_TARGET: + /* Try matching forward at the start of the target. + * This is entered the first time through, to check for + * a perfect match, and whenever there is a source match + * that extends to the end of the previous window. The + * match_srcpos field is initially zero and later set + * during xd3_source_extend_match. */ + + if (stream->avail_in > 0) + { + /* This call can't fail because the source window is + * unrestricted. 
*/ + ret = xd3_source_match_setup (stream, stream->match_srcpos); + XD3_ASSERT (ret == 0); + stream->match_state = MATCH_FORWARD; + } + else + { + stream->match_state = MATCH_SEARCHING; + stream->match_fwd = 0; + } + XD3_ASSERT (stream->match_fwd == 0); + + case MATCH_FORWARD: + case MATCH_BACKWARD: + if (stream->avail_in != 0) + { + if ((ret = xd3_source_extend_match (stream)) != 0) + { + return ret; + } + + /* The search has to make forward progress here + * or else it can get stuck in a match-backward + * (getsrcblk) then match-forward (getsrcblk), + * find insufficient match length, then repeat + + * exactly the same search. + */ + stream->input_position += stream->match_fwd; + } + + case MATCH_SEARCHING: + /* Continue string matching. (It's possible that the + * initial match continued through the entire input, in + * which case we're still in MATCH_FORWARD and should + * remain so for the next input window.) */ + break; + } + } + + /* String matching... */ + if (stream->avail_in != 0 && + (ret = stream->smatcher.string_match (stream))) + { + return ret; + } + + stream->enc_state = ENC_INSTR; + + case ENC_INSTR: + /* Note: Jump here to encode VCDIFF deltas w/o using this + * string-matching code. Merging code enters here. */ + + /* Flush the instrution buffer, then possibly add one more + * instruction, then emit the header. */ + if ((ret = xd3_iopt_flush_instructions (stream, 1)) || + (ret = xd3_iopt_add_finalize (stream))) + { + return ret; + } + + stream->enc_state = ENC_FLUSH; + + case ENC_FLUSH: + /* Note: main_recode_func() bypasses string-matching by setting + * ENC_FLUSH. */ + if ((ret = xd3_emit_hdr (stream))) + { + return ret; + } + + /* Begin output. */ + stream->enc_current = HDR_HEAD (stream); + + /* Chain all the outputs together. After doing this, it looks + * as if there is only one section. The other enc_heads are set + * to NULL to avoid freeing them more than once. 
*/ + for (i = 1; i < ENC_SECTS; i += 1) + { + stream->enc_tails[i-1]->next_page = stream->enc_heads[i]; + stream->enc_heads[i] = NULL; + } + + enc_output: + + stream->enc_state = ENC_POSTOUT; + stream->next_out = stream->enc_current->base; + stream->avail_out = stream->enc_current->next; + stream->total_out += stream->avail_out; + + /* If there is any output in this buffer, return it, otherwise + * fall through to handle the next buffer or finish the window + * after all buffers have been output. */ + if (stream->avail_out > 0) + { + /* This is the only place xd3_encode returns XD3_OUTPUT */ + return XD3_OUTPUT; + } + + case ENC_POSTOUT: + + if (stream->avail_out != 0) + { + stream->msg = "missed call to consume output"; + return XD3_INTERNAL; + } + + /* Continue outputting one buffer at a time, until the next is NULL. */ + if ((stream->enc_current = stream->enc_current->next_page) != NULL) + { + goto enc_output; + } + + stream->total_in += stream->avail_in; + stream->enc_state = ENC_POSTWIN; + + IF_DEBUG2 (DP(RINT "[WINFINISH:%"Q"u] in=%"Q"u\n", + stream->current_window, + stream->total_in)); + return XD3_WINFINISH; + + case ENC_POSTWIN: + + xd3_encode_reset (stream); + + stream->current_window += 1; + stream->enc_state = ENC_INPUT; + + /* If there is leftover input to flush, repeat. */ + if (stream->buf_leftover != NULL) + { + goto enc_flush; + } + + /* Ready for more input. 
*/ + return XD3_INPUT; + + default: + stream->msg = "invalid state"; + return XD3_INTERNAL; + } +} +#endif /* XD3_ENCODER */ + +/***************************************************************** + Client convenience functions + ******************************************************************/ + +int +xd3_process_stream (int is_encode, + xd3_stream *stream, + int (*func) (xd3_stream *), + int close_stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max) +{ + usize_t ipos = 0; + usize_t n = xd3_min (stream->winsize, input_size); + + (*output_size) = 0; + + stream->flags |= XD3_FLUSH; + + xd3_avail_input (stream, input + ipos, n); + ipos += n; + + for (;;) + { + int ret; + switch ((ret = func (stream))) + { + case XD3_OUTPUT: { /* memcpy below */ break; } + case XD3_INPUT: { + n = xd3_min(stream->winsize, input_size - ipos); + if (n == 0) + { + goto done; + } + xd3_avail_input (stream, input + ipos, n); + ipos += n; + continue; + } + case XD3_GOTHEADER: { /* ignore */ continue; } + case XD3_WINSTART: { /* ignore */ continue; } + case XD3_WINFINISH: { /* ignore */ continue; } + case XD3_GETSRCBLK: + { + /* When the getblk function is NULL, it is necessary to + * provide the complete source as a single block using + * xd3_set_source_and_size, otherwise this error. The + * library should never ask for another source block. */ + stream->msg = "library requested source block"; + return XD3_INTERNAL; + } + case 0: + { + /* xd3_encode_input/xd3_decode_input never return 0 */ + stream->msg = "invalid return: 0"; + return XD3_INTERNAL; + } + default: + return ret; + } + + if (*output_size + stream->avail_out > output_size_max) + { + stream->msg = "insufficient output space"; + return ENOSPC; + } + + memcpy (output + *output_size, stream->next_out, stream->avail_out); + + *output_size += stream->avail_out; + + xd3_consume_output (stream); + } + done: + return (close_stream == 0) ? 
0 : xd3_close_stream (stream); +} + +static int +xd3_process_memory (int is_encode, + int (*func) (xd3_stream *), + const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max, + int flags) { + xd3_stream stream; + xd3_config config; + xd3_source src; + int ret; + + memset (& stream, 0, sizeof (stream)); + memset (& config, 0, sizeof (config)); + + if (input == NULL || output == NULL) { + stream.msg = "invalid input/output buffer"; + ret = XD3_INTERNAL; + goto exit; + } + + config.flags = flags; + + if (is_encode) + { + config.winsize = xd3_min(input_size, (usize_t) XD3_DEFAULT_WINSIZE); + config.sprevsz = xd3_pow2_roundup (config.winsize); + } + + if ((ret = xd3_config_stream (&stream, &config)) != 0) + { + goto exit; + } + + if (source != NULL) + { + memset (& src, 0, sizeof (src)); + + src.blksize = source_size; + src.onblk = source_size; + src.curblk = source; + src.curblkno = 0; + src.max_winsize = source_size; + + if ((ret = xd3_set_source_and_size (&stream, &src, source_size)) != 0) + { + goto exit; + } + } + + if ((ret = xd3_process_stream (is_encode, + & stream, + func, 1, + input, input_size, + output, + output_size, + output_size_max)) != 0) + { + goto exit; + } + + exit: + if (ret != 0) + { + IF_DEBUG2 (DP(RINT "process_memory: %d: %s\n", ret, stream.msg)); + } + xd3_free_stream(&stream); + return ret; +} + +int +xd3_decode_stream (xd3_stream *stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max) +{ + return xd3_process_stream (0, stream, & xd3_decode_input, 1, + input, input_size, + output, output_size, output_size_max); +} + +int +xd3_decode_memory (const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max, + int flags) { + return xd3_process_memory (0, & xd3_decode_input, + input, 
input_size, + source, source_size, + output, output_size, output_size_max, + flags); +} + + +#if XD3_ENCODER +int +xd3_encode_stream (xd3_stream *stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max) +{ + return xd3_process_stream (1, stream, & xd3_encode_input, 1, + input, input_size, + output, output_size, output_size_max); +} + +int +xd3_encode_memory (const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max, + int flags) { + return xd3_process_memory (1, & xd3_encode_input, + input, input_size, + source, source_size, + output, output_size, output_size_max, + flags); +} +#endif + + +/************************************************************* + String matching helpers + *************************************************************/ + +#if XD3_ENCODER +/* Do the initial xd3_string_match() checksum table setup. + * Allocations are delayed until first use so that they can be skipped + * entirely in some cases (e.g., perfect matches, zero-length inputs). + * Returns 0 on success or ENOMEM if a table cannot be allocated. */ +static int +xd3_string_match_init (xd3_stream *stream) +{ + const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS); + const int DO_LARGE = (stream->src != NULL); + + if (DO_LARGE && stream->large_table == NULL) + { + if ((stream->large_table = + (usize_t*) xd3_alloc0 (stream, stream->large_hash.size, sizeof (usize_t))) == NULL) + { + return ENOMEM; + } + } + + if (DO_SMALL) + { + /* Subsequent calls can return immediately after checking reset. */ + if (stream->small_table != NULL) + { + /* The target hash table is reinitialized once per window. */ + /* TODO: This would not have to be reinitialized if absolute + * offsets were being stored.
*/ + if (stream->small_reset) + { + stream->small_reset = 0; + memset (stream->small_table, 0, + sizeof (usize_t) * stream->small_hash.size); + } + + return 0; + } + + if ((stream->small_table = + (usize_t*) xd3_alloc0 (stream, + stream->small_hash.size, + sizeof (usize_t))) == NULL) + { + return ENOMEM; + } + + /* The previous-position table is only needed when one of the + * small chain lengths is greater than one. */ + if (stream->smatcher.small_lchain > 1 || + stream->smatcher.small_chain > 1) + { + if ((stream->small_prev = + (xd3_slist*) xd3_alloc (stream, + stream->sprevsz, + sizeof (xd3_slist))) == NULL) + { + return ENOMEM; + } + } + } + + return 0; +} + +#if XD3_USE_LARGEFILE64 && !XD3_USE_LARGESIZET +/* This function handles the 32/64bit ambiguity -- file positions are 64bit + * but the hash table for source-offsets is 32bit. The upper 32 bits + * are taken from srcwin_cksum_pos. When the stored low word is larger + * than the low word of srcwin_cksum_pos, the upper part is decremented + * by one, i.e. the entry is taken to lie in the preceding 2^32-byte + * range. */ +static xoff_t +xd3_source_cksum_offset(xd3_stream *stream, usize_t low) +{ + xoff_t scp = stream->srcwin_cksum_pos; + xoff_t s0 = scp >> 32; + + usize_t sr = (usize_t) scp; + + if (s0 == 0) { + return low; + } + + /* This should not be >= because srcwin_cksum_pos is the next + * position to index. */ + if (low > sr) { + return (--s0 << 32) | low; + } + + return (s0 << 32) | low; +} +#else +static xoff_t +xd3_source_cksum_offset(xd3_stream *stream, usize_t low) +{ + return low; +} +#endif + +/* This function sets up the stream->src fields srcbase, srclen. The + * call is delayed until these values are needed to encode a copy + * address. At this point the decision has to be made. */ +static int +xd3_srcwin_setup (xd3_stream *stream) +{ + xd3_source *src = stream->src; + xoff_t length, x; + + /* Check the undecided state. */ + XD3_ASSERT (src->srclen == 0 && src->srcbase == 0); + + /* Avoid repeating this call. */ + stream->srcwin_decided = 1; + + /* If the stream is flushing, then the iopt buffer was able to + * contain the complete encoding. If no copies were issued no + * source window is actually needed. This prevents the VCDIFF + * header from including source base/len.
xd3_emit_hdr checks for + * srclen == 0. */ + if (stream->enc_state == ENC_INSTR && stream->match_maxaddr == 0) + { + goto done; + } + + /* Check for overflow, srclen is usize_t - this can't happen unless + * XD3_DEFAULT_SRCBACK and related parameters are extreme - should + * use smaller windows. */ + length = stream->match_maxaddr - stream->match_minaddr; + + x = USIZE_T_MAX; + if (length > x) + { + stream->msg = "source window length overflow (not 64bit)"; + return XD3_INTERNAL; + } + + /* If ENC_INSTR, then we know the exact source window to use because + * no more copies can be issued. */ + if (stream->enc_state == ENC_INSTR) + { + src->srcbase = stream->match_minaddr; + src->srclen = (usize_t) length; + XD3_ASSERT (src->srclen); + goto done; + } + + /* Otherwise, we have to make a guess. More copies may still be + * issued, but we have to decide the source window base and length + * now. + * TODO: This may not work well in practice; more testing is needed. */ + src->srcbase = stream->match_minaddr; + src->srclen = xd3_max ((usize_t) length, + stream->avail_in + (stream->avail_in >> 2)); + + if (src->eof_known) + { + /* Note: if the source size is known, we must reduce srclen or + * code that expects to pass a single block w/ getblk == NULL + * will not function, as the code will return GETSRCBLK asking + * for the second block. */ + src->srclen = xd3_min (src->srclen, xd3_source_eof(src) - src->srcbase); + } + IF_DEBUG1 (DP(RINT "[srcwin_setup_constrained] base %"Q"u len %"W"u\n", + src->srcbase, src->srclen)); + + XD3_ASSERT (src->srclen); + done: + /* Set the taroff. This convenience variable is used even when + stream->src == NULL. */ + stream->taroff = src->srclen; + return 0; +} + +/* Sets the bounding region for a newly discovered source match, prior + * to calling xd3_source_extend_match(). This sets the match_maxfwd, + * match_maxback variables. Note: srcpos is an absolute position + * (xoff_t) but the match_maxfwd, match_maxback variables are usize_t.
+ * Returns 0 if the setup succeeds, or 1 if the source position lies + * outside an already-decided srcbase/srclen window. */ +static int +xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos) +{ + xd3_source *const src = stream->src; + usize_t greedy_or_not; + + stream->match_maxback = 0; + stream->match_maxfwd = 0; + stream->match_back = 0; + stream->match_fwd = 0; + + /* This avoids a non-blocking endless loop caused by scanning + * backwards across a block boundary, only to find not enough + * matching bytes to beat the current min_match due to a better lazy + * target match: the re-entry to xd3_string_match() repeats the same + * long match because the input position hasn't changed. TODO: if + * ever duplicates are added to the source hash table, this logic + * won't suffice to avoid loops. See testing/regtest.cc's + * TestNonBlockingProgress test! */ + if (srcpos != 0 && srcpos == stream->match_last_srcpos) + { + IF_DEBUG2(DP(RINT "[match_setup] looping failure\n")); + goto bad; + } + + /* Implement src->max_winsize, which prevents the encoder from seeking + * back further than the LRU cache maintaining FIFO discipline, (to + * avoid seeking). */ + if (srcpos < stream->srcwin_cksum_pos && + stream->srcwin_cksum_pos - srcpos > src->max_winsize) + { + IF_DEBUG2(DP(RINT "[match_setup] rejected due to src->max_winsize " + "distance eof=%"Q"u srcpos=%"Q"u max_winsz=%"Q"u\n", + xd3_source_eof (src), + srcpos, src->max_winsize)); + goto bad; + } + + /* There are cases where the above test does not reject a match that + * will experience XD3_TOOFARBACK at the first xd3_getblk call + * because the input may have advanced up to one block beyond the + * actual EOF. */ + IF_DEBUG2(DP(RINT "[match_setup] %"Q"u srcpos %"Q"u, " + "src->max_winsize %"Q"u\n", + stream->total_in + stream->input_position, + srcpos, src->max_winsize)); + + /* Going backwards, the 1.5-pass algorithm allows some + * already-matched input to be covered by a longer source match.
+ * The greedy algorithm does not allow this. + * TODO: Measure this. */ + if (stream->flags & XD3_BEGREEDY) + { + /* The greedy algorithm allows backward matching to the last + * matched position. */ + greedy_or_not = xd3_iopt_last_matched (stream); + } + else + { + /* The 1.5-pass algorithm allows backward matching to go back as + * far as the unencoded offset, which is updated as instructions + * pass out of the iopt buffer. If this (default) is chosen, it + * means xd3_iopt_erase may be called to eliminate instructions + * when a covering source match is found. */ + greedy_or_not = stream->unencoded_offset; + } + + /* Backward target match limit. */ + XD3_ASSERT (stream->input_position >= greedy_or_not); + stream->match_maxback = stream->input_position - greedy_or_not; + + /* Forward target match limit. */ + XD3_ASSERT (stream->avail_in > stream->input_position); + stream->match_maxfwd = stream->avail_in - stream->input_position; + + /* Now we take the source position into account. It depends on whether + * the srclen/srcbase have been decided yet. */ + if (stream->srcwin_decided == 0) + { + /* Unrestricted case: the match can cover the entire source, + * 0--src->size. We compare the usize_t + * match_maxfwd/match_maxback against the xoff_t + * src->size/srcpos values and take the min. */ + /* TODO #if XD3_USE_LARGESIZET ? */ + if (srcpos < stream->match_maxback) + { + stream->match_maxback = (usize_t) srcpos; + } + + if (src->eof_known) + { + xoff_t srcavail = xd3_source_eof (src) - srcpos; + + if (srcavail < stream->match_maxfwd) + { + stream->match_maxfwd = (usize_t) srcavail; + } + } + + IF_DEBUG2(DP(RINT + "[match_setup] srcpos %"Q"u (tgtpos %"Q"u) " + "unrestricted maxback %"W"u maxfwd %"W"u\n", + srcpos, + stream->total_in + stream->input_position, + stream->match_maxback, + stream->match_maxfwd)); + goto good; + } + + /* A source window has already been decided.
*/ + XD3_ASSERT (src->srclen > 0); + + /* Restricted case: fail if the srcpos lies outside the source window */ + if ((srcpos < src->srcbase) || + (srcpos > (src->srcbase + src->srclen))) + { + IF_DEBUG1(DP(RINT "[match_setup] restricted source window failure\n")); + goto bad; + } + else + { + usize_t srcavail; + + srcavail = (usize_t) (srcpos - src->srcbase); + if (srcavail < stream->match_maxback) + { + stream->match_maxback = srcavail; + } + + srcavail = src->srcbase + src->srclen - srcpos; + if (srcavail < stream->match_maxfwd) + { + stream->match_maxfwd = srcavail; + } + + IF_DEBUG2(DP(RINT + "[match_setup] srcpos %"Q"u (tgtpos %"Q"u) " + "restricted maxback %"W"u maxfwd %"W"u\n", + srcpos, + stream->total_in + stream->input_position, + stream->match_maxback, + stream->match_maxfwd)); + goto good; + } + + good: + stream->match_state = MATCH_BACKWARD; + stream->match_srcpos = srcpos; + stream->match_last_srcpos = srcpos; + return 0; + + bad: + stream->match_state = MATCH_SEARCHING; + stream->match_last_srcpos = srcpos; + return 1; +} + +static inline usize_t +xd3_forward_match(const uint8_t *s1c, const uint8_t *s2c, usize_t n) +{ + usize_t i = 0; +#if UNALIGNED_OK + usize_t nint = n / sizeof(int); + + if (nint >> 3) + { + usize_t j = 0; + const int *s1 = (const int*)s1c; + const int *s2 = (const int*)s2c; + usize_t nint_8 = nint - 8; + + while (i <= nint_8 && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++]) { } + + i = (i - 1) * sizeof(int); + } +#endif + + while (i < n && s1c[i] == s2c[i]) + { + i++; + } + return i; +} + +/* This function expands the source match backward and forward. It is + * reentrant, since xd3_getblk may return XD3_GETSRCBLK, so most + * variables are kept in xd3_stream.
There are two callers of this + * function, the string_matching routine when a checksum match is + * discovered, and xd3_encode_input whenever a continuing (or initial) + * match is suspected. The two callers do different things with the + * input_position, thus this function leaves that variable untouched. + * If a match is taken the resulting stream->match_fwd is left + * non-zero. */ +static int +xd3_source_extend_match (xd3_stream *stream) +{ + int ret; + xd3_source *const src = stream->src; + xoff_t matchoff; /* matchoff is the current right/left-boundary of + the source match being tested. */ + usize_t streamoff; /* streamoff is the current right/left-boundary + of the input match being tested. */ + xoff_t tryblk; /* tryblk, tryoff are the block, offset position + of matchoff */ + usize_t tryoff; + usize_t tryrem; /* tryrem is the number of matchable bytes */ + usize_t matched; + + IF_DEBUG2(DP(RINT "[extend match] srcpos %"Q"u\n", + stream->match_srcpos)); + + XD3_ASSERT (src != NULL); + + /* Does it make sense to compute backward match AFTER forward match? */ + if (stream->match_state == MATCH_BACKWARD) + { + /* Note: this code is practically duplicated below, substituting + * match_fwd/match_back and direction. */ + matchoff = stream->match_srcpos - stream->match_back; + streamoff = stream->input_position - stream->match_back; + xd3_blksize_div (matchoff, src, &tryblk, &tryoff); + + /* this loops backward over source blocks */ + while (stream->match_back < stream->match_maxback) + { + /* see if we're backing across a source block boundary */ + if (tryoff == 0) + { + tryoff = src->blksize; + tryblk -= 1; + } + + if ((ret = xd3_getblk (stream, tryblk))) + { + if (ret == XD3_TOOFARBACK) + { + IF_DEBUG2(DP(RINT "[maxback] %"Q"u TOOFARBACK: %"W"u INP %"Q"u CKSUM %"Q"u\n", + tryblk, stream->match_back, + stream->total_in + stream->input_position, + stream->srcwin_cksum_pos)); + + /* the starting position is too far back.
*/ + if (stream->match_back == 0) + { + XD3_ASSERT(stream->match_fwd == 0); + goto donefwd; + } + + /* search went too far back, continue forward. */ + goto doneback; + } + + /* could be a XD3_GETSRCBLK failure. */ + return ret; + } + + tryrem = xd3_min (tryoff, stream->match_maxback - stream->match_back); + + IF_DEBUG2(DP(RINT "[maxback] maxback %"W"u trysrc %"Q"u/%"W"u tgt %"W"u tryrem %"W"u\n", + stream->match_maxback, tryblk, tryoff, streamoff, tryrem)); + + /* TODO: This code can be optimized similar to xd3_forward_match() */ + for (; tryrem != 0; tryrem -= 1, stream->match_back += 1) + { + if (src->curblk[tryoff-1] != stream->next_in[streamoff-1]) + { + goto doneback; + } + + tryoff -= 1; + streamoff -= 1; + } + } + + doneback: + stream->match_state = MATCH_FORWARD; + } + + XD3_ASSERT (stream->match_state == MATCH_FORWARD); + + matchoff = stream->match_srcpos + stream->match_fwd; + streamoff = stream->input_position + stream->match_fwd; + xd3_blksize_div (matchoff, src, & tryblk, & tryoff); + + /* Note: practically the same code as backwards case above: same comments */ + while (stream->match_fwd < stream->match_maxfwd) + { + if (tryoff == src->blksize) + { + tryoff = 0; + tryblk += 1; + } + + if ((ret = xd3_getblk (stream, tryblk))) + { + if (ret == XD3_TOOFARBACK) + { + IF_DEBUG2(DP(RINT "[maxfwd] %"Q"u TOOFARBACK: %"W"u INP %"Q"u CKSUM %"Q"u\n", + tryblk, stream->match_fwd, + stream->total_in + stream->input_position, + stream->srcwin_cksum_pos)); + goto donefwd; + } + + /* could be a XD3_GETSRCBLK failure. */ + return ret; + } + + tryrem = xd3_min(stream->match_maxfwd - stream->match_fwd, + src->onblk - tryoff); + + if (tryrem == 0) + { + /* Generally, this means we have a power-of-two size source + * and we just found the end-of-file, in this case it's an + * empty block.
*/ + XD3_ASSERT (src->onblk < src->blksize); + break; + } + + matched = xd3_forward_match(src->curblk + tryoff, + stream->next_in + streamoff, + tryrem); + tryoff += matched; + streamoff += matched; + stream->match_fwd += matched; + + if (tryrem != matched) + { + break; + } + } + + donefwd: + stream->match_state = MATCH_SEARCHING; + + IF_DEBUG2(DP(RINT "[extend match] input %"Q"u srcpos %"Q"u len %"W"u\n", + stream->input_position + stream->total_in, + stream->match_srcpos, + stream->match_fwd)); + + /* If the match ends short of the last instruction end, we probably + * don't want it. There is the possibility that a copy ends short + * of the last copy but also goes further back, in which case we + * might want it. This code does not implement such: if so we would + * need more complicated xd3_iopt_erase logic. */ + if (stream->match_fwd < stream->min_match) + { + stream->match_fwd = 0; + } + else + { + usize_t total = stream->match_fwd + stream->match_back; + + /* Correct the variables to remove match_back from the equation. */ + usize_t target_position = stream->input_position - stream->match_back; + usize_t match_length = stream->match_back + stream->match_fwd; + xoff_t match_position = stream->match_srcpos - stream->match_back; + xoff_t match_end = stream->match_srcpos + stream->match_fwd; + + /* At this point we may have to erase any iopt-buffer + * instructions that are fully covered by a backward-extending + * copy. */ + if (stream->match_back > 0) + { + xd3_iopt_erase (stream, target_position, total); + } + + stream->match_back = 0; + + /* Update ranges. The first source match occurs with both + values set to 0.
*/ + if (stream->match_maxaddr == 0 || + match_position < stream->match_minaddr) + { + stream->match_minaddr = match_position; + } + + if (match_end > stream->match_maxaddr) + { + /* Note: per-window */ + stream->match_maxaddr = match_end; + } + + if (match_end > stream->maxsrcaddr) + { + /* Note: across windows */ + stream->maxsrcaddr = match_end; + } + + /* Debug trace: absolute input range, source range, and whether + * both ranges start at the same absolute offset. The format + * needs one conversion per argument below. */ + IF_DEBUG2 ({ + static int x = 0; + DP(RINT "[source match:%d] length %"W"u inp %"Q"u-%"Q"u src %"Q"u-%"Q"u (%s)\n", + x++, + match_length, + stream->total_in + target_position, + stream->total_in + target_position + match_length, + match_position, + match_position + match_length, + (stream->total_in + target_position == match_position) ? "same" : "diff"); + }); + + if ((ret = xd3_found_match (stream, + /* decoder position */ target_position, + /* length */ match_length, + /* address */ match_position, + /* is_source */ 1))) + { + return ret; + } + + /* If the match ends with the available input: */ + if (target_position + match_length == stream->avail_in) + { + /* Setup continuing match for the next window. */ + stream->match_state = MATCH_TARGET; + stream->match_srcpos = match_end; + } + } + + return 0; +} + +/* Update the small hash. Values in the small_table are offset by + * HASH_CKOFFSET (1) to distinguish empty buckets from real offsets. */ +static void +xd3_scksum_insert (xd3_stream *stream, + usize_t inx, + usize_t scksum, + usize_t pos) +{ + /* If we are maintaining previous duplicates. */ + if (stream->small_prev) + { + usize_t last_pos = stream->small_table[inx]; + xd3_slist *pos_list = & stream->small_prev[pos & stream->sprevmask]; + + /* Note last_pos is offset by HASH_CKOFFSET. */ + pos_list->last_pos = last_pos; + } + + /* Enter the new position into the hash bucket.
*/ + stream->small_table[inx] = pos + HASH_CKOFFSET; +} + +#if XD3_DEBUG +static int +xd3_check_smatch (const uint8_t *ref0, const uint8_t *inp0, + const uint8_t *inp_max, usize_t cmp_len) +{ + usize_t i; + + for (i = 0; i < cmp_len; i += 1) + { + XD3_ASSERT (ref0[i] == inp0[i]); + } + + if (inp0 + cmp_len < inp_max) + { + XD3_ASSERT (inp0[i] != ref0[i]); + } + + return 1; +} +#endif /* XD3_DEBUG */ + +/* When the hash table indicates a possible small string match, it + * calls this routine to find the best match. The first matching + * position is taken from the small_table, HASH_CKOFFSET is subtracted + * to get the actual position. After checking that match, if previous + * linked lists are in use (because stream->smatcher.small_chain > 1), + * previous matches are tested searching for the longest match. If + * (stream->min_match > MIN_MATCH) then a lazy match is in effect. + * + * Returns the length of the longest match found (0 if none) and + * stores its position in *match_offset. Also returns 0 when that + * match is only 4 or 5 bytes long and lies at least 2^14 or 2^21 + * bytes back, respectively. + */ +static usize_t +xd3_smatch (xd3_stream *stream, + usize_t base, + usize_t scksum, + usize_t *match_offset) +{ + usize_t cmp_len; + usize_t match_length = 0; + usize_t chain = (stream->min_match == MIN_MATCH ? + stream->smatcher.small_chain : + stream->smatcher.small_lchain); + const uint8_t *inp_max = stream->next_in + stream->avail_in; + const uint8_t *inp; + const uint8_t *ref; + + SMALL_HASH_DEBUG1 (stream, stream->next_in + stream->input_position); + + XD3_ASSERT (stream->min_match + stream->input_position <= stream->avail_in); + + base -= HASH_CKOFFSET; + + again: + + IF_DEBUG2 (DP(RINT "smatch at base=%"W"u inp=%"W"u cksum=%"W"u\n", base, + stream->input_position, scksum)); + + /* For small matches, we can always go to the end-of-input because + * the matching position must be less than the input position. */ + XD3_ASSERT (base < stream->input_position); + + ref = stream->next_in + base; + inp = stream->next_in + stream->input_position; + + SMALL_HASH_DEBUG2 (stream, ref); + + /* Expand potential match forward.
*/ + while (inp < inp_max && *inp == *ref) + { + ++inp; + ++ref; + } + + cmp_len = (usize_t)(inp - (stream->next_in + stream->input_position)); + + /* Verify correctness */ + XD3_ASSERT (xd3_check_smatch (stream->next_in + base, + stream->next_in + stream->input_position, + inp_max, cmp_len)); + + /* Update longest match */ + if (cmp_len > match_length) + { + ( match_length) = cmp_len; + (*match_offset) = base; + + /* Stop if we match the entire input or have a long_enough match. */ + if (inp == inp_max || cmp_len >= stream->smatcher.long_enough) + { + goto done; + } + } + + /* If we have not reached the chain limit, see if there is another + previous position. */ + while (--chain != 0) + { + /* Calculate the previous offset. */ + usize_t prev_pos = stream->small_prev[base & stream->sprevmask].last_pos; + usize_t diff_pos; + + if (prev_pos == 0) + { + break; + } + + prev_pos -= HASH_CKOFFSET; + + if (prev_pos > base) + { + break; + } + + base = prev_pos; + + XD3_ASSERT (stream->input_position > base); + diff_pos = stream->input_position - base; + + /* Stop searching if we go beyond sprevsz, since those entries + * are for unrelated checksum entries. */ + if (diff_pos & ~stream->sprevmask) + { + break; + } + + goto again; + } + + done: + /* Crude efficiency test: if the match is very short and very far back, it's + * unlikely to help, but the exact calculation requires knowing the state of + * the address cache and adjacent instructions, which we can't do here.
+ * Rather than encode a probably inefficient copy here and check it later + * (which complicates the code a lot), do this: + */ + if (match_length == 4 && stream->input_position - (*match_offset) >= 1<<14) + { + /* It probably takes >2 bytes to encode an address >= 2^14 from here */ + return 0; + } + if (match_length == 5 && stream->input_position - (*match_offset) >= 1<<21) + { + /* It probably takes >3 bytes to encode an address >= 2^21 from here */ + return 0; + } + + /* It's unlikely that a window is large enough for the (match_length == 6 && + * address >= 2^28) check */ + return match_length; +} + +#if XD3_DEBUG +static void +xd3_verify_small_state (xd3_stream *stream, + const uint8_t *inp, + uint32_t x_cksum) +{ + uint32_t state; + uint32_t cksum = xd3_scksum (&state, inp, stream->smatcher.small_look); + + XD3_ASSERT (cksum == x_cksum); +} + +static void +xd3_verify_large_state (xd3_stream *stream, + const uint8_t *inp, + usize_t x_cksum) +{ + usize_t cksum = xd3_large_cksum (&stream->large_hash, inp, stream->smatcher.large_look); + XD3_ASSERT (cksum == x_cksum); +} +static void +xd3_verify_run_state (xd3_stream *stream, + const uint8_t *inp, + usize_t x_run_l, + uint8_t *x_run_c) +{ + usize_t slook = stream->smatcher.small_look; + uint8_t run_c; + usize_t run_l = xd3_comprun (inp, slook, &run_c); + + XD3_ASSERT (run_l == 0 || run_c == *x_run_c); + XD3_ASSERT (x_run_l > slook || run_l == x_run_l); +} +#endif /* XD3_DEBUG */ + +/* This function computes more source checksums to advance the window. + * Called at every entrance to the string-match loop and each time + * stream->input_position reaches the value returned as + * *next_move_point. NB: this is one of the most expensive functions + * in this code and also the most critical for good compression.
+ */ +static int +xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point) +{ + /* the source file is indexed until this point */ + xoff_t target_cksum_pos; + /* the absolute target file input position */ + xoff_t absolute_input_pos; + + if (stream->src->eof_known) + { + xoff_t source_size = xd3_source_eof (stream->src); + XD3_ASSERT(stream->srcwin_cksum_pos <= source_size); + + if (stream->srcwin_cksum_pos == source_size) + { + *next_move_point = USIZE_T_MAX; + return 0; + } + } + + absolute_input_pos = stream->total_in + stream->input_position; + + /* Immediately read the entire window. + * + * Note: this reverses a long held policy, at this point in the + * code, of advancing relatively slowly as the input is read, which + * results in better compression for very-similar inputs, but worse + * compression where data is deleted near the beginning of the file. + * + * The new policy is simpler, somewhat slower and can benefit, or + * slightly worsen, compression performance. */ + if (absolute_input_pos < stream->src->max_winsize / 2) + { + target_cksum_pos = stream->src->max_winsize; + } + else + { + /* TODO: The addition of 2 blocks here is arbitrary. Do a + * better job of stream alignment based on observed source copy + * addresses, and when both input sizes are known, the + * difference in size. */ + target_cksum_pos = absolute_input_pos + + stream->src->max_winsize / 2 + + stream->src->blksize * 2; + target_cksum_pos &= ~stream->src->maskby; + } + + /* A long match may have extended past srcwin_cksum_pos. Don't + * start checksumming already-matched source data.
*/ + if (stream->maxsrcaddr > stream->srcwin_cksum_pos) + { + stream->srcwin_cksum_pos = stream->maxsrcaddr; + } + + if (target_cksum_pos < stream->srcwin_cksum_pos) + { + target_cksum_pos = stream->srcwin_cksum_pos; + } + + while (stream->srcwin_cksum_pos < target_cksum_pos && + (!stream->src->eof_known || + stream->srcwin_cksum_pos < xd3_source_eof (stream->src))) + { + xoff_t blkno; + xoff_t blkbaseoffset; + usize_t blkrem; + ssize_t oldpos; /* Signed (ssize_t) because the */ + ssize_t blkpos; /* do { blkpos -= large_step } + while (blkpos >= oldpos) loop below + can step blkpos below zero. */ + int ret; + xd3_blksize_div (stream->srcwin_cksum_pos, + stream->src, &blkno, &blkrem); + oldpos = blkrem; + + if ((ret = xd3_getblk (stream, blkno))) + { + /* TOOFARBACK should never occur here, since we read forward. */ + if (ret == XD3_TOOFARBACK) + { + ret = XD3_INTERNAL; + } + + IF_DEBUG1 (DP(RINT + "[srcwin_move_point] async getblk return for %"Q"u: %s\n", + blkno, xd3_strerror (ret))); + return ret; + } + + IF_DEBUG1 (DP(RINT + "[srcwin_move_point] block %"Q"u T=%"Q"u S=%"Q"u L=%"Q"u EOF=%"Q"u %s\n", + blkno, + stream->total_in + stream->input_position, + stream->srcwin_cksum_pos, + target_cksum_pos, + xd3_source_eof (stream->src), + stream->src->eof_known ? "known" : "unknown")); + + blkpos = xd3_bytes_on_srcblk (stream->src, blkno); + + if (blkpos < (ssize_t) stream->smatcher.large_look) + { + stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize; + IF_DEBUG2 (DP(RINT "[srcwin_move_point] continue (end-of-block): %"Z"d\n", blkpos)); + continue; + } + + /* This inserts checksums for the entire block, in reverse, + * starting from the end of the block. This logic does not test + * stream->srcwin_cksum_pos because it always advances it to the + * start of the next block. + * + * oldpos is the srcwin_cksum_pos within this block. blkpos is + * the number of bytes available. Each iteration inspects + * large_look bytes then steps back large_step bytes. The + * if-stmt above ensures at least one large_look of data.
*/ + blkpos -= stream->smatcher.large_look; + blkbaseoffset = stream->src->blksize * blkno; + + do + { + /* TODO: This would be significantly faster if the compiler + * knew stream->smatcher.large_look (which the template for + * xd3_string_match_* allows). */ + usize_t cksum = xd3_large_cksum (&stream->large_hash, + stream->src->curblk + blkpos, + stream->smatcher.large_look); + usize_t hval = xd3_checksum_hash (& stream->large_hash, cksum); + + stream->large_table[hval] = + (usize_t) (blkbaseoffset + + (xoff_t)(blkpos + HASH_CKOFFSET)); + + IF_DEBUG (stream->large_ckcnt += 1); + + blkpos -= stream->smatcher.large_step; + } + while (blkpos >= oldpos); + + stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize; + } + + IF_DEBUG1 (DP(RINT + "[srcwin_move_point] exited loop T=%"Q"u " + "S=%"Q"u EOF=%"Q"u %s\n", + stream->total_in + stream->input_position, + stream->srcwin_cksum_pos, + xd3_source_eof (stream->src), + stream->src->eof_known ? "known" : "unknown")); + + if (stream->src->eof_known) + { + xoff_t source_size = xd3_source_eof (stream->src); + if (stream->srcwin_cksum_pos >= source_size) + { + /* This invariant is needed for xd3_source_cksum_offset() */ + stream->srcwin_cksum_pos = source_size; + *next_move_point = USIZE_T_MAX; + IF_DEBUG1 (DP(RINT + "[srcwin_move_point] finished with source input\n")); + return 0; + } + } + + /* How long until this function should be called again. */ + XD3_ASSERT(stream->srcwin_cksum_pos >= target_cksum_pos); + + *next_move_point = stream->input_position + + stream->src->blksize - + ((stream->srcwin_cksum_pos - target_cksum_pos) & stream->src->maskby); + + IF_DEBUG2 (DP(RINT + "[srcwin_move_point] finished T=%"Q"u " + "S=%"Q"u L=%"Q"u EOF=%"Q"u %s again in %"W"u\n", + stream->total_in + stream->input_position, + stream->srcwin_cksum_pos, + target_cksum_pos, + xd3_source_eof (stream->src), + stream->src->eof_known ?
"known" : "unknown", + *next_move_point - stream->input_position)); + + return 0; +} + +#endif /* XD3_ENCODER */ + +/******************************************************************** + TEMPLATE pass + *********************************************************************/ + +#endif /* __XDELTA3_C_INLINE_PASS__ */ +#ifdef __XDELTA3_C_TEMPLATE_PASS__ + +#if XD3_ENCODER + +/******************************************************************** + Templates + *******************************************************************/ + +/* Template macros: XD3_TEMPLATE(x) pastes the expansion of TEMPLATE + * onto x, and XD3_STRINGIFY(x) turns the expansion of x into a string + * literal. The extra macro levels make the argument expand first. */ +#define XD3_TEMPLATE(x) XD3_TEMPLATE2(x,TEMPLATE) +#define XD3_TEMPLATE2(x,n) XD3_TEMPLATE3(x,n) +#define XD3_TEMPLATE3(x,n) x ## n +#define XD3_STRINGIFY(x) XD3_STRINGIFY2(x) +#define XD3_STRINGIFY2(x) #x + +static int XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream); + +static const xd3_smatcher XD3_TEMPLATE(__smatcher_) = +{ + XD3_STRINGIFY(TEMPLATE), + XD3_TEMPLATE(xd3_string_match_), +#if SOFTCFG == 1 + 0, 0, 0, 0, 0, 0, 0 +#else + LLOOK, LSTEP, SLOOK, SCHAIN, SLCHAIN, MAXLAZY, LONGENOUGH +#endif +}; + +static int +XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream) +{ + const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS); + const int DO_LARGE = (stream->src != NULL); + const int DO_RUN = (1); + + const uint8_t *inp; + uint32_t scksum = 0; + uint32_t scksum_state = 0; + usize_t lcksum = 0; + usize_t sinx; + usize_t linx; + uint8_t run_c; + usize_t run_l; + int ret; + usize_t match_length; + usize_t match_offset = 0; + usize_t next_move_point = 0; + + IF_DEBUG2(DP(RINT "[string_match] initial entry %"W"u\n", stream->input_position)); + + /* If there will be no compression due to settings or short input, + * skip it entirely. */ + if (! (DO_SMALL || DO_LARGE || DO_RUN) || + stream->input_position + SLOOK > stream->avail_in) { goto loopnomore; } + + if ((ret = xd3_string_match_init (stream))) { return ret; } + + /* The restartloop label is reached when the incremental loop state + * needs to be reset.
*/ + restartloop: + + IF_DEBUG2(DP(RINT "[string_match] restartloop %"W"u\n", stream->input_position)); + + /* If there is not enough input remaining for any kind of match, + skip it. */ + if (stream->input_position + SLOOK > stream->avail_in) { goto loopnomore; } + + /* Now reset the incremental loop state: */ + + /* The min_match variable is updated to avoid matching the same lazy + * match over and over again. For example, if you find a (small) + * match of length 9 at one position, you will likely find a match + * of length 8 at the next position. */ + if (xd3_iopt_last_matched (stream) > stream->input_position) + { + stream->min_match = xd3_max (MIN_MATCH, + 1 + xd3_iopt_last_matched(stream) - + stream->input_position); + } + else + { + stream->min_match = MIN_MATCH; + } + + /* The current input byte. */ + inp = stream->next_in + stream->input_position; + + /* Small match state. */ + if (DO_SMALL) + { + scksum = xd3_scksum (&scksum_state, inp, SLOOK); + } + + /* Run state. */ + if (DO_RUN) + { + run_l = xd3_comprun (inp, SLOOK, & run_c); + } + + /* Large match state. We continue the loop even after not enough + * bytes for LLOOK remain, so always check stream->input_position in + * DO_LARGE code. */ + if (DO_LARGE && (stream->input_position + LLOOK <= stream->avail_in)) + { + /* Source window: next_move_point is the point that + * stream->input_position must reach before computing more + * source checksum. Note: this is called unconditionally + * the first time after reentry, subsequent calls will be + * avoided if next_move_point is > input_position */ + if ((ret = xd3_srcwin_move_point (stream, & next_move_point))) + { + return ret; + } + + lcksum = xd3_large_cksum (&stream->large_hash, inp, LLOOK); + } + + /* TRYLAZYLEN: True if a certain length match should be followed by + * lazy search. This checks that LEN is shorter than MAXLAZY and + * that there is enough leftover data to consider lazy matching.
+ * "Enough" is set to 2 since the next match will start at the next + * offset, it must match two extra characters. */ +#define TRYLAZYLEN(LEN,POS,MAX) ((MAXLAZY) > 0 && (LEN) < (MAXLAZY) \ + && (POS) + (LEN) <= (MAX) - 2) + + /* HANDLELAZY: This statement is called each time an instruction is + * emitted (three cases). If the instruction is large enough, the + * loop is restarted, otherwise lazy matching may ensue. */ +#define HANDLELAZY(mlen) \ + if (TRYLAZYLEN ((mlen), (stream->input_position), (stream->avail_in))) \ + { stream->min_match = (mlen) + LEAST_MATCH_INCR; goto updateone; } \ + else \ + { stream->input_position += (mlen); goto restartloop; } + + /* Now loop over one input byte at a time until a match is found... */ + for (;; inp += 1, stream->input_position += 1) + { + /* Now we try three kinds of string match in order of expense: + * run, large match, small match. */ + + /* Expand the start of a RUN. The test for (run_l == SLOOK) + * avoids repeating this check when we pass through a run area + * performing lazy matching. The run is only expanded once when + * the min_match is first reached. If lazy matching is + * performed, the run_l variable will remain inconsistent until + * the first non-running input character is reached, at which + * time the run_l may then again grow to SLOOK. */ + if (DO_RUN && run_l == SLOOK) + { + usize_t max_len = stream->avail_in - stream->input_position; + + IF_DEBUG (xd3_verify_run_state (stream, inp, run_l, &run_c)); + + while (run_l < max_len && inp[run_l] == run_c) { run_l += 1; } + + /* Output a RUN instruction. */ + if (run_l >= stream->min_match && run_l >= MIN_RUN) + { + if ((ret = xd3_emit_run (stream, stream->input_position, + run_l, &run_c))) { return ret; } + + HANDLELAZY (run_l); + } + } + + /* If there is enough input remaining.
*/ + if (DO_LARGE && (stream->input_position + LLOOK <= stream->avail_in)) + { + if ((stream->input_position >= next_move_point) && + (ret = xd3_srcwin_move_point (stream, & next_move_point))) + { + return ret; + } + + linx = xd3_checksum_hash (& stream->large_hash, lcksum); + + IF_DEBUG (xd3_verify_large_state (stream, inp, lcksum)); + + if (stream->large_table[linx] != 0) + { + /* the match_setup will fail if the source window has + * been decided and the match lies outside it. + * OPT: Consider forcing a window at this point to + * permit a new source window. */ + xoff_t adj_offset = + xd3_source_cksum_offset(stream, + stream->large_table[linx] - + HASH_CKOFFSET); + if (xd3_source_match_setup (stream, adj_offset) == 0) + { + if ((ret = xd3_source_extend_match (stream))) + { + return ret; + } + + /* Update stream position. match_fwd is zero if no + * match. */ + if (stream->match_fwd > 0) + { + HANDLELAZY (stream->match_fwd); + } + } + } + } + + /* Small matches. */ + if (DO_SMALL) + { + sinx = xd3_checksum_hash (& stream->small_hash, scksum); + + /* Verify incremental state in debugging mode. */ + IF_DEBUG (xd3_verify_small_state (stream, inp, scksum)); + + /* Search for the longest match */ + if (stream->small_table[sinx] != 0) + { + match_length = xd3_smatch (stream, + stream->small_table[sinx], + scksum, + & match_offset); + } + else + { + match_length = 0; + } + + /* Insert a hash for this string.
*/ + xd3_scksum_insert (stream, sinx, scksum, stream->input_position); + + /* Maybe output a COPY instruction */ + if (match_length >= stream->min_match) + { + /* Debug trace: input range, copied (earlier target) range, + * backward distance and length. The format needs one + * conversion per argument below. */ + IF_DEBUG2 ({ + static int x = 0; + DP(RINT "[target match:%d] inp %"W"u-%"W"u cpy %"W"u-%"W"u " + "(-%"W"d) [ %"W"u bytes ]\n", + x++, + stream->input_position, + stream->input_position + match_length, + match_offset, + match_offset + match_length, + stream->input_position - match_offset, + match_length); + }); + + if ((ret = xd3_found_match (stream, + /* decoder position */ + stream->input_position, + /* length */ match_length, + /* address */ (xoff_t) match_offset, + /* is_source */ 0))) + { + return ret; + } + + /* Copy instruction. */ + HANDLELAZY (match_length); + } + } + + /* The logic above prevents excess work during lazy matching by + * increasing min_match to avoid smaller matches. Each time we + * advance stream->input_position by one, the minimum match + * shortens as well. */ + if (stream->min_match > MIN_MATCH) + { + stream->min_match -= 1; + } + + updateone: + + /* See if there are no more incremental cksums to compute. */ + if (stream->input_position + SLOOK == stream->avail_in) + { + goto loopnomore; + } + + /* Compute next RUN, CKSUM */ + if (DO_RUN) + { + NEXTRUN (inp[SLOOK]); + } + + if (DO_SMALL) + { + scksum = xd3_small_cksum_update (&scksum_state, inp, SLOOK); + } + + if (DO_LARGE && (stream->input_position + LLOOK < stream->avail_in)) + { + lcksum = xd3_large_cksum_update (&stream->large_hash, lcksum, inp, LLOOK); + } + } + + loopnomore: + return 0; +} + +#endif /* XD3_ENCODER */ +#endif /* __XDELTA3_C_TEMPLATE_PASS__ */ diff --git a/lib/xdelta3/xdelta3.h b/lib/xdelta3/xdelta3.h new file mode 100644 index 0000000..b9b6fe0 --- /dev/null +++ b/lib/xdelta3/xdelta3.h @@ -0,0 +1,1476 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* To learn more about Xdelta, start by reading xdelta3.c. If you are + * ready to use the API, continue reading here. There are two + * interfaces -- xd3_encode_input and xd3_decode_input -- plus a dozen + * or so related calls. This interface is styled after Zlib. */ + +#ifndef _XDELTA3_H_ +#define _XDELTA3_H_ + +#define _POSIX_SOURCE 200112L +#define _ISOC99_SOURCE +#define _C99_SOURCE + +#if HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include + +/****************************************************************/ + +/* Default configured value of stream->winsize. If the program + * supplies xd3_encode_input() with data smaller than winsize the + * stream will automatically buffer the input, otherwise the input + * buffer is used directly. + */ +#ifndef XD3_DEFAULT_WINSIZE +#define XD3_DEFAULT_WINSIZE (1U << 23) +#endif + +/* Default total size of the source window used in xdelta3-main.h */ +#ifndef XD3_DEFAULT_SRCWINSZ +#define XD3_DEFAULT_SRCWINSZ (1U << 26) +#endif + +/* When Xdelta requests a memory allocation for certain buffers, it + * rounds up to units of at least this size. The code assumes (and + * asserts) that this is a power-of-two. */ +#ifndef XD3_ALLOCSIZE +#define XD3_ALLOCSIZE (1U<<14) +#endif + +/* The XD3_HARDMAXWINSIZE parameter is a safety mechanism to protect + * decoders against malicious files. The decoder will never decode a + * window larger than this. If the file specifies VCD_TARGET the + * decoder may require two buffers of this size. 
+ * + * 8-16MB is reasonable, probably don't need to go larger. */ +#ifndef XD3_HARDMAXWINSIZE +#define XD3_HARDMAXWINSIZE (1U<<26) +#endif +/* The IOPT_SIZE value sets the size of a buffer used to batch + * overlapping copy instructions before they are optimized by picking + * the best non-overlapping ranges. The larger this buffer, the + * longer a forced xd3_srcwin_setup() decision is held off. Setting + * this value to 0 causes an unlimited buffer to be used. */ +#ifndef XD3_DEFAULT_IOPT_SIZE +#define XD3_DEFAULT_IOPT_SIZE (1U<<15) +#endif + +/* The maximum distance backward to search for small matches */ +#ifndef XD3_DEFAULT_SPREVSZ +#define XD3_DEFAULT_SPREVSZ (1U<<18) +#endif + +/* The default compression level */ +#ifndef XD3_DEFAULT_LEVEL +#define XD3_DEFAULT_LEVEL 3 +#endif + +#ifndef XD3_DEFAULT_SECONDARY_LEVEL +#define XD3_DEFAULT_SECONDARY_LEVEL 6 +#endif + +#ifndef XD3_USE_LARGEFILE64 +#define XD3_USE_LARGEFILE64 1 +#endif + +/* The source window size is limited to 2GB unless + * XD3_USE_LARGESIZET is defined to 1. */ +#ifndef XD3_USE_LARGESIZET +#define XD3_USE_LARGESIZET 1 +#endif + +/* Sizes and addresses within VCDIFF windows are represented as usize_t + * + * For source-file offsets and total file sizes, total input and + * output counts, the xoff_t type is used. The decoder and encoder + * generally check for overflow of the xoff_t size (this is tested at + * the 32bit boundary [xdelta3-test.h]). + */ +#ifndef _WIN32 +#define __STDC_FORMAT_MACROS +#include +#include +#else /* WIN32 case */ +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif + +#ifndef WINVER +#if XD3_USE_LARGEFILE64 +/* 64 bit file offsets: uses GetFileSizeEx and SetFilePointerEx. */ +#define WINVER 0x0500 +#define _WIN32_WINNT 0x0500 +#else /* xoff_t is 32bit */ +/* 32 bit file offsets: uses GetFileSize and SetFilePointer. 
*/ +#define WINVER 0x0400 +#define _WIN32_WINNT 0x0400 +#endif /* if XD3_USE_LARGEFILE64 */ +#endif /* ifndef WINVER */ + +#include + +/* _MSV_VER is defined by Microsoft tools, not by Mingw32 */ +#ifdef _MSC_VER +typedef signed int ssize_t; +typedef int pid_t; +#if _MSC_VER < 1600 +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned long uint32_t; +typedef ULONGLONG uint64_t; +#else /* _MSC_VER >= 1600 */ +/* For MSVC10 and above */ +#include +#define inline __inline +#endif /* _MSC_VER < 1600 */ +#else /* _MSC_VER not defined */ +/* Mingw32 */ +#include +#endif /* _MSC_VER defined */ + +#endif /* _WIN32 defined */ + +/* Settings based on the size of xoff_t (32 vs 64 file offsets) */ +#if XD3_USE_LARGEFILE64 +/* xoff_t is a 64-bit type */ +#define __USE_FILE_OFFSET64 1 /* GLIBC: for 64bit fileops. */ + +#ifndef _LARGEFILE_SOURCE +#define _LARGEFILE_SOURCE +#endif + +#ifndef _FILE_OFFSET_BITS +#define _FILE_OFFSET_BITS 64 +#endif + +static_assert(SIZEOF_SIZE_T == sizeof(size_t), "SIZEOF_SIZE_T not correctly set"); +static_assert(SIZEOF_UNSIGNED_LONG_LONG == sizeof(unsigned long long), "SIZEOF_UNSIGNED_LONG_LONG not correctly set"); + +/* Set a xoff_t typedef and the "Q" printf insert. 
*/ +#if defined(_WIN32) +typedef uint64_t xoff_t; +#define Q "I64" +#elif SIZEOF_UNSIGNED_LONG == 8 +typedef unsigned long xoff_t; +#define Q "l" +#elif SIZEOF_SIZE_T == 8 +typedef size_t xoff_t; +#define Q "z" +#elif SIZEOF_UNSIGNED_LONG_LONG == 8 +typedef unsigned long long xoff_t; +#define Q "ll" +#endif /* typedef and #define Q */ + +#define SIZEOF_XOFF_T 8 + +#else /* XD3_USE_LARGEFILE64 == 0 */ + +#if SIZEOF_UNSIGNED_INT == 4 +typedef unsigned int xoff_t; +#elif SIZEOF_UNSIGNED_LONG == 4 +typedef unsigned long xoff_t; +#else +typedef uint32_t xoff_t; +#endif /* xoff_t is 32 bits */ + +#define SIZEOF_XOFF_T 4 +#define Q +#endif /* 64 vs 32 bit xoff_t */ + +/* Settings based on the size of usize_t (32 and 64 bit window size) */ +#if XD3_USE_LARGESIZET + +/* Set a usize_t typedef and the "W" printf insert. */ +#if defined(_WIN32) +typedef uint64_t usize_t; +#define W "I64" +#elif SIZEOF_UNSIGNED_LONG == 8 +typedef unsigned long usize_t; +#define W "l" +#elif SIZEOF_SIZE_T == 8 +typedef size_t usize_t; +#define W "z" +#elif SIZEOF_UNSIGNED_LONG_LONG == 8 +typedef unsigned long long usize_t; +#define W "ll" +#endif /* typedef and #define W */ + +#define SIZEOF_USIZE_T 8 + +#else /* XD3_USE_LARGESIZET == 0 */ + +#if SIZEOF_UNSIGNED_INT == 4 +typedef unsigned int usize_t; +#elif SIZEOF_UNSIGNED_LONG == 4 +typedef unsigned long usize_t; +#else +typedef uint32_t usize_t; +#endif /* usize_t is 32 bits */ + +#define SIZEOF_USIZE_T 4 +#define W + +#endif /* 64 vs 32 bit usize_t */ + +/* Settings based on the size of size_t (the system-provided, + * usually-but-maybe-not an unsigned type) */ +#if SIZEOF_SIZE_T == 4 +#define Z "z" +#elif SIZEOF_SIZE_T == 8 +#ifdef _WIN32 +#define Z "I64" +#else /* !_WIN32 */ +#define Z "z" +#endif /* Windows or not */ +#else +#error Bad configure script +#endif /* size_t printf flags */ + +#define USE_UINT32 (SIZEOF_USIZE_T == 4 || \ + SIZEOF_XOFF_T == 4 || REGRESSION_TEST) +#define USE_UINT64 (SIZEOF_USIZE_T == 8 || \ + SIZEOF_XOFF_T == 8
|| REGRESSION_TEST) + +#ifndef UNALIGNED_OK +#ifdef HAVE_ALIGNED_ACCESS_REQUIRED +#define UNALIGNED_OK 0 +#else +/* This generally includes all Windows builds. */ +#define UNALIGNED_OK 1 +#endif +#endif + +/**********************************************************************/ + +/* Whether to build the encoder, otherwise only build the decoder. */ +#ifndef XD3_ENCODER +#define XD3_ENCODER 1 +#endif + +/* The code returned when main() fails, also defined in system + includes. */ +#ifndef EXIT_FAILURE +#define EXIT_FAILURE 1 +#endif + +/* REGRESSION TEST enables the "xdelta3 test" command, which runs a + series of self-tests. */ +#ifndef REGRESSION_TEST +#define REGRESSION_TEST 0 +#endif + +/* XD3_DEBUG=1 enables assertions and various statistics. Levels > 1 + * enable some additional output only useful during development and + * debugging. */ +#ifndef XD3_DEBUG +#define XD3_DEBUG 0 +#endif + +#ifndef PYTHON_MODULE +#define PYTHON_MODULE 0 +#endif + +#ifndef SWIG_MODULE +#define SWIG_MODULE 0 +#endif + +#ifndef NOT_MAIN +#define NOT_MAIN 0 +#endif + +/* There are three string matching functions supplied: one fast, one + * slow (default), and one soft-configurable. To disable any of + * these, use the following definitions. 
*/ +#ifndef XD3_BUILD_SLOW +#define XD3_BUILD_SLOW 1 +#endif +#ifndef XD3_BUILD_FAST +#define XD3_BUILD_FAST 1 +#endif +#ifndef XD3_BUILD_FASTER +#define XD3_BUILD_FASTER 1 +#endif +#ifndef XD3_BUILD_FASTEST +#define XD3_BUILD_FASTEST 1 +#endif +#ifndef XD3_BUILD_SOFT +#define XD3_BUILD_SOFT 1 +#endif +#ifndef XD3_BUILD_DEFAULT +#define XD3_BUILD_DEFAULT 1 +#endif + +#if XD3_DEBUG +#include +#endif + +typedef struct _xd3_stream xd3_stream; +typedef struct _xd3_source xd3_source; +typedef struct _xd3_hash_cfg xd3_hash_cfg; +typedef struct _xd3_smatcher xd3_smatcher; +typedef struct _xd3_rinst xd3_rinst; +typedef struct _xd3_dinst xd3_dinst; +typedef struct _xd3_hinst xd3_hinst; +typedef struct _xd3_winst xd3_winst; +typedef struct _xd3_rpage xd3_rpage; +typedef struct _xd3_addr_cache xd3_addr_cache; +typedef struct _xd3_output xd3_output; +typedef struct _xd3_desect xd3_desect; +typedef struct _xd3_iopt_buflist xd3_iopt_buflist; +typedef struct _xd3_rlist xd3_rlist; +typedef struct _xd3_sec_type xd3_sec_type; +typedef struct _xd3_sec_cfg xd3_sec_cfg; +typedef struct _xd3_sec_stream xd3_sec_stream; +typedef struct _xd3_config xd3_config; +typedef struct _xd3_code_table_desc xd3_code_table_desc; +typedef struct _xd3_code_table_sizes xd3_code_table_sizes; +typedef struct _xd3_slist xd3_slist; +typedef struct _xd3_whole_state xd3_whole_state; +typedef struct _xd3_wininfo xd3_wininfo; + +/* The stream configuration has three callbacks functions, all of + * which may be supplied with NULL values. If config->getblk is + * provided as NULL, the stream returns XD3_GETSRCBLK. 
*/ + +typedef void* (xd3_alloc_func) (void *opaque, + size_t items, + usize_t size); +typedef void (xd3_free_func) (void *opaque, + void *address); + +typedef int (xd3_getblk_func) (xd3_stream *stream, + xd3_source *source, + xoff_t blkno); + +typedef const xd3_dinst* (xd3_code_table_func) (void); + + +#ifdef _WIN32 +#define vsnprintf_func _vsnprintf +#define snprintf_func _snprintf +#else +#define vsnprintf_func vsnprintf +#define snprintf_func snprintf +#endif +#define short_sprintf(sb,fmt,...) \ + snprintf_func((sb).buf,sizeof((sb).buf),fmt,__VA_ARGS__) + +/* Type used for short snprintf calls. */ +typedef struct { + char buf[48]; +} shortbuf; + +#ifndef PRINTF_ATTRIBUTE +#ifdef __GNUC__ +#define PRINTF_ATTRIBUTE(x,y) __attribute__ ((__format__ (__printf__, x, y))) +#else +#define PRINTF_ATTRIBUTE(x,y) +#endif +#endif + +/* Underlying xprintf() */ +int xsnprintf_func (char *str, size_t n, const char *fmt, ...) + PRINTF_ATTRIBUTE(3,4); + +/* XPR(NT "", ...) (used by main) prefixes an "xdelta3: " to the output. */ +void xprintf(const char *fmt, ...) PRINTF_ATTRIBUTE(1,2); +#define XPR xprintf +#define NT "xdelta3: " +#define NTR "" +/* DP(RINT ...) */ +#define DP xprintf +#define RINT "" + +#if XD3_DEBUG +#define XD3_ASSERT(x) \ + do { \ + if (! (x)) { \ + DP(RINT "%s:%d: XD3 assertion failed: %s\n", \ + __FILE__, __LINE__, #x); \ + abort (); } } while (0) +#else +#define XD3_ASSERT(x) (void)0 +#endif /* XD3_DEBUG */ + +#define xd3_max(x,y) ((x) < (y) ? (y) : (x)) +#define xd3_min(x,y) ((x) < (y) ? (x) : (y)) + +/**************************************************************** + PUBLIC ENUMS + ******************************************************************/ + +/* These are the five ordinary status codes returned by the + * xd3_encode_input() and xd3_decode_input() state machines. 
*/ +typedef enum { + + /* An application must be prepared to handle these five return + * values from either xd3_encode_input or xd3_decode_input, except + * in the case of no-source compression, in which case XD3_GETSRCBLK + * is never returned. More detailed comments for these are given in + * xd3_encode_input and xd3_decode_input comments, below. */ + XD3_INPUT = -17703, /* need input */ + XD3_OUTPUT = -17704, /* have output */ + XD3_GETSRCBLK = -17705, /* need a block of source input (with no + * xd3_getblk function), a chance to do + * non-blocking read. */ + XD3_GOTHEADER = -17706, /* (decode-only) after the initial VCDIFF & + first window header */ + XD3_WINSTART = -17707, /* notification: returned before a window is + * processed, giving a chance to + * XD3_SKIP_WINDOW or not XD3_SKIP_EMIT that + * window. */ + XD3_WINFINISH = -17708, /* notification: returned after + encode/decode & output for a window */ + XD3_TOOFARBACK = -17709, /* (encoder only) may be returned by + getblk() if the block is too old */ + XD3_INTERNAL = -17710, /* internal error */ + XD3_INVALID = -17711, /* invalid config */ + XD3_INVALID_INPUT = -17712, /* invalid input/decoder error */ + XD3_NOSECOND = -17713, /* when secondary compression finds no + improvement. */ + XD3_UNIMPLEMENTED = -17714 /* currently VCD_TARGET, VCD_CODETABLE */ +} xd3_rvalues; + +/* special values in config->flags */ +typedef enum +{ + XD3_JUST_HDR = (1 << 1), /* used by VCDIFF tools, see + xdelta3-main.h. */ + XD3_SKIP_WINDOW = (1 << 2), /* used by VCDIFF tools, see + xdelta3-main.h. */ + XD3_SKIP_EMIT = (1 << 3), /* used by VCDIFF tools, see + xdelta3-main.h. */ + XD3_FLUSH = (1 << 4), /* flush the stream buffer to + prepare for + xd3_stream_close(). 
*/ + + XD3_SEC_DJW = (1 << 5), /* use DJW static huffman */ + XD3_SEC_FGK = (1 << 6), /* use FGK adaptive huffman */ + XD3_SEC_LZMA = (1 << 24), /* use LZMA secondary */ + + XD3_SEC_TYPE = (XD3_SEC_DJW | XD3_SEC_FGK | XD3_SEC_LZMA), + + XD3_SEC_NODATA = (1 << 7), /* disable secondary compression of + the data section. */ + XD3_SEC_NOINST = (1 << 8), /* disable secondary compression of + the inst section. */ + XD3_SEC_NOADDR = (1 << 9), /* disable secondary compression of + the addr section. */ + + XD3_SEC_NOALL = (XD3_SEC_NODATA | XD3_SEC_NOINST | XD3_SEC_NOADDR), + + XD3_ADLER32 = (1 << 10), /* enable checksum computation in + the encoder. */ + XD3_ADLER32_NOVER = (1 << 11), /* disable checksum verification in + the decoder. */ + + XD3_NOCOMPRESS = (1 << 13), /* disable ordinary data + * compression feature, only search + * the source, not the target. */ + XD3_BEGREEDY = (1 << 14), /* disable the "1.5-pass + * algorithm", instead use greedy + * matching. Greedy is off by + * default. */ + XD3_ADLER32_RECODE = (1 << 15), /* used by "recode". */ + + /* 4 bits to set the compression level the same as the command-line + * setting -1 through -9 (-0 corresponds to the XD3_NOCOMPRESS flag, + * and is independent of compression level). This is for + * convenience, especially with xd3_encode_memory(). */ + + XD3_COMPLEVEL_SHIFT = 20, /* 20 - 23 */ + XD3_COMPLEVEL_MASK = (0xF << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_1 = (1 << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_2 = (2 << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_3 = (3 << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_6 = (6 << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_9 = (9 << XD3_COMPLEVEL_SHIFT) + +} xd3_flags; + +/* The values of this enumeration are set in xd3_config using the + * smatch_cfg variable. It can be set to default, slow, fast, etc., + * and soft. */ +typedef enum +{ + XD3_SMATCH_DEFAULT = 0, /* Flags may contain XD3_COMPLEVEL bits, + else default. 
*/ + XD3_SMATCH_SLOW = 1, + XD3_SMATCH_FAST = 2, + XD3_SMATCH_FASTER = 3, + XD3_SMATCH_FASTEST = 4, + XD3_SMATCH_SOFT = 5 +} xd3_smatch_cfg; + +/********************************************************************* + PRIVATE ENUMS +**********************************************************************/ + +/* stream->match_state is part of the xd3_encode_input state machine + * for source matching: + * + * 1. the XD3_GETSRCBLK block-read mechanism means reentrant matching + * 2. this state spans encoder windows: a match and end-of-window + * will continue in the next window. + * 3. the initial target byte and source byte are a presumed match, to + * avoid some computation in case the inputs are identical. + */ +typedef enum { + + MATCH_TARGET = 0, /* in this state, attempt to match the start of + * the target with the previously set source + * address (initially 0). */ + MATCH_BACKWARD = 1, /* currently expanding a match backward in the + source/target. */ + MATCH_FORWARD = 2, /* currently expanding a match forward in the + source/target. */ + MATCH_SEARCHING = 3 /* currently searching for a match. */ + +} xd3_match_state; + +/* The xd3_encode_input state machine steps through these states in + * the following order. The matcher is reentrant and returns + * XD3_INPUT whenever it requires more data. After receiving + * XD3_INPUT, if the application reads EOF it should call + * xd3_stream_close(). + */ +typedef enum { + + ENC_INIT = 0, /* xd3_encode_input has never been called. */ + ENC_INPUT = 1, /* waiting for xd3_avail_input () to be called. */ + ENC_SEARCH = 2, /* currently searching for matches. */ + ENC_INSTR = 3, /* currently formatting output. */ + ENC_FLUSH = 4, /* currently emitting output. */ + ENC_POSTOUT = 5, /* after an output section. */ + ENC_POSTWIN = 6, /* after all output sections. */ + ENC_ABORTED = 7 /* abort. */ +} xd3_encode_state; + +/* The xd3_decode_input state machine steps through these states in + * the following order.
The matcher is reentrant and returns + * XD3_INPUT whenever it requires more data. After receiving + * XD3_INPUT, if the application reads EOF it should call + * xd3_stream_close(). + * + * 0-8: the VCDIFF header + * 9-18: the VCDIFF window header + * 19-21: the three primary sections: data, inst, addr + * 22: producing output: returns XD3_OUTPUT, possibly XD3_GETSRCBLK, + * 23: return XD3_WINFINISH, set state=9 to decode more input + */ +typedef enum { + + DEC_VCHEAD = 0, /* VCDIFF header */ + DEC_HDRIND = 1, /* header indicator */ + + DEC_SECONDID = 2, /* secondary compressor ID */ + + DEC_TABLEN = 3, /* code table length */ + DEC_NEAR = 4, /* code table near */ + DEC_SAME = 5, /* code table same */ + DEC_TABDAT = 6, /* code table data */ + + DEC_APPLEN = 7, /* application data length */ + DEC_APPDAT = 8, /* application data */ + + DEC_WININD = 9, /* window indicator */ + + DEC_CPYLEN = 10, /* copy window length */ + DEC_CPYOFF = 11, /* copy window offset */ + + DEC_ENCLEN = 12, /* length of delta encoding */ + DEC_TGTLEN = 13, /* length of target window */ + DEC_DELIND = 14, /* delta indicator */ + + DEC_DATALEN = 15, /* length of ADD+RUN data */ + DEC_INSTLEN = 16, /* length of instruction data */ + DEC_ADDRLEN = 17, /* length of address data */ + + DEC_CKSUM = 18, /* window checksum */ + + DEC_DATA = 19, /* data section */ + DEC_INST = 20, /* instruction section */ + DEC_ADDR = 21, /* address section */ + + DEC_EMIT = 22, /* producing data */ + + DEC_FINISH = 23, /* window finished */ + + DEC_ABORTED = 24 /* xd3_abort_stream */ +} xd3_decode_state; + +/************************************************************ + internal types + ************************************************************/ + +/* instruction lists used in the IOPT buffer */ +struct _xd3_rlist +{ + xd3_rlist *next; + xd3_rlist *prev; +}; + +/* the raw encoding of an instruction used in the IOPT buffer */ +struct _xd3_rinst +{ + uint8_t type; + uint8_t xtra; + uint8_t code1; + uint8_t code2; + 
usize_t pos; + usize_t size; + xoff_t addr; + xd3_rlist link; +}; + +/* the code-table form of a single- or double-instruction */ +struct _xd3_dinst +{ + uint8_t type1; + uint8_t size1; + uint8_t type2; + uint8_t size2; +}; + +/* the decoded form of a single (half) instruction. */ +struct _xd3_hinst +{ + uint8_t type; + usize_t size; + usize_t addr; +}; + +/* the form of a whole-file instruction */ +struct _xd3_winst +{ + uint8_t type; /* RUN, ADD, COPY */ + uint8_t mode; /* 0, VCD_SOURCE, VCD_TARGET */ + usize_t size; + xoff_t addr; + xoff_t position; /* absolute position of this inst */ +}; + +/* used by the encoder to buffer output in sections. list of blocks. */ +struct _xd3_output +{ + uint8_t *base; + usize_t next; + usize_t avail; + xd3_output *next_page; +}; + +/* used by the decoder to buffer input in sections. */ +struct _xd3_desect +{ + const uint8_t *buf; + const uint8_t *buf_max; + usize_t size; + usize_t pos; + + /* used in xdelta3-decode.h */ + uint8_t *copied1; + usize_t alloc1; + + /* used in xdelta3-second.h */ + uint8_t *copied2; + usize_t alloc2; +}; + +/* the VCDIFF address cache, see the RFC */ +struct _xd3_addr_cache +{ + usize_t s_near; + usize_t s_same; + usize_t next_slot; /* the circular index for near */ + usize_t *near_array; /* array of size s_near */ + usize_t *same_array; /* array of size s_same*256 */ +}; + +/* the IOPT buffer list is just a list of buffers, which may be allocated + * during encode when using an unlimited buffer. */ +struct _xd3_iopt_buflist +{ + xd3_rinst *buffer; + xd3_iopt_buflist *next; +}; + +/* This is the record of a pre-compiled configuration, a subset of + xd3_config. */ +struct _xd3_smatcher +{ + const char *name; + int (*string_match) (xd3_stream *stream); + usize_t large_look; + usize_t large_step; + usize_t small_look; + usize_t small_chain; + usize_t small_lchain; + usize_t max_lazy; + usize_t long_enough; +}; + +/* hash table size & power-of-two hash function.
*/ +struct _xd3_hash_cfg +{ + usize_t size; // Number of buckets + usize_t shift; + usize_t mask; + usize_t look; // How wide is this checksum + usize_t multiplier; // K * powers[0] + usize_t *powers; // Array of [0,look) where powers[look-1] == 1 + // and powers[N] = powers[N+1]*K (Rabin-Karp) +}; + +/* the sprev list */ +struct _xd3_slist +{ + usize_t last_pos; +}; + +/* window info (for whole state) */ +struct _xd3_wininfo { + xoff_t offset; + usize_t length; + uint32_t adler32; +}; + +/* whole state for, e.g., merge */ +struct _xd3_whole_state { + usize_t addslen; + uint8_t *adds; + usize_t adds_alloc; + + usize_t instlen; + xd3_winst *inst; + usize_t inst_alloc; + + usize_t wininfolen; + xd3_wininfo *wininfo; + usize_t wininfo_alloc; + + xoff_t length; +}; + +/******************************************************************** + public types + *******************************************************************/ + +/* Settings for the secondary compressor. */ +struct _xd3_sec_cfg +{ + int data_type; /* Which section. (set automatically) */ + usize_t ngroups; /* Number of DJW Huffman groups. */ + usize_t sector_size; /* Sector size. */ + int inefficient; /* If true, ignore efficiency check [avoid XD3_NOSECOND]. */ +}; + +/* This is the user-visible stream configuration. */ +struct _xd3_config +{ + usize_t winsize; /* The encoder window size. */ + usize_t sprevsz; /* How far back small string + matching goes */ + usize_t iopt_size; /* entries in the + instruction-optimizing + buffer */ + + xd3_getblk_func *getblk; /* The three callbacks. */ + xd3_alloc_func *alloc; + xd3_free_func *freef; + void *opaque; /* Not used. */ + uint32_t flags; /* stream->flags are initialized + * from xd3_config & never + * modified by the library. Use + * xd3_set_flags to modify flags + * settings mid-stream. 
*/ + + xd3_sec_cfg sec_data; /* Secondary compressor config: data */ + xd3_sec_cfg sec_inst; /* Secondary compressor config: inst */ + xd3_sec_cfg sec_addr; /* Secondary compressor config: addr */ + + xd3_smatch_cfg smatch_cfg; /* See enum: use fields below for + soft config */ + xd3_smatcher smatcher_soft; +}; + +/* The primary source file object. You create one of these objects and + * initialize the first four fields. This library maintains the next + * 5 fields. The configured getblk implementation is responsible for + * setting the final 3 fields when called (and/or when XD3_GETSRCBLK + * is returned). + */ +struct _xd3_source +{ + /* you set */ + usize_t blksize; /* block size */ + const char *name; /* its name, for debug/print + purposes */ + void *ioh; /* opaque handle */ + xoff_t max_winsize; /* maximum visible buffer */ + + /* getblk sets */ + xoff_t curblkno; /* current block number: client + sets after getblk request */ + usize_t onblk; /* number of bytes on current + block: client sets, must be >= 0 + and <= blksize */ + const uint8_t *curblk; /* current block array: client + sets after getblk request */ + + /* xd3 sets */ + usize_t srclen; /* length of this source window */ + xoff_t srcbase; /* offset of this source window + in the source itself */ + usize_t shiftby; /* for power-of-two blocksizes */ + usize_t maskby; /* for power-of-two blocksizes */ + xoff_t cpyoff_blocks; /* offset of dec_cpyoff in blocks */ + usize_t cpyoff_blkoff; /* offset of copy window in + blocks, remainder */ + xoff_t getblkno; /* request block number: xd3 sets + current getblk request */ + + /* See xd3_getblk() */ + xoff_t max_blkno; /* Maximum block, if eof is known, + * otherwise, equals frontier_blkno + * (initially 0). */ + usize_t onlastblk; /* Number of bytes on max_blkno */ + int eof_known; /* Set to true when the first + * partial block is read. */ +}; + +/* The primary xd3_stream object, used for encoding and decoding. 
You + * may access only two fields: avail_out, next_out. Use the methods + * above to operate on xd3_stream. */ +struct _xd3_stream +{ + /* input state */ + const uint8_t *next_in; /* next input byte */ + usize_t avail_in; /* number of bytes available at + next_in */ + xoff_t total_in; /* how many bytes in */ + + /* output state */ + uint8_t *next_out; /* next output byte */ + usize_t avail_out; /* number of bytes available at + next_out */ + usize_t space_out; /* total out space */ + xoff_t current_window; /* number of windows encoded/decoded */ + xoff_t total_out; /* how many bytes out */ + + /* to indicate an error, xd3 sets */ + const char *msg; /* last error message, NULL if + no error */ + + /* source configuration */ + xd3_source *src; /* source array */ + + /* encoder memory configuration */ + usize_t winsize; /* suggested window size */ + usize_t sprevsz; /* small string, previous window + size (power of 2) */ + usize_t sprevmask; /* small string, previous window + size mask */ + usize_t iopt_size; + usize_t iopt_unlimited; + + /* general configuration */ + xd3_getblk_func *getblk; /* set nxtblk, nxtblkno to scanblkno */ + xd3_alloc_func *alloc; /* malloc function */ + xd3_free_func *free; /* free function */ + void* opaque; /* private data object passed to + alloc, free, and getblk */ + uint32_t flags; /* various options */ + + /* secondary compressor configuration */ + xd3_sec_cfg sec_data; /* Secondary compressor config: data */ + xd3_sec_cfg sec_inst; /* Secondary compressor config: inst */ + xd3_sec_cfg sec_addr; /* Secondary compressor config: addr */ + + xd3_smatcher smatcher; + + usize_t *large_table; /* table of large checksums */ + xd3_hash_cfg large_hash; /* large hash config */ + + usize_t *small_table; /* table of small checksums */ + xd3_slist *small_prev; /* table of previous offsets, + circular linked list */ + int small_reset; /* true if small table should + be reset */ + + xd3_hash_cfg small_hash; /* small hash config */ + xd3_addr_cache 
acache; /* the vcdiff address cache */ + xd3_encode_state enc_state; /* state of the encoder */ + + usize_t taroff; /* base offset of the target input */ + usize_t input_position; /* current input position */ + usize_t min_match; /* current minimum match + length, avoids redundant + matches */ + usize_t unencoded_offset; /* current input, first + * unencoded offset. this value + * is <= the first instruction's + * position in the iopt buffer, + * if there is at least one + * match in the buffer. */ + + /* SRCWIN */ + int srcwin_decided; /* boolean: true if srclen and + srcbase have been + decided. */ + int srcwin_decided_early; /* boolean: true if srclen + and srcbase were + decided early. */ + xoff_t srcwin_cksum_pos; /* Source checksum position */ + + /* MATCH */ + xd3_match_state match_state; /* encoder match state */ + xoff_t match_srcpos; /* current match source + position relative to + srcbase */ + xoff_t match_last_srcpos; /* previously attempted + * srcpos, to avoid loops. */ + xoff_t match_minaddr; /* smallest matching address to + * set window params (reset each + * window xd3_encode_reset) */ + xoff_t match_maxaddr; /* largest matching address to + * set window params (reset each + * window xd3_encode_reset) */ + usize_t match_back; /* match extends back so far */ + usize_t match_maxback; /* match extends back maximum */ + usize_t match_fwd; /* match extends forward so far */ + usize_t match_maxfwd; /* match extends forward maximum */ + + xoff_t maxsrcaddr; /* address of the last source + match (across windows) */ + + uint8_t *buf_in; /* for saving buffered input */ + usize_t buf_avail; /* amount of saved input */ + const uint8_t *buf_leftover; /* leftover content of next_in + (i.e., user's buffer) */ + usize_t buf_leftavail; /* amount of leftover content */ + + xd3_output *enc_current; /* current output buffer */ + xd3_output *enc_free; /* free output buffers */ + xd3_output *enc_heads[4]; /* array of encoded outputs: + head of chain */ + xd3_output
*enc_tails[4]; /* array of encoded outputs: + tail of chain */ + uint32_t recode_adler32; /* set the adler32 checksum + * during "recode". */ + + xd3_rlist iopt_used; /* instruction optimizing buffer */ + xd3_rlist iopt_free; + xd3_rinst *iout; /* next single instruction */ + xd3_iopt_buflist *iopt_alloc; + + const uint8_t *enc_appheader; /* application header to encode */ + usize_t enc_appheadsz; /* application header size */ + + /* decoder stuff */ + xd3_decode_state dec_state; /* current DEC_XXX value */ + usize_t dec_hdr_ind; /* VCDIFF header indicator */ + usize_t dec_win_ind; /* VCDIFF window indicator */ + usize_t dec_del_ind; /* VCDIFF delta indicator */ + + uint8_t dec_magic[4]; /* First four bytes */ + usize_t dec_magicbytes; /* Magic position. */ + + usize_t dec_secondid; /* Optional secondary compressor ID. */ + + usize_t dec_codetblsz; /* Optional code table: length. */ + uint8_t *dec_codetbl; /* Optional code table: storage. */ + usize_t dec_codetblbytes; /* Optional code table: position. */ + + usize_t dec_appheadsz; /* Optional application header: + size. */ + uint8_t *dec_appheader; /* Optional application header: + storage */ + usize_t dec_appheadbytes; /* Optional application header: + position. */ + + usize_t dec_cksumbytes; /* Optional checksum: position. */ + uint8_t dec_cksum[4]; /* Optional checksum: storage. */ + uint32_t dec_adler32; /* Optional checksum: value. 
*/ + + usize_t dec_cpylen; /* length of copy window + (VCD_SOURCE or VCD_TARGET) */ + xoff_t dec_cpyoff; /* offset of copy window + (VCD_SOURCE or VCD_TARGET) */ + usize_t dec_enclen; /* length of delta encoding */ + usize_t dec_tgtlen; /* length of target window */ + +#if USE_UINT64 + uint64_t dec_64part; /* part of a decoded uint64_t */ +#endif +#if USE_UINT32 + uint32_t dec_32part; /* part of a decoded uint32_t */ +#endif + + xoff_t dec_winstart; /* offset of the start of + current target window */ + xoff_t dec_window_count; /* == current_window + 1 in + DEC_FINISH */ + usize_t dec_winbytes; /* bytes of the three sections + so far consumed */ + usize_t dec_hdrsize; /* VCDIFF + app header size */ + + const uint8_t *dec_tgtaddrbase; /* Base of decoded target + addresses (addr >= + dec_cpylen). */ + const uint8_t *dec_cpyaddrbase; /* Base of decoded copy + addresses (addr < + dec_cpylen). */ + + usize_t dec_position; /* current decoder position + counting the cpylen + offset */ + usize_t dec_maxpos; /* maximum decoder position + counting the cpylen + offset */ + xd3_hinst dec_current1; /* current instruction */ + xd3_hinst dec_current2; /* current instruction */ + + uint8_t *dec_buffer; /* Decode buffer */ + uint8_t *dec_lastwin; /* In case of VCD_TARGET, the + last target window. 
*/ + usize_t dec_lastlen; /* length of the last target + window */ + xoff_t dec_laststart; /* offset of the start of last + target window */ + usize_t dec_lastspace; /* allocated space of last + target window, for reuse */ + + xd3_desect inst_sect; /* staging area for decoding + window sections */ + xd3_desect addr_sect; + xd3_desect data_sect; + + xd3_code_table_func *code_table_func; + const xd3_dinst *code_table; + const xd3_code_table_desc *code_table_desc; + xd3_dinst *code_table_alloc; + + /* secondary compression */ + const xd3_sec_type *sec_type; + xd3_sec_stream *sec_stream_d; + xd3_sec_stream *sec_stream_i; + xd3_sec_stream *sec_stream_a; + + /* state for reconstructing whole files (e.g., for merge), this only + * supports loading USIZE_T_MAX instructions, adds, etc. */ + xd3_whole_state whole_target; + + /* statistics */ + xoff_t n_scpy; + xoff_t n_tcpy; + xoff_t n_add; + xoff_t n_run; + + xoff_t l_scpy; + xoff_t l_tcpy; + xoff_t l_add; + xoff_t l_run; + + usize_t i_slots_used; + +#if XD3_DEBUG + usize_t large_ckcnt; + + /* memory usage */ + usize_t alloc_cnt; + usize_t free_cnt; +#endif +}; + +/************************************************************************** + PUBLIC FUNCTIONS + **************************************************************************/ + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + + +/* This function configures an xd3_stream using the provided in-memory + * input buffer, source buffer, output buffer, and flags. The output + * array must be large enough or else ENOSPC will be returned. This + * is the simplest in-memory encoding interface. */ +int xd3_encode_memory (const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output_buffer, + usize_t *output_size, + usize_t avail_output, + int flags); + +/* The reverse of xd3_encode_memory. 
*/ +int xd3_decode_memory (const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output_buf, + usize_t *output_size, + usize_t avail_output, + int flags); + +/* This function encodes an in-memory input using a pre-configured + * xd3_stream. This allows the caller to set a variety of options + * which are not available in the xd3_encode/decode_memory() + * functions. + * + * The output array must be large enough to hold the output or else + * ENOSPC is returned. The source (if any) should be set using + * xd3_set_source_and_size() with a single-block xd3_source. This + * calls the underlying non-blocking interfaces, + * xd3_encode/decode_input(), handling the necessary input/output + * states. This method may be considered a reference for any + * application using xd3_encode_input() directly. + * + * xd3_stream stream; + * xd3_config config; + * xd3_source src; + * + * memset (& src, 0, sizeof (src)); + * memset (& stream, 0, sizeof (stream)); + * memset (& config, 0, sizeof (config)); + * + * if (source != NULL) + * { + * src.size = source_size; + * src.blksize = source_size; + * src.curblkno = 0; + * src.onblk = source_size; + * src.curblk = source; + * src.max_winsize = source_size; + * xd3_set_source(&stream, &src); + * } + * + * config.flags = flags; + * config.winsize = input_size; + * + * ... set smatcher, appheader, encoding-table, compression-level, etc. + * + * xd3_config_stream(&stream, &config); + * xd3_encode_stream(&stream, ...); + * xd3_free_stream(&stream); + */ +int xd3_encode_stream (xd3_stream *stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t avail_output); + +/* The reverse of xd3_encode_stream. */ +int xd3_decode_stream (xd3_stream *stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t avail_size); + +/* This is the non-blocking interface. 
+ * + * Handling input and output states is the same for encoding or + * decoding using the xd3_avail_input() and xd3_consume_output() + * routines, inlined below. + * + * Return values: + * + * XD3_INPUT: the process requires more input: call + * xd3_avail_input() then repeat + * + * XD3_OUTPUT: the process has more output: read stream->next_out, + * stream->avail_out, then call xd3_consume_output(), + * then repeat + * + * XD3_GOTHEADER: (decoder-only) notification returned following the + * VCDIFF header and first window header. the decoder + * may use the header to configure itself. + * + * XD3_WINSTART: a general notification returned once for each + * window except the 0-th window, which is implied by + * XD3_GOTHEADER. It is recommended to use a + * switch-stmt such as: + * + * ... + * again: + * switch ((ret = xd3_decode_input (stream))) { + * case XD3_GOTHEADER: { + * assert(stream->current_window == 0); + * stuff; + * } + * // fallthrough + * case XD3_WINSTART: { + * something(stream->current_window); + * goto again; + * } + * ... + * + * XD3_WINFINISH: a general notification, following the complete + * input & output of a window. at this point, + * stream->total_in and stream->total_out are consistent + * for either encoding or decoding. + * + * XD3_GETSRCBLK: If the xd3_getblk() callback is NULL, this value + * is returned to initiate a non-blocking source read. + */ +int xd3_decode_input (xd3_stream *stream); +int xd3_encode_input (xd3_stream *stream); + +/* The xd3_config structure is used to initialize a stream - all data + * is copied into stream so config may be a temporary variable. See + * the [documentation] or comments on the xd3_config structure. */ +int xd3_config_stream (xd3_stream *stream, + xd3_config *config); + +/* Since Xdelta3 doesn't open any files, xd3_close_stream is just an + * error check that the stream is in a proper state to be closed: this + * means the encoder is flushed and the decoder is at a window + * boundary. 
The application is responsible for freeing any of the + * resources it supplied. */ +int xd3_close_stream (xd3_stream *stream); + +/* This arranges for closing the stream to succeed. Does not free the + * stream.*/ +void xd3_abort_stream (xd3_stream *stream); + +/* xd3_free_stream frees all memory allocated for the stream. The + * application is responsible for freeing any of the resources it + * supplied. */ +void xd3_free_stream (xd3_stream *stream); + +/* This function informs the encoder or decoder that source matching + * (i.e., delta-compression) is possible. For encoding, this should + * be called before the first xd3_encode_input. A NULL source is + * ignored. For decoding, this should be called before the first + * window is decoded, but the appheader may be read first + * (XD3_GOTHEADER). After decoding the header, call xd3_set_source() + * if you have a source file. Note: if (stream->dec_win_ind & VCD_SOURCE) + * is true, it means the first window expects there to be a source file. + */ +int xd3_set_source (xd3_stream *stream, + xd3_source *source); + +/* If the source size is known, call this instead of xd3_set_source() + * to avoid having stream->getblk called (and/or to avoid XD3_GETSRCBLK). + * + * Follow these steps: + xd3_source source; + memset(&source, 0, sizeof(source)); + source.blksize = size; + source.onblk = size; + source.curblk = buf; + source.curblkno = 0; + int ret = xd3_set_source_and_size(&stream, &source, size); + ... + */ +int xd3_set_source_and_size (xd3_stream *stream, + xd3_source *source, + xoff_t source_size); + +/* This should be called before the first call to xd3_encode_input() + * to include application-specific data in the VCDIFF header. */ +void xd3_set_appheader (xd3_stream *stream, + const uint8_t *data, + usize_t size); + +/* xd3_get_appheader may be called in the decoder after XD3_GOTHEADER. 
+ * For convenience, the decoder always adds a single byte padding to + * the end of the application header, which is set to zero in case the + * application header is a string. */ +int xd3_get_appheader (xd3_stream *stream, + uint8_t **data, + usize_t *size); + +/* To generate a VCDIFF encoded delta with xd3_encode_init() from + * another format, use: + * + * xd3_encode_init_partial() -- initialize encoder state (w/o hash tables) + * xd3_init_cache() -- reset VCDIFF address cache + * xd3_found_match() -- to report a copy instruction + * + * set stream->enc_state to ENC_INSTR and call xd3_encode_input as usual. + */ +int xd3_encode_init_partial (xd3_stream *stream); +void xd3_init_cache (xd3_addr_cache* acache); +int xd3_found_match (xd3_stream *stream, + usize_t pos, usize_t size, + xoff_t addr, int is_source); + +/* Gives an error string for xdelta3-specific errors, returns NULL for + system errors */ +const char* xd3_strerror (int ret); + +/* For convenience, zero & initialize the xd3_config structure with + specified flags. */ +static inline +void xd3_init_config (xd3_config *config, + uint32_t flags) +{ + memset (config, 0, sizeof (*config)); + config->flags = flags; +} + +/* This supplies some input to the stream. + * + * For encoding, if the input is larger than the configured window + * size (xd3_config.winsize), the entire input will be consumed and + * encoded anyway. If you wish to strictly limit the window size, + * limit the buffer passed to xd3_avail_input to the window size. + * + * For encoding, if the input is smaller than the configured window + * size (xd3_config.winsize), the library will create a window-sized + * buffer and accumulate input until a full-sized window can be + * encoded. XD3_INPUT will be returned. The input must remain valid + * until the next time xd3_encode_input() returns XD3_INPUT. + * + * For decoding, the input will be consumed entirely before XD3_INPUT + * is returned again. 
+ */ +static inline +void xd3_avail_input (xd3_stream *stream, + const uint8_t *idata, + usize_t isize) +{ + /* Even if isize is zero, the code expects a non-NULL idata. Why? + * It uses this value to determine whether xd3_avail_input has ever + * been called. If xd3_encode_input is called before + * xd3_avail_input it will return XD3_INPUT right away without + * allocating a stream->winsize buffer. This is to avoid an + * unwanted allocation. */ + XD3_ASSERT (idata != NULL || isize == 0); + + stream->next_in = idata; + stream->avail_in = isize; +} + +/* This acknowledges receipt of output data, must be called after any + * XD3_OUTPUT return. */ +static inline +void xd3_consume_output (xd3_stream *stream) +{ + stream->avail_out = 0; +} + +/* These are set for each XD3_WINFINISH return. */ +static inline +int xd3_encoder_used_source (xd3_stream *stream) { + return stream->src != NULL && stream->src->srclen > 0; +} +static inline +xoff_t xd3_encoder_srcbase (xd3_stream *stream) { + return stream->src->srcbase; +} +static inline +usize_t xd3_encoder_srclen (xd3_stream *stream) { + return stream->src->srclen; +} + +/* Checks for legal flag changes. */ +static inline +void xd3_set_flags (xd3_stream *stream, uint32_t flags) +{ + /* The bitwise difference should contain only XD3_FLUSH or + XD3_SKIP_WINDOW */ + XD3_ASSERT(((flags ^ stream->flags) & ~(XD3_FLUSH | XD3_SKIP_WINDOW)) == 0); + stream->flags = flags; +} + +/* Gives some extra information about the latest library error, if any + * is known. */ +static inline +const char* xd3_errstring (xd3_stream *stream) +{ + return stream->msg ? stream->msg : ""; +} + + +/* 64-bit divisions are expensive, which is why we require a + * power-of-two source->blksize. To relax this restriction is + * relatively easy, see the history for xd3_blksize_div(). 
*/ +static inline +void xd3_blksize_div (const xoff_t offset, + const xd3_source *source, + xoff_t *blkno, + usize_t *blkoff) { + *blkno = offset >> source->shiftby; + *blkoff = offset & source->maskby; + XD3_ASSERT (*blkoff < source->blksize); +} + +static inline +void xd3_blksize_add (xoff_t *blkno, + usize_t *blkoff, + const xd3_source *source, + const usize_t add) +{ + usize_t blkdiff; + + /* Does not check for overflow, checked in xdelta3-decode.h. */ + *blkoff += add; + blkdiff = *blkoff >> source->shiftby; + + if (blkdiff) + { + *blkno += blkdiff; + *blkoff &= source->maskby; + } + + XD3_ASSERT (*blkoff < source->blksize); +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#define XD3_NOOP 0U +#define XD3_ADD 1U +#define XD3_RUN 2U +#define XD3_CPY 3U /* XD3_CPY rtypes are represented as (XD3_CPY + + * copy-mode value) */ + +#if XD3_DEBUG +#define IF_DEBUG(x) x +#else +#define IF_DEBUG(x) +#endif +#if XD3_DEBUG > 1 +#define IF_DEBUG1(x) x +#else +#define IF_DEBUG1(x) +#endif +#if XD3_DEBUG > 2 +#define IF_DEBUG2(x) x +#else +#define IF_DEBUG2(x) +#endif + +#define SIZEOF_ARRAY(x) (sizeof(x) / sizeof(x[0])) + +#endif /* _XDELTA3_H_ */ diff --git a/lib/xdelta3/xdelta3.i b/lib/xdelta3/xdelta3.i new file mode 100644 index 0000000..2fea015 --- /dev/null +++ b/lib/xdelta3/xdelta3.i @@ -0,0 +1,85 @@ +%module xdelta3 +%import cstring.i +%import argcargv.i +%{ +#include "xdelta3.h" + +int xd3_main_cmdline (int ARGC, char **ARGV); +%} + +%cstring_input_binary(const char *input, unsigned int input_size); +%cstring_input_binary(const char *source, unsigned int source_size); + +%define %max_output_withsize(TYPEMAP, SIZE, MAXSIZE) +%typemap(in) MAXSIZE (unsigned int alloc_size) { + $1 = alloc_size = PyInt_AsLong(obj2); +} +%typemap(in,numinputs=0) (TYPEMAP, SIZE) { +} +%typemap(check) (TYPEMAP, SIZE) { + // alloc_size input is #7th position in xd3_xxcode_memory() + $1 = malloc(alloc_size7); + $2 = &alloc_size7; +} +%typemap(argout,fragment="t_output_helper") (TYPEMAP, 
SIZE) { + if (result == 0) { + PyObject *o; + // alloc_size7 now carries actual size + o = PyString_FromStringAndSize($1,alloc_size7); + $result = t_output_helper($result,o); + } else { + $result = t_output_helper($result,Py_None); + } + free($1); +} +%typemap(default) int flags { + $1 = 0; +} +%enddef + +%max_output_withsize(char *output_buf, unsigned int *output_size, unsigned int max_output); + +int xd3_encode_memory (const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output_buffer, + usize_t *output_size, + usize_t avail_output, + int flags); + +int xd3_decode_memory (const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output_buf, + usize_t *output_size, + usize_t avail_output, + int flags); + +int xd3_main_cmdline (int ARGC, char **ARGV); + +/* Is this the right way? */ +enum { + /*XD3_JUST_HDR,*/ + /*XD3_SKIP_WINDOW,*/ + /*XD3_SKIP_EMIT,*/ + /*XD3_FLUSH,*/ + XD3_SEC_DJW, + XD3_SEC_FGK, + /*XD3_SEC_TYPE,*/ + XD3_SEC_NODATA, + XD3_SEC_NOINST, + XD3_SEC_NOADDR, + /*XD3_SEC_OTHER,*/ + XD3_ADLER32, + XD3_ADLER32_NOVER, + XD3_NOCOMPRESS, + XD3_BEGREEDY, + XD3_COMPLEVEL_SHIFT, + XD3_COMPLEVEL_MASK, + XD3_COMPLEVEL_1, + XD3_COMPLEVEL_3, + XD3_COMPLEVEL_6, + XD3_COMPLEVEL_9, +}; diff --git a/lib/xdelta3/xdelta3.vcxproj b/lib/xdelta3/xdelta3.vcxproj new file mode 100644 index 0000000..31f97f8 --- /dev/null +++ b/lib/xdelta3/xdelta3.vcxproj @@ -0,0 +1,344 @@ + + + + + Debug + Itanium + + + Debug + Win32 + + + Debug + x64 + + + Release + Itanium + + + Release + Win32 + + + Release + x64 + + + xdelta3-64 + Itanium + + + xdelta3-64 + Win32 + + + xdelta3-64 + x64 + + + + + %(PreprocessorDefinitions) + %(PreprocessorDefinitions) + %(PreprocessorDefinitions) + /DXD3_DEBUG=0 /DXD3_USE_LARGEFILE64=1 /DREGRESSION_TEST=1 /DSECONDARY_DJW=1 /DSECONDARY_FGK=1 /DXD3_MAIN=1 /DXD3_WIN32=1 /DEXTERNAL_COMPRESSION=0 /DXD3_STDIO=0 /DXD3_POSIX=0;%(PreprocessorDefinitions) + /DXD3_DEBUG=0 
/DXD3_USE_LARGEFILE64=1 /DREGRESSION_TEST=1 /DSECONDARY_DJW=1 /DSECONDARY_FGK=1 /DXD3_MAIN=1 /DXD3_WIN32=1 /DEXTERNAL_COMPRESSION=0 /DXD3_STDIO=0 /DXD3_POSIX=0;%(PreprocessorDefinitions) + /DXD3_DEBUG=0 /DXD3_USE_LARGEFILE64=1 /DREGRESSION_TEST=1 /DSECONDARY_DJW=1 /DSECONDARY_FGK=1 /DXD3_MAIN=1 /DXD3_WIN32=1 /DEXTERNAL_COMPRESSION=0 /DXD3_STDIO=0 /DXD3_POSIX=0;%(PreprocessorDefinitions) + /DXD3_DEBUG=0 /DXD3_USE_LARGEFILE64=1 /DREGRESSION_TEST=1 /DSECONDARY_DJW=1 /DSECONDARY_FGK=1 /DXD3_MAIN=1 /DXD3_WIN32=1 /DEXTERNAL_COMPRESSION=0 /DXD3_STDIO=0 /DXD3_POSIX=0;%(PreprocessorDefinitions) + /DXD3_DEBUG=0 /DXD3_USE_LARGEFILE64=1 /DREGRESSION_TEST=1 /DSECONDARY_DJW=1 /DSECONDARY_FGK=1 /DXD3_MAIN=1 /DXD3_WIN32=1 /DEXTERNAL_COMPRESSION=0 /DXD3_STDIO=0 /DXD3_POSIX=0;%(PreprocessorDefinitions) + /DXD3_DEBUG=0 /DXD3_USE_LARGEFILE64=1 /DREGRESSION_TEST=1 /DSECONDARY_DJW=1 /DSECONDARY_FGK=1 /DXD3_MAIN=1 /DXD3_WIN32=1 /DEXTERNAL_COMPRESSION=0 /DXD3_STDIO=0 /DXD3_POSIX=0;%(PreprocessorDefinitions) + + + + + + + + + + + + + + + + + + + + {8F9D37B5-B78E-4816-BE61-AEF679DBF3BC} + Win32Proj + xdelta3 + + + + Application + true + MultiByte + v120 + + + Application + true + MultiByte + + + Application + true + MultiByte + v120 + + + Application + false + true + MultiByte + v120 + + + Application + false + true + MultiByte + + + Application + false + true + MultiByte + v120 + + + Application + false + true + MultiByte + v120 + + + Application + false + true + MultiByte + + + Application + false + true + MultiByte + v120 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + + + true + + + true + + + false + + + false + + + false + $(WindowsSdkDir)\include;$(VCInstallDir)include;..\xz\include + $(LibraryPath);$(VSInstallDir);$(VSInstallDir)lib\amd64;..\xz\bin_x86-64 + + + false + + + false + + + false + + + + NotUsing + Level3 + Disabled + 
WIN32;XD3_MAIN=1;XD3_DEBUG=0;XD3_USE_LARGEFILE64=1;REGRESSION_TEST=1;SECONDARY_DJW=1;SECONDARY_FGK=1;XD3_WIN32=1;EXTERNAL_COMPRESSION=0;SHELL_TESTS=0;_DEBUG;_CONSOLE;SECONDARY_LZMA=0;LZMA_API_STATIC;SIZEOF_SIZE_T=4;SIZEOF_UNSIGNED_LONG_LONG=8;%(PreprocessorDefinitions) + MultiThreaded + ../xz/include + + + Console + true + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + NotUsing + Level3 + Disabled + WIN32;XD3_MAIN=1;XD3_DEBUG=0;XD3_USE_LARGEFILE64=1;REGRESSION_TEST=1;SECONDARY_DJW=1;SECONDARY_FGK=1;XD3_WIN32=1;EXTERNAL_COMPRESSION=0;SHELL_TESTS=0;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + MultiThreaded + + + Console + true + + + + + NotUsing + Level3 + Disabled + WIN32;XD3_MAIN=1;XD3_DEBUG=0;XD3_USE_LARGEFILE64=1;REGRESSION_TEST=1;SECONDARY_DJW=1;SECONDARY_FGK=1;XD3_WIN32=1;EXTERNAL_COMPRESSION=0;SHELL_TESTS=0;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + MultiThreaded + + + Console + true + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;..\..\..\..\src\xz\bin_x86-64\liblzma_static.lib;%(AdditionalDependencies) + + + + + Level3 + NotUsing + MaxSpeed + true + true + WIN32;XD3_MAIN=1;XD3_DEBUG=0;XD3_USE_LARGEFILE64=1;REGRESSION_TEST=1;SECONDARY_DJW=1;SECONDARY_FGK=1;SECONDARY_LZMA=1;XD3_WIN32=1;EXTERNAL_COMPRESSION=0;SHELL_TESTS=0;_DEBUG;_CONSOLE;LZMA_API_STATIC;%(PreprocessorDefinitions) + MultiThreaded + ..\..\..\..\src\xz\include + + + Console + true + true + true + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;..\..\..\..\src\xz\bin_i486\liblzma_static.lib;%(AdditionalDependencies) + + + + + Level3 + NotUsing + MaxSpeed + true + true + 
WIN32;XD3_MAIN=1;XD3_DEBUG=0;XD3_USE_LARGEFILE64=1;REGRESSION_TEST=1;SECONDARY_DJW=1;SECONDARY_FGK=1;XD3_WIN32=1;EXTERNAL_COMPRESSION=0;SHELL_TESTS=0;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + MultiThreaded + + + Console + true + true + true + + + + + Level3 + NotUsing + MaxSpeed + true + true + WIN32;XD3_MAIN=1;XD3_DEBUG=0;XD3_USE_LARGEFILE64=1;REGRESSION_TEST=1;SECONDARY_DJW=1;SECONDARY_FGK=1;SECONDARY_LZMA=1;XD3_WIN32=1;EXTERNAL_COMPRESSION=0;SHELL_TESTS=0;_DEBUG;_CONSOLE;LZMA_API_STATIC;%(PreprocessorDefinitions) + MultiThreaded + ..\..\..\..\src\xz\include + + + Console + true + true + true + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies);..\..\..\..\src\xz\bin_x86-64\liblzma_static.lib + + + + + Level3 + NotUsing + MaxSpeed + true + true + WIN32;XD3_MAIN=1;XD3_DEBUG=0;XD3_USE_LARGEFILE64=1;REGRESSION_TEST=1;SECONDARY_DJW=1;SECONDARY_FGK=1;XD3_WIN32=1;EXTERNAL_COMPRESSION=0;SHELL_TESTS=0;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + MultiThreaded + + + Console + true + true + true + + + + + Level3 + NotUsing + MaxSpeed + true + true + WIN32;XD3_MAIN=1;XD3_DEBUG=0;XD3_USE_LARGEFILE64=1;REGRESSION_TEST=1;SECONDARY_DJW=1;SECONDARY_FGK=1;XD3_WIN32=1;EXTERNAL_COMPRESSION=0;SHELL_TESTS=0;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + MultiThreaded + + + Console + true + true + true + + + + + Level3 + NotUsing + MaxSpeed + true + true + WIN32;XD3_MAIN=1;XD3_DEBUG=0;XD3_USE_LARGEFILE64=1;REGRESSION_TEST=1;SECONDARY_DJW=1;SECONDARY_FGK=1;XD3_WIN32=1;EXTERNAL_COMPRESSION=0;SHELL_TESTS=0;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + MultiThreaded + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/lib/xdelta3/xdelta3.wxi b/lib/xdelta3/xdelta3.wxi new file mode 100644 index 0000000..2ef8426 --- /dev/null +++ b/lib/xdelta3/xdelta3.wxi @@ -0,0 +1,7 @@ + + + + + + + diff --git a/lib/xdelta3/xdelta3.wxs 
b/lib/xdelta3/xdelta3.wxs new file mode 100644 index 0000000..5e2d05c --- /dev/null +++ b/lib/xdelta3/xdelta3.wxs @@ -0,0 +1,131 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/errorAndExit.h b/src/errorAndExit.h index 106e68b..e96bbde 100644 --- a/src/errorAndExit.h +++ b/src/errorAndExit.h @@ -1 +1,3 @@ +#pragma once + void errorAndExit(); diff --git a/src/parseArguments.h b/src/parseArguments.h index 1ba22bb..7f05de6 100644 --- a/src/parseArguments.h +++ b/src/parseArguments.h @@ -1,3 +1,5 @@ +#pragma once + #include struct arguments { diff --git a/src/utils/min.h b/src/utils/min.h index 9605113..18f7d67 100644 --- a/src/utils/min.h +++ b/src/utils/min.h @@ -1,3 +1,5 @@ +#pragma once + //minimum of two integers #define min(a,b) \ ({ __typeof__ (a) _a = (a); \ diff --git a/src/xdelta3.c b/src/xdelta3.c new file mode 100644 index 0000000..9a8a2f7 --- /dev/null +++ b/src/xdelta3.c @@ -0,0 +1,8 @@ +//To fix compile errors with xdelta3 +#define SIZEOF_SIZE_T 4 +#define static_assert(e,m) /* do nothing */ +#define XD3_ENCODER 0 +typedef unsigned int usize_t; +typedef unsigned long long xoff_t; + +#include "../lib/xdelta3/xdelta3.h"