dictionaryRandomAccess.c 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. // LZ4 API example : Dictionary Random Access
  2. #if defined(_MSC_VER) && (_MSC_VER <= 1800) /* Visual Studio <= 2013 */
  3. # define _CRT_SECURE_NO_WARNINGS
  4. # define snprintf sprintf_s
  5. #endif
  6. #include "lz4.h"
  7. #include <stdio.h>
  8. #include <stdint.h>
  9. #include <stdlib.h>
  10. #include <string.h>
  11. #define MIN(x, y) ((x) < (y) ? (x) : (y))
  12. enum {
  13. BLOCK_BYTES = 1024, /* 1 KiB of uncompressed data in a block */
  14. DICTIONARY_BYTES = 1024, /* Load a 1 KiB dictionary */
  15. MAX_BLOCKS = 1024 /* For simplicity of implementation */
  16. };
  17. /**
  18. * Magic bytes for this test case.
  19. * This is not a great magic number because it is a common word in ASCII.
  20. * However, it is important to have some versioning system in your format.
  21. */
  22. const char kTestMagic[] = { 'T', 'E', 'S', 'T' };
  23. void write_int(FILE* fp, int i) {
  24. size_t written = fwrite(&i, sizeof(i), 1, fp);
  25. if (written != 1) { exit(10); }
  26. }
  27. void write_bin(FILE* fp, const void* array, size_t arrayBytes) {
  28. size_t written = fwrite(array, 1, arrayBytes, fp);
  29. if (written != arrayBytes) { exit(11); }
  30. }
  31. void read_int(FILE* fp, int* i) {
  32. size_t read = fread(i, sizeof(*i), 1, fp);
  33. if (read != 1) { exit(12); }
  34. }
  35. size_t read_bin(FILE* fp, void* array, size_t arrayBytes) {
  36. size_t read = fread(array, 1, arrayBytes, fp);
  37. if (ferror(fp)) { exit(12); }
  38. return read;
  39. }
  40. void seek_bin(FILE* fp, long offset, int origin) {
  41. if (fseek(fp, offset, origin)) { exit(14); }
  42. }
  43. void test_compress(FILE* outFp, FILE* inpFp, void *dict, int dictSize)
  44. {
  45. LZ4_stream_t lz4Stream_body;
  46. LZ4_stream_t* lz4Stream = &lz4Stream_body;
  47. char inpBuf[BLOCK_BYTES];
  48. int offsets[MAX_BLOCKS];
  49. int *offsetsEnd = offsets;
  50. LZ4_initStream(lz4Stream, sizeof(*lz4Stream));
  51. /* Write header magic */
  52. write_bin(outFp, kTestMagic, sizeof(kTestMagic));
  53. *offsetsEnd++ = sizeof(kTestMagic);
  54. /* Write compressed data blocks. Each block contains BLOCK_BYTES of plain
  55. data except possibly the last. */
  56. for(;;) {
  57. const int inpBytes = (int) read_bin(inpFp, inpBuf, BLOCK_BYTES);
  58. if(0 == inpBytes) {
  59. break;
  60. }
  61. /* Forget previously compressed data and load the dictionary */
  62. LZ4_loadDict(lz4Stream, (const char*) dict, dictSize);
  63. {
  64. char cmpBuf[LZ4_COMPRESSBOUND(BLOCK_BYTES)];
  65. const int cmpBytes = LZ4_compress_fast_continue(
  66. lz4Stream, inpBuf, cmpBuf, inpBytes, sizeof(cmpBuf), 1);
  67. if(cmpBytes <= 0) { exit(1); }
  68. write_bin(outFp, cmpBuf, (size_t)cmpBytes);
  69. /* Keep track of the offsets */
  70. *offsetsEnd = *(offsetsEnd - 1) + cmpBytes;
  71. ++offsetsEnd;
  72. }
  73. if (offsetsEnd - offsets > MAX_BLOCKS) { exit(2); }
  74. }
  75. /* Write the tailing jump table */
  76. {
  77. int *ptr = offsets;
  78. while (ptr != offsetsEnd) {
  79. write_int(outFp, *ptr++);
  80. }
  81. write_int(outFp, (int) (offsetsEnd - offsets));
  82. }
  83. }
  84. void test_decompress(FILE* outFp, FILE* inpFp, void *dict, int dictSize, int offset, int length)
  85. {
  86. LZ4_streamDecode_t lz4StreamDecode_body;
  87. LZ4_streamDecode_t* lz4StreamDecode = &lz4StreamDecode_body;
  88. /* The blocks [currentBlock, endBlock) contain the data we want */
  89. int currentBlock = offset / BLOCK_BYTES;
  90. int endBlock = ((offset + length - 1) / BLOCK_BYTES) + 1;
  91. char decBuf[BLOCK_BYTES];
  92. int offsets[MAX_BLOCKS];
  93. /* Special cases */
  94. if (length == 0) { return; }
  95. /* Read the magic bytes */
  96. {
  97. char magic[sizeof(kTestMagic)];
  98. size_t read = read_bin(inpFp, magic, sizeof(magic));
  99. if (read != sizeof(magic)) { exit(1); }
  100. if (memcmp(kTestMagic, magic, sizeof(magic))) { exit(2); }
  101. }
  102. /* Read the offsets tail */
  103. {
  104. int numOffsets;
  105. int block;
  106. int *offsetsPtr = offsets;
  107. seek_bin(inpFp, -4, SEEK_END);
  108. read_int(inpFp, &numOffsets);
  109. if (numOffsets <= endBlock) { exit(3); }
  110. seek_bin(inpFp, -4 * (numOffsets + 1), SEEK_END);
  111. for (block = 0; block <= endBlock; ++block) {
  112. read_int(inpFp, offsetsPtr++);
  113. }
  114. }
  115. /* Seek to the first block to read */
  116. seek_bin(inpFp, offsets[currentBlock], SEEK_SET);
  117. offset = offset % BLOCK_BYTES;
  118. /* Start decoding */
  119. for(; currentBlock < endBlock; ++currentBlock) {
  120. char cmpBuf[LZ4_COMPRESSBOUND(BLOCK_BYTES)];
  121. /* The difference in offsets is the size of the block */
  122. int cmpBytes = offsets[currentBlock + 1] - offsets[currentBlock];
  123. {
  124. const size_t read = read_bin(inpFp, cmpBuf, (size_t)cmpBytes);
  125. if(read != (size_t)cmpBytes) { exit(4); }
  126. }
  127. /* Load the dictionary */
  128. LZ4_setStreamDecode(lz4StreamDecode, (const char*) dict, dictSize);
  129. {
  130. const int decBytes = LZ4_decompress_safe_continue(
  131. lz4StreamDecode, cmpBuf, decBuf, cmpBytes, BLOCK_BYTES);
  132. if(decBytes <= 0) { exit(5); }
  133. {
  134. /* Write out the part of the data we care about */
  135. int blockLength = MIN(length, (decBytes - offset));
  136. write_bin(outFp, decBuf + offset, (size_t)blockLength);
  137. offset = 0;
  138. length -= blockLength;
  139. }
  140. }
  141. }
  142. }
  143. int compare(FILE* fp0, FILE* fp1, int length)
  144. {
  145. int result = 0;
  146. while(0 == result) {
  147. char b0[4096];
  148. char b1[4096];
  149. const size_t r0 = read_bin(fp0, b0, MIN(length, (int)sizeof(b0)));
  150. const size_t r1 = read_bin(fp1, b1, MIN(length, (int)sizeof(b1)));
  151. result = (int) r0 - (int) r1;
  152. if(0 == r0 || 0 == r1) {
  153. break;
  154. }
  155. if(0 == result) {
  156. result = memcmp(b0, b1, r0);
  157. }
  158. length -= r0;
  159. }
  160. return result;
  161. }
  162. int main(int argc, char* argv[])
  163. {
  164. char inpFilename[256] = { 0 };
  165. char lz4Filename[256] = { 0 };
  166. char decFilename[256] = { 0 };
  167. char dictFilename[256] = { 0 };
  168. int offset;
  169. int length;
  170. char dict[DICTIONARY_BYTES];
  171. int dictSize;
  172. if(argc < 5) {
  173. printf("Usage: %s input dictionary offset length", argv[0]);
  174. return 0;
  175. }
  176. snprintf(inpFilename, 256, "%s", argv[1]);
  177. snprintf(lz4Filename, 256, "%s.lz4s-%d", argv[1], BLOCK_BYTES);
  178. snprintf(decFilename, 256, "%s.lz4s-%d.dec", argv[1], BLOCK_BYTES);
  179. snprintf(dictFilename, 256, "%s", argv[2]);
  180. offset = atoi(argv[3]);
  181. length = atoi(argv[4]);
  182. printf("inp = [%s]\n", inpFilename);
  183. printf("lz4 = [%s]\n", lz4Filename);
  184. printf("dec = [%s]\n", decFilename);
  185. printf("dict = [%s]\n", dictFilename);
  186. printf("offset = [%d]\n", offset);
  187. printf("length = [%d]\n", length);
  188. /* Load dictionary */
  189. {
  190. FILE* dictFp = fopen(dictFilename, "rb");
  191. dictSize = (int)read_bin(dictFp, dict, DICTIONARY_BYTES);
  192. fclose(dictFp);
  193. }
  194. /* compress */
  195. {
  196. FILE* inpFp = fopen(inpFilename, "rb");
  197. FILE* outFp = fopen(lz4Filename, "wb");
  198. printf("compress : %s -> %s\n", inpFilename, lz4Filename);
  199. test_compress(outFp, inpFp, dict, dictSize);
  200. printf("compress : done\n");
  201. fclose(outFp);
  202. fclose(inpFp);
  203. }
  204. /* decompress */
  205. {
  206. FILE* inpFp = fopen(lz4Filename, "rb");
  207. FILE* outFp = fopen(decFilename, "wb");
  208. printf("decompress : %s -> %s\n", lz4Filename, decFilename);
  209. test_decompress(outFp, inpFp, dict, DICTIONARY_BYTES, offset, length);
  210. printf("decompress : done\n");
  211. fclose(outFp);
  212. fclose(inpFp);
  213. }
  214. /* verify */
  215. {
  216. FILE* inpFp = fopen(inpFilename, "rb");
  217. FILE* decFp = fopen(decFilename, "rb");
  218. seek_bin(inpFp, offset, SEEK_SET);
  219. printf("verify : %s <-> %s\n", inpFilename, decFilename);
  220. const int cmp = compare(inpFp, decFp, length);
  221. if(0 == cmp) {
  222. printf("verify : OK\n");
  223. } else {
  224. printf("verify : NG\n");
  225. }
  226. fclose(decFp);
  227. fclose(inpFp);
  228. }
  229. return 0;
  230. }