ArithmeticCoding
0.0.0a
Arithmetic coding in C
|
00001 00220 #include <stdio.h> 00221 #include <string.h> 00222 #include "stream.h" 00223 00224 typedef uint8_t u8; 00225 typedef uint8_t u16; 00226 typedef uint32_t u32; 00227 typedef uint64_t u64; 00228 typedef float real; 00229 00230 #define ENDL "\n" 00231 #define TRY(e) \ 00232 do{ if(!(e)) {\ 00233 printf("%s(%d):"ENDL "\t%s"ENDL "\tExpression evaluated as false."ENDL, \ 00234 __FILE__,__LINE__,#e); \ 00235 goto Error; \ 00236 }} while(0) 00237 00238 #define SAFE_FREE(e) if(e) { free(e); (e)=NULL; } 00239 00251 typedef struct _state_t 00252 { u64 b, 00253 l, 00254 v; 00255 stream_t d; 00256 size_t nsym; 00257 u64 D, 00258 shift, 00259 mask, 00260 lowl; 00261 u64 *cdf; 00262 } state_t; 00263 00265 static void init_common(state_t *state,u8 *buf,size_t nbuf,real *cdf,size_t nsym) 00266 { 00267 00268 state->l = (1ULL<<state->shift)-1; // e.g. 2^32-1 for u64 00269 state->mask = state->l; // for modding a u64 to u32 with & 00270 00271 nsym++; // add end symbol 00272 TRY( state->cdf=malloc(nsym*sizeof(*state->cdf)) ); 00273 { size_t i; 00274 real s = state->l-state->D; // scale to D^P range and adjust for end symbol 00275 for(i=0;i<(nsym-1);++i) 00276 state->cdf[i] = s*cdf[i]; 00277 state->cdf[i] = s; 00278 state->nsym = nsym; 00279 #if 0 00280 for(i=0;i<nsym;++i) 00281 printf("CDF[%5zu] = %12llu cdf[%5zu] = %f"ENDL,i,state->cdf[i],i,cdf[i]); 00282 #endif 00283 } 00284 attach(&state->d,buf,nbuf); 00285 return; 00286 Error: 00287 abort(); 00288 } 00290 static void init_u1(state_t *state,u8 *buf,size_t nbuf,real *cdf,size_t nsym) 00291 { memset(state,0,sizeof(*state)); 00292 state->D = 2; 00293 state->shift = 32; // log2(D^P) - need 2P to fit in a register for multiplies 00294 state->lowl = 1ULL<<31; // 2^(shift - log2(D)) 00295 init_common(state,buf,nbuf,cdf,nsym); 00296 } 00298 static void init_u4(state_t *state,u8 *buf,size_t nbuf,real *cdf,size_t nsym) 00299 { memset(state,0,sizeof(*state)); 00300 state->D = 1ULL<<4; 00301 state->shift = 32; // log2(D^P) - need 2P to fit in a register for multiplies 00302 state->lowl = 1ULL<<28; // 2^(shift - log2(D)) 00303 init_common(state,buf,nbuf,cdf,nsym); 00304 } 00306 static void init_u8(state_t *state,u8 *buf,size_t nbuf,real *cdf,size_t nsym) 00307 { memset(state,0,sizeof(*state)); 00308 state->D = 1ULL<<8; 00309 state->shift = 32; // log2(D^P) - need 2P to fit in a register for multiplies 00310 state->lowl = 1ULL<<24; // 2^(shift - log2(D)) 00311 init_common(state,buf,nbuf,cdf,nsym); 00312 } 00314 static void init_u16(state_t *state,u8 *buf,size_t nbuf,real *cdf,size_t nsym) 00315 { memset(state,0,sizeof(*state)); 00316 state->D = 1ULL<<16; 00317 state->shift = 32; // log2(D^P) - need 2P to fit in a register for multiplies 00318 state->lowl = 1ULL<<16; // 2^(shift - log2(D)) 00319 init_common(state,buf,nbuf*2,cdf,nsym); 00320 } 00322 static void free_internal(state_t *state) 00323 { void *d; 00324 SAFE_FREE(state->cdf); 00325 //detach(&state->d,&d,NULL); // Don't really want to do this - ends up wierd 00326 //SAFE_FREE(d); 00327 } 00328 00329 // 00330 // Build CDF 00331 // 00332 00334 static u32 maximum(u32 *s, size_t n) 00335 { u32 max=0; 00336 while(n--) 00337 max = (s[n]>max)?s[n]:max; 00338 return max; 00339 } 00340 00365 void cdf_build(real **cdf, size_t *M, u32 *s, size_t N) 00366 { size_t i,nbytes; 00367 *M = maximum(s,N)+1; 00368 TRY( *cdf=realloc(*cdf,nbytes=sizeof(real)*(M[0]+1)) ); // cdf has M+1 elements 00369 memset(*cdf,0,nbytes); 00370 for(i=0;i< N;++i) cdf[0][s[i]]++; // histogram 00371 for(i=0;i<M[0] ;++i) cdf[0][s[i]]/=(real)N; // norm 00372 for(i=1;i<M[0] ;++i) cdf[0][i] += cdf[0][i-1]; // cumsum 00373 for(i=M[0]+1;i>0;--i) cdf[0][i] = cdf[0][i-1]; // move 00374 cdf[0][0] = 0.0; 00375 00376 #ifdef DEBUG 00377 TRY( fabs(cdf[M[0]+1]-1.0) < 1e-6 ); 00378 #endif 00379 return; 00380 Error: 00381 abort(); 00382 } 00383 00384 // 00385 // Encoder 00386 // 00387 00388 #define B (state->b) 00389 #define L (state->l) 00390 #define C (state->cdf) 00391 #define SHIFT (state->shift) 00392 #define NSYM (state->nsym) 00393 #define MASK (state->mask) 00394 #define STREAM (&(state->d)) 00395 #define DATA (state->d.d) 00396 #define OFLOW (STREAM->overflow) 00397 #define bitsofD (8) 00398 #define D (1ULL<<bitsofD) 00399 #define LOWL (state->lowl) 00400 00401 void carry_null(stream_t *s) {} //no op 00402 void push_null(stream_t *s) {s->ibyte++;} 00403 00404 #define DEFN_UPDATE(T) \ 00405 static void update_##T(u64 s,state_t *state) \ 00406 { u64 a,x,y; \ 00407 y = L; /* End of interval */ \ 00408 if(s!=(NSYM-1)) /*is not last symbol */ \ 00409 y = (y*C[s+1])>>SHIFT; \ 00410 a = B; \ 00411 x = (L*C[s])>>SHIFT; \ 00412 B = (B+x)&MASK; \ 00413 L = y-x; \ 00414 TRY(L>0); \ 00415 if(a>B) \ 00416 carry_##T(STREAM); \ 00417 return; \ 00418 Error: \ 00419 abort(); \ 00420 } 00421 DEFN_UPDATE(u1); 00422 DEFN_UPDATE(u4); 00423 DEFN_UPDATE(u8); 00424 DEFN_UPDATE(u16); 00425 DEFN_UPDATE(null); 00426 00427 #define DEFN_ERENORM(T) \ 00428 static void erenorm_##T(state_t *state) \ 00429 { \ 00430 const int s = SHIFT-bitsofD; \ 00431 while(L<LOWL) \ 00432 { push_##T(STREAM, B>>s); \ 00433 L = (L<<bitsofD)&MASK; \ 00434 B = (B<<bitsofD)&MASK; \ 00435 } \ 00436 } 00437 DEFN_ERENORM(u1); // typed by output stream type 00438 DEFN_ERENORM(u4); 00439 DEFN_ERENORM(u8); 00440 DEFN_ERENORM(u16); 00441 static void erenorm_null(state_t *state) 00442 { 00443 const int s = SHIFT-bitsofD; 00444 while(L<LOWL) 00445 { push_null(STREAM); 00446 L = (L<<bitsofD)&MASK; 00447 B = (B<<bitsofD)&MASK; 00448 } 00449 } 00450 00451 #define DEFN_ESELECT(T) \ 00452 static void eselect_##T(state_t *state) \ 00453 { u64 a; \ 00454 a=B; \ 00455 B=(B+(1ULL<<(SHIFT-bitsofD-1)) )&MASK; /* D^(P-1)/2: (2^8)^(4-1)/2 = 2^24/2 = 2^23 = 2^(32-8-1) */ \ 00456 L=(1ULL<<(SHIFT-2*bitsofD))-1; /* requires P>2 */ \ 00457 if(a>B) \ 00458 carry_##T(STREAM); \ 00459 erenorm_##T(state); /* output last 2 symbols */ \ 00460 } 00461 DEFN_ESELECT(u1); // typed by output stream type 00462 DEFN_ESELECT(u4); 00463 DEFN_ESELECT(u8); 00464 DEFN_ESELECT(u16); 00465 00466 #define DEFN_ESTEP(T) \ 00467 static void estep_##T(state_t *state,u64 s) \ 00468 { \ 00469 update_##T(s,state); \ 00470 if(L<LOWL) \ 00471 erenorm_##T(state); \ 00472 } 00473 DEFN_ESTEP(u1); // typed by output stream type 00474 DEFN_ESTEP(u4); 00475 DEFN_ESTEP(u8); 00476 DEFN_ESTEP(u16); 00477 DEFN_ESTEP(null); // doesn't actually write to stream 00478 00479 #define DEFN_ENCODE(TOUT,TIN) \ 00480 void encode_##TOUT##_##TIN(void **out, size_t *nout, TIN *in, size_t nin, real *cdf, size_t nsym) \ 00481 { size_t i; \ 00482 state_t s; \ 00483 init_##TOUT(&s,*out,*nout,cdf,nsym); \ 00484 for(i=0;i<nin;++i) \ 00485 estep_##TOUT(&s,in[i]); \ 00486 estep_##TOUT(&s,s.nsym-1); \ 00487 eselect_##TOUT(&s); \ 00488 detach(&s.d,out,nout); \ 00489 free_internal(&s); \ 00490 } 00491 #define DEFN_ENCODE_OUTS(TIN) \ 00492 DEFN_ENCODE(u1,TIN); \ 00493 DEFN_ENCODE(u4,TIN); \ 00494 DEFN_ENCODE(u8,TIN); \ 00495 DEFN_ENCODE(u16,TIN); 00496 DEFN_ENCODE_OUTS(u8); 00497 DEFN_ENCODE_OUTS(u16); 00498 DEFN_ENCODE_OUTS(u32); 00499 DEFN_ENCODE_OUTS(u64); 00500 00501 // 00502 // Decode 00503 // 00504 00505 static u64 dselect(state_t *state, u64 *v, int *isend) 00506 { u64 s,n; 00507 u64 x,y; 00508 00509 s = 0; 00510 n = NSYM; 00511 x = 0; 00512 y = L; 00513 while( (n-s)>1UL ) // bisection search 00514 { u32 m = (s+n)>>1; 00515 u64 z = (L*C[m])>>SHIFT; 00516 if(z>*v) 00517 n=m,y=z; 00518 else 00519 s=m,x=z; 00520 } 00521 *v -= x; 00522 L = y-x; 00523 if(s==(NSYM-1)) 00524 *isend=1; 00525 return s; 00526 } 00527 00528 #define DEFN_DRENORM(T) \ 00529 static void drenorm_##T(state_t *state, u64 *v)\ 00530 { while(L<LOWL) \ 00531 { *v = ((*v<<bitsofD)&MASK)+pop_##T(STREAM); \ 00532 L = ( L<<bitsofD)&MASK; \ 00533 } \ 00534 } 00535 DEFN_DRENORM(u1); 00536 DEFN_DRENORM(u4); 00537 DEFN_DRENORM(u8); 00538 DEFN_DRENORM(u16); 00539 00540 #define DEFN_DPRIME(T) \ 00541 static void dprime_##T(state_t *state, u64* v) \ 00542 { size_t i; \ 00543 *v = 0; \ 00544 for(i=bitsofD;i<=SHIFT;i+=bitsofD) \ 00545 *v += (1ULL<<(SHIFT-i))*pop_##T(STREAM); /*(2^8)^(P-n) = 2^(8*(P-n))*/ \ 00546 } 00547 DEFN_DPRIME(u1); 00548 DEFN_DPRIME(u4); 00549 DEFN_DPRIME(u8); 00550 DEFN_DPRIME(u16); 00551 00552 #define DEFN_DSTEP(T) \ 00553 static u64 dstep_##T(state_t *state,u64 *v,int *isend) \ 00554 { \ 00555 u64 s = dselect(state,v,isend); \ 00556 if( L<LOWL ) \ 00557 drenorm_##T(state,v); \ 00558 return s; \ 00559 } 00560 DEFN_DSTEP(u1); 00561 DEFN_DSTEP(u4); 00562 DEFN_DSTEP(u8); 00563 DEFN_DSTEP(u16); 00564 00565 #define DEFN_DECODE(TOUT,TIN) \ 00566 void decode_##TOUT##_##TIN(TOUT **out, size_t *nout, u8 *in, size_t nin, real *cdf, size_t nsym) \ 00567 { state_t s; \ 00568 stream_t d={0}; \ 00569 u64 v,x; \ 00570 size_t i=0; \ 00571 int isend=0; \ 00572 attach(&d,*out,*nout*sizeof(TOUT)); \ 00573 init_##TIN(&s,in,nin,cdf,nsym); \ 00574 dprime_##TIN(&s,&v); \ 00575 x=dstep_##TIN(&s,&v,&isend); \ 00576 while(!isend) \ 00577 { push_##TOUT(&d,x); \ 00578 x=dstep_##TIN(&s,&v,&isend); \ 00579 } \ 00580 free_internal(&s); \ 00581 detach(&d,(void**)out,nout); \ 00582 *nout /= sizeof(TOUT); \ 00583 } 00584 #define DEFN_DECODE_OUTS(TIN) \ 00585 DEFN_DECODE(u8,TIN); \ 00586 DEFN_DECODE(u16,TIN); \ 00587 DEFN_DECODE(u32,TIN); \ 00588 DEFN_DECODE(u64,TIN); 00589 DEFN_DECODE_OUTS(u1); 00590 DEFN_DECODE_OUTS(u4); 00591 DEFN_DECODE_OUTS(u8); 00592 DEFN_DECODE_OUTS(u16); 00593 00594 // 00595 // Variable output alphabet encoding 00596 // 00597 00598 void sync(stream_t *dest, stream_t *src) 00599 { dest->d = src->d; 00600 dest->nbytes = src->nbytes; 00601 } 00602 void vdecode1(u8 **out, size_t *nout, u8 *in, size_t nin, real *cdf, size_t nsym, real *tcdf, size_t tsym) 00603 { state_t d0,e1; 00604 stream_t d={0}; 00605 int isend=0; 00606 u64 v0; 00607 attach(&d,*out,*nout); 00608 init_u8(&d0,in,nin,tcdf,tsym); // the tcdf decode 00609 init_u8(&e1, 0, 0,tcdf,tsym); // the tcdf encode - used to check for end symbol 00610 00611 dprime_u8(&d0,&v0); 00612 while(e1.d.ibyte<nin) // stop decoding when reencoding reproduces the input string 00613 { u8 s = dstep_u8(&d0,&v0,&isend); 00614 push_u8(&d,s); 00615 estep_null(&e1,s); 00616 } 00617 00618 detach(&d,(void**)out,nout); 00619 { void *b; 00620 detach(&e1.d,&b,NULL); 00621 if(b) free(b); 00622 } 00623 free_internal(&d0); 00624 free_internal(&e1); 00625 } 00626 00627 #define DEFN_VENCODE(T) \ 00628 void vencode_##T(u8 **out, size_t *nout, size_t noutsym, T *in, size_t nin, size_t ninsym, real *cdf) \ 00629 { u8 *t=NULL; \ 00630 size_t i,n=0; \ 00631 real *tcdf; \ 00632 TRY( tcdf=malloc(sizeof(*tcdf)*(noutsym+1)) ); \ 00633 { size_t i; \ 00634 real v = 1.0/(real)noutsym; \ 00635 for(i=0;i<=noutsym;++i) \ 00636 tcdf[i] = i*v; \ 00637 } \ 00638 { void *buf=0; \ 00639 n = 0; \ 00640 encode_u8_##T(&buf,&n,in,nin,cdf,ninsym); \ 00641 vdecode1(out,nout,buf,n,cdf,ninsym,tcdf,noutsym); \ 00642 free(buf); \ 00643 } \ 00644 free(tcdf); \ 00645 return; \ 00646 Error: \ 00647 abort(); \ 00648 } 00649 DEFN_VENCODE(u8); 00650 DEFN_VENCODE(u16); 00651 DEFN_VENCODE(u32); 00652 DEFN_VENCODE(u64); 00653 00654 #define DEFN_VDECODE(T) \ 00655 void vdecode_##T(T **out, size_t *nout, size_t noutsym, u8 *in, size_t nin, size_t ninsym, real *cdf) \ 00656 { u8 *t=NULL; \ 00657 size_t i,n=0; \ 00658 real *tcdf; \ 00659 TRY( tcdf=malloc(sizeof(*tcdf)*(ninsym+1)) ); \ 00660 { size_t i; \ 00661 real v = 1.0/(real)ninsym; \ 00662 for(i=0;i<=ninsym;++i) \ 00663 tcdf[i] = i*v; \ 00664 } \ 00665 { void *buf=0; \ 00666 n = 0; \ 00667 encode_u8_u8(&buf,&n,in,nin,tcdf,ninsym); \ 00668 decode_##T##_u8(out,nout,buf,n,cdf,noutsym); \ 00669 free(buf); \ 00670 } \ 00671 free(tcdf); \ 00672 return; \ 00673 Error: \ 00674 abort(); \ 00675 } 00676 DEFN_VDECODE(u8); 00677 DEFN_VDECODE(u16); 00678 DEFN_VDECODE(u32); 00679 DEFN_VDECODE(u64); 00680 //end addtogroup ac