ArithmeticCoding  0.0.0a
Arithmetic coding in C
C:/Users/clackn/Desktop/src/arithcode/src/ac.c
Go to the documentation of this file.
00001 
00220 #include <stdio.h>
00221 #include <string.h>
00222 #include "stream.h"
00223 
00224 typedef uint8_t   u8;
00225 typedef uint8_t   u16;
00226 typedef uint32_t  u32;
00227 typedef uint64_t  u64;
00228 typedef float     real;
00229 
00230 #define ENDL "\n"
00231 #define TRY(e) \
00232   do{ if(!(e)) {\
00233     printf("%s(%d):"ENDL "\t%s"ENDL "\tExpression evaluated as false."ENDL, \
00234         __FILE__,__LINE__,#e); \
00235     goto Error; \
00236   }} while(0)
00237 
00238 #define SAFE_FREE(e) if(e) { free(e); (e)=NULL; }
00239 
00251 typedef struct _state_t
00252 { u64      b,       
00253            l,       
00254            v;       
00255   stream_t d;       
00256   size_t   nsym;    
00257   u64      D,       
00258            shift,   
00259            mask,    
00260            lowl;    
00261   u64     *cdf;     
00262 } state_t;
00263 
00265 static void init_common(state_t *state,u8 *buf,size_t nbuf,real *cdf,size_t nsym)
00266 { 
00267   
00268   state->l = (1ULL<<state->shift)-1; // e.g. 2^32-1 for u64
00269   state->mask = state->l;            // for modding a u64 to u32 with &
00270 
00271   nsym++; // add end symbol
00272   TRY( state->cdf=malloc(nsym*sizeof(*state->cdf)) );
00273   { size_t i;
00274     real s = state->l-state->D; // scale to D^P range and adjust for end symbol
00275     for(i=0;i<(nsym-1);++i)
00276       state->cdf[i] = s*cdf[i];
00277     state->cdf[i] = s;
00278     state->nsym = nsym;
00279 #if 0
00280     for(i=0;i<nsym;++i)
00281       printf("CDF[%5zu] = %12llu  cdf[%5zu] = %f"ENDL,i,state->cdf[i],i,cdf[i]);
00282 #endif
00283   }
00284   attach(&state->d,buf,nbuf);
00285   return;
00286 Error:
00287   abort();
00288 }
00290 static void init_u1(state_t *state,u8 *buf,size_t nbuf,real *cdf,size_t nsym)
00291 { memset(state,0,sizeof(*state));
00292   state->D     = 2;
00293   state->shift = 32; // log2(D^P) - need 2P to fit in a register for multiplies
00294   state->lowl  = 1ULL<<31; // 2^(shift - log2(D))
00295   init_common(state,buf,nbuf,cdf,nsym);
00296 }
00298 static void init_u4(state_t *state,u8 *buf,size_t nbuf,real *cdf,size_t nsym)
00299 { memset(state,0,sizeof(*state));
00300   state->D     = 1ULL<<4;
00301   state->shift = 32; // log2(D^P) - need 2P to fit in a register for multiplies
00302   state->lowl  = 1ULL<<28; // 2^(shift - log2(D))
00303   init_common(state,buf,nbuf,cdf,nsym);
00304 }
00306 static void init_u8(state_t *state,u8 *buf,size_t nbuf,real *cdf,size_t nsym)
00307 { memset(state,0,sizeof(*state));
00308   state->D     = 1ULL<<8;
00309   state->shift = 32; // log2(D^P) - need 2P to fit in a register for multiplies
00310   state->lowl  = 1ULL<<24; // 2^(shift - log2(D))
00311   init_common(state,buf,nbuf,cdf,nsym);
00312 }
00314 static void init_u16(state_t *state,u8 *buf,size_t nbuf,real *cdf,size_t nsym)
00315 { memset(state,0,sizeof(*state));
00316   state->D     = 1ULL<<16;
00317   state->shift = 32;       // log2(D^P) - need 2P to fit in a register for multiplies
00318   state->lowl  = 1ULL<<16; // 2^(shift - log2(D))
00319   init_common(state,buf,nbuf*2,cdf,nsym);
00320 }
00322 static  void free_internal(state_t *state)
00323 { void *d;
00324   SAFE_FREE(state->cdf);
00325 //detach(&state->d,&d,NULL); // Don't really want to do this - ends up wierd
00326 //SAFE_FREE(d);
00327 }
00328 
00329 //
00330 // Build CDF
00331 // 
00332 
00334 static u32 maximum(u32 *s, size_t n)
00335 { u32 max=0;
00336   while(n--)
00337     max = (s[n]>max)?s[n]:max;
00338   return max;
00339 }
00340 
00365 void cdf_build(real **cdf, size_t *M, u32 *s, size_t N)
00366 { size_t i,nbytes;
00367   *M = maximum(s,N)+1; 
00368   TRY( *cdf=realloc(*cdf,nbytes=sizeof(real)*(M[0]+1)) ); // cdf has M+1 elements
00369   memset(*cdf,0,nbytes);
00370   for(i=0;i<     N;++i) cdf[0][s[i]]++;                   // histogram
00371   for(i=0;i<M[0]  ;++i) cdf[0][s[i]]/=(real)N;            // norm
00372   for(i=1;i<M[0]  ;++i) cdf[0][i]   += cdf[0][i-1];       // cumsum
00373   for(i=M[0]+1;i>0;--i) cdf[0][i]    = cdf[0][i-1];       // move
00374   cdf[0][0] = 0.0;
00375     
00376 #ifdef DEBUG
00377   TRY( fabs(cdf[M[0]+1]-1.0) < 1e-6 );
00378 #endif
00379   return;
00380 Error:
00381   abort();
00382 }
00383 
00384 //
00385 // Encoder
00386 //
00387 
00388 #define B         (state->b)
00389 #define L         (state->l)
00390 #define C         (state->cdf)
00391 #define SHIFT     (state->shift)
00392 #define NSYM      (state->nsym)
00393 #define MASK      (state->mask)
00394 #define STREAM    (&(state->d))
00395 #define DATA      (state->d.d)
00396 #define OFLOW     (STREAM->overflow)
00397 #define bitsofD   (8)
00398 #define D         (1ULL<<bitsofD)
00399 #define LOWL      (state->lowl)
00400 
00401 void carry_null(stream_t *s) {} //no op
00402 void push_null(stream_t *s)  {s->ibyte++;}
00403 
00404 #define DEFN_UPDATE(T) \
00405   static void update_##T(u64 s,state_t *state) \
00406   { u64 a,x,y;                                \
00407     y = L; /* End of interval */              \
00408     if(s!=(NSYM-1)) /*is not last symbol */   \
00409       y = (y*C[s+1])>>SHIFT;                  \
00410     a = B;                                    \
00411     x = (L*C[s])>>SHIFT;                      \
00412     B = (B+x)&MASK;                           \
00413     L = y-x;                                  \
00414     TRY(L>0);                                 \
00415     if(a>B)                                   \
00416       carry_##T(STREAM);                      \
00417     return;                                   \
00418   Error:                                      \
00419     abort();                                  \
00420   }
00421 DEFN_UPDATE(u1);
00422 DEFN_UPDATE(u4);
00423 DEFN_UPDATE(u8);
00424 DEFN_UPDATE(u16);
00425 DEFN_UPDATE(null);
00426 
00427 #define DEFN_ERENORM(T) \
00428 static void erenorm_##T(state_t *state) \
00429 {                                      \
00430   const int s = SHIFT-bitsofD;         \
00431   while(L<LOWL)                        \
00432   { push_##T(STREAM, B>>s);             \
00433     L = (L<<bitsofD)&MASK;             \
00434     B = (B<<bitsofD)&MASK;             \
00435   }                                    \
00436 }
00437 DEFN_ERENORM(u1); // typed by output stream type
00438 DEFN_ERENORM(u4);
00439 DEFN_ERENORM(u8);
00440 DEFN_ERENORM(u16);
00441 static void erenorm_null(state_t *state)
00442 {
00443   const int s = SHIFT-bitsofD;
00444   while(L<LOWL)
00445   { push_null(STREAM);
00446     L = (L<<bitsofD)&MASK;
00447     B = (B<<bitsofD)&MASK;
00448   }
00449 }
00450 
00451 #define DEFN_ESELECT(T) \
00452   static void eselect_##T(state_t *state)                                                                \
00453   { u64 a;                                                                                              \
00454     a=B;                                                                                                \
00455     B=(B+(1ULL<<(SHIFT-bitsofD-1)) )&MASK; /* D^(P-1)/2: (2^8)^(4-1)/2 = 2^24/2 = 2^23 = 2^(32-8-1) */  \
00456     L=(1ULL<<(SHIFT-2*bitsofD))-1;         /* requires P>2 */                                           \
00457     if(a>B)                                                                                             \
00458       carry_##T(STREAM);                                                                                  \
00459     erenorm_##T(state);                     /* output last 2 symbols */                                  \
00460   }
00461 DEFN_ESELECT(u1); // typed by output stream type
00462 DEFN_ESELECT(u4);
00463 DEFN_ESELECT(u8);
00464 DEFN_ESELECT(u16);
00465 
00466 #define DEFN_ESTEP(T) \
00467   static void estep_##T(state_t *state,u64 s) \
00468   {                                          \
00469     update_##T(s,state);                      \
00470     if(L<LOWL)                               \
00471       erenorm_##T(state);                     \
00472   }
00473 DEFN_ESTEP(u1); // typed by output stream type
00474 DEFN_ESTEP(u4);
00475 DEFN_ESTEP(u8);
00476 DEFN_ESTEP(u16);
00477 DEFN_ESTEP(null); // doesn't actually write to stream
00478 
00479 #define DEFN_ENCODE(TOUT,TIN) \
00480 void encode_##TOUT##_##TIN(void **out, size_t *nout, TIN *in, size_t nin, real *cdf, size_t nsym) \
00481 { size_t i;                             \
00482   state_t s;                            \
00483   init_##TOUT(&s,*out,*nout,cdf,nsym);  \
00484   for(i=0;i<nin;++i)                    \
00485     estep_##TOUT(&s,in[i]);             \
00486   estep_##TOUT(&s,s.nsym-1);            \
00487   eselect_##TOUT(&s);                   \
00488   detach(&s.d,out,nout);                \
00489   free_internal(&s);                    \
00490 }
00491 #define DEFN_ENCODE_OUTS(TIN) \
00492   DEFN_ENCODE(u1,TIN); \
00493   DEFN_ENCODE(u4,TIN); \
00494   DEFN_ENCODE(u8,TIN); \
00495   DEFN_ENCODE(u16,TIN);
00496 DEFN_ENCODE_OUTS(u8);
00497 DEFN_ENCODE_OUTS(u16);
00498 DEFN_ENCODE_OUTS(u32);
00499 DEFN_ENCODE_OUTS(u64);
00500 
00501 //
00502 // Decode
00503 //
00504 
00505 static u64 dselect(state_t *state, u64 *v, int *isend)
00506 { u64 s,n;
00507   u64 x,y;
00508 
00509   s = 0;
00510   n = NSYM;
00511   x = 0;
00512   y = L;
00513   while( (n-s)>1UL )   // bisection search
00514   { u32 m = (s+n)>>1;
00515     u64 z = (L*C[m])>>SHIFT;
00516     if(z>*v)
00517       n=m,y=z;
00518     else
00519       s=m,x=z;
00520   }
00521   *v -= x;
00522   L = y-x;
00523   if(s==(NSYM-1))
00524     *isend=1;
00525   return s;
00526 }
00527 
00528 #define DEFN_DRENORM(T) \
00529 static void drenorm_##T(state_t *state, u64 *v)\
00530 { while(L<LOWL)                               \
00531   { *v = ((*v<<bitsofD)&MASK)+pop_##T(STREAM); \
00532     L =   ( L<<bitsofD)&MASK;                 \
00533   }                                           \
00534 }
00535 DEFN_DRENORM(u1);
00536 DEFN_DRENORM(u4);
00537 DEFN_DRENORM(u8);
00538 DEFN_DRENORM(u16);
00539 
00540 #define DEFN_DPRIME(T) \
00541 static void dprime_##T(state_t *state, u64* v)                             \
00542 { size_t i;                                                               \
00543   *v = 0;                                                                 \
00544   for(i=bitsofD;i<=SHIFT;i+=bitsofD)                                      \
00545     *v += (1ULL<<(SHIFT-i))*pop_##T(STREAM); /*(2^8)^(P-n) = 2^(8*(P-n))*/ \
00546 }
00547 DEFN_DPRIME(u1);
00548 DEFN_DPRIME(u4);
00549 DEFN_DPRIME(u8);
00550 DEFN_DPRIME(u16);
00551 
00552 #define DEFN_DSTEP(T) \
00553 static u64 dstep_##T(state_t *state,u64 *v,int *isend) \
00554 {                                 \
00555   u64 s = dselect(state,v,isend); \
00556   if( L<LOWL )                    \
00557     drenorm_##T(state,v);         \
00558   return s;                       \
00559 }
00560 DEFN_DSTEP(u1);
00561 DEFN_DSTEP(u4);
00562 DEFN_DSTEP(u8);
00563 DEFN_DSTEP(u16);
00564 
00565 #define DEFN_DECODE(TOUT,TIN) \
00566 void decode_##TOUT##_##TIN(TOUT **out, size_t *nout, u8 *in, size_t nin, real *cdf, size_t nsym) \
00567 { state_t s;                                   \
00568   stream_t d={0};                              \
00569   u64 v,x;                                     \
00570   size_t i=0;                                  \
00571   int isend=0;                                 \
00572   attach(&d,*out,*nout*sizeof(TOUT));          \
00573   init_##TIN(&s,in,nin,cdf,nsym);              \
00574   dprime_##TIN(&s,&v);                         \
00575   x=dstep_##TIN(&s,&v,&isend);                 \
00576   while(!isend)                                \
00577   { push_##TOUT(&d,x);                         \
00578     x=dstep_##TIN(&s,&v,&isend);               \
00579   }                                            \
00580   free_internal(&s);                           \
00581   detach(&d,(void**)out,nout);                 \
00582   *nout /= sizeof(TOUT);                       \
00583 }
00584 #define DEFN_DECODE_OUTS(TIN) \
00585   DEFN_DECODE(u8,TIN);  \
00586   DEFN_DECODE(u16,TIN); \
00587   DEFN_DECODE(u32,TIN); \
00588   DEFN_DECODE(u64,TIN);
00589 DEFN_DECODE_OUTS(u1);
00590 DEFN_DECODE_OUTS(u4);
00591 DEFN_DECODE_OUTS(u8);
00592 DEFN_DECODE_OUTS(u16);
00593 
00594 //
00595 // Variable output alphabet encoding
00596 //
00597 
00598 void sync(stream_t *dest, stream_t *src)
00599 { dest->d      = src->d;
00600   dest->nbytes = src->nbytes;
00601 }
00602 void vdecode1(u8 **out, size_t *nout, u8 *in, size_t nin, real *cdf, size_t nsym, real *tcdf, size_t tsym)
00603 { state_t d0,e1;                                   
00604   stream_t d={0};                              
00605   int isend=0;
00606   u64 v0;
00607   attach(&d,*out,*nout);          
00608   init_u8(&d0,in,nin,tcdf,tsym);            // the tcdf decode
00609   init_u8(&e1, 0,  0,tcdf,tsym);            // the tcdf encode - used to check for end symbol
00610 
00611   dprime_u8(&d0,&v0);
00612   while(e1.d.ibyte<nin)                     // stop decoding when reencoding reproduces the input string
00613   { u8 s = dstep_u8(&d0,&v0,&isend);
00614     push_u8(&d,s);
00615     estep_null(&e1,s);
00616   }
00617 
00618   detach(&d,(void**)out,nout);
00619   { void *b;
00620     detach(&e1.d,&b,NULL);
00621     if(b) free(b);
00622   }
00623   free_internal(&d0);
00624   free_internal(&e1);
00625 }
00626 
00627 #define DEFN_VENCODE(T) \
00628 void vencode_##T(u8 **out, size_t *nout, size_t noutsym, T *in, size_t nin, size_t ninsym, real *cdf) \
00629 { u8 *t=NULL;                                          \
00630   size_t i,n=0;                                        \
00631   real *tcdf;                                          \
00632   TRY( tcdf=malloc(sizeof(*tcdf)*(noutsym+1)) );       \
00633   { size_t i;                                          \
00634     real v = 1.0/(real)noutsym;                        \
00635     for(i=0;i<=noutsym;++i)                            \
00636       tcdf[i] = i*v;                                   \
00637   }                                                    \
00638   { void *buf=0;                                       \
00639     n = 0;                                             \
00640     encode_u8_##T(&buf,&n,in,nin,cdf,ninsym);          \
00641     vdecode1(out,nout,buf,n,cdf,ninsym,tcdf,noutsym);  \
00642     free(buf);                                         \
00643   }                                                    \
00644   free(tcdf);                                          \
00645   return;                                              \
00646 Error:                                                 \
00647   abort();                                             \
00648 }
00649 DEFN_VENCODE(u8);
00650 DEFN_VENCODE(u16);
00651 DEFN_VENCODE(u32);
00652 DEFN_VENCODE(u64);
00653 
00654 #define DEFN_VDECODE(T) \
00655 void vdecode_##T(T **out, size_t *nout, size_t noutsym, u8 *in, size_t nin, size_t ninsym, real *cdf) \
00656 { u8 *t=NULL;                                          \
00657   size_t i,n=0;                                        \
00658   real *tcdf;                                          \
00659   TRY( tcdf=malloc(sizeof(*tcdf)*(ninsym+1)) );        \
00660   { size_t i;                                          \
00661     real v = 1.0/(real)ninsym;                         \
00662     for(i=0;i<=ninsym;++i)                             \
00663       tcdf[i] = i*v;                                   \
00664   }                                                    \
00665   { void *buf=0;                                       \
00666     n = 0;                                             \
00667     encode_u8_u8(&buf,&n,in,nin,tcdf,ninsym);          \
00668     decode_##T##_u8(out,nout,buf,n,cdf,noutsym);       \
00669     free(buf);                                         \
00670   }                                                    \
00671   free(tcdf);                                          \
00672   return;                                              \
00673 Error:                                                 \
00674   abort();                                             \
00675 }
00676 DEFN_VDECODE(u8);
00677 DEFN_VDECODE(u16);
00678 DEFN_VDECODE(u32);
00679 DEFN_VDECODE(u64);
00680  //end addtogroup ac
 All Classes Files Functions Variables Typedefs Defines