From 3ae7f9c5aab65724708684148e061cd34ee9af25 Mon Sep 17 00:00:00 2001
From: Ashley
Date: Sun, 12 Feb 2023 09:12:54 +0000
Subject: [PATCH] add alac source code - part one

---
 alac/codec/makefile     |  80 +++++++++
 alac/codec/matrix_dec.c | 390 ++++++++++++++++++++++++++++++++++++++++
 alac/codec/matrix_enc.c | 342 +++++++++++++++++++++++++++++++++++
 alac/codec/matrixlib.h  |  80 +++++++++
 4 files changed, 892 insertions(+)
 create mode 100644 alac/codec/makefile
 create mode 100644 alac/codec/matrix_dec.c
 create mode 100644 alac/codec/matrix_enc.c
 create mode 100644 alac/codec/matrixlib.h

diff --git a/alac/codec/makefile b/alac/codec/makefile
new file mode 100644
index 00000000..ceea0094
--- /dev/null
+++ b/alac/codec/makefile
@@ -0,0 +1,80 @@
+# libalac make
+
+CFLAGS = -g -O3 -c
+LFLAGS = -Wall
+CC = g++
+
+SRCDIR = .
+OBJDIR = ./obj
+INCLUDES = .
+
+HEADERS = \
+$(SRCDIR)/EndianPortable.h \
+$(SRCDIR)/aglib.h \
+$(SRCDIR)/ALACAudioTypes.h \
+$(SRCDIR)/ALACBitUtilities.h\
+$(SRCDIR)/ALACDecoder.h \
+$(SRCDIR)/ALACEncoder.h \
+$(SRCDIR)/dplib.h \
+$(SRCDIR)/matrixlib.h
+
+SOURCES = \
+$(SRCDIR)/EndianPortable.c \
+$(SRCDIR)/ALACBitUtilities.c \
+$(SRCDIR)/ALACDecoder.cpp \
+$(SRCDIR)/ALACEncoder.cpp \
+$(SRCDIR)/ag_dec.c \
+$(SRCDIR)/ag_enc.c \
+$(SRCDIR)/dp_dec.c \
+$(SRCDIR)/dp_enc.c \
+$(SRCDIR)/matrix_dec.c \
+$(SRCDIR)/matrix_enc.c
+
+OBJS = \
+EndianPortable.o \
+ALACBitUtilities.o \
+ALACDecoder.o \
+ALACEncoder.o \
+ag_dec.o \
+ag_enc.o \
+dp_dec.o \
+dp_enc.o \
+matrix_dec.o \
+matrix_enc.o
+
+libalac.a: $(OBJS)
+	ar rcs libalac.a $(OBJS)
+
+EndianPortable.o : EndianPortable.c
+	$(CC) -I $(INCLUDES) $(CFLAGS) EndianPortable.c
+
+ALACBitUtilities.o : ALACBitUtilities.c
+	$(CC) -I $(INCLUDES) $(CFLAGS) ALACBitUtilities.c
+
+ALACDecoder.o : ALACDecoder.cpp
+	$(CC) -I $(INCLUDES) $(CFLAGS) ALACDecoder.cpp
+
+ALACEncoder.o : ALACEncoder.cpp
+	$(CC) -I $(INCLUDES) $(CFLAGS) ALACEncoder.cpp
+
+ag_dec.o : ag_dec.c
+	$(CC) -I $(INCLUDES) $(CFLAGS) ag_dec.c
+
+ag_enc.o : ag_enc.c
+	$(CC) -I $(INCLUDES) $(CFLAGS) ag_enc.c
+
+dp_dec.o : dp_dec.c
+	$(CC) -I $(INCLUDES) $(CFLAGS) dp_dec.c
+
+dp_enc.o : dp_enc.c
+	$(CC) -I $(INCLUDES) $(CFLAGS) dp_enc.c
+
+matrix_dec.o : matrix_dec.c
+	$(CC) -I $(INCLUDES) $(CFLAGS) matrix_dec.c
+
+matrix_enc.o : matrix_enc.c
+	$(CC) -I $(INCLUDES) $(CFLAGS) matrix_enc.c
+
+clean:
+	-rm $(OBJS) libalac.a
+
diff --git a/alac/codec/matrix_dec.c b/alac/codec/matrix_dec.c
new file mode 100644
index 00000000..b1889b92
--- /dev/null
+++ b/alac/codec/matrix_dec.c
@@ -0,0 +1,390 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_START@
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_END@
+ */
+
+/*
+    File: matrix_dec.c
+
+    Contains: ALAC mixing/matrixing decode routines.
+
+    Copyright: (c) 2004-2011 Apple, Inc.
+*/
+
+#include "matrixlib.h"
+#include "ALACAudioTypes.h"
+
+// up to 24-bit "offset" macros for the individual bytes of a 20/24-bit word
+#if TARGET_RT_BIG_ENDIAN
+    #define LBYTE 2
+    #define MBYTE 1
+    #define HBYTE 0
+#else
+    #define LBYTE 0
+    #define MBYTE 1
+    #define HBYTE 2
+#endif
+
+/*
+    There is no plain middle-side option; instead there are various mixing
+    modes including middle-side, each lossless, as embodied in the mix()
+    and unmix() functions. These functions exploit a generalized middle-side
+    transformation:
+
+    u := [(rL + (m-r)R)/m];
+    v := L - R;
+
+    where [ ] denotes integer floor. The (lossless) inverse is
+
+    L = u + v - [rv/m];
+    R = L - v;
+*/
+
+// 16-bit routines
+
+void unmix16( int32_t * u, int32_t * v, int16_t * out, uint32_t stride, int32_t numSamples, int32_t mixbits, int32_t mixres )
+{
+    int16_t * op = out;
+    int32_t j;
+
+    if ( mixres != 0 )
+    {
+        /* matrixed stereo */
+        for ( j = 0; j < numSamples; j++ )
+        {
+            int32_t l, r;
+
+            l = u[j] + v[j] - ((mixres * v[j]) >> mixbits);
+            r = l - v[j];
+
+            op[0] = (int16_t) l;
+            op[1] = (int16_t) r;
+            op += stride;
+        }
+    }
+    else
+    {
+        /* Conventional separated stereo. */
+        for ( j = 0; j < numSamples; j++ )
+        {
+            op[0] = (int16_t) u[j];
+            op[1] = (int16_t) v[j];
+            op += stride;
+        }
+    }
+}
+
+// 20-bit routines
+// - the 20 bits of data are left-justified in 3 bytes of storage but right-aligned for input/output predictor buffers
+
+void unmix20( int32_t * u, int32_t * v, uint8_t * out, uint32_t stride, int32_t numSamples, int32_t mixbits, int32_t mixres )
+{
+    uint8_t * op = out;
+    int32_t j;
+
+    if ( mixres != 0 )
+    {
+        /* matrixed stereo */
+        for ( j = 0; j < numSamples; j++ )
+        {
+            int32_t l, r;
+
+            l = u[j] + v[j] - ((mixres * v[j]) >> mixbits);
+            r = l - v[j];
+
+            l <<= 4;
+            r <<= 4;
+
+            op[HBYTE] = (uint8_t)((l >> 16) & 0xffu);
+            op[MBYTE] = (uint8_t)((l >> 8) & 0xffu);
+            op[LBYTE] = (uint8_t)((l >> 0) & 0xffu);
+            op += 3;
+
+            op[HBYTE] = (uint8_t)((r >> 16) & 0xffu);
+            op[MBYTE] = (uint8_t)((r >> 8) & 0xffu);
+            op[LBYTE] = (uint8_t)((r >> 0) & 0xffu);
+
+            op += (stride - 1) * 3;
+        }
+    }
+    else
+    {
+        /* Conventional separated stereo. */
+        for ( j = 0; j < numSamples; j++ )
+        {
+            int32_t val;
+
+            val = u[j] << 4;
+            op[HBYTE] = (uint8_t)((val >> 16) & 0xffu);
+            op[MBYTE] = (uint8_t)((val >> 8) & 0xffu);
+            op[LBYTE] = (uint8_t)((val >> 0) & 0xffu);
+            op += 3;
+
+            val = v[j] << 4;
+            op[HBYTE] = (uint8_t)((val >> 16) & 0xffu);
+            op[MBYTE] = (uint8_t)((val >> 8) & 0xffu);
+            op[LBYTE] = (uint8_t)((val >> 0) & 0xffu);
+
+            op += (stride - 1) * 3;
+        }
+    }
+}
+
+// 24-bit routines
+// - the 24 bits of data are right-justified in the input/output predictor buffers
+
+void unmix24( int32_t * u, int32_t * v, uint8_t * out, uint32_t stride, int32_t numSamples,
+              int32_t mixbits, int32_t mixres, uint16_t * shiftUV, int32_t bytesShifted )
+{
+    uint8_t * op = out;
+    int32_t shift = bytesShifted * 8;
+    int32_t l, r;
+    int32_t j, k;
+
+    if ( mixres != 0 )
+    {
+        /* matrixed stereo */
+        if ( bytesShifted != 0 )
+        {
+            for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
+            {
+                l = u[j] + v[j] - ((mixres * v[j]) >> mixbits);
+                r = l - v[j];
+
+                l = (l << shift) | (uint32_t) shiftUV[k + 0];
+                r = (r << shift) | (uint32_t) shiftUV[k + 1];
+
+                op[HBYTE] = (uint8_t)((l >> 16) & 0xffu);
+                op[MBYTE] = (uint8_t)((l >> 8) & 0xffu);
+                op[LBYTE] = (uint8_t)((l >> 0) & 0xffu);
+                op += 3;
+
+                op[HBYTE] = (uint8_t)((r >> 16) & 0xffu);
+                op[MBYTE] = (uint8_t)((r >> 8) & 0xffu);
+                op[LBYTE] = (uint8_t)((r >> 0) & 0xffu);
+
+                op += (stride - 1) * 3;
+            }
+        }
+        else
+        {
+            for ( j = 0; j < numSamples; j++ )
+            {
+                l = u[j] + v[j] - ((mixres * v[j]) >> mixbits);
+                r = l - v[j];
+
+                op[HBYTE] = (uint8_t)((l >> 16) & 0xffu);
+                op[MBYTE] = (uint8_t)((l >> 8) & 0xffu);
+                op[LBYTE] = (uint8_t)((l >> 0) & 0xffu);
+                op += 3;
+
+                op[HBYTE] = (uint8_t)((r >> 16) & 0xffu);
+                op[MBYTE] = (uint8_t)((r >> 8) & 0xffu);
+                op[LBYTE] = (uint8_t)((r >> 0) & 0xffu);
+
+                op += (stride - 1) * 3;
+            }
+        }
+    }
+    else
+    {
+        /* Conventional separated stereo. */
+        if ( bytesShifted != 0 )
+        {
+            for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
+            {
+                l = u[j];
+                r = v[j];
+
+                l = (l << shift) | (uint32_t) shiftUV[k + 0];
+                r = (r << shift) | (uint32_t) shiftUV[k + 1];
+
+                op[HBYTE] = (uint8_t)((l >> 16) & 0xffu);
+                op[MBYTE] = (uint8_t)((l >> 8) & 0xffu);
+                op[LBYTE] = (uint8_t)((l >> 0) & 0xffu);
+                op += 3;
+
+                op[HBYTE] = (uint8_t)((r >> 16) & 0xffu);
+                op[MBYTE] = (uint8_t)((r >> 8) & 0xffu);
+                op[LBYTE] = (uint8_t)((r >> 0) & 0xffu);
+
+                op += (stride - 1) * 3;
+            }
+        }
+        else
+        {
+            for ( j = 0; j < numSamples; j++ )
+            {
+                int32_t val;
+
+                val = u[j];
+                op[HBYTE] = (uint8_t)((val >> 16) & 0xffu);
+                op[MBYTE] = (uint8_t)((val >> 8) & 0xffu);
+                op[LBYTE] = (uint8_t)((val >> 0) & 0xffu);
+                op += 3;
+
+                val = v[j];
+                op[HBYTE] = (uint8_t)((val >> 16) & 0xffu);
+                op[MBYTE] = (uint8_t)((val >> 8) & 0xffu);
+                op[LBYTE] = (uint8_t)((val >> 0) & 0xffu);
+
+                op += (stride - 1) * 3;
+            }
+        }
+    }
+}
+
+// 32-bit routines
+// - note that these really expect the internal data width to be < 32 but the arrays are 32-bit
+// - otherwise, the calculations might overflow into the 33rd bit and be lost
+// - therefore, these routines deal with the specified "unused lower" bytes in the "shift" buffers
+
+void unmix32( int32_t * u, int32_t * v, int32_t * out, uint32_t stride, int32_t numSamples,
+              int32_t mixbits, int32_t mixres, uint16_t * shiftUV, int32_t bytesShifted )
+{
+    int32_t * op = out;
+    int32_t shift = bytesShifted * 8;
+    int32_t l, r;
+    int32_t j, k;
+
+    if ( mixres != 0 )
+    {
+        //Assert( bytesShifted != 0 );
+
+        /* matrixed stereo with shift */
+        for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
+        {
+            int32_t lt, rt;
+
+            lt = u[j];
+            rt = v[j];
+
+            l = lt + rt - ((mixres * rt) >> mixbits);
+            r = l - rt;
+
+            op[0] = (l << shift) | (uint32_t) shiftUV[k + 0];
+            op[1] = (r << shift) | (uint32_t) shiftUV[k + 1];
+            op += stride;
+        }
+    }
+    else
+    {
+        if ( bytesShifted == 0 )
+        {
+            /* interleaving w/o shift */
+            for ( j = 0; j < numSamples; j++ )
+            {
+                op[0] = u[j];
+                op[1] = v[j];
+                op += stride;
+            }
+        }
+        else
+        {
+            /* interleaving with shift */
+            for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
+            {
+                op[0] = (u[j] << shift) | (uint32_t) shiftUV[k + 0];
+                op[1] = (v[j] << shift) | (uint32_t) shiftUV[k + 1];
+                op += stride;
+            }
+        }
+    }
+}
+
+// 20/24-bit <-> 32-bit helper routines (not really matrixing but convenient to put here)
+
+void copyPredictorTo24( int32_t * in, uint8_t * out, uint32_t stride, int32_t numSamples )
+{
+    uint8_t * op = out;
+    int32_t j;
+
+    for ( j = 0; j < numSamples; j++ )
+    {
+        int32_t val = in[j];
+
+        op[HBYTE] = (uint8_t)((val >> 16) & 0xffu);
+        op[MBYTE] = (uint8_t)((val >> 8) & 0xffu);
+        op[LBYTE] = (uint8_t)((val >> 0) & 0xffu);
+        op += (stride * 3);
+    }
+}
+
+void copyPredictorTo24Shift( int32_t * in, uint16_t * shift, uint8_t * out, uint32_t stride, int32_t numSamples, int32_t bytesShifted )
+{
+    uint8_t * op = out;
+    int32_t shiftVal = bytesShifted * 8;
+    int32_t j;
+
+    //Assert( bytesShifted != 0 );
+
+    for ( j = 0; j < numSamples; j++ )
+    {
+        int32_t val = in[j];
+
+        val = (val << shiftVal) | (uint32_t) shift[j];
+
+        op[HBYTE] = (uint8_t)((val >> 16) & 0xffu);
+        op[MBYTE] = (uint8_t)((val >> 8) & 0xffu);
+        op[LBYTE] = (uint8_t)((val >> 0) & 0xffu);
+        op += (stride * 3);
+    }
+}
+
+void copyPredictorTo20( int32_t * in, uint8_t * out, uint32_t stride, int32_t numSamples )
+{
+    uint8_t * op = out;
+    int32_t j;
+
+    // 32-bit predictor values are right-aligned but 20-bit output values should be left-aligned
+    // in the 24-bit output buffer
+    for ( j = 0; j < numSamples; j++ )
+    {
+        int32_t val = in[j];
+
+        op[HBYTE] = (uint8_t)((val >> 12) & 0xffu);
+        op[MBYTE] = (uint8_t)((val >> 4) & 0xffu);
+        op[LBYTE] = (uint8_t)((val << 4) & 0xffu);
+        op += (stride * 3);
+    }
+}
+
+void copyPredictorTo32( int32_t * in, int32_t * out, uint32_t stride, int32_t numSamples )
+{
+    int32_t i, j;
+
+    // this is only a subroutine to abstract the "iPod can only output 16-bit data" problem
+    for ( i = 0, j = 0; i < numSamples; i++, j += stride )
+        out[j] = in[i];
+}
+
+void copyPredictorTo32Shift( int32_t * in, uint16_t * shift, int32_t * out, uint32_t stride, int32_t numSamples, int32_t bytesShifted )
+{
+    int32_t * op = out;
+    uint32_t shiftVal = bytesShifted * 8;
+    int32_t j;
+
+    //Assert( bytesShifted != 0 );
+
+    // this is only a subroutine to abstract the "iPod can only output 16-bit data" problem
+    for ( j = 0; j < numSamples; j++ )
+    {
+        op[0] = (in[j] << shiftVal) | (uint32_t) shift[j];
+        op += stride;
+    }
+}
diff --git a/alac/codec/matrix_enc.c b/alac/codec/matrix_enc.c
new file mode 100644
index 00000000..e1943305
--- /dev/null
+++ b/alac/codec/matrix_enc.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_START@
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_END@
+ */
+
+/*
+    File: matrix_enc.c
+
+    Contains: ALAC mixing/matrixing encode routines.
+
+    Copyright: (c) 2004-2011 Apple, Inc.
+*/
+
+#include "matrixlib.h"
+#include "ALACAudioTypes.h"
+
+// up to 24-bit "offset" macros for the individual bytes of a 20/24-bit word
+#if TARGET_RT_BIG_ENDIAN
+    #define LBYTE 2
+    #define MBYTE 1
+    #define HBYTE 0
+#else
+    #define LBYTE 0
+    #define MBYTE 1
+    #define HBYTE 2
+#endif
+
+/*
+    There is no plain middle-side option; instead there are various mixing
+    modes including middle-side, each lossless, as embodied in the mix()
+    and unmix() functions. These functions exploit a generalized middle-side
+    transformation:
+
+    u := [(rL + (m-r)R)/m];
+    v := L - R;
+
+    where [ ] denotes integer floor. The (lossless) inverse is
+
+    L = u + v - [rv/m];
+    R = L - v;
+*/
+
+// 16-bit routines
+
+void mix16( int16_t * in, uint32_t stride, int32_t * u, int32_t * v, int32_t numSamples, int32_t mixbits, int32_t mixres )
+{
+    int16_t * ip = in;
+    int32_t j;
+
+    if ( mixres != 0 )
+    {
+        int32_t mod = 1 << mixbits;
+        int32_t m2;
+
+        /* matrixed stereo */
+        m2 = mod - mixres;
+        for ( j = 0; j < numSamples; j++ )
+        {
+            int32_t l, r;
+
+            l = (int32_t) ip[0];
+            r = (int32_t) ip[1];
+            ip += stride;
+            u[j] = (mixres * l + m2 * r) >> mixbits;
+            v[j] = l - r;
+        }
+    }
+    else
+    {
+        /* Conventional separated stereo. */
+        for ( j = 0; j < numSamples; j++ )
+        {
+            u[j] = (int32_t) ip[0];
+            v[j] = (int32_t) ip[1];
+            ip += stride;
+        }
+    }
+}
+
+// 20-bit routines
+// - the 20 bits of data are left-justified in 3 bytes of storage but right-aligned for input/output predictor buffers
+
+void mix20( uint8_t * in, uint32_t stride, int32_t * u, int32_t * v, int32_t numSamples, int32_t mixbits, int32_t mixres )
+{
+    int32_t l, r;
+    uint8_t * ip = in;
+    int32_t j;
+
+    if ( mixres != 0 )
+    {
+        /* matrixed stereo */
+        int32_t mod = 1 << mixbits;
+        int32_t m2 = mod - mixres;
+
+        for ( j = 0; j < numSamples; j++ )
+        {
+            l = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
+            l = (l << 8) >> 12;
+            ip += 3;
+
+            r = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
+            r = (r << 8) >> 12;
+            ip += (stride - 1) * 3;
+
+            u[j] = (mixres * l + m2 * r) >> mixbits;
+            v[j] = l - r;
+        }
+    }
+    else
+    {
+        /* Conventional separated stereo. */
+        for ( j = 0; j < numSamples; j++ )
+        {
+            l = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
+            u[j] = (l << 8) >> 12;
+            ip += 3;
+
+            r = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
+            v[j] = (r << 8) >> 12;
+            ip += (stride - 1) * 3;
+        }
+    }
+}
+
+// 24-bit routines
+// - the 24 bits of data are right-justified in the input/output predictor buffers
+
+void mix24( uint8_t * in, uint32_t stride, int32_t * u, int32_t * v, int32_t numSamples,
+            int32_t mixbits, int32_t mixres, uint16_t * shiftUV, int32_t bytesShifted )
+{
+    int32_t l, r;
+    uint8_t * ip = in;
+    int32_t shift = bytesShifted * 8;
+    uint32_t mask = (1ul << shift) - 1;
+    int32_t j, k;
+
+    if ( mixres != 0 )
+    {
+        /* matrixed stereo */
+        int32_t mod = 1 << mixbits;
+        int32_t m2 = mod - mixres;
+
+        if ( bytesShifted != 0 )
+        {
+            for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
+            {
+                l = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
+                l = (l << 8) >> 8;
+                ip += 3;
+
+                r = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
+                r = (r << 8) >> 8;
+                ip += (stride - 1) * 3;
+
+                shiftUV[k + 0] = (uint16_t)(l & mask);
+                shiftUV[k + 1] = (uint16_t)(r & mask);
+
+                l >>= shift;
+                r >>= shift;
+
+                u[j] = (mixres * l + m2 * r) >> mixbits;
+                v[j] = l - r;
+            }
+        }
+        else
+        {
+            for ( j = 0; j < numSamples; j++ )
+            {
+                l = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
+                l = (l << 8) >> 8;
+                ip += 3;
+
+                r = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
+                r = (r << 8) >> 8;
+                ip += (stride - 1) * 3;
+
+                u[j] = (mixres * l + m2 * r) >> mixbits;
+                v[j] = l - r;
+            }
+        }
+    }
+    else
+    {
+        /* Conventional separated stereo. */
+        if ( bytesShifted != 0 )
+        {
+            for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
+            {
+                l = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
+                l = (l << 8) >> 8;
+                ip += 3;
+
+                r = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
+                r = (r << 8) >> 8;
+                ip += (stride - 1) * 3;
+
+                shiftUV[k + 0] = (uint16_t)(l & mask);
+                shiftUV[k + 1] = (uint16_t)(r & mask);
+
+                l >>= shift;
+                r >>= shift;
+
+                u[j] = l;
+                v[j] = r;
+            }
+        }
+        else
+        {
+            for ( j = 0; j < numSamples; j++ )
+            {
+                l = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
+                u[j] = (l << 8) >> 8;
+                ip += 3;
+
+                r = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
+                v[j] = (r << 8) >> 8;
+                ip += (stride - 1) * 3;
+            }
+        }
+    }
+}
+
+// 32-bit routines
+// - note that these really expect the internal data width to be < 32 but the arrays are 32-bit
+// - otherwise, the calculations might overflow into the 33rd bit and be lost
+// - therefore, these routines deal with the specified "unused lower" bytes in the "shift" buffers
+
+void mix32( int32_t * in, uint32_t stride, int32_t * u, int32_t * v, int32_t numSamples,
+            int32_t mixbits, int32_t mixres, uint16_t * shiftUV, int32_t bytesShifted )
+{
+    int32_t * ip = in;
+    int32_t shift = bytesShifted * 8;
+    uint32_t mask = (1ul << shift) - 1;
+    int32_t l, r;
+    int32_t j, k;
+
+    if ( mixres != 0 )
+    {
+        int32_t mod = 1 << mixbits;
+        int32_t m2;
+
+        //Assert( bytesShifted != 0 );
+
+        /* matrixed stereo with shift */
+        m2 = mod - mixres;
+        for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
+        {
+            l = ip[0];
+            r = ip[1];
+            ip += stride;
+
+            shiftUV[k + 0] = (uint16_t)(l & mask);
+            shiftUV[k + 1] = (uint16_t)(r & mask);
+
+            l >>= shift;
+            r >>= shift;
+
+            u[j] = (mixres * l + m2 * r) >> mixbits;
+            v[j] = l - r;
+        }
+    }
+    else
+    {
+        if ( bytesShifted == 0 )
+        {
+            /* de-interleaving w/o shift */
+            for ( j = 0; j < numSamples; j++ )
+            {
+                u[j] = ip[0];
+                v[j] = ip[1];
+                ip += stride;
+            }
+        }
+        else
+        {
+            /* de-interleaving with shift */
+            for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
+            {
+                l = ip[0];
+                r = ip[1];
+                ip += stride;
+
+                shiftUV[k + 0] = (uint16_t)(l & mask);
+                shiftUV[k + 1] = (uint16_t)(r & mask);
+
+                l >>= shift;
+                r >>= shift;
+
+                u[j] = l;
+                v[j] = r;
+            }
+        }
+    }
+}
+
+// 20/24-bit <-> 32-bit helper routines (not really matrixing but convenient to put here)
+
+void copy20ToPredictor( uint8_t * in, uint32_t stride, int32_t * out, int32_t numSamples )
+{
+    uint8_t * ip = in;
+    int32_t j;
+
+    for ( j = 0; j < numSamples; j++ )
+    {
+        int32_t val;
+
+        // 20-bit values are left-aligned in the 24-bit input buffer but right-aligned in the 32-bit output buffer
+        val = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
+        out[j] = (val << 8) >> 12;
+        ip += stride * 3;
+    }
+}
+
+void copy24ToPredictor( uint8_t * in, uint32_t stride, int32_t * out, int32_t numSamples )
+{
+    uint8_t * ip = in;
+    int32_t j;
+
+    for ( j = 0; j < numSamples; j++ )
+    {
+        int32_t val;
+
+        val = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
+        out[j] = (val << 8) >> 8;
+        ip += stride * 3;
+    }
+}
diff --git a/alac/codec/matrixlib.h b/alac/codec/matrixlib.h
new file mode 100644
index 00000000..0a4f3718
--- /dev/null
+++ b/alac/codec/matrixlib.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ * + * @APPLE_APACHE_LICENSE_HEADER_START@ + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * @APPLE_APACHE_LICENSE_HEADER_END@ + */ + +/* + File: matrixlib.h + + Contains: ALAC mixing/matrixing routines to/from 32-bit predictor buffers. + + Copyright: Copyright (C) 2004 to 2011 Apple, Inc. +*/ + +#ifndef __MATRIXLIB_H +#define __MATRIXLIB_H + +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// 16-bit routines +void mix16( int16_t * in, uint32_t stride, int32_t * u, int32_t * v, int32_t numSamples, int32_t mixbits, int32_t mixres ); +void unmix16( int32_t * u, int32_t * v, int16_t * out, uint32_t stride, int32_t numSamples, int32_t mixbits, int32_t mixres ); + +// 20-bit routines +void mix20( uint8_t * in, uint32_t stride, int32_t * u, int32_t * v, int32_t numSamples, int32_t mixbits, int32_t mixres ); +void unmix20( int32_t * u, int32_t * v, uint8_t * out, uint32_t stride, int32_t numSamples, int32_t mixbits, int32_t mixres ); + +// 24-bit routines +// - 24-bit data sometimes compresses better by shifting off the bottom byte so these routines deal with +// the specified "unused lower bytes" in the combined "shift" buffer +void mix24( uint8_t * in, uint32_t stride, int32_t * u, int32_t * v, int32_t numSamples, + int32_t mixbits, int32_t mixres, uint16_t * shiftUV, int32_t bytesShifted ); +void unmix24( int32_t * u, int32_t * v, uint8_t * out, uint32_t stride, int32_t numSamples, + int32_t mixbits, int32_t mixres, uint16_t * shiftUV, int32_t bytesShifted ); + +// 32-bit routines +// - note that these really expect the internal data width to be < 32-bit but the arrays are 32-bit +// - otherwise, the calculations might overflow into the 33rd bit and be lost +// - therefore, these routines deal with the specified "unused lower" bytes in the combined "shift" buffer +void mix32( int32_t * in, uint32_t stride, int32_t * u, int32_t * v, int32_t numSamples, + int32_t mixbits, int32_t mixres, uint16_t * shiftUV, int32_t bytesShifted ); +void unmix32( int32_t * u, int32_t * v, int32_t * out, uint32_t stride, int32_t numSamples, + int32_t mixbits, int32_t mixres, uint16_t * shiftUV, int32_t bytesShifted ); + +// 20/24/32-bit <-> 32-bit helper routines (not really matrixing but convenient to put here) +void copy20ToPredictor( uint8_t * in, uint32_t stride, int32_t * out, int32_t numSamples ); +void copy24ToPredictor( uint8_t * in, uint32_t stride, int32_t * out, int32_t numSamples ); + +void copyPredictorTo24( int32_t * in, uint8_t * out, uint32_t stride, int32_t numSamples ); +void copyPredictorTo24Shift( int32_t * in, uint16_t * shift, uint8_t * out, uint32_t stride, int32_t numSamples, int32_t bytesShifted ); +void copyPredictorTo20( int32_t * in, uint8_t * out, uint32_t stride, int32_t numSamples ); + +void copyPredictorTo32( int32_t * in, int32_t * out, uint32_t stride, int32_t numSamples ); +void copyPredictorTo32Shift( int32_t * in, uint16_t * shift, int32_t * out, uint32_t stride, int32_t numSamples, int32_t bytesShifted ); + +#ifdef __cplusplus +} +#endif 
+
+#endif /* __MATRIXLIB_H */
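
Note (not part of the patch): the matrixing comment in matrix_dec.c/matrix_enc.c can be checked end to end. With v = L - R, the mix gives u = R + [rv/m], so the decoder's L = u + v - [rv/m] and R = L - v recover the inputs bit-exactly. The sketch below round-trips random data through the mix16()/unmix16() pair declared in matrixlib.h. The file name, the build line, and the parameter choices (mixbits = 2, mixres = 2, i.e. m = 4, r = 2) are illustrative assumptions; it presumes libalac.a has been built with the makefile above.

/* roundtrip16.c - hypothetical harness, not part of this patch.
 * Assumed build: g++ -I . roundtrip16.c libalac.a -o roundtrip16
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "matrixlib.h"

#define kNumSamples 4096

int main( void )
{
    static int16_t in[kNumSamples * 2];   /* interleaved stereo input */
    static int16_t out[kNumSamples * 2];  /* interleaved stereo output */
    static int32_t u[kNumSamples], v[kNumSamples];
    int32_t j;

    /* fill with arbitrary full-range 16-bit samples */
    for ( j = 0; j < kNumSamples * 2; j++ )
        in[j] = (int16_t)((rand() % 65536) - 32768);

    /* stride counts interleaved samples per frame: 2 for stereo;
       mixbits = 2 and mixres = 2 are example mixing parameters */
    mix16( in, 2, u, v, kNumSamples, 2, 2 );
    unmix16( u, v, out, 2, kNumSamples, 2, 2 );

    for ( j = 0; j < kNumSamples * 2; j++ )
    {
        if ( in[j] != out[j] )
        {
            printf( "mismatch at index %d\n", j );
            return 1;
        }
    }

    printf( "mix16/unmix16 round trip is lossless\n" );
    return 0;
}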
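The 24-bit path adds one wrinkle: when bytesShifted is nonzero, mix24() stashes each channel's discarded low byte in the combined shiftUV buffer and matrixes only the remaining high bits, and unmix24() splices the bytes back together. A similar hypothetical harness (same assumptions and parameter choices as above) checks that this, too, is lossless; the 20- and 32-bit variants follow the same identity, differing only in byte packing and shift bookkeeping.

/* roundtrip24.c - hypothetical harness, not part of this patch.
 * Assumed build: g++ -I . roundtrip24.c libalac.a -o roundtrip24
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "matrixlib.h"

#define kNumSamples 4096

int main( void )
{
    static uint8_t in[kNumSamples * 2 * 3];   /* 24-bit packed stereo, 3 bytes per sample */
    static uint8_t out[kNumSamples * 2 * 3];
    static int32_t u[kNumSamples], v[kNumSamples];
    static uint16_t shiftUV[kNumSamples * 2]; /* one low-byte entry per channel per sample */
    int32_t j;

    for ( j = 0; j < kNumSamples * 2 * 3; j++ )
        in[j] = (uint8_t)(rand() & 0xff);

    /* bytesShifted = 1: the bottom byte of each channel rides in shiftUV
       while the top 16 bits go through the mid-side matrix */
    mix24( in, 2, u, v, kNumSamples, 2, 2, shiftUV, 1 );
    unmix24( u, v, out, 2, kNumSamples, 2, 2, shiftUV, 1 );

    for ( j = 0; j < kNumSamples * 2 * 3; j++ )
    {
        if ( in[j] != out[j] )
        {
            printf( "mismatch at byte %d\n", j );
            return 1;
        }
    }

    printf( "mix24/unmix24 round trip (bytesShifted = 1) is lossless\n" );
    return 0;
}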