added Ben Saylor's pvoc~ and partconv~ from his sources so they can be added to Pd-extended

svn path=/trunk/externals/bsaylor/; revision=4566
author: Hans-Christoph Steiner <eighthave@users.sourceforge.net> 2006-02-07 18:23:33 +0000
committer: IOhannes m zmölnig <zmoelnig@iem.at> 2015-10-14 13:36:00 +0200
commit: 2a5a823d12708ca0fa5c56347593afd9ce3b9b83 (patch)
tree: eeb5906131003278b368711eb1a5238f39cd3f50
parent: dac2e8e0cb5980d41dc3879338816d6f52a5ff8a (diff)
15 files changed, 1996 insertions, 0 deletions
diff --git a/GPL.txt b/GPL.txt
new file mode 100644
index 0000000..5b6e7c6
--- /dev/null
+++ b/GPL.txt
@@ -0,0 +1,340 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+                       59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..24f589e
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,10 @@
+TARGET=bsaylor
+
+default: 
+	make -C ../ $(TARGET)
+
+install:
+	make -C ../ $(TARGET)_install
+
+clean:
+	make -C ../ $(TARGET)_clean
diff --git a/README.txt b/README.txt
new file mode 100644
index 0000000..87bea76
--- /dev/null
+++ b/README.txt
@@ -0,0 +1,39 @@
+
+aenv~ is a asymptotic ADSR envelope generator; The output value approaches the
+target values as asymptotes.
+
+
+
+partconv~ is an external that implements partitioned fast convolution,
+suitable for convolving input signals with long impulse responses for reverbs,
+etc.  This release offers much improved performance, as well as independence
+of the partition size and your patch's blocksize.  It includes Altivec code
+from Chris Clepper.  There's also some SSE 1 code that produces almost correct
+results but doesn't seem to improve performance.  If you are familiar with SSE
+and want to have a go at writing an SSE version, please do!
+partconv~ requires FFTW3 (http://fftw.org)
+
+
+
+pvoc~ is a phase vocoder based on Pd's 09.pvoc.pd example patch. Advantages over the abstraction include (reportedly) faster execution, instantaneous response to input, and adjustable phase locking. It requires FFTW3. 
+bensaylor's Home 
+
+
+
+susloop~: sample player with various loop methods (ping-pong, ... ) think
+tracker. svf~  This is a signal-controlled port of Steve Harris' state
+variable filter LADSPA plugin.
+
+
+
+svf~: a signal-controlled port of Steve Harris' state variable filter
+LADSPA plugin (http://plugin.org.uk).
+
+
+
+zhzhx~: Turns the input signal into a staticky, distorted mess. Comes with tone
+control. 
+
+
+
+Benjamin R. Saylor <bensaylor@fastmail.fm>
diff --git a/altivec-perform.inc.c b/altivec-perform.inc.c
new file mode 100644
index 0000000..3ce1cf0
--- /dev/null
+++ b/altivec-perform.inc.c
@@ -0,0 +1,283 @@
+//altivec version by Chris Clepper
+//
+static t_int *partconv_perform(t_int *w)
+{
+    t_partconv *x = (t_partconv *)(w[1]);
+    t_float *in = (t_float *)(w[2]);
+    t_float *out = (t_float *)(w[3]);
+    int n = (int)(w[4]);
+    int i;
+    int j;
+    int k;	// bin
+    int p;	// partition
+    int endpart;
+    fftwf_complex *cursumbuf_fd;
+    float *sumbuf1ptr;
+    float *sumbuf2ptr;
+
+    union {
+        unsigned char c[16];
+        vector unsigned char v;
+    }permfill;
+
+    union {
+        float f[4];
+        vector float v;
+    }floatfill;
+
+    vector float *load_input, *load_irpart;
+    vector float store_multbuf1,store_multbuf2;
+    vector float vinput_fd0, vinput_fd4; //input vectors
+    vector float virpart_fd0, virpart_fd4;  //ir partition vectors
+    vector float permtemp1357, permtemp0246;
+    vector float vzero;// vscale;
+    vector unsigned char input_0022, input_1133, perm_0246, perm_1357, perm_0123,perm_4567;
+    vector float vtemp1, vtemp2, vtemp3, vtemp4, vtemp5, vtemp6, vtemp7, vtemp8;
+
+    floatfill.f[0] = 0.f;
+    floatfill.f[1] = 0.f;
+    floatfill.f[2] = 0.f;
+    floatfill.f[3] = 0.f;
+    vzero = floatfill.v;
+
+    //store_multbuf = vzero;
+
+    floatfill.f[0] = x->scale;
+    floatfill.f[1] = x->scale;
+    floatfill.f[2] = x->scale;
+    floatfill.f[3] = x->scale;
+    //vscale = floatfill.v;
+
+    //fill the permute buffer for the first input_fd multiply
+    permfill.c[0] = 0; permfill.c[1] = 1; permfill.c[2] = 2; permfill.c[3] = 3; //first float
+    permfill.c[4] = 0; permfill.c[5] = 1; permfill.c[6] = 2; permfill.c[7] = 3; //second float
+    permfill.c[8] = 8; permfill.c[9] = 9; permfill.c[10] = 10; permfill.c[11] = 11; //third float
+    permfill.c[12] = 8; permfill.c[13] = 9; permfill.c[14] = 10; permfill.c[15] = 11; //fourth float
+
+    input_0022 = permfill.v;
+
+    permfill.c[0] = 4; permfill.c[1] = 5; permfill.c[2] = 6; permfill.c[3] = 7; //first float
+    permfill.c[4] = 4; permfill.c[5] = 5; permfill.c[6] = 6; permfill.c[7] = 7; //second float
+    permfill.c[8] = 12; permfill.c[9] = 13; permfill.c[10] = 14; permfill.c[11] = 15; //third float
+    permfill.c[12] = 12; permfill.c[13] = 13; permfill.c[14] = 14; permfill.c[15] = 15; //fourth float
+
+    input_1133 = permfill.v;
+
+    //perm_0246
+    //0,1,2,3,        8,9,10,11,          16,17,18,19,          24,25,26,27
+    permfill.c[0] = 0; permfill.c[1] = 1; permfill.c[2] = 2; permfill.c[3] = 3; //first float
+    permfill.c[4] = 8; permfill.c[5] = 9; permfill.c[6] = 10; permfill.c[7] = 11; //second float
+    permfill.c[8] = 16; permfill.c[9] = 17; permfill.c[10] = 18; permfill.c[11] = 19; //third float
+    permfill.c[12] = 24; permfill.c[13] = 25; permfill.c[14] = 26; permfill.c[15] = 27; //fourth float
+
+    perm_0246 = permfill.v;
+
+    // perm_1357
+    //         4,5,6,7,         12,13,14,15,           20,21,22,23,         28,29,30,31
+    permfill.c[0] = 4; permfill.c[1] = 5; permfill.c[2] = 6; permfill.c[3] = 7; //first float
+    permfill.c[4] = 12; permfill.c[5] = 13; permfill.c[6] = 14; permfill.c[7] = 15; //second float
+    permfill.c[8] = 20; permfill.c[9] = 21; permfill.c[10] = 22; permfill.c[11] = 23; //third float
+    permfill.c[12] = 28; permfill.c[13] = 29; permfill.c[14] = 30; permfill.c[15] = 31; //fourth float
+
+    perm_1357 = permfill.v;
+
+    // perm_0123  from [0,2,4,6] and [1,3,5,7]
+    //         0,1,2,3	16,17,18,19	4,5,6,7	20,21,22,23
+    permfill.c[0] = 0; permfill.c[1] = 1; permfill.c[2] = 2; permfill.c[3] = 3; //first float
+    permfill.c[4] = 16; permfill.c[5] = 17; permfill.c[6] = 18; permfill.c[7] = 19; //second float
+    permfill.c[8] = 4; permfill.c[9] = 5; permfill.c[10] = 6; permfill.c[11] = 7; //third float
+    permfill.c[12] = 20; permfill.c[13] = 21; permfill.c[14] = 22; permfill.c[15] = 23; //fourth float
+
+    perm_0123 = permfill.v;
+
+    // perm_4567  from [0,2,4,6] and [1,3,5,7]
+    //        8.9.10.11      24,25,26,27              12,13,14,15         28,29,30,31
+    permfill.c[0] = 8; permfill.c[1] = 9; permfill.c[2] = 10; permfill.c[3] = 11; //first float
+    permfill.c[4] = 24; permfill.c[5] = 25; permfill.c[6] = 26; permfill.c[7] = 27; //second float
+    permfill.c[8] = 12; permfill.c[9] = 13; permfill.c[10] = 14; permfill.c[11] = 15; //third float
+    permfill.c[12] = 28; permfill.c[13] = 29; permfill.c[14] = 30; permfill.c[15] = 31; //fourth float
+
+    // perm_4567  from [0,2,4,6] and [1,3,5,7]
+    //        8.9.10.11      24,25,26,27              12,13,14,15         28,29,30,31
+    permfill.c[0] = 8; permfill.c[1] = 9; permfill.c[2] = 10; permfill.c[3] = 11; //first float
+    permfill.c[4] = 24; permfill.c[5] = 25; permfill.c[6] = 26; permfill.c[7] = 27; //second float
+    permfill.c[8] = 12; permfill.c[9] = 13; permfill.c[10] = 14; permfill.c[11] = 15; //third float
+    permfill.c[12] = 28; permfill.c[13] = 29; permfill.c[14] = 30; permfill.c[15] = 31; //fourth float
+
+    perm_4567 = permfill.v;
+    
+
+    memcpy(&(x->inbuf[x->inbufpos]), in, n*sizeof(float));  // gather a block of input into input buffer
+    x->inbufpos += n;
+    if (x->inbufpos >= x->partsize) {
+        // input buffer is full, so we begin a new cycle
+
+        if (x->pd_blocksize != n) {
+            // the patch's blocksize has change since we last dealt the work
+            x->pd_blocksize = n;
+            partconv_deal_work(x);
+        }
+
+        x->inbufpos = 0;
+        x->curcall = 0;
+        x->curpart = 0;
+        memcpy(x->input_td, x->inbuf, x->partsize * sizeof(float));  // copy 'gathering' input buffer into 'transform' buffer
+        memset(&(x->input_td[x->partsize]), 0, (x->paddedsize - x->partsize) * sizeof(float));  // pad
+
+        fftwf_execute(x->input_plan);  // transform the input
+
+        // everything has been read out of prev sumbuf, so clear it
+        memset(x->sumbuf->prev->td, 0,  x->paddedsize * sizeof(float));
+
+        // advance sumbuf pointers
+        x->sumbuf = x->sumbuf->next;
+        x->sumbuf->readpos = 0;
+        x->sumbuf->prev->readpos = x->partsize;
+    }
+
+    // convolve this call's portion of partitions
+    endpart = x->curpart + x->parts_per_call[x->curcall];
+    if (endpart > x->nparts)  // FIXME does this ever happen?
+        endpart = x->nparts;
+    for (p = x->curpart; p < endpart; p++) {
+        //printf("convolving with partition %d\n", p);
+        //
+        // multiply the input block by the partition, accumulating the result in the appropriate sumbuf
+        //
+
+        // FIXME do this in a circular list-type fashion so we don't need "index"
+        cursumbuf_fd =  x->sumbufs[(x->sumbuf->index + p) % x->nsumbufs].fd;
+
+        for (k = 0; k < x->nbins; k+=4) {
+
+
+            
+            
+            load_input = (vector float *)&x->input_fd[k][0];
+            vinput_fd0 = vec_ld(0, (vector float *) load_input);
+
+            vtemp1 = vec_perm(load_input[0],vzero,input_0022);
+
+            load_input = (vector float *)&x->input_fd[k][4];
+            //load input_fd[k][4]
+            //vector will have input_fd[4,5,6,7]
+            vinput_fd4 = vec_ld(0, (vector float *) &x->input_fd[k][4]);
+
+            vtemp3 = vec_perm(load_input[0],vzero,input_0022);
+
+            //vec_ld irpart[p][k][0]
+            //vector will have irpart_fd[0,1,2,3]
+
+            load_irpart = (vector float *) &x->irpart_fd[p][k][0];
+
+            virpart_fd0 = vec_ld(0,&x->irpart_fd[p][k][0]);
+            vtemp1 = vec_madd(vtemp1,load_irpart[0],vzero);
+
+            load_irpart = (vector float *) &x->irpart_fd[p][k][4];
+            virpart_fd4 = vec_ld(0,&x->irpart_fd[p][k][4]);
+            vtemp3 = vec_madd(vtemp3,load_irpart[0],vzero);
+
+
+            store_multbuf1 = vec_ld(0,&cursumbuf_fd[k][0]);
+
+            store_multbuf2 = vec_ld(0,&cursumbuf_fd[k][4]);
+
+
+            //vec_perm to line up the elements
+            // irpart is fine
+            // make vector of input_fd[0] [2] and [4] [6]
+            //make vector of input_fd[1] [3] and [5] [7]
+            //
+            // permute only works on bytes so the first float is bytes 0,1,2,3 the second is 4,5,6,7 etc
+            //
+            // 0,1,2,3,        8,9,10,11,          16,17,18,19,          24,25,26,27
+            //
+            //         4,5,6,7,         12,13,14,15,           20,21,22,23,         28,29,30,31
+
+
+            //vec_perm temp1 and temp3 into [0,2,4,6]
+            permtemp0246 = vec_perm(vtemp1,vtemp3,perm_0246);
+
+            //and [1,3,5,7]
+            permtemp1357 = vec_perm(vtemp1,vtemp3,perm_1357);
+
+            //vinput_fd[1,3,5,7]
+            vtemp2 = vec_perm(vinput_fd0,vinput_fd4,perm_1357);
+
+            //irpart[1,3,5,7]
+            vtemp4 = vec_perm(virpart_fd0,virpart_fd4,perm_1357);
+
+            //irpart[0,2,4,6]
+            vtemp5 = vec_perm(virpart_fd0,virpart_fd4,perm_0246);
+
+            //vec_nmsub  input_fd[1,3,5,7]  irpart[1,3,5,7] temp[0,2,4,6]
+            vtemp6 = vec_nmsub(vtemp2,vtemp4,permtemp0246);
+
+            //vec_madd  input_fd[1,3,5,7] irpart[0,2,4,6] temp[1,3,5,7]
+            vtemp7 = vec_madd(vtemp2,vtemp5,permtemp1357);
+
+
+            
+
+            //vec_madd  all by scale - this is now done after the loop
+          //  vtemp6 = vec_madd(vtemp6,vscale,vzero);
+
+           // vtemp7 = vec_madd(vtemp7,vscale,vzero);
+
+
+            //vec_perm data back into place - tricky!
+
+            //vec_perm nmsub_result[0,2,4,6] madd_result [1,3,5,7]
+            // results will be [0,1,2,3] [4,5,6,7]
+            vtemp1 = vec_perm(vtemp6,vtemp7,perm_0123);
+
+            vtemp2 = vec_perm(vtemp6,vtemp7,perm_4567);
+
+
+            //vec_st
+            
+            store_multbuf1 = vec_add(store_multbuf1,vtemp1);
+            store_multbuf2 = vec_add(store_multbuf2,vtemp2);
+
+            vec_st(store_multbuf1,0,&cursumbuf_fd[k][0]);
+            
+            vec_st(store_multbuf2,0,&cursumbuf_fd[k][4]);
+            
+            
+            /*
+            cursumbuf_fd[k][0]
+            +=
+            (  x->input_fd[k][0] * x->irpart_fd[p][k][0]
+               - x->input_fd[k][1] * x->irpart_fd[p][k][1]);
+
+            cursumbuf_fd[k][1]
+                +=
+                (  x->input_fd[k][0] * x->irpart_fd[p][k][1]
+                   + x->input_fd[k][1] * x->irpart_fd[p][k][0]);*/
+        }
+    }
+    x->curpart = p;
+
+    // The convolution of the fresh block of input with the first partition of the IR
+    // is the last thing that gets summed into the current sumbuf before it gets IFFTed and starts being output.
+    // This happens during the first call of every cycle.
+    if (x->curcall == 0) {
+        // current sumbuf has been filled, so transform it (TD to FD).
+        // Output loop will begin to read it and sum it with the last one
+        fftwf_execute(x->sumbuf->plan);
+    }
+
+    // we're summing and outputting the first half of the most recently IFFTed sumbuf
+    // and the second half of the previous one
+    sumbuf1ptr = &(x->sumbuf->td[x->sumbuf->readpos]);
+    sumbuf2ptr = &(x->sumbuf->prev->td[x->sumbuf->prev->readpos]);
+    for (i = 0; i < n; i++) {
+        *(out++) = (*(sumbuf1ptr++) + *(sumbuf2ptr++)) * x->scale;
+    }
+    x->sumbuf->readpos += n;
+    x->sumbuf->prev->readpos += n;
+
+    x->curcall++;
+
+    return (w+5);
+}
diff --git a/help/partconv~-help.pd b/help/partconv~-help.pd
new file mode 100644
index 0000000..bda06cf
--- /dev/null
+++ b/help/partconv~-help.pd
@@ -0,0 +1,74 @@
+#N canvas 0 30 680 606 10;
+#X obj 201 569 dac~;
+#X obj 40 168 dirac~;
+#X msg 40 148 0;
+#X obj 177 190 osc~;
+#X obj 177 164 mtof;
+#X floatatom 177 141 5 0 0 0 - - -;
+#X obj 419 358 soundfiler;
+#X obj 419 259 loadbang;
+#X obj 201 541 *~;
+#X obj 302 282 openpanel;
+#X obj 302 309 t b s;
+#X obj 302 240 bng 32 250 50 0 empty empty empty 0 -6 0 8 -260818 -1
+-1;
+#X obj 331 466 vsl 8 128 0 100 0 1 empty empty empty 0 -8 0 8 -260818
+-1 -1 9700 1;
+#X text 300 219 load a soundfile to use as the impulse response;
+#X text 9 127 test with an impulse;
+#X text 431 7 Ben Saylor - bensaylor@fastmail.fm;
+#X text 105 7 partitioned convolution external;
+#X obj 437 388 table irL;
+#X msg 272 350 set irL;
+#X msg 341 350 set irR;
+#X obj 230 541 *~;
+#X obj 259 426 partconv~ irR 2048;
+#X msg 427 333 read -resize \$1 irL irR;
+#X obj 437 410 table irR;
+#X msg 232 190 0;
+#X msg 270 190 0.1;
+#X obj 177 212 *~ 0;
+#X obj 271 515 dbtorms;
+#X text 226 155 test with a sine;
+#X obj 110 187 adc~ 1;
+#X text 10 8 partconv~;
+#X text 26 44 version 0.2 \, August 12 \, 2005;
+#X text 258 43 [partconv~ <IR array> <partition size>];
+#X obj 126 426 partconv~ irL 2048;
+#X msg 419 283 read -resize noiseburst.wav irL;
+#X msg 419 307 read -resize noiseburst.wav irR;
+#X text 258 71 The partition size must be a power of 2 greater than
+or equal to the block size. Larger partition sizes are more efficient
+\, to a point \, but increase latency (the delay between input and
+output is equal to the partition size minus the block size).;
+#X connect 1 0 21 0;
+#X connect 1 0 33 0;
+#X connect 2 0 1 0;
+#X connect 3 0 26 0;
+#X connect 4 0 3 0;
+#X connect 5 0 4 0;
+#X connect 7 0 34 0;
+#X connect 7 0 35 0;
+#X connect 8 0 0 0;
+#X connect 9 0 10 0;
+#X connect 10 0 18 0;
+#X connect 10 0 19 0;
+#X connect 10 1 22 0;
+#X connect 11 0 9 0;
+#X connect 12 0 27 0;
+#X connect 18 0 33 0;
+#X connect 19 0 21 0;
+#X connect 20 0 0 1;
+#X connect 21 0 20 0;
+#X connect 22 0 6 0;
+#X connect 24 0 26 1;
+#X connect 25 0 26 1;
+#X connect 26 0 21 0;
+#X connect 26 0 33 0;
+#X connect 27 0 8 1;
+#X connect 27 0 20 1;
+#X connect 29 0 21 0;
+#X connect 29 0 33 0;
+#X connect 33 0 8 0;
+#X connect 34 0 6 0;
+#X connect 35 0 6 0;
diff --git a/help/pvoc~-help.pd b/help/pvoc~-help.pd
new file mode 100644
index 0000000..4a9f7f2
--- /dev/null
+++ b/help/pvoc~-help.pd
@@ -0,0 +1,85 @@
+#N canvas 143 0 760 635 10;
+#X obj 265 611 dac~;
+#X obj 304 561 vsl 8 64 0 1 0 1 empty empty empty 0 -8 0 8 -260818
+-1 -1 3200 1;
+#X obj 265 587 *~;
+#X obj 68 158 cnv 15 580 220 empty empty empty 20 12 0 14 -195568 -66577
+0;
+#N canvas 0 0 450 300 graph1 0;
+#X array sample 62079 float 0;
+#X coords 0 1 62078 -1 560 100 1;
+#X restore 77 52 graph;
+#X msg 346 506 read -resize /usr/local/lib/pd/doc/sound/voice.wav sample
+;
+#X obj 346 526 soundfiler;
+#X obj 346 547 s length;
+#X obj 346 485 loadbang;
+#X obj 77 172 hsl 560 15 0 1 0 0 empty empty time-position -2 -6 0
+8 -233017 -1 -1 26900 1;
+#X obj 214 222 vsl 15 128 0.5 2 1 0 empty empty pitch-shift 0 -8 0
+8 -261681 -1 -1 6350 1;
+#X msg 182 203 1;
+#X floatatom 214 356 5 0 0 0 - - -;
+#X obj 280 222 vsl 15 128 0 1 0 0 empty empty phase-locking 0 -8 0
+8 -225280 -1 -1 0 1;
+#X floatatom 280 356 5 0 0 0 - - -;
+#X obj 74 532 line~;
+#X msg 74 510 \$1 200;
+#X obj 74 488 *;
+#X obj 90 423 r length;
+#X text 6 28 [pvoc~ <array> <fftsize> <overlap>];
+#X obj 226 551 lop~ 10;
+#X msg 433 486 read -resize \$1 sample;
+#X obj 433 466 openpanel;
+#X obj 548 337 bng 32 250 50 0 empty empty empty 0 -6 0 8 -260818 -1
+-1;
+#X msg 280 457 locking \$1;
+#X msg 99 447 setarray sample;
+#X msg 380 292 transients 0 6227 35716 53368;
+#X msg 380 314 notransients;
+#X text 380 275 de-smear transients at these locations;
+#X text 546 319 load a sample;
+#X obj 226 531 sig~ 1;
+#X msg 125 261 0.5;
+#X msg 125 281 1;
+#X msg 125 301 2;
+#X msg 125 341 4;
+#X msg 125 510 0 \, 4.41e+06 \$1;
+#X obj 125 399 * 100000;
+#X msg 125 321 3;
+#X text 8 6 pvoc~ 0.2 Ben Saylor bensaylor@fastmail.fm;
+#X obj 99 571 pvoc~ sample 2048 4;
+#X connect 1 0 2 1;
+#X connect 2 0 0 0;
+#X connect 2 0 0 1;
+#X connect 5 0 6 0;
+#X connect 6 0 7 0;
+#X connect 8 0 5 0;
+#X connect 9 0 17 0;
+#X connect 10 0 12 0;
+#X connect 11 0 10 0;
+#X connect 12 0 30 0;
+#X connect 13 0 14 0;
+#X connect 14 0 24 0;
+#X connect 15 0 39 0;
+#X connect 16 0 15 0;
+#X connect 17 0 16 0;
+#X connect 18 0 17 1;
+#X connect 18 0 25 0;
+#X connect 20 0 39 1;
+#X connect 21 0 6 0;
+#X connect 22 0 21 0;
+#X connect 23 0 22 0;
+#X connect 24 0 39 0;
+#X connect 25 0 39 0;
+#X connect 26 0 39 0;
+#X connect 27 0 39 0;
+#X connect 30 0 20 0;
+#X connect 31 0 36 0;
+#X connect 32 0 36 0;
+#X connect 33 0 36 0;
+#X connect 34 0 36 0;
+#X connect 35 0 15 0;
+#X connect 36 0 35 0;
+#X connect 37 0 36 0;
+#X connect 39 0 2 0;
diff --git a/noiseburst.wav b/noiseburst.wav
new file mode 100644
index 0000000..2043f3a
--- /dev/null
+++ b/noiseburst.wav
diff --git a/partconv~.c b/partconv~.c
new file mode 100644
index 0000000..60175f2
--- /dev/null
+++ b/partconv~.c
@@ -0,0 +1,382 @@
+/* Copyright 2003-2005 Benjamin R. Saylor <bensaylor@fastmail.fm>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+// Thu Feb 17 22:29:27 CST 2005 - Changed outbuf loops to use comparison instead of modulo (suggested by c. clepper) - faster
+// Sat Jul 30 19:59:08 AKDT 2005 - Completed modification for re-blocking and dividing up the work between calls.
+// 					This has eliminated dropouts due to lots of work on large block boundaries.
+// Fri Aug  5 10:46:33 AKDT 2005 - accumulate in the frequency domain, so only 1 IFFT is needed per input block,
+// 					rather than <nparts> IFFTs.  Big performance boost.
+// Fri Aug  5 20:27:05 AKDT 2005 - should work properly with arbitrary (2^n) blocksize <= partsize
+// Fri Aug 12 00:32:29 AKDT 2005 - added altivec code by Chris Clepper
+
+// TODO
+// SSE version
+// multichannel (multiple IRs)?  probably wouldn't gain much from this
+// divide work more evenly? (0, 15, 7, 23, 3, 11, 19, 27, ...)
+// someday, an SSE3 version (supposed to make complex math fast)
+
+#include <math.h>
+#include <string.h>
+#include <fftw3.h>
+#include "m_pd.h"
+
+#define MAXPARTS 256	// max number of partitions
+
+#ifdef USE_SSE
+typedef float v4sf __attribute__ ((vector_size (16)));
+#endif
+
+static t_class *partconv_class;
+
+struct sumbuffer {
+	int index;
+	fftwf_complex *fd;
+	float *td;
+	fftwf_plan plan;
+	int readpos;
+	struct sumbuffer *next, *prev;
+};
+
+typedef struct _partconv {
+	t_object x_obj;
+	t_symbol *arrayname;
+	int partsize;
+	int fftsize;
+	float scale;
+	int paddedsize;
+	int nbins;
+	int nparts;
+	int ir_prepared;
+	int pd_blocksize;
+
+	// partitions of impulse response
+	fftwf_plan irpart_plan;
+	float *irpart_td[MAXPARTS];
+	fftwf_complex *irpart_fd[MAXPARTS];
+
+	// input
+	fftwf_plan input_plan;
+	float *inbuf;
+	int inbufpos;
+	float *input_td;
+	fftwf_complex *input_fd;
+
+	// circular array/list of buffers for accumulating results of convolution
+	struct sumbuffer sumbufs[MAXPARTS+2];
+	int nsumbufs;  // number of sumbufs
+	struct sumbuffer *sumbuf;  // the current sumbuf corresponding to the first partition of the IR
+
+	// dividing up the work between calls to perform()
+	int parts_per_call[MAXPARTS];	// parts_per_call[c] is the number of partitions to convolve during perform() call c
+	int curcall;			// current call, counted from the beginning of the current cycle (input buffer full)
+	int curpart;			// current partition to convolve
+} t_partconv;
+
+// Determine how to divide the work as evenly as possible between calls to perform().
+static void partconv_deal_work(t_partconv *x)
+{
+	int calls_per_cycle;
+	int parts_to_distribute;
+	int i;
+
+	// Like dealing cards.
+	// One cycle is defined as the time it takes to fill the input buffer (whose size is the user-given partition size)
+	calls_per_cycle = x->partsize / x->pd_blocksize;
+	for (i = 0; i < calls_per_cycle; i++) {
+		x->parts_per_call[i] = 0;
+	}
+	i = 0;
+	parts_to_distribute = x->nparts;
+	while (parts_to_distribute) {
+		x->parts_per_call[i]++;
+		parts_to_distribute--;
+		i = (i + 1) % calls_per_cycle;
+	}
+	/*
+	for (i = 0; i < calls_per_cycle; i++) {
+		printf("parts_per_call[%d] = %d\n", i, x->parts_per_call[i]);
+	}
+	*/
+}
+
+#ifdef __VEC__
+#include "altivec-perform.inc.c"
+#else
+
+static t_int *partconv_perform(t_int *w)
+{
+	t_partconv *x = (t_partconv *)(w[1]);
+	t_float *in = (t_float *)(w[2]);
+	t_float *out = (t_float *)(w[3]);
+	int n = (int)(w[4]);
+	int i;
+	int j;
+	int k;	// bin
+	int p;	// partition
+	int endpart;
+
+#ifdef USE_SSE
+	int v1;
+	int v2;
+	int nvecs;
+	v4sf *cursumbuf_fd;
+	v4sf *input_fd;
+	v4sf *irpart_fd;
+#else
+	fftwf_complex *cursumbuf_fd;
+	fftwf_complex *input_fd;
+	fftwf_complex *irpart_fd;
+#endif
+
+	float *sumbuf1ptr;
+	float *sumbuf2ptr;
+
+	memcpy(&(x->inbuf[x->inbufpos]), in, n*sizeof(float));  // gather a block of input into input buffer
+	x->inbufpos += n;
+	if (x->inbufpos >= x->partsize) {
+		// input buffer is full, so we begin a new cycle
+		
+		if (x->pd_blocksize != n) {
+			// the patch's blocksize has change since we last dealt the work
+			x->pd_blocksize = n;
+			partconv_deal_work(x);
+		}
+		
+		x->inbufpos = 0;
+		x->curcall = 0;
+		x->curpart = 0;
+		memcpy(x->input_td, x->inbuf, x->partsize * sizeof(float));  // copy 'gathering' input buffer into 'transform' buffer
+		memset(&(x->input_td[x->partsize]), 0, (x->paddedsize - x->partsize) * sizeof(float));  // pad
+
+		fftwf_execute(x->input_plan);  // transform the input
+
+		// everything has been read out of prev sumbuf, so clear it
+		memset(x->sumbuf->prev->td, 0,  x->paddedsize * sizeof(float));
+
+		// advance sumbuf pointers
+		x->sumbuf = x->sumbuf->next;
+		x->sumbuf->readpos = 0;
+		x->sumbuf->prev->readpos = x->partsize;
+	}
+
+	// convolve this call's portion of partitions
+	endpart = x->curpart + x->parts_per_call[x->curcall];
+	if (endpart > x->nparts)  // FIXME does this ever happen?
+		endpart = x->nparts;
+	for (p = x->curpart; p < endpart; p++) {
+		// multiply the input block by the partition, accumulating the result in the appropriate sumbuf
+#ifdef USE_SSE
+#include "sse-conv.inc.c"
+#else
+		cursumbuf_fd = x->sumbufs[(x->sumbuf->index + p) % x->nsumbufs].fd;
+		input_fd = x->input_fd;
+		irpart_fd = x->irpart_fd[p];
+
+		for (k = 0; k < x->nbins; k++) {
+
+			cursumbuf_fd[k][0]
+				+=
+				(  input_fd[k][0] * irpart_fd[k][0]
+				 - input_fd[k][1] * irpart_fd[k][1]);
+
+			cursumbuf_fd[k][1]
+				+=
+				(  input_fd[k][0] * irpart_fd[k][1]
+				 + input_fd[k][1] * irpart_fd[k][0]);
+		}
+#endif
+	}
+	x->curpart = p;
+
+	// The convolution of the fresh block of input with the first partition of the IR
+	// is the last thing that gets summed into the current sumbuf before it gets IFFTed and starts being output.
+	// This happens during the first call of every cycle.
+	if (x->curcall == 0) {
+		// current sumbuf has been filled, so transform it
+		// Output loop will begin to read it and sum it with the last one
+		fftwf_execute(x->sumbuf->plan);
+	}
+
+	// we're summing and outputting the first half of the most recently IFFTed sumbuf
+	// and the second half of the previous one
+	sumbuf1ptr = &(x->sumbuf->td[x->sumbuf->readpos]);
+	sumbuf2ptr = &(x->sumbuf->prev->td[x->sumbuf->prev->readpos]);
+	for (i = 0; i < n; i++) {
+		out[i] = (sumbuf1ptr[i] + sumbuf2ptr[i]) * x->scale;
+	}
+	x->sumbuf->readpos += n;
+	x->sumbuf->prev->readpos += n;
+
+	x->curcall++;
+
+	return (w+5);
+}
+
+#endif // __VEC__
+
+static void partconv_free(t_partconv *x)
+{
+	int i;
+
+	fftwf_free(x->inbuf);
+	for (i = 0; i < x->nparts; i++)
+		fftwf_free(x->irpart_td[i]);
+	fftwf_free(x->input_td);
+	fftwf_destroy_plan(x->input_plan);
+	for (i  = 0; i < x->nsumbufs; i++) {
+		fftwf_free(x->sumbufs[i].fd);
+		fftwf_destroy_plan(x->sumbufs[i].plan);
+	}
+}
+
+static void partconv_set(t_partconv *x, t_symbol *s)
+{
+	int i;
+	int j;
+	t_garray *arrayobj;
+	t_float *array;
+	int arraysize;
+	int arraypos;
+
+	// get the array from pd
+	x->arrayname = s;
+	if ( ! (arrayobj = (t_garray *)pd_findbyclass(x->arrayname, garray_class))) {
+		if (*x->arrayname->s_name) {
+			pd_error(x, "partconv~: %s: no such array", x->arrayname->s_name);
+			return;
+		}
+	} else if ( ! garray_getfloatarray(arrayobj, &arraysize, &array)) {
+		pd_error(x, "%s: bad template", x->arrayname->s_name);
+		return;
+	}
+
+	// if the IR has already been prepared, free everything first
+	if (x->ir_prepared == 1) {
+		partconv_free(x);
+	}
+
+	// caculate number of partitions
+	x->nparts = arraysize / x->partsize;
+	if (arraysize % x->partsize != 0)
+		x->nparts++;
+	if (x->nparts > MAXPARTS)
+		x->nparts = MAXPARTS;
+
+	// allocate, fill, pad, and transform each IR partition
+	for (arraypos = 0, i = 0; i < x->nparts; i++) {
+		x->irpart_td[i] = fftwf_malloc(sizeof(float) * x->paddedsize);
+		x->irpart_fd[i] = (fftwf_complex *) x->irpart_td[i];
+		x->irpart_plan = fftwf_plan_dft_r2c_1d(x->fftsize, x->irpart_td[i], x->irpart_fd[i], FFTW_MEASURE);
+		for (j = 0; j < x->partsize && arraypos < arraysize; j++, arraypos++) {
+			x->irpart_td[i][j] = array[arraypos];
+		}
+		for ( ; j < x->paddedsize; j++) {
+			x->irpart_td[i][j] = 0;
+		}
+		fftwf_execute(x->irpart_plan);
+		fftwf_destroy_plan(x->irpart_plan);
+		// now, x->irpart[i] contains the DFT of the ith partition of the impulse response.
+	}
+
+	x->inbuf = fftwf_malloc(sizeof(float) * x->partsize);
+	x->inbufpos = 0;
+	
+	// allocate buffer for DFT of padded input
+	x->input_td = fftwf_malloc(sizeof(float) * x->paddedsize);	// float array into which input block is copied and padded
+	x->input_fd = (fftwf_complex *) x->input_td;			// fftwf_complex pointer to the same array to facilitate in-place fft
+	x->input_plan =  fftwf_plan_dft_r2c_1d(x->fftsize, x->input_td, x->input_fd, FFTW_MEASURE);
+
+	// set up circular list/array of buffers for accumulating results of convolution
+	x->nsumbufs = x->nparts + 2;
+	x->sumbuf = &(x->sumbufs[0]);
+	for (i  = 0; i < x->nsumbufs; i++) {
+		x->sumbufs[i].index = i;
+		x->sumbufs[i].fd = fftwf_malloc(sizeof(float) * x->paddedsize);
+		memset(x->sumbufs[i].fd, 0, sizeof(float) * x->paddedsize);
+		x->sumbufs[i].td = (float *) x->sumbufs[i].fd;
+		x->sumbufs[i].plan = fftwf_plan_dft_c2r_1d(x->fftsize, x->sumbufs[i].fd, x->sumbufs[i].td, FFTW_MEASURE);
+		x->sumbufs[i].readpos = 0;
+	}
+	x->sumbufs[0].next = &(x->sumbufs[1]);
+	x->sumbufs[0].prev = &(x->sumbufs[x->nsumbufs - 1]);
+	for (i = 1; i < x->nsumbufs; i++) {
+		x->sumbufs[i].next = &(x->sumbufs[(i + 1) % x->nsumbufs]);
+		x->sumbufs[i].prev = &(x->sumbufs[i - 1]);
+	}
+
+	partconv_deal_work(x);
+	x->curcall = 0;
+	x->curpart = 0;
+
+	post("partconv~: using %s in %d partitions with FFT-size %d", x->arrayname->s_name, x->nparts, x->fftsize);
+	x->ir_prepared = 1;
+}
+
+static void partconv_dsp(t_partconv *x, t_signal **sp)
+{
+	// if the ir array has not been prepared, prepare it
+	if (x->ir_prepared == 0) {
+		partconv_set(x, x->arrayname);
+	}
+
+	dsp_add(partconv_perform, 4, x, sp[0]->s_vec, sp[1]->s_vec, sp[0]->s_n);
+}
+
+static void *partconv_new(t_symbol *s, int argc, t_atom *argv)
+{
+	t_partconv *x = (t_partconv *)pd_new(partconv_class);
+
+	outlet_new(&x->x_obj, gensym("signal"));
+
+	if (argc != 2) {
+		post("argc = %d", argc);
+		error("partconv~: usage: [partconv~ <arrayname> <partsize>]\n\t- partition size must be a power of 2 >= blocksize");
+		return NULL;
+	}
+
+	x->arrayname = atom_getsymbol(argv);
+	x->partsize = atom_getfloatarg(1, argc, argv);
+	if (x->partsize <= 0 || x->partsize != (1 << ilog2(x->partsize)))
+	{
+		error("partconv~: partition size not a power of 2");
+		return NULL;
+	}
+	x->fftsize = 2 * x->partsize;
+	x->scale = 1 / ((float) x->fftsize);
+
+	// need 2*(n/2+1) float array for in-place transform, where n is fftsize.
+#ifdef USE_SSE
+	// for sse, make it a multiple of 8, because we pull in 4 bins at a time and don't want to get a segfault
+	x->paddedsize = 2 * (x->fftsize / 2 + 4);
+#else
+	x->paddedsize = 2 * (x->fftsize / 2 + 1);
+#endif
+	x->nbins = x->fftsize / 2 + 1;
+	x->ir_prepared = 0;
+	x->pd_blocksize = sys_getblksize();
+
+	return (x);
+}
+
+void partconv_tilde_setup(void)
+{
+	partconv_class = class_new(gensym("partconv~"), (t_newmethod)partconv_new,
+			(t_method)partconv_free, sizeof(t_partconv), 0, A_GIMME, 0);
+	class_addmethod(partconv_class, nullfn, gensym("signal"), 0);
+	class_addmethod(partconv_class, (t_method) partconv_dsp, gensym("dsp"), 0);
+	class_addmethod(partconv_class, (t_method) partconv_set, gensym("set"), A_DEFSYMBOL, 0);
+}
diff --git a/partconv~.dsp b/partconv~.dsp
new file mode 100644
index 0000000..a5a952d
--- /dev/null
+++ b/partconv~.dsp
@@ -0,0 +1,114 @@
+# Microsoft Developer Studio Project File - Name="partconv" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** NICHT BEARBEITEN **
+
+# TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102
+
+CFG=partconv - Win32 Debug
+!MESSAGE Dies ist kein g�ltiges Makefile. Zum Erstellen dieses Projekts mit NMAKE
+!MESSAGE verwenden Sie den Befehl "Makefile exportieren" und f�hren Sie den Befehl
+!MESSAGE 
+!MESSAGE NMAKE /f "partconv.mak".
+!MESSAGE 
+!MESSAGE Sie k�nnen beim Ausf�hren von NMAKE eine Konfiguration angeben
+!MESSAGE durch Definieren des Makros CFG in der Befehlszeile. Zum Beispiel:
+!MESSAGE 
+!MESSAGE NMAKE /f "partconv.mak" CFG="partconv - Win32 Debug"
+!MESSAGE 
+!MESSAGE F�r die Konfiguration stehen zur Auswahl:
+!MESSAGE 
+!MESSAGE "partconv - Win32 Release" (basierend auf  "Win32 (x86) Dynamic-Link Library")
+!MESSAGE "partconv - Win32 Debug" (basierend auf  "Win32 (x86) Dynamic-Link Library")
+!MESSAGE 
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+MTL=midl.exe
+RSC=rc.exe
+
+!IF  "$(CFG)" == "partconv - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "PARTCONV_EXPORTS" /YX /FD /c
+# ADD CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "PARTCONV_EXPORTS" /YX /FD /c
+# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32
+# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32
+# ADD BASE RSC /l 0xc07 /d "NDEBUG"
+# ADD RSC /l 0xc07 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:I386
+# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:I386
+
+!ELSEIF  "$(CFG)" == "partconv - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "PARTCONV_EXPORTS" /YX /FD /GZ /c
+# ADD CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "PARTCONV_EXPORTS" /YX /FD /GZ /c
+# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32
+# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32
+# ADD BASE RSC /l 0xc07 /d "_DEBUG"
+# ADD RSC /l 0xc07 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib pd.lib fftw3.lib /nologo /dll /debug /machine:I386 /out:"C:\Programme\pd_0.37\extra\partconv.dll" /pdbtype:sept
+
+!ENDIF 
+
+# Begin Target
+
+# Name "partconv - Win32 Release"
+# Name "partconv - Win32 Debug"
+# Begin Group "Quellcodedateien"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+# Begin Source File
+
+SOURCE=.\partconv.c
+# End Source File
+# End Group
+# Begin Group "Header-Dateien"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+# Begin Source File
+
+SOURCE=.\fftw3.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\m_pd.h
+# End Source File
+# End Group
+# Begin Group "Ressourcendateien"
+
+# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+# End Group
+# End Target
+# End Project
diff --git a/partconv~.dsw b/partconv~.dsw
new file mode 100644
index 0000000..940b7a9
--- /dev/null
+++ b/partconv~.dsw
@@ -0,0 +1,29 @@
+Microsoft Developer Studio Workspace File, Format Version 6.00
+# WARNUNG: DIESE ARBEITSBEREICHSDATEI DARF NICHT BEARBEITET ODER GEL�SCHT WERDEN!
+
+###############################################################################
+
+Project: "partconv"=.\partconv.dsp - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+}}}
+
+###############################################################################
+
+Global:
+
+Package=<5>
+{{{
+}}}
+
+Package=<3>
+{{{
+}}}
+
+###############################################################################
+
diff --git a/partconv~.libs b/partconv~.libs
new file mode 100644
index 0000000..67c2eba
--- /dev/null
+++ b/partconv~.libs
@@ -0,0 +1 @@
+-lfftw3
diff --git a/partconv~.vcproj b/partconv~.vcproj
new file mode 100644
index 0000000..ce1bf88
--- /dev/null
+++ b/partconv~.vcproj
@@ -0,0 +1,169 @@
+<?xml version="1.0" encoding = "Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="7.00"
+	Name="partconv~"
+	SccProjectName=""
+	SccLocalPath="">
+	<Platforms>
+		<Platform
+			Name="Win32"/>
+	</Platforms>
+	<Configurations>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory=".\Release"
+			IntermediateDirectory=".\Release"
+			ConfigurationType="2"
+			UseOfMFC="0"
+			ATLMinimizesCRunTimeLibraryUsage="FALSE"
+			CharacterSet="2">
+			<Tool
+				Name="VCCLCompilerTool"
+				InlineFunctionExpansion="1"
+				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;PARTCONV_EXPORTS"
+				StringPooling="TRUE"
+				RuntimeLibrary="0"
+				EnableFunctionLevelLinking="TRUE"
+				UsePrecompiledHeader="2"
+				PrecompiledHeaderFile=".\Release/partconv.pch"
+				AssemblerListingLocation=".\Release/"
+				ObjectFile=".\Release/"
+				ProgramDataBaseFileName=".\Release/"
+				WarningLevel="3"
+				SuppressStartupBanner="TRUE"/>
+			<Tool
+				Name="VCCustomBuildTool"/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalOptions="/MACHINE:I386"
+				AdditionalDependencies="odbc32.lib odbccp32.lib"
+				OutputFile=".\Release/partconv.dll"
+				LinkIncremental="1"
+				SuppressStartupBanner="TRUE"
+				ProgramDatabaseFile=".\Release/partconv.pdb"
+				ImportLibrary=".\Release/partconv.lib"/>
+			<Tool
+				Name="VCMIDLTool"
+				PreprocessorDefinitions="NDEBUG"
+				MkTypLibCompatible="TRUE"
+				SuppressStartupBanner="TRUE"
+				TargetEnvironment="1"
+				TypeLibraryName=".\Release/partconv.tlb"/>
+			<Tool
+				Name="VCPostBuildEventTool"/>
+			<Tool
+				Name="VCPreBuildEventTool"/>
+			<Tool
+				Name="VCPreLinkEventTool"/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="NDEBUG"
+				Culture="3079"/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"/>
+			<Tool
+				Name="VCWebDeploymentTool"/>
+		</Configuration>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory=".\Debug"
+			IntermediateDirectory=".\Debug"
+			ConfigurationType="2"
+			UseOfMFC="0"
+			ATLMinimizesCRunTimeLibraryUsage="FALSE"
+			CharacterSet="2">
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				PreprocessorDefinitions="WIN32;_WINDOWS;_USRDLL;PARTCONV_EXPORTS;NDEBUG;_WINDOWS;_USRDLL;LEST_I;NT"
+				BasicRuntimeChecks="0"
+				RuntimeLibrary="0"
+				DisableLanguageExtensions="TRUE"
+				UsePrecompiledHeader="0"
+				PrecompiledHeaderFile=""
+				AssemblerOutput="2"
+				AssemblerListingLocation=".\Debug/"
+				ObjectFile=".\Debug/"
+				ProgramDataBaseFileName=".\Debug/"
+				WarningLevel="3"
+				SuppressStartupBanner="TRUE"
+				Detect64BitPortabilityProblems="TRUE"
+				DebugInformationFormat="0"
+				CompileAs="1"
+				ShowIncludes="TRUE"/>
+			<Tool
+				Name="VCCustomBuildTool"/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalOptions="/MACHINE:I386"
+				AdditionalDependencies="odbc32.lib odbccp32.lib pd.lib fftw3.lib"
+				OutputFile="..\partconv~.dll"
+				LinkIncremental="1"
+				SuppressStartupBanner="TRUE"
+				GenerateDebugInformation="TRUE"
+				ProgramDatabaseFile=".\Debug/partconv.pdb"
+				ImportLibrary=".\Debug/partconv.lib"
+				TargetMachine="1"/>
+			<Tool
+				Name="VCMIDLTool"
+				PreprocessorDefinitions="_DEBUG"
+				MkTypLibCompatible="TRUE"
+				SuppressStartupBanner="TRUE"
+				TargetEnvironment="1"
+				TypeLibraryName=".\Debug/partconv.tlb"/>
+			<Tool
+				Name="VCPostBuildEventTool"/>
+			<Tool
+				Name="VCPreBuildEventTool"/>
+			<Tool
+				Name="VCPreLinkEventTool"/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="_DEBUG"
+				Culture="3079"/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"/>
+			<Tool
+				Name="VCWebDeploymentTool"/>
+		</Configuration>
+	</Configurations>
+	<Files>
+		<Filter
+			Name="Quellcodedateien"
+			Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat">
+			<File
+				RelativePath="partconv.c">
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;PARTCONV_EXPORTS;_MBCS;$(NoInherit)"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_USRDLL;PARTCONV_EXPORTS;_MBCS;$(NoInherit)"
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+			</File>
+		</Filter>
+		<Filter
+			Name="Header-Dateien"
+			Filter="h;hpp;hxx;hm;inl">
+			<File
+				RelativePath="fftw3.h">
+			</File>
+			<File
+				RelativePath="m_pd.h">
+			</File>
+		</Filter>
+		<Filter
+			Name="Ressourcendateien"
+			Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe">
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
diff --git a/pvoc~.c b/pvoc~.c
new file mode 100644
index 0000000..c6c76ce
--- /dev/null
+++ b/pvoc~.c
@@ -0,0 +1,392 @@
+/* Copyright 2003 Benjamin R. Saylor <bensaylor@fastmail.fm>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <math.h>
+#include <fftw3.h>
+#include "m_pd.h"
+
+// FIXME:
+// set array when dsp is turned on
+// get rid of shiftbuf, just save values that will be needed next before overwriting them
+// cubic interp
+// use float fftw?
+// performance testing
+// what if there are 2 transients less than fftsize apart?  second one might get smeared.
+// compare sound with phaselockedvoc.pd
+// detect transients
+// peaks + noise
+// other phase locking methods
+// use floats?
+// use in-place?
+// if don't have an array, call setarray(x->arrayname)
+// window size and fft size independent (what is gained by zero-padding?)
+// error if parent blocksize is larger than hopsize
+// slowly return window to true position after centering around a transient?
+// use fewer fft arrays?
+
+// DONE:
+// use FFTW_MEASURE
+
+static t_class *pvoc_class;
+
+typedef struct _pvoc {
+	t_object x_obj;
+	t_symbol *arrayname;
+	t_garray *arrayobj;
+	t_float *array;
+	int arraysize;
+	double *window;
+	int fftsize;
+	int overlap;
+	int hopsize;			// = fftsize / overlap
+	int trans[256];			// sample indices of transients
+	int ntrans;			// number of transients
+	int wastrans;			// there was a transient in the left half of the window during the previous frame
+	double phaselocking;
+	fftw_plan fftplan;
+	fftw_plan fft2plan;
+	fftw_plan ifftplan;
+	double *fftin;
+	double *fft2in;
+	double *ifftout;
+	fftw_complex *fftout;
+	fftw_complex *fft2out;
+	fftw_complex *ifftin;
+	fftw_complex *shiftbuf;
+	double *outbuf;
+	int outbufpos;
+} t_pvoc;
+
+// if there is a transient between samples a and b, return its position, else return -1
+static inline int transient_between(t_pvoc *x, int a, int b)
+{
+	// linear search for now: FIXME
+	int i;
+	for (i = 0; i < x->ntrans; i++)
+		if (a <= x->trans[i] && b >= x->trans[i])
+			return x->trans[i];
+	return -1;
+}
+
+#if 1
+static inline double interpolate(t_pvoc *x, double t)
+{
+	// linear interpolation for now: FIXME
+	if (t < 0 || t > (x->arraysize - 1))
+		return 0.0;
+	else {
+		int x_1 = t;
+		double y_1 = x->array[x_1];
+		double y_2 = x->array[x_1 + 1];
+
+		return (y_2 - y_1) * (t - x_1) + y_1;
+	}
+}
+#else
+static inline double interpolate(t_pvoc *x, double t)
+{
+	// FIXME check bounds (can't think now)
+	int truncphase = (int) x->phase;
+	double fr = x->phase - ((double) truncphase);
+	double inm1 = x->ifftout[truncphase - 1];
+	double in   = x->ifftout[truncphase + 0];
+	double inp1 = x->ifftout[truncphase + 1];
+	double inp2 = x->ifftout[truncphase + 2];
+
+	// taken from swh-plugins-0.4.0/ladspa-util.h cube_interp, made to use doubles instead since doubles are what i'm using for some reason
+	return in + 0.5 * fr * (inp1 - inm1 +
+	 fr * (4.0 * inp1 + 2.0 * inm1 - 5.0 * in - inp2 +
+	 fr * (3.0 * (in - inp1) - inm1 + inp2)));
+}
+#endif
+
+static t_int *pvoc_perform(t_int *w)
+{
+	t_pvoc *x = (t_pvoc *)(w[1]);
+	t_float *in1 = (t_float *)(w[2]);
+	t_float *in2 = (t_float *)(w[3]);
+	t_float *out = (t_float *)(w[4]);
+	int n = (int)(w[5]);
+	double t;
+	double pitchshift;
+	int transientpos;
+	int desmear;
+	double framestart;
+	double frameend;
+	int i;	// 0 to n -type iterator
+	int j;	// start to end -type iterator
+	int k;	// bin iterator
+	double xlook;		// iterator for interpolated table lookup
+
+	// if we are at the start of a new frame...
+	if (x->outbufpos % x->hopsize == 0) {
+
+		// don't desmear this frame by default
+		desmear = 0;
+
+		// sample the input signals (FIXME just sample these in the beginning..)
+		t = in1[0];		// time position
+		pitchshift = in2[0];	// pitch shift
+
+		// set the frame boundaries with the desired time pos in the middle
+		framestart = t - (pitchshift * x->fftsize / 2);
+		frameend = framestart + pitchshift * x->fftsize;
+
+		// prepare to de-smear transients
+		transientpos = transient_between(x, (int) framestart, (int) frameend);
+		if (transientpos != -1) {
+			// there is a transient in this frame
+#if 0
+			if (transientpos > t) {
+				// there is a transient in the right half of the window:
+				// --> move the window left until the transient is outside it
+				frameend = transientpos;
+				framestart = frameend - x->fftsize;
+				x->wastrans = 0;
+			} else if ( ! x->wastrans) {
+				// there is a transient in the left half of the window,
+				// and there was no transient there during the previous frame:
+				// --> center the window around the transient and remember to desmear this frame
+				framestart = transientpos - (x->fftsize / 2);
+				frameend = framestart + x->fftsize;
+				desmear = 1;
+				x->wastrans = 1;
+			} else
+				x->wastrans = 1;
+#else
+			// this simpler method turns out to sound better (timing sounds more accurate, no "frozen" sound preceding transients)
+			if ( ! x->wastrans) {
+				// there is a transient in the window,
+				// and there wasn't during the previous frame:
+				// --> center the window around the transient and remember to desmear this frame
+				framestart = transientpos - (pitchshift * x->fftsize / 2);
+				desmear = 1;
+			}
+			x->wastrans = 1;
+#endif
+		} else
+			x->wastrans = 0;
+
+		// interpolate-read the array from framestart to frameend into fftin, windowing it
+		for (i = 0, xlook = framestart; i < x->fftsize; xlook += pitchshift, i++) {
+			x->fftin[i] = interpolate(x, xlook) * x->window[i];
+		}
+
+		// hop forward and read the second frame into fft2in
+		// FIXME merge the two loops?
+		framestart += pitchshift * x->hopsize;
+		for (i = 0, xlook = framestart; i < x->fftsize; xlook += pitchshift, i++) {
+			x->fft2in[i] = interpolate(x, xlook) * x->window[i];
+		}
+
+		// do the ffts
+		fftw_execute(x->fftplan);
+		fftw_execute(x->fft2plan);
+
+		if ( ! desmear) {
+			// Miller Puckette's phase modification math (translation from 09.pvoc.pd and 10.phaselockedvoc.pd)
+
+			double a, b, r, c, d;
+
+			// propagate phase
+			for (k = 0; k < (x->fftsize / 2 + 1); k++) {
+				a = x->ifftin[k][0] * x->fftout[k][0] + x->ifftin[k][1] * x->fftout[k][1] + 0.00000000000000000001;
+				b = x->ifftin[k][1] * x->fftout[k][0] - x->ifftin[k][0] * x->fftout[k][1];
+				r = 1 / sqrt(a * a + b * b);
+				c = a * r;
+				d = b * r;
+				x->shiftbuf[k][0] = c * x->fft2out[k][0] - d * x->fft2out[k][1];
+				x->shiftbuf[k][1] = c * x->fft2out[k][1] + d * x->fft2out[k][0];
+			}
+
+			// don't phase-lock the first bin
+			x->ifftin[0][0] = x->shiftbuf[0][0];
+			x->ifftin[0][1] = x->shiftbuf[0][1];
+
+			// phase-lock
+			for (k = 1; k < (x->fftsize / 2); k++) {
+				x->ifftin[k][0] = x->shiftbuf[k][0] - x->phaselocking * (x->shiftbuf[k - 1][0] + x->shiftbuf[k + 1][0]);
+				x->ifftin[k][1] = x->shiftbuf[k][1] - x->phaselocking * (x->shiftbuf[k - 1][1] + x->shiftbuf[k + 1][1]);
+			}
+
+			// don't phase-lock the last bin
+			x->ifftin[x->fftsize / 2][0] = x->shiftbuf[x->fftsize / 2][0];
+			x->ifftin[x->fftsize / 2][1] = x->shiftbuf[x->fftsize / 2][1];
+
+		} else {
+			// this frame is to be de-smeared, which means don't modify the phases, just preserve the original phases
+			for (k = 0; k < (x->fftsize / 2 + 1); k++) {
+				x->ifftin[k][0] = x->fftout[k][0];
+				x->ifftin[k][1] = x->fftout[k][1];
+			}
+		}
+
+		// do the ifft
+		fftw_execute(x->ifftplan);
+
+		// add into output buffer, windowing and normalizing first (divide by blocksize)
+		for (i = 0, j = x->outbufpos; i < x->fftsize; i++, j++) {
+			x->outbuf[j % x->fftsize] += x->ifftout[i] / x->fftsize * x->window[i];
+		}
+	}
+
+	// output one block of the output buffer
+	for (i = 0, j = x->outbufpos; i < n; i++, j++) {
+		out[i] = x->outbuf[j % x->fftsize];
+		x->outbuf[j % x->fftsize] = 0;		// zero the part of the buffer that was just output
+	}
+
+	// move the output buffer pointer forward by one block
+	x->outbufpos = (x->outbufpos + n) % x->fftsize;
+
+	return (w+6);
+}
+
+static void pvoc_dsp(t_pvoc *x, t_signal **sp)
+{
+	    dsp_add(pvoc_perform, 5, x, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
+}
+
+// adapted from jsarlo's windowing library
+// Hanning
+static void makewindow(double *w, int n)
+{
+	int i;
+	double xshift =  n / 2.0;
+	double x;
+	for (i = 0; i < n; i++) {
+		x = (i - xshift) / xshift;
+		w[i] = 0.5 * (1 + cos(M_PI * x));
+	}
+}
+
+static void setarray(t_pvoc *x, t_symbol *s)
+{
+	x->arrayname = s;
+	if ( ! (x->arrayobj = (t_garray *)pd_findbyclass(x->arrayname, garray_class))) {
+ 		if (*x->arrayname->s_name) pd_error(x, "pvoc~: %s: no such array", x->arrayname->s_name);
+		x->array = NULL;
+		x->arraysize = 0;
+	} else if ( ! garray_getfloatarray(x->arrayobj, &x->arraysize, &x->array)) {
+ 		error("%s: bad template", x->arrayname->s_name);
+		x->array = NULL;
+		x->arraysize = 0;
+	} else {
+		garray_usedindsp(x->arrayobj);
+	}
+}
+
+static void locking(t_pvoc *x, t_floatarg f)
+{
+	x->phaselocking = f;
+}
+
+// takes a list of sample positions of transients to be de-smeared
+static void transients(t_pvoc *x, t_symbol *s, int argc, t_atom *argv)
+{
+	int i;
+
+	x->ntrans = argc;
+	for (i = 0; i < x->ntrans; i++)
+		x->trans[i] = atom_getfloatarg(i, argc, argv);
+}
+
+// for clarity (same as "transients" with no args)
+static void notransients(t_pvoc *x)
+{
+	x->ntrans = 0;
+}
+
+static void *pvoc_new(t_symbol *s, int argc, t_atom *argv)
+{
+	t_pvoc *x = (t_pvoc *)pd_new(pvoc_class);
+	int i;
+
+	inlet_new(&x->x_obj, &x->x_obj.ob_pd, &s_signal, &s_signal);	// pitch-shift inlet
+	outlet_new(&x->x_obj, gensym("signal"));
+
+	if (argc != 3) {
+		post("argc = %d", argc);
+		error("pvoc~: usage: [pvoc~ <arrayname> <fftsize> <overlap>]");
+		return NULL;
+	}
+
+	x->fftsize = atom_getfloatarg(1, argc, argv);
+	x->overlap = atom_getfloatarg(2, argc, argv);
+	x->hopsize = x->fftsize / x->overlap;
+	x->ntrans = 0;
+	x->wastrans = 0;
+	x->phaselocking = 0;
+
+	// get the source array
+	setarray(x, atom_getsymbol(argv));
+
+	// set up output ring buffer
+	x->outbuf = getbytes(sizeof(double) * x->fftsize);
+	x->outbufpos = 0;
+	for (i = 0; i < x->fftsize; i++)
+		x->outbuf[i] = 0;
+
+	// make table for window function
+	x->window = getbytes(sizeof(double) * x->fftsize);
+	makewindow(x->window, x->fftsize);
+
+	// set up fftw stuff
+	x->fftin = fftw_malloc(sizeof(double) * x->fftsize);
+	x->fft2in = fftw_malloc(sizeof(double) * x->fftsize);
+	x->ifftout = fftw_malloc(sizeof(double) * x->fftsize);
+	x->fftout = fftw_malloc(sizeof(fftw_complex) * (x->fftsize / 2 + 1));
+	x->fft2out = fftw_malloc(sizeof(fftw_complex) * (x->fftsize / 2 + 1));
+	x->ifftin = fftw_malloc(sizeof(fftw_complex) * (x->fftsize / 2 + 1));
+	x->shiftbuf = fftw_malloc(sizeof(fftw_complex) * (x->fftsize / 2 + 1));
+	for (i = 0; i < (x->fftsize / 2 + 1); i++) {
+		x->ifftin[i][0] = 0;	// need to start the phases from zero
+		x->ifftin[i][1] = 0;
+	}
+	x->fftplan = fftw_plan_dft_r2c_1d(x->fftsize, x->fftin, x->fftout, FFTW_MEASURE);
+	x->fft2plan = fftw_plan_dft_r2c_1d(x->fftsize, x->fft2in, x->fft2out, FFTW_MEASURE);
+	x->ifftplan = fftw_plan_dft_c2r_1d(x->fftsize, x->ifftin, x->ifftout, FFTW_MEASURE | FFTW_PRESERVE_INPUT);
+	
+	return (x);
+}
+
+static void pvoc_free(t_pvoc *x)
+{
+	freebytes(x->outbuf, sizeof(double) * x->fftsize);
+	freebytes(x->window, sizeof(double) * x->fftsize);
+	fftw_free(x->fftin);
+	fftw_free(x->fft2in);
+	fftw_free(x->ifftout);
+	fftw_free(x->fftout);
+	fftw_free(x->fft2out);
+	fftw_free(x->ifftin);
+	fftw_free(x->shiftbuf);
+	fftw_destroy_plan(x->fftplan);
+	fftw_destroy_plan(x->fft2plan);
+	fftw_destroy_plan(x->ifftplan);
+}
+
+void pvoc_tilde_setup(void)
+{
+	pvoc_class = class_new(gensym("pvoc~"), (t_newmethod)pvoc_new, (t_method)pvoc_free, sizeof(t_pvoc), 0, A_GIMME, 0);
+	class_addmethod(pvoc_class, nullfn, gensym("signal"), 0);
+	class_addmethod(pvoc_class, (t_method) pvoc_dsp, gensym("dsp"), 0);
+	class_addmethod(pvoc_class, (t_method) setarray, gensym("setarray"), A_DEFSYMBOL, 0);
+	class_addmethod(pvoc_class, (t_method) locking, gensym("locking"), A_DEFFLOAT, 0);
+	class_addmethod(pvoc_class, (t_method) transients, gensym("transients"), A_GIMME, 0);
+	class_addmethod(pvoc_class, (t_method) notransients, gensym("notransients"), 0);
+}
diff --git a/pvoc~.libs b/pvoc~.libs
new file mode 100644
index 0000000..67c2eba
--- /dev/null
+++ b/pvoc~.libs
@@ -0,0 +1 @@
+-lfftw3
diff --git a/sse-conv.inc.c b/sse-conv.inc.c
new file mode 100644
index 0000000..cc62f30
--- /dev/null
+++ b/sse-conv.inc.c
@@ -0,0 +1,77 @@
+// Ben Saylor 2005-08-10
+// attempt at an SSE version of the convolution loop.
+// Results sound weird, and CPU usage is about the same. :(
+
+		cursumbuf_fd	= (v4sf *) x->sumbufs[(x->sumbuf->index + p) % x->nsumbufs].fd;
+		input_fd	= (v4sf *) x->input_fd;
+		irpart_fd	= (v4sf *) x->irpart_fd[p];
+		nvecs = x->paddedsize/4;
+
+		for (v1=0, v2=1; v2 < nvecs; v1+=2, v2+=2) {
+			// v1 is the index of the first 4-float vector (v4sf) to process (v2 is the second)
+			// input_fd, etc. are v4sf pointers.
+			// pull in 4 bins (8 floats) (2 v4sfs) at a time.
+			
+			// (a + bi) * (c + di) = (ac - bd) + (ad + bc)i
+
+			asm volatile (	
+					// load inputs
+					"movaps %[in1], %%xmm0\n\t"
+					"movaps %[in2], %%xmm6\n\t"
+					"movaps %[ir1], %%xmm2\n\t"
+					"movaps %[ir2], %%xmm7\n\t"
+					"movaps %%xmm0, %%xmm1\n\t"
+					"movaps %%xmm2, %%xmm3\n\t"
+					// xmm0 = xmm1 = [a1 b1 a2 b2]
+					// xmm6 =        [a3 b3 a4 b4]
+					// xmm2 = xmm3 = [c1 d1 c2 d2]
+					// xmm7 =        [c3 d3 c4 d4]
+
+					// de-interleave real and imaginary parts
+					"shufps $0x88, %%xmm6, %%xmm0\n\t"	// xmm0 = [a1 a2 a3 a4]
+					"shufps $0xDD, %%xmm6, %%xmm1\n\t"	// xmm1 = [b1 b2 b3 b4]
+					"shufps $0x88, %%xmm7, %%xmm2\n\t"	// xmm2 = [c1 c2 c3 c4]
+					"shufps $0xDD, %%xmm7, %%xmm3\n\t"	// xmm3 = [d1 d2 d3 d4]
+
+					// load output (early, maybe it will help)
+					"movaps %[out1], %%xmm6\n\t"
+					"movaps %[out2], %%xmm7\n\t"
+
+					// compute the real part of the complex product
+					// (work on copies of xmm0 and xmm1, because we need to keep them)
+					"movaps %%xmm0, %%xmm4\n\t"		// xmm4 = [a1 a2 a3 a4]
+					"mulps %%xmm2, %%xmm4\n\t"		// xmm4 = [a1c1 a2c2 a3c3 a4c4]
+					"movaps %%xmm1, %%xmm5\n\t"		// xmm5 = [b1 b2 b3 b4]
+					"mulps %%xmm3, %%xmm5\n\t"		// xmm5 = [b1d1 b2d2 b3d3 b4d4]
+					"subps %%xmm5, %%xmm4\n\t"		// xmm4 = (ac - bd) [r1 r2 r3 r4]
+
+					// compute the imaginary part of the complex product
+					"mulps %%xmm3, %%xmm0\n\t"		// xmm0 = [a1d1 a2d2 a3d3 a4d4]
+					"mulps %%xmm2, %%xmm1\n\t"		// xmm1 = [b1c1 b2c2 b3c3 b4c4]
+					"addps %%xmm1, %%xmm0\n\t"		// xmm0 = (ad + bc) [i1 i2 i3 i4]
+
+					// re-interleave
+					"movaps %%xmm4, %%xmm5\n\t"		// xmm5 = [r1 r2 r3 r4]
+					"unpcklps %%xmm0, %%xmm4\n\t"		// xmm4 = [r1 i1 r2 i2]
+					"unpckhps %%xmm5, %%xmm0\n\t"		// xmm0 = [r3 i3 r4 i4]
+
+					// add into sumbuf
+					"addps %%xmm4, %%xmm6\n\t"
+					"addps %%xmm0, %%xmm7\n\t"
+					"movaps %%xmm6, %[out1]\n\t"
+					"movaps %%xmm7, %[out2]"
+
+					// output/input operands
+					: [out1] "+m" (cursumbuf_fd[v1]),
+					  [out2] "+m" (cursumbuf_fd[v2])
+
+					// input operands
+					: [in1] "m" (input_fd[v1]),
+					  [in2] "m" (input_fd[v2]),
+					  [ir1] "m" (irpart_fd[v1]),
+					  [ir2] "m" (irpart_fd[v2])
+
+					// clobbered registers
+					: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
+			    );
+		}
author	Hans-Christoph Steiner <eighthave@users.sourceforge.net>	2006-02-07 18:23:33 +0000
committer	IOhannes m zmölnig <zmoelnig@iem.at>	2015-10-14 13:36:00 +0200
commit	2a5a823d12708ca0fa5c56347593afd9ce3b9b83 (patch)
tree	eeb5906131003278b368711eb1a5238f39cd3f50
parent	dac2e8e0cb5980d41dc3879338816d6f52a5ff8a (diff)