diff options
-rw-r--r-- | GPL.txt | 340 | ||||
-rw-r--r-- | Makefile | 10 | ||||
-rw-r--r-- | README.txt | 39 | ||||
-rw-r--r-- | altivec-perform.inc.c | 283 | ||||
-rw-r--r-- | help/partconv~-help.pd | 74 | ||||
-rw-r--r-- | help/pvoc~-help.pd | 85 | ||||
-rw-r--r-- | noiseburst.wav | bin | 0 -> 88236 bytes | |||
-rw-r--r-- | partconv~.c | 382 | ||||
-rw-r--r-- | partconv~.dsp | 114 | ||||
-rw-r--r-- | partconv~.dsw | 29 | ||||
-rw-r--r-- | partconv~.libs | 1 | ||||
-rw-r--r-- | partconv~.vcproj | 169 | ||||
-rw-r--r-- | pvoc~.c | 392 | ||||
-rw-r--r-- | pvoc~.libs | 1 | ||||
-rw-r--r-- | sse-conv.inc.c | 77 |
15 files changed, 1996 insertions, 0 deletions
@@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..24f589e --- /dev/null +++ b/Makefile @@ -0,0 +1,10 @@ +TARGET=bsaylor + +default: + make -C ../ $(TARGET) + +install: + make -C ../ $(TARGET)_install + +clean: + make -C ../ $(TARGET)_clean diff --git a/README.txt b/README.txt new file mode 100644 index 0000000..87bea76 --- /dev/null +++ b/README.txt @@ -0,0 +1,39 @@ +
+aenv~ is a asymptotic ADSR envelope generator; The output value approaches the
+target values as asymptotes.
+
+
+
+partconv~ is an external that implements partitioned fast convolution,
+suitable for convolving input signals with long impulse responses for reverbs,
+etc. This release offers much improved performance, as well as independence
+of the partition size and your patch's blocksize. It includes Altivec code
+from Chris Clepper. There's also some SSE 1 code that produces almost correct
+results but doesn't seem to improve performance. If you are familiar with SSE
+and want to have a go at writing an SSE version, please do!
+partconv~ requires FFTW3 (http://fftw.org)
+
+
+
+pvoc~ is a phase vocoder based on Pd's 09.pvoc.pd example patch. Advantages over the abstraction include (reportedly) faster execution, instantaneous response to input, and adjustable phase locking. It requires FFTW3.
+bensaylor's Home
+
+
+
+susloop~: sample player with various loop methods (ping-pong, ... ) think
+tracker. svf~ This is a signal-controlled port of Steve Harris' state
+variable filter LADSPA plugin.
+
+
+
+svf~: a signal-controlled port of Steve Harris' state variable filter
+LADSPA plugin (http://plugin.org.uk).
+
+
+
+zhzhx~: Turns the input signal into a staticky, distorted mess. Comes with tone
+control.
+
+
+
+Benjamin R. Saylor <bensaylor@fastmail.fm>
diff --git a/altivec-perform.inc.c b/altivec-perform.inc.c new file mode 100644 index 0000000..3ce1cf0 --- /dev/null +++ b/altivec-perform.inc.c @@ -0,0 +1,283 @@ +//altivec version by Chris Clepper +// +static t_int *partconv_perform(t_int *w) +{ + t_partconv *x = (t_partconv *)(w[1]); + t_float *in = (t_float *)(w[2]); + t_float *out = (t_float *)(w[3]); + int n = (int)(w[4]); + int i; + int j; + int k; // bin + int p; // partition + int endpart; + fftwf_complex *cursumbuf_fd; + float *sumbuf1ptr; + float *sumbuf2ptr; + + union { + unsigned char c[16]; + vector unsigned char v; + }permfill; + + union { + float f[4]; + vector float v; + }floatfill; + + vector float *load_input, *load_irpart; + vector float store_multbuf1,store_multbuf2; + vector float vinput_fd0, vinput_fd4; //input vectors + vector float virpart_fd0, virpart_fd4; //ir partition vectors + vector float permtemp1357, permtemp0246; + vector float vzero;// vscale; + vector unsigned char input_0022, input_1133, perm_0246, perm_1357, perm_0123,perm_4567; + vector float vtemp1, vtemp2, vtemp3, vtemp4, vtemp5, vtemp6, vtemp7, vtemp8; + + floatfill.f[0] = 0.f; + floatfill.f[1] = 0.f; + floatfill.f[2] = 0.f; + floatfill.f[3] = 0.f; + vzero = floatfill.v; + + //store_multbuf = vzero; + + floatfill.f[0] = x->scale; + floatfill.f[1] = x->scale; + floatfill.f[2] = x->scale; + floatfill.f[3] = x->scale; + //vscale = floatfill.v; + + //fill the permute buffer for the first input_fd multiply + permfill.c[0] = 0; permfill.c[1] = 1; permfill.c[2] = 2; permfill.c[3] = 3; //first float + permfill.c[4] = 0; permfill.c[5] = 1; permfill.c[6] = 2; permfill.c[7] = 3; //second float + permfill.c[8] = 8; permfill.c[9] = 9; permfill.c[10] = 10; permfill.c[11] = 11; //third float + permfill.c[12] = 8; permfill.c[13] = 9; permfill.c[14] = 10; permfill.c[15] = 11; //fourth float + + input_0022 = permfill.v; + + permfill.c[0] = 4; permfill.c[1] = 5; permfill.c[2] = 6; permfill.c[3] = 7; //first float + permfill.c[4] = 4; permfill.c[5] = 5; permfill.c[6] = 6; permfill.c[7] = 7; //second float + permfill.c[8] = 12; permfill.c[9] = 13; permfill.c[10] = 14; permfill.c[11] = 15; //third float + permfill.c[12] = 12; permfill.c[13] = 13; permfill.c[14] = 14; permfill.c[15] = 15; //fourth float + + input_1133 = permfill.v; + + //perm_0246 + //0,1,2,3, 8,9,10,11, 16,17,18,19, 24,25,26,27 + permfill.c[0] = 0; permfill.c[1] = 1; permfill.c[2] = 2; permfill.c[3] = 3; //first float + permfill.c[4] = 8; permfill.c[5] = 9; permfill.c[6] = 10; permfill.c[7] = 11; //second float + permfill.c[8] = 16; permfill.c[9] = 17; permfill.c[10] = 18; permfill.c[11] = 19; //third float + permfill.c[12] = 24; permfill.c[13] = 25; permfill.c[14] = 26; permfill.c[15] = 27; //fourth float + + perm_0246 = permfill.v; + + // perm_1357 + // 4,5,6,7, 12,13,14,15, 20,21,22,23, 28,29,30,31 + permfill.c[0] = 4; permfill.c[1] = 5; permfill.c[2] = 6; permfill.c[3] = 7; //first float + permfill.c[4] = 12; permfill.c[5] = 13; permfill.c[6] = 14; permfill.c[7] = 15; //second float + permfill.c[8] = 20; permfill.c[9] = 21; permfill.c[10] = 22; permfill.c[11] = 23; //third float + permfill.c[12] = 28; permfill.c[13] = 29; permfill.c[14] = 30; permfill.c[15] = 31; //fourth float + + perm_1357 = permfill.v; + + // perm_0123 from [0,2,4,6] and [1,3,5,7] + // 0,1,2,3 16,17,18,19 4,5,6,7 20,21,22,23 + permfill.c[0] = 0; permfill.c[1] = 1; permfill.c[2] = 2; permfill.c[3] = 3; //first float + permfill.c[4] = 16; permfill.c[5] = 17; permfill.c[6] = 18; permfill.c[7] = 19; //second float + permfill.c[8] = 4; permfill.c[9] = 5; permfill.c[10] = 6; permfill.c[11] = 7; //third float + permfill.c[12] = 20; permfill.c[13] = 21; permfill.c[14] = 22; permfill.c[15] = 23; //fourth float + + perm_0123 = permfill.v; + + // perm_4567 from [0,2,4,6] and [1,3,5,7] + // 8.9.10.11 24,25,26,27 12,13,14,15 28,29,30,31 + permfill.c[0] = 8; permfill.c[1] = 9; permfill.c[2] = 10; permfill.c[3] = 11; //first float + permfill.c[4] = 24; permfill.c[5] = 25; permfill.c[6] = 26; permfill.c[7] = 27; //second float + permfill.c[8] = 12; permfill.c[9] = 13; permfill.c[10] = 14; permfill.c[11] = 15; //third float + permfill.c[12] = 28; permfill.c[13] = 29; permfill.c[14] = 30; permfill.c[15] = 31; //fourth float + + // perm_4567 from [0,2,4,6] and [1,3,5,7] + // 8.9.10.11 24,25,26,27 12,13,14,15 28,29,30,31 + permfill.c[0] = 8; permfill.c[1] = 9; permfill.c[2] = 10; permfill.c[3] = 11; //first float + permfill.c[4] = 24; permfill.c[5] = 25; permfill.c[6] = 26; permfill.c[7] = 27; //second float + permfill.c[8] = 12; permfill.c[9] = 13; permfill.c[10] = 14; permfill.c[11] = 15; //third float + permfill.c[12] = 28; permfill.c[13] = 29; permfill.c[14] = 30; permfill.c[15] = 31; //fourth float + + perm_4567 = permfill.v; + + + memcpy(&(x->inbuf[x->inbufpos]), in, n*sizeof(float)); // gather a block of input into input buffer + x->inbufpos += n; + if (x->inbufpos >= x->partsize) { + // input buffer is full, so we begin a new cycle + + if (x->pd_blocksize != n) { + // the patch's blocksize has change since we last dealt the work + x->pd_blocksize = n; + partconv_deal_work(x); + } + + x->inbufpos = 0; + x->curcall = 0; + x->curpart = 0; + memcpy(x->input_td, x->inbuf, x->partsize * sizeof(float)); // copy 'gathering' input buffer into 'transform' buffer + memset(&(x->input_td[x->partsize]), 0, (x->paddedsize - x->partsize) * sizeof(float)); // pad + + fftwf_execute(x->input_plan); // transform the input + + // everything has been read out of prev sumbuf, so clear it + memset(x->sumbuf->prev->td, 0, x->paddedsize * sizeof(float)); + + // advance sumbuf pointers + x->sumbuf = x->sumbuf->next; + x->sumbuf->readpos = 0; + x->sumbuf->prev->readpos = x->partsize; + } + + // convolve this call's portion of partitions + endpart = x->curpart + x->parts_per_call[x->curcall]; + if (endpart > x->nparts) // FIXME does this ever happen? + endpart = x->nparts; + for (p = x->curpart; p < endpart; p++) { + //printf("convolving with partition %d\n", p); + // + // multiply the input block by the partition, accumulating the result in the appropriate sumbuf + // + + // FIXME do this in a circular list-type fashion so we don't need "index" + cursumbuf_fd = x->sumbufs[(x->sumbuf->index + p) % x->nsumbufs].fd; + + for (k = 0; k < x->nbins; k+=4) { + + + + + load_input = (vector float *)&x->input_fd[k][0]; + vinput_fd0 = vec_ld(0, (vector float *) load_input); + + vtemp1 = vec_perm(load_input[0],vzero,input_0022); + + load_input = (vector float *)&x->input_fd[k][4]; + //load input_fd[k][4] + //vector will have input_fd[4,5,6,7] + vinput_fd4 = vec_ld(0, (vector float *) &x->input_fd[k][4]); + + vtemp3 = vec_perm(load_input[0],vzero,input_0022); + + //vec_ld irpart[p][k][0] + //vector will have irpart_fd[0,1,2,3] + + load_irpart = (vector float *) &x->irpart_fd[p][k][0]; + + virpart_fd0 = vec_ld(0,&x->irpart_fd[p][k][0]); + vtemp1 = vec_madd(vtemp1,load_irpart[0],vzero); + + load_irpart = (vector float *) &x->irpart_fd[p][k][4]; + virpart_fd4 = vec_ld(0,&x->irpart_fd[p][k][4]); + vtemp3 = vec_madd(vtemp3,load_irpart[0],vzero); + + + store_multbuf1 = vec_ld(0,&cursumbuf_fd[k][0]); + + store_multbuf2 = vec_ld(0,&cursumbuf_fd[k][4]); + + + //vec_perm to line up the elements + // irpart is fine + // make vector of input_fd[0] [2] and [4] [6] + //make vector of input_fd[1] [3] and [5] [7] + // + // permute only works on bytes so the first float is bytes 0,1,2,3 the second is 4,5,6,7 etc + // + // 0,1,2,3, 8,9,10,11, 16,17,18,19, 24,25,26,27 + // + // 4,5,6,7, 12,13,14,15, 20,21,22,23, 28,29,30,31 + + + //vec_perm temp1 and temp3 into [0,2,4,6] + permtemp0246 = vec_perm(vtemp1,vtemp3,perm_0246); + + //and [1,3,5,7] + permtemp1357 = vec_perm(vtemp1,vtemp3,perm_1357); + + //vinput_fd[1,3,5,7] + vtemp2 = vec_perm(vinput_fd0,vinput_fd4,perm_1357); + + //irpart[1,3,5,7] + vtemp4 = vec_perm(virpart_fd0,virpart_fd4,perm_1357); + + //irpart[0,2,4,6] + vtemp5 = vec_perm(virpart_fd0,virpart_fd4,perm_0246); + + //vec_nmsub input_fd[1,3,5,7] irpart[1,3,5,7] temp[0,2,4,6] + vtemp6 = vec_nmsub(vtemp2,vtemp4,permtemp0246); + + //vec_madd input_fd[1,3,5,7] irpart[0,2,4,6] temp[1,3,5,7] + vtemp7 = vec_madd(vtemp2,vtemp5,permtemp1357); + + + + + //vec_madd all by scale - this is now done after the loop + // vtemp6 = vec_madd(vtemp6,vscale,vzero); + + // vtemp7 = vec_madd(vtemp7,vscale,vzero); + + + //vec_perm data back into place - tricky! + + //vec_perm nmsub_result[0,2,4,6] madd_result [1,3,5,7] + // results will be [0,1,2,3] [4,5,6,7] + vtemp1 = vec_perm(vtemp6,vtemp7,perm_0123); + + vtemp2 = vec_perm(vtemp6,vtemp7,perm_4567); + + + //vec_st + + store_multbuf1 = vec_add(store_multbuf1,vtemp1); + store_multbuf2 = vec_add(store_multbuf2,vtemp2); + + vec_st(store_multbuf1,0,&cursumbuf_fd[k][0]); + + vec_st(store_multbuf2,0,&cursumbuf_fd[k][4]); + + + /* + cursumbuf_fd[k][0] + += + ( x->input_fd[k][0] * x->irpart_fd[p][k][0] + - x->input_fd[k][1] * x->irpart_fd[p][k][1]); + + cursumbuf_fd[k][1] + += + ( x->input_fd[k][0] * x->irpart_fd[p][k][1] + + x->input_fd[k][1] * x->irpart_fd[p][k][0]);*/ + } + } + x->curpart = p; + + // The convolution of the fresh block of input with the first partition of the IR + // is the last thing that gets summed into the current sumbuf before it gets IFFTed and starts being output. + // This happens during the first call of every cycle. + if (x->curcall == 0) { + // current sumbuf has been filled, so transform it (TD to FD). + // Output loop will begin to read it and sum it with the last one + fftwf_execute(x->sumbuf->plan); + } + + // we're summing and outputting the first half of the most recently IFFTed sumbuf + // and the second half of the previous one + sumbuf1ptr = &(x->sumbuf->td[x->sumbuf->readpos]); + sumbuf2ptr = &(x->sumbuf->prev->td[x->sumbuf->prev->readpos]); + for (i = 0; i < n; i++) { + *(out++) = (*(sumbuf1ptr++) + *(sumbuf2ptr++)) * x->scale; + } + x->sumbuf->readpos += n; + x->sumbuf->prev->readpos += n; + + x->curcall++; + + return (w+5); +} diff --git a/help/partconv~-help.pd b/help/partconv~-help.pd new file mode 100644 index 0000000..bda06cf --- /dev/null +++ b/help/partconv~-help.pd @@ -0,0 +1,74 @@ +#N canvas 0 30 680 606 10; +#X obj 201 569 dac~; +#X obj 40 168 dirac~; +#X msg 40 148 0; +#X obj 177 190 osc~; +#X obj 177 164 mtof; +#X floatatom 177 141 5 0 0 0 - - -; +#X obj 419 358 soundfiler; +#X obj 419 259 loadbang; +#X obj 201 541 *~; +#X obj 302 282 openpanel; +#X obj 302 309 t b s; +#X obj 302 240 bng 32 250 50 0 empty empty empty 0 -6 0 8 -260818 -1 +-1; +#X obj 331 466 vsl 8 128 0 100 0 1 empty empty empty 0 -8 0 8 -260818 +-1 -1 9700 1; +#X text 300 219 load a soundfile to use as the impulse response; +#X text 9 127 test with an impulse; +#X text 431 7 Ben Saylor - bensaylor@fastmail.fm; +#X text 105 7 partitioned convolution external; +#X obj 437 388 table irL; +#X msg 272 350 set irL; +#X msg 341 350 set irR; +#X obj 230 541 *~; +#X obj 259 426 partconv~ irR 2048; +#X msg 427 333 read -resize \$1 irL irR; +#X obj 437 410 table irR; +#X msg 232 190 0; +#X msg 270 190 0.1; +#X obj 177 212 *~ 0; +#X obj 271 515 dbtorms; +#X text 226 155 test with a sine; +#X obj 110 187 adc~ 1; +#X text 10 8 partconv~; +#X text 26 44 version 0.2 \, August 12 \, 2005; +#X text 258 43 [partconv~ <IR array> <partition size>]; +#X obj 126 426 partconv~ irL 2048; +#X msg 419 283 read -resize noiseburst.wav irL; +#X msg 419 307 read -resize noiseburst.wav irR; +#X text 258 71 The partition size must be a power of 2 greater than +or equal to the block size. Larger partition sizes are more efficient +\, to a point \, but increase latency (the delay between input and +output is equal to the partition size minus the block size).; +#X connect 1 0 21 0; +#X connect 1 0 33 0; +#X connect 2 0 1 0; +#X connect 3 0 26 0; +#X connect 4 0 3 0; +#X connect 5 0 4 0; +#X connect 7 0 34 0; +#X connect 7 0 35 0; +#X connect 8 0 0 0; +#X connect 9 0 10 0; +#X connect 10 0 18 0; +#X connect 10 0 19 0; +#X connect 10 1 22 0; +#X connect 11 0 9 0; +#X connect 12 0 27 0; +#X connect 18 0 33 0; +#X connect 19 0 21 0; +#X connect 20 0 0 1; +#X connect 21 0 20 0; +#X connect 22 0 6 0; +#X connect 24 0 26 1; +#X connect 25 0 26 1; +#X connect 26 0 21 0; +#X connect 26 0 33 0; +#X connect 27 0 8 1; +#X connect 27 0 20 1; +#X connect 29 0 21 0; +#X connect 29 0 33 0; +#X connect 33 0 8 0; +#X connect 34 0 6 0; +#X connect 35 0 6 0; diff --git a/help/pvoc~-help.pd b/help/pvoc~-help.pd new file mode 100644 index 0000000..4a9f7f2 --- /dev/null +++ b/help/pvoc~-help.pd @@ -0,0 +1,85 @@ +#N canvas 143 0 760 635 10; +#X obj 265 611 dac~; +#X obj 304 561 vsl 8 64 0 1 0 1 empty empty empty 0 -8 0 8 -260818 +-1 -1 3200 1; +#X obj 265 587 *~; +#X obj 68 158 cnv 15 580 220 empty empty empty 20 12 0 14 -195568 -66577 +0; +#N canvas 0 0 450 300 graph1 0; +#X array sample 62079 float 0; +#X coords 0 1 62078 -1 560 100 1; +#X restore 77 52 graph; +#X msg 346 506 read -resize /usr/local/lib/pd/doc/sound/voice.wav sample +; +#X obj 346 526 soundfiler; +#X obj 346 547 s length; +#X obj 346 485 loadbang; +#X obj 77 172 hsl 560 15 0 1 0 0 empty empty time-position -2 -6 0 +8 -233017 -1 -1 26900 1; +#X obj 214 222 vsl 15 128 0.5 2 1 0 empty empty pitch-shift 0 -8 0 +8 -261681 -1 -1 6350 1; +#X msg 182 203 1; +#X floatatom 214 356 5 0 0 0 - - -; +#X obj 280 222 vsl 15 128 0 1 0 0 empty empty phase-locking 0 -8 0 +8 -225280 -1 -1 0 1; +#X floatatom 280 356 5 0 0 0 - - -; +#X obj 74 532 line~; +#X msg 74 510 \$1 200; +#X obj 74 488 *; +#X obj 90 423 r length; +#X text 6 28 [pvoc~ <array> <fftsize> <overlap>]; +#X obj 226 551 lop~ 10; +#X msg 433 486 read -resize \$1 sample; +#X obj 433 466 openpanel; +#X obj 548 337 bng 32 250 50 0 empty empty empty 0 -6 0 8 -260818 -1 +-1; +#X msg 280 457 locking \$1; +#X msg 99 447 setarray sample; +#X msg 380 292 transients 0 6227 35716 53368; +#X msg 380 314 notransients; +#X text 380 275 de-smear transients at these locations; +#X text 546 319 load a sample; +#X obj 226 531 sig~ 1; +#X msg 125 261 0.5; +#X msg 125 281 1; +#X msg 125 301 2; +#X msg 125 341 4; +#X msg 125 510 0 \, 4.41e+06 \$1; +#X obj 125 399 * 100000; +#X msg 125 321 3; +#X text 8 6 pvoc~ 0.2 Ben Saylor bensaylor@fastmail.fm; +#X obj 99 571 pvoc~ sample 2048 4; +#X connect 1 0 2 1; +#X connect 2 0 0 0; +#X connect 2 0 0 1; +#X connect 5 0 6 0; +#X connect 6 0 7 0; +#X connect 8 0 5 0; +#X connect 9 0 17 0; +#X connect 10 0 12 0; +#X connect 11 0 10 0; +#X connect 12 0 30 0; +#X connect 13 0 14 0; +#X connect 14 0 24 0; +#X connect 15 0 39 0; +#X connect 16 0 15 0; +#X connect 17 0 16 0; +#X connect 18 0 17 1; +#X connect 18 0 25 0; +#X connect 20 0 39 1; +#X connect 21 0 6 0; +#X connect 22 0 21 0; +#X connect 23 0 22 0; +#X connect 24 0 39 0; +#X connect 25 0 39 0; +#X connect 26 0 39 0; +#X connect 27 0 39 0; +#X connect 30 0 20 0; +#X connect 31 0 36 0; +#X connect 32 0 36 0; +#X connect 33 0 36 0; +#X connect 34 0 36 0; +#X connect 35 0 15 0; +#X connect 36 0 35 0; +#X connect 37 0 36 0; +#X connect 39 0 2 0; diff --git a/noiseburst.wav b/noiseburst.wav Binary files differnew file mode 100644 index 0000000..2043f3a --- /dev/null +++ b/noiseburst.wav diff --git a/partconv~.c b/partconv~.c new file mode 100644 index 0000000..60175f2 --- /dev/null +++ b/partconv~.c @@ -0,0 +1,382 @@ +/* Copyright 2003-2005 Benjamin R. Saylor <bensaylor@fastmail.fm> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +// Thu Feb 17 22:29:27 CST 2005 - Changed outbuf loops to use comparison instead of modulo (suggested by c. clepper) - faster +// Sat Jul 30 19:59:08 AKDT 2005 - Completed modification for re-blocking and dividing up the work between calls. +// This has eliminated dropouts due to lots of work on large block boundaries. +// Fri Aug 5 10:46:33 AKDT 2005 - accumulate in the frequency domain, so only 1 IFFT is needed per input block, +// rather than <nparts> IFFTs. Big performance boost. +// Fri Aug 5 20:27:05 AKDT 2005 - should work properly with arbitrary (2^n) blocksize <= partsize +// Fri Aug 12 00:32:29 AKDT 2005 - added altivec code by Chris Clepper + +// TODO +// SSE version +// multichannel (multiple IRs)? probably wouldn't gain much from this +// divide work more evenly? (0, 15, 7, 23, 3, 11, 19, 27, ...) +// someday, an SSE3 version (supposed to make complex math fast) + +#include <math.h> +#include <string.h> +#include <fftw3.h> +#include "m_pd.h" + +#define MAXPARTS 256 // max number of partitions + +#ifdef USE_SSE +typedef float v4sf __attribute__ ((vector_size (16))); +#endif + +static t_class *partconv_class; + +struct sumbuffer { + int index; + fftwf_complex *fd; + float *td; + fftwf_plan plan; + int readpos; + struct sumbuffer *next, *prev; +}; + +typedef struct _partconv { + t_object x_obj; + t_symbol *arrayname; + int partsize; + int fftsize; + float scale; + int paddedsize; + int nbins; + int nparts; + int ir_prepared; + int pd_blocksize; + + // partitions of impulse response + fftwf_plan irpart_plan; + float *irpart_td[MAXPARTS]; + fftwf_complex *irpart_fd[MAXPARTS]; + + // input + fftwf_plan input_plan; + float *inbuf; + int inbufpos; + float *input_td; + fftwf_complex *input_fd; + + // circular array/list of buffers for accumulating results of convolution + struct sumbuffer sumbufs[MAXPARTS+2]; + int nsumbufs; // number of sumbufs + struct sumbuffer *sumbuf; // the current sumbuf corresponding to the first partition of the IR + + // dividing up the work between calls to perform() + int parts_per_call[MAXPARTS]; // parts_per_call[c] is the number of partitions to convolve during perform() call c + int curcall; // current call, counted from the beginning of the current cycle (input buffer full) + int curpart; // current partition to convolve +} t_partconv; + +// Determine how to divide the work as evenly as possible between calls to perform(). +static void partconv_deal_work(t_partconv *x) +{ + int calls_per_cycle; + int parts_to_distribute; + int i; + + // Like dealing cards. + // One cycle is defined as the time it takes to fill the input buffer (whose size is the user-given partition size) + calls_per_cycle = x->partsize / x->pd_blocksize; + for (i = 0; i < calls_per_cycle; i++) { + x->parts_per_call[i] = 0; + } + i = 0; + parts_to_distribute = x->nparts; + while (parts_to_distribute) { + x->parts_per_call[i]++; + parts_to_distribute--; + i = (i + 1) % calls_per_cycle; + } + /* + for (i = 0; i < calls_per_cycle; i++) { + printf("parts_per_call[%d] = %d\n", i, x->parts_per_call[i]); + } + */ +} + +#ifdef __VEC__ +#include "altivec-perform.inc.c" +#else + +static t_int *partconv_perform(t_int *w) +{ + t_partconv *x = (t_partconv *)(w[1]); + t_float *in = (t_float *)(w[2]); + t_float *out = (t_float *)(w[3]); + int n = (int)(w[4]); + int i; + int j; + int k; // bin + int p; // partition + int endpart; + +#ifdef USE_SSE + int v1; + int v2; + int nvecs; + v4sf *cursumbuf_fd; + v4sf *input_fd; + v4sf *irpart_fd; +#else + fftwf_complex *cursumbuf_fd; + fftwf_complex *input_fd; + fftwf_complex *irpart_fd; +#endif + + float *sumbuf1ptr; + float *sumbuf2ptr; + + memcpy(&(x->inbuf[x->inbufpos]), in, n*sizeof(float)); // gather a block of input into input buffer + x->inbufpos += n; + if (x->inbufpos >= x->partsize) { + // input buffer is full, so we begin a new cycle + + if (x->pd_blocksize != n) { + // the patch's blocksize has change since we last dealt the work + x->pd_blocksize = n; + partconv_deal_work(x); + } + + x->inbufpos = 0; + x->curcall = 0; + x->curpart = 0; + memcpy(x->input_td, x->inbuf, x->partsize * sizeof(float)); // copy 'gathering' input buffer into 'transform' buffer + memset(&(x->input_td[x->partsize]), 0, (x->paddedsize - x->partsize) * sizeof(float)); // pad + + fftwf_execute(x->input_plan); // transform the input + + // everything has been read out of prev sumbuf, so clear it + memset(x->sumbuf->prev->td, 0, x->paddedsize * sizeof(float)); + + // advance sumbuf pointers + x->sumbuf = x->sumbuf->next; + x->sumbuf->readpos = 0; + x->sumbuf->prev->readpos = x->partsize; + } + + // convolve this call's portion of partitions + endpart = x->curpart + x->parts_per_call[x->curcall]; + if (endpart > x->nparts) // FIXME does this ever happen? + endpart = x->nparts; + for (p = x->curpart; p < endpart; p++) { + // multiply the input block by the partition, accumulating the result in the appropriate sumbuf +#ifdef USE_SSE +#include "sse-conv.inc.c" +#else + cursumbuf_fd = x->sumbufs[(x->sumbuf->index + p) % x->nsumbufs].fd; + input_fd = x->input_fd; + irpart_fd = x->irpart_fd[p]; + + for (k = 0; k < x->nbins; k++) { + + cursumbuf_fd[k][0] + += + ( input_fd[k][0] * irpart_fd[k][0] + - input_fd[k][1] * irpart_fd[k][1]); + + cursumbuf_fd[k][1] + += + ( input_fd[k][0] * irpart_fd[k][1] + + input_fd[k][1] * irpart_fd[k][0]); + } +#endif + } + x->curpart = p; + + // The convolution of the fresh block of input with the first partition of the IR + // is the last thing that gets summed into the current sumbuf before it gets IFFTed and starts being output. + // This happens during the first call of every cycle. + if (x->curcall == 0) { + // current sumbuf has been filled, so transform it + // Output loop will begin to read it and sum it with the last one + fftwf_execute(x->sumbuf->plan); + } + + // we're summing and outputting the first half of the most recently IFFTed sumbuf + // and the second half of the previous one + sumbuf1ptr = &(x->sumbuf->td[x->sumbuf->readpos]); + sumbuf2ptr = &(x->sumbuf->prev->td[x->sumbuf->prev->readpos]); + for (i = 0; i < n; i++) { + out[i] = (sumbuf1ptr[i] + sumbuf2ptr[i]) * x->scale; + } + x->sumbuf->readpos += n; + x->sumbuf->prev->readpos += n; + + x->curcall++; + + return (w+5); +} + +#endif // __VEC__ + +static void partconv_free(t_partconv *x) +{ + int i; + + fftwf_free(x->inbuf); + for (i = 0; i < x->nparts; i++) + fftwf_free(x->irpart_td[i]); + fftwf_free(x->input_td); + fftwf_destroy_plan(x->input_plan); + for (i = 0; i < x->nsumbufs; i++) { + fftwf_free(x->sumbufs[i].fd); + fftwf_destroy_plan(x->sumbufs[i].plan); + } +} + +static void partconv_set(t_partconv *x, t_symbol *s) +{ + int i; + int j; + t_garray *arrayobj; + t_float *array; + int arraysize; + int arraypos; + + // get the array from pd + x->arrayname = s; + if ( ! (arrayobj = (t_garray *)pd_findbyclass(x->arrayname, garray_class))) { + if (*x->arrayname->s_name) { + pd_error(x, "partconv~: %s: no such array", x->arrayname->s_name); + return; + } + } else if ( ! garray_getfloatarray(arrayobj, &arraysize, &array)) { + pd_error(x, "%s: bad template", x->arrayname->s_name); + return; + } + + // if the IR has already been prepared, free everything first + if (x->ir_prepared == 1) { + partconv_free(x); + } + + // caculate number of partitions + x->nparts = arraysize / x->partsize; + if (arraysize % x->partsize != 0) + x->nparts++; + if (x->nparts > MAXPARTS) + x->nparts = MAXPARTS; + + // allocate, fill, pad, and transform each IR partition + for (arraypos = 0, i = 0; i < x->nparts; i++) { + x->irpart_td[i] = fftwf_malloc(sizeof(float) * x->paddedsize); + x->irpart_fd[i] = (fftwf_complex *) x->irpart_td[i]; + x->irpart_plan = fftwf_plan_dft_r2c_1d(x->fftsize, x->irpart_td[i], x->irpart_fd[i], FFTW_MEASURE); + for (j = 0; j < x->partsize && arraypos < arraysize; j++, arraypos++) { + x->irpart_td[i][j] = array[arraypos]; + } + for ( ; j < x->paddedsize; j++) { + x->irpart_td[i][j] = 0; + } + fftwf_execute(x->irpart_plan); + fftwf_destroy_plan(x->irpart_plan); + // now, x->irpart[i] contains the DFT of the ith partition of the impulse response. + } + + x->inbuf = fftwf_malloc(sizeof(float) * x->partsize); + x->inbufpos = 0; + + // allocate buffer for DFT of padded input + x->input_td = fftwf_malloc(sizeof(float) * x->paddedsize); // float array into which input block is copied and padded + x->input_fd = (fftwf_complex *) x->input_td; // fftwf_complex pointer to the same array to facilitate in-place fft + x->input_plan = fftwf_plan_dft_r2c_1d(x->fftsize, x->input_td, x->input_fd, FFTW_MEASURE); + + // set up circular list/array of buffers for accumulating results of convolution + x->nsumbufs = x->nparts + 2; + x->sumbuf = &(x->sumbufs[0]); + for (i = 0; i < x->nsumbufs; i++) { + x->sumbufs[i].index = i; + x->sumbufs[i].fd = fftwf_malloc(sizeof(float) * x->paddedsize); + memset(x->sumbufs[i].fd, 0, sizeof(float) * x->paddedsize); + x->sumbufs[i].td = (float *) x->sumbufs[i].fd; + x->sumbufs[i].plan = fftwf_plan_dft_c2r_1d(x->fftsize, x->sumbufs[i].fd, x->sumbufs[i].td, FFTW_MEASURE); + x->sumbufs[i].readpos = 0; + } + x->sumbufs[0].next = &(x->sumbufs[1]); + x->sumbufs[0].prev = &(x->sumbufs[x->nsumbufs - 1]); + for (i = 1; i < x->nsumbufs; i++) { + x->sumbufs[i].next = &(x->sumbufs[(i + 1) % x->nsumbufs]); + x->sumbufs[i].prev = &(x->sumbufs[i - 1]); + } + + partconv_deal_work(x); + x->curcall = 0; + x->curpart = 0; + + post("partconv~: using %s in %d partitions with FFT-size %d", x->arrayname->s_name, x->nparts, x->fftsize); + x->ir_prepared = 1; +} + +static void partconv_dsp(t_partconv *x, t_signal **sp) +{ + // if the ir array has not been prepared, prepare it + if (x->ir_prepared == 0) { + partconv_set(x, x->arrayname); + } + + dsp_add(partconv_perform, 4, x, sp[0]->s_vec, sp[1]->s_vec, sp[0]->s_n); +} + +static void *partconv_new(t_symbol *s, int argc, t_atom *argv) +{ + t_partconv *x = (t_partconv *)pd_new(partconv_class); + + outlet_new(&x->x_obj, gensym("signal")); + + if (argc != 2) { + post("argc = %d", argc); + error("partconv~: usage: [partconv~ <arrayname> <partsize>]\n\t- partition size must be a power of 2 >= blocksize"); + return NULL; + } + + x->arrayname = atom_getsymbol(argv); + x->partsize = atom_getfloatarg(1, argc, argv); + if (x->partsize <= 0 || x->partsize != (1 << ilog2(x->partsize))) + { + error("partconv~: partition size not a power of 2"); + return NULL; + } + x->fftsize = 2 * x->partsize; + x->scale = 1 / ((float) x->fftsize); + + // need 2*(n/2+1) float array for in-place transform, where n is fftsize. +#ifdef USE_SSE + // for sse, make it a multiple of 8, because we pull in 4 bins at a time and don't want to get a segfault + x->paddedsize = 2 * (x->fftsize / 2 + 4); +#else + x->paddedsize = 2 * (x->fftsize / 2 + 1); +#endif + x->nbins = x->fftsize / 2 + 1; + x->ir_prepared = 0; + x->pd_blocksize = sys_getblksize(); + + return (x); +} + +void partconv_tilde_setup(void) +{ + partconv_class = class_new(gensym("partconv~"), (t_newmethod)partconv_new, + (t_method)partconv_free, sizeof(t_partconv), 0, A_GIMME, 0); + class_addmethod(partconv_class, nullfn, gensym("signal"), 0); + class_addmethod(partconv_class, (t_method) partconv_dsp, gensym("dsp"), 0); + class_addmethod(partconv_class, (t_method) partconv_set, gensym("set"), A_DEFSYMBOL, 0); +} diff --git a/partconv~.dsp b/partconv~.dsp new file mode 100644 index 0000000..a5a952d --- /dev/null +++ b/partconv~.dsp @@ -0,0 +1,114 @@ +# Microsoft Developer Studio Project File - Name="partconv" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** NICHT BEARBEITEN **
+
+# TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102
+
+CFG=partconv - Win32 Debug
+!MESSAGE Dies ist kein gültiges Makefile. Zum Erstellen dieses Projekts mit NMAKE
+!MESSAGE verwenden Sie den Befehl "Makefile exportieren" und führen Sie den Befehl
+!MESSAGE
+!MESSAGE NMAKE /f "partconv.mak".
+!MESSAGE
+!MESSAGE Sie können beim Ausführen von NMAKE eine Konfiguration angeben
+!MESSAGE durch Definieren des Makros CFG in der Befehlszeile. Zum Beispiel:
+!MESSAGE
+!MESSAGE NMAKE /f "partconv.mak" CFG="partconv - Win32 Debug"
+!MESSAGE
+!MESSAGE Für die Konfiguration stehen zur Auswahl:
+!MESSAGE
+!MESSAGE "partconv - Win32 Release" (basierend auf "Win32 (x86) Dynamic-Link Library")
+!MESSAGE "partconv - Win32 Debug" (basierend auf "Win32 (x86) Dynamic-Link Library")
+!MESSAGE
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+MTL=midl.exe
+RSC=rc.exe
+
+!IF "$(CFG)" == "partconv - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "PARTCONV_EXPORTS" /YX /FD /c
+# ADD CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "PARTCONV_EXPORTS" /YX /FD /c
+# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32
+# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32
+# ADD BASE RSC /l 0xc07 /d "NDEBUG"
+# ADD RSC /l 0xc07 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:I386
+# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:I386
+
+!ELSEIF "$(CFG)" == "partconv - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "PARTCONV_EXPORTS" /YX /FD /GZ /c
+# ADD CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "PARTCONV_EXPORTS" /YX /FD /GZ /c
+# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32
+# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32
+# ADD BASE RSC /l 0xc07 /d "_DEBUG"
+# ADD RSC /l 0xc07 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib pd.lib fftw3.lib /nologo /dll /debug /machine:I386 /out:"C:\Programme\pd_0.37\extra\partconv.dll" /pdbtype:sept
+
+!ENDIF
+
+# Begin Target
+
+# Name "partconv - Win32 Release"
+# Name "partconv - Win32 Debug"
+# Begin Group "Quellcodedateien"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+# Begin Source File
+
+SOURCE=.\partconv.c
+# End Source File
+# End Group
+# Begin Group "Header-Dateien"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+# Begin Source File
+
+SOURCE=.\fftw3.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\m_pd.h
+# End Source File
+# End Group
+# Begin Group "Ressourcendateien"
+
+# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+# End Group
+# End Target
+# End Project
diff --git a/partconv~.dsw b/partconv~.dsw new file mode 100644 index 0000000..940b7a9 --- /dev/null +++ b/partconv~.dsw @@ -0,0 +1,29 @@ +Microsoft Developer Studio Workspace File, Format Version 6.00
+# WARNUNG: DIESE ARBEITSBEREICHSDATEI DARF NICHT BEARBEITET ODER GELÖSCHT WERDEN!
+
+###############################################################################
+
+Project: "partconv"=.\partconv.dsp - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+}}}
+
+###############################################################################
+
+Global:
+
+Package=<5>
+{{{
+}}}
+
+Package=<3>
+{{{
+}}}
+
+###############################################################################
+
diff --git a/partconv~.libs b/partconv~.libs new file mode 100644 index 0000000..67c2eba --- /dev/null +++ b/partconv~.libs @@ -0,0 +1 @@ +-lfftw3 diff --git a/partconv~.vcproj b/partconv~.vcproj new file mode 100644 index 0000000..ce1bf88 --- /dev/null +++ b/partconv~.vcproj @@ -0,0 +1,169 @@ +<?xml version="1.0" encoding = "Windows-1252"?>
+<VisualStudioProject
+ ProjectType="Visual C++"
+ Version="7.00"
+ Name="partconv~"
+ SccProjectName=""
+ SccLocalPath="">
+ <Platforms>
+ <Platform
+ Name="Win32"/>
+ </Platforms>
+ <Configurations>
+ <Configuration
+ Name="Release|Win32"
+ OutputDirectory=".\Release"
+ IntermediateDirectory=".\Release"
+ ConfigurationType="2"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="FALSE"
+ CharacterSet="2">
+ <Tool
+ Name="VCCLCompilerTool"
+ InlineFunctionExpansion="1"
+ PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;PARTCONV_EXPORTS"
+ StringPooling="TRUE"
+ RuntimeLibrary="0"
+ EnableFunctionLevelLinking="TRUE"
+ UsePrecompiledHeader="2"
+ PrecompiledHeaderFile=".\Release/partconv.pch"
+ AssemblerListingLocation=".\Release/"
+ ObjectFile=".\Release/"
+ ProgramDataBaseFileName=".\Release/"
+ WarningLevel="3"
+ SuppressStartupBanner="TRUE"/>
+ <Tool
+ Name="VCCustomBuildTool"/>
+ <Tool
+ Name="VCLinkerTool"
+ AdditionalOptions="/MACHINE:I386"
+ AdditionalDependencies="odbc32.lib odbccp32.lib"
+ OutputFile=".\Release/partconv.dll"
+ LinkIncremental="1"
+ SuppressStartupBanner="TRUE"
+ ProgramDatabaseFile=".\Release/partconv.pdb"
+ ImportLibrary=".\Release/partconv.lib"/>
+ <Tool
+ Name="VCMIDLTool"
+ PreprocessorDefinitions="NDEBUG"
+ MkTypLibCompatible="TRUE"
+ SuppressStartupBanner="TRUE"
+ TargetEnvironment="1"
+ TypeLibraryName=".\Release/partconv.tlb"/>
+ <Tool
+ Name="VCPostBuildEventTool"/>
+ <Tool
+ Name="VCPreBuildEventTool"/>
+ <Tool
+ Name="VCPreLinkEventTool"/>
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="NDEBUG"
+ Culture="3079"/>
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"/>
+ <Tool
+ Name="VCWebDeploymentTool"/>
+ </Configuration>
+ <Configuration
+ Name="Debug|Win32"
+ OutputDirectory=".\Debug"
+ IntermediateDirectory=".\Debug"
+ ConfigurationType="2"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="FALSE"
+ CharacterSet="2">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ PreprocessorDefinitions="WIN32;_WINDOWS;_USRDLL;PARTCONV_EXPORTS;NDEBUG;_WINDOWS;_USRDLL;LEST_I;NT"
+ BasicRuntimeChecks="0"
+ RuntimeLibrary="0"
+ DisableLanguageExtensions="TRUE"
+ UsePrecompiledHeader="0"
+ PrecompiledHeaderFile=""
+ AssemblerOutput="2"
+ AssemblerListingLocation=".\Debug/"
+ ObjectFile=".\Debug/"
+ ProgramDataBaseFileName=".\Debug/"
+ WarningLevel="3"
+ SuppressStartupBanner="TRUE"
+ Detect64BitPortabilityProblems="TRUE"
+ DebugInformationFormat="0"
+ CompileAs="1"
+ ShowIncludes="TRUE"/>
+ <Tool
+ Name="VCCustomBuildTool"/>
+ <Tool
+ Name="VCLinkerTool"
+ AdditionalOptions="/MACHINE:I386"
+ AdditionalDependencies="odbc32.lib odbccp32.lib pd.lib fftw3.lib"
+ OutputFile="..\partconv~.dll"
+ LinkIncremental="1"
+ SuppressStartupBanner="TRUE"
+ GenerateDebugInformation="TRUE"
+ ProgramDatabaseFile=".\Debug/partconv.pdb"
+ ImportLibrary=".\Debug/partconv.lib"
+ TargetMachine="1"/>
+ <Tool
+ Name="VCMIDLTool"
+ PreprocessorDefinitions="_DEBUG"
+ MkTypLibCompatible="TRUE"
+ SuppressStartupBanner="TRUE"
+ TargetEnvironment="1"
+ TypeLibraryName=".\Debug/partconv.tlb"/>
+ <Tool
+ Name="VCPostBuildEventTool"/>
+ <Tool
+ Name="VCPreBuildEventTool"/>
+ <Tool
+ Name="VCPreLinkEventTool"/>
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="_DEBUG"
+ Culture="3079"/>
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"/>
+ <Tool
+ Name="VCWebDeploymentTool"/>
+ </Configuration>
+ </Configurations>
+ <Files>
+ <Filter
+ Name="Quellcodedateien"
+ Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat">
+ <File
+ RelativePath="partconv.c">
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;PARTCONV_EXPORTS;_MBCS;$(NoInherit)"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_USRDLL;PARTCONV_EXPORTS;_MBCS;$(NoInherit)"
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Header-Dateien"
+ Filter="h;hpp;hxx;hm;inl">
+ <File
+ RelativePath="fftw3.h">
+ </File>
+ <File
+ RelativePath="m_pd.h">
+ </File>
+ </Filter>
+ <Filter
+ Name="Ressourcendateien"
+ Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe">
+ </Filter>
+ </Files>
+ <Globals>
+ </Globals>
+</VisualStudioProject>
@@ -0,0 +1,392 @@ +/* Copyright 2003 Benjamin R. Saylor <bensaylor@fastmail.fm> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <math.h> +#include <fftw3.h> +#include "m_pd.h" + +// FIXME: +// set array when dsp is turned on +// get rid of shiftbuf, just save values that will be needed next before overwriting them +// cubic interp +// use float fftw? +// performance testing +// what if there are 2 transients less than fftsize apart? second one might get smeared. +// compare sound with phaselockedvoc.pd +// detect transients +// peaks + noise +// other phase locking methods +// use floats? +// use in-place? +// if don't have an array, call setarray(x->arrayname) +// window size and fft size independent (what is gained by zero-padding?) +// error if parent blocksize is larger than hopsize +// slowly return window to true position after centering around a transient? +// use fewer fft arrays? + +// DONE: +// use FFTW_MEASURE + +static t_class *pvoc_class; + +typedef struct _pvoc { + t_object x_obj; + t_symbol *arrayname; + t_garray *arrayobj; + t_float *array; + int arraysize; + double *window; + int fftsize; + int overlap; + int hopsize; // = fftsize / overlap + int trans[256]; // sample indices of transients + int ntrans; // number of transients + int wastrans; // there was a transient in the left half of the window during the previous frame + double phaselocking; + fftw_plan fftplan; + fftw_plan fft2plan; + fftw_plan ifftplan; + double *fftin; + double *fft2in; + double *ifftout; + fftw_complex *fftout; + fftw_complex *fft2out; + fftw_complex *ifftin; + fftw_complex *shiftbuf; + double *outbuf; + int outbufpos; +} t_pvoc; + +// if there is a transient between samples a and b, return its position, else return -1 +static inline int transient_between(t_pvoc *x, int a, int b) +{ + // linear search for now: FIXME + int i; + for (i = 0; i < x->ntrans; i++) + if (a <= x->trans[i] && b >= x->trans[i]) + return x->trans[i]; + return -1; +} + +#if 1 +static inline double interpolate(t_pvoc *x, double t) +{ + // linear interpolation for now: FIXME + if (t < 0 || t > (x->arraysize - 1)) + return 0.0; + else { + int x_1 = t; + double y_1 = x->array[x_1]; + double y_2 = x->array[x_1 + 1]; + + return (y_2 - y_1) * (t - x_1) + y_1; + } +} +#else +static inline double interpolate(t_pvoc *x, double t) +{ + // FIXME check bounds (can't think now) + int truncphase = (int) x->phase; + double fr = x->phase - ((double) truncphase); + double inm1 = x->ifftout[truncphase - 1]; + double in = x->ifftout[truncphase + 0]; + double inp1 = x->ifftout[truncphase + 1]; + double inp2 = x->ifftout[truncphase + 2]; + + // taken from swh-plugins-0.4.0/ladspa-util.h cube_interp, made to use doubles instead since doubles are what i'm using for some reason + return in + 0.5 * fr * (inp1 - inm1 + + fr * (4.0 * inp1 + 2.0 * inm1 - 5.0 * in - inp2 + + fr * (3.0 * (in - inp1) - inm1 + inp2))); +} +#endif + +static t_int *pvoc_perform(t_int *w) +{ + t_pvoc *x = (t_pvoc *)(w[1]); + t_float *in1 = (t_float *)(w[2]); + t_float *in2 = (t_float *)(w[3]); + t_float *out = (t_float *)(w[4]); + int n = (int)(w[5]); + double t; + double pitchshift; + int transientpos; + int desmear; + double framestart; + double frameend; + int i; // 0 to n -type iterator + int j; // start to end -type iterator + int k; // bin iterator + double xlook; // iterator for interpolated table lookup + + // if we are at the start of a new frame... + if (x->outbufpos % x->hopsize == 0) { + + // don't desmear this frame by default + desmear = 0; + + // sample the input signals (FIXME just sample these in the beginning..) + t = in1[0]; // time position + pitchshift = in2[0]; // pitch shift + + // set the frame boundaries with the desired time pos in the middle + framestart = t - (pitchshift * x->fftsize / 2); + frameend = framestart + pitchshift * x->fftsize; + + // prepare to de-smear transients + transientpos = transient_between(x, (int) framestart, (int) frameend); + if (transientpos != -1) { + // there is a transient in this frame +#if 0 + if (transientpos > t) { + // there is a transient in the right half of the window: + // --> move the window left until the transient is outside it + frameend = transientpos; + framestart = frameend - x->fftsize; + x->wastrans = 0; + } else if ( ! x->wastrans) { + // there is a transient in the left half of the window, + // and there was no transient there during the previous frame: + // --> center the window around the transient and remember to desmear this frame + framestart = transientpos - (x->fftsize / 2); + frameend = framestart + x->fftsize; + desmear = 1; + x->wastrans = 1; + } else + x->wastrans = 1; +#else + // this simpler method turns out to sound better (timing sounds more accurate, no "frozen" sound preceding transients) + if ( ! x->wastrans) { + // there is a transient in the window, + // and there wasn't during the previous frame: + // --> center the window around the transient and remember to desmear this frame + framestart = transientpos - (pitchshift * x->fftsize / 2); + desmear = 1; + } + x->wastrans = 1; +#endif + } else + x->wastrans = 0; + + // interpolate-read the array from framestart to frameend into fftin, windowing it + for (i = 0, xlook = framestart; i < x->fftsize; xlook += pitchshift, i++) { + x->fftin[i] = interpolate(x, xlook) * x->window[i]; + } + + // hop forward and read the second frame into fft2in + // FIXME merge the two loops? + framestart += pitchshift * x->hopsize; + for (i = 0, xlook = framestart; i < x->fftsize; xlook += pitchshift, i++) { + x->fft2in[i] = interpolate(x, xlook) * x->window[i]; + } + + // do the ffts + fftw_execute(x->fftplan); + fftw_execute(x->fft2plan); + + if ( ! desmear) { + // Miller Puckette's phase modification math (translation from 09.pvoc.pd and 10.phaselockedvoc.pd) + + double a, b, r, c, d; + + // propagate phase + for (k = 0; k < (x->fftsize / 2 + 1); k++) { + a = x->ifftin[k][0] * x->fftout[k][0] + x->ifftin[k][1] * x->fftout[k][1] + 0.00000000000000000001; + b = x->ifftin[k][1] * x->fftout[k][0] - x->ifftin[k][0] * x->fftout[k][1]; + r = 1 / sqrt(a * a + b * b); + c = a * r; + d = b * r; + x->shiftbuf[k][0] = c * x->fft2out[k][0] - d * x->fft2out[k][1]; + x->shiftbuf[k][1] = c * x->fft2out[k][1] + d * x->fft2out[k][0]; + } + + // don't phase-lock the first bin + x->ifftin[0][0] = x->shiftbuf[0][0]; + x->ifftin[0][1] = x->shiftbuf[0][1]; + + // phase-lock + for (k = 1; k < (x->fftsize / 2); k++) { + x->ifftin[k][0] = x->shiftbuf[k][0] - x->phaselocking * (x->shiftbuf[k - 1][0] + x->shiftbuf[k + 1][0]); + x->ifftin[k][1] = x->shiftbuf[k][1] - x->phaselocking * (x->shiftbuf[k - 1][1] + x->shiftbuf[k + 1][1]); + } + + // don't phase-lock the last bin + x->ifftin[x->fftsize / 2][0] = x->shiftbuf[x->fftsize / 2][0]; + x->ifftin[x->fftsize / 2][1] = x->shiftbuf[x->fftsize / 2][1]; + + } else { + // this frame is to be de-smeared, which means don't modify the phases, just preserve the original phases + for (k = 0; k < (x->fftsize / 2 + 1); k++) { + x->ifftin[k][0] = x->fftout[k][0]; + x->ifftin[k][1] = x->fftout[k][1]; + } + } + + // do the ifft + fftw_execute(x->ifftplan); + + // add into output buffer, windowing and normalizing first (divide by blocksize) + for (i = 0, j = x->outbufpos; i < x->fftsize; i++, j++) { + x->outbuf[j % x->fftsize] += x->ifftout[i] / x->fftsize * x->window[i]; + } + } + + // output one block of the output buffer + for (i = 0, j = x->outbufpos; i < n; i++, j++) { + out[i] = x->outbuf[j % x->fftsize]; + x->outbuf[j % x->fftsize] = 0; // zero the part of the buffer that was just output + } + + // move the output buffer pointer forward by one block + x->outbufpos = (x->outbufpos + n) % x->fftsize; + + return (w+6); +} + +static void pvoc_dsp(t_pvoc *x, t_signal **sp) +{ + dsp_add(pvoc_perform, 5, x, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n); +} + +// adapted from jsarlo's windowing library +// Hanning +static void makewindow(double *w, int n) +{ + int i; + double xshift = n / 2.0; + double x; + for (i = 0; i < n; i++) { + x = (i - xshift) / xshift; + w[i] = 0.5 * (1 + cos(M_PI * x)); + } +} + +static void setarray(t_pvoc *x, t_symbol *s) +{ + x->arrayname = s; + if ( ! (x->arrayobj = (t_garray *)pd_findbyclass(x->arrayname, garray_class))) { + if (*x->arrayname->s_name) pd_error(x, "pvoc~: %s: no such array", x->arrayname->s_name); + x->array = NULL; + x->arraysize = 0; + } else if ( ! garray_getfloatarray(x->arrayobj, &x->arraysize, &x->array)) { + error("%s: bad template", x->arrayname->s_name); + x->array = NULL; + x->arraysize = 0; + } else { + garray_usedindsp(x->arrayobj); + } +} + +static void locking(t_pvoc *x, t_floatarg f) +{ + x->phaselocking = f; +} + +// takes a list of sample positions of transients to be de-smeared +static void transients(t_pvoc *x, t_symbol *s, int argc, t_atom *argv) +{ + int i; + + x->ntrans = argc; + for (i = 0; i < x->ntrans; i++) + x->trans[i] = atom_getfloatarg(i, argc, argv); +} + +// for clarity (same as "transients" with no args) +static void notransients(t_pvoc *x) +{ + x->ntrans = 0; +} + +static void *pvoc_new(t_symbol *s, int argc, t_atom *argv) +{ + t_pvoc *x = (t_pvoc *)pd_new(pvoc_class); + int i; + + inlet_new(&x->x_obj, &x->x_obj.ob_pd, &s_signal, &s_signal); // pitch-shift inlet + outlet_new(&x->x_obj, gensym("signal")); + + if (argc != 3) { + post("argc = %d", argc); + error("pvoc~: usage: [pvoc~ <arrayname> <fftsize> <overlap>]"); + return NULL; + } + + x->fftsize = atom_getfloatarg(1, argc, argv); + x->overlap = atom_getfloatarg(2, argc, argv); + x->hopsize = x->fftsize / x->overlap; + x->ntrans = 0; + x->wastrans = 0; + x->phaselocking = 0; + + // get the source array + setarray(x, atom_getsymbol(argv)); + + // set up output ring buffer + x->outbuf = getbytes(sizeof(double) * x->fftsize); + x->outbufpos = 0; + for (i = 0; i < x->fftsize; i++) + x->outbuf[i] = 0; + + // make table for window function + x->window = getbytes(sizeof(double) * x->fftsize); + makewindow(x->window, x->fftsize); + + // set up fftw stuff + x->fftin = fftw_malloc(sizeof(double) * x->fftsize); + x->fft2in = fftw_malloc(sizeof(double) * x->fftsize); + x->ifftout = fftw_malloc(sizeof(double) * x->fftsize); + x->fftout = fftw_malloc(sizeof(fftw_complex) * (x->fftsize / 2 + 1)); + x->fft2out = fftw_malloc(sizeof(fftw_complex) * (x->fftsize / 2 + 1)); + x->ifftin = fftw_malloc(sizeof(fftw_complex) * (x->fftsize / 2 + 1)); + x->shiftbuf = fftw_malloc(sizeof(fftw_complex) * (x->fftsize / 2 + 1)); + for (i = 0; i < (x->fftsize / 2 + 1); i++) { + x->ifftin[i][0] = 0; // need to start the phases from zero + x->ifftin[i][1] = 0; + } + x->fftplan = fftw_plan_dft_r2c_1d(x->fftsize, x->fftin, x->fftout, FFTW_MEASURE); + x->fft2plan = fftw_plan_dft_r2c_1d(x->fftsize, x->fft2in, x->fft2out, FFTW_MEASURE); + x->ifftplan = fftw_plan_dft_c2r_1d(x->fftsize, x->ifftin, x->ifftout, FFTW_MEASURE | FFTW_PRESERVE_INPUT); + + return (x); +} + +static void pvoc_free(t_pvoc *x) +{ + freebytes(x->outbuf, sizeof(double) * x->fftsize); + freebytes(x->window, sizeof(double) * x->fftsize); + fftw_free(x->fftin); + fftw_free(x->fft2in); + fftw_free(x->ifftout); + fftw_free(x->fftout); + fftw_free(x->fft2out); + fftw_free(x->ifftin); + fftw_free(x->shiftbuf); + fftw_destroy_plan(x->fftplan); + fftw_destroy_plan(x->fft2plan); + fftw_destroy_plan(x->ifftplan); +} + +void pvoc_tilde_setup(void) +{ + pvoc_class = class_new(gensym("pvoc~"), (t_newmethod)pvoc_new, (t_method)pvoc_free, sizeof(t_pvoc), 0, A_GIMME, 0); + class_addmethod(pvoc_class, nullfn, gensym("signal"), 0); + class_addmethod(pvoc_class, (t_method) pvoc_dsp, gensym("dsp"), 0); + class_addmethod(pvoc_class, (t_method) setarray, gensym("setarray"), A_DEFSYMBOL, 0); + class_addmethod(pvoc_class, (t_method) locking, gensym("locking"), A_DEFFLOAT, 0); + class_addmethod(pvoc_class, (t_method) transients, gensym("transients"), A_GIMME, 0); + class_addmethod(pvoc_class, (t_method) notransients, gensym("notransients"), 0); +} diff --git a/pvoc~.libs b/pvoc~.libs new file mode 100644 index 0000000..67c2eba --- /dev/null +++ b/pvoc~.libs @@ -0,0 +1 @@ +-lfftw3 diff --git a/sse-conv.inc.c b/sse-conv.inc.c new file mode 100644 index 0000000..cc62f30 --- /dev/null +++ b/sse-conv.inc.c @@ -0,0 +1,77 @@ +// Ben Saylor 2005-08-10 +// attempt at an SSE version of the convolution loop. +// Results sound weird, and CPU usage is about the same. :( + + cursumbuf_fd = (v4sf *) x->sumbufs[(x->sumbuf->index + p) % x->nsumbufs].fd; + input_fd = (v4sf *) x->input_fd; + irpart_fd = (v4sf *) x->irpart_fd[p]; + nvecs = x->paddedsize/4; + + for (v1=0, v2=1; v2 < nvecs; v1+=2, v2+=2) { + // v1 is the index of the first 4-float vector (v4sf) to process (v2 is the second) + // input_fd, etc. are v4sf pointers. + // pull in 4 bins (8 floats) (2 v4sfs) at a time. + + // (a + bi) * (c + di) = (ac - bd) + (ad + bc)i + + asm volatile ( + // load inputs + "movaps %[in1], %%xmm0\n\t" + "movaps %[in2], %%xmm6\n\t" + "movaps %[ir1], %%xmm2\n\t" + "movaps %[ir2], %%xmm7\n\t" + "movaps %%xmm0, %%xmm1\n\t" + "movaps %%xmm2, %%xmm3\n\t" + // xmm0 = xmm1 = [a1 b1 a2 b2] + // xmm6 = [a3 b3 a4 b4] + // xmm2 = xmm3 = [c1 d1 c2 d2] + // xmm7 = [c3 d3 c4 d4] + + // de-interleave real and imaginary parts + "shufps $0x88, %%xmm6, %%xmm0\n\t" // xmm0 = [a1 a2 a3 a4] + "shufps $0xDD, %%xmm6, %%xmm1\n\t" // xmm1 = [b1 b2 b3 b4] + "shufps $0x88, %%xmm7, %%xmm2\n\t" // xmm2 = [c1 c2 c3 c4] + "shufps $0xDD, %%xmm7, %%xmm3\n\t" // xmm3 = [d1 d2 d3 d4] + + // load output (early, maybe it will help) + "movaps %[out1], %%xmm6\n\t" + "movaps %[out2], %%xmm7\n\t" + + // compute the real part of the complex product + // (work on copies of xmm0 and xmm1, because we need to keep them) + "movaps %%xmm0, %%xmm4\n\t" // xmm4 = [a1 a2 a3 a4] + "mulps %%xmm2, %%xmm4\n\t" // xmm4 = [a1c1 a2c2 a3c3 a4c4] + "movaps %%xmm1, %%xmm5\n\t" // xmm5 = [b1 b2 b3 b4] + "mulps %%xmm3, %%xmm5\n\t" // xmm5 = [b1d1 b2d2 b3d3 b4d4] + "subps %%xmm5, %%xmm4\n\t" // xmm4 = (ac - bd) [r1 r2 r3 r4] + + // compute the imaginary part of the complex product + "mulps %%xmm3, %%xmm0\n\t" // xmm0 = [a1d1 a2d2 a3d3 a4d4] + "mulps %%xmm2, %%xmm1\n\t" // xmm1 = [b1c1 b2c2 b3c3 b4c4] + "addps %%xmm1, %%xmm0\n\t" // xmm0 = (ad + bc) [i1 i2 i3 i4] + + // re-interleave + "movaps %%xmm4, %%xmm5\n\t" // xmm5 = [r1 r2 r3 r4] + "unpcklps %%xmm0, %%xmm4\n\t" // xmm4 = [r1 i1 r2 i2] + "unpckhps %%xmm5, %%xmm0\n\t" // xmm0 = [r3 i3 r4 i4] + + // add into sumbuf + "addps %%xmm4, %%xmm6\n\t" + "addps %%xmm0, %%xmm7\n\t" + "movaps %%xmm6, %[out1]\n\t" + "movaps %%xmm7, %[out2]" + + // output/input operands + : [out1] "+m" (cursumbuf_fd[v1]), + [out2] "+m" (cursumbuf_fd[v2]) + + // input operands + : [in1] "m" (input_fd[v1]), + [in2] "m" (input_fd[v2]), + [ir1] "m" (irpart_fd[v1]), + [ir2] "m" (irpart_fd[v2]) + + // clobbered registers + : "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7" + ); + } |