aboutsummaryrefslogtreecommitdiff
path: root/detoxk/detox.c
diff options
context:
space:
mode:
Diffstat (limited to 'detoxk/detox.c')
-rw-r--r--detoxk/detox.c1
1 files changed, 1 insertions, 0 deletions
diff --git a/detoxk/detox.c b/detoxk/detox.c
new file mode 100644
index 0000000..9889e45
--- /dev/null
+++ b/detoxk/detox.c
@@ -0,0 +1 @@
+/*__________________________________________________________________________ detox á extract value/content from tag-structured symbol Copyright (C) 2003 - 2006 jan schacher This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA pd-port 20060517 revised tree output 20040712 initial build 20030502 ____________________________________________________________________________*/ #include "m_pd.h" #include <string.h> #include <stdlib.h> #include <ctype.h> #include <locale.h> #include <stdio.h> #define VERSION "1.3" #define CLIP(x,a,b) (x)=(x)<(a)?(a):(x)>(b)?(b):(x) #define MAXLENGTH 4096 typedef struct _detox { t_object ob; t_atom t_tree[257], t_attrpair[2]; t_atom t_comp, t_blip; t_atom t_checkat[2]; char t_charbuf[MAXLENGTH]; long t_treecount; short t_debug; short t_mode; void *s_outlet, *s_outlet2, *s_outlet3, *s_outlet4; } t_detox; t_symbol *ps_nothing; void *detox_class; void *detox_new(t_symbol *s, short argc, t_atom *argv); void detox_bang(t_detox *x); void detox_float(t_detox *x, t_float f); void detox_list(t_detox *x, t_symbol *s, short argc, t_atom *argv); void detox_anything(t_detox *x, t_symbol *s, short argc, t_atom *argv); void detox_free(t_detox *x); void detox_reset(t_detox *x); void detox_debug(t_detox *x, float f); void detox_mode(t_detox *x, float f); void detox_anything(t_detox *x, t_symbol *s, short argc, t_atom *argv); void detox_action(t_detox *x); void detox_fixsymbol(t_detox *x); void version(t_detox *x); void detox_setup(void) { detox_class = class_new(gensym("detox"), (t_newmethod)detox_new, (t_method)detox_free, sizeof(t_detox), 0, A_GIMME, 0); class_addsymbol(detox_class, (t_method)detox_anything); class_addmethod(detox_class, (t_method)detox_list, gensym("list"), A_GIMME,0); class_addmethod(detox_class, (t_method)detox_debug, gensym("debug"), A_FLOAT, 0); class_addmethod(detox_class, (t_method)detox_mode, gensym("mode"), A_FLOAT, 0); class_addmethod(detox_class, (t_method)detox_reset, gensym("reset"), 0); class_addmethod(detox_class, (t_method)version, gensym("version"), 0); post("- detox - jasch - "__DATE__" "); ps_nothing = gensym(""); } void *detox_new(t_symbol *s, short argc, t_atom *argv) { t_detox *x = (t_detox *)pd_new(detox_class); x->s_outlet = outlet_new(&x->ob, NULL); x->s_outlet2 = outlet_new(&x->ob, NULL); x->s_outlet3 = outlet_new(&x->ob, NULL); x->s_outlet4 = outlet_new(&x->ob, gensym("float")); x->t_treecount = 0; x->t_debug = 0; if((argc >= 1)&&(argv[0].a_type == A_FLOAT)){ if(argv[0].a_w.w_float == 0){ x->t_mode = 0; }else{ x->t_mode = 1; } }else{ x->t_mode = 1; } x->t_debug = 0; if(x->t_debug) post("mode is %ld", x->t_mode); x->t_checkat[0].a_type = A_SYMBOL; x->t_checkat[0].a_w.w_symbol = gensym("none"); x->t_checkat[1].a_type = A_SYMBOL; x->t_checkat[1].a_w.w_symbol = gensym("none"); return (x); } void detox_free(t_detox *x) { // notify_free((t_object *)x); } void detox_bang(t_detox *x) { detox_action(x); } void detox_float(t_detox *x, t_float f) { sprintf(x->t_charbuf, "%.6f", f); detox_action(x); } void detox_list(t_detox *x, t_symbol *s, short argc, t_atom *argv) { long i; char temp[MAXLENGTH]; x->t_charbuf[0] = 0; for(i = 0; i < argc; i++){ if(i) strcat(x->t_charbuf, " "); temp[0] = 0; if(argv[i].a_type == A_FLOAT){ sprintf(temp, "%.6f", argv[i].a_w.w_float); }else if(argv[i].a_type == A_SYMBOL){ // check for whitespace in symbol and add back doublequotes if(strchr(argv[i].a_w.w_symbol->s_name, 32) == NULL) { // no whitespace : normal proc strcpy(temp, argv[i].a_w.w_symbol->s_name); } else { // with whitespace : fix it x->t_checkat[0] = argv[i]; detox_fixsymbol(x); strcpy(temp, x->t_checkat[1].a_w.w_symbol->s_name); } } if((strlen(x->t_charbuf) + strlen(temp)) < MAXLENGTH){ strcat(x->t_charbuf, temp); }else{ return; } } detox_action(x); } void detox_anything(t_detox *x, t_symbol *s, short argc, t_atom *argv) { long i; char temp[MAXLENGTH]; x->t_charbuf[0] = 0; strcpy(x->t_charbuf, s->s_name); for(i = 0; i < argc; i++){ strcat(x->t_charbuf, " "); temp[0] = 0; if(argv[i].a_type == A_FLOAT){ sprintf(temp, "%.6f", argv[i].a_w.w_float); }else if(argv[i].a_type == A_SYMBOL){ // check for whitespace in symbol and add back doublequotes if(strchr(argv[i].a_w.w_symbol->s_name, 32) == NULL) { // no whitespace : normal proc strcpy(temp, argv[i].a_w.w_symbol->s_name); } else { // with whitespace : fix it x->t_checkat[0] = argv[i]; detox_fixsymbol(x); strcpy(temp, x->t_checkat[1].a_w.w_symbol->s_name); } } // post("temp is %s", temp); if((strlen(x->t_charbuf) + strlen(temp)) < MAXLENGTH){ strcat(x->t_charbuf, temp); }else{ return; } } detox_action(x); return; } void detox_reset(t_detox *x) { short i; x->t_treecount = 0; for(i=0;i<256; i++) x->t_tree[i].a_w.w_symbol = ps_nothing; } void detox_debug(t_detox *x, float f) { if(f == 0.0){ x->t_debug = 0; }else if(f != 0){ x->t_debug = 1; } } void detox_mode(t_detox *x, float f) { if(f == 0.0){ x->t_mode = 0; }else if(f != 0){ x->t_mode = 1; } } void detox_action(t_detox *x) { t_atom *outlist, *attrpair; // *comp, char local[MAXLENGTH], local2[MAXLENGTH]; char temp[MAXLENGTH]; char tempstring[2]; // for strtok char *ptr1 = NULL, *ptr2 = NULL, *ptr3 = NULL; long i, j, k, last, len, tempcount = 0; short tagtype = 0; // 0 = closed, 1 = open, 2 = closing short outsize = 0; char quotetype[1]; short digitflag = 0; short punctflag = 0; short quoteflag = 0; // short attrpresent = 0; quotetype[0] = '\"'; // init to standard double quotes i = 0; strcpy(tempstring, " "); outlist = x->t_tree; attrpair = x->t_attrpair; x->t_attrpair[0].a_type = A_SYMBOL; x->t_attrpair[1].a_type = A_SYMBOL; // check type and recast on output strcpy(local2, x->t_charbuf); last = strlen(local2); // test for tag // if(x->t_debug) post("input is %s", x->t_charbuf); k = 0; while(isspace(local2[k])){ // remove whitespace/tab/cr/linefeed k++; } // if(x->t_debug) post("k is %ld, last is %ld", k, last); for(i=k, j=0; i<last; i++, j++){ // remove whitespace/tab/cr/linefeed local[j] = local2[i]; } local[j] = 0; // terminate string if((local[0] != '<') || (local[j-1] != '>')){ tagtype = 0; // not a well formed tag if(x->t_debug) post("tagtype 0"); goto content; } if((local[1] == '?') || (local[1] == '!')){ tagtype = 4; // this might be a metatag if(x->t_debug) post("tagtype 4"); goto content; } for(i = 0, ptr1 = NULL, quoteflag = 0; i < (long)(strlen(local) -1); i++) { if((local[i] == '\"') || (local[i] == '\'')){ quoteflag = !quoteflag; } if((local[i] == '/') && (quoteflag == 0)) { ptr1 = &local[i]; } } //ptr1 = strchr(local, '/'); // is it a closing tag ? // bug: if slash is within quotes: should ignore if(x->t_debug) post("ptr1 contains %s", x->t_charbuf); if(ptr1 == NULL){ // we have new open tag tagtype = 2; if(x->t_debug) post("tagtype 2"); }else if((local[1] == '/')){ // is it at beginning tagtype = 3; if(x->t_debug) post("tagtype 3"); }else if(local[1] != '/'){ // is it elsewhere if(x->t_debug) post("tagtype 1"); tagtype = 1; } switch(tagtype){ case 1: // closed tag :: add temporarily to tree ptr1 = strchr(local, '>'); ptr2 = strchr(local, ' '); if(ptr2 != NULL) *ptr2 = 0; if(ptr1 != NULL){ *ptr1 = 0; }else{ return; } len = strlen(local); ptr3 = (char *)memmove((local + 0),(local + 1), len-1); local[len-1] = 0; ptr3 = local; x->t_blip.a_type = A_SYMBOL; x->t_blip.a_w.w_symbol = gensym(ptr3); tempcount = 1; break; case 2: // opening tag :: add to tree, increment count ptr2 = strchr(local, ' '); if(ptr2 != NULL){ *ptr2 = 0; if(x->t_debug) post("inside opening tag w/ whitespace, %s", local); }else{ local[strlen(local) - 1] = 0; // local[strlen(ptr2) -1] = 0; if(x->t_debug) post("inside opening tag no whitespace, %s", local); // return; } len = strlen(local); ptr3 = (char *)memmove((local + 0),(local + 1), len-1); if(x->t_debug) post("ptr3 is %s", ptr3); local[len-1] = 0; ptr3 = local; SETSYMBOL(x->t_tree + x->t_treecount, gensym(ptr3)); x->t_treecount++; x->t_treecount = CLIP(x->t_treecount, 0, 255); break; case 3: // closing tag :: remove from tree, decrement count ptr2 = strchr(local, '>'); if(ptr2 != NULL){ *ptr2 = 0; }else{ return; } len = strlen(local); ptr3 = (char *)memmove((local + 0),(local + 2), len-2); local[len-2] = 0; ptr3 = local; x->t_comp.a_type = A_SYMBOL; x->t_comp.a_w.w_symbol = gensym(ptr3); if(x->t_comp.a_w.w_symbol->s_name == x->t_tree[x->t_treecount-1].a_w.w_symbol->s_name){ x->t_treecount -= 1; x->t_treecount = CLIP(x->t_treecount, 0, 255); }else{ outlet_float(x->s_outlet4, 3); outlet_float(x->s_outlet3, -1); return; } break; } content: outlet_float(x->s_outlet4, tagtype); if(tagtype == 0) return; // the tree if(x->t_treecount > 0){ if(tempcount){ outsize = x->t_treecount + 1; x->t_tree[outsize-1] = x->t_blip; }else{ outsize = x->t_treecount; } outlet_list(x->s_outlet3, 0L, outsize, outlist); // ouput tree }else if(x->t_treecount == 0){ if(tempcount){ outlet_anything(x->s_outlet3, x->t_blip.a_w.w_symbol, 0, 0L); // ouput tree }else{ outlet_float(x->s_outlet3, 0); // ouput tree } } strcpy(local, x->t_charbuf); ptr1 = strchr(local, '>'); // parse content between enclosing tags <tag>blah</tag> if(ptr1 == NULL) goto attributes; *ptr1 = 0; ++ptr1; ptr2 = strchr(ptr1, '<'); if(ptr2 == NULL) goto attributes; *ptr2 = 0; outlet_anything(x->s_outlet2, gensym(ptr1), 0, 0L); attributes: strcpy(local, x->t_charbuf); // retrieve full buffer again switch(x->t_mode) { // the old way of attribute parsing (mode 0) case 0: if(x->t_debug) post("content: before strtok %s", local); ptr1 = local; ptr2 = strchr(local, '>'); // find tag end ptr2++; // terminate local buffer after tag end ptr2[0] = 0; if(strcspn(ptr1, "=\"\'") == strlen(ptr1)) { // check for attributes inside tag break; // no attr-chars found, bail ouit } ptr1 = strchr(local, ' '); // find first whitespace if(ptr1 == NULL) { break; // no whitespace found bail } while((ptr1[0] == ' ') && (ptr1 != NULL)) { // find next non-whitespace char -> potential start of attr ptr1++; } ptr2 = ptr1; // setup second pointer from end of attr name while( !((ptr2[0] == ' ') || (ptr2[0] == '=')) && (ptr2 != NULL)) { // find next non-white or not equal-sign -> what if not well formed ?? ptr2++; } if(ptr2 == NULL) { error("detox: mode 0, attrloop ptr2 reached NULL looking for \" \" and \"=\""); return; } /*len = 0; len = strlen(ptr1) - strlen(ptr2); // determine length of attr name: name minus whitespace/equal sign for(i = 0; i < len; i++) { temp[i] = ptr1[i]; } temp[len] = 0; // terminate temp_buffer */ ptr1 = ptr2; // copy start pointer to position of end pointer -> remainder while(ptr1 != NULL) { // move start pointer until single or double quote if ((ptr1[0] == '\'') || (ptr1[0] == '\"')) { quotetype[0] = ptr1[0]; break; } ptr1++; } if(ptr1 == NULL) { // bail if no quotes found and end reached error("detox: mode 0, attrloop ptr1 reached NULL"); return; } // store quote char for later comparison if (x->t_debug) post("first quotetype found is %c", quotetype[0]); ptr2 = ++ptr1; // copy end pointer to pos of startpointer + 1, right after the quote while(ptr2 != NULL) { // looking for next quote of stored type if(ptr2[0] == quotetype[0]) { break; } ptr2++; } // post("quotetype is %c and ptr2 is %c", quotetype[0], ptr2[0]); if(ptr2 == NULL) { error("detox: mode 0, attrloop ptr2 reached NULL"); return; } len = 0; digitflag = 1; // reset check for long punctflag = 0; // reset check for float len = strlen(ptr1) - strlen(ptr2); // get the length of the attr content // post("ptr2 ends scan at %c", ptr2[0]); for(i = 0; i < len; i++) { // copy attr-content into temp-buffer temp[i] = ptr1[i]; punctflag += (temp[i] == '.'); // digitflag = digitflag && (isdigit(temp[i]) || ispunct(temp[i])); // are all digits? } temp[len] = 0; if (x->t_debug) post("temp is %s, digitflag is %ld, punctflag is %ld", temp, digitflag, punctflag); if(digitflag == 0){ attrpair[0].a_type = A_SYMBOL; // to do: check atom type and cast accordingly js 20061202 attrpair[0].a_w.w_symbol = gensym(temp); outlet_anything(x->s_outlet, x->t_attrpair[0].a_w.w_symbol, 0, 0L); } else if ((digitflag == 1)&&(punctflag == 0)){ attrpair[1].a_type = A_FLOAT; // to do: check atom type and cast accordingly js 20061202 attrpair[1].a_w.w_float = atoi(temp); outlet_float(x->s_outlet, x->t_attrpair[2].a_w.w_float); } else if ((digitflag == 1)&&(punctflag == 1)){ attrpair[1].a_type = A_FLOAT; // to do: check atom type and cast accordingly js 20061202 attrpair[1].a_w.w_float = atof(temp); outlet_float(x->s_outlet, x->t_attrpair[1].a_w.w_float); } break; case 1: // the new way of attribute parsing (mode 1) ¥¥¥¥ fixed 20070322 ptr1 = local; ptr2 = strchr(local, '>'); // find tag end ptr2++; // terminate local buffer after tag end ptr2[0] = 0; if(strcspn(ptr1, "=\"\'") == strlen(ptr1)) { // check for attributes inside tag break; // no attr-chars found, bail ouit } ptr1 = strchr(local, ' '); // find first whitespace if(ptr1 == NULL) { break; // no whitespace found bail } for(;;) { // ¥¥ attribute loop; example: <person name = 'Alan Turing' died = '07/06/1954' born = "1912-06-23" /> while((ptr1[0] == ' ') && (ptr1 != NULL)) { // find next non-whitespace char -> potential start of attr ptr1++; } ptr2 = ptr1; // setup second pointer from end of attr name while( !((ptr2[0] == ' ') || (ptr2[0] == '=')) && (ptr2 != NULL)) { // find next non-white or not equal-sign -> what if not well formed ?? ptr2++; } if(ptr2 == NULL) { error("detox: mode 1, attrloop ptr2 reached NULL looking for \" \" and \"=\""); return; } len = 0; len = strlen(ptr1) - strlen(ptr2); // determine length of attr name: name minus whitespace/equal sign for(i = 0; i < len; i++) { temp[i] = ptr1[i]; } temp[len] = 0; // terminate temp_buffer attrpair[0].a_w.w_symbol = gensym(temp); // put tempbuffer into output symbol // ¥ done with the attr-name ptr1 = ptr2; // copy start pointer to position of end pointer -> remainder while(ptr1 != NULL) { // move start pointer until single or double quote if ((ptr1[0] == '\'') || (ptr1[0] == '\"')) { quotetype[0] = ptr1[0]; // store quote char for later comparison break; } ptr1++; } if(ptr1 == NULL) { // bail if no quotes found and end reached error("detox: mode 1, attrloop ptr1 reached NULL"); return; } if (x->t_debug) post("first quotetype found is %c", quotetype[0]); ptr2 = ++ptr1; // copy end pointer to pos of startpointer + 1, right after the quote while(ptr2 != NULL) { // looking for next quote of stored type if(ptr2[0] == quotetype[0]) { break; } ptr2++; } // post("quotetype is %c and ptr2 is %c", quotetype[0], ptr2[0]); if(ptr2 == NULL) { error("detox: mode 1, attrloop ptr2 reached NULL"); return; } len = 0; digitflag = 1; // reset check for long punctflag = 0; // reset check for float len = strlen(ptr1) - strlen(ptr2); // get the length of the attr content // post("ptr2 ends scan at %c", ptr2[0]); for(i = 0; i < len; i++) { // copy attr-content into temp-buffer temp[i] = ptr1[i]; punctflag += (temp[i] == '.'); // digitflag = digitflag && (isdigit(temp[i]) || ispunct(temp[i])); // are all digits? } temp[len] = 0; if (x->t_debug) post("temp is %s, digitflag is %ld, punctflag is %ld", temp, digitflag, punctflag); if(digitflag == 0){ attrpair[1].a_type = A_SYMBOL; // to do: check atom type and cast accordingly js 20061202 attrpair[1].a_w.w_symbol = gensym(temp); } else if ((digitflag == 1)&&(punctflag == 0)){ attrpair[1].a_type = A_FLOAT; // to do: check atom type and cast accordingly js 20061202 attrpair[1].a_w.w_float = atoi(temp); } else if ((digitflag == 1)&&(punctflag == 1)){ attrpair[1].a_type = A_FLOAT; // to do: check atom type and cast accordingly js 20061202 attrpair[1].a_w.w_float = atof(temp); } outlet_anything(x->s_outlet, x->t_attrpair[0].a_w.w_symbol, 1, &x->t_attrpair[1]); ptr1 = ++ptr2; // move start pointer to end of atrr-content while(ptr1 != NULL) { // find next non-whitespace char if (ptr1[0] != ' ') { break; } ptr1++; } if((ptr1[0] == '/') || (ptr1[0] == '>') || (ptr1[0] == '?') || (ptr1 == NULL)) { // if char is "/" or ">" or "?" or end of string finish up if (x->t_debug) post("ptr1 reached ending meta characters"); return; } if (x->t_debug) post("ptr1 reached loop point"); } break; } } void detox_fixsymbol(t_detox *x) // this is an ugly hack for max stripping double quotes from symbols with whitespace in them (language problem) { long i = 0; long len = 0; x->t_checkat[1].a_w.w_symbol->s_name[0] = '\"'; // first char in output is " len = strlen(x->t_checkat[0].a_w.w_symbol->s_name); len = CLIP(len, 0, (MAXLENGTH-3)); for(i = 0; i < len; i++) { if ((x->t_checkat[0].a_w.w_symbol->s_name[i] == '/') || (x->t_checkat[0].a_w.w_symbol->s_name[i] == '>')) { // check for syntax differences break; } x->t_checkat[1].a_w.w_symbol->s_name[i+1] = x->t_checkat[0].a_w.w_symbol->s_name[i]; } x->t_checkat[1].a_w.w_symbol->s_name[i+1] = '\"'; // last char in output is " x->t_checkat[1].a_w.w_symbol->s_name[i+2] = 0; // terminate string } void version(t_detox *x) { post("á detox á jasch á v. "VERSION" á compiled "__DATE__" - "__TIME__ ); } \ No newline at end of file