diff --git a/MANIFEST b/MANIFEST index 98a0cae..7e82f69 100644 --- a/MANIFEST +++ b/MANIFEST @@ -36,6 +36,7 @@ t/bug/doesnt-stringify.t t/bug/rt-41141.t t/bug/rt-49404-double_free.t t/bug/rt-54167.t +t/escape-special-chars.t t/json-basic.t t/json-circular-ref.t t/json-crlf.t diff --git a/emitter.c b/emitter.c index cd8150b..3f10007 100644 --- a/emitter.c +++ b/emitter.c @@ -5,9 +5,9 @@ * $Date: 2006-05-09 12:03:50 +0800 (二, 09 5 2006) $ * * Copyright (C) 2003 why the lucky stiff - * + * * All Base64 code from Ruby's pack.c. - * Ruby is Copyright (C) 1993-2003 Yukihiro Matsumoto + * Ruby is Copyright (C) 1993-2003 Yukihiro Matsumoto */ #include #include @@ -16,7 +16,7 @@ #define DEFAULT_ANCHOR_FORMAT "id%03d" -const unsigned char hex_table[] = +const unsigned char hex_table[] = "0123456789ABCDEF"; static unsigned char b64_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; @@ -137,13 +137,13 @@ syck_new_emitter() e->output_handler = NULL; e->lvl_idx = 0; e->lvl_capa = ALLOC_CT; - e->levels = S_ALLOC_N( SyckLevel, e->lvl_capa ); + e->levels = S_ALLOC_N( SyckLevel, e->lvl_capa ); syck_emitter_reset_levels( e ); e->bonus = NULL; return e; } -int +enum st_retval syck_st_free_anchors( char *key, char *name, char *arg ) { S_FREE( name ); @@ -203,7 +203,7 @@ syck_emitter_pop_level( SyckEmitter *e ) free( e->levels[e->lvl_idx].domain ); } -void +void syck_emitter_add_level( SyckEmitter *e, int len, enum syck_level_status status ) { ASSERT( e != NULL ); @@ -295,7 +295,7 @@ syck_emitter_write( SyckEmitter *e, const char *str, long len ) { syck_emitter_clear( e ); } - + /* * Flush if at end of buffer */ @@ -366,7 +366,7 @@ syck_emit( SyckEmitter *e, st_data_t n ) int indent = 0; SyckLevel *parent; SyckLevel *lvl = syck_emitter_current_level( e ); - + /* * Determine headers. 
*/ @@ -397,15 +397,15 @@ syck_emit( SyckEmitter *e, st_data_t n ) /* Look for anchor */ if ( e->anchors != NULL && - st_lookup( e->markers, n, (st_data_t *)&oid ) && - st_lookup( e->anchors, (st_data_t)oid, (st_data_t *)&anchor_name ) ) + st_lookup( e->markers, (char*)n, (char* *)&oid ) && + st_lookup( e->anchors, (char*)(st_data_t)oid, (char* *)(st_data_t *)&anchor_name ) ) { if ( e->anchored == NULL ) { e->anchored = st_init_numtable(); } - if ( ! st_lookup( e->anchored, (st_data_t)anchor_name, 0 ) ) + if ( ! st_lookup( e->anchored, (char*)(st_data_t)anchor_name, (char* *)0 ) ) { char *an = S_ALLOC_N( char, strlen( anchor_name ) + 3 ); sprintf( an, "&%s ", anchor_name ); @@ -419,7 +419,7 @@ syck_emit( SyckEmitter *e, st_data_t n ) syck_emitter_write( e, an, strlen( anchor_name ) + 2 ); free( an ); - st_insert( e->anchored, (st_data_t)anchor_name, 0 ); + st_insert( e->anchored, (char*)(st_data_t)anchor_name, 0 ); lvl->anctag = 1; } else @@ -498,7 +498,7 @@ void syck_emit_tag( SyckEmitter *e, const char *tag, const char *ignore ) lvl->anctag = 1; } -/* +/* * Emit a newline and an appropriately spaced indent. 
*/ void syck_emit_indent( SyckEmitter *e ) @@ -582,7 +582,7 @@ syck_scan_scalar( int req_width, char *cursor, long len ) } else if ( len > 1 && cursor[len-2] == '\n' ) { flags |= SCAN_MANYNL_E; } - if ( + if ( ( len > 0 && ( cursor[0] == ' ' || cursor[0] == '\t' ) ) || ( len > 1 && ( cursor[len-1] == ' ' || cursor[len-1] == '\t' ) ) ) { @@ -609,7 +609,7 @@ syck_scan_scalar( int req_width, char *cursor, long len ) flags |= SCAN_NEWLINE; if ( len - i >= 3 && strncmp( &cursor[i+1], "---", 3 ) == 0 ) flags |= SCAN_DOCSEP; - if ( cursor[i+1] == ' ' || cursor[i+1] == '\t' ) + if ( cursor[i+1] == ' ' || cursor[i+1] == '\t' ) flags |= SCAN_INDENTED; if ( req_width > 0 && i - start > req_width ) flags |= SCAN_WIDE; @@ -633,12 +633,12 @@ syck_scan_scalar( int req_width, char *cursor, long len ) } /* remember, if plain collections get implemented, to add nb-plain-flow-char */ else if ( ( cursor[i] == ' ' && cursor[i+1] == '#' ) || - ( cursor[i] == ':' && + ( cursor[i] == ':' && ( cursor[i+1] == ' ' || cursor[i+1] == '\n' || i == len - 1 ) ) ) { flags |= SCAN_INDIC_C; } - else if ( cursor[i] == ',' && + else if ( cursor[i] == ',' && ( cursor[i+1] == ' ' || cursor[i+1] == '\n' || i == len - 1 ) ) { flags |= SCAN_FLOWMAP; @@ -661,12 +661,12 @@ void syck_emit_scalar( SyckEmitter *e, char *tag, enum scalar_style force_style, SyckLevel *lvl = syck_emitter_current_level( e ); int scan = 0; char *implicit; - + if ( str == NULL ) str = ""; /* No empty nulls as map keys */ - if ( len == 0 && ( parent->status == syck_lvl_map || parent->status == syck_lvl_imap ) && - parent->ncount % 2 == 1 && syck_tagcmp( tag, "tag:yaml.org,2002:null" ) == 0 ) + if ( len == 0 && ( parent->status == syck_lvl_map || parent->status == syck_lvl_imap ) && + parent->ncount % 2 == 1 && syck_tagcmp( tag, "tag:yaml.org,2002:null" ) == 0 ) { str = "~"; len = 1; @@ -678,7 +678,7 @@ void syck_emit_scalar( SyckEmitter *e, char *tag, enum scalar_style force_style, /* quote strings which default to implicits */ if ( ( 
- (strncmp( implicit, "bool", 4 ) == 0) || + (strncmp( implicit, "bool", 4 ) == 0) || (strncmp( implicit, "null", 4 ) == 0) ) && @@ -690,8 +690,8 @@ void syck_emit_scalar( SyckEmitter *e, char *tag, enum scalar_style force_style, } else { /* complex key -- disabled by Audrey Tang -/ if ( parent->status == syck_lvl_map && parent->ncount % 2 == 1 && - ( !( tag == NULL || - ( implicit != NULL && syck_tagcmp( tag, implicit ) == 0 && e->explicit_typing == 0 ) ) ) ) + ( !( tag == NULL || + ( implicit != NULL && syck_tagcmp( tag, implicit ) == 0 && e->explicit_typing == 0 ) ) ) ) { syck_emitter_write( e, "? ", 2 ); parent->status = syck_lvl_mapx; @@ -955,7 +955,10 @@ void syck_emit_2quoted( SyckEmitter *e, int width, char *str, long len ) /* Escape sequences allowed within double quotes. */ case '"': syck_emitter_write( e, "\\\"", 2 ); break; - case '\\': syck_emitter_write( e, "\\\\", 2 ); break; + case '\\': + /* A literal backslash is always written as an escaped pair, regardless of the character that follows it. */ + syck_emitter_write( e, "\\\\", 2 ); + break; case '\0': syck_emitter_write( e, "\\0", 2 ); break; case '\a': syck_emitter_write( e, "\\a", 2 ); break; case '\b': syck_emitter_write( e, "\\b", 2 ); break; @@ -966,18 +969,6 @@ void syck_emit_2quoted( SyckEmitter *e, int width, char *str, long len ) case 0x1b: syck_emitter_write( e, "\\e", 2 ); break; case '\n': syck_emitter_write( e, "\\n", 2 ); break; - /* XXX - Disabled by Audrey Tang for YAML.pm compat - case '\n': - end = mark + 1; - syck_emitter_write( e, "\\n", 2 ); - do_indent = e->indent; - start = mark + 1; - if ( start < str + len && ( *start == ' ' || *start == '\n' ) ) { - do_indent = 0; - } - break; - */ - case ' ': if ( width > 0 && *start != ' ' && mark - end > width ) { do_indent = 1; @@ 
-1314,13 +1312,13 @@ syck_emitter_mark_node( SyckEmitter *e, st_data_t n, int flags ) * object. Doesn't yet create an anchor, simply notes the * position. */ - if ( ! st_lookup( e->markers, n, (st_data_t *)&oid ) ) + if ( ! st_lookup( e->markers, (char*)n, (char* *)(st_data_t *)&oid ) ) { /* * Store all markers */ oid = e->markers->num_entries + 1; - st_insert( e->markers, n, (st_data_t)oid ); + st_insert( e->markers, (char*)n, (char*)(st_data_t)oid ); } else { @@ -1329,7 +1327,7 @@ syck_emitter_mark_node( SyckEmitter *e, st_data_t n, int flags ) e->anchors = st_init_numtable(); } - if ( ! st_lookup( e->anchors, (st_data_t)oid, (st_data_t *)&anchor_name ) ) + if ( ! st_lookup( e->anchors, (char*)(st_data_t)oid, (char* *)(st_data_t *)&anchor_name ) ) { int idx = 0; char *anc = ( e->anchor_format == NULL ? DEFAULT_ANCHOR_FORMAT : e->anchor_format ); @@ -1345,7 +1343,7 @@ syck_emitter_mark_node( SyckEmitter *e, st_data_t n, int flags ) /* * Insert into anchors table */ - st_insert( e->anchors, (st_data_t)oid, (st_data_t)anchor_name ); + st_insert( e->anchors, (char*)(st_data_t)oid, (char*)(st_data_t)anchor_name ); } /* XXX - Flag added by BDRACO as the perl_syck.h now has a max_depth - XXX */ @@ -1356,4 +1354,3 @@ syck_emitter_mark_node( SyckEmitter *e, st_data_t n, int flags ) } return oid; } - diff --git a/handler.c b/handler.c index 5de4359..61b7d6b 100644 --- a/handler.c +++ b/handler.c @@ -46,7 +46,7 @@ syck_hdlr_add_anchor( SyckParser *p, char *a, SyckNode *n ) if ( p->bad_anchors != NULL ) { SyckNode *bad; - if ( st_lookup( p->bad_anchors, (st_data_t)a, (st_data_t *)&bad ) ) + if ( st_lookup( p->bad_anchors, (char*)(st_data_t)a, (char* *)(st_data_t *)&bad ) ) { if ( n->kind != syck_str_kind ) { @@ -59,14 +59,14 @@ syck_hdlr_add_anchor( SyckParser *p, char *a, SyckNode *n ) { p->anchors = st_init_strtable(); } - if ( st_lookup( p->anchors, (st_data_t)a, (st_data_t *)&ntmp ) ) + if ( st_lookup( p->anchors, (char*)(st_data_t)a, (char* *)(st_data_t *)&ntmp ) ) { if 
( ntmp != (void *)1 ) { syck_free_node( ntmp ); } } - st_insert( p->anchors, (st_data_t)a, (st_data_t)n ); + st_insert( p->anchors, (char*)(st_data_t)a, (char*)(st_data_t)n ); return n; } @@ -79,14 +79,14 @@ syck_hdlr_remove_anchor( SyckParser *p, char *a ) { p->anchors = st_init_strtable(); } - if ( st_delete( p->anchors, (st_data_t *)&atmp, (st_data_t *)&ntmp ) ) + if ( st_delete( p->anchors, (char* *)(st_data_t *)&atmp, (char* *)(st_data_t *)&ntmp ) ) { if ( ntmp != (void *)1 ) { syck_free_node( ntmp ); } } - st_insert( p->anchors, (st_data_t)a, (st_data_t)1 ); + st_insert( p->anchors, (char*)(st_data_t)a, (char*)(st_data_t)1 ); } SyckNode * @@ -96,7 +96,7 @@ syck_hdlr_get_anchor( SyckParser *p, char *a ) if ( p->anchors != NULL ) { - if ( st_lookup( p->anchors, (st_data_t)a, (st_data_t *)&n ) ) + if ( st_lookup( p->anchors, (char*)(st_data_t)a, (char* *)(st_data_t *)&n ) ) { if ( n != (void *)1 ) { @@ -109,10 +109,10 @@ syck_hdlr_get_anchor( SyckParser *p, char *a ) { p->bad_anchors = st_init_strtable(); } - if ( ! st_lookup( p->bad_anchors, (st_data_t)a, (st_data_t *)&n ) ) + if ( ! 
st_lookup( p->bad_anchors, (char*)(st_data_t)a, (char* *)(st_data_t *)&n ) ) { n = (p->bad_anchor_handler)( p, a ); - st_insert( p->bad_anchors, (st_data_t)a, (st_data_t)n ); + st_insert( p->bad_anchors, (char*)(st_data_t)a, (char*)(st_data_t)n ); } } } diff --git a/perl_syck.h b/perl_syck.h index 14cd311..9685c55 100644 --- a/perl_syck.h +++ b/perl_syck.h @@ -828,7 +828,7 @@ yaml_syck_mark_emitter if (SvROK(sv)) { PERL_SYCK_MARK_EMITTER(e, SvRV(sv)); #ifdef YAML_IS_JSON - st_insert(e->markers, (st_data_t)sv, 0); + st_insert(e->markers, (char*)(st_data_t)sv, 0); e->depth--; #endif return; @@ -868,7 +868,7 @@ yaml_syck_mark_emitter } #ifdef YAML_IS_JSON - st_insert(e->markers, (st_data_t)sv, 0); + st_insert(e->markers, (char*)(st_data_t)sv, 0); --e->depth; #endif } diff --git a/syck_.c b/syck_.c index 8df83bf..4ff783e 100644 --- a/syck_.c +++ b/syck_.c @@ -184,7 +184,7 @@ syck_add_sym( SyckParser *p, char *data ) p->syms = st_init_numtable(); } id = p->syms->num_entries + 1; - st_insert( p->syms, id, (st_data_t)data ); + st_insert( p->syms, (char*)id, (char*)(st_data_t)data ); return id; } @@ -192,12 +192,13 @@ int syck_lookup_sym( SyckParser *p, SYMID id, char **data ) { if ( p->syms == NULL ) return 0; - return st_lookup( p->syms, id, (st_data_t *)data ); + return st_lookup( p->syms, (char *)id, (char**)(st_data_t *)data ); } -int -syck_st_free_nodes( char *key, SyckNode *n, char *arg ) +enum st_retval +syck_st_free_nodes( char *key, char * proto_n, char *arg ) { + SyckNode *n = (SyckNode *)proto_n; if ( n != (void *)1 ) syck_free_node( n ); n = NULL; return ST_CONTINUE; diff --git a/syck_st.c b/syck_st.c index 8c4631b..5bb8356 100644 --- a/syck_st.c +++ b/syck_st.c @@ -59,14 +59,16 @@ static struct st_hash_type type_strhash = { strhash, }; -static void rehash(); +static void rehash(register st_table *table); #define alloc(type) (type*)malloc((unsigned)sizeof(type)) #define Calloc(n,s) (char*)calloc((n),(s)) -#define EQUAL(table,x,y) ((x)==(y) || 
(*table->type->compare)((x),(y)) == 0) +typedef int (*(do_equal_func))(char *, char *); +#define EQUAL(table,x,y) ((x)==(y) || (((do_equal_func)(*table->type->compare))((x),(y))) == 0) -#define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key)) +typedef int (*(do_hash_func))(char *); +#define do_hash(key,table) (unsigned int)((do_hash_func)(*(table)->type->hash))((key)) #define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins) /* @@ -475,14 +477,14 @@ st_cleanup_safe(table, never) { int num_entries = table->num_entries; - st_foreach(table, (enum st_retval (*)())delete_never, never); + st_foreach(table, (enum st_retval (*)(char*, char*, char*))delete_never, never); table->num_entries = num_entries; } void st_foreach(table, func, arg) st_table *table; - enum st_retval (*func)(); + enum st_retval (*func)(char*, char*, char*); char *arg; { st_table_entry *ptr, *last, *tmp; diff --git a/syck_st.h b/syck_st.h index 6397f87..1df8455 100644 --- a/syck_st.h +++ b/syck_st.h @@ -24,17 +24,21 @@ struct st_table { enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE}; -st_table *st_init_table(); -st_table *st_init_table_with_size(); -st_table *st_init_numtable(); -st_table *st_init_numtable_with_size(); -st_table *st_init_strtable(); -st_table *st_init_strtable_with_size(); -int st_delete(), st_delete_safe(); -int st_insert(), st_lookup(); -void st_foreach(), st_add_direct(), st_free_table(), st_cleanup_safe(); -st_table *st_copy(); - +st_table * st_init_table(struct st_hash_type * type); +st_table * st_init_table_with_size(struct st_hash_type * type, int size); +st_table * st_init_numtable(void); +st_table * st_init_numtable_with_size(int size); +st_table * st_init_strtable(void); +st_table * st_init_strtable_with_size(int size); +int st_delete(register st_table * table, register char ** key, char ** value); +int st_delete_safe(register st_table * table, register char ** key, char ** value, char * never); +int st_insert(register st_table * table, register char * 
key, char * value); +int st_lookup(st_table * table, register char * key, char ** value); +void st_foreach(st_table * table, enum st_retval (*func)(char*, char*, char*), char * arg); +void st_add_direct(st_table * table, char * key, char * value); +void st_free_table(st_table * table); +void st_cleanup_safe(st_table * table, char * never); +st_table * st_copy(st_table * old_table); #define ST_NUMCMP ((int (*)()) 0) #define ST_NUMHASH ((int (*)()) -2) diff --git a/t/escape-special-chars.t b/t/escape-special-chars.t new file mode 100644 index 0000000..04b229c --- /dev/null +++ b/t/escape-special-chars.t @@ -0,0 +1,54 @@ +use strict; +use warnings; +use Test::More tests => 8; +use YAML::Syck; + +# Test string with various special characters +my $test_str = "with \t tabs and carriage \r returns"; + +# Test dumping and loading +my $yaml = Dump($test_str); +my $loaded = Load($yaml); + +# Verify the string roundtrips correctly +is($loaded, $test_str, "String with special chars roundtrips correctly"); + +# Test literal backslashes +my $backslash_str = "with \\t tabs and carriage \\r returns"; +$yaml = Dump($backslash_str); +$loaded = Load($yaml); +is($loaded, $backslash_str, "String with literal backslashes roundtrips correctly"); + +# Test mixed special chars and literal backslashes +my $mixed_str = "with \t tabs and \\r literal returns"; +$yaml = Dump($mixed_str); +$loaded = Load($yaml); +is($loaded, $mixed_str, "String with mixed special chars and literal backslashes roundtrips correctly"); + +# Test in a hash +my $hash = { + special => "with \t tabs and carriage \r returns", + literal => "with \\t tabs and carriage \\r returns", + mixed => "with \t tabs and \\r literal returns" +}; + +$yaml = Dump($hash); +$loaded = Load($yaml); + +is($loaded->{special}, $hash->{special}, "Special chars in hash value roundtrip correctly"); +is($loaded->{literal}, $hash->{literal}, "Literal backslashes in hash value roundtrip correctly"); +is($loaded->{mixed}, $hash->{mixed}, "Mixed 
chars in hash value roundtrip correctly"); + +# Test in an array +my $array = [ + "with \t tabs and carriage \r returns", + "with \\t tabs and carriage \\r returns", + "with \t tabs and \\r literal returns" +]; + +$yaml = Dump($array); +$loaded = Load($yaml); + +is($loaded->[0], $array->[0], "Special chars in array element roundtrip correctly"); +is($loaded->[1], $array->[1], "Literal backslashes in array element roundtrip correctly"); +