Line data Source code
1 : /** 2 : * @file csv.c 3 : * @brief https://www.rfc-editor.org/rfc/rfc4180 4 : * 5 : * <+DETAILED+> 6 : * 7 : * @author François Cerbelle (Fanfan), francois@cerbelle.net 8 : * 9 : * @internal 10 : * Created: 29/10/2024 11 : * Revision: none 12 : * Last modified: 2024-12-05 20:17 13 : * Compiler: gcc 14 : * Organization: Cerbelle.net 15 : * Copyright: Copyright (c) 2024, François Cerbelle 16 : * 17 : * This source code is released for free distribution under the terms of the 18 : * GNU General Public License as published by the Free Software Foundation. 19 : */ 20 : 21 : #ifdef HAVE_CONFIG_H 22 : #include "config.h" 23 : #endif 24 : 25 : #include "csv.h" 26 : 27 : #include <stdlib.h> 28 : #include <string.h> 29 : 30 : static unsigned short int _csv_firstfield = 1; 31 : 32 44 : void csv_addline(FILE* reportfile) { 33 44 : fprintf(reportfile,"\r\n"); 34 44 : _csv_firstfield = 1; 35 44 : } 36 : 37 176 : void csv_addfield(FILE* reportfile, const char* value) { 38 176 : char* csv = txt2csv(value); 39 176 : if (_csv_firstfield) { 40 44 : fprintf(reportfile,"%s",csv); 41 44 : _csv_firstfield = 0; 42 : } else 43 132 : fprintf(reportfile,",%s",csv); 44 176 : free(csv); 45 176 : } 46 : 47 : /* RFC4180 compliant CSV parser, with LF only tolerance when CRLF expected */ 48 : /* https://www.rfc-editor.org/rfc/rfc4180 */ 49 : static char* _csvtok_csv=NULL; 50 324 : char* csvtok(char* source) { 51 324 : size_t csv_idx=0; 52 324 : size_t txt_idx=0; 53 324 : char* csvtok_txt = NULL; 54 324 : unsigned short int quoted = 0; 55 : 56 324 : if (NULL!=source) 57 66 : _csvtok_csv = source; 58 : 59 324 : if (_csvtok_csv[0]==0) 60 6 : return NULL; 61 : 62 318 : if (NULL==(csvtok_txt = malloc(strlen(_csvtok_csv)+1))) { 63 0 : perror("csvtok csvtok_txt malloc"); 64 0 : exit(EXIT_FAILURE); 65 : } 66 318 : csvtok_txt[0]=0; 67 : 68 2018 : while (_csvtok_csv[csv_idx]) { 69 2009 : if (!quoted) { 70 1712 : if (_csvtok_csv[csv_idx]==',') { 71 216 : if ((_csvtok_csv[csv_idx+1]==0)|| 72 213 : (_csvtok_csv[csv_idx+1]=='\n')|| /* Unix style tolerance */ 73 207 : ((_csvtok_csv[csv_idx+1]=='\r')&&(_csvtok_csv[csv_idx+2]=='\n'))) { 74 12 : fprintf(stderr,"RFC4180 forbids comma at the end of record %s at %zu.\n", 75 : _csvtok_csv,csv_idx); 76 12 : free(csvtok_txt); 77 12 : _csvtok_csv=NULL; 78 12 : return NULL; 79 : } else 80 : break; 81 1496 : } else if ((_csvtok_csv[csv_idx]=='\n')|| 82 1496 : ((_csvtok_csv[csv_idx]=='\r')&&(_csvtok_csv[csv_idx+1]=='\n'))) 83 : break; 84 1493 : else if (_csvtok_csv[csv_idx]=='"') { 85 93 : if (csv_idx==0) { 86 90 : quoted = 1; 87 90 : csv_idx++; 88 90 : continue; 89 : } else { 90 3 : fprintf(stderr,"doublequote in a non quoted value %s at %zu.\n", 91 : _csvtok_csv,csv_idx); 92 3 : free(csvtok_txt); 93 3 : _csvtok_csv=NULL; 94 3 : return NULL; 95 : } 96 : } 97 : } else { /* Quoted */ 98 297 : if (_csvtok_csv[csv_idx]=='"') { 99 93 : if (_csvtok_csv[csv_idx+1]=='"') { 100 : /* Skip escaping doublequote and let the copy occur */ 101 6 : csv_idx++; 102 87 : } else if ((_csvtok_csv[csv_idx+1]=='0')|| 103 87 : (_csvtok_csv[csv_idx+1]==',')|| 104 39 : (_csvtok_csv[csv_idx+1]=='\n')|| 105 3 : ((_csvtok_csv[csv_idx+1]=='\r')&&(_csvtok_csv[csv_idx+2]=='\n'))) { 106 84 : quoted = 0; 107 84 : csv_idx++; 108 84 : break; 109 : } else { 110 3 : fprintf(stderr,"doublequote should be at the end of field or escaping another doublequote in %s at %zu.\n", 111 : _csvtok_csv,csv_idx); 112 3 : free(csvtok_txt); 113 3 : _csvtok_csv=NULL; 114 3 : return NULL; 115 : } 116 : } 117 : } 118 1610 : csvtok_txt[txt_idx++]=_csvtok_csv[csv_idx++]; 119 : } 120 : 121 : /* Close csvtok_txt */ 122 300 : csvtok_txt[txt_idx]=0; 123 300 : if (quoted) { 124 3 : fprintf(stderr,"Missing end-of-field doublequote %s\n",csvtok_txt); 125 3 : free(csvtok_txt); 126 3 : _csvtok_csv=NULL; 127 3 : return NULL; 128 297 : } else if (_csvtok_csv[csv_idx]==',') /* Next char after end of field should be 0/,/CRLF */ 129 252 : csv_idx++; 130 45 : else if (_csvtok_csv[csv_idx]=='\n') 131 36 : csv_idx+=1; 132 9 : else if ((_csvtok_csv[csv_idx]=='\r')&&(_csvtok_csv[csv_idx+1]=='\n')) 133 3 : csv_idx+=2; 134 6 : else if (_csvtok_csv[csv_idx]!=0) { 135 0 : fprintf(stderr,"Parsing error after %s\n",csvtok_txt); 136 0 : free(csvtok_txt); 137 0 : _csvtok_csv=NULL; 138 0 : return NULL; 139 : } 140 297 : _csvtok_csv += csv_idx; 141 : /* Shrink overallocated string */ 142 297 : if (NULL==(csvtok_txt=realloc(csvtok_txt,strlen(csvtok_txt)+1))) { 143 0 : fprintf(stderr,"ERROR: csvtok Trimming CSV token\n"); 144 0 : free(csvtok_txt); 145 0 : return NULL; 146 : } 147 297 : return csvtok_txt; 148 : } 149 : 150 : /* RFC4180 compliant text to CSV encoder */ 151 : /* https://www.rfc-editor.org/rfc/rfc4180 */ 152 245 : char* txt2csv(const char* text) { 153 : char* csv; 154 : size_t text_idx; 155 : size_t csv_idx; 156 245 : unsigned short int need_quotes=0; 157 : 158 : { 159 : /* Check if quotes are needed and how many doublequotes are in the 160 : * source text to allocate output buffer. This text iteration could be 161 : * avoided but would imply to overallocate for the worst case scenario 162 : * and to reallocate at the end with the potentially needed surrounding 163 : * quotes */ 164 245 : size_t extra_chars = 0; 165 1818 : for (text_idx=0; text[text_idx]; text_idx++) { 166 1573 : if ( (text[text_idx]==',')|| (text[text_idx]=='\r')|| (text[text_idx]=='\n')) 167 85 : need_quotes = 1; 168 1573 : if (text[text_idx]=='"') { 169 53 : need_quotes = 1; 170 53 : extra_chars++; 171 : } 172 : } 173 : /* Allocate the right output buffer size */ 174 245 : if (NULL==(csv=malloc(strlen(text)+(need_quotes?2:0)+extra_chars+1))) { 175 0 : perror("txt2csv malloc"); 176 0 : return NULL; 177 : } 178 : } 179 : 180 245 : text_idx = 0; 181 245 : csv_idx = 0; 182 : 183 : /* If quotes are needed add a starting doublequote */ 184 245 : if (need_quotes) 185 98 : csv[csv_idx++] = '"'; 186 : 187 : /* Copy each source char to the destination buffer */ 188 1818 : while (text[text_idx]) { 189 : /* With a doublequote before if the char to copy is a doublequote */ 190 1573 : if (text[text_idx]=='"') 191 53 : csv[csv_idx++] = '"'; 192 1573 : csv[csv_idx++] = text[text_idx++]; 193 : } 194 : 195 : /* If quotes are needed add a closing doublequote */ 196 245 : if (need_quotes) 197 98 : csv[csv_idx++] = '"'; 198 : 199 : /* Properly end the C string */ 200 245 : csv[csv_idx] = 0; 201 : 202 245 : return csv; 203 : } 204 : /* vim: set tw=80: */