Browse Source

Added further functions for data manipulation and optimized the ones already in place

Smoolak 1 year ago
parent
commit
a279f480ad
2 changed files with 91 additions and 23 deletions
  1. 6
    2
      include/readbop.h
  2. 85
    21
      src/readbop.c.in

+ 6
- 2
include/readbop.h View File

@@ -57,8 +57,12 @@
57 57
 		int arr2csv(const char* arr, size_t rows, size_t row_length, size_t cols, const type_t *types, char **headers, char **output);
58 58
 		int print_data(const char *ptr, type_t t);
59 59
 
60
-	ssize_t get_col_offset(char **headers, const type_t *types, size_t cols, const char* target);
61
-	ssize_t* get_col_offsets(char **headers, const type_t *types, size_t cols, char **targets, size_t n);
60
+		ssize_t get_col_index(char **headers, size_t cols, const char* target);
61
+		ssize_t* get_col_indexes(char **headers, size_t cols, char **targets, size_t n);
62
+		ssize_t get_col_offset_i(const type_t *types, size_t cols, ssize_t index);
63
+		ssize_t* get_col_offsets_i(const type_t *types, size_t cols, const ssize_t* indexes, size_t n);
64
+		ssize_t get_col_offset(char **headers, const type_t *types, size_t cols, const char* target);
65
+		ssize_t* get_col_offsets(char **headers, const type_t *types, size_t cols, char **targets, size_t n);
62 66
 
63 67
 
64 68
 		//Tested only with Bebop 2 files

+ 85
- 21
src/readbop.c.in View File

@@ -1140,6 +1140,86 @@ int print_data(const char *ptr, type_t t) {
1140 1140
 	return 0;
1141 1141
 }
1142 1142
 
1143
/**
 * Look up the position of a column by its header name.
 *
 * @param headers Array of `cols` NUL-terminated header strings.
 * @param cols    Number of entries in `headers`.
 * @param target  Header name to search for (exact match via strcmp).
 * @return Zero-based index of the first header equal to `target`, or -1 when
 *         no header matches or when `headers` / `target` is NULL.
 */
ssize_t get_col_index(char **headers, size_t cols, const char* target) {
	if(headers == NULL || target == NULL)
		return -1;

	/* Count with size_t so the bound check never mixes signed and unsigned operands. */
	for(size_t i = 0; i < cols; ++i) {
		/* A NULL entry can never match; skip it instead of handing it to strcmp. */
		if(headers[i] != NULL && strcmp(headers[i], target) == 0)
			return (ssize_t)i;
	}

	return -1;
}
1150
+
1151
+ssize_t* get_col_indexes(char **headers, size_t cols, char **targets, size_t n) {
1152
+	ssize_t *indexes;
1153
+	if((indexes = malloc(n*sizeof(ssize_t))) == NULL)
1154
+		set_error_null(ALLOC);
1155
+
1156
+	for(size_t i = 0; i < n; ++i)
1157
+		indexes[i] = -1;
1158
+
1159
+	size_t cnt = 0;
1160
+	for(size_t i = 0; i < cols; ++i) {
1161
+		for(size_t j = 0; j < n; ++j) {
1162
+			if(indexes[j] < 0 && strcmp(headers[i], targets[j]) == 0) {
1163
+				++cnt;
1164
+				indexes[j] = i;
1165
+			}
1166
+		}
1167
+
1168
+		if(cnt == n)
1169
+			return indexes;
1170
+	}
1171
+
1172
+	return indexes;
1173
+}
1174
+
1175
+ssize_t get_col_offset_i(const type_t *types, size_t cols, ssize_t index) {
1176
+	if(index < 0)
1177
+		return -1;
1178
+
1179
+	size_t offset = 0;
1180
+	for(size_t i = 0; i < index; ++i)
1181
+		offset += type_sizes[types[i]];
1182
+	return offset;
1183
+}
1184
+
1185
+ssize_t* get_col_offsets_i(const type_t *types, size_t cols, const ssize_t* indexes, size_t n) {
1186
+	ssize_t *offsets;
1187
+	if((offsets = malloc(n*sizeof(ssize_t))) == NULL)
1188
+		set_error_null(ALLOC);
1189
+
1190
+	ssize_t max = -2;
1191
+	for(size_t i = 0; i < n; ++i)
1192
+		if(indexes[i] > max)
1193
+			max = indexes[i];
1194
+	++max;
1195
+
1196
+	if(max < 0) {
1197
+		for(size_t i = 0; i < n; ++i)
1198
+			offsets[i] = -1;
1199
+		return offsets;
1200
+	}
1201
+
1202
+	size_t *cummul;
1203
+	if((cummul = calloc(max, sizeof(size_t))) == NULL) {
1204
+		free(offsets);
1205
+        set_error_null(ALLOC);
1206
+	}
1207
+
1208
+	for(size_t i = 1; i < max; ++i)
1209
+		cummul[i] = cummul[i-1] + type_sizes[types[i-1]];
1210
+
1211
+	for(size_t i = 0; i < n; ++i) {
1212
+		if(indexes[i] < 0)
1213
+			offsets[i] = -1;
1214
+		else
1215
+			offsets[i] = cummul[indexes[i]];
1216
+	}
1217
+
1218
+	free(cummul);
1219
+
1220
+	return offsets;
1221
+}
1222
+
1143 1223
 ssize_t get_col_offset(char **headers, const type_t *types, size_t cols, const char* target) {
1144 1224
 	size_t offset = 0;
1145 1225
 	for(size_t i = 0; i < cols; ++i) {
@@ -1152,45 +1232,29 @@ ssize_t get_col_offset(char **headers, const type_t *types, size_t cols, const c
1152 1232
 }
1153 1233
 
1154 1234
 ssize_t* get_col_offsets(char **headers, const type_t *types, size_t cols, char **targets, size_t n) {
1155
-	uint8_t *found;
1156
-	if((found = calloc(n, sizeof(uint8_t))) == NULL)
1157
-		set_error_null(ALLOC);
1158
-
1159 1235
 	ssize_t *offsets;
1160
-	if((offsets = malloc(n*sizeof(size_t))) == NULL) {
1161
-		free(found);
1236
+	if((offsets = malloc(n*sizeof(ssize_t))) == NULL)
1162 1237
 		set_error_null(ALLOC);
1163
-	}
1164 1238
 
1165 1239
 	for(size_t i = 0; i < n; ++i)
1166 1240
 		offsets[i] = -1;
1167 1241
 
1242
+	size_t cnt = 0;
1168 1243
 	size_t current_offset = 0;
1169 1244
 	for(size_t i = 0; i < cols; ++i) {
1170 1245
 		for(size_t j = 0; j < n; ++j) {
1171
-			if(!found[j] && strcmp(headers[i], targets[j]) == 0) {
1172
-				found[j] = 1;
1246
+			if(offsets[j] < 0 && strcmp(headers[i], targets[j]) == 0) {
1247
+				++cnt;
1173 1248
 				offsets[j] = current_offset;
1174 1249
 			}
1175 1250
 		}
1176 1251
 
1177
-		uint8_t stop = 1;
1178
-		for(size_t j = 0; j < n; ++j) {
1179
-			if(!found[j]) {
1180
-				stop = 0;
1181
-				break;
1182
-			}
1183
-		}
1184
-
1185
-		if(stop) {
1186
-			free(found);
1252
+		if(cnt == n)
1187 1253
 			return offsets;
1188
-		}
1189 1254
 
1190 1255
 		current_offset += type_sizes[types[i]];
1191 1256
 	}
1192 1257
 
1193
-	free(found);
1194 1258
 	return offsets;
1195 1259
 }
1196 1260
 

Loading…
Cancel
Save