test result

#读取int型一列,做baseline
GoogleBenchmarkColumnarToRow/CacheScan/1536/10/iterations:10/process_time/threads:1      0.734 s         0.736 s            10 batch_buffer_size=32k batches=489 columns=16 init_time=6.34947G num_rows=16M parquet_parse=4.78942G rowgroups=6 write_time=840.358M
#调整步长为8(raw_values() 返回 const int64_t*,指针运算以元素为单位,i*8 即每行跨 8 个元素 = 64 字节)
-          auto value = int64_array->Value(i);
-          memcpy(buffer_address + offsets[i] + field_offset, &value, sizeof(int64_t));
+          // auto value = int64_array->Value(i);
+          // memcpy(buffer_address + offsets[i] + field_offset, &value, sizeof(int64_t));
+          auto value_ptr = int64_array->raw_values();
+          memcpy(buffer_address + offsets[i] + field_offset, value_ptr + i*8, 8);


Benchmark                                                                                    Time             CPU   Iterations UserCounters...
----------------------------------------------------------------------------------------------------------------------------------------------
GoogleBenchmarkColumnarToRow/CacheScan/1536/10/iterations:10/process_time/threads:1      0.816 s         0.818 s            10 batch_buffer_size=32k batches=489 columns=16 init_time=6.37134G num_rows=16M parquet_parse=4.75935G rowgroups=6 write_time=1.63872G
+          int64_t val_offset = i*8;
+          memcpy(buffer_address + offsets[i] + field_offset, value_ptr + val_offset, 8);
Benchmark                                                                                    Time             CPU   Iterations UserCounters...
----------------------------------------------------------------------------------------------------------------------------------------------
GoogleBenchmarkColumnarToRow/CacheScan/1536/10/iterations:10/process_time/threads:1      0.812 s         0.814 s            10 batch_buffer_size=32k batches=489 columns=16 init_time=6.32578G num_rows=16M parquet_parse=4.74461G rowgroups=6 write_time=1.6437G

#调整步长为2
+          int64_t val_offset = i*2;
+          memcpy(buffer_address + offsets[i] + field_offset, value_ptr + val_offset, 8);

Benchmark                                                                                    Time             CPU   Iterations UserCounters...
----------------------------------------------------------------------------------------------------------------------------------------------
GoogleBenchmarkColumnarToRow/CacheScan/1536/10/iterations:10/process_time/threads:1      0.765 s         0.767 s            10 batch_buffer_size=32k batches=489 columns=16 init_time=6.51011G num_rows=16M parquet_parse=4.81547G rowgroups=6 write_time=990.263M
-          auto value = int64_array->Value(i);
-          memcpy(buffer_address + offsets[i] + field_offset, &value, sizeof(int64_t));
+          // auto value = int64_array->Value(i);
+          // memcpy(buffer_address + offsets[i] + field_offset, &value, sizeof(int64_t));
+          auto value_ptr = int64_array->raw_values();
+          memcpy(buffer_address + offsets[i] + field_offset, &(value_ptr[i]), 8);

------------
Benchmark                                                                                    Time             CPU   Iterations UserCounters...
----------------------------------------------------------------------------------------------------------------------------------------------
GoogleBenchmarkColumnarToRow/CacheScan/1536/10/iterations:10/process_time/threads:1      0.731 s         0.733 s            10 batch_buffer_size=32k batches=489 columns=16 init_time=6.32732G num_rows=16M parquet_parse=4.73706G rowgroups=6 write_time=839.743M

+          auto value_ptr = int64_array->raw_values();
+          memcpy(buffer_address + offsets[i] + field_offset, value_ptr + i, 8);

Benchmark                                                                                    Time             CPU   Iterations UserCounters...
----------------------------------------------------------------------------------------------------------------------------------------------
GoogleBenchmarkColumnarToRow/CacheScan/1536/10/iterations:10/process_time/threads:1      0.732 s         0.734 s            10 batch_buffer_size=32k batches=489 columns=16 init_time=6.33026G num_rows=16M parquet_parse=4.74852G rowgroups=6 write_time=839.259M

先行访问后列写
# baseline
Benchmark                                                                                    Time             CPU   Iterations UserCounters...
----------------------------------------------------------------------------------------------------------------------------------------------
GoogleBenchmarkColumnarToRow/CacheScan/1536/10/iterations:10/process_time/threads:1       2.79 s          2.79 s            10 batch_buffer_size=32k batches=489 columns=16 init_time=6.70119G num_rows=16M parquet_parse=4.78144G rowgroups=6 write_time=21.0347G

#先行后列
***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
----------------------------------------------------------------------------------------------------------------------------------------------
Benchmark                                                                                    Time             CPU   Iterations UserCounters...
----------------------------------------------------------------------------------------------------------------------------------------------
GoogleBenchmarkColumnarToRow/CacheScan/1536/10/iterations:10/process_time/threads:1       18.9 s          18.9 s            10 batch_buffer_size=32k batches=489 columns=16 init_time=6.84436G num_rows=16M parquet_parse=4.7926G rowgroups=6 write_time=182.113G