Feature #10423
open[PATCH] opt_str_lit*: avoid literal string allocations
Description
Patch also downloadable at: http://80x24.org/spew/m/opt_str_lit-v4%40m.txt
Broken-out commits in the "opt_str_lit-v4" branch of
git://bogomips.org/ruby.git (and http://bogomips.org/ruby.git )
This obsoletes Features #10326, #10329, and #10333
Changes since -v3
-
cleanup tests to be more DRY
-
optimize allocations for Hash#fetch, String#r?partition
-
split opt_str_lit into 3 instructions to reduce insn complexity
(opt_str_freeze, opt_aset_with, opt_aref_with) are still gone.While having one instruction is appealing in some ways,
hiding compile-time-resolvable branches behind it is
misleading and it should be easier-to-follow when divided
into three methods.-
opt_str_lit_recv: for "literal string" receivers
This no longer optimizes the method dispatch away
for String#freeze, the method call now always happens
(but allocation is avoided) -
opt_str_lit_tmask: the most heavily-used
This optimizes allocations away for "literal string" arguments -
opt_str_lit_data: currently only used for Time#strftime
We may remove this (and the strftime optimization)
if it is too rare to be useful.
-
Benchmarks:
"make rdoc" performance improved from 66.3 => 63.3 sec
vm2_* benchmarks:
We take a speed hit from losing opt_aref_with/opt_aset_with
but I think the flexibility of the new instructions is worth it.
Speedup ratio: compare with the result of `trunk' (greater is better)
name built
loop_whileloop2 1.000
vm2_array* 0.997
vm2_array_delete_lit* 2.204
vm2_array_include_lit* 2.372
vm2_bigarray* 1.002
vm2_bighash* 1.004
vm2_case* 1.030
vm2_defined_method* 1.013
vm2_dstr* 0.977
vm2_eval* 1.019
vm2_hash_aref_lit* 0.767
vm2_hash_aset_lit* 0.828
vm2_hash_delete_lit* 2.254
vm2_method* 1.010
vm2_method_missing* 1.010
vm2_method_with_block* 0.996
vm2_mutex* 0.954
vm2_newlambda* 1.010
vm2_poly_method* 0.974
vm2_poly_method_ov* 0.990
vm2_proc* 1.003
vm2_raise1* 0.992
vm2_raise2* 0.993
vm2_regexp* 1.053
vm2_send* 1.000
vm2_set_include_lit* 1.051
vm2_str_delete* 1.365
vm2_str_eq1* 2.282
vm2_str_eq2* 2.700
vm2_str_eqq1* 2.062
vm2_str_eqq2* 2.421
vm2_str_fmt* 1.271
vm2_str_gsub_bang_lit* 1.650
vm2_str_gsub_bang_re* 1.153
vm2_str_gsub_re* 1.110
vm2_str_plus1* 1.496
vm2_str_plus2* 1.618
vm2_str_tr_bang* 1.527
vm2_strcat* 1.406
vm2_super* 1.005
vm2_unif1* 0.985
vm2_zsuper* 0.992
-----------------------------------------------------------
raw data:
[["loop_whileloop2",
[[0.09168246667832136,
0.09153273981064558,
0.09135112632066011,
0.09144351910799742,
0.0913569862022996],
[0.09150420501828194,
0.09135987050831318,
0.09148290101438761,
0.09135456290096045,
0.09141282178461552]]],
["vm2_array",
[[0.6448190519586205,
0.6377620408311486,
0.6456073289737105,
0.636782786808908,
0.64611473120749],
[0.6434593833982944,
0.6479324344545603,
0.6385641889646649,
0.6491997549310327,
0.6610294701531529]]],
["vm2_array_delete_lit",
[[0.44069126434624195,
0.43327025789767504,
0.44154794327914715,
0.43351241294294596,
0.4343419009819627],
[0.24649471696466208,
0.24813515041023493,
0.2801106022670865,
0.24796569626778364,
0.25444996636360884]]],
["vm2_array_include_lit",
[[0.42663843277841806,
0.42771619465202093,
0.4342082254588604,
0.4192065382376313,
0.4339462127536535],
[0.24434014037251472,
0.23243559524416924,
0.23569373413920403,
0.2295856410637498,
0.23521045502275229]]],
["vm2_bigarray",
[[5.9676302736625075,
5.921381871215999,
5.893002421595156,
5.898605763912201,
5.92879247572273],
[5.918988090008497,
5.953728860244155,
5.910121874883771,
5.894348439760506,
5.882020344957709]]],
["vm2_bighash",
[[3.5214368999004364,
3.5914944410324097,
3.5528544737026095,
3.6002828497439623,
3.595806434750557],
[3.6454197568818927,
3.5987599324434996,
3.631806828081608,
3.5076274275779724,
3.5060981484130025]]],
["vm2_case",
[[0.16305183991789818,
0.16213963273912668,
0.1626761993393302,
0.1703133536502719,
0.16371900774538517],
[0.1630518063902855,
0.16097174491733313,
0.16129930969327688,
0.16106242593377829,
0.16007240489125252]]],
["vm2_defined_method",
[[2.4309032559394836,
2.4551019677892327,
2.4333217879757285,
2.4307468980550766,
2.4270543046295643],
[2.3963797464966774,
2.4110122229903936,
2.592901307158172,
2.7106671230867505,
2.6869526272639632]]],
["vm2_dstr",
[[1.206045768223703,
1.1181026576086879,
1.29782950039953,
1.074333599768579,
1.060404953546822],
[1.0911998134106398,
1.3560991054400802,
1.096764899790287,
1.0906088771298528,
1.0831655990332365]]],
["vm2_eval",
[[12.45692038256675,
12.536506623961031,
12.868694677948952,
12.425183075480163,
12.422813648357987],
[12.490455374121666,
12.190652490593493,
12.194032781757414,
12.581901006400585,
13.230092607438564]]],
["vm2_hash_aref_lit",
[[0.2547442754730582,
0.25785687286406755,
0.2552897445857525,
0.2547551356256008,
0.2546374099329114],
[0.3131725639104843,
0.3109410647302866,
0.30428329203277826,
0.30984809901565313,
0.30932857654988766]]],
["vm2_hash_aset_lit",
[[0.3067243071272969,
0.307466727681458,
0.3059097733348608,
0.32101333420723677,
0.3114894386380911],
[0.3522613048553467,
0.3512485157698393,
0.3521018158644438,
0.35033643897622824,
0.35163887683302164]]],
["vm2_hash_delete_lit",
[[0.4271302009001374,
0.5321928421035409,
0.42596039827913046,
0.4215333154425025,
0.4221466425806284],
[0.2427450241521001,
0.23863658774644136,
0.23870530910789967,
0.23786015156656504,
0.24112990126013756]]],
["vm2_method",
[[1.2295677298679948,
1.2423510188236833,
1.2246184339746833,
1.213058383204043,
1.3788639837875962],
[1.2468778286129236,
1.2022472834214568,
1.319407593458891,
1.3409598469734192,
1.246872222982347]]],
["vm2_method_missing",
[[1.8783203130587935,
2.0230442117899656,
1.7176201958209276,
1.790073310956359,
1.7432779222726822],
[1.710430753417313,
1.8266426706686616,
1.7299889298155904,
1.7529844669625163,
1.7007915144786239]]],
["vm2_method_with_block",
[[1.3849838990718126,
1.345238379202783,
1.3416069420054555,
1.3473590090870857,
1.3418097402900457],
[1.3468122174963355,
2.0074896160513163,
1.3560442496091127,
1.353834005072713,
1.3525155214592814]]],
["vm2_mutex",
[[0.6452304208651185,
0.7461022492498159,
0.7083938084542751,
0.7519227871671319,
0.7953951777890325],
[0.6722144978120923,
0.6991892093792558,
0.699041954241693,
0.8469044668599963,
0.7956293905153871]]],
["vm2_newlambda",
[[0.7562470361590385,
0.7670294912531972,
0.7572819162160158,
0.8686427380889654,
0.772026221267879],
[0.7586023258045316,
0.7559101320803165,
0.7497995179146528,
0.7534453617408872,
0.7510695895180106]]],
["vm2_poly_method",
[[2.1563803972676396,
1.9838355090469122,
2.0223182002082467,
1.9927900172770023,
2.055130822584033],
[2.03470276389271,
2.187690651975572,
2.162980039604008,
2.143593772314489,
2.04829403758049]]],
["vm2_poly_method_ov",
[[0.22944753244519234,
0.2304667690768838,
0.22847569175064564,
0.22843772917985916,
0.22901444043964148],
[0.2298243623226881,
0.2334523554891348,
0.23789969086647034,
0.23442872054874897,
0.23049725405871868]]],
["vm2_proc",
[[0.46109406277537346,
0.4476210633292794,
0.44980251509696245,
0.4451689962297678,
0.4452860997989774],
[0.4545932151377201,
0.4440324120223522,
0.44979235529899597,
0.4695265833288431,
0.4635683596134186]]],
["vm2_raise1",
[[4.989030849188566,
4.951415436342359,
4.755166156217456,
4.824490828439593,
4.795467809773982],
[4.797340838238597,
4.799810292199254,
4.792353850789368,
4.966345896013081,
5.099304006434977]]],
["vm2_raise2",
[[7.062344457022846,
7.118377918377519,
7.290325260721147,
7.1321266973391175,
7.341989707201719],
[7.456813280470669,
7.130581725388765,
7.1143358228728175,
7.1227226899936795,
7.2687620194628835]]],
["vm2_regexp",
[[1.0923050865530968,
1.0876065697520971,
1.0792832653969526,
1.0728685557842255,
1.098634515888989],
[1.172722346149385,
1.0436540711671114,
1.0638451064005494,
1.0391646642237902,
1.023901374079287]]],
["vm2_send",
[[0.3253856906667352,
0.31978365778923035,
0.32439478673040867,
0.3290995704010129,
0.32464261166751385],
[0.31982277520000935,
0.32479251362383366,
0.31968420650810003,
0.3205655198544264,
0.32221281714737415]]],
["vm2_set_include_lit",
[[0.7017893251031637,
0.7203339897096157,
0.714060970582068,
0.7558876499533653,
0.7028816910460591],
[0.6870902189984918,
0.6755420127883554,
0.6719080805778503,
0.6900417571887374,
1.1408466212451458]]],
["vm2_str_delete",
[[0.671600878238678,
0.6570039531216025,
0.6776775699108839,
0.6644531870260835,
0.97628088388592],
[0.508894819766283,
0.5154125597327948,
0.5064034061506391,
0.5130562232807279,
0.5056716529652476]]],
["vm2_str_eq1",
[[0.42520802468061447,
0.42302330397069454,
0.42416366562247276,
0.4216942973434925,
0.4239108320325613],
[0.23870286531746387,
0.23699089046567678,
0.2361262608319521,
0.31979569140821695,
0.26054865028709173]]],
["vm2_str_eq2",
[[0.42534991446882486,
0.4238837053999305,
0.4327298905700445,
0.42395070381462574,
0.42501260805875063],
[0.21500772424042225,
0.21449210867285728,
0.2152256891131401,
0.21711387671530247,
0.2152888299897313]]],
["vm2_str_eqq1",
[[0.45151620265096426,
0.4597599729895592,
0.45280721783638,
0.4582480965182185,
0.45380780193954706],
[0.26729187835007906,
0.26626693457365036,
0.2660442851483822,
0.26674115750938654,
0.2721241358667612]]],
["vm2_str_eqq2",
[[0.45260279811918736,
0.44771482422947884,
0.4570333072915673,
0.5335573730990291,
0.4592890217900276],
[0.2385527715086937,
0.24223692156374454,
0.24510123394429684,
0.2744274791330099,
0.2447566892951727]]],
["vm2_str_fmt",
[[2.614012835547328,
2.6062369514256716,
2.5333361653611064,
2.50343619287014,
2.641179056838155],
[1.9900542199611664,
2.0983974151313305,
1.9898552373051643,
2.001521130092442,
1.9963106652721763]]],
["vm2_str_gsub_bang_lit",
[[1.1346126468852162,
1.134159336797893,
1.1467241132631898,
1.170719631947577,
1.149439207278192],
[0.737271074205637,
0.7308303005993366,
0.730314377695322,
0.7260715514421463,
0.7231733025982976]]],
["vm2_str_gsub_bang_re",
[[1.4583028750494123,
1.4592840988188982,
1.4590299287810922,
1.4751051738858223,
1.492074583657086],
[1.3006651625037193,
1.3510901927947998,
1.2767879888415337,
1.3884455161169171,
1.277703725732863]]],
["vm2_str_gsub_re",
[[1.7273316802456975,
1.7103399503976107,
1.7015334982424974,
1.690703290514648,
1.694624281488359],
[1.5322920577600598,
1.5391994584351778,
1.5388264516368508,
1.5372542077675462,
1.5352015374228358]]],
["vm2_str_plus1",
[[0.653569158166647,
0.6917095249518752,
0.6521412674337626,
0.6697740163654089,
0.7296328162774444],
[0.46872936747968197,
0.48235206957906485,
0.46762560587376356,
0.4766495209187269,
0.4661587802693248]]],
["vm2_str_plus2",
[[0.6671840893104672,
0.6488652648404241,
0.6509246686473489,
0.6638444662094116,
0.6557880509644747],
[0.43602617271244526,
0.4365514535456896,
0.45153723005205393,
0.437131704762578,
0.44073084741830826]]],
["vm2_str_tr_bang",
[[2.6861952347680926,
2.816385295242071,
2.919709806330502,
2.712329135276377,
2.6914397440850735],
[1.79120559617877,
1.8011440439149737,
1.7935738116502762,
1.7961191991344094,
1.794896787032485]]],
["vm2_strcat",
[[0.7171547841280699,
0.7207952421158552,
0.7209635498002172,
0.7520132083445787,
0.7179927034303546],
[0.5424934774637222,
0.5365820089355111,
0.5409346511587501,
0.5369464093819261,
0.5396620389074087]]],
["vm2_super",
[[0.43017072789371014,
0.4671447016298771,
0.43235956504940987,
0.43128165043890476,
0.4320727000012994],
[0.42843823600560427,
0.4359660716727376,
0.46027328819036484,
0.43320534005761147,
0.4283680962398648]]],
["vm2_unif1",
[[0.22890623100101948,
0.23932419251650572,
0.2282293662428856,
0.2281231889501214,
0.22829711250960827],
[0.23160281032323837,
0.23210297524929047,
0.23051423579454422,
0.23075581435114145,
0.2301806192845106]]],
["vm2_zsuper",
[[0.44561540707945824,
0.4586751004680991,
0.468279717490077,
0.44244454242289066,
0.4582256320863962],
[0.5099742524325848,
0.44541092310100794,
0.4480971107259393,
0.4709290647879243,
0.5650665555149317]]]]
Elapsed time: 646.078768307 (sec)
-----------------------------------------------------------
benchmark results:
minimum results in each 5 measurements.
Execution time (sec)
name trunk built
loop_whileloop2 0.091 0.091
vm2_array* 0.545 0.547
vm2_array_delete_lit* 0.342 0.155
vm2_array_include_lit* 0.328 0.138
vm2_bigarray* 5.802 5.791
vm2_bighash* 3.430 3.415
vm2_case* 0.071 0.069
vm2_defined_method* 2.336 2.305
vm2_dstr* 0.969 0.992
vm2_eval* 12.331 12.099
vm2_hash_aref_lit* 0.163 0.213
vm2_hash_aset_lit* 0.215 0.259
vm2_hash_delete_lit* 0.330 0.147
vm2_method* 1.122 1.111
vm2_method_missing* 1.626 1.609
vm2_method_with_block* 1.250 1.255
vm2_mutex* 0.554 0.581
vm2_newlambda* 0.665 0.658
vm2_poly_method* 1.892 1.943
vm2_poly_method_ov* 0.137 0.138
vm2_proc* 0.354 0.353
vm2_raise1* 4.664 4.701
vm2_raise2* 6.971 7.023
vm2_regexp* 0.982 0.933
vm2_send* 0.228 0.228
vm2_set_include_lit* 0.610 0.581
vm2_str_delete* 0.566 0.414
vm2_str_eq1* 0.330 0.145
vm2_str_eq2* 0.333 0.123
vm2_str_eqq1* 0.360 0.175
vm2_str_eqq2* 0.356 0.147
vm2_str_fmt* 2.412 1.899
vm2_str_gsub_bang_lit* 1.043 0.632
vm2_str_gsub_bang_re* 1.367 1.185
vm2_str_gsub_re* 1.599 1.441
vm2_str_plus1* 0.561 0.375
vm2_str_plus2* 0.558 0.345
vm2_str_tr_bang* 2.595 1.700
vm2_strcat* 0.626 0.445
vm2_super* 0.339 0.337
vm2_unif1* 0.137 0.139
vm2_zsuper* 0.351 0.354
---
benchmark/bm_vm2_array_delete_lit.rb | 6 +
benchmark/bm_vm2_array_include_lit.rb | 6 +
benchmark/bm_vm2_hash_aref_lit.rb | 6 +
benchmark/bm_vm2_hash_aset_lit.rb | 6 +
benchmark/bm_vm2_hash_delete_lit.rb | 6 +
benchmark/bm_vm2_set_include_lit.rb | 7 +
benchmark/bm_vm2_str_delete.rb | 6 +
benchmark/bm_vm2_str_eq1.rb | 6 +
benchmark/bm_vm2_str_eq2.rb | 6 +
benchmark/bm_vm2_str_eqq1.rb | 6 +
benchmark/bm_vm2_str_eqq2.rb | 6 +
benchmark/bm_vm2_str_fmt.rb | 5 +
benchmark/bm_vm2_str_gsub_bang_lit.rb | 6 +
benchmark/bm_vm2_str_gsub_bang_re.rb | 6 +
benchmark/bm_vm2_str_gsub_re.rb | 6 +
benchmark/bm_vm2_str_plus1.rb | 6 +
benchmark/bm_vm2_str_plus2.rb | 6 +
benchmark/bm_vm2_str_tr_bang.rb | 7 +
benchmark/bm_vm2_strcat.rb | 7 +
common.mk | 18 +-
compile.c | 330 ++++++++++++++++++++++++++++++----
defs/id.def | 38 ++++
defs/opt_method.def | 90 ++++++++++
insns.def | 275 ++++++++++++++++------------
template/opt_method.h.tmpl | 111 ++++++++++++
template/opt_method.inc.tmpl | 42 +++++
test/-ext-/symbol/test_type.rb | 1 +
test/objspace/test_objspace.rb | 1 +
test/ruby/envutil.rb | 20 +++
test/ruby/test_hash.rb | 2 +
test/ruby/test_iseq.rb | 1 +
test/ruby/test_optimization.rb | 162 ++++++++++++++++-
vm.c | 67 +------
vm_core.h | 44 +----
vm_insnhelper.c | 8 +-
vm_insnhelper.h | 27 +++
36 files changed, 1089 insertions(+), 264 deletions(-)
Files