
Remade the bibliography for the results
All checks were successful
/ Build pdf (push) Successful in 33s

This commit is contained in:
Matthias Veigel 2025-07-07 00:05:35 +02:00
parent 721bb62a20
commit e230ac9005
Signed by: root
GPG Key ID: 2437494E09F13876
4 changed files with 507 additions and 52 deletions

association-for-computing-machinery.csl Normal file
View File

@@ -0,0 +1,215 @@
<?xml version="1.0" encoding="utf-8"?>
<style xmlns="http://purl.org/net/xbiblio/csl" class="in-text" version="1.0" demote-non-dropping-particle="sort-only" default-locale="en-US">
<info>
<title>Association for Computing Machinery</title>
<title-short>ACM</title-short>
<id>http://www.zotero.org/styles/association-for-computing-machinery</id>
<link href="http://www.zotero.org/styles/association-for-computing-machinery" rel="self"/>
<link href="http://www.zotero.org/styles/acm-sigchi-proceedings" rel="template"/>
<link href="https://www.acm.org/publications/authors/reference-formatting" rel="documentation"/>
<author>
<name>Patrick O'Brien</name>
<email>obrienpat86@gmail.com</email>
</author>
<contributor>
<name>Junliang Hu</name>
<email>vtta0124@gmail.com</email>
</contributor>
<category citation-format="numeric"/>
<category field="engineering"/>
<updated>2024-03-10T19:16:53+00:00</updated>
<rights license="http://creativecommons.org/licenses/by-sa/3.0/">This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 License</rights>
</info>
<macro name="author">
<group suffix=".">
<names variable="author">
<name delimiter=", " and="text" sort-separator=", "/>
<label form="short" text-case="capitalize-first" prefix=" (" suffix=")"/>
<substitute>
<names variable="editor"/>
</substitute>
</names>
</group>
</macro>
<macro name="editor">
<names variable="editor">
<name delimiter=", " and="text" delimiter-precedes-last="never"/>
<label form="short" prefix=" (" suffix=")"/>
</names>
</macro>
<macro name="title">
<choose>
<if type="bill book graphic legal_case legislation motion_picture report song" match="any">
<group delimiter=" ">
<text variable="title" font-style="italic"/>
<group delimiter=" " prefix="(" suffix=")">
<number variable="edition" form="ordinal"/>
<text term="edition" form="short"/>
</group>
</group>
</if>
<else>
<text variable="title"/>
</else>
</choose>
</macro>
<macro name="year">
<date variable="issued">
<date-part name="year"/>
</date>
</macro>
<macro name="journal">
<group delimiter=", ">
<group delimiter=" ">
<text variable="container-title" form="short" font-style="italic"/>
<text variable="volume" suffix=","/>
<group>
<text variable="issue" suffix=" "/>
<date date-parts="year-month" form="text" variable="issued" prefix="(" suffix=")"/>
</group>
</group>
</group>
</macro>
<macro name="edition">
<choose>
<if is-numeric="edition">
<group delimiter=" ">
<number variable="edition" form="ordinal"/>
<text term="edition" form="short"/>
</group>
</if>
<else>
<text variable="edition"/>
</else>
</choose>
</macro>
<macro name="book-publisher">
<group delimiter=", ">
<text variable="publisher"/>
<text variable="publisher-place"/>
</group>
</macro>
<macro name="access">
<choose>
<if variable="DOI">
<text variable="DOI" prefix="https://doi.org/"/>
</if>
<else-if variable="URL">
<group delimiter=" ">
<text term="retrieved" text-case="capitalize-first"/>
<date variable="accessed" form="text"/>
<text term="from"/>
<text variable="URL"/>
</group>
</else-if>
</choose>
</macro>
<citation collapse="citation-number">
<sort>
<key variable="citation-number"/>
</sort>
<layout prefix="[" suffix="]" delimiter=", ">
<group delimiter=":">
<text prefix="P" variable="citation-number"/>
<text variable="locator"/>
</group>
</layout>
</citation>
<bibliography second-field-align="flush" entry-spacing="0">
<sort>
<key macro="author"/>
<key variable="issued"/>
</sort>
<layout>
<text variable="citation-number" prefix="[P" suffix="]"/>
<group delimiter=". " suffix=". ">
<text macro="author"/>
<text macro="year"/>
<text macro="title"/>
</group>
<group suffix=".">
<choose>
<if type="bill book graphic legal_case legislation motion_picture report song" match="any">
<text macro="book-publisher" suffix="."/>
</if>
<else-if type="paper-conference">
<group delimiter=" " suffix=", ">
<text term="in" text-case="capitalize-first"/>
<group delimiter=" ">
<text variable="container-title" font-style="italic"/>
<text variable="collection-title" font-style="italic" prefix="(" suffix=")"/>
</group>
</group>
<group delimiter=", " suffix=". ">
<date variable="issued" prefix="">
<date-part name="month" form="long" suffix=" "/>
<date-part name="day" form="numeric-leading-zeros" suffix=", " range-delimiter="-"/>
<date-part name="year"/>
</date>
</group>
<group delimiter=", " suffix=".">
<text macro="book-publisher"/>
<text variable="page"/>
</group>
</else-if>
<else-if type="chapter" match="any">
<group delimiter=" ">
<text term="in" text-case="capitalize-first"/>
<group delimiter=", ">
<group delimiter=" ">
<text variable="container-title" font-style="italic"/>
<text macro="edition" prefix="(" suffix=")"/>
</group>
<text macro="editor"/>
</group>
</group>
<group suffix="." delimiter=", " prefix=". ">
<text macro="book-publisher"/>
<text variable="page"/>
</group>
</else-if>
<else-if type="speech" match="any">
<group delimiter=". ">
<group delimiter=" ">
<text term="in" text-case="capitalize-first"/>
<group delimiter=", ">
<group delimiter=" ">
<text variable="event" font-style="italic"/>
<text macro="edition" prefix="(" suffix=")"/>
</group>
<text macro="editor"/>
</group>
</group>
<group delimiter=", " suffix=".">
<text macro="book-publisher"/>
<text variable="page"/>
</group>
</group>
</else-if>
<else-if type="article-journal">
<group delimiter=", " suffix=".">
<text macro="journal"/>
<text variable="page"/>
</group>
</else-if>
<else-if type="thesis" match="any">
<group delimiter=". ">
<text variable="genre"/>
<text macro="book-publisher"/>
</group>
</else-if>
<else>
<group suffix="." delimiter=", ">
<group delimiter=" " font-style="italic">
<text variable="container-title"/>
<text variable="volume"/>
</group>
<text variable="page"/>
</group>
</else>
</choose>
</group>
<text macro="access" prefix=" "/>
</layout>
</bibliography>
</style>

View File

@@ -1,14 +1,15 @@
-// cSpell:ignoreRegExp @[a-zA-Z0-9_]+
+// cSpell:ignoreRegExp @[a-zA-Z0-9_-]+
 // cSpell:ignoreRegExp #\w+\(
-// cSpell:ignore cetz booktbl instcombine
+// cSpell:ignore cetz booktbl bibliographyx citegroup instcombine
+// cSpell:ignore Reissmann Soffa Zaidi Aigner Barany Mössenböck Duboscq Steinlechner Ginsbach Kildall Rastilav Joisha Rajiv Schreiber Banerjee Boehm Chakrabarti Löwe Edvinsson Peyton-Jones
 // cSpell:disable
 #import "clean-acmart.typ": acmart
 #import "@preview/cetz:0.3.4"
 #import "@preview/lilaq:0.3.0" as lq
-#import "@preview/cetz:0.3.2"
 #import "@preview/cetz-plot:0.1.1": chart as cetz_chart
 #import "@preview/tblr:0.3.1": tblr, rows as tblr_rows, hline as tblr_hline
 #import "@preview/codly:1.3.0": codly-init, codly
+#import "@preview/alexandria:0.2.0": *
 #show: codly-init.with()
 #codly(zebra-fill: none, display-icon: false, display-name: false, stroke: none, radius: 0mm, inset: 0.2em)
@@ -87,6 +88,12 @@
 #it
 ]
+#show: alexandria(prefix: "x-", read: p => read(p))
+#show: alexandria(prefix: "y-", read: p => read(p))
+#let cgx = it => citegroup(it, prefix: "x-")
+#let cgy = it => citegroup(it, prefix: "y-")
 #set heading(numbering: "1.1.1")
 // cSpell:enable
@@ -102,10 +109,11 @@ Dataflow analysis is a technique used to gather information about the state of v
 Dataflow analysis is a well-established field where new techniques are regularly created and older techniques improved. Different compilers and analysis frameworks implement different methods and optimizations with dataflow analysis. This work aims to summarize the current state and past achievements of this technology. \
 This work is divided into the following sections: in @background_c the background required to understand this work is given, in @methodology_c the methodology used to create this work is described, in @findings_c the contents of the papers are analyzed and evaluated, and in @conclusion_c the results from this work are summarized.
 = Background <background_c>
 == Static Single Assignment form (SSA / SSA form)
 #figure( // ssa_form_example
-caption: [C code and respective SSA in control flow graph form, adapted from Fig.1 in [@slr-13]],
+caption: [C code and respective SSA in control flow graph form, adapted from Fig.1 in the work of Reissmann et al. @y-reissmann_rvsdg_2020],
 kind: "raw",
 grid(
 columns: (1fr, 1.25fr),
@@ -121,7 +129,7 @@
 image("ssa-example.svg", height: 16em)
 )
 ) <ssa_form_example>
-Many modern compilers and analysis tools operate on a Static Single-Assignment (SSA) form @cooper_keith_d_engineering_2011 @cytron_efficiently_1991. The SSA form works by assigning each variable only once. This is done by creating multiple sub-variables $x_1, x_2, ...$ for each variable $x$. After a branch in the program a #{sym.Phi}-Node is used to select the new value of the variable based on branch executed.
+Many modern compilers and analysis tools operate on a Static Single-Assignment (SSA) form @x-cooper_keith_d_engineering_2011 @x-cytron_efficiently_1991. The SSA form works by assigning each variable only once. This is done by creating multiple sub-variables $x_1, x_2, ...$ for each variable $x$. After a branch in the program, a #{sym.Phi}-Node is used to select the new value of the variable based on the branch executed.
 An example of the SSA form can be seen in @ssa_form_example. On the left is simple C code in a function body and on the right is the respective SSA form of the C code. The intermediate representation of LLVM is closely modeled after the SSA form.
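To make the renaming concrete, here is a small hand-written sketch; the numbering and the concrete constants are reconstructed to match the values used in the optimization examples below, not copied from the figure:

```c
int example(void) {
    int x = 4 * 2;  /* SSA: x_1 = 4 * 2                                */
    x = x - 2;      /* SSA: x_2 = x_1 - 2                              */
    if (x < 4)      /* branch on x_2                                   */
        x = x + 2;  /* SSA: x_3 = x_2 + 2                              */
    else
        x = x * 2;  /* SSA: x_4 = x_2 * 2                              */
                    /* SSA: x_5 = phi(x_3, x_4) selects the value from */
                    /* whichever branch was executed                   */
    int y = x * 2;  /* SSA: y_1 = x_5 * 2                              */
    return y;
}
```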
 == Dataflow analysis (DFA)
 A compiler can perform dataflow analysis either on the original source code or on an intermediate representation. When performing the analysis on the source code, the original structure and flow constructs of the program are available, while performing the analysis on an intermediate representation has the advantage of being usable for many different languages; however, in the translation step from source code to intermediate representation, a lot of information about control flow and similar properties may already have been lost. LLVM, for example, already has a lot of generic optimization steps for its own intermediate representation, which allows language developers to focus on designing their language and a compiler from their language to the LLVM IR with language-specific optimizations, instead of having to implement a full compiler and optimizations themselves. A big problem with DFA is its long runtime, and because it is a sequential algorithm it is complicated to parallelize. This makes it harder to use DFA in a Just-In-Time (JIT) compiler, since a JIT has a limited timeframe for compilation.
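To illustrate how such an analysis is computed, here is a minimal sketch of the classic iterative worklist scheme, instantiated as a one-variable constant propagation over a hard-coded diamond CFG; the lattice encoding, the CFG and all names are invented for this sketch:

```c
#include <stdio.h>

#define NBLOCKS 4
enum { UNDEF = -2, NAC = -1 };  /* lattice: UNDEF (top), constants, NAC (bottom) */

/* successor edges of a diamond CFG: entry -> {then, else} -> exit */
static const int succ[NBLOCKS][2] = {{1, 2}, {3, -1}, {3, -1}, {-1, -1}};

static int in[NBLOCKS], out[NBLOCKS];  /* dataflow fact about one variable x */

/* transfer function: both branch blocks assign x = 2, other blocks copy */
static int transfer(int block, int in_val) {
    return (block == 1 || block == 2) ? 2 : in_val;
}

/* meet operator: agreeing constants survive, conflicts become NAC */
static int meet(int a, int b) {
    if (a == UNDEF) return b;
    if (b == UNDEF) return a;
    return a == b ? a : NAC;
}

int main(void) {
    int worklist[64], top = 0;
    for (int b = 0; b < NBLOCKS; b++) { in[b] = out[b] = UNDEF; worklist[top++] = b; }
    while (top > 0) {                       /* iterate to a fixed point */
        int b = worklist[--top];
        int new_out = transfer(b, in[b]);
        if (new_out == out[b]) continue;    /* fact unchanged: nothing to do */
        out[b] = new_out;
        for (int i = 0; i < 2; i++) {       /* propagate to successors */
            int s = succ[b][i];
            if (s < 0) continue;
            in[s] = meet(in[s], out[b]);
            worklist[top++] = s;            /* revisit the successor */
        }
    }
    printf("x at the exit block: %d\n", out[3]);  /* prints 2: a must-fact */
    return 0;
}
```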
@@ -141,11 +149,11 @@ Backward analysis goes from the exit of the program to the entry, thereby enabli
 ```
 ) <must_may_example>
 The facts which the algorithm knows about a variable either must be true or may be true. When they must be true, every path leading to the current point must ensure that the fact is true. The facts in @must_may_example on line 4 are: `x` and `z` must be initialized since that is done in both branches of the if, while `y` only may be initialized. `x` also must be `2`, since it gets assigned `2` in both branches; `z` may be `2` or may be `1`. The must constraints are mostly used for optimization, while the may constraints are mostly used for showing warnings.
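The body of @must_may_example lies outside this hunk; the following reconstruction is merely one plausible shape of a snippet producing exactly these facts (`cond` and `use` are hypothetical placeholders):

```c
int use(int a, int b, int c);          /* hypothetical consumer */

int example(int cond) {
    int x, y, z;                       /* line 1 */
    if (cond) { x = 2; y = 1; z = 2; } /* line 2: then-branch   */
    else      { x = 2;        z = 1; } /* line 3: else-branch   */
    return use(x, y, z);               /* line 4: x and z must be initialized,
                                          y only may be; x must be 2,
                                          z may be 2 or may be 1 */
}
```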
-=== Constant folding and propagation @optimizing_compiler_wikipedia
+=== Constant folding and propagation @x-optimizing_compiler_wikipedia
 An example based on @ssa_form_example would be the compiler calculating $x_1$ to be $8$. This is called constant folding and is done by replacing all calculations which can be evaluated at compile time with their result. Constant propagation then replaces the $x_1$ in the calculation of $x_2$ with its value. When constant folding is then applied again, $x_2$ becomes $6$.
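As a sketch in C, with the constants chosen to match the values above:

```c
int folded(void) {
    int x1 = 4 * 2;   /* constant folding evaluates 4 * 2 at compile time: x1 = 8 */
    int x2 = x1 - 2;  /* constant propagation substitutes x1 = 8, and folding     */
                      /* the result gives x2 = 6                                  */
    return x2;
}
```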
-=== Dead branch elimination @optimizing_compiler_wikipedia
+=== Dead branch elimination @x-optimizing_compiler_wikipedia
 Continuing from the last optimization, it would be possible to conclude that the branch condition $x_2 < 4$ always evaluates to $0$. This would allow eliminating the branch taken when the condition is $1$, with the result that $x_5$ is always $12$ and $y_1 = 24$. These two optimizations alone would already allow replacing the whole code from @ssa_form_example with `int x = 12; int y = 24;`. In this simple example this seems obvious, but if $x$ were a function parameter there could still be instances where this branch could be eliminated because of the function arguments supplied elsewhere.
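Spelled out on the running example (a sketch; the SSA names in the comments follow the discussion above):

```c
int dead_branch(void) {
    int x = 6;      /* x_2 = 6 after folding and propagation          */
    if (x < 4)      /* the condition folds to 0: this branch is dead  */
        x = x + 2;
    else
        x = x * 2;  /* always taken: x_5 = 12                         */
    int y = x * 2;  /* y_1 = 24                                       */
    return y;       /* the region reduces to: int x = 12; int y = 24; */
}
```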
-=== Common subexpression elimination @optimizing_compiler_wikipedia
+=== Common subexpression elimination @x-optimizing_compiler_wikipedia
 Common subexpression elimination finds cases where a calculation, or a function call without side effects, is done multiple times with the same variables and values. An example would be the expression `d = (a * b) - c * (a * b)`, which can be rewritten to `tmp = a * b; d = tmp - c * tmp` as long as `a` and `b` remain the same between the two calculations of `a * b`.
 = Methodology <methodology_c>
@@ -156,7 +164,7 @@ This work is created following the process described in @process_fig. The protoc
 float: true,
 [
 #figure(
-caption: [Overview of the review process. Adapted from @federico_ciccozzi_execution_2019 and @gotz_claimed_2021.],
+caption: [Overview of the review process. Adapted from @x-federico_ciccozzi_execution_2019 and @x-gotz_claimed_2021.],
 image("review_process.png")
 ) <process_fig>
 ]
@@ -308,7 +316,7 @@ All data items were extracted from the full text of all included publications.
 In this chapter we list our findings from the conducted systematic literature analysis.
 == Demographic
-[@slr-10] shows that dataflow analysis is not only used to optimize software for normal computers, but also to optimize hardware description languages like Verilog or VHDL, which are then turned into hardware via a Field Programmable Gate Array (FPGA).
+The work by Zaidi and Greaves @y-zaidi_value_2015 shows that dataflow analysis is not only used to optimize software for normal computers, but also to optimize hardware description languages like Verilog or VHDL, which are then turned into hardware via a Field Programmable Gate Array (FPGA).
 === Publication year
 #figure( // demographic_pub_year
 caption: "Publication years of the publications",
@@ -377,7 +385,7 @@ As seen in @demographic_pub_year most of the analyzed publication are from the l
 })
 }
 ) <demographic_research_focus>
-The focus of the different papers can be seen in @demographic_research_focus. Most of the papers [@slr-1, @slr-2, @slr-5, @slr-7, @slr-12] included focus on creating and implementing new algorithms and techniques. Another big focus of the included papers is speeding up the analysis, which also makes it more viable for using in JIT compilers. While [@slr-4, @slr-6] try to do this by simply running parts of the analysis on different threads, [@slr-14] tries to pipeline the analysis of functions and [@slr-15] tries to skip parts by only lazily iterating over nodes of the IR. [@slr-9, @slr-10, @slr-13] implement a custom IR to make it easier to run parts of the DFA or to have a better structure then the previous code or IR. The focus of [@slr-3, @slr-8] is to provide a generic library for implementing DFA and using it and to provide an example implementation of the library to show how it works. [@slr-11] creates a Domain-Specific Language (DSL) for implementing DFA algorithm in the LLVM framework to make it easier for researchers to try out new ideas and implement them. \
+The focus of the different papers can be seen in @demographic_research_focus. Most of the included papers #cgy[@y-kildall_unified_1973 @y-rastislav_bodik_interprocedural_1997 @y-joisha_technique_2011 @y-tang_summary-based_2012 @y-pathade_path_2019] focus on creating and implementing new algorithms and techniques. Another big focus of the included papers is speeding up the analysis, which also makes it more viable for use in JIT compilers. While #cgy[@y-edvinsson_multi-threaded_2010 @y-edvinsson_parallel_2011] try to do this by simply running parts of the analysis on different threads, the work by Shi and Zhang @y-shi_pipelining_2020 tries to pipeline the analysis of functions and the work by Aigner, Barany and Mössenböck @y-aigner_lazy_2024 tries to skip parts by lazily iterating over only the necessary nodes of the IR. The works of Duboscq et al., Zaidi et al. and Reissmann et al. #cgy[@y-duboscq_graal_2013 @y-zaidi_value_2015 @y-reissmann_rvsdg_2020] implement a custom IR to make it easier to run parts of the DFA or to have a better structure than the previous code or IR. The focus of the work by Ramsey et al. @y-ramsey_hoopl_2010 is to provide a generic library for implementing DFA, and the work of Urban and Steinlechner @y-urban_implementing_2013 provides an example implementation using the library to show how it works. The work by Ginsbach, Crawford and O'Boyle @y-ginsbach_candl_2018 defines and implements a compiler for a Domain-Specific Language (DSL) for implementing DFA algorithms in the LLVM framework, making it easier for researchers to try out new ideas and implement them.
 === Target languages
 #figure( // demographic_target_lang
 caption: "Target languages of the publications",
@@ -416,30 +424,31 @@
 })
 }
 ) <demographic_target_lang>
-@demographic_target_lang shows a 33% trend towards implementing DFA optimizations either with LLVM directly or by operating on the LLVM IR, while Java is either directly used as bytecode or as SSA representation of Java. This shows that LLVM is a good platform for implementing optimizations and that it has a lower barrier of entry for developing optimizations. \
+@demographic_target_lang shows a 33% trend towards implementing DFA optimizations either with LLVM directly or by operating on the LLVM IR, while Java is either used directly as bytecode or as an SSA representation of Java. This shows that LLVM is a good platform for implementing optimizations and that it has a lower barrier of entry for developing optimizations.
 == RQ1: Advantages and disadvantages of using Dataflow analysis for compiler optimization
 DFA makes many big compiler optimizations possible, but it also brings many trade-offs, and not just for performance.
 These optimizations eliminate unused code and simplify expressions, which reduces execution time and memory footprint during runtime.
-[@slr-1] is one of the first publications talking about DFA and how it allows to use previously existing optimizations, which could only be applied on code sections without branches, with branching by checking how data flows through the branches.
+The work by Kildall @y-kildall_unified_1973 is one of the first to talk about DFA and how it allows previously existing optimizations, which could only be applied to code sections without branches, to be used across branches by checking how data flows through them.
-Later publications [@slr-2, @slr-5] describe ways to apply these optimization interprocedurally and across thread synchronization boundaries. [@slr-2] does this be inlining the called procedure and then performing dataflow analysis. This makes every procedure call optimized for every call location, but brings the disadvantage of very rapidly increasing the size of the optimized program. An important requirement that [@slr-5] describes, is that programs must be well synchronized, otherwise DFA can not be used because of possible data races. \
+Later publications by Bodik et al. and Joisha et al. #cgy[@y-rastislav_bodik_interprocedural_1997 @y-joisha_technique_2011] describe ways to apply these optimizations interprocedurally and across thread synchronization boundaries. The work by Bodik, Gupta and Soffa @y-rastislav_bodik_interprocedural_1997 does this by inlining the called procedure and then performing dataflow analysis. This optimizes every procedure call for its call location, but brings the disadvantage of very rapidly increasing the size of the optimized program. An important requirement described in the work by Joisha, Schreiber, Banerjee, Boehm and Chakrabarti @y-joisha_technique_2011 is that programs must be well synchronized, otherwise DFA cannot be used because of possible data races.
 === Analysis performance
-While performance is not the biggest concern for DFA, since it runs at compile-time and accuracy is more important [@slr-4], many publications [@slr-4, @slr-6, @slr-14, @slr-15] have investigated how to improve the performance of DFA. This is done with several techniques: In [@slr-4, @slr-6] different function calls are run on different threads, but it has the problem of creating and queue a task for each function, which can lead to a big overhead. In [@slr-6] independent branches are also run on separate threads. A big problem with both approaches is to avoid, that some functions could be queued for analysis be more than one thread, which leads to unnecessary redundancy. \
+Performance is not the biggest concern for DFA, since it runs at compile time and accuracy is more important, as described in the work by Edvinsson and Löwe @y-edvinsson_multi-threaded_2010. Still, many publications #cgy[@y-edvinsson_multi-threaded_2010 @y-edvinsson_parallel_2011 @y-shi_pipelining_2020 @y-aigner_lazy_2024] have investigated how to improve the performance of DFA. This is done with several techniques: in both publications by Edvinsson et al. #cgy[@y-edvinsson_multi-threaded_2010 @y-edvinsson_parallel_2011] different function calls are run on different threads, but this has the problem of creating and queuing a task for each function, which can lead to a big overhead. In the later work by Edvinsson, Löwe and Lundberg @y-edvinsson_parallel_2011 independent branches are also run on separate threads. A big problem with both approaches is avoiding that some functions are queued for analysis by more than one thread, which leads to unnecessary redundancy. \ // TODO explain in detail how functions are selected
-Another approach [@slr-14] is to pipeline the function calls. This is done by analyzing all variables, which do not depend on any function calls. When the function calls have finished being analyzed, the variables, which depend on that function call are analyzed. Thereby more parallel work is possible.
+Another approach, described in the work by Shi and Zhang @y-shi_pipelining_2020, is to pipeline the function calls: all variables which do not depend on any function call are analyzed first; once the function calls have been analyzed, the variables which depend on them are analyzed as well. Thereby more parallel work is possible.
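To illustrate the scheduling idea (not their system): a toy wave scheduler over the call-dependence relation, where each wave could be handed to worker threads. The dependency matrix and function names are invented, and the real pipelined analysis additionally overlaps the analysis of call-independent variables with the analysis of the callees:

```c
#include <stdio.h>

#define NFUNCS 4

/* dep[f][g] != 0 means f calls g, so f's summary can only be
 * completed after g's summary is available */
static const int dep[NFUNCS][NFUNCS] = {
    {0, 1, 1, 0},  /* main calls helper_a and helper_b */
    {0, 0, 0, 1},  /* helper_a calls leaf              */
    {0, 0, 0, 1},  /* helper_b calls leaf              */
    {0, 0, 0, 0},  /* leaf calls nothing               */
};
static const char *name[NFUNCS] = {"main", "helper_a", "helper_b", "leaf"};

int main(void) {
    int done[NFUNCS] = {0};
    int remaining = NFUNCS;
    while (remaining > 0) {
        /* every function whose callees are all summarized is ready;
         * all ready functions of one wave are independent of each other */
        printf("ready in this wave:");
        int wave[NFUNCS], n = 0;
        for (int f = 0; f < NFUNCS; f++) {
            if (done[f]) continue;
            int ready = 1;
            for (int g = 0; g < NFUNCS; g++)
                if (dep[f][g] && !done[g]) ready = 0;
            if (ready) { wave[n++] = f; printf(" %s", name[f]); }
        }
        printf("\n");
        for (int i = 0; i < n; i++) { done[wave[i]] = 1; remaining--; }
    }
    return 0;
}
```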
 === Implementation complexity
-Another problem with DFA is the difficulty to implement optimizations with it [@slr-3, @slr-11]. DFA is often also deeply entangled with the compiler internals, which makes it difficult to reuse existing optimizations with other compilers or implement new optimizations quickly and it is complicated to implemented, as seen in LLVM: "simple peephole optimizations in the LLVM instcombine pass contain approximately 30000 lines of complex C++ code, despite the transformations being simple" [@slr-11] \
+Another problem with DFA is the difficulty of implementing optimizations with it, which is explained in the works by Ramsey et al. and Ginsbach et al. #cgy[@y-ramsey_hoopl_2010 @y-ginsbach_candl_2018]. DFA is often also deeply entangled with the compiler internals, which makes it difficult to reuse existing optimizations with other compilers or to implement new optimizations quickly, as seen in LLVM: "simple peephole optimizations in the LLVM instcombine pass contain approximately 30000 lines of complex C++ code, despite the transformations being simple" @y-ginsbach_candl_2018 \ // TODO fix cite
-One solutions to this problem is described in [@slr-3] by implementing a library in Haskell which performs the dataflow analysis and provides an interface, which "is made possible by sophisticated aspects of Haskells type system, such as higher-rank polymorphism, GADTs, and type functions" [@slr-3], to implement various optimizations, which also then can be reused for other compilers. The biggest drawback of this library is it's limited to compilers implemented in Haskell. \
+One solution to this problem is described in the work by Ramsey, Dias and Peyton-Jones @y-ramsey_hoopl_2010: a library in Haskell which performs the dataflow analysis and provides an interface, which "is made possible by sophisticated aspects of Haskell's type system, such as higher-rank polymorphism, GADTs, and type functions" @y-ramsey_hoopl_2010, to implement various optimizations, which can then also be reused in other compilers. The biggest drawback of this library is that it is limited to compilers implemented in Haskell. \
-[@slr-11] describes a domain specific language to implement LLVM optimization passes. This is done by a having a simple language for directly implementing the logic of the optimization, while a custom transpiler then converts it into a LLVM pass written in C++. Since the LLVM pass is implemented in a more generic way to fit this purpose, it leads to a moderate compile time increase. There is no formal verification done on the implemented optimization pass. Because of these disadvantages it is a great tool to quickly implement, test and iterate optimizations, but for a more permanent passes, hand-written C++ code should be used.
+Another approach, described by the work of Ginsbach, Crawford and O'Boyle @y-ginsbach_candl_2018, is a domain-specific language for implementing LLVM optimization passes: a simple language is used to directly express the logic of the optimization, and a custom transpiler then converts it into an LLVM pass written in C++. Since the generated LLVM pass is implemented in a more generic way to fit this purpose, it leads to a moderate compile-time increase, and there is no formal verification done on the implemented optimization pass. Because of these disadvantages it is a great tool to quickly implement, test and iterate on optimizations, but for more permanent passes, hand-written C++ code should be used.
 === Limitations
-DFA is hard to parallelize because variables are often dependant on other variables or function arguments. While it is possible to analyze multiple functions at the surface level, they still depend on the context of other functions calling it. As already mentioned [@slr-14] already shows how it is still possible to run parallel analysis while still waiting for the results of other threads. \
+DFA is hard to parallelize because variables are often dependent on other variables or function arguments. While it is possible to analyze multiple functions at the surface level, they still depend on the context of the functions calling them. As already mentioned in the work by Shi and Zhang @y-shi_pipelining_2020, it is still possible to run the analysis in parallel while waiting for the results of other threads. \
-Global variables also make analysis more complicated since the can be accessed and modified by all functions and either need to be treated as an unknown value every time or all functions which work with this variable are analytically dependant on each other and should be locked at when checking the value of the variable. A similar problem exists for variables shared across threads, because the analysis has to look at all functions which could modify the variable. Alternatively the variable should be well synchronized so that only one thread can write it or multiple threads can read it, but not both options at the same time [@slr-5]. \
+Global variables also make the analysis more complicated, since they can be accessed and modified by all functions: they either need to be treated as an unknown value every time, or all functions which work with the variable become analytically dependent on each other and have to be looked at when checking the value of the variable. A similar problem exists for variables shared across threads, because the analysis has to look at all functions which could modify the variable. Alternatively, the variable should be well synchronized so that either one thread can write it or multiple threads can read it, but not both at the same time, as described in the work by Joisha, Schreiber, Banerjee, Boehm and Chakrabarti @y-joisha_technique_2011. \
 Another thing that complicates DFA in languages like C is the usage of pointers, because they allow the program to modify all variables in unpredictable ways, which invalidates all facts and assumptions made up to that point about all variables. \
 Since inlining is required to perform rewrites, it can bloat the executable and make it overly large.
 == RQ2: Usage of dataflow analysis in current compilers
 The Glasgow Haskell Compiler (GHC), LLVM, and GCC are good examples of compilers which already extensively use DFA to implement optimizations.
-These optimizations include common sub-expression elimination [@slr-1, @slr-7, @slr-13], copy propagation [@slr-5, @slr-7], constant propagation [@slr-1], conditional branch elimination [@slr-2] and dead code elimination [@slr-13].
+These optimizations include common sub-expression elimination #cgy[@y-kildall_unified_1973 @y-tang_summary-based_2012 @y-reissmann_rvsdg_2020], copy propagation #cgy[@y-joisha_technique_2011 @y-tang_summary-based_2012], constant propagation @y-kildall_unified_1973, conditional branch elimination @y-rastislav_bodik_interprocedural_1997 and dead code elimination @y-reissmann_rvsdg_2020.
+// TODO rewrite
 = Conclusion <conclusion_c>
 Our findings show that DFA is already extensively used in current compilers and brings big advantages for runtime speed. The cost of this is a longer compilation time, which makes it unsuitable for JIT compilation. Furthermore, DFA allows complex optimizations across branches and function boundaries which would not be possible with traditional straight-line optimizations. \
@@ -449,30 +458,38 @@ The adaptability of LLVM and the associated immediate representation makes it an
 #pagebreak(weak: true)
 #set heading(numbering: none)
-#bibliography("refs.bib", title: "References", style: "association-for-computing-machinery")
+#bibliographyx("refs.bib", prefix: "x-", title: "References", style: "association-for-computing-machinery")
+#let slr_bib_style = read("association-for-computing-machinery.csl", encoding: none)
+#bibliographyx("slr.bib", full: true, prefix: "y-", title: "Analyzed papers", style: slr_bib_style)
 #pagebreak(weak: true)
 #set heading(numbering: "A.a.a")
 #counter(heading).update(0)
 #{ // slr results table
-set page(flipped: true, columns: 1, margin: 2em)
+set page(flipped: true, columns: 1, margin: 1.75em)
 [= SLR Results]
 v(1em)
 counter(heading).update(0)
 set table(stroke: (x, _) => if x in (1, 4, 6) { (x: 2pt, y: 1pt) } else { 1pt })
 show heading: set text(weight: "regular")
+context {
+let slr_data = csv("pubs.csv")
+let slr_data = slr_data.slice(1)
+let slr_bib = get-bibliography("y-")
+let key_map = (:)
+for r in slr_bib.references {
+let k = "y-" + r.key
+let id = r.prefix.elem.children.at(0).text.text.find(regex("\d\d?"))
+let idx = slr_data.position(v => v.at(0) == id)
+slr_data.at(idx).at(0) = ref(label(k))
+key_map.insert(id, k)
+}
 table(
 columns: (auto, auto, auto, auto, auto, auto, 6em, 4.05em, auto, auto),
 inset: (x: 5pt, y: 3pt),
-..csv("pubs.csv")
-.map(v => {
-if v.at(0) != "ID" {
-let id = v.at(0).slice(1)
-v.at(0) = [#figure([P#id], kind: "slr", supplement: none) #label("slr-" + id)]
-}
-return v
-})
-.flatten()
+..slr_data.flatten()
 )
+}
 }

pubs.csv
View File

@@ -1,16 +1,16 @@
 ID,D1,D2,D3,D4,D5,D6,D7,D8,D9
-P1,"Kildall, Gary A.",1973,A unified approach to global program optimization,Allows straight-line optimization techniques for branch structure,,General Techniques,None,"Constant Propagation, Common Subexpression Elimination, Register Optimization",
-P2,Rastislav Bodik; Rajiv Gupta; Mary Lou Soffa,1997,Interprocedural conditional branch elimination,Reduction of instruction count,Exponential/Polynomial worst-case time complexity,ICC,C,"Conditional Branch Elimination, Elimination of correlated conditionals and operations",
-P3,"Ramsey, Norman; Dias, João; Peyton Jones, Simon",2010,"Hoopl: a modular, reusable library for dataflow analysis and transformation",Reusable library for DFA,"DFA typically entangled with compiler, Algorithms complicated and hard to understand","Library, used by GHC",Custom,"Interleaved analysis and rewriting, speculative rewriting, computing fixed points, dynamic fault isolation",Only usable from Haskell
-P4,"Edvinsson, Marcus; Lowe, Welf",2010,A multi-threaded approach for data-flow analysis,Accuracy more important than speed since at compile time,"Low usability for JIT because of time consumption, DFA is computation intense, DFA often implemented sequentially",Custom,"SSA, Java","High speed-up for analysis, for benchmarks without benefit max loss of 13% speed",Only speed-up of 1.78 on 8 cores
-P5,"Joisha, Pramod G.; Schreiber, Robert S.; Banerjee, Prithviraj; Boehm, Hans J.; Chakrabarti, Dhruva R.",2011,A technique for the effective and automatic reuse of classical compiler optimizations on multithreaded code,,"Sequential transformations can not be applied to parallel code, Need to watch out for data races and data synchronization",GCC,C,"Bidirectional DFA across synchronizations in well-synchronized programs, Can reuse existing optimizations, Value numbering, Copy propagation",
-P6,"Edvinsson, Marcus; Lundberg, Jonas; Löwe, Welf",2011,Parallel points-to analysis for multi-core machines,Points-To Analysis analyses whole program,SSA nodes in Points-to SSA are sequentially dependent,Custom,"SSA, Java",Parallel Points-to Analysis,
-P7,"Tang, Xiaolong; Järvi, Jaakko",2012,Summary-based data-flow analysis that understands regular composite objects and iterators,,Hard to make assumptions about user-defined types,LLVM,LLVM IR,"Common Sub-expression elimination, Copy propagation, Equational reasoning",
-P8,"Urban, Bernhard; Steinlechner, Harald",2013,Implementing a Java JIT compiler in Haskell: case study,,,Custom JIT,Java Bytecode,Liveness Analysis,
-P9,"Duboscq, Gilles; Stadler, Lukas; Würthinger, Thomas; Simon, Doug; Wimmer, Christian; Mössenböck, Hanspeter",2013,Graal IR: An Extensible Declarative Intermediate Representation,Easier optimization implementation with Graph-IR,,"GraalVM, uses P3",Java Bytecode,IR which is simple to run optimizations on,Not implemented: commutative edges on nodes for better congruent detection
-P10,"Zaidi, Ali Mustafa; Greaves, David",2015,Value State Flow Graph: A Dataflow Compiler IR for Accelerating Control-Intensive Code in Spatial Hardware,Performance improvement through execution of dataflow graph,,Custom LLVM Backend,LLVM IR,,"Structs, Multidimensional-Arrays not supported"
-P11,"Ginsbach, Philip; Crawford, Lewis; O'Boyle, Michael F. P.",2018,CAnDL: a domain specific language for compiler analysis,DSL for optimization implementation makes implementation simpler and iterations quicker,"Optimizations are hard to implement in LLVM, Simple peephole optimization is 30000 LOC",DSL to LLVM Pass,LLVM IR,,"Moderate compile time increase, no formal verification"
-P12,"Pathade, Komal; Khedker, Uday P.",2019,Path sensitive MFP solutions in presence of intersecting infeasible control flow path segments,,Path insensitive solutions overapproximate data flow values,TCS Embedded Code Analyzer,C,"Reaching Definition, Def-Use Pairs, Unitialized Variables, 300% precision increase",100% analysis time increase
-P13,"Reissmann, Nico; Meyer, Jan Christian; Bahmann, Helge; Själander, Magnus",2020,RVSDG: An Intermediate Representation for Optimizing Compilers,,Structures like loops not encoded in SSA,Custom,LLVM IR,"Common Node Elimination, Dead Node Elimination",
-P14,"Shi, Qingkai; Zhang, Charles",2020,Pipelining bottom-up data flow analysis,,Calling dependence limit parallelism of bottom-up DFA,Custom based on LLVM,LLVM IR,2x to 3x speedup by relaxing calling dependence,Inline assembly and c++ stl not modeled
-P15,"Aigner, Christoph; Barany, Gergö; Mössenböck, Hanspeter",2024,Lazy Sparse Conditional Constant Propagation in the Sea of Nodes,,Detecting all compile time constant is undecidable problem,GraalVM,Sea of Nodes / Graal IR,Lazy iteration to reduce portion of necessary graph,
+1,"Aigner, Christoph; Barany, Gergö; Mössenböck, Hanspeter",2024,Lazy Sparse Conditional Constant Propagation in the Sea of Nodes,,Detecting all compile time constant is undecidable problem,GraalVM,Sea of Nodes / Graal IR,Lazy iteration to reduce portion of necessary graph,
+2,"Duboscq, Gilles; Stadler, Lukas; Würthinger, Thomas; Simon, Doug; Wimmer, Christian; Mössenböck, Hanspeter",2013,Graal IR: An Extensible Declarative Intermediate Representation,Easier optimization implementation with Graph-IR,,"GraalVM, uses P3",Java Bytecode,IR which is simple to run optimizations on,Not implemented: commutative edges on nodes for better congruent detection
+3,"Edvinsson, Marcus; Lowe, Welf",2010,A multi-threaded approach for data-flow analysis,Accuracy more important than speed since at compile time,"Low usability for JIT because of time consumption, DFA is computation intense, DFA often implemented sequentially",Custom,"SSA, Java","High speed-up for analysis, for benchmarks without benefit max loss of 13% speed",Only speed-up of 1.78 on 8 cores
+4,"Edvinsson, Marcus; Lundberg, Jonas; Löwe, Welf",2011,Parallel points-to analysis for multi-core machines,Points-To Analysis analyses whole program,SSA nodes in Points-to SSA are sequentially dependent,Custom,"SSA, Java",Parallel Points-to Analysis,
+5,"Ginsbach, Philip; Crawford, Lewis; O'Boyle, Michael F. P.",2018,CAnDL: a domain specific language for compiler analysis,DSL for optimization implementation makes implementation simpler and iterations quicker,"Optimizations are hard to implement in LLVM, Simple peephole optimization is 30000 LOC",DSL to LLVM Pass,LLVM IR,,"Moderate compile time increase, no formal verification"
+6,"Joisha, Pramod G.; Schreiber, Robert S.; Banerjee, Prithviraj; Boehm, Hans J.; Chakrabarti, Dhruva R.",2011,A technique for the effective and automatic reuse of classical compiler optimizations on multithreaded code,,"Sequential transformations can not be applied to parallel code, Need to watch out for data races and data synchronization",GCC,C,"Bidirectional DFA across synchronizations in well-synchronized programs, Can reuse existing optimizations, Value numbering, Copy propagation",
+7,"Kildall, Gary A.",1973,A unified approach to global program optimization,Allows straight-line optimization techniques for branch structure,,General Techniques,None,"Constant Propagation, Common Subexpression Elimination, Register Optimization",
+8,"Pathade, Komal; Khedker, Uday P.",2019,Path sensitive MFP solutions in presence of intersecting infeasible control flow path segments,,Path insensitive solutions overapproximate data flow values,TCS Embedded Code Analyzer,C,"Reaching Definition, Def-Use Pairs, Unitialized Variables, 300% precision increase",100% analysis time increase
+9,"Ramsey, Norman; Dias, João; Peyton Jones, Simon",2010,"Hoopl: a modular, reusable library for dataflow analysis and transformation",Reusable library for DFA,"DFA typically entangled with compiler, Algorithms complicated and hard to understand","Library, used by GHC",Custom,"Interleaved analysis and rewriting, speculative rewriting, computing fixed points, dynamic fault isolation",Only usable from Haskell
+10,Rastislav Bodik; Rajiv Gupta; Mary Lou Soffa,1997,Interprocedural conditional branch elimination,Reduction of instruction count,Exponential/Polynomial worst-case time complexity,ICC,C,"Conditional Branch Elimination, Elimination of correlated conditionals and operations",
+11,"Reissmann, Nico; Meyer, Jan Christian; Bahmann, Helge; Själander, Magnus",2020,RVSDG: An Intermediate Representation for Optimizing Compilers,,Structures like loops not encoded in SSA,Custom,LLVM IR,"Common Node Elimination, Dead Node Elimination",
+12,"Shi, Qingkai; Zhang, Charles",2020,Pipelining bottom-up data flow analysis,,Calling dependence limit parallelism of bottom-up DFA,Custom based on LLVM,LLVM IR,2x to 3x speedup by relaxing calling dependence,Inline assembly and c++ stl not modeled
+13,"Tang, Xiaolong; Järvi, Jaakko",2012,Summary-based data-flow analysis that understands regular composite objects and iterators,,Hard to make assumptions about user-defined types,LLVM,LLVM IR,"Common Sub-expression elimination, Copy propagation, Equational reasoning",
+14,"Urban, Bernhard; Steinlechner, Harald",2013,Implementing a Java JIT compiler in Haskell: case study,,,Custom JIT,Java Bytecode,Liveness Analysis,
+15,"Zaidi, Ali Mustafa; Greaves, David",2015,Value State Flow Graph: A Dataflow Compiler IR for Accelerating Control-Intensive Code in Spatial Hardware,Performance improvement through execution of dataflow graph,,Custom LLVM Backend,LLVM IR,,"Structs, Multidimensional-Arrays not supported"


223
slr.bib Normal file
View File

@ -0,0 +1,223 @@
@inproceedings{kildall_unified_1973,
location = {Boston, Massachusetts},
title = {A unified approach to global program optimization},
url = {http://portal.acm.org/citation.cfm?doid=512927.512945},
doi = {10.1145/512927.512945},
abstract = {A technique is presented for global analysis of program structure in order to perform compile time optimization of object code generated for expressions. The global expression optimization presented includes constant propagation, common subexpression elimination, elimination of redundant register load operations, and live expression analysis. A general purpose program flow analysis algorithm is developed which depends upon the existence of an “optimizing function.” The algorithm is defined formally using a directed graph model of program flow structure, and is shown to be correct. Several optimizing functions are defined which, when used in conjunction with the flow analysis algorithm, provide the various forms of code optimization. The flow analysis algorithm is sufficiently general that additional functions can easily be defined for other forms of global code optimization.},
eventtitle = {the 1st annual {ACM} {SIGACT}-{SIGPLAN} symposium},
pages = {194--206},
booktitle = {Proceedings of the 1st annual {ACM} {SIGACT}-{SIGPLAN} symposium on Principles of programming languages - {POPL} '73},
publisher = {{ACM} Press},
author = {Kildall, Gary A.},
urldate = {2025-05-31},
date = {1973},
langid = {english},
}
@inproceedings{rastislav_bodik_interprocedural_1997,
title = {Interprocedural conditional branch elimination},
url = {https://doi.org/10.1145/258915.258929},
doi = {10.1145/258915.258929},
abstract = {The existence of statically detectable correlation among conditional branches enables their elimination, an optimization that has a number of benefits. This paper presents techniques to determine whether an interprocedural execution path leading to a conditional branch exists along which the branch outcome is known at compile time, and then to eliminate the branch along this path through code restructuring.},
booktitle = {Proceedings of the {ACM} {SIGPLAN} 1997 Conference on Programming Language Design and Implementation ({PLDI} '97)},
publisher = {Association for Computing Machinery},
author = {Bodik, Rastislav and Gupta, Rajiv and Soffa, Mary Lou},
date = {1997},
langid = {english},
}
@inproceedings{ramsey_hoopl_2010,
location = {New York, {NY}, {USA}},
title = {Hoopl: a modular, reusable library for dataflow analysis and transformation},
isbn = {978-1-4503-0252-4},
url = {https://doi.org/10.1145/1863523.1863539},
doi = {10.1145/1863523.1863539},
series = {Haskell '10},
abstract = {Dataflow analysis and transformation of control-flow graphs is pervasive in optimizing compilers, but it is typically entangled with the details of a particular compiler. We describe Hoopl, a reusable library that makes it unusually easy to define new analyses and transformations for any compiler written in Haskell. Hoopl's interface is modular and polymorphic, and it offers unusually strong static guarantees. The implementation encapsulates state-of-the-art algorithms (interleaved analysis and rewriting, dynamic error isolation), and it cleanly separates their tricky elements so that they can be understood independently.},
pages = {121--134},
booktitle = {Proceedings of the Third {ACM} Haskell Symposium on Haskell},
publisher = {Association for Computing Machinery},
author = {Ramsey, Norman and Dias, João and Peyton Jones, Simon},
date = {2010},
keywords = {dataflow},
}
@inproceedings{edvinsson_multi-threaded_2010,
location = {Atlanta, {GA}},
title = {A multi-threaded approach for data-flow analysis},
isbn = {978-1-4244-6533-0 978-1-4244-6534-7},
url = {http://ieeexplore.ieee.org/document/5470818/},
doi = {10.1109/IPDPSW.2010.5470818},
abstract = {Program analysis supporting software development is often part of edit-compile-cycles, and precise program analysis is time consuming. With the availability of parallel processing power on desktop computers, parallelization is a way to speed up program analysis. This requires a parallel data-flow analysis with sufficient work for each processing unit. The present paper suggests such an approach for object-oriented programs analyzing the target methods of polymorphic calls in parallel. With carefully selected thresholds guaranteeing sufficient work for the parallel threads and only little redundancy between them, this approach achieves a maximum speed-up of 5 (average 1.78) on 8 cores for the benchmark programs.},
eventtitle = {2010 {IEEE} International Symposium on Parallel \& Distributed Processing, Workshops and Phd Forum ({IPDPSW} 2010)},
pages = {1--8},
booktitle = {2010 {IEEE} International Symposium on Parallel \& Distributed Processing, Workshops and Phd Forum ({IPDPSW})},
publisher = {{IEEE}},
author = {Edvinsson, Marcus and Löwe, Welf},
urldate = {2025-05-31},
date = {2010-04},
langid = {english},
}
@inproceedings{joisha_technique_2011,
location = {New York, {NY}, {USA}},
title = {A technique for the effective and automatic reuse of classical compiler optimizations on multithreaded code},
isbn = {978-1-4503-0490-0},
url = {https://doi.org/10.1145/1926385.1926457},
doi = {10.1145/1926385.1926457},
series = {{POPL} '11},
abstract = {A large body of data-flow analyses exists for analyzing and optimizing sequential code. Unfortunately, much of it cannot be directly applied on parallel code, for reasons of correctness. This paper presents a technique to automatically, aggressively, yet safely apply sequentially-sound data-flow transformations, without change, on shared-memory programs. The technique is founded on the notion of program references being "siloed" on certain control-flow paths. Intuitively, siloed references are free of interference from other threads within the confines of such paths. Data-flow transformations can, in general, be unblocked on siloed references. The solution has been implemented in a widely used compiler. Results on benchmarks from {SPLASH}-2 show that performance improvements of up to 41\% are possible, with an average improvement of 6\% across all the tested programs over all thread counts.},
pages = {623--636},
booktitle = {Proceedings of the 38th Annual {ACM} {SIGPLAN}-{SIGACT} Symposium on Principles of Programming Languages},
publisher = {Association for Computing Machinery},
author = {Joisha, Pramod G. and Schreiber, Robert S. and Banerjee, Prithviraj and Boehm, Hans J. and Chakrabarti, Dhruva R.},
date = {2011},
keywords = {data-flow analysis, parallel-program optimization},
}
@inproceedings{edvinsson_parallel_2011,
location = {New York, {NY}, {USA}},
title = {Parallel points-to analysis for multi-core machines},
isbn = {978-1-4503-0241-8},
url = {https://doi.org/10.1145/1944862.1944872},
doi = {10.1145/1944862.1944872},
series = {{HiPEAC} '11},
abstract = {Static program analysis supporting software development is often part of edit-compile-cycles, and precise program analysis is time consuming. Points-to analysis is a data-flow-based static program analysis used to find object references in programs. Its applications include test case generation, compiler optimizations and program understanding, and more. Recent increases in processing power of desktop computers come mainly from multiple cores. Parallel algorithms are vital for simultaneous use of multiple cores. An efficient parallel points-to analysis requires sufficient work for each processing unit. The present paper presents a parallelized points-to analysis of object-oriented programs. It exploits that (1) different target methods of polymorphic calls and (2) independent control-flow branches can be analyzed in parallel. Carefully selected thresholds guarantee that each parallel thread has sufficient work to do and that only little work is redundant with other threads. Our experiments show that this approach achieves a maximum speed-up of 4.43 on 8 cores for a benchmark suite of Java programs.},
pages = {45--54},
booktitle = {Proceedings of the 6th International Conference on High Performance and Embedded Architectures and Compilers},
publisher = {Association for Computing Machinery},
author = {Edvinsson, Marcus and Lundberg, Jonas and Löwe, Welf},
date = {2011},
keywords = {data flow analysis, program analysis, parallel algorithms, parallel processing},
}
@article{tang_summary-based_2012,
title = {Summary-based data-flow analysis that understands regular composite objects and iterators},
volume = {12},
issn = {1559-6915},
url = {https://doi.org/10.1145/2432546.2432549},
doi = {10.1145/2432546.2432549},
abstract = {Today's industrial-strength compilers do not take advantage of the semantics of user-defined types and operations when analyzing code involving objects of user-defined types. We show that user-defined types that are both "regular" and "composite" (roughly corresponding to what is casually known as "value semantics") can, however, be analyzed efficiently and effectively. The notion of regularity comes from generic programming and C++. Programmers routinely rely on regularity when reasoning about generic code and manually performing (optimizing) code transformations and rewrites. Stepanov suggests that compilers, too, should take advantage of regularity to expand the opportunities for applying optimizing transformations. This paper exploits the properties of regular composite objects to produce concise procedure summaries for summary-based analyses, thus taking a step towards Stepanov's goal. In addition to regularity and compositeness, we also make our analysis aware of the prevalent "iterator" abstraction, which expands the applicability of our approach. We target the C++ language, and use the {LLVM} framework to implement the analysis.},
pages = {36--47},
number = {4},
journaltitle = {{SIGAPP} Appl. Comput. Rev.},
author = {Tang, Xiaolong and Järvi, Jaakko},
date = {2012-12},
keywords = {program analysis, C++, generic programming},
}
@inproceedings{urban_implementing_2013,
location = {New York, {NY}, {USA}},
title = {Implementing a Java {JIT} compiler in Haskell: case study},
isbn = {978-1-4503-2111-2},
url = {https://doi.org/10.1145/2500828.2500849},
doi = {10.1145/2500828.2500849},
series = {{PPPJ} '13},
abstract = {We present a {JVM} prototype implemented in the purely-functional language Haskell. It exploits several features of the language, such as strong static typing to implement an intermediate representation, and abstraction mechanism to express machine code generation in the manner of a domain specific language. The compiler consists of (i) a pass to transform Java bytecode to a register-based intermediate representation, (ii) application of an existing data-flow analysis framework to our intermediate representation and (iii) machine code generation that targets the x86 architecture. The implementation follows a compile-only approach. To implement certain Java features efficiently, code patching is used. Various code samples demonstrate the elegance of our prototype. Results prove reasonable performance compared to real-world implementations.},
pages = {177--180},
booktitle = {Proceedings of the 2013 International Conference on Principles and Practices of Programming on the Java Platform: Virtual Machines, Languages, and Tools},
publisher = {Association for Computing Machinery},
author = {Urban, Bernhard and Steinlechner, Harald},
date = {2013},
keywords = {data-flow analysis, Java, intermediate representation, code generation, Haskell, virtual machine},
}
@article{duboscq_graal_2013,
title = {Graal {IR}: An Extensible Declarative Intermediate Representation},
abstract = {We present an intermediate representation ({IR}) for a Java just in time ({JIT}) compiler written in Java. It is a graph-based {IR} that models both control-flow and data-flow dependencies between nodes. We show the framework in which we developed our {IR}. Much care has been taken to allow the programmer to focus on compiler optimization rather than {IR} bookkeeping. Edges between nodes are declared concisely using Java annotations, and common properties and functions on nodes are communicated to the framework by implementing interfaces. Building upon these declarations, the graph framework automatically implements a set of useful primitives that the programmer can use to implement optimizations.},
author = {Duboscq, Gilles and Stadler, Lukas and Würthinger, Thomas and Simon, Doug and Wimmer, Christian and Mössenböck, Hanspeter},
date = {2013},
langid = {english},
}
@article{zaidi_value_2015,
title = {Value State Flow Graph: A Dataflow Compiler {IR} for Accelerating Control-Intensive Code in Spatial Hardware},
volume = {9},
issn = {1936-7406},
url = {https://doi.org/10.1145/2807702},
doi = {10.1145/2807702},
abstract = {Although custom (and reconfigurable) computing can provide orders-of-magnitude improvements in energy efficiency and performance for many numeric, data-parallel applications, performance on nonnumeric, sequential code is often worse than conventional superscalar processors. This work attempts to improve sequential performance in custom hardware by (a) switching from a statically scheduled to a dynamically scheduled (dataflow) execution model and (b) developing a new compiler {IR} for high-level synthesis—the value state flow graph ({VSFG})—that enables aggressive exposition of {ILP} even in the presence of complex control flow. Compared to existing control-data flow graph ({CDFG})-based {IRs}, the {VSFG} exposes more instruction-level parallelism from control-intensive sequential code by exploiting aggressive speculation, enabling control dependence analysis, as well as execution along multiple flows of control. This new {IR} is directly implemented as a static-dataflow graph in hardware by our prototype high-level synthesis tool chain and shows an average speedup of 1.13× over equivalent hardware generated using {LegUp}, an existing {CDFG}-based {HLS} tool. Furthermore, the {VSFG} allows us to further trade area and energy for performance through loop unrolling, increasing the average speedup to 1.55×, with a peak speedup of 4.05×. Our {VSFG}-based hardware approaches the sequential cycle counts of an Intel Nehalem Core i7 processor while consuming only 0.25× the energy of an in-order Altera Nios {IIf} processor.},
number = {2},
journaltitle = {{ACM} Trans. Reconfigurable Technol. Syst.},
author = {Zaidi, Ali Mustafa and Greaves, David},
date = {2015-12},
keywords = {compilers, high-level synthesis, reconfigurable computing, Amdahl's law, custom computing, Dark silicon, instruction level parallelism},
}
@inproceedings{ginsbach_candl_2018,
location = {New York, {NY}, {USA}},
title = {{CAnDL}: a domain specific language for compiler analysis},
isbn = {978-1-4503-5644-2},
url = {https://doi.org/10.1145/3178372.3179515},
doi = {10.1145/3178372.3179515},
series = {{CC} '18},
abstract = {Optimizing compilers require sophisticated program analysis and transformations to exploit modern hardware. Implementing the appropriate analysis for a compiler optimization is a time consuming activity. For example, in {LLVM}, tens of thousands of lines of code are required to detect appropriate places to apply peephole optimizations. It is a barrier to the rapid prototyping and evaluation of new optimizations. In this paper we present the Compiler Analysis Description Language ({CAnDL}), a domain specific language for compiler analysis. {CAnDL} is a constraint based language that operates over {LLVM}'s intermediate representation. The compiler developer writes a {CAnDL} program, which is then compiled by the {CAnDL} compiler into a C++ {LLVM} pass. It provides a uniform manner in which to describe compiler analysis and can be applied to a range of compiler analysis problems, reducing code length and complexity. We implemented and evaluated {CAnDL} on a number of real world use cases: eliminating redundant operations; graphics code optimization; identifying static control flow regions. In all cases we were able to express the analysis more briefly than competing approaches.},
pages = {151--162},
booktitle = {Proceedings of the 27th International Conference on Compiler Construction},
publisher = {Association for Computing Machinery},
author = {Ginsbach, Philip and Crawford, Lewis and O'Boyle, Michael F. P.},
date = {2018},
keywords = {optimization, {LLVM}, constraint programming},
}
@inproceedings{pathade_path_2019,
location = {New York, {NY}, {USA}},
title = {Path sensitive {MFP} solutions in presence of intersecting infeasible control flow path segments},
isbn = {978-1-4503-6277-1},
url = {https://doi.org/10.1145/3302516.3307349},
doi = {10.1145/3302516.3307349},
series = {{CC} 2019},
abstract = {Data flow analysis computes Maximum Fix Point ({MFP}) solution which represents an over approximation of the data reaching a program point along all control flow paths (cfps). Some of these cfps may be infeasible; meaning, the necessary pre-condition for execution of cfp is not satisfiable in any run of the program. Approximations that do not discern data along infeasible cfps may lead to imprecision, because they include spurious information. Recent methods progressively separate the data along feasible and infeasible prefixes of infeasible cfps to ignore data corresponding to prefix that is infeasible. A criteria called minimal infeasible path segment is used to identify the cluster of infeasible cfps which can be considered equivalent for maintaining separate data. Clustering is useful because it avoids the possibly exponential cost of keeping the data along each infeasible cfp separate. The recent clustering approach is imprecise in presence of shared edges between cfps from two different clusters. In this work, we formalize the interaction between clusters and provide a more general and effective criteria for clustering the infeasible cfps. Our experiments indicate up to 2-3 times increase in the precision over previous approach, with average 100\% increase in memory and time required for the analysis. This is possible because our empirical observation indicates that on average 70\% clusters overlap with other clusters.},
pages = {159--169},
booktitle = {Proceedings of the 28th International Conference on Compiler Construction},
publisher = {Association for Computing Machinery},
author = {Pathade, Komal and Khedker, Uday P.},
date = {2019},
keywords = {Data Flow Analysis, Compilers, Static Analysis, Infeasible Control Flow Paths, Program Analysis},
}
@article{reissmann_rvsdg_2020,
title = {{RVSDG}: An Intermediate Representation for Optimizing Compilers},
volume = {19},
issn = {1539-9087},
url = {https://doi.org/10.1145/3391902},
doi = {10.1145/3391902},
abstract = {Intermediate Representations ({IRs}) are central to optimizing compilers as the way the program is represented may enhance or limit analyses and transformations. Suitable {IRs} focus on exposing the most relevant information and establish invariants that different compiler passes can rely on. While control-flow centric {IRs} appear to be a natural fit for imperative programming languages, analyses required by compilers have increasingly shifted to understand data dependencies and work at multiple abstraction layers at the same time. This is partially evidenced in recent developments such as the Multi-Level Intermediate Representation ({MLIR}) proposed by Google. However, rigorous use of data flow centric {IRs} in general purpose compilers has not been evaluated for feasibility and usability as previous works provide no practical implementations. We present the Regionalized Value State Dependence Graph ({RVSDG}) {IR} for optimizing compilers. The {RVSDG} is a data flow centric {IR} where nodes represent computations, edges represent computational dependencies, and regions capture the hierarchical structure of programs. It represents programs in demand-dependence form, implicitly supports structured control flow, and models entire programs within a single {IR}. We provide a complete specification of the {RVSDG}, construction and destruction methods, as well as exemplify its utility by presenting Dead Node and Common Node Elimination optimizations. We implemented a prototype compiler and evaluate it in terms of performance, code size, compilation time, and representational overhead. Our results indicate that the {RVSDG} can serve as a competitive {IR} in optimizing compilers while reducing complexity.},
number = {6},
journaltitle = {{ACM} Trans. Embed. Comput. Syst.},
author = {Reissmann, Nico and Meyer, Jan Christian and Bahmann, Helge and Själander, Magnus},
date = {2020-12},
keywords = {{LLVM}, intermediate representation, Regionalized value state dependence graph ({RVSDG})},
}
@inproceedings{shi_pipelining_2020,
location = {Seoul South Korea},
title = {Pipelining bottom-up data flow analysis},
isbn = {978-1-4503-7121-6},
url = {https://dl.acm.org/doi/10.1145/3377811.3380425},
doi = {10.1145/3377811.3380425},
abstract = {Bottom-up program analysis has been traditionally easy to parallelize because functions without caller-callee relations can be analyzed independently. However, such function-level parallelism is significantly limited by the calling dependence - functions with caller-callee relations have to be analyzed sequentially because the analysis of a function depends on the analysis results, a.k.a., function summaries, of its callees. We observe that the calling dependence can be relaxed in many cases and, as a result, the parallelism can be improved. In this paper, we present Coyote, a framework of bottom-up data flow analysis, in which the analysis task of each function is elaborately partitioned into multiple sub-tasks to generate pipelineable function summaries. These sub-tasks are pipelined and run in parallel, even though the calling dependence exists. We formalize our idea under the {IFDS}/{IDE} framework and have implemented an application to checking null-dereference bugs and taint issues in C/C++ programs. We evaluate Coyote on a series of standard benchmark programs and open-source software systems, which demonstrates significant speedup over a conventional parallel design.},
eventtitle = {{ICSE} '20: 42nd International Conference on Software Engineering},
pages = {835--847},
booktitle = {Proceedings of the {ACM}/{IEEE} 42nd International Conference on Software Engineering},
publisher = {{ACM}},
author = {Shi, Qingkai and Zhang, Charles},
urldate = {2025-05-31},
date = {2020-06-27},
langid = {english},
}
@inproceedings{aigner_lazy_2024,
location = {New York, {NY}, {USA}},
title = {Lazy Sparse Conditional Constant Propagation in the Sea of Nodes},
isbn = {979-8-4007-1118-3},
url = {https://doi.org/10.1145/3679007.3685059},
doi = {10.1145/3679007.3685059},
series = {{MPLR} 2024},
abstract = {Conditional constant propagation is a compiler optimization that detects and propagates constant values for expressions in the input program taking unreachable branches into account. It uses a data flow analysis that traverses the program's control flow graph to discover instructions that produce constant values. In this paper we document our work to adapt conditional constant propagation to the Sea of Nodes program representation of {GraalVM}. In the Sea of Nodes, the program is represented as a graph in which most nodes float and are only restricted by data flow edges. Classical data flow analysis is not possible in this setting because most operations are not ordered and not assigned to basic blocks. We present a novel approach to data flow analysis optimized for the Sea of Nodes. The analysis starts from known constant nodes in the graph and propagates information directly along data flow edges. Most nodes in the graph can never contribute new constants and are therefore never visited, a property we call lazy iteration. Dependences on control flow are taken into account by evaluating {SSA} φ nodes in a particular order according to a carefully defined priority metric. Our analysis is implemented in the {GraalVM} compiler. Experiments on the Renaissance benchmark suite show that lazy iteration only visits 20.5 \% of all nodes in the graph. With the constants and unreachable branches found by our analysis, and previously undetected by the {GraalVM} compiler, we achieve an average speedup of 1.4 \% over {GraalVM}'s optimized baseline.},
pages = {2--13},
booktitle = {Proceedings of the 21st {ACM} {SIGPLAN} International Conference on Managed Programming Languages and Runtimes},
publisher = {Association for Computing Machinery},
author = {Aigner, Christoph and Barany, Gergö and Mössenböck, Hanspeter},
date = {2024},
keywords = {data flow analysis, optimization, compilers, constant propagation, Sea of Nodes},
}