% dfa-for-co-slr/slr.bib

@inproceedings{kildall_unified_1973,
location = {Boston, Massachusetts},
title = {A unified approach to global program optimization},
url = {http://portal.acm.org/citation.cfm?doid=512927.512945},
doi = {10.1145/512927.512945},
abstract = {A technique is presented for global analysis of program structure in order to perform compile time optimization of object code generated for expressions. The global expression optimization presented includes constant propagation, common subexpression elimination, elimination of redundant register load operations, and live expression analysis. A general purpose program flow analysis algorithm is developed which depends upon the existence of an “optimizing function.” The algorithm is defined formally using a directed graph model of program flow structure, and is shown to be correct. Several optimizing functions are defined which, when used in conjunction with the flow analysis algorithm, provide the various forms of code optimization. The flow analysis algorithm is sufficiently general that additional functions can easily be defined for other forms of global code optimization.},
eventtitle = {the 1st annual {ACM} {SIGACT}-{SIGPLAN} symposium},
pages = {194--206},
booktitle = {Proceedings of the 1st annual {ACM} {SIGACT}-{SIGPLAN} symposium on Principles of programming languages - {POPL} '73},
publisher = {{ACM} Press},
author = {Kildall, Gary A.},
urldate = {2025-05-31},
date = {1973},
langid = {english},
}
@article{rastislav_bodik_interprocedural_1997,
title = {Interprocedural conditional branch elimination},
url = {https://doi.org/10.1145/258915.258929},
doi = {10.1145/258915.258929},
abstract = {The existence of statically detectable correlation among conditional branches enables their elimination, an optimization that has a number of benefits. This paper presents techniques to determine whether an interprocedural execution path leading to a conditional branch exists along which the branch outcome is known at compile time, and then to eliminate the branch along this path through code restructuring.},
author = {Bodik, Rastislav and Gupta, Rajiv and Soffa, Mary Lou},
date = {1997},
langid = {english},
}
@inproceedings{ramsey_hoopl_2010,
location = {New York, {NY}, {USA}},
title = {Hoopl: a modular, reusable library for dataflow analysis and transformation},
isbn = {978-1-4503-0252-4},
url = {https://doi.org/10.1145/1863523.1863539},
doi = {10.1145/1863523.1863539},
series = {Haskell '10},
abstract = {Dataflow analysis and transformation of control-flow graphs is pervasive in optimizing compilers, but it is typically entangled with the details of a particular compiler. We describe Hoopl, a reusable library that makes it unusually easy to define new analyses and transformations for any compiler written in Haskell. Hoopl's interface is modular and polymorphic, and it offers unusually strong static guarantees. The implementation encapsulates state-of-the-art algorithms (interleaved analysis and rewriting, dynamic error isolation), and it cleanly separates their tricky elements so that they can be understood independently.},
pages = {121--134},
booktitle = {Proceedings of the Third {ACM} Haskell Symposium on Haskell},
publisher = {Association for Computing Machinery},
author = {Ramsey, Norman and Dias, João and Peyton Jones, Simon},
date = {2010},
keywords = {dataflow},
}
@inproceedings{edvinsson_multi-threaded_2010,
location = {Atlanta, {GA}},
title = {A multi-threaded approach for data-flow analysis},
isbn = {978-1-4244-6533-0 978-1-4244-6534-7},
url = {http://ieeexplore.ieee.org/document/5470818/},
doi = {10.1109/IPDPSW.2010.5470818},
abstract = {Program analysis supporting software development is often part of edit-compile-cycles, and precise program analysis is time consuming. With the availability of parallel processing power on desktop computers, parallelization is a way to speed up program analysis. This requires a parallel data-flow analysis with sufficient work for each processing unit. The present paper suggests such an approach for object-oriented programs analyzing the target methods of polymorphic calls in parallel. With carefully selected thresholds guaranteeing sufficient work for the parallel threads and only little redundancy between them, this approach achieves a maximum speed-up of 5 (average 1.78) on 8 cores for the benchmark programs.},
eventtitle = {2010 {IEEE} International Symposium on Parallel \& Distributed Processing, Workshops and Phd Forum ({IPDPSW} 2010)},
pages = {1--8},
booktitle = {2010 {IEEE} International Symposium on Parallel \& Distributed Processing, Workshops and Phd Forum ({IPDPSW})},
publisher = {{IEEE}},
author = {Edvinsson, Marcus and Löwe, Welf},
urldate = {2025-05-31},
date = {2010-04},
langid = {english},
}
@inproceedings{joisha_technique_2011,
location = {New York, {NY}, {USA}},
title = {A technique for the effective and automatic reuse of classical compiler optimizations on multithreaded code},
isbn = {978-1-4503-0490-0},
url = {https://doi.org/10.1145/1926385.1926457},
doi = {10.1145/1926385.1926457},
series = {{POPL} '11},
abstract = {A large body of data-flow analyses exists for analyzing and optimizing sequential code. Unfortunately, much of it cannot be directly applied on parallel code, for reasons of correctness. This paper presents a technique to automatically, aggressively, yet safely apply sequentially-sound data-flow transformations, without change, on shared-memory programs. The technique is founded on the notion of program references being "siloed" on certain control-flow paths. Intuitively, siloed references are free of interference from other threads within the confines of such paths. Data-flow transformations can, in general, be unblocked on siloed references. The solution has been implemented in a widely used compiler. Results on benchmarks from {SPLASH}-2 show that performance improvements of up to 41\% are possible, with an average improvement of 6\% across all the tested programs over all thread counts.},
pages = {623--636},
booktitle = {Proceedings of the 38th Annual {ACM} {SIGPLAN}-{SIGACT} Symposium on Principles of Programming Languages},
publisher = {Association for Computing Machinery},
author = {Joisha, Pramod G. and Schreiber, Robert S. and Banerjee, Prithviraj and Boehm, Hans J. and Chakrabarti, Dhruva R.},
date = {2011},
keywords = {data-flow analysis, parallel-program optimization},
}
@inproceedings{edvinsson_parallel_2011,
location = {New York, {NY}, {USA}},
title = {Parallel points-to analysis for multi-core machines},
isbn = {978-1-4503-0241-8},
url = {https://doi.org/10.1145/1944862.1944872},
doi = {10.1145/1944862.1944872},
series = {{HiPEAC} '11},
abstract = {Static program analysis supporting software development is often part of edit-compile-cycles, and precise program analysis is time consuming. Points-to analysis is a data-flow-based static program analysis used to find object references in programs. Its applications include test case generation, compiler optimizations, program understanding, and more. Recent increases in the processing power of desktop computers come mainly from multiple cores. Parallel algorithms are vital for simultaneous use of multiple cores. An efficient parallel points-to analysis requires sufficient work for each processing unit. The present paper presents a parallelized points-to analysis of object-oriented programs. It exploits that (1) different target methods of polymorphic calls and (2) independent control-flow branches can be analyzed in parallel. Carefully selected thresholds guarantee that each parallel thread has sufficient work to do and that only little work is redundant with other threads. Our experiments show that this approach achieves a maximum speed-up of 4.43 on 8 cores for a benchmark suite of Java programs.},
pages = {45--54},
booktitle = {Proceedings of the 6th International Conference on High Performance and Embedded Architectures and Compilers},
publisher = {Association for Computing Machinery},
author = {Edvinsson, Marcus and Lundberg, Jonas and Löwe, Welf},
date = {2011},
keywords = {data flow analysis, program analysis, parallel algorithms, parallel processing},
}
@article{tang_summary-based_2012,
title = {Summary-based data-flow analysis that understands regular composite objects and iterators},
volume = {12},
issn = {1559-6915},
url = {https://doi.org/10.1145/2432546.2432549},
doi = {10.1145/2432546.2432549},
abstract = {Today's industrial-strength compilers do not take advantage of the semantics of user-defined types and operations when analyzing code involving objects of user-defined types. We show that user-defined types that are both "regular" and "composite" (roughly corresponding to what is casually known as "value semantics") can, however, be analyzed efficiently and effectively. The notion of regularity comes from generic programming and C++. Programmers routinely rely on regularity when reasoning about generic code and manually performing (optimizing) code transformations and rewrites. Stepanov suggests that compilers, too, should take advantage of regularity to expand the opportunities for applying optimizing transformations. This paper exploits the properties of regular composite objects to produce concise procedure summaries for summary-based analyses, thus taking a step towards Stepanov's goal. In addition to regularity and compositeness, we also make our analysis aware of the prevalent "iterator" abstraction, which expands the applicability of our approach. We target the C++ language, and use the {LLVM} framework to implement the analysis.},
pages = {36--47},
number = {4},
journaltitle = {{SIGAPP} Appl. Comput. Rev.},
author = {Tang, Xiaolong and Järvi, Jaakko},
date = {2012-12},
keywords = {program analysis, C++, generic programming},
}
@inproceedings{urban_implementing_2013,
location = {New York, {NY}, {USA}},
title = {Implementing a Java {JIT} compiler in Haskell: case study},
isbn = {978-1-4503-2111-2},
url = {https://doi.org/10.1145/2500828.2500849},
doi = {10.1145/2500828.2500849},
series = {{PPPJ} '13},
abstract = {We present a {JVM} prototype implemented in the purely-functional language Haskell. It exploits several features of the language, such as strong static typing to implement an intermediate representation, and abstraction mechanisms to express machine code generation in the manner of a domain specific language. The compiler consists of (i) a pass to transform Java bytecode to a register-based intermediate representation, (ii) application of an existing data-flow analysis framework to our intermediate representation and (iii) machine code generation that targets the x86 architecture. The implementation follows a compile-only approach. To implement certain Java features efficiently, code patching is used. Various code samples demonstrate the elegance of our prototype. Results prove reasonable performance compared to real-world implementations.},
pages = {177--180},
booktitle = {Proceedings of the 2013 International Conference on Principles and Practices of Programming on the Java Platform: Virtual Machines, Languages, and Tools},
publisher = {Association for Computing Machinery},
author = {Urban, Bernhard and Steinlechner, Harald},
date = {2013},
keywords = {data-flow analysis, Java, intermediate representation, code generation, Haskell, virtual machine},
}
@article{duboscq_graal_2013,
title = {Graal {IR}: An Extensible Declarative Intermediate Representation},
abstract = {We present an intermediate representation ({IR}) for a Java just in time ({JIT}) compiler written in Java. It is a graph-based {IR} that models both control-flow and data-flow dependencies between nodes. We show the framework in which we developed our {IR}. Much care has been taken to allow the programmer to focus on compiler optimization rather than {IR} bookkeeping. Edges between nodes are declared concisely using Java annotations, and common properties and functions on nodes are communicated to the framework by implementing interfaces. Building upon these declarations, the graph framework automatically implements a set of useful primitives that the programmer can use to implement optimizations.},
author = {Duboscq, Gilles and Stadler, Lukas and Würthinger, Thomas and Simon, Doug and Wimmer, Christian and Mössenböck, Hanspeter},
date = {2013},
langid = {english},
}
@article{zaidi_value_2015,
title = {Value State Flow Graph: A Dataflow Compiler {IR} for Accelerating Control-Intensive Code in Spatial Hardware},
volume = {9},
issn = {1936-7406},
url = {https://doi.org/10.1145/2807702},
doi = {10.1145/2807702},
abstract = {Although custom (and reconfigurable) computing can provide orders-of-magnitude improvements in energy efficiency and performance for many numeric, data-parallel applications, performance on nonnumeric, sequential code is often worse than conventional superscalar processors. This work attempts to improve sequential performance in custom hardware by (a) switching from a statically scheduled to a dynamically scheduled (dataflow) execution model and (b) developing a new compiler {IR} for high-level synthesis—the value state flow graph ({VSFG})—that enables aggressive exposition of {ILP} even in the presence of complex control flow. Compared to existing control-data flow graph ({CDFG})-based {IRs}, the {VSFG} exposes more instruction-level parallelism from control-intensive sequential code by exploiting aggressive speculation, enabling control dependence analysis, as well as execution along multiple flows of control. This new {IR} is directly implemented as a static-dataflow graph in hardware by our prototype high-level synthesis tool chain and shows an average speedup of 1.13× over equivalent hardware generated using {LegUp}, an existing {CDFG}-based {HLS} tool. Furthermore, the {VSFG} allows us to further trade area and energy for performance through loop unrolling, increasing the average speedup to 1.55×, with a peak speedup of 4.05×. Our {VSFG}-based hardware approaches the sequential cycle counts of an Intel Nehalem Core i7 processor while consuming only 0.25× the energy of an in-order Altera Nios {IIf} processor.},
number = {2},
journaltitle = {{ACM} Trans. Reconfigurable Technol. Syst.},
author = {Zaidi, Ali Mustafa and Greaves, David},
date = {2015-12},
keywords = {compilers, high-level synthesis, reconfigurable computing, Amdahl's law, custom computing, dark silicon, instruction level parallelism},
}
@inproceedings{ginsbach_candl_2018,
location = {New York, {NY}, {USA}},
title = {{CAnDL}: a domain specific language for compiler analysis},
isbn = {978-1-4503-5644-2},
url = {https://doi.org/10.1145/3178372.3179515},
doi = {10.1145/3178372.3179515},
series = {{CC} '18},
abstract = {Optimizing compilers require sophisticated program analysis and transformations to exploit modern hardware. Implementing the appropriate analysis for a compiler optimization is a time consuming activity. For example, in {LLVM}, tens of thousands of lines of code are required to detect appropriate places to apply peephole optimizations. It is a barrier to the rapid prototyping and evaluation of new optimizations. In this paper we present the Compiler Analysis Description Language ({CAnDL}), a domain specific language for compiler analysis. {CAnDL} is a constraint based language that operates over {LLVM}'s intermediate representation. The compiler developer writes a {CAnDL} program, which is then compiled by the {CAnDL} compiler into a C++ {LLVM} pass. It provides a uniform manner in which to describe compiler analysis and can be applied to a range of compiler analysis problems, reducing code length and complexity. We implemented and evaluated {CAnDL} on a number of real world use cases: eliminating redundant operations; graphics code optimization; identifying static control flow regions. In all cases we were able to express the analysis more briefly than competing approaches.},
pages = {151--162},
booktitle = {Proceedings of the 27th International Conference on Compiler Construction},
publisher = {Association for Computing Machinery},
author = {Ginsbach, Philip and Crawford, Lewis and O'Boyle, Michael F. P.},
date = {2018},
keywords = {optimization, {LLVM}, constraint programming},
}
@inproceedings{pathade_path_2019,
location = {New York, {NY}, {USA}},
title = {Path sensitive {MFP} solutions in presence of intersecting infeasible control flow path segments},
isbn = {978-1-4503-6277-1},
url = {https://doi.org/10.1145/3302516.3307349},
doi = {10.1145/3302516.3307349},
series = {{CC} 2019},
abstract = {Data flow analysis computes the Maximum Fix Point ({MFP}) solution, which represents an over approximation of the data reaching a program point along all control flow paths (cfps). Some of these cfps may be infeasible; meaning, the necessary pre-condition for execution of the cfp is not satisfiable in any run of the program. Approximations that do not discern data along infeasible cfps may lead to imprecision, because they include spurious information. Recent methods progressively separate the data along feasible and infeasible prefixes of infeasible cfps to ignore data corresponding to the prefix that is infeasible. A criterion called minimal infeasible path segment is used to identify the cluster of infeasible cfps which can be considered equivalent for maintaining separate data. Clustering is useful because it avoids the possibly exponential cost of keeping the data along each infeasible cfp separate. The recent clustering approach is imprecise in the presence of shared edges between cfps from two different clusters. In this work, we formalize the interaction between clusters and provide a more general and effective criterion for clustering the infeasible cfps. Our experiments indicate up to a 2-3 times increase in precision over the previous approach, with an average 100\% increase in the memory and time required for the analysis. This is possible because our empirical observation indicates that on average 70\% of clusters overlap with other clusters.},
pages = {159--169},
booktitle = {Proceedings of the 28th International Conference on Compiler Construction},
publisher = {Association for Computing Machinery},
author = {Pathade, Komal and Khedker, Uday P.},
date = {2019},
keywords = {Data Flow Analysis, Compilers, Static Analysis, Infeasible Control Flow Paths, Program Analysis},
}
@article{reissmann_rvsdg_2020,
title = {{RVSDG}: An Intermediate Representation for Optimizing Compilers},
volume = {19},
issn = {1539-9087},
url = {https://doi.org/10.1145/3391902},
doi = {10.1145/3391902},
abstract = {Intermediate Representations ({IRs}) are central to optimizing compilers as the way the program is represented may enhance or limit analyses and transformations. Suitable {IRs} focus on exposing the most relevant information and establish invariants that different compiler passes can rely on. While control-flow centric {IRs} appear to be a natural fit for imperative programming languages, analyses required by compilers have increasingly shifted to understand data dependencies and work at multiple abstraction layers at the same time. This is partially evidenced in recent developments such as the Multi-Level Intermediate Representation ({MLIR}) proposed by Google. However, rigorous use of data flow centric {IRs} in general purpose compilers has not been evaluated for feasibility and usability as previous works provide no practical implementations. We present the Regionalized Value State Dependence Graph ({RVSDG}) {IR} for optimizing compilers. The {RVSDG} is a data flow centric {IR} where nodes represent computations, edges represent computational dependencies, and regions capture the hierarchical structure of programs. It represents programs in demand-dependence form, implicitly supports structured control flow, and models entire programs within a single {IR}. We provide a complete specification of the {RVSDG}, construction and destruction methods, as well as exemplify its utility by presenting Dead Node and Common Node Elimination optimizations. We implemented a prototype compiler and evaluate it in terms of performance, code size, compilation time, and representational overhead. Our results indicate that the {RVSDG} can serve as a competitive {IR} in optimizing compilers while reducing complexity.},
number = {6},
journaltitle = {{ACM} Trans. Embed. Comput. Syst.},
author = {Reissmann, Nico and Meyer, Jan Christian and Bahmann, Helge and Själander, Magnus},
date = {2020-12},
keywords = {{LLVM}, intermediate representation, Regionalized value state dependence graph ({RVSDG})},
}
@inproceedings{shi_pipelining_2020,
location = {Seoul, South Korea},
title = {Pipelining bottom-up data flow analysis},
isbn = {978-1-4503-7121-6},
url = {https://dl.acm.org/doi/10.1145/3377811.3380425},
doi = {10.1145/3377811.3380425},
abstract = {Bottom-up program analysis has been traditionally easy to parallelize because functions without caller-callee relations can be analyzed independently. However, such function-level parallelism is significantly limited by the calling dependence - functions with caller-callee relations have to be analyzed sequentially because the analysis of a function depends on the analysis results, a.k.a., function summaries, of its callees. We observe that the calling dependence can be relaxed in many cases and, as a result, the parallelism can be improved. In this paper, we present Coyote, a framework of bottom-up data flow analysis, in which the analysis task of each function is elaborately partitioned into multiple sub-tasks to generate pipelineable function summaries. These sub-tasks are pipelined and run in parallel, even though the calling dependence exists. We formalize our idea under the {IFDS}/{IDE} framework and have implemented an application to checking null-dereference bugs and taint issues in C/C++ programs. We evaluate Coyote on a series of standard benchmark programs and open-source software systems, which demonstrates significant speedup over a conventional parallel design.},
eventtitle = {{ICSE} '20: 42nd International Conference on Software Engineering},
pages = {835--847},
booktitle = {Proceedings of the {ACM}/{IEEE} 42nd International Conference on Software Engineering},
publisher = {{ACM}},
author = {Shi, Qingkai and Zhang, Charles},
urldate = {2025-05-31},
date = {2020-06-27},
langid = {english},
}
@inproceedings{aigner_lazy_2024,
location = {New York, {NY}, {USA}},
title = {Lazy Sparse Conditional Constant Propagation in the Sea of Nodes},
isbn = {979-8-4007-1118-3},
url = {https://doi.org/10.1145/3679007.3685059},
doi = {10.1145/3679007.3685059},
series = {{MPLR} 2024},
abstract = {Conditional constant propagation is a compiler optimization that detects and propagates constant values for expressions in the input program, taking unreachable branches into account. It uses a data flow analysis that traverses the program's control flow graph to discover instructions that produce constant values. In this paper we document our work to adapt conditional constant propagation to the Sea of Nodes program representation of {GraalVM}. In the Sea of Nodes, the program is represented as a graph in which most nodes float and are only restricted by data flow edges. Classical data flow analysis is not possible in this setting because most operations are not ordered and not assigned to basic blocks. We present a novel approach to data flow analysis optimized for the Sea of Nodes. The analysis starts from known constant nodes in the graph and propagates information directly along data flow edges. Most nodes in the graph can never contribute new constants and are therefore never visited, a property we call lazy iteration. Dependences on control flow are taken into account by evaluating {SSA} φ nodes in a particular order according to a carefully defined priority metric. Our analysis is implemented in the {GraalVM} compiler. Experiments on the Renaissance benchmark suite show that lazy iteration only visits 20.5\% of all nodes in the graph. With the constants and unreachable branches found by our analysis, and previously undetected by the {GraalVM} compiler, we achieve an average speedup of 1.4\% over {GraalVM}'s optimized baseline.},
pages = {2--13},
booktitle = {Proceedings of the 21st {ACM} {SIGPLAN} International Conference on Managed Programming Languages and Runtimes},
publisher = {Association for Computing Machinery},
author = {Aigner, Christoph and Barany, Gergö and Mössenböck, Hanspeter},
date = {2024},
keywords = {data flow analysis, optimization, compilers, constant propagation, Sea of Nodes},
}