Enquanto o github é indexado (lentamente, devido ao limite de 5.000 solicitações por hora), o que é necessário para obter dados para o artigo, vamos falar sobre o teste de lexers e analisadores por enquanto. Vamos discutir os desejos para o processo de desenvolvimento de gramáticas, testando-as e controle de qualidade para não virar a criatura da foto.

, , - ++. , . , TDD, , 15 100% ( ). , .
,
++, , , , ++ , , . , , , .
, , :
class Example {
boolean flag;
}
, , ? , , , , , , . .
, ( , ), , , , , . TDD, , .
, - , , , , - .
, , , ? , .
: C# , , ? ( ) :
ID ? ID : ID
ID ? LITERAL : LITERAL
ID? ? ANY : ANY
! C# , value is MyType ? var1 : var2, , ? . Antlr . , . , , , , . , 1 100 1000 0.001 ?
, , . , , . ++ , - . , .
- , , json, , .
{
"tokens" : [ {
"tokenType" : "PACKAGE",
"statement" : "1:0[7]"
}, {
"tokenType" : "Identifier",
"statement" : "1:8[7]",
"text" : "example"
}, {
"tokenType" : "SEMI",
"statement" : "1:15[1]"
}, {
"tokenType" : "CLASS",
"statement" : "3:0[5]"
}, {
"tokenType" : "Identifier",
"statement" : "3:6[7]",
"text" : "Example"
}, {
"tokenType" : "FoldBlock",
"statement" : "3:14[21]",
"children" : {
"tokens" : [ {
"tokenType" : "LBRACE",
"statement" : "3:14[1]"
}, {
"tokenType" : "BOOLEAN",
"statement" : "4:4[7]"
}, {
"tokenType" : "Identifier",
"statement" : "4:12[4]",
"text" : "flag"
}, {
"tokenType" : "SEMI",
"statement" : "4:16[1]"
}, {
"tokenType" : "RBRACE",
"statement" : "5:0[1]"
}, {
"tokenType" : "EOF",
"statement" : "5:1[3]"
} ]
}
}, {
"tokenType" : "EOF",
"statement" : "6:0[5]"
} ]
}
, , - .
, , , - , - , . ParserService, , , , ( ).
, -:
/**
 * Runs the lexer over every file of the given suite and compares the
 * produced token stream against the expected output recorded on disk.
 */
def testLexer(suite: Suite): Unit = {
  implicit val ps: ParserService = ParserService(suite.language)
  suite.fileInfos.foreach { info =>
    assertFile(info.file)
    // Expected token list previously generated for this test file.
    val expected: LexerOutput = readFromFile[LexerOutput](testConfig(LexerSuiteDir, suite, info.file))
    val lexer = ps.newLexer(asVirtualFile(info))
    val tokenStream = new CommonTokenStream(lexer)
    tokenStream.fill()
    val actualTokens = CollectionConverters.asScala(tokenStream.getTokens)
    assertTokens(lexer, expected, actualTokens)
  }
}
/**
 * Asserts that the actual token buffer matches the expected token list,
 * recursing into folded (nested) tokens where the expectation declares children.
 *
 * @param lexer       lexer whose vocabulary maps numeric token types to symbolic names
 * @param lexerOutput expected tokens loaded from the recorded test fixture
 * @param tokens      actual tokens produced by the lexer
 */
private def assertTokens(lexer: AbstractRScanLexer, lexerOutput: LexerOutput, tokens: mutable.Buffer[Token]): Unit = {
  val queue = new mutable.Queue[LexerOutputToken]()
  queue ++= lexerOutput.tokens
  for (t <- tokens) {
    assertTrue(queue.nonEmpty, "Queue should not be empty")
    val lot = queue.dequeue()
    val tokenType = lexer.getVocabulary.getSymbolicName(t.getType)
    assertEquals(lot.tokenType, tokenType, "Token types are not equal")
    // BUG FIX: this message previously duplicated "Token types are not equal",
    // making statement-position mismatches indistinguishable from type mismatches.
    assertEquals(lot.statement, tokenStatement(t), "Token statements are not equal")
    // Text is only recorded for tokens where it matters (e.g. identifiers).
    if (lot.text != null) {
      assertEquals(lot.text, t.getText, "Texts are not equal")
    }
    if (lot.children != null) {
      // Expectation declares nested tokens: the actual token must be a fold
      // block carrying them, and they are verified recursively.
      assertTrue(t.isInstanceOf[RScanToken], "Must be RScanToken")
      val rt = t.asInstanceOf[RScanToken]
      assertTrue(rt.foldedTokens().nonEmpty, "Must have fold tokens")
      assertTokens(lexer, lot.children, rt.foldedTokens().toBuffer)
    } else {
      assertFalse(t.isInstanceOf[RScanToken] && t.asInstanceOf[RScanToken].foldedTokens().nonEmpty, "No fold tokens allowed")
    }
  }
  // A trailing EOF in the expectation is optional; drop it before the emptiness check.
  if (queue.nonEmpty && queue.head.tokenType.equals("EOF")) queue.dequeue()
  assertTrue(queue.isEmpty, "Queue must be empty")
}
- , , , .
, :
// JUnit 5 entry point for the Java grammar test suite.
// Each parameterized case receives one Suite from the companion object's
// `testFiles` @MethodSource; all assertion logic lives in AbstractJUnitAntlr4Test.
class JavaParserTest extends AbstractJUnitAntlr4Test {
// Verifies the lexer output for every test file in the suite.
@ParameterizedTest(name = "[{index}] {0}")
@MethodSource(Array("testFiles"))
override def testLexer(suite: Suite): Unit = {
super.testLexer(suite)
}
// Verifies the parser (AST) output for every test file in the suite.
@ParameterizedTest(name = "[{index}] {0}")
@MethodSource(Array("testFiles"))
override def testParser(suite: Suite): Unit = {
super.testParser(suite)
}
}
// Companion object supplying test data to the @MethodSource-annotated tests above.
object JavaParserTest {
// Resource directory holding the Java test suites (one suite per subdirectory).
private val TestDataDirectory = "/test/java/unit"
// Streams one Arguments per suite found under TestDataDirectory, filtered to *.java files.
def testFiles: java.util.stream.Stream[Arguments] = Antlr4TestUtils.filesFromResourceDirectory(JavaLanguage.ref, TestDataDirectory, getClass, Seq("java"))
}
/test/java/unit (suite), junit testLexer, - , . !
java -Xmx128m -Dfile.encoding=UTF-8 -classpath $CLASSPATH \
org.lastrix.rscan.test.ParserTestSuiteGenerator \
-l Java -d $TEST_PATH -e .java
./gradlew clean test
- . , AST. AST, , , , :
{
"languageName" : "Java",
"items" : [ {
"keyText" : "RAW_DECL_PACKAGE at field000.java[1:0-5:1]",
"children" : [ {
"keyText" : "NAME at field000.java[1:8-1:15]",
"children" : [ {
"keyText" : "UNRESOLVED_ID at field000.java[1:8-1:15]",
"specText" : "example"
} ]
}, {
"keyText" : "RAW_DECL_CLASS at field000.java[3:0-5:1]",
"children" : [ {
"keyText" : "NAME at field000.java[3:6-3:13]",
"children" : [ {
"keyText" : "UNRESOLVED_ID at field000.java[3:6-3:13]",
"specText" : "Example"
} ]
}, {
"keyText" : "MEMBERS at field000.java[3:14-5:1]",
"children" : [ {
"keyText" : "RAW_DECL_FIELD at field000.java[4:4-4:16]",
"children" : [ {
"keyText" : "RAW_TYPE at field000.java[4:4-4:11]",
"children" : [ {
"keyText" : "UNRESOLVED_ID at field000.java[4:4-4:11]",
"specText" : "boolean"
} ]
}, {
"keyText" : "RAW_DECL_VARIABLE at field000.java[4:12-4:16]",
"children" : [ {
"keyText" : "NAME at field000.java[4:12-4:16]",
"children" : [ {
"keyText" : "UNRESOLVED_ID at field000.java[4:12-4:16]",
"specText" : "flag"
} ]
} ]
} ]
} ]
} ]
} ]
} ]
}
, .
, -, , , :
/**
 * Parses every file of the given suite and compares the resulting operation
 * tree (AST) against the expected output recorded on disk.
 */
def testParser(suite: Suite): Unit = {
  implicit val ps: ParserService = ParserService(suite.language)
  suite.fileInfos.foreach { info =>
    assertFile(info.file)
    // Expected AST previously generated for this test file.
    val expected: ParserOutput = readFromFile[ParserOutput](testConfig(ParserSuiteDir, suite, info.file))
    val op = antlr4.parseFile(asVirtualFile(info))
    assertTrue(op.isInstanceOf[RLangOp], "Must be RLangOp")
    assertEquals(expected.languageName, op.asInstanceOf[RLangOp].language.name, "Languages are not equal")
    assertOperations(op.key, op.children, expected.items)
  }
}
/**
 * Recursively asserts that the actual operation tree matches the expected items.
 * Children and expected items must agree pairwise, in order, at every level.
 *
 * @param ownerKey key of the parent operation (used only in failure messages)
 * @param children actual child operations produced by the parser
 * @param items    expected items from the recorded fixture; may be null when absent
 */
private def assertOperations(ownerKey: ROpKey, children: Seq[ROp], items: Seq[ParserOutputItem]): Unit = {
  // Restructured as if/else to drop the early `return`, which is an
  // anti-pattern in Scala; behavior is unchanged.
  if (children.isEmpty) {
    // Leaf operation: the fixture must not expect any children here.
    assertTrue(items == null || items.isEmpty, s"No items in operation: $ownerKey")
  } else {
    if (items == null) {
      Assertions.fail(s"Missing child operations: $ownerKey")
    }
    val queue = new mutable.Queue[ParserOutputItem]()
    queue ++= items
    for (child <- children) {
      assertTrue(queue.nonEmpty, s"Must not be empty at: ${child.key}")
      val poi = queue.dequeue()
      assertEquals(poi.keyText, child.key.toString, "Key text is not equal")
      assertEquals(poi.specText, child.specText, "SpecText is not equal")
      // Recurse into the subtree rooted at this child.
      assertOperations(child.key, child.children, poi.children)
    }
    // Every expected item must have been consumed by a matching child.
    assertTrue(queue.isEmpty, s"Queue must be empty for: $ownerKey")
  }
}
, CI/CD , ( ). , , ( ), - - , . . , 100 . .
, , , "". , ?
org.lastrix.rscan.test.ParserTestSuiteGenerator, . (), , , , , ( , ?), , , Intellij IDEA - , 4 . -, , . , ?
Java, . (478 ), (intel i7 9700k) 1s 45ms, 65% .
O artigo propõe uma abordagem para testar lexers e analisadores, que permite acelerar o desenvolvimento e abandonar a abordagem antiquada, quando era necessário analisar milhares de projetos enormes para verificar o trabalho da gramática a fim de detectar um pequeno bug. Escrever à mão uma grande quantidade de código para verificar o resultado do trabalho — tudo isso pode ser automatizado, exceto talvez a conferência do resultado da geração. Embora não seja possível abandonar completamente os testes com bases de código grandes, é possível reduzir a necessidade de recorrer a essa abordagem: um bug detectado em código grande pode ser facilmente reproduzido em um trecho pequeno e, a partir daí, mantido sob controle por um teste de regressão.