例如,如果您正在跟踪一个不安全的对象 x(可能是一些不受信任的或潜在的恶意数据),程序中的某个步骤可能会改变它的值。因此,在 y = x + 1 这样的简单计算中,普通的数据流分析只会标出对 x 的使用,而不会标出 y。然而,由于 y 是从 x 派生出来的,它同样受到不受信任(即“被污染”)信息的影响,因此 y 也被污染了。分析这种从 x 到 y 的污点流动,就称为污点跟踪。




这里以 RWCTF 中 Who Moved My Block 这道题涉及的 nbd-server 的一个漏洞为例,简述如何使用污点分析。

首先从 accept 函数开始找起,它是整个 socket 连接的起点,通过它我们可以根据交叉引用找到处理连接的函数 handle_modern_connection

`static void   handle_modern_connection(GArray *const servers, const int sock, struct generic_conf *genconf)   {       [...]       net = socket_accept(sock);       if (net < 0)           return;              if (!dontfork) {           // 重要!:注意这里会 fork 出一个子进程来单独处理新连接           pid = spawn_child(&commsocket);           if (pid) {               if (pid > 0) {                   msg(LOG_INFO, "Spawned a child process");                   g_array_append_val(childsocks, commsocket);               }               if (pid < 0)                   msg(LOG_ERR, "Failed to spawn a child process");               close(net);               return;           }           /* Child just continues. */       }       [...]              // 连接协商       client = negotiate(net, servers, genconf);                 [...]                 msg(LOG_INFO, "Starting to serve");      	       // 开始处理       mainloop_threaded(client);       exit(EXIT_SUCCESS);   handler_err:       [...]   }`        

需要注意的是,默认情况下对于每个连接,server 都会 fork 一个新的子进程来单独处理。这个特性相当重要,因为我们可以利用这个特性来爆破 canary 和 PIE

该函数会调用 negotiate 函数,后者会创建结构体 CLIENT,并将新连接的 fd 赋值给该 client,此后便通过 socket_read(client, addr, len) 从 client(即我们这边)读取数据。

`/**    * Do the initial negotiation.    *    * @param net The socket we're doing the negotiation over.    * @param servers The array of known servers.    * @param genconf the global options (needed for accessing TLS config data)    **/   CLIENT* negotiate(int net, GArray* servers, struct generic_conf *genconf) {   	uint16_t smallflags = NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES;   	uint64_t magic;   	uint32_t cflags = 0;   	uint32_t opt;       // 创建并初始化 client 结构体   	CLIENT* client = g_new0(CLIENT, 1);       // 将 socket fd 赋给 cleint   	client->net = net;   	client->socket_read = socket_read_notls;   	client->socket_write = socket_write_notls;   	client->socket_closed = socket_closed_negotiate;      	assert(servers != NULL);   	socket_write(client, INIT_PASSWD, 8);   	magic = htonll(opts_magic);   	socket_write(client, &magic, sizeof(magic));      	smallflags = htons(smallflags);   	socket_write(client, &smallflags, sizeof(uint16_t));       // 从 client 读取数据   	socket_read(client, &cflags, sizeof(cflags));   	cflags = htonl(cflags);       [...]   }   `


QL source code

使用的 QL 如下。由于污点分析不会对未建模的函数做进一步的污点传播,所以这里通过覆盖 isAdditionalTaintStep,手动添加从函数参数到函数调用的额外边。另外需要注意的是,我使用的是 semmle.code.cpp.ir.dataflow.TaintTracking,而不是文档里的 semmle.code.cpp.dataflow.TaintTracking;前者是基于 IR 的新 API,是被推荐使用的,也广泛应用在 CodeQL 自己的 CWE 用例里。


`/**    * @kind path-problem    */      import DataFlow::PathGraph   import cpp   import semmle.code.cpp.ir.dataflow.TaintTracking      predicate htonlCallEdge(DataFlow::Node node1, DataFlow::Node node2) {     exists(FunctionCall fc |       // fc.getTarget().getName() = "htonl" and       node1.asExpr() = fc.getAnArgument() and       node2.asExpr() = fc     )   }      class MyDataFlowConfiguration extends TaintTracking::Configuration {     MyDataFlowConfiguration() { this = "MyDataFlowConfiguration" }        override predicate isSource(DataFlow::Node source) {       exists(FunctionCall fc | fc.getArgument(1) = source.asExpr() |         fc.getTarget().hasGlobalName("socket_read")       )     }        override predicate isSink(DataFlow::Node sink) {       // sink.asExpr().getLocation().toString().matches("%nbd-server%") and       // sink.asExpr() instanceof BinaryArithmeticOperation       exists(FunctionCall fc | fc.getArgument(2) = sink.asExpr() |         fc.getTarget().hasGlobalName("socket_read")       )     }        override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {       htonlCallEdge(node1, node2)     }   }      from MyDataFlowConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink   where config.hasFlowPath(source, sink)   select sink.getNode(), source, sink, ""   `



`static bool handle_info(CLIENT* client, uint32_t opt, GArray* servers, uint32_t cflags) {   	uint32_t namelen, len;   	char *name;   	int i;   	SERVER *server = NULL;   	[...]   	char buf[1024];   	[...]          // 1. 从远程读入 len   	socket_read(client, &len, sizeof(len));   	len = htonl(len);       // 2. 从远程读入 namelen   	socket_read(client, &namelen, sizeof(namelen));   	namelen = htonl(namelen);       // 3. 进入 if 分支   	if(namelen > (len - 6)) {   		send_reply(client, opt, NBD_REP_ERR_INVALID, -1, "An OPT_INFO request cannot be smaller than the length of the name + 6");           // 4. 从 client 读入数据,由于 len 可控,因此可以造成栈溢出   		socket_read(client, buf, len - sizeof(namelen));   	}   	if(namelen > 0) {   		name = malloc(namelen + 1);   		name[namelen] = 0;   		socket_read(client, name, namelen);   	} else {   		name = strdup("");   	}       [...]   }   `




`abstract class Configuration extends DataFlow::Configuration{     ...     predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }        final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {       this.isAdditionalTaintStep(node1, node2) or       defaultAdditionalTaintStep(node1, node2)     }   ...   predicate defaultAdditionalTaintStep(DataFlow::Node src, DataFlow::Node sink) {     localAdditionalTaintStep(src, sink)   }   ...   cached   predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {     operandToInstructionTaintStep(nodeFrom.asOperand(), nodeTo.asInstruction())     or     instructionToOperandTaintStep(nodeFrom.asInstruction(), nodeTo.asOperand())   }   `


operandToInstructionTaintStep 用于把污点从指令的操作数(operand)传播到指令的结果,这里做了许多连边处理:

  • 首先是运算指令的参数连边至整个运算指令:
``// Taint can flow through expressions that alter the value but preserve     // more than one bit of it _or_ expressions that follow data through     // pointer indirections.     instrTo.getAnOperand() = opFrom and     (         instrTo instanceof ArithmeticInstruction         or         instrTo instanceof BitwiseInstruction         or         instrTo instanceof PointerArithmeticInstruction         or         // The `CopyInstruction` case is also present in non-taint data flow, but         // that uses `getDef` rather than `getAnyDef`. For taint, we want flow         // from a definition of `myStruct` to a `myStruct.myField` expression.         instrTo instanceof CopyInstruction     )   ``


例如,如果污点传播到了 len + 1 中的 len,那么它将把污点从 len 传播到 len + 1 这个 AddExpr 中。

  • 其次是一元运算指令,这里排除了字段取地址指令(字段所属类型为联合体 Union 的情况除外)
  ``// Unary instructions tend to preserve enough information in practice that we     // want taint to flow through.     // The exception is `FieldAddressInstruction`. Together with the rules below for     // `LoadInstruction`s and `ChiInstruction`s, flow through `FieldAddressInstruction`     // could cause flow into one field to come out an unrelated field.     // This would happen across function boundaries, where the IR would not be able to     // match loads to stores.     instrTo.(UnaryInstruction).getUnaryOperand() = opFrom and     (       not instrTo instanceof FieldAddressInstruction       or       instrTo.(FieldAddressInstruction).getField().getDeclaringType() instanceof Union     )``

排除字段取地址指令的原因正如注释所说,流过 FieldAddressInstruction 可能会导致污点流从某个字段流入,从另一个不相关的字段流出。

  • 此外是为其他已经建模好的函数进行污点传递,其中污点从 callInput 传播到 callOutput。
`modeledTaintStep(opFrom, instrTo)   `

污点分析库会额外对库函数建模,为许多常用函数建立额外边。这种建模是通过派生 TaintFunction 类并重写 hasTaintFlow 谓词来实现的。我们可以全局搜索 TaintFunction 字符串,找到所有建模好的函数。以下是其中某个函数的建模实现:

``/**    * A function that is pure, that is, its evaluation is guaranteed to be    * side-effect free. Excludes functions modeled by `PureStrFunction` and `PureMemFunction`.    */   private class PureFunction extends TaintFunction, SideEffectFunction {     PureFunction() { this.hasGlobalOrStdOrBslName(["abs", "labs"]) }        override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {       exists(ParameterIndex i |         input.isParameter(i) and         exists(this.getParameter(i))       ) and       output.isReturnValue()     }        override predicate hasOnlySpecificReadSideEffects() { any() }        override predicate hasOnlySpecificWriteSideEffects() { any() }   }   ``

污点分析库对函数 abs(以及 labs)进行建模,重写 hasTaintFlow 谓词,将该函数的输入参数与函数的返回值相连。这样,如果该函数的参数被污染,那么该函数的返回值也将被视为污染。

数据流分析库同样会对一些库函数进行建模,但不同的是,所建模函数的数量并没有污点分析那么多,同时连接额外边的侧重点也不一样,以 gets 函数为例,以下是它的建模实现:

``/**    * The standard functions `gets` and `fgets`.    */   private class GetsFunction extends DataFlowFunction, TaintFunction, ArrayFunction, AliasFunction,     SideEffectFunction, RemoteFlowSourceFunction {     GetsFunction() {       // gets(str)       // fgets(str, num, stream)       // fgetws(wstr, num, stream)       this.hasGlobalOrStdOrBslName(["gets", "fgets", "fgetws"])     }        override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {       input.isParameter(0) and       output.isReturnValue()     }        override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {       input.isParameter(2) and       output.isParameterDeref(0)     }        override predicate parameterNeverEscapes(int index) { index = 2 }        override predicate parameterEscapesOnlyViaReturn(int index) { index = 0 }        override predicate parameterIsAlwaysReturned(int index) { index = 0 }        override predicate hasOnlySpecificReadSideEffects() { any() }        override predicate hasOnlySpecificWriteSideEffects() { any() }        override predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) {       i = 0 and       buffer = true and       mustWrite = true     }        override predicate hasRemoteFlowSource(FunctionOutput output, string description) {       output.isParameterDeref(0) and       description = "String read by " + this.getName()     }        override predicate hasArrayWithVariableSize(int bufParam, int countParam) {       not this.hasName("gets") and       bufParam = 0 and       countParam = 1     }        override predicate hasArrayWithUnknownSize(int bufParam) {       this.hasName("gets") and       bufParam = 0     }        override predicate hasArrayOutput(int bufParam) { bufParam = 0 }   }      ``

注意到 hasDataFlow 的实现是将传入的第一个参数 buf(索引为 0)与返回值连接(buf 参数的值会影响到 gets 的返回值)。而 hasTaintFlow 则是将 fgets 等函数的数据来源(索引为 2 的 stream 参数)与 buf 所指向的内容连接(数据来源会污染 buf 中的数据)。

  • 除此之外,库中还对 ReadSideEffectInstruction、InitializeIndirectionInstruction 等 IR 指令做了额外的连边处理。笔者暂未找到合适的 CodeQL IR 文档,具体细节留待后文;我初步推测这些连边与内存初始化、指针解引用等有关。




此外,根据我做 Chrome QL 审计的经验,可以参考 Chrome Library 来补充一些与拷贝构造函数、智能指针、虚函数调用以及 C++ 容器相关的边。


