Commit Diff


commit - 8ab8d656c00c8520c892cbb3357725ddb0973a4e
commit + 44401529a2253cbf00583a3680ae01f0cb74693c
blob - /dev/null
blob + f951843e1c2840f9e14aa798260ef0f0ba6361be (mode 644)
--- /dev/null
+++ changelogs/unreleased/gh-7562-fiber_join.md
@@ -0,0 +1,6 @@
+## bugfix/core
+
+* Fixed a possible inconsistent state entering if fibers are joined incorrectly.
+  Now the `fiber_set_joinable` function panics if the fiber is dead or joined
+  already. The `fiber_join` and `fiber_join_timeout` functions now panic on a
+  double join if it is possible to detect it (gh-7562).
blob - 362afc06e8c78d9128fa817e02968c042735f9a9
blob + 5fa2be21265b7ac46d5d1ce7f441fa17f1a04a83
--- src/lib/core/fiber.c
+++ src/lib/core/fiber.c
@@ -664,6 +664,56 @@ fiber_set_joinable(struct fiber *fiber, bool yesno)
 {
 	if (fiber == NULL)
 		fiber = fiber();
+
+	/*
+	 * The C fiber API does not allow to report any error to the caller. At
+	 * the same time using the function in some conditions is unsafe. So in
+	 * case if such a condition is met the function panics.
+	 *
+	 * Here're the conditions the function may be called in:
+	 * 1. The fiber is not joinable and hasn't finished yet. We're good to
+	 *    change the fiber's joinability.
+	 * 2. The fiber is not joinable and is finished. That means the fiber
+	 *    is recycled already, so this is a use after free. This case is
+	 *    impossible from the Lua world, but can be done with C API, so it
+	 *    is to be prohibited.
+	 * 3. The fiber is joinable, not finished and not joined. We're good to
+	 *    change its joinability.
+	 * 4. The fiber is joinable, not finished, but joined already. Making
+	 *    the fiber non-joinable will lead to a double free: one in the
+	 *    fiber_loop, the second one in the active fiber_join_timeout.
+	 *    Making it joinable is a sign of attempt to join it later, so we
+	 *    can expect the second join in the future - detect it now, while
+	 *    we can do it, because in the future the fiber struct may be
+	 *    reused and it will be much more difficult to find the root cause
+	 *    of the double join (or we won't be able to detect it at all).
+	 * 5. The fiber is joinable, finished and not joined. The fiber should
+	 *    be joined in order to be recycled, so making it non-joinable can
+	 *    lead to a fiber leak and to be prohibited. The point about making
+	 *    it joinable from the statement #4 is applied here too.
+	 * 6. The fiber is joinable, finished and joined. In order to avoid
+	 *    introducing a new fiber flag, a joined fiber becomes non-joinable
+	 *    on death (see the fiber_join_timeout implementation), so this
+	 *    condition transforms into #2. And it's a use after free too.
+	 *
+	 * Simplifying:
+	 *   OK:
+	 *     1: !is_joinable && !is_dead
+	 *     3: is_joinable && !is_dead && !is_joined
+	 *   NOT OK:
+	 *     2: !is_joinable && is_dead
+	 *     4: is_joinable && !is_dead && is_joined
+	 *     5: is_joinable && is_dead && !is_joined
+	 *     6. is_joinable && is_dead && is_joined
+	 *
+	 *  Othervice, OK: !is_dead && (!is_joinable || !is_joined)
+	 *  So NOT OK: is_dead || (is_joinable && is_joined)
+	 *  So NOT OK: is_dead || is_joined, because joined implies joinable.
+	 */
+	if ((fiber->flags & FIBER_IS_DEAD) != 0 ||
+	    (fiber->flags & FIBER_JOIN_BEEN_INVOKED) != 0)
+		panic("%s on a dead or joined fiber detected", __func__);
+
 	if (yesno == true)
 		fiber->flags |= FIBER_IS_JOINABLE;
 	else
@@ -731,6 +781,12 @@ fiber_join_timeout(struct fiber *fiber, double timeout
 {
 	if ((fiber->flags & FIBER_IS_JOINABLE) == 0)
 		panic("the fiber is not joinable");
+
+	if ((fiber->flags & FIBER_JOIN_BEEN_INVOKED) != 0)
+		panic("join of a joined fiber detected");
+
+	/* Prohibit joining the fiber and changing its joinability. */
+	fiber->flags |= FIBER_JOIN_BEEN_INVOKED;
 
 	if (!fiber_is_dead(fiber)) {
 		double deadline = fiber_clock() + timeout;
@@ -752,6 +808,11 @@ fiber_join_timeout(struct fiber *fiber, double timeout
 	}
 	assert((fiber->flags & FIBER_IS_RUNNING) == 0);
 	assert((fiber->flags & FIBER_IS_JOINABLE) != 0);
+	/*
+	 * This line is here just to make the following statement true: if
+	 * a fiber is dead and is not joinable, that means it's recycled.
+	 * So we don't have to introduce a new FIBER_IS_RECYCLED flag.
+	 */
 	fiber->flags &= ~FIBER_IS_JOINABLE;
 
 	/* Move exception to the caller */
blob - 1059af3483d4851dffa224c99aa7535b2823860f
blob + 104c21b1c212beec2bc54af3a8830659c0233763
--- src/lib/core/fiber.h
+++ src/lib/core/fiber.h
@@ -170,6 +170,11 @@ enum {
 	 * This flag idicates, if the fiber can be killed from the Lua world.
 	 */
 	FIBER_IS_SYSTEM         = 1 << 8,
+	/**
+	 * Someone has joined the fiber already. So this fiber can't be joined
+	 * once again nor can its joinability be changed.
+	 */
+	FIBER_JOIN_BEEN_INVOKED = 1 << 9,
 	FIBER_DEFAULT_FLAGS	= 0
 };
 
@@ -346,6 +351,11 @@ fiber_set_cancellable(bool yesno);
 
 /**
  * Set fiber to be joinable (false by default).
+ * The fiber must not be joined already nor dead.
+ *
+ * @pre the fiber is not dead (panic if not).
+ * @pre the fiber is not joined yet (panic if not).
+ *
  * \param fiber to (un)set the joinable property.
  *              If set to NULL, the current fiber is used.
  * \param yesno status to set
@@ -357,8 +367,10 @@ fiber_set_joinable(struct fiber *fiber, bool yesno);
  * Wait until the fiber is dead and then move its execution
  * status to the caller.
  * The fiber must not be detached (@sa fiber_set_joinable()).
- * @pre FIBER_IS_JOINABLE flag is set.
  *
+ * @pre FIBER_IS_JOINABLE flag is set (panic if not).
+ * @pre the fiber is not joined yet (panic if not).
+ *
  * \param f fiber to be woken up
  * \return fiber function ret code
  */
@@ -372,7 +384,9 @@ fiber_join(struct fiber *f);
  * Return fiber execution status to the caller or -1
  * if timeout exceeded and set diag.
  * The fiber must not be detached @sa fiber_set_joinable()
+ *
  * @pre FIBER_IS_JOINABLE flag is set.
+ * @pre the fiber is not joined yet (panic if not).
  *
  * \param f fiber to be woken up
  * \param timeout time during which we wait for the fiber completion
blob - 2e938d65841ec05fd95e9ce87dd2e9b64983cf23
blob + 103dad797c856d9589ec4d138f7234ebfa68db07
--- test/unit/fiber.cc
+++ test/unit/fiber.cc
@@ -189,6 +189,20 @@ fiber_join_test()
 	fiber_reschedule();
 	note("by this time the fiber should be dead already");
 	fiber_cancel(fiber);
+	fiber_join(fiber);
+
+	note("Can change the joinability in safe cases.");
+	fiber = fiber_new_xc("alive_not_joinable", noop_f);
+	/* Non-joinable not dead fiber.  */
+	fiber_set_joinable(fiber, true);
+	fail_unless((fiber->flags & FIBER_IS_JOINABLE) != 0);
+	/* Joinable not dead and not joined fiber. */
+	fiber_set_joinable(fiber, false);
+	fail_unless((fiber->flags & FIBER_IS_JOINABLE) == 0);
+	/* The same as the first case , just to be sure. */
+	fiber_set_joinable(fiber, true);
+	fail_unless((fiber->flags & FIBER_IS_JOINABLE) != 0);
+	fiber_wakeup(fiber);
 	fiber_join(fiber);
 
 	footer();
blob - 24b7883798160916743bc7a7a341b512d34f70d1
blob + 9cd48c333dcb523c4416ff519a74a5acf624f741
--- test/unit/fiber.result
+++ test/unit/fiber.result
@@ -12,6 +12,7 @@ OutOfMemory: Failed to allocate 42 bytes in allocator 
 # exception propagated
 # cancel dead has started
 # by this time the fiber should be dead already
+# Can change the joinability in safe cases.
 	*** fiber_join_test: done ***
 	*** fiber_stack_test ***
 # normal-stack fiber not crashed