贝塞尔曲线生成,Bresenham算法实现
/**
* Marlin 3D Printer Firmware
* Copyright (c) 2020 MarlinFirmware [https://github.com/MarlinFirmware/Marlin]
*
* Based on Sprinter and grbl.
* Copyright (c) 2011 Camiel Gubbels / Erik van der Zalm
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
/**
* stepper.cpp - A singleton object to execute motion plans using stepper motors
* Marlin Firmware
*
* Derived from Grbl
* Copyright (c) 2009-2011 Simen Svale Skogsrud
*
* Grbl is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Grbl is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Grbl. If not, see <https://www.gnu.org/licenses/>.
*/
/**
* Timer calculations informed by the 'RepRap cartesian firmware' by Zack Smith
* and Philipp Tiefenbacher.
*/
/**
* __________________________
* /| |\ _________________ ^
* / | | \ /| |\ |
* / | | \ / | | \ s
* / | | | | | \ p
* / | | | | | \ e
* +-----+------------------------+---+--+---------------+----+ e
* | BLOCK 1 | BLOCK 2 | d
*
* time ----->
*
* The trapezoid is the shape the speed curve over time. It starts at block->initial_rate, accelerates
* first block->accelerate_until step_events_completed, then keeps going at constant speed until
* step_events_completed reaches block->decelerate_after after which it decelerates until the trapezoid generator is reset.
* The slope of acceleration is calculated using v = u + at where t is the accumulated timer values of the steps so far.
*/
/**
* Marlin uses the Bresenham algorithm. For a detailed explanation of theory and
* method see https://www.cs.helsinki.fi/group/goa/mallinnus/lines/bresenh.html
*/
/**
* Jerk controlled movements planner added Apr 2018 by Eduardo José Tagle.
* Equations based on Synthethos TinyG2 sources, but the fixed-point
* implementation is new, as we are running the ISR with a variable period.
* Also implemented the Bézier velocity curve evaluation in ARM assembler,
* to avoid impacting ISR speed.
*/
#include "stepper.h"
Stepper stepper; // Singleton
#define BABYSTEPPING_EXTRA_DIR_WAIT
#ifdef __AVR__
#include "speed_lookuptable.h"
#endif
#include "endstops.h"
#include "planner.h"
#include "motion.h"
#include "temperature.h"
#include "../lcd/ultralcd.h"
#include "../gcode/queue.h"
#include "../sd/cardreader.h"
#include "../MarlinCore.h"
#include "../HAL/shared/Delay.h"
#if ENABLED(INTEGRATED_BABYSTEPPING)
#include "../feature/babystep.h"
#endif
#if MB(ALLIGATOR)
#include "../feature/dac/dac_dac084s085.h"
#endif
#if HAS_MOTOR_CURRENT_SPI
#include <SPI.h>
#endif
#if ENABLED(MIXING_EXTRUDER)
#include "../feature/mixing.h"
#endif
#if HAS_FILAMENT_RUNOUT_DISTANCE
#include "../feature/runout.h"
#endif
#if HAS_L64XX
#include "../libs/L64XX/L64XX_Marlin.h"
uint8_t L6470_buf[MAX_L64XX + 1]; // chip command sequence - element 0 not used
bool L64XX_OK_to_power_up = false; // flag to keep L64xx steppers powered down after a reset or power up
#endif
#if ENABLED(POWER_LOSS_RECOVERY)
#include "../feature/powerloss.h"
#endif
#if HAS_CUTTER
#include "../feature/spindle_laser.h"
#endif
// public:
#if EITHER(HAS_EXTRA_ENDSTOPS, Z_STEPPER_AUTO_ALIGN)
bool Stepper::separate_multi_axis = false;
#endif
#if HAS_MOTOR_CURRENT_SPI || HAS_MOTOR_CURRENT_PWM
bool Stepper::initialized; // = false
uint32_t Stepper::motor_current_setting[MOTOR_CURRENT_COUNT]; // Initialized by settings.load()
#if HAS_MOTOR_CURRENT_SPI
constexpr uint32_t Stepper::digipot_count[];
#endif
#endif
// private:
block_t* Stepper::current_block; // (= nullptr) A pointer to the block currently being traced
uint8_t Stepper::last_direction_bits, // = 0
Stepper::axis_did_move; // = 0
bool Stepper::abort_current_block;
#if DISABLED(MIXING_EXTRUDER) && HAS_MULTI_EXTRUDER
uint8_t Stepper::last_moved_extruder = 0xFF;
#endif
#if ENABLED(X_DUAL_ENDSTOPS)
bool Stepper::locked_X_motor = false, Stepper::locked_X2_motor = false;
#endif
#if ENABLED(Y_DUAL_ENDSTOPS)
bool Stepper::locked_Y_motor = false, Stepper::locked_Y2_motor = false;
#endif
#if EITHER(Z_MULTI_ENDSTOPS, Z_STEPPER_AUTO_ALIGN)
bool Stepper::locked_Z_motor = false, Stepper::locked_Z2_motor = false
#if NUM_Z_STEPPER_DRIVERS >= 3
, Stepper::locked_Z3_motor = false
#if NUM_Z_STEPPER_DRIVERS >= 4
, Stepper::locked_Z4_motor = false
#endif
#endif
;
#endif
uint32_t Stepper::acceleration_time, Stepper::deceleration_time;
uint8_t Stepper::steps_per_isr;
TERN(ADAPTIVE_STEP_SMOOTHING,,constexpr) uint8_t Stepper::oversampling_factor;
xyze_long_t Stepper::delta_error{
0};
xyze_ulong_t Stepper::advance_dividend{
0};
uint32_t Stepper::advance_divisor = 0,
Stepper::step_events_completed = 0, // The number of step events executed in the current block
Stepper::accelerate_until, // The count at which to stop accelerating
Stepper::decelerate_after, // The count at which to start decelerating
Stepper::step_event_count; // The total event count for the current block
#if EITHER(HAS_MULTI_EXTRUDER, MIXING_EXTRUDER)
uint8_t Stepper::stepper_extruder;
#else
constexpr uint8_t Stepper::stepper_extruder;
#endif
#if ENABLED(S_CURVE_ACCELERATION)
int32_t __attribute__((used)) Stepper::bezier_A __asm__("bezier_A"); // A coefficient in Bézier speed curve with alias for assembler
int32_t __attribute__((used)) Stepper::bezier_B __asm__("bezier_B"); // B coefficient in Bézier speed curve with alias for assembler
int32_t __attribute__((used)) Stepper::bezier_C __asm__("bezier_C"); // C coefficient in Bézier speed curve with alias for assembler
uint32_t __attribute__((used)) Stepper::bezier_F __asm__("bezier_F"); // F coefficient in Bézier speed curve with alias for assembler
uint32_t __attribute__((used)) Stepper::bezier_AV __asm__("bezier_AV"); // AV coefficient in Bézier speed curve with alias for assembler
#ifdef __AVR__
bool __attribute__((used)) Stepper::A_negative __asm__("A_negative"); // If A coefficient was negative
#endif
bool Stepper::bezier_2nd_half; // =false If Bézier curve has been initialized or not
#endif
#if ENABLED(LIN_ADVANCE)
uint32_t Stepper::nextAdvanceISR = LA_ADV_NEVER,
Stepper::LA_isr_rate = LA_ADV_NEVER;
uint16_t Stepper::LA_current_adv_steps = 0,
Stepper::LA_final_adv_steps,
Stepper::LA_max_adv_steps;
int8_t Stepper::LA_steps = 0;
bool Stepper::LA_use_advance_lead;
#endif // LIN_ADVANCE
#if ENABLED(INTEGRATED_BABYSTEPPING)
uint32_t Stepper::nextBabystepISR = BABYSTEP_NEVER;
#endif
#if ENABLED(DIRECT_STEPPING)
page_step_state_t Stepper::page_step_state;
#endif
int32_t Stepper::ticks_nominal = -1;
#if DISABLED(S_CURVE_ACCELERATION)
uint32_t Stepper::acc_step_rate; // needed for deceleration start point
#endif
xyz_long_t Stepper::endstops_trigsteps;
xyze_long_t Stepper::count_position{
0};
xyze_int8_t Stepper::count_direction{
0};
#if ENABLED(LASER_POWER_INLINE_TRAPEZOID)
Stepper::stepper_laser_t Stepper::laser_trap = {
.enabled = false,
.cur_power = 0,
.cruise_set = false,
#if DISABLED(LASER_POWER_INLINE_TRAPEZOID_CONT)
.last_step_count = 0,
.acc_step_count = 0
#else
.till_update = 0
#endif
};
#endif
#define DUAL_ENDSTOP_APPLY_STEP(A,V) \
if (separate_multi_axis) { \
if (A##_HOME_DIR < 0) { \
if (!(TEST(endstops.state(), A##_MIN) && count_direction[_AXIS(A)] < 0) && !locked_##A##_motor) A##_STEP_WRITE(V); \
if (!(TEST(endstops.state(), A##2_MIN) && count_direction[_AXIS(A)] < 0) && !locked_##A##2_motor) A##2_STEP_WRITE(V); \
} \
else { \
if (!(TEST(endstops.state(), A##_MAX) && count_direction[_AXIS(A)] > 0) && !locked_##A##_motor) A##_STEP_WRITE(V); \
if (!(TEST(endstops.state(), A##2_MAX) && count_direction[_AXIS(A)] > 0) && !locked_##A##2_motor) A##2_STEP_WRITE(V); \
} \
} \
else { \
A##_STEP_WRITE(V); \
A##2_STEP_WRITE(V); \
}
#define DUAL_SEPARATE_APPLY_STEP(A,V) \
if (separate_multi_axis) { \
if (!locked_##A##_motor) A##_STEP_WRITE(V); \
if (!locked_##A##2_motor) A##2_STEP_WRITE(V); \
} \
else { \
A##_STEP_WRITE(V); \
A##2_STEP_WRITE(V); \
}
#define TRIPLE_ENDSTOP_APPLY_STEP(A,V) \
if (separate_multi_axis) { \
if (A##_HOME_DIR < 0) { \
if (!(TEST(endstops.state(), A##_MIN) && count_direction[_AXIS(A)] < 0) && !locked_##A##_motor) A##_STEP_WRITE(V); \
if (!(TEST(endstops.state(), A##2_MIN) && count_direction[_AXIS(A)] < 0) && !locked_##A##2_motor) A##2_STEP_WRITE(V); \
if (!(TEST(endstops.state(), A##3_MIN) && count_direction[_AXIS(A)] < 0) && !locked_##A##3_motor) A##3_STEP_WRITE(V); \
} \
else { \
if (!(TEST(endstops.state(), A##_MAX) && count_direction[_AXIS(A)] > 0) && !locked_##A##_motor) A##_STEP_WRITE(V); \
if (!(TEST(endstops.state(), A##2_MAX) && count_direction[_AXIS(A)] > 0) && !locked_##A##2_motor) A##2_STEP_WRITE(V); \
if (!(TEST(endstops.state(), A##3_MAX) && count_direction[_AXIS(A)] > 0) && !locked_##A##3_motor) A##3_STEP_WRITE(V); \
} \
} \
else { \
A##_STEP_WRITE(V); \
A##2_STEP_WRITE(V); \
A##3_STEP_WRITE(V); \
}
#define TRIPLE_SEPARATE_APPLY_STEP(A,V) \
if (separate_multi_axis) { \
if (!locked_##A##_motor) A##_STEP_WRITE(V); \
if (!locked_##A##2_motor) A##2_STEP_WRITE(V); \
if (!locked_##A##3_motor) A##3_STEP_WRITE(V); \
} \
else { \
A##_STEP_WRITE(V); \
A##2_STEP_WRITE(V); \
A##3_STEP_WRITE(V); \
}
#define QUAD_ENDSTOP_APPLY_STEP(A,V) \
if (separate_multi_axis) { \
if (A##_HOME_DIR < 0) { \
if (!(TEST(endstops.state(), A##_MIN) && count_direction[_AXIS(A)] < 0) && !locked_##A##_motor) A##_STEP_WRITE(V); \
if (!(TEST(endstops.state(), A##2_MIN) && count_direction[_AXIS(A)] < 0) && !locked_##A##2_motor) A##2_STEP_WRITE(V); \
if (!(TEST(endstops.state(), A##3_MIN) && count_direction[_AXIS(A)] < 0) && !locked_##A##3_motor) A##3_STEP_WRITE(V); \
if (!(TEST(endstops.state(), A##4_MIN) && count_direction[_AXIS(A)] < 0) && !locked_##A##4_motor) A##4_STEP_WRITE(V); \
} \
else { \
if (!(TEST(endstops.state(), A##_MAX) && count_direction[_AXIS(A)] > 0) && !locked_##A##_motor) A##_STEP_WRITE(V); \
if (!(TEST(endstops.state(), A##2_MAX) && count_direction[_AXIS(A)] > 0) && !locked_##A##2_motor) A##2_STEP_WRITE(V); \
if (!(TEST(endstops.state(), A##3_MAX) && count_direction[_AXIS(A)] > 0) && !locked_##A##3_motor) A##3_STEP_WRITE(V); \
if (!(TEST(endstops.state(), A##4_MAX) && count_direction[_AXIS(A)] > 0) && !locked_##A##4_motor) A##4_STEP_WRITE(V); \
} \
} \
else { \
A##_STEP_WRITE(V); \
A##2_STEP_WRITE(V); \
A##3_STEP_WRITE(V); \
A##4_STEP_WRITE(V); \
}
#define QUAD_SEPARATE_APPLY_STEP(A,V) \
if (separate_multi_axis) { \
if (!locked_##A##_motor) A##_STEP_WRITE(V); \
if (!locked_##A##2_motor) A##2_STEP_WRITE(V); \
if (!locked_##A##3_motor) A##3_STEP_WRITE(V); \
if (!locked_##A##4_motor) A##4_STEP_WRITE(V); \
} \
else { \
A##_STEP_WRITE(V); \
A##2_STEP_WRITE(V); \
A##3_STEP_WRITE(V); \
A##4_STEP_WRITE(V); \
}
#if ENABLED(X_DUAL_STEPPER_DRIVERS)
#define X_APPLY_DIR(v,Q) do{ X_DIR_WRITE(v); X2_DIR_WRITE((v) != INVERT_X2_VS_X_DIR); }while(0)
#if ENABLED(X_DUAL_ENDSTOPS)
#define X_APPLY_STEP(v,Q) DUAL_ENDSTOP_APPLY_STEP(X,v)
#else
#define X_APPLY_STEP(v,Q) do{ X_STEP_WRITE(v); X2_STEP_WRITE(v); }while(0)
#endif
#elif ENABLED(DUAL_X_CARRIAGE)
#define X_APPLY_DIR(v,ALWAYS) do{ \
if (extruder_duplication_enabled || ALWAYS) { X_DIR_WRITE(v); X2_DIR_WRITE(mirrored_duplication_mode ? !(v) : v); } \
else if (last_moved_extruder) X2_DIR_WRITE(v); else X_DIR_WRITE(v); \
}while(0)
#define X_APPLY_STEP(v,ALWAYS) do{ \
if (extruder_duplication_enabled || ALWAYS) { X_STEP_WRITE(v); X2_STEP_WRITE(v); } \
else if (last_moved_extruder) X2_STEP_WRITE(v); else X_STEP_WRITE(v); \
}while(0)
#else
#define X_APPLY_DIR(v,Q) X_DIR_WRITE(v)
#define X_APPLY_STEP(v,Q) X_STEP_WRITE(v)
#endif
#if ENABLED(Y_DUAL_STEPPER_DRIVERS)
#define Y_APPLY_DIR(v,Q) do{ Y_DIR_WRITE(v); Y2_DIR_WRITE((v) != INVERT_Y2_VS_Y_DIR); }while(0)
#if ENABLED(Y_DUAL_ENDSTOPS)
#define Y_APPLY_STEP(v,Q) DUAL_ENDSTOP_APPLY_STEP(Y,v)
#else
#define Y_APPLY_STEP(v,Q) do{ Y_STEP_WRITE(v); Y2_STEP_WRITE(v); }while(0)
#endif
#else
#define Y_APPLY_DIR(v,Q) Y_DIR_WRITE(v)
#define Y_APPLY_STEP(v,Q) Y_STEP_WRITE(v)
#endif
#if NUM_Z_STEPPER_DRIVERS == 4
#define Z_APPLY_DIR(v,Q) do{ Z_DIR_WRITE(v); Z2_DIR_WRITE(v); Z3_DIR_WRITE(v); Z4_DIR_WRITE(v); }while(0)
#if ENABLED(Z_MULTI_ENDSTOPS)
#define Z_APPLY_STEP(v,Q) QUAD_ENDSTOP_APPLY_STEP(Z,v)
#elif ENABLED(Z_STEPPER_AUTO_ALIGN)
#define Z_APPLY_STEP(v,Q) QUAD_SEPARATE_APPLY_STEP(Z,v)
#else
#define Z_APPLY_STEP(v,Q) do{ Z_STEP_WRITE(v); Z2_STEP_WRITE(v); Z3_STEP_WRITE(v); Z4_STEP_WRITE(v); }while(0)
#endif
#elif NUM_Z_STEPPER_DRIVERS == 3
#define Z_APPLY_DIR(v,Q) do{ Z_DIR_WRITE(v); Z2_DIR_WRITE(v); Z3_DIR_WRITE(v); }while(0)
#if ENABLED(Z_MULTI_ENDSTOPS)
#define Z_APPLY_STEP(v,Q) TRIPLE_ENDSTOP_APPLY_STEP(Z,v)
#elif ENABLED(Z_STEPPER_AUTO_ALIGN)
#define Z_APPLY_STEP(v,Q) TRIPLE_SEPARATE_APPLY_STEP(Z,v)
#else
#define Z_APPLY_STEP(v,Q) do{ Z_STEP_WRITE(v); Z2_STEP_WRITE(v); Z3_STEP_WRITE(v); }while(0)
#endif
#elif NUM_Z_STEPPER_DRIVERS == 2
#define Z_APPLY_DIR(v,Q) do{ Z_DIR_WRITE(v); Z2_DIR_WRITE(v); }while(0)
#if ENABLED(Z_MULTI_ENDSTOPS)
#define Z_APPLY_STEP(v,Q) DUAL_ENDSTOP_APPLY_STEP(Z,v)
#elif ENABLED(Z_STEPPER_AUTO_ALIGN)
#define Z_APPLY_STEP(v,Q) DUAL_SEPARATE_APPLY_STEP(Z,v)
#else
#define Z_APPLY_STEP(v,Q) do{ Z_STEP_WRITE(v); Z2_STEP_WRITE(v); }while(0)
#endif
#else
#define Z_APPLY_DIR(v,Q) Z_DIR_WRITE(v)
#define Z_APPLY_STEP(v,Q) Z_STEP_WRITE(v)
#endif
#if DISABLED(MIXING_EXTRUDER)
#define E_APPLY_STEP(v,Q) E_STEP_WRITE(stepper_extruder, v)
#endif
#define CYCLES_TO_NS(CYC) (1000UL * (CYC) / ((F_CPU) / 1000000))
#define NS_PER_PULSE_TIMER_TICK (1000000000UL / (STEPPER_TIMER_RATE))
// Round up when converting from ns to timer ticks
#define NS_TO_PULSE_TIMER_TICKS(NS) (((NS) + (NS_PER_PULSE_TIMER_TICK) / 2) / (NS_PER_PULSE_TIMER_TICK))
#define TIMER_SETUP_NS (CYCLES_TO_NS(TIMER_READ_ADD_AND_STORE_CYCLES))
#define PULSE_HIGH_TICK_COUNT hal_timer_t(NS_TO_PULSE_TIMER_TICKS(_MIN_PULSE_HIGH_NS - _MIN(_MIN_PULSE_HIGH_NS, TIMER_SETUP_NS)))
#define PULSE_LOW_TICK_COUNT hal_timer_t(NS_TO_PULSE_TIMER_TICKS(_MIN_PULSE_LOW_NS - _MIN(_MIN_PULSE_LOW_NS, TIMER_SETUP_NS)))
#define USING_TIMED_PULSE() hal_timer_t start_pulse_count = 0
#define START_TIMED_PULSE(DIR) (start_pulse_count = HAL_timer_get_count(PULSE_TIMER_NUM))
#define AWAIT_TIMED_PULSE(DIR) while (PULSE_##DIR##_TICK_COUNT > HAL_timer_get_count(PULSE_TIMER_NUM) - start_pulse_count) { }
#define START_HIGH_PULSE() START_TIMED_PULSE(HIGH)
#define AWAIT_HIGH_PULSE() AWAIT_TIMED_PULSE(HIGH)
#define START_LOW_PULSE() START_TIMED_PULSE(LOW)
#define AWAIT_LOW_PULSE() AWAIT_TIMED_PULSE(LOW)
#if MINIMUM_STEPPER_PRE_DIR_DELAY > 0
#define DIR_WAIT_BEFORE() DELAY_NS(MINIMUM_STEPPER_PRE_DIR_DELAY)
#else
#define DIR_WAIT_BEFORE()
#endif
#if MINIMUM_STEPPER_POST_DIR_DELAY > 0
#define DIR_WAIT_AFTER() DELAY_NS(MINIMUM_STEPPER_POST_DIR_DELAY)
#else
#define DIR_WAIT_AFTER()
#endif
/**
* Set the stepper direction of each axis
*
* COREXY: X_AXIS=A_AXIS and Y_AXIS=B_AXIS
* COREXZ: X_AXIS=A_AXIS and Z_AXIS=C_AXIS
* COREYZ: Y_AXIS=B_AXIS and Z_AXIS=C_AXIS
*/
void Stepper::set_directions() {
DIR_WAIT_BEF ORE();
#define SET_STEP_DIR(A) \
if (motor_direction(_AXIS(A))) { \
A##_APPLY_DIR(INVERT_##A##_DIR, false); \
count_direction[_AXIS(A)] = -1; \
} \
else { \
A##_APPLY_DIR(!INVERT_##A##_DIR, false); \
count_direction[_AXIS(A)] = 1; \
}
#if HAS_X_DIR
SET_STEP_DIR(X); // A
#endif
#if HAS_Y_DIR
SET_STEP_DIR(Y); // B
#endif
#if HAS_Z_DIR
SET_STEP_DIR(Z); // C
#endif
#if DISABLED(LIN_ADVANCE)
#if ENABLED(MIXING_EXTRUDER)
// Because this is valid for the whole block we don't know
// what e-steppers will step. Likely all. Set all.
if (motor_direction(E_AXIS)) {
MIXER_STEPPER_LOOP(j) REV_E_DIR(j);
count_direction.e = -1;
}
else {
MIXER_STEPPER_LOOP(j) NORM_E_DIR(j);
count_direction.e = 1;
}
#else
if (motor_direction(E_AXIS)) {
REV_E_DIR(stepper_extruder);
count_direction.e = -1;
}
else {
NORM_E_DIR(stepper_extruder);
count_direction.e = 1;
}
#endif
#endif // !LIN_ADVANCE
#if HAS_L64XX
if (L64XX_OK_to_power_up) {
// OK to send the direction commands (which powers up the L64XX steppers)
if (L64xxManager.spi_active) {
L64xxManager.spi_abort = true; // Interrupted SPI transfer needs to shut down gracefully
for (uint8_t j = 1; j <= L64XX::chain[0]; j++)
L6470_buf[j] = dSPIN_NOP; // Fill buffer with NOOPs
L64xxManager.transfer(L6470_buf, L64XX::chain[0]); // Send enough NOOPs to complete any command
L64xxManager.transfer(L6470_buf, L64XX::chain[0]);
L64xxManager.transfer(L6470_buf, L64XX::chain[0]);
}
// L64xxManager.dir_commands[] is an array that holds direction command for each stepper
// Scan command array, copy matches into L64xxManager.transfer
for (uint8_t j = 1; j <= L64XX::chain[0]; j++)
L6470_buf[j] = L64xxManager.dir_commands[L64XX::chain[j]];
L64xxManager.transfer(L6470_buf, L64XX::chain[0]); // send the command stream to the drivers
}
#endif
DIR_WAIT_AFTER();
}
#if ENABLED(S_CURVE_ACCELERATION)
/**
* This uses a quintic (fifth-degree) Bézier polynomial for the velocity curve, giving
* a "linear pop" velocity curve; with pop being the sixth derivative of position:
* velocity - 1st, acceleration - 2nd, jerk - 3rd, snap - 4th, crackle - 5th, pop - 6th
*
* The Bézier curve takes the form:
*
* V(t) = P_0 * B_0(t) + P_1 * B_1(t) + P_2 * B_2(t) + P_3 * B_3(t) + P_4 * B_4(t) + P_5 * B_5(t)
*
* Where 0 <= t <= 1, and V(t) is the velocity. P_0 through P_5 are the control points, and B_0(t)
* through B_5(t) are the Bernstein basis as follows:
*
* B_0(t) = (1-t)^5 = -t^5 + 5t^4 - 10t^3 + 10t^2 - 5t + 1
* B_1(t) = 5(1-t)^4 * t = 5t^5 - 20t^4 + 30t^3 - 20t^2 + 5t
* B_2(t) = 10(1-t)^3 * t^2 = -10t^5 + 30t^4 - 30t^3 + 10t^2
* B_3(t) = 10(1-t)^2 * t^3 = 10t^5 - 20t^4 + 10t^3
* B_4(t) = 5(1-t) * t^4 = -5t^5 + 5t^4
* B_5(t) = t^5 = t^5
* ^ ^ ^ ^ ^ ^
* | | | | | |
* A B C D E F
*
* Unfortunately, we cannot use forward-differencing to calculate each position through
* the curve, as Marlin uses variable timer periods. So, we require a formula of the form:
*
* V_f(t) = A*t^5 + B*t^4 + C*t^3 + D*t^2 + E*t + F
*
* Looking at the above B_0(t) through B_5(t) expanded forms, if we take the coefficients of t^5
* through t of the Bézier form of V(t), we can determine that:
*
* A = -P_0 + 5*P_1 - 10*P_2 + 10*P_3 - 5*P_4 + P_5
* B = 5*P_0 - 20*P_1 + 30*P_2 - 20*P_3 + 5*P_4
* C = -10*P_0 + 30*P_1 - 30*P_2 + 10*P_3
* D = 10*P_0 - 20*P_1 + 10*P_2
* E = - 5*P_0 + 5*P_1
* F = P_0
*
* Now, since we will (currently) *always* want the initial acceleration and jerk values to be 0,
* We set P_i = P_0 = P_1 = P_2 (initial velocity), and P_t = P_3 = P_4 = P_5 (target velocity),
* which, after simplification, resolves to:
*
* A = - 6*P_i + 6*P_t = 6*(P_t - P_i)
* B = 15*P_i - 15*P_t = 15*(P_i - P_t)
* C = -10*P_i + 10*P_t = 10*(P_t - P_i)
* D = 0
* E = 0
* F = P_i
*
* As the t is evaluated in non uniform steps here, there is no other way rather than evaluating
* the Bézier curve at each point:
*
* V_f(t) = A*t^5 + B*t^4 + C*t^3 + F [0 <= t <= 1]
*
* Floating point arithmetic execution time cost is prohibitive, so we will transform the math to
* use fixed point values to be able to evaluate it in realtime. Assuming a maximum of 250000 steps
* per second (driver pulses should at least be 2µS hi/2µS lo), and allocating 2 bits to avoid
* overflows on the evaluation of the Bézier curve, means we can use
*
* t: unsigned Q0.32 (0 <= t < 1) |range 0 to 0xFFFFFFFF unsigned
* A: signed Q24.7 , |range = +/- 250000 * 6 * 128 = +/- 192000000 = 0x0B71B000 | 28 bits + sign
* B: signed Q24.7 , |range = +/- 250000 *15 * 128 = +/- 480000000 = 0x1C9C3800 | 29 bits + sign
* C: signed Q24.7 , |range = +/- 250000 *10 * 128 = +/- 320000000 = 0x1312D000 | 29 bits + sign
* F: signed Q24.7 , |range = +/- 250000 * 128 = 32000000 = 0x01E84800 | 25 bits + sign
*
* The trapezoid generator state contains the following information, that we will use to create and evaluate
* the Bézier curve:
*
* blk->step_event_count [TS] = The total count of steps for this movement. (=distance)
* blk->initial_rate [VI] = The initial steps per second (=velocity)
* blk->final_rate [VF] = The ending steps per second (=velocity)
* and the count of events completed (step_events_completed) [CS] (=distance until now)
*
* Note the abbreviations we use in the following formulae are between []s
*
* For Any 32bit CPU:
*
* At the start of each trapezoid, calculate the coefficients A,B,C,F and Advance [AV], as follows:
*
* A = 6*128*(VF - VI) = 768*(VF - VI)
* B = 15*128*(VI - VF) = 1920*(VI - VF)
* C = 10*128*(VF - VI) = 1280*(VF - VI)
* F = 128*VI = 128*VI
* AV = (1<<32)/TS ~= 0xFFFFFFFF / TS (To use ARM UDIV, that is 32 bits) (this is computed at the planner, to offload expensive calculations from the ISR)
*
* And for each point, evaluate the curve with the following sequence:
*
* void lsrs(uint32_t& d, uint32_t s, int cnt) {
* d = s >> cnt;
* }
* void lsls(uint32_t& d, uint32_t s, int cnt) {
* d = s << cnt;
* }
* void lsrs(int32_t& d, uint32_t s, int cnt) {
* d = uint32_t(s) >> cnt;
* }
* void lsls(int32_t& d, uint32_t s, int cnt) {
* d = uint32_t(s) << cnt;
* }
* void umull(uint32_t& rlo, uint32_t& rhi, uint32_t op1, uint32_t op2) {
* uint64_t res = uint64_t(op1) * op2;
* rlo = uint32_t(res & 0xFFFFFFFF);
* rhi = uint32_t((res >> 32) & 0xFFFFFFFF);
* }
* void smlal(int32_t& rlo, int32_t& rhi, int32_t op1, int32_t op2) {
* int64_t mul = int64_t(op1) * op2;
* int64_t s = int64_t(uint32_t(rlo) | ((uint64_t(uint32_t(rhi)) << 32U)));
* mul += s;
* rlo = int32_t(mul & 0xFFFFFFFF);
* rhi = int32_t((mul >> 32) & 0xFFFFFFFF);
* }
* int32_t _eval_bezier_curve_arm(uint32_t curr_step) {
* uint32_t flo = 0;
* uint32_t fhi = bezier_AV * curr_step;
* uint32_t t = fhi;
* int32_t alo = bezier_F;
* int32_t ahi = 0;
* int32_t A = bezier_A;
* int32_t B = bezier_B;
* int32_t C = bezier_C;
*
* lsrs(ahi, alo, 1); // a = F << 31
* lsls(alo, alo, 31); //
* umull(flo, fhi, fhi, t); // f *= t
* umull(flo, fhi, fhi, t); // f>>=32; f*=t
* lsrs(flo, fhi, 1); //
* smlal(alo, ahi, flo, C); // a+=(f>>33)*C
* umull(flo, fhi, fhi, t); // f>>=32; f*=t
* lsrs(flo, fhi, 1); //
* smlal(alo, ahi, flo, B); // a+=(f>>33)*B
* umull(flo, fhi, fhi, t); // f>>=32; f*=t
* lsrs(flo, fhi, 1); // f>>=33;
* smlal(alo, ahi, flo, A); // a+=(f>>33)*A;
* lsrs(alo, ahi, 6); // a>>=38
*
* return alo;
* }
*
* This is rewritten in ARM assembly for optimal performance (43 cycles to execute).
*
* For AVR, the precision of coefficients is scaled so the Bézier curve can be evaluated in real-time:
* Let's reduce precision as much as possible. After some experimentation we found that:
*
* Assume t and AV with 24 bits is enough
* A = 6*(VF - VI)
* B = 15*(VI - VF)
* C = 10*(VF - VI)
* F = VI
* AV = (1<<24)/TS (this is computed at the planner, to offload expensive calculations from the ISR)
*
* Instead of storing sign for each coefficient, we will store its absolute value,
* and flag the sign of the A coefficient, so we can save to store the sign bit.
* It always holds that sign(A) = - sign(B) = sign(C)
*
* So, the resulting range of the coefficients are:
*
* t: unsigned (0 <= t < 1) |range 0 to 0xFFFFFF unsigned
* A: signed Q24 , range = 250000 * 6 = 1500000 = 0x16E360 | 21 bits
* B: signed Q24 , range = 250000 *15 = 3750000 = 0x393870 | 22 bits
* C: signed Q24 , range = 250000 *10 = 2500000 = 0x1312D0 | 21 bits
* F: signed Q24 , range = 250000 = 250000 = 0x0ED090 | 20 bits
*
* And for each curve, estimate its coefficients with:
*
* void _calc_bezier_curve_coeffs(int32_t v0, int32_t v1, uint32_t av) {
* // Calculate the Bézier coefficients
* if (v1 < v0) {
* A_negative = true;
* bezier_A = 6 * (v0 - v1);
* bezier_B = 15 * (v0 - v1);
* bezier_C = 10 * (v0 - v1);
* }
* else {
* A_negative = false;
* bezier_A = 6 * (v1 - v0);
* bezier_B = 15 * (v1 - v0);
* bezier_C = 10 * (v1 - v0);
* }
* bezier_F = v0;
* }
*
* And for each point, evaluate the curve with the following sequence:
*
* // unsigned multiplication of 24 bits x 24bits, return upper 16 bits
* void umul24x24to16hi(uint16_t& r, uint24_t op1, uint24_t op2) {
* r = (uint64_t(op1) * op2) >> 8;
* }
* // unsigned multiplication of 16 bits x 16bits, return upper 16 bits
* void umul16x16to16hi(uint16_t& r, uint16_t op1, uint16_t op2) {
* r = (uint32_t(op1) * op2) >> 16;
* }
* // unsigned multiplication of 16 bits x 24bits, return upper 24 bits
* void umul16x24to24hi(uint24_t& r, uint16_t op1, uint24_t op2) {
* r = uint24_t((uint64_t(op1) * op2) >> 16);
* }
*
* int32_t _eval_bezier_curve(uint32_t curr_step) {
* // To save computing, the first step is always the initial speed
* if (!curr_step)
* return bezier_F;
*
* uint16_t t;
* umul24x24to16hi(t, bezier_AV, curr_step); // t: Range 0 - 1^16 = 16 bits
* uint16_t f = t;
* umul16x16to16hi(f, f, t); // Range 16 bits (unsigned)
* umul16x16to16hi(f, f, t); // Range 16 bits : f = t^3 (unsigned)
* uint24_t acc = bezier_F; // Range 20 bits (unsigned)
* if (A_negative) {
* uint24_t v;
* umul16x24to24hi(v, f, bezier_C); // Range 21bits
* acc -= v;
* umul16x16to16hi(f, f, t); // Range 16 bits : f = t^4 (unsigned)
* umul16x24to24hi(v, f, bezier_B); // Range 22bits
* acc += v;
* umul16x16to16hi(f, f, t); // Range 16 bits : f = t^5 (unsigned)
* umul16x24to24hi(v, f, bezier_A); // Range 21bits + 15 = 36bits (plus sign)
* acc -= v;
* }
* else {
* uint24_t v;
* umul16x24to24hi(v, f, bezier_C); // Range 21bits
* acc += v;
* umul16x16to16hi(f, f, t); // Range 16 bits : f = t^4 (unsigned)
* umul16x24to24hi(v, f, bezier_B); // Range 22bits
* acc -= v;
* umul16x16to16hi(f, f, t); // Range 16 bits : f = t^5 (unsigned)
* umul16x24to24hi(v, f, bezier_A); // Range 21bits + 15 = 36bits (plus sign)
* acc += v;
* }
* return acc;
* }
* These functions are translated to assembler for optimal performance.
* Coefficient calculation takes 70 cycles. Bezier point evaluation takes 150 cycles.
*/
#ifdef __AVR__
// For AVR we use assembly to maximize speed
void Stepper::_calc_bezier_curve_coeffs(const int32_t v0, const int32_t v1, const uint32_t av) {
// Store advance
bezier_AV = av;
// Calculate the rest of the coefficients
uint8_t r2 = v0 & 0xFF;
uint8_t r3 = (v0 >> 8) & 0xFF;
uint8_t r12 = (v0 >> 16) & 0xFF;
uint8_t r5 = v1 & 0xFF;
uint8_t r6 = (v1 >> 8) & 0xFF;
uint8_t r7 = (v1 >> 16) & 0xFF;
uint8_t r4,r8,r9,r10,r11;
__asm__ __volatile__(
/* Calculate the Bézier coefficients */
/* %10:%1:%0 = v0*/
/* %5:%4:%3 = v1*/
/* %7:%6:%10 = temporary*/
/* %9 = val (must be high register!)*/
/* %10 (must be high register!)*/
/* Store initial velocity*/
A("sts bezier_F, %0")
A("sts bezier_F+1, %1")
A("sts bezier_F+2, %10") /* bezier_F = %10:%1:%0 = v0 */
/* Get delta speed */
A("ldi %2,-1") /* %2 = 0xFF, means A_negative = true */
A("clr %8") /* %8 = 0 */
A("sub %0,%3")
A("sbc %1,%4")
A("sbc %10,%5") /* v0 -= v1, C=1 if result is negative */
A("brcc 1f") /* branch if result is positive (C=0), that means v0 >= v1 */
/* Result was negative, get the absolute value*/
A("com %10")
A("com %1")
A("neg %0")
A("sbc %1,%2")
A("sbc %10,%2") /* %10:%1:%0 +1 -> %10:%1:%0 = -(v0 - v1) = (v1 - v0) */
A("clr %2") /* %2 = 0, means A_negative = false */
/* Store negative flag*/
L("1")
A("sts A_negative, %2") /* Store negative flag */
/* Compute coefficients A,B and C [20 cycles worst case]*/
A("ldi %9,6") /* %9 = 6 */
A("mul %0,%9") /* r1:r0 = 6*LO(v0-v1) */
A("sts bezier_A, r0")
A("mov %6,r1")
A("clr %7") /* %7:%6:r0 = 6*LO(v0-v1) */
A("mul %1,%9") /* r1:r0 = 6*MI(v0-v1) */
A("add %6,r0")
A("adc %7,r1") /* %7:%6:?? += 6*MI(v0-v1) << 8 */
A("mul %10,%9") /* r1:r0 = 6*HI(v0-v1) */
A("add %7,r0") /* %7:%6:?? += 6*HI(v0-v1) << 16 */
A("sts bezier_A+1, %6")
A("sts bezier_A+2, %7") /* bezier_A = %7:%6:?? = 6*(v0-v1) [35 cycles worst] */
A("ldi %9,15") /* %9 = 15 */
A("mul %0,%9") /* r1:r0 = 5*LO(v0-v1) */
A("sts bezier_B, r0")
A("mov %6,r1")
A("clr %7") /* %7:%6:?? = 5*LO(v0-v1) */
A("mul %1,%9") /* r1:r0 = 5*MI(v0-v1) */
A("add %6,r0")
A("adc %7,r1") /* %7:%6:?? += 5*MI(v0-v1) << 8 */
A("mul %10,%9") /* r1:r0 = 5*HI(v0-v1) */
A("add %7,r0") /* %7:%6:?? += 5*HI(v0-v1) << 16 */
A("sts bezier_B+1, %6")
A("sts bezier_B+2, %7") /* bezier_B = %7:%6:?? = 5*(v0-v1) [50 cycles worst] */
A("ldi %9,10") /* %9 = 10 */
A("mul %0,%9") /* r1:r0 = 10*LO(v0-v1) */
A("sts bezier_C, r0")
A("mov %6,r1")
A("clr %7") /* %7:%6:?? = 10*LO(v0-v1) */
A("mul %1,%9") /* r1:r0 = 10*MI(v0-v1) */
A("add %6,r0")
A("adc %7,r1") /* %7:%6:?? += 10*MI(v0-v1) << 8 */
A("mul %10,%9") /* r1:r0 = 10*HI(v0-v1) */
A("add %7,r0") /* %7:%6:?? += 10*HI(v0-v1) << 16 */
A("sts bezier_C+1, %6")
" sts bezier_C+2, %7" /* bezier_C = %7:%6:?? = 10*(v0-v1) [65 cycles worst] */
: "+r" (r2),
"+d" (r3),
"=r" (r4),
"+r" (r5),
"+r" (r6),
"+r" (r7),
"=r" (r8),
"=r" (r9),
"=r" (r10),
"=d" (r11),
"+r" (r12)
:
: "r0", "r1", "cc", "memory"
);
}
FORCE_INLINE int32_t Stepper::_eval_bezier_curve(const uint32_t curr_step) {
// If dealing with the first step, save expensive computing and return the initial speed
if (!curr_step)
return bezier_F;
uint8_t r0 = 0; /* Zero register */
uint8_t r2 = (curr_step) & 0xFF;
uint8_t r3 = (curr_step >> 8) & 0xFF;
uint8_t r4 = (curr_step >> 16) & 0xFF;
uint8_t r1,r5,r6,r7,r8,r9,r10,r11; /* Temporary registers */
__asm__ __volatile(
/* umul24x24to16hi(t, bezier_AV, curr_step); t: Range 0 - 1^16 = 16 bits*/
A("lds %9,bezier_AV") /* %9 = LO(AV)*/
A("mul %9,%2") /* r1:r0 = LO(bezier_AV)*LO(curr_step)*/
A("mov %7,r1") /* %7 = LO(bezier_AV)*LO(curr_step) >> 8*/
A("clr %8") /* %8:%7 = LO(bezier_AV)*LO(curr_step) >> 8*/
A("lds %10,bezier_AV+1") /* %10 = MI(AV)*/
A("mul %10,%2") /* r1:r0 = MI(bezier_AV)*LO(curr_step)*/
A("add %7,r0")
A("adc %8,r1") /* %8:%7 += MI(bezier_AV)*LO(curr_step)*/
A("lds r1,bezier_AV+2") /* r11 = HI(AV)*/
A("mul r1,%2") /* r1:r0 = HI(bezier_AV)*LO(curr_step)*/
A("add %8,r0") /* %8:%7 += HI(bezier_AV)*LO(curr_step) << 8*/
A("mul %9,%3") /* r1:r0 = LO(bezier_AV)*MI(curr_step)*/
A("add %7,r0")
A("adc %8,r1") /* %8:%7 += LO(bezier_AV)*MI(curr_step)*/
A("mul %10,%3") /* r1:r0 = MI(bezier_AV)*MI(curr_step)*/
A("add %8,r0") /* %8:%7 += LO(bezier_AV)*MI(curr_step) << 8*/