diff --git a/server/odc-migrate/src/main/resources/migrate/common/V_4_3_3_2__add_supervisor_agent.sql b/server/odc-migrate/src/main/resources/migrate/common/V_4_3_3_2__add_supervisor_agent.sql new file mode 100644 index 0000000000..47962a81c7 --- /dev/null +++ b/server/odc-migrate/src/main/resources/migrate/common/V_4_3_3_2__add_supervisor_agent.sql @@ -0,0 +1,16 @@ +-- +-- Add supervisor_endpoint table to maintain supervisor agent info, this table will bind to resource_resource +-- +CREATE TABLE `supervisor_endpoint` ( + `id` bigint(20) NOT NULL AUTO_INCREMENT, + `host` varchar(256) NOT NULL COMMENT 'host of supervisor', + `port` int(11) NOT NULL COMMENT 'port of supervisor', + `status` varchar(64) NOT NULL COMMENT 'status of supervisor, contains PREPAREING,AVAILABLE,DESTROYED,UNAVAILABLE,ABANDON', + `loads` int(11) NOT NULL COMMENT 'load of supervisor', + `resource_id` bigint(20) NOT NULL COMMENT 'resource id related to resource_resource table, -1 means not related', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'create time', + `update_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time', + PRIMARY KEY (`id`), + UNIQUE KEY host_and_port (`host`, `port`), + KEY `status_index` (`status`) +); \ No newline at end of file diff --git a/server/odc-migrate/src/main/resources/migrate/common/V_4_3_3_3__add_resource_allocate_info.sql b/server/odc-migrate/src/main/resources/migrate/common/V_4_3_3_3__add_resource_allocate_info.sql new file mode 100644 index 0000000000..16217854c2 --- /dev/null +++ b/server/odc-migrate/src/main/resources/migrate/common/V_4_3_3_3__add_resource_allocate_info.sql @@ -0,0 +1,16 @@ +-- +-- Add resource_allocate_info table to maintain task resource allocate info +-- +CREATE TABLE `resource_allocate_info` ( + `id` bigint(20) NOT NULL AUTO_INCREMENT, + `task_id` bigint(20) NOT NULL COMMENT 'task id allocate to this allocate info', + `resource_allocate_state` varchar(20) NOT NULL COMMENT 
'resource allocate state, update by resource allocator, including PREPARING, AVAILABLE, FAILED, FINISHED', + `resource_usage_state` varchar(20) NOT NULL COMMENT 'resource usage state update by resource user, including PREPARING, USING, FINISHED', + `endpoint` varchar(512) DEFAULT NULL COMMENT 'supervisor endpoint, in format host:port', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'create time', + `update_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time', + PRIMARY KEY (`id`), + UNIQUE KEY `task_index` (`task_id`), + INDEX `usage_state_index` (`resource_usage_state`, `resource_allocate_state`), + INDEX `allocate_state_index` (`resource_allocate_state`, `resource_usage_state`) +); \ No newline at end of file diff --git a/server/odc-migrate/src/main/resources/migrate/common/V_4_3_3_1__alter_resource_resource_add_index.sql b/server/odc-migrate/src/main/resources/migrate/common/V_4_3_3_4__alter_resource_resource_add_index.sql similarity index 100% rename from server/odc-migrate/src/main/resources/migrate/common/V_4_3_3_1__alter_resource_resource_add_index.sql rename to server/odc-migrate/src/main/resources/migrate/common/V_4_3_3_4__alter_resource_resource_add_index.sql diff --git a/server/odc-server/src/main/java/com/oceanbase/odc/agent/OdcAgent.java b/server/odc-server/src/main/java/com/oceanbase/odc/agent/OdcAgent.java index b7bd7150e9..d9d355c8d2 100644 --- a/server/odc-server/src/main/java/com/oceanbase/odc/agent/OdcAgent.java +++ b/server/odc-server/src/main/java/com/oceanbase/odc/agent/OdcAgent.java @@ -38,5 +38,6 @@ public static void main(String[] args) { log.error("Task existed abnormal", e); } log.info("Task executor exit."); + System.exit(0); } } diff --git a/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/EmbedServer.java b/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/EmbedServer.java index f753a7b12c..bb788a2e09 100644 --- 
a/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/EmbedServer.java +++ b/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/EmbedServer.java @@ -15,48 +15,14 @@ */ package com.oceanbase.odc.agent.runtime; -import java.net.InetSocketAddress; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.RejectedExecutionHandler; -import java.util.concurrent.ThreadFactory; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.oceanbase.odc.common.json.JsonUtils; -import com.oceanbase.odc.common.util.StringUtils; -import com.oceanbase.odc.service.common.util.UrlUtils; -import com.oceanbase.odc.service.task.executor.TraceDecoratorThreadFactory; +import com.oceanbase.odc.service.common.response.SuccessResponse; import com.oceanbase.odc.service.task.executor.TraceDecoratorUtils; +import com.oceanbase.odc.service.task.net.HttpServerContainer; +import com.oceanbase.odc.service.task.net.RequestHandler; import com.oceanbase.odc.service.task.util.JobUtils; -import io.netty.bootstrap.ServerBootstrap; -import io.netty.buffer.Unpooled; -import io.netty.channel.ChannelFuture; -import io.netty.channel.ChannelHandlerContext; -import io.netty.channel.ChannelInitializer; -import io.netty.channel.ChannelOption; -import io.netty.channel.EventLoopGroup; -import io.netty.channel.SimpleChannelInboundHandler; -import io.netty.channel.nio.NioEventLoopGroup; -import io.netty.channel.socket.SocketChannel; -import io.netty.channel.socket.nio.NioServerSocketChannel; -import io.netty.handler.codec.http.DefaultFullHttpResponse; -import io.netty.handler.codec.http.FullHttpRequest; -import io.netty.handler.codec.http.FullHttpResponse; -import io.netty.handler.codec.http.HttpHeaderNames; -import io.netty.handler.codec.http.HttpHeaderValues; -import io.netty.handler.codec.http.HttpMethod; -import 
io.netty.handler.codec.http.HttpObjectAggregator; -import io.netty.handler.codec.http.HttpResponseStatus; -import io.netty.handler.codec.http.HttpServerCodec; -import io.netty.handler.codec.http.HttpUtil; -import io.netty.handler.codec.http.HttpVersion; -import io.netty.handler.timeout.IdleStateEvent; -import io.netty.handler.timeout.IdleStateHandler; -import io.netty.util.CharsetUtil; import lombok.extern.slf4j.Slf4j; /** @@ -65,186 +31,36 @@ * @since 4.2.4 */ @Slf4j -class EmbedServer { - - private ExecutorRequestHandler requestHandler; - private Thread thread; - - public void start() { - requestHandler = new ExecutorRequestHandler(); - thread = new Thread(TraceDecoratorUtils.decorate(new Runnable() { - @Override - public void run() { - // param - EventLoopGroup bossGroup = new NioEventLoopGroup(); - EventLoopGroup workerGroup = new NioEventLoopGroup(); - ThreadPoolExecutor bizThreadPool = new ThreadPoolExecutor( - 0, - 128, - 60L, - TimeUnit.SECONDS, - new LinkedBlockingQueue(64), - new TraceDecoratorThreadFactory(new ThreadFactory() { - @Override - public Thread newThread(Runnable r) { - return new Thread(r, "odc-job, EmbedServer bizThreadPool-" + r.hashCode()); - } - }), - new RejectedExecutionHandler() { - @Override - public void rejectedExecution(Runnable r, ThreadPoolExecutor executor) { - throw new RuntimeException("odc-job, EmbedServer bizThreadPool is EXHAUSTED!"); - } - }); - try { - // start server - ServerBootstrap bootstrap = new ServerBootstrap(); - bootstrap.group(bossGroup, workerGroup) - .channel(NioServerSocketChannel.class) - .childHandler(new ChannelInitializer() { - @Override - public void initChannel(SocketChannel channel) throws Exception { - channel.pipeline() - .addLast(new IdleStateHandler(0, 0, 30 * 3, TimeUnit.SECONDS)) // beat 3N, - // close if - // idle - .addLast(new HttpServerCodec()) - .addLast(new HttpObjectAggregator(5 * 1024 * 1024)) // merge request & - // reponse to FULL - .addLast(new EmbedHttpServerHandler(requestHandler, 
bizThreadPool)); - } - }) - .childOption(ChannelOption.SO_KEEPALIVE, true); - - ChannelFuture future; - int port; - if (JobUtils.getExecutorPort().isPresent()) { - // start with assigned port - future = bootstrap.bind(JobUtils.getExecutorPort().get()).sync(); - port = JobUtils.getExecutorPort().get(); - } else { - // start with random port - future = bootstrap.bind(0).sync(); - InetSocketAddress localAddress = (InetSocketAddress) future.channel().localAddress(); - // save port to system properties - JobUtils.setExecutorPort(localAddress.getPort()); - port = localAddress.getPort(); - } - log.info("odc-job remoting server start success, nettype = {}, port = {}", - EmbedServer.class, port); - - // wait util stop - future.channel().closeFuture().sync(); - - } catch (InterruptedException e) { - log.info("odc-job remoting server stop."); - } catch (Exception e) { - log.error("odc-job remoting server error.", e); - } finally { - // stop - try { - workerGroup.shutdownGracefully(); - bossGroup.shutdownGracefully(); - } catch (Exception e) { - log.error(e.getMessage(), e); - } - } - } - })); - thread.setDaemon(true); // daemon, service jvm, user thread leave >>> daemon leave >>> jvm leave - thread.start(); - } - - public void stop() throws Exception { - // destroy server thread - if (thread != null && thread.isAlive()) { - thread.interrupt(); +class EmbedServer extends HttpServerContainer> { + @Override + protected int getPort() { + int port; + if (JobUtils.getExecutorPort().isPresent()) { + // start with assigned port + port = JobUtils.getExecutorPort().get(); + } else { + port = 0; } - - log.info("odc-job remoting server destroy success."); + return port; } + @Override + protected RequestHandler> getRequestHandler() { + return new ExecutorRequestHandler(); + } - - public static class EmbedHttpServerHandler extends SimpleChannelInboundHandler { - private static final Logger logger = LoggerFactory.getLogger(EmbedHttpServerHandler.class); - - private final ThreadPoolExecutor 
bizThreadPool; - private final ExecutorRequestHandler requestHandler; - - public EmbedHttpServerHandler(ExecutorRequestHandler executorRequestHandler, ThreadPoolExecutor bizThreadPool) { - this.requestHandler = executorRequestHandler; - this.bizThreadPool = bizThreadPool; - } - - @Override - protected void channelRead0(final ChannelHandlerContext ctx, FullHttpRequest msg) throws Exception { - // request parse - // final byte[] requestBytes = ByteBufUtil.getBytes(msg.content()); // - // byteBuf.toString(io.netty.util.CharsetUtil.UTF_8); - String requestData = msg.content().toString(CharsetUtil.UTF_8); - String uri = UrlUtils.decode(msg.uri()); - HttpMethod httpMethod = msg.method(); - boolean keepAlive = HttpUtil.isKeepAlive(msg); - if (StringUtils.isNotBlank(uri)) { - logger.info("odc-job get uri {}", uri); - } - if (StringUtils.isNotBlank(requestData)) { - logger.info("odc-job get requestData {}", requestData); - } - - // invoke - bizThreadPool.execute(TraceDecoratorUtils.decorate(new Runnable() { - @Override - public void run() { - // do invoke - Object responseObj = requestHandler.process(httpMethod, uri, requestData); - - // to json - String responseJson = JsonUtils.toJson(responseObj); - - // write response - writeResponse(ctx, keepAlive, responseJson); - } - })); - } - - /** - * write response - */ - private void writeResponse(ChannelHandlerContext ctx, boolean keepAlive, String responseJson) { - // write response - FullHttpResponse response = new DefaultFullHttpResponse(HttpVersion.HTTP_1_1, HttpResponseStatus.OK, - Unpooled.copiedBuffer(responseJson, CharsetUtil.UTF_8)); - response.headers().set(HttpHeaderNames.CONTENT_TYPE, "application/json"); - response.headers().set(HttpHeaderNames.CONTENT_LENGTH, response.content().readableBytes()); - if (keepAlive) { - response.headers().set(HttpHeaderNames.CONNECTION, HttpHeaderValues.KEEP_ALIVE); - } - ctx.writeAndFlush(response); - } - - @Override - public void channelReadComplete(ChannelHandlerContext ctx) throws 
Exception { - ctx.flush(); - } - - @Override - public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) { - logger.error("odc-job provider netty_http server caught exception", cause); - ctx.close(); - } - - @Override - public void userEventTriggered(ChannelHandlerContext ctx, Object evt) throws Exception { - if (evt instanceof IdleStateEvent) { - ctx.channel().close(); // beat 3N, close if idle - logger.debug("odc-job provider netty_http server close an idle channel."); - } else { - super.userEventTriggered(ctx, evt); - } - } + @Override + protected String getModuleName() { + return "odc-job"; } + @Override + protected Thread createThread(Runnable r) { + return new Thread(TraceDecoratorUtils.decorate(r)); + } + @Override + protected Consumer portConsumer() { + return JobUtils::setExecutorPort; + } } diff --git a/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/ExecutorRequestHandler.java b/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/ExecutorRequestHandler.java index 89c257daf4..91ac8bff28 100644 --- a/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/ExecutorRequestHandler.java +++ b/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/ExecutorRequestHandler.java @@ -30,6 +30,7 @@ import com.oceanbase.odc.service.task.executor.logger.LogBiz; import com.oceanbase.odc.service.task.executor.logger.LogBizImpl; import com.oceanbase.odc.service.task.executor.logger.LogUtils; +import com.oceanbase.odc.service.task.net.RequestHandler; import com.oceanbase.odc.service.task.schedule.JobIdentity; import com.oceanbase.odc.service.task.util.JobUtils; @@ -42,7 +43,7 @@ * @since 4.2.4 */ @Slf4j -class ExecutorRequestHandler { +class ExecutorRequestHandler implements RequestHandler> { private final Pattern queryLogUrlPattern = Pattern.compile(String.format(JobExecutorUrls.QUERY_LOG, "([0-9]+)")); private final Pattern stopTaskPattern = Pattern.compile(String.format(JobExecutorUrls.STOP_TASK, "([0-9]+)")); 
@@ -86,7 +87,7 @@ public SuccessResponse process(HttpMethod httpMethod, String uri, String if (matcher.find()) { JobIdentity ji = getJobIdentity(matcher); TaskRuntimeInfo runtimeInfo = ThreadPoolTaskExecutor.getInstance().getTaskRuntimeInfo(ji); - boolean result = runtimeInfo.getTaskContainer().modify(JobUtils.fromJsonToMap(requestData)); + boolean result = runtimeInfo.getTaskContainer().modifyTask(JobUtils.fromJsonToMap(requestData)); return Responses.ok(result); } @@ -109,11 +110,16 @@ public SuccessResponse process(HttpMethod httpMethod, String uri, String return Responses.single("invalid request, uri-mapping(" + uri + ") not found."); } catch (Exception e) { - log.error(e.getMessage(), e); - return Responses.single("request error:" + ExceptionUtils.getRootCauseReason(e)); + return processException(e); } } + @Override + public SuccessResponse processException(Throwable e) { + log.error(e.getMessage(), e); + return Responses.single("request error:" + ExceptionUtils.getRootCauseReason(e)); + } + private static JobIdentity getJobIdentity(Matcher matcher) { return JobIdentity.of(Long.parseLong(matcher.group(1))); } diff --git a/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/TaskApplication.java b/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/TaskApplication.java index c3b3e8274f..614b55eb11 100644 --- a/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/TaskApplication.java +++ b/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/TaskApplication.java @@ -18,24 +18,17 @@ import java.io.File; import java.net.URI; import java.net.URISyntaxException; -import java.util.HashMap; -import java.util.Map; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.core.LoggerContext; -import com.oceanbase.odc.common.trace.TaskContextHolder; -import com.oceanbase.odc.common.trace.TraceContextHolder; -import com.oceanbase.odc.common.util.StringUtils; +import com.oceanbase.odc.common.ExitHelper; +import 
com.oceanbase.odc.common.JobContextResolver; +import com.oceanbase.odc.common.json.JsonUtils; import com.oceanbase.odc.common.util.SystemUtils; -import com.oceanbase.odc.core.shared.Verify; import com.oceanbase.odc.service.task.Task; import com.oceanbase.odc.service.task.caller.JobContext; -import com.oceanbase.odc.service.task.caller.JobEnvironmentEncryptor; -import com.oceanbase.odc.service.task.constants.JobEnvKeyConstants; import com.oceanbase.odc.service.task.exception.TaskRuntimeException; -import com.oceanbase.odc.service.task.executor.context.JobContextProviderFactory; -import com.oceanbase.odc.service.task.util.JobUtils; import lombok.extern.slf4j.Slf4j; @@ -49,8 +42,15 @@ public class TaskApplication { private JobContext context; public void run(String[] args) { + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + log.info("Task executor exits, systemInfo={}", SystemUtils.getSystemMemoryInfo()); + })); try { - init(args); + context = new JobContextResolver().resolveJobContext(args); + // set log4j xml + setLog4JConfigXml(); + log.info("context is {}", JsonUtils.toJson(context)); + log.info("initial log configuration success."); } catch (Exception e) { log.warn("Init task error:", e); throw e; @@ -62,6 +62,7 @@ public void run(String[] args) { Task task = TaskFactory.create(context.getJobClass()); ThreadPoolTaskExecutor.getInstance().execute(task, context); ExitHelper.await(); + ThreadPoolTaskExecutor.getInstance().cancel(context.getJobIdentity()); } catch (Exception e) { log.warn("Execute task error:", e); } finally { @@ -73,58 +74,6 @@ public void run(String[] args) { } } - private void init(String[] args) { - Runtime.getRuntime().addShutdownHook(new Thread(() -> { - log.info("Task executor exits, systemInfo={}", SystemUtils.getSystemMemoryInfo()); - })); - // 1 step: valid environment value not blank - validEnvValues(); - log.info("verify environment variables success."); - - // 2 step: decrypt environment value - decryptEnvironments(); - 
log.info("decrypt environment variables success."); - - // 3 step: get JobContext from environment - context = JobContextProviderFactory.create(SystemUtils.getEnvOrProperty(JobEnvKeyConstants.ODC_TASK_RUN_MODE)) - .provide(); - log.info("initial job context success."); - - // 4 step: trace taskId in log4j2 context - trace(context.getJobIdentity().getId()); - // 5 step: set log path in system properties - setLogPathSysProperty(); - // 6 step: set log4j2.xml - setLog4JConfigXml(); - log.info("initial log configuration success."); - - log.info("Task executor start info, ip={}, port={}, runMode={}, taskId={}, logPath={}, userId={}.", - SystemUtils.getLocalIpAddress(), - SystemUtils.getEnvOrProperty(JobEnvKeyConstants.ODC_EXECUTOR_PORT), - SystemUtils.getEnvOrProperty(JobEnvKeyConstants.ODC_TASK_RUN_MODE), - context.getJobIdentity().getId(), - System.getProperty(JobEnvKeyConstants.ODC_LOG_DIRECTORY), - SystemUtils.getEnvOrProperty(JobEnvKeyConstants.ODC_EXECUTOR_USER_ID)); - } - - private void decryptEnvironments() { - Map allProperties = new HashMap<>(System.getenv()); - System.getProperties().forEach((key, value) -> { - allProperties.put((String) key, (String) value); - }); - new JobEnvironmentEncryptor().decrypt(allProperties); - } - - private void trace(long taskId) { - TraceContextHolder.trace(); - // mock userId - TaskContextHolder.trace(JobUtils.getUserId(), taskId); - } - - private void setLogPathSysProperty() { - JobUtils.putEnvToSysProperties(JobEnvKeyConstants.ODC_LOG_DIRECTORY); - } - private void setLog4JConfigXml() { String configurationFile = System.getProperty("log4j.configurationFile"); URI taskLogFile = null; @@ -146,24 +95,4 @@ private void setLog4JConfigXml() { // this will force a reconfiguration, MDC context will to take effect context.setConfigLocation(taskLogFile); } - - private void validEnvValues() { - validNotBlank(JobEnvKeyConstants.ODC_TASK_RUN_MODE); - if (StringUtils.equalsIgnoreCase("PROCESS", - 
SystemUtils.getEnvOrProperty(JobEnvKeyConstants.ODC_TASK_RUN_MODE))) { - validNotBlank(JobEnvKeyConstants.ODC_JOB_CONTEXT_FILE_PATH); - } else { - validNotBlank(JobEnvKeyConstants.ODC_JOB_CONTEXT); - } - validNotBlank(JobEnvKeyConstants.ODC_BOOT_MODE); - validNotBlank(JobEnvKeyConstants.ENCRYPT_SALT); - validNotBlank(JobEnvKeyConstants.ENCRYPT_KEY); - validNotBlank(JobEnvKeyConstants.ODC_EXECUTOR_USER_ID); - validNotBlank(JobEnvKeyConstants.ODC_LOG_DIRECTORY); - } - - private void validNotBlank(String envKey) { - Verify.notBlank(SystemUtils.getEnvOrProperty(envKey), envKey); - } - } diff --git a/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/TaskContainer.java b/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/TaskContainer.java index c780b20144..cb8a16a919 100644 --- a/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/TaskContainer.java +++ b/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/TaskContainer.java @@ -51,6 +51,9 @@ final class TaskContainer implements ExceptionListener { // only save latest exception if any // it will be cleaned if been fetched protected AtomicReference latestException = new AtomicReference<>(); + // flips to true on the first closeTaskContainer() call so the final work is not repeated + protected AtomicBoolean finalWorkDone = new AtomicBoolean(false); + public TaskContainer(JobContext jobContext, CloudObjectStorageService cloudObjectStorageService, TaskReporter taskReporter, // assignable for test @@ -99,11 +102,11 @@ public void runTask() { updateStatus(TaskStatus.FAILED); onException(e); } finally { - close(); + closeTask(); } } - public boolean stop() { + public boolean stopTask() { try { if (getStatus().isTerminated()) { log.warn("Task is already finished and cannot be canceled, id={}, status={}.", getJobId(), getStatus()); @@ -117,11 +120,11 @@ public boolean stop() { log.warn("Stop task failed, id={}", getJobId(), e); return false; } finally { - close(); + closeTask(); } } - public boolean modify(Map jobParameters) { + public 
boolean modifyTask(Map jobParameters) { if (Objects.isNull(jobParameters) || jobParameters.isEmpty()) { log.warn("Job parameter cannot be null, id={}", getJobId()); return false; @@ -135,7 +138,7 @@ public boolean modify(Map jobParameters) { } - private void close() { + private void closeTask() { if (closed.compareAndSet(false, true)) { try { task.close(); @@ -143,7 +146,18 @@ private void close() { // do nothing } log.info("Task completed, id={}, status={}.", getJobId(), getStatus()); + } + } + + public synchronized void closeTaskContainer() { + if (!finalWorkDone.compareAndSet(false, true)) { + log.info("final work has done"); + return; + } + try { taskMonitor.finalWork(); + } catch (Throwable e) { + log.info("do final work failed", e); } } @@ -153,8 +167,14 @@ public TaskStatus getStatus() { private void updateStatus(TaskStatus status) { - log.info("Update task status, id={}, status={}.", getJobId(), status); - this.status = status; + TaskStatus prevStatus = this.status; + if (!this.status.isTerminated()) { + this.status = status; + log.info("Update task status, id={}, from prev = {} to status={}.", getJobId(), prevStatus, status); + } else { + log.info("Status has terminated, , id={}, status={}. 
ignore transfer tp status = {}", getJobId(), + prevStatus, status); + } } protected Map getJobParameters() { diff --git a/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/TaskMonitor.java b/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/TaskMonitor.java index 05526a314e..2fbea4a7ca 100644 --- a/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/TaskMonitor.java +++ b/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/TaskMonitor.java @@ -79,16 +79,19 @@ public Map getLogMetadata() { public void monitor() { log.info("monitor starting, jobId={}", getJobId()); - this.startTimeMilliSeconds = System.currentTimeMillis(); + initReportScheduler(); + initHeartbeatScheduler(); + } + private void initReportScheduler() { ThreadFactory threadFactory = new TraceDecoratorThreadFactory(new TaskThreadFactory(("Task-Monitor-Job-" + getJobId()))); this.reportScheduledExecutor = Executors.newSingleThreadScheduledExecutor(threadFactory); reportScheduledExecutor.scheduleAtFixedRate(() -> { if (isTimeout() && !getTaskContainer().getStatus().isTerminated()) { log.info("Task timeout, try stop, jobId={}", getJobId()); - getTaskContainer().stop(); + getTaskContainer().stopTask(); } try { if (JobUtils.getExecutorPort().isPresent()) { @@ -101,22 +104,28 @@ public void monitor() { JobConstants.REPORT_TASK_INFO_INTERVAL_SECONDS, TimeUnit.SECONDS); log.info("Task monitor init success"); + } - heartScheduledExecutor = Executors.newSingleThreadScheduledExecutor( - new TaskThreadFactory(("Task-Heart-Job-" + getJobId()))); - - heartScheduledExecutor.scheduleAtFixedRate(() -> { - try { - if (JobUtils.getExecutorPort().isPresent() && JobUtils.isReportEnabled()) { - getReporter().report(JobServerUrls.TASK_HEARTBEAT, buildHeartRequest()); + private void initHeartbeatScheduler() { + if (JobUtils.getExecutorPort().isPresent() && JobUtils.isReportEnabled()) { + heartScheduledExecutor = Executors.newSingleThreadScheduledExecutor( + new 
TaskThreadFactory(("Task-Heart-Job-" + getJobId()))); + + heartScheduledExecutor.scheduleAtFixedRate(() -> { + try { + if (JobUtils.getExecutorPort().isPresent() && JobUtils.isReportEnabled()) { + getReporter().report(JobServerUrls.TASK_HEARTBEAT, buildHeartRequest()); + } + } catch (Throwable e) { + log.warn("Update heart info failed, id: {}", getJobId(), e); } - } catch (Throwable e) { - log.warn("Update heart info failed, id: {}", getJobId(), e); - } - }, JobConstants.REPORT_TASK_HEART_DELAY_SECONDS, - JobConstants.REPORT_TASK_HEART_INTERVAL_SECONDS, - TimeUnit.SECONDS); - log.info("Task heart init success"); + }, JobConstants.REPORT_TASK_HEART_DELAY_SECONDS, + JobConstants.REPORT_TASK_HEART_INTERVAL_SECONDS, + TimeUnit.SECONDS); + log.info("Task heart init success"); + } else { + log.info("heart beat not needed, cause report not needed"); + } } public void finalWork() { @@ -174,7 +183,6 @@ protected boolean isTimeout() { @VisibleForTesting protected void doFinal() { - TaskResult finalResult = DefaultTaskResultBuilder.build(getTaskContainer()); // Report final result log.info("Task id: {}, finished with status: {}, start to report final result", getJobId(), diff --git a/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/ThreadPoolTaskExecutor.java b/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/ThreadPoolTaskExecutor.java index e20d3bffc9..ca83d2ebb6 100644 --- a/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/ThreadPoolTaskExecutor.java +++ b/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/ThreadPoolTaskExecutor.java @@ -20,11 +20,14 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; import java.util.concurrent.Future; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import 
java.util.concurrent.ThreadPoolExecutor.CallerRunsPolicy; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import com.oceanbase.odc.common.ExitHelper; import com.oceanbase.odc.common.concurrent.ExecutorUtils; import com.oceanbase.odc.core.shared.PreConditions; import com.oceanbase.odc.core.task.TaskThreadFactory; @@ -53,8 +56,8 @@ class ThreadPoolTaskExecutor implements TaskExecutor { private final ExecutorService executor; private ThreadPoolTaskExecutor() { - this.executor = Executors.newFixedThreadPool(2, - new TraceDecoratorThreadFactory(new TaskThreadFactory("Task-Executor"))); + this.executor = new ThreadPoolExecutor(2, 2, 1000, TimeUnit.SECONDS, new LinkedBlockingQueue<>(16), + new TraceDecoratorThreadFactory(new TaskThreadFactory("Task-Executor")), new CallerRunsPolicy()); } public static TaskExecutor getInstance() { @@ -82,6 +85,10 @@ synchronized public void execute(Task task, JobContext jc) { } catch (Exception e) { log.error("Task start failed, jobIdentity={}.", jobIdentity.getId(), e); taskContainer.onException(e); + } finally { + taskContainer.closeTaskContainer(); + // the agent holds only one task; once it is done, release the exit latch so the agent can quit + ExitHelper.notifyFinished(); } }); taskRuntimeInfo.setFuture(future); @@ -110,7 +117,7 @@ protected CloudObjectStorageService buildCloudStorageService(JobContext jobConte public boolean cancel(JobIdentity ji) { TaskRuntimeInfo runtimeInfo = getTaskRuntimeInfo(ji); TaskContainer task = runtimeInfo.getTaskContainer(); - Future stopFuture = executor.submit(task::stop); + Future stopFuture = executor.submit(task::stopTask); boolean result = false; try { // wait 10 seconds for stop task accomplished diff --git a/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/ExitHelper.java b/server/odc-server/src/main/java/com/oceanbase/odc/common/ExitHelper.java similarity index 88% rename from server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/ExitHelper.java rename to 
server/odc-server/src/main/java/com/oceanbase/odc/common/ExitHelper.java index 472039836e..2bf218c73b 100644 --- a/server/odc-server/src/main/java/com/oceanbase/odc/agent/runtime/ExitHelper.java +++ b/server/odc-server/src/main/java/com/oceanbase/odc/common/ExitHelper.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.oceanbase.odc.agent.runtime; +package com.oceanbase.odc.common; import java.util.concurrent.CountDownLatch; @@ -25,7 +25,7 @@ * @since 4.2.4 */ @Slf4j -class ExitHelper { +public class ExitHelper { private static final CountDownLatch LATCH = new CountDownLatch(1); @@ -36,4 +36,8 @@ public static void await() { log.warn("Await thread be interrupted and exit:", e); } } + + public static void notifyFinished() { + LATCH.countDown(); + } } diff --git a/server/odc-server/src/main/java/com/oceanbase/odc/common/JobContextResolver.java b/server/odc-server/src/main/java/com/oceanbase/odc/common/JobContextResolver.java new file mode 100644 index 0000000000..f4798a0b03 --- /dev/null +++ b/server/odc-server/src/main/java/com/oceanbase/odc/common/JobContextResolver.java @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package com.oceanbase.odc.common;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import com.oceanbase.odc.common.trace.TaskContextHolder;
+import com.oceanbase.odc.common.trace.TraceContextHolder;
+import com.oceanbase.odc.common.util.StringUtils;
+import com.oceanbase.odc.common.util.SystemUtils;
+import com.oceanbase.odc.core.shared.Verify;
+import com.oceanbase.odc.service.task.caller.JobContext;
+import com.oceanbase.odc.service.task.caller.JobEnvironmentEncryptor;
+import com.oceanbase.odc.service.task.constants.JobEnvKeyConstants;
+import com.oceanbase.odc.service.task.executor.context.JobContextProviderFactory;
+import com.oceanbase.odc.service.task.util.JobUtils;
+
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Resolves the {@link JobContext} of a task process from environment variables and system
+ * properties.
+ *
+ * @author longpeng.zlp
+ * @date 2024/11/26 15:11
+ */
+@Slf4j
+public class JobContextResolver {
+    /**
+     * Validates the start-up settings, hands them to the decryptor, builds the job context and
+     * prepares tracing and the log directory property.
+     *
+     * @param args process arguments; not read by this method
+     * @return the job context supplied by the provider selected through ODC_TASK_RUN_MODE
+     */
+    public JobContext resolveJobContext(String[] args) {
+        // 1 step: verify that the required environment values are not blank
+        validEnvValues();
+        log.info("verify environment variables success.");
+
+        // 2 step: decrypt environment values
+        decryptEnvironments();
+        log.info("decrypt environment variables success.");
+
+        // 3 step: get JobContext from environment
+        JobContext context =
+                JobContextProviderFactory.create(SystemUtils.getEnvOrProperty(JobEnvKeyConstants.ODC_TASK_RUN_MODE))
+                        .provide();
+        log.info("initial job context success.");
+
+        // 4 step: trace taskId in log4j2 context
+        trace(context.getJobIdentity().getId());
+        // 5 step: set log path in system properties
+        setLogPathSysProperty();
+
+        log.info("Task executor start info, ip={}, port={}, runMode={}, taskId={}, logPath={}, userId={}.",
+                SystemUtils.getLocalIpAddress(),
+                SystemUtils.getEnvOrProperty(JobEnvKeyConstants.ODC_EXECUTOR_PORT),
+                SystemUtils.getEnvOrProperty(JobEnvKeyConstants.ODC_TASK_RUN_MODE),
+                context.getJobIdentity().getId(),
+                System.getProperty(JobEnvKeyConstants.ODC_LOG_DIRECTORY),
+                SystemUtils.getEnvOrProperty(JobEnvKeyConstants.ODC_EXECUTOR_USER_ID));
+        return context;
+    }
+
+    /**
+     * Merges the process environment with the system properties (properties win on a key clash)
+     * and passes the merged map to {@link JobEnvironmentEncryptor#decrypt}.
+     */
+    private void decryptEnvironments() {
+        Map<String, String> allProperties = new HashMap<>(System.getenv());
+        // stringPropertyNames() only yields entries whose key and value are both strings, so a
+        // non-string system property can no longer abort start-up with a ClassCastException
+        for (String name : System.getProperties().stringPropertyNames()) {
+            String value = System.getProperty(name);
+            if (value != null) {
+                allProperties.put(name, value);
+            }
+        }
+        // NOTE(review): decrypt() is presumably what publishes the decrypted values - confirm
+        new JobEnvironmentEncryptor().decrypt(allProperties);
+    }
+
+    /**
+     * Starts a trace and binds the user id and task id to the task context.
+     */
+    private void trace(long taskId) {
+        TraceContextHolder.trace();
+        // mock userId
+        TaskContextHolder.trace(JobUtils.getUserId(), taskId);
+    }
+
+    /**
+     * Copies the ODC_LOG_DIRECTORY setting into the system properties.
+     */
+    private void setLogPathSysProperty() {
+        JobUtils.putEnvToSysProperties(JobEnvKeyConstants.ODC_LOG_DIRECTORY);
+    }
+
+
+    /**
+     * Checks every setting the task process needs. In PROCESS run mode the job context is expected
+     * as a file path, in any other mode as an inline value.
+     */
+    private void validEnvValues() {
+        validNotBlank(JobEnvKeyConstants.ODC_TASK_RUN_MODE);
+        if (StringUtils.equalsIgnoreCase("PROCESS",
+                SystemUtils.getEnvOrProperty(JobEnvKeyConstants.ODC_TASK_RUN_MODE))) {
+            validNotBlank(JobEnvKeyConstants.ODC_JOB_CONTEXT_FILE_PATH);
+        } else {
+            validNotBlank(JobEnvKeyConstants.ODC_JOB_CONTEXT);
+        }
+        validNotBlank(JobEnvKeyConstants.ODC_BOOT_MODE);
+        validNotBlank(JobEnvKeyConstants.ENCRYPT_SALT);
+        validNotBlank(JobEnvKeyConstants.ENCRYPT_KEY);
+        validNotBlank(JobEnvKeyConstants.ODC_EXECUTOR_USER_ID);
+        validNotBlank(JobEnvKeyConstants.ODC_LOG_DIRECTORY);
+    }
+
+    /**
+     * Delegates to {@link Verify#notBlank} for the value stored under {@code envKey}.
+     */
+    private void validNotBlank(String envKey) {
+        Verify.notBlank(SystemUtils.getEnvOrProperty(envKey), envKey);
+    }
+
+}
diff --git a/server/odc-server/src/main/java/com/oceanbase/odc/supervisor/SupervisorAgent.java b/server/odc-server/src/main/java/com/oceanbase/odc/supervisor/SupervisorAgent.java
new file mode 100644
index 0000000000..e5649a92c9
--- /dev/null
+++ b/server/odc-server/src/main/java/com/oceanbase/odc/supervisor/SupervisorAgent.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2023 OceanBase.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.oceanbase.odc.supervisor;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.classification.InterfaceStability.Evolving;
+
+import com.oceanbase.odc.common.JobContextResolver;
+import com.oceanbase.odc.common.util.StringUtils;
+import com.oceanbase.odc.server.module.Modules;
+import com.oceanbase.odc.service.task.caller.JobContext;
+import com.oceanbase.odc.service.task.caller.ProcessConfig;
+import com.oceanbase.odc.service.task.constants.JobEnvKeyConstants;
+import com.oceanbase.odc.service.task.supervisor.protocol.StartTaskCommand;
+import com.oceanbase.odc.supervisor.runtime.SupervisorApplication;
+
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Process entry point that starts a supervisor, submits one start-task command built from the
+ * resolved job context and then stops the supervisor again.
+ *
+ * @author longpeng.zlp
+ * @date 2024/11/26 15:43
+ */
+@Slf4j
+@Evolving
+public class SupervisorAgent {
+    /**
+     * Runs the supervisor for a single task start. Any failure is logged and the supervisor is
+     * stopped in all cases.
+     *
+     * @param args process arguments, forwarded to the supervisor application and the resolver
+     */
+    public static void main(String[] args) {
+
+        log.info("Supervisor agent started");
+        SupervisorApplication supervisorApplication = null;
+        try {
+            Modules.load();
+            // config it
+            supervisorApplication = new SupervisorApplication(0);
+            supervisorApplication.start(args);
+            // send command context, then stop to stay compatible with the previous logic
+            JobContext jobContext = new JobContextResolver().resolveJobContext(args);
+            supervisorApplication.getTaskSupervisorServer().getTaskCommandExecutor()
+                    .onCommand(StartTaskCommand.create(jobContext, buildFromJobContext(jobContext)));
+            // supervisorApplication.waitStop();
+        } catch (Throwable e) {
+            log.error("Supervisor agent stopped", e);
+        } finally {
+            if (null != supervisorApplication) {
+                supervisorApplication.stop();
+            }
+        }
+        log.info("Supervisor agent stopped.");
+    }
+
+    /**
+     * Builds the process configuration of the task to start: fixed heap sizes plus a copy of the
+     * current environment without the job context file path entry.
+     *
+     * @param jobContext job context of the task; not read by this method
+     * @return process configuration handed to the start-task command
+     */
+    private static ProcessConfig buildFromJobContext(JobContext jobContext) {
+        ProcessConfig processConfig = new ProcessConfig();
+        processConfig.setJvmXmsMB(1024);
+        processConfig.setJvmXmxMB(2048);
+        Map<String, String> env = new HashMap<>();
+        for (Map.Entry<String, String> entry : System.getenv().entrySet()) {
+            // ignore job context file path
+            if (StringUtils.equalsIgnoreCase(entry.getKey(), JobEnvKeyConstants.ODC_JOB_CONTEXT_FILE_PATH)) {
+                continue;
+            }
+            env.put(entry.getKey(), entry.getValue());
+        }
+        processConfig.setEnvironments(env);
+        // log the names only: the environment can carry secrets such as the
+        // JobEnvKeyConstants.ENCRYPT_KEY / ENCRYPT_SALT entries, which must not reach the log
+        log.info("process config with environment keys = {}", env.keySet());
+        return processConfig;
+    }
+
+}
diff --git a/server/odc-server/src/main/java/com/oceanbase/odc/supervisor/runtime/SupervisorApplication.java b/server/odc-server/src/main/java/com/oceanbase/odc/supervisor/runtime/SupervisorApplication.java
new file mode 100644
index 0000000000..076c6122f7
--- /dev/null
+++ b/server/odc-server/src/main/java/com/oceanbase/odc/supervisor/runtime/SupervisorApplication.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2023 OceanBase.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.oceanbase.odc.supervisor.runtime;
+
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import com.oceanbase.odc.common.util.SystemUtils;
+import com.oceanbase.odc.service.task.constants.JobConstants;
+import com.oceanbase.odc.service.task.supervisor.TaskSupervisor;
+import com.oceanbase.odc.service.task.supervisor.endpoint.SupervisorEndpoint;
+import com.oceanbase.odc.service.task.supervisor.runtime.LocalTaskCommandExecutor;
+import com.oceanbase.odc.service.task.supervisor.runtime.TaskSupervisorServer;
+
+import lombok.Getter;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Owns one {@link TaskSupervisorServer} and exposes start / wait / stop operations for it.
+ *
+ * @author longpeng.zlp
+ * @date 2024/11/26 14:41
+ */
+@Slf4j
+public class SupervisorApplication {
+    @Getter
+    private TaskSupervisorServer taskSupervisorServer;
+    private final int port;
+    private AtomicBoolean stopped = new AtomicBoolean(false);
+
+    public SupervisorApplication(int port) {
+        this.port = port;
+    }
+
+    /**
+     * Creates the supervisor server for the configured port and starts it. A start failure is
+     * logged and marks this application as stopped; it is not rethrown.
+     *
+     * @param args process arguments; not read by this method
+     */
+    public void start(String[] args) {
+        // TODO(longxuan): will be given in future release
+        SupervisorEndpoint selfEndpoint = new SupervisorEndpoint(SystemUtils.getLocalIpAddress(), port);
+        TaskSupervisor supervisor = new TaskSupervisor(selfEndpoint, JobConstants.ODC_AGENT_CLASS_NAME);
+        LocalTaskCommandExecutor commandExecutor = new LocalTaskCommandExecutor(supervisor);
+        taskSupervisorServer = new TaskSupervisorServer(port, commandExecutor);
+        try {
+            taskSupervisorServer.start();
+            log.info("Starting supervisor agent.");
+            // current directly quit agent
+        } catch (Exception startFailure) {
+            log.warn("Supervisor agent stopped", startFailure);
+            stopped.set(true);
+        }
+    }
+
+    /**
+     * Delegates to the server's waitStop() unless this application is already marked stopped or
+     * no server was created. Exceptions are logged, never propagated.
+     */
+    public void waitStop() {
+        // reading the flag cannot throw, so the guard can sit outside the try block
+        if (stopped.get()) {
+            return;
+        }
+        if (null == taskSupervisorServer) {
+            return;
+        }
+        try {
+            taskSupervisorServer.waitStop();
+        } catch (Exception waitFailure) {
+            log.warn("Stop supervisor agent occur exception:", waitFailure);
+        }
+    }
+
+    /**
+     * Marks this application as stopped and stops the server if one was created. Failures are
+     * logged, never propagated.
+     */
+    public void stop() {
+        stopped.set(true);
+        if (null == taskSupervisorServer) {
+            return;
+        }
+        try {
+            taskSupervisorServer.stop();
+        } catch (Throwable stopFailure) {
+            log.warn("Stop supervisor agent occur exception:", stopFailure);
+        }
+    }
+}
diff --git
a/server/odc-server/src/main/resources/config/application.yml b/server/odc-server/src/main/resources/config/application.yml index 63148ec438..08e8d79834 100644 --- a/server/odc-server/src/main/resources/config/application.yml +++ b/server/odc-server/src/main/resources/config/application.yml @@ -81,6 +81,4 @@ management: include: "business,application" endpoint: prometheus: - enabled: true - server: - port: 8089 + enabled: false diff --git a/server/odc-server/src/test/java/com/oceanbase/odc/agent/runtime/TaskContainerTest.java b/server/odc-server/src/test/java/com/oceanbase/odc/agent/runtime/TaskContainerTest.java index f978141bf0..cdbdbe68b0 100644 --- a/server/odc-server/src/test/java/com/oceanbase/odc/agent/runtime/TaskContainerTest.java +++ b/server/odc-server/src/test/java/com/oceanbase/odc/agent/runtime/TaskContainerTest.java @@ -53,7 +53,6 @@ public void init() { jobContext.setJobClass(SimpleTask.class.getName()); } - @Test public void testExceptionListenerNormal() { try (MockedStatic mockSystemUtil = Mockito.mockStatic(SystemUtils.class)) { diff --git a/server/odc-server/src/test/java/com/oceanbase/odc/supervisor/SupervisorApplicationTest.java b/server/odc-server/src/test/java/com/oceanbase/odc/supervisor/SupervisorApplicationTest.java new file mode 100644 index 0000000000..264f889fe8 --- /dev/null +++ b/server/odc-server/src/test/java/com/oceanbase/odc/supervisor/SupervisorApplicationTest.java @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.oceanbase.odc.supervisor;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.net.Socket;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import com.oceanbase.odc.common.util.SystemUtils;
+import com.oceanbase.odc.service.task.caller.DefaultJobContext;
+import com.oceanbase.odc.service.task.caller.JobContext;
+import com.oceanbase.odc.service.task.caller.ProcessConfig;
+import com.oceanbase.odc.service.task.constants.JobConstants;
+import com.oceanbase.odc.service.task.constants.JobEnvKeyConstants;
+import com.oceanbase.odc.service.task.dummy.DummyTask;
+import com.oceanbase.odc.service.task.exception.JobException;
+import com.oceanbase.odc.service.task.schedule.JobIdentity;
+import com.oceanbase.odc.service.task.supervisor.JobEventHandler;
+import com.oceanbase.odc.service.task.supervisor.PortDetector;
+import com.oceanbase.odc.service.task.supervisor.TaskCallerResult;
+import com.oceanbase.odc.service.task.supervisor.TaskSupervisorJobCaller;
+import com.oceanbase.odc.service.task.supervisor.endpoint.ExecutorEndpoint;
+import com.oceanbase.odc.service.task.supervisor.endpoint.SupervisorEndpoint;
+import com.oceanbase.odc.service.task.supervisor.proxy.LocalTaskSupervisorProxy;
+import com.oceanbase.odc.service.task.util.JobUtils;
+import com.oceanbase.odc.service.task.util.TaskExecutorClient;
+import com.oceanbase.odc.supervisor.runtime.SupervisorApplication;
+
+/**
+ * Exercises starting, stopping and finishing a task through a supervisor application that is
+ * addressed as a remote endpoint.
+ *
+ * @author longpeng.zlp
+ * @date 2024/12/9 15:42
+ */
+public class SupervisorApplicationTest {
+    // upper bound for waiting on the executor port, so a failed start fails the test instead of
+    // hanging the whole suite
+    private static final long PORT_WAIT_TIMEOUT_MILLIS = 120_000L;
+    private static final long PORT_POLL_INTERVAL_MILLIS = 500L;
+
+    private SupervisorEndpoint localSupervisorEndpoint;
+    private SupervisorEndpoint remoteSupervisorEndpoint;
+    // supervisor application
+    private SupervisorApplication supervisorApplication;
+    // local task supervisor
+    private LocalTaskSupervisorProxy localTaskSupervisorProxy;
+
+    private TaskSupervisorJobCaller taskSupervisorJobCaller;
+
+    private JobContext jobContext;
+
+    private ProcessConfig processConfig;
+
+    @Before
+    public void setUp() {
+        PortDetector portDetector = PortDetector.getInstance();
+        int allocatePort = portDetector.getPort();
+        supervisorApplication = new SupervisorApplication(allocatePort);
+        supervisorApplication.start(new String[] {});
+        String ip = SystemUtils.getLocalIpAddress();
+        remoteSupervisorEndpoint = new SupervisorEndpoint(ip, allocatePort);
+        localSupervisorEndpoint = new SupervisorEndpoint(ip, 8989);
+        localTaskSupervisorProxy =
+                new LocalTaskSupervisorProxy(localSupervisorEndpoint, JobConstants.ODC_AGENT_CLASS_NAME);
+        JobEventHandler jobEventHandler = Mockito.mock(JobEventHandler.class);
+        taskSupervisorJobCaller =
+                new TaskSupervisorJobCaller(jobEventHandler, localTaskSupervisorProxy, new TaskExecutorClient());
+        jobContext = createJobContext();
+        processConfig = createProcessConfig();
+    }
+
+    @After
+    public void clear() {
+        // JUnit runs @After even when @Before failed, so the application may not exist yet
+        if (supervisorApplication == null) {
+            return;
+        }
+        supervisorApplication.stop();
+        supervisorApplication.waitStop();
+    }
+
+    @Test
+    public void testRemoteTaskOperation() throws JobException, IOException {
+        ExecutorEndpoint executorEndpoint =
+                taskSupervisorJobCaller.startTask(remoteSupervisorEndpoint, jobContext, processConfig);
+        waitPortAvailable(executorEndpoint);
+        Assert.assertEquals(executorEndpoint.getSupervisorPort(), remoteSupervisorEndpoint.getPort());
+        // local and remote can see job alive
+        Assert.assertTrue(localTaskSupervisorProxy.isTaskAlive(remoteSupervisorEndpoint, executorEndpoint, jobContext));
+        Assert.assertTrue(localTaskSupervisorProxy.isTaskAlive(localSupervisorEndpoint, executorEndpoint, jobContext));
+        // verify supervisor alive
+        Assert.assertTrue(localTaskSupervisorProxy.isSupervisorAlive(remoteSupervisorEndpoint));
+        Assert.assertTrue(localTaskSupervisorProxy.isSupervisorAlive(localSupervisorEndpoint));
+        // stop task, sync
+        TaskCallerResult taskCallerResult =
+                taskSupervisorJobCaller.stopTask(remoteSupervisorEndpoint, executorEndpoint, jobContext);
+        Assert.assertTrue(taskCallerResult.getSucceed());
+        Assert.assertTrue(localTaskSupervisorProxy.isSupervisorAlive(remoteSupervisorEndpoint));
+        // verify finish
+        taskCallerResult = taskSupervisorJobCaller.finish(remoteSupervisorEndpoint, executorEndpoint, jobContext);
+        Assert.assertTrue(taskCallerResult.getSucceed());
+        // check task stopped
+        Assert.assertFalse(
+                localTaskSupervisorProxy.isTaskAlive(remoteSupervisorEndpoint, executorEndpoint, jobContext));
+    }
+
+    @Test
+    public void testRemoteTaskOperationWithSupervisorNotAlive() throws JobException, IOException, InterruptedException {
+        ExecutorEndpoint executorEndpoint =
+                taskSupervisorJobCaller.startTask(remoteSupervisorEndpoint, jobContext, processConfig);
+        Assert.assertEquals(executorEndpoint.getSupervisorPort(), remoteSupervisorEndpoint.getPort());
+        waitPortAvailable(executorEndpoint);
+        // shutdown remote supervisor
+        supervisorApplication.stop();
+        supervisorApplication.waitStop();
+        // stop task
+        TaskCallerResult taskCallerResult =
+                taskSupervisorJobCaller.stopTaskDirectly(remoteSupervisorEndpoint, executorEndpoint, jobContext);
+        // still can stop
+        Assert.assertTrue(taskCallerResult.getSucceed());
+        // check supervisor stopped
+        Assert.assertFalse(localTaskSupervisorProxy.isSupervisorAlive(remoteSupervisorEndpoint));
+        // verify finish
+        taskCallerResult = taskSupervisorJobCaller.finish(remoteSupervisorEndpoint, executorEndpoint, jobContext);
+        Assert.assertFalse(taskCallerResult.getSucceed());
+    }
+
+
+    @Test
+    public void testRemoteTaskOperationWithTaskNotAlive() throws JobException, IOException {
+        ExecutorEndpoint executorEndpoint =
+                taskSupervisorJobCaller.startTask(remoteSupervisorEndpoint, jobContext, processConfig);
+        waitPortAvailable(executorEndpoint);
+        Assert.assertEquals(executorEndpoint.getSupervisorPort(), remoteSupervisorEndpoint.getPort());
+        // shutdown remote task
+        localTaskSupervisorProxy.stopTask(localSupervisorEndpoint, executorEndpoint, jobContext);
+        Assert.assertFalse(localTaskSupervisorProxy.isTaskAlive(localSupervisorEndpoint, executorEndpoint, jobContext));
+        // stop task
+        TaskCallerResult taskCallerResult =
+                taskSupervisorJobCaller.stopTask(remoteSupervisorEndpoint, executorEndpoint, jobContext);
+        // still can stop
+        Assert.assertTrue(taskCallerResult.getSucceed());
+        // check supervisor still alive
+        Assert.assertTrue(localTaskSupervisorProxy.isSupervisorAlive(remoteSupervisorEndpoint));
+        // verify finish
+        taskCallerResult = taskSupervisorJobCaller.finish(remoteSupervisorEndpoint, executorEndpoint, jobContext);
+        Assert.assertTrue(taskCallerResult.getSucceed());
+    }
+
+    /**
+     * Builds the process configuration used to launch the dummy task: 1024 MB heap and the
+     * environment entries the executor requires, passed through JobUtils.encryptEnvironments.
+     */
+    private ProcessConfig createProcessConfig() {
+        ProcessConfig ret = new ProcessConfig();
+        ret.setJvmXmxMB(1024);
+        ret.setJvmXmsMB(1024);
+        // plain map instead of a double-brace initializer, which creates an anonymous subclass
+        Map<String, String> envMap = new HashMap<>();
+        envMap.put(JobEnvKeyConstants.REPORT_ENABLED, Boolean.FALSE.toString());
+        envMap.put(JobEnvKeyConstants.ODC_LOG_DIRECTORY, ".");
+        envMap.put(JobEnvKeyConstants.ODC_EXECUTOR_USER_ID, "1024");
+        envMap.put(JobEnvKeyConstants.ODC_BOOT_MODE, "TASK_EXECUTOR");
+        envMap.put(JobEnvKeyConstants.ODC_TASK_RUN_MODE, "PROCESS");
+        JobUtils.encryptEnvironments(envMap);
+        ret.setEnvironments(envMap);
+        return ret;
+    }
+
+
+    /**
+     * Builds a job context for {@link DummyTask} with job id 1024 and one property and parameter.
+     */
+    private JobContext createJobContext() {
+        DefaultJobContext jobContext = new DefaultJobContext();
+        jobContext.setJobClass(DummyTask.class.getName());
+        jobContext.setJobIdentity(JobIdentity.of(1024L));
+        Map<String, String> jobProperties = new HashMap<>();
+        jobProperties.put("prop1", "valueProp1");
+        jobContext.setJobProperties(jobProperties);
+        Map<String, String> jobParameters = new HashMap<>();
+        jobParameters.put("param1", "valueParam1");
+        jobContext.setJobParameters(jobParameters);
+        return jobContext;
+    }
+
+    /**
+     * Returns true when a TCP connection to the executor port succeeds within one second, i.e.
+     * when something is listening on it.
+     */
+    private static boolean isPortAvailable(ExecutorEndpoint executorEndpoint) {
+        try (Socket socket = new Socket()) {
+            socket.connect(new InetSocketAddress(executorEndpoint.getHost(), executorEndpoint.getExecutorPort()), 1000);
+            return true;
+        } catch (Exception e) {
+            return false;
+        }
+    }
+
+    /**
+     * Polls the executor port until it accepts connections. Fails the test when the port is not
+     * reachable before the timeout or when the waiting thread is interrupted.
+     */
+    private static void waitPortAvailable(ExecutorEndpoint executorEndpoint) {
+        long deadline = System.currentTimeMillis() + PORT_WAIT_TIMEOUT_MILLIS;
+        while (!isPortAvailable(executorEndpoint)) {
+            if (System.currentTimeMillis() > deadline) {
+                Assert.fail("executor " + executorEndpoint.getHost() + ":" + executorEndpoint.getExecutorPort()
+                        + " not reachable within " + PORT_WAIT_TIMEOUT_MILLIS + " ms");
+            }
+            try {
+                Thread.sleep(PORT_POLL_INTERVAL_MILLIS);
+            } catch (InterruptedException e) {
+                // keep the interrupt visible to the test runner instead of swallowing it
+                Thread.currentThread().interrupt();
+                Assert.fail("interrupted while waiting for the executor port");
+            }
+        }
+    }
+}
+
diff --git a/server/odc-server/src/test/java/com/oceanbase/odc/supervisor/runtime/TaskSupervisorServerTest.java b/server/odc-server/src/test/java/com/oceanbase/odc/supervisor/runtime/TaskSupervisorServerTest.java
new file mode 100644
index 0000000000..ebc1968777
--- /dev/null
+++ b/server/odc-server/src/test/java/com/oceanbase/odc/supervisor/runtime/TaskSupervisorServerTest.java
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2023 OceanBase.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.oceanbase.odc.supervisor.runtime;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.oceanbase.odc.common.util.MapUtils;
+import com.oceanbase.odc.service.task.caller.DefaultJobContext;
+import com.oceanbase.odc.service.task.caller.JobContext;
+import com.oceanbase.odc.service.task.caller.ProcessConfig;
+import com.oceanbase.odc.service.task.dummy.DummyTask;
+import com.oceanbase.odc.service.task.exception.JobException;
+import com.oceanbase.odc.service.task.schedule.JobIdentity;
+import com.oceanbase.odc.service.task.supervisor.endpoint.ExecutorEndpoint;
+import com.oceanbase.odc.service.task.supervisor.endpoint.SupervisorEndpoint;
+import com.oceanbase.odc.service.task.supervisor.protocol.CommandType;
+import com.oceanbase.odc.service.task.supervisor.protocol.GeneralTaskCommand;
+import com.oceanbase.odc.service.task.supervisor.protocol.StartTaskCommand;
+import com.oceanbase.odc.service.task.supervisor.protocol.TaskCommand;
+import com.oceanbase.odc.service.task.supervisor.protocol.TaskCommandSender;
+import com.oceanbase.odc.service.task.supervisor.runtime.TaskCommandExecutor;
+import com.oceanbase.odc.service.task.supervisor.runtime.TaskSupervisorServer;
+
+/**
+ * Sends commands to a {@link TaskSupervisorServer} bound to an ephemeral port and checks that the
+ * registered executor receives them unchanged.
+ *
+ * @author longpeng.zlp
+ * @date 2024/11/25 16:58
+ */
+public class TaskSupervisorServerTest {
+    // upper bound for the server to report its bound port, so a failed start cannot hang the suite
+    private static final long SERVER_START_TIMEOUT_MILLIS = 60_000L;
+
+    private TaskSupervisorServer taskSupervisorServer;
+    private SimpleTaskCommandExecutor simpleTaskCommandExecutor;
+    private TaskCommandSender taskCommandSender;
+    private DefaultJobContext jobContext;
+
+    @Before
+    public void setUp() throws InterruptedException {
+        // init job context
+        jobContext = new DefaultJobContext();
+        jobContext.setJobIdentity(JobIdentity.of(1024L));
+        jobContext.setJobClass(DummyTask.class.getName());
+        jobContext.setHostUrls(Arrays.asList("127.0.0.1:8080"));
+        HashMap<String, String> jobParameters = new HashMap<>();
+        jobParameters.put("par1", "par11");
+        jobParameters.put("par2", "par21");
+        jobContext.setJobParameters(jobParameters);
+        HashMap<String, String> jobProperties = new HashMap<>();
+        jobProperties.put("pro1", "pro11");
+        jobProperties.put("pro2", "pro21");
+        jobContext.setJobProperties(jobProperties);
+        taskCommandSender = new TaskCommandSender();
+        simpleTaskCommandExecutor = new SimpleTaskCommandExecutor();
+        taskSupervisorServer = new TaskSupervisorServer(0, simpleTaskCommandExecutor);
+        taskSupervisorServer.start();
+        // port 0 asks for an ephemeral port; wait (bounded) until the server reports the real one
+        long deadline = System.currentTimeMillis() + SERVER_START_TIMEOUT_MILLIS;
+        while (taskSupervisorServer.getServerPort() <= 0) {
+            if (System.currentTimeMillis() > deadline) {
+                Assert.fail("task supervisor server reported no port within " + SERVER_START_TIMEOUT_MILLIS + " ms");
+            }
+            Thread.sleep(100);
+        }
+    }
+
+    @After
+    public void shutdown() throws Exception {
+        // JUnit runs @After even when @Before failed, so the server may not exist yet
+        if (taskSupervisorServer != null) {
+            taskSupervisorServer.stop();
+        }
+    }
+
+    @Test
+    public void testStartCommandProcess() throws IOException {
+        ProcessConfig processConfig = new ProcessConfig();
+        processConfig.setJvmXmsMB(1024);
+        processConfig.setJvmXmxMB(2048);
+        HashMap<String, String> environments = new HashMap<>();
+        environments.put("env1", "key1");
+        environments.put("evn2", "key2");
+        processConfig.setEnvironments(environments);
+        StartTaskCommand startTaskCommand = StartTaskCommand.create(jobContext, processConfig);
+        String ret = taskCommandSender.sendCommand(
+                new SupervisorEndpoint("127.0.0.1", taskSupervisorServer.getServerPort()),
+                startTaskCommand);
+        Assert.assertEquals(startTaskCommand.commandType().name().toLowerCase(), ret);
+        StartTaskCommand receivedCommand = (StartTaskCommand) simpleTaskCommandExecutor.receivedTaskCommand;
+        checkJobContextEquals(receivedCommand.getJobContext(), jobContext);
+        ProcessConfig receivedProcessConfig = receivedCommand.getProcessConfig();
+        Assert.assertEquals(1024, receivedProcessConfig.getJvmXmsMB());
+        Assert.assertEquals(2048, receivedProcessConfig.getJvmXmxMB());
+        Assert.assertTrue(MapUtils.isEqual(receivedProcessConfig.getEnvironments(), processConfig.getEnvironments(),
+                String::equals));
+    }
+
+    @Test
+    public void testNoneStartCommandProcess() throws IOException {
+        ExecutorEndpoint endpoint = new ExecutorEndpoint("command", "127.0.0.1", 8989, 12345, "identifier");
+        for (CommandType commandType : CommandType.values()) {
+            // START is covered by testStartCommandProcess, it uses a different command class
+            if (commandType == CommandType.START) {
+                continue;
+            }
+            GeneralTaskCommand generalTaskCommand = GeneralTaskCommand.create(jobContext, endpoint, commandType);
+            String ret = taskCommandSender.sendCommand(
+                    new SupervisorEndpoint("127.0.0.1", taskSupervisorServer.getServerPort()),
+                    generalTaskCommand);
+            GeneralTaskCommand receivedCommand = (GeneralTaskCommand) simpleTaskCommandExecutor.receivedTaskCommand;
+            Assert.assertEquals(generalTaskCommand.commandType().name().toLowerCase(), ret);
+            checkJobContextEquals(receivedCommand.getJobContext(), jobContext);
+            ExecutorEndpoint receivedEndpoint = receivedCommand.getExecutorEndpoint();
+            Assert.assertEquals(endpoint.getSupervisorPort(), receivedEndpoint.getSupervisorPort());
+            Assert.assertEquals(endpoint.getIdentifier(), receivedEndpoint.getIdentifier());
+            Assert.assertEquals(endpoint.getProtocol(), receivedEndpoint.getProtocol());
+            Assert.assertEquals(endpoint.getHost(), receivedEndpoint.getHost());
+            Assert.assertEquals(endpoint.getExecutorPort(), receivedEndpoint.getExecutorPort());
+        }
+    }
+
+    @Test
+    public void testHeartbeat() throws IOException {
+        String ret = taskCommandSender.heartbeat(
+                new SupervisorEndpoint("127.0.0.1", taskSupervisorServer.getServerPort()));
+        GeneralTaskCommand receivedCommand = (GeneralTaskCommand) simpleTaskCommandExecutor.receivedTaskCommand;
+        // a heartbeat must not reach the command executor
+        Assert.assertNull(receivedCommand);
+        Assert.assertEquals("true", ret);
+    }
+
+
+    /**
+     * Asserts that both job contexts carry the same class, parameters, properties, id and hosts.
+     */
+    private void checkJobContextEquals(JobContext src, JobContext dest) {
+        Assert.assertEquals(src.getJobClass(), dest.getJobClass());
+        Assert.assertTrue(MapUtils.isEqual(src.getJobParameters(), dest.getJobParameters(), String::equals));
+        Assert.assertTrue(MapUtils.isEqual(src.getJobProperties(), dest.getJobProperties(), String::equals));
+        Assert.assertEquals(src.getJobIdentity().getId(), dest.getJobIdentity().getId());
+        Assert.assertEquals(src.getHostUrls(), dest.getHostUrls());
+    }
+
+    /**
+     * Executor stub that remembers the last command and answers with its lower-cased type name.
+     */
+    private static final class SimpleTaskCommandExecutor implements TaskCommandExecutor {
+        // volatile: presumably written on a server thread and read on the test thread - TODO confirm
+        private volatile TaskCommand receivedTaskCommand;
+
+        @Override
+        public String onCommand(TaskCommand taskCommand) throws JobException {
+            this.receivedTaskCommand = taskCommand;
+            return taskCommand.commandType().name().toLowerCase();
+        }
+    }
+}
diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/JobEntity.java b/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/JobEntity.java
index 27cea4bc61..af8354be89 100644
--- a/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/JobEntity.java
+++ b/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/JobEntity.java
@@ -86,6 +86,9 @@ public class JobEntity implements Serializable {
     @Column(name = "run_mode")
     private TaskRunMode runMode;
 
+    /**
+     * this column is json string of TaskResult, not TaskResult.getResultJson
+     */
     @Column(name = "result_json")
     private String resultJson;
 
diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/JobRepository.java b/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/JobRepository.java
index 3228742a7c..924c3a4163 100644
--- a/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/JobRepository.java
+++ b/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/JobRepository.java
@@ -45,6 +45,14 @@ public interface JobRepository extends JpaRepository,
     int updateExecutorEndpoint(@Param("id") Long id, @Param("executorEndpoint") String executorEndpoint,
             @Param("oldStatus") JobStatus oldStatus);
 
+    @Transactional
+    @Query(value = "update job_job set "
+            + " executor_endpoint=:executorEndpoint, executor_identifier = :executorIdentifier"
+            + " where id=:id", nativeQuery = true)
+    @Modifying
+    int updateExecutorEndpointAndExecutorIdentifierById(@Param("id") Long id,
+            @Param("executorEndpoint") String executorEndpoint, @Param("executorIdentifier") String executorIdentifier);
+
     @Transactional
     @Query(value = "update job_job set "
             + " status=:#{#param.status.name()},"
diff --git
a/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/ResourceAllocateInfoEntity.java b/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/ResourceAllocateInfoEntity.java
new file mode 100644
index 0000000000..2e93dcf788
--- /dev/null
+++ b/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/ResourceAllocateInfoEntity.java
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2023 OceanBase.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.oceanbase.odc.metadb.task;
+
+import java.util.Date;
+
+import javax.persistence.Column;
+import javax.persistence.Entity;
+import javax.persistence.GeneratedValue;
+import javax.persistence.GenerationType;
+import javax.persistence.Id;
+import javax.persistence.Table;
+
+import org.hibernate.annotations.Generated;
+import org.hibernate.annotations.GenerationTime;
+
+import lombok.Data;
+
+/**
+ * JPA entity of the {@code resource_allocate_info} table, one record per task
+ *
+ * @author longpeng.zlp
+ * @date 2024/12/4 16:48
+ */
+@Data
+@Entity
+@Table(name = "resource_allocate_info")
+public class ResourceAllocateInfoEntity {
+
+    /**
+     * Id of the resource allocate info record
+     */
+    @Id
+    @Column(name = "id", nullable = false)
+    @GeneratedValue(strategy = GenerationType.IDENTITY)
+    private Long id;
+
+    /**
+     * task id relate to job_job
+     */
+    @Column(name = "task_id", nullable = false)
+    private Long taskId;
+
+    /**
+     * resource allocate state, update by resource allocator, including PREPARING, AVAILABLE, FAILED,
+     * FINISHED
+     */
+    @Column(name = "resource_allocate_state", nullable = false)
+    private String resourceAllocateState;
+
+    /**
+     * resource usage state update by resource user, including PREPARING, USING, FINISHED
+     */
+    @Column(name = "resource_usage_state", nullable = false)
+    private String resourceUsageState;
+
+    /**
+     * supervisor endpoint, in format host:port. Nullable: the migration declares the column as
+     * DEFAULT NULL, presumably because the endpoint is only known once allocation succeeded
+     */
+    @Column(name = "endpoint")
+    private String endpoint;
+
+    /**
+     * Record insertion time
+     */
+    @Generated(GenerationTime.ALWAYS)
+    @Column(name = "create_time", insertable = false, updatable = false,
+            columnDefinition = "datetime NOT NULL DEFAULT CURRENT_TIMESTAMP")
+    private Date createTime;
+
+    /**
+     * Record modification time
+     */
+    @Generated(GenerationTime.ALWAYS)
+    @Column(name = "update_time", insertable = false, updatable = false,
+            columnDefinition = "datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP")
+    private Date updateTime;
+}
diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/ResourceAllocateInfoRepository.java b/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/ResourceAllocateInfoRepository.java
new file mode 100644
index 0000000000..8e24248075
--- /dev/null
+++ b/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/ResourceAllocateInfoRepository.java
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2023 OceanBase.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.oceanbase.odc.metadb.task;
+
+import java.util.Optional;
+
+import javax.transaction.Transactional;
+
+import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.data.jpa.repository.JpaSpecificationExecutor;
+import org.springframework.data.jpa.repository.Modifying;
+import org.springframework.data.jpa.repository.Query;
+import org.springframework.data.repository.query.Param;
+import org.springframework.stereotype.Repository;
+
+/**
+ * Repository of {@link ResourceAllocateInfoEntity}; all custom statements are native SQL against
+ * the {@code resource_allocate_info} table.
+ *
+ * @author yaobin
+ * @date 2023-12-06
+ * @since 4.2.4
+ */
+@Repository
+public interface ResourceAllocateInfoRepository extends JpaRepository<ResourceAllocateInfoEntity, Long>,
+        JpaSpecificationExecutor<ResourceAllocateInfoEntity> {
+
+    /**
+     * Stores the endpoint of a task's record and sets its allocate state to AVAILABLE.
+     *
+     * @param endpoint endpoint value to store
+     * @param id task id (column {@code task_id}), not the record's primary key
+     * @return number of updated rows
+     */
+    @Transactional
+    @Query(value = "update resource_allocate_info set "
+            + " resource_allocate_state='AVAILABLE', endpoint = :endpointToSet"
+            + " where task_id=:idToFind", nativeQuery = true)
+    @Modifying
+    int updateEndpointByTaskId(@Param("endpointToSet") String endpoint, @Param("idToFind") Long id);
+
+    /**
+     * Sets the allocate state of a task's record.
+     *
+     * @param state new value of {@code resource_allocate_state}
+     * @param id task id (column {@code task_id}), not the record's primary key
+     * @return number of updated rows
+     */
+    @Transactional
+    @Query(value = "update resource_allocate_info set "
+            + " resource_allocate_state=:stateToSet"
+            + " where task_id=:idToFind", nativeQuery = true)
+    @Modifying
+    int updateResourceAllocateStateByTaskId(@Param("stateToSet") String state, @Param("idToFind") Long id);
+
+    /**
+     * Sets the usage state of a task's record.
+     *
+     * @param state new value of {@code resource_usage_state}
+     * @param id task id (column {@code task_id}), not the record's primary key
+     * @return number of updated rows
+     */
+    @Transactional
+    @Query(value = "update resource_allocate_info set "
+            + " resource_usage_state=:stateToSet"
+            + " where task_id =:idToFind", nativeQuery = true)
+    @Modifying
+    int updateResourceUsageStateByTaskId(@Param("stateToSet") String state, @Param("idToFind") Long id);
+
+
+    /**
+     * Looks up the record of a task.
+     *
+     * @param id task id (column {@code task_id})
+     * @return the record, or empty when the task has none
+     */
+    @Query(value = "SELECT * FROM resource_allocate_info WHERE task_id = ?1", nativeQuery = true)
+    Optional<ResourceAllocateInfoEntity> findByTaskIdNative(Long id);
+
+    /**
+     * Looks up a record by primary key with a native query.
+     *
+     * @param id primary key (column {@code id})
+     * @return the record, or empty when no row matches
+     */
+    @Query(value = "SELECT * FROM resource_allocate_info WHERE id = ?1", nativeQuery = true)
+    Optional<ResourceAllocateInfoEntity> findByIdNative(Long id);
+}
diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/SupervisorEndpointEntity.java
b/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/SupervisorEndpointEntity.java
new file mode 100644
index 0000000000..f56ed22ac0
--- /dev/null
+++ b/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/SupervisorEndpointEntity.java
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2023 OceanBase.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.oceanbase.odc.metadb.task;
+
+import java.util.Date;
+
+import javax.persistence.Column;
+import javax.persistence.Entity;
+import javax.persistence.GeneratedValue;
+import javax.persistence.GenerationType;
+import javax.persistence.Id;
+import javax.persistence.Table;
+
+import org.hibernate.annotations.Generated;
+import org.hibernate.annotations.GenerationTime;
+
+import lombok.Data;
+
+/**
+ * JPA mapping of one row in the {@code supervisor_endpoint} table
+ *
+ * @author longpeng.zlp
+ * @date 2024/11/29 15:32
+ */
+@Entity
+@Table(name = "supervisor_endpoint")
+@Data
+public class SupervisorEndpointEntity {
+
+    /**
+     * Primary key, generated by the database
+     */
+    @Id
+    @GeneratedValue(strategy = GenerationType.IDENTITY)
+    @Column(name = "id", nullable = false)
+    private Long id;
+
+    /**
+     * Host the supervisor is reachable at
+     */
+    @Column(nullable = false, name = "host")
+    private String host;
+
+    /**
+     * Port the supervisor is reachable at
+     */
+    @Column(nullable = false, name = "port")
+    private Integer port;
+
+    /**
+     * Supervisor status, one of
+     * PREPARING,AVAILABLE,DESTROYED,UNAVAILABLE,ABANDON
+     */
+    @Column(nullable = false, name = "status")
+    private String status;
+
+    /**
+     * Current load of the supervisor, used when allocating tasks
+     */
+    @Column(nullable = false, name = "loads")
+    private Integer loads;
+
+    /**
+     * Id of the related resource_resource row, -1 when the supervisor has none
+     */
+    @Column(nullable = false, name = "resource_id")
+    private Long resourceID;
+
+    /**
+     * Time the row was inserted, filled in by the database
+     */
+    @Column(name = "create_time", insertable = false, updatable = false,
+            columnDefinition = "datetime NOT NULL DEFAULT CURRENT_TIMESTAMP")
+    @Generated(GenerationTime.ALWAYS)
+    private Date createTime;
+
+    /**
+     * Time the row was last modified, filled in by the database
+     */
+    @Column(name = "update_time", insertable = false, updatable = false,
+            columnDefinition = "datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP")
+    @Generated(GenerationTime.ALWAYS)
+    private Date updateTime;
+}
diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/SupervisorEndpointRepository.java b/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/SupervisorEndpointRepository.java
new file mode 100644
index 0000000000..14838f8353
--- /dev/null
+++ b/server/odc-service/src/main/java/com/oceanbase/odc/metadb/task/SupervisorEndpointRepository.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2023 OceanBase.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package com.oceanbase.odc.metadb.task; + +import java.util.Optional; + +import javax.transaction.Transactional; + +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.JpaSpecificationExecutor; +import org.springframework.data.jpa.repository.Modifying; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; +import org.springframework.stereotype.Repository; + +/** + * @author yaobin + * @date 2023-12-06 + * @since 4.2.4 + */ +@Repository +public interface SupervisorEndpointRepository extends JpaRepository, + JpaSpecificationExecutor { + + @Transactional + @Query(value = "update supervisor_endpoint set " + + " status=:statusToSet, loads = :loadToSet" + + " where host=:hostToFind and port=:portToFind", nativeQuery = true) + @Modifying + int updateStatusAndLoadByHostAndPort(@Param("hostToFind") String host, @Param("portToFind") Integer port, + @Param("loadToSet") Integer load, + @Param("statusToSet") String status); + + @Transactional + @Query(value = "update supervisor_endpoint set " + + " status=:statusToSet" + + " where host=:hostToFind and port=:portToFind", nativeQuery = true) + @Modifying + int updateStatusByHostAndPort(@Param("hostToFind") String host, @Param("portToFind") Integer port, + @Param("statusToSet") String status); + + @Transactional + @Query(value = "update supervisor_endpoint set " + + "loads = loads + :loadToAdd" + + " where host=:hostToFind and port=:portToFind", nativeQuery = true) + @Modifying + int addLoadByHostAndPort(@Param("hostToFind") String host, @Param("portToFind") Integer port, + @Param("loadToAdd") Integer loadToAdd); + + @Query(value = "SELECT * FROM supervisor_endpoint WHERE id = ?1", nativeQuery = true) + Optional findByIdNative(Long id); + + @Query(value = "SELECT * FROM supervisor_endpoint WHERE host = :hostToFind and port = :portToFind", + nativeQuery = true) + Optional findByHostAndPort(@Param("hostToFind") 
String host, + @Param("portToFind") Integer port); +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/dlm/DLMJobStore.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/dlm/DLMJobStore.java index 94414bf025..5878fb948a 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/dlm/DLMJobStore.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/dlm/DLMJobStore.java @@ -72,9 +72,11 @@ public void setDlmTableUnits(Map dlmTableUnits) { public void destroy() { try { - dataSource.close(); + if (null != dataSource) { + dataSource.close(); + } } catch (Exception e) { - log.warn("Close meta datasource failed,errorMsg={}", e.getMessage()); + log.warn("Close meta datasource failed,errorMsg={}", e); } } diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/quartz/config/QuartzConfiguration.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/quartz/config/QuartzConfiguration.java index c0788720fa..83b9e7666b 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/quartz/config/QuartzConfiguration.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/quartz/config/QuartzConfiguration.java @@ -62,6 +62,17 @@ public SchedulerFactoryBean schedulerFactoryBean(DataSource dataSource) { return schedulerFactoryBean; } + @Bean("defaultTaskSchedulerFactoryBean") + public SchedulerFactoryBean taskSchedulerFactoryBean(DataSource dataSource) { + SchedulerFactoryBean schedulerFactoryBean = new SchedulerFactoryBean(); + schedulerFactoryBean.setDataSource(dataSource); + schedulerFactoryBean.setSchedulerName("ODC-TASK-SCHEDULER"); + Properties properties = new Properties(); + properties.put("org.quartz.threadPool.threadCount", "6"); + schedulerFactoryBean.setQuartzProperties(properties); + return schedulerFactoryBean; + } + @Bean("defaultScheduler") public Scheduler scheduler( @Autowired @Qualifier("defaultSchedulerFactoryBean") SchedulerFactoryBean 
schedulerFactoryBean) @@ -71,4 +82,11 @@ public Scheduler scheduler( scheduler.getListenerManager().addTriggerListener(odcTriggerListener); return scheduler; } + + @Bean("defaultTaskScheduler") + public Scheduler taskScheduler( + @Autowired @Qualifier("defaultTaskSchedulerFactoryBean") SchedulerFactoryBean schedulerFactoryBean) + throws SchedulerException { + return schedulerFactoryBean.getScheduler(); + } } diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/BaseJobCaller.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/BaseJobCaller.java index 17e285e573..9f9ce9197e 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/BaseJobCaller.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/BaseJobCaller.java @@ -21,13 +21,13 @@ import com.oceanbase.odc.service.resource.ResourceID; import com.oceanbase.odc.service.task.config.JobConfiguration; import com.oceanbase.odc.service.task.config.JobConfigurationHolder; -import com.oceanbase.odc.service.task.config.JobConfigurationValidator; import com.oceanbase.odc.service.task.enums.JobCallerAction; import com.oceanbase.odc.service.task.exception.JobException; import com.oceanbase.odc.service.task.listener.JobCallerEvent; import com.oceanbase.odc.service.task.schedule.JobIdentity; import com.oceanbase.odc.service.task.service.TaskFrameworkService; import com.oceanbase.odc.service.task.util.TaskExecutorClient; +import com.oceanbase.odc.service.task.util.TaskSupervisorUtil; import lombok.extern.slf4j.Slf4j; @@ -38,9 +38,9 @@ */ @Slf4j public abstract class BaseJobCaller implements JobCaller { + @Override public void start(JobContext context) throws JobException { - JobConfigurationValidator.validComponent(); JobConfiguration jobConfiguration = JobConfigurationHolder.getJobConfiguration(); TaskFrameworkService taskFrameworkService = jobConfiguration.getTaskFrameworkService(); ExecutorIdentifier 
executorIdentifier = null; @@ -51,7 +51,7 @@ public void start(JobContext context) throws JobException { } try { executorIdentifier = doStart(context); - rows = taskFrameworkService.startSuccess(ji.getId(), executorIdentifier.toString()); + rows = taskFrameworkService.startSuccess(ji.getId(), executorIdentifier.toString(), context); if (rows > 0) { afterStartSucceed(executorIdentifier, ji); } else { @@ -85,7 +85,6 @@ private void afterStartFailed(JobIdentity ji, @Override public void stop(JobIdentity ji) throws JobException { - JobConfigurationValidator.validComponent(); JobConfiguration jobConfiguration = JobConfigurationHolder.getJobConfiguration(); TaskFrameworkService taskFrameworkService = jobConfiguration.getTaskFrameworkService(); TaskExecutorClient taskExecutorClient = jobConfiguration.getTaskExecutorClient(); @@ -118,7 +117,6 @@ protected void afterStopFailed(JobIdentity ji, Exception e) throws JobException @Override public void modify(JobIdentity ji, String jobParametersJson) throws JobException { - JobConfigurationValidator.validComponent(); JobConfiguration jobConfiguration = JobConfigurationHolder.getJobConfiguration(); TaskFrameworkService taskFrameworkService = jobConfiguration.getTaskFrameworkService(); TaskExecutorClient taskExecutorClient = jobConfiguration.getTaskExecutorClient(); @@ -128,7 +126,6 @@ public void modify(JobIdentity ji, String jobParametersJson) throws JobException @Override public void finish(JobIdentity ji) throws JobException { - JobConfigurationValidator.validComponent(); JobConfiguration jobConfiguration = JobConfigurationHolder.getJobConfiguration(); TaskFrameworkService taskFrameworkService = jobConfiguration.getTaskFrameworkService(); JobEntity jobEntity = taskFrameworkService.find(ji.getId()); @@ -144,6 +141,9 @@ public void finish(JobIdentity ji) throws JobException { ResourceID resourceID = ResourceIDUtil.getResourceID(identifier, jobEntity); log.info("Preparing destroy,jobId={}, executorIdentifier={}.", ji.getId(), 
executorIdentifier); doFinish(ji, identifier, resourceID); + if (TaskSupervisorUtil.isTaskSupervisorEnabled(jobConfiguration.getTaskFrameworkProperties())) { + jobConfiguration.getSupervisorAgentAllocator().deallocateSupervisorEndpoint(jobEntity.getId()); + } } @@ -194,8 +194,6 @@ protected void updateExecutorDestroyed(JobIdentity ji) throws JobException { protected abstract ExecutorIdentifier doStart(JobContext context) throws JobException; - protected abstract void doStop(JobIdentity ji) throws JobException; - protected abstract boolean isExecutorExist(ExecutorIdentifier identifier, ResourceID resourceID) throws JobException; } diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/ExecutorProcessBuilderFactory.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/ExecutorProcessBuilderFactory.java index 2721b03a70..e04c66d5d5 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/ExecutorProcessBuilderFactory.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/ExecutorProcessBuilderFactory.java @@ -36,6 +36,11 @@ public class ExecutorProcessBuilderFactory { private static final Pattern ODC_SERVER_EXECUTABLE_JAR = Pattern.compile("^.*odc-server-.*executable\\.jar$"); public ProcessBuilder getProcessBuilder(ProcessConfig processConfig, long jobId, String executorName) { + return getProcessBuilder(processConfig, jobId, executorName, JobConstants.ODC_AGENT_CLASS_NAME); + } + + public ProcessBuilder getProcessBuilder(ProcessConfig processConfig, long jobId, String executorName, + String mainClassName) { RuntimeMXBean runtimeMxBean = ManagementFactory.getRuntimeMXBean(); ProcessBuilder pb = new ProcessBuilder(); List commands = new ArrayList<>(); @@ -47,13 +52,13 @@ public ProcessBuilder getProcessBuilder(ProcessConfig processConfig, long jobId, commands.add("-cp"); // set jar package file name in commands commands.add(runtimeMxBean.getClassPath()); - 
commands.add("-Dloader.main=" + JobConstants.ODC_AGENT_CLASS_NAME); + commands.add("-Dloader.main=" + mainClassName); commands.add("org.springframework.boot.loader.PropertiesLauncher"); } else { // start odc executor by java -classpath commands.add("-cp"); commands.add(runtimeMxBean.getClassPath()); - commands.add(JobConstants.ODC_AGENT_CLASS_NAME); + commands.add(mainClassName); // commands.add("org.springframework.boot.loader.PropertiesLauncher"); } pb.command(commands); diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/JobCallerBuilder.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/JobCallerBuilder.java index 0ffe215036..b7cab5747c 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/JobCallerBuilder.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/JobCallerBuilder.java @@ -15,16 +15,13 @@ */ package com.oceanbase.odc.service.task.caller; -import java.io.File; -import java.nio.charset.Charset; import java.util.Map; -import org.apache.commons.io.FileUtils; - import com.oceanbase.odc.common.util.StringUtils; import com.oceanbase.odc.service.resource.ResourceManager; import com.oceanbase.odc.service.task.config.JobConfigurationHolder; import com.oceanbase.odc.service.task.config.TaskFrameworkProperties; +import com.oceanbase.odc.service.task.constants.JobConstants; import com.oceanbase.odc.service.task.constants.JobEnvKeyConstants; import com.oceanbase.odc.service.task.enums.TaskMonitorMode; import com.oceanbase.odc.service.task.enums.TaskRunMode; @@ -47,34 +44,19 @@ public class JobCallerBuilder { * @param environments env for process builder * @return */ - public static JobCaller buildProcessCaller(JobContext context, Map environments) { + public static ProcessJobCaller buildProcessCaller(JobContext context, Map environments) { JobUtils.encryptEnvironments(environments); - /** - * write JobContext to file in case of exceeding the 
environments size limit; set the file path in - * the environment instead - */ - String jobContextFilePath = JobUtils.getExecutorDataPath() + "/" + StringUtils.uuid() + ".enc"; - try { - FileUtils.writeStringToFile(new File(jobContextFilePath), - JobUtils.encrypt(environments.get(JobEnvKeyConstants.ENCRYPT_KEY), - environments.get(JobEnvKeyConstants.ENCRYPT_SALT), JobUtils.toJson(context)), - Charset.defaultCharset()); - } catch (Exception ex) { - FileUtils.deleteQuietly(new File(jobContextFilePath)); - throw new RuntimeException("Failed to write job context to file: " + jobContextFilePath, ex); - } - environments.put(JobEnvKeyConstants.ODC_JOB_CONTEXT_FILE_PATH, - JobUtils.encrypt(environments.get(JobEnvKeyConstants.ENCRYPT_KEY), - environments.get(JobEnvKeyConstants.ENCRYPT_SALT), jobContextFilePath)); + setReportMode(environments, context); ProcessConfig config = new ProcessConfig(); config.setEnvironments(environments); - TaskFrameworkProperties taskFrameworkProperties = JobConfigurationHolder.getJobConfiguration().getTaskFrameworkProperties(); config.setJvmXmsMB(taskFrameworkProperties.getJobProcessMinMemorySizeInMB()); config.setJvmXmxMB(taskFrameworkProperties.getJobProcessMaxMemorySizeInMB()); - - return new ProcessJobCaller(config); + String mainClassName = StringUtils.isBlank(taskFrameworkProperties.getProcessMainClassName()) + ? 
JobConstants.ODC_AGENT_CLASS_NAME + : taskFrameworkProperties.getProcessMainClassName(); + return new ProcessJobCaller(config, mainClassName); } /** @@ -94,12 +76,7 @@ public static Map buildK8sEnv(JobContext context) { environments.put(JobEnvKeyConstants.ODC_EXECUTOR_PORT, String.valueOf(executorListenPort)); } - TaskMonitorMode monitorMode = JobPropertiesUtils.getMonitorMode(jobProperties); - if (TaskMonitorMode.PULL.equals(monitorMode)) { - environments.put(JobEnvKeyConstants.REPORT_ENABLED, "false"); - } else { - environments.put(JobEnvKeyConstants.REPORT_ENABLED, "true"); - } + setReportMode(environments, context); // encryption related properties JasyptEncryptorConfigProperties jasyptProperties = JobConfigurationHolder.getJobConfiguration() @@ -112,6 +89,15 @@ public static Map buildK8sEnv(JobContext context) { return environments; } + private static void setReportMode(Map environments, JobContext jobContext) { + TaskMonitorMode monitorMode = JobPropertiesUtils.getMonitorMode(jobContext.getJobProperties()); + if (TaskMonitorMode.PULL.equals(monitorMode)) { + environments.put(JobEnvKeyConstants.REPORT_ENABLED, "false"); + } else { + environments.put(JobEnvKeyConstants.REPORT_ENABLED, "true"); + } + } + public static JobCaller buildK8sJobCaller(PodConfig podConfig, JobContext context, ResourceManager resourceManager) { Map environments = buildK8sEnv(context); diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/JobCallerContext.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/JobCallerContext.java new file mode 100644 index 0000000000..830f678c8d --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/JobCallerContext.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.oceanbase.odc.service.task.caller; + +/** + * context for job caller + * + * @author longpeng.zlp + * @date 2024/11/28 11:16 + */ +public interface JobCallerContext { + +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/K8sJobCaller.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/K8sJobCaller.java index d59345e510..8657be297f 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/K8sJobCaller.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/K8sJobCaller.java @@ -59,7 +59,9 @@ public ExecutorIdentifier doStart(JobContext context) throws JobException { resourceManager.create(resourceLocation, buildK8sResourceContext(context, resourceLocation)); String arn = resource.getResource().resourceID().getIdentifier(); return DefaultExecutorIdentifier.builder().namespace(resource.getResource().getNamespace()) - .executorName(arn).build(); + .executorName(arn).host(resource.getResource().getPodIpAddress()) + .port(Integer.valueOf(resource.getResource().getServicePort())) + .build(); } catch (Throwable e) { throw new JobException("doStart failed for " + context, e); } @@ -81,9 +83,6 @@ protected ResourceLocation buildResourceLocation(JobContext context) { return new ResourceLocation(region, group); } - @Override - public void doStop(JobIdentity ji) throws JobException {} - @Override protected void doFinish(JobIdentity ji, ExecutorIdentifier ei, ResourceID resourceID) throws JobException { diff --git 
a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/ProcessJobCaller.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/ProcessJobCaller.java index 090ff3abf4..6188d71c11 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/ProcessJobCaller.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/caller/ProcessJobCaller.java @@ -20,22 +20,27 @@ import java.io.IOException; import java.text.MessageFormat; -import java.util.Objects; -import java.util.Optional; +import java.util.HashMap; +import java.util.Map; import com.fasterxml.jackson.core.type.TypeReference; +import com.oceanbase.odc.common.util.StringUtils; import com.oceanbase.odc.common.util.SystemUtils; import com.oceanbase.odc.metadb.task.JobEntity; import com.oceanbase.odc.service.common.response.OdcResult; import com.oceanbase.odc.service.resource.ResourceID; import com.oceanbase.odc.service.task.config.JobConfiguration; import com.oceanbase.odc.service.task.config.JobConfigurationHolder; +import com.oceanbase.odc.service.task.constants.JobEnvKeyConstants; import com.oceanbase.odc.service.task.enums.JobStatus; import com.oceanbase.odc.service.task.exception.JobException; import com.oceanbase.odc.service.task.schedule.JobIdentity; +import com.oceanbase.odc.service.task.supervisor.TaskSupervisor; +import com.oceanbase.odc.service.task.supervisor.endpoint.ExecutorEndpoint; +import com.oceanbase.odc.service.task.supervisor.endpoint.SupervisorEndpoint; import com.oceanbase.odc.service.task.util.HttpClientUtils; -import com.oceanbase.odc.service.task.util.JobUtils; +import lombok.Getter; import lombok.extern.slf4j.Slf4j; /** @@ -45,77 +50,38 @@ */ @Slf4j public class ProcessJobCaller extends BaseJobCaller { - + @Getter private final ProcessConfig processConfig; - public ProcessJobCaller(ProcessConfig processConfig) { + private final TaskSupervisor taskSupervisor; + + public ProcessJobCaller(ProcessConfig 
processConfig, String mainClassName) { this.processConfig = processConfig; + // only serve local task supervisor + this.taskSupervisor = new TaskSupervisor(new SupervisorEndpoint(SystemUtils.getLocalIpAddress(), + DefaultExecutorIdentifier.DEFAULT_PORT), mainClassName); } @Override public ExecutorIdentifier doStart(JobContext context) throws JobException { - - String executorName = JobUtils.generateExecutorName(context.getJobIdentity()); - ProcessBuilder pb = new ExecutorProcessBuilderFactory().getProcessBuilder( - processConfig, context.getJobIdentity().getId(), executorName); - Process process; - try { - process = pb.start(); - } catch (Exception ex) { - throw new JobException("Start process failed.", ex); - } - - long pid = SystemUtils.getProcessPid(process); - if (pid == -1) { - process.destroyForcibly(); - throw new JobException("Get pid failed, job id={0} ", context.getJobIdentity().getId()); - } - - boolean isProcessRunning = - SystemUtils.isProcessRunning(pid, JobUtils.generateExecutorSelectorOnProcess(executorName)); - - if (!isProcessRunning) { - process.destroyForcibly(); - throw new JobException("Start process failed, not process found, pid={0},executorName={1}.", - pid, executorName); - } - - JobConfiguration jobConfiguration = JobConfigurationHolder.getJobConfiguration(); - String portString = Optional.ofNullable(jobConfiguration.getHostProperties().getPort()) - .orElse(DefaultExecutorIdentifier.DEFAULT_PORT + ""); - // set process id as namespace - return DefaultExecutorIdentifier.builder().host(SystemUtils.getLocalIpAddress()) - .port(Integer.parseInt(portString)) - .namespace(pid + "") - .executorName(executorName).build(); + ExecutorEndpoint executorEndpoint = taskSupervisor.startTask(context, copyProcessConfig(processConfig)); + return ExecutorIdentifierParser.parser(executorEndpoint.getIdentifier()); } - @Override - protected void doStop(JobIdentity ji) throws JobException {} - @Override protected void doFinish(JobIdentity ji, ExecutorIdentifier 
ei, ResourceID resourceID) throws JobException { - if (isExecutorExist(ei, resourceID)) { - long pid = Long.parseLong(ei.getNamespace()); - log.info("Found process, try kill it, pid={}.", pid); - // first update destroy time, second destroy executor. - // if executor failed update will be rollback, ensure distributed transaction atomicity. + ExecutorEndpoint executorEndpoint = buildExecutorEndpoint(ei); + JobContext jobContext = createJobContext(ji); + if (isSameTaskSupervisor(executorEndpoint, taskSupervisor.getSupervisorEndpoint())) { + taskSupervisor.stopTask(executorEndpoint, jobContext); updateExecutorDestroyed(ji); - doDestroyInternal(ei); return; } - JobConfiguration jobConfiguration = JobConfigurationHolder.getJobConfiguration(); - String portString = Optional.ofNullable(jobConfiguration.getHostProperties().getPort()) - .orElse(DefaultExecutorIdentifier.DEFAULT_PORT + ""); - if (SystemUtils.getLocalIpAddress().equals(ei.getHost()) && Objects.equals(portString, ei.getPort() + "")) { - updateExecutorDestroyed(ji); - return; - } - JobConfiguration configuration = JobConfigurationHolder.getJobConfiguration(); - JobEntity jobEntity = configuration.getTaskFrameworkService().find(ji.getId()); - if (!isOdcHealthy(ei.getHost(), ei.getPort())) { + if (!isRemoteTaskSupervisorAlive(executorEndpoint)) { + JobConfiguration configuration = JobConfigurationHolder.getJobConfiguration(); + JobEntity jobEntity = configuration.getTaskFrameworkService().find(ji.getId()); if (jobEntity.getStatus() == JobStatus.RUNNING) { // Cannot connect to target identifier,we cannot kill the process, // so we set job to FAILED and avoid two process running @@ -134,18 +100,35 @@ protected void doFinish(JobIdentity ji, ExecutorIdentifier ei, ResourceID resour + " may not on this machine, jodId={0}, identifier={1}", ji.getId(), ei); } - public boolean canBeFinish(JobIdentity ji, ExecutorIdentifier ei, ResourceID resourceID) { - if (isExecutorExist(ei, resourceID)) { - log.info("Executor be 
found, jobId={}, identifier={}", ji.getId(), ei); - return true; + /** + * copy process config and remove listen port if it's in pull mode + * + * @param origin + * @return + */ + protected ProcessConfig copyProcessConfig(ProcessConfig origin) { + ProcessConfig ret = new ProcessConfig(); + ret.setJvmXmxMB(origin.getJvmXmxMB()); + ret.setJvmXmsMB(origin.getJvmXmsMB()); + Map evn = new HashMap<>(); + if (null != origin.getEnvironments()) { + evn.putAll(origin.getEnvironments()); + } + if (StringUtils.equalsIgnoreCase(evn.get(JobEnvKeyConstants.REPORT_ENABLED), "false")) { + evn.remove(JobEnvKeyConstants.ODC_EXECUTOR_PORT); } - String portString = Optional.ofNullable( - JobConfigurationHolder.getJobConfiguration().getHostProperties().getPort()) - .orElse(DefaultExecutorIdentifier.DEFAULT_PORT + ""); - if (SystemUtils.getLocalIpAddress().equals(ei.getHost()) && Objects.equals(portString, ei.getPort() + "")) { + ret.setEnvironments(evn); + return ret; + } + + public boolean canBeFinish(JobIdentity ji, ExecutorIdentifier ei, ResourceID resourceID) { + ExecutorEndpoint executorEndpoint = buildExecutorEndpoint(ei); + // same machine can operate the task + if (isSameTaskSupervisor(executorEndpoint, taskSupervisor.getSupervisorEndpoint())) { return true; } - if (!isOdcHealthy(ei.getHost(), ei.getPort())) { + // remote is down + if (!isRemoteTaskSupervisorAlive(executorEndpoint)) { log.info("Cannot connect to target odc server, executor can be destroyed,jobId={}, identifier={}", ji.getId(), ei); return true; @@ -153,32 +136,54 @@ public boolean canBeFinish(JobIdentity ji, ExecutorIdentifier ei, ResourceID res return false; } - protected void doDestroyInternal(ExecutorIdentifier identifier) throws JobException { - long pid = Long.parseLong(identifier.getNamespace()); - boolean result = SystemUtils.killProcessByPid(pid); - if (result) { - log.info("Destroy succeed by kill process, executorIdentifier={}, pid={}", identifier, pid); - } else { - throw new JobException( - "Destroy 
executor failed by kill process, identifier={0}, pid{1}=", identifier, pid); - } - } @Override protected boolean isExecutorExist(ExecutorIdentifier identifier, ResourceID resourceID) { - long pid = Long.parseLong(identifier.getNamespace()); - boolean result = SystemUtils.isProcessRunning(pid, - JobUtils.generateExecutorSelectorOnProcess(identifier.getExecutorName())); - if (result) { - log.info("Found executor by identifier, identifier={}", identifier); - } else { - log.warn("Not found executor by identifier, identifier={}", identifier); - } - return result; + return taskSupervisor.isTaskAlive(identifier); } - private boolean isOdcHealthy(String server, int servPort) { - String url = String.format("http://%s:%d/api/v1/heartbeat/isHealthy", server, servPort); + protected JobContext createJobContext(JobIdentity jobIdentity) { + return new JobContext() { + @Override + public JobIdentity getJobIdentity() { + return jobIdentity; + } + + @Override + public String getJobClass() { + throw new IllegalStateException("not impl"); + } + + @Override + public Map getJobProperties() { + throw new IllegalStateException("not impl"); + } + + @Override + public Map getJobParameters() { + throw new IllegalStateException("not impl"); + } + }; + } + + protected ExecutorEndpoint buildExecutorEndpoint(ExecutorIdentifier executorIdentifier) { + return new ExecutorEndpoint( + TaskSupervisor.COMMAND_PROTOCOL_NAME, + executorIdentifier.getHost(), + DefaultExecutorIdentifier.DEFAULT_PORT, + executorIdentifier.getPort(), + executorIdentifier.toString()); + } + + /** + * this method will be moved out + * + * @param executorEndpoint + * @return + */ + public boolean isRemoteTaskSupervisorAlive(ExecutorEndpoint executorEndpoint) { + String url = String.format("http://%s:%s/api/v1/heartbeat/isHealthy", executorEndpoint.getHost(), + executorEndpoint.getSupervisorPort()); try { OdcResult result = HttpClientUtils.request("GET", url, new TypeReference>() {}); return result.getData(); @@ -188,4 +193,15 
@@ private boolean isOdcHealthy(String server, int servPort) { } } + /** + * if this is on same machine + * + * @param supervisorEndpoint + * @return + */ + protected boolean isSameTaskSupervisor(ExecutorEndpoint executorEndpoint, SupervisorEndpoint supervisorEndpoint) { + return StringUtils.equalsIgnoreCase(executorEndpoint.getHost(), supervisorEndpoint.getHost()) + && Integer.compare(executorEndpoint.getSupervisorPort(), supervisorEndpoint.getPort()) == 0; + } + } diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/DefaultJobConfiguration.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/DefaultJobConfiguration.java index 3169842dae..09751968db 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/DefaultJobConfiguration.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/DefaultJobConfiguration.java @@ -26,6 +26,8 @@ import com.oceanbase.odc.service.task.TaskService; import com.oceanbase.odc.service.task.dispatch.JobDispatcher; import com.oceanbase.odc.service.task.jasypt.JasyptEncryptorConfigProperties; +import com.oceanbase.odc.service.task.resource.SupervisorAgentAllocator; +import com.oceanbase.odc.service.task.resource.TaskResourceManager; import com.oceanbase.odc.service.task.schedule.JobCredentialProvider; import com.oceanbase.odc.service.task.schedule.StartJobRateLimiter; import com.oceanbase.odc.service.task.schedule.TaskFrameworkDisabledHandler; @@ -33,6 +35,7 @@ import com.oceanbase.odc.service.task.schedule.provider.JobImageNameProvider; import com.oceanbase.odc.service.task.service.TaskFrameworkService; import com.oceanbase.odc.service.task.service.TransactionManager; +import com.oceanbase.odc.service.task.supervisor.TaskSupervisorJobCaller; import com.oceanbase.odc.service.task.util.TaskExecutorClient; import lombok.Getter; @@ -59,8 +62,12 @@ public abstract class DefaultJobConfiguration implements JobConfiguration { protected 
JobDispatcher jobDispatcher; + protected TaskSupervisorJobCaller taskSupervisorJobCaller; + protected Scheduler daemonScheduler; + protected Scheduler taskSupervisorScheduler; + protected ResourceManager resourceManager; protected HostUrlProvider hostUrlProvider; @@ -84,4 +91,9 @@ public abstract class DefaultJobConfiguration implements JobConfiguration { protected HostProperties hostProperties; protected JobCredentialProvider jobCredentialProvider; + + protected TaskResourceManager taskResourceManager; + + protected SupervisorAgentAllocator supervisorAgentAllocator; + } diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/DefaultSpringJobConfiguration.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/DefaultSpringJobConfiguration.java index 11745a7253..2ab4b1e04b 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/DefaultSpringJobConfiguration.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/DefaultSpringJobConfiguration.java @@ -24,13 +24,18 @@ import org.springframework.transaction.support.TransactionTemplate; import com.oceanbase.odc.common.event.LocalEventPublisher; +import com.oceanbase.odc.metadb.task.ResourceAllocateInfoRepository; +import com.oceanbase.odc.metadb.task.SupervisorEndpointRepository; import com.oceanbase.odc.service.common.model.HostProperties; import com.oceanbase.odc.service.connection.ConnectionService; import com.oceanbase.odc.service.objectstorage.cloud.model.CloudEnvConfigurations; import com.oceanbase.odc.service.resource.ResourceManager; import com.oceanbase.odc.service.task.TaskService; +import com.oceanbase.odc.service.task.constants.JobConstants; import com.oceanbase.odc.service.task.dispatch.ImmediateJobDispatcher; import com.oceanbase.odc.service.task.jasypt.JasyptEncryptorConfigProperties; +import com.oceanbase.odc.service.task.resource.ProcessTaskResourceManager; +import 
com.oceanbase.odc.service.task.resource.SupervisorAgentAllocator; import com.oceanbase.odc.service.task.schedule.DefaultTaskFrameworkDisabledHandler; import com.oceanbase.odc.service.task.schedule.JobCredentialProvider; import com.oceanbase.odc.service.task.schedule.StartJobRateLimiter; @@ -40,7 +45,11 @@ import com.oceanbase.odc.service.task.service.SpringTransactionManager; import com.oceanbase.odc.service.task.service.StdTaskFrameworkService; import com.oceanbase.odc.service.task.service.TaskFrameworkService; +import com.oceanbase.odc.service.task.supervisor.DefaultJobEventListener; +import com.oceanbase.odc.service.task.supervisor.TaskSupervisorJobCaller; +import com.oceanbase.odc.service.task.supervisor.proxy.LocalTaskSupervisorProxy; import com.oceanbase.odc.service.task.util.TaskExecutorClient; +import com.oceanbase.odc.service.task.util.TaskSupervisorUtil; /** * @author yaobin @@ -62,6 +71,9 @@ public void afterPropertiesSet() { setConnectionService(ctx.getBean(ConnectionService.class)); setTaskService(ctx.getBean(TaskService.class)); setDaemonScheduler((Scheduler) ctx.getBean("taskFrameworkSchedulerFactoryBean")); + // TODO(tinker): return right scheduler + setTaskSupervisorScheduler((Scheduler) ctx.getBean("defaultTaskSchedulerFactoryBean")); + setJobDispatcher(new ImmediateJobDispatcher(ctx.getBean(ResourceManager.class))); setResourceManager(ctx.getBean(ResourceManager.class)); LocalEventPublisher publisher = new LocalEventPublisher(); @@ -71,14 +83,29 @@ public void afterPropertiesSet() { } setTaskFrameworkService(tfs); setEventPublisher(publisher); - - setTaskExecutorClient(new TaskExecutorClient()); + TaskExecutorClient executorClient = ctx.getBean(TaskExecutorClient.class); + setTaskExecutorClient(executorClient); + setTaskSupervisorJobCaller( + new TaskSupervisorJobCaller(new DefaultJobEventListener(), new LocalTaskSupervisorProxy( + TaskSupervisorUtil.getDefaultSupervisorEndpoint(), JobConstants.ODC_AGENT_CLASS_NAME), + executorClient)); 
setTransactionManager(new SpringTransactionManager(ctx.getBean(TransactionTemplate.class))); initJobRateLimiter(); setTaskFrameworkDisabledHandler(new DefaultTaskFrameworkDisabledHandler()); setJasyptEncryptorConfigProperties(ctx.getBean(JasyptEncryptorConfigProperties.class)); setHostProperties(ctx.getBean(HostProperties.class)); setJobCredentialProvider(ctx.getBean(JobCredentialProvider.class)); + TaskFrameworkProperties taskFrameworkProperties = ctx.getBean(TaskFrameworkProperties.class); + if (TaskSupervisorUtil.isTaskSupervisorEnabled(taskFrameworkProperties)) { + // init resource allocator and resource manager + ProcessTaskResourceManager processTaskResourceManager = + new ProcessTaskResourceManager(ctx.getBean(SupervisorEndpointRepository.class), ctx.getBean( + ResourceAllocateInfoRepository.class)); + processTaskResourceManager.initTaskResourceManager(); + setTaskResourceManager(processTaskResourceManager); + setSupervisorAgentAllocator( + new SupervisorAgentAllocator(ctx.getBean(ResourceAllocateInfoRepository.class))); + } } @Override diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/DefaultTaskFrameworkProperties.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/DefaultTaskFrameworkProperties.java index e6fd835c54..87e01f9ec7 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/DefaultTaskFrameworkProperties.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/DefaultTaskFrameworkProperties.java @@ -96,8 +96,16 @@ public class DefaultTaskFrameworkProperties implements TaskFrameworkProperties { private String destroyExecutorJobCronExpression; private String pullTaskResultJobCronExpression; + + private String processMainClassName; /** * local k8s debug mode, use process builder mock k8s */ private boolean enableK8sLocalDebugMode; + + /** + * if enable task supervisor agent, current only in process mode, this flag can enabled, k8s mode + 
* will ignore this mode + */ + private boolean enableTaskSupervisorAgent; } diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/JobConfiguration.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/JobConfiguration.java index 9ff8d7be81..798ed1cf3e 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/JobConfiguration.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/JobConfiguration.java @@ -26,6 +26,8 @@ import com.oceanbase.odc.service.task.TaskService; import com.oceanbase.odc.service.task.dispatch.JobDispatcher; import com.oceanbase.odc.service.task.jasypt.JasyptEncryptorConfigProperties; +import com.oceanbase.odc.service.task.resource.SupervisorAgentAllocator; +import com.oceanbase.odc.service.task.resource.TaskResourceManager; import com.oceanbase.odc.service.task.schedule.JobCredentialProvider; import com.oceanbase.odc.service.task.schedule.StartJobRateLimiter; import com.oceanbase.odc.service.task.schedule.TaskFrameworkDisabledHandler; @@ -33,6 +35,7 @@ import com.oceanbase.odc.service.task.schedule.provider.JobImageNameProvider; import com.oceanbase.odc.service.task.service.TaskFrameworkService; import com.oceanbase.odc.service.task.service.TransactionManager; +import com.oceanbase.odc.service.task.supervisor.TaskSupervisorJobCaller; import com.oceanbase.odc.service.task.util.TaskExecutorClient; /** @@ -55,8 +58,27 @@ public interface JobConfiguration { Scheduler getDaemonScheduler(); + /** + * scheduler for task supervisor scheduler + * + * @return + */ + Scheduler getTaskSupervisorScheduler(); + + /** + * old job dispatcher + * + * @return + */ JobDispatcher getJobDispatcher(); + /** + * task job caller based on task supervisor + * + * @return + */ + TaskSupervisorJobCaller getTaskSupervisorJobCaller(); + HostUrlProvider getHostUrlProvider(); TaskFrameworkService getTaskFrameworkService(); @@ -78,4 +100,18 @@ public interface 
JobConfiguration { HostProperties getHostProperties(); JobCredentialProvider getJobCredentialProvider(); + + /** + * get task resource manager + * + * @return + */ + TaskResourceManager getTaskResourceManager(); + + /** + * resource allocator for task supervisor + * + * @return + */ + SupervisorAgentAllocator getSupervisorAgentAllocator(); } diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/JobConfigurationHolder.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/JobConfigurationHolder.java index fa33bd2c29..92a7d291ae 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/JobConfigurationHolder.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/JobConfigurationHolder.java @@ -16,6 +16,8 @@ package com.oceanbase.odc.service.task.config; +import com.oceanbase.odc.core.shared.PreConditions; + /** * @author yaobin * @date 2023-11-21 @@ -30,6 +32,9 @@ public static JobConfiguration getJobConfiguration() { } public static void setJobConfiguration(JobConfiguration config) { + PreConditions.notNull(config, "jobConfiguration"); + PreConditions.notNull(config.getTaskFrameworkService(), "taskFrameworkService"); + PreConditions.notNull(config.getTaskFrameworkProperties(), "taskFrameworkProperties"); configuration = config; } } diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/TaskFrameworkProperties.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/TaskFrameworkProperties.java index 823a27e287..54e6b6deaa 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/TaskFrameworkProperties.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/TaskFrameworkProperties.java @@ -72,6 +72,15 @@ public interface TaskFrameworkProperties { String getDestroyExecutorJobCronExpression(); + /** + * main class to boot process, default is null, to upgrade from
process caller to supervisor + * + * @return + */ + String getProcessMainClassName(); + boolean isEnableK8sLocalDebugMode(); + boolean isEnableTaskSupervisorAgent(); + } diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/dispatch/ImmediateJobDispatcher.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/dispatch/ImmediateJobDispatcher.java index bb1073383a..0ee54271e6 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/dispatch/ImmediateJobDispatcher.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/dispatch/ImmediateJobDispatcher.java @@ -29,7 +29,6 @@ import com.oceanbase.odc.service.task.caller.JobEnvironmentFactory; import com.oceanbase.odc.service.task.config.JobConfiguration; import com.oceanbase.odc.service.task.config.JobConfigurationHolder; -import com.oceanbase.odc.service.task.config.JobConfigurationValidator; import com.oceanbase.odc.service.task.config.K8sProperties; import com.oceanbase.odc.service.task.config.TaskFrameworkProperties; import com.oceanbase.odc.service.task.constants.JobConstants; @@ -89,7 +88,6 @@ public boolean canBeFinish(JobIdentity ji) { } private JobCaller getJobCaller(JobIdentity ji, JobContext context) { - JobConfigurationValidator.validComponent(); TaskFrameworkService taskFrameworkService = JobConfigurationHolder.getJobConfiguration().getTaskFrameworkService(); JobConfiguration config = JobConfigurationHolder.getJobConfiguration(); diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/dummy/DummyTask.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/dummy/DummyTask.java index 8d77096c38..797b1b6ef2 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/dummy/DummyTask.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/dummy/DummyTask.java @@ -54,8 +54,9 @@ public boolean start() throws Exception { while (!stopped.get() && loopCount.get() < 
maxLoopCount) { Thread.sleep(1000); log.info("dummy task loop for to {}", loopCount.get()); + loopCount.incrementAndGet(); } - return !stopped.get(); + return true; } @Override diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/dummy/LocalMockK8sJobClient.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/dummy/LocalMockK8sJobClient.java index 3fd2abd342..7149cf5454 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/dummy/LocalMockK8sJobClient.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/dummy/LocalMockK8sJobClient.java @@ -57,7 +57,7 @@ public K8sPodResource create(K8sResourceContext k8sResourceContext) throws JobEx k8sResourceContext.type(), executorIdentifier.getNamespace(), executorIdentifier.getExecutorName(), ResourceState.AVAILABLE, - "127.0.0.1:" + executorIdentifier.getPort(), new Date(System.currentTimeMillis())); + "127.0.0.1", String.valueOf(executorIdentifier.getPort()), new Date(System.currentTimeMillis())); } private JobContext getJobContext(Object extraData) { @@ -79,7 +79,7 @@ public Optional get(String namespace, String arn) throws JobExce K8sPodResource ret = new K8sPodResource(ResourceIDUtil.DEFAULT_REGION_PROP_NAME, ResourceIDUtil.DEFAULT_GROUP_PROP_NAME, DefaultResourceOperatorBuilder.CLOUD_K8S_POD_TYPE, namespace, arn, ResourceState.AVAILABLE, - "127.0.0.1", new Date(System.currentTimeMillis())); + "127.0.0.1", "8989", new Date(System.currentTimeMillis())); return Optional.of(ret); } diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/net/HttpServer.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/net/HttpServer.java new file mode 100644 index 0000000000..9134138550 --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/net/HttpServer.java @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2023 OceanBase. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.oceanbase.odc.service.task.net; + +import java.net.InetSocketAddress; +import java.util.concurrent.Future; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.RejectedExecutionHandler; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + +import com.oceanbase.odc.service.task.executor.TraceDecoratorThreadFactory; + +import io.netty.bootstrap.ServerBootstrap; +import io.netty.channel.Channel; +import io.netty.channel.ChannelFuture; +import io.netty.channel.ChannelInitializer; +import io.netty.channel.ChannelOption; +import io.netty.channel.EventLoopGroup; +import io.netty.channel.nio.NioEventLoopGroup; +import io.netty.channel.socket.SocketChannel; +import io.netty.channel.socket.nio.NioServerSocketChannel; +import io.netty.handler.codec.http.HttpObjectAggregator; +import io.netty.handler.codec.http.HttpServerCodec; +import io.netty.handler.timeout.IdleStateHandler; +import lombok.extern.slf4j.Slf4j; + +/** + * @author longpeng.zlp + * @date 2024/11/22 15:17 + */ +@Slf4j +public class HttpServer { + // port expect to listen + private final HttpServerContext serverContext; + // real port listened + private int realListenPort; + // thread pool to do things + private ThreadPoolExecutor requestExecutor; + // start flag + private 
AtomicBoolean started = new AtomicBoolean(false); + // stopped flag, set when an exception occurs or the stop method is called + private AtomicBoolean stopped = new AtomicBoolean(false); + private EventLoopGroup bossGroup; + private EventLoopGroup workerGroup; + private Channel channel; + + public HttpServer(HttpServerContext serverContext) { + this.serverContext = serverContext; + } + + // start http server + public synchronized void start() { + if (!started.compareAndSet(false, true)) { + log.info("http server for {} has been started", serverContext.moduleName()); + return; + } + // param + bossGroup = new NioEventLoopGroup(); + workerGroup = new NioEventLoopGroup(); + requestExecutor = createThreadPool(); + try { + // start server + ServerBootstrap bootstrap = new ServerBootstrap(); + bootstrap.group(bossGroup, workerGroup) + .channel(NioServerSocketChannel.class) + .childHandler(new ChannelInitializer() { + @Override + public void initChannel(SocketChannel channel) throws Exception { + channel.pipeline() + .addLast(new IdleStateHandler(0, 0, 30 * 3, TimeUnit.SECONDS)) // beat 3N, + // close if + // idle + .addLast(new HttpServerCodec()) + .addLast(new HttpObjectAggregator(5 * 1024 * 1024)) // merge request & + // response to FULL + .addLast(new HttpServerHandler<>(serverContext.requestHandler(), requestExecutor, + serverContext.moduleName())); + } + }) + .childOption(ChannelOption.SO_KEEPALIVE, true); + int expectListenPort = serverContext.listenPort(); + // bind to the port supplied by the server context, the port actually bound is read back below + ChannelFuture future = bootstrap.bind(expectListenPort).sync(); + channel = future.channel(); + InetSocketAddress localAddress = (InetSocketAddress) channel.localAddress(); + // record the port actually bound and pass it to the port listener + realListenPort = localAddress.getPort(); + serverContext.portListener().accept(realListenPort); + + log.info("{} remoting server start success, nettype = {}, port = {}, listenPort = {}", + serverContext.moduleName(), serverContext.requestHandler().getClass(), expectListenPort, +
realListenPort); + } catch (InterruptedException e) { + log.info("{} remoting server stop.", serverContext.moduleName()); + stopped.set(true); + } catch (Exception e) { + log.error("{} remoting server error.", serverContext.moduleName(), e); + stopped.set(true); + } + } + + public void waitStop() { + try { + if (stopped.get()) { + log.info("stop flag has been set"); + return; + } + if (null != channel) { + log.info("wait for channel future stop"); + channel.closeFuture().sync(); + } + log.info("channel stopped, quit waitStop"); + } catch (Throwable e) { + log.error("{} remoting server error.", serverContext.moduleName(), e); + } + } + + /** + * stop http server + */ + public void stop() { + stopped.set(true); + if (null != channel) { + channel.close(); + } + synchronized (this) { + shutDownEventLoop(workerGroup); + shutDownEventLoop(bossGroup); + } + if (null != requestExecutor) { + requestExecutor.shutdown(); + } + log.info("HttpServer shutdown invoked"); + } + + /** + * get real listen port + * + * @return + */ + public int getRealListenPort() { + return realListenPort; + } + + private ThreadPoolExecutor createThreadPool() { + return new ThreadPoolExecutor( + 0, + 128, + 60L, + TimeUnit.SECONDS, + new LinkedBlockingQueue(64), + new TraceDecoratorThreadFactory(new ThreadFactory() { + @Override + public Thread newThread(Runnable r) { + return new Thread(r, + serverContext.moduleName() + ", EmbedServer bizThreadPool-" + r.hashCode()); + } + }), + new RejectedExecutionHandler() { + @Override + public void rejectedExecution(Runnable r, ThreadPoolExecutor executor) { + throw new RuntimeException( + serverContext.moduleName() + ", EmbedServer bizThreadPool is EXHAUSTED!"); + } + }); + } + + private void shutDownEventLoop(EventLoopGroup eventLoopGroup) { + if (null == eventLoopGroup) { + return; + } + // stop + try { + Future shutdownFuture = eventLoopGroup.shutdownGracefully(); + shutdownFuture.get(5, TimeUnit.SECONDS); + } catch (Exception e) { + 
log.error(e.getMessage(), e); + } + } +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/net/HttpServerContainer.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/net/HttpServerContainer.java new file mode 100644 index 0000000000..f1ae3b9d0b --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/net/HttpServerContainer.java @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.oceanbase.odc.service.task.net; + +import java.util.function.Consumer; + +import lombok.extern.slf4j.Slf4j; + +/** + * @author longpeng.zlp + * @date 2024/11/25 16:08 + */ +@Slf4j +public abstract class HttpServerContainer { + protected RequestHandler requestHandler; + protected Thread thread; + protected HttpServer httpServer; + + public void start() { + initHttpServer(); + httpServer.start(); + thread = createThread(httpServer::waitStop); + thread.setDaemon(true); // daemon, service jvm, user thread leave >>> daemon leave >>> jvm leave + thread.start(); + } + + /** + * init http server + */ + private void initHttpServer() { + requestHandler = getRequestHandler(); + httpServer = new HttpServer(new HttpServerContext() { + @Override + public int listenPort() { + return getPort(); + } + + @Override + public String moduleName() { + return getModuleName(); + } + + @Override + public RequestHandler requestHandler() { + return requestHandler; + } + + @Override + public Consumer portListener() { + return portConsumer(); + } + }); + } + + public void waitStop() { + if (null != httpServer) { + httpServer.waitStop(); + } + } + + public void stop() throws Exception { + // destroy server thread + if (null != httpServer) { + httpServer.stop(); + } + if (null != thread) { + // max wait 5 seconds + thread.join(5000); + } + log.info("{} remoting server destroy success.", getModuleName()); + } + + /** + * provide port for http server + * + * @return + */ + protected abstract int getPort(); + + /** + * provide request handler + * + * @return + */ + protected abstract RequestHandler getRequestHandler(); + + /** + * provide module name + */ + protected abstract String getModuleName(); + + /** + * provide thread create factory + * + * @param r + * @return + */ + protected abstract Thread createThread(Runnable r); + + /** + * provide port listener + * + * @return + */ + protected abstract Consumer portConsumer(); + +} diff --git 
a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/net/HttpServerContext.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/net/HttpServerContext.java new file mode 100644 index 0000000000..aa816d103c --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/net/HttpServerContext.java @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.oceanbase.odc.service.task.net; + +import java.util.function.Consumer; + +/** + * @author longpeng.zlp + * @date 2024/11/22 16:52 + */ +public interface HttpServerContext { + /** + * port the server is expected to listen on + * + * @return + */ + int listenPort(); + + /** + * name of the module using this server + * + * @return + */ + String moduleName(); + + /** + * request handler to handle http request + * + * @return + */ + RequestHandler requestHandler(); + + /** + * port listener notified with the port actually listened on + */ + Consumer portListener(); +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/net/HttpServerHandler.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/net/HttpServerHandler.java new file mode 100644 index 0000000000..dae9eaede7 --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/net/HttpServerHandler.java @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2023 OceanBase.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.oceanbase.odc.service.task.net; + +import java.util.concurrent.ThreadPoolExecutor; + +import com.oceanbase.odc.common.json.JsonUtils; +import com.oceanbase.odc.common.util.StringUtils; +import com.oceanbase.odc.service.common.util.UrlUtils; +import com.oceanbase.odc.service.task.executor.TraceDecoratorUtils; + +import io.netty.buffer.Unpooled; +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.SimpleChannelInboundHandler; +import io.netty.handler.codec.http.DefaultFullHttpResponse; +import io.netty.handler.codec.http.FullHttpRequest; +import io.netty.handler.codec.http.FullHttpResponse; +import io.netty.handler.codec.http.HttpHeaderNames; +import io.netty.handler.codec.http.HttpHeaderValues; +import io.netty.handler.codec.http.HttpMethod; +import io.netty.handler.codec.http.HttpResponseStatus; +import io.netty.handler.codec.http.HttpUtil; +import io.netty.handler.codec.http.HttpVersion; +import io.netty.handler.timeout.IdleStateEvent; +import io.netty.util.CharsetUtil; +import lombok.extern.slf4j.Slf4j; + +/** + * @author longpeng.zlp + * @date 2024/11/22 15:37 + */ +@Slf4j +public class HttpServerHandler extends SimpleChannelInboundHandler { + + private final ThreadPoolExecutor bizThreadPool; + private final RequestHandler requestHandler; + private final String moduleName; + + public HttpServerHandler(RequestHandler executorRequestHandler, ThreadPoolExecutor 
bizThreadPool, + String moduleName) { + this.requestHandler = executorRequestHandler; + this.bizThreadPool = bizThreadPool; + this.moduleName = moduleName; + } + + @Override + protected void channelRead0(final ChannelHandlerContext ctx, FullHttpRequest msg) throws Exception { + // request parse + // final byte[] requestBytes = ByteBufUtil.getBytes(msg.content()); // + // byteBuf.toString(io.netty.util.CharsetUtil.UTF_8); + String requestData = msg.content().toString(CharsetUtil.UTF_8); + String uri = UrlUtils.decode(msg.uri()); + HttpMethod httpMethod = msg.method(); + boolean keepAlive = HttpUtil.isKeepAlive(msg); + if (StringUtils.isNotBlank(uri)) { + log.info("{} get uri {}", moduleName, uri); + } + if (StringUtils.isNotBlank(requestData)) { + log.info("{} get requestData {}", moduleName, requestData); + } + + // invoke + bizThreadPool.execute(TraceDecoratorUtils.decorate(new Runnable() { + @Override + public void run() { + T responseObj = null; + try { + // do invoke + responseObj = requestHandler.process(httpMethod, uri, requestData); + } catch (Throwable e) { + log.info("request handler failed", e); + responseObj = requestHandler.processException(e); + } + + // to json + String responseJson = JsonUtils.toJson(responseObj); + + // write response + writeResponse(ctx, keepAlive, responseJson); + } + })); + } + + /** + * write response + */ + private void writeResponse(ChannelHandlerContext ctx, boolean keepAlive, String responseJson) { + // write response + FullHttpResponse response = new DefaultFullHttpResponse(HttpVersion.HTTP_1_1, HttpResponseStatus.OK, + Unpooled.copiedBuffer(responseJson, CharsetUtil.UTF_8)); + response.headers().set(HttpHeaderNames.CONTENT_TYPE, "application/json"); + response.headers().set(HttpHeaderNames.CONTENT_LENGTH, response.content().readableBytes()); + if (keepAlive) { + response.headers().set(HttpHeaderNames.CONNECTION, HttpHeaderValues.KEEP_ALIVE); + } + ctx.writeAndFlush(response); + } + + @Override + public void 
channelReadComplete(ChannelHandlerContext ctx) throws Exception { + ctx.flush(); + } + + @Override + public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) { + log.error("{} provider netty_http server caught exception", moduleName, cause); + ctx.close(); + } + + @Override + public void userEventTriggered(ChannelHandlerContext ctx, Object evt) throws Exception { + if (evt instanceof IdleStateEvent) { + ctx.channel().close(); // beat 3N, close if idle + log.debug("{} provider netty_http server close an idle channel.", moduleName); + } else { + super.userEventTriggered(ctx, evt); + } + } +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/net/RequestHandler.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/net/RequestHandler.java new file mode 100644 index 0000000000..5bc7c54a50 --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/net/RequestHandler.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.oceanbase.odc.service.task.net; + +import io.netty.handler.codec.http.HttpMethod; + +/** + * @author longpeng.zlp + * @date 2024/11/22 15:28 + */ +public interface RequestHandler { + T process(HttpMethod httpMethod, String uri, String requestData); + + /** + * cast exception to known type. 
+ * + * @param e + * @return + */ + T processException(Throwable e); +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/resource/DefaultResourceOperatorBuilder.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/resource/DefaultResourceOperatorBuilder.java index 16004d4213..a94008a4fd 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/resource/DefaultResourceOperatorBuilder.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/resource/DefaultResourceOperatorBuilder.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.util.Optional; +import org.apache.commons.lang3.tuple.Pair; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; @@ -126,6 +127,7 @@ public ResourceEntity toResourceEntity(K8sPodResource k8sResource) { @Override public K8sPodResource toResource(ResourceEntity resourceEntity, Optional runtimeResource) { + Pair ipAndPort = K8sPodResource.parseIPAndPort(resourceEntity.getEndpoint()); return new K8sPodResource( resourceEntity.getRegion(), resourceEntity.getGroupName(), @@ -133,7 +135,8 @@ public K8sPodResource toResource(ResourceEntity resourceEntity, Optional parseIPAndPort(String k8sEndPoint) { + String[] infos = StringUtils.split(k8sEndPoint, "::"); + if (null == infos || infos.length != 6) { + throw new IllegalStateException( + "expect k8s endpoint constructed by k8s::region::namespace::arn::ip::port, but current is " + + k8sEndPoint); + } + return Pair.of(infos[4], infos[5]); + } + public ResourceState resourceState() { return resourceState; } diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/resource/ProcessTaskResourceManager.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/resource/ProcessTaskResourceManager.java new file mode 100644 index 0000000000..6b47fbc7de --- /dev/null +++ 
b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/resource/ProcessTaskResourceManager.java @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.oceanbase.odc.service.task.resource; + +import java.time.Duration; +import java.time.Instant; +import java.util.Date; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import org.apache.commons.collections4.CollectionUtils; +import org.springframework.data.domain.PageRequest; +import org.springframework.data.jpa.domain.Specification; + +import com.oceanbase.odc.common.jpa.SpecificationUtil; +import com.oceanbase.odc.common.json.JsonUtils; +import com.oceanbase.odc.metadb.task.ResourceAllocateInfoEntity; +import com.oceanbase.odc.metadb.task.ResourceAllocateInfoRepository; +import com.oceanbase.odc.metadb.task.SupervisorEndpointEntity; +import com.oceanbase.odc.metadb.task.SupervisorEndpointRepository; +import com.oceanbase.odc.service.task.constants.JobConstants; +import com.oceanbase.odc.service.task.supervisor.SupervisorEndpointState; +import com.oceanbase.odc.service.task.supervisor.TaskSupervisor; +import com.oceanbase.odc.service.task.supervisor.endpoint.SupervisorEndpoint; +import com.oceanbase.odc.service.task.supervisor.runtime.LocalTaskCommandExecutor; +import com.oceanbase.odc.service.task.supervisor.runtime.TaskSupervisorServer; +import 
com.oceanbase.odc.service.task.util.TaskSupervisorUtil; + +import lombok.extern.slf4j.Slf4j; + +/** + * @author longpeng.zlp + * @date 2024/12/2 14:43 + */ +@Slf4j +public class ProcessTaskResourceManager implements TaskResourceManager { + protected final SupervisorEndpointRepository supervisorEndpointRepository; + protected final ResourceAllocateInfoRepository resourceAllocateInfoRepository; + protected TaskSupervisorServer taskSupervisorServer; + + public ProcessTaskResourceManager(SupervisorEndpointRepository supervisorEndpointRepository, + ResourceAllocateInfoRepository resourceAllocateInfoRepository) { + this.supervisorEndpointRepository = supervisorEndpointRepository; + this.resourceAllocateInfoRepository = resourceAllocateInfoRepository; + } + + @Override + public void initTaskResourceManager() { + SupervisorEndpoint localEndpoint = TaskSupervisorUtil.getDefaultSupervisorEndpoint(); + startTaskSupervisorServer(localEndpoint); + tryRegisterTaskSupervisorAgent(localEndpoint); + } + + private void startTaskSupervisorServer(SupervisorEndpoint supervisorEndpoint) { + TaskSupervisor taskSupervisor = + new TaskSupervisor(supervisorEndpoint, + JobConstants.ODC_AGENT_CLASS_NAME); + taskSupervisorServer = + new TaskSupervisorServer(supervisorEndpoint.getPort(), new LocalTaskCommandExecutor(taskSupervisor)); + try { + taskSupervisorServer.start(); + log.info("Starting task supervisor server."); + // current directly quit agent + } catch (Exception e) { + log.warn("Supervisor agent stopped", e); + throw e; + } + } + + /** + */ + @Override + public void execute() { + log.debug("begin process task resource execute"); + // 1. allocate supervisor agent + allocateSupervisorAgent(); + // 2. 
deallocate supervisor agent + deAllocateSupervisorAgent(); + } + + protected void allocateSupervisorAgent() { + List resourceToAllocate = collectAllocateInfo(); + for (ResourceAllocateInfoEntity allocateInfoEntity : resourceToAllocate) { + if (isAllocateInfoExpired(allocateInfoEntity)) { + failedAllocateForId(allocateInfoEntity.getTaskId()); + } + SupervisorEndpoint supervisorEndpoint = chooseSupervisorEndpoint(supervisorEndpointRepository); + // allocate success + if (null != supervisorEndpoint) { + allocateForJob(supervisorEndpoint, allocateInfoEntity.getTaskId()); + } + } + } + + protected void deAllocateSupervisorAgent() { + List resourceToDeallocate = collectDeAllocateInfo(); + for (ResourceAllocateInfoEntity deAllocateInfoEntity : resourceToDeallocate) { + SupervisorEndpoint supervisorEndpoint = + JsonUtils.fromJson(deAllocateInfoEntity.getEndpoint(), SupervisorEndpoint.class); + // allocate success + if (null != supervisorEndpoint) { + releaseLoad(supervisorEndpoint); + } else { + log.warn("supervisorEndpoint not parsed, taskID = {}, originValue = {}", + deAllocateInfoEntity.getTaskId(), deAllocateInfoEntity.getEndpoint()); + } + finishedAllocateForId(deAllocateInfoEntity.getTaskId()); + } + } + + protected boolean isAllocateInfoExpired(ResourceAllocateInfoEntity entity) { + Duration between = Duration.between(entity.getUpdateTime().toInstant(), Instant.now()); + // 300 seconds considered as timeout + // TODO(lx): config it + return (between.toMillis() / 1000 > 300); + } + + + private void releaseLoad(SupervisorEndpoint supervisorEndpoint) { + Optional optionalSupervisorEndpointEntity = supervisorEndpointRepository + .findByHostAndPort(supervisorEndpoint.getHost(), Integer.valueOf(supervisorEndpoint.getPort())); + if (!optionalSupervisorEndpointEntity.isPresent()) { + log.warn("update supervisor endpoint failed, endpoint={}", supervisorEndpoint); + return; + } + SupervisorEndpointEntity supervisorEndpointEntity = optionalSupervisorEndpointEntity.get(); + 
supervisorEndpointRepository.addLoadByHostAndPort(supervisorEndpointEntity.getHost(), + supervisorEndpointEntity.getPort(), -1); + } + + /** + * collect allocate info needed to allocate + * + * @return + */ + protected List collectAllocateInfo() { + Specification condition = Specification.where( + SpecificationUtil.columnEqual("resourceAllocateState", ResourceAllocateState.PREPARING.name())); + return resourceAllocateInfoRepository.findAll(condition, PageRequest.of(0, 100)).getContent(); + } + + /** + * collect deallocate info needed to deallocate + * + * @return + */ + protected List collectDeAllocateInfo() { + Specification condition = Specification.where( + SpecificationUtil.columnEqual("resourceUsageState", ResourceUsageState.FINISHED.name())); + Specification query = condition + .and(SpecificationUtil.columnEqual("resourceAllocateState", ResourceAllocateState.AVAILABLE.name())); + return resourceAllocateInfoRepository.findAll(query, PageRequest.of(0, 100)).getContent(); + } + + protected List collectRunningSupervisorEndpoint() { + Specification condition = Specification.where( + SpecificationUtil.columnEqual("status", SupervisorEndpointState.AVAILABLE.name())); + return supervisorEndpointRepository.findAll(condition, PageRequest.of(0, 100)).getContent(); + } + + /** + * this logic will wrap to a interface + * + * @return + */ + protected SupervisorEndpoint chooseSupervisorEndpoint(SupervisorEndpointRepository supervisorEndpointRepository) { + List supervisorEndpointEntities = collectRunningSupervisorEndpoint(); + // no available found + if (CollectionUtils.isEmpty(supervisorEndpointEntities)) { + // no endpoint found, that's not good + log.warn("not supervisor end point found"); + return null; + } + // use load smaller + supervisorEndpointEntities = supervisorEndpointEntities.stream() + .sorted((s1, s2) -> Integer.compare(s1.getLoads(), s2.getLoads())).collect( + Collectors.toList()); + SupervisorEndpointEntity tmp = supervisorEndpointEntities.get(0); + 
SupervisorEndpoint ret = new SupervisorEndpoint(tmp.getHost(), tmp.getPort()); + // each task means one load + supervisorEndpointRepository.addLoadByHostAndPort(tmp.getHost(), tmp.getPort(), 1); + return ret; + } + + /** + * register self to meta store + */ + private void tryRegisterTaskSupervisorAgent(SupervisorEndpoint localEndpoint) { + log.info("start with supervisor agent mode, try register agent"); + Optional registered = supervisorEndpointRepository + .findByHostAndPort(localEndpoint.getHost(), localEndpoint.getPort()); + if (registered.isPresent()) { + supervisorEndpointRepository.updateStatusByHostAndPort(localEndpoint.getHost(), localEndpoint.getPort(), + SupervisorEndpointState.AVAILABLE.name()); + } else { + SupervisorEndpointEntity created = new SupervisorEndpointEntity(); + created.setHost(localEndpoint.getHost()); + created.setPort(localEndpoint.getPort()); + created.setResourceID(-1L); + created.setLoads(0); + created.setStatus(SupervisorEndpointState.AVAILABLE.name()); + created.setCreateTime(new Date(System.currentTimeMillis())); + supervisorEndpointRepository.save(created); + } + } + + + /** + * allocate endpoint for id, this method will called by resource manager + * + * @param supervisorEndpoint + * @param taskID + */ + protected void allocateForJob(SupervisorEndpoint supervisorEndpoint, Long taskID) { + resourceAllocateInfoRepository.updateEndpointByTaskId(JsonUtils.toJson(supervisorEndpoint), taskID); + } + + /** + * task allocate has failed for id, this method will called by resource manager + * + * @param taskID + */ + protected void failedAllocateForId(Long taskID) { + resourceAllocateInfoRepository.updateResourceAllocateStateByTaskId(ResourceAllocateState.FAILED.name(), taskID); + } + + /** + * task allocate has finished for id, this method will called by resource manager + * + * @param taskID + */ + protected void finishedAllocateForId(Long taskID) { + 
resourceAllocateInfoRepository.updateResourceAllocateStateByTaskId(ResourceAllocateState.FINISHED.name(), + taskID); + } +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/JobConfigurationValidator.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/resource/ResourceAllocateState.java similarity index 50% rename from server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/JobConfigurationValidator.java rename to server/odc-service/src/main/java/com/oceanbase/odc/service/task/resource/ResourceAllocateState.java index d20e6a89c1..0cb1e98656 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/config/JobConfigurationValidator.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/resource/ResourceAllocateState.java @@ -13,21 +13,23 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.oceanbase.odc.service.task.config; +package com.oceanbase.odc.service.task.resource; -import com.oceanbase.odc.core.shared.PreConditions; +import com.oceanbase.odc.common.util.StringUtils; /** - * @author yaobin - * @date 2024-01-15 - * @since 4.2.4 + * state for resource allocate, only operate by resource allocator + * + * @author longpeng.zlp + * @date 2024/12/4 17:57 */ -public class JobConfigurationValidator { +public enum ResourceAllocateState { + PREPARING, + AVAILABLE, + FAILED, + FINISHED; - public static void validComponent() { - JobConfiguration jobConfiguration = JobConfigurationHolder.getJobConfiguration(); - PreConditions.notNull(jobConfiguration, "jobConfiguration"); - PreConditions.notNull(jobConfiguration.getTaskFrameworkService(), "taskFrameworkService"); - PreConditions.notNull(jobConfiguration.getTaskFrameworkProperties(), "taskFrameworkProperties"); + public static ResourceAllocateState fromString(String allocateState) { + return 
ResourceAllocateState.valueOf(StringUtils.upperCase(allocateState)); } } diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/resource/ResourceUsageState.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/resource/ResourceUsageState.java new file mode 100644 index 0000000000..de4383398b --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/resource/ResourceUsageState.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.oceanbase.odc.service.task.resource; + +import com.oceanbase.odc.common.util.StringUtils; + +/** + * state for resource usage, only update by resource user + * + * @author longpeng.zlp + * @date 2024/12/4 17:57 + */ +public enum ResourceUsageState { + PREPARING, + USING, + FINISHED; + + public static ResourceUsageState fromString(String allocateState) { + return ResourceUsageState.valueOf(StringUtils.upperCase(allocateState)); + } +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/resource/SupervisorAgentAllocator.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/resource/SupervisorAgentAllocator.java new file mode 100644 index 0000000000..4b7d0e8394 --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/resource/SupervisorAgentAllocator.java @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2023 OceanBase. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.oceanbase.odc.service.task.resource; + +import java.util.Optional; + +import com.oceanbase.odc.common.json.JsonUtils; +import com.oceanbase.odc.metadb.task.ResourceAllocateInfoEntity; +import com.oceanbase.odc.metadb.task.ResourceAllocateInfoRepository; +import com.oceanbase.odc.service.task.caller.JobContext; +import com.oceanbase.odc.service.task.supervisor.endpoint.SupervisorEndpoint; + +import lombok.extern.slf4j.Slf4j; + +/** + * @author longpeng.zlp + * @date 2024/12/5 10:41 + */ +@Slf4j +public class SupervisorAgentAllocator { + protected final ResourceAllocateInfoRepository resourceAllocateInfoRepository; + + public SupervisorAgentAllocator(ResourceAllocateInfoRepository resourceAllocateInfoRepository) { + this.resourceAllocateInfoRepository = resourceAllocateInfoRepository; + } + + public Optional tryAllocateSupervisorEndpoint(JobContext jobContext) { + // register it to allocate info + ResourceAllocateInfoEntity entity = createAllocateInfo(jobContext); + ResourceAllocateState resourceAllocateState = + ResourceAllocateState.fromString(entity.getResourceAllocateState()); + switch (resourceAllocateState) { + // failed and finished is illegal state for allocate operation + case FAILED: + log.info("allocate resource failed for jobContext = {}", jobContext); + updateUsageState(jobContext.getJobIdentity().getId(), ResourceUsageState.FINISHED); + throw new RuntimeException("allocate 
resource failed for jobContext = " + jobContext + ")"); + case FINISHED: + log.info("allocate resource invalid state with finished for jobContext = {}", jobContext); + throw new RuntimeException( + "allocate resource invalid state with finished for jobContext = " + jobContext + ")"); + case AVAILABLE: + log.info("allocate resource succeed for jobContext = {}, allocate endpoint = {}", jobContext, + entity.getEndpoint()); + SupervisorEndpoint ret = JsonUtils.fromJson(entity.getEndpoint(), SupervisorEndpoint.class); + updateUsageState(jobContext.getJobIdentity().getId(), ResourceUsageState.USING); + return Optional.of(ret); + case PREPARING: + return Optional.empty(); + default: + throw new RuntimeException("allocate resource meet unexpected state =" + resourceAllocateState); + } + } + + public void deallocateSupervisorEndpoint(Long taskID) { + updateUsageState(taskID, ResourceUsageState.FINISHED); + } + + /** + * create allocate info for job context + * + * @param jobContext + */ + protected ResourceAllocateInfoEntity createAllocateInfo(JobContext jobContext) { + Optional resourceAllocateInfoEntity = + resourceAllocateInfoRepository.findByTaskIdNative(jobContext.getJobIdentity().getId()); + if (resourceAllocateInfoEntity.isPresent()) { + return resourceAllocateInfoEntity.get(); + } + ResourceAllocateInfoEntity created = new ResourceAllocateInfoEntity(); + created.setResourceAllocateState(ResourceAllocateState.PREPARING.name()); + created.setResourceUsageState(ResourceUsageState.PREPARING.name()); + created.setEndpoint(null); + created.setTaskId(jobContext.getJobIdentity().getId()); + resourceAllocateInfoRepository.save(created); + return created; + } + + /** + * update usage state for task id (job id), this method will called by resource user + * + * @param taskId + * @param resourceUsageState + */ + protected void updateUsageState(Long taskId, ResourceUsageState resourceUsageState) { + 
resourceAllocateInfoRepository.updateResourceUsageStateByTaskId(resourceUsageState.name(), taskId); + } +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/resource/TaskResourceManager.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/resource/TaskResourceManager.java new file mode 100644 index 0000000000..e22b723ac2 --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/resource/TaskResourceManager.java @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Manager of the resources that tasks run on.
 *
 * @author longpeng.zlp
 * @date 2024/12/2 14:24
 */
public interface TaskResourceManager {

    /**
     * Tries to initialize this resource manager.
     */
    void initTaskResourceManager();

    /**
     * Executes one round of task resource management.
     */
    void execute();
}
b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/StdJobScheduler.java index 5cae71ebf3..6fef7eff8c 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/StdJobScheduler.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/StdJobScheduler.java @@ -55,12 +55,14 @@ import com.oceanbase.odc.service.task.listener.DefaultJobCallerListener; import com.oceanbase.odc.service.task.schedule.daemon.CheckRunningJob; import com.oceanbase.odc.service.task.schedule.daemon.DestroyExecutorJob; -import com.oceanbase.odc.service.task.schedule.daemon.DestroyResourceJob; import com.oceanbase.odc.service.task.schedule.daemon.DoCancelingJob; +import com.oceanbase.odc.service.task.schedule.daemon.ManagerResourceJob; import com.oceanbase.odc.service.task.schedule.daemon.PullTaskResultJob; import com.oceanbase.odc.service.task.schedule.daemon.StartPreparingJob; +import com.oceanbase.odc.service.task.schedule.daemon.StartPreparingJobRunBySupervisorAgent; import com.oceanbase.odc.service.task.service.JobRunnable; import com.oceanbase.odc.service.task.util.JobUtils; +import com.oceanbase.odc.service.task.util.TaskSupervisorUtil; import cn.hutool.core.util.StrUtil; import lombok.NonNull; @@ -180,7 +182,7 @@ private void tryCanceling(Long jobId) throws JobException { Map eventMessage = AlarmUtils.createAlarmMapBuilder() .item(AlarmUtils.ORGANIZATION_NAME, Optional.ofNullable(jobEntity.getOrganizationId()).map( Object::toString).orElse(StrUtil.EMPTY)) - .item(AlarmUtils.TASK_JOB_ID_NAME, jobId.toString()) + .item(AlarmUtils.TASK_JOB_ID_NAME, String.valueOf(jobId)) .item(AlarmUtils.MESSAGE_NAME, MessageFormat.format("Cancel job failed, jobId={0}, message={1}", jobEntity.getId(), e.getMessage())) @@ -203,14 +205,14 @@ private void initDaemonJob() { initStartPreparingJob(); initDoCancelingJob(); initDestroyExecutorJob(); - initDestroyResource(); + initManageResource(); } private void initCheckRunningJob() { 
String key = "checkRunningJob"; initCronJob(key, configuration.getTaskFrameworkProperties().getCheckRunningJobCronExpression(), - CheckRunningJob.class); + CheckRunningJob.class, scheduler); } private void initPullTaskResultJob() { @@ -221,40 +223,50 @@ private void initPullTaskResultJob() { String key = "pullTaskResultJob"; initCronJob(key, configuration.getTaskFrameworkProperties().getPullTaskResultJobCronExpression(), - PullTaskResultJob.class); + PullTaskResultJob.class, scheduler); } private void initStartPreparingJob() { - String key = "startPreparingJob"; - initCronJob(key, - configuration.getTaskFrameworkProperties().getStartPreparingJobCronExpression(), - StartPreparingJob.class); + if (TaskSupervisorUtil.isTaskSupervisorEnabled(taskFrameworkProperties)) { + log.info("start with supervisor preparing job"); + String key = "startPreparingSupervisorTaskJob"; + initCronJob(key, + configuration.getTaskFrameworkProperties().getStartPreparingJobCronExpression(), + StartPreparingJobRunBySupervisorAgent.class, configuration.getTaskSupervisorScheduler()); + } else { + log.info("start with normal preparing job"); + String key = "startPreparingJob"; + initCronJob(key, + configuration.getTaskFrameworkProperties().getStartPreparingJobCronExpression(), + StartPreparingJob.class, scheduler); + } } private void initDoCancelingJob() { String key = "doCancelingJob"; initCronJob(key, configuration.getTaskFrameworkProperties().getDoCancelingJobCronExpression(), - DoCancelingJob.class); + DoCancelingJob.class, scheduler); } private void initDestroyExecutorJob() { String key = "destroyExecutorJob"; initCronJob(key, configuration.getTaskFrameworkProperties().getDestroyExecutorJobCronExpression(), - DestroyExecutorJob.class); + DestroyExecutorJob.class, scheduler); } - private void initDestroyResource() { - if (configuration.getTaskFrameworkProperties().getRunMode() == TaskRunMode.K8S) { - String key = "destroyResourceJob"; + private void initManageResource() { + if 
(TaskSupervisorUtil.isTaskSupervisorEnabled(taskFrameworkProperties) + || taskFrameworkProperties.getRunMode() == TaskRunMode.K8S) { + String key = "managerResourceJob"; initCronJob(key, configuration.getTaskFrameworkProperties().getDestroyExecutorJobCronExpression(), - DestroyResourceJob.class); + ManagerResourceJob.class, configuration.getTaskSupervisorScheduler()); } } - private void initCronJob(String key, String cronExpression, Class jobClass) { + private void initCronJob(String key, String cronExpression, Class jobClass, Scheduler scheduler) { TriggerConfig config = new TriggerConfig(); config.setTriggerStrategy(TriggerStrategy.CRON); config.setCronExpression(cronExpression); @@ -266,7 +278,7 @@ private void initCronJob(String key, String cronExpression, Class JobDetail detail = JobBuilder.newJob(jobClass) .withIdentity(JobKey.jobKey(key, group)) .build(); - scheduleCronJob(triggerKey, trigger, detail); + scheduleCronJob(triggerKey, trigger, detail, scheduler); } catch (JobException e) { log.warn("build trigger {} failed:", key, e); } catch (SchedulerException e) { @@ -274,7 +286,7 @@ private void initCronJob(String key, String cronExpression, Class } } - private void scheduleCronJob(TriggerKey triggerKey, Trigger trigger, JobDetail detail) + private void scheduleCronJob(TriggerKey triggerKey, Trigger trigger, JobDetail detail, Scheduler scheduler) throws SchedulerException { if (scheduler.checkExists(triggerKey)) { if (scheduler.getTrigger(triggerKey) instanceof CronTrigger && trigger instanceof CronTrigger) { diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/CheckRunningJob.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/CheckRunningJob.java index af71061231..5602732442 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/CheckRunningJob.java +++ 
b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/CheckRunningJob.java @@ -32,7 +32,6 @@ import com.oceanbase.odc.metadb.task.JobEntity; import com.oceanbase.odc.service.task.config.JobConfiguration; import com.oceanbase.odc.service.task.config.JobConfigurationHolder; -import com.oceanbase.odc.service.task.config.JobConfigurationValidator; import com.oceanbase.odc.service.task.config.TaskFrameworkProperties; import com.oceanbase.odc.service.task.enums.JobStatus; import com.oceanbase.odc.service.task.enums.TaskRunMode; @@ -62,7 +61,6 @@ public class CheckRunningJob implements Job { @Override public void execute(JobExecutionContext context) throws JobExecutionException { configuration = JobConfigurationHolder.getJobConfiguration(); - JobConfigurationValidator.validComponent(); TaskFrameworkProperties taskFrameworkProperties = getConfiguration().getTaskFrameworkProperties(); int heartTimeoutSeconds = taskFrameworkProperties.getJobHeartTimeoutSeconds(); // find heartbeat timeout job diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/DestroyExecutorJob.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/DestroyExecutorJob.java index 0e9ae03fd4..e1982f7f3f 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/DestroyExecutorJob.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/DestroyExecutorJob.java @@ -30,7 +30,6 @@ import com.oceanbase.odc.metadb.task.JobEntity; import com.oceanbase.odc.service.task.config.JobConfiguration; import com.oceanbase.odc.service.task.config.JobConfigurationHolder; -import com.oceanbase.odc.service.task.config.JobConfigurationValidator; import com.oceanbase.odc.service.task.config.TaskFrameworkProperties; import com.oceanbase.odc.service.task.constants.JobConstants; import com.oceanbase.odc.service.task.exception.JobException; @@ -55,7 +54,6 @@ public 
class DestroyExecutorJob implements Job { @Override public void execute(JobExecutionContext context) throws JobExecutionException { configuration = JobConfigurationHolder.getJobConfiguration(); - JobConfigurationValidator.validComponent(); // scan terminate job TaskFrameworkService taskFrameworkService = configuration.getTaskFrameworkService(); @@ -103,6 +101,7 @@ private void destroyExecutor(TaskFrameworkService taskFrameworkService, JobEntit // executorIdentifier is null, otherwise, the job cannot be released. log.info("Executor not found, updating executor to destroyed,jobId={}", lockedEntity.getId()); taskFrameworkService.updateExecutorToDestroyed(lockedEntity.getId()); + configuration.getSupervisorAgentAllocator().deallocateSupervisorEndpoint(lockedEntity.getId()); } }); } diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/DoCancelingJob.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/DoCancelingJob.java index 831326cfb1..b385851133 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/DoCancelingJob.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/DoCancelingJob.java @@ -24,7 +24,6 @@ import com.oceanbase.odc.metadb.task.JobEntity; import com.oceanbase.odc.service.task.config.JobConfiguration; import com.oceanbase.odc.service.task.config.JobConfigurationHolder; -import com.oceanbase.odc.service.task.config.JobConfigurationValidator; import com.oceanbase.odc.service.task.config.TaskFrameworkProperties; import com.oceanbase.odc.service.task.enums.JobStatus; import com.oceanbase.odc.service.task.enums.TaskRunMode; @@ -49,7 +48,6 @@ public class DoCancelingJob implements Job { @Override public void execute(JobExecutionContext context) throws JobExecutionException { configuration = JobConfigurationHolder.getJobConfiguration(); - JobConfigurationValidator.validComponent(); // scan preparing job 
TaskFrameworkService taskFrameworkService = configuration.getTaskFrameworkService(); TaskFrameworkProperties taskFrameworkProperties = configuration.getTaskFrameworkProperties(); diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/DestroyResourceJob.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/ManagerResourceJob.java similarity index 80% rename from server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/DestroyResourceJob.java rename to server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/ManagerResourceJob.java index 36075d4746..cf549bf850 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/DestroyResourceJob.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/ManagerResourceJob.java @@ -31,11 +31,13 @@ import com.oceanbase.odc.service.resource.ResourceLocation; import com.oceanbase.odc.service.task.config.JobConfiguration; import com.oceanbase.odc.service.task.config.JobConfigurationHolder; -import com.oceanbase.odc.service.task.config.JobConfigurationValidator; import com.oceanbase.odc.service.task.config.TaskFrameworkProperties; import com.oceanbase.odc.service.task.constants.JobConstants; +import com.oceanbase.odc.service.task.enums.TaskRunMode; import com.oceanbase.odc.service.task.exception.TaskRuntimeException; +import com.oceanbase.odc.service.task.resource.TaskResourceManager; import com.oceanbase.odc.service.task.service.TaskFrameworkService; +import com.oceanbase.odc.service.task.util.TaskSupervisorUtil; import lombok.extern.slf4j.Slf4j; @@ -46,18 +48,37 @@ */ @Slf4j @DisallowConcurrentExecution -public class DestroyResourceJob implements Job { +public class ManagerResourceJob implements Job { private JobConfiguration configuration; @Override public void execute(JobExecutionContext context) throws JobExecutionException { configuration = 
JobConfigurationHolder.getJobConfiguration(); - JobConfigurationValidator.validComponent(); - // scan terminate job TaskFrameworkService taskFrameworkService = configuration.getTaskFrameworkService(); TaskFrameworkProperties taskFrameworkProperties = configuration.getTaskFrameworkProperties(); + processTaskResource(configuration.getTaskResourceManager(), taskFrameworkProperties); + processRealResource(taskFrameworkProperties, taskFrameworkService); + } + + private void processTaskResource(TaskResourceManager taskResourceManager, + TaskFrameworkProperties taskFrameworkProperties) { + if (!TaskSupervisorUtil.isTaskSupervisorEnabled(taskFrameworkProperties)) { + return; + } + try { + taskResourceManager.execute(); + } catch (Throwable e) { + log.warn("process task resource failed cause", e); + } + } + + private void processRealResource(TaskFrameworkProperties taskFrameworkProperties, + TaskFrameworkService taskFrameworkService) { + if (!(taskFrameworkProperties.getRunMode() == TaskRunMode.K8S)) { + return; + } Page resources = taskFrameworkService.findAbandonedResource(0, taskFrameworkProperties.getSingleFetchDestroyExecutorJobRows()); resources.forEach(a -> { diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/PullTaskResultJob.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/PullTaskResultJob.java index 9e0610c8e3..8aa210ea2a 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/PullTaskResultJob.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/PullTaskResultJob.java @@ -24,7 +24,6 @@ import com.oceanbase.odc.metadb.task.JobEntity; import com.oceanbase.odc.service.task.config.JobConfiguration; import com.oceanbase.odc.service.task.config.JobConfigurationHolder; -import com.oceanbase.odc.service.task.config.JobConfigurationValidator; import com.oceanbase.odc.service.task.config.TaskFrameworkProperties; import 
com.oceanbase.odc.service.task.service.TaskFrameworkService; @@ -47,8 +46,6 @@ public void execute(JobExecutionContext context) throws JobExecutionException { this.taskFrameworkProperties = configuration.getTaskFrameworkProperties(); this.taskFrameworkService = configuration.getTaskFrameworkService(); - JobConfigurationValidator.validComponent(); - int singlePullResultJobRows = taskFrameworkProperties.getSinglePullResultJobRows(); Page runningJobs = taskFrameworkService.findRunningJobs(0, singlePullResultJobRows); runningJobs.forEach(job -> taskFrameworkService.refreshResult(job.getId())); diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/StartPreparingJob.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/StartPreparingJob.java index 18c46070de..9d72288d68 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/StartPreparingJob.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/StartPreparingJob.java @@ -34,7 +34,6 @@ import com.oceanbase.odc.service.task.caller.JobContext; import com.oceanbase.odc.service.task.config.JobConfiguration; import com.oceanbase.odc.service.task.config.JobConfigurationHolder; -import com.oceanbase.odc.service.task.config.JobConfigurationValidator; import com.oceanbase.odc.service.task.config.TaskFrameworkProperties; import com.oceanbase.odc.service.task.enums.JobStatus; import com.oceanbase.odc.service.task.exception.JobException; @@ -62,7 +61,6 @@ public class StartPreparingJob implements Job { @Override public void execute(JobExecutionContext context) throws JobExecutionException { configuration = JobConfigurationHolder.getJobConfiguration(); - JobConfigurationValidator.validComponent(); if (!configuration.getTaskFrameworkEnabledProperties().isEnabled()) { configuration.getTaskFrameworkDisabledHandler().handleJobToFailed(); @@ -117,7 +115,7 @@ private void 
startJob(TaskFrameworkService taskFrameworkService, JobEntity jobEn Map eventMessage = AlarmUtils.createAlarmMapBuilder() .item(AlarmUtils.ORGANIZATION_NAME, Optional.ofNullable(jobEntity.getOrganizationId()).map( Object::toString).orElse(StrUtil.EMPTY)) - .item(AlarmUtils.TASK_JOB_ID_NAME, jobEntity.getId().toString()) + .item(AlarmUtils.TASK_JOB_ID_NAME, String.valueOf(jobEntity.getId())) .item(AlarmUtils.MESSAGE_NAME, MessageFormat.format("Start job failed, jobId={0}, message={1}", lockedEntity.getId(), diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/StartPreparingJobRunBySupervisorAgent.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/StartPreparingJobRunBySupervisorAgent.java new file mode 100644 index 0000000000..f2a6972d79 --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/schedule/daemon/StartPreparingJobRunBySupervisorAgent.java @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.oceanbase.odc.service.task.schedule.daemon; + +import java.text.MessageFormat; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.TimeUnit; + +import org.quartz.DisallowConcurrentExecution; +import org.quartz.Job; +import org.quartz.JobExecutionContext; +import org.quartz.JobExecutionException; +import org.springframework.data.domain.Page; + +import com.google.common.collect.Lists; +import com.oceanbase.odc.common.trace.TraceContextHolder; +import com.oceanbase.odc.core.alarm.AlarmEventNames; +import com.oceanbase.odc.core.alarm.AlarmUtils; +import com.oceanbase.odc.metadb.task.JobEntity; +import com.oceanbase.odc.service.task.caller.JobCallerBuilder; +import com.oceanbase.odc.service.task.caller.JobContext; +import com.oceanbase.odc.service.task.caller.JobEnvironmentFactory; +import com.oceanbase.odc.service.task.caller.ProcessJobCaller; +import com.oceanbase.odc.service.task.config.JobConfiguration; +import com.oceanbase.odc.service.task.config.JobConfigurationHolder; +import com.oceanbase.odc.service.task.config.TaskFrameworkProperties; +import com.oceanbase.odc.service.task.enums.JobStatus; +import com.oceanbase.odc.service.task.enums.TaskRunMode; +import com.oceanbase.odc.service.task.exception.JobException; +import com.oceanbase.odc.service.task.exception.TaskRuntimeException; +import com.oceanbase.odc.service.task.schedule.DefaultJobContextBuilder; +import com.oceanbase.odc.service.task.schedule.SingleJobProperties; +import com.oceanbase.odc.service.task.service.TaskFrameworkService; +import com.oceanbase.odc.service.task.supervisor.endpoint.ExecutorEndpoint; +import com.oceanbase.odc.service.task.supervisor.endpoint.SupervisorEndpoint; +import com.oceanbase.odc.service.task.util.JobDateUtils; + +import cn.hutool.core.util.StrUtil; +import lombok.extern.slf4j.Slf4j; + +/** + * Quartz job that scans PREPARING / RETRYING jobs and starts each one through an allocated supervisor endpoint. + * + * @author longpeng.zlp + * @date 2024/11/29 14:08 + */ +@Slf4j +@DisallowConcurrentExecution
+public class StartPreparingJobRunBySupervisorAgent implements Job { + + @Override + public void execute(JobExecutionContext context) throws JobExecutionException { + JobConfiguration configuration = JobConfigurationHolder.getJobConfiguration(); + + if (!configuration.getTaskFrameworkEnabledProperties().isEnabled()) { + configuration.getTaskFrameworkDisabledHandler().handleJobToFailed(); + return; + } + + TaskFrameworkProperties taskFrameworkProperties = configuration.getTaskFrameworkProperties(); + // fetch the first page of jobs that are in PREPARING or RETRYING status + TaskFrameworkService taskFrameworkService = configuration.getTaskFrameworkService(); + Page jobs = taskFrameworkService.find( + Lists.newArrayList(JobStatus.PREPARING, JobStatus.RETRYING), 0, + taskFrameworkProperties.getSingleFetchPreparingJobRows()); + + for (JobEntity jobEntity : jobs) { + if (!configuration.getStartJobRateLimiter().tryAcquire()) { + break; + } + try { + if (checkJobIsExpired(jobEntity)) { + taskFrameworkService.updateStatusDescriptionByIdOldStatus(jobEntity.getId(), + jobEntity.getStatus(), JobStatus.CANCELED, "Job expired and failed."); + } else { + JobContext jobContext = + new DefaultJobContextBuilder().build(jobEntity); + Optional supervisorEndpoint = configuration.getSupervisorAgentAllocator() + .tryAllocateSupervisorEndpoint(jobContext); + // no supervisor endpoint could be allocated in this round; move on to the next job + if (!supervisorEndpoint.isPresent()) { + continue; + } + startJob(supervisorEndpoint.get(), configuration, jobContext, jobEntity); + } + } catch (Throwable e) { + log.warn("Start job failed, jobId={}.", jobEntity.getId(), e); + } + } + + } + + private void startJob(SupervisorEndpoint supervisorEndpoint, JobConfiguration configuration, JobContext jobContext, + JobEntity jobEntity) { + if (jobEntity.getStatus() == JobStatus.PREPARING || jobEntity.getStatus() == JobStatus.RETRYING) { + // TODO: the creator (user) id should be non-null when a job is submitted + if (jobEntity.getCreatorId() != null) { +
TraceContextHolder.setUserId(jobEntity.getCreatorId()); + } + + log.info("Prepare start job, jobId={}, currentStatus={}.", + jobEntity.getId(), jobEntity.getStatus()); + ProcessJobCaller jobCaller = JobCallerBuilder.buildProcessCaller(jobContext, + new JobEnvironmentFactory().build(jobContext, TaskRunMode.PROCESS)); + try { + ExecutorEndpoint executorEndpoint = configuration.getTaskSupervisorJobCaller() + .startTask(supervisorEndpoint, jobContext, jobCaller.getProcessConfig()); + log.info("start job success with endpoint={}", executorEndpoint); + } catch (JobException e) { + Map eventMessage = AlarmUtils.createAlarmMapBuilder() + .item(AlarmUtils.ORGANIZATION_NAME, Optional.ofNullable(jobEntity.getOrganizationId()).map( + Object::toString).orElse(StrUtil.EMPTY)) + .item(AlarmUtils.TASK_JOB_ID_NAME, String.valueOf(jobEntity.getId())) + .item(AlarmUtils.MESSAGE_NAME, + MessageFormat.format("Start job failed, jobId={0}, message={1}", + jobEntity.getId(), + e.getMessage())) + .build(); + AlarmUtils.alarm(AlarmEventNames.TASK_START_FAILED, eventMessage); + // start failed: roll back the load by releasing the supervisor endpoint allocated for this job + configuration.getSupervisorAgentAllocator() + .deallocateSupervisorEndpoint(jobContext.getJobIdentity().getId()); + throw new TaskRuntimeException(e); + } + } else { + log.warn("Job {} current status is {} but not preparing or retrying, start explain is aborted.", + jobEntity.getId(), jobEntity.getStatus()); + } + } + + private boolean checkJobIsExpired(JobEntity jobEntity) { + SingleJobProperties jobProperties = SingleJobProperties.fromJobProperties(jobEntity.getJobProperties()); + if (jobProperties == null || jobProperties.getJobExpiredIfNotRunningAfterSeconds() == null) { + return false; + } + + long baseTimeMills = jobEntity.getCreateTime().getTime(); + return JobDateUtils.getCurrentDate().getTime() - baseTimeMills > TimeUnit.MILLISECONDS.convert( + jobProperties.getJobExpiredIfNotRunningAfterSeconds(), TimeUnit.SECONDS); + } +} diff --git
a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/service/StdTaskFrameworkService.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/service/StdTaskFrameworkService.java index 07a0b8819b..09297679f4 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/service/StdTaskFrameworkService.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/service/StdTaskFrameworkService.java @@ -67,10 +67,14 @@ import com.oceanbase.odc.metadb.task.JobRepository; import com.oceanbase.odc.service.resource.ResourceManager; import com.oceanbase.odc.service.resource.ResourceState; +import com.oceanbase.odc.service.task.caller.ExecutorIdentifier; +import com.oceanbase.odc.service.task.caller.ExecutorIdentifierParser; +import com.oceanbase.odc.service.task.caller.JobContext; import com.oceanbase.odc.service.task.config.TaskFrameworkProperties; import com.oceanbase.odc.service.task.constants.JobAttributeEntityColumn; import com.oceanbase.odc.service.task.constants.JobEntityColumn; import com.oceanbase.odc.service.task.enums.JobStatus; +import com.oceanbase.odc.service.task.enums.TaskMonitorMode; import com.oceanbase.odc.service.task.enums.TaskRunMode; import com.oceanbase.odc.service.task.exception.JobException; import com.oceanbase.odc.service.task.executor.HeartbeatRequest; @@ -289,10 +293,23 @@ public JobEntity save(@NonNull JobDefinition jd) { } @Override - public int startSuccess(Long id, String executorIdentifier) { + public int startSuccess(Long id, String executorIdentifier, JobContext jobContext) { JobEntity jobEntity = find(id); jobEntity.setExecutorIdentifier(executorIdentifier); - return jobRepository.updateJobExecutorIdentifierById(jobEntity); + TaskMonitorMode monitorMode = JobPropertiesUtils.getMonitorMode(jobContext.getJobProperties()); + if (monitorMode == TaskMonitorMode.PUSH) { + return jobRepository.updateJobExecutorIdentifierById(jobEntity); + } else { + // that's pull mode, update executor 
endpoint as well + ExecutorIdentifier identifier = ExecutorIdentifierParser.parser(executorIdentifier); + String host = identifier.getHost(); + if (!StringUtils.startsWith(host, "http")) { + host = "http://" + host; + } + String port = String.valueOf(identifier.getPort()); + return jobRepository.updateExecutorEndpointAndExecutorIdentifierById(jobEntity.getId(), host + ":" + port, + executorIdentifier); + } } @Override @@ -429,6 +446,12 @@ public void refreshResult(Long id) { }); } + /** + * refresh log meta when job is canceled + * + * @param id + * @return + */ @Override public boolean refreshLogMetaForCancelJob(Long id) { JobEntity je = find(id); diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/service/TaskFrameworkService.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/service/TaskFrameworkService.java index 71712ac9fc..cf85c24ca2 100644 --- a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/service/TaskFrameworkService.java +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/service/TaskFrameworkService.java @@ -24,6 +24,7 @@ import com.oceanbase.odc.metadb.resource.ResourceEntity; import com.oceanbase.odc.metadb.task.JobEntity; +import com.oceanbase.odc.service.task.caller.JobContext; import com.oceanbase.odc.service.task.enums.JobStatus; import com.oceanbase.odc.service.task.enums.TaskRunMode; import com.oceanbase.odc.service.task.executor.HeartbeatRequest; @@ -72,7 +73,7 @@ public interface TaskFrameworkService { */ long countRunningJobs(TaskRunMode runMode); - int startSuccess(Long id, String executorIdentifier); + int startSuccess(Long id, String executorIdentifier, JobContext jobContext); int beforeStart(Long id); diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/DefaultJobEventListener.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/DefaultJobEventListener.java new file mode 100644 index 
0000000000..596edd8cd1 --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/DefaultJobEventListener.java @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.oceanbase.odc.service.task.supervisor; + +import java.text.MessageFormat; + +import com.oceanbase.odc.metadb.task.JobEntity; +import com.oceanbase.odc.service.task.caller.JobContext; +import com.oceanbase.odc.service.task.config.JobConfiguration; +import com.oceanbase.odc.service.task.config.JobConfigurationHolder; +import com.oceanbase.odc.service.task.enums.JobStatus; +import com.oceanbase.odc.service.task.exception.JobException; +import com.oceanbase.odc.service.task.listener.JobCallerEvent; +import com.oceanbase.odc.service.task.schedule.JobIdentity; +import com.oceanbase.odc.service.task.service.TaskFrameworkService; +import com.oceanbase.odc.service.task.supervisor.endpoint.ExecutorEndpoint; + +import lombok.extern.slf4j.Slf4j; + +/** JobEventHandler implementation that records each job lifecycle step through the TaskFrameworkService of the current JobConfiguration. + * @author longpeng.zlp + * @date 2024/11/28 17:37 + */ +@Slf4j +public class DefaultJobEventListener implements JobEventHandler { + @Override + public void beforeStartJob(JobContext context) throws JobException { + JobConfiguration jobConfiguration = JobConfigurationHolder.getJobConfiguration(); + TaskFrameworkService taskFrameworkService = jobConfiguration.getTaskFrameworkService(); + JobIdentity ji = context.getJobIdentity(); + int rows =
taskFrameworkService.beforeStart(ji.getId()); + if (rows <= 0) { + throw new JobException("Start job failed, jobId={0}", ji.getId()); + } + } + + @Override + public void afterStartJob(ExecutorEndpoint executorIdentifier, JobContext jobContext) throws JobException { + JobConfiguration jobConfiguration = JobConfigurationHolder.getJobConfiguration(); + TaskFrameworkService taskFrameworkService = jobConfiguration.getTaskFrameworkService(); + int rows = taskFrameworkService.startSuccess(jobContext.getJobIdentity().getId(), + executorIdentifier.getIdentifier(), jobContext); + if (rows <= 0) { + throw new JobException("Update job status to RUNNING failed, jobId={0}.", + jobContext.getJobIdentity().getId()); + } + } + + @Override + public void afterFinished(ExecutorEndpoint executorIdentifier, JobContext jobContext) throws JobException { + JobConfiguration jobConfiguration = JobConfigurationHolder.getJobConfiguration(); + TaskFrameworkService taskFrameworkService = jobConfiguration.getTaskFrameworkService(); + int rows = taskFrameworkService.updateExecutorToDestroyed(jobContext.getJobIdentity().getId()); + if (rows > 0) { + log.info("Destroy job executor succeed, jobId={}.", jobContext.getJobIdentity().getId()); + } else { + throw new JobException("Update executor to destroyed failed, JodId={0}", + jobContext.getJobIdentity().getId()); + } + } + + @Override + public void finishFailed(ExecutorEndpoint executorIdentifier, JobContext jobContext) { + JobConfiguration configuration = JobConfigurationHolder.getJobConfiguration(); + JobEntity jobEntity = configuration.getTaskFrameworkService().find(jobContext.getJobIdentity().getId()); + if (jobEntity.getStatus() == JobStatus.RUNNING) { + // The target executor cannot be reached, so its process cannot be killed; + // mark the job as FAILED so that two processes do not end up running for it. + configuration.getTaskFrameworkService().updateStatusDescriptionByIdOldStatus( + jobContext.getJobIdentity().getId(), JobStatus.RUNNING, JobStatus.FAILED, +
MessageFormat.format("Cannot connect to target odc server, jodId={0}, identifier={1}", + jobContext.getJobIdentity().getId(), executorIdentifier)); + } + } + + @Override + public void onNewEvent(JobCallerEvent jobCallerEvent) { + JobConfiguration configuration = JobConfigurationHolder.getJobConfiguration(); + configuration.getEventPublisher().publishEvent(jobCallerEvent); + } +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/JobEventHandler.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/JobEventHandler.java new file mode 100644 index 0000000000..8d0dd5bfaa --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/JobEventHandler.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package com.oceanbase.odc.service.task.supervisor; + +import com.oceanbase.odc.service.task.caller.JobContext; +import com.oceanbase.odc.service.task.exception.JobException; +import com.oceanbase.odc.service.task.listener.JobCallerEvent; +import com.oceanbase.odc.service.task.supervisor.endpoint.ExecutorEndpoint; + +/** Callbacks invoked around starting and finishing a job, plus a sink for job caller events. + * @author longpeng.zlp + * @date 2024/11/28 14:53 + */ +public interface JobEventHandler { + /** + * Processing that must be done before the job is started. + * + * @param jobContext context of the job about to be started + * @throws JobException if the pre-start processing fails + */ + void beforeStartJob(JobContext jobContext) throws JobException; + + /** + * Processing that must be done after the job has been started. + * + * @param executorIdentifier endpoint of the executor the job was started on + * @param jobContext context of the started job + */ + void afterStartJob(ExecutorEndpoint executorIdentifier, JobContext jobContext) throws JobException; + + /** + * Processing that must be done after the job has been finished. + * + * @param executorIdentifier endpoint of the executor the job ran on + * @param jobContext context of the finished job + */ + void afterFinished(ExecutorEndpoint executorIdentifier, JobContext jobContext) throws JobException; + + /** + * Called when finishing the job failed, e.g. because the target machine may be offline. + * + * @param jobContext context of the job that could not be finished + */ + void finishFailed(ExecutorEndpoint executorIdentifier, JobContext jobContext); + + + /** + * Listens for a new job caller event. + * + * @param jobCallerEvent the event that was raised + */ + void onNewEvent(JobCallerEvent jobCallerEvent); +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/PortDetector.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/PortDetector.java new file mode 100644 index 0000000000..adc991510d --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/PortDetector.java @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.oceanbase.odc.service.task.supervisor; + +import java.net.ServerSocket; +import java.util.HashSet; +import java.util.PriorityQueue; +import java.util.Set; + +import lombok.Setter; + +/** + * Hands out a port in [minPort, maxPort] that is neither bound right now nor handed out within the last expiredMs milliseconds. + * + * @author longpeng.zlp + * @date 2024/10/25 16:25 + */ +public class PortDetector { + private static final PortDetector PORT_DETECTOR = new PortDetector(); + @Setter + private int maxPort; + @Setter + private int minPort; + @Setter + private int expiredMs; + private PriorityQueue allocatedPortInfos = new PriorityQueue<>((ap1, ap2) -> { + return Long.compare(ap1.getAllocatedTime(), ap2.getAllocatedTime()); + }); + private Set allocatedSets = new HashSet<>(); + + private PortDetector() { + maxPort = 10240; + minPort = 9000; + // a handed-out port stays reserved for 10 seconds + expiredMs = 10000; + } + + public synchronized int getPort() { + long currentTimeMS = System.currentTimeMillis(); + // drop reservations older than expiredMs so those ports can be handed out again + while (!allocatedPortInfos.isEmpty() + && (currentTimeMS - allocatedPortInfos.peek().getAllocatedTime()) > expiredMs) { + AllocatedPortInfo allocatedPortInfo = allocatedPortInfos.poll(); + allocatedSets.remove(allocatedPortInfo.getPort()); + } + // scan the range for the first port that is neither reserved nor currently bound + for (int i = minPort; i <= maxPort; ++i) { + if (allocatedSets.contains(i)) { + continue; + } + if (portInUse(i)) { + continue; + } + allocatedSets.add(i); + allocatedPortInfos.add(new AllocatedPortInfo(i, System.currentTimeMillis())); + return i; + } + throw new RuntimeException("port allocate failed"); + } + + private boolean portInUse(int
port) { + ServerSocket socketServer = null; + try { + socketServer = new ServerSocket(port); + } catch (Throwable e) { /* the probe bind failed, so the port is reported as in use */ + return true; + } finally { + if (null != socketServer) { + try { + socketServer.close(); + } catch (Throwable e) { /* a failure to close the probe socket is ignored here */ + } + } + } + return false; + } + + public static PortDetector getInstance() { + return PORT_DETECTOR; + } + + private static final class AllocatedPortInfo { + private final int port; + private final long allocatedTime; + + private AllocatedPortInfo(int port, long allocatedTime) { + this.port = port; + this.allocatedTime = allocatedTime; + } + + public int getPort() { + return port; + } + + public long getAllocatedTime() { + return allocatedTime; + } + } +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/SupervisorEndpointState.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/SupervisorEndpointState.java new file mode 100644 index 0000000000..a20956e8ee --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/SupervisorEndpointState.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package com.oceanbase.odc.service.task.supervisor; + +/** States a supervisor endpoint can be in. + * @author longpeng.zlp + * @date 2024/11/29 16:18 + */ +public enum SupervisorEndpointState { + // endpoint is still being prepared + PREPARING, + // endpoint is available + AVAILABLE, + // endpoint has been destroyed + DESTROYED, + // endpoint is not reachable + UNAVAILABLE, + // endpoint has been abandoned + ABANDON +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/TaskCallerResult.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/TaskCallerResult.java new file mode 100644 index 0000000000..02472ea83c --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/TaskCallerResult.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package com.oceanbase.odc.service.task.supervisor; + +import lombok.Data; + +/** Result holder pairing a success flag with the exception recorded on failure. + * @author longpeng.zlp + * @date 2024/12/9 11:30 + */ +@Data +public class TaskCallerResult { + public static final TaskCallerResult SUCCESS_RESULT = new TaskCallerResult(true, null); /* shared success instance; carries no exception */ + private final Boolean succeed; + private final Exception e; /* null for SUCCESS_RESULT; failed(e) stores the given exception */ + + public static TaskCallerResult failed(Exception e) { + return new TaskCallerResult(false, e); + } +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/TaskSupervisor.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/TaskSupervisor.java new file mode 100644 index 0000000000..21fb0d300c --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/TaskSupervisor.java @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package com.oceanbase.odc.service.task.supervisor; + +import java.io.File; +import java.lang.ProcessBuilder.Redirect; +import java.nio.charset.Charset; +import java.util.Map; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.classification.InterfaceStability.Evolving; + +import com.oceanbase.odc.common.json.JsonUtils; +import com.oceanbase.odc.common.util.StringUtils; +import com.oceanbase.odc.common.util.SystemUtils; +import com.oceanbase.odc.service.task.caller.DefaultExecutorIdentifier; +import com.oceanbase.odc.service.task.caller.ExecutorIdentifier; +import com.oceanbase.odc.service.task.caller.ExecutorIdentifierParser; +import com.oceanbase.odc.service.task.caller.ExecutorProcessBuilderFactory; +import com.oceanbase.odc.service.task.caller.JobContext; +import com.oceanbase.odc.service.task.caller.ProcessConfig; +import com.oceanbase.odc.service.task.constants.JobEnvKeyConstants; +import com.oceanbase.odc.service.task.exception.JobException; +import com.oceanbase.odc.service.task.supervisor.endpoint.ExecutorEndpoint; +import com.oceanbase.odc.service.task.supervisor.endpoint.SupervisorEndpoint; +import com.oceanbase.odc.service.task.util.JobUtils; + +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; + +/** + * submit a task context and return a executor endpoint in local mode + * + * @author longpeng.zlp + * @date 2024/10/28 10:55 + */ +@Slf4j +@Evolving +public class TaskSupervisor { + public static final String COMMAND_PROTOCOL_NAME = "command"; + @Getter + private final SupervisorEndpoint supervisorEndpoint; + + private final String mainClassName; + + public TaskSupervisor(SupervisorEndpoint supervisorEndpoint, String mainClassName) { + this.supervisorEndpoint = supervisorEndpoint; + this.mainClassName = mainClassName; + } + + /** + * start task with given parameters + * + * @param context + * @param processConfig + * @return + */ + public ExecutorEndpoint startTask(JobContext context, ProcessConfig processConfig) throws 
JobException { + String executorName = JobUtils.generateExecutorName(context.getJobIdentity()); + int port = tryGenerateListenPortToEnv(processConfig); + // save job context to file + writeJobContextToFile(context, processConfig); + ProcessBuilder pb = new ExecutorProcessBuilderFactory().getProcessBuilder( + processConfig, context.getJobIdentity().getId(), executorName, mainClassName); + log.info("start task with processConfig={}, env={}", JobUtils.toJson(processConfig), + JsonUtils.toJson(pb.environment())); + pb.redirectErrorStream(true); + pb.redirectOutput(Redirect.appendTo(new File("process-call.log"))); + Process process; + try { + process = pb.start(); + } catch (Exception ex) { + throw new JobException("Start process failed.", ex); + } + + long pid = SystemUtils.getProcessPid(process); + if (pid == -1) { + process.destroyForcibly(); + throw new JobException("Get pid failed, job id={0} ", context.getJobIdentity().getId()); + } + + boolean isProcessRunning = + SystemUtils.isProcessRunning(pid, JobUtils.generateExecutorSelectorOnProcess(executorName)); + + if (!isProcessRunning) { + process.destroyForcibly(); + throw new JobException("Start process failed, not process found, pid={0},executorName={1}.", + pid, executorName); + } + + // set process id as namespace + ExecutorIdentifier executorIdentifier = DefaultExecutorIdentifier.builder().host(supervisorEndpoint.getHost()) + .port(port) + .namespace(pid + "") + .executorName(executorName).build(); + return new ExecutorEndpoint(COMMAND_PROTOCOL_NAME, supervisorEndpoint.getHost(), supervisorEndpoint.getPort(), + port, executorIdentifier.toString()); + } + + /** + * job context may be large content greater than Evn max length 4MB. 
save it to file + * + * @param context + * @param processConfig + */ + protected void writeJobContextToFile(JobContext context, ProcessConfig processConfig) { + Map environments = processConfig.getEnvironments(); + /** + * write JobContext to file in case of exceeding the environments size limit; set the file path in + * the environment instead + */ + String jobContextFilePath = JobUtils.getExecutorDataPath() + "/" + StringUtils.uuid() + ".enc"; + try { + FileUtils.writeStringToFile(new File(jobContextFilePath), + JobUtils.encrypt(environments.get(JobEnvKeyConstants.ENCRYPT_KEY), + environments.get(JobEnvKeyConstants.ENCRYPT_SALT), JobUtils.toJson(context)), + Charset.defaultCharset()); + } catch (Exception ex) { + FileUtils.deleteQuietly(new File(jobContextFilePath)); + throw new RuntimeException("Failed to write job context to file: " + jobContextFilePath, ex); + } + environments.put(JobEnvKeyConstants.ODC_JOB_CONTEXT_FILE_PATH, + JobUtils.encrypt(environments.get(JobEnvKeyConstants.ENCRYPT_KEY), + environments.get(JobEnvKeyConstants.ENCRYPT_SALT), jobContextFilePath)); + } + + /** + * generate listen port for process if is pull mode + * + * @param processConfig + * @return + */ + protected int tryGenerateListenPortToEnv(ProcessConfig processConfig) { + String reportEnabled = getValue(processConfig.getEnvironments(), JobEnvKeyConstants.REPORT_ENABLED); + // enable report mode, use push mode + if (!StringUtils.equalsIgnoreCase(reportEnabled, "false")) { + log.info("task run in push mode, port allocate not needed"); + return -1; + } + // use pull mode, detect if port is given + String givenPort = getValue(processConfig.getEnvironments(), JobEnvKeyConstants.ODC_EXECUTOR_PORT); + if (null != givenPort && Integer.parseInt(givenPort) != 0) { + log.info("task run in pull mode, allocatedPort = {}", givenPort); + return Integer.valueOf(givenPort); + } + // port not valid, fill it + int detectPort = PortDetector.getInstance().getPort(); + 
processConfig.getEnvironments().put(JobEnvKeyConstants.ODC_EXECUTOR_PORT, String.valueOf(detectPort)); + log.info("task run in pull mode, port not given, allocatePort={}", detectPort); + return detectPort; + } + + protected String getValue(Map map, String key) { + if (null == map) { + return null; + } + return map.get(key); + } + + /** + * stop task + * + * @param jobContext + */ + public boolean stopTask(ExecutorEndpoint executorEndpoint, JobContext jobContext) throws JobException { + ExecutorIdentifier executorIdentifier = getExecutorIdentifier(executorEndpoint); + // kill process on this machine + if (isTaskAlive(executorIdentifier)) { + long pid = Long.parseLong(executorIdentifier.getNamespace()); + log.info("Found process, try kill it, pid={}.", pid); + // first update destroy time, second destroy executor. + // if executor failed update will be rollback, ensure distributed transaction atomicity. + doDestroyInternal(executorIdentifier); + } + return true; + } + + protected void doDestroyInternal(ExecutorIdentifier identifier) throws JobException { + long pid = Long.parseLong(identifier.getNamespace()); + boolean result = SystemUtils.killProcessByPid(pid); + if (result) { + log.info("Destroy succeed by kill process, executorIdentifier={}, pid={}", identifier, pid); + } else { + throw new JobException( + "Destroy executor failed by kill process, identifier={0}, pid{1}=", identifier, pid); + } + } + + public boolean isTaskAlive(ExecutorIdentifier identifier) { + long pid = Long.parseLong(identifier.getNamespace()); + boolean result = SystemUtils.isProcessRunning(pid, + JobUtils.generateExecutorSelectorOnProcess(identifier.getExecutorName())); + if (result) { + log.info("Found executor by identifier, identifier={}", identifier); + } else { + log.warn("Not found executor by identifier, identifier={}", identifier); + } + return result; + } + + public static ExecutorIdentifier getExecutorIdentifier(ExecutorEndpoint executorEndpoint) { + return 
ExecutorIdentifierParser.parser(executorEndpoint.getIdentifier()); + } +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/TaskSupervisorJobCaller.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/TaskSupervisorJobCaller.java new file mode 100644 index 0000000000..d60312a9b2 --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/TaskSupervisorJobCaller.java @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.oceanbase.odc.service.task.supervisor; + +import com.oceanbase.odc.common.json.JsonUtils; +import com.oceanbase.odc.service.task.caller.JobContext; +import com.oceanbase.odc.service.task.caller.ProcessConfig; +import com.oceanbase.odc.service.task.enums.JobCallerAction; +import com.oceanbase.odc.service.task.exception.JobException; +import com.oceanbase.odc.service.task.listener.JobCallerEvent; +import com.oceanbase.odc.service.task.supervisor.endpoint.ExecutorEndpoint; +import com.oceanbase.odc.service.task.supervisor.endpoint.SupervisorEndpoint; +import com.oceanbase.odc.service.task.supervisor.proxy.LocalTaskSupervisorProxy; +import com.oceanbase.odc.service.task.supervisor.proxy.TaskSupervisorProxy; +import com.oceanbase.odc.service.task.util.TaskExecutorClient; + +import lombok.extern.slf4j.Slf4j; + +/** + * job caller for task operation + * + * @author longpeng.zlp + * @date 2024/11/28 14:49 + */ +@Slf4j +public class TaskSupervisorJobCaller { + // event listener + private final JobEventHandler jobEventHandler; + // supervisor command proxy to send command to supervisor endpoint + private final LocalTaskSupervisorProxy taskSupervisorProxy; + // task executor client to send command to task directly + private final TaskExecutorClient taskExecutorClient; + + public TaskSupervisorJobCaller(JobEventHandler jobEventHandler, + LocalTaskSupervisorProxy taskSupervisorProxy, TaskExecutorClient taskExecutorClient) { + this.jobEventHandler = jobEventHandler; + this.taskSupervisorProxy = taskSupervisorProxy; + this.taskExecutorClient = taskExecutorClient; + } + + public ExecutorEndpoint startTask(SupervisorEndpoint supervisorEndpoint, JobContext jobContext, + ProcessConfig processConfig) throws JobException { + ExecutorEndpoint executorEndpoint = null; + try { + // do start process + jobEventHandler.beforeStartJob(jobContext); + executorEndpoint = taskSupervisorProxy.startTask(supervisorEndpoint, jobContext, processConfig); + 
jobEventHandler.afterStartJob(executorEndpoint, jobContext); + // send success event + log.info("Start job succeed, jobId={}.", jobContext.getJobIdentity().getId()); + jobEventHandler.onNewEvent(new JobCallerEvent(jobContext.getJobIdentity(), JobCallerAction.START, true, + TaskSupervisor.getExecutorIdentifier(executorEndpoint), null)); + return executorEndpoint; + } catch (Exception e) { + // try roll back + stopTask(supervisorEndpoint, executorEndpoint, jobContext); + // send failed event + jobEventHandler + .onNewEvent(new JobCallerEvent(jobContext.getJobIdentity(), JobCallerAction.START, false, e)); + throw new JobException("Start job failed", e); + } + } + + /** + * stop task through supervisor agent, this will force stop task + */ + public TaskCallerResult stopTask(SupervisorEndpoint supervisorEndpoint, ExecutorEndpoint executorEndpoint, + JobContext jobContext) throws JobException { + try { + TaskCallerResult stopResult = null; + if (taskSupervisorProxy.isSupervisorAlive(supervisorEndpoint)) { + // supervisor is alive and stopped + boolean stopFlag = taskSupervisorProxy.stopTask(supervisorEndpoint, executorEndpoint, jobContext); + stopResult = stopFlag ? TaskCallerResult.SUCCESS_RESULT + : TaskCallerResult + .failed(new JobException("stop task failed for endpoint=" + executorEndpoint)); + log.info("stop through agent with ret = {}, endpoint = {}", stopFlag, supervisorEndpoint); + } else { + // supervisor not alive can't determinate stop result + log.info("supervisor not alive, endpoint = {}", supervisorEndpoint); + stopResult = TaskCallerResult.failed(new JobException( + "supervisor agent not alive, can't determinate endpoint = " + executorEndpoint + " status")); + } + log.info("Stop job {}, jobId={}.", stopResult.getSucceed() ? 
"successfully" : "failed", + jobContext.getJobIdentity().getId()); + jobEventHandler + .onNewEvent(new JobCallerEvent(jobContext.getJobIdentity(), JobCallerAction.STOP, + stopResult.getSucceed(), stopResult.getE())); + return stopResult; + } catch (Exception e) { + // handle stop exception + jobEventHandler.onNewEvent(new JobCallerEvent(jobContext.getJobIdentity(), JobCallerAction.STOP, false, e)); + return TaskCallerResult + .failed(new JobException("job be stop failed, jobId={0}.", e, jobContext.getJobIdentity().getId())); + } + } + + /** + * stop task with http endpoint + */ + public TaskCallerResult stopTaskDirectly(SupervisorEndpoint supervisorEndpoint, ExecutorEndpoint executorEndpoint, + JobContext jobContext) throws JobException { + try { + taskExecutorClient.stop(TaskSupervisorProxy.getExecutorEndpoint(executorEndpoint), + jobContext.getJobIdentity()); + return TaskCallerResult.SUCCESS_RESULT; + } catch (Exception e) { + log.info("stop task failed cause ", e); + return TaskCallerResult.failed(e); + } + } + + /** + * modify task use task executor client + * + * @param supervisorEndpoint + * @param executorEndpoint + * @param jobContext + * @return + * @throws JobException + */ + public TaskCallerResult modifyTask(SupervisorEndpoint supervisorEndpoint, ExecutorEndpoint executorEndpoint, + JobContext jobContext) { + try { + taskExecutorClient.modifyJobParameters( + TaskSupervisorProxy.getExecutorIdentifierByExecutorEndpoint(executorEndpoint), + jobContext.getJobIdentity(), + JsonUtils.toJson(jobContext.getJobParameters())); + return TaskCallerResult.SUCCESS_RESULT; + } catch (Exception e) { + return TaskCallerResult.failed(e); + } + } + + /** + * it's only db related operation TODO(lx):it will be removed out of this class + * + * @param jobContext + * @return + */ + public TaskCallerResult finish(SupervisorEndpoint supervisorEndpoint, ExecutorEndpoint executorEndpoint, + JobContext jobContext) + throws JobException { + TaskCallerResult taskCallerResult = 
TaskCallerResult.SUCCESS_RESULT; + if (null == executorEndpoint) { + log.info("job finished success, it's not created yet"); + } else { + log.info("try finished job, executorEndpoint={}", executorEndpoint); + taskCallerResult = stopTask(supervisorEndpoint, executorEndpoint, jobContext); + if (!taskCallerResult.getSucceed()) { + jobEventHandler.finishFailed(executorEndpoint, jobContext); + } + } + jobEventHandler.afterFinished(null, jobContext); + return taskCallerResult; + } + + /** + * for supervisor agent, it will always be true, cause command will be routed to right agent + * + * @param jobContext + * @return + */ + public TaskCallerResult canBeFinish(SupervisorEndpoint supervisorEndpoint, ExecutorEndpoint executorEndpoint, + JobContext jobContext) + throws JobException { + return TaskCallerResult.SUCCESS_RESULT; + } +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/endpoint/ExecutorEndpoint.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/endpoint/ExecutorEndpoint.java new file mode 100644 index 0000000000..094be8bf29 --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/endpoint/ExecutorEndpoint.java @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.oceanbase.odc.service.task.supervisor.endpoint; + +import lombok.AllArgsConstructor; +import lombok.Data; + +/** + * @author longpeng.zlp + * @date 2024/10/28 16:41 + */ +@Data +@AllArgsConstructor +public class ExecutorEndpoint { + private String protocol; + private String host; + private Integer supervisorPort; + private Integer executorPort; + private String identifier; + + public ExecutorEndpoint() {} +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/endpoint/SupervisorEndpoint.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/endpoint/SupervisorEndpoint.java new file mode 100644 index 0000000000..ea4584a2fa --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/endpoint/SupervisorEndpoint.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.oceanbase.odc.service.task.supervisor.endpoint; + +import lombok.AllArgsConstructor; +import lombok.Data; + +/** + * @author longpeng.zlp + * @date 2024/10/29 14:24 + */ +@Data +@AllArgsConstructor +public class SupervisorEndpoint { + public static final SupervisorEndpoint SELF_ENDPOINT = new SupervisorEndpoint("localhost", -1); + private String host; + private Integer port; + + public SupervisorEndpoint() {} +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/protocol/CommandType.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/protocol/CommandType.java new file mode 100644 index 0000000000..243096dded --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/protocol/CommandType.java @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Types of command a task command can carry.
 *
 * @author longpeng.zlp
 * @date 2024/10/29 15:21
 */
public enum CommandType {
    /** start command */
    START,

    /** stop command */
    STOP,

    /** is task alive command */
    IS_TASK_ALIVE;
}
+ */ +package com.oceanbase.odc.service.task.supervisor.protocol; + +import java.util.function.Supplier; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.oceanbase.odc.common.json.JsonUtils; +import com.oceanbase.odc.service.task.caller.JobContext; +import com.oceanbase.odc.service.task.supervisor.endpoint.ExecutorEndpoint; + +import lombok.Getter; + +/** + * @author longpeng.zlp + * @date 2024/10/29 15:47 + */ +public class GeneralTaskCommand extends TaskCommand { + protected static final String EXECUTOR_ENC_POINT_STR = "executorEndpoint"; + @Getter + protected ExecutorEndpoint executorEndpoint; + protected CommandType commandType; + + + public void append(ObjectNode objectNode) { + objectNode.put(EXECUTOR_ENC_POINT_STR, JsonUtils.toJson(executorEndpoint)); + } + + public CommandType commandType() { + return commandType; + } + + public static T fromJsonNode(Supplier commandSupplier, JsonNode jsonNode, + CommandType commandType) { + T command = commandSupplier.get(); + JsonNode executorEndpointNode = jsonNode.get(EXECUTOR_ENC_POINT_STR); + ExecutorEndpoint endpoint = JsonUtils + .fromJson(null == executorEndpointNode ? 
null : executorEndpointNode.asText(), ExecutorEndpoint.class); + command.fillCommonFields(jsonNode); + command.executorEndpoint = endpoint; + command.commandType = commandType; + return command; + } + + public static GeneralTaskCommand create(JobContext jobContext, ExecutorEndpoint endpoint, CommandType commandType) { + GeneralTaskCommand ret = new GeneralTaskCommand(); + ret.commandType = commandType; + ret.version = COMMAND_VERSION; + ret.jobContext = jobContext; + ret.executorEndpoint = endpoint; + return ret; + } + +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/protocol/StartTaskCommand.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/protocol/StartTaskCommand.java new file mode 100644 index 0000000000..464cb3ff15 --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/protocol/StartTaskCommand.java @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.oceanbase.odc.service.task.supervisor.protocol; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.oceanbase.odc.common.json.JsonUtils; +import com.oceanbase.odc.service.task.caller.JobContext; +import com.oceanbase.odc.service.task.caller.ProcessConfig; + +import lombok.Getter; + +/** + * @author longpeng.zlp + * @date 2024/10/29 15:25 + */ +public class StartTaskCommand extends TaskCommand { + protected static final String PROCESS_CONFIG_STR = "processConfig"; + @Getter + private ProcessConfig processConfig; + + @Override + public CommandType commandType() { + return CommandType.START; + } + + @Override + public void append(ObjectNode objectNode) { + objectNode.put(PROCESS_CONFIG_STR, JsonUtils.toJson(processConfig)); + } + + public static StartTaskCommand fromJsonNode(JsonNode jsonNode) { + StartTaskCommand startTaskCommand = new StartTaskCommand(); + JsonNode processConfigNode = jsonNode.get(PROCESS_CONFIG_STR); + startTaskCommand.processConfig = + JsonUtils.fromJson(null == processConfigNode ? null : processConfigNode.asText(), ProcessConfig.class); + startTaskCommand.fillCommonFields(jsonNode); + return startTaskCommand; + } + + public static StartTaskCommand create(JobContext jobContext, ProcessConfig processConfig) { + StartTaskCommand ret = new StartTaskCommand(); + ret.processConfig = processConfig; + ret.jobContext = jobContext; + ret.version = COMMAND_VERSION; + return ret; + } +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/protocol/TaskCommand.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/protocol/TaskCommand.java new file mode 100644 index 0000000000..46a1a30af8 --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/protocol/TaskCommand.java @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2023 OceanBase. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.oceanbase.odc.service.task.supervisor.protocol; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.oceanbase.odc.common.json.JsonUtils; +import com.oceanbase.odc.service.task.caller.DefaultJobContext; +import com.oceanbase.odc.service.task.caller.JobContext; + +import lombok.Getter; + +/** + * @author longpeng.zlp + * @date 2024/10/29 15:13 + */ +public abstract class TaskCommand { + public static final ObjectMapper objectMapper = new ObjectMapper(); + public static final int COMMAND_VERSION = 0; + public static final String VERSION_NAME = "version"; + public static final String JOB_CONTEXT_NAME = "jobContext"; + public static final String COMMAND_TYPE_NAME = "command"; + @Getter + protected JobContext jobContext; + @Getter + protected int version; + + public abstract CommandType commandType(); + + public String serialize() { + ObjectNode jsonNode = objectMapper.createObjectNode(); + jsonNode.put(VERSION_NAME, version); + jsonNode.put(JOB_CONTEXT_NAME, JsonUtils.toJson(jobContext)); + jsonNode.put(COMMAND_TYPE_NAME, commandType().toString().toLowerCase()); + append(jsonNode); + return jsonNode.toPrettyString(); + } + + protected void fillCommonFields(JsonNode jsonNode) { + JsonNode versionNode = jsonNode.get(VERSION_NAME); + version = null == versionNode ? 
0 : versionNode.asInt(); + JsonNode jobContextNode = jsonNode.get(JOB_CONTEXT_NAME); + jobContext = + jobContextNode == null ? null : JsonUtils.fromJson(jobContextNode.asText(), DefaultJobContext.class); + } + + public abstract void append(ObjectNode objectNode); +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/protocol/TaskCommandSender.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/protocol/TaskCommandSender.java new file mode 100644 index 0000000000..ade50754ef --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/protocol/TaskCommandSender.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.oceanbase.odc.service.task.supervisor.protocol; + +import java.io.IOException; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.oceanbase.odc.service.task.supervisor.endpoint.SupervisorEndpoint; +import com.oceanbase.odc.service.task.util.HttpClientUtils; + +/** + * @author longpeng.zlp + * @date 2024/10/29 15:01 + */ +public class TaskCommandSender { + /** + * send command to supervisor end point and return reponse + * + * @param supervisorEndpoint + * @param taskCommand + * @return + */ + public String sendCommand(SupervisorEndpoint supervisorEndpoint, TaskCommand taskCommand) throws IOException { + String url = buildUrl(supervisorEndpoint, taskCommand); + String requestBody = taskCommand.serialize(); + return HttpClientUtils.request("POST", url, requestBody, new TypeReference() {}); + } + + /** + * send heartbeat command + */ + public String heartbeat(SupervisorEndpoint supervisorEndpoint) throws IOException { + StringBuilder sb = new StringBuilder(64); + appendHttpURlBase(supervisorEndpoint, sb); + sb.append("/heartbeat"); + return HttpClientUtils.request("GET", sb.toString(), new TypeReference() {}); + } + + protected String buildUrl(SupervisorEndpoint supervisorEndpoint, TaskCommand taskCommand) { + StringBuilder sb = new StringBuilder(64); + appendHttpURlBase(supervisorEndpoint, sb); + // create command url + sb.append("/task/command/").append(taskCommand.commandType().name().toLowerCase()); + return sb.toString(); + } + + protected void appendHttpURlBase(SupervisorEndpoint supervisorEndpoint, StringBuilder sb) { + // create base + sb.append("http://") + .append(supervisorEndpoint.getHost()).append(":") + .append(supervisorEndpoint.getPort()); + } +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/proxy/LocalTaskSupervisorProxy.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/proxy/LocalTaskSupervisorProxy.java new file mode 100644 index 
0000000000..7ea0c53502 --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/proxy/LocalTaskSupervisorProxy.java @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.oceanbase.odc.service.task.supervisor.proxy; + +import java.io.IOException; + +import com.oceanbase.odc.service.task.caller.JobContext; +import com.oceanbase.odc.service.task.caller.ProcessConfig; +import com.oceanbase.odc.service.task.exception.JobException; +import com.oceanbase.odc.service.task.supervisor.TaskSupervisor; +import com.oceanbase.odc.service.task.supervisor.endpoint.ExecutorEndpoint; +import com.oceanbase.odc.service.task.supervisor.endpoint.SupervisorEndpoint; +import com.oceanbase.odc.service.task.supervisor.protocol.TaskCommandSender; + +import lombok.extern.slf4j.Slf4j; + +/** + * command executor to route task command local impl + * + * @author longpeng.zlp + * @date 2024/10/29 11:43 + */ +@Slf4j +public class LocalTaskSupervisorProxy implements TaskSupervisorProxy { + private final TaskSupervisor taskSupervisor; + private final RemoteTaskSupervisorProxy remoteTaskSupervisorProxy; + private final SupervisorEndpoint localEndPoint; + + public LocalTaskSupervisorProxy(SupervisorEndpoint supervisorEndpoint, + String mainClassName) { + this.localEndPoint = supervisorEndpoint; + log.info("LocalTaskSupervisorProxy start with endpoint={}", supervisorEndpoint); + 
remoteTaskSupervisorProxy = new RemoteTaskSupervisorProxy(new TaskCommandSender()); + taskSupervisor = new TaskSupervisor(supervisorEndpoint, mainClassName); + } + + @Override + public ExecutorEndpoint startTask(SupervisorEndpoint supervisorEndpoint, JobContext jobContext, + ProcessConfig processConfig) throws JobException, IOException { + if (isLocalCommandCall(supervisorEndpoint)) { + log.info("local call start task, supervisorEndpoint={}, jobContext={}, processConfig={}", + supervisorEndpoint, jobContext, processConfig); + return taskSupervisor.startTask(jobContext, processConfig); + } else { + log.info("remote call start task, supervisorEndpoint={}, jobContext={}, processConfig={}", + supervisorEndpoint, jobContext, processConfig); + return remoteTaskSupervisorProxy.startTask(supervisorEndpoint, jobContext, processConfig); + } + } + + @Override + public boolean stopTask(SupervisorEndpoint supervisorEndpoint, ExecutorEndpoint executorEndpoint, + JobContext jobContext) throws JobException, IOException { + if (null == executorEndpoint) { + throw new JobException("empty executor endpoint to stop"); + } + if (isLocalCommandCall(supervisorEndpoint)) { + log.info("local call stop task, supervisorEndpoint={}, executorEndpoint={}, jobContext={}", + supervisorEndpoint, executorEndpoint, jobContext); + return taskSupervisor.stopTask(executorEndpoint, jobContext); + } else { + log.info("remote call stop task, supervisorEndpoint={}, executorEndpoint={}, jobContext={}", + supervisorEndpoint, executorEndpoint, jobContext); + return remoteTaskSupervisorProxy.stopTask(supervisorEndpoint, executorEndpoint, jobContext); + } + } + + + @Override + public boolean isTaskAlive(SupervisorEndpoint supervisorEndpoint, ExecutorEndpoint executorEndpoint, + JobContext jobContext) throws JobException, IOException { + if (isLocalCommandCall(supervisorEndpoint)) { + log.info("local call canBeFinished task, supervisorEndpoint={}, executorEndpoint={}, jobContext={}", + supervisorEndpoint, 
executorEndpoint, jobContext); + return taskSupervisor.isTaskAlive(TaskSupervisor.getExecutorIdentifier(executorEndpoint)); + } else { + log.info("remote call canBeFinished task, supervisorEndpoint={}, executorEndpoint={}, jobContext={}", + supervisorEndpoint, executorEndpoint, jobContext); + return remoteTaskSupervisorProxy.isTaskAlive(supervisorEndpoint, executorEndpoint, jobContext); + } + } + + @Override + public boolean isSupervisorAlive(SupervisorEndpoint supervisorEndpoint) { + if (isLocalCommandCall(supervisorEndpoint)) { + log.info("local call isSupervisorAlive task, supervisorEndpoint={}", supervisorEndpoint); + return true; + } else { + log.info("remote call isSupervisorAlive task, supervisorEndpoint={}", supervisorEndpoint); + return remoteTaskSupervisorProxy.isSupervisorAlive(supervisorEndpoint); + } + } + + protected boolean isLocalCommandCall(SupervisorEndpoint supervisorEndpoint) { + if (null == supervisorEndpoint) { + throw new IllegalStateException("end point must be given for task supervisor proxy"); + } + return supervisorEndpoint.equals(SupervisorEndpoint.SELF_ENDPOINT) || supervisorEndpoint.equals(localEndPoint); + } +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/proxy/RemoteTaskSupervisorProxy.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/proxy/RemoteTaskSupervisorProxy.java new file mode 100644 index 0000000000..185c66d721 --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/proxy/RemoteTaskSupervisorProxy.java @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.oceanbase.odc.service.task.supervisor.proxy; + +import java.io.IOException; + +import com.oceanbase.odc.common.json.JsonUtils; +import com.oceanbase.odc.common.util.StringUtils; +import com.oceanbase.odc.service.task.caller.JobContext; +import com.oceanbase.odc.service.task.caller.ProcessConfig; +import com.oceanbase.odc.service.task.supervisor.endpoint.ExecutorEndpoint; +import com.oceanbase.odc.service.task.supervisor.endpoint.SupervisorEndpoint; +import com.oceanbase.odc.service.task.supervisor.protocol.CommandType; +import com.oceanbase.odc.service.task.supervisor.protocol.GeneralTaskCommand; +import com.oceanbase.odc.service.task.supervisor.protocol.StartTaskCommand; +import com.oceanbase.odc.service.task.supervisor.protocol.TaskCommandSender; + +import lombok.extern.slf4j.Slf4j; + +/** + * remote supervisor proxy to compatible with current desgin 1. start command will be send to + * supervisor agent 2. 
other command will be send to task + * + * @author longpeng.zlp + * @date 2024/10/29 14:42 + */ +@Slf4j +public class RemoteTaskSupervisorProxy implements TaskSupervisorProxy { + // command sender to supervisor client + private final TaskCommandSender taskCommandSender; + + public RemoteTaskSupervisorProxy(TaskCommandSender taskCommandSender) { + this.taskCommandSender = taskCommandSender; + } + + @Override + public ExecutorEndpoint startTask(SupervisorEndpoint supervisorEndpoint, JobContext jobContext, + ProcessConfig processConfig) throws IOException { + String ret = + taskCommandSender.sendCommand(supervisorEndpoint, StartTaskCommand.create(jobContext, processConfig)); + log.info("start task to supervisorEndpoint = {}, jobContext = {}, with response = {}", supervisorEndpoint, + jobContext, ret); + return JsonUtils.fromJson(ret, ExecutorEndpoint.class); + } + + @Override + public boolean stopTask(SupervisorEndpoint supervisorEndpoint, ExecutorEndpoint executorEndpoint, + JobContext jobContext) throws IOException { + String ret = taskCommandSender.sendCommand(supervisorEndpoint, + GeneralTaskCommand.create(jobContext, executorEndpoint, CommandType.STOP)); + log.info("stop task to supervisorEndpoint = {}, with response = {}", supervisorEndpoint, ret); + return Boolean.parseBoolean(StringUtils.trim(ret)); + } + + @Override + public boolean isTaskAlive(SupervisorEndpoint supervisorEndpoint, ExecutorEndpoint executorEndpoint, + JobContext jobContext) throws IOException { + String ret = taskCommandSender.sendCommand(supervisorEndpoint, + GeneralTaskCommand.create(jobContext, executorEndpoint, CommandType.IS_TASK_ALIVE)); + log.info("send task is alive command to supervisorEndpoint = {}, with response = {}", supervisorEndpoint, ret); + return Boolean.parseBoolean(StringUtils.trim(ret)); + } + + @Override + public boolean isSupervisorAlive(SupervisorEndpoint supervisorEndpoint) { + try { + taskCommandSender.heartbeat(supervisorEndpoint); + return true; + } catch 
(Throwable e) { + log.info("heartbeat failed", e); + return false; + } + } +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/proxy/TaskSupervisorProxy.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/proxy/TaskSupervisorProxy.java new file mode 100644 index 0000000000..d1c6c2332a --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/proxy/TaskSupervisorProxy.java @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.oceanbase.odc.service.task.supervisor.proxy; + +import java.io.IOException; + +import com.oceanbase.odc.service.task.caller.JobContext; +import com.oceanbase.odc.service.task.caller.ProcessConfig; +import com.oceanbase.odc.service.task.exception.JobException; +import com.oceanbase.odc.service.task.supervisor.endpoint.ExecutorEndpoint; +import com.oceanbase.odc.service.task.supervisor.endpoint.SupervisorEndpoint; + +/** + * execute remote/local call for given supervisor endpoint + * + * @author longpeng.zlp + * @date 2024/10/29 11:48 + */ +public interface TaskSupervisorProxy { + /** + * execute start task command to supervisorEndpoint + * + * @param supervisorEndpoint + * @param jobContext + * @param processConfig + * @return + */ + ExecutorEndpoint startTask(SupervisorEndpoint supervisorEndpoint, JobContext jobContext, + ProcessConfig processConfig) + throws JobException, IOException; + + /** + * execute stop task command to supervisorEndpoint + * + * @param supervisorEndpoint + * @param jobContext + * @return + */ + boolean stopTask(SupervisorEndpoint supervisorEndpoint, ExecutorEndpoint executorEndpoint, JobContext jobContext) + throws IOException, JobException; + + /** + * detect can be finish command to supervisorEndpoint + * + * @param supervisorEndpoint + * @param jobContext + * @return + */ + boolean isTaskAlive(SupervisorEndpoint supervisorEndpoint, ExecutorEndpoint executorEndpoint, + JobContext jobContext) + throws JobException, IOException; + + /** + * if supervisor is alvie + */ + boolean isSupervisorAlive(SupervisorEndpoint supervisorEndpoint); + + static String getExecutorIdentifierByExecutorEndpoint(ExecutorEndpoint executorEndpoint) { + return executorEndpoint.getIdentifier(); + } + + static String getExecutorEndpoint(ExecutorEndpoint executorEndpoint) { + return "http://" + executorEndpoint.getHost() + ":" + executorEndpoint.getExecutorPort(); + } +} diff --git 
a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/runtime/LocalTaskCommandExecutor.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/runtime/LocalTaskCommandExecutor.java new file mode 100644 index 0000000000..ce8d07fdc2 --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/runtime/LocalTaskCommandExecutor.java @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.oceanbase.odc.service.task.supervisor.runtime; + +import com.oceanbase.odc.common.json.JsonUtils; +import com.oceanbase.odc.service.task.caller.ExecutorIdentifierParser; +import com.oceanbase.odc.service.task.exception.JobException; +import com.oceanbase.odc.service.task.supervisor.TaskSupervisor; +import com.oceanbase.odc.service.task.supervisor.endpoint.ExecutorEndpoint; +import com.oceanbase.odc.service.task.supervisor.protocol.GeneralTaskCommand; +import com.oceanbase.odc.service.task.supervisor.protocol.StartTaskCommand; +import com.oceanbase.odc.service.task.supervisor.protocol.TaskCommand; + +/** + * @author longpeng.zlp + * @date 2024/10/29 17:45 + */ +public class LocalTaskCommandExecutor implements TaskCommandExecutor { + private final TaskSupervisor taskSupervisor; + + public LocalTaskCommandExecutor(TaskSupervisor taskSupervisor) { + this.taskSupervisor = taskSupervisor; + } + + public String onCommand(TaskCommand taskCommand) throws JobException { + String ret = null; + switch (taskCommand.commandType()) { + case START: + StartTaskCommand startTaskCommand = (StartTaskCommand) taskCommand; + ExecutorEndpoint endpoint = + taskSupervisor.startTask(startTaskCommand.getJobContext(), startTaskCommand.getProcessConfig()); + ret = JsonUtils.toJson(endpoint); + break; + default: + boolean succeed = callTaskSupervisorFunc((GeneralTaskCommand) taskCommand); + ret = String.valueOf(succeed); + break; + } + return String.valueOf(ret); + } + + protected boolean callTaskSupervisorFunc(GeneralTaskCommand generalTaskCommand) throws JobException { + switch (generalTaskCommand.commandType()) { + case STOP: + return taskSupervisor.stopTask(generalTaskCommand.getExecutorEndpoint(), + generalTaskCommand.getJobContext()); + case IS_TASK_ALIVE: + return taskSupervisor.isTaskAlive( + ExecutorIdentifierParser.parser(generalTaskCommand.getExecutorEndpoint().getIdentifier())); + default: + throw new IllegalStateException("not recognized command " + 
generalTaskCommand); + } + } +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/runtime/TaskCommandDeserializer.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/runtime/TaskCommandDeserializer.java new file mode 100644 index 0000000000..e3fdb4f361 --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/runtime/TaskCommandDeserializer.java @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.oceanbase.odc.service.task.supervisor.runtime; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.oceanbase.odc.service.task.supervisor.protocol.CommandType; +import com.oceanbase.odc.service.task.supervisor.protocol.GeneralTaskCommand; +import com.oceanbase.odc.service.task.supervisor.protocol.StartTaskCommand; +import com.oceanbase.odc.service.task.supervisor.protocol.TaskCommand; + +/** + * @author longpeng.zlp + * @date 2024/10/29 15:59 + */ +public class TaskCommandDeserializer { + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public TaskCommand deserializeTaskCommand(String commandStr) throws JsonProcessingException { + JsonNode objectNode = OBJECT_MAPPER.readTree(commandStr); + JsonNode commandTypeNode = objectNode.get(TaskCommand.COMMAND_TYPE_NAME); + if (null == commandTypeNode) { + throw new IllegalStateException("invalid command, str=" + commandStr); + } + CommandType commandType = CommandType.valueOf(commandTypeNode.asText().toUpperCase()); + switch (commandType) { + case START: + return StartTaskCommand.fromJsonNode(objectNode); + case STOP: + case IS_TASK_ALIVE: + return GeneralTaskCommand.fromJsonNode(GeneralTaskCommand::new, objectNode, commandType); + default: + throw new IllegalStateException("not support command type, str=" + commandType); + } + } +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/runtime/TaskCommandExecutor.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/runtime/TaskCommandExecutor.java new file mode 100644 index 0000000000..9ef4ab1a23 --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/runtime/TaskCommandExecutor.java @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2023 OceanBase. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.oceanbase.odc.service.task.supervisor.runtime; + +import com.oceanbase.odc.service.task.exception.JobException; +import com.oceanbase.odc.service.task.supervisor.protocol.TaskCommand; + +/** + * task command executor for execute + * + * @author longpeng.zlp + * @date 2024/11/22 14:50 + */ +public interface TaskCommandExecutor { + /** + * execute command and write response if needed + * + * @param taskCommand + * @return result in string + * @throws JobException + */ + String onCommand(TaskCommand taskCommand) throws JobException; +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/runtime/TaskSupervisorServer.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/runtime/TaskSupervisorServer.java new file mode 100644 index 0000000000..196d9e2690 --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/supervisor/runtime/TaskSupervisorServer.java @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.oceanbase.odc.service.task.supervisor.runtime; + +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Consumer; + +import com.google.common.annotations.VisibleForTesting; +import com.oceanbase.odc.common.util.StringUtils; +import com.oceanbase.odc.service.task.net.HttpServerContainer; +import com.oceanbase.odc.service.task.net.RequestHandler; +import com.oceanbase.odc.service.task.supervisor.protocol.TaskCommand; + +import io.netty.handler.codec.http.HttpMethod; +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; + +/** + * + * @author longpeng.zlp + * @date 2024/10/29 18:05 + */ +@Slf4j +public class TaskSupervisorServer extends HttpServerContainer { + // executor to handle command + @Getter + private final TaskCommandExecutor taskCommandExecutor; + // port to listen + private final int port; + // deserialize task command + private TaskCommandDeserializer taskCommandDeserializer = new TaskCommandDeserializer(); + private AtomicInteger serverPort = new AtomicInteger(-1); + + public TaskSupervisorServer(int port, TaskCommandExecutor taskCommandExecutor) { + this.port = port; + this.taskCommandExecutor = taskCommandExecutor; + } + + @Override + protected int getPort() { + return port; + } + + @Override + protected RequestHandler getRequestHandler() { + return new RequestHandler() { + @Override + public String process(HttpMethod httpMethod, String uri, String requestData) { + // handle heartbeat request + if (StringUtils.contains(uri, "heartbeat")) { + return "true"; + } + try { + TaskCommand 
taskCommand = taskCommandDeserializer.deserializeTaskCommand(requestData); + return taskCommandExecutor.onCommand(taskCommand); + } catch (Throwable e) { + return processException(e); + } + } + + @Override + public String processException(Throwable e) { + return e.getMessage(); + } + }; + } + + @Override + protected String getModuleName() { + return "TaskSupervisor"; + } + + @Override + protected Thread createThread(Runnable r) { + return new Thread(r); + } + + @Override + protected Consumer portConsumer() { + return (p) -> serverPort.set(p); + } + + @VisibleForTesting + public int getServerPort() { + return serverPort.get(); + } +} diff --git a/server/odc-service/src/main/java/com/oceanbase/odc/service/task/util/TaskSupervisorUtil.java b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/util/TaskSupervisorUtil.java new file mode 100644 index 0000000000..7447a4327b --- /dev/null +++ b/server/odc-service/src/main/java/com/oceanbase/odc/service/task/util/TaskSupervisorUtil.java @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.oceanbase.odc.service.task.util; + +import org.springframework.boot.autoconfigure.web.ServerProperties; + +import com.oceanbase.odc.common.util.SystemUtils; +import com.oceanbase.odc.service.common.util.SpringContextUtil; +import com.oceanbase.odc.service.task.config.TaskFrameworkProperties; +import com.oceanbase.odc.service.task.enums.TaskRunMode; +import com.oceanbase.odc.service.task.supervisor.endpoint.SupervisorEndpoint; + +import lombok.extern.slf4j.Slf4j; + +/** + * @author longpeng.zlp + * @date 2024/12/2 10:50 + */ +@Slf4j +public class TaskSupervisorUtil { + public static SupervisorEndpoint getDefaultSupervisorEndpoint() { + String host = SystemUtils.getLocalIpAddress(); + ServerProperties serverProperties = SpringContextUtil.getBean(ServerProperties.class); + int port = serverProperties.getPort(); + return new SupervisorEndpoint(host, (port + 1000) % 65535); + } + + /** + * current task supervisor agent only support process mode + * + * @param taskFrameworkProperties + * @return + */ + public static boolean isTaskSupervisorEnabled(TaskFrameworkProperties taskFrameworkProperties) { + return (taskFrameworkProperties.isEnableTaskSupervisorAgent() + && taskFrameworkProperties.getRunMode() == TaskRunMode.PROCESS); + } +} diff --git a/server/odc-service/src/test/java/com/oceanbase/odc/service/resource/k8s/K8sResourceOperatorTest.java b/server/odc-service/src/test/java/com/oceanbase/odc/service/resource/k8s/K8sResourceOperatorTest.java index d71e1831b1..e07d6fe6e4 100644 --- a/server/odc-service/src/test/java/com/oceanbase/odc/service/resource/k8s/K8sResourceOperatorTest.java +++ b/server/odc-service/src/test/java/com/oceanbase/odc/service/resource/k8s/K8sResourceOperatorTest.java @@ -158,7 +158,7 @@ private K8sPodResource buildByK8sContext(K8sResourceContext k8sResourceContext) DefaultResourceOperatorBuilder.CLOUD_K8S_POD_TYPE, k8sResourceContext.resourceNamespace(), k8sResourceContext.getResourceName(), ResourceState.CREATING, - 
"localhost:8080", new Date(1024)); + "localhost:8080", "8089", new Date(1024)); } } } diff --git a/server/odc-service/src/test/java/com/oceanbase/odc/service/task/net/HttpServerTest.java b/server/odc-service/src/test/java/com/oceanbase/odc/service/task/net/HttpServerTest.java new file mode 100644 index 0000000000..d4bcbc1eab --- /dev/null +++ b/server/odc-service/src/test/java/com/oceanbase/odc/service/task/net/HttpServerTest.java @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.oceanbase.odc.service.task.net; + +import java.io.IOException; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Consumer; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.oceanbase.odc.common.util.StringUtils; +import com.oceanbase.odc.service.task.util.HttpClientUtils; + +import io.netty.handler.codec.http.HttpMethod; + +/** + * @author longpeng.zlp + * @date 2024/11/25 10:55 + */ +public class HttpServerTest { + private HttpServerContext httpServerContext; + private HttpServer httpServer; + private Thread serverThread; + private AtomicInteger startedPort = new AtomicInteger(0); + private SimpleRequestHandler simpleRequestHandler; + private CountDownLatch countDownLatch = new CountDownLatch(1); + + @Before + public void setUp() throws InterruptedException { + simpleRequestHandler = new SimpleRequestHandler(); + httpServerContext = new HttpServerContext() { + @Override + public int listenPort() { + return 0; + } + + @Override + public String moduleName() { + return "testModule"; + } + + @Override + public RequestHandler requestHandler() { + return simpleRequestHandler; + } + + @Override + public Consumer portListener() { + return (port) -> { + startedPort.set(port); + countDownLatch.countDown(); + }; + } + }; + httpServer = new HttpServer<>(httpServerContext); + httpServer.start(); + serverThread = new Thread(httpServer::waitStop); + serverThread.start(); + countDownLatch.await(); + } + + @After + public void shutdown() throws InterruptedException { + httpServer.stop(); + serverThread.join(); + } + + @Test + public void testHttpServerProcessRequest() throws IOException { + Assert.assertEquals(httpServer.getRealListenPort(), startedPort.get()); + // send command + int port = httpServer.getRealListenPort(); + Assert.assertEquals("getResult", 
HttpClientUtils.request("GET", "http://127.0.0.1:" + port + "/api/get", + new TypeReference() {})); + Assert.assertEquals("postResult", HttpClientUtils.request("POST", "http://127.0.0.1:" + port + "/api/post", + "{}", new TypeReference() {})); + } + + @Test + public void testHttpServerProcessRequestThrowsException() throws IOException { + simpleRequestHandler.setShouldThrowsException(true); + // send command + int port = httpServer.getRealListenPort(); + Assert.assertEquals("exception throws", HttpClientUtils.request("GET", "http://127.0.0.1:" + port + "/api/get", + new TypeReference() {})); + } + + private static final class SimpleRequestHandler implements RequestHandler { + private boolean shouldThrowsException = false; + + public void setShouldThrowsException(boolean shouldThrowsException) { + this.shouldThrowsException = shouldThrowsException; + } + + @Override + public String process(HttpMethod httpMethod, String uri, String requestData) { + if (shouldThrowsException) { + throw new RuntimeException("exception throws"); + } + if (StringUtils.contains(uri, "get")) { + return "getResult"; + } else { + return "postResult"; + } + } + + @Override + public String processException(Throwable e) { + return e.getMessage(); + } + } +} diff --git a/server/odc-service/src/test/java/com/oceanbase/odc/service/task/supervisor/protocol/TaskCommandSenderTest.java b/server/odc-service/src/test/java/com/oceanbase/odc/service/task/supervisor/protocol/TaskCommandSenderTest.java new file mode 100644 index 0000000000..ceb711813d --- /dev/null +++ b/server/odc-service/src/test/java/com/oceanbase/odc/service/task/supervisor/protocol/TaskCommandSenderTest.java @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2023 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.oceanbase.odc.service.task.supervisor.protocol; + +import org.junit.Assert; +import org.junit.Test; + +import com.oceanbase.odc.service.task.supervisor.endpoint.SupervisorEndpoint; + +/** + * @author longpeng.zlp + * @date 2024/11/25 16:44 + */ +public class TaskCommandSenderTest { + @Test + public void testCommandSenderBuildStartUrl() { + TaskCommandSender taskCommandSender = new TaskCommandSender(); + SupervisorEndpoint supervisorEndpoint = new SupervisorEndpoint("127.0.0.1", 9999); + TaskCommand taskCommand = StartTaskCommand.create(null, null); + Assert.assertEquals(taskCommandSender.buildUrl(supervisorEndpoint, taskCommand), + "http://127.0.0.1:9999/task/command/start"); + } + + @Test + public void testCommandSenderBuildNoneStartUrl() { + TaskCommandSender taskCommandSender = new TaskCommandSender(); + SupervisorEndpoint supervisorEndpoint = new SupervisorEndpoint("127.0.0.1", 9999); + for (CommandType commandType : CommandType.values()) { + if (commandType == CommandType.START) { + continue; + } + TaskCommand taskCommand = GeneralTaskCommand.create(null, null, commandType); + Assert.assertEquals(taskCommandSender.buildUrl(supervisorEndpoint, taskCommand), + "http://127.0.0.1:9999/task/command/" + commandType.name().toLowerCase()); + } + } +}