
  import React from 'react'
  import { MDXTag } from '@mdx-js/tag'
  import TutorialLoadSQLButton from "view/tutorial/tutorial-load-sql-button";
import { 
    SQL_BOOKS_CREATE_PIPELINE,
    SQL_BOOKS_SELECT_PIPELINE_FILES,
    SQL_BOOKS_START_PIPELINE_FORE,
    SQL_BOOKS_SELECT_STAR,
    SQL_BOOKS_ALTER_PIPELINE,
    SQL_BOOKS_START_PIPELINE_BACK,
} from "./books-queries";
import InternalLink from "view/components/internal-link";
import ExtLink from "view/components/external-link";

// Layout props for this MDX document; none are defined, so the object is empty.
const layoutProps = {};
export default class MDXContent extends React.Component {
  constructor(props) {
    super(props)
    this.layout = null
  }
  render() {
    const { components, ...props } = this.props

    return <MDXTag
             name="wrapper"
             
             components={components}>

<MDXTag name="h4" components={components}>{`Step 2: Create a Pipeline to Ingest Data`}</MDXTag>
<MDXTag name="p" components={components}>{`To create the pipeline, you will need the following information:`}</MDXTag>
<MDXTag name="ul" components={components}>
<MDXTag name="li" components={components} parentName="ul">{`The name of the bucket, such as: my-bucket-name`}</MDXTag>
<MDXTag name="li" components={components} parentName="ul">{`The name of the bucket’s region, such as: us-west-1`}</MDXTag>
</MDXTag>
<MDXTag name="p" components={components}>{`Using these identifiers, execute the `}<MDXTag name="inlineCode" components={components} parentName="p">{`CREATE PIPELINE`}</MDXTag>{` query, replacing the placeholder values with your own:`}</MDXTag>
<TutorialLoadSQLButton id="books-create-pipeline" query={SQL_BOOKS_CREATE_PIPELINE} text="Paste Create Pipeline query" />
<MDXTag name="p" components={components}>{`Click `}<MDXTag name="strong" components={components} parentName="p">{`Run`}</MDXTag>{` (Ctrl+Enter/Cmd+Enter)`}</MDXTag>
<MDXTag name="p" components={components}>{`You can see what files the pipeline wants to load by running the following:`}</MDXTag>
<TutorialLoadSQLButton id="books-create-pipeline" query={SQL_BOOKS_SELECT_PIPELINE_FILES} text="Paste Select From Pipeline query" />
<MDXTag name="p" components={components}>{`Click `}<MDXTag name="strong" components={components} parentName="p">{`Run`}</MDXTag>{` (Ctrl+Enter/Cmd+Enter)`}</MDXTag>
<MDXTag name="p" components={components}>{`If everything is properly configured, you should see one row in the `}<MDXTag name="inlineCode" components={components} parentName="p">{`Unloaded`}</MDXTag>{` state, corresponding to `}<MDXTag name="inlineCode" components={components} parentName="p">{`books.txt`}</MDXTag>{`.`}</MDXTag>
<MDXTag name="p" components={components}>{`The `}<MDXTag name="inlineCode" components={components} parentName="p">{`CREATE PIPELINE`}</MDXTag>{` statement creates a new pipeline named `}<MDXTag name="inlineCode" components={components} parentName="p">{`library`}</MDXTag>{`, but the pipeline has not yet been started, and no data has been loaded. A MemSQL pipeline can run either in the background or be triggered by a foreground query. Start it in the foreground first.`}</MDXTag>
<TutorialLoadSQLButton id="books-start-pipeline-foreground" query={SQL_BOOKS_START_PIPELINE_FORE} text="Paste Start Pipeline query" />
<MDXTag name="p" components={components}>{`Click `}<MDXTag name="strong" components={components} parentName="p">{`Run`}</MDXTag>{` (Ctrl+Enter/Cmd+Enter)`}</MDXTag>
<MDXTag name="p" components={components}>{`When this command returns success, all files from your bucket will be loaded. If you check `}<MDXTag name="inlineCode" components={components} parentName="p">{`information_schema.PIPELINES_FILES`}</MDXTag>{` again, you should see all files in the `}<MDXTag name="inlineCode" components={components} parentName="p">{`Loaded`}</MDXTag>{` state. `}</MDXTag>
<MDXTag name="p" components={components}>{`Now you can query the `}<MDXTag name="inlineCode" components={components} parentName="p">{`classic_books`}</MDXTag>{` table to make sure the data has actually loaded.`}</MDXTag>
<TutorialLoadSQLButton id="books-select-star" query={SQL_BOOKS_SELECT_STAR} text="Paste Select query " />
<MDXTag name="p" components={components}>{`Click `}<MDXTag name="strong" components={components} parentName="p">{`Run`}</MDXTag>{` (Ctrl+Enter/Cmd+Enter)`}</MDXTag>
<MDXTag name="p" components={components}>{`You can also have MemSQL run your pipeline in background. In such a configuration, MemSQL will periodically poll S3 for new files and continuously them as they are added to the bucket. Before running your pipeline in the background, you must reset the state of the pipeline and the table.`}</MDXTag>
<TutorialLoadSQLButton id="books-alter-pipeline" query={SQL_BOOKS_ALTER_PIPELINE} text="Paste Delete and Alter queries" />
<MDXTag name="p" components={components}>{`Click `}<MDXTag name="strong" components={components} parentName="p">{`Run`}</MDXTag>{` (Ctrl+Enter/Cmd+Enter)`}</MDXTag>
<MDXTag name="p" components={components}>{`The first command deletes all rows from the target table. The second causes the pipeline to start from the beginning, in this case, "forgetting" it already loaded `}<MDXTag name="inlineCode" components={components} parentName="p">{`books.txt`}</MDXTag>{` so you can load it again.`}</MDXTag>
<MDXTag name="p" components={components}>{`To start a pipeline in the background, run the `}<MDXTag name="inlineCode" components={components} parentName="p">{`START PIPELINE`}</MDXTag>{` query.`}</MDXTag>
<TutorialLoadSQLButton id="books-start-pipeline-background" query={SQL_BOOKS_START_PIPELINE_BACK} text="Paste Start Pipeline query" />
<MDXTag name="p" components={components}>{`Click `}<MDXTag name="strong" components={components} parentName="p">{`Run`}</MDXTag>{` (Ctrl+Enter/Cmd+Enter)`}</MDXTag>
<MDXTag name="p" components={components}>{`This statement starts the pipeline. To see the status of your pipeline, view the Pipelines page in Studio.`}</MDXTag>
<InternalLink category="tutorial" routeInfo={{name: "cluster.pipelines", params: {}}} clusterLink>View Pipelines</InternalLink>
<MDXTag name="p" components={components}>{`At this point, the pipeline is running and the contents of the `}<MDXTag name="inlineCode" components={components} parentName="p">{`books.txt`}</MDXTag>{` file should once again be present in the `}<MDXTag name="inlineCode" components={components} parentName="p">{`classic_books`}</MDXTag>{` table.`}</MDXTag>
<MDXTag name="h4" components={components}>{`Next Steps`}</MDXTag>
<MDXTag name="p" components={components}>{`Now that you have a running pipeline, any new files you add to your bucket will be automatically ingested. To understand how an S3 pipeline ingests large amounts of objects in a bucket, see the `}<ExtLink name="parallel-loading" category="tutorial">{`Parallelized Data Loading`}</ExtLink>{` section in the `}<ExtLink name="pipeline-extractors" category="tutorial">{`Extractors`}</ExtLink>{` topic. You can also learn more about how to transform the ingested data by reading the `}<ExtLink name="pipeline-transforms" category="tutorial">{`Transforms`}</ExtLink>{` topic.`}</MDXTag>
           </MDXTag>
  }
}
MDXContent.isMDXComponent = true
  