Your AI pair programmer
GitHub Copilot uses the OpenAI Codex to suggest code and entire functions in real-time, right from your editor.
#!/usr/bin/env ts-node
import { fetch } from "fetch-h2";
/**
 * Determine whether the sentiment of `text` is positive.
 *
 * Sends the text as a form-encoded POST to the text-processing.com sentiment
 * web service and inspects the `label` field of the JSON reply.
 *
 * @param text - Free-form text to classify.
 * @returns `true` when the service labels the text `"pos"`, otherwise `false`.
 */
async function isPositive(text: string): Promise<boolean> {
  // The body is application/x-www-form-urlencoded, so the value has to be
  // percent-encoded; otherwise "&", "=", "+" or "%" inside `text` would be
  // interpreted as form syntax and the service would see a different text.
  const body = `text=${encodeURIComponent(text)}`;
  const response = await fetch(`http://text-processing.com/api/sentiment/`, {
    method: "POST",
    body,
    headers: {
      "Content-Type": "application/x-www-form-urlencoded",
    },
  });
  const json = await response.json();
  return json.label === "pos";
}
package main
// CategorySummary is one row of the per-category aggregate produced by
// createCategorySummaries.
type CategorySummary struct {
	Title    string  // category name (the tasks.category column)
	Tasks    int     // COUNT(category) for this category
	AvgValue float64 // AVG(value) over the tasks in this category
}
// createTables issues the CREATE TABLE statement for the tasks table.
//
// NOTE(review): both the result and the error from db.Exec are discarded, so
// a failed statement is not reported to the caller.
func createTables(db *sql.DB) {
	const tasksSchema = "CREATE TABLE tasks (id INTEGER PRIMARY KEY, title TEXT, value INTEGER, category TEXT)"
	_, _ = db.Exec(tasksSchema)
}
// createCategorySummaries returns one CategorySummary per distinct category in
// the tasks table: the category name, COUNT(category) and AVG(value).
//
// It returns a nil slice together with the error when the query, a row scan,
// or the row iteration itself fails.
func createCategorySummaries(db *sql.DB) ([]CategorySummary, error) {
	rows, err := db.Query("SELECT category, COUNT(category), AVG(value) FROM tasks GROUP BY category")
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var summaries []CategorySummary
	for rows.Next() {
		var summary CategorySummary
		if err := rows.Scan(&summary.Title, &summary.Tasks, &summary.AvgValue); err != nil {
			return nil, err
		}
		summaries = append(summaries, summary)
	}
	// rows.Next returns false both at end-of-data and when iteration fails;
	// rows.Err is the only way to tell the two apart, so check it before
	// reporting a possibly truncated result as success.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return summaries, nil
}
import datetime
def parse_expenses(expenses_string):
    """Parse the list of expenses and return the list of triples (date, value, currency).

    Each non-empty line holds three whitespace-separated fields: a date in
    YYYY-MM-DD form, a numeric value and a currency code. Blank lines and
    lines whose first non-blank character is # are ignored.

    Returns a list of (datetime.datetime, float, str) tuples in input order.
    Raises ValueError when a line does not have exactly three fields or when
    the date or value cannot be parsed.

    Example expenses_string:
        2016-01-02 -34.01 USD
        2016-01-03 2.59 DKK
        2016-01-03 -2.72 EUR
    """
    expenses = []
    for raw_line in expenses_string.splitlines():
        # Strip first so indented lines (as in the example above) and
        # whitespace-only lines are handled instead of crashing the unpacking.
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        # split() with no argument tolerates tabs and runs of spaces.
        date, value, currency = line.split()
        expenses.append((datetime.datetime.strptime(date, "%Y-%m-%d"),
                         float(value),
                         currency))
    return expenses
# Migration that creates the shipping_addresses table: seven string columns,
# references to shipping_method and customer, and non-null timestamps.
class CreateShippingAddresses < ActiveRecord::Migration
  def change
    create_table :shipping_addresses do |t|
      # Plain string columns, declared in their original order.
      %i[name address city zip state phone email].each do |column|
        t.string column
      end

      # Indexed foreign-key references.
      %i[shipping_method customer].each do |owner|
        t.references owner, index: true, foreign_key: true
      end

      t.timestamps null: false
    end
  end
end
Trained on billions of lines of code, GitHub Copilot turns natural language prompts into coding suggestions across dozens of languages.
Focus on solving bigger problems
Spend less time creating boilerplate and repetitive code patterns, and more time on what matters: building great software. Write a comment describing the logic you want and GitHub Copilot will immediately suggest code to implement the solution.
package main
// Run records the outcome of a single run.
type Run struct {
	Time    int // in milliseconds
	Results string
	Failed  bool
}

// averageRuntimeInSeconds returns the mean runtime, in seconds, of the runs
// that did not fail. Failed runs are ignored entirely.
//
// When there is no successful run (runs is empty or every run failed) it
// returns 0 instead of dividing zero by zero, which would yield NaN.
func averageRuntimeInSeconds(runs []Run) float64 {
	var totalTime, successfulRuns int
	for _, run := range runs {
		if run.Failed {
			continue
		}
		totalTime += run.Time
		successfulRuns++
	}
	if successfulRuns == 0 {
		return 0
	}
	// Time is in milliseconds; divide by 1000 to report seconds.
	return float64(totalTime) / float64(successfulRuns) / 1000
}
# A course with enrollments, assignments and the submissions made for those
# assignments.
class Course < ApplicationRecord
  has_many :enrollments, dependent: :destroy
  # NOTE(review): students and teachers are declared identically (both go
  # through enrollments with source :user), so as written they return the
  # same records - confirm whether a role filter is intended.
  has_many :students, through: :enrollments, source: :user
  has_many :teachers, through: :enrollments, source: :user
  has_many :assignments, dependent: :destroy
  has_many :submissions, through: :assignments

  # Every descriptive attribute is mandatory.
  validates :name, :start_date, :end_date, :term, :year, presence: true

  class << self
    # Look up a course by its name via Course.find_by.
    def find_by_name(name)
      Course.find_by(name: name)
    end
  end
end
// The same duration (3600 seconds) expressed in progressively larger units.
// Each constant is derived from the previous one, so only `seconds` is a
// literal; every other value is a fraction of the next-smaller unit.
const seconds = 3600
const minutes = seconds / 60
const hours = minutes / 60
const days = hours / 24
const weeks = days / 7
// Uses a fixed 30-day month.
const months = days / 30
// Twelve 30-day months, i.e. a 360-day year.
const years = months / 12
import static org.junit.Assert.*;
import org.junit.Test;
public class IsPrimeTest {
// Math.isPrime(int) returns whether the given number is prime or not
@Test
public void testIsPrime() {
assertTrue(Math.isPrime(2));
assertTrue(Math.isPrime(3));
assertTrue(Math.isPrime(5));
assertTrue(Math.isPrime(7));
assertTrue(Math.isPrime(11));
assertTrue(Math.isPrime(13));
assertTrue(Math.isPrime(17));
assertTrue(Math.isPrime(19));
assertTrue(Math.isPrime(23));
assertTrue(Math.isPrime(29));
}
Get AI-based suggestions, just for you
Visual Studio Code
def max_sum_slice(xs):
    """Return the largest sum of any contiguous slice of xs.

    The empty slice counts, so the result is never below 0.
    """
    best = running = 0
    for value in xs:
        extended = running + value
        # A running total that is not positive is dropped: restart from 0.
        if extended > 0:
            running = extended
        else:
            running = 0
        if running > best:
            best = running
    return best
Visual Studio Code
def max_sum_slice(xs):
    """Return the maximum sum over all contiguous slices of xs.

    An empty xs, or one without any positive-sum slice, gives 0.
    """
    if not xs:
        return 0
    best_total = 0
    suffix_total = 0
    for item in xs:
        candidate = suffix_total + item
        # Best sum of a slice ending at this item, floored at 0.
        suffix_total = candidate if candidate > 0 else 0
        best_total = suffix_total if suffix_total > best_total else best_total
    return best_total
Visual Studio Code
def max_sum_slice(xs):
    """Find the contiguous slice of xs with the largest sum.

    Returns a tuple (begin, end, max_sum) where begin and end are inclusive
    indices, so that sum(xs[begin:end + 1]) == max_sum always holds.

    The empty slice (sum 0) is the baseline: when no slice has a positive sum,
    including when xs is empty, the result is (0, -1, 0), i.e. the empty
    slice xs[0:0].
    """
    max_ending = max_so_far = 0
    begin, end = 0, -1
    # Start of the run currently being extended. It is kept separate from
    # `begin` so that a reset after the best slice was found cannot move the
    # reported start past the reported end.
    run_begin = 0
    for i, x in enumerate(xs):
        if max_ending + x > 0:
            max_ending += x
        else:
            # The run ending here is not worth keeping; the next one starts
            # at the following element.
            max_ending = 0
            run_begin = i + 1
        if max_ending > max_so_far:
            max_so_far = max_ending
            begin, end = run_begin, i
    return begin, end, max_so_far
Visual Studio Code
def max_sum_slice(xs):
    """Compute the greatest sum obtainable from a contiguous slice of xs.

    Returns 0 when xs is empty or when every slice sums to 0 or less.
    """
    if not xs:
        return 0
    peak = tail = 0
    for number in xs:
        # tail: best sum of a slice that ends at this element (never below 0).
        tail = max(0, tail + number)
        if tail > peak:
            peak = tail
    return peak
Visual Studio Code
def max_sum_slice(xs):
    """Find the contiguous slice of xs with the largest sum.

    Returns (start_index, end_index, max_sum) with inclusive indices, so that
    sum(xs[start_index:end_index + 1]) == max_sum always holds.

    The empty slice (sum 0) is the baseline: when no slice has a positive sum,
    including when xs is empty, the result is (0, -1, 0), i.e. the empty
    slice xs[0:0].
    """
    max_sum = 0
    max_sum_so_far = 0
    start_index = 0
    end_index = -1
    # Where the run being accumulated in max_sum_so_far begins. The reported
    # start_index is only updated from it when a new maximum is recorded.
    run_start = 0
    for i in range(len(xs)):
        max_sum_so_far += xs[i]
        if max_sum_so_far > max_sum:
            max_sum = max_sum_so_far
            start_index = run_start
            end_index = i
        elif max_sum_so_far < 0:
            # A negative prefix can only hurt later sums: start over after i.
            max_sum_so_far = 0
            run_start = i + 1
    return (start_index, end_index, max_sum)
Visual Studio Code
def max_sum_slice(xs):
    """Return the highest sum found among the contiguous slices of xs.

    Returns 0 for an empty xs and whenever no slice has a positive sum.
    """
    if len(xs) == 0:
        return 0
    highest = 0
    ending_here = 0
    for element in xs:
        ending_here = max(0, ending_here + element)
        # One running maximum is enough: the original's two nested maxima
        # always held the same value.
        highest = max(highest, ending_here)
    return highest
Visual Studio Code
def max_sum_slice(xs):
    """Find the contiguous slice of xs with the largest sum.

    Returns (best_sum, best_start, best_end) where best_start and best_end are
    inclusive indices, so that sum(xs[best_start:best_end + 1]) == best_sum.

    The empty slice (sum 0) is the baseline: when no slice has a positive sum,
    including when xs is empty, the result is (0, None, None).
    """
    best_sum, best_start, best_end = 0, None, None
    run_sum, run_start = 0, 0
    for i, x in enumerate(xs):
        if run_sum <= 0:
            # A non-positive prefix never helps: start a fresh run at i.
            run_sum, run_start = x, i
        else:
            run_sum += x
        if run_sum > best_sum:
            best_sum, best_start, best_end = run_sum, run_start, i
    return best_sum, best_start, best_end
Visual Studio Code
def max_sum_slice(xs):
    """
    Return a pair of indices (i, j) describing a slice of xs, or None if xs is empty.

    A one-element list yields (0, 0). A two-element list yields (0, 1) when
    xs[0] >= xs[1] and (1, 0) otherwise. Longer lists are delegated to
    max_sum_slice_helper(xs, 0, len(xs) - 1).

    NOTE(review): max_sum_slice_helper is not defined in the visible source,
    and the exact meaning of (i, j) is not established here (the two-element
    case can return i > j) - confirm against the helper before relying on it.
    """
    if len(xs) == 0:
        return None
    elif len(xs) == 1:
        return (0,0)
    elif len(xs) == 2:
        return (0,1) if xs[0] >= xs[1] else (1,0)
    else:
        return max_sum_slice_helper(xs,0,len(xs)-1)
Keep flying with your favorite editor
The numbers speak for themselves
Code confidently in unfamiliar territory
Whether you’re working in a new language or framework, or just learning to code, GitHub Copilot can help you find your way. Tackle a bug, or learn how to use a new framework without spending most of your time spelunking through the docs or searching the web.
// Bearer token for the Twitter API, read from the environment.
const token = process.env["TWITTER_BEARER_TOKEN"]

/**
 * Fetch tweets from a user's timeline via the Twitter v1.1
 * statuses/user_timeline endpoint.
 *
 * @param screenName - Screen name of the user whose timeline is requested.
 * @param count - Value sent as the `count` query parameter.
 * @returns A promise for the parsed JSON body of the response.
 */
const fetchTweetsFromUser = async (screenName, count) => {
  // Percent-encode the query values so characters such as "&", "#" or "="
  // cannot terminate the parameter or inject additional ones.
  const query = `screen_name=${encodeURIComponent(screenName)}&count=${encodeURIComponent(count)}`
  const response = await fetch(
    `https://api.twitter.com/1.1/statuses/user_timeline.json?${query}`,
    {
      headers: {
        Authorization: `Bearer ${token}`,
      },
    }
  )
  const json = await response.json()
  return json
}
import tweepy, os # secrets in environment variables
def fetch_tweets_from_user(user_name):
    """Fetch tweets from the timeline of the user with screen name user_name.

    Credentials are read from the TWITTER_KEY, TWITTER_SECRET, TWITTER_TOKEN
    and TWITTER_TOKEN_SECRET environment variables; a missing variable raises
    KeyError. Requests up to 200 tweets with retweets excluded and returns
    whatever api.user_timeline returns.
    """
    # authentication
    auth = tweepy.OAuthHandler(os.environ['TWITTER_KEY'], os.environ['TWITTER_SECRET'])
    auth.set_access_token(os.environ['TWITTER_TOKEN'], os.environ['TWITTER_TOKEN_SECRET'])
    api = tweepy.API(auth)
    # fetch tweets; the original passed the undefined name `user` here, which
    # raised NameError on every call.
    tweets = api.user_timeline(screen_name=user_name, count=200, include_rts=False)
    return tweets
require 'twitter'
# Returns the result of Twitter::REST::Client#user_timeline for +handle+.
# The client is configured from the TWITTER_CONSUMER_KEY,
# TWITTER_CONSUMER_SECRET, TWITTER_ACCESS_TOKEN and
# TWITTER_ACCESS_TOKEN_SECRET environment variables.
def fetch_tweets_from_user(handle)
  client = Twitter::REST::Client.new do |settings|
    settings.consumer_key = ENV['TWITTER_CONSUMER_KEY']
    settings.consumer_secret = ENV['TWITTER_CONSUMER_SECRET']
    settings.access_token = ENV['TWITTER_ACCESS_TOKEN']
    settings.access_token_secret = ENV['TWITTER_ACCESS_TOKEN_SECRET']
  end

  client.user_timeline(handle)
end
// Bearer token for the Twitter API, read from the environment.
const token = process.env["TWITTER_BEARER_TOKEN"]

/**
 * Fetch the 20 most recent timeline entries for a user and reduce each tweet
 * to the fields this module cares about.
 *
 * @param userName - Screen name of the user whose timeline is requested.
 * @returns A promise for an array of `{ id, text, created_at, user }` objects,
 *   where `user` carries `id`, `name`, `screen_name` and `profile_image_url`.
 */
const fetchTweetsFromUser = (userName: string) => {
  // Percent-encode the screen name so characters such as "&" or "#" cannot
  // cut the parameter short or override the fixed `count` value.
  const url = `https://api.twitter.com/1.1/statuses/user_timeline.json?screen_name=${encodeURIComponent(userName)}&count=20`
  return fetch(url, {
    headers: {
      "Authorization": `Bearer ${token}`
    }
  })
    .then(res => res.json())
    .then(tweets => tweets.map(tweet => ({
      id: tweet.id,
      text: tweet.text,
      created_at: tweet.created_at,
      user: {
        id: tweet.user.id,
        name: tweet.user.name,
        screen_name: tweet.user.screen_name,
        profile_image_url: tweet.user.profile_image_url
      }
    })))
}
package main
// apiKey is the bearer token sent with every request, read from the
// TWITTER_BEARER_TOKEN environment variable at start-up.
var apiKey = os.Getenv("TWITTER_BEARER_TOKEN")

// Tweet is the subset of a timeline entry that gets decoded: only its text.
type Tweet = struct{ Text string }

// fetchTweetsFromUser requests up to 200 timeline entries for user from the
// Twitter v1.1 statuses/user_timeline endpoint and decodes the JSON response.
//
// It returns an error when the request cannot be built or sent, when the
// response status is not 200, or when the body cannot be decoded.
//
// NOTE(review): user is concatenated into the query string without escaping.
func fetchTweetsFromUser(user string) ([]Tweet, error) {
	endpoint := "https://api.twitter.com/1.1/statuses/user_timeline.json?screen_name=" + user + "&count=200"

	request, err := http.NewRequest("GET", endpoint, nil)
	if err != nil {
		return nil, err
	}
	request.Header.Set("Authorization", "Bearer "+apiKey)

	response, err := http.DefaultClient.Do(request)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	if response.StatusCode != 200 {
		return nil, fmt.Errorf("bad status: %d", response.StatusCode)
	}

	var timeline []Tweet
	err = json.NewDecoder(response.Body).Decode(&timeline)
	if err != nil {
		return nil, err
	}
	return timeline, nil
}
import d3 from "d3"
/**
 * Append an SVG of the given size to the element matching "#scatterplot" and
 * draw one radius-5 circle per datum, positioned at the datum's x and y.
 *
 * @param data - Array of objects with `x` and `y` properties.
 * @param height - Value for the SVG `height` attribute.
 * @param width - Value for the SVG `width` attribute.
 */
const drawScatterplot = (data, height, width) => {
  const container = d3.select("#scatterplot")
  const svg = container.append("svg")
  svg.attr("height", height)
  svg.attr("width", width)

  // Bind the data and create a circle for every datum in the enter selection.
  const points = svg.selectAll("circle").data(data).enter().append("circle")
  points.attr("cx", point => point.x)
  points.attr("cy", point => point.y)
  points.attr("r", 5)
}
import matplotlib.pyplot as plt
def draw_scatterplot(x_values, y_values):
    """Scatter y_values against x_values with pyplot, label the plot and call plt.show()."""
    marker_size = 20
    plt.scatter(x_values, y_values, s=marker_size)
    # Axis labels and title are independent of each other.
    plt.xlabel("x values")
    plt.ylabel("y values")
    plt.title("Scatter Plot")
    plt.show()
require 'gruff'
# Builds a Gruff::Scatter chart (constructed with 400) from the given x and y
# values and writes it to "plot.png".
def draw_scatterplot(x_values, y_values)
  chart = Gruff::Scatter.new(400)

  # NOTE(review): the title "GCD" looks unrelated to a generic scatter plot -
  # confirm it is intended.
  chart.title = "GCD"
  chart.x_axis_label = "X"
  chart.y_axis_label = "Y"

  chart.data('data', x_values, y_values)
  chart.write("plot.png")
end
import d3 from "d3"
const drawScatterplot = (
data: Array<{ x: number; y: number; size: number }>,
x: d3.ScaleLinear<number, number>,
y: d3.ScaleLinear<number, number>,
svg: d3.Selection<SVGGElement, unknown, HTMLElement, any>,
margin: { top: number; bottom: number; left: number; right: number }
) => {
const width = svg.attr("width") - margin.left - margin.right
const height = svg.attr("height") - margin.top - margin.bottom
svg.selectAll("*").remove()
const g = svg.append("g")
.attr("transform", `translate(${margin.left}, ${margin.top})`)
const xAxis = g
.append("g")
.attr("transform", `translate(0, ${height})`)
.call(d3.axisBottom(x))
...
/**
 * Wrap `fn` so that repeated calls with the same arguments reuse the first
 * result instead of calling `fn` again.
 *
 * Arguments are compared by their JSON.stringify representation, so they
 * must be JSON-serializable for the cache key to be meaningful.
 *
 * @param fn - Function to memoize.
 * @returns A function with the same call signature that caches results.
 */
const memoize = fn => {
  const cache = new Map();
  return (...args) => {
    const key = JSON.stringify(args);
    // Test for the key itself rather than the truthiness of the cached value,
    // so results such as 0, "", false, null or undefined are cached too.
    if (!cache.has(key)) {
      cache.set(key, fn(...args));
    }
    return cache.get(key);
  };
}
def memoize(func):
    """Return a wrapper around func that caches results by positional arguments.

    The argument tuple is used as a dictionary key, so every argument must be
    hashable. Keyword arguments are not supported by the wrapper.
    """
    results = {}

    def wrapper(*args):
        # Compute on the first call with these arguments, then reuse.
        if args not in results:
            results[args] = func(*args)
        return results[args]

    return wrapper
# Returns a lambda that calls +func+ at most once per distinct argument list
# and reuses the stored result afterwards. The argument array is the hash key.
def memoize(func)
  cache = {}
  lambda do |*args|
    # fetch tests for the key itself, so nil and false results are cached as
    # well; `||=` would call func again every time for those.
    cache.fetch(args) { cache[args] = func.call(*args) }
  end
end
/**
 * Wrap `fn` so that repeated calls with the same arguments reuse the first
 * result instead of calling `fn` again.
 *
 * Arguments are compared by their `JSON.stringify` representation, so they
 * must be JSON-serializable for the cache key to be meaningful.
 *
 * @param fn - Function to memoize.
 * @returns A function that forwards to `fn` on a cache miss and returns the
 *   stored result on a hit.
 */
const memoize = (fn: Function): Function => {
  const cache = new Map<string, unknown>();
  return (...args: any[]): any => {
    const key = JSON.stringify(args);
    // Check for the key rather than the truthiness of the stored value, so
    // results such as 0, "", false, null or undefined are not recomputed.
    if (cache.has(key)) {
      return cache.get(key);
    }
    const result = fn(...args);
    cache.set(key, result);
    return result;
  };
}
package main
// Memoize returns a function that computes fn(n) on the first call for a
// given n and serves the stored value on later calls with the same n.
func Memoize(fn func(int) int) func(int) int {
	results := map[int]int{}
	return func(n int) int {
		if _, seen := results[n]; !seen {
			results[n] = fn(n)
		}
		return results[n]
	}
}
const https = require('https')
const parseString = require('xml2js').parseString
// Goodreads API key, read from the environment.
const apiKey = process.env["GOODREADS_API_KEY"]

/**
 * Look up a book by title on Goodreads and resolve with the
 * `average_rating` value of the first book in the parsed XML response.
 *
 * @param title - Book title to search for.
 * @returns A promise that resolves with `average_rating` as produced by
 *   xml2js, and rejects on a request error, an XML parse error, or a
 *   response that lacks the expected structure.
 */
const getRating = title => {
  return new Promise((resolve, reject) => {
    // Percent-encode the title so spaces, "&" and "#" survive in the query.
    const url = `https://www.goodreads.com/book/title.xml?key=${apiKey}&title=${encodeURIComponent(title)}`
    const request = https.get(url, res => {
      let xml = ''
      res.setEncoding('utf8')
      res.on('data', chunk => {
        xml += chunk
      })
      res.on('error', reject)
      res.on('end', () => {
        parseString(xml, (err, result) => {
          if (err) {
            reject(err)
            return
          }
          try {
            resolve(result.GoodreadsResponse.book[0].average_rating)
          } catch (shapeError) {
            // The document parsed but is not a book response; reject instead
            // of throwing inside the parser callback.
            reject(shapeError)
          }
        })
      })
    })
    // Without this handler a DNS or connection failure would leave the
    // promise pending forever.
    request.on('error', reject)
  })
}
import os, requests, json, bs4
# Goodreads API key; a missing variable raises KeyError when this line runs.
key = os.environ['GOODREADS_API_KEY']


def get_rating(title):
    """Get the average rating of the book from GoodReads, and return a float.

    Queries the book/title.xml endpoint with the module-level API key and the
    given title, then reads the text of the first average_rating element of
    the XML response.
    """
    # Pass the query through `params` so requests percent-encodes the title
    # (spaces, "&", "#", non-ASCII) instead of splicing it into the URL raw.
    response = requests.get(
        'https://www.goodreads.com/book/title.xml',
        params={'key': key, 'title': title},
    )
    soup = bs4.BeautifulSoup(response.text, 'xml')
    rating = soup.find('average_rating').text
    return float(rating)
require 'goodreads'
# Builds a Goodreads client, asks it for the book identified by +title+ and
# returns that book's +rating+.
#
# NOTE(review): the credentials are literal placeholder strings, and +title+
# is passed to Client#book - confirm both against the goodreads gem's API.
def get_rating_of_book_by_title(title)
  credentials = {
    api_key: "YOUR_API_KEY",
    api_secret: "YOUR_API_SECRET"
  }
  Goodreads::Client.new(**credentials).book(title).rating
end
// Goodreads API key, read from the environment.
const apiKey = process.env["GOODREADS_API_KEY"]

/**
 * Look up a book by title on Goodreads and return the text content of the
 * first `average_rating` element in the XML response.
 *
 * Any failure (network error, missing element) is logged with `console.log`
 * and the returned promise then resolves with `undefined`.
 *
 * @param title - Book title to search for.
 * @returns A promise for the rating text, or `undefined` after a logged error.
 */
const getRating = (title: string) => {
  // Percent-encode the title so spaces, "&" and "#" survive in the query.
  const url = `https://www.goodreads.com/book/title.xml?key=${apiKey}&title=${encodeURIComponent(title)}`
  return fetch(url)
    .then(res => res.text())
    .then(res => {
      const parser = new DOMParser();
      const xml = parser.parseFromString(res, "text/xml");
      const rating = xml.getElementsByTagName("average_rating")[0].textContent;
      return rating;
    })
    .catch(error => console.log(error));
}
package main
import ("log"; "net/http"; "net/url"; "os"; "strconv"; "github.com/beevik/etree")
var (
	// apiKey is the Goodreads API key, read from the environment at start-up.
	apiKey = os.Getenv("GOODREADS_API_KEY")
)

// GetRating gets the average rating of a book using the Goodreads API.
//
// It requests /book/title.xml with the API key and title as URL-encoded query
// parameters and parses the first average_rating element of the response as
// a float64.
//
// It returns (0, err) when the request fails, the body cannot be read as XML,
// or the rating text is not a valid number. When the response contains no
// average_rating element it logs that fact and returns (0, nil).
func GetRating(title string) (float64, error) {
	query := url.Values{
		"key":   {apiKey},
		"title": {title},
	}
	u := url.URL{
		Scheme:   "https",
		Host:     "www.goodreads.com",
		Path:     "/book/title.xml",
		RawQuery: query.Encode(),
	}
	resp, err := http.Get(u.String())
	if err != nil {
		return 0, err
	}
	defer resp.Body.Close()
	doc := etree.NewDocument()
	if _, err := doc.ReadFrom(resp.Body); err != nil {
		return 0, err
	}
	ratings := doc.FindElements("//average_rating")
	if len(ratings) == 0 {
		// The (0, nil) result is kept for existing callers, but it is
		// indistinguishable from a genuine rating of 0, so leave a trace.
		// This also uses the imported "log" package, which was otherwise
		// unused and would stop the file from compiling.
		log.Printf("GetRating: no average_rating element for title %q (HTTP status %s)", title, resp.Status)
		return 0, nil
	}
	rating, err := strconv.ParseFloat(ratings[0].Text(), 64)
	if err != nil {
		return 0, err
	}
	return rating, nil
}
Flight Reports
Hundreds of engineers, including our own, use GitHub Copilot every day.
This is the single most mind-blowing application of machine learning I’ve ever seen.
Mike Krieger // Co-founder, Instagram
GitHub Copilot works shockingly well. I will never develop software without it again.
Lars Gyrup Brink Nielsen
I was stunned when I started writing Clojure with GitHub Copilot and it filled an idiomatic namespace require, just like I was going to write it.
Gunnika Batra // Senior Analyst
Trying to code in an unfamiliar language by googling everything is like navigating a foreign country with just a phrasebook. Using GitHub Copilot is like hiring an interpreter.
Harri Edwards // Open AI
Don't fly solo
Developers all over the world use GitHub Copilot to code faster, focus on business logic over boilerplate, and do what matters most: building great software.
Which plan is right for you?
- Plugs right into your editor
- Turns natural language prompts into code
- Offers multi-line function suggestions
- Speeds up test generation
- Blocks suggestions matching public code
- Everything included in Copilot for Individuals, plus...
- Simple license management
- Organization-wide policy management
- Industry-leading privacy
Learn about GitHub Copilot Terms and Conditions
Frequently asked questions
General
What is GitHub Copilot?
GitHub Copilot is an AI pair programmer that helps you write code faster and with less work. It draws context from comments and code to suggest individual lines and whole functions instantly. GitHub Copilot is powered by OpenAI Codex, a generative pretrained language model created by OpenAI. It is available as an extension for Visual Studio Code, Visual Studio, Neovim, and the JetBrains suite of integrated development environments (IDEs).
What data has GitHub Copilot been trained on?
GitHub Copilot is powered by Codex, a generative pretrained AI model created by OpenAI. It has been trained on natural language text and source code from publicly available sources, including code in public repositories on GitHub.
Does GitHub Copilot write perfect code?
In a recent evaluation, we found that users accepted on average 26% of all completions shown by GitHub Copilot. We also found that on average more than 27% of developers’ code files were generated by GitHub Copilot, and in certain languages like Python that goes up to 40%. However, GitHub Copilot does not write perfect code. It is designed to generate the best code possible given the context it has access to, but it doesn’t test the code it suggests so the code may not always work, or even make sense. GitHub Copilot can only hold a very limited context, so it may not make use of helpful functions defined elsewhere in your project or even in the same file. And it may suggest old or deprecated uses of libraries and languages. When converting comments written in non-English to code, there may be performance disparities when compared to English. For suggested code, certain languages like Python, JavaScript, TypeScript, and Go might perform better compared to other programming languages.
Like any other code, code suggested by GitHub Copilot should be carefully tested, reviewed, and vetted. As the developer, you are always in charge.
Will GitHub Copilot help me write code for a new platform?
GitHub Copilot is trained on public code. When a new library, framework, or API is released, there is less public code available for the model to learn from. That reduces GitHub Copilot’s ability to provide suggestions for the new codebase. As more examples enter the public space, we integrate them into the training set and suggestion relevance improves. In the future, we will provide ways to highlight newer APIs and samples to raise their relevance in GitHub Copilot’s suggestions.
How does a customer get the most out of GitHub Copilot?
GitHub Copilot works best when you divide your code into small functions, use meaningful names for function parameters, and write good docstrings and comments as you go. It also seems to do best when it’s helping you navigate unfamiliar libraries or frameworks.
How can a customer contribute?
By using GitHub Copilot and sharing your feedback in the feedback forum, you help to improve GitHub Copilot. Please also report incidents (e.g., offensive output, code vulnerabilities, apparent personal information in code generation) directly to copilot-safety@github.com so that we can improve our safeguards. GitHub takes safety and security very seriously and we are committed to continually improving.
Human oversight
Can GitHub Copilot introduce insecure code in its suggestions?
Public code may contain insecure coding patterns, bugs, or references to outdated APIs or idioms. When GitHub Copilot synthesizes code suggestions based on this data, it can also synthesize code that contains these undesirable patterns. This is something we care a lot about at GitHub, and in recent years we’ve provided tools such as GitHub Actions, Dependabot, and CodeQL to open source projects to help improve code quality. Of course, you should always use GitHub Copilot together with good testing and code review practices and security tools, as well as your own judgment.
Does GitHub own the code generated by GitHub Copilot?
GitHub Copilot is a tool, like a compiler or a pen. GitHub does not own the suggestions GitHub Copilot provides to you. You are responsible for the code you write with GitHub Copilot’s help. We recommend that you carefully test, review, and vet the code before pushing it to production, as you would with any code you write that incorporates material you did not independently originate.
Does GitHub Copilot copy code from the training set?
GitHub Copilot’s suggestions are all generated through AI. GitHub Copilot generates new code in a probabilistic way, and the probability that it produces the same code as a snippet that occurred in training is low. The models do not contain a database of code, and they do not ‘look up’ snippets. Our latest internal research shows that about 1% of the time, a suggestion may contain some code snippets longer than ~150 characters that match the training set. Previous research showed that many of these cases happen when GitHub Copilot is unable to glean sufficient context from the code you are writing, or when there is a common, perhaps even universal, solution to the problem.
What can I do to reduce GitHub Copilot’s suggestion of code that matches public code?
We built a filter to help detect and suppress GitHub Copilot suggestions which contain code that matches public code on GitHub.
Copilot for Individuals users can choose to enable that filter during setup on their individual accounts. For Copilot for Business users, the Enterprise administrator controls how the filter is applied. They can control suggestions for all organizations or defer control to individual organization administrators. These organization administrators can turn the filter on or off during setup (assuming their Enterprise administrator has deferred control) for the users in their organization.
With the filter enabled, GitHub Copilot checks code suggestions with its surrounding code for matches or near matches (ignoring whitespace) against public code on GitHub of about 150 characters. If there is a match, the suggestion will not be shown to you. In addition, we have announced that we are building a feature that will provide a reference for suggestions that resemble public code on GitHub so that you can make a more informed decision about whether and how to use that code, as well as explore and learn how that code is used in other projects.
Just like when you write any code that uses material you did not independently originate, you should take precautions to understand how it works and ensure its suitability. These include rigorous testing, IP scanning, and checking for security vulnerabilities. You should make sure your IDE or editor does not automatically compile or run generated code before you review it.
Other than the filter, what other measures can I take to assess code suggested by GitHub Copilot?
You should take the same precautions as you would with any code you write that uses material you did not independently originate, and should take precautions to ensure its suitability. These include rigorous testing, IP scanning, and checking for security vulnerabilities. You should make sure your IDE or editor does not automatically compile or run generated code before you review it.
Fairness and broader impact
Will GitHub Copilot work as well using languages other than English?
Given public sources are predominantly in English, GitHub Copilot will likely work less well in scenarios where natural language prompts provided by the developer are not in English and/or are grammatically incorrect. Therefore, non-English speakers might experience a lower quality of service.
Does GitHub Copilot support accessibility features?
We are conducting internal testing of GitHub Copilot’s ease of use by developers with disabilities and working to ensure that GitHub Copilot is accessible to all developers. Please feel free to share your feedback on GitHub Copilot accessibility in our feedback forum.
Does GitHub Copilot produce offensive outputs?
GitHub Copilot includes filters to block offensive language in the prompts and to avoid synthesizing suggestions in sensitive contexts. We continue to work on improving the filter system to more intelligently detect and remove offensive outputs. If you see offensive outputs, please report them directly to copilot-safety@github.com so that we can improve our safeguards. GitHub takes this challenge very seriously and we are committed to addressing it.
How will advanced code generation tools like GitHub Copilot affect developer jobs?
Bringing in more intelligent systems has the potential to bring enormous change to the developer experience. We do not expect GitHub Copilot to replace developers. Rather, we expect GitHub Copilot to partner with developers, augment their capabilities, and enable them to be more productive, reduce manual tasks, and help them focus on interesting work. We also believe that GitHub Copilot has the potential to lower barriers to entry, enabling more people to explore software development, and join the next generation of developers. We are working to test these hypotheses with both internal and external research.
Privacy – Copilot for Business
What data does Copilot for Business collect?
GitHub Copilot relies on file content and additional data to work. It collects data to provide the service, some of which is then retained for further analysis and product improvements.
Copilot for Business collects data as described below:
User Engagement Data
When you use GitHub Copilot it will collect usage information about events generated when interacting with the IDE or editor. These events include user edit actions like completions accepted and dismissed, and error and general usage data to identify metrics like latency and features engagement. This information may include personal data, such as pseudonymous identifiers.
Code Snippets Data
GitHub Copilot transmits snippets of your code from your IDE to GitHub to provide Suggestions to you. Code snippets data is only transmitted in real-time to return Suggestions, and is discarded once a Suggestion is returned. Copilot for Business does not retain any Code Snippets Data.
How can users of Copilot for Business control use of their data?
User engagement data (which includes pseudonymous identifiers and general usage data), is required for the use of GitHub Copilot and will continue to be collected, processed, and shared with Microsoft and OpenAI when you use GitHub Copilot.
Copilot for Business does not retain any Code Snippets Data.
Privacy – Copilot for Individuals
What data does Copilot for Individuals collect?
GitHub Copilot relies on file content and additional data to work. It collects data to provide the service, some of which is then retained for further analysis and product improvements. GitHub Copilot collects the following data for individual users:
User Engagement Data
When you use GitHub Copilot it will collect usage information about events generated when interacting with the IDE or editor. These events include user edit actions like completions accepted and dismissed, and error and general usage data to identify metrics like latency and features engagement. This information may include personal data, such as pseudonymous identifiers.
Code Snippets Data
Depending on your preferred telemetry settings, GitHub Copilot may also collect and retain the following, collectively referred to as “code snippets”: source code that you are editing, related files and other files open in the same IDE or editor, URLs of repositories, and file paths.
How is the transmitted Code Snippets data protected?
We know that user edit actions, source code snippets, and URLs of repositories and file paths are sensitive data. Consequently, several measures of protection are applied, including:
- The transmitted data is encrypted in transit and at rest
- Access is strictly controlled. The data can only be accessed by (1) named GitHub personnel working on the GitHub Copilot team or on the GitHub platform health team, (2) Microsoft personnel working on or with the GitHub Copilot team, and (3) OpenAI personnel who work on GitHub Copilot
- Role-based access controls and multi-factor authentication are required for personnel accessing code snippet data
How can users of Copilot for Individuals control use of their Code Snippets Data?
GitHub Copilot gives you choices about how it uses the data it collects.
User Engagement Data (which includes pseudonymous identifiers and general usage data), is required for the use of GitHub Copilot and will continue to be collected, processed, and shared with Microsoft and OpenAI as you use GitHub Copilot.
Users of Copilot for Individuals can choose whether Code Snippets Data is retained by GitHub and further processed and shared with Microsoft and OpenAI by adjusting user settings.
Users of Copilot for Individuals can request deletion of Code Snippet Data associated with their GitHub identity by filling out a support ticket.
Does GitHub Copilot ever output personal data?
Because Codex, the model powering GitHub Copilot, was trained on publicly available code, its training set included public personal data that was included in that code. From our internal testing, we found it to be very rare that GitHub Copilot suggestions included personal data verbatim from the training set. In some cases, the model will suggest what appears to be personal data – email addresses, phone numbers, etc. – but those suggestions are actually fictitious information synthesized from patterns in training data and therefore do not relate to any particular individual. For example, when one of our engineers prompted GitHub Copilot with, “My name is Mona and my birthdate is,” GitHub Copilot suggested a random, fictitious date of “December 12,” which is not Mona’s actual birthdate. We have also implemented a filter that blocks emails when shown in standard formats, but it’s still possible to get the model to suggest this sort of content if you try hard enough. We will keep improving the filter system to be more intelligent to detect and remove more personal data from the GitHub Copilot suggestions.
Where can I learn more about GitHub Privacy and data protection?
For more information on how GitHub processes and uses personal data, please see the GitHub Copilot Privacy Statement.